minishell/parser/nsrc/get_changed_ranges.c
2024-06-24 00:20:08 +02:00

547 lines
14 KiB
C

#include "./get_changed_ranges.h"
#include "./error_costs.h"
#include "./language.h"
#include "./subtree.h"
#include "./tree_cursor.h"
#include <assert.h>
// #define DEBUG_GET_CHANGED_RANGES
static void ts_range_array_add(TSRangeArray *self, Length start, Length end)
{
if (self->size > 0)
{
TSRange *last_range = array_back(self);
if (start.bytes <= last_range->end_byte)
{
last_range->end_byte = end.bytes;
last_range->end_point = end.extent;
return;
}
}
if (start.bytes < end.bytes)
{
TSRange range = {start.extent, end.extent, start.bytes, end.bytes};
array_push(self, range);
}
}
bool ts_range_array_intersects(const TSRangeArray *self, unsigned start_index, uint32_t start_byte, uint32_t end_byte)
{
for (unsigned i = start_index; i < self->size; i++)
{
TSRange *range = &self->contents[i];
if (range->end_byte > start_byte)
{
if (range->start_byte >= end_byte)
break;
return true;
}
}
return false;
}
void ts_range_array_get_changed_ranges(const TSRange *old_ranges, unsigned old_range_count, const TSRange *new_ranges,
unsigned new_range_count, TSRangeArray *differences)
{
unsigned new_index = 0;
unsigned old_index = 0;
Length current_position = length_zero();
bool in_old_range = false;
bool in_new_range = false;
while (old_index < old_range_count || new_index < new_range_count)
{
const TSRange *old_range = &old_ranges[old_index];
const TSRange *new_range = &new_ranges[new_index];
Length next_old_position;
if (in_old_range)
{
next_old_position = (Length){old_range->end_byte, old_range->end_point};
}
else if (old_index < old_range_count)
{
next_old_position = (Length){old_range->start_byte, old_range->start_point};
}
else
{
next_old_position = LENGTH_MAX;
}
Length next_new_position;
if (in_new_range)
{
next_new_position = (Length){new_range->end_byte, new_range->end_point};
}
else if (new_index < new_range_count)
{
next_new_position = (Length){new_range->start_byte, new_range->start_point};
}
else
{
next_new_position = LENGTH_MAX;
}
if (next_old_position.bytes < next_new_position.bytes)
{
if (in_old_range != in_new_range)
{
ts_range_array_add(differences, current_position, next_old_position);
}
if (in_old_range)
old_index++;
current_position = next_old_position;
in_old_range = !in_old_range;
}
else if (next_new_position.bytes < next_old_position.bytes)
{
if (in_old_range != in_new_range)
{
ts_range_array_add(differences, current_position, next_new_position);
}
if (in_new_range)
new_index++;
current_position = next_new_position;
in_new_range = !in_new_range;
}
else
{
if (in_old_range != in_new_range)
{
ts_range_array_add(differences, current_position, next_new_position);
}
if (in_old_range)
old_index++;
if (in_new_range)
new_index++;
in_old_range = !in_old_range;
in_new_range = !in_new_range;
current_position = next_new_position;
}
}
}
typedef struct Iterator
{
TreeCursor cursor;
const TSLanguage *language;
unsigned visible_depth;
bool in_padding;
} Iterator;
static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLanguage *language)
{
array_clear(&cursor->stack);
array_push(&cursor->stack, ((TreeCursorEntry){
.subtree = tree,
.position = length_zero(),
.child_index = 0,
.structural_child_index = 0,
}));
return (Iterator){
.cursor = *cursor,
.language = language,
.visible_depth = 1,
.in_padding = false,
};
}
static bool iterator_done(Iterator *self)
{
return self->cursor.stack.size == 0;
}
static Length iterator_start_position(Iterator *self)
{
TreeCursorEntry entry = *array_back(&self->cursor.stack);
if (self->in_padding)
{
return entry.position;
}
else
{
return length_add(entry.position, ts_subtree_padding(*entry.subtree));
}
}
static Length iterator_end_position(Iterator *self)
{
TreeCursorEntry entry = *array_back(&self->cursor.stack);
Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree));
if (self->in_padding)
{
return result;
}
else
{
return length_add(result, ts_subtree_size(*entry.subtree));
}
}
static bool iterator_tree_is_visible(const Iterator *self)
{
TreeCursorEntry entry = *array_back(&self->cursor.stack);
if (ts_subtree_visible(*entry.subtree))
return true;
if (self->cursor.stack.size > 1)
{
Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
return ts_language_alias_at(self->language, parent.ptr->inner.non_terminal.production_id, entry.structural_child_index) != 0;
}
return false;
}
static void iterator_get_visible_state(const Iterator *self, Subtree *tree, TSSymbol *alias_symbol, uint32_t *start_byte)
{
uint32_t i = self->cursor.stack.size - 1;
if (self->in_padding)
{
if (i == 0)
return;
i--;
}
for (; i + 1 > 0; i--)
{
TreeCursorEntry entry = self->cursor.stack.contents[i];
if (i > 0)
{
const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
*alias_symbol = ts_language_alias_at(self->language, parent->ptr->inner.non_terminal.production_id, entry.structural_child_index);
}
if (ts_subtree_visible(*entry.subtree) || *alias_symbol)
{
*tree = *entry.subtree;
*start_byte = entry.position.bytes;
break;
}
}
}
static void iterator_ascend(Iterator *self)
{
if (iterator_done(self))
return;
if (iterator_tree_is_visible(self) && !self->in_padding)
self->visible_depth--;
if (array_back(&self->cursor.stack)->child_index > 0)
self->in_padding = false;
self->cursor.stack.size--;
}
static bool iterator_descend(Iterator *self, uint32_t goal_position)
{
if (self->in_padding)
return false;
bool did_descend = false;
do
{
did_descend = false;
TreeCursorEntry entry = *array_back(&self->cursor.stack);
Length position = entry.position;
uint32_t structural_child_index = 0;
for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++)
{
const Subtree *child = &ts_subtree_children(*entry.subtree)[i];
Length child_left = length_add(position, ts_subtree_padding(*child));
Length child_right = length_add(child_left, ts_subtree_size(*child));
if (child_right.bytes > goal_position)
{
array_push(&self->cursor.stack, ((TreeCursorEntry){
.subtree = child,
.position = position,
.child_index = i,
.structural_child_index = structural_child_index,
}));
if (iterator_tree_is_visible(self))
{
if (child_left.bytes > goal_position)
{
self->in_padding = true;
}
else
{
self->visible_depth++;
}
return true;
}
did_descend = true;
break;
}
position = child_right;
if (!ts_subtree_extra(*child))
structural_child_index++;
}
} while (did_descend);
return false;
}
static void iterator_advance(Iterator *self)
{
if (self->in_padding)
{
self->in_padding = false;
if (iterator_tree_is_visible(self))
{
self->visible_depth++;
}
else
{
iterator_descend(self, 0);
}
return;
}
for (;;)
{
if (iterator_tree_is_visible(self))
self->visible_depth--;
TreeCursorEntry entry = array_pop(&self->cursor.stack);
if (iterator_done(self))
return;
const Subtree *parent = array_back(&self->cursor.stack)->subtree;
uint32_t child_index = entry.child_index + 1;
if (ts_subtree_child_count(*parent) > child_index)
{
Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
uint32_t structural_child_index = entry.structural_child_index;
if (!ts_subtree_extra(*entry.subtree))
structural_child_index++;
const Subtree *next_child = &ts_subtree_children(*parent)[child_index];
array_push(&self->cursor.stack, ((TreeCursorEntry){
.subtree = next_child,
.position = position,
.child_index = child_index,
.structural_child_index = structural_child_index,
}));
if (iterator_tree_is_visible(self))
{
if (ts_subtree_padding(*next_child).bytes > 0)
{
self->in_padding = true;
}
else
{
self->visible_depth++;
}
}
else
{
iterator_descend(self, 0);
}
break;
}
}
}
typedef enum IteratorComparison
{
IteratorDiffers,
IteratorMayDiffer,
IteratorMatches,
} IteratorComparison;
static IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *new_iter)
{
Subtree old_tree = NULL_SUBTREE;
Subtree new_tree = NULL_SUBTREE;
uint32_t old_start = 0;
uint32_t new_start = 0;
TSSymbol old_alias_symbol = 0;
TSSymbol new_alias_symbol = 0;
iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);
if (!old_tree.ptr && !new_tree.ptr)
return IteratorMatches;
if (!old_tree.ptr || !new_tree.ptr)
return IteratorDiffers;
if (old_alias_symbol == new_alias_symbol && ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree))
{
if (old_start == new_start && !ts_subtree_has_changes(old_tree) && ts_subtree_symbol(old_tree) != ts_builtin_sym_error &&
ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes && ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE &&
ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE &&
(ts_subtree_parse_state(old_tree) == ERROR_STATE) == (ts_subtree_parse_state(new_tree) == ERROR_STATE))
{
return IteratorMatches;
}
else
{
return IteratorMayDiffer;
}
}
return IteratorDiffers;
}
#ifdef DEBUG_GET_CHANGED_RANGES
static inline void iterator_print_state(Iterator *self)
{
TreeCursorEntry entry = *array_back(&self->cursor.stack);
TSPoint start = iterator_start_position(self).extent;
TSPoint end = iterator_end_position(self).extent;
const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree));
printf("(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", name, self->in_padding ? "(p)" : " ", self->visible_depth, start.row + 1,
start.column, end.row + 1, end.column);
}
#endif
unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *new_tree, TreeCursor *cursor1, TreeCursor *cursor2,
const TSLanguage *language, const TSRangeArray *included_range_differences, TSRange **ranges)
{
TSRangeArray results = array_new();
Iterator old_iter = iterator_new(cursor1, old_tree, language);
Iterator new_iter = iterator_new(cursor2, new_tree, language);
unsigned included_range_difference_index = 0;
Length position = iterator_start_position(&old_iter);
Length next_position = iterator_start_position(&new_iter);
if (position.bytes < next_position.bytes)
{
ts_range_array_add(&results, position, next_position);
position = next_position;
}
else if (position.bytes > next_position.bytes)
{
ts_range_array_add(&results, next_position, position);
next_position = position;
}
do
{
#ifdef DEBUG_GET_CHANGED_RANGES
printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
iterator_print_state(&old_iter);
printf("\tvs\t");
iterator_print_state(&new_iter);
puts("");
#endif
// Compare the old and new subtrees.
IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
// Even if the two subtrees appear to be identical, they could differ
// internally if they contain a range of text that was previously
// excluded from the parse, and is now included, or vice-versa.
if (comparison == IteratorMatches && ts_range_array_intersects(included_range_differences, included_range_difference_index,
position.bytes, iterator_end_position(&old_iter).bytes))
{
comparison = IteratorMayDiffer;
}
bool is_changed = false;
switch (comparison)
{
// If the subtrees are definitely identical, move to the end
// of both subtrees.
case IteratorMatches:
next_position = iterator_end_position(&old_iter);
break;
// If the subtrees might differ internally, descend into both
// subtrees, finding the first child that spans the current position.
case IteratorMayDiffer:
if (iterator_descend(&old_iter, position.bytes))
{
if (!iterator_descend(&new_iter, position.bytes))
{
is_changed = true;
next_position = iterator_end_position(&old_iter);
}
}
else if (iterator_descend(&new_iter, position.bytes))
{
is_changed = true;
next_position = iterator_end_position(&new_iter);
}
else
{
next_position = length_min(iterator_end_position(&old_iter), iterator_end_position(&new_iter));
}
break;
// If the subtrees are different, record a change and then move
// to the end of both subtrees.
case IteratorDiffers:
is_changed = true;
next_position = length_min(iterator_end_position(&old_iter), iterator_end_position(&new_iter));
break;
}
// Ensure that both iterators are caught up to the current position.
while (!iterator_done(&old_iter) && iterator_end_position(&old_iter).bytes <= next_position.bytes)
iterator_advance(&old_iter);
while (!iterator_done(&new_iter) && iterator_end_position(&new_iter).bytes <= next_position.bytes)
iterator_advance(&new_iter);
// Ensure that both iterators are at the same depth in the tree.
while (old_iter.visible_depth > new_iter.visible_depth)
{
iterator_ascend(&old_iter);
}
while (new_iter.visible_depth > old_iter.visible_depth)
{
iterator_ascend(&new_iter);
}
if (is_changed)
{
#ifdef DEBUG_GET_CHANGED_RANGES
printf(" change: [[%u, %u] - [%u, %u]]\n", position.extent.row + 1, position.extent.column, next_position.extent.row + 1,
next_position.extent.column);
#endif
ts_range_array_add(&results, position, next_position);
}
position = next_position;
// Keep track of the current position in the included range differences
// array in order to avoid scanning the entire array on each iteration.
while (included_range_difference_index < included_range_differences->size)
{
const TSRange *range = &included_range_differences->contents[included_range_difference_index];
if (range->end_byte <= position.bytes)
{
included_range_difference_index++;
}
else
{
break;
}
}
} while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
Length old_size = ts_subtree_total_size(*old_tree);
Length new_size = ts_subtree_total_size(*new_tree);
if (old_size.bytes < new_size.bytes)
{
ts_range_array_add(&results, old_size, new_size);
}
else if (new_size.bytes < old_size.bytes)
{
ts_range_array_add(&results, new_size, old_size);
}
*cursor1 = old_iter.cursor;
*cursor2 = new_iter.cursor;
*ranges = results.contents;
return results.size;
}