From d9bf7fa721ebd771a3460f60445cc2c26c95bf72 Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Sun, 30 Jun 2024 19:24:05 +0200 Subject: [PATCH] trimmed the fat --- parser/src/get_changed_ranges.c | 501 ---- parser/src/get_changed_ranges.h | 36 - parser/src/lib.c | 3 +- parser/src/parser.c | 70 +- parser/src/query.c | 4134 ------------------------------- parser/src/tree.c | 242 +- parser/src/unicode.h | 12 +- 7 files changed, 156 insertions(+), 4842 deletions(-) delete mode 100644 parser/src/get_changed_ranges.c delete mode 100644 parser/src/get_changed_ranges.h delete mode 100644 parser/src/query.c diff --git a/parser/src/get_changed_ranges.c b/parser/src/get_changed_ranges.c deleted file mode 100644 index bcf8da94..00000000 --- a/parser/src/get_changed_ranges.c +++ /dev/null @@ -1,501 +0,0 @@ -#include "./get_changed_ranges.h" -#include "./subtree.h" -#include "./language.h" -#include "./error_costs.h" -#include "./tree_cursor.h" -#include - -// #define DEBUG_GET_CHANGED_RANGES - -static void ts_range_array_add( - TSRangeArray *self, - Length start, - Length end -) { - if (self->size > 0) { - TSRange *last_range = array_back(self); - if (start.bytes <= last_range->end_byte) { - last_range->end_byte = end.bytes; - last_range->end_point = end.extent; - return; - } - } - - if (start.bytes < end.bytes) { - TSRange range = { start.extent, end.extent, start.bytes, end.bytes }; - array_push(self, range); - } -} - -bool ts_range_array_intersects( - const TSRangeArray *self, - unsigned start_index, - uint32_t start_byte, - uint32_t end_byte -) { - for (unsigned i = start_index; i < self->size; i++) { - TSRange *range = &self->contents[i]; - if (range->end_byte > start_byte) { - if (range->start_byte >= end_byte) break; - return true; - } - } - return false; -} - -void ts_range_array_get_changed_ranges( - const TSRange *old_ranges, unsigned old_range_count, - const TSRange *new_ranges, unsigned new_range_count, - TSRangeArray *differences -) { - unsigned new_index = 0; - unsigned old_index = 0; - Length current_position = length_zero(); - bool in_old_range = false; - bool in_new_range = false; - - while (old_index < old_range_count || new_index < new_range_count) { - const TSRange *old_range = &old_ranges[old_index]; - const TSRange *new_range = &new_ranges[new_index]; - - Length next_old_position; - if (in_old_range) { - next_old_position = (Length) {old_range->end_byte, old_range->end_point}; - } else if (old_index < old_range_count) { - next_old_position = (Length) {old_range->start_byte, old_range->start_point}; - } else { - next_old_position = LENGTH_MAX; - } - - Length next_new_position; - if (in_new_range) { - next_new_position = (Length) {new_range->end_byte, new_range->end_point}; - } else if (new_index < new_range_count) { - next_new_position = (Length) {new_range->start_byte, new_range->start_point}; - } else { - next_new_position = LENGTH_MAX; - } - - if (next_old_position.bytes < next_new_position.bytes) { - if (in_old_range != in_new_range) { - ts_range_array_add(differences, current_position, next_old_position); - } - if (in_old_range) old_index++; - current_position = next_old_position; - in_old_range = !in_old_range; - } else if (next_new_position.bytes < next_old_position.bytes) { - if (in_old_range != in_new_range) { - ts_range_array_add(differences, current_position, next_new_position); - } - if (in_new_range) new_index++; - current_position = next_new_position; - in_new_range = !in_new_range; - } else { - if (in_old_range != in_new_range) { - ts_range_array_add(differences, current_position, next_new_position); - } - if (in_old_range) old_index++; - if (in_new_range) new_index++; - in_old_range = !in_old_range; - in_new_range = !in_new_range; - current_position = next_new_position; - } - } -} - -typedef struct { - TreeCursor cursor; - const TSLanguage *language; - unsigned visible_depth; - bool in_padding; -} Iterator; - -static Iterator iterator_new( - TreeCursor *cursor, - const Subtree *tree, - const TSLanguage *language -) { - array_clear(&cursor->stack); - array_push(&cursor->stack, ((TreeCursorEntry) { - .subtree = tree, - .position = length_zero(), - .child_index = 0, - .structural_child_index = 0, - })); - return (Iterator) { - .cursor = *cursor, - .language = language, - .visible_depth = 1, - .in_padding = false, - }; -} - -static bool iterator_done(Iterator *self) { - return self->cursor.stack.size == 0; -} - -static Length iterator_start_position(Iterator *self) { - TreeCursorEntry entry = *array_back(&self->cursor.stack); - if (self->in_padding) { - return entry.position; - } else { - return length_add(entry.position, ts_subtree_padding(*entry.subtree)); - } -} - -static Length iterator_end_position(Iterator *self) { - TreeCursorEntry entry = *array_back(&self->cursor.stack); - Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree)); - if (self->in_padding) { - return result; - } else { - return length_add(result, ts_subtree_size(*entry.subtree)); - } -} - -static bool iterator_tree_is_visible(const Iterator *self) { - TreeCursorEntry entry = *array_back(&self->cursor.stack); - if (ts_subtree_visible(*entry.subtree)) return true; - if (self->cursor.stack.size > 1) { - Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; - return ts_language_alias_at( - self->language, - parent.ptr->production_id, - entry.structural_child_index - ) != 0; - } - return false; -} - -static void iterator_get_visible_state( - const Iterator *self, - Subtree *tree, - TSSymbol *alias_symbol, - uint32_t *start_byte -) { - uint32_t i = self->cursor.stack.size - 1; - - if (self->in_padding) { - if (i == 0) return; - i--; - } - - for (; i + 1 > 0; i--) { - TreeCursorEntry entry = self->cursor.stack.contents[i]; - - if (i > 0) { - const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; - *alias_symbol = ts_language_alias_at( - self->language, - parent->ptr->production_id, - entry.structural_child_index - ); - } - - if (ts_subtree_visible(*entry.subtree) || *alias_symbol) { - *tree = *entry.subtree; - *start_byte = entry.position.bytes; - break; - } - } -} - -static void iterator_ascend(Iterator *self) { - if (iterator_done(self)) return; - if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--; - if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false; - self->cursor.stack.size--; -} - -static bool iterator_descend(Iterator *self, uint32_t goal_position) { - if (self->in_padding) return false; - - bool did_descend = false; - do { - did_descend = false; - TreeCursorEntry entry = *array_back(&self->cursor.stack); - Length position = entry.position; - uint32_t structural_child_index = 0; - for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { - const Subtree *child = &ts_subtree_children(*entry.subtree)[i]; - Length child_left = length_add(position, ts_subtree_padding(*child)); - Length child_right = length_add(child_left, ts_subtree_size(*child)); - - if (child_right.bytes > goal_position) { - array_push(&self->cursor.stack, ((TreeCursorEntry) { - .subtree = child, - .position = position, - .child_index = i, - .structural_child_index = structural_child_index, - })); - - if (iterator_tree_is_visible(self)) { - if (child_left.bytes > goal_position) { - self->in_padding = true; - } else { - self->visible_depth++; - } - return true; - } - - did_descend = true; - break; - } - - position = child_right; - if (!ts_subtree_extra(*child)) structural_child_index++; - } - } while (did_descend); - - return false; -} - -static void iterator_advance(Iterator *self) { - if (self->in_padding) { - self->in_padding = false; - if (iterator_tree_is_visible(self)) { - self->visible_depth++; - } else { - iterator_descend(self, 0); - } - return; - } - - for (;;) { - if (iterator_tree_is_visible(self)) self->visible_depth--; - TreeCursorEntry entry = array_pop(&self->cursor.stack); - if (iterator_done(self)) return; - - const Subtree *parent = array_back(&self->cursor.stack)->subtree; - uint32_t child_index = entry.child_index + 1; - if (ts_subtree_child_count(*parent) > child_index) { - Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); - uint32_t structural_child_index = entry.structural_child_index; - if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; - const Subtree *next_child = &ts_subtree_children(*parent)[child_index]; - - array_push(&self->cursor.stack, ((TreeCursorEntry) { - .subtree = next_child, - .position = position, - .child_index = child_index, - .structural_child_index = structural_child_index, - })); - - if (iterator_tree_is_visible(self)) { - if (ts_subtree_padding(*next_child).bytes > 0) { - self->in_padding = true; - } else { - self->visible_depth++; - } - } else { - iterator_descend(self, 0); - } - break; - } - } -} - -typedef enum { - IteratorDiffers, - IteratorMayDiffer, - IteratorMatches, -} IteratorComparison; - -static IteratorComparison iterator_compare( - const Iterator *old_iter, - const Iterator *new_iter -) { - Subtree old_tree = NULL_SUBTREE; - Subtree new_tree = NULL_SUBTREE; - uint32_t old_start = 0; - uint32_t new_start = 0; - TSSymbol old_alias_symbol = 0; - TSSymbol new_alias_symbol = 0; - iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start); - iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start); - - if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches; - if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers; - - if ( - old_alias_symbol == new_alias_symbol && - ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree) - ) { - if (old_start == new_start && - !ts_subtree_has_changes(old_tree) && - ts_subtree_symbol(old_tree) != ts_builtin_sym_error && - ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes && - ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE && - ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE && - (ts_subtree_parse_state(old_tree) == ERROR_STATE) == - (ts_subtree_parse_state(new_tree) == ERROR_STATE)) { - return IteratorMatches; - } else { - return IteratorMayDiffer; - } - } - - return IteratorDiffers; -} - -#ifdef DEBUG_GET_CHANGED_RANGES -static inline void iterator_print_state(Iterator *self) { - TreeCursorEntry entry = *array_back(&self->cursor.stack); - TSPoint start = iterator_start_position(self).extent; - TSPoint end = iterator_end_position(self).extent; - const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree)); - printf( - "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", - name, self->in_padding ? "(p)" : " ", - self->visible_depth, - start.row + 1, start.column, - end.row + 1, end.column - ); -} -#endif - -unsigned ts_subtree_get_changed_ranges( - const Subtree *old_tree, const Subtree *new_tree, - TreeCursor *cursor1, TreeCursor *cursor2, - const TSLanguage *language, - const TSRangeArray *included_range_differences, - TSRange **ranges -) { - TSRangeArray results = array_new(); - - Iterator old_iter = iterator_new(cursor1, old_tree, language); - Iterator new_iter = iterator_new(cursor2, new_tree, language); - - unsigned included_range_difference_index = 0; - - Length position = iterator_start_position(&old_iter); - Length next_position = iterator_start_position(&new_iter); - if (position.bytes < next_position.bytes) { - ts_range_array_add(&results, position, next_position); - position = next_position; - } else if (position.bytes > next_position.bytes) { - ts_range_array_add(&results, next_position, position); - next_position = position; - } - - do { - #ifdef DEBUG_GET_CHANGED_RANGES - printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column); - iterator_print_state(&old_iter); - printf("\tvs\t"); - iterator_print_state(&new_iter); - puts(""); - #endif - - // Compare the old and new subtrees. - IteratorComparison comparison = iterator_compare(&old_iter, &new_iter); - - // Even if the two subtrees appear to be identical, they could differ - // internally if they contain a range of text that was previously - // excluded from the parse, and is now included, or vice-versa. - if (comparison == IteratorMatches && ts_range_array_intersects( - included_range_differences, - included_range_difference_index, - position.bytes, - iterator_end_position(&old_iter).bytes - )) { - comparison = IteratorMayDiffer; - } - - bool is_changed = false; - switch (comparison) { - // If the subtrees are definitely identical, move to the end - // of both subtrees. - case IteratorMatches: - next_position = iterator_end_position(&old_iter); - break; - - // If the subtrees might differ internally, descend into both - // subtrees, finding the first child that spans the current position. - case IteratorMayDiffer: - if (iterator_descend(&old_iter, position.bytes)) { - if (!iterator_descend(&new_iter, position.bytes)) { - is_changed = true; - next_position = iterator_end_position(&old_iter); - } - } else if (iterator_descend(&new_iter, position.bytes)) { - is_changed = true; - next_position = iterator_end_position(&new_iter); - } else { - next_position = length_min( - iterator_end_position(&old_iter), - iterator_end_position(&new_iter) - ); - } - break; - - // If the subtrees are different, record a change and then move - // to the end of both subtrees. - case IteratorDiffers: - is_changed = true; - next_position = length_min( - iterator_end_position(&old_iter), - iterator_end_position(&new_iter) - ); - break; - } - - // Ensure that both iterators are caught up to the current position. - while ( - !iterator_done(&old_iter) && - iterator_end_position(&old_iter).bytes <= next_position.bytes - ) iterator_advance(&old_iter); - while ( - !iterator_done(&new_iter) && - iterator_end_position(&new_iter).bytes <= next_position.bytes - ) iterator_advance(&new_iter); - - // Ensure that both iterators are at the same depth in the tree. - while (old_iter.visible_depth > new_iter.visible_depth) { - iterator_ascend(&old_iter); - } - while (new_iter.visible_depth > old_iter.visible_depth) { - iterator_ascend(&new_iter); - } - - if (is_changed) { - #ifdef DEBUG_GET_CHANGED_RANGES - printf( - " change: [[%u, %u] - [%u, %u]]\n", - position.extent.row + 1, position.extent.column, - next_position.extent.row + 1, next_position.extent.column - ); - #endif - - ts_range_array_add(&results, position, next_position); - } - - position = next_position; - - // Keep track of the current position in the included range differences - // array in order to avoid scanning the entire array on each iteration. - while (included_range_difference_index < included_range_differences->size) { - const TSRange *range = &included_range_differences->contents[ - included_range_difference_index - ]; - if (range->end_byte <= position.bytes) { - included_range_difference_index++; - } else { - break; - } - } - } while (!iterator_done(&old_iter) && !iterator_done(&new_iter)); - - Length old_size = ts_subtree_total_size(*old_tree); - Length new_size = ts_subtree_total_size(*new_tree); - if (old_size.bytes < new_size.bytes) { - ts_range_array_add(&results, old_size, new_size); - } else if (new_size.bytes < old_size.bytes) { - ts_range_array_add(&results, new_size, old_size); - } - - *cursor1 = old_iter.cursor; - *cursor2 = new_iter.cursor; - *ranges = results.contents; - return results.size; -} diff --git a/parser/src/get_changed_ranges.h b/parser/src/get_changed_ranges.h deleted file mode 100644 index a1f1dbb4..00000000 --- a/parser/src/get_changed_ranges.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_ -#define TREE_SITTER_GET_CHANGED_RANGES_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./tree_cursor.h" -#include "./subtree.h" - -typedef Array(TSRange) TSRangeArray; - -void ts_range_array_get_changed_ranges( - const TSRange *old_ranges, unsigned old_range_count, - const TSRange *new_ranges, unsigned new_range_count, - TSRangeArray *differences -); - -bool ts_range_array_intersects( - const TSRangeArray *self, unsigned start_index, - uint32_t start_byte, uint32_t end_byte -); - -unsigned ts_subtree_get_changed_ranges( - const Subtree *old_tree, const Subtree *new_tree, - TreeCursor *cursor1, TreeCursor *cursor2, - const TSLanguage *language, - const TSRangeArray *included_range_differences, - TSRange **ranges -); - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_GET_CHANGED_RANGES_H_ diff --git a/parser/src/lib.c b/parser/src/lib.c index dab396b9..5644226b 100644 --- a/parser/src/lib.c +++ b/parser/src/lib.c @@ -1,12 +1,11 @@ #define _POSIX_C_SOURCE 200112L #include "./alloc.c" -#include "./get_changed_ranges.c" +// #include "./get_changed_ranges.c" #include "./language.c" #include "./lexer.c" #include "./node.c" #include "./parser.c" -#include "./query.c" #include "./stack.c" #include "./subtree.c" #include "./tree.c" diff --git a/parser/src/parser.c b/parser/src/parser.c index 5dbf93d2..2a1a841b 100644 --- a/parser/src/parser.c +++ b/parser/src/parser.c @@ -5,7 +5,6 @@ #include "./atomic.h" #include "./clock.h" #include "./error_costs.h" -#include "./get_changed_ranges.h" #include "./language.h" #include "./length.h" #include "./lexer.h" @@ -126,7 +125,6 @@ struct TSParser unsigned operation_count; const volatile size_t *cancellation_flag; Subtree old_tree; - TSRangeArray included_range_differences; unsigned included_range_difference_index; bool has_scanner_error; }; @@ -709,11 +707,11 @@ static void ts_parser__set_cached_token(TSParser *self, uint32_t byte_index, Sub cache->last_external_token = last_external_token; } -static bool ts_parser__has_included_range_difference(const TSParser *self, uint32_t start_position, uint32_t end_position) -{ - return ts_range_array_intersects(&self->included_range_differences, self->included_range_difference_index, start_position, - end_position); -} +// static bool ts_parser__has_included_range_difference(const TSParser *self, uint32_t start_position, uint32_t end_position) +// { +// return ts_range_array_intersects(&self->included_range_differences, self->included_range_difference_index, start_position, +// end_position); +// } static Subtree ts_parser__reuse_node(TSParser *self, StackVersion version, TSStateId *state, uint32_t position, Subtree last_external_token, TableEntry *table_entry) @@ -769,10 +767,10 @@ static Subtree ts_parser__reuse_node(TSParser *self, StackVersion version, TSSta { reason = "is_fragile"; } - else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) - { - reason = "contains_different_included_range"; - } + // else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) + // { + // reason = "contains_different_included_range"; + // } if (reason) { @@ -1857,7 +1855,6 @@ TSParser *ts_parser_new(void) self->end_clock = clock_null(); self->operation_count = 0; self->old_tree = NULL_SUBTREE; - self->included_range_differences = (TSRangeArray)array_new(); self->included_range_difference_index = 0; ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); return self; @@ -1874,10 +1871,6 @@ void ts_parser_delete(TSParser *self) { array_delete(&self->reduce_actions); } - if (self->included_range_differences.contents) - { - array_delete(&self->included_range_differences); - } if (self->old_tree.ptr) { ts_subtree_release(&self->tree_pool, self->old_tree); @@ -2011,6 +2004,7 @@ void ts_parser_reset(TSParser *self) TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { + (void)(old_tree); TSTree *result = NULL; if (!self->language || !input.read) return NULL; @@ -2023,7 +2017,6 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) } ts_lexer_set_input(&self->lexer, input); - array_clear(&self->included_range_differences); self->included_range_difference_index = 0; if (ts_parser_has_outstanding_parse(self)) @@ -2036,22 +2029,25 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) if (self->has_scanner_error) goto exit; - if (old_tree) + // if (old_tree) + // { + // ts_subtree_retain(old_tree->root); + // self->old_tree = old_tree->root; + // ts_range_array_get_changed_ranges(old_tree->included_ranges, old_tree->included_range_count, self->lexer.included_ranges, + // self->lexer.included_range_count, &self->included_range_differences); + // reusable_node_reset(&self->reusable_node, old_tree->root); + // LOG("parse_after_edit"); + // LOG_TREE(self->old_tree); + // for (unsigned i = 0; i < self->included_range_differences.size; i++) + // { + // TSRange *range = &self->included_range_differences.contents[i]; + // LOG("different_included_range %u - %u", range->start_byte, range->end_byte); + // } + // } + // else + if (false) { - ts_subtree_retain(old_tree->root); - self->old_tree = old_tree->root; - ts_range_array_get_changed_ranges(old_tree->included_ranges, old_tree->included_range_count, self->lexer.included_ranges, - self->lexer.included_range_count, &self->included_range_differences); - reusable_node_reset(&self->reusable_node, old_tree->root); - LOG("parse_after_edit"); - LOG_TREE(self->old_tree); - for (unsigned i = 0; i < self->included_range_differences.size; i++) - { - TSRange *range = &self->included_range_differences.contents[i]; - LOG("different_included_range %u - %u", range->start_byte, range->end_byte); - } } - else { reusable_node_clear(&self->reusable_node); LOG("new_parse"); @@ -2112,18 +2108,6 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) break; } - while (self->included_range_difference_index < self->included_range_differences.size) - { - TSRange *range = &self->included_range_differences.contents[self->included_range_difference_index]; - if (range->end_byte <= position) - { - self->included_range_difference_index++; - } - else - { - break; - } - } } while (version_count != 0); assert(self->finished_tree.ptr); diff --git a/parser/src/query.c b/parser/src/query.c deleted file mode 100644 index 1b6e04b6..00000000 --- a/parser/src/query.c +++ /dev/null @@ -1,4134 +0,0 @@ -#include "api.h" -#include "./alloc.h" -#include "./array.h" -#include "./language.h" -#include "./point.h" -#include "./tree_cursor.h" -#include "./unicode.h" -#include - -// #define DEBUG_ANALYZE_QUERY -// #define DEBUG_EXECUTE_QUERY - -#define MAX_STEP_CAPTURE_COUNT 3 -#define MAX_NEGATED_FIELD_COUNT 8 -#define MAX_STATE_PREDECESSOR_COUNT 256 -#define MAX_ANALYSIS_STATE_DEPTH 8 -#define MAX_ANALYSIS_ITERATION_COUNT 256 - -/* - * Stream - A sequence of unicode characters derived from a UTF8 string. - * This struct is used in parsing queries from S-expressions. - */ -typedef struct { - const char *input; - const char *start; - const char *end; - int32_t next; - uint8_t next_size; -} Stream; - -/* - * QueryStep - A step in the process of matching a query. Each node within - * a query S-expression corresponds to one of these steps. An entire pattern - * is represented as a sequence of these steps. The basic properties of a - * node are represented by these fields: - * - `symbol` - The grammar symbol to match. A zero value represents the - * wildcard symbol, '_'. - * - `field` - The field name to match. A zero value means that a field name - * was not specified. - * - `capture_ids` - An array of integers representing the names of captures - * associated with this node in the pattern, terminated by a `NONE` value. - * - `depth` - The depth where this node occurs in the pattern. The root node - * of the pattern has depth zero. - * - `negated_field_list_id` - An id representing a set of fields that must - * not be present on a node matching this step. - * - * Steps have some additional fields in order to handle the `.` (or "anchor") operator, - * which forbids additional child nodes: - * - `is_immediate` - Indicates that the node matching this step cannot be preceded - * by other sibling nodes that weren't specified in the pattern. - * - `is_last_child` - Indicates that the node matching this step cannot have any - * subsequent named siblings. - * - * For simple patterns, steps are matched in sequential order. But in order to - * handle alternative/repeated/optional sub-patterns, query steps are not always - * structured as a linear sequence; they sometimes need to split and merge. This - * is done using the following fields: - * - `alternative_index` - The index of a different query step that serves as - * an alternative to this step. A `NONE` value represents no alternative. - * When a query state reaches a step with an alternative index, the state - * is duplicated, with one copy remaining at the original step, and one copy - * moving to the alternative step. The alternative may have its own alternative - * step, so this splitting is an iterative process. - * - `is_dead_end` - Indicates that this state cannot be passed directly, and - * exists only in order to redirect to an alternative index, with no splitting. - * - `is_pass_through` - Indicates that state has no matching logic of its own, - * and exists only to split a state. One copy of the state advances immediately - * to the next step, and one moves to the alternative step. - * - `alternative_is_immediate` - Indicates that this step's alternative step - * should be treated as if `is_immediate` is true. - * - * Steps also store some derived state that summarizes how they relate to other - * steps within the same pattern. This is used to optimize the matching process: - * - `contains_captures` - Indicates that this step or one of its child steps - * has a non-empty `capture_ids` list. - * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then - * it and all of its subsequent sibling steps within the same parent pattern - * are guaranteed to match. - * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but - * for the entire top-level pattern. When iterating through a query's - * captures using `ts_query_cursor_next_capture`, this field is used to - * detect that a capture can safely be returned from a match that has not - * even completed yet. - */ -typedef struct { - TSSymbol symbol; - TSSymbol supertype_symbol; - TSFieldId field; - uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; - uint16_t depth; - uint16_t alternative_index; - uint16_t negated_field_list_id; - bool is_named: 1; - bool is_immediate: 1; - bool is_last_child: 1; - bool is_pass_through: 1; - bool is_dead_end: 1; - bool alternative_is_immediate: 1; - bool contains_captures: 1; - bool root_pattern_guaranteed: 1; - bool parent_pattern_guaranteed: 1; -} QueryStep; - -/* - * Slice - A slice of an external array. Within a query, capture names, - * literal string values, and predicate step information are stored in three - * contiguous arrays. Individual captures, string values, and predicates are - * represented as slices of these three arrays. - */ -typedef struct { - uint32_t offset; - uint32_t length; -} Slice; - -/* - * SymbolTable - a two-way mapping of strings to ids. - */ -typedef struct { - Array(char) characters; - Array(Slice) slices; -} SymbolTable; - -/** - * CaptureQuantififers - a data structure holding the quantifiers of pattern captures. - */ -typedef Array(uint8_t) CaptureQuantifiers; - -/* - * PatternEntry - Information about the starting point for matching a particular - * pattern. These entries are stored in a 'pattern map' - a sorted array that - * makes it possible to efficiently lookup patterns based on the symbol for their - * first step. The entry consists of the following fields: - * - `pattern_index` - the index of the pattern within the query - * - `step_index` - the index of the pattern's first step in the shared `steps` array - * - `is_rooted` - whether or not the pattern has a single root node. This property - * affects decisions about whether or not to start the pattern for nodes outside - * of a QueryCursor's range restriction. - */ -typedef struct { - uint16_t step_index; - uint16_t pattern_index; - bool is_rooted; -} PatternEntry; - -typedef struct { - Slice steps; - Slice predicate_steps; - uint32_t start_byte; - bool is_non_local; -} QueryPattern; - -typedef struct { - uint32_t byte_offset; - uint16_t step_index; -} StepOffset; - -/* - * QueryState - The state of an in-progress match of a particular pattern - * in a query. While executing, a `TSQueryCursor` must keep track of a number - * of possible in-progress matches. Each of those possible matches is - * represented as one of these states. Fields: - * - `id` - A numeric id that is exposed to the public API. This allows the - * caller to remove a given match, preventing any more of its captures - * from being returned. - * - `start_depth` - The depth in the tree where the first step of the state's - * pattern was matched. - * - `pattern_index` - The pattern that the state is matching. - * - `consumed_capture_count` - The number of captures from this match that - * have already been returned. - * - `capture_list_id` - A numeric id that can be used to retrieve the state's - * list of captures from the `CaptureListPool`. - * - `seeking_immediate_match` - A flag that indicates that the state's next - * step must be matched by the very next sibling. This is used when - * processing repetitions. - * - `has_in_progress_alternatives` - A flag that indicates that there is are - * other states that have the same captures as this state, but are at - * different steps in their pattern. This means that in order to obey the - * 'longest-match' rule, this state should not be returned as a match until - * it is clear that there can be no other alternative match with more captures. - */ -typedef struct { - uint32_t id; - uint32_t capture_list_id; - uint16_t start_depth; - uint16_t step_index; - uint16_t pattern_index; - uint16_t consumed_capture_count: 12; - bool seeking_immediate_match: 1; - bool has_in_progress_alternatives: 1; - bool dead: 1; - bool needs_parent: 1; -} QueryState; - -typedef Array(TSQueryCapture) CaptureList; - -/* - * CaptureListPool - A collection of *lists* of captures. Each query state needs - * to maintain its own list of captures. To avoid repeated allocations, this struct - * maintains a fixed set of capture lists, and keeps track of which ones are - * currently in use by a query state. - */ -typedef struct { - Array(CaptureList) list; - CaptureList empty_list; - // The maximum number of capture lists that we are allowed to allocate. We - // never allow `list` to allocate more entries than this, dropping pending - // matches if needed to stay under the limit. - uint32_t max_capture_list_count; - // The number of capture lists allocated in `list` that are not currently in - // use. We reuse those existing-but-unused capture lists before trying to - // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture - // list's length to indicate that it's not in use. - uint32_t free_capture_list_count; -} CaptureListPool; - -/* - * AnalysisState - The state needed for walking the parse table when analyzing - * a query pattern, to determine at which steps the pattern might fail to match. - */ -typedef struct { - TSStateId parse_state; - TSSymbol parent_symbol; - uint16_t child_index; - TSFieldId field_id: 15; - bool done: 1; -} AnalysisStateEntry; - -typedef struct { - AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH]; - uint16_t depth; - uint16_t step_index; - TSSymbol root_symbol; -} AnalysisState; - -typedef Array(AnalysisState *) AnalysisStateSet; - -typedef struct { - AnalysisStateSet states; - AnalysisStateSet next_states; - AnalysisStateSet deeper_states; - AnalysisStateSet state_pool; - Array(uint16_t) final_step_indices; - Array(TSSymbol) finished_parent_symbols; - bool did_abort; -} QueryAnalysis; - -/* - * AnalysisSubgraph - A subset of the states in the parse table that are used - * in constructing nodes with a certain symbol. Each state is accompanied by - * some information about the possible node that could be produced in - * downstream states. - */ -typedef struct { - TSStateId state; - uint16_t production_id; - uint8_t child_index: 7; - bool done: 1; -} AnalysisSubgraphNode; - -typedef struct { - TSSymbol symbol; - Array(TSStateId) start_states; - Array(AnalysisSubgraphNode) nodes; -} AnalysisSubgraph; - -typedef Array(AnalysisSubgraph) AnalysisSubgraphArray; - -/* - * StatePredecessorMap - A map that stores the predecessors of each parse state. - * This is used during query analysis to determine which parse states can lead - * to which reduce actions. - */ -typedef struct { - TSStateId *contents; -} StatePredecessorMap; - -/* - * TSQuery - A tree query, compiled from a string of S-expressions. The query - * itself is immutable. The mutable state used in the process of executing the - * query is stored in a `TSQueryCursor`. - */ -struct TSQuery { - SymbolTable captures; - SymbolTable predicate_values; - Array(CaptureQuantifiers) capture_quantifiers; - Array(QueryStep) steps; - Array(PatternEntry) pattern_map; - Array(TSQueryPredicateStep) predicate_steps; - Array(QueryPattern) patterns; - Array(StepOffset) step_offsets; - Array(TSFieldId) negated_fields; - Array(char) string_buffer; - Array(TSSymbol) repeat_symbols_with_rootless_patterns; - const TSLanguage *language; - uint16_t wildcard_root_pattern_count; -}; - -/* - * TSQueryCursor - A stateful struct used to execute a query on a tree. - */ -struct TSQueryCursor { - const TSQuery *query; - TSTreeCursor cursor; - Array(QueryState) states; - Array(QueryState) finished_states; - CaptureListPool capture_list_pool; - uint32_t depth; - uint32_t max_start_depth; - uint32_t start_byte; - uint32_t end_byte; - TSPoint start_point; - TSPoint end_point; - uint32_t next_state_id; - bool on_visible_node; - bool ascending; - bool halted; - bool did_exceed_match_limit; -}; - -static const TSQueryError PARENT_DONE = -1; -static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; -static const uint16_t NONE = UINT16_MAX; -static const TSSymbol WILDCARD_SYMBOL = 0; - -/********** - * Stream - **********/ - -// Advance to the next unicode code point in the stream. -static bool stream_advance(Stream *self) { - self->input += self->next_size; - if (self->input < self->end) { - uint32_t size = ts_decode_utf8( - (const uint8_t *)self->input, - (uint32_t)(self->end - self->input), - &self->next - ); - if (size > 0) { - self->next_size = size; - return true; - } - } else { - self->next_size = 0; - self->next = '\0'; - } - return false; -} - -// Reset the stream to the given input position, represented as a pointer -// into the input string. -static void stream_reset(Stream *self, const char *input) { - self->input = input; - self->next_size = 0; - stream_advance(self); -} - -static Stream stream_new(const char *string, uint32_t length) { - Stream self = { - .next = 0, - .input = string, - .start = string, - .end = string + length, - }; - stream_advance(&self); - return self; -} - -static void stream_skip_whitespace(Stream *self) { - for (;;) { - if (iswspace(self->next)) { - stream_advance(self); - } else if (self->next == ';') { - // skip over comments - stream_advance(self); - while (self->next && self->next != '\n') { - if (!stream_advance(self)) break; - } - } else { - break; - } - } -} - -static bool stream_is_ident_start(Stream *self) { - return iswalnum(self->next) || self->next == '_' || self->next == '-'; -} - -static void stream_scan_identifier(Stream *stream) { - do { - stream_advance(stream); - } while ( - iswalnum(stream->next) || - stream->next == '_' || - stream->next == '-' || - stream->next == '.' || - stream->next == '?' || - stream->next == '!' - ); -} - -static uint32_t stream_offset(Stream *self) { - return (uint32_t)(self->input - self->start); -} - -/****************** - * CaptureListPool - ******************/ - -static CaptureListPool capture_list_pool_new(void) { - return (CaptureListPool) { - .list = array_new(), - .empty_list = array_new(), - .max_capture_list_count = UINT32_MAX, - .free_capture_list_count = 0, - }; -} - -static void capture_list_pool_reset(CaptureListPool *self) { - for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { - // This invalid size means that the list is not in use. - self->list.contents[i].size = UINT32_MAX; - } - self->free_capture_list_count = self->list.size; -} - -static void capture_list_pool_delete(CaptureListPool *self) { - for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { - array_delete(&self->list.contents[i]); - } - array_delete(&self->list); -} - -static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) { - if (id >= self->list.size) return &self->empty_list; - return &self->list.contents[id]; -} - -static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) { - assert(id < self->list.size); - return &self->list.contents[id]; -} - -static bool capture_list_pool_is_empty(const CaptureListPool *self) { - // The capture list pool is empty if all allocated lists are in use, and we - // have reached the maximum allowed number of allocated lists. - return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count; -} - -static uint16_t capture_list_pool_acquire(CaptureListPool *self) { - // First see if any already allocated capture list is currently unused. - if (self->free_capture_list_count > 0) { - for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { - if (self->list.contents[i].size == UINT32_MAX) { - array_clear(&self->list.contents[i]); - self->free_capture_list_count--; - return i; - } - } - } - - // Otherwise allocate and initialize a new capture list, as long as that - // doesn't put us over the requested maximum. - uint32_t i = self->list.size; - if (i >= self->max_capture_list_count) { - return NONE; - } - CaptureList list; - array_init(&list); - array_push(&self->list, list); - return i; -} - -static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { - if (id >= self->list.size) return; - self->list.contents[id].size = UINT32_MAX; - self->free_capture_list_count++; -} - -/************** - * Quantifiers - **************/ - -static TSQuantifier quantifier_mul( - TSQuantifier left, - TSQuantifier right -) { - switch (left) - { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - case TSQuantifierOne: - return TSQuantifierZeroOrOne; - case TSQuantifierZeroOrMore: - case TSQuantifierOneOrMore: - return TSQuantifierZeroOrMore; - }; - break; - case TSQuantifierZeroOrMore: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierZeroOrMore; - }; - break; - case TSQuantifierOne: - return right; - case TSQuantifierOneOrMore: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - } - return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! -} - -static TSQuantifier quantifier_join( - TSQuantifier left, - TSQuantifier right -) { - switch (left) - { - case TSQuantifierZero: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - case TSQuantifierOne: - return TSQuantifierZeroOrOne; - case TSQuantifierZeroOrMore: - case TSQuantifierOneOrMore: - return TSQuantifierZeroOrMore; - }; - break; - case TSQuantifierZeroOrOne: - switch (right) { - case TSQuantifierZero: - case TSQuantifierZeroOrOne: - case TSQuantifierOne: - return TSQuantifierZeroOrOne; - break; - case TSQuantifierZeroOrMore: - case TSQuantifierOneOrMore: - return TSQuantifierZeroOrMore; - break; - }; - break; - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - switch (right) { - case TSQuantifierZero: - case TSQuantifierZeroOrOne: - return TSQuantifierZeroOrOne; - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - return TSQuantifierOne; - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - case TSQuantifierOneOrMore: - switch (right) { - case TSQuantifierZero: - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - } - return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! -} - -static TSQuantifier quantifier_add( - TSQuantifier left, - TSQuantifier right -) { - switch (left) - { - case TSQuantifierZero: - return right; - case TSQuantifierZeroOrOne: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZeroOrOne; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - case TSQuantifierZeroOrMore: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZeroOrMore; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - case TSQuantifierOne: - switch (right) { - case TSQuantifierZero: - return TSQuantifierOne; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - } - return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! -} - -// Create new capture quantifiers structure -static CaptureQuantifiers capture_quantifiers_new(void) { - return (CaptureQuantifiers) array_new(); -} - -// Delete capture quantifiers structure -static void capture_quantifiers_delete( - CaptureQuantifiers *self -) { - array_delete(self); -} - -// Clear capture quantifiers structure -static void capture_quantifiers_clear( - CaptureQuantifiers *self -) { - array_clear(self); -} - -// Replace capture quantifiers with the given quantifiers -static void capture_quantifiers_replace( - CaptureQuantifiers *self, - CaptureQuantifiers *quantifiers -) { - array_clear(self); - array_push_all(self, quantifiers); -} - -// Return capture quantifier for the given capture id -static TSQuantifier capture_quantifier_for_id( - const CaptureQuantifiers *self, - uint16_t id -) { - return (self->size <= id) ? TSQuantifierZero : (TSQuantifier) *array_get(self, id); -} - -// Add the given quantifier to the current value for id -static void capture_quantifiers_add_for_id( - CaptureQuantifiers *self, - uint16_t id, - TSQuantifier quantifier -) { - if (self->size <= id) { - array_grow_by(self, id + 1 - self->size); - } - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier); -} - -// Point-wise add the given quantifiers to the current values -static void capture_quantifiers_add_all( - CaptureQuantifiers *self, - CaptureQuantifiers *quantifiers -) { - if (self->size < quantifiers->size) { - array_grow_by(self, quantifiers->size - self->size); - } - for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) { - uint8_t *quantifier = array_get(quantifiers, id); - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); - } -} - -// Join the given quantifier with the current values -static void capture_quantifiers_mul( - CaptureQuantifiers *self, - TSQuantifier quantifier -) { - for (uint16_t id = 0; id < (uint16_t)self->size; id++) { - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier); - } -} - -// Point-wise join the quantifiers from a list of alternatives with the current values -static void capture_quantifiers_join_all( - CaptureQuantifiers *self, - CaptureQuantifiers *quantifiers -) { - if (self->size < quantifiers->size) { - array_grow_by(self, quantifiers->size - self->size); - } - for (uint32_t id = 0; id < quantifiers->size; id++) { - uint8_t *quantifier = array_get(quantifiers, id); - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); - } - for (uint32_t id = quantifiers->size; id < self->size; id++) { - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero); - } -} - -/************** - * SymbolTable - **************/ - -static SymbolTable symbol_table_new(void) { - return (SymbolTable) { - .characters = array_new(), - .slices = array_new(), - }; -} - -static void symbol_table_delete(SymbolTable *self) { - array_delete(&self->characters); - array_delete(&self->slices); -} - -static int symbol_table_id_for_name( - const SymbolTable *self, - const char *name, - uint32_t length -) { - for (unsigned i = 0; i < self->slices.size; i++) { - Slice slice = self->slices.contents[i]; - if ( - slice.length == length && - !strncmp(&self->characters.contents[slice.offset], name, length) - ) return i; - } - return -1; -} - -static const char *symbol_table_name_for_id( - const SymbolTable *self, - uint16_t id, - uint32_t *length -) { - Slice slice = self->slices.contents[id]; - *length = slice.length; - return &self->characters.contents[slice.offset]; -} - -static uint16_t symbol_table_insert_name( - SymbolTable *self, - const char *name, - uint32_t length -) { - int id = symbol_table_id_for_name(self, name, length); - if (id >= 0) return (uint16_t)id; - Slice slice = { - .offset = self->characters.size, - .length = length, - }; - array_grow_by(&self->characters, length + 1); - memcpy(&self->characters.contents[slice.offset], name, length); - self->characters.contents[self->characters.size - 1] = 0; - array_push(&self->slices, slice); - return self->slices.size - 1; -} - -/************ - * QueryStep - ************/ - -static QueryStep query_step__new( - TSSymbol symbol, - uint16_t depth, - bool is_immediate -) { - QueryStep step = { - .symbol = symbol, - .depth = depth, - .field = 0, - .alternative_index = NONE, - .negated_field_list_id = 0, - .contains_captures = false, - .is_last_child = false, - .is_named = false, - .is_pass_through = false, - .is_dead_end = false, - .root_pattern_guaranteed = false, - .is_immediate = is_immediate, - .alternative_is_immediate = false, - }; - for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { - step.capture_ids[i] = NONE; - } - return step; -} - -static void query_step__add_capture(QueryStep *self, uint16_t capture_id) { - for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { - if (self->capture_ids[i] == NONE) { - self->capture_ids[i] = capture_id; - break; - } - } -} - -static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { - for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { - if (self->capture_ids[i] == capture_id) { - self->capture_ids[i] = NONE; - while (i + 1 < MAX_STEP_CAPTURE_COUNT) { - if (self->capture_ids[i + 1] == NONE) break; - self->capture_ids[i] = self->capture_ids[i + 1]; - self->capture_ids[i + 1] = NONE; - i++; - } - break; - } - } -} - -/********************** - * StatePredecessorMap - **********************/ - -static inline StatePredecessorMap state_predecessor_map_new( - const TSLanguage *language -) { - return (StatePredecessorMap) { - .contents = ts_calloc( - (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), - sizeof(TSStateId) - ), - }; -} - -static inline void state_predecessor_map_delete(StatePredecessorMap *self) { - ts_free(self->contents); -} - -static inline void state_predecessor_map_add( - StatePredecessorMap *self, - TSStateId state, - TSStateId predecessor -) { - size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); - TSStateId *count = &self->contents[index]; - if ( - *count == 0 || - (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor) - ) { - (*count)++; - self->contents[index + *count] = predecessor; - } -} - -static inline const TSStateId *state_predecessor_map_get( - const StatePredecessorMap *self, - TSStateId state, - unsigned *count -) { - size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); - *count = self->contents[index]; - return &self->contents[index + 1]; -} - -/**************** - * AnalysisState - ****************/ - -static unsigned analysis_state__recursion_depth(const AnalysisState *self) { - unsigned result = 0; - for (unsigned i = 0; i < self->depth; i++) { - TSSymbol symbol = self->stack[i].parent_symbol; - for (unsigned j = 0; j < i; j++) { - if (self->stack[j].parent_symbol == symbol) { - result++; - break; - } - } - } - return result; -} - -static inline int analysis_state__compare_position( - AnalysisState *const *self, - AnalysisState *const *other -) { - for (unsigned i = 0; i < (*self)->depth; i++) { - if (i >= (*other)->depth) return -1; - if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1; - if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1; - } - if ((*self)->depth < (*other)->depth) return 1; - if ((*self)->step_index < (*other)->step_index) return -1; - if ((*self)->step_index > (*other)->step_index) return 1; - return 0; -} - -static inline int analysis_state__compare( - AnalysisState *const *self, - AnalysisState *const *other -) { - int result = analysis_state__compare_position(self, other); - if (result != 0) return result; - for (unsigned i = 0; i < (*self)->depth; i++) { - if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1; - if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1; - if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1; - if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1; - if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1; - if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1; - } - return 0; -} - -static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { - if (self->depth == 0) { - return &self->stack[0]; - } - return &self->stack[self->depth - 1]; -} - -static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) { - for (unsigned i = 0; i < self->depth; i++) { - if (self->stack[i].parent_symbol == symbol) return true; - } - return false; -} - -/****************** - * AnalysisStateSet - ******************/ - -// Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by -// cloning one from scratch. -static inline AnalysisState *analysis_state_pool__clone_or_reuse( - AnalysisStateSet *self, - AnalysisState *borrowed_item -) { - AnalysisState *new_item; - if (self->size) { - new_item = array_pop(self); - } else { - new_item = ts_malloc(sizeof(AnalysisState)); - } - *new_item = *borrowed_item; - return new_item; -} - -// Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this -// set. The set does not contain duplicates, so if the item is already present, it will not be -// inserted, and no clone will be made. -// -// The caller retains ownership of the passed-in memory. However, the clone that is created by this -// function will be managed by the state set. -static inline void analysis_state_set__insert_sorted( - AnalysisStateSet *self, - AnalysisStateSet *pool, - AnalysisState *borrowed_item -) { - unsigned index, exists; - array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists); - if (!exists) { - AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); - array_insert(self, index, new_item); - } -} - -// Inserts a clone of the passed-in item at the end position of this list. -// -// IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function -// `analysis_state__compare`) than largest item already in this set. If items are inserted in the -// wrong order, the set will not function properly for future use. -// -// The caller retains ownership of the passed-in memory. However, the clone that is created by this -// function will be managed by the state set. -static inline void analysis_state_set__push( - AnalysisStateSet *self, - AnalysisStateSet *pool, - AnalysisState *borrowed_item -) { - AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); - array_push(self, new_item); -} - -// Removes all items from this set, returning it to an empty state. -static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) { - array_push_all(pool, self); - array_clear(self); -} - -// Releases all memory that is managed with this state set, including any items currently present. -// After calling this function, the set is no longer suitable for use. -static inline void analysis_state_set__delete(AnalysisStateSet *self) { - for (unsigned i = 0; i < self->size; i++) { - ts_free(self->contents[i]); - } - array_delete(self); -} - -/**************** - * QueryAnalyzer - ****************/ - -static inline QueryAnalysis query_analysis__new(void) { - return (QueryAnalysis) { - .states = array_new(), - .next_states = array_new(), - .deeper_states = array_new(), - .state_pool = array_new(), - .final_step_indices = array_new(), - .finished_parent_symbols = array_new(), - .did_abort = false, - }; -} - -static inline void query_analysis__delete(QueryAnalysis *self) { - analysis_state_set__delete(&self->states); - analysis_state_set__delete(&self->next_states); - analysis_state_set__delete(&self->deeper_states); - analysis_state_set__delete(&self->state_pool); - array_delete(&self->final_step_indices); - array_delete(&self->finished_parent_symbols); -} - -/*********************** - * AnalysisSubgraphNode - ***********************/ - -static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) { - if (self->state < other->state) return -1; - if (self->state > other->state) return 1; - if (self->child_index < other->child_index) return -1; - if (self->child_index > other->child_index) return 1; - if (self->done < other->done) return -1; - if (self->done > other->done) return 1; - if (self->production_id < other->production_id) return -1; - if (self->production_id > other->production_id) return 1; - return 0; -} - -/********* - * Query - *********/ - -// The `pattern_map` contains a mapping from TSSymbol values to indices in the -// `steps` array. For a given syntax node, the `pattern_map` makes it possible -// to quickly find the starting steps of all of the patterns whose root matches -// that node. Each entry has two fields: a `pattern_index`, which identifies one -// of the patterns in the query, and a `step_index`, which indicates the start -// offset of that pattern's steps within the `steps` array. -// -// The entries are sorted by the patterns' root symbols, and lookups use a -// binary search. This ensures that the cost of this initial lookup step -// scales logarithmically with the number of patterns in the query. -// -// This returns `true` if the symbol is present and `false` otherwise. -// If the symbol is not present `*result` is set to the index where the -// symbol should be inserted. -static inline bool ts_query__pattern_map_search( - const TSQuery *self, - TSSymbol needle, - uint32_t *result -) { - uint32_t base_index = self->wildcard_root_pattern_count; - uint32_t size = self->pattern_map.size - base_index; - if (size == 0) { - *result = base_index; - return false; - } - while (size > 1) { - uint32_t half_size = size / 2; - uint32_t mid_index = base_index + half_size; - TSSymbol mid_symbol = self->steps.contents[ - self->pattern_map.contents[mid_index].step_index - ].symbol; - if (needle > mid_symbol) base_index = mid_index; - size -= half_size; - } - - TSSymbol symbol = self->steps.contents[ - self->pattern_map.contents[base_index].step_index - ].symbol; - - if (needle > symbol) { - base_index++; - if (base_index < self->pattern_map.size) { - symbol = self->steps.contents[ - self->pattern_map.contents[base_index].step_index - ].symbol; - } - } - - *result = base_index; - return needle == symbol; -} - -// Insert a new pattern's start index into the pattern map, maintaining -// the pattern map's ordering invariant. -static inline void ts_query__pattern_map_insert( - TSQuery *self, - TSSymbol symbol, - PatternEntry new_entry -) { - uint32_t index; - ts_query__pattern_map_search(self, symbol, &index); - - // Ensure that the entries are sorted not only by symbol, but also - // by pattern_index. This way, states for earlier patterns will be - // initiated first, which allows the ordering of the states array - // to be maintained more efficiently. - while (index < self->pattern_map.size) { - PatternEntry *entry = &self->pattern_map.contents[index]; - if ( - self->steps.contents[entry->step_index].symbol == symbol && - entry->pattern_index < new_entry.pattern_index - ) { - index++; - } else { - break; - } - } - - array_insert(&self->pattern_map, index, new_entry); -} - -// Walk the subgraph for this non-terminal, tracking all of the possible -// sequences of progress within the pattern. -static void ts_query__perform_analysis( - TSQuery *self, - const AnalysisSubgraphArray *subgraphs, - QueryAnalysis *analysis -) { - unsigned recursion_depth_limit = 0; - unsigned prev_final_step_count = 0; - array_clear(&analysis->final_step_indices); - array_clear(&analysis->finished_parent_symbols); - - for (unsigned iteration = 0;; iteration++) { - if (iteration == MAX_ANALYSIS_ITERATION_COUNT) { - analysis->did_abort = true; - break; - } - - #ifdef DEBUG_ANALYZE_QUERY - printf("Iteration: %u. Final step indices:", iteration); - for (unsigned j = 0; j < analysis->final_step_indices.size; j++) { - printf(" %4u", analysis->final_step_indices.contents[j]); - } - printf("\n"); - for (unsigned j = 0; j < analysis->states.size; j++) { - AnalysisState *state = analysis->states.contents[j]; - printf(" %3u: step: %u, stack: [", j, state->step_index); - for (unsigned k = 0; k < state->depth; k++) { - printf( - " {%s, child: %u, state: %4u", - self->language->symbol_names[state->stack[k].parent_symbol], - state->stack[k].child_index, - state->stack[k].parse_state - ); - if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]); - if (state->stack[k].done) printf(", DONE"); - printf("}"); - } - printf(" ]\n"); - } - #endif - - // If no further progress can be made within the current recursion depth limit, then - // bump the depth limit by one, and continue to process the states the exceeded the - // limit. But only allow this if progress has been made since the last time the depth - // limit was increased. - if (analysis->states.size == 0) { - if ( - analysis->deeper_states.size > 0 && - analysis->final_step_indices.size > prev_final_step_count - ) { - #ifdef DEBUG_ANALYZE_QUERY - printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1); - #endif - - prev_final_step_count = analysis->final_step_indices.size; - recursion_depth_limit++; - AnalysisStateSet _states = analysis->states; - analysis->states = analysis->deeper_states; - analysis->deeper_states = _states; - continue; - } - - break; - } - - analysis_state_set__clear(&analysis->next_states, &analysis->state_pool); - for (unsigned j = 0; j < analysis->states.size; j++) { - AnalysisState * const state = analysis->states.contents[j]; - - // For efficiency, it's important to avoid processing the same analysis state more - // than once. To achieve this, keep the states in order of ascending position within - // their hypothetical syntax trees. In each iteration of this loop, start by advancing - // the states that have made the least progress. Avoid advancing states that have already - // made more progress. - if (analysis->next_states.size > 0) { - int comparison = analysis_state__compare_position( - &state, - array_back(&analysis->next_states) - ); - if (comparison == 0) { - analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state); - continue; - } else if (comparison > 0) { - #ifdef DEBUG_ANALYZE_QUERY - printf("Terminate iteration at state %u\n", j); - #endif - while (j < analysis->states.size) { - analysis_state_set__push( - &analysis->next_states, - &analysis->state_pool, - analysis->states.contents[j] - ); - j++; - } - break; - } - } - - const TSStateId parse_state = analysis_state__top(state)->parse_state; - const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol; - const TSFieldId parent_field_id = analysis_state__top(state)->field_id; - const unsigned child_index = analysis_state__top(state)->child_index; - const QueryStep * const step = &self->steps.contents[state->step_index]; - - unsigned subgraph_index, exists; - array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); - if (!exists) continue; - const AnalysisSubgraph *subgraph = &subgraphs->contents[subgraph_index]; - - // Follow every possible path in the parse table, but only visit states that - // are part of the subgraph for the current symbol. - LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state); - while (ts_lookahead_iterator__next(&lookahead_iterator)) { - TSSymbol sym = lookahead_iterator.symbol; - - AnalysisSubgraphNode successor = { - .state = parse_state, - .child_index = child_index, - }; - if (lookahead_iterator.action_count) { - const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1]; - if (action->type == TSParseActionTypeShift) { - if (!action->shift.extra) { - successor.state = action->shift.state; - successor.child_index++; - } - } else { - continue; - } - } else if (lookahead_iterator.next_state != 0) { - successor.state = lookahead_iterator.next_state; - successor.child_index++; - } else { - continue; - } - - unsigned node_index; - array_search_sorted_with( - &subgraph->nodes, - analysis_subgraph_node__compare, &successor, - &node_index, &exists - ); - while (node_index < subgraph->nodes.size) { - AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++]; - if (node->state != successor.state || node->child_index != successor.child_index) break; - - // Use the subgraph to determine what alias and field will eventually be applied - // to this child node. - TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); - TSSymbol visible_symbol = alias - ? alias - : self->language->symbol_metadata[sym].visible - ? self->language->public_symbol_map[sym] - : 0; - TSFieldId field_id = parent_field_id; - if (!field_id) { - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); - for (; field_map != field_map_end; field_map++) { - if (!field_map->inherited && field_map->child_index == child_index) { - field_id = field_map->field_id; - break; - } - } - } - - // Create a new state that has advanced past this hypothetical subtree. - AnalysisState next_state = *state; - AnalysisStateEntry *next_state_top = analysis_state__top(&next_state); - next_state_top->child_index = successor.child_index; - next_state_top->parse_state = successor.state; - if (node->done) next_state_top->done = true; - - // Determine if this hypothetical child node would match the current step - // of the query pattern. - bool does_match = false; - if (visible_symbol) { - does_match = true; - if (step->symbol == WILDCARD_SYMBOL) { - if ( - step->is_named && - !self->language->symbol_metadata[visible_symbol].named - ) does_match = false; - } else if (step->symbol != visible_symbol) { - does_match = false; - } - if (step->field && step->field != field_id) { - does_match = false; - } - if ( - step->supertype_symbol && - !analysis_state__has_supertype(state, step->supertype_symbol) - ) does_match = false; - } - - // If this child is hidden, then descend into it and walk through its children. - // If the top entry of the stack is at the end of its rule, then that entry can - // be replaced. Otherwise, push a new entry onto the stack. - else if (sym >= self->language->token_count) { - if (!next_state_top->done) { - if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) { - #ifdef DEBUG_ANALYZE_QUERY - printf("Exceeded depth limit for state %u\n", j); - #endif - - analysis->did_abort = true; - continue; - } - - next_state.depth++; - next_state_top = analysis_state__top(&next_state); - } - - *next_state_top = (AnalysisStateEntry) { - .parse_state = parse_state, - .parent_symbol = sym, - .child_index = 0, - .field_id = field_id, - .done = false, - }; - - if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) { - analysis_state_set__insert_sorted( - &analysis->deeper_states, - &analysis->state_pool, - &next_state - ); - continue; - } - } - - // Pop from the stack when this state reached the end of its current syntax node. - while (next_state.depth > 0 && next_state_top->done) { - next_state.depth--; - next_state_top = analysis_state__top(&next_state); - } - - // If this hypothetical child did match the current step of the query pattern, - // then advance to the next step at the current depth. This involves skipping - // over any descendant steps of the current child. - const QueryStep *next_step = step; - if (does_match) { - for (;;) { - next_state.step_index++; - next_step = &self->steps.contents[next_state.step_index]; - if ( - next_step->depth == PATTERN_DONE_MARKER || - next_step->depth <= step->depth - ) break; - } - } else if (successor.state == parse_state) { - continue; - } - - for (;;) { - // Skip pass-through states. Although these states have alternatives, they are only - // used to implement repetitions, and query analysis does not need to process - // repetitions in order to determine whether steps are possible and definite. - if (next_step->is_pass_through) { - next_state.step_index++; - next_step++; - continue; - } - - // If the pattern is finished or hypothetical parent node is complete, then - // record that matching can terminate at this step of the pattern. Otherwise, - // add this state to the list of states to process on the next iteration. - if (!next_step->is_dead_end) { - bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth; - if (did_finish_pattern) { - array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol); - } else if (next_state.depth == 0) { - array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index); - } else { - analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state); - } - } - - // If the state has advanced to a step with an alternative step, then add another state - // at that alternative step. This process is simpler than the process of actually matching a - // pattern during query execution, because for the purposes of query analysis, there is no - // need to process repetitions. - if ( - does_match && - next_step->alternative_index != NONE && - next_step->alternative_index > next_state.step_index - ) { - next_state.step_index = next_step->alternative_index; - next_step = &self->steps.contents[next_state.step_index]; - } else { - break; - } - } - } - } - } - - AnalysisStateSet _states = analysis->states; - analysis->states = analysis->next_states; - analysis->next_states = _states; - } -} - -static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { - Array(uint16_t) non_rooted_pattern_start_steps = array_new(); - for (unsigned i = 0; i < self->pattern_map.size; i++) { - PatternEntry *pattern = &self->pattern_map.contents[i]; - if (!pattern->is_rooted) { - QueryStep *step = &self->steps.contents[pattern->step_index]; - if (step->symbol != WILDCARD_SYMBOL) { - array_push(&non_rooted_pattern_start_steps, i); - } - } - } - - // Walk forward through all of the steps in the query, computing some - // basic information about each step. Mark all of the steps that contain - // captures, and record the indices of all of the steps that have child steps. - Array(uint32_t) parent_step_indices = array_new(); - for (unsigned i = 0; i < self->steps.size; i++) { - QueryStep *step = &self->steps.contents[i]; - if (step->depth == PATTERN_DONE_MARKER) { - step->parent_pattern_guaranteed = true; - step->root_pattern_guaranteed = true; - continue; - } - - bool has_children = false; - bool is_wildcard = step->symbol == WILDCARD_SYMBOL; - step->contains_captures = step->capture_ids[0] != NONE; - for (unsigned j = i + 1; j < self->steps.size; j++) { - QueryStep *next_step = &self->steps.contents[j]; - if ( - next_step->depth == PATTERN_DONE_MARKER || - next_step->depth <= step->depth - ) break; - if (next_step->capture_ids[0] != NONE) { - step->contains_captures = true; - } - if (!is_wildcard) { - next_step->root_pattern_guaranteed = true; - next_step->parent_pattern_guaranteed = true; - } - has_children = true; - } - - if (has_children && !is_wildcard) { - array_push(&parent_step_indices, i); - } - } - - // For every parent symbol in the query, initialize an 'analysis subgraph'. - // This subgraph lists all of the states in the parse table that are directly - // involved in building subtrees for this symbol. - // - // In addition to the parent symbols in the query, construct subgraphs for all - // of the hidden symbols in the grammar, because these might occur within - // one of the parent nodes, such that their children appear to belong to the - // parent. - AnalysisSubgraphArray subgraphs = array_new(); - for (unsigned i = 0; i < parent_step_indices.size; i++) { - uint32_t parent_step_index = parent_step_indices.contents[i]; - TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; - AnalysisSubgraph subgraph = { .symbol = parent_symbol }; - array_insert_sorted_by(&subgraphs, .symbol, subgraph); - } - for (TSSymbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) { - if (!ts_language_symbol_metadata(self->language, sym).visible) { - AnalysisSubgraph subgraph = { .symbol = sym }; - array_insert_sorted_by(&subgraphs, .symbol, subgraph); - } - } - - // Scan the parse table to find the data needed to populate these subgraphs. - // Collect three things during this scan: - // 1) All of the parse states where one of these symbols can start. - // 2) All of the parse states where one of these symbols can end, along - // with information about the node that would be created. - // 3) A list of predecessor states for each state. - StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language); - for (TSStateId state = 1; state < (uint16_t)self->language->state_count; state++) { - unsigned subgraph_index, exists; - LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state); - while (ts_lookahead_iterator__next(&lookahead_iterator)) { - if (lookahead_iterator.action_count) { - for (unsigned i = 0; i < lookahead_iterator.action_count; i++) { - const TSParseAction *action = &lookahead_iterator.actions[i]; - if (action->type == TSParseActionTypeReduce) { - const TSSymbol *aliases, *aliases_end; - ts_language_aliases_for_symbol( - self->language, - action->reduce.symbol, - &aliases, - &aliases_end - ); - for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { - array_search_sorted_by( - &subgraphs, - .symbol, - *symbol, - &subgraph_index, - &exists - ); - if (exists) { - AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { - array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { - .state = state, - .production_id = action->reduce.production_id, - .child_index = action->reduce.child_count, - .done = true, - })); - } - } - } - } else if (action->type == TSParseActionTypeShift && !action->shift.extra) { - TSStateId next_state = action->shift.state; - state_predecessor_map_add(&predecessor_map, next_state, state); - } - } - } else if (lookahead_iterator.next_state != 0) { - if (lookahead_iterator.next_state != state) { - state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); - } - if (ts_language_state_is_primary(self->language, state)) { - const TSSymbol *aliases, *aliases_end; - ts_language_aliases_for_symbol( - self->language, - lookahead_iterator.symbol, - &aliases, - &aliases_end - ); - for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { - array_search_sorted_by( - &subgraphs, - .symbol, - *symbol, - &subgraph_index, - &exists - ); - if (exists) { - AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - if ( - subgraph->start_states.size == 0 || - *array_back(&subgraph->start_states) != state - ) - array_push(&subgraph->start_states, state); - } - } - } - } - } - } - - // For each subgraph, compute the preceding states by walking backward - // from the end states using the predecessor map. - Array(AnalysisSubgraphNode) next_nodes = array_new(); - for (unsigned i = 0; i < subgraphs.size; i++) { - AnalysisSubgraph *subgraph = &subgraphs.contents[i]; - if (subgraph->nodes.size == 0) { - array_delete(&subgraph->start_states); - array_erase(&subgraphs, i); - i--; - continue; - } - array_assign(&next_nodes, &subgraph->nodes); - while (next_nodes.size > 0) { - AnalysisSubgraphNode node = array_pop(&next_nodes); - if (node.child_index > 1) { - unsigned predecessor_count; - const TSStateId *predecessors = state_predecessor_map_get( - &predecessor_map, - node.state, - &predecessor_count - ); - for (unsigned j = 0; j < predecessor_count; j++) { - AnalysisSubgraphNode predecessor_node = { - .state = predecessors[j], - .child_index = node.child_index - 1, - .production_id = node.production_id, - .done = false, - }; - unsigned index, exists; - array_search_sorted_with( - &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node, - &index, &exists - ); - if (!exists) { - array_insert(&subgraph->nodes, index, predecessor_node); - array_push(&next_nodes, predecessor_node); - } - } - } - } - } - - #ifdef DEBUG_ANALYZE_QUERY - printf("\nSubgraphs:\n"); - for (unsigned i = 0; i < subgraphs.size; i++) { - AnalysisSubgraph *subgraph = &subgraphs.contents[i]; - printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol)); - for (unsigned j = 0; j < subgraph->start_states.size; j++) { - printf( - " {state: %u}\n", - subgraph->start_states.contents[j] - ); - } - for (unsigned j = 0; j < subgraph->nodes.size; j++) { - AnalysisSubgraphNode *node = &subgraph->nodes.contents[j]; - printf( - " {state: %u, child_index: %u, production_id: %u, done: %d}\n", - node->state, node->child_index, node->production_id, node->done - ); - } - printf("\n"); - } - #endif - - // For each non-terminal pattern, determine if the pattern can successfully match, - // and identify all of the possible children within the pattern where matching could fail. - bool all_patterns_are_valid = true; - QueryAnalysis analysis = query_analysis__new(); - for (unsigned i = 0; i < parent_step_indices.size; i++) { - uint16_t parent_step_index = parent_step_indices.contents[i]; - uint16_t parent_depth = self->steps.contents[parent_step_index].depth; - TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; - if (parent_symbol == ts_builtin_sym_error) continue; - - // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's - // root symbol is a terminal, then return an error. - unsigned subgraph_index, exists; - array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); - if (!exists) { - unsigned first_child_step_index = parent_step_index + 1; - uint32_t j, child_exists; - array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists); - assert(child_exists); - *error_offset = self->step_offsets.contents[j].byte_offset; - all_patterns_are_valid = false; - break; - } - - // Initialize an analysis state at every parse state in the table where - // this parent symbol can occur. - AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - analysis_state_set__clear(&analysis.states, &analysis.state_pool); - analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); - for (unsigned j = 0; j < subgraph->start_states.size; j++) { - TSStateId parse_state = subgraph->start_states.contents[j]; - analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { - .step_index = parent_step_index + 1, - .stack = { - [0] = { - .parse_state = parse_state, - .parent_symbol = parent_symbol, - .child_index = 0, - .field_id = 0, - .done = false, - }, - }, - .depth = 1, - .root_symbol = parent_symbol, - })); - } - - #ifdef DEBUG_ANALYZE_QUERY - printf( - "\nWalk states for %s:\n", - ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol) - ); - #endif - - analysis.did_abort = false; - ts_query__perform_analysis(self, &subgraphs, &analysis); - - // If this pattern could not be fully analyzed, then every step should - // be considered fallible. - if (analysis.did_abort) { - for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) { - QueryStep *step = &self->steps.contents[j]; - if ( - step->depth <= parent_depth || - step->depth == PATTERN_DONE_MARKER - ) break; - if (!step->is_dead_end) { - step->parent_pattern_guaranteed = false; - step->root_pattern_guaranteed = false; - } - } - continue; - } - - // If this pattern cannot match, store the pattern index so that it can be - // returned to the caller. - if (analysis.finished_parent_symbols.size == 0) { - assert(analysis.final_step_indices.size > 0); - uint16_t impossible_step_index = *array_back(&analysis.final_step_indices); - uint32_t j, impossible_exists; - array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists); - if (j >= self->step_offsets.size) j = self->step_offsets.size - 1; - *error_offset = self->step_offsets.contents[j].byte_offset; - all_patterns_are_valid = false; - break; - } - - // Mark as fallible any step where a match terminated. - // Later, this property will be propagated to all of the step's predecessors. - for (unsigned j = 0; j < analysis.final_step_indices.size; j++) { - uint32_t final_step_index = analysis.final_step_indices.contents[j]; - QueryStep *step = &self->steps.contents[final_step_index]; - if ( - step->depth != PATTERN_DONE_MARKER && - step->depth > parent_depth && - !step->is_dead_end - ) { - step->parent_pattern_guaranteed = false; - step->root_pattern_guaranteed = false; - } - } - } - - // Mark as indefinite any step with captures that are used in predicates. - Array(uint16_t) predicate_capture_ids = array_new(); - for (unsigned i = 0; i < self->patterns.size; i++) { - QueryPattern *pattern = &self->patterns.contents[i]; - - // Gather all of the captures that are used in predicates for this pattern. - array_clear(&predicate_capture_ids); - for ( - unsigned start = pattern->predicate_steps.offset, - end = start + pattern->predicate_steps.length, - j = start; j < end; j++ - ) { - TSQueryPredicateStep *step = &self->predicate_steps.contents[j]; - if (step->type == TSQueryPredicateStepTypeCapture) { - uint16_t value_id = step->value_id; - array_insert_sorted_by(&predicate_capture_ids, , value_id); - } - } - - // Find all of the steps that have these captures. - for ( - unsigned start = pattern->steps.offset, - end = start + pattern->steps.length, - j = start; j < end; j++ - ) { - QueryStep *step = &self->steps.contents[j]; - for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) { - uint16_t capture_id = step->capture_ids[k]; - if (capture_id == NONE) break; - unsigned index, exists; - array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists); - if (exists) { - step->root_pattern_guaranteed = false; - break; - } - } - } - } - - // Propagate fallibility. If a pattern is fallible at a given step, then it is - // fallible at all of its preceding steps. - bool done = self->steps.size == 0; - while (!done) { - done = true; - for (unsigned i = self->steps.size - 1; i > 0; i--) { - QueryStep *step = &self->steps.contents[i]; - if (step->depth == PATTERN_DONE_MARKER) continue; - - // Determine if this step is definite or has definite alternatives. - bool parent_pattern_guaranteed = false; - for (;;) { - if (step->root_pattern_guaranteed) { - parent_pattern_guaranteed = true; - break; - } - if (step->alternative_index == NONE || step->alternative_index < i) { - break; - } - step = &self->steps.contents[step->alternative_index]; - } - - // If not, mark its predecessor as indefinite. - if (!parent_pattern_guaranteed) { - QueryStep *prev_step = &self->steps.contents[i - 1]; - if ( - !prev_step->is_dead_end && - prev_step->depth != PATTERN_DONE_MARKER && - prev_step->root_pattern_guaranteed - ) { - prev_step->root_pattern_guaranteed = false; - done = false; - } - } - } - } - - #ifdef DEBUG_ANALYZE_QUERY - printf("Steps:\n"); - for (unsigned i = 0; i < self->steps.size; i++) { - QueryStep *step = &self->steps.contents[i]; - if (step->depth == PATTERN_DONE_MARKER) { - printf(" %u: DONE\n", i); - } else { - printf( - " %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n", - i, - (step->symbol == WILDCARD_SYMBOL) - ? "ANY" - : ts_language_symbol_name(self->language, step->symbol), - (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"), - step->depth, - step->parent_pattern_guaranteed, - step->root_pattern_guaranteed - ); - } - } - #endif - - // Determine which repetition symbols in this language have the possibility - // of matching non-rooted patterns in this query. These repetition symbols - // prevent certain optimizations with range restrictions. - analysis.did_abort = false; - for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) { - uint16_t pattern_entry_index = non_rooted_pattern_start_steps.contents[i]; - PatternEntry *pattern_entry = &self->pattern_map.contents[pattern_entry_index]; - - analysis_state_set__clear(&analysis.states, &analysis.state_pool); - analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); - for (unsigned j = 0; j < subgraphs.size; j++) { - AnalysisSubgraph *subgraph = &subgraphs.contents[j]; - TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol); - if (metadata.visible || metadata.named) continue; - - for (uint32_t k = 0; k < subgraph->start_states.size; k++) { - TSStateId parse_state = subgraph->start_states.contents[k]; - analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { - .step_index = pattern_entry->step_index, - .stack = { - [0] = { - .parse_state = parse_state, - .parent_symbol = subgraph->symbol, - .child_index = 0, - .field_id = 0, - .done = false, - }, - }, - .root_symbol = subgraph->symbol, - .depth = 1, - })); - } - } - - #ifdef DEBUG_ANALYZE_QUERY - printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index); - #endif - - ts_query__perform_analysis( - self, - &subgraphs, - &analysis - ); - - if (analysis.finished_parent_symbols.size > 0) { - self->patterns.contents[pattern_entry->pattern_index].is_non_local = true; - } - - for (unsigned k = 0; k < analysis.finished_parent_symbols.size; k++) { - TSSymbol symbol = analysis.finished_parent_symbols.contents[k]; - array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol); - } - } - - #ifdef DEBUG_ANALYZE_QUERY - if (self->repeat_symbols_with_rootless_patterns.size > 0) { - printf("\nRepetition symbols with rootless patterns:\n"); - printf("aborted analysis: %d\n", analysis.did_abort); - for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) { - TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i]; - printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol)); - } - printf("\n"); - } - #endif - - // Cleanup - for (unsigned i = 0; i < subgraphs.size; i++) { - array_delete(&subgraphs.contents[i].start_states); - array_delete(&subgraphs.contents[i].nodes); - } - array_delete(&subgraphs); - query_analysis__delete(&analysis); - array_delete(&next_nodes); - array_delete(&non_rooted_pattern_start_steps); - array_delete(&parent_step_indices); - array_delete(&predicate_capture_ids); - state_predecessor_map_delete(&predecessor_map); - - return all_patterns_are_valid; -} - -static void ts_query__add_negated_fields( - TSQuery *self, - uint16_t step_index, - TSFieldId *field_ids, - uint16_t field_count -) { - QueryStep *step = &self->steps.contents[step_index]; - - // The negated field array stores a list of field lists, separated by zeros. - // Try to find the start index of an existing list that matches this new list. - bool failed_match = false; - unsigned match_count = 0; - unsigned start_i = 0; - for (unsigned i = 0; i < self->negated_fields.size; i++) { - TSFieldId existing_field_id = self->negated_fields.contents[i]; - - // At each zero value, terminate the match attempt. If we've exactly - // matched the new field list, then reuse this index. Otherwise, - // start over the matching process. - if (existing_field_id == 0) { - if (match_count == field_count) { - step->negated_field_list_id = start_i; - return; - } else { - start_i = i + 1; - match_count = 0; - failed_match = false; - } - } - - // If the existing list matches our new list so far, then advance - // to the next element of the new list. - else if ( - match_count < field_count && - existing_field_id == field_ids[match_count] && - !failed_match - ) { - match_count++; - } - - // Otherwise, this existing list has failed to match. - else { - match_count = 0; - failed_match = true; - } - } - - step->negated_field_list_id = self->negated_fields.size; - array_extend(&self->negated_fields, field_count, field_ids); - array_push(&self->negated_fields, 0); -} - -static TSQueryError ts_query__parse_string_literal( - TSQuery *self, - Stream *stream -) { - const char *string_start = stream->input; - if (stream->next != '"') return TSQueryErrorSyntax; - stream_advance(stream); - const char *prev_position = stream->input; - - bool is_escaped = false; - array_clear(&self->string_buffer); - for (;;) { - if (is_escaped) { - is_escaped = false; - switch (stream->next) { - case 'n': - array_push(&self->string_buffer, '\n'); - break; - case 'r': - array_push(&self->string_buffer, '\r'); - break; - case 't': - array_push(&self->string_buffer, '\t'); - break; - case '0': - array_push(&self->string_buffer, '\0'); - break; - default: - array_extend(&self->string_buffer, stream->next_size, stream->input); - break; - } - prev_position = stream->input + stream->next_size; - } else { - if (stream->next == '\\') { - array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); - prev_position = stream->input + 1; - is_escaped = true; - } else if (stream->next == '"') { - array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); - stream_advance(stream); - return TSQueryErrorNone; - } else if (stream->next == '\n') { - stream_reset(stream, string_start); - return TSQueryErrorSyntax; - } - } - if (!stream_advance(stream)) { - stream_reset(stream, string_start); - return TSQueryErrorSyntax; - } - } -} - -// Parse a single predicate associated with a pattern, adding it to the -// query's internal `predicate_steps` array. Predicates are arbitrary -// S-expressions associated with a pattern which are meant to be handled at -// a higher level of abstraction, such as the Rust/JavaScript bindings. They -// can contain '@'-prefixed capture names, double-quoted strings, and bare -// symbols, which also represent strings. -static TSQueryError ts_query__parse_predicate( - TSQuery *self, - Stream *stream -) { - if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; - const char *predicate_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - predicate_name); - uint16_t id = symbol_table_insert_name( - &self->predicate_values, - predicate_name, - length - ); - array_push(&self->predicate_steps, ((TSQueryPredicateStep) { - .type = TSQueryPredicateStepTypeString, - .value_id = id, - })); - stream_skip_whitespace(stream); - - for (;;) { - if (stream->next == ')') { - stream_advance(stream); - stream_skip_whitespace(stream); - array_push(&self->predicate_steps, ((TSQueryPredicateStep) { - .type = TSQueryPredicateStepTypeDone, - .value_id = 0, - })); - break; - } - - // Parse an '@'-prefixed capture name - else if (stream->next == '@') { - stream_advance(stream); - - // Parse the capture name - if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; - const char *capture_name = stream->input; - stream_scan_identifier(stream); - uint32_t capture_length = (uint32_t)(stream->input - capture_name); - - // Add the capture id to the first step of the pattern - int capture_id = symbol_table_id_for_name( - &self->captures, - capture_name, - capture_length - ); - if (capture_id == -1) { - stream_reset(stream, capture_name); - return TSQueryErrorCapture; - } - - array_push(&self->predicate_steps, ((TSQueryPredicateStep) { - .type = TSQueryPredicateStepTypeCapture, - .value_id = capture_id, - })); - } - - // Parse a string literal - else if (stream->next == '"') { - TSQueryError e = ts_query__parse_string_literal(self, stream); - if (e) return e; - uint16_t query_id = symbol_table_insert_name( - &self->predicate_values, - self->string_buffer.contents, - self->string_buffer.size - ); - array_push(&self->predicate_steps, ((TSQueryPredicateStep) { - .type = TSQueryPredicateStepTypeString, - .value_id = query_id, - })); - } - - // Parse a bare symbol - else if (stream_is_ident_start(stream)) { - const char *symbol_start = stream->input; - stream_scan_identifier(stream); - uint32_t symbol_length = (uint32_t)(stream->input - symbol_start); - uint16_t query_id = symbol_table_insert_name( - &self->predicate_values, - symbol_start, - symbol_length - ); - array_push(&self->predicate_steps, ((TSQueryPredicateStep) { - .type = TSQueryPredicateStepTypeString, - .value_id = query_id, - })); - } - - else { - return TSQueryErrorSyntax; - } - - stream_skip_whitespace(stream); - } - - return 0; -} - -// Read one S-expression pattern from the stream, and incorporate it into -// the query's internal state machine representation. For nested patterns, -// this function calls itself recursively. -// -// The caller is responsible for passing in a dedicated CaptureQuantifiers. -// These should not be shared between different calls to ts_query__parse_pattern! -static TSQueryError ts_query__parse_pattern( - TSQuery *self, - Stream *stream, - uint32_t depth, - bool is_immediate, - CaptureQuantifiers *capture_quantifiers -) { - if (stream->next == 0) return TSQueryErrorSyntax; - if (stream->next == ')' || stream->next == ']') return PARENT_DONE; - - const uint32_t starting_step_index = self->steps.size; - - // Store the byte offset of each step in the query. - if ( - self->step_offsets.size == 0 || - array_back(&self->step_offsets)->step_index != starting_step_index - ) { - array_push(&self->step_offsets, ((StepOffset) { - .step_index = starting_step_index, - .byte_offset = stream_offset(stream), - })); - } - - // An open bracket is the start of an alternation. - if (stream->next == '[') { - stream_advance(stream); - stream_skip_whitespace(stream); - - // Parse each branch, and add a placeholder step in between the branches. - Array(uint32_t) branch_step_indices = array_new(); - CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new(); - for (;;) { - uint32_t start_index = self->steps.size; - TSQueryError e = ts_query__parse_pattern( - self, - stream, - depth, - is_immediate, - &branch_capture_quantifiers - ); - - if (e == PARENT_DONE) { - if (stream->next == ']' && branch_step_indices.size > 0) { - stream_advance(stream); - break; - } - e = TSQueryErrorSyntax; - } - if (e) { - capture_quantifiers_delete(&branch_capture_quantifiers); - array_delete(&branch_step_indices); - return e; - } - - if (start_index == starting_step_index) { - capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers); - } else { - capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers); - } - - array_push(&branch_step_indices, start_index); - array_push(&self->steps, query_step__new(0, depth, false)); - capture_quantifiers_clear(&branch_capture_quantifiers); - } - (void)array_pop(&self->steps); - - // For all of the branches except for the last one, add the subsequent branch as an - // alternative, and link the end of the branch to the current end of the steps. - for (unsigned i = 0; i < branch_step_indices.size - 1; i++) { - uint32_t step_index = branch_step_indices.contents[i]; - uint32_t next_step_index = branch_step_indices.contents[i + 1]; - QueryStep *start_step = &self->steps.contents[step_index]; - QueryStep *end_step = &self->steps.contents[next_step_index - 1]; - start_step->alternative_index = next_step_index; - end_step->alternative_index = self->steps.size; - end_step->is_dead_end = true; - } - - capture_quantifiers_delete(&branch_capture_quantifiers); - array_delete(&branch_step_indices); - } - - // An open parenthesis can be the start of three possible constructs: - // * A grouped sequence - // * A predicate - // * A named node - else if (stream->next == '(') { - stream_advance(stream); - stream_skip_whitespace(stream); - - // If this parenthesis is followed by a node, then it represents a grouped sequence. - if (stream->next == '(' || stream->next == '"' || stream->next == '[') { - bool child_is_immediate = is_immediate; - CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); - for (;;) { - if (stream->next == '.') { - child_is_immediate = true; - stream_advance(stream); - stream_skip_whitespace(stream); - } - TSQueryError e = ts_query__parse_pattern( - self, - stream, - depth, - child_is_immediate, - &child_capture_quantifiers - ); - if (e == PARENT_DONE) { - if (stream->next == ')') { - stream_advance(stream); - break; - } - e = TSQueryErrorSyntax; - } - if (e) { - capture_quantifiers_delete(&child_capture_quantifiers); - return e; - } - - capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); - capture_quantifiers_clear(&child_capture_quantifiers); - child_is_immediate = false; - } - - capture_quantifiers_delete(&child_capture_quantifiers); - } - - // A dot/pound character indicates the start of a predicate. - else if (stream->next == '.' || stream->next == '#') { - stream_advance(stream); - return ts_query__parse_predicate(self, stream); - } - - // Otherwise, this parenthesis is the start of a named node. - else { - TSSymbol symbol; - - // Parse a normal node name - if (stream_is_ident_start(stream)) { - const char *node_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - node_name); - - // Parse the wildcard symbol - if (length == 1 && node_name[0] == '_') { - symbol = WILDCARD_SYMBOL; - } - - else { - symbol = ts_language_symbol_for_name( - self->language, - node_name, - length, - true - ); - if (!symbol) { - stream_reset(stream, node_name); - return TSQueryErrorNodeType; - } - } - } else { - return TSQueryErrorSyntax; - } - - // Add a step for the node. - array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); - QueryStep *step = array_back(&self->steps); - if (ts_language_symbol_metadata(self->language, symbol).supertype) { - step->supertype_symbol = step->symbol; - step->symbol = WILDCARD_SYMBOL; - } - if (symbol == WILDCARD_SYMBOL) { - step->is_named = true; - } - - stream_skip_whitespace(stream); - - if (stream->next == '/') { - stream_advance(stream); - if (!stream_is_ident_start(stream)) { - return TSQueryErrorSyntax; - } - - const char *node_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - node_name); - - step->symbol = ts_language_symbol_for_name( - self->language, - node_name, - length, - true - ); - if (!step->symbol) { - stream_reset(stream, node_name); - return TSQueryErrorNodeType; - } - - stream_skip_whitespace(stream); - } - - // Parse the child patterns - bool child_is_immediate = false; - uint16_t last_child_step_index = 0; - uint16_t negated_field_count = 0; - TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT]; - CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); - for (;;) { - // Parse a negated field assertion - if (stream->next == '!') { - stream_advance(stream); - stream_skip_whitespace(stream); - if (!stream_is_ident_start(stream)) { - capture_quantifiers_delete(&child_capture_quantifiers); - return TSQueryErrorSyntax; - } - const char *field_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - field_name); - stream_skip_whitespace(stream); - - TSFieldId field_id = ts_language_field_id_for_name( - self->language, - field_name, - length - ); - if (!field_id) { - stream->input = field_name; - capture_quantifiers_delete(&child_capture_quantifiers); - return TSQueryErrorField; - } - - // Keep the field ids sorted. - if (negated_field_count < MAX_NEGATED_FIELD_COUNT) { - negated_field_ids[negated_field_count] = field_id; - negated_field_count++; - } - - continue; - } - - // Parse a sibling anchor - if (stream->next == '.') { - child_is_immediate = true; - stream_advance(stream); - stream_skip_whitespace(stream); - } - - uint16_t step_index = self->steps.size; - TSQueryError e = ts_query__parse_pattern( - self, - stream, - depth + 1, - child_is_immediate, - &child_capture_quantifiers - ); - if (e == PARENT_DONE) { - if (stream->next == ')') { - if (child_is_immediate) { - if (last_child_step_index == 0) { - capture_quantifiers_delete(&child_capture_quantifiers); - return TSQueryErrorSyntax; - } - self->steps.contents[last_child_step_index].is_last_child = true; - } - - if (negated_field_count) { - ts_query__add_negated_fields( - self, - starting_step_index, - negated_field_ids, - negated_field_count - ); - } - - stream_advance(stream); - break; - } - e = TSQueryErrorSyntax; - } - if (e) { - capture_quantifiers_delete(&child_capture_quantifiers); - return e; - } - - capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); - - last_child_step_index = step_index; - child_is_immediate = false; - capture_quantifiers_clear(&child_capture_quantifiers); - } - capture_quantifiers_delete(&child_capture_quantifiers); - } - } - - // Parse a wildcard pattern - else if (stream->next == '_') { - stream_advance(stream); - stream_skip_whitespace(stream); - - // Add a step that matches any kind of node - array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); - } - - // Parse a double-quoted anonymous leaf node expression - else if (stream->next == '"') { - const char *string_start = stream->input; - TSQueryError e = ts_query__parse_string_literal(self, stream); - if (e) return e; - - // Add a step for the node - TSSymbol symbol = ts_language_symbol_for_name( - self->language, - self->string_buffer.contents, - self->string_buffer.size, - false - ); - if (!symbol) { - stream_reset(stream, string_start + 1); - return TSQueryErrorNodeType; - } - array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); - } - - // Parse a field-prefixed pattern - else if (stream_is_ident_start(stream)) { - // Parse the field name - const char *field_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - field_name); - stream_skip_whitespace(stream); - - if (stream->next != ':') { - stream_reset(stream, field_name); - return TSQueryErrorSyntax; - } - stream_advance(stream); - stream_skip_whitespace(stream); - - // Parse the pattern - CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new(); - TSQueryError e = ts_query__parse_pattern( - self, - stream, - depth, - is_immediate, - &field_capture_quantifiers - ); - if (e) { - capture_quantifiers_delete(&field_capture_quantifiers); - if (e == PARENT_DONE) e = TSQueryErrorSyntax; - return e; - } - - // Add the field name to the first step of the pattern - TSFieldId field_id = ts_language_field_id_for_name( - self->language, - field_name, - length - ); - if (!field_id) { - stream->input = field_name; - return TSQueryErrorField; - } - - uint32_t step_index = starting_step_index; - QueryStep *step = &self->steps.contents[step_index]; - for (;;) { - step->field = field_id; - if ( - step->alternative_index != NONE && - step->alternative_index > step_index && - step->alternative_index < self->steps.size - ) { - step_index = step->alternative_index; - step = &self->steps.contents[step_index]; - } else { - break; - } - } - - capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers); - capture_quantifiers_delete(&field_capture_quantifiers); - } - - else { - return TSQueryErrorSyntax; - } - - stream_skip_whitespace(stream); - - // Parse suffixes modifiers for this pattern - TSQuantifier quantifier = TSQuantifierOne; - for (;;) { - // Parse the one-or-more operator. - if (stream->next == '+') { - quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier); - - stream_advance(stream); - stream_skip_whitespace(stream); - - QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); - repeat_step.alternative_index = starting_step_index; - repeat_step.is_pass_through = true; - repeat_step.alternative_is_immediate = true; - array_push(&self->steps, repeat_step); - } - - // Parse the zero-or-more repetition operator. - else if (stream->next == '*') { - quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier); - - stream_advance(stream); - stream_skip_whitespace(stream); - - QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); - repeat_step.alternative_index = starting_step_index; - repeat_step.is_pass_through = true; - repeat_step.alternative_is_immediate = true; - array_push(&self->steps, repeat_step); - - // Stop when `step->alternative_index` is `NONE` or it points to - // `repeat_step` or beyond. Note that having just been pushed, - // `repeat_step` occupies slot `self->steps.size - 1`. - QueryStep *step = &self->steps.contents[starting_step_index]; - while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) { - step = &self->steps.contents[step->alternative_index]; - } - step->alternative_index = self->steps.size; - } - - // Parse the optional operator. - else if (stream->next == '?') { - quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier); - - stream_advance(stream); - stream_skip_whitespace(stream); - - QueryStep *step = &self->steps.contents[starting_step_index]; - while (step->alternative_index != NONE && step->alternative_index < self->steps.size) { - step = &self->steps.contents[step->alternative_index]; - } - step->alternative_index = self->steps.size; - } - - // Parse an '@'-prefixed capture pattern - else if (stream->next == '@') { - stream_advance(stream); - if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; - const char *capture_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - capture_name); - stream_skip_whitespace(stream); - - // Add the capture id to the first step of the pattern - uint16_t capture_id = symbol_table_insert_name( - &self->captures, - capture_name, - length - ); - - // Add the capture quantifier - capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne); - - uint32_t step_index = starting_step_index; - for (;;) { - QueryStep *step = &self->steps.contents[step_index]; - query_step__add_capture(step, capture_id); - if ( - step->alternative_index != NONE && - step->alternative_index > step_index && - step->alternative_index < self->steps.size - ) { - step_index = step->alternative_index; - } else { - break; - } - } - } - - // No more suffix modifiers - else { - break; - } - } - - capture_quantifiers_mul(capture_quantifiers, quantifier); - - return 0; -} - -TSQuery *ts_query_new( - const TSLanguage *language, - const char *source, - uint32_t source_len, - uint32_t *error_offset, - TSQueryError *error_type -) { - if ( - !language || - language->version > TREE_SITTER_LANGUAGE_VERSION || - language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION - ) { - *error_type = TSQueryErrorLanguage; - return NULL; - } - - TSQuery *self = ts_malloc(sizeof(TSQuery)); - *self = (TSQuery) { - .steps = array_new(), - .pattern_map = array_new(), - .captures = symbol_table_new(), - .capture_quantifiers = array_new(), - .predicate_values = symbol_table_new(), - .predicate_steps = array_new(), - .patterns = array_new(), - .step_offsets = array_new(), - .string_buffer = array_new(), - .negated_fields = array_new(), - .repeat_symbols_with_rootless_patterns = array_new(), - .wildcard_root_pattern_count = 0, - .language = ts_language_copy(language), - }; - - array_push(&self->negated_fields, 0); - - // Parse all of the S-expressions in the given string. - Stream stream = stream_new(source, source_len); - stream_skip_whitespace(&stream); - while (stream.input < stream.end) { - uint32_t pattern_index = self->patterns.size; - uint32_t start_step_index = self->steps.size; - uint32_t start_predicate_step_index = self->predicate_steps.size; - array_push(&self->patterns, ((QueryPattern) { - .steps = (Slice) {.offset = start_step_index}, - .predicate_steps = (Slice) {.offset = start_predicate_step_index}, - .start_byte = stream_offset(&stream), - .is_non_local = false, - })); - CaptureQuantifiers capture_quantifiers = capture_quantifiers_new(); - *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers); - array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); - - QueryPattern *pattern = array_back(&self->patterns); - pattern->steps.length = self->steps.size - start_step_index; - pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index; - - // If any pattern could not be parsed, then report the error information - // and terminate. - if (*error_type) { - if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax; - *error_offset = stream_offset(&stream); - capture_quantifiers_delete(&capture_quantifiers); - ts_query_delete(self); - return NULL; - } - - // Maintain a list of capture quantifiers for each pattern - array_push(&self->capture_quantifiers, capture_quantifiers); - - // Maintain a map that can look up patterns for a given root symbol. - uint16_t wildcard_root_alternative_index = NONE; - for (;;) { - QueryStep *step = &self->steps.contents[start_step_index]; - - // If a pattern has a wildcard at its root, but it has a non-wildcard child, - // then optimize the matching process by skipping matching the wildcard. - // Later, during the matching process, the query cursor will check that - // there is a parent node, and capture it if necessary. - if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) { - QueryStep *second_step = &self->steps.contents[start_step_index + 1]; - if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) { - wildcard_root_alternative_index = step->alternative_index; - start_step_index += 1; - step = second_step; - } - } - - // Determine whether the pattern has a single root node. This affects - // decisions about whether or not to start matching the pattern when - // a query cursor has a range restriction or when immediately within an - // error node. - uint32_t start_depth = step->depth; - bool is_rooted = start_depth == 0; - for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) { - QueryStep *child_step = &self->steps.contents[step_index]; - if (child_step->is_dead_end) break; - if (child_step->depth == start_depth) { - is_rooted = false; - break; - } - } - - ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) { - .step_index = start_step_index, - .pattern_index = pattern_index, - .is_rooted = is_rooted - }); - if (step->symbol == WILDCARD_SYMBOL) { - self->wildcard_root_pattern_count++; - } - - // If there are alternatives or options at the root of the pattern, - // then add multiple entries to the pattern map. - if (step->alternative_index != NONE) { - start_step_index = step->alternative_index; - } else if (wildcard_root_alternative_index != NONE) { - start_step_index = wildcard_root_alternative_index; - wildcard_root_alternative_index = NONE; - } else { - break; - } - } - } - - if (!ts_query__analyze_patterns(self, error_offset)) { - *error_type = TSQueryErrorStructure; - ts_query_delete(self); - return NULL; - } - - array_delete(&self->string_buffer); - return self; -} - -void ts_query_delete(TSQuery *self) { - if (self) { - array_delete(&self->steps); - array_delete(&self->pattern_map); - array_delete(&self->predicate_steps); - array_delete(&self->patterns); - array_delete(&self->step_offsets); - array_delete(&self->string_buffer); - array_delete(&self->negated_fields); - array_delete(&self->repeat_symbols_with_rootless_patterns); - ts_language_delete(self->language); - symbol_table_delete(&self->captures); - symbol_table_delete(&self->predicate_values); - for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) { - CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index); - capture_quantifiers_delete(capture_quantifiers); - } - array_delete(&self->capture_quantifiers); - ts_free(self); - } -} - -uint32_t ts_query_pattern_count(const TSQuery *self) { - return self->patterns.size; -} - -uint32_t ts_query_capture_count(const TSQuery *self) { - return self->captures.slices.size; -} - -uint32_t ts_query_string_count(const TSQuery *self) { - return self->predicate_values.slices.size; -} - -const char *ts_query_capture_name_for_id( - const TSQuery *self, - uint32_t index, - uint32_t *length -) { - return symbol_table_name_for_id(&self->captures, index, length); -} - -TSQuantifier ts_query_capture_quantifier_for_id( - const TSQuery *self, - uint32_t pattern_index, - uint32_t capture_index -) { - CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index); - return capture_quantifier_for_id(capture_quantifiers, capture_index); -} - -const char *ts_query_string_value_for_id( - const TSQuery *self, - uint32_t index, - uint32_t *length -) { - return symbol_table_name_for_id(&self->predicate_values, index, length); -} - -const TSQueryPredicateStep *ts_query_predicates_for_pattern( - const TSQuery *self, - uint32_t pattern_index, - uint32_t *step_count -) { - Slice slice = self->patterns.contents[pattern_index].predicate_steps; - *step_count = slice.length; - if (self->predicate_steps.contents == NULL) { - return NULL; - } - return &self->predicate_steps.contents[slice.offset]; -} - -uint32_t ts_query_start_byte_for_pattern( - const TSQuery *self, - uint32_t pattern_index -) { - return self->patterns.contents[pattern_index].start_byte; -} - -bool ts_query_is_pattern_rooted( - const TSQuery *self, - uint32_t pattern_index -) { - for (unsigned i = 0; i < self->pattern_map.size; i++) { - PatternEntry *entry = &self->pattern_map.contents[i]; - if (entry->pattern_index == pattern_index) { - if (!entry->is_rooted) return false; - } - } - return true; -} - -bool ts_query_is_pattern_non_local( - const TSQuery *self, - uint32_t pattern_index -) { - if (pattern_index < self->patterns.size) { - return self->patterns.contents[pattern_index].is_non_local; - } else { - return false; - } -} - -bool ts_query_is_pattern_guaranteed_at_step( - const TSQuery *self, - uint32_t byte_offset -) { - uint32_t step_index = UINT32_MAX; - for (unsigned i = 0; i < self->step_offsets.size; i++) { - StepOffset *step_offset = &self->step_offsets.contents[i]; - if (step_offset->byte_offset > byte_offset) break; - step_index = step_offset->step_index; - } - if (step_index < self->steps.size) { - return self->steps.contents[step_index].root_pattern_guaranteed; - } else { - return false; - } -} - -bool ts_query__step_is_fallible( - const TSQuery *self, - uint16_t step_index -) { - assert((uint32_t)step_index + 1 < self->steps.size); - QueryStep *step = &self->steps.contents[step_index]; - QueryStep *next_step = &self->steps.contents[step_index + 1]; - return ( - next_step->depth != PATTERN_DONE_MARKER && - next_step->depth > step->depth && - !next_step->parent_pattern_guaranteed - ); -} - -void ts_query_disable_capture( - TSQuery *self, - const char *name, - uint32_t length -) { - // Remove capture information for any pattern step that previously - // captured with the given name. - int id = symbol_table_id_for_name(&self->captures, name, length); - if (id != -1) { - for (unsigned i = 0; i < self->steps.size; i++) { - QueryStep *step = &self->steps.contents[i]; - query_step__remove_capture(step, id); - } - } -} - -void ts_query_disable_pattern( - TSQuery *self, - uint32_t pattern_index -) { - // Remove the given pattern from the pattern map. Its steps will still - // be in the `steps` array, but they will never be read. - for (unsigned i = 0; i < self->pattern_map.size; i++) { - PatternEntry *pattern = &self->pattern_map.contents[i]; - if (pattern->pattern_index == pattern_index) { - array_erase(&self->pattern_map, i); - i--; - } - } -} - -/*************** - * QueryCursor - ***************/ - -TSQueryCursor *ts_query_cursor_new(void) { - TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor)); - *self = (TSQueryCursor) { - .did_exceed_match_limit = false, - .ascending = false, - .halted = false, - .states = array_new(), - .finished_states = array_new(), - .capture_list_pool = capture_list_pool_new(), - .start_byte = 0, - .end_byte = UINT32_MAX, - .start_point = {0, 0}, - .end_point = POINT_MAX, - .max_start_depth = UINT32_MAX, - }; - array_reserve(&self->states, 8); - array_reserve(&self->finished_states, 8); - return self; -} - -void ts_query_cursor_delete(TSQueryCursor *self) { - array_delete(&self->states); - array_delete(&self->finished_states); - ts_tree_cursor_delete(&self->cursor); - capture_list_pool_delete(&self->capture_list_pool); - ts_free(self); -} - -bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) { - return self->did_exceed_match_limit; -} - -uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) { - return self->capture_list_pool.max_capture_list_count; -} - -void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) { - self->capture_list_pool.max_capture_list_count = limit; -} - -#ifdef DEBUG_EXECUTE_QUERY -#define LOG(...) fprintf(stderr, __VA_ARGS__) -#else -#define LOG(...) -#endif - -void ts_query_cursor_exec( - TSQueryCursor *self, - const TSQuery *query, - TSNode node -) { - if (query) { - LOG("query steps:\n"); - for (unsigned i = 0; i < query->steps.size; i++) { - QueryStep *step = &query->steps.contents[i]; - LOG(" %u: {", i); - if (step->depth == PATTERN_DONE_MARKER) { - LOG("DONE"); - } else if (step->is_dead_end) { - LOG("dead_end"); - } else if (step->is_pass_through) { - LOG("pass_through"); - } else if (step->symbol != WILDCARD_SYMBOL) { - LOG("symbol: %s", query->language->symbol_names[step->symbol]); - } else { - LOG("symbol: *"); - } - if (step->field) { - LOG(", field: %s", query->language->field_names[step->field]); - } - if (step->alternative_index != NONE) { - LOG(", alternative: %u", step->alternative_index); - } - LOG("},\n"); - } - } - - array_clear(&self->states); - array_clear(&self->finished_states); - ts_tree_cursor_reset(&self->cursor, node); - capture_list_pool_reset(&self->capture_list_pool); - self->on_visible_node = true; - self->next_state_id = 0; - self->depth = 0; - self->ascending = false; - self->halted = false; - self->query = query; - self->did_exceed_match_limit = false; -} - -void ts_query_cursor_set_byte_range( - TSQueryCursor *self, - uint32_t start_byte, - uint32_t end_byte -) { - if (end_byte == 0) { - end_byte = UINT32_MAX; - } - self->start_byte = start_byte; - self->end_byte = end_byte; -} - -void ts_query_cursor_set_point_range( - TSQueryCursor *self, - TSPoint start_point, - TSPoint end_point -) { - if (end_point.row == 0 && end_point.column == 0) { - end_point = POINT_MAX; - } - self->start_point = start_point; - self->end_point = end_point; -} - -// Search through all of the in-progress states, and find the captured -// node that occurs earliest in the document. -static bool ts_query_cursor__first_in_progress_capture( - TSQueryCursor *self, - uint32_t *state_index, - uint32_t *byte_offset, - uint32_t *pattern_index, - bool *root_pattern_guaranteed -) { - bool result = false; - *state_index = UINT32_MAX; - *byte_offset = UINT32_MAX; - *pattern_index = UINT32_MAX; - for (unsigned i = 0; i < self->states.size; i++) { - QueryState *state = &self->states.contents[i]; - if (state->dead) continue; - - const CaptureList *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - if (state->consumed_capture_count >= captures->size) { - continue; - } - - TSNode node = captures->contents[state->consumed_capture_count].node; - if ( - ts_node_end_byte(node) <= self->start_byte || - point_lte(ts_node_end_point(node), self->start_point) - ) { - state->consumed_capture_count++; - i--; - continue; - } - - uint32_t node_start_byte = ts_node_start_byte(node); - if ( - !result || - node_start_byte < *byte_offset || - (node_start_byte == *byte_offset && state->pattern_index < *pattern_index) - ) { - QueryStep *step = &self->query->steps.contents[state->step_index]; - if (root_pattern_guaranteed) { - *root_pattern_guaranteed = step->root_pattern_guaranteed; - } else if (step->root_pattern_guaranteed) { - continue; - } - - result = true; - *state_index = i; - *byte_offset = node_start_byte; - *pattern_index = state->pattern_index; - } - } - return result; -} - -// Determine which node is first in a depth-first traversal -int ts_query_cursor__compare_nodes(TSNode left, TSNode right) { - if (left.id != right.id) { - uint32_t left_start = ts_node_start_byte(left); - uint32_t right_start = ts_node_start_byte(right); - if (left_start < right_start) return -1; - if (left_start > right_start) return 1; - uint32_t left_node_count = ts_node_end_byte(left); - uint32_t right_node_count = ts_node_end_byte(right); - if (left_node_count > right_node_count) return -1; - if (left_node_count < right_node_count) return 1; - } - return 0; -} - -// Determine if either state contains a superset of the other state's captures. -void ts_query_cursor__compare_captures( - TSQueryCursor *self, - QueryState *left_state, - QueryState *right_state, - bool *left_contains_right, - bool *right_contains_left -) { - const CaptureList *left_captures = capture_list_pool_get( - &self->capture_list_pool, - left_state->capture_list_id - ); - const CaptureList *right_captures = capture_list_pool_get( - &self->capture_list_pool, - right_state->capture_list_id - ); - *left_contains_right = true; - *right_contains_left = true; - unsigned i = 0, j = 0; - for (;;) { - if (i < left_captures->size) { - if (j < right_captures->size) { - TSQueryCapture *left = &left_captures->contents[i]; - TSQueryCapture *right = &right_captures->contents[j]; - if (left->node.id == right->node.id && left->index == right->index) { - i++; - j++; - } else { - switch (ts_query_cursor__compare_nodes(left->node, right->node)) { - case -1: - *right_contains_left = false; - i++; - break; - case 1: - *left_contains_right = false; - j++; - break; - default: - *right_contains_left = false; - *left_contains_right = false; - i++; - j++; - break; - } - } - } else { - *right_contains_left = false; - break; - } - } else { - if (j < right_captures->size) { - *left_contains_right = false; - } - break; - } - } -} - -static void ts_query_cursor__add_state( - TSQueryCursor *self, - const PatternEntry *pattern -) { - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - uint32_t start_depth = self->depth - step->depth; - - // Keep the states array in ascending order of start_depth and pattern_index, - // so that it can be processed more efficiently elsewhere. Usually, there is - // no work to do here because of two facts: - // * States with lower start_depth are naturally added first due to the - // order in which nodes are visited. - // * Earlier patterns are naturally added first because of the ordering of the - // pattern_map data structure that's used to initiate matches. - // - // This loop is only needed in cases where two conditions hold: - // * A pattern consists of more than one sibling node, so that its states - // remain in progress after exiting the node that started the match. - // * The first node in the pattern matches against multiple nodes at the - // same depth. - // - // An example of this is the pattern '((comment)* (function))'. If multiple - // `comment` nodes appear in a row, then we may initiate a new state for this - // pattern while another state for the same pattern is already in progress. - // If there are multiple patterns like this in a query, then this loop will - // need to execute in order to keep the states ordered by pattern_index. - uint32_t index = self->states.size; - while (index > 0) { - QueryState *prev_state = &self->states.contents[index - 1]; - if (prev_state->start_depth < start_depth) break; - if (prev_state->start_depth == start_depth) { - // Avoid inserting an unnecessary duplicate state, which would be - // immediately pruned by the longest-match criteria. - if ( - prev_state->pattern_index == pattern->pattern_index && - prev_state->step_index == pattern->step_index - ) return; - if (prev_state->pattern_index <= pattern->pattern_index) break; - } - index--; - } - - LOG( - " start state. pattern:%u, step:%u\n", - pattern->pattern_index, - pattern->step_index - ); - array_insert(&self->states, index, ((QueryState) { - .id = UINT32_MAX, - .capture_list_id = NONE, - .step_index = pattern->step_index, - .pattern_index = pattern->pattern_index, - .start_depth = start_depth, - .consumed_capture_count = 0, - .seeking_immediate_match = true, - .has_in_progress_alternatives = false, - .needs_parent = step->depth == 1, - .dead = false, - })); -} - -// Acquire a capture list for this state. If there are no capture lists left in the -// pool, this will steal the capture list from another existing state, and mark that -// other state as 'dead'. -static CaptureList *ts_query_cursor__prepare_to_capture( - TSQueryCursor *self, - QueryState *state, - unsigned state_index_to_preserve -) { - if (state->capture_list_id == NONE) { - state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); - - // If there are no capture lists left in the pool, then terminate whichever - // state has captured the earliest node in the document, and steal its - // capture list. - if (state->capture_list_id == NONE) { - self->did_exceed_match_limit = true; - uint32_t state_index, byte_offset, pattern_index; - if ( - ts_query_cursor__first_in_progress_capture( - self, - &state_index, - &byte_offset, - &pattern_index, - NULL - ) && - state_index != state_index_to_preserve - ) { - LOG( - " abandon state. index:%u, pattern:%u, offset:%u.\n", - state_index, pattern_index, byte_offset - ); - QueryState *other_state = &self->states.contents[state_index]; - state->capture_list_id = other_state->capture_list_id; - other_state->capture_list_id = NONE; - other_state->dead = true; - CaptureList *list = capture_list_pool_get_mut( - &self->capture_list_pool, - state->capture_list_id - ); - array_clear(list); - return list; - } else { - LOG(" ran out of capture lists"); - return NULL; - } - } - } - return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id); -} - -static void ts_query_cursor__capture( - TSQueryCursor *self, - QueryState *state, - QueryStep *step, - TSNode node -) { - if (state->dead) return; - CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX); - if (!capture_list) { - state->dead = true; - return; - } - - for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { - uint16_t capture_id = step->capture_ids[j]; - if (step->capture_ids[j] == NONE) break; - array_push(capture_list, ((TSQueryCapture) { node, capture_id })); - LOG( - " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n", - ts_node_type(node), - state->pattern_index, - capture_id, - capture_list->size - ); - } -} - -// Duplicate the given state and insert the newly-created state immediately after -// the given state in the `states` array. Ensures that the given state reference is -// still valid, even if the states array is reallocated. -static QueryState *ts_query_cursor__copy_state( - TSQueryCursor *self, - QueryState **state_ref -) { - const QueryState *state = *state_ref; - uint32_t state_index = (uint32_t)(state - self->states.contents); - QueryState copy = *state; - copy.capture_list_id = NONE; - - // If the state has captures, copy its capture list. - if (state->capture_list_id != NONE) { - CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, ©, state_index); - if (!new_captures) return NULL; - const CaptureList *old_captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - array_push_all(new_captures, old_captures); - } - - array_insert(&self->states, state_index + 1, copy); - *state_ref = &self->states.contents[state_index]; - return &self->states.contents[state_index + 1]; -} - -static inline bool ts_query_cursor__should_descend( - TSQueryCursor *self, - bool node_intersects_range -) { - - if (node_intersects_range && self->depth < self->max_start_depth) { - return true; - } - - // If there are in-progress matches whose remaining steps occur - // deeper in the tree, then descend. - for (unsigned i = 0; i < self->states.size; i++) { - QueryState *state = &self->states.contents[i];; - QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if ( - next_step->depth != PATTERN_DONE_MARKER && - state->start_depth + next_step->depth > self->depth - ) { - return true; - } - } - - if (self->depth >= self->max_start_depth) { - return false; - } - - // If the current node is hidden, then a non-rooted pattern might match - // one if its roots inside of this node, and match another of its roots - // as part of a sibling node, so we may need to descend. - if (!self->on_visible_node) { - // Descending into a repetition node outside of the range can be - // expensive, because these nodes can have many visible children. - // Avoid descending into repetition nodes unless we have already - // determined that this query can match rootless patterns inside - // of this type of repetition node. - Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor); - if (ts_subtree_is_repetition(subtree)) { - bool exists; - uint32_t index; - array_search_sorted_by( - &self->query->repeat_symbols_with_rootless_patterns,, - ts_subtree_symbol(subtree), - &index, - &exists - ); - return exists; - } - - return true; - } - - return false; -} - -// Walk the tree, processing patterns until at least one pattern finishes, -// If one or more patterns finish, return `true` and store their states in the -// `finished_states` array. Multiple patterns can finish on the same node. If -// there are no more matches, return `false`. -static inline bool ts_query_cursor__advance( - TSQueryCursor *self, - bool stop_on_definite_step -) { - bool did_match = false; - for (;;) { - if (self->halted) { - while (self->states.size > 0) { - QueryState state = array_pop(&self->states); - capture_list_pool_release( - &self->capture_list_pool, - state.capture_list_id - ); - } - } - - if (did_match || self->halted) return did_match; - - // Exit the current node. - if (self->ascending) { - if (self->on_visible_node) { - LOG( - "leave node. depth:%u, type:%s\n", - self->depth, - ts_node_type(ts_tree_cursor_current_node(&self->cursor)) - ); - - // After leaving a node, remove any states that cannot make further progress. - uint32_t deleted_count = 0; - for (unsigned i = 0, n = self->states.size; i < n; i++) { - QueryState *state = &self->states.contents[i]; - QueryStep *step = &self->query->steps.contents[state->step_index]; - - // If a state completed its pattern inside of this node, but was deferred from finishing - // in order to search for longer matches, mark it as finished. - if ( - step->depth == PATTERN_DONE_MARKER && - (state->start_depth > self->depth || self->depth == 0) - ) { - LOG(" finish pattern %u\n", state->pattern_index); - array_push(&self->finished_states, *state); - did_match = true; - deleted_count++; - } - - // If a state needed to match something within this node, then remove that state - // as it has failed to match. - else if ( - step->depth != PATTERN_DONE_MARKER && - (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth - ) { - LOG( - " failed to match. pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - deleted_count++; - } - - else if (deleted_count > 0) { - self->states.contents[i - deleted_count] = *state; - } - } - self->states.size -= deleted_count; - } - - // Leave this node by stepping to its next sibling or to its parent. - switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) { - case TreeCursorStepVisible: - if (!self->on_visible_node) { - self->depth++; - self->on_visible_node = true; - } - self->ascending = false; - break; - case TreeCursorStepHidden: - if (self->on_visible_node) { - self->depth--; - self->on_visible_node = false; - } - self->ascending = false; - break; - default: - if (ts_tree_cursor_goto_parent(&self->cursor)) { - self->depth--; - } else { - LOG("halt at root\n"); - self->halted = true; - } - } - } - - // Enter a new node. - else { - // Get the properties of the current node. - TSNode node = ts_tree_cursor_current_node(&self->cursor); - TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor); - bool parent_precedes_range = !ts_node_is_null(parent_node) && ( - ts_node_end_byte(parent_node) <= self->start_byte || - point_lte(ts_node_end_point(parent_node), self->start_point) - ); - bool parent_follows_range = !ts_node_is_null(parent_node) && ( - ts_node_start_byte(parent_node) >= self->end_byte || - point_gte(ts_node_start_point(parent_node), self->end_point) - ); - bool node_precedes_range = parent_precedes_range || ( - ts_node_end_byte(node) <= self->start_byte || - point_lte(ts_node_end_point(node), self->start_point) - ); - bool node_follows_range = parent_follows_range || ( - ts_node_start_byte(node) >= self->end_byte || - point_gte(ts_node_start_point(node), self->end_point) - ); - bool parent_intersects_range = !parent_precedes_range && !parent_follows_range; - bool node_intersects_range = !node_precedes_range && !node_follows_range; - - if (self->on_visible_node) { - TSSymbol symbol = ts_node_symbol(node); - bool is_named = ts_node_is_named(node); - bool has_later_siblings; - bool has_later_named_siblings; - bool can_have_later_siblings_with_this_field; - TSFieldId field_id = 0; - TSSymbol supertypes[8] = {0}; - unsigned supertype_count = 8; - ts_tree_cursor_current_status( - &self->cursor, - &field_id, - &has_later_siblings, - &has_later_named_siblings, - &can_have_later_siblings_with_this_field, - supertypes, - &supertype_count - ); - LOG( - "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", - self->depth, - ts_node_type(node), - ts_language_field_name_for_id(self->query->language, field_id), - ts_node_start_point(node).row, - self->states.size, - self->finished_states.size - ); - - bool node_is_error = symbol == ts_builtin_sym_error; - bool parent_is_error = - !ts_node_is_null(parent_node) && - ts_node_symbol(parent_node) == ts_builtin_sym_error; - - // Add new states for any patterns whose root node is a wildcard. - if (!node_is_error) { - for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { - PatternEntry *pattern = &self->query->pattern_map.contents[i]; - - // If this node matches the first step of the pattern, then add a new - // state at the start of this pattern. - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - uint32_t start_depth = self->depth - step->depth; - if ( - (pattern->is_rooted ? - node_intersects_range : - (parent_intersects_range && !parent_is_error)) && - (!step->field || field_id == step->field) && - (!step->supertype_symbol || supertype_count > 0) && - (start_depth <= self->max_start_depth) - ) { - ts_query_cursor__add_state(self, pattern); - } - } - } - - // Add new states for any patterns whose root node matches this node. - unsigned i; - if (ts_query__pattern_map_search(self->query, symbol, &i)) { - PatternEntry *pattern = &self->query->pattern_map.contents[i]; - - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - uint32_t start_depth = self->depth - step->depth; - do { - // If this node matches the first step of the pattern, then add a new - // state at the start of this pattern. - if ( - (pattern->is_rooted ? - node_intersects_range : - (parent_intersects_range && !parent_is_error)) && - (!step->field || field_id == step->field) && - (start_depth <= self->max_start_depth) - ) { - ts_query_cursor__add_state(self, pattern); - } - - // Advance to the next pattern whose root node matches this node. - i++; - if (i == self->query->pattern_map.size) break; - pattern = &self->query->pattern_map.contents[i]; - step = &self->query->steps.contents[pattern->step_index]; - } while (step->symbol == symbol); - } - - // Update all of the in-progress states with current node. - for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) { - QueryState *state = &self->states.contents[j]; - QueryStep *step = &self->query->steps.contents[state->step_index]; - state->has_in_progress_alternatives = false; - copy_count = 0; - - // Check that the node matches all of the criteria for the next - // step of the pattern. - if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue; - - // Determine if this node matches this step of the pattern, and also - // if this node can have later siblings that match this step of the - // pattern. - bool node_does_match = false; - if (step->symbol == WILDCARD_SYMBOL) { - node_does_match = !node_is_error && (is_named || !step->is_named); - } else { - node_does_match = symbol == step->symbol; - } - bool later_sibling_can_match = has_later_siblings; - if ((step->is_immediate && is_named) || state->seeking_immediate_match) { - later_sibling_can_match = false; - } - if (step->is_last_child && has_later_named_siblings) { - node_does_match = false; - } - if (step->supertype_symbol) { - bool has_supertype = false; - for (unsigned k = 0; k < supertype_count; k++) { - if (supertypes[k] == step->supertype_symbol) { - has_supertype = true; - break; - } - } - if (!has_supertype) node_does_match = false; - } - if (step->field) { - if (step->field == field_id) { - if (!can_have_later_siblings_with_this_field) { - later_sibling_can_match = false; - } - } else { - node_does_match = false; - } - } - - if (step->negated_field_list_id) { - TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id]; - for (;;) { - TSFieldId negated_field_id = *negated_field_ids; - if (negated_field_id) { - negated_field_ids++; - if (ts_node_child_by_field_id(node, negated_field_id).id) { - node_does_match = false; - break; - } - } else { - break; - } - } - } - - // Remove states immediately if it is ever clear that they cannot match. - if (!node_does_match) { - if (!later_sibling_can_match) { - LOG( - " discard state. pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - array_erase(&self->states, j); - j--; - } - continue; - } - - // Some patterns can match their root node in multiple ways, capturing different - // children. If this pattern step could match later children within the same - // parent, then this query state cannot simply be updated in place. It must be - // split into two states: one that matches this node, and one which skips over - // this node, to preserve the possibility of matching later siblings. - if (later_sibling_can_match && ( - step->contains_captures || - ts_query__step_is_fallible(self->query, state->step_index) - )) { - if (ts_query_cursor__copy_state(self, &state)) { - LOG( - " split state for capture. pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - copy_count++; - } - } - - // If this pattern started with a wildcard, such that the pattern map - // actually points to the *second* step of the pattern, then check - // that the node has a parent, and capture the parent node if necessary. - if (state->needs_parent) { - TSNode parent = ts_tree_cursor_parent_node(&self->cursor); - if (ts_node_is_null(parent)) { - LOG(" missing parent node\n"); - state->dead = true; - } else { - state->needs_parent = false; - QueryStep *skipped_wildcard_step = step; - do { - skipped_wildcard_step--; - } while ( - skipped_wildcard_step->is_dead_end || - skipped_wildcard_step->is_pass_through || - skipped_wildcard_step->depth > 0 - ); - if (skipped_wildcard_step->capture_ids[0] != NONE) { - LOG(" capture wildcard parent\n"); - ts_query_cursor__capture( - self, - state, - skipped_wildcard_step, - parent - ); - } - } - } - - // If the current node is captured in this pattern, add it to the capture list. - if (step->capture_ids[0] != NONE) { - ts_query_cursor__capture(self, state, step, node); - } - - if (state->dead) { - array_erase(&self->states, j); - j--; - continue; - } - - // Advance this state to the next step of its pattern. - state->step_index++; - state->seeking_immediate_match = false; - LOG( - " advance state. pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - - QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true; - - // If this state's next step has an alternative step, then copy the state in order - // to pursue both alternatives. The alternative step itself may have an alternative, - // so this is an interactive process. - unsigned end_index = j + 1; - for (unsigned k = j; k < end_index; k++) { - QueryState *child_state = &self->states.contents[k]; - QueryStep *child_step = &self->query->steps.contents[child_state->step_index]; - if (child_step->alternative_index != NONE) { - // A "dead-end" step exists only to add a non-sequential jump into the step sequence, - // via its alternative index. When a state reaches a dead-end step, it jumps straight - // to the step's alternative. - if (child_step->is_dead_end) { - child_state->step_index = child_step->alternative_index; - k--; - continue; - } - - // A "pass-through" step exists only to add a branch into the step sequence, - // via its alternative_index. When a state reaches a pass-through step, it splits - // in order to process the alternative step, and then it advances to the next step. - if (child_step->is_pass_through) { - child_state->step_index++; - k--; - } - - QueryState *copy = ts_query_cursor__copy_state(self, &child_state); - if (copy) { - LOG( - " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", - copy->pattern_index, - copy->step_index, - next_step->alternative_index, - next_step->alternative_is_immediate, - capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size - ); - end_index++; - copy_count++; - copy->step_index = child_step->alternative_index; - if (child_step->alternative_is_immediate) { - copy->seeking_immediate_match = true; - } - } - } - } - } - - for (unsigned j = 0; j < self->states.size; j++) { - QueryState *state = &self->states.contents[j]; - if (state->dead) { - array_erase(&self->states, j); - j--; - continue; - } - - // Enforce the longest-match criteria. When a query pattern contains optional or - // repeated nodes, this is necessary to avoid multiple redundant states, where - // one state has a strict subset of another state's captures. - bool did_remove = false; - for (unsigned k = j + 1; k < self->states.size; k++) { - QueryState *other_state = &self->states.contents[k]; - - // Query states are kept in ascending order of start_depth and pattern_index. - // Since the longest-match criteria is only used for deduping matches of the same - // pattern and root node, we only need to perform pairwise comparisons within a - // small slice of the states array. - if ( - other_state->start_depth != state->start_depth || - other_state->pattern_index != state->pattern_index - ) break; - - bool left_contains_right, right_contains_left; - ts_query_cursor__compare_captures( - self, - state, - other_state, - &left_contains_right, - &right_contains_left - ); - if (left_contains_right) { - if (state->step_index == other_state->step_index) { - LOG( - " drop shorter state. pattern: %u, step_index: %u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); - array_erase(&self->states, k); - k--; - continue; - } - other_state->has_in_progress_alternatives = true; - } - if (right_contains_left) { - if (state->step_index == other_state->step_index) { - LOG( - " drop shorter state. pattern: %u, step_index: %u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); - array_erase(&self->states, j); - j--; - did_remove = true; - break; - } - state->has_in_progress_alternatives = true; - } - } - - // If the state is at the end of its pattern, remove it from the list - // of in-progress states and add it to the list of finished states. - if (!did_remove) { - LOG( - " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n", - state->pattern_index, - state->start_depth, - state->step_index, - capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size - ); - QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if (next_step->depth == PATTERN_DONE_MARKER) { - if (state->has_in_progress_alternatives) { - LOG(" defer finishing pattern %u\n", state->pattern_index); - } else { - LOG(" finish pattern %u\n", state->pattern_index); - array_push(&self->finished_states, *state); - array_erase(&self->states, (uint32_t)(state - self->states.contents)); - did_match = true; - j--; - } - } - } - } - } - - if (ts_query_cursor__should_descend(self, node_intersects_range)) { - switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) { - case TreeCursorStepVisible: - self->depth++; - self->on_visible_node = true; - continue; - case TreeCursorStepHidden: - self->on_visible_node = false; - continue; - default: - break; - } - } - - self->ascending = true; - } - } -} - -bool ts_query_cursor_next_match( - TSQueryCursor *self, - TSQueryMatch *match -) { - if (self->finished_states.size == 0) { - if (!ts_query_cursor__advance(self, false)) { - return false; - } - } - - QueryState *state = &self->finished_states.contents[0]; - if (state->id == UINT32_MAX) state->id = self->next_state_id++; - match->id = state->id; - match->pattern_index = state->pattern_index; - const CaptureList *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - match->captures = captures->contents; - match->capture_count = captures->size; - capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); - array_erase(&self->finished_states, 0); - return true; -} - -void ts_query_cursor_remove_match( - TSQueryCursor *self, - uint32_t match_id -) { - for (unsigned i = 0; i < self->finished_states.size; i++) { - const QueryState *state = &self->finished_states.contents[i]; - if (state->id == match_id) { - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - array_erase(&self->finished_states, i); - return; - } - } - - // Remove unfinished query states as well to prevent future - // captures for a match being removed. - for (unsigned i = 0; i < self->states.size; i++) { - const QueryState *state = &self->states.contents[i]; - if (state->id == match_id) { - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - array_erase(&self->states, i); - return; - } - } -} - -bool ts_query_cursor_next_capture( - TSQueryCursor *self, - TSQueryMatch *match, - uint32_t *capture_index -) { - // The goal here is to return captures in order, even though they may not - // be discovered in order, because patterns can overlap. Search for matches - // until there is a finished capture that is before any unfinished capture. - for (;;) { - // First, find the earliest capture in an unfinished match. - uint32_t first_unfinished_capture_byte; - uint32_t first_unfinished_pattern_index; - uint32_t first_unfinished_state_index; - bool first_unfinished_state_is_definite = false; - ts_query_cursor__first_in_progress_capture( - self, - &first_unfinished_state_index, - &first_unfinished_capture_byte, - &first_unfinished_pattern_index, - &first_unfinished_state_is_definite - ); - - // Then find the earliest capture in a finished match. It must occur - // before the first capture in an *unfinished* match. - QueryState *first_finished_state = NULL; - uint32_t first_finished_capture_byte = first_unfinished_capture_byte; - uint32_t first_finished_pattern_index = first_unfinished_pattern_index; - for (unsigned i = 0; i < self->finished_states.size;) { - QueryState *state = &self->finished_states.contents[i]; - const CaptureList *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - - // Remove states whose captures are all consumed. - if (state->consumed_capture_count >= captures->size) { - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - array_erase(&self->finished_states, i); - continue; - } - - TSNode node = captures->contents[state->consumed_capture_count].node; - - bool node_precedes_range = ( - ts_node_end_byte(node) <= self->start_byte || - point_lte(ts_node_end_point(node), self->start_point) - ); - bool node_follows_range = ( - ts_node_start_byte(node) >= self->end_byte || - point_gte(ts_node_start_point(node), self->end_point) - ); - bool node_outside_of_range = node_precedes_range || node_follows_range; - - // Skip captures that are outside of the cursor's range. - if (node_outside_of_range) { - state->consumed_capture_count++; - continue; - } - - uint32_t node_start_byte = ts_node_start_byte(node); - if ( - node_start_byte < first_finished_capture_byte || - ( - node_start_byte == first_finished_capture_byte && - state->pattern_index < first_finished_pattern_index - ) - ) { - first_finished_state = state; - first_finished_capture_byte = node_start_byte; - first_finished_pattern_index = state->pattern_index; - } - i++; - } - - // If there is finished capture that is clearly before any unfinished - // capture, then return its match, and its capture index. Internally - // record the fact that the capture has been 'consumed'. - QueryState *state; - if (first_finished_state) { - state = first_finished_state; - } else if (first_unfinished_state_is_definite) { - state = &self->states.contents[first_unfinished_state_index]; - } else { - state = NULL; - } - - if (state) { - if (state->id == UINT32_MAX) state->id = self->next_state_id++; - match->id = state->id; - match->pattern_index = state->pattern_index; - const CaptureList *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - match->captures = captures->contents; - match->capture_count = captures->size; - *capture_index = state->consumed_capture_count; - state->consumed_capture_count++; - return true; - } - - if (capture_list_pool_is_empty(&self->capture_list_pool)) { - LOG( - " abandon state. index:%u, pattern:%u, offset:%u.\n", - first_unfinished_state_index, - first_unfinished_pattern_index, - first_unfinished_capture_byte - ); - capture_list_pool_release( - &self->capture_list_pool, - self->states.contents[first_unfinished_state_index].capture_list_id - ); - array_erase(&self->states, first_unfinished_state_index); - } - - // If there are no finished matches that are ready to be returned, then - // continue finding more matches. - if ( - !ts_query_cursor__advance(self, true) && - self->finished_states.size == 0 - ) return false; - } -} - -void ts_query_cursor_set_max_start_depth( - TSQueryCursor *self, - uint32_t max_start_depth -) { - self->max_start_depth = max_start_depth; -} - -#undef LOG diff --git a/parser/src/tree.c b/parser/src/tree.c index 1cea1794..2aa9b9fa 100644 --- a/parser/src/tree.c +++ b/parser/src/tree.c @@ -1,165 +1,167 @@ #define _POSIX_C_SOURCE 200112L -#include "api.h" +#include "./tree.h" #include "./array.h" -#include "./get_changed_ranges.h" #include "./length.h" #include "./subtree.h" #include "./tree_cursor.h" -#include "./tree.h" +#include "api.h" -TSTree *ts_tree_new( - Subtree root, const TSLanguage *language, - const TSRange *included_ranges, unsigned included_range_count -) { - TSTree *result = ts_malloc(sizeof(TSTree)); - result->root = root; - result->language = ts_language_copy(language); - result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange)); - memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange)); - result->included_range_count = included_range_count; - return result; +TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *included_ranges, unsigned included_range_count) +{ + TSTree *result = ts_malloc(sizeof(TSTree)); + result->root = root; + result->language = ts_language_copy(language); + result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange)); + memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange)); + result->included_range_count = included_range_count; + return result; } -TSTree *ts_tree_copy(const TSTree *self) { - ts_subtree_retain(self->root); - return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count); +TSTree *ts_tree_copy(const TSTree *self) +{ + ts_subtree_retain(self->root); + return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count); } -void ts_tree_delete(TSTree *self) { - if (!self) return; +void ts_tree_delete(TSTree *self) +{ + if (!self) + return; - SubtreePool pool = ts_subtree_pool_new(0); - ts_subtree_release(&pool, self->root); - ts_subtree_pool_delete(&pool); - ts_language_delete(self->language); - ts_free(self->included_ranges); - ts_free(self); + SubtreePool pool = ts_subtree_pool_new(0); + ts_subtree_release(&pool, self->root); + ts_subtree_pool_delete(&pool); + ts_language_delete(self->language); + ts_free(self->included_ranges); + ts_free(self); } -TSNode ts_tree_root_node(const TSTree *self) { - return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0); +TSNode ts_tree_root_node(const TSTree *self) +{ + return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0); } -TSNode ts_tree_root_node_with_offset( - const TSTree *self, - uint32_t offset_bytes, - TSPoint offset_extent -) { - Length offset = {offset_bytes, offset_extent}; - return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0); +TSNode ts_tree_root_node_with_offset(const TSTree *self, uint32_t offset_bytes, TSPoint offset_extent) +{ + Length offset = {offset_bytes, offset_extent}; + return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0); } -const TSLanguage *ts_tree_language(const TSTree *self) { - return self->language; +const TSLanguage *ts_tree_language(const TSTree *self) +{ + return self->language; } -void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { - for (unsigned i = 0; i < self->included_range_count; i++) { - TSRange *range = &self->included_ranges[i]; - if (range->end_byte >= edit->old_end_byte) { - if (range->end_byte != UINT32_MAX) { - range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte); - range->end_point = point_add( - edit->new_end_point, - point_sub(range->end_point, edit->old_end_point) - ); - if (range->end_byte < edit->new_end_byte) { - range->end_byte = UINT32_MAX; - range->end_point = POINT_MAX; - } - } - } else if (range->end_byte > edit->start_byte) { - range->end_byte = edit->start_byte; - range->end_point = edit->start_point; - } - if (range->start_byte >= edit->old_end_byte) { - range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte); - range->start_point = point_add( - edit->new_end_point, - point_sub(range->start_point, edit->old_end_point) - ); - if (range->start_byte < edit->new_end_byte) { - range->start_byte = UINT32_MAX; - range->start_point = POINT_MAX; - } - } else if (range->start_byte > edit->start_byte) { - range->start_byte = edit->start_byte; - range->start_point = edit->start_point; - } - } +void ts_tree_edit(TSTree *self, const TSInputEdit *edit) +{ + for (unsigned i = 0; i < self->included_range_count; i++) + { + TSRange *range = &self->included_ranges[i]; + if (range->end_byte >= edit->old_end_byte) + { + if (range->end_byte != UINT32_MAX) + { + range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte); + range->end_point = point_add(edit->new_end_point, point_sub(range->end_point, edit->old_end_point)); + if (range->end_byte < edit->new_end_byte) + { + range->end_byte = UINT32_MAX; + range->end_point = POINT_MAX; + } + } + } + else if (range->end_byte > edit->start_byte) + { + range->end_byte = edit->start_byte; + range->end_point = edit->start_point; + } + if (range->start_byte >= edit->old_end_byte) + { + range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte); + range->start_point = point_add(edit->new_end_point, point_sub(range->start_point, edit->old_end_point)); + if (range->start_byte < edit->new_end_byte) + { + range->start_byte = UINT32_MAX; + range->start_point = POINT_MAX; + } + } + else if (range->start_byte > edit->start_byte) + { + range->start_byte = edit->start_byte; + range->start_point = edit->start_point; + } + } - SubtreePool pool = ts_subtree_pool_new(0); - self->root = ts_subtree_edit(self->root, edit, &pool); - ts_subtree_pool_delete(&pool); + SubtreePool pool = ts_subtree_pool_new(0); + self->root = ts_subtree_edit(self->root, edit, &pool); + ts_subtree_pool_delete(&pool); } -TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) { - *length = self->included_range_count; - TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange)); - memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange)); - return ranges; +TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) +{ + *length = self->included_range_count; + TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange)); + memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange)); + return ranges; } -TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) { - TreeCursor cursor1 = {NULL, array_new(), 0}; - TreeCursor cursor2 = {NULL, array_new(), 0}; - ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree)); - ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree)); +// TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) +// { +// TreeCursor cursor1 = {NULL, array_new(), 0}; +// TreeCursor cursor2 = {NULL, array_new(), 0}; +// ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree)); +// ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree)); - TSRangeArray included_range_differences = array_new(); - ts_range_array_get_changed_ranges( - old_tree->included_ranges, old_tree->included_range_count, - new_tree->included_ranges, new_tree->included_range_count, - &included_range_differences - ); +// TSRangeArray included_range_differences = array_new(); +// ts_range_array_get_changed_ranges(old_tree->included_ranges, old_tree->included_range_count, new_tree->included_ranges, +// new_tree->included_range_count, &included_range_differences); - TSRange *result; - *length = ts_subtree_get_changed_ranges( - &old_tree->root, &new_tree->root, &cursor1, &cursor2, - old_tree->language, &included_range_differences, &result - ); +// TSRange *result; +// *length = ts_subtree_get_changed_ranges(&old_tree->root, &new_tree->root, &cursor1, &cursor2, old_tree->language, +// &included_range_differences, &result); - array_delete(&included_range_differences); - array_delete(&cursor1.stack); - array_delete(&cursor2.stack); - return result; -} +// array_delete(&included_range_differences); +// array_delete(&cursor1.stack); +// array_delete(&cursor2.stack); +// return result; +// } #ifdef _WIN32 -#include -#include +# include +# include -int _ts_dup(HANDLE handle) { - HANDLE dup_handle; - if (!DuplicateHandle( - GetCurrentProcess(), handle, - GetCurrentProcess(), &dup_handle, - 0, FALSE, DUPLICATE_SAME_ACCESS - )) return -1; +int _ts_dup(HANDLE handle) +{ + HANDLE dup_handle; + if (!DuplicateHandle(GetCurrentProcess(), handle, GetCurrentProcess(), &dup_handle, 0, FALSE, DUPLICATE_SAME_ACCESS)) + return -1; - return _open_osfhandle((intptr_t)dup_handle, 0); + return _open_osfhandle((intptr_t)dup_handle, 0); } -void ts_tree_print_dot_graph(const TSTree *self, int fd) { - FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a"); - ts_subtree_print_dot_graph(self->root, self->language, file); - fclose(file); +void ts_tree_print_dot_graph(const TSTree *self, int fd) +{ + FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a"); + ts_subtree_print_dot_graph(self->root, self->language, file); + fclose(file); } #else -#include +# include -int _ts_dup(int file_descriptor) { - return dup(file_descriptor); +int _ts_dup(int file_descriptor) +{ + return dup(file_descriptor); } -void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) { - FILE *file = fdopen(_ts_dup(file_descriptor), "a"); - ts_subtree_print_dot_graph(self->root, self->language, file); - fclose(file); +void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) +{ + FILE *file = fdopen(_ts_dup(file_descriptor), "a"); + ts_subtree_print_dot_graph(self->root, self->language, file); + fclose(file); } #endif diff --git a/parser/src/unicode.h b/parser/src/unicode.h index 104692f2..766be865 100644 --- a/parser/src/unicode.h +++ b/parser/src/unicode.h @@ -26,12 +26,12 @@ static inline uint32_t ts_decode_ascii(const uint8_t *string, uint32_t length, i static inline uint32_t ts_decode_utf8(const uint8_t *string, uint32_t length, int32_t *code_point) { - return (ts_decode_ascii(string, length, code_point)); - /* - uint32_t i = 0; - U8_NEXT(string, i, length, *code_point); - return i; - */ + return (ts_decode_ascii(string, length, code_point)); + /* + uint32_t i = 0; + U8_NEXT(string, i, length, *code_point); + return i; + */ } static inline uint32_t ts_decode_utf16(const uint8_t *string, uint32_t length, int32_t *code_point)