diff --git a/parser/src/api.h b/parser/src/api.h index 033c0d2f..b9df14fb 100644 --- a/parser/src/api.h +++ b/parser/src/api.h @@ -1,6 +1,7 @@ #ifndef TREE_SITTER_ARRAY_H_ #define TREE_SITTER_ARRAY_H_ +#include "me/char/char.h" #include "me/mem/mem.h" #include #include @@ -8,7 +9,9 @@ #include #include #include -#include "me/char/char.h" + +#include "./api_structs.h" +#include "./array.h" #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) #define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14 @@ -29,14 +32,14 @@ #define MAX_ITERATOR_COUNT 64 #define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX #define TS_MAX_TREE_POOL_SIZE 32 -#define ts_builtin_sym_error ((t_symbol)-1) +#define ts_builtin_sym_error ((t_symbol) - 1) #define ts_builtin_sym_end 0 -#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + #define POINT_ZERO ((t_point){0, 0}) #define POINT_MAX ((t_point){UINT32_MAX, UINT32_MAX}) #define TS_TREE_STATE_NONE USHRT_MAX #define NULL_SUBTREE ((t_subtree){.ptr = NULL}) -#define STACK_VERSION_NONE ((t_stack_version)-1) +#define STACK_VERSION_NONE ((t_stack_version) - 1) #define TS_DECODE_ERROR (-1) #if true @@ -51,152 +54,6 @@ # define free(p) mem_free((p)) #endif -#define Array(T) \ - struct \ - { \ - T *contents; \ - uint32_t size; \ - uint32_t capacity; \ - } - -#ifndef inline -# define inline __inline__ -#endif - -/// Initialize an array. -#define array_init(self) \ - ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) - -/// Create an empty array. -#define array_new() \ - { \ - NULL, 0, 0 \ - } - -/// Get a pointer to the element at a given `index` in the array. -#define array_get(self, _index) \ - (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) - -/// Get a pointer to the first element in the array. -#define array_front(self) array_get(self, 0) - -/// Get a pointer to the last element in the array. -#define array_back(self) array_get(self, (self)->size - 1) - -/// Clear the array, setting its size to zero. 
Note that this does not free any -/// memory allocated for the array's contents. -#define array_clear(self) ((self)->size = 0) - -/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is -/// less than the array's current capacity, this function has no effect. -#define array_reserve(self, new_capacity) \ - _array__reserve((Array *)(self), array_elem_size(self), new_capacity) - -/// Free any memory allocated for this array. Note that this does not free any -/// memory allocated for the array's contents. -#define array_delete(self) _array__delete((Array *)(self)) - -/// Push a new `element` onto the end of the array. -#define array_push(self, element) \ - (_array__grow((Array *)(self), 1, array_elem_size(self)), \ - (self)->contents[(self)->size++] = (element)) - -/// Increase the array's size by `count` elements. -/// New elements are zero-initialized. -#define array_grow_by(self, count) \ - do \ - { \ - if ((count) == 0) \ - break; \ - _array__grow((Array *)(self), count, array_elem_size(self)); \ - memset((self)->contents + (self)->size, 0, \ - (count) * array_elem_size(self)); \ - (self)->size += (count); \ - } while (0) - -/// Append all elements from one array to the end of another. -#define array_push_all(self, other) \ - array_extend((self), (other)->size, (other)->contents) - -/// Append `count` elements to the end of the array, reading their values from -/// the `contents` pointer. -#define array_extend(self, count, contents) \ - _array__splice((Array *)(self), array_elem_size(self), (self)->size, 0, \ - count, contents) - -/// Remove `old_count` elements from the array starting at the given `index`. At -/// the same index, insert `new_count` new elements, reading their values from -/// the `new_contents` pointer. 
-#define array_splice(self, _index, old_count, new_count, new_contents) \ - _array__splice((Array *)(self), array_elem_size(self), _index, old_count, \ - new_count, new_contents) - -/// Insert one `element` into the array at the given `index`. -#define array_insert(self, _index, element) \ - _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, \ - &(element)) - -/// Remove one element from the array at the given `index`. -#define array_erase(self, _index) \ - _array__erase((Array *)(self), array_elem_size(self), _index) - -/// Pop the last element off the array, returning the element by value. -#define array_pop(self) ((self)->contents[--(self)->size]) - -/// Assign the contents of one array to another, reallocating if necessary. -#define array_assign(self, other) \ - _array__assign((Array *)(self), (const Array *)(other), \ - array_elem_size(self)) - -/// Swap one array with another -#define array_swap(self, other) _array__swap((Array *)(self), (Array *)(other)) - -/// Get the size of the array contents -#define array_elem_size(self) (sizeof *(self)->contents) - -/// Search a sorted array for a given `needle` value, using the given `compare` -/// callback to determine the order. -/// -/// If an existing element is found to be equal to `needle`, then the `index` -/// out-parameter is set to the existing value's index, and the `exists` -/// out-parameter is set to true. Otherwise, `index` is set to an index where -/// `needle` should be inserted in order to preserve the sorting, and `exists` -/// is set to false. -#define array_search_sorted_with(self, compare, needle, _index, _exists) \ - _array__search_sorted(self, 0, compare, , needle, _index, _exists) - -/// Search a sorted array for a given `needle` value, using integer comparisons -/// of a given struct field (specified with a leading dot) to determine the -/// order. -/// -/// See also `array_search_sorted_with`. 
-#define array_search_sorted_by(self, field, needle, _index, _exists) \ - _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) - -/// Insert a given `value` into a sorted array, using the given `compare` -/// callback to determine the order. -#define array_insert_sorted_with(self, compare, value) \ - do \ - { \ - unsigned _index, _exists; \ - array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ - if (!_exists) \ - array_insert(self, _index, value); \ - } while (0) - -/// Insert a given `value` into a sorted array, using integer comparisons of -/// a given struct field (specified with a leading dot) to determine the order. -/// -/// See also `array_search_sorted_by`. -#define array_insert_sorted_by(self, field, value) \ - do \ - { \ - unsigned _index, _exists; \ - array_search_sorted_by(self, field, (value)field, &_index, &_exists); \ - if (!_exists) \ - array_insert(self, _index, value); \ - } while (0) - // Get a subtree's children, which are allocated immediately before the // tree's own heap data. #define ts_subtree_children(self) \ @@ -204,153 +61,6 @@ ? 
NULL \ : (t_subtree *)((self).ptr) - (self).ptr->child_count) -typedef uint16_t t_state_id; -typedef uint16_t t_symbol; -typedef uint16_t t_field_id; -typedef struct s_language t_language; -typedef struct s_first_parser t_first_parser; -typedef struct s_first_tree t_first_tree; -typedef struct s_parse_query t_parse_query; -typedef struct s_query_cursor t_query_cursor; -typedef struct s_lookahead_iterator t_lookahead_iterator; - -typedef struct s_point -{ - uint32_t row; - uint32_t column; -} t_point; - -typedef struct s_length -{ - uint32_t bytes; - t_point extent; -} t_length; - -typedef enum e_input_encoding -{ - TSInputEncodingUTF8, - TSInputEncodingUTF16, -} t_input_encoding; - -typedef enum e_symbol_type -{ - TSSymbolTypeRegular, - TSSymbolTypeAnonymous, - TSSymbolTypeAuxiliary, -} t_symbol_type; - -typedef struct s_parse_range -{ - t_point start_point; - t_point end_point; - uint32_t start_byte; - uint32_t end_byte; -} t_parse_range; - -typedef struct s_parse_input -{ - void *payload; - const char *(*read)(void *payload, uint32_t byte_index, t_point position, - uint32_t *bytes_read); - t_input_encoding encoding; -} t_parse_input; - -typedef enum e_log_type -{ - TSLogTypeParse, - TSLogTypeLex, -} t_log_type; - -typedef struct s_parse_logger -{ - void *payload; - void (*log)(void *payload, t_log_type log_type, const char *buffer); -} t_parse_logger; - -typedef struct s_input_edit -{ - uint32_t start_byte; - uint32_t old_end_byte; - uint32_t new_end_byte; - t_point start_point; - t_point old_end_point; - t_point new_end_point; -} t_input_edit; - -typedef struct s_parse_node -{ - uint32_t context[4]; - const void *id; - const t_first_tree *tree; -} t_parse_node; - -typedef struct s_tree_cursor_entry -{ - const union u_subtree *subtree; - t_length position; - uint32_t child_index; - uint32_t structural_child_index; - uint32_t descendant_index; -} t_tree_cursor_entry; - -typedef struct s_tree_cursor -{ - const t_first_tree *tree; - Array(t_tree_cursor_entry) stack; 
- t_symbol root_alias_symbol; -} t_tree_cursor; - -typedef struct s_query_capture -{ - t_parse_node node; - uint32_t index; -} t_query_capture; - -typedef enum e_quantifier -{ - TSQuantifierZero = 0, // must match the array initialization value - TSQuantifierZeroOrOne, - TSQuantifierZeroOrMore, - TSQuantifierOne, - TSQuantifierOneOrMore, -} t_quantifier; - -typedef struct s_query_match -{ - uint32_t id; - uint16_t pattern_index; - uint16_t capture_count; - const t_query_capture *captures; -} t_query_match; - -typedef enum e_query_predicate_step_type -{ - TSQueryPredicateStepTypeDone, - TSQueryPredicateStepTypeCapture, - TSQueryPredicateStepTypeString, -} t_query_predicate_step_type; - -typedef struct s_query_predicate_step -{ - t_query_predicate_step_type type; - uint32_t value_id; -} t_query_predicate_step; - -typedef enum e_query_error -{ - TSQueryErrorNone = 0, - TSQueryErrorSyntax, - TSQueryErrorNodeType, - TSQueryErrorField, - TSQueryErrorCapture, - TSQueryErrorStructure, - TSQueryErrorLanguage, -} t_query_error; - -// Private - -typedef Array(void) Array; - /// This is not what you're looking for, see `array_delete`. static inline void _array__delete(Array *self) { @@ -497,10 +207,6 @@ static inline void _array__splice(Array *self, size_t element_size, /// function above. #define _compare_int(a, b) ((int)*(a) - (int)(b)) -#include -#include -#include - static inline size_t atomic_load(const volatile size_t *p) { #ifdef __ATOMIC_RELAXED @@ -528,25 +234,6 @@ static inline uint32_t atomic_dec(volatile uint32_t *p) #endif } -// The serialized state of an external scanner. -// -// Every time an external token subtree is created after a call to an -// external scanner, the scanner's `serialize` function is called to -// retrieve a serialized copy of its state. The bytes are then copied -// onto the subtree itself so that the scanner's state can later be -// restored using its `deserialize` function. 
-// -// Small byte arrays are stored inline, and long ones are allocated -// separately on the heap. -typedef struct -{ - union { - char *long_data; - char short_data[24]; - }; - uint32_t length; -} t_external_scanner_state; - // A compact representation of a subtree. // // This representation is used for small leaf nodes that are not @@ -558,161 +245,12 @@ typedef struct // Because of alignment, for any valid pointer this will be 0, giving // us the opportunity to make use of this bit to signify whether to use // the pointer or the inline struct. -typedef struct s_subtree_inline_data t_subtree_inline_data; - -#define SUBTREE_BITS \ - bool visible : 1; \ - bool named : 1; \ - bool extra : 1; \ - bool has_changes : 1; \ - bool is_missing : 1; \ - bool is_keyword : 1; - -#define SUBTREE_SIZE \ - uint8_t padding_columns; \ - uint8_t padding_rows : 4; \ - uint8_t lookahead_bytes : 4; \ - uint8_t padding_bytes; \ - uint8_t size_bytes; - -#if TS_BIG_ENDIAN -# if TS_PTR_SIZE == 32 - -struct s_subtree_inline_data -{ - uint16_t parse_state; - uint8_t symbol; - SUBTREE_BITS - bool unused : 1; - bool is_inline : 1; - SUBTREE_SIZE -}; - -# else - -struct s_subtree_inline_data -{ - SUBTREE_SIZE - uint16_t parse_state; - uint8_t symbol; - SUBTREE_BITS - bool unused : 1; - bool is_inline : 1; -}; - -# endif -#else - -struct s_subtree_inline_data -{ - bool is_inline : 1; - SUBTREE_BITS - uint8_t symbol; - uint16_t parse_state; - SUBTREE_SIZE -}; - -#endif - -#undef SUBTREE_BITS -#undef SUBTREE_SIZE // A heap-allocated representation of a subtree. // // This representation is used for parent nodes, external tokens, // errors, and other leaf nodes whose data is too large to fit into // the inline representation. 
-typedef struct s_subtree_heap_data -{ - volatile uint32_t ref_count; - t_length padding; - t_length size; - uint32_t lookahead_bytes; - uint32_t error_cost; - uint32_t child_count; - t_symbol symbol; - t_state_id parse_state; - - bool visible : 1; - bool named : 1; - bool extra : 1; - bool fragile_left : 1; - bool fragile_right : 1; - bool has_changes : 1; - bool has_external_tokens : 1; - bool has_external_scanner_state_change : 1; - bool depends_on_column : 1; - bool is_missing : 1; - bool is_keyword : 1; - - union { - // Non-terminal subtrees (`child_count > 0`) - struct - { - uint32_t visible_child_count; - uint32_t named_child_count; - uint32_t visible_descendant_count; - int32_t dynamic_precedence; - uint16_t repeat_depth; - uint16_t production_id; - struct - { - t_symbol symbol; - t_state_id parse_state; - } first_leaf; - }; - - // External terminal subtrees (`child_count == 0 && - // has_external_tokens`) - t_external_scanner_state external_scanner_state; - - // Error terminal subtrees (`child_count == 0 && symbol == - // ts_builtin_sym_error`) - int32_t lookahead_char; - }; -} t_subtree_heap_data; - -// The fundamental building block of a syntax tree. -typedef union u_subtree { - t_subtree_inline_data data; - const t_subtree_heap_data *ptr; -} t_subtree; - -// Like t_subtree, but mutable. 
-typedef union u_mutable_subtree { - t_subtree_inline_data data; - t_subtree_heap_data *ptr; -} t_mutable_subtree; - -typedef Array(t_subtree) t_subtree_array; -typedef Array(t_mutable_subtree) t_mutable_subtree_array; - -typedef struct -{ - t_mutable_subtree_array free_trees; - t_mutable_subtree_array tree_stack; -} t_subtree_pool; - -typedef Array(t_parse_range) t_range_array; - -typedef union u_parse_action { - struct - { - uint8_t type; - t_state_id state; - bool extra; - bool repetition; - } shift; - struct - { - uint8_t type; - uint8_t child_count; - t_symbol symbol; - int16_t dynamic_precedence; - uint16_t production_id; - } reduce; - uint8_t type; -} t_parse_action; void ts_range_array_get_changed_ranges(const t_parse_range *old_ranges, unsigned old_range_count, @@ -728,133 +266,6 @@ unsigned ts_subtree_get_changed_ranges( t_tree_cursor *cursor1, t_tree_cursor *cursor2, const t_language *language, const t_range_array *included_range_differences, t_parse_range **ranges); -typedef struct s_table_entry -{ - const t_parse_action *actions; - uint32_t action_count; - bool is_reusable; -} t_table_entry; - -typedef struct s_lookahead_iterator -{ - const t_language *language; - const uint16_t *data; - const uint16_t *group_end; - t_state_id state; - uint16_t table_value; - uint16_t section_index; - uint16_t group_count; - bool is_small_state; - - const t_parse_action *actions; - t_symbol symbol; - t_state_id next_state; - uint16_t action_count; -} t_lookahead_iterator; - -typedef struct s_symbol_metadata -{ - bool visible; - bool named; - bool supertype; -} t_symbol_metadata; - -typedef enum e_parse_action_type -{ - TSParseActionTypeShift, - TSParseActionTypeReduce, - TSParseActionTypeAccept, - TSParseActionTypeRecover, -} t_parse_action_type; - -typedef union u_parse_action_entry { - t_parse_action action; - struct - { - uint8_t count; - bool reusable; - } entry; -} t_parse_action_entry; - -typedef struct s_field_map_entry -{ - t_field_id field_id; - uint8_t 
child_index; - bool inherited; -} t_field_map_entry; - -typedef struct s_field_map_slice -{ - uint16_t index; - uint16_t length; -} t_field_map_slice; - -typedef struct s_lexer_data t_lexer_data; - -struct s_lexer_data -{ - int32_t lookahead; - t_symbol result_symbol; - void (*advance)(t_lexer_data *, bool); - void (*mark_end)(t_lexer_data *); - uint32_t (*get_column)(t_lexer_data *); - bool (*is_at_included_range_start)(const t_lexer_data *); - bool (*eof)(const t_lexer_data *); -}; - -typedef struct s_lex_mode -{ - uint16_t lex_state; - uint16_t external_lex_state; -} t_lex_mode; - -typedef struct s_char_range -{ - int32_t start; - int32_t end; -} t_char_range; - -struct s_language -{ - uint32_t version; - uint32_t symbol_count; - uint32_t alias_count; - uint32_t token_count; - uint32_t external_token_count; - uint32_t state_count; - uint32_t large_state_count; - uint32_t production_id_count; - uint32_t field_count; - uint16_t max_alias_sequence_length; - const uint16_t *parse_table; - const uint16_t *small_parse_table; - const uint32_t *small_parse_table_map; - const t_parse_action_entry *parse_actions; - const char *const *symbol_names; - const char *const *field_names; - const t_field_map_slice *field_map_slices; - const t_field_map_entry *field_map_entries; - const t_symbol_metadata *symbol_metadata; - const t_symbol *public_symbol_map; - const uint16_t *alias_map; - const t_symbol *alias_sequences; - const t_lex_mode *lex_modes; - bool (*lex_fn)(t_lexer_data *, t_state_id); - bool (*keyword_lex_fn)(t_lexer_data *, t_state_id); - t_symbol keyword_capture_token; - struct - { - const bool *states; - const t_symbol *symbol_map; - void *(*create)(void); - void (*destroy)(void *); - bool (*scan)(void *, t_lexer_data *, const bool *symbol_whitelist); - unsigned (*serialize)(void *, char *); - void (*deserialize)(void *, const char *, unsigned); - } external_scanner; - const t_state_id *primary_state_ids; -}; - void ts_language_table_entry(const t_language *, 
t_state_id, t_symbol, t_table_entry *); @@ -1167,28 +578,6 @@ static inline t_length length_saturating_sub(t_length len1, t_length len2) } } -typedef struct s_lexer -{ - t_lexer_data data; - t_length current_position; - t_length token_start_position; - t_length token_end_position; - - t_parse_range *included_ranges; - const char *chunk; - t_parse_input input; - t_parse_logger logger; - - uint32_t included_range_count; - uint32_t current_included_range_index; - uint32_t chunk_start; - uint32_t chunk_size; - uint32_t lookahead_size; - bool did_get_column; - - char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; -} t_lexer; - void ts_lexer_init(t_lexer *); void ts_lexer_delete(t_lexer *); void ts_lexer_set_input(t_lexer *, t_parse_input); @@ -1288,16 +677,6 @@ static inline t_point point_max(t_point a, t_point b) return b; } -typedef struct s_reduce_action -{ - uint32_t count; - t_symbol symbol; - int dynamic_precedence; - unsigned short production_id; -} t_reduce_action; - -typedef Array(t_reduce_action) t_reduce_action_set; - static inline void ts_reduce_action_set_add(t_reduce_action_set *self, t_reduce_action new_action) { @@ -1311,19 +690,6 @@ static inline void ts_reduce_action_set_add(t_reduce_action_set *self, array_push(self, new_action); } -typedef struct s_stack_entry -{ - t_subtree tree; - uint32_t child_index; - uint32_t byte_offset; -} t_stack_entry; - -typedef struct s_reusable_node -{ - Array(t_stack_entry) stack; - t_subtree last_external_token; -} t_reusable_node; - static inline t_reusable_node reusable_node_new(void) { return (t_reusable_node){array_new(), NULL_SUBTREE}; @@ -1433,25 +799,6 @@ static inline void reusable_node_reset(t_reusable_node *self, t_subtree tree) } } -typedef struct s_stack t_stack; - -typedef unsigned t_stack_version; - -typedef struct s_stack_slice -{ - t_subtree_array subtrees; - t_stack_version version; -} t_stack_slice; -typedef Array(t_stack_slice) t_stack_slice_array; - -typedef struct s_stack_summary_entry -{ - 
t_length position; - unsigned depth; - t_state_id state; -} t_stack_summary_entry; -typedef Array(t_stack_summary_entry) t_stack_summary; - // Create a stack. t_stack *ts_stack_new(t_subtree_pool *); @@ -1547,8 +894,6 @@ void ts_stack_remove_version(t_stack *, t_stack_version); void ts_stack_clear(t_stack *); -typedef void (*StackIterateCallback)(void *, t_state_id, uint32_t); - void ts_external_scanner_state_init(t_external_scanner_state *, const char *, unsigned); const char *ts_external_scanner_state_data(const t_external_scanner_state *); @@ -1835,13 +1180,6 @@ static inline t_mutable_subtree ts_subtree_to_mut_unsafe(t_subtree self) return result; } -typedef enum e_tree_cursor_step -{ - TreeCursorStepNone, - TreeCursorStepHidden, - TreeCursorStepVisible, -} t_tree_cursor_step; - void ts_tree_cursor_init(t_tree_cursor *, t_parse_node); void ts_tree_cursor_current_status(const t_tree_cursor *, t_field_id *, bool *, bool *, bool *, t_symbol *, unsigned *); @@ -1859,28 +1197,9 @@ static inline t_subtree ts_tree_cursor_current_subtree( t_parse_node ts_tree_cursor_parent_node(const t_tree_cursor *); -typedef struct s_parent_cache_entry -{ - const t_subtree *child; - const t_subtree *parent; - t_length position; - t_symbol alias_symbol; -} t_parent_cache_entry; - -struct s_first_tree -{ - t_subtree root; - const t_language *language; - t_parse_range *included_ranges; - unsigned included_range_count; -}; - t_first_tree *ts_tree_new(t_subtree root, const t_language *language, const t_parse_range *, unsigned); t_parse_node ts_node_new(const t_first_tree *, const t_subtree *, t_length, t_symbol); -typedef uint64_t t_parser_clock; -typedef uint64_t t_parser_duration; - #endif // TREE_SITTER_TREE_H_ diff --git a/parser/src/api_structs.h b/parser/src/api_structs.h new file mode 100644 index 00000000..8ebc78f2 --- /dev/null +++ b/parser/src/api_structs.h @@ -0,0 +1,590 @@ +#ifndef API_STRUCTS_H +#define API_STRUCTS_H + +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + 
+#include "./array.h" +#include "me/types.h" +#include + +typedef uint16_t t_state_id; +typedef uint16_t t_symbol; +typedef uint16_t t_field_id; +typedef unsigned t_stack_version; +typedef uint64_t t_parser_clock; +typedef uint64_t t_parser_duration; + +typedef union u_parse_action_entry t_parse_action_entry; +typedef union u_subtree t_subtree; +typedef union u_mutable_subtree t_mutable_subtree; +typedef union u_parse_action t_parse_action; + +typedef struct s_language t_language; +typedef struct s_first_parser t_first_parser; +typedef struct s_first_tree t_first_tree; +typedef struct s_parse_query t_parse_query; +typedef struct s_query_cursor t_query_cursor; +typedef struct s_lookahead_iterator t_lookahead_iterator; +typedef struct s_point t_point; +typedef struct s_length t_length; +typedef struct s_parse_range t_parse_range; +typedef struct s_parse_input t_parse_input; +typedef struct s_parse_logger t_parse_logger; +typedef struct s_input_edit t_input_edit; +typedef struct s_parse_node t_parse_node; +typedef struct s_tree_cursor t_tree_cursor; +typedef struct s_query_capture t_query_capture; +typedef struct s_query_match t_query_match; +typedef struct s_query_predicate_step t_query_predicate_step; +typedef struct s_subtree_inline_data t_subtree_inline_data; +typedef struct s_subtree_heap_data t_subtree_heap_data; +typedef struct s_subtree_pool t_subtree_pool; +typedef struct s_table_entry t_table_entry; +typedef struct s_symbol_metadata t_symbol_metadata; +typedef struct s_field_map_entry t_field_map_entry; +typedef struct s_field_map_slice t_field_map_slice; +typedef struct s_lexer_data t_lexer_data; +typedef struct s_lex_mode t_lex_mode; +typedef struct s_char_range t_char_range; +typedef struct s_tree_cursor_entry t_tree_cursor_entry; +typedef struct s_external_scanner_state t_external_scanner_state; +typedef struct s_parse_query_cursor t_parse_query_cursor; +typedef struct s_parse_query_error t_parse_query_error; +typedef struct s_parse_query_error_cost 
t_parse_query_error_cost; +typedef struct s_lexer_data t_lexer_data; +typedef struct s_subtree_inline_data t_subtree_inline_data; +typedef struct s_subtree_heap_data t_subtree_heap_data; +typedef struct s_subtree_pool t_subtree_pool; +typedef struct s_table_entry t_table_entry; +typedef struct s_symbol_metadata t_symbol_metadata; +typedef struct s_field_map_entry t_field_map_entry; +typedef struct s_field_map_slice t_field_map_slice; +typedef struct s_lexer_data t_lexer_data; +typedef struct s_lex_mode t_lex_mode; +typedef struct s_lexer t_lexer; +typedef struct s_parse_state t_parse_state; +typedef struct s_reduce_action t_reduce_action; +typedef struct s_stack_entry t_stack_entry; +typedef struct s_reusable_node t_reusable_node; +typedef struct s_stack_summary_entry t_stack_summary_entry; +typedef struct s_stack t_stack; +typedef struct s_stack_slice t_stack_slice; + +typedef enum e_input_encoding t_input_encoding; +typedef enum e_symbol_type t_symbol_type; +typedef enum e_log_type t_log_type; +typedef enum e_quantifier t_quantifier; +typedef enum e_query_error t_query_error; +typedef enum e_query_predicate_step_type t_query_predicate_step_type; +typedef enum e_parse_action_type t_parse_action_type; + +typedef Array(t_parse_range) t_range_array; +typedef Array(t_subtree) t_subtree_array; +typedef Array(t_mutable_subtree) t_mutable_subtree_array; +typedef Array(t_reduce_action) t_reduce_action_set; +typedef Array(void) Array; +typedef Array(t_stack_slice) t_stack_slice_array; +typedef Array(t_stack_summary_entry) t_stack_summary; +typedef void (*StackIterateCallback)(void *, t_state_id, uint32_t); + +struct s_point +{ + uint32_t row; + uint32_t column; +}; + +struct s_length +{ + uint32_t bytes; + t_point extent; +}; + +struct s_stack_slice +{ + t_subtree_array subtrees; + t_stack_version version; +}; + +struct s_stack_summary_entry +{ + t_length position; + unsigned depth; + t_state_id state; +}; + +enum e_input_encoding +{ + TSInputEncodingUTF8, + 
TSInputEncodingUTF16, +}; + +enum e_symbol_type +{ + TSSymbolTypeRegular, + TSSymbolTypeAnonymous, + TSSymbolTypeAuxiliary, +}; + +struct s_parse_range +{ + t_point start_point; + t_point end_point; + uint32_t start_byte; + uint32_t end_byte; +}; + +struct s_parse_input +{ + void *payload; + const char *(*read)(void *payload, uint32_t byte_index, t_point position, + uint32_t *bytes_read); + t_input_encoding encoding; +}; + +enum e_log_type +{ + TSLogTypeParse, + TSLogTypeLex, +}; + +struct s_parse_logger +{ + void *payload; + void (*log)(void *payload, t_log_type log_type, const char *buffer); +}; + +struct s_input_edit +{ + uint32_t start_byte; + uint32_t old_end_byte; + uint32_t new_end_byte; + t_point start_point; + t_point old_end_point; + t_point new_end_point; +}; + +struct s_parse_node +{ + uint32_t context[4]; + const void *id; + const t_first_tree *tree; +}; + +struct s_tree_cursor_entry +{ + const t_subtree *subtree; + t_length position; + uint32_t child_index; + uint32_t structural_child_index; + uint32_t descendant_index; +}; + +struct s_tree_cursor +{ + const t_first_tree *tree; + Array(t_tree_cursor_entry) stack; + t_symbol root_alias_symbol; +}; + +struct s_query_capture +{ + t_parse_node node; + uint32_t index; +}; + +enum e_quantifier +{ + TSQuantifierZero = 0, // must match the array initialization value + TSQuantifierZeroOrOne, + TSQuantifierZeroOrMore, + TSQuantifierOne, + TSQuantifierOneOrMore, +}; + +struct s_query_match +{ + uint32_t id; + uint16_t pattern_index; + uint16_t capture_count; + const t_query_capture *captures; +}; + +enum e_query_predicate_step_type +{ + TSQueryPredicateStepTypeDone, + TSQueryPredicateStepTypeCapture, + TSQueryPredicateStepTypeString, +}; + +struct s_query_predicate_step +{ + t_query_predicate_step_type type; + uint32_t value_id; +}; + +enum e_query_error +{ + TSQueryErrorNone = 0, + TSQueryErrorSyntax, + TSQueryErrorNodeType, + TSQueryErrorField, + TSQueryErrorCapture, + TSQueryErrorStructure, + 
TSQueryErrorLanguage, +}; + +struct s_parent_cache_entry +{ + const t_subtree *child; + const t_subtree *parent; + t_length position; + t_symbol alias_symbol; +}; + +typedef enum e_tree_cursor_step +{ + TreeCursorStepNone, + TreeCursorStepHidden, + TreeCursorStepVisible, +} t_tree_cursor_step; + +// The serialized state of an external scanner. +// +// Every time an external token subtree is created after a call to an +// external scanner, the scanner's `serialize` function is called to +// retrieve a serialized copy of its state. The bytes are then copied +// onto the subtree itself so that the scanner's state can later be +// restored using its `deserialize` function. +// +// Small byte arrays are stored inline, and long ones are allocated +// separately on the heap. +struct s_external_scanner_state +{ + union { + char *long_data; + char short_data[24]; + }; + uint32_t length; +}; + +#define SUBTREE_BITS \ + bool visible : 1; \ + bool named : 1; \ + bool extra : 1; \ + bool has_changes : 1; \ + bool is_missing : 1; \ + bool is_keyword : 1; + +#define SUBTREE_SIZE \ + uint8_t padding_columns; \ + uint8_t padding_rows : 4; \ + uint8_t lookahead_bytes : 4; \ + uint8_t padding_bytes; \ + uint8_t size_bytes; + +#if TS_BIG_ENDIAN +# if TS_PTR_SIZE == 32 + +struct s_subtree_inline_data +{ + uint16_t parse_state; + uint8_t symbol; + SUBTREE_BITS + bool unused : 1; + bool is_inline : 1; + SUBTREE_SIZE +}; + +# else + +struct s_subtree_inline_data +{ + SUBTREE_SIZE + uint16_t parse_state; + uint8_t symbol; + SUBTREE_BITS + bool unused : 1; + bool is_inline : 1; +}; + +# endif +#else + +struct s_subtree_inline_data +{ + bool is_inline : 1; + SUBTREE_BITS + uint8_t symbol; + uint16_t parse_state; + SUBTREE_SIZE +}; + +#endif + +#undef SUBTREE_BITS +#undef SUBTREE_SIZE + +struct s_subtree_heap_data +{ + volatile uint32_t ref_count; + t_length padding; + t_length size; + uint32_t lookahead_bytes; + uint32_t error_cost; + uint32_t child_count; + t_symbol symbol; + t_state_id 
parse_state; + + bool visible : 1; + bool named : 1; + bool extra : 1; + bool fragile_left : 1; + bool fragile_right : 1; + bool has_changes : 1; + bool has_external_tokens : 1; + bool has_external_scanner_state_change : 1; + bool depends_on_column : 1; + bool is_missing : 1; + bool is_keyword : 1; + + union { + // Non-terminal subtrees (`child_count > 0`) + struct + { + uint32_t visible_child_count; + uint32_t named_child_count; + uint32_t visible_descendant_count; + int32_t dynamic_precedence; + uint16_t repeat_depth; + uint16_t production_id; + struct + { + t_symbol symbol; + t_state_id parse_state; + } first_leaf; + }; + + // External terminal subtrees (`child_count == 0 && + // has_external_tokens`) + t_external_scanner_state external_scanner_state; + + // Error terminal subtrees (`child_count == 0 && symbol == + // ts_builtin_sym_error`) + int32_t lookahead_char; + }; +}; + +// The fundamental building block of a syntax tree. +union u_subtree { + t_subtree_inline_data data; + const t_subtree_heap_data *ptr; +}; + +// Like t_subtree, but mutable. 
+union u_mutable_subtree { + t_subtree_inline_data data; + t_subtree_heap_data *ptr; +}; + +struct s_subtree_pool +{ + t_mutable_subtree_array free_trees; + t_mutable_subtree_array tree_stack; +}; + +union u_parse_action { + struct + { + uint8_t type; + t_state_id state; + bool extra; + bool repetition; + } shift; + struct + { + uint8_t type; + uint8_t child_count; + t_symbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +}; + +struct s_table_entry +{ + const t_parse_action *actions; + uint32_t action_count; + bool is_reusable; +}; + +struct s_lookahead_iterator +{ + const t_language *language; + const uint16_t *data; + const uint16_t *group_end; + t_state_id state; + uint16_t table_value; + uint16_t section_index; + uint16_t group_count; + bool is_small_state; + + const t_parse_action *actions; + t_symbol symbol; + t_state_id next_state; + uint16_t action_count; +}; + +struct s_symbol_metadata +{ + bool visible; + bool named; + bool supertype; +}; + +enum e_parse_action_type +{ + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +}; + +union u_parse_action_entry { + t_parse_action action; + struct + { + uint8_t count; + bool reusable; + } entry; +}; + +struct s_field_map_entry +{ + t_field_id field_id; + uint8_t child_index; + bool inherited; +}; + +struct s_field_map_slice +{ + uint16_t index; + uint16_t length; +}; + +struct s_lexer_data +{ + int32_t lookahead; + t_symbol result_symbol; + void (*advance)(t_lexer_data *, bool); + void (*mark_end)(t_lexer_data *); + uint32_t (*get_column)(t_lexer_data *); + bool (*is_at_included_range_start)(const t_lexer_data *); + bool (*eof)(const t_lexer_data *); +}; + +struct s_lex_mode +{ + uint16_t lex_state; + uint16_t external_lex_state; +}; + +struct s_char_range +{ + int32_t start; + int32_t end; +}; + +struct s_language +{ + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + 
uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const t_parse_action_entry *parse_actions; + const char *const *symbol_names; + const char *const *field_names; + const t_field_map_slice *field_map_slices; + const t_field_map_entry *field_map_entries; + const t_symbol_metadata *symbol_metadata; + const t_symbol *public_symbol_map; + const uint16_t *alias_map; + const t_symbol *alias_sequences; + const t_lex_mode *lex_modes; + bool (*lex_fn)(t_lexer_data *, t_state_id); + bool (*keyword_lex_fn)(t_lexer_data *, t_state_id); + t_symbol keyword_capture_token; + struct + { + const bool *states; + const t_symbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, t_lexer_data *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + const t_state_id *primary_state_ids; +}; + +struct s_lexer +{ + t_lexer_data data; + t_length current_position; + t_length token_start_position; + t_length token_end_position; + + t_parse_range *included_ranges; + const char *chunk; + t_parse_input input; + t_parse_logger logger; + + uint32_t included_range_count; + uint32_t current_included_range_index; + uint32_t chunk_start; + uint32_t chunk_size; + uint32_t lookahead_size; + bool did_get_column; + + char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; +}; + +struct s_reduce_action +{ + uint32_t count; + t_symbol symbol; + int dynamic_precedence; + unsigned short production_id; +}; + +struct s_stack_entry +{ + t_subtree tree; + uint32_t child_index; + uint32_t byte_offset; +}; + +struct s_reusable_node +{ + Array(t_stack_entry) stack; + t_subtree last_external_token; +}; + +struct s_first_tree +{ + t_subtree root; + 
const t_language *language; + t_parse_range *included_ranges; + unsigned included_range_count; +}; + +#endif // API_STRUCTS_H \ No newline at end of file diff --git a/parser/src/array.h b/parser/src/array.h new file mode 100644 index 00000000..0fb3f533 --- /dev/null +++ b/parser/src/array.h @@ -0,0 +1,149 @@ +#ifndef ARRAY_H +#define ARRAY_H +#define Array(T) \ + struct \ + { \ + T *contents; \ + uint32_t size; \ + uint32_t capacity; \ + } + +#ifndef inline +# define inline __inline__ +#endif + +/// Initialize an array. +#define array_init(self) \ + ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) + +/// Create an empty array. +#define array_new() \ + { \ + NULL, 0, 0 \ + } + +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) + +/// Get a pointer to the first element in the array. +#define array_front(self) array_get(self, 0) + +/// Get a pointer to the last element in the array. +#define array_back(self) array_get(self, (self)->size - 1) + +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. +#define array_clear(self) ((self)->size = 0) + +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. +#define array_reserve(self, new_capacity) \ + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) + +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. +#define array_delete(self) _array__delete((Array *)(self)) + +/// Push a new `element` onto the end of the array. +#define array_push(self, element) \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ + (self)->contents[(self)->size++] = (element)) + +/// Increase the array's size by `count` elements. 
+/// New elements are zero-initialized. +#define array_grow_by(self, count) \ + do \ + { \ + if ((count) == 0) \ + break; \ + _array__grow((Array *)(self), count, array_elem_size(self)); \ + memset((self)->contents + (self)->size, 0, \ + (count) * array_elem_size(self)); \ + (self)->size += (count); \ + } while (0) + +/// Append all elements from one array to the end of another. +#define array_push_all(self, other) \ + array_extend((self), (other)->size, (other)->contents) + +/// Append `count` elements to the end of the array, reading their values from +/// the `contents` pointer. +#define array_extend(self, count, contents) \ + _array__splice((Array *)(self), array_elem_size(self), (self)->size, 0, \ + count, contents) + +/// Remove `old_count` elements from the array starting at the given `index`. At +/// the same index, insert `new_count` new elements, reading their values from +/// the `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents) \ + _array__splice((Array *)(self), array_elem_size(self), _index, old_count, \ + new_count, new_contents) + +/// Insert one `element` into the array at the given `index`. +#define array_insert(self, _index, element) \ + _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, \ + &(element)) + +/// Remove one element from the array at the given `index`. +#define array_erase(self, _index) \ + _array__erase((Array *)(self), array_elem_size(self), _index) + +/// Pop the last element off the array, returning the element by value. +#define array_pop(self) ((self)->contents[--(self)->size]) + +/// Assign the contents of one array to another, reallocating if necessary. 
+#define array_assign(self, other) \ + _array__assign((Array *)(self), (const Array *)(other), \ + array_elem_size(self)) + +/// Swap one array with another +#define array_swap(self, other) _array__swap((Array *)(self), (Array *)(other)) + +/// Get the size of the array contents +#define array_elem_size(self) (sizeof *(self)->contents) + +/// Search a sorted array for a given `needle` value, using the given `compare` +/// callback to determine the order. +/// +/// If an existing element is found to be equal to `needle`, then the `index` +/// out-parameter is set to the existing value's index, and the `exists` +/// out-parameter is set to true. Otherwise, `index` is set to an index where +/// `needle` should be inserted in order to preserve the sorting, and `exists` +/// is set to false. +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ + _array__search_sorted(self, 0, compare, , needle, _index, _exists) + +/// Search a sorted array for a given `needle` value, using integer comparisons +/// of a given struct field (specified with a leading dot) to determine the +/// order. +/// +/// See also `array_search_sorted_with`. +#define array_search_sorted_by(self, field, needle, _index, _exists) \ + _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) + +/// Insert a given `value` into a sorted array, using the given `compare` +/// callback to determine the order. +#define array_insert_sorted_with(self, compare, value) \ + do \ + { \ + unsigned _index, _exists; \ + array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ + if (!_exists) \ + array_insert(self, _index, value); \ + } while (0) + +/// Insert a given `value` into a sorted array, using integer comparisons of +/// a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_by`. 
+#define array_insert_sorted_by(self, field, value) \ + do \ + { \ + unsigned _index, _exists; \ + array_search_sorted_by(self, field, (value)field, &_index, &_exists); \ + if (!_exists) \ + array_insert(self, _index, value); \ + } while (0) + +#endif // ARRAY_H \ No newline at end of file diff --git a/parser/src/combined.c b/parser/src/combined.c index 3625e63e..abd65a9e 100644 --- a/parser/src/combined.c +++ b/parser/src/combined.c @@ -1,4 +1,5 @@ #include "./api.h" +#include "./structs.h" uint32_t ts_node_end_byte(t_parse_node self); t_parse_node ts_node_parent(t_parse_node self); @@ -146,14 +147,6 @@ void ts_range_array_get_changed_ranges(const t_parse_range *old_ranges, } } -typedef struct s_iterator -{ - t_tree_cursor cursor; - const t_language *language; - unsigned visible_depth; - bool in_padding; -} t_iterator; - static t_iterator iterator_new(t_tree_cursor *cursor, const t_subtree *tree, const t_language *language) { @@ -387,13 +380,6 @@ static void iterator_advance(t_iterator *self) } } -typedef enum e_iterator_comparison -{ - IteratorDiffers, - IteratorMayDiffer, - IteratorMatches, -} t_iterator_comparison; - static t_iterator_comparison iterator_compare(const t_iterator *old_iter, const t_iterator *new_iter) { @@ -919,9 +905,6 @@ uint32_t ascii_decode(const uint8_t *chunk, uint32_t size, int32_t *codepoint) return (1); } -typedef uint32_t (*UnicodeDecodeFunction)(const uint8_t *chunk, uint32_t size, - int32_t *codepoint); - // Decode the next unicode character in the current chunk of source code. // This assumes that the lexer has already retrieved a chunk of source // code that spans the current position. 
@@ -939,7 +922,7 @@ static void ts_lexer__get_lookahead(t_lexer *self) } const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk; - UnicodeDecodeFunction decode = ascii_decode; + t_unicode_decode_function decode = ascii_decode; self->lookahead_size = decode(chunk, size, &self->data.lookahead); @@ -1326,16 +1309,6 @@ t_parse_range *ts_lexer_included_ranges(const t_lexer *self, uint32_t *count) #undef LOG -typedef struct s_node_child_iterator -{ - t_subtree parent; - const t_first_tree *tree; - t_length position; - uint32_t child_index; - uint32_t structural_child_index; - const t_symbol *alias_sequence; -} t_node_child_iterator; - // t_parse_node - constructors t_parse_node ts_node_new(const t_first_tree *tree, const t_subtree *subtree, @@ -2269,61 +2242,6 @@ static const unsigned MAX_SUMMARY_DEPTH = 16; static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100; -typedef struct s_token_cache -{ - t_subtree token; - t_subtree last_external_token; - uint32_t byte_index; -} t_token_cache; - -struct s_first_parser -{ - t_lexer lexer; - t_stack *stack; - t_subtree_pool tree_pool; - const t_language *language; - t_reduce_action_set reduce_actions; - t_subtree finished_tree; - t_subtree_array trailing_extras; - t_subtree_array trailing_extras2; - t_subtree_array scratch_trees; - t_token_cache token_cache; - t_reusable_node reusable_node; - void *external_scanner_payload; - t_parser_clock end_clock; - t_parser_duration timeout_duration; - unsigned accept_count; - unsigned operation_count; - const volatile size_t *cancellation_flag; - t_subtree old_tree; - t_range_array included_range_differences; - unsigned included_range_difference_index; - bool has_scanner_error; -}; - -typedef struct s_error_status -{ - unsigned cost; - unsigned node_count; - int dynamic_precedence; - bool is_in_error; -} t_error_status; - -typedef enum e_error_comparaison -{ - ErrorComparisonTakeLeft, - 
ErrorComparisonPreferLeft, - ErrorComparisonNone, - ErrorComparisonPreferRight, - ErrorComparisonTakeRight, -} t_error_comparaison; - -typedef struct s_string_input -{ - const char *string; - uint32_t length; -} t_string_input; - // StringInput static const char *ts_string_input_read(void *_self, uint32_t byte, @@ -4380,329 +4298,6 @@ t_first_tree *ts_parser_parse_string_encoding(t_first_parser *self, }); } -/* - * t_stream - A sequence of unicode characters derived from a UTF8 string. - * This struct is used in parsing queries from S-expressions. - */ -typedef struct s_stream -{ - const char *input; - const char *start; - const char *end; - int32_t next; - uint8_t next_size; -} t_stream; - -/* - * t_query_step - A step in the process of matching a query. Each node within - * a query S-expression corresponds to one of these steps. An entire pattern - * is represented as a sequence of these steps. The basic properties of a - * node are represented by these fields: - * - `symbol` - The grammar symbol to match. A zero value represents the - * wildcard symbol, '_'. - * - `field` - The field name to match. A zero value means that a field name - * was not specified. - * - `capture_ids` - An array of integers representing the names of captures - * associated with this node in the pattern, terminated by a `NONE` value. - * - `depth` - The depth where this node occurs in the pattern. The root node - * of the pattern has depth zero. - * - `negated_field_list_id` - An id representing a set of fields that must - * not be present on a node matching this step. - * - * Steps have some additional fields in order to handle the `.` (or "anchor") - * operator, which forbids additional child nodes: - * - `is_immediate` - Indicates that the node matching this step cannot be - * preceded by other sibling nodes that weren't specified in the pattern. - * - `is_last_child` - Indicates that the node matching this step cannot have - * any subsequent named siblings. 
- * - * For simple patterns, steps are matched in sequential order. But in order to - * handle alternative/repeated/optional sub-patterns, query steps are not always - * structured as a linear sequence; they sometimes need to split and merge. This - * is done using the following fields: - * - `alternative_index` - The index of a different query step that serves as - * an alternative to this step. A `NONE` value represents no alternative. - * When a query state reaches a step with an alternative index, the state - * is duplicated, with one copy remaining at the original step, and one copy - * moving to the alternative step. The alternative may have its own - * alternative step, so this splitting is an iterative process. - * - `is_dead_end` - Indicates that this state cannot be passed directly, and - * exists only in order to redirect to an alternative index, with no - * splitting. - * - `is_pass_through` - Indicates that state has no matching logic of its own, - * and exists only to split a state. One copy of the state advances - * immediately to the next step, and one moves to the alternative step. - * - `alternative_is_immediate` - Indicates that this step's alternative step - * should be treated as if `is_immediate` is true. - * - * Steps also store some derived state that summarizes how they relate to other - * steps within the same pattern. This is used to optimize the matching process: - * - `contains_captures` - Indicates that this step or one of its child steps - * has a non-empty `capture_ids` list. - * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then - * it and all of its subsequent sibling steps within the same parent pattern - * are guaranteed to match. - * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but - * for the entire top-level pattern. 
When iterating through a query's - * captures using `ts_query_cursor_next_capture`, this field is used to - * detect that a capture can safely be returned from a match that has not - * even completed yet. - */ -typedef struct s_query_step -{ - t_symbol symbol; - t_symbol supertype_symbol; - t_field_id field; - uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; - uint16_t depth; - uint16_t alternative_index; - uint16_t negated_field_list_id; - bool is_named : 1; - bool is_immediate : 1; - bool is_last_child : 1; - bool is_pass_through : 1; - bool is_dead_end : 1; - bool alternative_is_immediate : 1; - bool contains_captures : 1; - bool root_pattern_guaranteed : 1; - bool parent_pattern_guaranteed : 1; -} t_query_step; - -/* - * t_slice - A slice of an external array. Within a query, capture names, - * literal string values, and predicate step information are stored in three - * contiguous arrays. Individual captures, string values, and predicates are - * represented as slices of these three arrays. - */ -typedef struct s_slice -{ - uint32_t offset; - uint32_t length; -} t_slice; - -/* - * t_symbol_table - a two-way mapping of strings to ids. - */ -typedef struct s_symbol_table -{ - Array(char) characters; - Array(t_slice) slices; -} t_symbol_table; - -/** - * CaptureQuantififers - a data structure holding the quantifiers of pattern - * captures. - */ -typedef Array(uint8_t) t_capture_quantifiers; - -/* - * t_pattern_entry - Information about the starting point for matching a - * particular pattern. These entries are stored in a 'pattern map' - a sorted - * array that makes it possible to efficiently lookup patterns based on the - * symbol for their first step. The entry consists of the following fields: - * - `pattern_index` - the index of the pattern within the query - * - `step_index` - the index of the pattern's first step in the shared `steps` - * array - * - `is_rooted` - whether or not the pattern has a single root node. 
This - * property affects decisions about whether or not to start the pattern for - * nodes outside of a QueryCursor's range restriction. - */ -typedef struct s_pattern_entry -{ - uint16_t step_index; - uint16_t pattern_index; - bool is_rooted; -} t_pattern_entry; - -typedef struct s_query_pattern -{ - t_slice steps; - t_slice predicate_steps; - uint32_t start_byte; - bool is_non_local; -} t_query_pattern; - -typedef struct s_step_offset -{ - uint32_t byte_offset; - uint16_t step_index; -} t_step_offset; - -/* - * t_query_state - The state of an in-progress match of a particular pattern - * in a query. While executing, a `t_query_cursor` must keep track of a number - * of possible in-progress matches. Each of those possible matches is - * represented as one of these states. Fields: - * - `id` - A numeric id that is exposed to the public API. This allows the - * caller to remove a given match, preventing any more of its captures - * from being returned. - * - `start_depth` - The depth in the tree where the first step of the state's - * pattern was matched. - * - `pattern_index` - The pattern that the state is matching. - * - `consumed_capture_count` - The number of captures from this match that - * have already been returned. - * - `capture_list_id` - A numeric id that can be used to retrieve the state's - * list of captures from the `t_capture_list_pool`. - * - `seeking_immediate_match` - A flag that indicates that the state's next - * step must be matched by the very next sibling. This is used when - * processing repetitions. - * - `has_in_progress_alternatives` - A flag that indicates that there is are - * other states that have the same captures as this state, but are at - * different steps in their pattern. This means that in order to obey the - * 'longest-match' rule, this state should not be returned as a match until - * it is clear that there can be no other alternative match with more - * captures. 
- */ -typedef struct s_query_state -{ - uint32_t id; - uint32_t capture_list_id; - uint16_t start_depth; - uint16_t step_index; - uint16_t pattern_index; - uint16_t consumed_capture_count : 12; - bool seeking_immediate_match : 1; - bool has_in_progress_alternatives : 1; - bool dead : 1; - bool needs_parent : 1; -} t_query_state; - -typedef Array(t_query_capture) t_capture_list; - -/* - * t_capture_list_pool - A collection of *lists* of captures. Each query state - * needs to maintain its own list of captures. To avoid repeated allocations, - * this struct maintains a fixed set of capture lists, and keeps track of which - * ones are currently in use by a query state. - */ -typedef struct s_capture_list_pool -{ - Array(t_capture_list) list; - t_capture_list empty_list; - // The maximum number of capture lists that we are allowed to allocate. We - // never allow `list` to allocate more entries than this, dropping pending - // matches if needed to stay under the limit. - uint32_t max_capture_list_count; - // The number of capture lists allocated in `list` that are not currently in - // use. We reuse those existing-but-unused capture lists before trying to - // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture - // list's length to indicate that it's not in use. - uint32_t free_capture_list_count; -} t_capture_list_pool; - -/* - * t_analysis_state - The state needed for walking the parse table when - * analyzing a query pattern, to determine at which steps the pattern might fail - * to match. 
- */ -typedef struct s_analysis_state_entry -{ - t_state_id parse_state; - t_symbol parent_symbol; - uint16_t child_index; - t_field_id field_id : 15; - bool done : 1; -} t_analysis_state_entry; - -typedef struct s_analysis_state -{ - t_analysis_state_entry stack[MAX_ANALYSIS_STATE_DEPTH]; - uint16_t depth; - uint16_t step_index; - t_symbol root_symbol; -} t_analysis_state; - -typedef Array(t_analysis_state *) t_analysis_state_set; - -typedef struct s_query_analysis -{ - t_analysis_state_set states; - t_analysis_state_set next_states; - t_analysis_state_set deeper_states; - t_analysis_state_set state_pool; - Array(uint16_t) final_step_indices; - Array(t_symbol) finished_parent_symbols; - bool did_abort; -} t_query_analysis; - -/* - * t_analysis_subgraph - A subset of the states in the parse table that are used - * in constructing nodes with a certain symbol. Each state is accompanied by - * some information about the possible node that could be produced in - * downstream states. - */ -typedef struct s_analysis_subgraph_node -{ - t_state_id state; - uint16_t production_id; - uint8_t child_index : 7; - bool done : 1; -} t_analysis_subgraph_node; - -typedef struct s_analysis_subgraph -{ - t_symbol symbol; - Array(t_state_id) start_states; - Array(t_analysis_subgraph_node) nodes; -} t_analysis_subgraph; - -typedef Array(t_analysis_subgraph) t_analysis_subgraph_array; - -/* - * t_state_predecessor_map - A map that stores the predecessors of each parse - * state. This is used during query analysis to determine which parse states can - * lead to which reduce actions. - */ -typedef struct s_state_predecessor_map -{ - t_state_id *contents; -} t_state_predecessor_map; - -/* - * t_parse_query - A tree query, compiled from a string of S-expressions. The - * query itself is immutable. The mutable state used in the process of executing - * the query is stored in a `t_query_cursor`. 
- */ -struct s_parse_query -{ - t_symbol_table captures; - t_symbol_table predicate_values; - Array(t_capture_quantifiers) capture_quantifiers; - Array(t_query_step) steps; - Array(t_pattern_entry) pattern_map; - Array(t_query_predicate_step) predicate_steps; - Array(t_query_pattern) patterns; - Array(t_step_offset) step_offsets; - Array(t_field_id) negated_fields; - Array(char) string_buffer; - Array(t_symbol) repeat_symbols_with_rootless_patterns; - const t_language *language; - uint16_t wildcard_root_pattern_count; -}; - -/* - * t_query_cursor - A stateful struct used to execute a query on a tree. - */ -struct s_query_cursor -{ - const t_parse_query *query; - t_tree_cursor cursor; - Array(t_query_state) states; - Array(t_query_state) finished_states; - t_capture_list_pool capture_list_pool; - uint32_t depth; - uint32_t max_start_depth; - uint32_t start_byte; - uint32_t end_byte; - t_point start_point; - t_point end_point; - uint32_t next_state_id; - bool on_visible_node; - bool ascending; - bool halted; - bool did_exceed_match_limit; -}; - static const t_query_error PARENT_DONE = -1; static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; static const uint16_t NONE = UINT16_MAX; @@ -8991,74 +8586,6 @@ void ts_query_cursor_set_max_start_depth(t_query_cursor *self, #undef LOG -typedef struct s_stack_node t_stack_node; - -typedef struct s_stack_link -{ - t_stack_node *node; - t_subtree subtree; - bool is_pending; -} t_stack_link; - -struct s_stack_node -{ - t_state_id state; - t_length position; - t_stack_link links[MAX_LINK_COUNT]; - short unsigned int link_count; - uint32_t ref_count; - unsigned error_cost; - unsigned node_count; - int dynamic_precedence; -}; - -typedef struct s_stack_iterator -{ - t_stack_node *node; - t_subtree_array subtrees; - uint32_t subtree_count; - bool is_pending; -} t_stack_iterator; - -typedef Array(t_stack_node *) t_stack_node_array; - -typedef enum e_stack_status -{ - StackStatusActive, - StackStatusPaused, - StackStatusHalted, -} 
t_stack_status; - -typedef struct s_stack_head -{ - t_stack_node *node; - t_stack_summary *summary; - unsigned node_count_at_last_error; - t_subtree last_external_token; - t_subtree lookahead_when_paused; - t_stack_status status; -} t_stack_head; - -struct s_stack -{ - Array(t_stack_head) heads; - t_stack_slice_array slices; - Array(t_stack_iterator) iterators; - t_stack_node_array node_pool; - t_stack_node *base_node; - t_subtree_pool *subtree_pool; -}; - -typedef unsigned t_stack_action; -enum e_stack_action -{ - StackActionNone, - StackActionStop = 1, - StackActionPop = 2, -}; - -typedef t_stack_action (*t_stack_callback)(void *, const t_stack_iterator *); - static void stack_node_retain(t_stack_node *self) { if (!self) @@ -9663,12 +9190,6 @@ t_stack_slice_array ts_stack_pop_all(t_stack *self, t_stack_version version) return stack__iter(self, version, pop_all_callback, NULL, 0); } -typedef struct s_summarize_stack_session -{ - t_stack_summary *summary; - unsigned max_depth; -} t_summarize_stack_session; - static inline t_stack_action summarize_stack_callback( void *payload, const t_stack_iterator *iterator) { @@ -9894,13 +9415,6 @@ bool ts_stack_print_dot_graph(t_stack *self, const t_language *language, return (false); } -typedef struct s_edit -{ - t_length start; - t_length old_end; - t_length new_end; -} t_edit; - // t_external_scanner_state void ts_external_scanner_state_init(t_external_scanner_state *self, @@ -10659,11 +10173,6 @@ static inline void ts_subtree_set_has_changes(t_mutable_subtree *self) t_subtree ts_subtree_edit(t_subtree self, const t_input_edit *input_edit, t_subtree_pool *pool) { - typedef struct s_edit_entry - { - t_subtree *tree; - t_edit edit; - } t_edit_entry; Array(t_edit_entry) stack = array_new(); array_push( @@ -11095,17 +10604,6 @@ void ts_tree_print_dot_graph(const t_first_tree *self, int file_descriptor) #endif -typedef struct s_cursor_child_iterator -{ - t_subtree parent; - const t_first_tree *tree; - t_length position; - 
uint32_t child_index; - uint32_t structural_child_index; - uint32_t descendant_index; - const t_symbol *alias_sequence; -} t_cursor_child_iterator; - // t_cursor_child_iterator static inline bool ts_tree_cursor_is_entry_visible(const t_tree_cursor *self, diff --git a/parser/src/structs.h b/parser/src/structs.h new file mode 100644 index 00000000..e0ef3ce1 --- /dev/null +++ b/parser/src/structs.h @@ -0,0 +1,545 @@ +#ifndef STRUCTS_H +#define STRUCTS_H + +#include "./api.h" + +typedef unsigned t_stack_action; + +typedef struct s_edit t_edit; +typedef struct s_edit_entry t_edit_entry; +typedef struct s_cursor_child_iterator t_cursor_child_iterator; +typedef struct s_summarize_stack_session t_summarize_stack_session; +typedef struct s_stack_node t_stack_node; +typedef struct s_stack_link t_stack_link; +typedef struct s_stack_head t_stack_head; +typedef struct s_stack_iterator t_stack_iterator; +typedef struct s_stack t_stack; +typedef struct s_stack_head t_stack_head; +typedef struct s_stack_iterator t_stack_iterator; +typedef struct s_query_cursor t_query_cursor; +typedef struct s_parse_query t_parse_query; +typedef struct s_state_predecessor_map t_state_predecessor_map; +typedef struct s_analysis_subgraph t_analysis_subgraph; +typedef struct s_analysis_subgraph_node t_analysis_subgraph_node; +typedef struct s_query_analysis t_query_analysis; +typedef struct s_analysis_state t_analysis_state; +typedef struct s_analysis_state_entry t_analysis_state_entry; +typedef struct s_capture_list_pool t_capture_list_pool; +typedef struct s_query_state t_query_state; +typedef struct s_step_offset t_step_offset; +typedef struct s_query_pattern t_query_pattern; +typedef struct s_pattern_entry t_pattern_entry; +typedef struct s_symbol_table t_symbol_table; +typedef struct s_slice t_slice; +typedef struct s_query_step t_query_step; +typedef struct s_stream t_stream; +typedef struct s_string_input t_string_input; +typedef struct s_error_status t_error_status; +typedef struct 
s_first_parser t_first_parser; +typedef struct s_token_cache t_token_cache; +typedef struct s_node_child_iterator t_node_child_iterator; +typedef struct s_iterator t_iterator; +typedef struct s_parse_query t_parse_query; + +typedef t_stack_action (*t_stack_callback)(void *, const t_stack_iterator *); +typedef Array(t_stack_node *) t_stack_node_array; +typedef Array(t_analysis_subgraph) t_analysis_subgraph_array; +typedef Array(t_analysis_state *) t_analysis_state_set; +typedef Array(uint8_t) t_capture_quantifiers; +typedef uint32_t (*t_unicode_decode_function)(const uint8_t *chunk, + uint32_t size, + int32_t *codepoint); +typedef Array(t_query_capture) t_capture_list; + +typedef enum e_stack_status t_stack_status; +typedef enum e_error_comparaison t_error_comparaison; +typedef enum e_iterator_comparison t_iterator_comparison; + +struct s_iterator +{ + t_tree_cursor cursor; + const t_language *language; + unsigned visible_depth; + bool in_padding; +}; + +enum e_iterator_comparison +{ + IteratorDiffers, + IteratorMayDiffer, + IteratorMatches, +}; + +struct s_node_child_iterator +{ + t_subtree parent; + const t_first_tree *tree; + t_length position; + uint32_t child_index; + uint32_t structural_child_index; + const t_symbol *alias_sequence; +}; +struct s_token_cache +{ + t_subtree token; + t_subtree last_external_token; + uint32_t byte_index; +}; + +struct s_first_parser +{ + t_lexer lexer; + t_stack *stack; + t_subtree_pool tree_pool; + const t_language *language; + t_reduce_action_set reduce_actions; + t_subtree finished_tree; + t_subtree_array trailing_extras; + t_subtree_array trailing_extras2; + t_subtree_array scratch_trees; + t_token_cache token_cache; + t_reusable_node reusable_node; + void *external_scanner_payload; + t_parser_clock end_clock; + t_parser_duration timeout_duration; + unsigned accept_count; + unsigned operation_count; + const volatile size_t *cancellation_flag; + t_subtree old_tree; + t_range_array included_range_differences; + unsigned 
included_range_difference_index; + bool has_scanner_error; +}; + +struct s_error_status +{ + unsigned cost; + unsigned node_count; + int dynamic_precedence; + bool is_in_error; +}; + +enum e_error_comparaison +{ + ErrorComparisonTakeLeft, + ErrorComparisonPreferLeft, + ErrorComparisonNone, + ErrorComparisonPreferRight, + ErrorComparisonTakeRight, +}; + +struct s_string_input +{ + const char *string; + uint32_t length; +}; + +/* + * t_stream - A sequence of unicode characters derived from a UTF8 string. + * This struct is used in parsing queries from S-expressions. + */ +struct s_stream +{ + const char *input; + const char *start; + const char *end; + int32_t next; + uint8_t next_size; +}; + +/* + * t_query_step - A step in the process of matching a query. Each node within + * a query S-expression corresponds to one of these steps. An entire pattern + * is represented as a sequence of these steps. The basic properties of a + * node are represented by these fields: + * - `symbol` - The grammar symbol to match. A zero value represents the + * wildcard symbol, '_'. + * - `field` - The field name to match. A zero value means that a field name + * was not specified. + * - `capture_ids` - An array of integers representing the names of captures + * associated with this node in the pattern, terminated by a `NONE` value. + * - `depth` - The depth where this node occurs in the pattern. The root node + * of the pattern has depth zero. + * - `negated_field_list_id` - An id representing a set of fields that must + * not be present on a node matching this step. + * + * Steps have some additional fields in order to handle the `.` (or "anchor") + * operator, which forbids additional child nodes: + * - `is_immediate` - Indicates that the node matching this step cannot be + * preceded by other sibling nodes that weren't specified in the pattern. + * - `is_last_child` - Indicates that the node matching this step cannot have + * any subsequent named siblings. 
+ * + * For simple patterns, steps are matched in sequential order. But in order to + * handle alternative/repeated/optional sub-patterns, query steps are not always + * structured as a linear sequence; they sometimes need to split and merge. This + * is done using the following fields: + * - `alternative_index` - The index of a different query step that serves as + * an alternative to this step. A `NONE` value represents no alternative. + * When a query state reaches a step with an alternative index, the state + * is duplicated, with one copy remaining at the original step, and one copy + * moving to the alternative step. The alternative may have its own + * alternative step, so this splitting is an iterative process. + * - `is_dead_end` - Indicates that this state cannot be passed directly, and + * exists only in order to redirect to an alternative index, with no + * splitting. + * - `is_pass_through` - Indicates that state has no matching logic of its own, + * and exists only to split a state. One copy of the state advances + * immediately to the next step, and one moves to the alternative step. + * - `alternative_is_immediate` - Indicates that this step's alternative step + * should be treated as if `is_immediate` is true. + * + * Steps also store some derived state that summarizes how they relate to other + * steps within the same pattern. This is used to optimize the matching process: + * - `contains_captures` - Indicates that this step or one of its child steps + * has a non-empty `capture_ids` list. + * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then + * it and all of its subsequent sibling steps within the same parent pattern + * are guaranteed to match. + * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but + * for the entire top-level pattern. 
When iterating through a query's + * captures using `ts_query_cursor_next_capture`, this field is used to + * detect that a capture can safely be returned from a match that has not + * even completed yet. + */ +/* + * NOTE(review): `supertype_symbol` and `is_named` are not covered by the field + * list in the preceding comment. + */ +struct s_query_step +{ + t_symbol symbol; + t_symbol supertype_symbol; + t_field_id field; + uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; + uint16_t depth; + uint16_t alternative_index; + uint16_t negated_field_list_id; + bool is_named : 1; + bool is_immediate : 1; + bool is_last_child : 1; + bool is_pass_through : 1; + bool is_dead_end : 1; + bool alternative_is_immediate : 1; + bool contains_captures : 1; + bool root_pattern_guaranteed : 1; + bool parent_pattern_guaranteed : 1; +}; + +/* + * t_slice - A slice of an external array. Within a query, capture names, + * literal string values, and predicate step information are stored in three + * contiguous arrays. Individual captures, string values, and predicates are + * represented as slices of these three arrays. + */ +struct s_slice +{ + uint32_t offset; + uint32_t length; +}; + +/* + * t_symbol_table - a two-way mapping of strings to ids. + */ +struct s_symbol_table +{ + Array(char) characters; + Array(t_slice) slices; +}; + +/** + * CaptureQuantifiers - a data structure holding the quantifiers of pattern + * captures. NOTE(review): no definition follows this comment in this part of + * the header. + */ + +/* + * t_pattern_entry - Information about the starting point for matching a + * particular pattern. These entries are stored in a 'pattern map' - a sorted + * array that makes it possible to efficiently lookup patterns based on the + * symbol for their first step. The entry consists of the following fields: + * - `pattern_index` - the index of the pattern within the query + * - `step_index` - the index of the pattern's first step in the shared `steps` + * array + * - `is_rooted` - whether or not the pattern has a single root node. This + * property affects decisions about whether or not to start the pattern for + * nodes outside of a QueryCursor's range restriction.
+ */ +struct s_pattern_entry +{ + uint16_t step_index; + uint16_t pattern_index; + bool is_rooted; +}; + +/* + * s_query_pattern - Two slices (`steps`, `predicate_steps`) plus a start byte + * and an `is_non_local` flag. NOTE(review): the slices presumably index the + * query's shared `steps` and `predicate_steps` arrays; confirm. + */ +struct s_query_pattern +{ + t_slice steps; + t_slice predicate_steps; + uint32_t start_byte; + bool is_non_local; +}; + +/* + * s_step_offset - Pairs a `step_index` with a `byte_offset`. NOTE(review): + * presumably maps a step to its byte position in the query source; confirm. + */ +struct s_step_offset +{ + uint32_t byte_offset; + uint16_t step_index; +}; + +/* + * t_query_state - The state of an in-progress match of a particular pattern + * in a query. While executing, a `t_query_cursor` must keep track of a number + * of possible in-progress matches. Each of those possible matches is + * represented as one of these states. Fields: + * - `id` - A numeric id that is exposed to the public API. This allows the + * caller to remove a given match, preventing any more of its captures + * from being returned. + * - `start_depth` - The depth in the tree where the first step of the state's + * pattern was matched. + * - `pattern_index` - The pattern that the state is matching. + * - `consumed_capture_count` - The number of captures from this match that + * have already been returned. + * - `capture_list_id` - A numeric id that can be used to retrieve the state's + * list of captures from the `t_capture_list_pool`. + * - `seeking_immediate_match` - A flag that indicates that the state's next + * step must be matched by the very next sibling. This is used when + * processing repetitions. + * - `has_in_progress_alternatives` - A flag that indicates that there are + * other states that have the same captures as this state, but are at + * different steps in their pattern. This means that in order to obey the + * 'longest-match' rule, this state should not be returned as a match until + * it is clear that there can be no other alternative match with more + * captures.
+ */ +/* + * NOTE(review): `step_index`, `dead` and `needs_parent` are not described in + * the field list of the preceding comment. `consumed_capture_count` is a + * 12-bit field, so it can hold at most 4095. + */ +struct s_query_state +{ + uint32_t id; + uint32_t capture_list_id; + uint16_t start_depth; + uint16_t step_index; + uint16_t pattern_index; + uint16_t consumed_capture_count : 12; + bool seeking_immediate_match : 1; + bool has_in_progress_alternatives : 1; + bool dead : 1; + bool needs_parent : 1; +}; + +/* + * t_capture_list_pool - A collection of *lists* of captures. Each query state + * needs to maintain its own list of captures. To avoid repeated allocations, + * this struct maintains a fixed set of capture lists, and keeps track of which + * ones are currently in use by a query state. + */ +struct s_capture_list_pool +{ + Array(t_capture_list) list; + t_capture_list empty_list; + // The maximum number of capture lists that we are allowed to allocate. We + // never allow `list` to allocate more entries than this, dropping pending + // matches if needed to stay under the limit. + uint32_t max_capture_list_count; + // The number of capture lists allocated in `list` that are not currently in + // use. We reuse those existing-but-unused capture lists before trying to + // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture + // list's length to indicate that it's not in use. + uint32_t free_capture_list_count; +}; + +/* + * t_analysis_state - The state needed for walking the parse table when + * analyzing a query pattern, to determine at which steps the pattern might fail + * to match.
+ */ +struct s_analysis_state_entry +{ + t_state_id parse_state; + t_symbol parent_symbol; + uint16_t child_index; + t_field_id field_id : 15; + bool done : 1; +}; + +/* + * s_analysis_state - An inline array of `MAX_ANALYSIS_STATE_DEPTH` + * `t_analysis_state_entry` slots, together with a `depth`, a `step_index` and + * a `root_symbol`. NOTE(review): `depth` presumably counts the stack entries + * in use; confirm. + */ +struct s_analysis_state +{ + t_analysis_state_entry stack[MAX_ANALYSIS_STATE_DEPTH]; + uint16_t depth; + uint16_t step_index; + t_symbol root_symbol; +}; + +/* + * s_query_analysis - Working data for query analysis: four analysis-state + * sets, two arrays and a `did_abort` flag. NOTE(review): the role of each set + * is only suggested by its name; confirm before relying on it. + */ +struct s_query_analysis +{ + t_analysis_state_set states; + t_analysis_state_set next_states; + t_analysis_state_set deeper_states; + t_analysis_state_set state_pool; + Array(uint16_t) final_step_indices; + Array(t_symbol) finished_parent_symbols; + bool did_abort; +}; + +/* + * t_analysis_subgraph - A subset of the states in the parse table that are used + * in constructing nodes with a certain symbol. Each state is accompanied by + * some information about the possible node that could be produced in + * downstream states. + */ +struct s_analysis_subgraph_node +{ + t_state_id state; + uint16_t production_id; + uint8_t child_index : 7; + bool done : 1; +}; + +struct s_analysis_subgraph +{ + t_symbol symbol; + Array(t_state_id) start_states; + Array(t_analysis_subgraph_node) nodes; +}; + +/* + * t_state_predecessor_map - A map that stores the predecessors of each parse + * state. This is used during query analysis to determine which parse states can + * lead to which reduce actions. + */ + +struct s_state_predecessor_map +{ + t_state_id *contents; +}; + +/* + * t_parse_query - A tree query, compiled from a string of S-expressions. The + * query itself is immutable. The mutable state used in the process of executing + * the query is stored in a `t_query_cursor`.
+ */ +struct s_parse_query +{ + t_symbol_table captures; + t_symbol_table predicate_values; + Array(t_capture_quantifiers) capture_quantifiers; + Array(t_query_step) steps; + Array(t_pattern_entry) pattern_map; + Array(t_query_predicate_step) predicate_steps; + Array(t_query_pattern) patterns; + Array(t_step_offset) step_offsets; + Array(t_field_id) negated_fields; + Array(char) string_buffer; + Array(t_symbol) repeat_symbols_with_rootless_patterns; + const t_language *language; + uint16_t wildcard_root_pattern_count; +}; + +/* + * t_query_cursor - A stateful struct used to execute a query on a tree. + */ +struct s_query_cursor +{ + const t_parse_query *query; + t_tree_cursor cursor; + Array(t_query_state) states; + Array(t_query_state) finished_states; + t_capture_list_pool capture_list_pool; + uint32_t depth; + uint32_t max_start_depth; + uint32_t start_byte; + uint32_t end_byte; + t_point start_point; + t_point end_point; + uint32_t next_state_id; + bool on_visible_node; + bool ascending; + bool halted; + bool did_exceed_match_limit; +}; + +/* + * s_stack_link - Points at a `t_stack_node` and carries a subtree and an + * `is_pending` flag. + */ +struct s_stack_link +{ + t_stack_node *node; + t_subtree subtree; + bool is_pending; +}; + +/* + * s_stack_node - Holds `MAX_LINK_COUNT` link slots in an inline array, with + * `link_count` and `ref_count` alongside. NOTE(review): presumably nodes are + * reference-counted and `link_count` is the number of slots in use; confirm. + */ +struct s_stack_node +{ + t_state_id state; + t_length position; + t_stack_link links[MAX_LINK_COUNT]; + short unsigned int link_count; + uint32_t ref_count; + unsigned error_cost; + unsigned node_count; + int dynamic_precedence; +}; + +struct s_stack_iterator +{ + t_stack_node *node; + t_subtree_array subtrees; + uint32_t subtree_count; + bool is_pending; +}; + +enum e_stack_status +{ + StackStatusActive, + StackStatusPaused, + StackStatusHalted, +}; + +struct s_stack_head +{ + t_stack_node *node; + t_stack_summary *summary; + unsigned node_count_at_last_error; + t_subtree last_external_token; + t_subtree lookahead_when_paused; + t_stack_status status; +}; + +struct s_stack +{ + Array(t_stack_head) heads; + t_stack_slice_array slices; + Array(t_stack_iterator) iterators; + t_stack_node_array node_pool; + t_stack_node *base_node; +
t_subtree_pool *subtree_pool; +}; + +/* + * e_stack_action - `None` is implicitly 0; `Stop` and `Pop` are given the + * explicit values 1 and 2. NOTE(review): the distinct bits suggest the values + * may be OR-ed together; confirm against the code that produces them. + */ +enum e_stack_action +{ + StackActionNone, + StackActionStop = 1, + StackActionPop = 2, +}; + +struct s_summarize_stack_session +{ + t_stack_summary *summary; + unsigned max_depth; +}; + +/* + * s_edit - Three `t_length` positions: `start`, `old_end` and `new_end`. + */ +struct s_edit +{ + t_length start; + t_length old_end; + t_length new_end; +}; + +struct s_edit_entry +{ + t_subtree *tree; + t_edit edit; +}; + +struct s_cursor_child_iterator +{ + t_subtree parent; + const t_first_tree *tree; + t_length position; + uint32_t child_index; + uint32_t structural_child_index; + uint32_t descendant_index; + const t_symbol *alias_sequence; +}; + +#endif // STRUCTS_H \ No newline at end of file