#ifndef STRUCTS_H #define STRUCTS_H #include "./api.h" typedef unsigned t_stack_action; typedef struct s_analysis_state t_analysis_state; typedef struct s_analysis_state_entry t_analysis_state_entry; typedef struct s_analysis_subgraph t_analysis_subgraph; typedef struct s_analysis_subgraph_node t_analysis_subgraph_node; typedef struct s_capture_list_pool t_capture_list_pool; typedef struct s_cursor_child_iterator t_cursor_child_iterator; typedef struct s_edit t_edit; typedef struct s_edit_entry t_edit_entry; typedef struct s_error_status t_error_status; typedef struct s_first_parser t_first_parser; typedef struct s_iterator t_iterator; typedef struct s_node_child_iterator t_node_child_iterator; typedef struct s_parse_query t_parse_query; typedef struct s_pattern_entry t_pattern_entry; typedef struct s_query_analysis t_query_analysis; typedef struct s_query_cursor t_query_cursor; typedef struct s_query_pattern t_query_pattern; typedef struct s_query_state t_query_state; typedef struct s_query_step t_query_step; typedef struct s_slice t_slice; typedef struct s_stack t_stack; typedef struct s_stack_head t_stack_head; typedef struct s_stack_iterator t_stack_iterator; typedef struct s_stack_link t_stack_link; typedef struct s_stack_node t_stack_node; typedef struct s_state_predecessor_map t_state_predecessor_map; typedef struct s_step_offset t_step_offset; typedef struct s_stream t_stream; typedef struct s_string_input t_string_input; typedef struct s_summarize_stack_session t_summarize_stack_session; typedef struct s_symbol_table t_symbol_table; typedef struct s_token_cache t_token_cache; typedef t_stack_action (*t_stack_callback)(void *, const t_stack_iterator *); typedef uint32_t (*t_unicode_decode_function)(const uint8_t *chunk, uint32_t size, int32_t *codepoint); typedef Array(t_analysis_state *) t_analysis_state_set; typedef Array(t_analysis_subgraph) t_analysis_subgraph_array; typedef Array(t_query_capture) t_capture_list; typedef Array(t_stack_node *) t_stack_node_array; typedef Array(uint8_t) t_capture_quantifiers; typedef enum e_stack_status t_stack_status; typedef enum e_error_comparaison t_error_comparaison; typedef enum e_iterator_comparison t_iterator_comparison; struct s_iterator { t_tree_cursor cursor; const t_language *language; unsigned visible_depth; bool in_padding; }; enum e_iterator_comparison { IteratorDiffers, IteratorMayDiffer, IteratorMatches, }; struct s_node_child_iterator { t_subtree parent; const t_first_tree *tree; t_length position; uint32_t child_index; uint32_t structural_child_index; const t_symbol *alias_sequence; }; struct s_token_cache { t_subtree token; t_subtree last_external_token; uint32_t byte_index; }; struct s_first_parser { t_lexer lexer; t_stack *stack; t_subtree_pool tree_pool; const t_language *language; t_reduce_action_set reduce_actions; t_subtree finished_tree; t_subtree_array trailing_extras; t_subtree_array trailing_extras2; t_subtree_array scratch_trees; t_token_cache token_cache; t_reusable_node reusable_node; void *external_scanner_payload; t_parser_clock end_clock; t_parser_duration timeout_duration; unsigned accept_count; unsigned operation_count; const volatile size_t *cancellation_flag; t_subtree old_tree; t_range_array included_range_differences; unsigned included_range_difference_index; bool has_scanner_error; }; struct s_error_status { unsigned cost; unsigned node_count; int dynamic_precedence; bool is_in_error; }; enum e_error_comparaison { ErrorComparisonTakeLeft, ErrorComparisonPreferLeft, ErrorComparisonNone, ErrorComparisonPreferRight, ErrorComparisonTakeRight, }; struct s_string_input { const char *string; uint32_t length; }; /* * t_stream - A sequence of unicode characters derived from a UTF8 string. * This struct is used in parsing queries from S-expressions. */ struct s_stream { const char *input; const char *start; const char *end; int32_t next; uint8_t next_size; }; /* * t_query_step - A step in the process of matching a query. Each node within * a query S-expression corresponds to one of these steps. An entire pattern * is represented as a sequence of these steps. The basic properties of a * node are represented by these fields: * - `symbol` - The grammar symbol to match. A zero value represents the * wildcard symbol, '_'. * - `field` - The field name to match. A zero value means that a field name * was not specified. * - `capture_ids` - An array of integers representing the names of captures * associated with this node in the pattern, terminated by a `NONE` value. * - `depth` - The depth where this node occurs in the pattern. The root node * of the pattern has depth zero. * - `negated_field_list_id` - An id representing a set of fields that must * not be present on a node matching this step. * * Steps have some additional fields in order to handle the `.` (or "anchor") * operator, which forbids additional child nodes: * - `is_immediate` - Indicates that the node matching this step cannot be * preceded by other sibling nodes that weren't specified in the pattern. * - `is_last_child` - Indicates that the node matching this step cannot have * any subsequent named siblings. * * For simple patterns, steps are matched in sequential order. But in order to * handle alternative/repeated/optional sub-patterns, query steps are not always * structured as a linear sequence; they sometimes need to split and merge. This * is done using the following fields: * - `alternative_index` - The index of a different query step that serves as * an alternative to this step. A `NONE` value represents no alternative. * When a query state reaches a step with an alternative index, the state * is duplicated, with one copy remaining at the original step, and one copy * moving to the alternative step. The alternative may have its own * alternative step, so this splitting is an iterative process. * - `is_dead_end` - Indicates that this state cannot be passed directly, and * exists only in order to redirect to an alternative index, with no * splitting. * - `is_pass_through` - Indicates that state has no matching logic of its own, * and exists only to split a state. One copy of the state advances * immediately to the next step, and one moves to the alternative step. * - `alternative_is_immediate` - Indicates that this step's alternative step * should be treated as if `is_immediate` is true. * * Steps also store some derived state that summarizes how they relate to other * steps within the same pattern. This is used to optimize the matching process: * - `contains_captures` - Indicates that this step or one of its child steps * has a non-empty `capture_ids` list. * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then * it and all of its subsequent sibling steps within the same parent pattern * are guaranteed to match. * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but * for the entire top-level pattern. When iterating through a query's * captures using `ts_query_cursor_next_capture`, this field is used to * detect that a capture can safely be returned from a match that has not * even completed yet. */ struct s_query_step { t_symbol symbol; t_symbol supertype_symbol; t_field_id field; uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; uint16_t depth; uint16_t alternative_index; uint16_t negated_field_list_id; bool is_named : 1; bool is_immediate : 1; bool is_last_child : 1; bool is_pass_through : 1; bool is_dead_end : 1; bool alternative_is_immediate : 1; bool contains_captures : 1; bool root_pattern_guaranteed : 1; bool parent_pattern_guaranteed : 1; }; /* * t_slice - A slice of an external array. Within a query, capture names, * literal string values, and predicate step information are stored in three * contiguous arrays. Individual captures, string values, and predicates are * represented as slices of these three arrays. */ struct s_slice { uint32_t offset; uint32_t length; }; /* * t_symbol_table - a two-way mapping of strings to ids. */ struct s_symbol_table { Array(char) characters; Array(t_slice) slices; }; /** * CaptureQuantififers - a data structure holding the quantifiers of pattern * captures. */ /* * t_pattern_entry - Information about the starting point for matching a * particular pattern. These entries are stored in a 'pattern map' - a sorted * array that makes it possible to efficiently lookup patterns based on the * symbol for their first step. The entry consists of the following fields: * - `pattern_index` - the index of the pattern within the query * - `step_index` - the index of the pattern's first step in the shared `steps` * array * - `is_rooted` - whether or not the pattern has a single root node. This * property affects decisions about whether or not to start the pattern for * nodes outside of a QueryCursor's range restriction. */ struct s_pattern_entry { uint16_t step_index; uint16_t pattern_index; bool is_rooted; }; struct s_query_pattern { t_slice steps; t_slice predicate_steps; uint32_t start_byte; bool is_non_local; }; struct s_step_offset { uint32_t byte_offset; uint16_t step_index; }; /* * t_query_state - The state of an in-progress match of a particular pattern * in a query. While executing, a `t_query_cursor` must keep track of a number * of possible in-progress matches. Each of those possible matches is * represented as one of these states. Fields: * - `id` - A numeric id that is exposed to the public API. This allows the * caller to remove a given match, preventing any more of its captures * from being returned. * - `start_depth` - The depth in the tree where the first step of the state's * pattern was matched. * - `pattern_index` - The pattern that the state is matching. * - `consumed_capture_count` - The number of captures from this match that * have already been returned. * - `capture_list_id` - A numeric id that can be used to retrieve the state's * list of captures from the `t_capture_list_pool`. * - `seeking_immediate_match` - A flag that indicates that the state's next * step must be matched by the very next sibling. This is used when * processing repetitions. * - `has_in_progress_alternatives` - A flag that indicates that there is are * other states that have the same captures as this state, but are at * different steps in their pattern. This means that in order to obey the * 'longest-match' rule, this state should not be returned as a match until * it is clear that there can be no other alternative match with more * captures. */ struct s_query_state { uint32_t id; uint32_t capture_list_id; uint16_t start_depth; uint16_t step_index; uint16_t pattern_index; uint16_t consumed_capture_count : 12; bool seeking_immediate_match : 1; bool has_in_progress_alternatives : 1; bool dead : 1; bool needs_parent : 1; }; /* * t_capture_list_pool - A collection of *lists* of captures. Each query state * needs to maintain its own list of captures. To avoid repeated allocations, * this struct maintains a fixed set of capture lists, and keeps track of which * ones are currently in use by a query state. */ struct s_capture_list_pool { Array(t_capture_list) list; t_capture_list empty_list; // The maximum number of capture lists that we are allowed to allocate. We // never allow `list` to allocate more entries than this, dropping pending // matches if needed to stay under the limit. uint32_t max_capture_list_count; // The number of capture lists allocated in `list` that are not currently in // use. We reuse those existing-but-unused capture lists before trying to // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture // list's length to indicate that it's not in use. uint32_t free_capture_list_count; }; /* * t_analysis_state - The state needed for walking the parse table when * analyzing a query pattern, to determine at which steps the pattern might fail * to match. */ struct s_analysis_state_entry { t_state_id parse_state; t_symbol parent_symbol; uint16_t child_index; t_field_id field_id : 15; bool done : 1; }; struct s_analysis_state { t_analysis_state_entry stack[MAX_ANALYSIS_STATE_DEPTH]; uint16_t depth; uint16_t step_index; t_symbol root_symbol; }; struct s_query_analysis { t_analysis_state_set states; t_analysis_state_set next_states; t_analysis_state_set deeper_states; t_analysis_state_set state_pool; Array(uint16_t) final_step_indices; Array(t_symbol) finished_parent_symbols; bool did_abort; }; /* * t_analysis_subgraph - A subset of the states in the parse table that are used * in constructing nodes with a certain symbol. Each state is accompanied by * some information about the possible node that could be produced in * downstream states. */ struct s_analysis_subgraph_node { t_state_id state; uint16_t production_id; uint8_t child_index : 7; bool done : 1; }; struct s_analysis_subgraph { t_symbol symbol; Array(t_state_id) start_states; Array(t_analysis_subgraph_node) nodes; }; /* * t_state_predecessor_map - A map that stores the predecessors of each parse * state. This is used during query analysis to determine which parse states can * lead to which reduce actions. */ struct s_state_predecessor_map { t_state_id *contents; }; /* * t_parse_query - A tree query, compiled from a string of S-expressions. The * query itself is immutable. The mutable state used in the process of executing * the query is stored in a `t_query_cursor`. */ struct s_parse_query { t_symbol_table captures; t_symbol_table predicate_values; Array(t_capture_quantifiers) capture_quantifiers; Array(t_query_step) steps; Array(t_pattern_entry) pattern_map; Array(t_query_predicate_step) predicate_steps; Array(t_query_pattern) patterns; Array(t_step_offset) step_offsets; Array(t_field_id) negated_fields; Array(char) string_buffer; Array(t_symbol) repeat_symbols_with_rootless_patterns; const t_language *language; uint16_t wildcard_root_pattern_count; }; /* * t_query_cursor - A stateful struct used to execute a query on a tree. */ struct s_query_cursor { const t_parse_query *query; t_tree_cursor cursor; Array(t_query_state) states; Array(t_query_state) finished_states; t_capture_list_pool capture_list_pool; uint32_t depth; uint32_t max_start_depth; uint32_t start_byte; uint32_t end_byte; t_point start_point; t_point end_point; uint32_t next_state_id; bool on_visible_node; bool ascending; bool halted; bool did_exceed_match_limit; }; struct s_stack_link { t_stack_node *node; t_subtree subtree; bool is_pending; }; struct s_stack_node { t_state_id state; t_length position; t_stack_link links[MAX_LINK_COUNT]; short unsigned int link_count; uint32_t ref_count; unsigned error_cost; unsigned node_count; int dynamic_precedence; }; struct s_stack_iterator { t_stack_node *node; t_subtree_array subtrees; uint32_t subtree_count; bool is_pending; }; enum e_stack_status { StackStatusActive, StackStatusPaused, StackStatusHalted, }; struct s_stack_head { t_stack_node *node; t_stack_summary *summary; unsigned node_count_at_last_error; t_subtree last_external_token; t_subtree lookahead_when_paused; t_stack_status status; }; struct s_stack { Array(t_stack_head) heads; t_stack_slice_array slices; Array(t_stack_iterator) iterators; t_stack_node_array node_pool; t_stack_node *base_node; t_subtree_pool *subtree_pool; }; enum e_stack_action { StackActionNone, StackActionStop = 1, StackActionPop = 2, }; struct s_summarize_stack_session { t_stack_summary *summary; unsigned max_depth; }; struct s_edit { t_length start; t_length old_end; t_length new_end; }; struct s_edit_entry { t_subtree *tree; t_edit edit; }; struct s_cursor_child_iterator { t_subtree parent; const t_first_tree *tree; t_length position; uint32_t child_index; uint32_t structural_child_index; uint32_t descendant_index; const t_symbol *alias_sequence; }; #endif // STRUCTS_H