started to work again on the parser

2024-05-30 15:38:35 +02:00 · 2024-05-30 15:38:35 +02:00 · f5e048d02e
commit f5e048d02e
parent ffc7a2d0fc
5 changed files with 1293 additions and 1192 deletions
--- a/parser/src/api.h
+++ b/parser/src/api.h
@ -1,6 +1,7 @@
 #ifndef TREE_SITTER_ARRAY_H_
 #define TREE_SITTER_ARRAY_H_

+#include "me/char/char.h"
 #include "me/mem/mem.h"
 #include <assert.h>
 #include <limits.h>
@ -8,7 +9,9 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
-#include "me/char/char.h"
+
+#include "./api_structs.h"
+#include "./array.h"

 #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
 #define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14
@ -29,14 +32,14 @@
 #define MAX_ITERATOR_COUNT 64
 #define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX
 #define TS_MAX_TREE_POOL_SIZE 32
-#define ts_builtin_sym_error ((t_symbol)-1)
+// Error sentinel: -1 converted to t_symbol, i.e. the largest t_symbol value.
+#define ts_builtin_sym_error ((t_symbol)(-1))
 #define ts_builtin_sym_end 0
-#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
+
 #define POINT_ZERO ((t_point){0, 0})
 #define POINT_MAX ((t_point){UINT32_MAX, UINT32_MAX})
 #define TS_TREE_STATE_NONE USHRT_MAX
 #define NULL_SUBTREE ((t_subtree){.ptr = NULL})
-#define STACK_VERSION_NONE ((t_stack_version)-1)
+// "No version" sentinel: -1 converted to t_stack_version (an unsigned type).
+#define STACK_VERSION_NONE ((t_stack_version)(-1))
 #define TS_DECODE_ERROR (-1)

 #if true
@ -51,152 +54,6 @@
 # define free(p) mem_free((p))
 #endif

-#define Array(T)                                                               \
-	struct                                                                     \
-	{                                                                          \
-		T		*contents;                                                     \
-		uint32_t size;                                                         \
-		uint32_t capacity;                                                     \
-	}
-
-#ifndef inline
-# define inline __inline__
-#endif
-
-/// Initialize an array.
-#define array_init(self)                                                       \
-	((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
-
-/// Create an empty array.
-#define array_new()                                                            \
-	{                                                                          \
-		NULL, 0, 0                                                             \
-	}
-
-/// Get a pointer to the element at a given `index` in the array.
-#define array_get(self, _index)                                                \
-	(assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
-
-/// Get a pointer to the first element in the array.
-#define array_front(self) array_get(self, 0)
-
-/// Get a pointer to the last element in the array.
-#define array_back(self) array_get(self, (self)->size - 1)
-
-/// Clear the array, setting its size to zero. Note that this does not free any
-/// memory allocated for the array's contents.
-#define array_clear(self) ((self)->size = 0)
-
-/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
-/// less than the array's current capacity, this function has no effect.
-#define array_reserve(self, new_capacity)                                      \
-	_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
-
-/// Free any memory allocated for this array. Note that this does not free any
-/// memory allocated for the array's contents.
-#define array_delete(self) _array__delete((Array *)(self))
-
-/// Push a new `element` onto the end of the array.
-#define array_push(self, element)                                              \
-	(_array__grow((Array *)(self), 1, array_elem_size(self)),                  \
-	 (self)->contents[(self)->size++] = (element))
-
-/// Increase the array's size by `count` elements.
-/// New elements are zero-initialized.
-#define array_grow_by(self, count)                                             \
-	do                                                                         \
-	{                                                                          \
-		if ((count) == 0)                                                      \
-			break;                                                             \
-		_array__grow((Array *)(self), count, array_elem_size(self));           \
-		memset((self)->contents + (self)->size, 0,                             \
-			   (count) * array_elem_size(self));                               \
-		(self)->size += (count);                                               \
-	} while (0)
-
-/// Append all elements from one array to the end of another.
-#define array_push_all(self, other)                                            \
-	array_extend((self), (other)->size, (other)->contents)
-
-/// Append `count` elements to the end of the array, reading their values from
-/// the `contents` pointer.
-#define array_extend(self, count, contents)                                    \
-	_array__splice((Array *)(self), array_elem_size(self), (self)->size, 0,    \
-				   count, contents)
-
-/// Remove `old_count` elements from the array starting at the given `index`. At
-/// the same index, insert `new_count` new elements, reading their values from
-/// the `new_contents` pointer.
-#define array_splice(self, _index, old_count, new_count, new_contents)         \
-	_array__splice((Array *)(self), array_elem_size(self), _index, old_count,  \
-				   new_count, new_contents)
-
-/// Insert one `element` into the array at the given `index`.
-#define array_insert(self, _index, element)                                    \
-	_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1,       \
-				   &(element))
-
-/// Remove one element from the array at the given `index`.
-#define array_erase(self, _index)                                              \
-	_array__erase((Array *)(self), array_elem_size(self), _index)
-
-/// Pop the last element off the array, returning the element by value.
-#define array_pop(self) ((self)->contents[--(self)->size])
-
-/// Assign the contents of one array to another, reallocating if necessary.
-#define array_assign(self, other)                                              \
-	_array__assign((Array *)(self), (const Array *)(other),                    \
-				   array_elem_size(self))
-
-/// Swap one array with another
-#define array_swap(self, other) _array__swap((Array *)(self), (Array *)(other))
-
-/// Get the size of the array contents
-#define array_elem_size(self) (sizeof *(self)->contents)
-
-/// Search a sorted array for a given `needle` value, using the given `compare`
-/// callback to determine the order.
-///
-/// If an existing element is found to be equal to `needle`, then the `index`
-/// out-parameter is set to the existing value's index, and the `exists`
-/// out-parameter is set to true. Otherwise, `index` is set to an index where
-/// `needle` should be inserted in order to preserve the sorting, and `exists`
-/// is set to false.
-#define array_search_sorted_with(self, compare, needle, _index, _exists)       \
-	_array__search_sorted(self, 0, compare, , needle, _index, _exists)
-
-/// Search a sorted array for a given `needle` value, using integer comparisons
-/// of a given struct field (specified with a leading dot) to determine the
-/// order.
-///
-/// See also `array_search_sorted_with`.
-#define array_search_sorted_by(self, field, needle, _index, _exists)           \
-	_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
-
-/// Insert a given `value` into a sorted array, using the given `compare`
-/// callback to determine the order.
-#define array_insert_sorted_with(self, compare, value)                         \
-	do                                                                         \
-	{                                                                          \
-		unsigned _index, _exists;                                              \
-		array_search_sorted_with(self, compare, &(value), &_index, &_exists);  \
-		if (!_exists)                                                          \
-			array_insert(self, _index, value);                                 \
-	} while (0)
-
-/// Insert a given `value` into a sorted array, using integer comparisons of
-/// a given struct field (specified with a leading dot) to determine the order.
-///
-/// See also `array_search_sorted_by`.
-#define array_insert_sorted_by(self, field, value)                             \
-	do                                                                         \
-	{                                                                          \
-		unsigned _index, _exists;                                              \
-		array_search_sorted_by(self, field, (value)field, &_index, &_exists);  \
-		if (!_exists)                                                          \
-			array_insert(self, _index, value);                                 \
-	} while (0)
-
 // Get a subtree's children, which are allocated immediately before the
 // tree's own heap data.
 #define ts_subtree_children(self)                                              \
@ -204,153 +61,6 @@
 		 ? NULL                                                                \
 		 : (t_subtree *)((self).ptr) - (self).ptr->child_count)

-typedef uint16_t					t_state_id;
-typedef uint16_t					t_symbol;
-typedef uint16_t					t_field_id;
-typedef struct s_language			t_language;
-typedef struct s_first_parser		t_first_parser;
-typedef struct s_first_tree			t_first_tree;
-typedef struct s_parse_query		t_parse_query;
-typedef struct s_query_cursor		t_query_cursor;
-typedef struct s_lookahead_iterator t_lookahead_iterator;
-
-typedef struct s_point
-{
-	uint32_t row;
-	uint32_t column;
-} t_point;
-
-typedef struct s_length
-{
-	uint32_t bytes;
-	t_point	 extent;
-} t_length;
-
-typedef enum e_input_encoding
-{
-	TSInputEncodingUTF8,
-	TSInputEncodingUTF16,
-} t_input_encoding;
-
-typedef enum e_symbol_type
-{
-	TSSymbolTypeRegular,
-	TSSymbolTypeAnonymous,
-	TSSymbolTypeAuxiliary,
-} t_symbol_type;
-
-typedef struct s_parse_range
-{
-	t_point	 start_point;
-	t_point	 end_point;
-	uint32_t start_byte;
-	uint32_t end_byte;
-} t_parse_range;
-
-typedef struct s_parse_input
-{
-	void *payload;
-	const char *(*read)(void *payload, uint32_t byte_index, t_point position,
-						uint32_t *bytes_read);
-	t_input_encoding encoding;
-} t_parse_input;
-
-typedef enum e_log_type
-{
-	TSLogTypeParse,
-	TSLogTypeLex,
-} t_log_type;
-
-typedef struct s_parse_logger
-{
-	void *payload;
-	void (*log)(void *payload, t_log_type log_type, const char *buffer);
-} t_parse_logger;
-
-typedef struct s_input_edit
-{
-	uint32_t start_byte;
-	uint32_t old_end_byte;
-	uint32_t new_end_byte;
-	t_point	 start_point;
-	t_point	 old_end_point;
-	t_point	 new_end_point;
-} t_input_edit;
-
-typedef struct s_parse_node
-{
-	uint32_t			context[4];
-	const void		   *id;
-	const t_first_tree *tree;
-} t_parse_node;
-
-typedef struct s_tree_cursor_entry
-{
-	const union u_subtree *subtree;
-	t_length			   position;
-	uint32_t			   child_index;
-	uint32_t			   structural_child_index;
-	uint32_t			   descendant_index;
-} t_tree_cursor_entry;
-
-typedef struct s_tree_cursor
-{
-	const t_first_tree *tree;
-	Array(t_tree_cursor_entry) stack;
-	t_symbol root_alias_symbol;
-} t_tree_cursor;
-
-typedef struct s_query_capture
-{
-	t_parse_node node;
-	uint32_t	 index;
-} t_query_capture;
-
-typedef enum e_quantifier
-{
-	TSQuantifierZero = 0, // must match the array initialization value
-	TSQuantifierZeroOrOne,
-	TSQuantifierZeroOrMore,
-	TSQuantifierOne,
-	TSQuantifierOneOrMore,
-} t_quantifier;
-
-typedef struct s_query_match
-{
-	uint32_t			   id;
-	uint16_t			   pattern_index;
-	uint16_t			   capture_count;
-	const t_query_capture *captures;
-} t_query_match;
-
-typedef enum e_query_predicate_step_type
-{
-	TSQueryPredicateStepTypeDone,
-	TSQueryPredicateStepTypeCapture,
-	TSQueryPredicateStepTypeString,
-} t_query_predicate_step_type;
-
-typedef struct s_query_predicate_step
-{
-	t_query_predicate_step_type type;
-	uint32_t					value_id;
-} t_query_predicate_step;
-
-typedef enum e_query_error
-{
-	TSQueryErrorNone = 0,
-	TSQueryErrorSyntax,
-	TSQueryErrorNodeType,
-	TSQueryErrorField,
-	TSQueryErrorCapture,
-	TSQueryErrorStructure,
-	TSQueryErrorLanguage,
-} t_query_error;
-
-// Private
-
-typedef Array(void) Array;
-
 /// This is not what you're looking for, see `array_delete`.
 static inline void _array__delete(Array *self)
 {
@ -497,10 +207,6 @@ static inline void _array__splice(Array *self, size_t element_size,
 /// function above.
 #define _compare_int(a, b) ((int)*(a) - (int)(b))

-#include <stddef.h>
-#include <stdint.h>
-#include <stdlib.h>
-
 static inline size_t atomic_load(const volatile size_t *p)
 {
 #ifdef __ATOMIC_RELAXED
@ -528,25 +234,6 @@ static inline uint32_t atomic_dec(volatile uint32_t *p)
 #endif
 }

-// The serialized state of an external scanner.
-//
-// Every time an external token subtree is created after a call to an
-// external scanner, the scanner's `serialize` function is called to
-// retrieve a serialized copy of its state. The bytes are then copied
-// onto the subtree itself so that the scanner's state can later be
-// restored using its `deserialize` function.
-//
-// Small byte arrays are stored inline, and long ones are allocated
-// separately on the heap.
-typedef struct
-{
-	union {
-		char *long_data;
-		char  short_data[24];
-	};
-	uint32_t length;
-} t_external_scanner_state;
-
 // A compact representation of a subtree.
 //
 // This representation is used for small leaf nodes that are not
@ -558,161 +245,12 @@ typedef struct
 // Because of alignment, for any valid pointer this will be 0, giving
 // us the opportunity to make use of this bit to signify whether to use
 // the pointer or the inline struct.
-typedef struct s_subtree_inline_data t_subtree_inline_data;
-
-#define SUBTREE_BITS                                                           \
-	bool visible : 1;                                                          \
-	bool named : 1;                                                            \
-	bool extra : 1;                                                            \
-	bool has_changes : 1;                                                      \
-	bool is_missing : 1;                                                       \
-	bool is_keyword : 1;
-
-#define SUBTREE_SIZE                                                           \
-	uint8_t padding_columns;                                                   \
-	uint8_t padding_rows : 4;                                                  \
-	uint8_t lookahead_bytes : 4;                                               \
-	uint8_t padding_bytes;                                                     \
-	uint8_t size_bytes;
-
-#if TS_BIG_ENDIAN
-# if TS_PTR_SIZE == 32
-
-struct s_subtree_inline_data
-{
-	uint16_t parse_state;
-	uint8_t	 symbol;
-	SUBTREE_BITS
-	bool unused : 1;
-	bool is_inline : 1;
-	SUBTREE_SIZE
-};
-
-# else
-
-struct s_subtree_inline_data
-{
-	SUBTREE_SIZE
-	uint16_t parse_state;
-	uint8_t	 symbol;
-	SUBTREE_BITS
-	bool unused : 1;
-	bool is_inline : 1;
-};
-
-# endif
-#else
-
-struct s_subtree_inline_data
-{
-	bool is_inline : 1;
-	SUBTREE_BITS
-	uint8_t	 symbol;
-	uint16_t parse_state;
-	SUBTREE_SIZE
-};
-
-#endif
-
-#undef SUBTREE_BITS
-#undef SUBTREE_SIZE

 // A heap-allocated representation of a subtree.
 //
 // This representation is used for parent nodes, external tokens,
 // errors, and other leaf nodes whose data is too large to fit into
 // the inline representation.
-typedef struct s_subtree_heap_data
-{
-	volatile uint32_t ref_count;
-	t_length		  padding;
-	t_length		  size;
-	uint32_t		  lookahead_bytes;
-	uint32_t		  error_cost;
-	uint32_t		  child_count;
-	t_symbol		  symbol;
-	t_state_id		  parse_state;
-
-	bool visible : 1;
-	bool named : 1;
-	bool extra : 1;
-	bool fragile_left : 1;
-	bool fragile_right : 1;
-	bool has_changes : 1;
-	bool has_external_tokens : 1;
-	bool has_external_scanner_state_change : 1;
-	bool depends_on_column : 1;
-	bool is_missing : 1;
-	bool is_keyword : 1;
-
-	union {
-		// Non-terminal subtrees (`child_count > 0`)
-		struct
-		{
-			uint32_t visible_child_count;
-			uint32_t named_child_count;
-			uint32_t visible_descendant_count;
-			int32_t	 dynamic_precedence;
-			uint16_t repeat_depth;
-			uint16_t production_id;
-			struct
-			{
-				t_symbol   symbol;
-				t_state_id parse_state;
-			} first_leaf;
-		};
-
-		// External terminal subtrees (`child_count == 0 &&
-		// has_external_tokens`)
-		t_external_scanner_state external_scanner_state;
-
-		// Error terminal subtrees (`child_count == 0 && symbol ==
-		// ts_builtin_sym_error`)
-		int32_t lookahead_char;
-	};
-} t_subtree_heap_data;
-
-// The fundamental building block of a syntax tree.
-typedef union u_subtree {
-	t_subtree_inline_data	   data;
-	const t_subtree_heap_data *ptr;
-} t_subtree;
-
-// Like t_subtree, but mutable.
-typedef union u_mutable_subtree {
-	t_subtree_inline_data data;
-	t_subtree_heap_data	 *ptr;
-} t_mutable_subtree;
-
-typedef Array(t_subtree) t_subtree_array;
-typedef Array(t_mutable_subtree) t_mutable_subtree_array;
-
-typedef struct
-{
-	t_mutable_subtree_array free_trees;
-	t_mutable_subtree_array tree_stack;
-} t_subtree_pool;
-
-typedef Array(t_parse_range) t_range_array;
-
-typedef union u_parse_action {
-	struct
-	{
-		uint8_t	   type;
-		t_state_id state;
-		bool	   extra;
-		bool	   repetition;
-	} shift;
-	struct
-	{
-		uint8_t	 type;
-		uint8_t	 child_count;
-		t_symbol symbol;
-		int16_t	 dynamic_precedence;
-		uint16_t production_id;
-	} reduce;
-	uint8_t type;
-} t_parse_action;

 void ts_range_array_get_changed_ranges(const t_parse_range *old_ranges,
 									   unsigned				old_range_count,
@ -728,133 +266,6 @@ unsigned ts_subtree_get_changed_ranges(
 	t_tree_cursor *cursor1, t_tree_cursor *cursor2, const t_language *language,
 	const t_range_array *included_range_differences, t_parse_range **ranges);

-typedef struct s_table_entry
-{
-	const t_parse_action *actions;
-	uint32_t			  action_count;
-	bool				  is_reusable;
-} t_table_entry;
-
-typedef struct s_lookahead_iterator
-{
-	const t_language *language;
-	const uint16_t	 *data;
-	const uint16_t	 *group_end;
-	t_state_id		  state;
-	uint16_t		  table_value;
-	uint16_t		  section_index;
-	uint16_t		  group_count;
-	bool			  is_small_state;
-
-	const t_parse_action *actions;
-	t_symbol			  symbol;
-	t_state_id			  next_state;
-	uint16_t			  action_count;
-} t_lookahead_iterator;
-
-typedef struct s_symbol_metadata
-{
-	bool visible;
-	bool named;
-	bool supertype;
-} t_symbol_metadata;
-
-typedef enum e_parse_action_type
-{
-	TSParseActionTypeShift,
-	TSParseActionTypeReduce,
-	TSParseActionTypeAccept,
-	TSParseActionTypeRecover,
-} t_parse_action_type;
-
-typedef union u_parse_action_entry {
-	t_parse_action action;
-	struct
-	{
-		uint8_t count;
-		bool	reusable;
-	} entry;
-} t_parse_action_entry;
-
-typedef struct s_field_map_entry
-{
-	t_field_id field_id;
-	uint8_t	   child_index;
-	bool	   inherited;
-} t_field_map_entry;
-
-typedef struct s_field_map_slice
-{
-	uint16_t index;
-	uint16_t length;
-} t_field_map_slice;
-
-typedef struct s_lexer_data t_lexer_data;
-
-struct s_lexer_data
-{
-	int32_t	 lookahead;
-	t_symbol result_symbol;
-	void (*advance)(t_lexer_data *, bool);
-	void (*mark_end)(t_lexer_data *);
-	uint32_t (*get_column)(t_lexer_data *);
-	bool (*is_at_included_range_start)(const t_lexer_data *);
-	bool (*eof)(const t_lexer_data *);
-};
-
-typedef struct s_lex_mode
-{
-	uint16_t lex_state;
-	uint16_t external_lex_state;
-} t_lex_mode;
-
-typedef struct s_char_range
-{
-	int32_t start;
-	int32_t end;
-} t_char_range;
-
-struct s_language
-{
-	uint32_t					version;
-	uint32_t					symbol_count;
-	uint32_t					alias_count;
-	uint32_t					token_count;
-	uint32_t					external_token_count;
-	uint32_t					state_count;
-	uint32_t					large_state_count;
-	uint32_t					production_id_count;
-	uint32_t					field_count;
-	uint16_t					max_alias_sequence_length;
-	const uint16_t			   *parse_table;
-	const uint16_t			   *small_parse_table;
-	const uint32_t			   *small_parse_table_map;
-	const t_parse_action_entry *parse_actions;
-	const char *const		   *symbol_names;
-	const char *const		   *field_names;
-	const t_field_map_slice	   *field_map_slices;
-	const t_field_map_entry	   *field_map_entries;
-	const t_symbol_metadata	   *symbol_metadata;
-	const t_symbol			   *public_symbol_map;
-	const uint16_t			   *alias_map;
-	const t_symbol			   *alias_sequences;
-	const t_lex_mode		   *lex_modes;
-	bool (*lex_fn)(t_lexer_data *, t_state_id);
-	bool (*keyword_lex_fn)(t_lexer_data *, t_state_id);
-	t_symbol keyword_capture_token;
-	struct
-	{
-		const bool	   *states;
-		const t_symbol *symbol_map;
-		void *(*create)(void);
-		void (*destroy)(void *);
-		bool (*scan)(void *, t_lexer_data *, const bool *symbol_whitelist);
-		unsigned (*serialize)(void *, char *);
-		void (*deserialize)(void *, const char *, unsigned);
-	} external_scanner;
-	const t_state_id *primary_state_ids;
-};
-
 void ts_language_table_entry(const t_language *, t_state_id, t_symbol,
 							 t_table_entry *);

@ -1167,28 +578,6 @@ static inline t_length length_saturating_sub(t_length len1, t_length len2)
 	}
 }

-typedef struct s_lexer
-{
-	t_lexer_data data;
-	t_length	 current_position;
-	t_length	 token_start_position;
-	t_length	 token_end_position;
-
-	t_parse_range *included_ranges;
-	const char	  *chunk;
-	t_parse_input  input;
-	t_parse_logger logger;
-
-	uint32_t included_range_count;
-	uint32_t current_included_range_index;
-	uint32_t chunk_start;
-	uint32_t chunk_size;
-	uint32_t lookahead_size;
-	bool	 did_get_column;
-
-	char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
-} t_lexer;
-
 void ts_lexer_init(t_lexer *);
 void ts_lexer_delete(t_lexer *);
 void ts_lexer_set_input(t_lexer *, t_parse_input);
@ -1288,16 +677,6 @@ static inline t_point point_max(t_point a, t_point b)
 		return b;
 }

-typedef struct s_reduce_action
-{
-	uint32_t	   count;
-	t_symbol	   symbol;
-	int			   dynamic_precedence;
-	unsigned short production_id;
-} t_reduce_action;
-
-typedef Array(t_reduce_action) t_reduce_action_set;
-
 static inline void ts_reduce_action_set_add(t_reduce_action_set *self,
 											t_reduce_action		 new_action)
 {
@ -1311,19 +690,6 @@ static inline void ts_reduce_action_set_add(t_reduce_action_set *self,
 	array_push(self, new_action);
 }

-typedef struct s_stack_entry
-{
-	t_subtree tree;
-	uint32_t  child_index;
-	uint32_t  byte_offset;
-} t_stack_entry;
-
-typedef struct s_reusable_node
-{
-	Array(t_stack_entry) stack;
-	t_subtree last_external_token;
-} t_reusable_node;
-
 static inline t_reusable_node reusable_node_new(void)
 {
 	return (t_reusable_node){array_new(), NULL_SUBTREE};
@ -1433,25 +799,6 @@ static inline void reusable_node_reset(t_reusable_node *self, t_subtree tree)
 	}
 }

-typedef struct s_stack t_stack;
-
-typedef unsigned t_stack_version;
-
-typedef struct s_stack_slice
-{
-	t_subtree_array subtrees;
-	t_stack_version version;
-} t_stack_slice;
-typedef Array(t_stack_slice) t_stack_slice_array;
-
-typedef struct s_stack_summary_entry
-{
-	t_length   position;
-	unsigned   depth;
-	t_state_id state;
-} t_stack_summary_entry;
-typedef Array(t_stack_summary_entry) t_stack_summary;
-
 // Create a stack.
 t_stack *ts_stack_new(t_subtree_pool *);

@ -1547,8 +894,6 @@ void ts_stack_remove_version(t_stack *, t_stack_version);

 void ts_stack_clear(t_stack *);

-typedef void (*StackIterateCallback)(void *, t_state_id, uint32_t);
-
 void ts_external_scanner_state_init(t_external_scanner_state *, const char *,
 									unsigned);
 const char *ts_external_scanner_state_data(const t_external_scanner_state *);
@ -1835,13 +1180,6 @@ static inline t_mutable_subtree ts_subtree_to_mut_unsafe(t_subtree self)
 	return result;
 }

-typedef enum e_tree_cursor_step
-{
-	TreeCursorStepNone,
-	TreeCursorStepHidden,
-	TreeCursorStepVisible,
-} t_tree_cursor_step;
-
 void ts_tree_cursor_init(t_tree_cursor *, t_parse_node);
 void ts_tree_cursor_current_status(const t_tree_cursor *, t_field_id *, bool *,
 								   bool *, bool *, t_symbol *, unsigned *);
@ -1859,28 +1197,9 @@ static inline t_subtree ts_tree_cursor_current_subtree(

 t_parse_node ts_tree_cursor_parent_node(const t_tree_cursor *);

-typedef struct s_parent_cache_entry
-{
-	const t_subtree *child;
-	const t_subtree *parent;
-	t_length		 position;
-	t_symbol		 alias_symbol;
-} t_parent_cache_entry;
-
-struct s_first_tree
-{
-	t_subtree		  root;
-	const t_language *language;
-	t_parse_range	 *included_ranges;
-	unsigned		  included_range_count;
-};
-
 t_first_tree *ts_tree_new(t_subtree root, const t_language *language,
 						  const t_parse_range *, unsigned);
 t_parse_node  ts_node_new(const t_first_tree *, const t_subtree *, t_length,
 						  t_symbol);

-typedef uint64_t t_parser_clock;
-typedef uint64_t t_parser_duration;
-
 #endif // TREE_SITTER_ARRAY_H_
--- a/parser/src/api_structs.h
+++ b/parser/src/api_structs.h
@ -0,0 +1,590 @@
+#ifndef API_STRUCTS_H
+#define API_STRUCTS_H
+
+#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
+
+#include "./array.h"
+#include "me/types.h"
+#include <stdint.h>
+
+// Scalar types shared by the parser structures in this header.
+//   t_state_id      - type of the `state` / `parse_state` fields.
+//   t_symbol        - type of the `symbol` / `alias_symbol` fields.
+//   t_field_id      - 16-bit field identifier.
+//   t_stack_version - type of `s_stack_slice.version`.
+//   t_parser_clock / t_parser_duration - 64-bit values; presumably timing
+//   helpers for the parser, verify against their users.
+typedef uint16_t t_state_id;
+typedef uint16_t t_symbol;
+typedef uint16_t t_field_id;
+typedef unsigned t_stack_version;
+typedef uint64_t t_parser_clock;
+typedef uint64_t t_parser_duration;
+
+// Forward declarations: every tag gets its `t_` alias up front so that the
+// definitions below can refer to one another regardless of their order.
+// Each alias is declared exactly once (repeating a typedef is only legal
+// from C11 onwards).
+typedef union u_parse_action_entry t_parse_action_entry;
+typedef union u_subtree			   t_subtree;
+typedef union u_mutable_subtree	   t_mutable_subtree;
+typedef union u_parse_action	   t_parse_action;
+
+typedef struct s_language				t_language;
+typedef struct s_first_parser			t_first_parser;
+typedef struct s_first_tree				t_first_tree;
+typedef struct s_parse_query			t_parse_query;
+typedef struct s_query_cursor			t_query_cursor;
+typedef struct s_lookahead_iterator		t_lookahead_iterator;
+typedef struct s_point					t_point;
+typedef struct s_length					t_length;
+typedef struct s_parse_range			t_parse_range;
+typedef struct s_parse_input			t_parse_input;
+typedef struct s_parse_logger			t_parse_logger;
+typedef struct s_input_edit				t_input_edit;
+typedef struct s_parse_node				t_parse_node;
+typedef struct s_tree_cursor			t_tree_cursor;
+typedef struct s_query_capture			t_query_capture;
+typedef struct s_query_match			t_query_match;
+typedef struct s_query_predicate_step	t_query_predicate_step;
+typedef struct s_subtree_inline_data	t_subtree_inline_data;
+typedef struct s_subtree_heap_data		t_subtree_heap_data;
+typedef struct s_subtree_pool			t_subtree_pool;
+typedef struct s_table_entry			t_table_entry;
+typedef struct s_symbol_metadata		t_symbol_metadata;
+typedef struct s_field_map_entry		t_field_map_entry;
+typedef struct s_field_map_slice		t_field_map_slice;
+typedef struct s_lexer_data				t_lexer_data;
+typedef struct s_lex_mode				t_lex_mode;
+typedef struct s_char_range				t_char_range;
+typedef struct s_tree_cursor_entry		t_tree_cursor_entry;
+typedef struct s_external_scanner_state t_external_scanner_state;
+typedef struct s_parse_query_cursor		t_parse_query_cursor;
+typedef struct s_parse_query_error		t_parse_query_error;
+typedef struct s_parse_query_error_cost t_parse_query_error_cost;
+typedef struct s_lexer					t_lexer;
+typedef struct s_parse_state			t_parse_state;
+typedef struct s_reduce_action			t_reduce_action;
+typedef struct s_stack_entry			t_stack_entry;
+typedef struct s_reusable_node			t_reusable_node;
+typedef struct s_stack_summary_entry	t_stack_summary_entry;
+typedef struct s_stack					t_stack;
+typedef struct s_stack_slice			t_stack_slice;
+// api.h exposed this alias before the structs moved here; keep it available.
+typedef struct s_parent_cache_entry		t_parent_cache_entry;
+
+// `t_` aliases for the enums used by this header.
+// NOTE(review): naming `enum tag` before the enum has been defined is not
+// ISO C; GCC and Clang accept it as an extension (diagnosed under
+// -pedantic). Consider attaching each alias to its enum definition instead.
+typedef enum e_input_encoding			 t_input_encoding;
+typedef enum e_symbol_type				 t_symbol_type;
+typedef enum e_log_type					 t_log_type;
+typedef enum e_quantifier				 t_quantifier;
+typedef enum e_query_error				 t_query_error;
+typedef enum e_query_predicate_step_type t_query_predicate_step_type;
+typedef enum e_parse_action_type		 t_parse_action_type;
+
+// Named instantiations of the generic `Array(T)` container.
+// NOTE(review): most element types are still incomplete at this point; this
+// assumes Array(T) only stores a `T *` (as the macro formerly in api.h did)
+// - confirm against array.h.
+// `Array` (the `void` instantiation) is the untyped form taken by the
+// `_array__*` helper functions in api.h.
+typedef Array(t_parse_range) t_range_array;
+typedef Array(t_subtree) t_subtree_array;
+typedef Array(t_mutable_subtree) t_mutable_subtree_array;
+typedef Array(t_reduce_action) t_reduce_action_set;
+typedef Array(void) Array;
+typedef Array(t_stack_slice) t_stack_slice_array;
+typedef Array(t_stack_summary_entry) t_stack_summary;
+// Callback type receiving an opaque pointer, a state id and a 32-bit value.
+typedef void (*StackIterateCallback)(void *, t_state_id, uint32_t);
+
+// A position expressed as a row/column pair (see POINT_ZERO and POINT_MAX
+// in api.h).
+struct s_point
+{
+	uint32_t row;
+	uint32_t column;
+};
+
+// A length tracked two ways at once: as a byte count and as a row/column
+// extent.
+struct s_length
+{
+	uint32_t bytes;
+	t_point	 extent;
+};
+
+// Pairs an array of subtrees with a stack version; collected in
+// t_stack_slice_array.
+struct s_stack_slice
+{
+	t_subtree_array subtrees;
+	t_stack_version version;
+};
+
+// Element type of t_stack_summary: a position, a depth and a parse state.
+struct s_stack_summary_entry
+{
+	t_length   position;
+	unsigned   depth;
+	t_state_id state;
+};
+
+// Text encoding of the input, stored in `s_parse_input.encoding`.
+enum e_input_encoding
+{
+	TSInputEncodingUTF8,
+	TSInputEncodingUTF16,
+};
+
+// Category of a symbol: regular, anonymous or auxiliary.
+enum e_symbol_type
+{
+	TSSymbolTypeRegular,
+	TSSymbolTypeAnonymous,
+	TSSymbolTypeAuxiliary,
+};
+
+// A range described both by start/end points and by start/end byte offsets.
+struct s_parse_range
+{
+	t_point	 start_point;
+	t_point	 end_point;
+	uint32_t start_byte;
+	uint32_t end_byte;
+};
+
+// Describes where the text to parse comes from.
+//   payload  - opaque pointer handed back to `read` as its first argument.
+//   read     - callback given a byte index and a position; it returns a
+//              `const char *` and reports a count through `bytes_read`
+//              (presumably the size of the returned chunk - confirm with
+//              the lexer).
+//   encoding - encoding of the text.
+struct s_parse_input
+{
+	void *payload;
+	const char *(*read)(void *payload, uint32_t byte_index, t_point position,
+						uint32_t *bytes_read);
+	t_input_encoding encoding;
+};
+
+// Category passed to the `s_parse_logger.log` callback: parse or lex.
+enum e_log_type
+{
+	TSLogTypeParse,
+	TSLogTypeLex,
+};
+
+// Logging hook: `log` receives `payload`, the message category and a
+// message buffer.
+struct s_parse_logger
+{
+	void *payload;
+	void (*log)(void *payload, t_log_type log_type, const char *buffer);
+};
+
+// Describes an edit by its start, old end and new end, each given both as a
+// byte offset and as a point.
+struct s_input_edit
+{
+	uint32_t start_byte;
+	uint32_t old_end_byte;
+	uint32_t new_end_byte;
+	t_point	 start_point;
+	t_point	 old_end_point;
+	t_point	 new_end_point;
+};
+
+// A node handle: four 32-bit context words, an opaque `id` pointer and the
+// tree the node belongs to.
+// NOTE(review): the meaning of each `context` slot is not visible in this
+// header; see the node constructor (ts_node_new).
+struct s_parse_node
+{
+	uint32_t			context[4];
+	const void		   *id;
+	const t_first_tree *tree;
+};
+
+// One element of `s_tree_cursor.stack`: a subtree pointer together with its
+// position and three indices (child, structural child, descendant).
+struct s_tree_cursor_entry
+{
+	const t_subtree *subtree;
+	t_length		 position;
+	uint32_t		 child_index;
+	uint32_t		 structural_child_index;
+	uint32_t		 descendant_index;
+};
+
+// Cursor over a tree: the tree itself, a stack of cursor entries and the
+// alias symbol of the root.
+struct s_tree_cursor
+{
+	const t_first_tree *tree;
+	Array(t_tree_cursor_entry) stack;
+	t_symbol root_alias_symbol;
+};
+
+// A captured node paired with an index (element type of
+// `s_query_match.captures`).
+struct s_query_capture
+{
+	t_parse_node node;
+	uint32_t	 index;
+};
+
+// Repetition quantifiers. `TSQuantifierZero` is pinned to 0 on purpose (see
+// the inline comment), so the enumerator order must not be changed.
+enum e_quantifier
+{
+	TSQuantifierZero = 0, // must match the array initialization value
+	TSQuantifierZeroOrOne,
+	TSQuantifierZeroOrMore,
+	TSQuantifierOne,
+	TSQuantifierOneOrMore,
+};
+
+// A query match: an id, the index of the matched pattern, and
+// `capture_count` plus a pointer to the captures (presumably that many
+// entries - confirm with the query cursor).
+struct s_query_match
+{
+	uint32_t			   id;
+	uint16_t			   pattern_index;
+	uint16_t			   capture_count;
+	const t_query_capture *captures;
+};
+
+// Kind of a predicate step (`s_query_predicate_step.type`): done, capture
+// or string.
+enum e_query_predicate_step_type
+{
+	TSQueryPredicateStepTypeDone,
+	TSQueryPredicateStepTypeCapture,
+	TSQueryPredicateStepTypeString,
+};
+
+// One predicate step: its kind and an associated value id.
+struct s_query_predicate_step
+{
+	t_query_predicate_step_type type;
+	uint32_t					value_id;
+};
+
+// Query error codes; `TSQueryErrorNone` is explicitly 0.
+enum e_query_error
+{
+	TSQueryErrorNone = 0,
+	TSQueryErrorSyntax,
+	TSQueryErrorNodeType,
+	TSQueryErrorField,
+	TSQueryErrorCapture,
+	TSQueryErrorStructure,
+	TSQueryErrorLanguage,
+};
+
+// Links a child subtree to its parent subtree, together with a position and
+// an alias symbol.
+struct s_parent_cache_entry
+{
+	const t_subtree *child;
+	const t_subtree *parent;
+	t_length		 position;
+	t_symbol		 alias_symbol;
+};
+
+// Tree-cursor step kinds: none, hidden or visible.
+// NOTE(review): presumably the outcome of a single cursor move - confirm
+// against the tree-cursor implementation.
+typedef enum e_tree_cursor_step
+{
+	TreeCursorStepNone,
+	TreeCursorStepHidden,
+	TreeCursorStepVisible,
+} t_tree_cursor_step;
+
+// The serialized state of an external scanner.
+//
+// Every time an external token subtree is created after a call to an
+// external scanner, the scanner's `serialize` function is called to
+// retrieve a serialized copy of its state. The bytes are then copied
+// onto the subtree itself so that the scanner's state can later be
+// restored using its `deserialize` function.
+//
+// Small byte arrays are stored inline, and long ones are allocated
+// separately on the heap.
+struct s_external_scanner_state
+{
+	// Both members share storage (anonymous union, a C11 feature).
+	union {
+		char *long_data;      // separately allocated buffer for long states
+		char  short_data[24]; // inline storage for small states
+	};
+	// Presumably the number of serialized bytes, which would also select the
+	// active union member - confirm with ts_external_scanner_state_init.
+	uint32_t length;
+};
+
+#define SUBTREE_BITS                                                           \
+	bool visible : 1;                                                          \
+	bool named : 1;                                                            \
+	bool extra : 1;                                                            \
+	bool has_changes : 1;                                                      \
+	bool is_missing : 1;                                                       \
+	bool is_keyword : 1;
+
+#define SUBTREE_SIZE                                                           \
+	uint8_t padding_columns;                                                   \
+	uint8_t padding_rows : 4;                                                  \
+	uint8_t lookahead_bytes : 4;                                               \
+	uint8_t padding_bytes;                                                     \
+	uint8_t size_bytes;
+
+#if TS_BIG_ENDIAN
+# if TS_PTR_SIZE == 32
+
+struct s_subtree_inline_data
+{
+	uint16_t parse_state;
+	uint8_t	 symbol;
+	SUBTREE_BITS
+	bool unused : 1;
+	bool is_inline : 1;
+	SUBTREE_SIZE
+};
+
+# else
+
+struct s_subtree_inline_data
+{
+	SUBTREE_SIZE
+	uint16_t parse_state;
+	uint8_t	 symbol;
+	SUBTREE_BITS
+	bool unused : 1;
+	bool is_inline : 1;
+};
+
+# endif
+#else
+
+struct s_subtree_inline_data
+{
+	bool is_inline : 1;
+	SUBTREE_BITS
+	uint8_t	 symbol;
+	uint16_t parse_state;
+	SUBTREE_SIZE
+};
+
+#endif
+
+#undef SUBTREE_BITS
+#undef SUBTREE_SIZE
+
+struct s_subtree_heap_data
+{
+	volatile uint32_t ref_count;
+	t_length		  padding;
+	t_length		  size;
+	uint32_t		  lookahead_bytes;
+	uint32_t		  error_cost;
+	uint32_t		  child_count;
+	t_symbol		  symbol;
+	t_state_id		  parse_state;
+
+	bool visible : 1;
+	bool named : 1;
+	bool extra : 1;
+	bool fragile_left : 1;
+	bool fragile_right : 1;
+	bool has_changes : 1;
+	bool has_external_tokens : 1;
+	bool has_external_scanner_state_change : 1;
+	bool depends_on_column : 1;
+	bool is_missing : 1;
+	bool is_keyword : 1;
+
+	union {
+		// Non-terminal subtrees (`child_count > 0`)
+		struct
+		{
+			uint32_t visible_child_count;
+			uint32_t named_child_count;
+			uint32_t visible_descendant_count;
+			int32_t	 dynamic_precedence;
+			uint16_t repeat_depth;
+			uint16_t production_id;
+			struct
+			{
+				t_symbol   symbol;
+				t_state_id parse_state;
+			} first_leaf;
+		};
+
+		// External terminal subtrees (`child_count == 0 &&
+		// has_external_tokens`)
+		t_external_scanner_state external_scanner_state;
+
+		// Error terminal subtrees (`child_count == 0 && symbol ==
+		// ts_builtin_sym_error`)
+		int32_t lookahead_char;
+	};
+};
+
+// The fundamental building block of a syntax tree.
+union u_subtree {
+	t_subtree_inline_data	   data;
+	const t_subtree_heap_data *ptr;
+};
+
+// Like t_subtree, but mutable.
+union u_mutable_subtree {
+	t_subtree_inline_data data;
+	t_subtree_heap_data	 *ptr;
+};
+
+struct s_subtree_pool
+{
+	t_mutable_subtree_array free_trees;
+	t_mutable_subtree_array tree_stack;
+};
+
+union u_parse_action {
+	struct
+	{
+		uint8_t	   type;
+		t_state_id state;
+		bool	   extra;
+		bool	   repetition;
+	} shift;
+	struct
+	{
+		uint8_t	 type;
+		uint8_t	 child_count;
+		t_symbol symbol;
+		int16_t	 dynamic_precedence;
+		uint16_t production_id;
+	} reduce;
+	uint8_t type;
+};
+
+struct s_table_entry
+{
+	const t_parse_action *actions;
+	uint32_t			  action_count;
+	bool				  is_reusable;
+};
+
+struct s_lookahead_iterator
+{
+	const t_language *language;
+	const uint16_t	 *data;
+	const uint16_t	 *group_end;
+	t_state_id		  state;
+	uint16_t		  table_value;
+	uint16_t		  section_index;
+	uint16_t		  group_count;
+	bool			  is_small_state;
+
+	const t_parse_action *actions;
+	t_symbol			  symbol;
+	t_state_id			  next_state;
+	uint16_t			  action_count;
+};
+
+struct s_symbol_metadata
+{
+	bool visible;
+	bool named;
+	bool supertype;
+};
+
+enum e_parse_action_type
+{
+	TSParseActionTypeShift,
+	TSParseActionTypeReduce,
+	TSParseActionTypeAccept,
+	TSParseActionTypeRecover,
+};
+
+union u_parse_action_entry {
+	t_parse_action action;
+	struct
+	{
+		uint8_t count;
+		bool	reusable;
+	} entry;
+};
+
+struct s_field_map_entry
+{
+	t_field_id field_id;
+	uint8_t	   child_index;
+	bool	   inherited;
+};
+
+struct s_field_map_slice
+{
+	uint16_t index;
+	uint16_t length;
+};
+
+struct s_lexer_data
+{
+	int32_t	 lookahead;
+	t_symbol result_symbol;
+	void (*advance)(t_lexer_data *, bool);
+	void (*mark_end)(t_lexer_data *);
+	uint32_t (*get_column)(t_lexer_data *);
+	bool (*is_at_included_range_start)(const t_lexer_data *);
+	bool (*eof)(const t_lexer_data *);
+};
+
+struct s_lex_mode
+{
+	uint16_t lex_state;
+	uint16_t external_lex_state;
+};
+
+struct s_char_range
+{
+	int32_t start;
+	int32_t end;
+};
+
+struct s_language
+{
+	uint32_t					version;
+	uint32_t					symbol_count;
+	uint32_t					alias_count;
+	uint32_t					token_count;
+	uint32_t					external_token_count;
+	uint32_t					state_count;
+	uint32_t					large_state_count;
+	uint32_t					production_id_count;
+	uint32_t					field_count;
+	uint16_t					max_alias_sequence_length;
+	const uint16_t			   *parse_table;
+	const uint16_t			   *small_parse_table;
+	const uint32_t			   *small_parse_table_map;
+	const t_parse_action_entry *parse_actions;
+	const char *const		   *symbol_names;
+	const char *const		   *field_names;
+	const t_field_map_slice	   *field_map_slices;
+	const t_field_map_entry	   *field_map_entries;
+	const t_symbol_metadata	   *symbol_metadata;
+	const t_symbol			   *public_symbol_map;
+	const uint16_t			   *alias_map;
+	const t_symbol			   *alias_sequences;
+	const t_lex_mode		   *lex_modes;
+	bool (*lex_fn)(t_lexer_data *, t_state_id);
+	bool (*keyword_lex_fn)(t_lexer_data *, t_state_id);
+	t_symbol keyword_capture_token;
+	struct
+	{
+		const bool	   *states;
+		const t_symbol *symbol_map;
+		void *(*create)(void);
+		void (*destroy)(void *);
+		bool (*scan)(void *, t_lexer_data *, const bool *symbol_whitelist);
+		unsigned (*serialize)(void *, char *);
+		void (*deserialize)(void *, const char *, unsigned);
+	} external_scanner;
+	const t_state_id *primary_state_ids;
+};
+
+struct s_lexer
+{
+	t_lexer_data data;
+	t_length	 current_position;
+	t_length	 token_start_position;
+	t_length	 token_end_position;
+
+	t_parse_range *included_ranges;
+	const char	  *chunk;
+	t_parse_input  input;
+	t_parse_logger logger;
+
+	uint32_t included_range_count;
+	uint32_t current_included_range_index;
+	uint32_t chunk_start;
+	uint32_t chunk_size;
+	uint32_t lookahead_size;
+	bool	 did_get_column;
+
+	char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
+};
+
+struct s_reduce_action
+{
+	uint32_t	   count;
+	t_symbol	   symbol;
+	int			   dynamic_precedence;
+	unsigned short production_id;
+};
+
+struct s_stack_entry
+{
+	t_subtree tree;
+	uint32_t  child_index;
+	uint32_t  byte_offset;
+};
+
+struct s_reusable_node
+{
+	Array(t_stack_entry) stack;
+	t_subtree last_external_token;
+};
+
+struct s_first_tree
+{
+	t_subtree		  root;
+	const t_language *language;
+	t_parse_range	 *included_ranges;
+	unsigned		  included_range_count;
+};
+
+#endif // API_STRUCTS_H
--- a/parser/src/array.h
+++ b/parser/src/array.h
@ -0,0 +1,149 @@
+#ifndef ARRAY_H
+#define ARRAY_H
+#define Array(T)                                                               \
+	struct                                                                     \
+	{                                                                          \
+		T		*contents;                                                     \
+		uint32_t size;                                                         \
+		uint32_t capacity;                                                     \
+	}
+
+#ifndef inline
+# define inline __inline__
+#endif
+
+/// Initialize an array.
+#define array_init(self)                                                       \
+	((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
+
+/// Create an empty array.
+#define array_new()                                                            \
+	{                                                                          \
+		NULL, 0, 0                                                             \
+	}
+
+/// Get a pointer to the element at a given `index` in the array.
+#define array_get(self, _index)                                                \
+	(assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
+
+/// Get a pointer to the first element in the array.
+#define array_front(self) array_get(self, 0)
+
+/// Get a pointer to the last element in the array.
+#define array_back(self) array_get(self, (self)->size - 1)
+
+/// Clear the array, setting its size to zero. Note that this does not free any
+/// memory allocated for the array's contents.
+#define array_clear(self) ((self)->size = 0)
+
+/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
+/// less than the array's current capacity, this function has no effect.
+#define array_reserve(self, new_capacity)                                      \
+	_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
+
+/// Free any memory allocated for this array. Note that this does not free any
+/// memory that the array's individual elements may themselves point to.
+#define array_delete(self) _array__delete((Array *)(self))
+
+/// Push a new `element` onto the end of the array.
+#define array_push(self, element)                                              \
+	(_array__grow((Array *)(self), 1, array_elem_size(self)),                  \
+	 (self)->contents[(self)->size++] = (element))
+
+/// Increase the array's size by `count` elements.
+/// New elements are zero-initialized.
+#define array_grow_by(self, count)                                             \
+	do                                                                         \
+	{                                                                          \
+		if ((count) == 0)                                                      \
+			break;                                                             \
+		_array__grow((Array *)(self), count, array_elem_size(self));           \
+		memset((self)->contents + (self)->size, 0,                             \
+			   (count) * array_elem_size(self));                               \
+		(self)->size += (count);                                               \
+	} while (0)
+
+/// Append all elements from one array to the end of another.
+#define array_push_all(self, other)                                            \
+	array_extend((self), (other)->size, (other)->contents)
+
+/// Append `count` elements to the end of the array, reading their values from
+/// the `contents` pointer.
+#define array_extend(self, count, contents)                                    \
+	_array__splice((Array *)(self), array_elem_size(self), (self)->size, 0,    \
+				   count, contents)
+
+/// Remove `old_count` elements from the array starting at the given `index`. At
+/// the same index, insert `new_count` new elements, reading their values from
+/// the `new_contents` pointer.
+#define array_splice(self, _index, old_count, new_count, new_contents)         \
+	_array__splice((Array *)(self), array_elem_size(self), _index, old_count,  \
+				   new_count, new_contents)
+
+/// Insert one `element` into the array at the given `index`.
+#define array_insert(self, _index, element)                                    \
+	_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1,       \
+				   &(element))
+
+/// Remove one element from the array at the given `index`.
+#define array_erase(self, _index)                                              \
+	_array__erase((Array *)(self), array_elem_size(self), _index)
+
+/// Pop the last element off the array by value; the array must not be empty.
+#define array_pop(self) ((self)->contents[--(self)->size])
+
+/// Assign the contents of one array to another, reallocating if necessary.
+#define array_assign(self, other)                                              \
+	_array__assign((Array *)(self), (const Array *)(other),                    \
+				   array_elem_size(self))
+
+/// Swap one array with another
+#define array_swap(self, other) _array__swap((Array *)(self), (Array *)(other))
+
+/// Get the size in bytes of a single element of the array
+#define array_elem_size(self) (sizeof *(self)->contents)
+
+/// Search a sorted array for a given `needle` value, using the given `compare`
+/// callback to determine the order.
+///
+/// If an existing element is found to be equal to `needle`, then the `index`
+/// out-parameter is set to the existing value's index, and the `exists`
+/// out-parameter is set to true. Otherwise, `index` is set to an index where
+/// `needle` should be inserted in order to preserve the sorting, and `exists`
+/// is set to false.
+#define array_search_sorted_with(self, compare, needle, _index, _exists)       \
+	_array__search_sorted(self, 0, compare, , needle, _index, _exists)
+
+/// Search a sorted array for a given `needle` value, using integer comparisons
+/// of a given struct field (specified with a leading dot) to determine the
+/// order.
+///
+/// See also `array_search_sorted_with`.
+#define array_search_sorted_by(self, field, needle, _index, _exists)           \
+	_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
+
+/// Insert a given `value` into a sorted array, using the given `compare`
+/// callback to determine the order.
+#define array_insert_sorted_with(self, compare, value)                         \
+	do                                                                         \
+	{                                                                          \
+		unsigned _index, _exists;                                              \
+		array_search_sorted_with(self, compare, &(value), &_index, &_exists);  \
+		if (!_exists)                                                          \
+			array_insert(self, _index, value);                                 \
+	} while (0)
+
+/// Insert a given `value` into a sorted array, using integer comparisons of
+/// a given struct field (specified with a leading dot) to determine the order.
+///
+/// See also `array_search_sorted_by`.
+#define array_insert_sorted_by(self, field, value)                             \
+	do                                                                         \
+	{                                                                          \
+		unsigned _index, _exists;                                              \
+		array_search_sorted_by(self, field, (value)field, &_index, &_exists);  \
+		if (!_exists)                                                          \
+			array_insert(self, _index, value);                                 \
+	} while (0)
+
+#endif // ARRAY_H
--- a/parser/src/combined.c
+++ b/parser/src/combined.c
@ -1,4 +1,5 @@
 #include "./api.h"
+#include "./structs.h"

 uint32_t	 ts_node_end_byte(t_parse_node self);
 t_parse_node ts_node_parent(t_parse_node self);
@ -146,14 +147,6 @@ void ts_range_array_get_changed_ranges(const t_parse_range *old_ranges,
 	}
 }

-typedef struct s_iterator
-{
-	t_tree_cursor	  cursor;
-	const t_language *language;
-	unsigned		  visible_depth;
-	bool			  in_padding;
-} t_iterator;
-
 static t_iterator iterator_new(t_tree_cursor *cursor, const t_subtree *tree,
 							   const t_language *language)
 {
@ -387,13 +380,6 @@ static void iterator_advance(t_iterator *self)
 	}
 }

-typedef enum e_iterator_comparison
-{
-	IteratorDiffers,
-	IteratorMayDiffer,
-	IteratorMatches,
-} t_iterator_comparison;
-
 static t_iterator_comparison iterator_compare(const t_iterator *old_iter,
 											  const t_iterator *new_iter)
 {
@ -919,9 +905,6 @@ uint32_t ascii_decode(const uint8_t *chunk, uint32_t size, int32_t *codepoint)
 	return (1);
 }

-typedef uint32_t (*UnicodeDecodeFunction)(const uint8_t *chunk, uint32_t size,
-										  int32_t *codepoint);
-
 // Decode the next unicode character in the current chunk of source code.
 // This assumes that the lexer has already retrieved a chunk of source
 // code that spans the current position.
@ -939,7 +922,7 @@ static void ts_lexer__get_lookahead(t_lexer *self)
 	}

 	const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
-	UnicodeDecodeFunction decode = ascii_decode;
+	t_unicode_decode_function decode = ascii_decode;

 	self->lookahead_size = decode(chunk, size, &self->data.lookahead);

@ -1326,16 +1309,6 @@ t_parse_range *ts_lexer_included_ranges(const t_lexer *self, uint32_t *count)

 #undef LOG

-typedef struct s_node_child_iterator
-{
-	t_subtree			parent;
-	const t_first_tree *tree;
-	t_length			position;
-	uint32_t			child_index;
-	uint32_t			structural_child_index;
-	const t_symbol	   *alias_sequence;
-} t_node_child_iterator;
-
 // t_parse_node - constructors

 t_parse_node ts_node_new(const t_first_tree *tree, const t_subtree *subtree,
@ -2269,61 +2242,6 @@ static const unsigned MAX_SUMMARY_DEPTH = 16;
 static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
 static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100;

-typedef struct s_token_cache
-{
-	t_subtree token;
-	t_subtree last_external_token;
-	uint32_t  byte_index;
-} t_token_cache;
-
-struct s_first_parser
-{
-	t_lexer				   lexer;
-	t_stack				  *stack;
-	t_subtree_pool		   tree_pool;
-	const t_language	  *language;
-	t_reduce_action_set	   reduce_actions;
-	t_subtree			   finished_tree;
-	t_subtree_array		   trailing_extras;
-	t_subtree_array		   trailing_extras2;
-	t_subtree_array		   scratch_trees;
-	t_token_cache		   token_cache;
-	t_reusable_node		   reusable_node;
-	void				  *external_scanner_payload;
-	t_parser_clock		   end_clock;
-	t_parser_duration	   timeout_duration;
-	unsigned			   accept_count;
-	unsigned			   operation_count;
-	const volatile size_t *cancellation_flag;
-	t_subtree			   old_tree;
-	t_range_array		   included_range_differences;
-	unsigned			   included_range_difference_index;
-	bool				   has_scanner_error;
-};
-
-typedef struct s_error_status
-{
-	unsigned cost;
-	unsigned node_count;
-	int		 dynamic_precedence;
-	bool	 is_in_error;
-} t_error_status;
-
-typedef enum e_error_comparaison
-{
-	ErrorComparisonTakeLeft,
-	ErrorComparisonPreferLeft,
-	ErrorComparisonNone,
-	ErrorComparisonPreferRight,
-	ErrorComparisonTakeRight,
-} t_error_comparaison;
-
-typedef struct s_string_input
-{
-	const char *string;
-	uint32_t	length;
-} t_string_input;
-
 // StringInput

 static const char *ts_string_input_read(void *_self, uint32_t byte,
@ -4380,329 +4298,6 @@ t_first_tree *ts_parser_parse_string_encoding(t_first_parser	 *self,
 						   });
 }

-/*
- * t_stream - A sequence of unicode characters derived from a UTF8 string.
- * This struct is used in parsing queries from S-expressions.
- */
-typedef struct s_stream
-{
-	const char *input;
-	const char *start;
-	const char *end;
-	int32_t		next;
-	uint8_t		next_size;
-} t_stream;
-
-/*
- * t_query_step - A step in the process of matching a query. Each node within
- * a query S-expression corresponds to one of these steps. An entire pattern
- * is represented as a sequence of these steps. The basic properties of a
- * node are represented by these fields:
- * - `symbol` - The grammar symbol to match. A zero value represents the
- *    wildcard symbol, '_'.
- * - `field` - The field name to match. A zero value means that a field name
- *    was not specified.
- * - `capture_ids` - An array of integers representing the names of captures
- *    associated with this node in the pattern, terminated by a `NONE` value.
- * - `depth` - The depth where this node occurs in the pattern. The root node
- *    of the pattern has depth zero.
- * - `negated_field_list_id` - An id representing a set of fields that must
- *    not be present on a node matching this step.
- *
- * Steps have some additional fields in order to handle the `.` (or "anchor")
- * operator, which forbids additional child nodes:
- * - `is_immediate` - Indicates that the node matching this step cannot be
- * preceded by other sibling nodes that weren't specified in the pattern.
- * - `is_last_child` - Indicates that the node matching this step cannot have
- * any subsequent named siblings.
- *
- * For simple patterns, steps are matched in sequential order. But in order to
- * handle alternative/repeated/optional sub-patterns, query steps are not always
- * structured as a linear sequence; they sometimes need to split and merge. This
- * is done using the following fields:
- *  - `alternative_index` - The index of a different query step that serves as
- *    an alternative to this step. A `NONE` value represents no alternative.
- *    When a query state reaches a step with an alternative index, the state
- *    is duplicated, with one copy remaining at the original step, and one copy
- *    moving to the alternative step. The alternative may have its own
- * alternative step, so this splitting is an iterative process.
- * - `is_dead_end` - Indicates that this state cannot be passed directly, and
- *    exists only in order to redirect to an alternative index, with no
- * splitting.
- * - `is_pass_through` - Indicates that state has no matching logic of its own,
- *    and exists only to split a state. One copy of the state advances
- * immediately to the next step, and one moves to the alternative step.
- * - `alternative_is_immediate` - Indicates that this step's alternative step
- *    should be treated as if `is_immediate` is true.
- *
- * Steps also store some derived state that summarizes how they relate to other
- * steps within the same pattern. This is used to optimize the matching process:
- *  - `contains_captures` - Indicates that this step or one of its child steps
- *     has a non-empty `capture_ids` list.
- *  - `parent_pattern_guaranteed` - Indicates that if this step is reached, then
- *     it and all of its subsequent sibling steps within the same parent pattern
- *     are guaranteed to match.
- *  - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but
- *     for the entire top-level pattern. When iterating through a query's
- *     captures using `ts_query_cursor_next_capture`, this field is used to
- *     detect that a capture can safely be returned from a match that has not
- *     even completed  yet.
- */
-typedef struct s_query_step
-{
-	t_symbol   symbol;
-	t_symbol   supertype_symbol;
-	t_field_id field;
-	uint16_t   capture_ids[MAX_STEP_CAPTURE_COUNT];
-	uint16_t   depth;
-	uint16_t   alternative_index;
-	uint16_t   negated_field_list_id;
-	bool	   is_named : 1;
-	bool	   is_immediate : 1;
-	bool	   is_last_child : 1;
-	bool	   is_pass_through : 1;
-	bool	   is_dead_end : 1;
-	bool	   alternative_is_immediate : 1;
-	bool	   contains_captures : 1;
-	bool	   root_pattern_guaranteed : 1;
-	bool	   parent_pattern_guaranteed : 1;
-} t_query_step;
-
-/*
- * t_slice - A slice of an external array. Within a query, capture names,
- * literal string values, and predicate step information are stored in three
- * contiguous arrays. Individual captures, string values, and predicates are
- * represented as slices of these three arrays.
- */
-typedef struct s_slice
-{
-	uint32_t offset;
-	uint32_t length;
-} t_slice;
-
-/*
- * t_symbol_table - a two-way mapping of strings to ids.
- */
-typedef struct s_symbol_table
-{
-	Array(char) characters;
-	Array(t_slice) slices;
-} t_symbol_table;
-
-/**
- * CaptureQuantififers - a data structure holding the quantifiers of pattern
- * captures.
- */
-typedef Array(uint8_t) t_capture_quantifiers;
-
-/*
- * t_pattern_entry - Information about the starting point for matching a
- * particular pattern. These entries are stored in a 'pattern map' - a sorted
- * array that makes it possible to efficiently lookup patterns based on the
- * symbol for their first step. The entry consists of the following fields:
- * - `pattern_index` - the index of the pattern within the query
- * - `step_index` - the index of the pattern's first step in the shared `steps`
- * array
- * - `is_rooted` - whether or not the pattern has a single root node. This
- * property affects decisions about whether or not to start the pattern for
- * nodes outside of a QueryCursor's range restriction.
- */
-typedef struct s_pattern_entry
-{
-	uint16_t step_index;
-	uint16_t pattern_index;
-	bool	 is_rooted;
-} t_pattern_entry;
-
-typedef struct s_query_pattern
-{
-	t_slice	 steps;
-	t_slice	 predicate_steps;
-	uint32_t start_byte;
-	bool	 is_non_local;
-} t_query_pattern;
-
-typedef struct s_step_offset
-{
-	uint32_t byte_offset;
-	uint16_t step_index;
-} t_step_offset;
-
-/*
- * t_query_state - The state of an in-progress match of a particular pattern
- * in a query. While executing, a `t_query_cursor` must keep track of a number
- * of possible in-progress matches. Each of those possible matches is
- * represented as one of these states. Fields:
- * - `id` - A numeric id that is exposed to the public API. This allows the
- *    caller to remove a given match, preventing any more of its captures
- *    from being returned.
- * - `start_depth` - The depth in the tree where the first step of the state's
- *    pattern was matched.
- * - `pattern_index` - The pattern that the state is matching.
- * - `consumed_capture_count` - The number of captures from this match that
- *    have already been returned.
- * - `capture_list_id` - A numeric id that can be used to retrieve the state's
- *    list of captures from the `t_capture_list_pool`.
- * - `seeking_immediate_match` - A flag that indicates that the state's next
- *    step must be matched by the very next sibling. This is used when
- *    processing repetitions.
- * - `has_in_progress_alternatives` - A flag that indicates that there is are
- *    other states that have the same captures as this state, but are at
- *    different steps in their pattern. This means that in order to obey the
- *    'longest-match' rule, this state should not be returned as a match until
- *    it is clear that there can be no other alternative match with more
- * captures.
- */
-typedef struct s_query_state
-{
-	uint32_t id;
-	uint32_t capture_list_id;
-	uint16_t start_depth;
-	uint16_t step_index;
-	uint16_t pattern_index;
-	uint16_t consumed_capture_count : 12;
-	bool	 seeking_immediate_match : 1;
-	bool	 has_in_progress_alternatives : 1;
-	bool	 dead : 1;
-	bool	 needs_parent : 1;
-} t_query_state;
-
-typedef Array(t_query_capture) t_capture_list;
-
-/*
- * t_capture_list_pool - A collection of *lists* of captures. Each query state
- * needs to maintain its own list of captures. To avoid repeated allocations,
- * this struct maintains a fixed set of capture lists, and keeps track of which
- * ones are currently in use by a query state.
- */
-typedef struct s_capture_list_pool
-{
-	Array(t_capture_list) list;
-	t_capture_list empty_list;
-	// The maximum number of capture lists that we are allowed to allocate. We
-	// never allow `list` to allocate more entries than this, dropping pending
-	// matches if needed to stay under the limit.
-	uint32_t max_capture_list_count;
-	// The number of capture lists allocated in `list` that are not currently in
-	// use. We reuse those existing-but-unused capture lists before trying to
-	// allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
-	// list's length to indicate that it's not in use.
-	uint32_t free_capture_list_count;
-} t_capture_list_pool;
-
-/*
- * t_analysis_state - The state needed for walking the parse table when
- * analyzing a query pattern, to determine at which steps the pattern might fail
- * to match.
- */
-typedef struct s_analysis_state_entry
-{
-	t_state_id parse_state;
-	t_symbol   parent_symbol;
-	uint16_t   child_index;
-	t_field_id field_id : 15;
-	bool	   done : 1;
-} t_analysis_state_entry;
-
-typedef struct s_analysis_state
-{
-	t_analysis_state_entry stack[MAX_ANALYSIS_STATE_DEPTH];
-	uint16_t			   depth;
-	uint16_t			   step_index;
-	t_symbol			   root_symbol;
-} t_analysis_state;
-
-typedef Array(t_analysis_state *) t_analysis_state_set;
-
-typedef struct s_query_analysis
-{
-	t_analysis_state_set states;
-	t_analysis_state_set next_states;
-	t_analysis_state_set deeper_states;
-	t_analysis_state_set state_pool;
-	Array(uint16_t) final_step_indices;
-	Array(t_symbol) finished_parent_symbols;
-	bool did_abort;
-} t_query_analysis;
-
-/*
- * t_analysis_subgraph - A subset of the states in the parse table that are used
- * in constructing nodes with a certain symbol. Each state is accompanied by
- * some information about the possible node that could be produced in
- * downstream states.
- */
-typedef struct s_analysis_subgraph_node
-{
-	t_state_id state;
-	uint16_t   production_id;
-	uint8_t	   child_index : 7;
-	bool	   done : 1;
-} t_analysis_subgraph_node;
-
-typedef struct s_analysis_subgraph
-{
-	t_symbol symbol;
-	Array(t_state_id) start_states;
-	Array(t_analysis_subgraph_node) nodes;
-} t_analysis_subgraph;
-
-typedef Array(t_analysis_subgraph) t_analysis_subgraph_array;
-
-/*
- * t_state_predecessor_map - A map that stores the predecessors of each parse
- * state. This is used during query analysis to determine which parse states can
- * lead to which reduce actions.
- */
-typedef struct s_state_predecessor_map
-{
-	t_state_id *contents;
-} t_state_predecessor_map;
-
-/*
- * t_parse_query - A tree query, compiled from a string of S-expressions. The
- * query itself is immutable. The mutable state used in the process of executing
- * the query is stored in a `t_query_cursor`.
- */
-struct s_parse_query
-{
-	t_symbol_table captures;
-	t_symbol_table predicate_values;
-	Array(t_capture_quantifiers) capture_quantifiers;
-	Array(t_query_step) steps;
-	Array(t_pattern_entry) pattern_map;
-	Array(t_query_predicate_step) predicate_steps;
-	Array(t_query_pattern) patterns;
-	Array(t_step_offset) step_offsets;
-	Array(t_field_id) negated_fields;
-	Array(char) string_buffer;
-	Array(t_symbol) repeat_symbols_with_rootless_patterns;
-	const t_language *language;
-	uint16_t		  wildcard_root_pattern_count;
-};
-
-/*
- * t_query_cursor - A stateful struct used to execute a query on a tree.
- */
-struct s_query_cursor
-{
-	const t_parse_query *query;
-	t_tree_cursor		 cursor;
-	Array(t_query_state) states;
-	Array(t_query_state) finished_states;
-	t_capture_list_pool capture_list_pool;
-	uint32_t			depth;
-	uint32_t			max_start_depth;
-	uint32_t			start_byte;
-	uint32_t			end_byte;
-	t_point				start_point;
-	t_point				end_point;
-	uint32_t			next_state_id;
-	bool				on_visible_node;
-	bool				ascending;
-	bool				halted;
-	bool				did_exceed_match_limit;
-};
-
 static const t_query_error PARENT_DONE = -1;
 static const uint16_t	   PATTERN_DONE_MARKER = UINT16_MAX;
 static const uint16_t	   NONE = UINT16_MAX;
@ -8991,74 +8586,6 @@ void ts_query_cursor_set_max_start_depth(t_query_cursor *self,

 #undef LOG

-typedef struct s_stack_node t_stack_node;
-
-typedef struct s_stack_link
-{
-	t_stack_node *node;
-	t_subtree	  subtree;
-	bool		  is_pending;
-} t_stack_link;
-
-struct s_stack_node
-{
-	t_state_id		   state;
-	t_length		   position;
-	t_stack_link	   links[MAX_LINK_COUNT];
-	short unsigned int link_count;
-	uint32_t		   ref_count;
-	unsigned		   error_cost;
-	unsigned		   node_count;
-	int				   dynamic_precedence;
-};
-
-typedef struct s_stack_iterator
-{
-	t_stack_node   *node;
-	t_subtree_array subtrees;
-	uint32_t		subtree_count;
-	bool			is_pending;
-} t_stack_iterator;
-
-typedef Array(t_stack_node *) t_stack_node_array;
-
-typedef enum e_stack_status
-{
-	StackStatusActive,
-	StackStatusPaused,
-	StackStatusHalted,
-} t_stack_status;
-
-typedef struct s_stack_head
-{
-	t_stack_node	*node;
-	t_stack_summary *summary;
-	unsigned		 node_count_at_last_error;
-	t_subtree		 last_external_token;
-	t_subtree		 lookahead_when_paused;
-	t_stack_status	 status;
-} t_stack_head;
-
-struct s_stack
-{
-	Array(t_stack_head) heads;
-	t_stack_slice_array slices;
-	Array(t_stack_iterator) iterators;
-	t_stack_node_array node_pool;
-	t_stack_node	  *base_node;
-	t_subtree_pool	  *subtree_pool;
-};
-
-typedef unsigned t_stack_action;
-enum e_stack_action
-{
-	StackActionNone,
-	StackActionStop = 1,
-	StackActionPop = 2,
-};
-
-typedef t_stack_action (*t_stack_callback)(void *, const t_stack_iterator *);
-
 static void stack_node_retain(t_stack_node *self)
 {
 	if (!self)
@ -9663,12 +9190,6 @@ t_stack_slice_array ts_stack_pop_all(t_stack *self, t_stack_version version)
 	return stack__iter(self, version, pop_all_callback, NULL, 0);
 }

-typedef struct s_summarize_stack_session
-{
-	t_stack_summary *summary;
-	unsigned		 max_depth;
-} t_summarize_stack_session;
-
 static inline t_stack_action summarize_stack_callback(
 	void *payload, const t_stack_iterator *iterator)
 {
@ -9894,13 +9415,6 @@ bool ts_stack_print_dot_graph(t_stack *self, const t_language *language,
 	return (false);
 }

-typedef struct s_edit
-{
-	t_length start;
-	t_length old_end;
-	t_length new_end;
-} t_edit;
-
 // t_external_scanner_state

 void ts_external_scanner_state_init(t_external_scanner_state *self,
@ -10659,11 +10173,6 @@ static inline void ts_subtree_set_has_changes(t_mutable_subtree *self)
 t_subtree ts_subtree_edit(t_subtree self, const t_input_edit *input_edit,
 						  t_subtree_pool *pool)
 {
-	typedef struct s_edit_entry
-	{
-		t_subtree *tree;
-		t_edit	   edit;
-	} t_edit_entry;

 	Array(t_edit_entry) stack = array_new();
 	array_push(
@ -11095,17 +10604,6 @@ void ts_tree_print_dot_graph(const t_first_tree *self, int file_descriptor)

 #endif

-typedef struct s_cursor_child_iterator
-{
-	t_subtree			parent;
-	const t_first_tree *tree;
-	t_length			position;
-	uint32_t			child_index;
-	uint32_t			structural_child_index;
-	uint32_t			descendant_index;
-	const t_symbol	   *alias_sequence;
-} t_cursor_child_iterator;
-
 // t_cursor_child_iterator

 static inline bool ts_tree_cursor_is_entry_visible(const t_tree_cursor *self,
--- a/parser/src/structs.h
+++ b/parser/src/structs.h
@ -0,0 +1,545 @@
+#ifndef STRUCTS_H
+#define STRUCTS_H
+
+#include "./api.h"
+
+typedef unsigned t_stack_action;
+
+typedef struct s_edit					 t_edit;
+typedef struct s_edit_entry				 t_edit_entry;
+typedef struct s_cursor_child_iterator	 t_cursor_child_iterator;
+typedef struct s_summarize_stack_session t_summarize_stack_session;
+typedef struct s_stack_node				 t_stack_node;
+typedef struct s_stack_link				 t_stack_link;
+typedef struct s_stack_head				 t_stack_head;
+typedef struct s_stack_iterator			 t_stack_iterator;
+typedef struct s_stack					 t_stack;
+typedef struct s_stack_head				 t_stack_head; // NOTE(review): duplicate of the identical typedef a few lines above; legal in C11 but redundant
+typedef struct s_stack_iterator			 t_stack_iterator; // NOTE(review): duplicate of the identical typedef a few lines above; legal in C11 but redundant
+typedef struct s_query_cursor			 t_query_cursor;
+typedef struct s_parse_query			 t_parse_query;
+typedef struct s_state_predecessor_map	 t_state_predecessor_map;
+typedef struct s_analysis_subgraph		 t_analysis_subgraph;
+typedef struct s_analysis_subgraph_node	 t_analysis_subgraph_node;
+typedef struct s_query_analysis			 t_query_analysis;
+typedef struct s_analysis_state			 t_analysis_state;
+typedef struct s_analysis_state_entry	 t_analysis_state_entry;
+typedef struct s_capture_list_pool		 t_capture_list_pool;
+typedef struct s_query_state			 t_query_state;
+typedef struct s_step_offset			 t_step_offset;
+typedef struct s_query_pattern			 t_query_pattern;
+typedef struct s_pattern_entry			 t_pattern_entry;
+typedef struct s_symbol_table			 t_symbol_table;
+typedef struct s_slice					 t_slice;
+typedef struct s_query_step				 t_query_step;
+typedef struct s_stream					 t_stream;
+typedef struct s_string_input			 t_string_input;
+typedef struct s_error_status			 t_error_status;
+typedef struct s_first_parser			 t_first_parser;
+typedef struct s_token_cache			 t_token_cache;
+typedef struct s_node_child_iterator	 t_node_child_iterator;
+typedef struct s_iterator				 t_iterator;
+typedef struct s_parse_query			 t_parse_query; // NOTE(review): duplicate of the identical typedef earlier in this list; legal in C11 but redundant
+
+typedef t_stack_action (*t_stack_callback)(void *, const t_stack_iterator *);
+typedef Array(t_stack_node *) t_stack_node_array;
+typedef Array(t_analysis_subgraph) t_analysis_subgraph_array;
+typedef Array(t_analysis_state *) t_analysis_state_set;
+typedef Array(uint8_t) t_capture_quantifiers;
+typedef uint32_t (*t_unicode_decode_function)(const uint8_t *chunk,
+											  uint32_t		 size,
+											  int32_t		*codepoint);
+typedef Array(t_query_capture) t_capture_list;
+
+typedef enum e_stack_status		   t_stack_status; // NOTE(review): the enum is only defined further down in this header; ISO C forbids forward references to enum types (GCC/Clang accept it as an extension)
+typedef enum e_error_comparaison   t_error_comparaison; // NOTE(review): forward reference to an enum defined later in this header (compiler extension)
+typedef enum e_iterator_comparison t_iterator_comparison; // NOTE(review): forward reference to an enum defined later in this header (compiler extension)
+
+struct s_iterator
+{
+	t_tree_cursor	  cursor;
+	const t_language *language;
+	unsigned		  visible_depth;
+	bool			  in_padding;
+};
+
+enum e_iterator_comparison
+{
+	IteratorDiffers,
+	IteratorMayDiffer,
+	IteratorMatches,
+};
+
+struct s_node_child_iterator
+{
+	t_subtree			parent;
+	const t_first_tree *tree;
+	t_length			position;
+	uint32_t			child_index;
+	uint32_t			structural_child_index;
+	const t_symbol	   *alias_sequence;
+};
+struct s_token_cache
+{
+	t_subtree token;
+	t_subtree last_external_token;
+	uint32_t  byte_index;
+};
+
+struct s_first_parser
+{
+	t_lexer				   lexer;
+	t_stack				  *stack;
+	t_subtree_pool		   tree_pool;
+	const t_language	  *language;
+	t_reduce_action_set	   reduce_actions;
+	t_subtree			   finished_tree;
+	t_subtree_array		   trailing_extras;
+	t_subtree_array		   trailing_extras2;
+	t_subtree_array		   scratch_trees;
+	t_token_cache		   token_cache;
+	t_reusable_node		   reusable_node;
+	void				  *external_scanner_payload;
+	t_parser_clock		   end_clock;
+	t_parser_duration	   timeout_duration;
+	unsigned			   accept_count;
+	unsigned			   operation_count;
+	const volatile size_t *cancellation_flag;
+	t_subtree			   old_tree;
+	t_range_array		   included_range_differences;
+	unsigned			   included_range_difference_index;
+	bool				   has_scanner_error;
+};
+
+struct s_error_status
+{
+	unsigned cost;
+	unsigned node_count;
+	int		 dynamic_precedence;
+	bool	 is_in_error;
+};
+
+enum e_error_comparaison
+{
+	ErrorComparisonTakeLeft,
+	ErrorComparisonPreferLeft,
+	ErrorComparisonNone,
+	ErrorComparisonPreferRight,
+	ErrorComparisonTakeRight,
+};
+
+struct s_string_input
+{
+	const char *string;
+	uint32_t	length;
+};
+
+/*
+ * t_stream - A sequence of unicode characters derived from a UTF8 string.
+ * This struct is used in parsing queries from S-expressions.
+ */
+struct s_stream
+{
+	const char *input;
+	const char *start;
+	const char *end;
+	int32_t		next;
+	uint8_t		next_size;
+};
+
+/*
+ * t_query_step - A step in the process of matching a query. Each node within
+ * a query S-expression corresponds to one of these steps. An entire pattern
+ * is represented as a sequence of these steps. The basic properties of a
+ * node are represented by these fields:
+ * - `symbol` - The grammar symbol to match. A zero value represents the
+ *    wildcard symbol, '_'.
+ * - `field` - The field name to match. A zero value means that a field name
+ *    was not specified.
+ * - `capture_ids` - An array of integers representing the names of captures
+ *    associated with this node in the pattern, terminated by a `NONE` value.
+ * - `depth` - The depth where this node occurs in the pattern. The root node
+ *    of the pattern has depth zero.
+ * - `negated_field_list_id` - An id representing a set of fields that must
+ *    not be present on a node matching this step.
+ *
+ * Steps have some additional fields in order to handle the `.` (or "anchor")
+ * operator, which forbids additional child nodes:
+ * - `is_immediate` - Indicates that the node matching this step cannot be
+ *    preceded by other sibling nodes that weren't specified in the pattern.
+ * - `is_last_child` - Indicates that the node matching this step cannot have
+ *    any subsequent named siblings.
+ *
+ * For simple patterns, steps are matched in sequential order. But in order to
+ * handle alternative/repeated/optional sub-patterns, query steps are not always
+ * structured as a linear sequence; they sometimes need to split and merge. This
+ * is done using the following fields:
+ *  - `alternative_index` - The index of a different query step that serves as
+ *    an alternative to this step. A `NONE` value represents no alternative.
+ *    When a query state reaches a step with an alternative index, the state
+ *    is duplicated, with one copy remaining at the original step, and one copy
+ *    moving to the alternative step. The alternative may have its own
+ *    alternative step, so this splitting is an iterative process.
+ * - `is_dead_end` - Indicates that this step cannot be passed directly, and
+ *    exists only in order to redirect to an alternative index, with no
+ *    splitting.
+ * - `is_pass_through` - Indicates that this step has no matching logic of its
+ *    own, and exists only to split a state. One copy of the state advances
+ *    immediately to the next step, and one moves to the alternative step.
+ * - `alternative_is_immediate` - Indicates that this step's alternative step
+ *    should be treated as if `is_immediate` is true.
+ *
+ * Steps also store some derived state that summarizes how they relate to other
+ * steps within the same pattern. This is used to optimize the matching process:
+ *  - `contains_captures` - Indicates that this step or one of its child steps
+ *     has a non-empty `capture_ids` list.
+ *  - `parent_pattern_guaranteed` - Indicates that if this step is reached, then
+ *     it and all of its subsequent sibling steps within the same parent pattern
+ *     are guaranteed to match.
+ *  - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but
+ *     for the entire top-level pattern. When iterating through a query's
+ *     captures using `ts_query_cursor_next_capture`, this field is used to
+ *     detect that a capture can safely be returned from a match that has not
+ *     even completed yet.
+ */
+struct s_query_step
+{
+	t_symbol   symbol;
+	t_symbol   supertype_symbol;
+	t_field_id field;
+	uint16_t   capture_ids[MAX_STEP_CAPTURE_COUNT];
+	uint16_t   depth;
+	uint16_t   alternative_index;
+	uint16_t   negated_field_list_id;
+	bool	   is_named : 1;
+	bool	   is_immediate : 1;
+	bool	   is_last_child : 1;
+	bool	   is_pass_through : 1;
+	bool	   is_dead_end : 1;
+	bool	   alternative_is_immediate : 1;
+	bool	   contains_captures : 1;
+	bool	   root_pattern_guaranteed : 1;
+	bool	   parent_pattern_guaranteed : 1;
+};
+
+/*
+ * t_slice - A slice of an external array. Within a query, capture names,
+ * literal string values, and predicate step information are stored in three
+ * contiguous arrays. Individual captures, string values, and predicates are
+ * represented as slices of these three arrays.
+ */
+struct s_slice
+{
+	uint32_t offset;
+	uint32_t length;
+};
+
+/*
+ * t_symbol_table - a two-way mapping of strings to ids.
+ */
+struct s_symbol_table
+{
+	Array(char) characters;
+	Array(t_slice) slices;
+};
+
+/**
+ * t_capture_quantifiers - a data structure holding the quantifiers of pattern
+ * captures.
+ */
+
+/*
+ * t_pattern_entry - Information about the starting point for matching a
+ * particular pattern. These entries are stored in a 'pattern map' - a sorted
+ * array that makes it possible to efficiently lookup patterns based on the
+ * symbol for their first step. The entry consists of the following fields:
+ * - `pattern_index` - the index of the pattern within the query
+ * - `step_index` - the index of the pattern's first step in the shared `steps`
+ *    array
+ * - `is_rooted` - whether or not the pattern has a single root node. This
+ *    property affects decisions about whether or not to start the pattern for
+ *    nodes outside of a `t_query_cursor`'s range restriction.
+ */
+struct s_pattern_entry
+{
+	uint16_t step_index;
+	uint16_t pattern_index;
+	bool	 is_rooted;
+};
+
+struct s_query_pattern
+{
+	t_slice	 steps;
+	t_slice	 predicate_steps;
+	uint32_t start_byte;
+	bool	 is_non_local;
+};
+
+struct s_step_offset
+{
+	uint32_t byte_offset;
+	uint16_t step_index;
+};
+
+/*
+ * t_query_state - The state of an in-progress match of a particular pattern
+ * in a query. While executing, a `t_query_cursor` must keep track of a number
+ * of possible in-progress matches. Each of those possible matches is
+ * represented as one of these states. Fields:
+ * - `id` - A numeric id that is exposed to the public API. This allows the
+ *    caller to remove a given match, preventing any more of its captures
+ *    from being returned.
+ * - `start_depth` - The depth in the tree where the first step of the state's
+ *    pattern was matched.
+ * - `pattern_index` - The pattern that the state is matching.
+ * - `consumed_capture_count` - The number of captures from this match that
+ *    have already been returned.
+ * - `capture_list_id` - A numeric id that can be used to retrieve the state's
+ *    list of captures from the `t_capture_list_pool`.
+ * - `seeking_immediate_match` - A flag that indicates that the state's next
+ *    step must be matched by the very next sibling. This is used when
+ *    processing repetitions.
+ * - `has_in_progress_alternatives` - A flag that indicates that there are
+ *    other states that have the same captures as this state, but are at
+ *    different steps in their pattern. This means that in order to obey the
+ *    'longest-match' rule, this state should not be returned as a match until
+ *    it is clear that there can be no other alternative match with more
+ *    captures.
+ */
+struct s_query_state
+{
+	uint32_t id;
+	uint32_t capture_list_id;
+	uint16_t start_depth;
+	uint16_t step_index;
+	uint16_t pattern_index;
+	uint16_t consumed_capture_count : 12;
+	bool	 seeking_immediate_match : 1;
+	bool	 has_in_progress_alternatives : 1;
+	bool	 dead : 1;
+	bool	 needs_parent : 1;
+};
+
+/*
+ * t_capture_list_pool - A collection of *lists* of captures. Each query state
+ * needs to maintain its own list of captures. To avoid repeated allocations,
+ * this struct maintains a fixed set of capture lists, and keeps track of which
+ * ones are currently in use by a query state.
+ */
+struct s_capture_list_pool
+{
+	Array(t_capture_list) list;
+	t_capture_list empty_list;
+	// The maximum number of capture lists that we are allowed to allocate. We
+	// never allow `list` to allocate more entries than this, dropping pending
+	// matches if needed to stay under the limit.
+	uint32_t max_capture_list_count;
+	// The number of capture lists allocated in `list` that are not currently in
+	// use. We reuse those existing-but-unused capture lists before trying to
+	// allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
+	// list's length to indicate that it's not in use.
+	uint32_t free_capture_list_count;
+};
+
+/*
+ * t_analysis_state - The state needed for walking the parse table when
+ * analyzing a query pattern, to determine at which steps the pattern might fail
+ * to match.
+ */
+struct s_analysis_state_entry
+{
+	t_state_id parse_state;
+	t_symbol   parent_symbol;
+	uint16_t   child_index;
+	t_field_id field_id : 15;
+	bool	   done : 1;
+};
+
+struct s_analysis_state
+{
+	t_analysis_state_entry stack[MAX_ANALYSIS_STATE_DEPTH];
+	uint16_t			   depth;
+	uint16_t			   step_index;
+	t_symbol			   root_symbol;
+};
+
+struct s_query_analysis
+{
+	t_analysis_state_set states;
+	t_analysis_state_set next_states;
+	t_analysis_state_set deeper_states;
+	t_analysis_state_set state_pool;
+	Array(uint16_t) final_step_indices;
+	Array(t_symbol) finished_parent_symbols;
+	bool did_abort;
+};
+
+/*
+ * t_analysis_subgraph - A subset of the states in the parse table that are used
+ * in constructing nodes with a certain symbol. Each state is accompanied by
+ * some information about the possible node that could be produced in
+ * downstream states.
+ */
+struct s_analysis_subgraph_node
+{
+	t_state_id state;
+	uint16_t   production_id;
+	uint8_t	   child_index : 7;
+	bool	   done : 1;
+};
+
+struct s_analysis_subgraph
+{
+	t_symbol symbol;
+	Array(t_state_id) start_states;
+	Array(t_analysis_subgraph_node) nodes;
+};
+
+/*
+ * t_state_predecessor_map - A map that stores the predecessors of each parse
+ * state. This is used during query analysis to determine which parse states can
+ * lead to which reduce actions.
+ */
+
+struct s_state_predecessor_map
+{
+	t_state_id *contents;
+};
+
+/*
+ * t_parse_query - A tree query, compiled from a string of S-expressions. The
+ * query itself is immutable. The mutable state used in the process of executing
+ * the query is stored in a `t_query_cursor`.
+ */
+struct s_parse_query
+{
+	t_symbol_table captures;
+	t_symbol_table predicate_values;
+	Array(t_capture_quantifiers) capture_quantifiers;
+	Array(t_query_step) steps;
+	Array(t_pattern_entry) pattern_map;
+	Array(t_query_predicate_step) predicate_steps;
+	Array(t_query_pattern) patterns;
+	Array(t_step_offset) step_offsets;
+	Array(t_field_id) negated_fields;
+	Array(char) string_buffer;
+	Array(t_symbol) repeat_symbols_with_rootless_patterns;
+	const t_language *language;
+	uint16_t		  wildcard_root_pattern_count;
+};
+
+/*
+ * t_query_cursor - A stateful struct used to execute a query on a tree.
+ */
+struct s_query_cursor
+{
+	const t_parse_query *query;
+	t_tree_cursor		 cursor;
+	Array(t_query_state) states;
+	Array(t_query_state) finished_states;
+	t_capture_list_pool capture_list_pool;
+	uint32_t			depth;
+	uint32_t			max_start_depth;
+	uint32_t			start_byte;
+	uint32_t			end_byte;
+	t_point				start_point;
+	t_point				end_point;
+	uint32_t			next_state_id;
+	bool				on_visible_node;
+	bool				ascending;
+	bool				halted;
+	bool				did_exceed_match_limit;
+};
+
+struct s_stack_link
+{
+	t_stack_node *node;
+	t_subtree	  subtree;
+	bool		  is_pending;
+};
+
+struct s_stack_node
+{
+	t_state_id		   state;
+	t_length		   position;
+	t_stack_link	   links[MAX_LINK_COUNT];
+	short unsigned int link_count;
+	uint32_t		   ref_count;
+	unsigned		   error_cost;
+	unsigned		   node_count;
+	int				   dynamic_precedence;
+};
+
+struct s_stack_iterator
+{
+	t_stack_node   *node;
+	t_subtree_array subtrees;
+	uint32_t		subtree_count;
+	bool			is_pending;
+};
+
+enum e_stack_status
+{
+	StackStatusActive,
+	StackStatusPaused,
+	StackStatusHalted,
+};
+
+struct s_stack_head
+{
+	t_stack_node	*node;
+	t_stack_summary *summary;
+	unsigned		 node_count_at_last_error;
+	t_subtree		 last_external_token;
+	t_subtree		 lookahead_when_paused;
+	t_stack_status	 status;
+};
+
+struct s_stack
+{
+	Array(t_stack_head) heads;
+	t_stack_slice_array slices;
+	Array(t_stack_iterator) iterators;
+	t_stack_node_array node_pool;
+	t_stack_node	  *base_node;
+	t_subtree_pool	  *subtree_pool;
+};
+
+enum e_stack_action
+{
+	StackActionNone,
+	StackActionStop = 1,
+	StackActionPop = 2,
+};
+
+struct s_summarize_stack_session
+{
+	t_stack_summary *summary;
+	unsigned		 max_depth;
+};
+
+struct s_edit
+{
+	t_length start;
+	t_length old_end;
+	t_length new_end;
+};
+
+struct s_edit_entry
+{
+	t_subtree *tree;
+	t_edit	   edit;
+};
+
+struct s_cursor_child_iterator
+{
+	t_subtree			parent;
+	const t_first_tree *tree;
+	t_length			position;
+	uint32_t			child_index;
+	uint32_t			structural_child_index;
+	uint32_t			descendant_index;
+	const t_symbol	   *alias_sequence;
+};
+
+#endif // STRUCTS_H