From 019d25174ca85e7fdeeb7c0d3d2a15cfdc66327e Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Tue, 30 Apr 2024 13:42:00 +0200 Subject: [PATCH 01/14] WIP --- includes/app/node.h | 4 +- parser/Makefile | 4 +- parser/includes/api.h | 82 +++---- parser/includes/parser_length.h | 64 ++++++ parser/includes/types/types_scanner_ctx.h | 26 +++ parser/src/host.h | 21 -- parser/src/language.c | 18 +- parser/src/length.h | 52 ----- parser/src/lexer.c | 18 +- parser/src/lexer.h | 16 +- parser/src/node.c | 4 +- parser/src/parser.c | 40 ++-- parser/src/parser.h | 0 parser/src/scanner.c | 44 ++-- parser/src/stack.c | 6 +- parser/src/stack.h | 4 +- parser/src/subtree.c | 28 +-- parser/src/subtree.h | 23 +- parser/src/tree.c | 4 +- parser/src/tree.h | 4 +- parser/src/tree_cursor.c | 12 +- parser/src/tree_cursor.h | 2 +- sources/main.c | 267 ++++++++++++---------- sources/node/node.c | 8 +- 24 files changed, 388 insertions(+), 363 deletions(-) create mode 100644 parser/includes/parser_length.h create mode 100644 parser/includes/types/types_scanner_ctx.h delete mode 100644 parser/src/host.h delete mode 100644 parser/src/length.h delete mode 100644 parser/src/parser.h diff --git a/includes/app/node.h b/includes/app/node.h index b37bebb6..e44c339e 100644 --- a/includes/app/node.h +++ b/includes/app/node.h @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/04/28 18:35:22 by maiboyer #+# #+# */ -/* Updated: 2024/04/28 18:53:13 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 13:02:06 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -28,7 +28,7 @@ typedef struct s_node struct s_node *childs; } t_node; -t_node build_node(TSNode curr, t_const_str input); +t_node build_node(t_parse_node curr, t_const_str input); t_str node_getstr(t_node *node); void free_node(t_node t); diff --git a/parser/Makefile b/parser/Makefile index 91965b94..2aa6a50d 100644 --- a/parser/Makefile +++ 
b/parser/Makefile @@ -6,7 +6,7 @@ # By: maiboyer +#+ +:+ +#+ # # +#+#+#+#+#+ +#+ # # Created: 2023/11/03 13:20:01 by maiboyer #+# #+# # -# Updated: 2024/04/29 14:31:52 by maiboyer ### ########.fr # +# Updated: 2024/04/30 13:35:56 by maiboyer ### ########.fr # # # # **************************************************************************** # @@ -18,7 +18,7 @@ NAME = libgmr.a LIB_NAME ?= TARGET = $(BUILD_DIR)/$(NAME) CC = cc -CFLAGS = -Wall -Wextra -Werror -g3 -MMD -I./includes -I../includes -I../output/include +CFLAGS = -Wall -Wextra -Werror -MMD -I./includes -I../includes -I../output/include include ./Filelist.mk diff --git a/parser/includes/api.h b/parser/includes/api.h index e7c9ace5..fdfd61b6 100644 --- a/parser/includes/api.h +++ b/parser/includes/api.h @@ -36,13 +36,13 @@ typedef struct s_parser t_parser; typedef struct t_parse_tree t_parse_tree; typedef struct t_query t_query; typedef struct t_query_cursor t_query_cursor; -typedef struct TSLookaheadIterator TSLookaheadIterator; +typedef struct t_lookahead_iterator t_lookahead_iterator; -typedef enum TSInputEncoding +typedef enum t_input_encoding { - TSInputEncodingUTF8, - TSInputEncodingUTF16, -} TSInputEncoding; + InputEncoding8, + InputEncoding16, +} t_input_encoding; typedef enum t_symbol_type { @@ -51,25 +51,25 @@ typedef enum t_symbol_type SymbolTypeAuxiliary, } t_symbol_type; -typedef struct TSInput +typedef struct t_parse_input { void *payload; const char *(*read)(void *payload, t_u32 byte_index, t_point position, t_u32 *bytes_read); - TSInputEncoding encoding; -} TSInput; + t_input_encoding encoding; +} t_parse_input; -typedef enum TSLogType +typedef enum t_parse_log_type { - TSLogTypeParse, - TSLogTypeLex, -} TSLogType; + LogTypeParse, + LogTypeLex, +} t_parse_log_type; -typedef struct TSLogger +typedef struct t_parse_logger { void *payload; - void (*log)(void *payload, TSLogType log_type, const char *buffer); -} TSLogger; + void (*log)(void *payload, t_parse_log_type log_type, const char *buffer); 
+} t_parse_logger; typedef struct t_input_edit { @@ -101,14 +101,14 @@ typedef struct t_queryCapture t_u32 index; } t_queryCapture; -typedef enum TSQuantifier +typedef enum t_parse_quantifier { - TSQuantifierZero = 0, // must match the array initialization value - TSQuantifierZeroOrOne, - TSQuantifierZeroOrMore, - TSQuantifierOne, - TSQuantifierOneOrMore, -} TSQuantifier; + ParseQuantifierZero = 0, // must match the array initialization value + ParseQuantifierZeroOrOne, + ParseQuantifierZeroOrMore, + ParseQuantifierOne, + ParseQuantifierOneOrMore, +} t_parse_quantifier; typedef struct t_query_match { @@ -221,7 +221,7 @@ const t_parser_range *ts_parser_included_ranges(const t_parser *self, a * way that exactly matches the source code changes. * - * The [`TSInput`] parameter lets you specify how to read the text. It has + * The [`t_parse_input`] parameter lets you specify how to read the text. It has the * following three fields: * 1. [`read`]: A function to retrieve a chunk of text at a given byte @@ -237,7 +237,7 @@ const t_parser_range *ts_parser_included_ranges(const t_parser *self, invocation * of the [`read`] function. * 3. [`encoding`]: An indication of how the text is encoded. Either - * `TSInputEncodingUTF8` or `TSInputEncodingUTF16`. + * `InputEncoding8` or `InputEncoding16`. * * This function returns a syntax tree on success, and `NULL` on failure. There @@ -259,13 +259,13 @@ const t_parser_range *ts_parser_included_ranges(const t_parser *self, with * the same arguments. * - * [`read`]: TSInput::read - * [`payload`]: TSInput::payload - * [`encoding`]: TSInput::encoding - * [`bytes_read`]: TSInput::read + * [`read`]: t_parse_input::read + * [`payload`]: t_parse_input::payload + * [`encoding`]: t_parse_input::encoding + * [`bytes_read`]: t_parse_input::read */ t_parse_tree *ts_parser_parse(t_parser *self, const t_parse_tree *old_tree, - TSInput input); + t_parse_input input); /** * Use the parser to parse some source code stored in one contiguous buffer. 
@@ -286,7 +286,7 @@ t_parse_tree *ts_parser_parse_string(t_parser *self, t_parse_tree *ts_parser_parse_string_encoding(t_parser *self, const t_parse_tree *old_tree, const char *string, t_u32 length, - TSInputEncoding encoding); + t_input_encoding encoding); /** * Instruct the parser to start the next parse from the beginning. @@ -334,12 +334,12 @@ const size_t *ts_parser_cancellation_flag(const t_parser *self); * was previously assigned, the caller is responsible for releasing any * memory owned by the previous logger. */ -void ts_parser_set_logger(t_parser *self, TSLogger logger); +void ts_parser_set_logger(t_parser *self, t_parse_logger logger); /** * Get the parser's current logger. */ -TSLogger ts_parser_logger(const t_parser *self); +t_parse_logger ts_parser_logger(const t_parser *self); /** * Set the file descriptor to which the parser should write debugging graphs @@ -888,7 +888,7 @@ const char *ts_query_capture_name_for_id(const t_query *self, t_u32 index, * with a numeric id based on the order that it appeared in the query's * source. */ -TSQuantifier ts_query_capture_quantifier_for_id(const t_query *self, +t_parse_quantifier ts_query_capture_quantifier_for_id(const t_query *self, t_u32 pattern_index, t_u32 capture_index); @@ -1110,13 +1110,13 @@ t_state_id ts_language_next_state(const t_language *self, t_state_id state, * lookahead iterator created on the previous non-extra leaf node may be * appropriate. */ -TSLookaheadIterator *ts_lookahead_iterator_new(const t_language *self, +t_lookahead_iterator *ts_lookahead_iterator_new(const t_language *self, t_state_id state); /** * Delete a lookahead iterator freeing all the memory used. */ -void ts_lookahead_iterator_delete(TSLookaheadIterator *self); +void ts_lookahead_iterator_delete(t_lookahead_iterator *self); /** * Reset the lookahead iterator to another state. 
@@ -1124,7 +1124,7 @@ void ts_lookahead_iterator_delete(TSLookaheadIterator *self); * This returns `true` if the iterator was reset to the given state and * `false` otherwise. */ -bool ts_lookahead_iterator_reset_state(TSLookaheadIterator *self, +bool ts_lookahead_iterator_reset_state(t_lookahead_iterator *self, t_state_id state); /** @@ -1133,33 +1133,33 @@ bool ts_lookahead_iterator_reset_state(TSLookaheadIterator *self, * This returns `true` if the language was set successfully and `false` * otherwise. */ -bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, +bool ts_lookahead_iterator_reset(t_lookahead_iterator *self, const t_language *language, t_state_id state); /** * Get the current language of the lookahead iterator. */ const t_language *ts_lookahead_iterator_language( - const TSLookaheadIterator *self); + const t_lookahead_iterator *self); /** * Advance the lookahead iterator to the next symbol. * * This returns `true` if there is a new symbol and `false` otherwise. */ -bool ts_lookahead_iterator_next(TSLookaheadIterator *self); +bool ts_lookahead_iterator_next(t_lookahead_iterator *self); /** * Get the current symbol of the lookahead iterator; */ -t_symbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self); +t_symbol ts_lookahead_iterator_current_symbol(const t_lookahead_iterator *self); /** * Get the current symbol type of the lookahead iterator as a null * terminated string. 
*/ const char *ts_lookahead_iterator_current_symbol_name( - const TSLookaheadIterator *self); + const t_lookahead_iterator *self); /**********************************/ /* Section - Global Configuration */ diff --git a/parser/includes/parser_length.h b/parser/includes/parser_length.h new file mode 100644 index 00000000..5a9137d8 --- /dev/null +++ b/parser/includes/parser_length.h @@ -0,0 +1,64 @@ +#ifndef TREE_SITTER_LENGTH_H_ +#define TREE_SITTER_LENGTH_H_ + +#include "../src/point.h" +#include "parser/api.h" +#include +#include + +typedef struct s_parse_length +{ + t_u32 bytes; + t_point extent; +} t_parse_length; + +static const t_parse_length LENGTH_UNDEFINED = {0, {0, 1}}; +static const t_parse_length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}}; + +static inline bool length_is_undefined(t_parse_length length) +{ + return (length.bytes == 0 && length.extent.column != 0); +} + +static inline t_parse_length length_min(t_parse_length len1, + t_parse_length len2) +{ + if (len1.bytes < len2.bytes) + return (len1); + else + return (len2); +} + +static inline t_parse_length length_add(t_parse_length len1, + t_parse_length len2) +{ + t_parse_length result; + result.bytes = len1.bytes + len2.bytes; + result.extent = point_add(len1.extent, len2.extent); + return (result); +} + +static inline t_parse_length length_sub(t_parse_length len1, + t_parse_length len2) +{ + t_parse_length result; + result.bytes = len1.bytes - len2.bytes; + result.extent = point_sub(len1.extent, len2.extent); + return (result); +} + +static inline t_parse_length length_zero(void) +{ + return ((t_parse_length){0, {0, 0}}); +} + +static inline t_parse_length length_saturating_sub(t_parse_length len1, + t_parse_length len2) +{ + if (len1.bytes > len2.bytes) + return (length_sub(len1, len2)); + else + return (length_zero()); +} + +#endif diff --git a/parser/includes/types/types_scanner_ctx.h b/parser/includes/types/types_scanner_ctx.h new file mode 100644 index 00000000..b1807c68 --- /dev/null 
+++ b/parser/includes/types/types_scanner_ctx.h @@ -0,0 +1,26 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* types_scanner_ctx.h :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/04/30 13:41:02 by maiboyer #+# #+# */ +/* Updated: 2024/04/30 13:41:29 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef TYPES_SCANNER_CTX_H +#define TYPES_SCANNER_CTX_H + +#include "me/types.h" + +typedef struct s_scanner_ctx +{ + t_u8 last_glob_paren_depth; + bool ext_was_in_double_quote; + bool ext_saw_outside_quote; + // Array(t_heredoc) heredocs; +} t_scanner_ctx; + +#endif /* TYPES_SCANNER_CTX_H */ diff --git a/parser/src/host.h b/parser/src/host.h deleted file mode 100644 index a07e9f89..00000000 --- a/parser/src/host.h +++ /dev/null @@ -1,21 +0,0 @@ - -// Determine endian and pointer size based on known defines. -// TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments -// to override this. 
- -#if !defined(TS_BIG_ENDIAN) -#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \ - || (defined( __APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__))) -#define TS_BIG_ENDIAN 1 -#else -#define TS_BIG_ENDIAN 0 -#endif -#endif - -#if !defined(TS_PTR_SIZE) -#if UINTPTR_MAX == 0xFFFFFFFF -#define TS_PTR_SIZE 32 -#else -#define TS_PTR_SIZE 64 -#endif -#endif diff --git a/parser/src/language.c b/parser/src/language.c index 658e38fd..3f5acb60 100644 --- a/parser/src/language.c +++ b/parser/src/language.c @@ -169,47 +169,47 @@ t_field_id ts_language_field_id_for_name( return 0; } -TSLookaheadIterator *ts_lookahead_iterator_new(const t_language *self, t_state_id state) { +t_lookahead_iterator *ts_lookahead_iterator_new(const t_language *self, t_state_id state) { if (state >= self->state_count) return NULL; LookaheadIterator *iterator = malloc(sizeof(LookaheadIterator)); *iterator = ts_language_lookaheads(self, state); - return (TSLookaheadIterator *)iterator; + return (t_lookahead_iterator *)iterator; } -void ts_lookahead_iterator_delete(TSLookaheadIterator *self) { +void ts_lookahead_iterator_delete(t_lookahead_iterator *self) { free(self); } -bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, t_state_id state) { +bool ts_lookahead_iterator_reset_state(t_lookahead_iterator * self, t_state_id state) { LookaheadIterator *iterator = (LookaheadIterator *)self; if (state >= iterator->language->state_count) return false; *iterator = ts_language_lookaheads(iterator->language, state); return true; } -const t_language *ts_lookahead_iterator_language(const TSLookaheadIterator *self) { +const t_language *ts_lookahead_iterator_language(const t_lookahead_iterator *self) { const LookaheadIterator *iterator = (const LookaheadIterator *)self; return iterator->language; } -bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const t_language *language, t_state_id state) { +bool ts_lookahead_iterator_reset(t_lookahead_iterator *self, const 
t_language *language, t_state_id state) { if (state >= language->state_count) return false; LookaheadIterator *iterator = (LookaheadIterator *)self; *iterator = ts_language_lookaheads(language, state); return true; } -bool ts_lookahead_iterator_next(TSLookaheadIterator *self) { +bool ts_lookahead_iterator_next(t_lookahead_iterator *self) { LookaheadIterator *iterator = (LookaheadIterator *)self; return ts_lookahead_iterator__next(iterator); } -t_symbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) { +t_symbol ts_lookahead_iterator_current_symbol(const t_lookahead_iterator *self) { const LookaheadIterator *iterator = (const LookaheadIterator *)self; return iterator->symbol; } -const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) { +const char *ts_lookahead_iterator_current_symbol_name(const t_lookahead_iterator *self) { const LookaheadIterator *iterator = (const LookaheadIterator *)self; return ts_language_symbol_name(iterator->language, iterator->symbol); } diff --git a/parser/src/length.h b/parser/src/length.h deleted file mode 100644 index 708d5961..00000000 --- a/parser/src/length.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef TREE_SITTER_LENGTH_H_ -#define TREE_SITTER_LENGTH_H_ - -#include -#include -#include "./point.h" -#include "parser/api.h" - -typedef struct { - t_u32 bytes; - t_point extent; -} Length; - -static const Length LENGTH_UNDEFINED = {0, {0, 1}}; -static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}}; - -static inline bool length_is_undefined(Length length) { - return length.bytes == 0 && length.extent.column != 0; -} - -static inline Length length_min(Length len1, Length len2) { - return (len1.bytes < len2.bytes) ? 
len1 : len2; -} - -static inline Length length_add(Length len1, Length len2) { - Length result; - result.bytes = len1.bytes + len2.bytes; - result.extent = point_add(len1.extent, len2.extent); - return result; -} - -static inline Length length_sub(Length len1, Length len2) { - Length result; - result.bytes = len1.bytes - len2.bytes; - result.extent = point_sub(len1.extent, len2.extent); - return result; -} - -static inline Length length_zero(void) { - Length result = {0, {0, 0}}; - return result; -} - -static inline Length length_saturating_sub(Length len1, Length len2) { - if (len1.bytes > len2.bytes) { - return length_sub(len1, len2); - } else { - return length_zero(); - } -} - -#endif diff --git a/parser/src/lexer.c b/parser/src/lexer.c index ee5dcdff..208244da 100644 --- a/parser/src/lexer.c +++ b/parser/src/lexer.c @@ -1,5 +1,5 @@ #include "./lexer.h" -#include "./length.h" +#include "parser/parser_length.h" #include "./subtree.h" #include #include @@ -12,7 +12,7 @@ " character:'%c'" \ : message " character:%d", \ character); \ - self->logger.log(self->logger.payload, TSLogTypeLex, \ + self->logger.log(self->logger.payload, LogTypeLex, \ self->debug_buffer); \ } @@ -113,7 +113,7 @@ static void ts_lexer__get_lookahead(t_liblexer *self) } } -static void ts_lexer_goto(t_liblexer *self, Length position) +static void ts_lexer_goto(t_liblexer *self, t_parse_length position) { self->current_position = position; @@ -127,7 +127,7 @@ static void ts_lexer_goto(t_liblexer *self, Length position) { if (included_range->start_byte >= self->current_position.bytes) { - self->current_position = (Length){ + self->current_position = (t_parse_length){ .bytes = included_range->start_byte, .extent = included_range->start_point, }; @@ -161,7 +161,7 @@ static void ts_lexer_goto(t_liblexer *self, Length position) self->current_included_range_index = self->included_range_count; t_parser_range *last_included_range = &self->included_ranges[self->included_range_count - 1]; - 
self->current_position = (Length){ + self->current_position = (t_parse_length){ .bytes = last_included_range->end_byte, .extent = last_included_range->end_point, }; @@ -200,7 +200,7 @@ static void ts_lexer__do_advance(t_liblexer *self, bool skip) if (self->current_included_range_index < self->included_range_count) { current_range++; - self->current_position = (Length){ + self->current_position = (t_parse_length){ current_range->start_byte, current_range->start_point, }; @@ -260,7 +260,7 @@ static void ts_lexer__mark_end(t_lexer *_self) { t_parser_range *previous_included_range = current_included_range - 1; - self->token_end_position = (Length){ + self->token_end_position = (t_parse_length){ previous_included_range->end_byte, previous_included_range->end_point, }; @@ -355,7 +355,7 @@ void ts_lexer_delete(t_liblexer *self) free(self->included_ranges); } -void ts_lexer_set_input(t_liblexer *self, TSInput input) +void ts_lexer_set_input(t_liblexer *self, t_parse_input input) { self->input = input; ts_lexer__clear_chunk(self); @@ -364,7 +364,7 @@ void ts_lexer_set_input(t_liblexer *self, TSInput input) // Move the lexer to the given position. This doesn't do any work // if the parser is already at the given position. 
-void ts_lexer_reset(t_liblexer *self, Length position) +void ts_lexer_reset(t_liblexer *self, t_parse_length position) { if (position.bytes != self->current_position.bytes) { diff --git a/parser/src/lexer.h b/parser/src/lexer.h index 634d1c0e..4a29c288 100644 --- a/parser/src/lexer.h +++ b/parser/src/lexer.h @@ -13,7 +13,7 @@ #ifndef TREE_SITTER_LEXER_H_ #define TREE_SITTER_LEXER_H_ -#include "./length.h" +#include "parser/parser_length.h" #include "./parser.h" #include "./subtree.h" #include "parser/api.h" @@ -26,14 +26,14 @@ typedef struct s_liblexer { t_lexer data; - Length current_position; - Length token_start_position; - Length token_end_position; + t_parse_length current_position; + t_parse_length token_start_position; + t_parse_length token_end_position; t_parser_range *included_ranges; const char *chunk; - TSInput input; - TSLogger logger; + t_parse_input input; + t_parse_logger logger; t_u32 included_range_count; t_u32 current_included_range_index; @@ -47,8 +47,8 @@ typedef struct s_liblexer void ts_lexer_init(t_liblexer *); void ts_lexer_delete(t_liblexer *); -void ts_lexer_set_input(t_liblexer *, TSInput); -void ts_lexer_reset(t_liblexer *, Length); +void ts_lexer_set_input(t_liblexer *, t_parse_input); +void ts_lexer_reset(t_liblexer *, t_parse_length); void ts_lexer_start(t_liblexer *); void ts_lexer_finish(t_liblexer *, t_i32 *); void ts_lexer_advance_to_end(t_liblexer *); diff --git a/parser/src/node.c b/parser/src/node.c index a5b93288..44da00ae 100644 --- a/parser/src/node.c +++ b/parser/src/node.c @@ -7,7 +7,7 @@ typedef struct { Subtree parent; const t_parse_tree *tree; - Length position; + t_parse_length position; t_u32 child_index; t_u32 structural_child_index; const t_symbol *alias_sequence; @@ -16,7 +16,7 @@ typedef struct // t_parse_node - constructors t_parse_node ts_node_new(const t_parse_tree *tree, const Subtree *subtree, - Length position, t_symbol alias) + t_parse_length position, t_symbol alias) { return (t_parse_node){ 
{position.bytes, position.extent.row, position.extent.column, alias}, diff --git a/parser/src/parser.c b/parser/src/parser.c index 4591ad93..15f1dfb0 100644 --- a/parser/src/parser.c +++ b/parser/src/parser.c @@ -1,7 +1,7 @@ #include "./array.h" #include "./error_costs.h" #include "./language.h" -#include "./length.h" +#include "parser/parser_length.h" #include "./lexer.h" #include "./reduce_action.h" #include "./reusable_node.h" @@ -185,7 +185,7 @@ static void ts_parser__log(t_parser *self) { if (self->lexer.logger.log) { - self->lexer.logger.log(self->lexer.logger.payload, TSLogTypeParse, + self->lexer.logger.log(self->lexer.logger.payload, LogTypeParse, self->lexer.debug_buffer); } @@ -367,7 +367,7 @@ static bool ts_parser__better_version_exists(t_parser *self, return true; } - Length position = ts_stack_position(self->stack, version); + t_parse_length position = ts_stack_position(self->stack, version); t_error_status status = { .cost = cost, .is_in_error = is_in_error, @@ -506,7 +506,7 @@ static Subtree ts_parser__lex(t_parser *self, StackVersion version, return NULL_SUBTREE; } - const Length start_position = ts_stack_position(self->stack, version); + const t_parse_length start_position = ts_stack_position(self->stack, version); const Subtree external_token = ts_stack_last_external_token(self->stack, version); @@ -515,8 +515,8 @@ static Subtree ts_parser__lex(t_parser *self, StackVersion version, bool skipped_error = false; bool called_get_column = false; t_i32 first_error_character = 0; - Length error_start_position = length_zero(); - Length error_end_position = length_zero(); + t_parse_length error_start_position = length_zero(); + t_parse_length error_end_position = length_zero(); t_i32 lookahead_end_byte = 0; t_i32 external_scanner_state_len = 0; bool external_scanner_state_changed = false; @@ -525,7 +525,7 @@ static Subtree ts_parser__lex(t_parser *self, StackVersion version, for (;;) { bool found_token = false; - Length current_position = 
self->lexer.current_position; + t_parse_length current_position = self->lexer.current_position; if (lex_mode.external_lex_state != 0) { @@ -626,8 +626,8 @@ static Subtree ts_parser__lex(t_parser *self, StackVersion version, Subtree result; if (skipped_error) { - Length padding = length_sub(error_start_position, start_position); - Length size = length_sub(error_end_position, error_start_position); + t_parse_length padding = length_sub(error_start_position, start_position); + t_parse_length size = length_sub(error_end_position, error_start_position); t_u32 lookahead_bytes = lookahead_end_byte - error_end_position.bytes; result = ts_subtree_new_error(&self->tree_pool, first_error_character, @@ -638,9 +638,9 @@ static Subtree ts_parser__lex(t_parser *self, StackVersion version, { bool is_keyword = false; t_symbol symbol = self->lexer.data.result_symbol; - Length padding = + t_parse_length padding = length_sub(self->lexer.token_start_position, start_position); - Length size = length_sub(self->lexer.token_end_position, + t_parse_length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position); t_u32 lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes; @@ -1300,7 +1300,7 @@ static void ts_parser__recover(t_parser *self, StackVersion version, { bool did_recover = false; unsigned previous_version_count = ts_stack_version_count(self->stack); - Length position = ts_stack_position(self->stack, version); + t_parse_length position = ts_stack_position(self->stack, version); StackSummary *summary = ts_stack_get_summary(self->stack, version); unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version); @@ -1515,7 +1515,7 @@ static void ts_parser__handle_error(t_parser *self, StackVersion version, // find a token that would have allowed a reduction to take place. 
ts_parser__do_all_potential_reductions(self, version, 0); t_u32 version_count = ts_stack_version_count(self->stack); - Length position = ts_stack_position(self->stack, version); + t_parse_length position = ts_stack_position(self->stack, version); // Push a discontinuity onto the stack. Merge all of the stack versions that // were created in the previous step. @@ -1547,7 +1547,7 @@ static void ts_parser__handle_error(t_parser *self, StackVersion version, // assigned to position it within the next included range. ts_lexer_reset(&self->lexer, position); ts_lexer_mark_end(&self->lexer); - Length padding = + t_parse_length padding = length_sub(self->lexer.token_end_position, position); t_u32 lookahead_bytes = ts_subtree_total_bytes(lookahead) + @@ -2053,12 +2053,12 @@ bool ts_parser_set_language(t_parser *self, const t_language *language) return true; } -TSLogger ts_parser_logger(const t_parser *self) +t_parse_logger ts_parser_logger(const t_parser *self) { return self->lexer.logger; } -void ts_parser_set_logger(t_parser *self, TSLogger logger) +void ts_parser_set_logger(t_parser *self, t_parse_logger logger) { self->lexer.logger = logger; } @@ -2141,7 +2141,7 @@ void ts_parser_reset(t_parser *self) self->has_scanner_error = false; } -t_parse_tree *ts_parser_parse(t_parser *self, const t_parse_tree *old_tree, TSInput input) +t_parse_tree *ts_parser_parse(t_parser *self, const t_parse_tree *old_tree, t_parse_input input) { t_parse_tree *result = NULL; old_tree = NULL; @@ -2258,16 +2258,16 @@ t_parse_tree *ts_parser_parse_string(t_parser *self, const t_parse_tree *old_tre const char *string, t_u32 length) { return ts_parser_parse_string_encoding(self, old_tree, string, length, - TSInputEncodingUTF8); + InputEncoding8); } t_parse_tree *ts_parser_parse_string_encoding(t_parser *self, const t_parse_tree *old_tree, const char *string, t_u32 length, - TSInputEncoding encoding) + t_input_encoding encoding) { t_string_input input = {string, length}; return ts_parser_parse(self, 
old_tree, - (TSInput){ + (t_parse_input){ &input, ts_string_inpt_read, encoding, diff --git a/parser/src/parser.h b/parser/src/parser.h deleted file mode 100644 index e69de29b..00000000 diff --git a/parser/src/scanner.c b/parser/src/scanner.c index e703b571..abc2d8f5 100644 --- a/parser/src/scanner.c +++ b/parser/src/scanner.c @@ -1,11 +1,11 @@ #include "array.h" -#include "parser.h" #include "parser/types/types_lexer.h" +#include "parser/types/types_scanner.h" +#include "parser/types/types_scanner_ctx.h" #include #include #include -#include #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 @@ -65,14 +65,6 @@ static inline t_heredoc heredoc_new(void) }); } -typedef struct s_scanner -{ - t_u8 last_glob_paren_depth; - bool ext_was_in_double_quote; - bool ext_saw_outside_quote; - Array(t_heredoc) heredocs; -} t_scanner; - static inline void advance(t_lexer *lexer) { lexer->advance(lexer, false); @@ -119,8 +111,8 @@ static inline void reset(t_scanner *scanner) static unsigned serialize(t_scanner *scanner, char *buffer) { - t_u32 size; - t_u32 i; + t_u32 size; + t_u32 i; t_heredoc *heredoc; size = 0; @@ -153,10 +145,10 @@ static unsigned serialize(t_scanner *scanner, char *buffer) static void deserialize(t_scanner *scanner, const char *buffer, unsigned length) { - t_u32 size; - t_u32 heredoc_count; + t_u32 size; + t_u32 heredoc_count; t_heredoc *heredoc; - t_u32 i; + t_u32 i; size = 0; if (length == 0) @@ -205,7 +197,7 @@ static void deserialize(t_scanner *scanner, const char *buffer, unsigned length) */ static bool advance_word(t_lexer *lexer, t_string *unquoted_word) { - bool empty; + bool empty; t_i32 quote; quote = 0; @@ -449,11 +441,11 @@ static bool regex_scan(t_scanner *scanner, t_lexer *lexer, { typedef struct { - bool done; - bool advanced_once; - bool found_non_alnumdollarunderdash; - bool last_was_escape; - bool in_single_quote; + bool done; + bool advanced_once; + bool found_non_alnumdollarunderdash; + bool last_was_escape; + bool in_single_quote; t_u32 
paren_depth; t_u32 bracket_depth; t_u32 brace_depth; @@ -793,8 +785,8 @@ static bool extglob_pattern_scan(t_scanner *scanner, t_lexer *lexer, typedef struct { - bool done; - bool saw_non_alphadot; + bool done; + bool saw_non_alphadot; t_u32 paren_depth; t_u32 bracket_depth; t_u32 brace_depth; @@ -930,7 +922,7 @@ static bool expansion_word_scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) { (void)(scanner); - + if (valid_symbols[EXPANSION_WORD]) { bool advanced_once = false; @@ -1027,14 +1019,14 @@ static bool expansion_word_scan(t_scanner *scanner, t_lexer *lexer, advance(lexer); } } - return (false); + return (false); } static bool brace_start_scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) { (void)(scanner); - + if (valid_symbols[BRACE_START] && !in_error_recovery(valid_symbols)) { while (isspace(lexer->lookahead)) diff --git a/parser/src/stack.c b/parser/src/stack.c index 48b6ad25..c02c6ad6 100644 --- a/parser/src/stack.c +++ b/parser/src/stack.c @@ -3,7 +3,7 @@ #include "./subtree.h" #include "./array.h" #include "./stack.h" -#include "./length.h" +#include "parser/parser_length.h" #include #include #include @@ -28,7 +28,7 @@ typedef struct { struct StackNode { t_state_id state; - Length position; + t_parse_length position; StackLink links[MAX_LINK_COUNT]; short unsigned int link_count; t_u32 ref_count; @@ -464,7 +464,7 @@ t_state_id ts_stack_state(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->state; } -Length ts_stack_position(const Stack *self, StackVersion version) { +t_parse_length ts_stack_position(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->position; } diff --git a/parser/src/stack.h b/parser/src/stack.h index fb5fbb23..f3f64bac 100644 --- a/parser/src/stack.h +++ b/parser/src/stack.h @@ -22,7 +22,7 @@ typedef struct { typedef Array(StackSlice) StackSliceArray; typedef struct { - Length position; + t_parse_length position; 
unsigned depth; t_state_id state; } StackSummaryEntry; @@ -48,7 +48,7 @@ Subtree ts_stack_last_external_token(const Stack *, StackVersion); void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree ); // Get the position of the given version of the stack within the document. -Length ts_stack_position(const Stack *, StackVersion); +t_parse_length ts_stack_position(const Stack *, StackVersion); // Push a tree and state onto the given version of the stack. // diff --git a/parser/src/subtree.c b/parser/src/subtree.c index c8008465..e63308f5 100644 --- a/parser/src/subtree.c +++ b/parser/src/subtree.c @@ -9,15 +9,15 @@ #include "./error_costs.h" #include "./language.h" -#include "./length.h" +#include "parser/parser_length.h" #include "./subtree.h" #include typedef struct { - Length start; - Length old_end; - Length new_end; + t_parse_length start; + t_parse_length old_end; + t_parse_length new_end; } Edit; #define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX @@ -193,7 +193,7 @@ static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) // Subtree -static inline bool ts_subtree_can_inline(Length padding, Length size, +static inline bool ts_subtree_can_inline(t_parse_length padding, t_parse_length size, t_u32 lookahead_bytes) { return padding.bytes < TS_MAX_INLINE_TREE_LENGTH && @@ -204,8 +204,8 @@ static inline bool ts_subtree_can_inline(Length padding, Length size, lookahead_bytes < 16; } -Subtree ts_subtree_new_leaf(SubtreePool *pool, t_symbol symbol, Length padding, - Length size, t_u32 lookahead_bytes, +Subtree ts_subtree_new_leaf(SubtreePool *pool, t_symbol symbol, t_parse_length padding, + t_parse_length size, t_u32 lookahead_bytes, t_state_id parse_state, bool has_external_tokens, bool depends_on_column, bool is_keyword, const t_language *language) @@ -283,7 +283,7 @@ void ts_subtree_set_symbol(MutableSubtree *self, t_symbol symbol, } Subtree ts_subtree_new_error(SubtreePool *pool, t_i32 lookahead_char, - Length padding, Length size, + 
t_parse_length padding, t_parse_length size, t_u32 bytes_scanned, t_state_id parse_state, const t_language *language) { @@ -654,7 +654,7 @@ Subtree ts_subtree_new_error_node(SubtreeArray *children, bool extra, // This node is treated as 'extra'. Its children are prevented from having // having any effect on the parse state. Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, t_symbol symbol, - Length padding, t_u32 lookahead_bytes, + t_parse_length padding, t_u32 lookahead_bytes, const t_language *language) { Subtree result = @@ -807,9 +807,9 @@ Subtree ts_subtree_edit(Subtree self, const t_input_edit *inpt_edit, bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree); - Length size = ts_subtree_size(*entry.tree); - Length padding = ts_subtree_padding(*entry.tree); - Length total_size = length_add(padding, size); + t_parse_length size = ts_subtree_size(*entry.tree); + t_parse_length padding = ts_subtree_padding(*entry.tree); + t_parse_length total_size = length_add(padding, size); t_u32 lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); t_u32 end_byte = total_size.bytes + lookahead_bytes; if (edit.start.bytes > end_byte || @@ -895,12 +895,12 @@ Subtree ts_subtree_edit(Subtree self, const t_input_edit *inpt_edit, ts_subtree_set_has_changes(&result); *entry.tree = ts_subtree_from_mut(result); - Length child_left, child_right = length_zero(); + t_parse_length child_left, child_right = length_zero(); for (t_u32 i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) { Subtree *child = &ts_subtree_children(*entry.tree)[i]; - Length child_size = ts_subtree_total_size(*child); + t_parse_length child_size = ts_subtree_total_size(*child); child_left = child_right; child_right = length_add(child_left, child_size); diff --git a/parser/src/subtree.h b/parser/src/subtree.h index 110660d7..1955fa83 100644 --- a/parser/src/subtree.h +++ b/parser/src/subtree.h @@ -6,8 +6,7 @@ #include 
"./array.h" #include "./error_costs.h" -#include "./host.h" -#include "./length.h" +#include "parser/parser_length.h" #include "./parser.h" #include "parser/api.h" #include @@ -75,8 +74,8 @@ struct s_subtree_inline_data typedef struct { volatile t_u32 ref_count; - Length padding; - Length size; + t_parse_length padding; + t_parse_length size; t_u32 lookahead_bytes; t_u32 error_cost; t_u32 child_count; @@ -159,14 +158,14 @@ void ts_subtree_array_reverse(SubtreeArray *); SubtreePool ts_subtree_pool_new(t_u32 capacity); void ts_subtree_pool_delete(SubtreePool *); -Subtree ts_subtree_new_leaf(SubtreePool *, t_symbol, Length, Length, t_u32, +Subtree ts_subtree_new_leaf(SubtreePool *, t_symbol, t_parse_length, t_parse_length, t_u32, t_state_id, bool, bool, bool, const t_language *); -Subtree ts_subtree_new_error(SubtreePool *, t_i32, Length, Length, t_u32, +Subtree ts_subtree_new_error(SubtreePool *, t_i32, t_parse_length, t_parse_length, t_u32, t_state_id, const t_language *); MutableSubtree ts_subtree_new_node(t_symbol, SubtreeArray *, unsigned, const t_language *); Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const t_language *); -Subtree ts_subtree_new_missing_leaf(SubtreePool *, t_symbol, Length, t_u32, +Subtree ts_subtree_new_missing_leaf(SubtreePool *, t_symbol, t_parse_length, t_u32, const t_language *); MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree); void ts_subtree_retain(Subtree); @@ -271,11 +270,11 @@ static inline t_state_id ts_subtree_leaf_parse_state(Subtree self) return self.ptr->first_leaf.parse_state; } -static inline Length ts_subtree_padding(Subtree self) +static inline t_parse_length ts_subtree_padding(Subtree self) { if (self.data.is_inline) { - Length result = {self.data.padding_bytes, + t_parse_length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}}; return result; } @@ -285,11 +284,11 @@ static inline Length ts_subtree_padding(Subtree self) } } -static inline Length 
ts_subtree_size(Subtree self) +static inline t_parse_length ts_subtree_size(Subtree self) { if (self.data.is_inline) { - Length result = {self.data.size_bytes, {0, self.data.size_bytes}}; + t_parse_length result = {self.data.size_bytes, {0, self.data.size_bytes}}; return result; } else @@ -298,7 +297,7 @@ static inline Length ts_subtree_size(Subtree self) } } -static inline Length ts_subtree_total_size(Subtree self) +static inline t_parse_length ts_subtree_total_size(Subtree self) { return length_add(ts_subtree_padding(self), ts_subtree_size(self)); } diff --git a/parser/src/tree.c b/parser/src/tree.c index a90aa6fc..0b033126 100644 --- a/parser/src/tree.c +++ b/parser/src/tree.c @@ -3,7 +3,7 @@ #include "parser/api.h" #include "./array.h" -#include "./length.h" +#include "parser/parser_length.h" #include "./subtree.h" #include "./tree_cursor.h" #include "./tree.h" @@ -46,7 +46,7 @@ t_parse_node ts_tree_root_node_with_offset( t_u32 offset_bytes, t_point offset_extent ) { - Length offset = {offset_bytes, offset_extent}; + t_parse_length offset = {offset_bytes, offset_extent}; return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0); } diff --git a/parser/src/tree.h b/parser/src/tree.h index c88c0ba8..e13c7d41 100644 --- a/parser/src/tree.h +++ b/parser/src/tree.h @@ -10,7 +10,7 @@ extern "C" { typedef struct { const Subtree *child; const Subtree *parent; - Length position; + t_parse_length position; t_symbol alias_symbol; } ParentCacheEntry; @@ -22,7 +22,7 @@ struct t_parse_tree { }; t_parse_tree *ts_tree_new(Subtree root, const t_language *language, const t_parser_range *, unsigned); -t_parse_node ts_node_new(const t_parse_tree *, const Subtree *, Length, t_symbol); +t_parse_node ts_node_new(const t_parse_tree *, const Subtree *, t_parse_length, t_symbol); #ifdef __cplusplus } diff --git a/parser/src/tree_cursor.c b/parser/src/tree_cursor.c index bcc95155..7f7b129a 100644 --- a/parser/src/tree_cursor.c +++ 
b/parser/src/tree_cursor.c @@ -7,7 +7,7 @@ typedef struct { Subtree parent; const t_parse_tree *tree; - Length position; + t_parse_length position; t_u32 child_index; t_u32 structural_child_index; t_u32 descendant_index; @@ -101,12 +101,12 @@ static inline bool ts_tree_cursor_child_iterator_next( // can only be computed if `b` has zero rows. Otherwise, this function // returns `LENGTH_UNDEFINED`, and the caller needs to recompute // the position some other way. -static inline Length length_backtrack(Length a, Length b) { +static inline t_parse_length length_backtrack(t_parse_length a, t_parse_length b) { if (length_is_undefined(a) || b.extent.row != 0) { return LENGTH_UNDEFINED; } - Length result; + t_parse_length result; result.bytes = a.bytes - b.bytes; result.extent.row = a.extent.row; result.extent.column = a.extent.column - b.extent.column; @@ -141,7 +141,7 @@ static inline bool ts_tree_cursor_child_iterator_previous( // unsigned can underflow so compare it to child_count if (self->child_index < self->parent.ptr->child_count) { Subtree previous_child = ts_subtree_children(self->parent)[self->child_index]; - Length size = ts_subtree_size(previous_child); + t_parse_length size = ts_subtree_size(previous_child); self->position = length_backtrack(self->position, size); } @@ -273,7 +273,7 @@ static inline t_i64 ts_tree_cursor_goto_first_child_for_byte_and_point( TreeCursorEntry entry; CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - Length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree)); + t_parse_length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree)); bool at_goal = entry_end.bytes >= goal_byte && point_gte(entry_end.extent, goal_point); t_u32 visible_child_count = ts_subtree_visible_child_count(*entry.subtree); if (at_goal) { @@ -374,7 +374,7 @@ TreeCursorStep 
ts_tree_cursor_goto_previous_sibling_internal(t_parse_tree_cursor // restore position from the parent node const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2]; - Length position = parent->position; + t_parse_length position = parent->position; t_u32 child_index = array_back(&self->stack)->child_index; const Subtree *children = ts_subtree_children((*(parent->subtree))); diff --git a/parser/src/tree_cursor.h b/parser/src/tree_cursor.h index 6e8dd7a5..e63c9757 100644 --- a/parser/src/tree_cursor.h +++ b/parser/src/tree_cursor.h @@ -5,7 +5,7 @@ typedef struct { const Subtree *subtree; - Length position; + t_parse_length position; t_u32 child_index; t_u32 structural_child_index; t_u32 descendant_index; diff --git a/sources/main.c b/sources/main.c index 9ed61a75..c7bd4fd9 100644 --- a/sources/main.c +++ b/sources/main.c @@ -6,105 +6,113 @@ /* By: rparodi +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/03/28 14:40:38 by rparodi #+# #+# */ -/* Updated: 2024/04/29 13:29:01 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 13:02:39 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ #include "../includes/minishell.h" -t_i32 ft_check_type_operators(t_str operators) { - if (operators == NULL) - printf("End of input"); - else if (ft_strcmp(operators, ">") == 0) - printf("Have to redirect in the file\n"); - else if (ft_strcmp(operators, ">>") == 0) - printf("Have to redirect at the end of the file after\n"); - else if (ft_strcmp(operators, ">&") == 0) - printf("Have to redirect the stdout in the file\n"); - else if (ft_strcmp(operators, "<") == 0) - printf("Have to redirect at the end of the file before\n"); - else if (ft_strcmp(operators, "<<") == 0) - printf("Have to redirect at the end of the file after\n"); - else if (ft_strcmp(operators, "<&") == 0) - printf("Have to redirect the stdout in the file\n"); - else if (ft_strcmp(operators, ";") == 0) - printf("Have to execute one 
more command\n"); - else if (ft_strcmp(operators, ";") == 0) - printf("Have to execute one more command\n"); - else if (ft_strcmp(operators, "|") == 0) - printf("I have to pipe a operators !\n"); - else if (ft_strcmp(operators, "||") == 0) - printf("Or something\n"); - else if (ft_strcmp(operators, "&&") == 0) - printf("Only if the first has exit status 0\n"); - else if (ft_strcmp(operators, "&") == 0) - printf("Parreil mais chelou\n"); - else - return (0); - return (1); +t_i32 ft_check_type_operators(t_str operators) +{ + if (operators == NULL) + printf("End of input"); + else if (ft_strcmp(operators, ">") == 0) + printf("Have to redirect in the file\n"); + else if (ft_strcmp(operators, ">>") == 0) + printf("Have to redirect at the end of the file after\n"); + else if (ft_strcmp(operators, ">&") == 0) + printf("Have to redirect the stdout in the file\n"); + else if (ft_strcmp(operators, "<") == 0) + printf("Have to redirect at the end of the file before\n"); + else if (ft_strcmp(operators, "<<") == 0) + printf("Have to redirect at the end of the file after\n"); + else if (ft_strcmp(operators, "<&") == 0) + printf("Have to redirect the stdout in the file\n"); + else if (ft_strcmp(operators, ";") == 0) + printf("Have to execute one more command\n"); + else if (ft_strcmp(operators, ";") == 0) + printf("Have to execute one more command\n"); + else if (ft_strcmp(operators, "|") == 0) + printf("I have to pipe a operators !\n"); + else if (ft_strcmp(operators, "||") == 0) + printf("Or something\n"); + else if (ft_strcmp(operators, "&&") == 0) + printf("Only if the first has exit status 0\n"); + else if (ft_strcmp(operators, "&") == 0) + printf("Parreil mais chelou\n"); + else + return (0); + return (1); } -void ft_check(t_utils *shcat, char **input) { - t_usize i; - t_usize prev_i; +void ft_check(t_utils *shcat, char **input) +{ + t_usize i; + t_usize prev_i; - i = 0; - prev_i = 0; - while (input[i] != NULL) { - while (ft_check_type_operators(input[i]) == 1) - i++; - if 
(ft_strcmp(input[i], "exit") == 0) - ft_exit(shcat, 0); - else if (ft_strcmp(input[i], "pwd") == 0) - ft_pwd(); - else if (ft_strcmp(input[i], "echo") == 0) - ft_echo("ECHO MAIS PAS ARG BORDEL !\n", "flag"); - else - ft_other_cmd(shcat, i, prev_i); - prev_i = i; - i++; - } + i = 0; + prev_i = 0; + while (input[i] != NULL) + { + while (ft_check_type_operators(input[i]) == 1) + i++; + if (ft_strcmp(input[i], "exit") == 0) + ft_exit(shcat, 0); + else if (ft_strcmp(input[i], "pwd") == 0) + ft_pwd(); + else if (ft_strcmp(input[i], "echo") == 0) + ft_echo("ECHO MAIS PAS ARG BORDEL !\n", "flag"); + else + ft_other_cmd(shcat, i, prev_i); + prev_i = i; + i++; + } } -void ft_take_args(t_utils *shcat) { - t_i32 i; +void ft_take_args(t_utils *shcat) +{ + t_i32 i; - i = 0; - while (1) { - shcat->str_input = readline((t_const_str)shcat->name_shell); - if (!shcat->str_input) - ft_exit(shcat, 0); - shcat->strs_input = ft_split(shcat->str_input, ' '); - if (!shcat->strs_input) - exit(1); - ft_check(shcat, shcat->strs_input); - add_history(shcat->str_input); - ft_free_strs(shcat->strs_input); - free(shcat->str_input); - i++; - } + i = 0; + while (1) + { + shcat->str_input = readline((t_const_str)shcat->name_shell); + if (!shcat->str_input) + ft_exit(shcat, 0); + shcat->strs_input = ft_split(shcat->str_input, ' '); + if (!shcat->strs_input) + exit(1); + ft_check(shcat, shcat->strs_input); + add_history(shcat->str_input); + ft_free_strs(shcat->strs_input); + free(shcat->str_input); + i++; + } } -void ft_init_arge(t_str arge[], t_utils *utils) { - size_t i; - char *temp; +void ft_init_arge(t_str arge[], t_utils *utils) +{ + size_t i; + char *temp; - i = 0; - temp = NULL; - while (arge[i] != NULL) { - if (arge[i][0] == 'P' && arge[i][1] == 'A' && arge[i][2] == 'T' && - arge[i][3] == 'H' && arge[i][4] == '=') { - temp = ft_strdup(arge[i] + 5); - if (!temp) - ft_exit(utils, 1); - else - utils->path = ft_split(temp, ':'); - break; - } - i++; - } - if (temp != NULL) - free(temp); + i = 0; + 
temp = NULL; + while (arge[i] != NULL) + { + if (arge[i][0] == 'P' && arge[i][1] == 'A' && arge[i][2] == 'T' && + arge[i][3] == 'H' && arge[i][4] == '=') + { + temp = ft_strdup(arge[i] + 5); + if (!temp) + ft_exit(utils, 1); + else + utils->path = ft_split(temp, ':'); + break; + } + i++; + } + if (temp != NULL) + free(temp); } #include "app/node.h" @@ -112,58 +120,67 @@ void ft_init_arge(t_str arge[], t_utils *utils) { #include "parser/api.h" #include "parser/parser.h" -TSLanguage *tree_sitter_bash(void); +t_language *tree_sitter_bash(void); -t_node parse_to_nodes(TSParser *parser, t_const_str input) { - TSTree *tree; - TSNode node; - t_node ret; +t_node parse_to_nodes(t_parser *parser, t_const_str input) +{ + t_parse_tree *tree; + t_parse_node node; + t_node ret; - tree = ts_parser_parse_string(parser, NULL, input, str_len(input)); - node = ts_tree_root_node(tree); - ret = build_node(node, input); - ts_tree_delete(tree); - return (ret); + tree = ts_parser_parse_string(parser, NULL, input, str_len(input)); + node = ts_tree_root_node(tree); + ret = build_node(node, input); + ts_tree_delete(tree); + return (ret); } -void print_node_data(t_node *t, t_usize depth) { - t_usize idx; +void print_node_data(t_node *t, t_usize depth) +{ + t_usize idx; - idx = 0; - while (idx++ < depth) - printf("\t"); - idx = 0; - printf("%s = %s\n", t->kind_str, node_getstr(t)); - while (idx < t->childs_count) - print_node_data(&t->childs[idx++], depth + 1); + idx = 0; + while (idx++ < depth) + printf("\t"); + idx = 0; + printf("%s = %s\n", t->kind_str, node_getstr(t)); + while (idx < t->childs_count) + print_node_data(&t->childs[idx++], depth + 1); } -typedef struct s_myparser { - TSParser *parser; +typedef struct s_myparser +{ + t_parser *parser; } t_myparser; -t_myparser create_myparser(void) { - TSLanguage *lang; - TSParser *parser; +t_myparser create_myparser(void) +{ + t_language *lang; + t_parser *parser; - lang = tree_sitter_bash(); - parser = ts_parser_new(); - 
ts_parser_set_language(parser, lang); - return ((t_myparser){.parser = parser}); + lang = tree_sitter_bash(); + parser = ts_parser_new(); + ts_parser_set_language(parser, lang); + return ((t_myparser){.parser = parser}); } -void free_myparser(t_myparser self) { ts_parser_delete(self.parser); } - -t_node parse_string(t_myparser *parser, t_const_str input) { - return (parse_to_nodes(parser->parser, input)); +void free_myparser(t_myparser self) +{ + ts_parser_delete(self.parser); } -t_i32 main() { - t_myparser parser; - t_node node; - - parser = create_myparser(); - node = parse_string(&parser, "banane \"$VAR\"'truc'"); - print_node_data(&node, 0); - free_node(node); +t_node parse_string(t_myparser *parser, t_const_str input) +{ + return (parse_to_nodes(parser->parser, input)); +} + +t_i32 main() +{ + t_myparser parser; + t_node node; + + parser = create_myparser(); + node = parse_string(&parser, "banane \"$VAR\"'truc'"); + print_node_data(&node, 0); + free_node(node); } diff --git a/sources/node/node.c b/sources/node/node.c index 6c1cfc85..776ba23b 100644 --- a/sources/node/node.c +++ b/sources/node/node.c @@ -16,13 +16,13 @@ #include "me/string/str_l_copy.h" #include "parser/api.h" -t_node build_node(TSNode curr, t_const_str input); +t_node build_node(t_parse_node curr, t_const_str input); -t_node *build_childs(TSNode parent, t_const_str input, t_usize count) +t_node *build_childs(t_parse_node parent, t_const_str input, t_usize count) { t_node *ret; t_usize idx; - TSNode child; + t_parse_node child; ret = mem_alloc_array(sizeof(*ret), count); if (ret == NULL) @@ -37,7 +37,7 @@ t_node *build_childs(TSNode parent, t_const_str input, t_usize count) return (ret); } -t_node build_node(TSNode curr, t_const_str input) +t_node build_node(t_parse_node curr, t_const_str input) { t_node out; From a22b9ea23438b79511bf31c5f4e4b0670b59bd51 Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Tue, 30 Apr 2024 14:20:06 +0200 Subject: [PATCH 02/14] Compiling! 
--- Minishell.mk | 6 +- gen.list | 3 + input.toml | 8 + output/include/me/vec/vec_parser_heredoc.h | 58 ++++++ output/include/me/vec/vec_parser_range.h | 1 + output/src/vec/vec_parser_heredoc.c | 115 ++++++++++++ .../src/vec/vec_parser_heredoc_functions2.c | 112 +++++++++++ .../src/vec/vec_parser_heredoc_functions3.c | 84 +++++++++ output/src/vec/vec_parser_range_functions3.c | 11 ++ parser/includes/types/types_heredoc.h | 28 +++ parser/includes/types/types_scanner_ctx.h | 11 +- parser/src/scanner.c | 175 ++++++++---------- .../header/vec_C__PREFIX__.h__TEMPLATE__ | 1 + .../vec_C__PREFIX___functions3.c__TEMPLATE__ | 11 ++ stdme/include/me/buffered_str/buf_str.h | 3 +- stdme/output/include/me/vec/vec_buf_str.h | 1 + stdme/output/include/me/vec/vec_str.h | 1 + stdme/output/include/me/vec/vec_u8.h | 1 + stdme/output/src/vec/vec_buf_str_functions3.c | 11 ++ stdme/output/src/vec/vec_str_functions3.c | 11 ++ stdme/output/src/vec/vec_u8_functions3.c | 11 ++ stdme/src.list | 1 + stdme/src/buffered_str/mod.c | 43 +++-- stdme/src/buffered_str/push_char.c | 13 ++ 24 files changed, 607 insertions(+), 113 deletions(-) create mode 100644 output/include/me/vec/vec_parser_heredoc.h create mode 100644 output/src/vec/vec_parser_heredoc.c create mode 100644 output/src/vec/vec_parser_heredoc_functions2.c create mode 100644 output/src/vec/vec_parser_heredoc_functions3.c create mode 100644 parser/includes/types/types_heredoc.h create mode 100644 stdme/src/buffered_str/push_char.c diff --git a/Minishell.mk b/Minishell.mk index 4baef4b2..f4ebf28a 100644 --- a/Minishell.mk +++ b/Minishell.mk @@ -6,7 +6,7 @@ # By: maiboyer +#+ +:+ +#+ # # +#+#+#+#+#+ +#+ # # Created: 2024/04/28 17:28:30 by maiboyer #+# #+# # -# Updated: 2024/04/29 14:08:10 by maiboyer ### ########.fr # +# Updated: 2024/04/30 14:19:46 by maiboyer ### ########.fr # # # # **************************************************************************** # @@ -64,7 +64,7 @@ bonus: $(OBJ) $(LIB_OBJ) $(OBJDIRNAME)/libme.a 
$(OBJDIRNAME)/libgmr.a @mkdir -p $(OBJDIRNAME)/$(LIBDIRNAME) @mkdir -p $(OBJDIRNAME)/$(SRCDIRNAME) @printf '$(GREY) Be Carefull ur in $(END)$(GREEN)Debug Mode$(END)\n' - @cc $(CFLAGS) -D DEBUG=42 -o $(NAME) $(OBJ) -L$(OBJDIRNAME) -lme -lgmr + @cc $(CFLAGS) -D DEBUG=42 -o $(NAME) $(OBJ) -L$(OBJDIRNAME) -lgmr -lme # Dependences for all $(NAME): $(OBJ) $(LIB_OBJ) $(OBJDIRNAME)/libgmr.a $(OBJDIRNAME)/libme.a @@ -72,7 +72,7 @@ $(NAME): $(OBJ) $(LIB_OBJ) $(OBJDIRNAME)/libgmr.a $(OBJDIRNAME)/libme.a @mkdir -p $(OBJDIRNAME)/$(LIBDIRNAME) @mkdir -p $(OBJDIRNAME)/$(SRCDIRNAME) @echo "$(GREY) Linking $(END)$(GREEN)$(NAME)$(END)" - @cc $(CFLAGS) -o $(NAME) $(OBJ) $(LIB_OBJ) -L$(OBJDIRNAME) -lme -lgmr + @cc $(CFLAGS) -o $(NAME) $(OBJ) $(LIB_OBJ) -L$(OBJDIRNAME) -lgmr -lme # Creating the objects $(OBJDIRNAME)/%.o: %.c diff --git a/gen.list b/gen.list index 9d07c83a..82ebe88f 100644 --- a/gen.list +++ b/gen.list @@ -1,3 +1,6 @@ +src/vec/vec_parser_heredoc.c +src/vec/vec_parser_heredoc_functions2.c +src/vec/vec_parser_heredoc_functions3.c src/vec/vec_parser_range.c src/vec/vec_parser_range_functions2.c src/vec/vec_parser_range_functions3.c diff --git a/input.toml b/input.toml index 86e0ac3f..c2f4d221 100644 --- a/input.toml +++ b/input.toml @@ -44,3 +44,11 @@ replace.C__TYPENAME__ = "t_parser_range" replace.C__TYPEHEADER__ = '#include "parser/types/types_parser_range.h"' replace.C__PREFIX__ = "parser_range" replace.C__PREFIXUP__ = "PARSER_RANGE" + +[[create.vec]] +sources_output = "src/vec/" +headers_output = "include/me/vec/" +replace.C__TYPENAME__ = "t_heredoc" +replace.C__TYPEHEADER__ = '#include "parser/types/types_heredoc.h"' +replace.C__PREFIX__ = "parser_heredoc" +replace.C__PREFIXUP__ = "PARSER_HEREDOC" diff --git a/output/include/me/vec/vec_parser_heredoc.h b/output/include/me/vec/vec_parser_heredoc.h new file mode 100644 index 00000000..b6780051 --- /dev/null +++ b/output/include/me/vec/vec_parser_heredoc.h @@ -0,0 +1,58 @@ +/* 
************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* vec_parser_heredoc.h :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2023/12/04 18:46:53 by maiboyer #+# #+# */ +/* Updated: 2023/12/09 17:53:00 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef VEC_PARSER_HEREDOC_H +#define VEC_PARSER_HEREDOC_H + +#include "parser/types/types_heredoc.h" +#include "me/types.h" + +typedef bool (*t_vec_parser_heredoc_sort_fn)(t_heredoc *, t_heredoc *); +typedef void (*t_free_parser_heredoc_item)(t_heredoc); + +typedef struct s_vec_parser_heredoc +{ + t_free_parser_heredoc_item free_func; + t_usize len; + t_usize capacity; + t_heredoc *buffer; +} t_vec_parser_heredoc; + +t_vec_parser_heredoc vec_parser_heredoc_new(t_usize capacity, + t_free_parser_heredoc_item free_function); +t_error vec_parser_heredoc_push(t_vec_parser_heredoc *vec, t_heredoc element); +t_error vec_parser_heredoc_push_front(t_vec_parser_heredoc *vec, + t_heredoc element); +t_error vec_parser_heredoc_pop(t_vec_parser_heredoc *vec, t_heredoc *value); +t_error vec_parser_heredoc_pop_front(t_vec_parser_heredoc *vec, t_heredoc *value); +void vec_parser_heredoc_free(t_vec_parser_heredoc vec); +t_error vec_parser_heredoc_reserve(t_vec_parser_heredoc *vec, + t_usize wanted_capacity); +t_error vec_parser_heredoc_find(t_vec_parser_heredoc *vec, + bool (*fn)(const t_heredoc *), t_usize *index); +t_error vec_parser_heredoc_find_starting(t_vec_parser_heredoc *vec, + bool (*fn)(const t_heredoc *), + t_usize starting_index, t_usize *index); +t_error vec_parser_heredoc_all(t_vec_parser_heredoc *vec, + bool (*fn)(const t_heredoc *), bool *result); +t_error vec_parser_heredoc_any(t_vec_parser_heredoc *vec, + bool (*fn)(const t_heredoc *), bool *result); +void vec_parser_heredoc_iter(t_vec_parser_heredoc *vec, + void (*fn)(t_usize index, 
t_heredoc *value, + void *state), + void *state); +void vec_parser_heredoc_reverse(t_vec_parser_heredoc *vec); +void vec_parser_heredoc_sort(t_vec_parser_heredoc *vec, + t_vec_parser_heredoc_sort_fn is_sorted); +t_error vec_parser_heredoc_back(t_vec_parser_heredoc *vec, t_heredoc **out); + +#endif diff --git a/output/include/me/vec/vec_parser_range.h b/output/include/me/vec/vec_parser_range.h index 1eac9f77..7beecdad 100644 --- a/output/include/me/vec/vec_parser_range.h +++ b/output/include/me/vec/vec_parser_range.h @@ -53,5 +53,6 @@ void vec_parser_range_iter(t_vec_parser_range *vec, void vec_parser_range_reverse(t_vec_parser_range *vec); void vec_parser_range_sort(t_vec_parser_range *vec, t_vec_parser_range_sort_fn is_sorted); +t_error vec_parser_range_back(t_vec_parser_range *vec, t_parser_range **out); #endif diff --git a/output/src/vec/vec_parser_heredoc.c b/output/src/vec/vec_parser_heredoc.c new file mode 100644 index 00000000..34db7c57 --- /dev/null +++ b/output/src/vec/vec_parser_heredoc.c @@ -0,0 +1,115 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* vec_parser_heredoc.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2023/12/05 18:46:28 by maiboyer #+# #+# */ +/* Updated: 2023/12/09 17:54:11 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/mem/mem_alloc_array.h" +#include "me/mem/mem_copy.h" +#include "me/mem/mem_set_zero.h" +#include "me/types.h" +#include "me/vec/vec_parser_heredoc.h" +#include + +t_vec_parser_heredoc vec_parser_heredoc_new(t_usize capacity, + t_free_parser_heredoc_item free_function) +{ + t_vec_parser_heredoc out; + + out = (t_vec_parser_heredoc){0}; + out.free_func = free_function; + out.buffer = mem_alloc_array(capacity, sizeof(t_heredoc)); + if (out.buffer) + out.capacity = capacity; + return (out); +} + +/// Return true in 
case of an error +t_error vec_parser_heredoc_push(t_vec_parser_heredoc *vec, t_heredoc element) +{ + t_heredoc *temp_buffer; + size_t new_capacity; + + if (vec == NULL) + return (ERROR); + if (vec->len + 1 > vec->capacity) + { + new_capacity = (vec->capacity * 3) / 2 + 1; + while (vec->len + 1 > new_capacity) + new_capacity = (new_capacity * 3) / 2 + 1; + temp_buffer = mem_alloc_array(new_capacity, sizeof(t_heredoc)); + if (temp_buffer == NULL) + return (ERROR); + mem_copy(temp_buffer, vec->buffer, vec->len * sizeof(t_heredoc)); + free(vec->buffer); + vec->buffer = temp_buffer; + vec->capacity = new_capacity; + } + vec->buffer[vec->len] = element; + vec->len += 1; + return (NO_ERROR); +} + +/// Return true in case of an error +t_error vec_parser_heredoc_reserve(t_vec_parser_heredoc *vec, t_usize wanted_capacity) +{ + t_heredoc *temp_buffer; + size_t new_capacity; + + if (vec == NULL) + return (ERROR); + if (wanted_capacity > vec->capacity) + { + new_capacity = (vec->capacity * 3) / 2 + 1; + while (wanted_capacity > new_capacity) + new_capacity = (new_capacity * 3) / 2 + 1; + temp_buffer = mem_alloc_array(new_capacity, sizeof(t_heredoc)); + if (temp_buffer == NULL) + return (ERROR); + mem_copy(temp_buffer, vec->buffer, vec->len * sizeof(t_heredoc)); + free(vec->buffer); + vec->buffer = temp_buffer; + vec->capacity = new_capacity; + } + return (NO_ERROR); +} + +/// Return true if the vector is empty +/// This function is safe to call with value being NULL +t_error vec_parser_heredoc_pop(t_vec_parser_heredoc *vec, t_heredoc *value) +{ + t_heredoc temp_value; + t_heredoc *ptr; + + if (vec == NULL) + return (ERROR); + ptr = value; + if (vec->len == 0) + return (ERROR); + if (value == NULL) + ptr = &temp_value; + vec->len--; + *ptr = vec->buffer[vec->len]; + mem_set_zero(&vec->buffer[vec->len], sizeof(t_heredoc)); + return (NO_ERROR); +} + +/// This function is safe to call with `free_elem` being NULL +void vec_parser_heredoc_free(t_vec_parser_heredoc vec) +{ + if 
(vec.free_func) + { + while (vec.len) + { + vec.free_func(vec.buffer[vec.len - 1]); + vec.len--; + } + } + free(vec.buffer); +} diff --git a/output/src/vec/vec_parser_heredoc_functions2.c b/output/src/vec/vec_parser_heredoc_functions2.c new file mode 100644 index 00000000..f38b9f24 --- /dev/null +++ b/output/src/vec/vec_parser_heredoc_functions2.c @@ -0,0 +1,112 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* vec_parser_heredoc.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */ +/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/mem/mem_alloc_array.h" +#include "me/mem/mem_copy.h" +#include "me/mem/mem_set_zero.h" +#include "me/types.h" +#include "me/vec/vec_parser_heredoc.h" +#include + +t_error vec_parser_heredoc_find(t_vec_parser_heredoc *vec, + bool (*fn)(const t_heredoc *), t_usize *index) +{ + t_usize idx; + + if (vec == NULL || fn == NULL || index == NULL) + return (ERROR); + idx = 0; + while (idx < vec->len) + { + if (fn(&vec->buffer[idx])) + { + *index = idx; + return (NO_ERROR); + } + idx++; + } + return (ERROR); +} + +t_error vec_parser_heredoc_find_starting(t_vec_parser_heredoc *vec, + bool (*fn)(const t_heredoc *), + t_usize starting_index, t_usize *index) +{ + t_usize idx; + + if (vec == NULL || fn == NULL || index == NULL) + return (ERROR); + idx = starting_index; + while (idx < vec->len) + { + if (fn(&vec->buffer[idx])) + { + *index = idx; + return (NO_ERROR); + } + idx++; + } + return (ERROR); +} + +t_error vec_parser_heredoc_all(t_vec_parser_heredoc *vec, + bool (*fn)(const t_heredoc *), bool *result) +{ + t_usize idx; + + if (vec == NULL || fn == NULL || result == NULL) + return (ERROR); + idx = 0; + *result = true; + while (*result && idx < vec->len) + { + if 
(!fn(&vec->buffer[idx])) + *result = false; + idx++; + } + return (ERROR); +} + +t_error vec_parser_heredoc_any(t_vec_parser_heredoc *vec, + bool (*fn)(const t_heredoc *), bool *result) +{ + t_usize idx; + + if (vec == NULL || fn == NULL || result == NULL) + return (ERROR); + idx = 0; + *result = false; + while (*result && idx < vec->len) + { + if (fn(&vec->buffer[idx])) + *result = true; + idx++; + } + return (ERROR); +} + +void vec_parser_heredoc_iter(t_vec_parser_heredoc *vec, + void (*fn)(t_usize index, t_heredoc *value, + void *state), + void *state) +{ + t_usize idx; + + if (vec == NULL || fn == NULL) + return; + idx = 0; + while (idx < vec->len) + { + fn(idx, &vec->buffer[idx], state); + idx++; + } +} diff --git a/output/src/vec/vec_parser_heredoc_functions3.c b/output/src/vec/vec_parser_heredoc_functions3.c new file mode 100644 index 00000000..f9677148 --- /dev/null +++ b/output/src/vec/vec_parser_heredoc_functions3.c @@ -0,0 +1,84 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* vec_parser_heredoc.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */ +/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/mem/mem_alloc_array.h" +#include "me/mem/mem_copy.h" +#include "me/mem/mem_set_zero.h" +#include "me/types.h" +#include "me/vec/vec_parser_heredoc.h" +#include + +t_error vec_parser_heredoc_push_front(t_vec_parser_heredoc *vec, + t_heredoc element) +{ + t_usize i; + + if (vec->len == 0) + return (vec_parser_heredoc_push(vec, element)); + i = vec->len - 1; + if (vec->capacity < vec->len + 1 && + vec_parser_heredoc_reserve(vec, 3 * vec->len / 2 + 1)) + return (ERROR); + while (i > 0) + { + vec->buffer[i + 1] = vec->buffer[i]; + i--; + } + vec->buffer[1] = vec->buffer[0]; + vec->buffer[0] = 
element; + vec->len++; + return (NO_ERROR); +} + +t_error vec_parser_heredoc_pop_front(t_vec_parser_heredoc *vec, t_heredoc *value) +{ + t_usize i; + + if (vec->len <= 1) + return (vec_parser_heredoc_pop(vec, value)); + i = 0; + *value = vec->buffer[0]; + vec->len--; + while (i < vec->len) + { + vec->buffer[i] = vec->buffer[i + 1]; + i++; + } + mem_set_zero(&vec->buffer[i], sizeof(*vec->buffer)); + return (NO_ERROR); +} + +void vec_parser_heredoc_reverse(t_vec_parser_heredoc *vec) +{ + t_heredoc temporary; + t_usize i; + + i = 0; + while (i < vec->len / 2) + { + temporary = vec->buffer[vec->len - 1 - i]; + vec->buffer[vec->len - 1 - i] = vec->buffer[i]; + vec->buffer[i] = temporary; + i++; + } +} + +t_error vec_parser_heredoc_back(t_vec_parser_heredoc *vec, t_heredoc **out) +{ + t_heredoc *temporary; + + if (out == NULL) + out = &temporary; + if (vec->len != 0) + return (*out = &vec->buffer[vec->len - 1], true); + return (false); +} diff --git a/output/src/vec/vec_parser_range_functions3.c b/output/src/vec/vec_parser_range_functions3.c index b7c4446e..a7ac9012 100644 --- a/output/src/vec/vec_parser_range_functions3.c +++ b/output/src/vec/vec_parser_range_functions3.c @@ -71,3 +71,14 @@ void vec_parser_range_reverse(t_vec_parser_range *vec) i++; } } + +t_error vec_parser_range_back(t_vec_parser_range *vec, t_parser_range **out) +{ + t_parser_range *temporary; + + if (out == NULL) + out = &temporary; + if (vec->len != 0) + return (*out = &vec->buffer[vec->len - 1], true); + return (false); +} diff --git a/parser/includes/types/types_heredoc.h b/parser/includes/types/types_heredoc.h new file mode 100644 index 00000000..6d34f004 --- /dev/null +++ b/parser/includes/types/types_heredoc.h @@ -0,0 +1,28 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* types_heredoc.h :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/04/30 13:47:07 by maiboyer #+# #+# 
*/ +/* Updated: 2024/04/30 13:48:19 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef TYPES_HEREDOC_H +#define TYPES_HEREDOC_H + +#include "me/buffered_str/buf_str.h" +#include "me/types.h" + +typedef struct s_heredoc +{ + bool is_raw; + bool started; + bool allows_indent; + t_buffer_str delimiter; + t_buffer_str current_leading_word; +} t_heredoc; + +#endif /* TYPES_HEREDOC_H */ diff --git a/parser/includes/types/types_scanner_ctx.h b/parser/includes/types/types_scanner_ctx.h index b1807c68..0aba3d08 100644 --- a/parser/includes/types/types_scanner_ctx.h +++ b/parser/includes/types/types_scanner_ctx.h @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/04/30 13:41:02 by maiboyer #+# #+# */ -/* Updated: 2024/04/30 13:41:29 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 13:50:24 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -14,13 +14,14 @@ #define TYPES_SCANNER_CTX_H #include "me/types.h" +#include "me/vec/vec_parser_heredoc.h" typedef struct s_scanner_ctx { - t_u8 last_glob_paren_depth; - bool ext_was_in_double_quote; - bool ext_saw_outside_quote; - // Array(t_heredoc) heredocs; + t_u8 last_glob_paren_depth; + bool ext_was_in_double_quote; + bool ext_saw_outside_quote; + t_vec_parser_heredoc heredocs; } t_scanner_ctx; #endif /* TYPES_SCANNER_CTX_H */ diff --git a/parser/src/scanner.c b/parser/src/scanner.c index abc2d8f5..4657955c 100644 --- a/parser/src/scanner.c +++ b/parser/src/scanner.c @@ -1,6 +1,7 @@ -#include "array.h" +#include "me/mem/mem_alloc.h" +#include "me/types.h" +#include "me/vec/vec_parser_heredoc.h" #include "parser/types/types_lexer.h" -#include "parser/types/types_scanner.h" #include "parser/types/types_scanner_ctx.h" #include @@ -42,26 +43,14 @@ enum TokenType ERROR_RECOVERY, }; -typedef Array(char) t_string; -// typedef void *String; - -typedef 
struct s_heredoc -{ - bool is_raw; - bool started; - bool allows_indent; - t_string delimiter; - t_string current_leading_word; -} t_heredoc; - static inline t_heredoc heredoc_new(void) { return ((t_heredoc){ .is_raw = false, .started = false, .allows_indent = false, - .delimiter = array_new(), - .current_leading_word = array_new(), + .delimiter = alloc_new_buffer(0), + .current_leading_word = alloc_new_buffer(0), }); } @@ -80,12 +69,12 @@ static inline bool in_error_recovery(const bool *valid_symbols) return valid_symbols[ERROR_RECOVERY]; } -static inline void reset_string(t_string *string) +static inline void reset_string(t_buffer_str *string) { - if (string->size > 0) + if (string->len > 0) { - memset(string->contents, 0, string->size); - array_clear(string); + memset(string->buf, 0, string->len); + string->len = 0; } } @@ -97,19 +86,19 @@ static inline void reset_heredoc(t_heredoc *heredoc) reset_string(&heredoc->delimiter); } -static inline void reset(t_scanner *scanner) +static inline void reset(t_scanner_ctx *scanner) { t_u32 i; i = 0; - while (i < scanner->heredocs.size) + while (i < scanner->heredocs.len) { - reset_heredoc(array_get(&scanner->heredocs, i)); + reset_heredoc(&scanner->heredocs.buffer[i]); i++; } } -static unsigned serialize(t_scanner *scanner, char *buffer) +static unsigned serialize(t_scanner_ctx *scanner, char *buffer) { t_u32 size; t_u32 i; @@ -119,31 +108,32 @@ static unsigned serialize(t_scanner *scanner, char *buffer) buffer[size++] = (char)scanner->last_glob_paren_depth; buffer[size++] = (char)scanner->ext_was_in_double_quote; buffer[size++] = (char)scanner->ext_saw_outside_quote; - buffer[size++] = (char)scanner->heredocs.size; + buffer[size++] = (char)scanner->heredocs.len; i = 0; - while (i < scanner->heredocs.size) + while (i < scanner->heredocs.len) { - heredoc = array_get(&scanner->heredocs, i); - if (heredoc->delimiter.size + 3 + size >= + heredoc = &scanner->heredocs.buffer[i]; + if (heredoc->delimiter.len + 3 + size >= 
TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return 0; buffer[size++] = (char)heredoc->is_raw; buffer[size++] = (char)heredoc->started; buffer[size++] = (char)heredoc->allows_indent; - memcpy(&buffer[size], &heredoc->delimiter.size, sizeof(t_u32)); + memcpy(&buffer[size], &heredoc->delimiter.len, sizeof(t_u32)); size += sizeof(t_u32); - if (heredoc->delimiter.size > 0) + if (heredoc->delimiter.len > 0) { - memcpy(&buffer[size], heredoc->delimiter.contents, - heredoc->delimiter.size); - size += heredoc->delimiter.size; + memcpy(&buffer[size], heredoc->delimiter.buf, + heredoc->delimiter.len); + size += heredoc->delimiter.len; } i++; } return size; } -static void deserialize(t_scanner *scanner, const char *buffer, unsigned length) +static void deserialize(t_scanner_ctx *scanner, const char *buffer, + unsigned length) { t_u32 size; t_u32 heredoc_count; @@ -163,24 +153,24 @@ static void deserialize(t_scanner *scanner, const char *buffer, unsigned length) while (i < heredoc_count) { heredoc = NULL; - if (i < scanner->heredocs.size) - heredoc = array_get(&scanner->heredocs, i); + if (i < scanner->heredocs.len) + heredoc = &scanner->heredocs.buffer[i]; else { - array_push(&scanner->heredocs, heredoc_new()); - heredoc = array_back(&scanner->heredocs); + vec_parser_heredoc_push(&scanner->heredocs, heredoc_new()); + heredoc = &scanner->heredocs.buffer[scanner->heredocs.len - 1]; } heredoc->is_raw = buffer[size++]; heredoc->started = buffer[size++]; heredoc->allows_indent = buffer[size++]; - memcpy(&heredoc->delimiter.size, &buffer[size], sizeof(t_u32)); + memcpy(&heredoc->delimiter.len, &buffer[size], sizeof(t_u32)); size += sizeof(t_u32); - array_reserve(&heredoc->delimiter, heredoc->delimiter.size); - if (heredoc->delimiter.size > 0) + str_reserve(&heredoc->delimiter, heredoc->delimiter.len); + if (heredoc->delimiter.len > 0) { - memcpy(heredoc->delimiter.contents, &buffer[size], - heredoc->delimiter.size); - size += heredoc->delimiter.size; + memcpy(heredoc->delimiter.buf, 
&buffer[size], + heredoc->delimiter.len); + size += heredoc->delimiter.len; } i++; } @@ -195,7 +185,7 @@ static void deserialize(t_scanner *scanner, const char *buffer, unsigned length) * POSIX-mandated substitution, and assumes the default value for * IFS. */ -static bool advance_word(t_lexer *lexer, t_string *unquoted_word) +static bool advance_word(t_lexer *lexer, t_buffer_str *unquoted_word) { bool empty; t_i32 quote; @@ -216,10 +206,9 @@ static bool advance_word(t_lexer *lexer, t_string *unquoted_word) return (false); } empty = false; - array_push(unquoted_word, lexer->lookahead); + push_str_char(unquoted_word, lexer->lookahead); advance(lexer); } - array_push(unquoted_word, '\0'); if (quote && lexer->lookahead == quote) advance(lexer); return (!empty); @@ -269,31 +258,29 @@ static bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer) t_i32 size; size = 0; - if (heredoc->delimiter.size > 0) + if (heredoc->delimiter.len > 0) { while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && - (t_i32)*array_get(&heredoc->delimiter, size) == - lexer->lookahead && - heredoc->current_leading_word.size < heredoc->delimiter.size) + (t_i32) * (&heredoc->delimiter.buf[size]) == lexer->lookahead && + heredoc->current_leading_word.len < heredoc->delimiter.len) { - array_push(&heredoc->current_leading_word, lexer->lookahead); + push_str_char(&heredoc->current_leading_word, lexer->lookahead); advance(lexer); size++; } } - array_push(&heredoc->current_leading_word, '\0'); - return heredoc->delimiter.size == 0 + return heredoc->delimiter.len == 0 ? 
false - : strcmp(heredoc->current_leading_word.contents, - heredoc->delimiter.contents) == 0; + : strcmp(heredoc->current_leading_word.buf, + heredoc->delimiter.buf) == 0; } -static bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, +static bool scan_heredoc_content(t_scanner_ctx *scanner, t_lexer *lexer, enum TokenType middle_type, enum TokenType end_type) { bool did_advance = false; - t_heredoc *heredoc = array_back(&scanner->heredocs); + t_heredoc *heredoc = (&scanner->heredocs.buffer[scanner->heredocs.len - 1]); for (;;) { @@ -368,9 +355,7 @@ static bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, if (scan_heredoc_end_identifier(heredoc, lexer)) { if (lexer->result_symbol == HEREDOC_END) - { - array_pop(&scanner->heredocs); - } + vec_parser_heredoc_pop(&scanner->heredocs, NULL); return true; } break; @@ -417,7 +402,7 @@ static bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, } } } -static bool regex_scan(t_scanner *scanner, t_lexer *lexer, +static bool regex_scan(t_scanner_ctx *scanner, t_lexer *lexer, const bool *valid_symbols) { (void)(scanner); @@ -642,7 +627,7 @@ static bool regex_scan(t_scanner *scanner, t_lexer *lexer, return (false); } -static bool extglob_pattern_scan(t_scanner *scanner, t_lexer *lexer, +static bool extglob_pattern_scan(t_scanner_ctx *scanner, t_lexer *lexer, const bool *valid_symbols) { if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols)) @@ -918,7 +903,7 @@ static bool extglob_pattern_scan(t_scanner *scanner, t_lexer *lexer, return (false); } -static bool expansion_word_scan(t_scanner *scanner, t_lexer *lexer, +static bool expansion_word_scan(t_scanner_ctx *scanner, t_lexer *lexer, const bool *valid_symbols) { (void)(scanner); @@ -1022,7 +1007,7 @@ static bool expansion_word_scan(t_scanner *scanner, t_lexer *lexer, return (false); } -static bool brace_start_scan(t_scanner *scanner, t_lexer *lexer, +static bool brace_start_scan(t_scanner_ctx *scanner, t_lexer *lexer, const bool 
*valid_symbols) { (void)(scanner); @@ -1074,7 +1059,8 @@ static bool brace_start_scan(t_scanner *scanner, t_lexer *lexer, } return (false); } -static bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) +static bool scan(t_scanner_ctx *scanner, t_lexer *lexer, + const bool *valid_symbols) { if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols)) { @@ -1178,38 +1164,41 @@ static bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) return (true); } } - + t_heredoc *back; + vec_parser_heredoc_back(&scanner->heredocs, &back); if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && - scanner->heredocs.size > 0 && - !array_back(&scanner->heredocs)->started && + scanner->heredocs.len > 0 && !back->started && !in_error_recovery(valid_symbols)) return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY)); - if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0) + if (valid_symbols[HEREDOC_END] && scanner->heredocs.len > 0) { - t_heredoc *heredoc = array_back(&scanner->heredocs); + t_heredoc *heredoc; + vec_parser_heredoc_back(&scanner->heredocs, &heredoc); if (scan_heredoc_end_identifier(heredoc, lexer)) { - array_delete(&heredoc->current_leading_word); - array_delete(&heredoc->delimiter); - array_pop(&scanner->heredocs); + str_free(heredoc->current_leading_word); + str_free(heredoc->delimiter); + scanner->heredocs.len -= 1; lexer->result_symbol = HEREDOC_END; return (true); } } - if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 && - array_back(&scanner->heredocs)->started && - !in_error_recovery(valid_symbols)) + vec_parser_heredoc_back(&scanner->heredocs, &back); + if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0 && + back->started && !in_error_recovery(valid_symbols)) return ( scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END)); if (valid_symbols[HEREDOC_START] && !in_error_recovery(valid_symbols) && - 
scanner->heredocs.size > 0) - return (scan_heredoc_start(array_back(&scanner->heredocs), lexer)); - + scanner->heredocs.len > 0) + { + vec_parser_heredoc_back(&scanner->heredocs, &back); + return (scan_heredoc_start(back, lexer)); + } if (valid_symbols[TEST_OPERATOR] && !valid_symbols[EXPANSION_WORD]) { while (isspace(lexer->lookahead) && lexer->lookahead != '\n') @@ -1368,7 +1357,7 @@ static bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) advance(lexer); t_heredoc heredoc = heredoc_new(); heredoc.allows_indent = true; - array_push(&scanner->heredocs, heredoc); + vec_parser_heredoc_push(&scanner->heredocs, heredoc); lexer->result_symbol = HEREDOC_ARROW_DASH; } else if (lexer->lookahead == '<' || lexer->lookahead == '=') @@ -1376,7 +1365,7 @@ static bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) else { t_heredoc heredoc = heredoc_new(); - array_push(&scanner->heredocs, heredoc); + vec_parser_heredoc_push(&scanner->heredocs, heredoc); lexer->result_symbol = HEREDOC_ARROW; } return (true); @@ -1475,21 +1464,21 @@ static bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) void *tree_sitter_bash_external_scanner_create() { - t_scanner *scanner = calloc(1, sizeof(t_scanner)); - array_init(&scanner->heredocs); + t_scanner_ctx *scanner = mem_alloc(sizeof(t_scanner_ctx)); + scanner->heredocs = vec_parser_heredoc_new(5, NULL); return (scanner); } bool tree_sitter_bash_external_scanner_scan(void *payload, t_lexer *lexer, const bool *valid_symbols) { - t_scanner *scanner = (t_scanner *)payload; + t_scanner_ctx *scanner = (t_scanner_ctx *)payload; return (scan(scanner, lexer, valid_symbols)); } unsigned tree_sitter_bash_external_scanner_serialize(void *payload, char *state) { - t_scanner *scanner = (t_scanner *)payload; + t_scanner_ctx *scanner = (t_scanner_ctx *)payload; return (serialize(scanner, state)); } @@ -1497,19 +1486,19 @@ void tree_sitter_bash_external_scanner_deserialize(void *payload, const 
char *state, unsigned length) { - t_scanner *scanner = (t_scanner *)payload; + t_scanner_ctx *scanner = (t_scanner_ctx *)payload; deserialize(scanner, state, length); } void tree_sitter_bash_external_scanner_destroy(void *payload) { - t_scanner *scanner = (t_scanner *)payload; - for (size_t i = 0; i < scanner->heredocs.size; i++) + t_scanner_ctx *scanner = (t_scanner_ctx *)payload; + for (size_t i = 0; i < scanner->heredocs.len; i++) { - t_heredoc *heredoc = array_get(&scanner->heredocs, i); - array_delete(&heredoc->current_leading_word); - array_delete(&heredoc->delimiter); + t_heredoc *heredoc = &scanner->heredocs.buffer[i]; + str_free(heredoc->current_leading_word); + str_free(heredoc->delimiter); } - array_delete(&scanner->heredocs); + vec_parser_heredoc_free(scanner->heredocs); free(scanner); } diff --git a/stdme/generic_sources/header/vec_C__PREFIX__.h__TEMPLATE__ b/stdme/generic_sources/header/vec_C__PREFIX__.h__TEMPLATE__ index ea5be649..312c0967 100644 --- a/stdme/generic_sources/header/vec_C__PREFIX__.h__TEMPLATE__ +++ b/stdme/generic_sources/header/vec_C__PREFIX__.h__TEMPLATE__ @@ -53,5 +53,6 @@ void vec_C__PREFIX___iter(t_vec_C__PREFIX__ *vec, void vec_C__PREFIX___reverse(t_vec_C__PREFIX__ *vec); void vec_C__PREFIX___sort(t_vec_C__PREFIX__ *vec, t_vec_C__PREFIX___sort_fn is_sorted); +t_error vec_C__PREFIX___back(t_vec_C__PREFIX__ *vec, C__TYPENAME__ **out); #endif diff --git a/stdme/generic_sources/src/vec_C__PREFIX___functions3.c__TEMPLATE__ b/stdme/generic_sources/src/vec_C__PREFIX___functions3.c__TEMPLATE__ index 0051d6e6..d743a407 100644 --- a/stdme/generic_sources/src/vec_C__PREFIX___functions3.c__TEMPLATE__ +++ b/stdme/generic_sources/src/vec_C__PREFIX___functions3.c__TEMPLATE__ @@ -71,3 +71,14 @@ void vec_C__PREFIX___reverse(t_vec_C__PREFIX__ *vec) i++; } } + +t_error vec_C__PREFIX___back(t_vec_C__PREFIX__ *vec, C__TYPENAME__ **out) +{ + C__TYPENAME__ *temporary; + + if (out == NULL) + out = &temporary; + if (vec->len != 0) + return (*out = 
&vec->buffer[vec->len - 1], true); + return (false); +} diff --git a/stdme/include/me/buffered_str/buf_str.h b/stdme/include/me/buffered_str/buf_str.h index 6bf1092e..568b1975 100644 --- a/stdme/include/me/buffered_str/buf_str.h +++ b/stdme/include/me/buffered_str/buf_str.h @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2023/11/16 17:54:28 by maiboyer #+# #+# */ -/* Updated: 2023/12/31 15:34:29 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 14:14:42 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -25,6 +25,7 @@ bool push_str_buffer(t_buffer_str *buf, t_const_str to_push); bool push_str_char(t_buffer_str *buf, char to_push); void str_clear(t_buffer_str *buf); t_buffer_str alloc_new_buffer(t_usize capacity); +t_error str_reserve(t_buffer_str *buf, t_usize size); static inline void str_free(t_buffer_str buf) { diff --git a/stdme/output/include/me/vec/vec_buf_str.h b/stdme/output/include/me/vec/vec_buf_str.h index fe01b85a..ff230fc3 100644 --- a/stdme/output/include/me/vec/vec_buf_str.h +++ b/stdme/output/include/me/vec/vec_buf_str.h @@ -53,5 +53,6 @@ void vec_buf_str_iter(t_vec_buf_str *vec, void vec_buf_str_reverse(t_vec_buf_str *vec); void vec_buf_str_sort(t_vec_buf_str *vec, t_vec_buf_str_sort_fn is_sorted); +t_error vec_buf_str_back(t_vec_buf_str *vec, t_buffer_str **out); #endif diff --git a/stdme/output/include/me/vec/vec_str.h b/stdme/output/include/me/vec/vec_str.h index d8d11fc3..deaf0727 100644 --- a/stdme/output/include/me/vec/vec_str.h +++ b/stdme/output/include/me/vec/vec_str.h @@ -53,5 +53,6 @@ void vec_str_iter(t_vec_str *vec, void vec_str_reverse(t_vec_str *vec); void vec_str_sort(t_vec_str *vec, t_vec_str_sort_fn is_sorted); +t_error vec_str_back(t_vec_str *vec, t_str **out); #endif diff --git a/stdme/output/include/me/vec/vec_u8.h b/stdme/output/include/me/vec/vec_u8.h index 7eaa06dd..ec7fbedf 100644 --- 
a/stdme/output/include/me/vec/vec_u8.h +++ b/stdme/output/include/me/vec/vec_u8.h @@ -53,5 +53,6 @@ void vec_u8_iter(t_vec_u8 *vec, void vec_u8_reverse(t_vec_u8 *vec); void vec_u8_sort(t_vec_u8 *vec, t_vec_u8_sort_fn is_sorted); +t_error vec_u8_back(t_vec_u8 *vec, t_u8 **out); #endif diff --git a/stdme/output/src/vec/vec_buf_str_functions3.c b/stdme/output/src/vec/vec_buf_str_functions3.c index b468dac1..8b2205b4 100644 --- a/stdme/output/src/vec/vec_buf_str_functions3.c +++ b/stdme/output/src/vec/vec_buf_str_functions3.c @@ -71,3 +71,14 @@ void vec_buf_str_reverse(t_vec_buf_str *vec) i++; } } + +t_error vec_buf_str_back(t_vec_buf_str *vec, t_buffer_str **out) +{ + t_buffer_str *temporary; + + if (out == NULL) + out = &temporary; + if (vec->len != 0) + return (*out = &vec->buffer[vec->len - 1], true); + return (false); +} diff --git a/stdme/output/src/vec/vec_str_functions3.c b/stdme/output/src/vec/vec_str_functions3.c index 0ac7153f..b0fd48e7 100644 --- a/stdme/output/src/vec/vec_str_functions3.c +++ b/stdme/output/src/vec/vec_str_functions3.c @@ -71,3 +71,14 @@ void vec_str_reverse(t_vec_str *vec) i++; } } + +t_error vec_str_back(t_vec_str *vec, t_str **out) +{ + t_str *temporary; + + if (out == NULL) + out = &temporary; + if (vec->len != 0) + return (*out = &vec->buffer[vec->len - 1], true); + return (false); +} diff --git a/stdme/output/src/vec/vec_u8_functions3.c b/stdme/output/src/vec/vec_u8_functions3.c index 32994a01..f244463c 100644 --- a/stdme/output/src/vec/vec_u8_functions3.c +++ b/stdme/output/src/vec/vec_u8_functions3.c @@ -71,3 +71,14 @@ void vec_u8_reverse(t_vec_u8 *vec) i++; } } + +t_error vec_u8_back(t_vec_u8 *vec, t_u8 **out) +{ + t_u8 *temporary; + + if (out == NULL) + out = &temporary; + if (vec->len != 0) + return (*out = &vec->buffer[vec->len - 1], true); + return (false); +} diff --git a/stdme/src.list b/stdme/src.list index a36af7c7..cb42365a 100644 --- a/stdme/src.list +++ b/stdme/src.list @@ -16,6 +16,7 @@ blx/sprite/get_pixel 
blx/sprite/new_image blx/sprite/sprite_draw_onto_sprite buffered_str/mod +buffered_str/push_char char/isalnum char/isalpha char/isascii diff --git a/stdme/src/buffered_str/mod.c b/stdme/src/buffered_str/mod.c index 4301d3ce..deb4ff8c 100644 --- a/stdme/src/buffered_str/mod.c +++ b/stdme/src/buffered_str/mod.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2023/11/16 17:52:12 by maiboyer #+# #+# */ -/* Updated: 2024/04/28 20:05:41 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 14:14:03 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -19,11 +19,32 @@ #include "me/types.h" #include -bool push_str_buffer(t_buffer_str *buf, t_const_str to_push) +t_error str_reserve(t_buffer_str *buf, t_usize size) { - t_usize to_push_len; t_str temp_buffer; - t_usize new_capacity; + t_usize new_capacity; + + if (buf == NULL) + return (ERROR); + while (size > buf->capacity) + { + new_capacity = (buf->capacity * 3) / 2 + 1; + temp_buffer = mem_alloc(new_capacity); + if (temp_buffer == NULL) + return (true); + str_l_copy(temp_buffer, buf->buf, new_capacity); + free(buf->buf); + buf->buf = temp_buffer; + buf->capacity = new_capacity; + } + return (NO_ERROR); +} + +bool push_str_buffer(t_buffer_str *buf, t_const_str to_push) +{ + t_usize to_push_len; + t_str temp_buffer; + t_usize new_capacity; if (buf == NULL || to_push == NULL) return (true); @@ -44,26 +65,26 @@ bool push_str_buffer(t_buffer_str *buf, t_const_str to_push) return (false); } -bool push_str_char(t_buffer_str *buf, char to_push) +bool push_str_char(t_buffer_str *buf, char to_push) { - char push_str[2]; + char push_str[2]; push_str[0] = to_push; push_str[1] = 0; return (push_str_buffer(buf, push_str)); } -void str_clear(t_buffer_str *buf) +void str_clear(t_buffer_str *buf) { mem_set_zero(buf->buf, buf->capacity); buf->len = 0; - return ; + return; } -t_buffer_str alloc_new_buffer(t_usize capacity) +t_buffer_str 
alloc_new_buffer(t_usize capacity) { - t_buffer_str out; - t_str buf; + t_buffer_str out; + t_str buf; if (capacity == 0) capacity = 16; diff --git a/stdme/src/buffered_str/push_char.c b/stdme/src/buffered_str/push_char.c new file mode 100644 index 00000000..730580b5 --- /dev/null +++ b/stdme/src/buffered_str/push_char.c @@ -0,0 +1,13 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* push_char.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/04/30 14:17:47 by maiboyer #+# #+# */ +/* Updated: 2024/04/30 14:17:47 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + + From dfdd8e45033d8c1adbd2cea19ab945c32360b310 Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Tue, 30 Apr 2024 14:30:58 +0200 Subject: [PATCH 03/14] moved some more headers --- parser/includes/error_costs.h | 23 ++++++++++++++ parser/includes/lexer.h | 49 ++++++++++++++++++++++------ parser/includes/parser.h | 2 +- parser/src/error_costs.h | 11 ------- parser/src/lexer.c | 2 +- parser/src/lexer.h | 60 ----------------------------------- parser/src/parser.c | 4 +-- parser/src/stack.h | 2 +- parser/src/subtree.c | 2 +- parser/src/subtree.h | 2 +- 10 files changed, 70 insertions(+), 87 deletions(-) create mode 100644 parser/includes/error_costs.h delete mode 100644 parser/src/error_costs.h delete mode 100644 parser/src/lexer.h diff --git a/parser/includes/error_costs.h b/parser/includes/error_costs.h new file mode 100644 index 00000000..e73e4a2e --- /dev/null +++ b/parser/includes/error_costs.h @@ -0,0 +1,23 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* error_costs.h :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/04/30 14:26:02 by maiboyer #+# #+# */ +/* Updated: 2024/04/30 14:26:04 by 
maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef ERROR_COSTS_H +#define ERROR_COSTS_H + +#define ERROR_STATE 0 +#define ERROR_COST_PER_RECOVERY 500 +#define ERROR_COST_PER_MISSING_TREE 110 +#define ERROR_COST_PER_SKIPPED_TREE 100 +#define ERROR_COST_PER_SKIPPED_LINE 30 +#define ERROR_COST_PER_SKIPPED_CHAR 1 + +#endif /* ERROR_COSTS_H */ diff --git a/parser/includes/lexer.h b/parser/includes/lexer.h index a187ef85..ee6be79f 100644 --- a/parser/includes/lexer.h +++ b/parser/includes/lexer.h @@ -6,21 +6,52 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/04/23 19:51:24 by maiboyer #+# #+# */ -/* Updated: 2024/04/24 23:03:33 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 14:28:34 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ #ifndef LEXER_H #define LEXER_H -#include -#include +#include "me/types.h" +#include "parser/api.h" +#include "parser/parser_length.h" +#include "parser/types/types_lexer.h" -#ifndef TREE_SITTER_API_H_ -typedef uint16_t t_state_id; -typedef uint16_t t_symbol; -typedef uint16_t t_field_id; -typedef struct s_language t_language; -#endif +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +typedef struct s_liblexer +{ + t_lexer data; + t_parse_length current_position; + t_parse_length token_start_position; + t_parse_length token_end_position; + + t_parser_range *included_ranges; + const char *chunk; + t_parse_input input; + t_parse_logger logger; + + t_u32 included_range_count; + t_u32 current_included_range_index; + t_u32 chunk_start; + t_u32 chunk_size; + t_u32 lookahead_size; + bool did_get_column; + + char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; +} t_liblexer; + +void ts_lexer_init(t_liblexer *self); +void ts_lexer_delete(t_liblexer *self); +void ts_lexer_set_input(t_liblexer *self, t_parse_input input); +void ts_lexer_reset(t_liblexer *self, 
t_parse_length range); +void ts_lexer_start(t_liblexer *self); +void ts_lexer_finish(t_liblexer *self, t_i32 *data); +void ts_lexer_advance_to_end(t_liblexer *self); +void ts_lexer_mark_end(t_liblexer *self); +bool ts_lexer_set_included_ranges(t_liblexer *self, + const t_parser_range *ranges, t_u32 count); +t_parser_range *ts_lexer_included_ranges(const t_liblexer *self, t_u32 *count); #endif /* LEXER_H */ diff --git a/parser/includes/parser.h b/parser/includes/parser.h index 72451ada..298bf954 100644 --- a/parser/includes/parser.h +++ b/parser/includes/parser.h @@ -2,7 +2,7 @@ #define TREE_SITTER_PARSER_H_ #include "../parse_types.h" -#include "./lexer.h" +#include "parser/lexer.h" #include #include #include diff --git a/parser/src/error_costs.h b/parser/src/error_costs.h deleted file mode 100644 index 32d3666a..00000000 --- a/parser/src/error_costs.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef TREE_SITTER_ERROR_COSTS_H_ -#define TREE_SITTER_ERROR_COSTS_H_ - -#define ERROR_STATE 0 -#define ERROR_COST_PER_RECOVERY 500 -#define ERROR_COST_PER_MISSING_TREE 110 -#define ERROR_COST_PER_SKIPPED_TREE 100 -#define ERROR_COST_PER_SKIPPED_LINE 30 -#define ERROR_COST_PER_SKIPPED_CHAR 1 - -#endif diff --git a/parser/src/lexer.c b/parser/src/lexer.c index 208244da..4d387b5b 100644 --- a/parser/src/lexer.c +++ b/parser/src/lexer.c @@ -1,4 +1,4 @@ -#include "./lexer.h" +#include "parser/lexer.h" #include "parser/parser_length.h" #include "./subtree.h" #include diff --git a/parser/src/lexer.h b/parser/src/lexer.h deleted file mode 100644 index 4a29c288..00000000 --- a/parser/src/lexer.h +++ /dev/null @@ -1,60 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* lexer.h :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: maiboyer +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2024/04/29 16:17:29 by maiboyer #+# #+# */ -/* Updated: 2024/04/29 16:55:37 by maiboyer ### ########.fr */ -/* */ -/* 
************************************************************************** */ - -#ifndef TREE_SITTER_LEXER_H_ -#define TREE_SITTER_LEXER_H_ - -#include "parser/parser_length.h" -#include "./parser.h" -#include "./subtree.h" -#include "parser/api.h" - -#include "me/types.h" -#include "parser/types/types_lexer.h" - -#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 - -typedef struct s_liblexer -{ - t_lexer data; - t_parse_length current_position; - t_parse_length token_start_position; - t_parse_length token_end_position; - - t_parser_range *included_ranges; - const char *chunk; - t_parse_input input; - t_parse_logger logger; - - t_u32 included_range_count; - t_u32 current_included_range_index; - t_u32 chunk_start; - t_u32 chunk_size; - t_u32 lookahead_size; - bool did_get_column; - - char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; -} t_liblexer; - -void ts_lexer_init(t_liblexer *); -void ts_lexer_delete(t_liblexer *); -void ts_lexer_set_input(t_liblexer *, t_parse_input); -void ts_lexer_reset(t_liblexer *, t_parse_length); -void ts_lexer_start(t_liblexer *); -void ts_lexer_finish(t_liblexer *, t_i32 *); -void ts_lexer_advance_to_end(t_liblexer *); -void ts_lexer_mark_end(t_liblexer *); -bool ts_lexer_set_included_ranges(t_liblexer *self, - const t_parser_range *ranges, t_u32 count); -t_parser_range *ts_lexer_included_ranges(const t_liblexer *self, t_u32 *count); - -#endif // TREE_SITTER_LEXER_H_ diff --git a/parser/src/parser.c b/parser/src/parser.c index 15f1dfb0..3c0e1c91 100644 --- a/parser/src/parser.c +++ b/parser/src/parser.c @@ -1,8 +1,8 @@ #include "./array.h" -#include "./error_costs.h" +#include "parser/error_costs.h" #include "./language.h" #include "parser/parser_length.h" -#include "./lexer.h" +#include "parser/lexer.h" #include "./reduce_action.h" #include "./reusable_node.h" #include "./stack.h" diff --git a/parser/src/stack.h b/parser/src/stack.h index f3f64bac..c9309303 100644 --- a/parser/src/stack.h +++ b/parser/src/stack.h @@ -7,7 +7,7 
@@ extern "C" { #include "./array.h" #include "./subtree.h" -#include "./error_costs.h" +#include "parser/error_costs.h" #include typedef struct Stack Stack; diff --git a/parser/src/subtree.c b/parser/src/subtree.c index e63308f5..577a2bf7 100644 --- a/parser/src/subtree.c +++ b/parser/src/subtree.c @@ -7,7 +7,7 @@ #include "./array.h" -#include "./error_costs.h" +#include "parser/error_costs.h" #include "./language.h" #include "parser/parser_length.h" #include "./subtree.h" diff --git a/parser/src/subtree.h b/parser/src/subtree.h index 1955fa83..c748c5c8 100644 --- a/parser/src/subtree.h +++ b/parser/src/subtree.h @@ -5,7 +5,7 @@ #include "parser/types/types_symbol.h" #include "./array.h" -#include "./error_costs.h" +#include "parser/error_costs.h" #include "parser/parser_length.h" #include "./parser.h" #include "parser/api.h" From 5bec1546aabfa9dc75c00b3300dbf9e8975ffc44 Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Tue, 30 Apr 2024 15:06:33 +0200 Subject: [PATCH 04/14] removed src/point.h --- parser/includes/parser_length.h | 2 +- parser/includes/point.h | 21 +++++++++++ parser/includes/point/inline1.h | 50 ++++++++++++++++++++++++++ parser/includes/point/inline2.h | 49 ++++++++++++++++++++++++++ parser/includes/point/inline3.h | 29 +++++++++++++++ parser/src/point.h | 62 --------------------------------- parser/src/tree.c | 4 +-- parser/src/tree_cursor.c | 2 +- 8 files changed, 153 insertions(+), 66 deletions(-) create mode 100644 parser/includes/point.h create mode 100644 parser/includes/point/inline1.h create mode 100644 parser/includes/point/inline2.h create mode 100644 parser/includes/point/inline3.h delete mode 100644 parser/src/point.h diff --git a/parser/includes/parser_length.h b/parser/includes/parser_length.h index 5a9137d8..d47b11de 100644 --- a/parser/includes/parser_length.h +++ b/parser/includes/parser_length.h @@ -1,7 +1,7 @@ #ifndef TREE_SITTER_LENGTH_H_ #define TREE_SITTER_LENGTH_H_ -#include "../src/point.h" +#include "parser/point.h" 
#include "parser/api.h" #include #include diff --git a/parser/includes/point.h b/parser/includes/point.h new file mode 100644 index 00000000..f315dd73 --- /dev/null +++ b/parser/includes/point.h @@ -0,0 +1,21 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* point.h :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/04/30 14:35:22 by maiboyer #+# #+# */ +/* Updated: 2024/04/30 14:46:18 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef POINT_H +#define POINT_H + +#include "parser/point/inline1.h" +#include "parser/point/inline2.h" +#include "parser/point/inline3.h" + + +#endif /* POINT_H */ diff --git a/parser/includes/point/inline1.h b/parser/includes/point/inline1.h new file mode 100644 index 00000000..746241a8 --- /dev/null +++ b/parser/includes/point/inline1.h @@ -0,0 +1,50 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* inline1.h :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/04/30 14:35:50 by maiboyer #+# #+# */ +/* Updated: 2024/04/30 14:43:49 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef INLINE1_H +#define INLINE1_H + +#include "parser/types/types_point.h" + +static inline t_point point__new(unsigned row, unsigned column) +{ + t_point result = {row, column}; + return result; +} + +static inline t_point point_add(t_point a, t_point b) +{ + if (b.row > 0) + return point__new(a.row + b.row, b.column); + else + return point__new(a.row, a.column + b.column); +} + +static inline t_point point_sub(t_point a, t_point b) +{ + if (a.row > b.row) + return point__new(a.row - b.row, a.column); + else + return point__new(0, a.column - b.column); +} + +static 
inline bool point_lte(t_point a, t_point b) +{ + return (a.row < b.row) || (a.row == b.row && a.column <= b.column); +} + +static inline bool point_lt(t_point a, t_point b) +{ + return (a.row < b.row) || (a.row == b.row && a.column < b.column); +} + +#endif /* INLINE1_H */ diff --git a/parser/includes/point/inline2.h b/parser/includes/point/inline2.h new file mode 100644 index 00000000..8d0e455e --- /dev/null +++ b/parser/includes/point/inline2.h @@ -0,0 +1,49 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* inline2.h :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/04/30 14:43:58 by maiboyer #+# #+# */ +/* Updated: 2024/04/30 14:44:12 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef INLINE2_H +#define INLINE2_H + +#include "parser/types/types_point.h" + +static inline bool point_gt(t_point a, t_point b) +{ + return (a.row > b.row) || (a.row == b.row && a.column > b.column); +} + +static inline bool point_gte(t_point a, t_point b) +{ + return (a.row > b.row) || (a.row == b.row && a.column >= b.column); +} + +static inline bool point_eq(t_point a, t_point b) +{ + return a.row == b.row && a.column == b.column; +} + +static inline t_point point_min(t_point a, t_point b) +{ + if (a.row < b.row || (a.row == b.row && a.column < b.column)) + return a; + else + return b; +} + +static inline t_point point_max(t_point a, t_point b) +{ + if (a.row > b.row || (a.row == b.row && a.column > b.column)) + return a; + else + return b; +} + +#endif /* INLINE2_H */ diff --git a/parser/includes/point/inline3.h b/parser/includes/point/inline3.h new file mode 100644 index 00000000..5d68736b --- /dev/null +++ b/parser/includes/point/inline3.h @@ -0,0 +1,29 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* inline3.h :+: 
:+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/04/30 14:44:49 by maiboyer #+# #+# */ +/* Updated: 2024/04/30 15:04:39 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef INLINE3_H +#define INLINE3_H + +#include "parser/types/types_point.h" +#include + +static inline t_point point_val_zero(void) +{ + return ((t_point){0, 0}); +} + +static inline t_point point_val_max(void) +{ + return ((t_point){UINT32_MAX, UINT32_MAX}); +} + +#endif /* INLINE3_H */ diff --git a/parser/src/point.h b/parser/src/point.h deleted file mode 100644 index ce1d9ed5..00000000 --- a/parser/src/point.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef TREE_SITTER_POINT_H_ -#define TREE_SITTER_POINT_H_ - -#include "parser/api.h" - -#define POINT_ZERO ((t_point) {0, 0}) -#define POINT_MAX ((t_point) {UINT32_MAX, UINT32_MAX}) - -static inline t_point point__new(unsigned row, unsigned column) { - t_point result = {row, column}; - return result; -} - -static inline t_point point_add(t_point a, t_point b) { - if (b.row > 0) - return point__new(a.row + b.row, b.column); - else - return point__new(a.row, a.column + b.column); -} - -static inline t_point point_sub(t_point a, t_point b) { - if (a.row > b.row) - return point__new(a.row - b.row, a.column); - else - return point__new(0, a.column - b.column); -} - -static inline bool point_lte(t_point a, t_point b) { - return (a.row < b.row) || (a.row == b.row && a.column <= b.column); -} - -static inline bool point_lt(t_point a, t_point b) { - return (a.row < b.row) || (a.row == b.row && a.column < b.column); -} - -static inline bool point_gt(t_point a, t_point b) { - return (a.row > b.row) || (a.row == b.row && a.column > b.column); -} - -static inline bool point_gte(t_point a, t_point b) { - return (a.row > b.row) || (a.row == b.row && a.column >= b.column); -} - -static inline bool point_eq(t_point a, t_point b) { - return a.row 
== b.row && a.column == b.column; -} - -static inline t_point point_min(t_point a, t_point b) { - if (a.row < b.row || (a.row == b.row && a.column < b.column)) - return a; - else - return b; -} - -static inline t_point point_max(t_point a, t_point b) { - if (a.row > b.row || (a.row == b.row && a.column > b.column)) - return a; - else - return b; -} - -#endif diff --git a/parser/src/tree.c b/parser/src/tree.c index 0b033126..6122ea08 100644 --- a/parser/src/tree.c +++ b/parser/src/tree.c @@ -66,7 +66,7 @@ void ts_tree_edit(t_parse_tree *self, const t_input_edit *edit) { ); if (range->end_byte < edit->new_end_byte) { range->end_byte = UINT32_MAX; - range->end_point = POINT_MAX; + range->end_point = point_val_max(); } } } else if (range->end_byte > edit->start_byte) { @@ -81,7 +81,7 @@ void ts_tree_edit(t_parse_tree *self, const t_input_edit *edit) { ); if (range->start_byte < edit->new_end_byte) { range->start_byte = UINT32_MAX; - range->start_point = POINT_MAX; + range->start_point = point_val_max(); } } else if (range->start_byte > edit->start_byte) { range->start_byte = edit->start_byte; diff --git a/parser/src/tree_cursor.c b/parser/src/tree_cursor.c index 7f7b129a..e863c961 100644 --- a/parser/src/tree_cursor.c +++ b/parser/src/tree_cursor.c @@ -299,7 +299,7 @@ static inline t_i64 ts_tree_cursor_goto_first_child_for_byte_and_point( } t_i64 ts_tree_cursor_goto_first_child_for_byte(t_parse_tree_cursor *self, t_u32 goal_byte) { - return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, POINT_ZERO); + return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, point_val_zero()); } t_i64 ts_tree_cursor_goto_first_child_for_point(t_parse_tree_cursor *self, t_point goal_point) { From c7fb66c70d010e80b9de7d750976d3d3a1713af0 Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Tue, 30 Apr 2024 15:57:07 +0200 Subject: [PATCH 05/14] Update --- gen.list | 3 + input.toml | 8 + output/include/me/vec/vec_reduce_action.h | 58 ++++++ 
output/src/vec/vec_reduce_action.c | 115 ++++++++++++ output/src/vec/vec_reduce_action_functions2.c | 112 ++++++++++++ output/src/vec/vec_reduce_action_functions3.c | 84 +++++++++ parser/includes/api.h | 8 +- parser/includes/reduce_action.h | 36 ++++ parser/includes/types/types_reduce_action.h | 27 +++ parser/src/language.c | 16 +- parser/src/language.h | 29 ++- parser/src/parser.c | 167 +++++++++--------- parser/src/reduce_action.h | 34 ---- parser/src/subtree.h | 1 - parser/src/tree.h | 38 ++-- 15 files changed, 572 insertions(+), 164 deletions(-) create mode 100644 output/include/me/vec/vec_reduce_action.h create mode 100644 output/src/vec/vec_reduce_action.c create mode 100644 output/src/vec/vec_reduce_action_functions2.c create mode 100644 output/src/vec/vec_reduce_action_functions3.c create mode 100644 parser/includes/reduce_action.h create mode 100644 parser/includes/types/types_reduce_action.h delete mode 100644 parser/src/reduce_action.h diff --git a/gen.list b/gen.list index 82ebe88f..1bca00f9 100644 --- a/gen.list +++ b/gen.list @@ -4,3 +4,6 @@ src/vec/vec_parser_heredoc_functions3.c src/vec/vec_parser_range.c src/vec/vec_parser_range_functions2.c src/vec/vec_parser_range_functions3.c +src/vec/vec_reduce_action.c +src/vec/vec_reduce_action_functions2.c +src/vec/vec_reduce_action_functions3.c diff --git a/input.toml b/input.toml index c2f4d221..8afc11ce 100644 --- a/input.toml +++ b/input.toml @@ -52,3 +52,11 @@ replace.C__TYPENAME__ = "t_heredoc" replace.C__TYPEHEADER__ = '#include "parser/types/types_heredoc.h"' replace.C__PREFIX__ = "parser_heredoc" replace.C__PREFIXUP__ = "PARSER_HEREDOC" + +[[create.vec]] +sources_output = "src/vec/" +headers_output = "include/me/vec/" +replace.C__TYPENAME__ = "t_reduce_action" +replace.C__TYPEHEADER__ = '#include "parser/types/types_reduce_action.h"' +replace.C__PREFIX__ = "reduce_action" +replace.C__PREFIXUP__ = "REDUCE_ACTION" diff --git a/output/include/me/vec/vec_reduce_action.h 
b/output/include/me/vec/vec_reduce_action.h new file mode 100644 index 00000000..6b6362a0 --- /dev/null +++ b/output/include/me/vec/vec_reduce_action.h @@ -0,0 +1,58 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* vec_reduce_action.h :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2023/12/04 18:46:53 by maiboyer #+# #+# */ +/* Updated: 2023/12/09 17:53:00 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef VEC_REDUCE_ACTION_H +#define VEC_REDUCE_ACTION_H + +#include "parser/types/types_reduce_action.h" +#include "me/types.h" + +typedef bool (*t_vec_reduce_action_sort_fn)(t_reduce_action *, t_reduce_action *); +typedef void (*t_free_reduce_action_item)(t_reduce_action); + +typedef struct s_vec_reduce_action +{ + t_free_reduce_action_item free_func; + t_usize len; + t_usize capacity; + t_reduce_action *buffer; +} t_vec_reduce_action; + +t_vec_reduce_action vec_reduce_action_new(t_usize capacity, + t_free_reduce_action_item free_function); +t_error vec_reduce_action_push(t_vec_reduce_action *vec, t_reduce_action element); +t_error vec_reduce_action_push_front(t_vec_reduce_action *vec, + t_reduce_action element); +t_error vec_reduce_action_pop(t_vec_reduce_action *vec, t_reduce_action *value); +t_error vec_reduce_action_pop_front(t_vec_reduce_action *vec, t_reduce_action *value); +void vec_reduce_action_free(t_vec_reduce_action vec); +t_error vec_reduce_action_reserve(t_vec_reduce_action *vec, + t_usize wanted_capacity); +t_error vec_reduce_action_find(t_vec_reduce_action *vec, + bool (*fn)(const t_reduce_action *), t_usize *index); +t_error vec_reduce_action_find_starting(t_vec_reduce_action *vec, + bool (*fn)(const t_reduce_action *), + t_usize starting_index, t_usize *index); +t_error vec_reduce_action_all(t_vec_reduce_action *vec, + bool (*fn)(const t_reduce_action 
*), bool *result); +t_error vec_reduce_action_any(t_vec_reduce_action *vec, + bool (*fn)(const t_reduce_action *), bool *result); +void vec_reduce_action_iter(t_vec_reduce_action *vec, + void (*fn)(t_usize index, t_reduce_action *value, + void *state), + void *state); +void vec_reduce_action_reverse(t_vec_reduce_action *vec); +void vec_reduce_action_sort(t_vec_reduce_action *vec, + t_vec_reduce_action_sort_fn is_sorted); +t_error vec_reduce_action_back(t_vec_reduce_action *vec, t_reduce_action **out); + +#endif diff --git a/output/src/vec/vec_reduce_action.c b/output/src/vec/vec_reduce_action.c new file mode 100644 index 00000000..cfbe90d4 --- /dev/null +++ b/output/src/vec/vec_reduce_action.c @@ -0,0 +1,115 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* vec_reduce_action.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2023/12/05 18:46:28 by maiboyer #+# #+# */ +/* Updated: 2023/12/09 17:54:11 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/mem/mem_alloc_array.h" +#include "me/mem/mem_copy.h" +#include "me/mem/mem_set_zero.h" +#include "me/types.h" +#include "me/vec/vec_reduce_action.h" +#include + +t_vec_reduce_action vec_reduce_action_new(t_usize capacity, + t_free_reduce_action_item free_function) +{ + t_vec_reduce_action out; + + out = (t_vec_reduce_action){0}; + out.free_func = free_function; + out.buffer = mem_alloc_array(capacity, sizeof(t_reduce_action)); + if (out.buffer) + out.capacity = capacity; + return (out); +} + +/// Return true in case of an error +t_error vec_reduce_action_push(t_vec_reduce_action *vec, t_reduce_action element) +{ + t_reduce_action *temp_buffer; + size_t new_capacity; + + if (vec == NULL) + return (ERROR); + if (vec->len + 1 > vec->capacity) + { + new_capacity = (vec->capacity * 3) / 2 + 1; + while (vec->len + 1 > 
new_capacity) + new_capacity = (new_capacity * 3) / 2 + 1; + temp_buffer = mem_alloc_array(new_capacity, sizeof(t_reduce_action)); + if (temp_buffer == NULL) + return (ERROR); + mem_copy(temp_buffer, vec->buffer, vec->len * sizeof(t_reduce_action)); + free(vec->buffer); + vec->buffer = temp_buffer; + vec->capacity = new_capacity; + } + vec->buffer[vec->len] = element; + vec->len += 1; + return (NO_ERROR); +} + +/// Return true in case of an error +t_error vec_reduce_action_reserve(t_vec_reduce_action *vec, t_usize wanted_capacity) +{ + t_reduce_action *temp_buffer; + size_t new_capacity; + + if (vec == NULL) + return (ERROR); + if (wanted_capacity > vec->capacity) + { + new_capacity = (vec->capacity * 3) / 2 + 1; + while (wanted_capacity > new_capacity) + new_capacity = (new_capacity * 3) / 2 + 1; + temp_buffer = mem_alloc_array(new_capacity, sizeof(t_reduce_action)); + if (temp_buffer == NULL) + return (ERROR); + mem_copy(temp_buffer, vec->buffer, vec->len * sizeof(t_reduce_action)); + free(vec->buffer); + vec->buffer = temp_buffer; + vec->capacity = new_capacity; + } + return (NO_ERROR); +} + +/// Return true if the vector is empty +/// This function is safe to call with value being NULL +t_error vec_reduce_action_pop(t_vec_reduce_action *vec, t_reduce_action *value) +{ + t_reduce_action temp_value; + t_reduce_action *ptr; + + if (vec == NULL) + return (ERROR); + ptr = value; + if (vec->len == 0) + return (ERROR); + if (value == NULL) + ptr = &temp_value; + vec->len--; + *ptr = vec->buffer[vec->len]; + mem_set_zero(&vec->buffer[vec->len], sizeof(t_reduce_action)); + return (NO_ERROR); +} + +/// This function is safe to call with `free_elem` being NULL +void vec_reduce_action_free(t_vec_reduce_action vec) +{ + if (vec.free_func) + { + while (vec.len) + { + vec.free_func(vec.buffer[vec.len - 1]); + vec.len--; + } + } + free(vec.buffer); +} diff --git a/output/src/vec/vec_reduce_action_functions2.c b/output/src/vec/vec_reduce_action_functions2.c new file mode 100644 
index 00000000..29025453
--- /dev/null
+++ b/output/src/vec/vec_reduce_action_functions2.c
@@ -0,0 +1,125 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* vec_reduce_action.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: maiboyer +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */
+/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "me/mem/mem_alloc_array.h"
+#include "me/mem/mem_copy.h"
+#include "me/mem/mem_set_zero.h"
+#include "me/types.h"
+#include "me/vec/vec_reduce_action.h"
+#include <stdlib.h>
+
+/// Store in `*index` the position of the first element accepted by `fn`.
+/// Return ERROR when an argument is NULL or no element matches,
+/// NO_ERROR otherwise.
+t_error vec_reduce_action_find(t_vec_reduce_action *vec,
+	bool (*fn)(const t_reduce_action *), t_usize *index)
+{
+	t_usize idx;
+
+	if (vec == NULL || fn == NULL || index == NULL)
+		return (ERROR);
+	idx = 0;
+	while (idx < vec->len)
+	{
+		if (fn(&vec->buffer[idx]))
+		{
+			*index = idx;
+			return (NO_ERROR);
+		}
+		idx++;
+	}
+	return (ERROR);
+}
+
+/// Same as vec_reduce_action_find, but the scan begins at `starting_index`.
+/// Return ERROR when an argument is NULL or no element matches.
+t_error vec_reduce_action_find_starting(t_vec_reduce_action *vec,
+	bool (*fn)(const t_reduce_action *),
+	t_usize starting_index, t_usize *index)
+{
+	t_usize idx;
+
+	if (vec == NULL || fn == NULL || index == NULL)
+		return (ERROR);
+	idx = starting_index;
+	while (idx < vec->len)
+	{
+		if (fn(&vec->buffer[idx]))
+		{
+			*index = idx;
+			return (NO_ERROR);
+		}
+		idx++;
+	}
+	return (ERROR);
+}
+
+/// Set `*result` to true when `fn` accepts every element (an empty vector
+/// yields true); the scan stops at the first rejected element.
+/// Return ERROR only when an argument is NULL, NO_ERROR otherwise.
+t_error vec_reduce_action_all(t_vec_reduce_action *vec,
+	bool (*fn)(const t_reduce_action *), bool *result)
+{
+	t_usize idx;
+
+	if (vec == NULL || fn == NULL || result == NULL)
+		return (ERROR);
+	idx = 0;
+	*result = true;
+	while (*result && idx < vec->len)
+	{
+		if (!fn(&vec->buffer[idx]))
+			*result = false;
+		idx++;
+	}
+	return (NO_ERROR);
+}
+
+/// Set `*result` to true when `fn` accepts at least one element (an empty
+/// vector yields false); the scan stops at the first accepted element.
+/// Return ERROR only when an argument is NULL, NO_ERROR otherwise.
+t_error vec_reduce_action_any(t_vec_reduce_action *vec,
+	bool (*fn)(const t_reduce_action *), bool *result)
+{
+	t_usize idx;
+
+	if (vec == NULL || fn == NULL || result == NULL)
+		return (ERROR);
+	idx = 0;
+	*result = false;
+	while (!*result && idx < vec->len)
+	{
+		if (fn(&vec->buffer[idx]))
+			*result = true;
+		idx++;
+	}
+	return (NO_ERROR);
+}
+
+/// Call `fn(index, &element, state)` for each element, in index order.
+/// Does nothing when `vec` or `fn` is NULL.
+void vec_reduce_action_iter(t_vec_reduce_action *vec,
+	void (*fn)(t_usize index, t_reduce_action *value,
+		void *state),
+	void *state)
+{
+	t_usize idx;
+
+	if (vec == NULL || fn == NULL)
+		return;
+	idx = 0;
+	while (idx < vec->len)
+	{
+		fn(idx, &vec->buffer[idx], state);
+		idx++;
+	}
+}
diff --git a/output/src/vec/vec_reduce_action_functions3.c b/output/src/vec/vec_reduce_action_functions3.c
new file mode 100644
index 00000000..2dc18ad3
--- /dev/null
+++ b/output/src/vec/vec_reduce_action_functions3.c
@@ -0,0 +1,100 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* vec_reduce_action.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: maiboyer +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */
+/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "me/mem/mem_alloc_array.h"
+#include "me/mem/mem_copy.h"
+#include "me/mem/mem_set_zero.h"
+#include "me/types.h"
+#include "me/vec/vec_reduce_action.h"
+#include <stdlib.h>
+
+/// Insert `element` at index 0, shifting the existing elements up by one.
+/// Return ERROR when `vec` is NULL or the buffer cannot be grown.
+t_error vec_reduce_action_push_front(t_vec_reduce_action *vec,
+	t_reduce_action element)
+{
+	t_usize i;
+
+	if (vec == NULL)
+		return (ERROR);
+	if (vec->len == 0)
+		return (vec_reduce_action_push(vec, element));
+	i = vec->len - 1;
+	if (vec->capacity < vec->len + 1 &&
+		vec_reduce_action_reserve(vec, 3 * vec->len / 2 + 1))
+		return (ERROR);
+	while (i > 0)
+	{
+		vec->buffer[i + 1] = vec->buffer[i];
+		i--;
+	}
+	vec->buffer[1] = vec->buffer[0];
+	vec->buffer[0] = element;
+	vec->len++;
+	return (NO_ERROR);
+}
+
+/// Remove the first element and shift the remaining ones down by one slot.
+/// The removed element is copied to `*value` unless `value` is NULL.
+/// Return ERROR when `vec` is NULL or empty, NO_ERROR otherwise.
+t_error vec_reduce_action_pop_front(t_vec_reduce_action *vec, t_reduce_action *value)
+{
+	t_usize i;
+
+	if (vec == NULL)
+		return (ERROR);
+	if (vec->len <= 1)
+		return (vec_reduce_action_pop(vec, value));
+	i = 0;
+	if (value != NULL)
+		*value = vec->buffer[0];
+	vec->len--;
+	while (i < vec->len)
+	{
+		vec->buffer[i] = vec->buffer[i + 1];
+		i++;
+	}
+	mem_set_zero(&vec->buffer[i], sizeof(*vec->buffer));
+	return (NO_ERROR);
+}
+
+/// Reverse the elements in place; does nothing when `vec` is NULL.
+void vec_reduce_action_reverse(t_vec_reduce_action *vec)
+{
+	t_reduce_action temporary;
+	t_usize i;
+
+	if (vec == NULL)
+		return;
+	i = 0;
+	while (i < vec->len / 2)
+	{
+		temporary = vec->buffer[vec->len - 1 - i];
+		vec->buffer[vec->len - 1 - i] = vec->buffer[i];
+		vec->buffer[i] = temporary;
+		i++;
+	}
+}
+
+/// Store a pointer to the last element in `*out` (`out` may be NULL).
+/// Return ERROR when `vec` is NULL or empty, NO_ERROR otherwise.
+t_error vec_reduce_action_back(t_vec_reduce_action *vec, t_reduce_action **out)
+{
+	t_reduce_action *temporary;
+
+	if (vec == NULL || vec->len == 0)
+		return (ERROR);
+	if (out == NULL)
+		out = &temporary;
+	*out = &vec->buffer[vec->len - 1];
+	return (NO_ERROR);
+}
diff --git a/parser/includes/api.h b/parser/includes/api.h
index fdfd61b6..712c57fb 100644
--- a/parser/includes/api.h
+++ b/parser/includes/api.h
@@ -33,10 +33,10 @@
 /*******************/
 
 typedef struct s_parser t_parser;
-typedef struct t_parse_tree t_parse_tree;
-typedef struct t_query t_query;
-typedef struct t_query_cursor t_query_cursor;
-typedef struct t_lookahead_iterator t_lookahead_iterator;
+typedef struct s_parse_tree t_parse_tree;
+typedef struct s_query t_query;
+typedef struct s_query_cursor t_query_cursor;
+typedef struct s_lookahead_iterator t_lookahead_iterator;
 
 typedef enum t_input_encoding
 {
diff --git a/parser/includes/reduce_action.h b/parser/includes/reduce_action.h
new file mode 100644
index 00000000..53295342
--- /dev/null
+++ b/parser/includes/reduce_action.h
@@ -0,0 +1,36 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* reduce_action.h :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: maiboyer +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/04/30 15:25:12 by maiboyer #+# #+# */
+/* Updated: 2024/04/30 15:25:38 by maiboyer ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#ifndef REDUCE_ACTION_H
+#define
REDUCE_ACTION_H + +#include "me/types.h" +#include "me/vec/vec_reduce_action.h" +#include "parser/api.h" +#include "parser/types/types_reduce_action.h" + +static inline void ts_reduce_action_set_add(t_vec_reduce_action *self, + t_reduce_action new_action) +{ + t_reduce_action action; + + for (t_u32 i = 0; i < self->len; i++) + { + action = self->buffer[i]; + if (action.symbol == new_action.symbol && + action.count == new_action.count) + return; + } + vec_reduce_action_push(self, new_action); +} + +#endif /* REDUCE_ACTION_H */ diff --git a/parser/includes/types/types_reduce_action.h b/parser/includes/types/types_reduce_action.h new file mode 100644 index 00000000..6c98455e --- /dev/null +++ b/parser/includes/types/types_reduce_action.h @@ -0,0 +1,27 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* types_reduce_action.h :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/04/30 15:21:59 by maiboyer #+# #+# */ +/* Updated: 2024/04/30 15:22:18 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef TYPES_REDUCE_ACTION_H +#define TYPES_REDUCE_ACTION_H + +#include "me/types.h" +#include "parser/types/types_symbol.h" + +typedef struct s_reduce_action +{ + t_u32 count; + t_symbol symbol; + t_i32 dynamic_precedence; + t_u16 production_id; +} t_reduce_action; + +#endif /* TYPES_REDUCE_ACTION_H */ diff --git a/parser/src/language.c b/parser/src/language.c index 3f5acb60..c08707c4 100644 --- a/parser/src/language.c +++ b/parser/src/language.c @@ -30,7 +30,7 @@ void ts_language_table_entry( const t_language *self, t_state_id state, t_symbol symbol, - TableEntry *result + t_table_entry *result ) { if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { result->action_count = 0; @@ -171,7 +171,7 @@ t_field_id ts_language_field_id_for_name( t_lookahead_iterator 
*ts_lookahead_iterator_new(const t_language *self, t_state_id state) { if (state >= self->state_count) return NULL; - LookaheadIterator *iterator = malloc(sizeof(LookaheadIterator)); + t_lookahead_iterator *iterator = malloc(sizeof(t_lookahead_iterator)); *iterator = ts_language_lookaheads(self, state); return (t_lookahead_iterator *)iterator; } @@ -181,35 +181,35 @@ void ts_lookahead_iterator_delete(t_lookahead_iterator *self) { } bool ts_lookahead_iterator_reset_state(t_lookahead_iterator * self, t_state_id state) { - LookaheadIterator *iterator = (LookaheadIterator *)self; + t_lookahead_iterator *iterator = (t_lookahead_iterator *)self; if (state >= iterator->language->state_count) return false; *iterator = ts_language_lookaheads(iterator->language, state); return true; } const t_language *ts_lookahead_iterator_language(const t_lookahead_iterator *self) { - const LookaheadIterator *iterator = (const LookaheadIterator *)self; + const t_lookahead_iterator *iterator = (const t_lookahead_iterator *)self; return iterator->language; } bool ts_lookahead_iterator_reset(t_lookahead_iterator *self, const t_language *language, t_state_id state) { if (state >= language->state_count) return false; - LookaheadIterator *iterator = (LookaheadIterator *)self; + t_lookahead_iterator *iterator = (t_lookahead_iterator *)self; *iterator = ts_language_lookaheads(language, state); return true; } bool ts_lookahead_iterator_next(t_lookahead_iterator *self) { - LookaheadIterator *iterator = (LookaheadIterator *)self; + t_lookahead_iterator *iterator = (t_lookahead_iterator *)self; return ts_lookahead_iterator__next(iterator); } t_symbol ts_lookahead_iterator_current_symbol(const t_lookahead_iterator *self) { - const LookaheadIterator *iterator = (const LookaheadIterator *)self; + const t_lookahead_iterator *iterator = (const t_lookahead_iterator *)self; return iterator->symbol; } const char *ts_lookahead_iterator_current_symbol_name(const t_lookahead_iterator *self) { - const 
LookaheadIterator *iterator = (const LookaheadIterator *)self; + const t_lookahead_iterator *iterator = (const t_lookahead_iterator *)self; return ts_language_symbol_name(iterator->language, iterator->symbol); } diff --git a/parser/src/language.h b/parser/src/language.h index eb7edcfa..a26c9f6c 100644 --- a/parser/src/language.h +++ b/parser/src/language.h @@ -1,7 +1,6 @@ #ifndef TREE_SITTER_LANGUAGE_H_ #define TREE_SITTER_LANGUAGE_H_ -#include "./parser.h" #include "./subtree.h" #include "parser/types/types_parse_action_type.h" #include "parser/types/types_state_id.h" @@ -12,14 +11,14 @@ #define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14 #define LANGUAGE_VERSION_USABLE_VIA_WASM 13 -typedef struct +typedef struct s_table_entry { const t_parse_actions *actions; t_u32 action_count; bool is_reusable; -} TableEntry; +} t_table_entry; -typedef struct +typedef struct s_lookahead_iterator { const t_language *language; const t_u16 *data; @@ -34,17 +33,17 @@ typedef struct t_symbol symbol; t_state_id next_state; t_u16 action_count; -} LookaheadIterator; +} t_lookahead_iterator; void ts_language_table_entry(const t_language *, t_state_id, t_symbol, - TableEntry *); + t_table_entry *); t_symbol_metadata ts_language_symbol_metadata(const t_language *, t_symbol); t_symbol ts_language_public_symbol(const t_language *, t_symbol); t_state_id ts_language_next_state(const t_language *self, t_state_id state, - t_symbol symbol); + t_symbol symbol); static inline bool ts_language_is_symbol_external(const t_language *self, t_symbol symbol) @@ -54,10 +53,10 @@ static inline bool ts_language_is_symbol_external(const t_language *self, static inline const t_parse_actions *ts_language_actions(const t_language *self, t_state_id state, - t_symbol symbol, - t_u32 *count) + t_symbol symbol, + t_u32 *count) { - TableEntry entry; + t_table_entry entry; ts_language_table_entry(self, state, symbol, &entry); *count = entry.action_count; return entry.actions; @@ -67,7 +66,7 @@ static inline bool 
ts_language_has_reduce_action(const t_language *self, t_state_id state, t_symbol symbol) { - TableEntry entry; + t_table_entry entry; ts_language_table_entry(self, state, symbol, &entry); return entry.action_count > 0 && entry.actions[0].type == ActionTypeReduce; } @@ -118,8 +117,8 @@ static inline bool ts_language_has_actions(const t_language *self, // all possible symbols and checking the parse table for each one. // For 'small' parse states, this exploits the structure of the // table to only visit the valid symbols. -static inline LookaheadIterator ts_language_lookaheads(const t_language *self, - t_state_id state) +static inline t_lookahead_iterator ts_language_lookaheads( + const t_language *self, t_state_id state) { bool is_small_state = state >= self->large_state_count; const t_u16 *data; @@ -137,7 +136,7 @@ static inline LookaheadIterator ts_language_lookaheads(const t_language *self, { data = &self->parse_table[state * self->symbol_count] - 1; } - return (LookaheadIterator){ + return (t_lookahead_iterator){ .language = self, .data = data, .group_end = group_end, @@ -148,7 +147,7 @@ static inline LookaheadIterator ts_language_lookaheads(const t_language *self, }; } -static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) +static inline bool ts_lookahead_iterator__next(t_lookahead_iterator *self) { // For small parse states, valid symbols are listed explicitly, // grouped by their value. 
There's no need to look up the actions diff --git a/parser/src/parser.c b/parser/src/parser.c index 3c0e1c91..05eab7ef 100644 --- a/parser/src/parser.c +++ b/parser/src/parser.c @@ -1,15 +1,9 @@ -#include "./array.h" -#include "parser/error_costs.h" #include "./language.h" -#include "parser/parser_length.h" -#include "parser/lexer.h" -#include "./reduce_action.h" #include "./reusable_node.h" #include "./stack.h" #include "./subtree.h" #include "./tree.h" -#include "parser/api.h" #include #include #include @@ -18,6 +12,12 @@ #include #include "me/vec/vec_parser_range.h" +#include "me/vec/vec_reduce_action.h" +#include "parser/api.h" +#include "parser/error_costs.h" +#include "parser/lexer.h" +#include "parser/parser_length.h" +#include "parser/reduce_action.h" #include "parser/types/types_language.h" typedef t_u64 t_duration; @@ -117,7 +117,7 @@ typedef struct s_parser SubtreePool tree_pool; const t_language *language; void *wasm_store; - ReduceActionSet reduce_actions; + t_vec_reduce_action reduce_actions; Subtree finished_tree; SubtreeArray trailing_extras; SubtreeArray trailing_extras2; @@ -157,13 +157,13 @@ typedef enum e_error_comparison typedef struct s_string_input { const char *string; - t_u32 length; + t_u32 length; } t_string_input; // StringInput -static const char *ts_string_inpt_read(void *_self, t_u32 byte, - t_point point, t_u32 *length) +static const char *ts_string_inpt_read(void *_self, t_u32 byte, t_point point, + t_u32 *length) { (void)point; t_string_input *self = (t_string_input *)_self; @@ -219,7 +219,7 @@ static bool ts_parser__breakdown_top_of_stack(t_parser *self, for (t_u32 i = 0; i < pop.size; i++) { StackSlice slice = pop.contents[i]; - t_state_id state = ts_stack_state(self->stack, slice.version); + t_state_id state = ts_stack_state(self->stack, slice.version); Subtree parent = *array_front(&slice.subtrees); for (t_u32 j = 0, n = ts_subtree_child_count(parent); j < n; j++) @@ -367,7 +367,7 @@ static bool 
ts_parser__better_version_exists(t_parser *self, return true; } - t_parse_length position = ts_stack_position(self->stack, version); + t_parse_length position = ts_stack_position(self->stack, version); t_error_status status = { .cost = cost, .is_in_error = is_in_error, @@ -440,7 +440,7 @@ static void ts_parser__external_scanner_deserialize(t_parser *self, Subtree external_token) { const char *data = NULL; - t_u32 length = 0; + t_u32 length = 0; if (external_token.ptr) { data = ts_external_scanner_state_data( @@ -452,7 +452,7 @@ static void ts_parser__external_scanner_deserialize(t_parser *self, data, length); } -static bool ts_parser__external_scanner_scan(t_parser *self, +static bool ts_parser__external_scanner_scan(t_parser *self, t_state_id external_lex_state) { const bool *valid_external_tokens = @@ -463,12 +463,12 @@ static bool ts_parser__external_scanner_scan(t_parser *self, } static bool ts_parser__can_reuse_first_leaf(t_parser *self, t_state_id state, - Subtree tree, - TableEntry *table_entry) + Subtree tree, + t_table_entry *table_entry) { t_lex_modes current_lex_mode = self->language->lex_modes[state]; - t_symbol leaf_symbol = ts_subtree_leaf_symbol(tree); - t_state_id leaf_state = ts_subtree_leaf_parse_state(tree); + t_symbol leaf_symbol = ts_subtree_leaf_symbol(tree); + t_state_id leaf_state = ts_subtree_leaf_parse_state(tree); t_lex_modes leaf_lex_mode = self->language->lex_modes[leaf_state]; // At the end of a non-terminal extra node, the lexer normally returns @@ -506,25 +506,26 @@ static Subtree ts_parser__lex(t_parser *self, StackVersion version, return NULL_SUBTREE; } - const t_parse_length start_position = ts_stack_position(self->stack, version); + const t_parse_length start_position = + ts_stack_position(self->stack, version); const Subtree external_token = ts_stack_last_external_token(self->stack, version); - bool found_external_token = false; - bool error_mode = parse_state == ERROR_STATE; - bool skipped_error = false; - bool 
called_get_column = false; - t_i32 first_error_character = 0; - t_parse_length error_start_position = length_zero(); - t_parse_length error_end_position = length_zero(); - t_i32 lookahead_end_byte = 0; - t_i32 external_scanner_state_len = 0; - bool external_scanner_state_changed = false; + bool found_external_token = false; + bool error_mode = parse_state == ERROR_STATE; + bool skipped_error = false; + bool called_get_column = false; + t_i32 first_error_character = 0; + t_parse_length error_start_position = length_zero(); + t_parse_length error_end_position = length_zero(); + t_i32 lookahead_end_byte = 0; + t_i32 external_scanner_state_len = 0; + bool external_scanner_state_changed = false; ts_lexer_reset(&self->lexer, start_position); for (;;) { - bool found_token = false; + bool found_token = false; t_parse_length current_position = self->lexer.current_position; if (lex_mode.external_lex_state != 0) @@ -626,23 +627,24 @@ static Subtree ts_parser__lex(t_parser *self, StackVersion version, Subtree result; if (skipped_error) { - t_parse_length padding = length_sub(error_start_position, start_position); - t_parse_length size = length_sub(error_end_position, error_start_position); - t_u32 lookahead_bytes = - lookahead_end_byte - error_end_position.bytes; + t_parse_length padding = + length_sub(error_start_position, start_position); + t_parse_length size = + length_sub(error_end_position, error_start_position); + t_u32 lookahead_bytes = lookahead_end_byte - error_end_position.bytes; result = ts_subtree_new_error(&self->tree_pool, first_error_character, padding, size, lookahead_bytes, parse_state, self->language); } else { - bool is_keyword = false; - t_symbol symbol = self->lexer.data.result_symbol; - t_parse_length padding = + bool is_keyword = false; + t_symbol symbol = self->lexer.data.result_symbol; + t_parse_length padding = length_sub(self->lexer.token_start_position, start_position); - t_parse_length size = length_sub(self->lexer.token_end_position, - 
self->lexer.token_start_position); - t_u32 lookahead_bytes = + t_parse_length size = length_sub(self->lexer.token_end_position, + self->lexer.token_start_position); + t_u32 lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes; if (found_external_token) @@ -688,9 +690,9 @@ static Subtree ts_parser__lex(t_parser *self, StackVersion version, } static Subtree ts_parser__get_cached_token(t_parser *self, t_state_id state, - size_t position, - Subtree last_external_token, - TableEntry *table_entry) + size_t position, + Subtree last_external_token, + t_table_entry *table_entry) { t_token_cache *cache = &self->token_cache; if (cache->token.ptr && cache->byte_index == position && @@ -729,8 +731,8 @@ static void ts_parser__set_cached_token(t_parser *self, t_u32 byte_index, static Subtree ts_parser__reuse_node(t_parser *self, StackVersion version, t_state_id *state, t_u32 position, - Subtree last_external_token, - TableEntry *table_entry) + Subtree last_external_token, + t_table_entry *table_entry) { Subtree result; while ((result = reusable_node_tree(&self->reusable_node)).ptr) @@ -823,7 +825,8 @@ static Subtree ts_parser__reuse_node(t_parser *self, StackVersion version, // The decision is based on the trees' error costs (if any), their dynamic // precedence, and finally, as a default, by a recursive comparison of the // trees' symbols. 
-static bool ts_parser__select_parse_tree(t_parser *self, Subtree left, Subtree right) +static bool ts_parser__select_parse_tree(t_parser *self, Subtree left, + Subtree right) { if (!left.ptr) return true; @@ -901,7 +904,7 @@ static bool ts_parser__select_children(t_parser *self, Subtree left, ts_subtree_symbol(left), &self->scratch_trees, 0, self->language); return ts_parser__select_parse_tree(self, left, - ts_subtree_from_mut(scratch_tree)); + ts_subtree_from_mut(scratch_tree)); } static void ts_parser__shift(t_parser *self, StackVersion version, @@ -928,7 +931,7 @@ static void ts_parser__shift(t_parser *self, StackVersion version, static StackVersion ts_parser__reduce(t_parser *self, StackVersion version, t_symbol symbol, t_u32 count, - int dynamic_precedence, + int dynamic_precedence, t_u16 production_id, bool is_fragile, bool end_of_non_terminal_extra) { @@ -940,7 +943,7 @@ static StackVersion ts_parser__reduce(t_parser *self, StackVersion version, // contain the popped children, and push it onto the stack in place of the // children. 
StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); - t_u32 removed_version_count = 0; + t_u32 removed_version_count = 0; for (t_u32 i = 0; i < pop.size; i++) { StackSlice slice = pop.contents[i]; @@ -1074,7 +1077,7 @@ static void ts_parser__accept(t_parser *self, StackVersion version, if (!ts_subtree_extra(tree)) { assert(!tree.data.is_inline); - t_u32 child_count = ts_subtree_child_count(tree); + t_u32 child_count = ts_subtree_child_count(tree); const Subtree *children = ts_subtree_children(tree); for (t_u32 k = 0; k < child_count; k++) { @@ -1140,8 +1143,8 @@ static bool ts_parser__do_all_potential_reductions( continue; t_state_id state = ts_stack_state(self->stack, version); - bool has_shift_action = false; - array_clear(&self->reduce_actions); + bool has_shift_action = false; + self->reduce_actions.len = 0; t_symbol first_symbol, end_symbol; if (lookahead_symbol != 0) @@ -1157,7 +1160,7 @@ static bool ts_parser__do_all_potential_reductions( for (t_symbol symbol = first_symbol; symbol < end_symbol; symbol++) { - TableEntry entry; + t_table_entry entry; ts_language_table_entry(self->language, state, symbol, &entry); for (t_u32 j = 0; j < entry.action_count; j++) { @@ -1173,7 +1176,7 @@ static bool ts_parser__do_all_potential_reductions( if (action.reduce.child_count > 0) ts_reduce_action_set_add( &self->reduce_actions, - (ReduceAction){ + (t_reduce_action){ .symbol = action.reduce.symbol, .count = action.reduce.child_count, .dynamic_precedence = @@ -1188,9 +1191,9 @@ static bool ts_parser__do_all_potential_reductions( } StackVersion reduction_version = STACK_VERSION_NONE; - for (t_u32 j = 0; j < self->reduce_actions.size; j++) + for (t_u32 j = 0; j < self->reduce_actions.len; j++) { - ReduceAction action = self->reduce_actions.contents[j]; + t_reduce_action action = self->reduce_actions.buffer[j]; reduction_version = ts_parser__reduce( self, version, action.symbol, action.count, @@ -1255,8 +1258,8 @@ static bool 
ts_parser__recover_to_state(t_parser *self, StackVersion version, if (error_trees.size > 0) { assert(error_trees.size == 1); - Subtree error_tree = error_trees.contents[0]; - t_u32 error_child_count = ts_subtree_child_count(error_tree); + Subtree error_tree = error_trees.contents[0]; + t_u32 error_child_count = ts_subtree_child_count(error_tree); if (error_child_count > 0) { array_splice(&slice.subtrees, 0, 0, error_child_count, @@ -1298,11 +1301,11 @@ static bool ts_parser__recover_to_state(t_parser *self, StackVersion version, static void ts_parser__recover(t_parser *self, StackVersion version, Subtree lookahead) { - bool did_recover = false; - unsigned previous_version_count = ts_stack_version_count(self->stack); - t_parse_length position = ts_stack_position(self->stack, version); - StackSummary *summary = ts_stack_get_summary(self->stack, version); - unsigned node_count_since_error = + bool did_recover = false; + unsigned previous_version_count = ts_stack_version_count(self->stack); + t_parse_length position = ts_stack_position(self->stack, version); + StackSummary *summary = ts_stack_get_summary(self->stack, version); + unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version); unsigned current_error_cost = ts_stack_error_cost(self->stack, version); @@ -1440,7 +1443,7 @@ static void ts_parser__recover(t_parser *self, StackVersion version, // If the current lookahead token is an extra token, mark it as extra. This // means it won't be counted in error cost calculations. - unsigned n; + unsigned n; const t_parse_actions *actions = ts_language_actions( self->language, 1, ts_subtree_symbol(lookahead), &n); if (n > 0 && actions[n - 1].type == ActionTypeShift && @@ -1514,8 +1517,8 @@ static void ts_parser__handle_error(t_parser *self, StackVersion version, // lookahead. After skipping one or more invalid tokens, the parser might // find a token that would have allowed a reduction to take place. 
ts_parser__do_all_potential_reductions(self, version, 0); - t_u32 version_count = ts_stack_version_count(self->stack); - t_parse_length position = ts_stack_position(self->stack, version); + t_u32 version_count = ts_stack_version_count(self->stack); + t_parse_length position = ts_stack_position(self->stack, version); // Push a discontinuity onto the stack. Merge all of the stack versions that // were created in the previous step. @@ -1610,13 +1613,13 @@ static bool ts_parser__advance(t_parser *self, StackVersion version, bool allow_node_reuse) { t_state_id state = ts_stack_state(self->stack, version); - t_u32 position = ts_stack_position(self->stack, version).bytes; - Subtree last_external_token = + t_u32 position = ts_stack_position(self->stack, version).bytes; + Subtree last_external_token = ts_stack_last_external_token(self->stack, version); - bool did_reuse = true; - Subtree lookahead = NULL_SUBTREE; - TableEntry table_entry = {.action_count = 0}; + bool did_reuse = true; + Subtree lookahead = NULL_SUBTREE; + t_table_entry table_entry = {.action_count = 0}; // If possible, reuse a node from the previous syntax tree. 
if (allow_node_reuse) @@ -1980,8 +1983,7 @@ t_parser *ts_parser_new(void) { t_parser *self = calloc(1, sizeof(t_parser)); ts_lexer_init(&self->lexer); - array_init(&self->reduce_actions); - array_reserve(&self->reduce_actions, 4); + self->reduce_actions = vec_reduce_action_new(4, NULL); self->tree_pool = ts_subtree_pool_new(32); self->stack = ts_stack_new(&self->tree_pool); self->finished_tree = NULL_SUBTREE; @@ -2008,7 +2010,7 @@ void ts_parser_delete(t_parser *self) ts_parser_set_language(self, NULL); ts_stack_delete(self->stack); - if (self->reduce_actions.contents) + if (self->reduce_actions.buffer) { array_delete(&self->reduce_actions); } @@ -2113,7 +2115,7 @@ bool ts_parser_set_included_ranges(t_parser *self, const t_parser_range *ranges, } const t_parser_range *ts_parser_included_ranges(const t_parser *self, - t_u32 *count) + t_u32 *count) { return ts_lexer_included_ranges(&self->lexer, count); } @@ -2141,7 +2143,8 @@ void ts_parser_reset(t_parser *self) self->has_scanner_error = false; } -t_parse_tree *ts_parser_parse(t_parser *self, const t_parse_tree *old_tree, t_parse_input input) +t_parse_tree *ts_parser_parse(t_parser *self, const t_parse_tree *old_tree, + t_parse_input input) { t_parse_tree *result = NULL; old_tree = NULL; @@ -2254,16 +2257,18 @@ exit: return result; } -t_parse_tree *ts_parser_parse_string(t_parser *self, const t_parse_tree *old_tree, - const char *string, t_u32 length) +t_parse_tree *ts_parser_parse_string(t_parser *self, + const t_parse_tree *old_tree, + const char *string, t_u32 length) { return ts_parser_parse_string_encoding(self, old_tree, string, length, InputEncoding8); } -t_parse_tree *ts_parser_parse_string_encoding(t_parser *self, const t_parse_tree *old_tree, - const char *string, t_u32 length, - t_input_encoding encoding) +t_parse_tree *ts_parser_parse_string_encoding(t_parser *self, + const t_parse_tree *old_tree, + const char *string, t_u32 length, + t_input_encoding encoding) { t_string_input input = {string, length}; 
return ts_parser_parse(self, old_tree, diff --git a/parser/src/reduce_action.h b/parser/src/reduce_action.h deleted file mode 100644 index 0cdb1e52..00000000 --- a/parser/src/reduce_action.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef TREE_SITTER_REDUCE_ACTION_H_ -#define TREE_SITTER_REDUCE_ACTION_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./array.h" -#include "parser/api.h" - -typedef struct { - t_u32 count; - t_symbol symbol; - int dynamic_precedence; - unsigned short production_id; -} ReduceAction; - -typedef Array(ReduceAction) ReduceActionSet; - -static inline void ts_reduce_action_set_add(ReduceActionSet *self, - ReduceAction new_action) { - for (t_u32 i = 0; i < self->size; i++) { - ReduceAction action = self->contents[i]; - if (action.symbol == new_action.symbol && action.count == new_action.count) - return; - } - array_push(self, new_action); -} - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_REDUCE_ACTION_H_ diff --git a/parser/src/subtree.h b/parser/src/subtree.h index c748c5c8..3fbe3eb1 100644 --- a/parser/src/subtree.h +++ b/parser/src/subtree.h @@ -7,7 +7,6 @@ #include "./array.h" #include "parser/error_costs.h" #include "parser/parser_length.h" -#include "./parser.h" #include "parser/api.h" #include #include diff --git a/parser/src/tree.h b/parser/src/tree.h index e13c7d41..360974f0 100644 --- a/parser/src/tree.h +++ b/parser/src/tree.h @@ -3,29 +3,25 @@ #include "./subtree.h" -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - const Subtree *child; - const Subtree *parent; - t_parse_length position; - t_symbol alias_symbol; +typedef struct +{ + const Subtree *child; + const Subtree *parent; + t_parse_length position; + t_symbol alias_symbol; } ParentCacheEntry; -struct t_parse_tree { - Subtree root; - const t_language *language; - t_parser_range *included_ranges; - unsigned included_range_count; +struct s_parse_tree +{ + Subtree root; + const t_language *language; + t_parser_range *included_ranges; + t_u32 
included_range_count; }; -t_parse_tree *ts_tree_new(Subtree root, const t_language *language, const t_parser_range *, unsigned); -t_parse_node ts_node_new(const t_parse_tree *, const Subtree *, t_parse_length, t_symbol); +t_parse_tree *ts_tree_new(Subtree root, const t_language *language, + const t_parser_range *, t_u32); +t_parse_node ts_node_new(const t_parse_tree *, const Subtree *, t_parse_length, + t_symbol); -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_TREE_H_ +#endif // TREE_SITTER_TREE_H_ From 126bd817129b5432c9bfd6182cdc6f86da9956a8 Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Tue, 30 Apr 2024 16:09:01 +0200 Subject: [PATCH 06/14] fixed some stuff --- parser/src/parser.c | 67 +-------------------------------------------- sources/ft_exit.c | 12 ++------ sources/main.c | 4 +-- 3 files changed, 4 insertions(+), 79 deletions(-) diff --git a/parser/src/parser.c b/parser/src/parser.c index 2201d321..87c2e594 100644 --- a/parser/src/parser.c +++ b/parser/src/parser.c @@ -367,11 +367,7 @@ static bool ts_parser__better_version_exists(t_parser *self, return true; } -<<<<<<< HEAD t_parse_length position = ts_stack_position(self->stack, version); -======= - t_parse_length position = ts_stack_position(self->stack, version); ->>>>>>> master t_error_status status = { .cost = cost, .is_in_error = is_in_error, @@ -510,7 +506,6 @@ static Subtree ts_parser__lex(t_parser *self, StackVersion version, return NULL_SUBTREE; } -<<<<<<< HEAD const t_parse_length start_position = ts_stack_position(self->stack, version); const Subtree external_token = @@ -526,31 +521,11 @@ static Subtree ts_parser__lex(t_parser *self, StackVersion version, t_i32 lookahead_end_byte = 0; t_i32 external_scanner_state_len = 0; bool external_scanner_state_changed = false; -======= - const t_parse_length start_position = ts_stack_position(self->stack, version); - const Subtree external_token = - ts_stack_last_external_token(self->stack, version); - - bool found_external_token = false; - 
bool error_mode = parse_state == ERROR_STATE; - bool skipped_error = false; - bool called_get_column = false; - t_i32 first_error_character = 0; - t_parse_length error_start_position = length_zero(); - t_parse_length error_end_position = length_zero(); - t_i32 lookahead_end_byte = 0; - t_i32 external_scanner_state_len = 0; - bool external_scanner_state_changed = false; ->>>>>>> master + bool found_token; ts_lexer_reset(&self->lexer, start_position); for (;;) { -<<<<<<< HEAD - bool found_token = false; -======= - bool found_token = false; ->>>>>>> master t_parse_length current_position = self->lexer.current_position; if (lex_mode.external_lex_state != 0) @@ -652,25 +627,17 @@ static Subtree ts_parser__lex(t_parser *self, StackVersion version, Subtree result; if (skipped_error) { -<<<<<<< HEAD t_parse_length padding = length_sub(error_start_position, start_position); t_parse_length size = length_sub(error_end_position, error_start_position); t_u32 lookahead_bytes = lookahead_end_byte - error_end_position.bytes; -======= - t_parse_length padding = length_sub(error_start_position, start_position); - t_parse_length size = length_sub(error_end_position, error_start_position); - t_u32 lookahead_bytes = - lookahead_end_byte - error_end_position.bytes; ->>>>>>> master result = ts_subtree_new_error(&self->tree_pool, first_error_character, padding, size, lookahead_bytes, parse_state, self->language); } else { -<<<<<<< HEAD bool is_keyword = false; t_symbol symbol = self->lexer.data.result_symbol; t_parse_length padding = @@ -678,15 +645,6 @@ static Subtree ts_parser__lex(t_parser *self, StackVersion version, t_parse_length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position); t_u32 lookahead_bytes = -======= - bool is_keyword = false; - t_symbol symbol = self->lexer.data.result_symbol; - t_parse_length padding = - length_sub(self->lexer.token_start_position, start_position); - t_parse_length size = length_sub(self->lexer.token_end_position, - 
self->lexer.token_start_position); - t_u32 lookahead_bytes = ->>>>>>> master lookahead_end_byte - self->lexer.token_end_position.bytes; if (found_external_token) @@ -1343,19 +1301,11 @@ static bool ts_parser__recover_to_state(t_parser *self, StackVersion version, static void ts_parser__recover(t_parser *self, StackVersion version, Subtree lookahead) { -<<<<<<< HEAD bool did_recover = false; unsigned previous_version_count = ts_stack_version_count(self->stack); t_parse_length position = ts_stack_position(self->stack, version); StackSummary *summary = ts_stack_get_summary(self->stack, version); unsigned node_count_since_error = -======= - bool did_recover = false; - unsigned previous_version_count = ts_stack_version_count(self->stack); - t_parse_length position = ts_stack_position(self->stack, version); - StackSummary *summary = ts_stack_get_summary(self->stack, version); - unsigned node_count_since_error = ->>>>>>> master ts_stack_node_count_since_error(self->stack, version); unsigned current_error_cost = ts_stack_error_cost(self->stack, version); @@ -1567,13 +1517,8 @@ static void ts_parser__handle_error(t_parser *self, StackVersion version, // lookahead. After skipping one or more invalid tokens, the parser might // find a token that would have allowed a reduction to take place. ts_parser__do_all_potential_reductions(self, version, 0); -<<<<<<< HEAD t_u32 version_count = ts_stack_version_count(self->stack); t_parse_length position = ts_stack_position(self->stack, version); -======= - t_u32 version_count = ts_stack_version_count(self->stack); - t_parse_length position = ts_stack_position(self->stack, version); ->>>>>>> master // Push a discontinuity onto the stack. Merge all of the stack versions that // were created in the previous step. 
@@ -2198,12 +2143,8 @@ void ts_parser_reset(t_parser *self) self->has_scanner_error = false; } -<<<<<<< HEAD t_parse_tree *ts_parser_parse(t_parser *self, const t_parse_tree *old_tree, t_parse_input input) -======= -t_parse_tree *ts_parser_parse(t_parser *self, const t_parse_tree *old_tree, t_parse_input input) ->>>>>>> master { t_parse_tree *result = NULL; old_tree = NULL; @@ -2324,16 +2265,10 @@ t_parse_tree *ts_parser_parse_string(t_parser *self, InputEncoding8); } -<<<<<<< HEAD t_parse_tree *ts_parser_parse_string_encoding(t_parser *self, const t_parse_tree *old_tree, const char *string, t_u32 length, t_input_encoding encoding) -======= -t_parse_tree *ts_parser_parse_string_encoding(t_parser *self, const t_parse_tree *old_tree, - const char *string, t_u32 length, - t_input_encoding encoding) ->>>>>>> master { t_string_input input = {string, length}; return ts_parser_parse(self, old_tree, diff --git a/sources/ft_exit.c b/sources/ft_exit.c index 3a6518a8..44d844c8 100644 --- a/sources/ft_exit.c +++ b/sources/ft_exit.c @@ -6,7 +6,7 @@ /* By: rparodi +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/03/29 11:35:51 by rparodi #+# #+# */ -/* Updated: 2024/04/13 20:15:37 by rparodi ### ########.fr */ +/* Updated: 2024/04/30 16:07:48 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -34,15 +34,7 @@ void ft_free_strs(t_str *strs) void ft_free_utils(t_utils *s) { - if (s->name_shell) - ft_free(s->name_shell); - if (s->str_input) - ft_free(s->str_input); - if (s->strs_input) - ft_free_strs(s->strs_input); - if (s->path) - ft_free_strs(s->path); - free(s); + (void)(s); } void ft_exit(t_utils *maiboyerlpb, t_u8 exit_status) diff --git a/sources/main.c b/sources/main.c index 26c24532..481c945d 100644 --- a/sources/main.c +++ b/sources/main.c @@ -6,7 +6,7 @@ /* By: rparodi +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/03/28 14:40:38 by rparodi #+# #+# */ -/* Updated: 2024/04/30 13:02:39 by 
maiboyer ### ########.fr */ +/* Updated: 2024/04/30 16:06:00 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -46,9 +46,7 @@ void ft_take_args(t_utils *shcat) shcat->strs_input = ft_split(shcat->str_input, ' '); if (!shcat->strs_input) exit(1); - ft_check(shcat, shcat->strs_input); add_history(shcat->str_input); - ft_free_strs(shcat->strs_input); free(shcat->str_input); i++; } From 91e2c5227096977b50508046069635c7ad6a0f4a Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Tue, 30 Apr 2024 17:37:59 +0200 Subject: [PATCH 07/14] make things work normally --- includes/app/node.h | 2 +- includes/minishell.h | 2 +- parser/Filelist.mk | 11 +- parser/Makefile | 5 +- parser/create_language.c | 3 +- parser/includes/lexer.h | 2 +- parser/includes/parser_length.h | 2 +- parser/includes/reduce_action.h | 2 +- parser/src/alloc.c | 48 + parser/src/alloc.h | 41 + parser/src/api.h | 1273 ++++++++++ parser/src/array.h | 372 ++- parser/src/atomic.h | 68 + parser/src/clock.h | 146 ++ parser/src/error_costs.h | 11 + parser/src/get_changed_ranges.c | 501 ++++ parser/src/get_changed_ranges.h | 36 + parser/src/host.h | 21 + parser/src/language.c | 143 +- parser/src/language.h | 496 ++-- parser/src/length.h | 52 + parser/src/lexer.c | 716 +++--- parser/src/lexer.h | 49 + parser/src/lib.c | 13 + parser/src/node.c | 1355 +++++----- parser/src/parser.c | 3789 ++++++++++++++-------------- parser/src/parser.h | 265 ++ parser/src/point.h | 62 + parser/src/query.c | 4134 +++++++++++++++++++++++++++++++ parser/src/reduce_action.h | 34 + parser/src/reusable_node.h | 10 +- parser/src/scanner.c | 2573 +++++++++---------- parser/src/stack.c | 78 +- parser/src/stack.h | 20 +- parser/src/subtree.c | 1918 +++++++------- parser/src/subtree.h | 545 ++-- parser/src/tree.c | 83 +- parser/src/tree.h | 38 +- parser/src/tree_cursor.c | 152 +- parser/src/tree_cursor.h | 28 +- parser/src/unicode.h | 50 + sources/main.c | 87 +- 
sources/node/node.c | 2 +- 43 files changed, 12576 insertions(+), 6662 deletions(-) create mode 100644 parser/src/alloc.c create mode 100644 parser/src/alloc.h create mode 100644 parser/src/api.h create mode 100644 parser/src/atomic.h create mode 100644 parser/src/clock.h create mode 100644 parser/src/error_costs.h create mode 100644 parser/src/get_changed_ranges.c create mode 100644 parser/src/get_changed_ranges.h create mode 100644 parser/src/host.h create mode 100644 parser/src/length.h create mode 100644 parser/src/lexer.h create mode 100644 parser/src/lib.c create mode 100644 parser/src/parser.h create mode 100644 parser/src/point.h create mode 100644 parser/src/query.c create mode 100644 parser/src/reduce_action.h create mode 100644 parser/src/unicode.h diff --git a/includes/app/node.h b/includes/app/node.h index e44c339e..7d884d6a 100644 --- a/includes/app/node.h +++ b/includes/app/node.h @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/04/28 18:35:22 by maiboyer #+# #+# */ -/* Updated: 2024/04/30 13:02:06 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 16:41:44 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ diff --git a/includes/minishell.h b/includes/minishell.h index dcbb8284..80e87435 100644 --- a/includes/minishell.h +++ b/includes/minishell.h @@ -6,7 +6,7 @@ /* By: rparodi +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/03/28 14:41:15 by rparodi #+# #+# */ -/* Updated: 2024/04/30 15:42:51 by rparodi ### ########.fr */ +/* Updated: 2024/04/30 16:41:57 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ diff --git a/parser/Filelist.mk b/parser/Filelist.mk index 2d9180e2..b9e4a64b 100644 --- a/parser/Filelist.mk +++ b/parser/Filelist.mk @@ -4376,13 +4376,4 @@ static/unique_symbols_map/unique_symbols_map_2 \ static/lex_funcs/lex_normal/state_helper \ 
static/lex_funcs/lex_normal/state_helper2 \ static/lex_funcs/lex_keywords/state_0_bis \ -static/lex_funcs/lex_keywords/state_4_bis \ -src/language \ -src/lexer \ -src/node \ -src/parser \ -src/scanner \ -src/stack \ -src/subtree \ -src/tree \ -src/tree_cursor \ +static/lex_funcs/lex_keywords/state_4_bis \ No newline at end of file diff --git a/parser/Makefile b/parser/Makefile index 2aa6a50d..d0a4931c 100644 --- a/parser/Makefile +++ b/parser/Makefile @@ -6,7 +6,7 @@ # By: maiboyer +#+ +:+ +#+ # # +#+#+#+#+#+ +#+ # # Created: 2023/11/03 13:20:01 by maiboyer #+# #+# # -# Updated: 2024/04/30 13:35:56 by maiboyer ### ########.fr # +# Updated: 2024/04/30 17:20:27 by maiboyer ### ########.fr # # # # **************************************************************************** # @@ -22,6 +22,7 @@ CFLAGS = -Wall -Wextra -Werror -MMD -I./includes -I../includes -I../output/inc include ./Filelist.mk +SRC_FILES += ./src/lib ./src/scanner SRC = $(addsuffix .c,$(addprefix $(SRC_DIR)/,$(SRC_FILES))) OBJ = $(addsuffix .o,$(addprefix $(BUILD_DIR)/,$(SRC_FILES))) DEPS = $(addsuffix .d,$(addprefix $(BUILD_DIR)/,$(SRC_FILES))) @@ -77,4 +78,4 @@ re: generate_filelist: @/usr/bin/env zsh -c "tree -iFf --noreport $(SRC_DIR) | rg '^$(SRC_DIR)/(.*)\.c\$$' --replace '\$$1' | sort -u" > ./source_files.list --include $(DEPS) +# -include $(DEPS) diff --git a/parser/create_language.c b/parser/create_language.c index f62dcde0..75aa116d 100644 --- a/parser/create_language.c +++ b/parser/create_language.c @@ -6,11 +6,10 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/04/25 16:13:52 by maiboyer #+# #+# */ -/* Updated: 2024/04/28 17:15:16 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 16:37:30 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ -#include "./includes/parser.h" #include "./static/headers/constants.h" #include "./static/headers/symbols.h" #include "./parse_types.h" diff --git 
a/parser/includes/lexer.h b/parser/includes/lexer.h index ee6be79f..79651d7a 100644 --- a/parser/includes/lexer.h +++ b/parser/includes/lexer.h @@ -14,7 +14,7 @@ #define LEXER_H #include "me/types.h" -#include "parser/api.h" +#include "./api.h" #include "parser/parser_length.h" #include "parser/types/types_lexer.h" diff --git a/parser/includes/parser_length.h b/parser/includes/parser_length.h index d47b11de..78b37591 100644 --- a/parser/includes/parser_length.h +++ b/parser/includes/parser_length.h @@ -2,7 +2,7 @@ #define TREE_SITTER_LENGTH_H_ #include "parser/point.h" -#include "parser/api.h" +#include "./api.h" #include #include diff --git a/parser/includes/reduce_action.h b/parser/includes/reduce_action.h index 53295342..228ae3e8 100644 --- a/parser/includes/reduce_action.h +++ b/parser/includes/reduce_action.h @@ -15,7 +15,7 @@ #include "me/types.h" #include "me/vec/vec_reduce_action.h" -#include "parser/api.h" +#include "./api.h" #include "parser/types/types_reduce_action.h" static inline void ts_reduce_action_set_add(t_vec_reduce_action *self, diff --git a/parser/src/alloc.c b/parser/src/alloc.c new file mode 100644 index 00000000..79844287 --- /dev/null +++ b/parser/src/alloc.c @@ -0,0 +1,48 @@ +#include "alloc.h" +#include "./api.h" +#include + +static void *ts_malloc_default(size_t size) { + void *result = malloc(size); + if (size > 0 && !result) { + fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); + abort(); + } + return result; +} + +static void *ts_calloc_default(size_t count, size_t size) { + void *result = calloc(count, size); + if (count > 0 && !result) { + fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); + abort(); + } + return result; +} + +static void *ts_realloc_default(void *buffer, size_t size) { + void *result = realloc(buffer, size); + if (size > 0 && !result) { + fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); + abort(); + } + return result; +} + +// Allow clients to override 
allocation functions dynamically +TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default; +TS_PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default; +TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default; +TS_PUBLIC void (*ts_current_free)(void *) = free; + +void ts_set_allocator( + void *(*new_malloc)(size_t size), + void *(*new_calloc)(size_t count, size_t size), + void *(*new_realloc)(void *ptr, size_t size), + void (*new_free)(void *ptr) +) { + ts_current_malloc = new_malloc ? new_malloc : ts_malloc_default; + ts_current_calloc = new_calloc ? new_calloc : ts_calloc_default; + ts_current_realloc = new_realloc ? new_realloc : ts_realloc_default; + ts_current_free = new_free ? new_free : free; +} diff --git a/parser/src/alloc.h b/parser/src/alloc.h new file mode 100644 index 00000000..a0eadb7a --- /dev/null +++ b/parser/src/alloc.h @@ -0,0 +1,41 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32) +#define TS_PUBLIC +#else +#define TS_PUBLIC __attribute__((visibility("default"))) +#endif + +TS_PUBLIC extern void *(*ts_current_malloc)(size_t); +TS_PUBLIC extern void *(*ts_current_calloc)(size_t, size_t); +TS_PUBLIC extern void *(*ts_current_realloc)(void *, size_t); +TS_PUBLIC extern void (*ts_current_free)(void *); + +// Allow clients to override allocation functions +#ifndef ts_malloc +#define ts_malloc ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free ts_current_free +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/parser/src/api.h b/parser/src/api.h new file mode 100644 index 00000000..deb2364e --- /dev/null +++ b/parser/src/api.h @@ -0,0 +1,1273 @@ +#ifndef TREE_SITTER_API_H_ +#define 
TREE_SITTER_API_H_ + +#ifndef TREE_SITTER_HIDE_SYMBOLS +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC visibility push(default) +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/****************************/ +/* Section - ABI Versioning */ +/****************************/ + +/** + * The latest ABI version that is supported by the current version of the + * library. When Languages are generated by the Tree-sitter CLI, they are + * assigned an ABI version number that corresponds to the current CLI version. + * The Tree-sitter library is generally backwards-compatible with languages + * generated using older CLI versions, but is not forwards-compatible. + */ +#define TREE_SITTER_LANGUAGE_VERSION 14 + +/** + * The earliest ABI version that is supported by the current version of the + * library. + */ +#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 13 + +/*******************/ +/* Section - Types */ +/*******************/ + +typedef uint16_t TSStateId; +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +typedef struct TSParser TSParser; +typedef struct TSTree TSTree; +typedef struct TSQuery TSQuery; +typedef struct TSQueryCursor TSQueryCursor; +typedef struct TSLookaheadIterator TSLookaheadIterator; + +typedef enum TSInputEncoding { + TSInputEncodingUTF8, + TSInputEncodingUTF16, +} TSInputEncoding; + +typedef enum TSSymbolType { + TSSymbolTypeRegular, + TSSymbolTypeAnonymous, + TSSymbolTypeAuxiliary, +} TSSymbolType; + +typedef struct TSPoint { + uint32_t row; + uint32_t column; +} TSPoint; + +typedef struct TSRange { + TSPoint start_point; + TSPoint end_point; + uint32_t start_byte; + uint32_t end_byte; +} TSRange; + +typedef struct TSInput { + void *payload; + const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read); + TSInputEncoding encoding; +} TSInput; + +typedef enum TSLogType { + TSLogTypeParse, + TSLogTypeLex, +} 
TSLogType; + +typedef struct TSLogger { + void *payload; + void (*log)(void *payload, TSLogType log_type, const char *buffer); +} TSLogger; + +typedef struct TSInputEdit { + uint32_t start_byte; + uint32_t old_end_byte; + uint32_t new_end_byte; + TSPoint start_point; + TSPoint old_end_point; + TSPoint new_end_point; +} TSInputEdit; + +typedef struct TSNode { + uint32_t context[4]; + const void *id; + const TSTree *tree; +} TSNode; + +typedef struct TSTreeCursor { + const void *tree; + const void *id; + uint32_t context[3]; +} TSTreeCursor; + +typedef struct TSQueryCapture { + TSNode node; + uint32_t index; +} TSQueryCapture; + +typedef enum TSQuantifier { + TSQuantifierZero = 0, // must match the array initialization value + TSQuantifierZeroOrOne, + TSQuantifierZeroOrMore, + TSQuantifierOne, + TSQuantifierOneOrMore, +} TSQuantifier; + +typedef struct TSQueryMatch { + uint32_t id; + uint16_t pattern_index; + uint16_t capture_count; + const TSQueryCapture *captures; +} TSQueryMatch; + +typedef enum TSQueryPredicateStepType { + TSQueryPredicateStepTypeDone, + TSQueryPredicateStepTypeCapture, + TSQueryPredicateStepTypeString, +} TSQueryPredicateStepType; + +typedef struct TSQueryPredicateStep { + TSQueryPredicateStepType type; + uint32_t value_id; +} TSQueryPredicateStep; + +typedef enum TSQueryError { + TSQueryErrorNone = 0, + TSQueryErrorSyntax, + TSQueryErrorNodeType, + TSQueryErrorField, + TSQueryErrorCapture, + TSQueryErrorStructure, + TSQueryErrorLanguage, +} TSQueryError; + +/********************/ +/* Section - Parser */ +/********************/ + +/** + * Create a new parser. + */ +TSParser *ts_parser_new(void); + +/** + * Delete the parser, freeing all of the memory that it used. + */ +void ts_parser_delete(TSParser *self); + +/** + * Get the parser's current language. + */ +const TSLanguage *ts_parser_language(const TSParser *self); + +/** + * Set the language that the parser should use for parsing. 
+ * + * Returns a boolean indicating whether or not the language was successfully + * assigned. True means assignment succeeded. False means there was a version + * mismatch: the language was generated with an incompatible version of the + * Tree-sitter CLI. Check the language's version using [`ts_language_version`] + * and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and + * [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants. + */ +bool ts_parser_set_language(TSParser *self, const TSLanguage *language); + +/** + * Set the ranges of text that the parser should include when parsing. + * + * By default, the parser will always include entire documents. This function + * allows you to parse only a *portion* of a document but still return a syntax + * tree whose ranges match up with the document as a whole. You can also pass + * multiple disjoint ranges. + * + * The second and third parameters specify the location and length of an array + * of ranges. The parser does *not* take ownership of these ranges; it copies + * the data, so it doesn't matter how these ranges are allocated. + * + * If `count` is zero, then the entire document will be parsed. Otherwise, + * the given ranges must be ordered from earliest to latest in the document, + * and they must not overlap. That is, the following must hold for all: + * + * `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte` + * + * If this requirement is not satisfied, the operation will fail, the ranges + * will not be assigned, and this function will return `false`. On success, + * this function returns `true` + */ +bool ts_parser_set_included_ranges( + TSParser *self, + const TSRange *ranges, + uint32_t count +); + +/** + * Get the ranges of text that the parser will include when parsing. + * + * The returned pointer is owned by the parser. The caller should not free it + * or write to it. The length of the array will be written to the given + * `count` pointer. 
+ */ +const TSRange *ts_parser_included_ranges( + const TSParser *self, + uint32_t *count +); + +/** + * Use the parser to parse some source code and create a syntax tree. + * + * If you are parsing this document for the first time, pass `NULL` for the + * `old_tree` parameter. Otherwise, if you have already parsed an earlier + * version of this document and the document has since been edited, pass the + * previous syntax tree so that the unchanged parts of it can be reused. + * This will save time and memory. For this to work correctly, you must have + * already edited the old syntax tree using the [`ts_tree_edit`] function in a + * way that exactly matches the source code changes. + * + * The [`TSInput`] parameter lets you specify how to read the text. It has the + * following three fields: + * 1. [`read`]: A function to retrieve a chunk of text at a given byte offset + * and (row, column) position. The function should return a pointer to the + * text and write its length to the [`bytes_read`] pointer. The parser does + * not take ownership of this buffer; it just borrows it until it has + * finished reading it. The function should write a zero value to the + * [`bytes_read`] pointer to indicate the end of the document. + * 2. [`payload`]: An arbitrary pointer that will be passed to each invocation + * of the [`read`] function. + * 3. [`encoding`]: An indication of how the text is encoded. Either + * `TSInputEncodingUTF8` or `TSInputEncodingUTF16`. + * + * This function returns a syntax tree on success, and `NULL` on failure. There + * are three possible reasons for failure: + * 1. The parser does not have a language assigned. Check for this using the + * [`ts_parser_language`] function. + * 2. Parsing was cancelled due to a timeout that was set by an earlier call to + * the [`ts_parser_set_timeout_micros`] function. You can resume parsing from + * where the parser left off by calling [`ts_parser_parse`] again with the + * same arguments.
Or you can start parsing from scratch by first calling + * [`ts_parser_reset`]. + * 3. Parsing was cancelled using a cancellation flag that was set by an + * earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing + * from where the parser left off by calling [`ts_parser_parse`] again with + * the same arguments. + * + * [`read`]: TSInput::read + * [`payload`]: TSInput::payload + * [`encoding`]: TSInput::encoding + * [`bytes_read`]: TSInput::read + */ +TSTree *ts_parser_parse( + TSParser *self, + const TSTree *old_tree, + TSInput input +); + +/** + * Use the parser to parse some source code stored in one contiguous buffer. + * The first two parameters are the same as in the [`ts_parser_parse`] function + * above. The second two parameters indicate the location of the buffer and its + * length in bytes. + */ +TSTree *ts_parser_parse_string( + TSParser *self, + const TSTree *old_tree, + const char *string, + uint32_t length +); + +/** + * Use the parser to parse some source code stored in one contiguous buffer with + * a given encoding. The first four parameters work the same as in the + * [`ts_parser_parse_string`] method above. The final parameter indicates whether + * the text is encoded as UTF8 or UTF16. + */ +TSTree *ts_parser_parse_string_encoding( + TSParser *self, + const TSTree *old_tree, + const char *string, + uint32_t length, + TSInputEncoding encoding +); + +/** + * Instruct the parser to start the next parse from the beginning. + * + * If the parser previously failed because of a timeout or a cancellation, then + * by default, it will resume where it left off on the next call to + * [`ts_parser_parse`] or other parsing functions. If you don't want to resume, + * and instead intend to use this parser to parse some other document, you must + * call [`ts_parser_reset`] first. + */ +void ts_parser_reset(TSParser *self); + +/** + * Set the maximum duration in microseconds that parsing should be allowed to + * take before halting.
+ * + * If parsing takes longer than this, it will halt early, returning NULL. + * See [`ts_parser_parse`] for more information. + */ +void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros); + +/** + * Get the duration in microseconds that parsing is allowed to take. + */ +uint64_t ts_parser_timeout_micros(const TSParser *self); + +/** + * Set the parser's current cancellation flag pointer. + * + * If a non-null pointer is assigned, then the parser will periodically read + * from this pointer during parsing. If it reads a non-zero value, it will + * halt early, returning NULL. See [`ts_parser_parse`] for more information. + */ +void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag); + +/** + * Get the parser's current cancellation flag pointer. + */ +const size_t *ts_parser_cancellation_flag(const TSParser *self); + +/** + * Set the logger that a parser should use during parsing. + * + * The parser does not take ownership over the logger payload. If a logger was + * previously assigned, the caller is responsible for releasing any memory + * owned by the previous logger. + */ +void ts_parser_set_logger(TSParser *self, TSLogger logger); + +/** + * Get the parser's current logger. + */ +TSLogger ts_parser_logger(const TSParser *self); + +/** + * Set the file descriptor to which the parser should write debugging graphs + * during parsing. The graphs are formatted in the DOT language. You may want + * to pipe these graphs directly to a `dot(1)` process in order to generate + * SVG output. You can turn off this logging by passing a negative number. + */ +void ts_parser_print_dot_graphs(TSParser *self, int fd); + +/******************/ +/* Section - Tree */ +/******************/ + +/** + * Create a shallow copy of the syntax tree. This is very fast. + * + * You need to copy a syntax tree in order to use it on more than one thread at + * a time, as syntax trees are not thread safe. 
+ */ +TSTree *ts_tree_copy(const TSTree *self); + +/** + * Delete the syntax tree, freeing all of the memory that it used. + */ +void ts_tree_delete(TSTree *self); + +/** + * Get the root node of the syntax tree. + */ +TSNode ts_tree_root_node(const TSTree *self); + +/** + * Get the root node of the syntax tree, but with its position + * shifted forward by the given offset. + */ +TSNode ts_tree_root_node_with_offset( + const TSTree *self, + uint32_t offset_bytes, + TSPoint offset_extent +); + +/** + * Get the language that was used to parse the syntax tree. + */ +const TSLanguage *ts_tree_language(const TSTree *self); + +/** + * Get the array of included ranges that was used to parse the syntax tree. + * + * The returned pointer must be freed by the caller. + */ +TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length); + +/** + * Edit the syntax tree to keep it in sync with source code that has been + * edited. + * + * You must describe the edit both in terms of byte offsets and in terms of + * (row, column) coordinates. + */ +void ts_tree_edit(TSTree *self, const TSInputEdit *edit); + +/** + * Compare an old edited syntax tree to a new syntax tree representing the same + * document, returning an array of ranges whose syntactic structure has changed. + * + * For this to work correctly, the old syntax tree must have been edited such + * that its ranges match up to the new tree. Generally, you'll want to call + * this function right after calling one of the [`ts_parser_parse`] functions. + * You need to pass the old tree that was passed to parse, as well as the new + * tree that was returned from that function. + * + * The returned array is allocated using `malloc` and the caller is responsible + * for freeing it using `free`. The length of the array will be written to the + * given `length` pointer. 
+ */ +TSRange *ts_tree_get_changed_ranges( + const TSTree *old_tree, + const TSTree *new_tree, + uint32_t *length +); + +/** + * Write a DOT graph describing the syntax tree to the given file. + */ +void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor); + +/******************/ +/* Section - Node */ +/******************/ + +/** + * Get the node's type as a null-terminated string. + */ +const char *ts_node_type(TSNode self); + +/** + * Get the node's type as a numerical id. + */ +TSSymbol ts_node_symbol(TSNode self); + +/** + * Get the node's language. + */ +const TSLanguage *ts_node_language(TSNode self); + +/** + * Get the node's type as it appears in the grammar ignoring aliases as a + * null-terminated string. + */ +const char *ts_node_grammar_type(TSNode self); + +/** + * Get the node's type as a numerical id as it appears in the grammar ignoring + * aliases. This should be used in [`ts_language_next_state`] instead of + * [`ts_node_symbol`]. + */ +TSSymbol ts_node_grammar_symbol(TSNode self); + +/** + * Get the node's start byte. + */ +uint32_t ts_node_start_byte(TSNode self); + +/** + * Get the node's start position in terms of rows and columns. + */ +TSPoint ts_node_start_point(TSNode self); + +/** + * Get the node's end byte. + */ +uint32_t ts_node_end_byte(TSNode self); + +/** + * Get the node's end position in terms of rows and columns. + */ +TSPoint ts_node_end_point(TSNode self); + +/** + * Get an S-expression representing the node as a string. + * + * This string is allocated with `malloc` and the caller is responsible for + * freeing it using `free`. + */ +char *ts_node_string(TSNode self); + +/** + * Check if the node is null. Functions like [`ts_node_child`] and + * [`ts_node_next_sibling`] will return a null node to indicate that no such node + * was found. + */ +bool ts_node_is_null(TSNode self); + +/** + * Check if the node is *named*. 
Named nodes correspond to named rules in the + * grammar, whereas *anonymous* nodes correspond to string literals in the + * grammar. + */ +bool ts_node_is_named(TSNode self); + +/** + * Check if the node is *missing*. Missing nodes are inserted by the parser in + * order to recover from certain kinds of syntax errors. + */ +bool ts_node_is_missing(TSNode self); + +/** + * Check if the node is *extra*. Extra nodes represent things like comments, + * which are not required by the grammar, but can appear anywhere. + */ +bool ts_node_is_extra(TSNode self); + +/** + * Check if a syntax node has been edited. + */ +bool ts_node_has_changes(TSNode self); + +/** + * Check if the node is a syntax error or contains any syntax errors. + */ +bool ts_node_has_error(TSNode self); + +/** + * Check if the node is a syntax error. +*/ +bool ts_node_is_error(TSNode self); + +/** + * Get this node's parse state. +*/ +TSStateId ts_node_parse_state(TSNode self); + +/** + * Get the parse state after this node. +*/ +TSStateId ts_node_next_parse_state(TSNode self); + +/** + * Get the node's immediate parent. + * Prefer [`ts_node_child_containing_descendant`] for + * iterating over the node's ancestors. + */ +TSNode ts_node_parent(TSNode self); + +/** + * Get the node's child that contains `descendant`. + */ +TSNode ts_node_child_containing_descendant(TSNode self, TSNode descendant); + +/** + * Get the node's child at the given index, where zero represents the first + * child. + */ +TSNode ts_node_child(TSNode self, uint32_t child_index); + +/** + * Get the field name for node's child at the given index, where zero represents + * the first child. Returns NULL, if no field is found. + */ +const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index); + +/** + * Get the node's number of children. + */ +uint32_t ts_node_child_count(TSNode self); + +/** + * Get the node's *named* child at the given index. + * + * See also [`ts_node_is_named`].
+ */ +TSNode ts_node_named_child(TSNode self, uint32_t child_index); + +/** + * Get the node's number of *named* children. + * + * See also [`ts_node_is_named`]. + */ +uint32_t ts_node_named_child_count(TSNode self); + +/** + * Get the node's child with the given field name. + */ +TSNode ts_node_child_by_field_name( + TSNode self, + const char *name, + uint32_t name_length +); + +/** + * Get the node's child with the given numerical field id. + * + * You can convert a field name to an id using the + * [`ts_language_field_id_for_name`] function. + */ +TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id); + +/** + * Get the node's next / previous sibling. + */ +TSNode ts_node_next_sibling(TSNode self); +TSNode ts_node_prev_sibling(TSNode self); + +/** + * Get the node's next / previous *named* sibling. + */ +TSNode ts_node_next_named_sibling(TSNode self); +TSNode ts_node_prev_named_sibling(TSNode self); + +/** + * Get the node's first child that extends beyond the given byte offset. + */ +TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte); + +/** + * Get the node's first named child that extends beyond the given byte offset. + */ +TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte); + +/** + * Get the node's number of descendants, including one for the node itself. + */ +uint32_t ts_node_descendant_count(TSNode self); + +/** + * Get the smallest node within this node that spans the given range of bytes + * or (row, column) positions. + */ +TSNode ts_node_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end); +TSNode ts_node_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end); + +/** + * Get the smallest named node within this node that spans the given range of + * bytes or (row, column) positions. 
+ */ +TSNode ts_node_named_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end); +TSNode ts_node_named_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end); + +/** + * Edit the node to keep it in-sync with source code that has been edited. + * + * This function is only rarely needed. When you edit a syntax tree with the + * [`ts_tree_edit`] function, all of the nodes that you retrieve from the tree + * afterward will already reflect the edit. You only need to use [`ts_node_edit`] + * when you have a [`TSNode`] instance that you want to keep and continue to use + * after an edit. + */ +void ts_node_edit(TSNode *self, const TSInputEdit *edit); + +/** + * Check if two nodes are identical. + */ +bool ts_node_eq(TSNode self, TSNode other); + +/************************/ +/* Section - TreeCursor */ +/************************/ + +/** + * Create a new tree cursor starting from the given node. + * + * A tree cursor allows you to walk a syntax tree more efficiently than is + * possible using the [`TSNode`] functions. It is a mutable object that is always + * on a certain syntax node, and can be moved imperatively to different nodes. + */ +TSTreeCursor ts_tree_cursor_new(TSNode node); + +/** + * Delete a tree cursor, freeing all of the memory that it used. + */ +void ts_tree_cursor_delete(TSTreeCursor *self); + +/** + * Re-initialize a tree cursor to start at a different node. + */ +void ts_tree_cursor_reset(TSTreeCursor *self, TSNode node); + +/** + * Re-initialize a tree cursor to the same position as another cursor. + * + * Unlike [`ts_tree_cursor_reset`], this will not lose parent information and + * allows reusing already created cursors. +*/ +void ts_tree_cursor_reset_to(TSTreeCursor *dst, const TSTreeCursor *src); + +/** + * Get the tree cursor's current node. + */ +TSNode ts_tree_cursor_current_node(const TSTreeCursor *self); + +/** + * Get the field name of the tree cursor's current node. 
+ * + * This returns `NULL` if the current node doesn't have a field. + * See also [`ts_node_child_by_field_name`]. + */ +const char *ts_tree_cursor_current_field_name(const TSTreeCursor *self); + +/** + * Get the field id of the tree cursor's current node. + * + * This returns zero if the current node doesn't have a field. + * See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`]. + */ +TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *self); + +/** + * Move the cursor to the parent of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` + * if there was no parent node (the cursor was already on the root node). + */ +bool ts_tree_cursor_goto_parent(TSTreeCursor *self); + +/** + * Move the cursor to the next sibling of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` + * if there was no next sibling node. + */ +bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self); + +/** + * Move the cursor to the previous sibling of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` if + * there was no previous sibling node. + * + * Note, that this function may be slower than + * [`ts_tree_cursor_goto_next_sibling`] due to how node positions are stored. In + * the worst case, this will need to iterate through all the children up to the + * previous sibling node to recalculate its position. + */ +bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self); + +/** + * Move the cursor to the first child of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` + * if there were no children. + */ +bool ts_tree_cursor_goto_first_child(TSTreeCursor *self); + +/** + * Move the cursor to the last child of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` if + * there were no children.
+ * + * Note that this function may be slower than [`ts_tree_cursor_goto_first_child`] + * because it needs to iterate through all the children to compute the child's + * position. + */ +bool ts_tree_cursor_goto_last_child(TSTreeCursor *self); + +/** + * Move the cursor to the node that is the nth descendant of + * the original node that the cursor was constructed with, where + * zero represents the original node itself. + */ +void ts_tree_cursor_goto_descendant(TSTreeCursor *self, uint32_t goal_descendant_index); + +/** + * Get the index of the cursor's current node out of all of the + * descendants of the original node that the cursor was constructed with. + */ +uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *self); + +/** + * Get the depth of the cursor's current node relative to the original + * node that the cursor was constructed with. + */ +uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *self); + +/** + * Move the cursor to the first child of its current node that extends beyond + * the given byte offset or point. + * + * This returns the index of the child node if one was found, and returns -1 + * if no such child was found. + */ +int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte); +int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point); + +TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *cursor); + +/*******************/ +/* Section - Query */ +/*******************/ + +/** + * Create a new query from a string containing one or more S-expression + * patterns. The query is associated with a particular language, and can + * only be run on syntax nodes parsed with that language. + * + * If all of the given patterns are valid, this returns a [`TSQuery`]. + * If a pattern is invalid, this returns `NULL`, and provides two pieces + * of information about the problem: + * 1. The byte offset of the error is written to the `error_offset` parameter. + * 2. 
The type of error is written to the `error_type` parameter. + */ +TSQuery *ts_query_new( + const TSLanguage *language, + const char *source, + uint32_t source_len, + uint32_t *error_offset, + TSQueryError *error_type +); + +/** + * Delete a query, freeing all of the memory that it used. + */ +void ts_query_delete(TSQuery *self); + +/** + * Get the number of patterns, captures, or string literals in the query. + */ +uint32_t ts_query_pattern_count(const TSQuery *self); +uint32_t ts_query_capture_count(const TSQuery *self); +uint32_t ts_query_string_count(const TSQuery *self); + +/** + * Get the byte offset where the given pattern starts in the query's source. + * + * This can be useful when combining queries by concatenating their source + * code strings. + */ +uint32_t ts_query_start_byte_for_pattern(const TSQuery *self, uint32_t pattern_index); + +/** + * Get all of the predicates for the given pattern in the query. + * + * The predicates are represented as a single array of steps. There are three + * types of steps in this array, which correspond to the three legal values for + * the `type` field: + * - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names + * of captures. Their `value_id` can be used with the + * [`ts_query_capture_name_for_id`] function to obtain the name of the capture. + * - `TSQueryPredicateStepTypeString` - Steps with this type represent literal + * strings. Their `value_id` can be used with the + * [`ts_query_string_value_for_id`] function to obtain their string value. + * - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels* + * that represent the end of an individual predicate. If a pattern has two + * predicates, then there will be two steps with this `type` in the array. + */ +const TSQueryPredicateStep *ts_query_predicates_for_pattern( + const TSQuery *self, + uint32_t pattern_index, + uint32_t *step_count +); + +/* + * Check if the given pattern in the query has a single root node. 
+ */ +bool ts_query_is_pattern_rooted(const TSQuery *self, uint32_t pattern_index); + +/* + * Check if the given pattern in the query is 'non local'. + * + * A non-local pattern has multiple root nodes and can match within a + * repeating sequence of nodes, as specified by the grammar. Non-local + * patterns disable certain optimizations that would otherwise be possible + * when executing a query on a specific range of a syntax tree. + */ +bool ts_query_is_pattern_non_local(const TSQuery *self, uint32_t pattern_index); + +/* + * Check if a given pattern is guaranteed to match once a given step is reached. + * The step is specified by its byte offset in the query's source code. + */ +bool ts_query_is_pattern_guaranteed_at_step(const TSQuery *self, uint32_t byte_offset); + +/** + * Get the name and length of one of the query's captures, or one of the + * query's string literals. Each capture and string is associated with a + * numeric id based on the order that it appeared in the query's source. + */ +const char *ts_query_capture_name_for_id( + const TSQuery *self, + uint32_t index, + uint32_t *length +); + +/** + * Get the quantifier of the query's captures. Each capture is associated + * with a numeric id based on the order that it appeared in the query's source. + */ +TSQuantifier ts_query_capture_quantifier_for_id( + const TSQuery *self, + uint32_t pattern_index, + uint32_t capture_index +); + +const char *ts_query_string_value_for_id( + const TSQuery *self, + uint32_t index, + uint32_t *length +); + +/** + * Disable a certain capture within a query. + * + * This prevents the capture from being returned in matches, and also avoids + * any resource usage associated with recording the capture. Currently, there + * is no way to undo this. + */ +void ts_query_disable_capture(TSQuery *self, const char *name, uint32_t length); + +/** + * Disable a certain pattern within a query.
+ * + * This prevents the pattern from matching and removes most of the overhead + * associated with the pattern. Currently, there is no way to undo this. + */ +void ts_query_disable_pattern(TSQuery *self, uint32_t pattern_index); + +/** + * Create a new cursor for executing a given query. + * + * The cursor stores the state that is needed to iteratively search + * for matches. To use the query cursor, first call [`ts_query_cursor_exec`] + * to start running a given query on a given syntax node. Then, there are + * two options for consuming the results of the query: + * 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the + * *matches* in the order that they were found. Each match contains the + * index of the pattern that matched, and an array of captures. Because + * multiple patterns can match the same set of nodes, one match may contain + * captures that appear *before* some of the captures from a previous match. + * 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the + * individual *captures* in the order that they appear. This is useful if + * you don't care about which pattern matched, and just want a single ordered + * sequence of captures. + * + * If you don't care about consuming all of the results, you can stop calling + * [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point. + * You can then start executing another query on another node by calling + * [`ts_query_cursor_exec`] again. + */ +TSQueryCursor *ts_query_cursor_new(void); + +/** + * Delete a query cursor, freeing all of the memory that it used. + */ +void ts_query_cursor_delete(TSQueryCursor *self); + +/** + * Start running a given query on a given node. + */ +void ts_query_cursor_exec(TSQueryCursor *self, const TSQuery *query, TSNode node); + +/** + * Manage the maximum number of in-progress matches allowed by this query + * cursor.
+ * + * Query cursors have an optional maximum capacity for storing lists of + * in-progress captures. If this capacity is exceeded, then the + * earliest-starting match will silently be dropped to make room for further + * matches. This maximum capacity is optional — by default, query cursors allow + * any number of pending matches, dynamically allocating new space for them as + * needed as the query is executed. + */ +bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self); +uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self); +void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit); + +/** + * Set the range of bytes or (row, column) positions in which the query + * will be executed. + */ +void ts_query_cursor_set_byte_range(TSQueryCursor *self, uint32_t start_byte, uint32_t end_byte); +void ts_query_cursor_set_point_range(TSQueryCursor *self, TSPoint start_point, TSPoint end_point); + +/** + * Advance to the next match of the currently running query. + * + * If there is a match, write it to `*match` and return `true`. + * Otherwise, return `false`. + */ +bool ts_query_cursor_next_match(TSQueryCursor *self, TSQueryMatch *match); +void ts_query_cursor_remove_match(TSQueryCursor *self, uint32_t match_id); + +/** + * Advance to the next capture of the currently running query. + * + * If there is a capture, write its match to `*match` and its index within + * the match's capture list to `*capture_index`. Otherwise, return `false`. + */ +bool ts_query_cursor_next_capture( + TSQueryCursor *self, + TSQueryMatch *match, + uint32_t *capture_index +); + +/** + * Set the maximum start depth for a query cursor. + * + * This prevents cursors from exploring children nodes at a certain depth. + * Note if a pattern includes many children, then they will still be checked.
+ * + * The zero max start depth value can be used as a special behavior and + * it helps to destructure a subtree by staying on a node and using captures + * for the parts of interest. Note that the zero max start depth only limits the search + * depth for a pattern's root node; other nodes that are part of the pattern + * may be searched at any depth, as defined by the pattern structure. + * + * Set to `UINT32_MAX` to remove the maximum start depth. + */ +void ts_query_cursor_set_max_start_depth(TSQueryCursor *self, uint32_t max_start_depth); + +/**********************/ +/* Section - Language */ +/**********************/ + +/** + * Get another reference to the given language. + */ +const TSLanguage *ts_language_copy(const TSLanguage *self); + +/** + * Free any dynamically-allocated resources for this language, if + * this is the last reference. + */ +void ts_language_delete(const TSLanguage *self); + +/** + * Get the number of distinct node types in the language. + */ +uint32_t ts_language_symbol_count(const TSLanguage *self); + +/** + * Get the number of valid states in this language. +*/ +uint32_t ts_language_state_count(const TSLanguage *self); + +/** + * Get a node type string for the given numerical id. + */ +const char *ts_language_symbol_name(const TSLanguage *self, TSSymbol symbol); + +/** + * Get the numerical id for the given node type string. + */ +TSSymbol ts_language_symbol_for_name( + const TSLanguage *self, + const char *string, + uint32_t length, + bool is_named +); + +/** + * Get the number of distinct field names in the language. + */ +uint32_t ts_language_field_count(const TSLanguage *self); + +/** + * Get the field name string for the given numerical id. + */ +const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id); + +/** + * Get the numerical id for the given field name string.
+ */ +TSFieldId ts_language_field_id_for_name(const TSLanguage *self, const char *name, uint32_t name_length); + +/** + * Check whether the given node type id belongs to named nodes, anonymous nodes, + * or hidden nodes. + * + * See also [`ts_node_is_named`]. Hidden nodes are never returned from the API. + */ +TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol); + +/** + * Get the ABI version number for this language. This version number is used + * to ensure that languages were generated by a compatible version of + * Tree-sitter. + * + * See also [`ts_parser_set_language`]. + */ +uint32_t ts_language_version(const TSLanguage *self); + +/** + * Get the next parse state. Combine this with lookahead iterators to generate + * completion suggestions or valid symbols in error nodes. Use + * [`ts_node_grammar_symbol`] for valid symbols. +*/ +TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol); + +/********************************/ +/* Section - Lookahead Iterator */ +/********************************/ + +/** + * Create a new lookahead iterator for the given language and parse state. + * + * This returns `NULL` if state is invalid for the language. + * + * Repeatedly using [`ts_lookahead_iterator_next`] and + * [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in the + * given parse state. Newly created lookahead iterators will contain the `ERROR` + * symbol. + * + * Lookahead iterators can be useful to generate suggestions and improve syntax + * error diagnostics. To get symbols valid in an ERROR node, use the lookahead + * iterator on its first leaf node state. For `MISSING` nodes, a lookahead + * iterator created on the previous non-extra leaf node may be appropriate. +*/ +TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state); + +/** + * Delete a lookahead iterator freeing all the memory used.
+*/ +void ts_lookahead_iterator_delete(TSLookaheadIterator *self); + +/** + * Reset the lookahead iterator to another state. + * + * This returns `true` if the iterator was reset to the given state and `false` + * otherwise. +*/ +bool ts_lookahead_iterator_reset_state(TSLookaheadIterator *self, TSStateId state); + +/** + * Reset the lookahead iterator. + * + * This returns `true` if the language was set successfully and `false` + * otherwise. +*/ +bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state); + +/** + * Get the current language of the lookahead iterator. +*/ +const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self); + +/** + * Advance the lookahead iterator to the next symbol. + * + * This returns `true` if there is a new symbol and `false` otherwise. +*/ +bool ts_lookahead_iterator_next(TSLookaheadIterator *self); + +/** + * Get the current symbol of the lookahead iterator. +*/ +TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self); + +/** + * Get the current symbol type of the lookahead iterator as a null-terminated + * string. +*/ +const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self); + +/*************************************/ +/* Section - WebAssembly Integration */ +/*************************************/ + +typedef struct wasm_engine_t TSWasmEngine; +typedef struct TSWasmStore TSWasmStore; + +typedef enum { + TSWasmErrorKindNone = 0, + TSWasmErrorKindParse, + TSWasmErrorKindCompile, + TSWasmErrorKindInstantiate, + TSWasmErrorKindAllocate, +} TSWasmErrorKind; + +typedef struct { + TSWasmErrorKind kind; + char *message; +} TSWasmError; + +/** + * Create a Wasm store. + */ +TSWasmStore *ts_wasm_store_new( + TSWasmEngine *engine, + TSWasmError *error +); + +/** + * Free the memory associated with the given Wasm store. + */ +void ts_wasm_store_delete(TSWasmStore *); + +/** + * Create a language from a buffer of Wasm.
The resulting language behaves + * like any other Tree-sitter language, except that in order to use it with + * a parser, that parser must have a Wasm store. Note that the language + * can be used with any Wasm store, it doesn't need to be the same store that + * was used to originally load it. + */ +const TSLanguage *ts_wasm_store_load_language( + TSWasmStore *, + const char *name, + const char *wasm, + uint32_t wasm_len, + TSWasmError *error +); + +/** + * Get the number of languages instantiated in the given wasm store. + */ +size_t ts_wasm_store_language_count(const TSWasmStore *); + +/** + * Check if the language came from a Wasm module. If so, then in order to use + * this language with a Parser, that parser must have a Wasm store assigned. + */ +bool ts_language_is_wasm(const TSLanguage *); + +/** + * Assign the given Wasm store to the parser. A parser must have a Wasm store + * in order to use Wasm languages. + */ +void ts_parser_set_wasm_store(TSParser *, TSWasmStore *); + +/** + * Remove the parser's current Wasm store and return it. This returns NULL if + * the parser doesn't have a Wasm store. + */ +TSWasmStore *ts_parser_take_wasm_store(TSParser *); + +/**********************************/ +/* Section - Global Configuration */ +/**********************************/ + +/** + * Set the allocation functions used by the library. + * + * By default, Tree-sitter uses the standard libc allocation functions, + * but aborts the process when an allocation fails. This function lets + * you supply alternative allocation functions at runtime. + * + * If you pass `NULL` for any parameter, Tree-sitter will switch back to + * its default implementation of that function. + * + * If you call this function after the library has already been used, then + * you must ensure that either: + * 1. All the existing objects have been freed. + * 2. The new allocator shares its state with the old one, so it is capable + * of freeing memory that was allocated by the old allocator. 
+ */ +void ts_set_allocator( + void *(*new_malloc)(size_t), + void *(*new_calloc)(size_t, size_t), + void *(*new_realloc)(void *, size_t), + void (*new_free)(void *) +); + +#ifdef __cplusplus +} +#endif + +#ifndef TREE_SITTER_HIDE_SYMBOLS +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC visibility pop +#endif +#endif + +#endif // TREE_SITTER_API_H_ diff --git a/parser/src/array.h b/parser/src/array.h index e952261e..15a3b233 100644 --- a/parser/src/array.h +++ b/parser/src/array.h @@ -1,7 +1,11 @@ #ifndef TREE_SITTER_ARRAY_H_ #define TREE_SITTER_ARRAY_H_ -#include "me/types.h" +#ifdef __cplusplus +extern "C" { +#endif + +#include "./alloc.h" #include #include @@ -9,27 +13,31 @@ #include #include -#define Array(T) \ - struct \ - { \ - T *contents; \ - t_u32 size; \ - t_u32 capacity; \ - } +#ifdef _MSC_VER +#pragma warning(disable : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +#define Array(T) \ + struct { \ + T *contents; \ + uint32_t size; \ + uint32_t capacity; \ + } /// Initialize an array. -#define array_init(self) \ - ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) +#define array_init(self) \ + ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) /// Create an empty array. -#define array_new() \ - { \ - NULL, 0, 0 \ - } +#define array_new() \ + { NULL, 0, 0 } /// Get a pointer to the element at a given `index` in the array. -#define array_get(self, _index) \ - (assert((t_u32)(_index) < (self)->size), &(self)->contents[_index]) +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) /// Get a pointer to the first element in the array. #define array_front(self) array_get(self, 0) @@ -43,67 +51,67 @@ /// Reserve `new_capacity` elements of space in the array. If `new_capacity` is /// less than the array's current capacity, this function has no effect. 
-#define array_reserve(self, new_capacity) \ - _array__reserve((Array *)(self), array_elem_size(self), new_capacity) +#define array_reserve(self, new_capacity) \ + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) /// Free any memory allocated for this array. Note that this does not free any /// memory allocated for the array's contents. #define array_delete(self) _array__delete((Array *)(self)) /// Push a new `element` onto the end of the array. -#define array_push(self, element) \ - (_array__grow((Array *)(self), 1, array_elem_size(self)), \ - (self)->contents[(self)->size++] = (element)) +#define array_push(self, element) \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ + (self)->contents[(self)->size++] = (element)) /// Increase the array's size by `count` elements. /// New elements are zero-initialized. -#define array_grow_by(self, count) \ - do \ - { \ - if ((count) == 0) \ - break; \ - _array__grow((Array *)(self), count, array_elem_size(self)); \ - memset((self)->contents + (self)->size, 0, \ - (count) * array_elem_size(self)); \ - (self)->size += (count); \ - } while (0) +#define array_grow_by(self, count) \ + do { \ + if ((count) == 0) break; \ + _array__grow((Array *)(self), count, array_elem_size(self)); \ + memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ + (self)->size += (count); \ + } while (0) /// Append all elements from one array to the end of another. -#define array_push_all(self, other) \ - array_extend((self), (other)->size, (other)->contents) +#define array_push_all(self, other) \ + array_extend((self), (other)->size, (other)->contents) -/// Append `count` elements to the end of the array, reading their values from -/// the `contents` pointer. 
-#define array_extend(self, count, contents) \ - _array__splice((Array *)(self), array_elem_size(self), (self)->size, 0, \ - count, contents) +/// Append `count` elements to the end of the array, reading their values from the +/// `contents` pointer. +#define array_extend(self, count, contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), (self)->size, \ + 0, count, contents \ + ) /// Remove `old_count` elements from the array starting at the given `index`. At -/// the same index, insert `new_count` new elements, reading their values from -/// the `new_contents` pointer. -#define array_splice(self, _index, old_count, new_count, new_contents) \ - _array__splice((Array *)(self), array_elem_size(self), _index, old_count, \ - new_count, new_contents) +/// the same index, insert `new_count` new elements, reading their values from the +/// `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), _index, \ + old_count, new_count, new_contents \ + ) /// Insert one `element` into the array at the given `index`. -#define array_insert(self, _index, element) \ - _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, \ - &(element)) +#define array_insert(self, _index, element) \ + _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) /// Remove one element from the array at the given `index`. -#define array_erase(self, _index) \ - _array__erase((Array *)(self), array_elem_size(self), _index) +#define array_erase(self, _index) \ + _array__erase((Array *)(self), array_elem_size(self), _index) /// Pop the last element off the array, returning the element by value. #define array_pop(self) ((self)->contents[--(self)->size]) /// Assign the contents of one array to another, reallocating if necessary. 
-#define array_assign(self, other) \ - _array__assign((Array *)(self), (const Array *)(other), \ - array_elem_size(self)) +#define array_assign(self, other) \ + _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) /// Swap one array with another -#define array_swap(self, other) _array__swap((Array *)(self), (Array *)(other)) +#define array_swap(self, other) \ + _array__swap((Array *)(self), (Array *)(other)) /// Get the size of the array contents #define array_elem_size(self) (sizeof *(self)->contents) @@ -116,187 +124,167 @@ /// out-parameter is set to true. Otherwise, `index` is set to an index where /// `needle` should be inserted in order to preserve the sorting, and `exists` /// is set to false. -#define array_search_sorted_with(self, compare, needle, _index, _exists) \ - _array__search_sorted(self, 0, compare, , needle, _index, _exists) +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ + _array__search_sorted(self, 0, compare, , needle, _index, _exists) /// Search a sorted array for a given `needle` value, using integer comparisons -/// of a given struct field (specified with a leading dot) to determine the -/// order. +/// of a given struct field (specified with a leading dot) to determine the order. /// /// See also `array_search_sorted_with`. -#define array_search_sorted_by(self, field, needle, _index, _exists) \ - _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) +#define array_search_sorted_by(self, field, needle, _index, _exists) \ + _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) /// Insert a given `value` into a sorted array, using the given `compare` /// callback to determine the order. 
-#define array_insert_sorted_with(self, compare, value) \ - do \ - { \ - unsigned _index, _exists; \ - array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ - if (!_exists) \ - array_insert(self, _index, value); \ - } while (0) +#define array_insert_sorted_with(self, compare, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) /// Insert a given `value` into a sorted array, using integer comparisons of /// a given struct field (specified with a leading dot) to determine the order. /// /// See also `array_search_sorted_by`. -#define array_insert_sorted_by(self, field, value) \ - do \ - { \ - unsigned _index, _exists; \ - array_search_sorted_by(self, field, (value)field, &_index, &_exists); \ - if (!_exists) \ - array_insert(self, _index, value); \ - } while (0) +#define array_insert_sorted_by(self, field, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) // Private typedef Array(void) Array; /// This is not what you're looking for, see `array_delete`. -static inline void _array__delete(Array *self) -{ - if (self->contents) - { - free(self->contents); - self->contents = NULL; - self->size = 0; - self->capacity = 0; - } +static inline void _array__delete(Array *self) { + if (self->contents) { + ts_free(self->contents); + self->contents = NULL; + self->size = 0; + self->capacity = 0; + } } /// This is not what you're looking for, see `array_erase`. 
-static inline void _array__erase(Array *self, size_t element_size, t_u32 index) -{ - assert(index < self->size); - char *contents = (char *)self->contents; - memmove(contents + index * element_size, - contents + (index + 1) * element_size, - (self->size - index - 1) * element_size); - self->size--; +static inline void _array__erase(Array *self, size_t element_size, + uint32_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * element_size, contents + (index + 1) * element_size, + (self->size - index - 1) * element_size); + self->size--; } /// This is not what you're looking for, see `array_reserve`. -static inline void _array__reserve(Array *self, size_t element_size, - t_u32 new_capacity) -{ - if (new_capacity > self->capacity) - { - if (self->contents) - { - self->contents = - realloc(self->contents, new_capacity * element_size); - } - else - { - self->contents = malloc(new_capacity * element_size); - } - self->capacity = new_capacity; - } +static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { + if (new_capacity > self->capacity) { + if (self->contents) { + self->contents = ts_realloc(self->contents, new_capacity * element_size); + } else { + self->contents = ts_malloc(new_capacity * element_size); + } + self->capacity = new_capacity; + } } /// This is not what you're looking for, see `array_assign`. -static inline void _array__assign(Array *self, const Array *other, - size_t element_size) -{ - _array__reserve(self, element_size, other->size); - self->size = other->size; - memcpy(self->contents, other->contents, self->size * element_size); +static inline void _array__assign(Array *self, const Array *other, size_t element_size) { + _array__reserve(self, element_size, other->size); + self->size = other->size; + memcpy(self->contents, other->contents, self->size * element_size); } /// This is not what you're looking for, see `array_swap`. 
-static inline void _array__swap(Array *self, Array *other) -{ - Array swap = *other; - *other = *self; - *self = swap; +static inline void _array__swap(Array *self, Array *other) { + Array swap = *other; + *other = *self; + *self = swap; } /// This is not what you're looking for, see `array_push` or `array_grow_by`. -static inline void _array__grow(Array *self, t_u32 count, size_t element_size) -{ - t_u32 new_size = self->size + count; - if (new_size > self->capacity) - { - t_u32 new_capacity = self->capacity * 2; - if (new_capacity < 8) - new_capacity = 8; - if (new_capacity < new_size) - new_capacity = new_size; - _array__reserve(self, element_size, new_capacity); - } +static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { + uint32_t new_size = self->size + count; + if (new_size > self->capacity) { + uint32_t new_capacity = self->capacity * 2; + if (new_capacity < 8) new_capacity = 8; + if (new_capacity < new_size) new_capacity = new_size; + _array__reserve(self, element_size, new_capacity); + } } /// This is not what you're looking for, see `array_splice`. 
-static inline void _array__splice(Array *self, size_t element_size, t_u32 index, - t_u32 old_count, t_u32 new_count, - const void *elements) -{ - t_u32 new_size = self->size + new_count - old_count; - t_u32 old_end = index + old_count; - t_u32 new_end = index + new_count; - assert(old_end <= self->size); +static inline void _array__splice(Array *self, size_t element_size, + uint32_t index, uint32_t old_count, + uint32_t new_count, const void *elements) { + uint32_t new_size = self->size + new_count - old_count; + uint32_t old_end = index + old_count; + uint32_t new_end = index + new_count; + assert(old_end <= self->size); - _array__reserve(self, element_size, new_size); + _array__reserve(self, element_size, new_size); - char *contents = (char *)self->contents; - if (self->size > old_end) - { - memmove(contents + new_end * element_size, - contents + old_end * element_size, - (self->size - old_end) * element_size); - } - if (new_count > 0) - { - if (elements) - { - memcpy((contents + index * element_size), elements, - new_count * element_size); - } - else - { - memset((contents + index * element_size), 0, - new_count * element_size); - } - } - self->size += new_count - old_count; + char *contents = (char *)self->contents; + if (self->size > old_end) { + memmove( + contents + new_end * element_size, + contents + old_end * element_size, + (self->size - old_end) * element_size + ); + } + if (new_count > 0) { + if (elements) { + memcpy( + (contents + index * element_size), + elements, + new_count * element_size + ); + } else { + memset( + (contents + index * element_size), + 0, + new_count * element_size + ); + } + } + self->size += new_count - old_count; } /// A binary search routine, based on Rust's `std::slice::binary_search_by`. -/// This is not what you're looking for, see `array_search_sorted_with` or -/// `array_search_sorted_by`. 
-#define _array__search_sorted(self, start, compare, suffix, needle, _index, \ - _exists) \ - do \ - { \ - *(_index) = start; \ - *(_exists) = false; \ - t_u32 size = (self)->size - *(_index); \ - if (size == 0) \ - break; \ - int comparison; \ - while (size > 1) \ - { \ - t_u32 half_size = size / 2; \ - t_u32 mid_index = *(_index) + half_size; \ - comparison = \ - compare(&((self)->contents[mid_index] suffix), (needle)); \ - if (comparison <= 0) \ - *(_index) = mid_index; \ - size -= half_size; \ - } \ - comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ - if (comparison == 0) \ - *(_exists) = true; \ - else if (comparison < 0) \ - *(_index) += 1; \ - } while (0) +/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. +#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ + do { \ + *(_index) = start; \ + *(_exists) = false; \ + uint32_t size = (self)->size - *(_index); \ + if (size == 0) break; \ + int comparison; \ + while (size > 1) { \ + uint32_t half_size = size / 2; \ + uint32_t mid_index = *(_index) + half_size; \ + comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ + if (comparison <= 0) *(_index) = mid_index; \ + size -= half_size; \ + } \ + comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ + if (comparison == 0) *(_exists) = true; \ + else if (comparison < 0) *(_index) += 1; \ + } while (0) -/// Helper macro for the `_sorted_by` routines below. This takes the left -/// (existing) parameter by reference in order to work with the generic sorting -/// function above. +/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) +/// parameter by reference in order to work with the generic sorting function above. 
#define _compare_int(a, b) ((int)*(a) - (int)(b)) -#endif // TREE_SITTER_ARRAY_H_ +#ifdef _MSC_VER +#pragma warning(default : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ARRAY_H_ diff --git a/parser/src/atomic.h b/parser/src/atomic.h new file mode 100644 index 00000000..e680b60e --- /dev/null +++ b/parser/src/atomic.h @@ -0,0 +1,68 @@ +#ifndef TREE_SITTER_ATOMIC_H_ +#define TREE_SITTER_ATOMIC_H_ + +#include +#include +#include + +#ifdef __TINYC__ + +static inline size_t atomic_load(const volatile size_t *p) { + return *p; +} + +static inline uint32_t atomic_inc(volatile uint32_t *p) { + *p += 1; + return *p; +} + +static inline uint32_t atomic_dec(volatile uint32_t *p) { + *p-= 1; + return *p; +} + +#elif defined(_WIN32) + +#include + +static inline size_t atomic_load(const volatile size_t *p) { + return *p; +} + +static inline uint32_t atomic_inc(volatile uint32_t *p) { + return InterlockedIncrement((long volatile *)p); +} + +static inline uint32_t atomic_dec(volatile uint32_t *p) { + return InterlockedDecrement((long volatile *)p); +} + +#else + +static inline size_t atomic_load(const volatile size_t *p) { +#ifdef __ATOMIC_RELAXED + return __atomic_load_n(p, __ATOMIC_RELAXED); +#else + return __sync_fetch_and_add((volatile size_t *)p, 0); +#endif +} + +static inline uint32_t atomic_inc(volatile uint32_t *p) { + #ifdef __ATOMIC_RELAXED + return __atomic_add_fetch(p, 1U, __ATOMIC_SEQ_CST); + #else + return __sync_add_and_fetch(p, 1U); + #endif +} + +static inline uint32_t atomic_dec(volatile uint32_t *p) { + #ifdef __ATOMIC_RELAXED + return __atomic_sub_fetch(p, 1U, __ATOMIC_SEQ_CST); + #else + return __sync_sub_and_fetch(p, 1U); + #endif +} + +#endif + +#endif // TREE_SITTER_ATOMIC_H_ diff --git a/parser/src/clock.h b/parser/src/clock.h new file mode 100644 index 00000000..6e75729e --- /dev/null +++ b/parser/src/clock.h @@ -0,0 +1,146 @@ +#ifndef 
TREE_SITTER_CLOCK_H_ +#define TREE_SITTER_CLOCK_H_ + +#include +#include + +typedef uint64_t TSDuration; + +#ifdef _WIN32 + +// Windows: +// * Represent a time as a performance counter value. +// * Represent a duration as a number of performance counter ticks. + +#include +typedef uint64_t TSClock; + +static inline TSDuration duration_from_micros(uint64_t micros) { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + return micros * (uint64_t)frequency.QuadPart / 1000000; +} + +static inline uint64_t duration_to_micros(TSDuration self) { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + return self * 1000000 / (uint64_t)frequency.QuadPart; +} + +static inline TSClock clock_null(void) { + return 0; +} + +static inline TSClock clock_now(void) { + LARGE_INTEGER result; + QueryPerformanceCounter(&result); + return (uint64_t)result.QuadPart; +} + +static inline TSClock clock_after(TSClock base, TSDuration duration) { + return base + duration; +} + +static inline bool clock_is_null(TSClock self) { + return !self; +} + +static inline bool clock_is_gt(TSClock self, TSClock other) { + return self > other; +} + +#elif defined(CLOCK_MONOTONIC) && !defined(__APPLE__) + +// POSIX with monotonic clock support (Linux) +// * Represent a time as a monotonic (seconds, nanoseconds) pair. +// * Represent a duration as a number of microseconds. +// +// On these platforms, parse timeouts will correspond accurately to +// real time, regardless of what other processes are running. 
+ +#include +typedef struct timespec TSClock; + +static inline TSDuration duration_from_micros(uint64_t micros) { + return micros; +} + +static inline uint64_t duration_to_micros(TSDuration self) { + return self; +} + +static inline TSClock clock_now(void) { + TSClock result; + clock_gettime(CLOCK_MONOTONIC, &result); + return result; +} + +static inline TSClock clock_null(void) { + return (TSClock) {0, 0}; +} + +static inline TSClock clock_after(TSClock base, TSDuration duration) { + TSClock result = base; + result.tv_sec += duration / 1000000; + result.tv_nsec += (duration % 1000000) * 1000; + if (result.tv_nsec >= 1000000000) { + result.tv_nsec -= 1000000000; + ++(result.tv_sec); + } + return result; +} + +static inline bool clock_is_null(TSClock self) { + return !self.tv_sec; +} + +static inline bool clock_is_gt(TSClock self, TSClock other) { + if (self.tv_sec > other.tv_sec) return true; + if (self.tv_sec < other.tv_sec) return false; + return self.tv_nsec > other.tv_nsec; +} + +#else + +// macOS or POSIX without monotonic clock support +// * Represent a time as a process clock value. +// * Represent a duration as a number of process clock ticks. +// +// On these platforms, parse timeouts may be affected by other processes, +// which is not ideal, but is better than using a non-monotonic time API +// like `gettimeofday`. 
+ +#include +typedef uint64_t TSClock; + +static inline TSDuration duration_from_micros(uint64_t micros) { + return micros * (uint64_t)CLOCKS_PER_SEC / 1000000; +} + +static inline uint64_t duration_to_micros(TSDuration self) { + return self * 1000000 / (uint64_t)CLOCKS_PER_SEC; +} + +static inline TSClock clock_null(void) { + return 0; +} + +static inline TSClock clock_now(void) { + return (uint64_t)clock(); +} + +static inline TSClock clock_after(TSClock base, TSDuration duration) { + return base + duration; +} + +static inline bool clock_is_null(TSClock self) { + return !self; +} + +static inline bool clock_is_gt(TSClock self, TSClock other) { + return self > other; +} + +#endif + +#endif // TREE_SITTER_CLOCK_H_ diff --git a/parser/src/error_costs.h b/parser/src/error_costs.h new file mode 100644 index 00000000..32d3666a --- /dev/null +++ b/parser/src/error_costs.h @@ -0,0 +1,11 @@ +#ifndef TREE_SITTER_ERROR_COSTS_H_ +#define TREE_SITTER_ERROR_COSTS_H_ + +#define ERROR_STATE 0 +#define ERROR_COST_PER_RECOVERY 500 +#define ERROR_COST_PER_MISSING_TREE 110 +#define ERROR_COST_PER_SKIPPED_TREE 100 +#define ERROR_COST_PER_SKIPPED_LINE 30 +#define ERROR_COST_PER_SKIPPED_CHAR 1 + +#endif diff --git a/parser/src/get_changed_ranges.c b/parser/src/get_changed_ranges.c new file mode 100644 index 00000000..bcf8da94 --- /dev/null +++ b/parser/src/get_changed_ranges.c @@ -0,0 +1,501 @@ +#include "./get_changed_ranges.h" +#include "./subtree.h" +#include "./language.h" +#include "./error_costs.h" +#include "./tree_cursor.h" +#include + +// #define DEBUG_GET_CHANGED_RANGES + +static void ts_range_array_add( + TSRangeArray *self, + Length start, + Length end +) { + if (self->size > 0) { + TSRange *last_range = array_back(self); + if (start.bytes <= last_range->end_byte) { + last_range->end_byte = end.bytes; + last_range->end_point = end.extent; + return; + } + } + + if (start.bytes < end.bytes) { + TSRange range = { start.extent, end.extent, start.bytes, end.bytes }; + 
array_push(self, range); + } +} + +bool ts_range_array_intersects( + const TSRangeArray *self, + unsigned start_index, + uint32_t start_byte, + uint32_t end_byte +) { + for (unsigned i = start_index; i < self->size; i++) { + TSRange *range = &self->contents[i]; + if (range->end_byte > start_byte) { + if (range->start_byte >= end_byte) break; + return true; + } + } + return false; +} + +void ts_range_array_get_changed_ranges( + const TSRange *old_ranges, unsigned old_range_count, + const TSRange *new_ranges, unsigned new_range_count, + TSRangeArray *differences +) { + unsigned new_index = 0; + unsigned old_index = 0; + Length current_position = length_zero(); + bool in_old_range = false; + bool in_new_range = false; + + while (old_index < old_range_count || new_index < new_range_count) { + const TSRange *old_range = &old_ranges[old_index]; + const TSRange *new_range = &new_ranges[new_index]; + + Length next_old_position; + if (in_old_range) { + next_old_position = (Length) {old_range->end_byte, old_range->end_point}; + } else if (old_index < old_range_count) { + next_old_position = (Length) {old_range->start_byte, old_range->start_point}; + } else { + next_old_position = LENGTH_MAX; + } + + Length next_new_position; + if (in_new_range) { + next_new_position = (Length) {new_range->end_byte, new_range->end_point}; + } else if (new_index < new_range_count) { + next_new_position = (Length) {new_range->start_byte, new_range->start_point}; + } else { + next_new_position = LENGTH_MAX; + } + + if (next_old_position.bytes < next_new_position.bytes) { + if (in_old_range != in_new_range) { + ts_range_array_add(differences, current_position, next_old_position); + } + if (in_old_range) old_index++; + current_position = next_old_position; + in_old_range = !in_old_range; + } else if (next_new_position.bytes < next_old_position.bytes) { + if (in_old_range != in_new_range) { + ts_range_array_add(differences, current_position, next_new_position); + } + if (in_new_range) new_index++; 
+ current_position = next_new_position; + in_new_range = !in_new_range; + } else { + if (in_old_range != in_new_range) { + ts_range_array_add(differences, current_position, next_new_position); + } + if (in_old_range) old_index++; + if (in_new_range) new_index++; + in_old_range = !in_old_range; + in_new_range = !in_new_range; + current_position = next_new_position; + } + } +} + +typedef struct { + TreeCursor cursor; + const TSLanguage *language; + unsigned visible_depth; + bool in_padding; +} Iterator; + +static Iterator iterator_new( + TreeCursor *cursor, + const Subtree *tree, + const TSLanguage *language +) { + array_clear(&cursor->stack); + array_push(&cursor->stack, ((TreeCursorEntry) { + .subtree = tree, + .position = length_zero(), + .child_index = 0, + .structural_child_index = 0, + })); + return (Iterator) { + .cursor = *cursor, + .language = language, + .visible_depth = 1, + .in_padding = false, + }; +} + +static bool iterator_done(Iterator *self) { + return self->cursor.stack.size == 0; +} + +static Length iterator_start_position(Iterator *self) { + TreeCursorEntry entry = *array_back(&self->cursor.stack); + if (self->in_padding) { + return entry.position; + } else { + return length_add(entry.position, ts_subtree_padding(*entry.subtree)); + } +} + +static Length iterator_end_position(Iterator *self) { + TreeCursorEntry entry = *array_back(&self->cursor.stack); + Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree)); + if (self->in_padding) { + return result; + } else { + return length_add(result, ts_subtree_size(*entry.subtree)); + } +} + +static bool iterator_tree_is_visible(const Iterator *self) { + TreeCursorEntry entry = *array_back(&self->cursor.stack); + if (ts_subtree_visible(*entry.subtree)) return true; + if (self->cursor.stack.size > 1) { + Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; + return ts_language_alias_at( + self->language, + parent.ptr->production_id, + 
entry.structural_child_index + ) != 0; + } + return false; +} + +static void iterator_get_visible_state( + const Iterator *self, + Subtree *tree, + TSSymbol *alias_symbol, + uint32_t *start_byte +) { + uint32_t i = self->cursor.stack.size - 1; + + if (self->in_padding) { + if (i == 0) return; + i--; + } + + for (; i + 1 > 0; i--) { + TreeCursorEntry entry = self->cursor.stack.contents[i]; + + if (i > 0) { + const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; + *alias_symbol = ts_language_alias_at( + self->language, + parent->ptr->production_id, + entry.structural_child_index + ); + } + + if (ts_subtree_visible(*entry.subtree) || *alias_symbol) { + *tree = *entry.subtree; + *start_byte = entry.position.bytes; + break; + } + } +} + +static void iterator_ascend(Iterator *self) { + if (iterator_done(self)) return; + if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--; + if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false; + self->cursor.stack.size--; +} + +static bool iterator_descend(Iterator *self, uint32_t goal_position) { + if (self->in_padding) return false; + + bool did_descend = false; + do { + did_descend = false; + TreeCursorEntry entry = *array_back(&self->cursor.stack); + Length position = entry.position; + uint32_t structural_child_index = 0; + for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { + const Subtree *child = &ts_subtree_children(*entry.subtree)[i]; + Length child_left = length_add(position, ts_subtree_padding(*child)); + Length child_right = length_add(child_left, ts_subtree_size(*child)); + + if (child_right.bytes > goal_position) { + array_push(&self->cursor.stack, ((TreeCursorEntry) { + .subtree = child, + .position = position, + .child_index = i, + .structural_child_index = structural_child_index, + })); + + if (iterator_tree_is_visible(self)) { + if (child_left.bytes > goal_position) { + self->in_padding = true; + } else { + 
self->visible_depth++; + } + return true; + } + + did_descend = true; + break; + } + + position = child_right; + if (!ts_subtree_extra(*child)) structural_child_index++; + } + } while (did_descend); + + return false; +} + +static void iterator_advance(Iterator *self) { + if (self->in_padding) { + self->in_padding = false; + if (iterator_tree_is_visible(self)) { + self->visible_depth++; + } else { + iterator_descend(self, 0); + } + return; + } + + for (;;) { + if (iterator_tree_is_visible(self)) self->visible_depth--; + TreeCursorEntry entry = array_pop(&self->cursor.stack); + if (iterator_done(self)) return; + + const Subtree *parent = array_back(&self->cursor.stack)->subtree; + uint32_t child_index = entry.child_index + 1; + if (ts_subtree_child_count(*parent) > child_index) { + Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); + uint32_t structural_child_index = entry.structural_child_index; + if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; + const Subtree *next_child = &ts_subtree_children(*parent)[child_index]; + + array_push(&self->cursor.stack, ((TreeCursorEntry) { + .subtree = next_child, + .position = position, + .child_index = child_index, + .structural_child_index = structural_child_index, + })); + + if (iterator_tree_is_visible(self)) { + if (ts_subtree_padding(*next_child).bytes > 0) { + self->in_padding = true; + } else { + self->visible_depth++; + } + } else { + iterator_descend(self, 0); + } + break; + } + } +} + +typedef enum { + IteratorDiffers, + IteratorMayDiffer, + IteratorMatches, +} IteratorComparison; + +static IteratorComparison iterator_compare( + const Iterator *old_iter, + const Iterator *new_iter +) { + Subtree old_tree = NULL_SUBTREE; + Subtree new_tree = NULL_SUBTREE; + uint32_t old_start = 0; + uint32_t new_start = 0; + TSSymbol old_alias_symbol = 0; + TSSymbol new_alias_symbol = 0; + iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start); + 
iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start); + + if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches; + if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers; + + if ( + old_alias_symbol == new_alias_symbol && + ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree) + ) { + if (old_start == new_start && + !ts_subtree_has_changes(old_tree) && + ts_subtree_symbol(old_tree) != ts_builtin_sym_error && + ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes && + ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE && + ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE && + (ts_subtree_parse_state(old_tree) == ERROR_STATE) == + (ts_subtree_parse_state(new_tree) == ERROR_STATE)) { + return IteratorMatches; + } else { + return IteratorMayDiffer; + } + } + + return IteratorDiffers; +} + +#ifdef DEBUG_GET_CHANGED_RANGES +static inline void iterator_print_state(Iterator *self) { + TreeCursorEntry entry = *array_back(&self->cursor.stack); + TSPoint start = iterator_start_position(self).extent; + TSPoint end = iterator_end_position(self).extent; + const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree)); + printf( + "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", + name, self->in_padding ? 
"(p)" : " ", + self->visible_depth, + start.row + 1, start.column, + end.row + 1, end.column + ); +} +#endif + +unsigned ts_subtree_get_changed_ranges( + const Subtree *old_tree, const Subtree *new_tree, + TreeCursor *cursor1, TreeCursor *cursor2, + const TSLanguage *language, + const TSRangeArray *included_range_differences, + TSRange **ranges +) { + TSRangeArray results = array_new(); + + Iterator old_iter = iterator_new(cursor1, old_tree, language); + Iterator new_iter = iterator_new(cursor2, new_tree, language); + + unsigned included_range_difference_index = 0; + + Length position = iterator_start_position(&old_iter); + Length next_position = iterator_start_position(&new_iter); + if (position.bytes < next_position.bytes) { + ts_range_array_add(&results, position, next_position); + position = next_position; + } else if (position.bytes > next_position.bytes) { + ts_range_array_add(&results, next_position, position); + next_position = position; + } + + do { + #ifdef DEBUG_GET_CHANGED_RANGES + printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column); + iterator_print_state(&old_iter); + printf("\tvs\t"); + iterator_print_state(&new_iter); + puts(""); + #endif + + // Compare the old and new subtrees. + IteratorComparison comparison = iterator_compare(&old_iter, &new_iter); + + // Even if the two subtrees appear to be identical, they could differ + // internally if they contain a range of text that was previously + // excluded from the parse, and is now included, or vice-versa. + if (comparison == IteratorMatches && ts_range_array_intersects( + included_range_differences, + included_range_difference_index, + position.bytes, + iterator_end_position(&old_iter).bytes + )) { + comparison = IteratorMayDiffer; + } + + bool is_changed = false; + switch (comparison) { + // If the subtrees are definitely identical, move to the end + // of both subtrees. 
+ case IteratorMatches: + next_position = iterator_end_position(&old_iter); + break; + + // If the subtrees might differ internally, descend into both + // subtrees, finding the first child that spans the current position. + case IteratorMayDiffer: + if (iterator_descend(&old_iter, position.bytes)) { + if (!iterator_descend(&new_iter, position.bytes)) { + is_changed = true; + next_position = iterator_end_position(&old_iter); + } + } else if (iterator_descend(&new_iter, position.bytes)) { + is_changed = true; + next_position = iterator_end_position(&new_iter); + } else { + next_position = length_min( + iterator_end_position(&old_iter), + iterator_end_position(&new_iter) + ); + } + break; + + // If the subtrees are different, record a change and then move + // to the end of both subtrees. + case IteratorDiffers: + is_changed = true; + next_position = length_min( + iterator_end_position(&old_iter), + iterator_end_position(&new_iter) + ); + break; + } + + // Ensure that both iterators are caught up to the current position. + while ( + !iterator_done(&old_iter) && + iterator_end_position(&old_iter).bytes <= next_position.bytes + ) iterator_advance(&old_iter); + while ( + !iterator_done(&new_iter) && + iterator_end_position(&new_iter).bytes <= next_position.bytes + ) iterator_advance(&new_iter); + + // Ensure that both iterators are at the same depth in the tree. 
+ while (old_iter.visible_depth > new_iter.visible_depth) { + iterator_ascend(&old_iter); + } + while (new_iter.visible_depth > old_iter.visible_depth) { + iterator_ascend(&new_iter); + } + + if (is_changed) { + #ifdef DEBUG_GET_CHANGED_RANGES + printf( + " change: [[%u, %u] - [%u, %u]]\n", + position.extent.row + 1, position.extent.column, + next_position.extent.row + 1, next_position.extent.column + ); + #endif + + ts_range_array_add(&results, position, next_position); + } + + position = next_position; + + // Keep track of the current position in the included range differences + // array in order to avoid scanning the entire array on each iteration. + while (included_range_difference_index < included_range_differences->size) { + const TSRange *range = &included_range_differences->contents[ + included_range_difference_index + ]; + if (range->end_byte <= position.bytes) { + included_range_difference_index++; + } else { + break; + } + } + } while (!iterator_done(&old_iter) && !iterator_done(&new_iter)); + + Length old_size = ts_subtree_total_size(*old_tree); + Length new_size = ts_subtree_total_size(*new_tree); + if (old_size.bytes < new_size.bytes) { + ts_range_array_add(&results, old_size, new_size); + } else if (new_size.bytes < old_size.bytes) { + ts_range_array_add(&results, new_size, old_size); + } + + *cursor1 = old_iter.cursor; + *cursor2 = new_iter.cursor; + *ranges = results.contents; + return results.size; +} diff --git a/parser/src/get_changed_ranges.h b/parser/src/get_changed_ranges.h new file mode 100644 index 00000000..a1f1dbb4 --- /dev/null +++ b/parser/src/get_changed_ranges.h @@ -0,0 +1,36 @@ +#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_ +#define TREE_SITTER_GET_CHANGED_RANGES_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./tree_cursor.h" +#include "./subtree.h" + +typedef Array(TSRange) TSRangeArray; + +void ts_range_array_get_changed_ranges( + const TSRange *old_ranges, unsigned old_range_count, + const TSRange *new_ranges, unsigned 
new_range_count, + TSRangeArray *differences +); + +bool ts_range_array_intersects( + const TSRangeArray *self, unsigned start_index, + uint32_t start_byte, uint32_t end_byte +); + +unsigned ts_subtree_get_changed_ranges( + const Subtree *old_tree, const Subtree *new_tree, + TreeCursor *cursor1, TreeCursor *cursor2, + const TSLanguage *language, + const TSRangeArray *included_range_differences, + TSRange **ranges +); + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_GET_CHANGED_RANGES_H_ diff --git a/parser/src/host.h b/parser/src/host.h new file mode 100644 index 00000000..a07e9f89 --- /dev/null +++ b/parser/src/host.h @@ -0,0 +1,21 @@ + +// Determine endian and pointer size based on known defines. +// TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments +// to override this. + +#if !defined(TS_BIG_ENDIAN) +#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \ + || (defined( __APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__))) +#define TS_BIG_ENDIAN 1 +#else +#define TS_BIG_ENDIAN 0 +#endif +#endif + +#if !defined(TS_PTR_SIZE) +#if UINTPTR_MAX == 0xFFFFFFFF +#define TS_PTR_SIZE 32 +#else +#define TS_PTR_SIZE 64 +#endif +#endif diff --git a/parser/src/language.c b/parser/src/language.c index c08707c4..d3d6ef5e 100644 --- a/parser/src/language.c +++ b/parser/src/language.c @@ -1,36 +1,37 @@ #include "./language.h" -#include "parser/api.h" + +#include "./api.h" #include -const t_language *ts_language_copy(const t_language *self) { +const TSLanguage *ts_language_copy(const TSLanguage *self) { return self; } -void ts_language_delete(const t_language *self) { - (void)(self); +void ts_language_delete(const TSLanguage *self) { + (void)(self); } -t_u32 ts_language_symbol_count(const t_language *self) { +uint32_t ts_language_symbol_count(const TSLanguage *self) { return self->symbol_count + self->alias_count; } -t_u32 ts_language_state_count(const t_language *self) { +uint32_t ts_language_state_count(const TSLanguage *self) { 
return self->state_count; } -t_u32 ts_language_version(const t_language *self) { +uint32_t ts_language_version(const TSLanguage *self) { return self->version; } -t_u32 ts_language_field_count(const t_language *self) { +uint32_t ts_language_field_count(const TSLanguage *self) { return self->field_count; } void ts_language_table_entry( - const t_language *self, - t_state_id state, - t_symbol symbol, - t_table_entry *result + const TSLanguage *self, + TSStateId state, + TSSymbol symbol, + TableEntry *result ) { if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { result->action_count = 0; @@ -38,48 +39,48 @@ void ts_language_table_entry( result->actions = NULL; } else { assert(symbol < self->token_count); - t_u32 action_index = ts_language_lookup(self, state, symbol); - const t_parse_action_entry *entry = &self->parse_actions[action_index]; + uint32_t action_index = ts_language_lookup(self, state, symbol); + const TSParseActionEntry *entry = &self->parse_actions[action_index]; result->action_count = entry->entry.count; result->is_reusable = entry->entry.reusable; - result->actions = (const t_parse_actions *)(entry + 1); + result->actions = (const TSParseAction *)(entry + 1); } } -t_symbol_metadata ts_language_symbol_metadata( - const t_language *self, - t_symbol symbol +TSSymbolMetadata ts_language_symbol_metadata( + const TSLanguage *self, + TSSymbol symbol ) { if (symbol == ts_builtin_sym_error) { - return (t_symbol_metadata) {.visible = true, .named = true}; + return (TSSymbolMetadata) {.visible = true, .named = true}; } else if (symbol == ts_builtin_sym_error_repeat) { - return (t_symbol_metadata) {.visible = false, .named = false}; + return (TSSymbolMetadata) {.visible = false, .named = false}; } else { return self->symbol_metadata[symbol]; } } -t_symbol ts_language_public_symbol( - const t_language *self, - t_symbol symbol +TSSymbol ts_language_public_symbol( + const TSLanguage *self, + TSSymbol symbol ) { if (symbol == 
ts_builtin_sym_error) return symbol; return self->public_symbol_map[symbol]; } -t_state_id ts_language_next_state( - const t_language *self, - t_state_id state, - t_symbol symbol +TSStateId ts_language_next_state( + const TSLanguage *self, + TSStateId state, + TSSymbol symbol ) { if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { return 0; } else if (symbol < self->token_count) { - t_u32 count; - const t_parse_actions *actions = ts_language_actions(self, state, symbol, &count); + uint32_t count; + const TSParseAction *actions = ts_language_actions(self, state, symbol, &count); if (count > 0) { - t_parse_actions action = actions[count - 1]; - if (action.type == ActionTypeShift) { + TSParseAction action = actions[count - 1]; + if (action.type == TSParseActionTypeShift) { return action.shift.extra ? state : action.shift.state; } } @@ -90,8 +91,8 @@ t_state_id ts_language_next_state( } const char *ts_language_symbol_name( - const t_language *self, - t_symbol symbol + const TSLanguage *self, + TSSymbol symbol ) { if (symbol == ts_builtin_sym_error) { return "ERROR"; @@ -104,16 +105,16 @@ const char *ts_language_symbol_name( } } -t_symbol ts_language_symbol_for_name( - const t_language *self, +TSSymbol ts_language_symbol_for_name( + const TSLanguage *self, const char *string, - t_u32 length, + uint32_t length, bool is_named ) { if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error; - t_u16 count = (t_u16)ts_language_symbol_count(self); - for (t_symbol i = 0; i < count; i++) { - t_symbol_metadata metadata = ts_language_symbol_metadata(self, i); + uint16_t count = (uint16_t)ts_language_symbol_count(self); + for (TSSymbol i = 0; i < count; i++) { + TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i); if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; const char *symbol_name = self->symbol_names[i]; if (!strncmp(symbol_name, string, length) && !symbol_name[length]) { @@ -123,25 +124,25 
@@ t_symbol ts_language_symbol_for_name( return 0; } -t_symbol_type ts_language_symbol_type( - const t_language *self, - t_symbol symbol +TSSymbolType ts_language_symbol_type( + const TSLanguage *self, + TSSymbol symbol ) { - t_symbol_metadata metadata = ts_language_symbol_metadata(self, symbol); + TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol); if (metadata.named && metadata.visible) { - return SymbolTypeRegular; + return TSSymbolTypeRegular; } else if (metadata.visible) { - return SymbolTypeAnonymous; + return TSSymbolTypeAnonymous; } else { - return SymbolTypeAuxiliary; + return TSSymbolTypeAuxiliary; } } const char *ts_language_field_name_for_id( - const t_language *self, - t_field_id id + const TSLanguage *self, + TSFieldId id ) { - t_u32 count = ts_language_field_count(self); + uint32_t count = ts_language_field_count(self); if (count && id <= count) { return self->field_names[id]; } else { @@ -149,13 +150,13 @@ const char *ts_language_field_name_for_id( } } -t_field_id ts_language_field_id_for_name( - const t_language *self, +TSFieldId ts_language_field_id_for_name( + const TSLanguage *self, const char *name, - t_u32 name_length + uint32_t name_length ) { - t_u16 count = (t_u16)ts_language_field_count(self); - for (t_symbol i = 1; i < count + 1; i++) { + uint16_t count = (uint16_t)ts_language_field_count(self); + for (TSSymbol i = 1; i < count + 1; i++) { switch (strncmp(name, self->field_names[i], name_length)) { case 0: if (self->field_names[i][name_length] == 0) return i; @@ -169,47 +170,47 @@ t_field_id ts_language_field_id_for_name( return 0; } -t_lookahead_iterator *ts_lookahead_iterator_new(const t_language *self, t_state_id state) { +TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) { if (state >= self->state_count) return NULL; - t_lookahead_iterator *iterator = malloc(sizeof(t_lookahead_iterator)); + LookaheadIterator *iterator = ts_malloc(sizeof(LookaheadIterator)); *iterator = 
ts_language_lookaheads(self, state); - return (t_lookahead_iterator *)iterator; + return (TSLookaheadIterator *)iterator; } -void ts_lookahead_iterator_delete(t_lookahead_iterator *self) { - free(self); +void ts_lookahead_iterator_delete(TSLookaheadIterator *self) { + ts_free(self); } -bool ts_lookahead_iterator_reset_state(t_lookahead_iterator * self, t_state_id state) { - t_lookahead_iterator *iterator = (t_lookahead_iterator *)self; +bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) { + LookaheadIterator *iterator = (LookaheadIterator *)self; if (state >= iterator->language->state_count) return false; *iterator = ts_language_lookaheads(iterator->language, state); return true; } -const t_language *ts_lookahead_iterator_language(const t_lookahead_iterator *self) { - const t_lookahead_iterator *iterator = (const t_lookahead_iterator *)self; +const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) { + const LookaheadIterator *iterator = (const LookaheadIterator *)self; return iterator->language; } -bool ts_lookahead_iterator_reset(t_lookahead_iterator *self, const t_language *language, t_state_id state) { +bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) { if (state >= language->state_count) return false; - t_lookahead_iterator *iterator = (t_lookahead_iterator *)self; + LookaheadIterator *iterator = (LookaheadIterator *)self; *iterator = ts_language_lookaheads(language, state); return true; } -bool ts_lookahead_iterator_next(t_lookahead_iterator *self) { - t_lookahead_iterator *iterator = (t_lookahead_iterator *)self; +bool ts_lookahead_iterator_next(TSLookaheadIterator *self) { + LookaheadIterator *iterator = (LookaheadIterator *)self; return ts_lookahead_iterator__next(iterator); } -t_symbol ts_lookahead_iterator_current_symbol(const t_lookahead_iterator *self) { - const t_lookahead_iterator *iterator = (const t_lookahead_iterator *)self; +TSSymbol 
ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) { + const LookaheadIterator *iterator = (const LookaheadIterator *)self; return iterator->symbol; } -const char *ts_lookahead_iterator_current_symbol_name(const t_lookahead_iterator *self) { - const t_lookahead_iterator *iterator = (const t_lookahead_iterator *)self; +const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) { + const LookaheadIterator *iterator = (const LookaheadIterator *)self; return ts_language_symbol_name(iterator->language, iterator->symbol); } diff --git a/parser/src/language.h b/parser/src/language.h index a26c9f6c..4e2769b4 100644 --- a/parser/src/language.h +++ b/parser/src/language.h @@ -1,74 +1,72 @@ #ifndef TREE_SITTER_LANGUAGE_H_ #define TREE_SITTER_LANGUAGE_H_ +#ifdef __cplusplus +extern "C" { +#endif + #include "./subtree.h" -#include "parser/types/types_parse_action_type.h" -#include "parser/types/types_state_id.h" -#include "parser/types/types_symbol.h" +#include "./parser.h" #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) #define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14 #define LANGUAGE_VERSION_USABLE_VIA_WASM 13 -typedef struct s_table_entry -{ - const t_parse_actions *actions; - t_u32 action_count; - bool is_reusable; -} t_table_entry; +typedef struct { + const TSParseAction *actions; + uint32_t action_count; + bool is_reusable; +} TableEntry; -typedef struct s_lookahead_iterator -{ - const t_language *language; - const t_u16 *data; - const t_u16 *group_end; - t_state_id state; - t_u16 table_value; - t_u16 section_index; - t_u16 group_count; - bool is_small_state; +typedef struct { + const TSLanguage *language; + const uint16_t *data; + const uint16_t *group_end; + TSStateId state; + uint16_t table_value; + uint16_t section_index; + uint16_t group_count; + bool is_small_state; - const t_parse_actions *actions; - t_symbol symbol; - t_state_id next_state; - t_u16 action_count; -} t_lookahead_iterator; + const 
TSParseAction *actions; + TSSymbol symbol; + TSStateId next_state; + uint16_t action_count; +} LookaheadIterator; -void ts_language_table_entry(const t_language *, t_state_id, t_symbol, - t_table_entry *); +void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *); -t_symbol_metadata ts_language_symbol_metadata(const t_language *, t_symbol); +TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol); -t_symbol ts_language_public_symbol(const t_language *, t_symbol); +TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol); -t_state_id ts_language_next_state(const t_language *self, t_state_id state, - t_symbol symbol); +TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol); -static inline bool ts_language_is_symbol_external(const t_language *self, - t_symbol symbol) -{ - return 0 < symbol && symbol < self->external_token_count + 1; +static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) { + return 0 < symbol && symbol < self->external_token_count + 1; } -static inline const t_parse_actions *ts_language_actions(const t_language *self, - t_state_id state, - t_symbol symbol, - t_u32 *count) -{ - t_table_entry entry; - ts_language_table_entry(self, state, symbol, &entry); - *count = entry.action_count; - return entry.actions; +static inline const TSParseAction *ts_language_actions( + const TSLanguage *self, + TSStateId state, + TSSymbol symbol, + uint32_t *count +) { + TableEntry entry; + ts_language_table_entry(self, state, symbol, &entry); + *count = entry.action_count; + return entry.actions; } -static inline bool ts_language_has_reduce_action(const t_language *self, - t_state_id state, - t_symbol symbol) -{ - t_table_entry entry; - ts_language_table_entry(self, state, symbol, &entry); - return entry.action_count > 0 && entry.actions[0].type == ActionTypeReduce; +static inline bool ts_language_has_reduce_action( + const TSLanguage *self, + 
TSStateId state, + TSSymbol symbol +) { + TableEntry entry; + ts_language_table_entry(self, state, symbol, &entry); + return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce; } // Lookup the table value for a given symbol and state. @@ -78,37 +76,34 @@ static inline bool ts_language_has_reduce_action(const t_language *self, // For 'large' parse states, this is a direct lookup. For 'small' parse // states, this requires searching through the symbol groups to find // the given symbol. -static inline t_u16 ts_language_lookup(const t_language *self, t_state_id state, - t_symbol symbol) -{ - if (state >= self->large_state_count) - { - t_u32 index = - self->small_parse_table_map[state - self->large_state_count]; - const t_u16 *data = &self->small_parse_table[index]; - t_u16 group_count = *(data++); - for (unsigned i = 0; i < group_count; i++) - { - t_u16 section_value = *(data++); - t_u16 symbol_count = *(data++); - for (unsigned j = 0; j < symbol_count; j++) - { - if (*(data++) == symbol) - return section_value; - } - } - return 0; - } - else - { - return self->parse_table[state * self->symbol_count + symbol]; - } +static inline uint16_t ts_language_lookup( + const TSLanguage *self, + TSStateId state, + TSSymbol symbol +) { + if (state >= self->large_state_count) { + uint32_t index = self->small_parse_table_map[state - self->large_state_count]; + const uint16_t *data = &self->small_parse_table[index]; + uint16_t group_count = *(data++); + for (unsigned i = 0; i < group_count; i++) { + uint16_t section_value = *(data++); + uint16_t symbol_count = *(data++); + for (unsigned j = 0; j < symbol_count; j++) { + if (*(data++) == symbol) return section_value; + } + } + return 0; + } else { + return self->parse_table[state * self->symbol_count + symbol]; + } } -static inline bool ts_language_has_actions(const t_language *self, - t_state_id state, t_symbol symbol) -{ - return ts_language_lookup(self, state, symbol) != 0; +static inline bool 
ts_language_has_actions( + const TSLanguage *self, + TSStateId state, + TSSymbol symbol +) { + return ts_language_lookup(self, state, symbol) != 0; } // Iterate over all of the symbols that are valid in the given state. @@ -117,209 +112,188 @@ static inline bool ts_language_has_actions(const t_language *self, // all possible symbols and checking the parse table for each one. // For 'small' parse states, this exploits the structure of the // table to only visit the valid symbols. -static inline t_lookahead_iterator ts_language_lookaheads( - const t_language *self, t_state_id state) -{ - bool is_small_state = state >= self->large_state_count; - const t_u16 *data; - const t_u16 *group_end = NULL; - t_u16 group_count = 0; - if (is_small_state) - { - t_u32 index = - self->small_parse_table_map[state - self->large_state_count]; - data = &self->small_parse_table[index]; - group_end = data + 1; - group_count = *data; - } - else - { - data = &self->parse_table[state * self->symbol_count] - 1; - } - return (t_lookahead_iterator){ - .language = self, - .data = data, - .group_end = group_end, - .group_count = group_count, - .is_small_state = is_small_state, - .symbol = UINT16_MAX, - .next_state = 0, - }; +static inline LookaheadIterator ts_language_lookaheads( + const TSLanguage *self, + TSStateId state +) { + bool is_small_state = state >= self->large_state_count; + const uint16_t *data; + const uint16_t *group_end = NULL; + uint16_t group_count = 0; + if (is_small_state) { + uint32_t index = self->small_parse_table_map[state - self->large_state_count]; + data = &self->small_parse_table[index]; + group_end = data + 1; + group_count = *data; + } else { + data = &self->parse_table[state * self->symbol_count] - 1; + } + return (LookaheadIterator) { + .language = self, + .data = data, + .group_end = group_end, + .group_count = group_count, + .is_small_state = is_small_state, + .symbol = UINT16_MAX, + .next_state = 0, + }; } -static inline bool 
ts_lookahead_iterator__next(t_lookahead_iterator *self) -{ - // For small parse states, valid symbols are listed explicitly, - // grouped by their value. There's no need to look up the actions - // again until moving to the next group. - if (self->is_small_state) - { - self->data++; - if (self->data == self->group_end) - { - if (self->group_count == 0) - return false; - self->group_count--; - self->table_value = *(self->data++); - unsigned symbol_count = *(self->data++); - self->group_end = self->data + symbol_count; - self->symbol = *self->data; - } - else - { - self->symbol = *self->data; - return true; - } - } +static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) { + // For small parse states, valid symbols are listed explicitly, + // grouped by their value. There's no need to look up the actions + // again until moving to the next group. + if (self->is_small_state) { + self->data++; + if (self->data == self->group_end) { + if (self->group_count == 0) return false; + self->group_count--; + self->table_value = *(self->data++); + unsigned symbol_count = *(self->data++); + self->group_end = self->data + symbol_count; + self->symbol = *self->data; + } else { + self->symbol = *self->data; + return true; + } + } - // For large parse states, iterate through every symbol until one - // is found that has valid actions. - else - { - do - { - self->data++; - self->symbol++; - if (self->symbol >= self->language->symbol_count) - return false; - self->table_value = *self->data; - } while (!self->table_value); - } + // For large parse states, iterate through every symbol until one + // is found that has valid actions. + else { + do { + self->data++; + self->symbol++; + if (self->symbol >= self->language->symbol_count) return false; + self->table_value = *self->data; + } while (!self->table_value); + } - // Depending on if the symbols is terminal or non-terminal, the table - // value either represents a list of actions or a successor state. 
- if (self->symbol < self->language->token_count) - { - const t_parse_action_entry *entry = - &self->language->parse_actions[self->table_value]; - self->action_count = entry->entry.count; - self->actions = (const t_parse_actions *)(entry + 1); - self->next_state = 0; - } - else - { - self->action_count = 0; - self->next_state = self->table_value; - } - return true; + // Depending on if the symbols is terminal or non-terminal, the table value either + // represents a list of actions or a successor state. + if (self->symbol < self->language->token_count) { + const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value]; + self->action_count = entry->entry.count; + self->actions = (const TSParseAction *)(entry + 1); + self->next_state = 0; + } else { + self->action_count = 0; + self->next_state = self->table_value; + } + return true; } -// Whether the state is a "primary state". If this returns false, it -// indicates that there exists another state that behaves identically to -// this one with respect to query analysis. -static inline bool ts_language_state_is_primary(const t_language *self, - t_state_id state) -{ - if (self->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) - { - return state == self->primary_state_ids[state]; - } - else - { - return true; - } +// Whether the state is a "primary state". If this returns false, it indicates that there exists +// another state that behaves identically to this one with respect to query analysis. 
+static inline bool ts_language_state_is_primary( + const TSLanguage *self, + TSStateId state +) { + if (self->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) { + return state == self->primary_state_ids[state]; + } else { + return true; + } } static inline const bool *ts_language_enabled_external_tokens( - const t_language *self, unsigned external_scanner_state) -{ - if (external_scanner_state == 0) - { - return NULL; - } - else - { - return self->external_scanner.states + - self->external_token_count * external_scanner_state; - } + const TSLanguage *self, + unsigned external_scanner_state +) { + if (external_scanner_state == 0) { + return NULL; + } else { + return self->external_scanner.states + self->external_token_count * external_scanner_state; + } } -static inline const t_symbol *ts_language_alias_sequence(const t_language *self, - t_u32 production_id) -{ - return production_id - ? &self->alias_sequences[production_id * - self->max_alias_sequence_length] - : NULL; +static inline const TSSymbol *ts_language_alias_sequence( + const TSLanguage *self, + uint32_t production_id +) { + return production_id ? + &self->alias_sequences[production_id * self->max_alias_sequence_length] : + NULL; } -static inline t_symbol ts_language_alias_at(const t_language *self, - t_u32 production_id, - t_u32 child_index) -{ - return production_id - ? self->alias_sequences[production_id * - self->max_alias_sequence_length + - child_index] - : 0; +static inline TSSymbol ts_language_alias_at( + const TSLanguage *self, + uint32_t production_id, + uint32_t child_index +) { + return production_id ? 
+ self->alias_sequences[production_id * self->max_alias_sequence_length + child_index] : + 0; } -static inline void ts_language_field_map(const t_language *self, - t_u32 production_id, - const t_field_map_entry **start, - const t_field_map_entry **end) -{ - if (self->field_count == 0) - { - *start = NULL; - *end = NULL; - return; - } +static inline void ts_language_field_map( + const TSLanguage *self, + uint32_t production_id, + const TSFieldMapEntry **start, + const TSFieldMapEntry **end +) { + if (self->field_count == 0) { + *start = NULL; + *end = NULL; + return; + } - t_field_map_slice slice = self->field_map_slices[production_id]; - *start = &self->field_map_entries[slice.index]; - *end = &self->field_map_entries[slice.index] + slice.length; + TSFieldMapSlice slice = self->field_map_slices[production_id]; + *start = &self->field_map_entries[slice.index]; + *end = &self->field_map_entries[slice.index] + slice.length; } -static inline void ts_language_aliases_for_symbol(const t_language *self, - t_symbol original_symbol, - const t_symbol **start, - const t_symbol **end) -{ - *start = &self->public_symbol_map[original_symbol]; - *end = *start + 1; +static inline void ts_language_aliases_for_symbol( + const TSLanguage *self, + TSSymbol original_symbol, + const TSSymbol **start, + const TSSymbol **end +) { + *start = &self->public_symbol_map[original_symbol]; + *end = *start + 1; - unsigned idx = 0; - for (;;) - { - t_symbol symbol = self->alias_map[idx++]; - if (symbol == 0 || symbol > original_symbol) - break; - t_u16 count = self->alias_map[idx++]; - if (symbol == original_symbol) - { - *start = &self->alias_map[idx]; - *end = &self->alias_map[idx + count]; - break; - } - idx += count; - } + unsigned idx = 0; + for (;;) { + TSSymbol symbol = self->alias_map[idx++]; + if (symbol == 0 || symbol > original_symbol) break; + uint16_t count = self->alias_map[idx++]; + if (symbol == original_symbol) { + *start = &self->alias_map[idx]; + *end = &self->alias_map[idx + 
count]; + break; + } + idx += count; + } } static inline void ts_language_write_symbol_as_dot_string( - const t_language *self, FILE *f, t_symbol symbol) -{ - const char *name = ts_language_symbol_name(self, symbol); - for (const char *chr = name; *chr; chr++) - { - switch (*chr) - { - case '"': - case '\\': - fputc('\\', f); - fputc(*chr, f); - break; - case '\n': - fputs("\\n", f); - break; - case '\t': - fputs("\\t", f); - break; - default: - fputc(*chr, f); - break; - } - } + const TSLanguage *self, + FILE *f, + TSSymbol symbol +) { + const char *name = ts_language_symbol_name(self, symbol); + for (const char *chr = name; *chr; chr++) { + switch (*chr) { + case '"': + case '\\': + fputc('\\', f); + fputc(*chr, f); + break; + case '\n': + fputs("\\n", f); + break; + case '\t': + fputs("\\t", f); + break; + default: + fputc(*chr, f); + break; + } + } } -#endif // TREE_SITTER_LANGUAGE_H_ +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_LANGUAGE_H_ diff --git a/parser/src/length.h b/parser/src/length.h new file mode 100644 index 00000000..82003c02 --- /dev/null +++ b/parser/src/length.h @@ -0,0 +1,52 @@ +#ifndef TREE_SITTER_LENGTH_H_ +#define TREE_SITTER_LENGTH_H_ + +#include +#include +#include "./point.h" +#include "./api.h" + +typedef struct { + uint32_t bytes; + TSPoint extent; +} Length; + +static const Length LENGTH_UNDEFINED = {0, {0, 1}}; +static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}}; + +static inline bool length_is_undefined(Length length) { + return length.bytes == 0 && length.extent.column != 0; +} + +static inline Length length_min(Length len1, Length len2) { + return (len1.bytes < len2.bytes) ? 
len1 : len2; +} + +static inline Length length_add(Length len1, Length len2) { + Length result; + result.bytes = len1.bytes + len2.bytes; + result.extent = point_add(len1.extent, len2.extent); + return result; +} + +static inline Length length_sub(Length len1, Length len2) { + Length result; + result.bytes = len1.bytes - len2.bytes; + result.extent = point_sub(len1.extent, len2.extent); + return result; +} + +static inline Length length_zero(void) { + Length result = {0, {0, 0}}; + return result; +} + +static inline Length length_saturating_sub(Length len1, Length len2) { + if (len1.bytes > len2.bytes) { + return length_sub(len1, len2); + } else { + return length_zero(); + } +} + +#endif diff --git a/parser/src/lexer.c b/parser/src/lexer.c index 4d387b5b..b6c19cf1 100644 --- a/parser/src/lexer.c +++ b/parser/src/lexer.c @@ -1,458 +1,438 @@ -#include "parser/lexer.h" -#include "parser/parser_length.h" -#include "./subtree.h" -#include #include +#include "./lexer.h" +#include "./subtree.h" +#include "./length.h" +//#include "./unicode.h" -#define LOG(message, character) \ - if (self->logger.log) \ - { \ - snprintf(self->debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \ - 32 <= character && character < 127 ? message \ - " character:'%c'" \ - : message " character:%d", \ - character); \ - self->logger.log(self->logger.payload, LogTypeLex, \ - self->debug_buffer); \ - } +#define LOG(message, character) \ + if (self->logger.log) { \ + snprintf( \ + self->debug_buffer, \ + TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \ + 32 <= character && character < 127 ? 
\ + message " character:'%c'" : \ + message " character:%d", \ + character \ + ); \ + self->logger.log( \ + self->logger.payload, \ + TSLogTypeLex, \ + self->debug_buffer \ + ); \ + } -static const t_i32 BYTE_ORDER_MARK = 0xFEFF; +static const int32_t BYTE_ORDER_MARK = 0xFEFF; -static const t_parser_range DEFAULT_RANGE = {.start_point = - { - .row = 0, - .column = 0, - }, - .end_point = - { - .row = UINT32_MAX, - .column = UINT32_MAX, - }, - .start_byte = 0, - .end_byte = UINT32_MAX}; +static const TSRange DEFAULT_RANGE = { + .start_point = { + .row = 0, + .column = 0, + }, + .end_point = { + .row = UINT32_MAX, + .column = UINT32_MAX, + }, + .start_byte = 0, + .end_byte = UINT32_MAX +}; // Check if the lexer has reached EOF. This state is stored // by setting the lexer's `current_included_range_index` such that // it has consumed all of its available ranges. -static bool ts_lexer__eof(const t_lexer *_self) -{ - t_liblexer *self = (t_liblexer *)_self; - return self->current_included_range_index == self->included_range_count; +static bool ts_lexer__eof(const TSLexer *_self) { + Lexer *self = (Lexer *)_self; + return self->current_included_range_index == self->included_range_count; } // Clear the currently stored chunk of source code, because the lexer's // position has changed. -static void ts_lexer__clear_chunk(t_liblexer *self) -{ - self->chunk = NULL; - self->chunk_size = 0; - self->chunk_start = 0; +static void ts_lexer__clear_chunk(Lexer *self) { + self->chunk = NULL; + self->chunk_size = 0; + self->chunk_start = 0; } // Call the lexer's input callback to obtain a new chunk of source code // for the current position. 
-static void ts_lexer__get_chunk(t_liblexer *self) -{ - self->chunk_start = self->current_position.bytes; - self->chunk = - self->input.read(self->input.payload, self->current_position.bytes, - self->current_position.extent, &self->chunk_size); - if (!self->chunk_size) - { - self->current_included_range_index = self->included_range_count; - self->chunk = NULL; - } +static void ts_lexer__get_chunk(Lexer *self) { + self->chunk_start = self->current_position.bytes; + self->chunk = self->input.read( + self->input.payload, + self->current_position.bytes, + self->current_position.extent, + &self->chunk_size + ); + if (!self->chunk_size) { + self->current_included_range_index = self->included_range_count; + self->chunk = NULL; + } } +typedef uint32_t (*DecodeFunc)( + const uint8_t *string, + uint32_t length, + int32_t *code_point +); -typedef t_i32 (*UnicodeDecodeFunction)(const t_i8 *chunk, t_i32 size, - t_i32 *lookahead); - -t_i32 my_decode(const t_i8 *chunk, t_i32 size, t_i32 *lookahead) -{ - (void)(size); - *((t_i32 *)lookahead) = *chunk; - return (1); +static uint32_t ts_decode_ascii( + const uint8_t *string, + uint32_t length, + int32_t *code_point +) { + uint32_t i = 1; + (void)(length); + *code_point = *string; + return i; } -#define TS_DECODE_ERROR -1 - // Decode the next unicode character in the current chunk of source code. // This assumes that the lexer has already retrieved a chunk of source // code that spans the current position. 
-static void ts_lexer__get_lookahead(t_liblexer *self) -{ - t_i32 position_in_chunk = - self->current_position.bytes - self->chunk_start; - t_i32 size = self->chunk_size - position_in_chunk; +static void ts_lexer__get_lookahead(Lexer *self) { + uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start; + uint32_t size = self->chunk_size - position_in_chunk; - if (size == 0) - { - self->lookahead_size = 1; - self->data.lookahead = '\0'; - return; - } + if (size == 0) { + self->lookahead_size = 1; + self->data.lookahead = '\0'; + return; + } - const t_i8 *chunk = (const t_i8 *)self->chunk + position_in_chunk; - UnicodeDecodeFunction decode = my_decode; + #define TS_DECODE_ERROR -1 - self->lookahead_size = decode(chunk, size, &self->data.lookahead); + const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk; + // UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8 + // ? ts_decode_utf8 + // : ts_decode_utf16; - // If this chunk ended in the middle of a multi-byte character, - // try again with a fresh chunk. - if (self->data.lookahead == TS_DECODE_ERROR && size < 4) - { - ts_lexer__get_chunk(self); - chunk = (const t_i8 *)self->chunk; - size = self->chunk_size; - self->lookahead_size = decode(chunk, size, &self->data.lookahead); - } - if (self->data.lookahead == TS_DECODE_ERROR) - { - self->lookahead_size = 1; - } + self->lookahead_size = ts_decode_ascii(chunk, size, &self->data.lookahead); + + // If this chunk ended in the middle of a multi-byte character, + // try again with a fresh chunk. 
+ if (self->data.lookahead == TS_DECODE_ERROR && size < 4) { + ts_lexer__get_chunk(self); + chunk = (const uint8_t *)self->chunk; + size = self->chunk_size; + self->lookahead_size = ts_decode_ascii(chunk, size, &self->data.lookahead); + } + + if (self->data.lookahead == TS_DECODE_ERROR) { + self->lookahead_size = 1; + } } -static void ts_lexer_goto(t_liblexer *self, t_parse_length position) -{ - self->current_position = position; +static void ts_lexer_goto(Lexer *self, Length position) { + self->current_position = position; - // Move to the first valid position at or after the given position. - bool found_included_range = false; - for (unsigned i = 0; i < self->included_range_count; i++) - { - t_parser_range *included_range = &self->included_ranges[i]; - if (included_range->end_byte > self->current_position.bytes && - included_range->end_byte > included_range->start_byte) - { - if (included_range->start_byte >= self->current_position.bytes) - { - self->current_position = (t_parse_length){ - .bytes = included_range->start_byte, - .extent = included_range->start_point, - }; - } + // Move to the first valid position at or after the given position. + bool found_included_range = false; + for (unsigned i = 0; i < self->included_range_count; i++) { + TSRange *included_range = &self->included_ranges[i]; + if ( + included_range->end_byte > self->current_position.bytes && + included_range->end_byte > included_range->start_byte + ) { + if (included_range->start_byte >= self->current_position.bytes) { + self->current_position = (Length) { + .bytes = included_range->start_byte, + .extent = included_range->start_point, + }; + } - self->current_included_range_index = i; - found_included_range = true; - break; - } - } + self->current_included_range_index = i; + found_included_range = true; + break; + } + } - if (found_included_range) - { - // If the current position is outside of the current chunk of text, - // then clear out the current chunk of text. 
- if (self->chunk && (self->current_position.bytes < self->chunk_start || - self->current_position.bytes >= - self->chunk_start + self->chunk_size)) - { - ts_lexer__clear_chunk(self); - } + if (found_included_range) { + // If the current position is outside of the current chunk of text, + // then clear out the current chunk of text. + if (self->chunk && ( + self->current_position.bytes < self->chunk_start || + self->current_position.bytes >= self->chunk_start + self->chunk_size + )) { + ts_lexer__clear_chunk(self); + } - self->lookahead_size = 0; - self->data.lookahead = '\0'; - } + self->lookahead_size = 0; + self->data.lookahead = '\0'; + } - // If the given position is beyond any of included ranges, move to the EOF - // state - past the end of the included ranges. - else - { - self->current_included_range_index = self->included_range_count; - t_parser_range *last_included_range = - &self->included_ranges[self->included_range_count - 1]; - self->current_position = (t_parse_length){ - .bytes = last_included_range->end_byte, - .extent = last_included_range->end_point, - }; - ts_lexer__clear_chunk(self); - self->lookahead_size = 1; - self->data.lookahead = '\0'; - } + // If the given position is beyond any of included ranges, move to the EOF + // state - past the end of the included ranges. + else { + self->current_included_range_index = self->included_range_count; + TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1]; + self->current_position = (Length) { + .bytes = last_included_range->end_byte, + .extent = last_included_range->end_point, + }; + ts_lexer__clear_chunk(self); + self->lookahead_size = 1; + self->data.lookahead = '\0'; + } } // Intended to be called only from functions that control logging. 
-static void ts_lexer__do_advance(t_liblexer *self, bool skip) -{ - if (self->lookahead_size) - { - self->current_position.bytes += self->lookahead_size; - if (self->data.lookahead == '\n') - { - self->current_position.extent.row++; - self->current_position.extent.column = 0; - } - else - { - self->current_position.extent.column += self->lookahead_size; - } - } +static void ts_lexer__do_advance(Lexer *self, bool skip) { + if (self->lookahead_size) { + self->current_position.bytes += self->lookahead_size; + if (self->data.lookahead == '\n') { + self->current_position.extent.row++; + self->current_position.extent.column = 0; + } else { + self->current_position.extent.column += self->lookahead_size; + } + } - const t_parser_range *current_range = - &self->included_ranges[self->current_included_range_index]; - while (self->current_position.bytes >= current_range->end_byte || - current_range->end_byte == current_range->start_byte) - { - if (self->current_included_range_index < self->included_range_count) - { - self->current_included_range_index++; - } - if (self->current_included_range_index < self->included_range_count) - { - current_range++; - self->current_position = (t_parse_length){ - current_range->start_byte, - current_range->start_point, - }; - } - else - { - current_range = NULL; - break; - } - } + const TSRange *current_range = &self->included_ranges[self->current_included_range_index]; + while ( + self->current_position.bytes >= current_range->end_byte || + current_range->end_byte == current_range->start_byte + ) { + if (self->current_included_range_index < self->included_range_count) { + self->current_included_range_index++; + } + if (self->current_included_range_index < self->included_range_count) { + current_range++; + self->current_position = (Length) { + current_range->start_byte, + current_range->start_point, + }; + } else { + current_range = NULL; + break; + } + } - if (skip) - self->token_start_position = self->current_position; + if (skip) 
self->token_start_position = self->current_position; - if (current_range) - { - if (self->current_position.bytes < self->chunk_start || - self->current_position.bytes >= - self->chunk_start + self->chunk_size) - { - ts_lexer__get_chunk(self); - } - ts_lexer__get_lookahead(self); - } - else - { - ts_lexer__clear_chunk(self); - self->data.lookahead = '\0'; - self->lookahead_size = 1; - } + if (current_range) { + if ( + self->current_position.bytes < self->chunk_start || + self->current_position.bytes >= self->chunk_start + self->chunk_size + ) { + ts_lexer__get_chunk(self); + } + ts_lexer__get_lookahead(self); + } else { + ts_lexer__clear_chunk(self); + self->data.lookahead = '\0'; + self->lookahead_size = 1; + } } // Advance to the next character in the source code, retrieving a new // chunk of source code if needed. -static void ts_lexer__advance(t_lexer *_self, bool skip) -{ - t_liblexer *self = (t_liblexer *)_self; - if (!self->chunk) - return; - ts_lexer__do_advance(self, skip); +static void ts_lexer__advance(TSLexer *_self, bool skip) { + Lexer *self = (Lexer *)_self; + if (!self->chunk) return; + + if (skip) { + LOG("skip", self->data.lookahead) + } else { + LOG("consume", self->data.lookahead) + } + + ts_lexer__do_advance(self, skip); } // Mark that a token match has completed. This can be called multiple // times if a longer match is found later. -static void ts_lexer__mark_end(t_lexer *_self) -{ - t_liblexer *self = (t_liblexer *)_self; - if (!ts_lexer__eof(&self->data)) - { - // If the lexer is right at the beginning of included range, - // then the token should be considered to end at the *end* of the - // previous included range, rather than here. 
- t_parser_range *current_included_range = - &self->included_ranges[self->current_included_range_index]; - if (self->current_included_range_index > 0 && - self->current_position.bytes == current_included_range->start_byte) - { - t_parser_range *previous_included_range = - current_included_range - 1; - self->token_end_position = (t_parse_length){ - previous_included_range->end_byte, - previous_included_range->end_point, - }; - return; - } - } - self->token_end_position = self->current_position; +static void ts_lexer__mark_end(TSLexer *_self) { + Lexer *self = (Lexer *)_self; + if (!ts_lexer__eof(&self->data)) { + // If the lexer is right at the beginning of included range, + // then the token should be considered to end at the *end* of the + // previous included range, rather than here. + TSRange *current_included_range = &self->included_ranges[ + self->current_included_range_index + ]; + if ( + self->current_included_range_index > 0 && + self->current_position.bytes == current_included_range->start_byte + ) { + TSRange *previous_included_range = current_included_range - 1; + self->token_end_position = (Length) { + previous_included_range->end_byte, + previous_included_range->end_point, + }; + return; + } + } + self->token_end_position = self->current_position; } -static t_i32 ts_lexer__get_column(t_lexer *_self) -{ - t_liblexer *self = (t_liblexer *)_self; +static uint32_t ts_lexer__get_column(TSLexer *_self) { + Lexer *self = (Lexer *)_self; - t_u32 goal_byte = self->current_position.bytes; + uint32_t goal_byte = self->current_position.bytes; - self->did_get_column = true; - self->current_position.bytes -= self->current_position.extent.column; - self->current_position.extent.column = 0; + self->did_get_column = true; + self->current_position.bytes -= self->current_position.extent.column; + self->current_position.extent.column = 0; - if (self->current_position.bytes < self->chunk_start) - { - ts_lexer__get_chunk(self); - } + if (self->current_position.bytes < 
self->chunk_start) { + ts_lexer__get_chunk(self); + } - t_i32 result = 0; - if (!ts_lexer__eof(_self)) - { - ts_lexer__get_lookahead(self); - while (self->current_position.bytes < goal_byte && self->chunk) - { - result++; - ts_lexer__do_advance(self, false); - if (ts_lexer__eof(_self)) - break; - } - } + uint32_t result = 0; + if (!ts_lexer__eof(_self)) { + ts_lexer__get_lookahead(self); + while (self->current_position.bytes < goal_byte && self->chunk) { + result++; + ts_lexer__do_advance(self, false); + if (ts_lexer__eof(_self)) break; + } + } - return result; + return result; } // Is the lexer at a boundary between two disjoint included ranges of // source code? This is exposed as an API because some languages' external // scanners need to perform custom actions at these boundaries. -static bool ts_lexer__is_at_included_range_start(const t_lexer *_self) -{ - const t_liblexer *self = (const t_liblexer *)_self; - if (self->current_included_range_index < self->included_range_count) - { - t_parser_range *current_range = - &self->included_ranges[self->current_included_range_index]; - return self->current_position.bytes == current_range->start_byte; - } - else - { - return false; - } +static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) { + const Lexer *self = (const Lexer *)_self; + if (self->current_included_range_index < self->included_range_count) { + TSRange *current_range = &self->included_ranges[self->current_included_range_index]; + return self->current_position.bytes == current_range->start_byte; + } else { + return false; + } } -void ts_lexer_init(t_liblexer *self) -{ - *self = (t_liblexer){ - .data = - { - // The lexer's methods are stored as struct fields so that - // generated - // parsers can call them without needing to be linked against - // this - // library. 
- .advance = ts_lexer__advance, - .mark_end = ts_lexer__mark_end, - .get_column = ts_lexer__get_column, - .is_at_included_range_start = - ts_lexer__is_at_included_range_start, - .eof = ts_lexer__eof, - .lookahead = 0, - .result_symbol = 0, - }, - .chunk = NULL, - .chunk_size = 0, - .chunk_start = 0, - .current_position = {0, {0, 0}}, - .logger = {.payload = NULL, .log = NULL}, - .included_ranges = NULL, - .included_range_count = 0, - .current_included_range_index = 0, - }; - ts_lexer_set_included_ranges(self, NULL, 0); +void ts_lexer_init(Lexer *self) { + *self = (Lexer) { + .data = { + // The lexer's methods are stored as struct fields so that generated + // parsers can call them without needing to be linked against this + // library. + .advance = ts_lexer__advance, + .mark_end = ts_lexer__mark_end, + .get_column = ts_lexer__get_column, + .is_at_included_range_start = ts_lexer__is_at_included_range_start, + .eof = ts_lexer__eof, + .lookahead = 0, + .result_symbol = 0, + }, + .chunk = NULL, + .chunk_size = 0, + .chunk_start = 0, + .current_position = {0, {0, 0}}, + .logger = { + .payload = NULL, + .log = NULL + }, + .included_ranges = NULL, + .included_range_count = 0, + .current_included_range_index = 0, + }; + ts_lexer_set_included_ranges(self, NULL, 0); } -void ts_lexer_delete(t_liblexer *self) -{ - free(self->included_ranges); +void ts_lexer_delete(Lexer *self) { + ts_free(self->included_ranges); } -void ts_lexer_set_input(t_liblexer *self, t_parse_input input) -{ - self->input = input; - ts_lexer__clear_chunk(self); - ts_lexer_goto(self, self->current_position); +void ts_lexer_set_input(Lexer *self, TSInput input) { + self->input = input; + ts_lexer__clear_chunk(self); + ts_lexer_goto(self, self->current_position); } // Move the lexer to the given position. This doesn't do any work // if the parser is already at the given position. 
-void ts_lexer_reset(t_liblexer *self, t_parse_length position) -{ - if (position.bytes != self->current_position.bytes) - { - ts_lexer_goto(self, position); - } +void ts_lexer_reset(Lexer *self, Length position) { + if (position.bytes != self->current_position.bytes) { + ts_lexer_goto(self, position); + } } -void ts_lexer_start(t_liblexer *self) -{ - self->token_start_position = self->current_position; - self->token_end_position = LENGTH_UNDEFINED; - self->data.result_symbol = 0; - self->did_get_column = false; - if (!ts_lexer__eof(&self->data)) - { - if (!self->chunk_size) - ts_lexer__get_chunk(self); - if (!self->lookahead_size) - ts_lexer__get_lookahead(self); - if (self->current_position.bytes == 0 && - self->data.lookahead == BYTE_ORDER_MARK) - ts_lexer__advance(&self->data, true); - } +void ts_lexer_start(Lexer *self) { + self->token_start_position = self->current_position; + self->token_end_position = LENGTH_UNDEFINED; + self->data.result_symbol = 0; + self->did_get_column = false; + if (!ts_lexer__eof(&self->data)) { + if (!self->chunk_size) ts_lexer__get_chunk(self); + if (!self->lookahead_size) ts_lexer__get_lookahead(self); + if ( + self->current_position.bytes == 0 && + self->data.lookahead == BYTE_ORDER_MARK + ) ts_lexer__advance(&self->data, true); + } } -void ts_lexer_finish(t_liblexer *self, t_i32 *lookahead_end_byte) -{ - if (length_is_undefined(self->token_end_position)) - { - ts_lexer__mark_end(&self->data); - } +void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) { + if (length_is_undefined(self->token_end_position)) { + ts_lexer__mark_end(&self->data); + } - // If the token ended at an included range boundary, then its end position - // will have been reset to the end of the preceding range. Reset the start - // position to match. 
- if (self->token_end_position.bytes < self->token_start_position.bytes) - { - self->token_start_position = self->token_end_position; - } + // If the token ended at an included range boundary, then its end position + // will have been reset to the end of the preceding range. Reset the start + // position to match. + if (self->token_end_position.bytes < self->token_start_position.bytes) { + self->token_start_position = self->token_end_position; + } - t_i32 current_lookahead_end_byte = self->current_position.bytes + 1; + uint32_t current_lookahead_end_byte = self->current_position.bytes + 1; - // In order to determine that a byte sequence is invalid UTF8 or UTF16, - // the character decoding algorithm may have looked at the following byte. - // Therefore, the next byte *after* the current (invalid) character - // affects the interpretation of the current character. - if (self->data.lookahead == TS_DECODE_ERROR) - { - current_lookahead_end_byte++; - } + // In order to determine that a byte sequence is invalid UTF8 or UTF16, + // the character decoding algorithm may have looked at the following byte. + // Therefore, the next byte *after* the current (invalid) character + // affects the interpretation of the current character. 
+ if (self->data.lookahead == TS_DECODE_ERROR) { + current_lookahead_end_byte++; + } - if (current_lookahead_end_byte > *lookahead_end_byte) - { - *lookahead_end_byte = current_lookahead_end_byte; - } + if (current_lookahead_end_byte > *lookahead_end_byte) { + *lookahead_end_byte = current_lookahead_end_byte; + } } -void ts_lexer_advance_to_end(t_liblexer *self) -{ - while (self->chunk) - { - ts_lexer__advance(&self->data, false); - } +void ts_lexer_advance_to_end(Lexer *self) { + while (self->chunk) { + ts_lexer__advance(&self->data, false); + } } -void ts_lexer_mark_end(t_liblexer *self) -{ - ts_lexer__mark_end(&self->data); +void ts_lexer_mark_end(Lexer *self) { + ts_lexer__mark_end(&self->data); } -bool ts_lexer_set_included_ranges(t_liblexer *self, - const t_parser_range *ranges, t_u32 count) -{ - ranges = &DEFAULT_RANGE; - count = 1; - size_t size = count * sizeof(t_parser_range); - self->included_ranges = realloc(self->included_ranges, size); - memcpy(self->included_ranges, ranges, size); - self->included_range_count = count; - ts_lexer_goto(self, self->current_position); - return true; +bool ts_lexer_set_included_ranges( + Lexer *self, + const TSRange *ranges, + uint32_t count +) { + if (count == 0 || !ranges) { + ranges = &DEFAULT_RANGE; + count = 1; + } else { + uint32_t previous_byte = 0; + for (unsigned i = 0; i < count; i++) { + const TSRange *range = &ranges[i]; + if ( + range->start_byte < previous_byte || + range->end_byte < range->start_byte + ) return false; + previous_byte = range->end_byte; + } + } + + size_t size = count * sizeof(TSRange); + self->included_ranges = ts_realloc(self->included_ranges, size); + memcpy(self->included_ranges, ranges, size); + self->included_range_count = count; + ts_lexer_goto(self, self->current_position); + return true; } -t_parser_range *ts_lexer_included_ranges(const t_liblexer *self, - t_u32 *count) -{ - *count = self->included_range_count; - return self->included_ranges; +TSRange *ts_lexer_included_ranges(const 
Lexer *self, uint32_t *count) { + *count = self->included_range_count; + return self->included_ranges; } #undef LOG diff --git a/parser/src/lexer.h b/parser/src/lexer.h new file mode 100644 index 00000000..1d9482b5 --- /dev/null +++ b/parser/src/lexer.h @@ -0,0 +1,49 @@ +#ifndef TREE_SITTER_LEXER_H_ +#define TREE_SITTER_LEXER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./length.h" +#include "./subtree.h" +#include "./api.h" +#include "./parser.h" + +typedef struct { + TSLexer data; + Length current_position; + Length token_start_position; + Length token_end_position; + + TSRange *included_ranges; + const char *chunk; + TSInput input; + TSLogger logger; + + uint32_t included_range_count; + uint32_t current_included_range_index; + uint32_t chunk_start; + uint32_t chunk_size; + uint32_t lookahead_size; + bool did_get_column; + + char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; +} Lexer; + +void ts_lexer_init(Lexer *); +void ts_lexer_delete(Lexer *); +void ts_lexer_set_input(Lexer *, TSInput); +void ts_lexer_reset(Lexer *, Length); +void ts_lexer_start(Lexer *); +void ts_lexer_finish(Lexer *, uint32_t *); +void ts_lexer_advance_to_end(Lexer *); +void ts_lexer_mark_end(Lexer *); +bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count); +TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count); + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_LEXER_H_ diff --git a/parser/src/lib.c b/parser/src/lib.c new file mode 100644 index 00000000..4054eb3c --- /dev/null +++ b/parser/src/lib.c @@ -0,0 +1,13 @@ +#define _POSIX_C_SOURCE 200112L + +#include "./alloc.c" +#include "./get_changed_ranges.c" +#include "./language.c" +#include "./lexer.c" +#include "./node.c" +#include "./parser.c" +#include "./query.c" +#include "./stack.c" +#include "./subtree.c" +#include "./tree_cursor.c" +#include "./tree.c" diff --git a/parser/src/node.c b/parser/src/node.c index 44da00ae..203d79b2 100644 --- 
a/parser/src/node.c +++ b/parser/src/node.c @@ -1,925 +1,776 @@ -#include "./language.h" +#include #include "./subtree.h" #include "./tree.h" -#include +#include "./language.h" -typedef struct -{ - Subtree parent; - const t_parse_tree *tree; - t_parse_length position; - t_u32 child_index; - t_u32 structural_child_index; - const t_symbol *alias_sequence; +typedef struct { + Subtree parent; + const TSTree *tree; + Length position; + uint32_t child_index; + uint32_t structural_child_index; + const TSSymbol *alias_sequence; } NodeChildIterator; -// t_parse_node - constructors +// TSNode - constructors -t_parse_node ts_node_new(const t_parse_tree *tree, const Subtree *subtree, - t_parse_length position, t_symbol alias) -{ - return (t_parse_node){ - {position.bytes, position.extent.row, position.extent.column, alias}, - subtree, - tree, - }; +TSNode ts_node_new( + const TSTree *tree, + const Subtree *subtree, + Length position, + TSSymbol alias +) { + return (TSNode) { + {position.bytes, position.extent.row, position.extent.column, alias}, + subtree, + tree, + }; } -static inline t_parse_node ts_node__null(void) -{ - return ts_node_new(NULL, NULL, length_zero(), 0); +static inline TSNode ts_node__null(void) { + return ts_node_new(NULL, NULL, length_zero(), 0); } -// t_parse_node - accessors +// TSNode - accessors -t_u32 ts_node_start_byte(t_parse_node self) -{ - return self.context[0]; +uint32_t ts_node_start_byte(TSNode self) { + return self.context[0]; } -t_point ts_node_start_point(t_parse_node self) -{ - return (t_point){self.context[1], self.context[2]}; +TSPoint ts_node_start_point(TSNode self) { + return (TSPoint) {self.context[1], self.context[2]}; } -static inline t_u32 ts_node__alias(const t_parse_node *self) -{ - return self->context[3]; +static inline uint32_t ts_node__alias(const TSNode *self) { + return self->context[3]; } -static inline Subtree ts_node__subtree(t_parse_node self) -{ - return *(const Subtree *)self.id; +static inline Subtree 
ts_node__subtree(TSNode self) { + return *(const Subtree *)self.id; } // NodeChildIterator -static inline NodeChildIterator ts_node_iterate_children( - const t_parse_node *node) -{ - Subtree subtree = ts_node__subtree(*node); - if (ts_subtree_child_count(subtree) == 0) - { - return (NodeChildIterator){ - NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; - } - const t_symbol *alias_sequence = ts_language_alias_sequence( - node->tree->language, subtree.ptr->production_id); - return (NodeChildIterator){ - .tree = node->tree, - .parent = subtree, - .position = {ts_node_start_byte(*node), ts_node_start_point(*node)}, - .child_index = 0, - .structural_child_index = 0, - .alias_sequence = alias_sequence, - }; +static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) { + Subtree subtree = ts_node__subtree(*node); + if (ts_subtree_child_count(subtree) == 0) { + return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; + } + const TSSymbol *alias_sequence = ts_language_alias_sequence( + node->tree->language, + subtree.ptr->production_id + ); + return (NodeChildIterator) { + .tree = node->tree, + .parent = subtree, + .position = {ts_node_start_byte(*node), ts_node_start_point(*node)}, + .child_index = 0, + .structural_child_index = 0, + .alias_sequence = alias_sequence, + }; } -static inline bool ts_node_child_iterator_done(NodeChildIterator *self) -{ - return self->child_index == self->parent.ptr->child_count; +static inline bool ts_node_child_iterator_done(NodeChildIterator *self) { + return self->child_index == self->parent.ptr->child_count; } -static inline bool ts_node_child_iterator_next(NodeChildIterator *self, - t_parse_node *result) -{ - if (!self->parent.ptr || ts_node_child_iterator_done(self)) - return false; - const Subtree *child = - &ts_subtree_children(self->parent)[self->child_index]; - t_symbol alias_symbol = 0; - if (!ts_subtree_extra(*child)) - { - if (self->alias_sequence) - { - alias_symbol = 
self->alias_sequence[self->structural_child_index]; - } - self->structural_child_index++; - } - if (self->child_index > 0) - { - self->position = length_add(self->position, ts_subtree_padding(*child)); - } - *result = ts_node_new(self->tree, child, self->position, alias_symbol); - self->position = length_add(self->position, ts_subtree_size(*child)); - self->child_index++; - return true; +static inline bool ts_node_child_iterator_next( + NodeChildIterator *self, + TSNode *result +) { + if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false; + const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; + TSSymbol alias_symbol = 0; + if (!ts_subtree_extra(*child)) { + if (self->alias_sequence) { + alias_symbol = self->alias_sequence[self->structural_child_index]; + } + self->structural_child_index++; + } + if (self->child_index > 0) { + self->position = length_add(self->position, ts_subtree_padding(*child)); + } + *result = ts_node_new( + self->tree, + child, + self->position, + alias_symbol + ); + self->position = length_add(self->position, ts_subtree_size(*child)); + self->child_index++; + return true; } -// t_parse_node - private +// TSNode - private -static inline bool ts_node__is_relevant(t_parse_node self, - bool include_anonymous) -{ - Subtree tree = ts_node__subtree(self); - if (include_anonymous) - { - return ts_subtree_visible(tree) || ts_node__alias(&self); - } - else - { - t_symbol alias = ts_node__alias(&self); - if (alias) - { - return ts_language_symbol_metadata(self.tree->language, alias) - .named; - } - else - { - return ts_subtree_visible(tree) && ts_subtree_named(tree); - } - } +static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { + Subtree tree = ts_node__subtree(self); + if (include_anonymous) { + return ts_subtree_visible(tree) || ts_node__alias(&self); + } else { + TSSymbol alias = ts_node__alias(&self); + if (alias) { + return ts_language_symbol_metadata(self.tree->language, 
alias).named; + } else { + return ts_subtree_visible(tree) && ts_subtree_named(tree); + } + } } -static inline t_u32 ts_node__relevant_child_count(t_parse_node self, - bool include_anonymous) -{ - Subtree tree = ts_node__subtree(self); - if (ts_subtree_child_count(tree) > 0) - { - if (include_anonymous) - { - return tree.ptr->visible_child_count; - } - else - { - return tree.ptr->named_child_count; - } - } - else - { - return 0; - } +static inline uint32_t ts_node__relevant_child_count( + TSNode self, + bool include_anonymous +) { + Subtree tree = ts_node__subtree(self); + if (ts_subtree_child_count(tree) > 0) { + if (include_anonymous) { + return tree.ptr->visible_child_count; + } else { + return tree.ptr->named_child_count; + } + } else { + return 0; + } } -static inline t_parse_node ts_node__child(t_parse_node self, t_u32 child_index, - bool include_anonymous) -{ - t_parse_node result = self; - bool did_descend = true; +static inline TSNode ts_node__child( + TSNode self, + uint32_t child_index, + bool include_anonymous +) { + TSNode result = self; + bool did_descend = true; - while (did_descend) - { - did_descend = false; + while (did_descend) { + did_descend = false; - t_parse_node child; - t_u32 index = 0; - NodeChildIterator iterator = ts_node_iterate_children(&result); - while (ts_node_child_iterator_next(&iterator, &child)) - { - if (ts_node__is_relevant(child, include_anonymous)) - { - if (index == child_index) - { - return child; - } - index++; - } - else - { - t_u32 grandchild_index = child_index - index; - t_u32 grandchild_count = - ts_node__relevant_child_count(child, include_anonymous); - if (grandchild_index < grandchild_count) - { - did_descend = true; - result = child; - child_index = grandchild_index; - break; - } - index += grandchild_count; - } - } - } + TSNode child; + uint32_t index = 0; + NodeChildIterator iterator = ts_node_iterate_children(&result); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (ts_node__is_relevant(child, 
include_anonymous)) { + if (index == child_index) { + return child; + } + index++; + } else { + uint32_t grandchild_index = child_index - index; + uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous); + if (grandchild_index < grandchild_count) { + did_descend = true; + result = child; + child_index = grandchild_index; + break; + } + index += grandchild_count; + } + } + } - return ts_node__null(); + return ts_node__null(); } -static bool ts_subtree_has_trailing_empty_descendant(Subtree self, - Subtree other) -{ - for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) - { - Subtree child = ts_subtree_children(self)[i]; - if (ts_subtree_total_bytes(child) > 0) - break; - if (child.ptr == other.ptr || - ts_subtree_has_trailing_empty_descendant(child, other)) - { - return true; - } - } - return false; +static bool ts_subtree_has_trailing_empty_descendant( + Subtree self, + Subtree other +) { + for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) { + Subtree child = ts_subtree_children(self)[i]; + if (ts_subtree_total_bytes(child) > 0) break; + if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) { + return true; + } + } + return false; } -static inline t_parse_node ts_node__prev_sibling(t_parse_node self, - bool include_anonymous) -{ - Subtree self_subtree = ts_node__subtree(self); - bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0; - t_u32 target_end_byte = ts_node_end_byte(self); +static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) { + Subtree self_subtree = ts_node__subtree(self); + bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0; + uint32_t target_end_byte = ts_node_end_byte(self); - t_parse_node node = ts_node_parent(self); - t_parse_node earlier_node = ts_node__null(); - bool earlier_node_is_relevant = false; + TSNode node = ts_node_parent(self); + TSNode earlier_node = ts_node__null(); + bool earlier_node_is_relevant 
= false; - while (!ts_node_is_null(node)) - { - t_parse_node earlier_child = ts_node__null(); - bool earlier_child_is_relevant = false; - bool found_child_containing_target = false; + while (!ts_node_is_null(node)) { + TSNode earlier_child = ts_node__null(); + bool earlier_child_is_relevant = false; + bool found_child_containing_target = false; - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) - { - if (child.id == self.id) - break; - if (iterator.position.bytes > target_end_byte) - { - found_child_containing_target = true; - break; - } + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (child.id == self.id) break; + if (iterator.position.bytes > target_end_byte) { + found_child_containing_target = true; + break; + } - if (iterator.position.bytes == target_end_byte && - (!self_is_empty || ts_subtree_has_trailing_empty_descendant( - ts_node__subtree(child), self_subtree))) - { - found_child_containing_target = true; - break; - } + if (iterator.position.bytes == target_end_byte && + (!self_is_empty || + ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) { + found_child_containing_target = true; + break; + } - if (ts_node__is_relevant(child, include_anonymous)) - { - earlier_child = child; - earlier_child_is_relevant = true; - } - else if (ts_node__relevant_child_count(child, include_anonymous) > - 0) - { - earlier_child = child; - earlier_child_is_relevant = false; - } - } + if (ts_node__is_relevant(child, include_anonymous)) { + earlier_child = child; + earlier_child_is_relevant = true; + } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { + earlier_child = child; + earlier_child_is_relevant = false; + } + } - if (found_child_containing_target) - { - if (!ts_node_is_null(earlier_child)) - { - earlier_node = earlier_child; - 
earlier_node_is_relevant = earlier_child_is_relevant; - } - node = child; - } - else if (earlier_child_is_relevant) - { - return earlier_child; - } - else if (!ts_node_is_null(earlier_child)) - { - node = earlier_child; - } - else if (earlier_node_is_relevant) - { - return earlier_node; - } - else - { - node = earlier_node; - earlier_node = ts_node__null(); - earlier_node_is_relevant = false; - } - } + if (found_child_containing_target) { + if (!ts_node_is_null(earlier_child)) { + earlier_node = earlier_child; + earlier_node_is_relevant = earlier_child_is_relevant; + } + node = child; + } else if (earlier_child_is_relevant) { + return earlier_child; + } else if (!ts_node_is_null(earlier_child)) { + node = earlier_child; + } else if (earlier_node_is_relevant) { + return earlier_node; + } else { + node = earlier_node; + earlier_node = ts_node__null(); + earlier_node_is_relevant = false; + } + } - return ts_node__null(); + return ts_node__null(); } -static inline t_parse_node ts_node__next_sibling(t_parse_node self, - bool include_anonymous) -{ - t_u32 target_end_byte = ts_node_end_byte(self); +static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) { + uint32_t target_end_byte = ts_node_end_byte(self); - t_parse_node node = ts_node_parent(self); - t_parse_node later_node = ts_node__null(); - bool later_node_is_relevant = false; + TSNode node = ts_node_parent(self); + TSNode later_node = ts_node__null(); + bool later_node_is_relevant = false; - while (!ts_node_is_null(node)) - { - t_parse_node later_child = ts_node__null(); - bool later_child_is_relevant = false; - t_parse_node child_containing_target = ts_node__null(); + while (!ts_node_is_null(node)) { + TSNode later_child = ts_node__null(); + bool later_child_is_relevant = false; + TSNode child_containing_target = ts_node__null(); - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) - { - if 
(iterator.position.bytes < target_end_byte) - continue; - if (ts_node_start_byte(child) <= ts_node_start_byte(self)) - { - if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) - { - child_containing_target = child; - } - } - else if (ts_node__is_relevant(child, include_anonymous)) - { - later_child = child; - later_child_is_relevant = true; - break; - } - else if (ts_node__relevant_child_count(child, include_anonymous) > - 0) - { - later_child = child; - later_child_is_relevant = false; - break; - } - } + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (iterator.position.bytes < target_end_byte) continue; + if (ts_node_start_byte(child) <= ts_node_start_byte(self)) { + if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) { + child_containing_target = child; + } + } else if (ts_node__is_relevant(child, include_anonymous)) { + later_child = child; + later_child_is_relevant = true; + break; + } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { + later_child = child; + later_child_is_relevant = false; + break; + } + } - if (!ts_node_is_null(child_containing_target)) - { - if (!ts_node_is_null(later_child)) - { - later_node = later_child; - later_node_is_relevant = later_child_is_relevant; - } - node = child_containing_target; - } - else if (later_child_is_relevant) - { - return later_child; - } - else if (!ts_node_is_null(later_child)) - { - node = later_child; - } - else if (later_node_is_relevant) - { - return later_node; - } - else - { - node = later_node; - } - } + if (!ts_node_is_null(child_containing_target)) { + if (!ts_node_is_null(later_child)) { + later_node = later_child; + later_node_is_relevant = later_child_is_relevant; + } + node = child_containing_target; + } else if (later_child_is_relevant) { + return later_child; + } else if (!ts_node_is_null(later_child)) { + node = later_child; + } else if (later_node_is_relevant) 
{ + return later_node; + } else { + node = later_node; + } + } - return ts_node__null(); + return ts_node__null(); } -static inline t_parse_node ts_node__first_child_for_byte(t_parse_node self, - t_u32 goal, - bool include_anonymous) -{ - t_parse_node node = self; - bool did_descend = true; +static inline TSNode ts_node__first_child_for_byte( + TSNode self, + uint32_t goal, + bool include_anonymous +) { + TSNode node = self; + bool did_descend = true; - while (did_descend) - { - did_descend = false; + while (did_descend) { + did_descend = false; - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) - { - if (ts_node_end_byte(child) > goal) - { - if (ts_node__is_relevant(child, include_anonymous)) - { - return child; - } - else if (ts_node_child_count(child) > 0) - { - did_descend = true; - node = child; - break; - } - } - } - } + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (ts_node_end_byte(child) > goal) { + if (ts_node__is_relevant(child, include_anonymous)) { + return child; + } else if (ts_node_child_count(child) > 0) { + did_descend = true; + node = child; + break; + } + } + } + } - return ts_node__null(); + return ts_node__null(); } -static inline t_parse_node ts_node__descendant_for_byte_range( - t_parse_node self, t_u32 range_start, t_u32 range_end, - bool include_anonymous) -{ - t_parse_node node = self; - t_parse_node last_visible_node = self; +static inline TSNode ts_node__descendant_for_byte_range( + TSNode self, + uint32_t range_start, + uint32_t range_end, + bool include_anonymous +) { + TSNode node = self; + TSNode last_visible_node = self; - bool did_descend = true; - while (did_descend) - { - did_descend = false; + bool did_descend = true; + while (did_descend) { + did_descend = false; - t_parse_node child; - NodeChildIterator iterator = 
ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) - { - t_u32 node_end = iterator.position.bytes; + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + uint32_t node_end = iterator.position.bytes; - // The end of this node must extend far enough forward to touch - // the end of the range and exceed the start of the range. - if (node_end < range_end) - continue; - if (node_end <= range_start) - continue; + // The end of this node must extend far enough forward to touch + // the end of the range and exceed the start of the range. + if (node_end < range_end) continue; + if (node_end <= range_start) continue; - // The start of this node must extend far enough backward to - // touch the start of the range. - if (range_start < ts_node_start_byte(child)) - break; + // The start of this node must extend far enough backward to + // touch the start of the range. + if (range_start < ts_node_start_byte(child)) break; - node = child; - if (ts_node__is_relevant(node, include_anonymous)) - { - last_visible_node = node; - } - did_descend = true; - break; - } - } + node = child; + if (ts_node__is_relevant(node, include_anonymous)) { + last_visible_node = node; + } + did_descend = true; + break; + } + } - return last_visible_node; + return last_visible_node; } -static inline t_parse_node ts_node__descendant_for_point_range( - t_parse_node self, t_point range_start, t_point range_end, - bool include_anonymous) -{ - t_parse_node node = self; - t_parse_node last_visible_node = self; +static inline TSNode ts_node__descendant_for_point_range( + TSNode self, + TSPoint range_start, + TSPoint range_end, + bool include_anonymous +) { + TSNode node = self; + TSNode last_visible_node = self; - bool did_descend = true; - while (did_descend) - { - did_descend = false; + bool did_descend = true; + while (did_descend) { + did_descend = false; - t_parse_node child; - 
NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) - { - t_point node_end = iterator.position.extent; + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + TSPoint node_end = iterator.position.extent; - // The end of this node must extend far enough forward to touch - // the end of the range and exceed the start of the range. - if (point_lt(node_end, range_end)) - continue; - if (point_lte(node_end, range_start)) - continue; + // The end of this node must extend far enough forward to touch + // the end of the range and exceed the start of the range. + if (point_lt(node_end, range_end)) continue; + if (point_lte(node_end, range_start)) continue; - // The start of this node must extend far enough backward to - // touch the start of the range. - if (point_lt(range_start, ts_node_start_point(child))) - break; + // The start of this node must extend far enough backward to + // touch the start of the range. 
+ if (point_lt(range_start, ts_node_start_point(child))) break; - node = child; - if (ts_node__is_relevant(node, include_anonymous)) - { - last_visible_node = node; - } - did_descend = true; - break; - } - } + node = child; + if (ts_node__is_relevant(node, include_anonymous)) { + last_visible_node = node; + } + did_descend = true; + break; + } + } - return last_visible_node; + return last_visible_node; } -// t_parse_node - public +// TSNode - public -t_u32 ts_node_end_byte(t_parse_node self) -{ - return ts_node_start_byte(self) + - ts_subtree_size(ts_node__subtree(self)).bytes; +uint32_t ts_node_end_byte(TSNode self) { + return ts_node_start_byte(self) + ts_subtree_size(ts_node__subtree(self)).bytes; } -t_point ts_node_end_point(t_parse_node self) -{ - return point_add(ts_node_start_point(self), - ts_subtree_size(ts_node__subtree(self)).extent); +TSPoint ts_node_end_point(TSNode self) { + return point_add(ts_node_start_point(self), ts_subtree_size(ts_node__subtree(self)).extent); } -t_symbol ts_node_symbol(t_parse_node self) -{ - t_symbol symbol = ts_node__alias(&self); - if (!symbol) - symbol = ts_subtree_symbol(ts_node__subtree(self)); - return ts_language_public_symbol(self.tree->language, symbol); +TSSymbol ts_node_symbol(TSNode self) { + TSSymbol symbol = ts_node__alias(&self); + if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); + return ts_language_public_symbol(self.tree->language, symbol); } -const char *ts_node_type(t_parse_node self) -{ - t_symbol symbol = ts_node__alias(&self); - if (!symbol) - symbol = ts_subtree_symbol(ts_node__subtree(self)); - return ts_language_symbol_name(self.tree->language, symbol); +const char *ts_node_type(TSNode self) { + TSSymbol symbol = ts_node__alias(&self); + if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); + return ts_language_symbol_name(self.tree->language, symbol); } -const t_language *ts_node_language(t_parse_node self) -{ - return self.tree->language; +const TSLanguage 
*ts_node_language(TSNode self) { + return self.tree->language; } -t_symbol ts_node_grammar_symbol(t_parse_node self) -{ - return ts_subtree_symbol(ts_node__subtree(self)); +TSSymbol ts_node_grammar_symbol(TSNode self) { + return ts_subtree_symbol(ts_node__subtree(self)); } -const char *ts_node_grammar_type(t_parse_node self) -{ - t_symbol symbol = ts_subtree_symbol(ts_node__subtree(self)); - return ts_language_symbol_name(self.tree->language, symbol); +const char *ts_node_grammar_type(TSNode self) { + TSSymbol symbol = ts_subtree_symbol(ts_node__subtree(self)); + return ts_language_symbol_name(self.tree->language, symbol); } -char *ts_node_string(t_parse_node self) -{ - t_symbol alias_symbol = ts_node__alias(&self); - return ts_subtree_string( - ts_node__subtree(self), alias_symbol, - ts_language_symbol_metadata(self.tree->language, alias_symbol).visible, - self.tree->language, false); +char *ts_node_string(TSNode self) { + TSSymbol alias_symbol = ts_node__alias(&self); + return ts_subtree_string( + ts_node__subtree(self), + alias_symbol, + ts_language_symbol_metadata(self.tree->language, alias_symbol).visible, + self.tree->language, + false + ); } -bool ts_node_eq(t_parse_node self, t_parse_node other) -{ - return self.tree == other.tree && self.id == other.id; +bool ts_node_eq(TSNode self, TSNode other) { + return self.tree == other.tree && self.id == other.id; } -bool ts_node_is_null(t_parse_node self) -{ - return self.id == 0; +bool ts_node_is_null(TSNode self) { + return self.id == 0; } -bool ts_node_is_extra(t_parse_node self) -{ - return ts_subtree_extra(ts_node__subtree(self)); +bool ts_node_is_extra(TSNode self) { + return ts_subtree_extra(ts_node__subtree(self)); } -bool ts_node_is_named(t_parse_node self) -{ - t_symbol alias = ts_node__alias(&self); - return alias ? 
ts_language_symbol_metadata(self.tree->language, alias).named - : ts_subtree_named(ts_node__subtree(self)); +bool ts_node_is_named(TSNode self) { + TSSymbol alias = ts_node__alias(&self); + return alias + ? ts_language_symbol_metadata(self.tree->language, alias).named + : ts_subtree_named(ts_node__subtree(self)); } -bool ts_node_is_missing(t_parse_node self) -{ - return ts_subtree_missing(ts_node__subtree(self)); +bool ts_node_is_missing(TSNode self) { + return ts_subtree_missing(ts_node__subtree(self)); } -bool ts_node_has_changes(t_parse_node self) -{ - return ts_subtree_has_changes(ts_node__subtree(self)); +bool ts_node_has_changes(TSNode self) { + return ts_subtree_has_changes(ts_node__subtree(self)); } -bool ts_node_has_error(t_parse_node self) -{ - return ts_subtree_error_cost(ts_node__subtree(self)) > 0; +bool ts_node_has_error(TSNode self) { + return ts_subtree_error_cost(ts_node__subtree(self)) > 0; } -bool ts_node_is_error(t_parse_node self) -{ - t_symbol symbol = ts_node_symbol(self); - return symbol == ts_builtin_sym_error; +bool ts_node_is_error(TSNode self) { + TSSymbol symbol = ts_node_symbol(self); + return symbol == ts_builtin_sym_error; } -t_u32 ts_node_descendant_count(t_parse_node self) -{ - return ts_subtree_visible_descendant_count(ts_node__subtree(self)) + 1; +uint32_t ts_node_descendant_count(TSNode self) { + return ts_subtree_visible_descendant_count(ts_node__subtree(self)) + 1; } -t_state_id ts_node_parse_state(t_parse_node self) -{ - return ts_subtree_parse_state(ts_node__subtree(self)); +TSStateId ts_node_parse_state(TSNode self) { + return ts_subtree_parse_state(ts_node__subtree(self)); } -t_state_id ts_node_next_parse_state(t_parse_node self) -{ - const t_language *language = self.tree->language; - t_u16 state = ts_node_parse_state(self); - if (state == TS_TREE_STATE_NONE) - { - return TS_TREE_STATE_NONE; - } - t_u16 symbol = ts_node_grammar_symbol(self); - return ts_language_next_state(language, state, symbol); +TSStateId 
ts_node_next_parse_state(TSNode self) { + const TSLanguage *language = self.tree->language; + uint16_t state = ts_node_parse_state(self); + if (state == TS_TREE_STATE_NONE) { + return TS_TREE_STATE_NONE; + } + uint16_t symbol = ts_node_grammar_symbol(self); + return ts_language_next_state(language, state, symbol); } -t_parse_node ts_node_parent(t_parse_node self) -{ - t_parse_node node = ts_tree_root_node(self.tree); - t_u32 end_byte = ts_node_end_byte(self); - if (node.id == self.id) - return ts_node__null(); +TSNode ts_node_parent(TSNode self) { + TSNode node = ts_tree_root_node(self.tree); + if (node.id == self.id) return ts_node__null(); - t_parse_node last_visible_node = node; - bool did_descend = true; - while (did_descend) - { - did_descend = false; + while (true) { + TSNode next_node = ts_node_child_containing_descendant(node, self); + if (ts_node_is_null(next_node)) break; + node = next_node; + } - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) - { - if (ts_node_start_byte(child) > ts_node_start_byte(self) || - child.id == self.id) - break; - if (iterator.position.bytes >= end_byte && - ts_node_child_count(child) > 0) - { - node = child; - if (ts_node__is_relevant(child, true)) - { - last_visible_node = node; - } - did_descend = true; - break; - } - } - } - - return last_visible_node; + return node; } -t_parse_node ts_node_child(t_parse_node self, t_u32 child_index) -{ - return ts_node__child(self, child_index, true); +TSNode ts_node_child_containing_descendant(TSNode self, TSNode subnode) { + uint32_t start_byte = ts_node_start_byte(subnode); + uint32_t end_byte = ts_node_end_byte(subnode); + + do { + NodeChildIterator iter = ts_node_iterate_children(&self); + do { + if ( + !ts_node_child_iterator_next(&iter, &self) + || ts_node_start_byte(self) > start_byte + || self.id == subnode.id + ) { + return ts_node__null(); + } + } while (iter.position.bytes < end_byte 
|| ts_node_child_count(self) == 0); + } while (!ts_node__is_relevant(self, true)); + + return self; } -t_parse_node ts_node_named_child(t_parse_node self, t_u32 child_index) -{ - return ts_node__child(self, child_index, false); +TSNode ts_node_child(TSNode self, uint32_t child_index) { + return ts_node__child(self, child_index, true); } -t_parse_node ts_node_child_by_field_id(t_parse_node self, t_field_id field_id) -{ +TSNode ts_node_named_child(TSNode self, uint32_t child_index) { + return ts_node__child(self, child_index, false); +} + +TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) { recur: - if (!field_id || ts_node_child_count(self) == 0) - return ts_node__null(); + if (!field_id || ts_node_child_count(self) == 0) return ts_node__null(); - const t_field_map_entry *field_map, *field_map_end; - ts_language_field_map(self.tree->language, - ts_node__subtree(self).ptr->production_id, &field_map, - &field_map_end); - if (field_map == field_map_end) - return ts_node__null(); + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map( + self.tree->language, + ts_node__subtree(self).ptr->production_id, + &field_map, + &field_map_end + ); + if (field_map == field_map_end) return ts_node__null(); - // The field mappings are sorted by their field id. Scan all - // the mappings to find the ones for the given field id. - while (field_map->field_id < field_id) - { - field_map++; - if (field_map == field_map_end) - return ts_node__null(); - } - while (field_map_end[-1].field_id > field_id) - { - field_map_end--; - if (field_map == field_map_end) - return ts_node__null(); - } + // The field mappings are sorted by their field id. Scan all + // the mappings to find the ones for the given field id. 
+ while (field_map->field_id < field_id) { + field_map++; + if (field_map == field_map_end) return ts_node__null(); + } + while (field_map_end[-1].field_id > field_id) { + field_map_end--; + if (field_map == field_map_end) return ts_node__null(); + } - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&self); - while (ts_node_child_iterator_next(&iterator, &child)) - { - if (!ts_subtree_extra(ts_node__subtree(child))) - { - t_u32 index = iterator.structural_child_index - 1; - if (index < field_map->child_index) - continue; + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&self); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (!ts_subtree_extra(ts_node__subtree(child))) { + uint32_t index = iterator.structural_child_index - 1; + if (index < field_map->child_index) continue; - // Hidden nodes' fields are "inherited" by their visible parent. - if (field_map->inherited) - { + // Hidden nodes' fields are "inherited" by their visible parent. + if (field_map->inherited) { - // If this is the *last* possible child node for this field, - // then perform a tail call to avoid recursion. - if (field_map + 1 == field_map_end) - { - self = child; - goto recur; - } + // If this is the *last* possible child node for this field, + // then perform a tail call to avoid recursion. + if (field_map + 1 == field_map_end) { + self = child; + goto recur; + } - // Otherwise, descend into this child, but if it doesn't contain - // the field, continue searching subsequent children. - else - { - t_parse_node result = - ts_node_child_by_field_id(child, field_id); - if (result.id) - return result; - field_map++; - if (field_map == field_map_end) - return ts_node__null(); - } - } + // Otherwise, descend into this child, but if it doesn't contain + // the field, continue searching subsequent children. 
+ else { + TSNode result = ts_node_child_by_field_id(child, field_id); + if (result.id) return result; + field_map++; + if (field_map == field_map_end) return ts_node__null(); + } + } - else if (ts_node__is_relevant(child, true)) - { - return child; - } + else if (ts_node__is_relevant(child, true)) { + return child; + } - // If the field refers to a hidden node with visible children, - // return the first visible child. - else if (ts_node_child_count(child) > 0) - { - return ts_node_child(child, 0); - } + // If the field refers to a hidden node with visible children, + // return the first visible child. + else if (ts_node_child_count(child) > 0 ) { + return ts_node_child(child, 0); + } - // Otherwise, continue searching subsequent children. - else - { - field_map++; - if (field_map == field_map_end) - return ts_node__null(); - } - } - } + // Otherwise, continue searching subsequent children. + else { + field_map++; + if (field_map == field_map_end) return ts_node__null(); + } + } + } - return ts_node__null(); + return ts_node__null(); } -static inline const char *ts_node__field_name_from_language( - t_parse_node self, t_u32 structural_child_index) -{ - const t_field_map_entry *field_map, *field_map_end; - ts_language_field_map(self.tree->language, - ts_node__subtree(self).ptr->production_id, &field_map, - &field_map_end); - for (; field_map != field_map_end; field_map++) - { - if (!field_map->inherited && - field_map->child_index == structural_child_index) - { - return self.tree->language->field_names[field_map->field_id]; - } - } - return NULL; +static inline const char *ts_node__field_name_from_language(TSNode self, uint32_t structural_child_index) { + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map( + self.tree->language, + ts_node__subtree(self).ptr->production_id, + &field_map, + &field_map_end + ); + for (; field_map != field_map_end; field_map++) { + if (!field_map->inherited && field_map->child_index == structural_child_index) { + 
return self.tree->language->field_names[field_map->field_id]; + } + } + return NULL; } -const char *ts_node_field_name_for_child(t_parse_node self, t_u32 child_index) -{ - t_parse_node result = self; - bool did_descend = true; - const char *inherited_field_name = NULL; +const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) { + TSNode result = self; + bool did_descend = true; + const char *inherited_field_name = NULL; - while (did_descend) - { - did_descend = false; + while (did_descend) { + did_descend = false; - t_parse_node child; - t_u32 index = 0; - NodeChildIterator iterator = ts_node_iterate_children(&result); - while (ts_node_child_iterator_next(&iterator, &child)) - { - if (ts_node__is_relevant(child, true)) - { - if (index == child_index) - { - const char *field_name = ts_node__field_name_from_language( - result, iterator.structural_child_index - 1); - if (field_name) - return field_name; - return inherited_field_name; - } - index++; - } - else - { - t_u32 grandchild_index = child_index - index; - t_u32 grandchild_count = - ts_node__relevant_child_count(child, true); - if (grandchild_index < grandchild_count) - { - const char *field_name = ts_node__field_name_from_language( - result, iterator.structural_child_index - 1); - if (field_name) - inherited_field_name = field_name; + TSNode child; + uint32_t index = 0; + NodeChildIterator iterator = ts_node_iterate_children(&result); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (ts_node__is_relevant(child, true)) { + if (index == child_index) { + const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); + if (field_name) return field_name; + return inherited_field_name; + } + index++; + } else { + uint32_t grandchild_index = child_index - index; + uint32_t grandchild_count = ts_node__relevant_child_count(child, true); + if (grandchild_index < grandchild_count) { + const char *field_name = 
ts_node__field_name_from_language(result, iterator.structural_child_index - 1); + if (field_name) inherited_field_name = field_name; - did_descend = true; - result = child; - child_index = grandchild_index; - break; - } - index += grandchild_count; - } - } - } + did_descend = true; + result = child; + child_index = grandchild_index; + break; + } + index += grandchild_count; + } + } + } - return NULL; + return NULL; } -t_parse_node ts_node_child_by_field_name(t_parse_node self, const char *name, - t_u32 name_length) -{ - t_field_id field_id = - ts_language_field_id_for_name(self.tree->language, name, name_length); - return ts_node_child_by_field_id(self, field_id); +TSNode ts_node_child_by_field_name( + TSNode self, + const char *name, + uint32_t name_length +) { + TSFieldId field_id = ts_language_field_id_for_name( + self.tree->language, + name, + name_length + ); + return ts_node_child_by_field_id(self, field_id); } -t_u32 ts_node_child_count(t_parse_node self) -{ - Subtree tree = ts_node__subtree(self); - if (ts_subtree_child_count(tree) > 0) - { - return tree.ptr->visible_child_count; - } - else - { - return 0; - } +uint32_t ts_node_child_count(TSNode self) { + Subtree tree = ts_node__subtree(self); + if (ts_subtree_child_count(tree) > 0) { + return tree.ptr->visible_child_count; + } else { + return 0; + } } -t_u32 ts_node_named_child_count(t_parse_node self) -{ - Subtree tree = ts_node__subtree(self); - if (ts_subtree_child_count(tree) > 0) - { - return tree.ptr->named_child_count; - } - else - { - return 0; - } +uint32_t ts_node_named_child_count(TSNode self) { + Subtree tree = ts_node__subtree(self); + if (ts_subtree_child_count(tree) > 0) { + return tree.ptr->named_child_count; + } else { + return 0; + } } -t_parse_node ts_node_next_sibling(t_parse_node self) -{ - return ts_node__next_sibling(self, true); +TSNode ts_node_next_sibling(TSNode self) { + return ts_node__next_sibling(self, true); } -t_parse_node ts_node_next_named_sibling(t_parse_node self) -{ - 
return ts_node__next_sibling(self, false); +TSNode ts_node_next_named_sibling(TSNode self) { + return ts_node__next_sibling(self, false); } -t_parse_node ts_node_prev_sibling(t_parse_node self) -{ - return ts_node__prev_sibling(self, true); +TSNode ts_node_prev_sibling(TSNode self) { + return ts_node__prev_sibling(self, true); } -t_parse_node ts_node_prev_named_sibling(t_parse_node self) -{ - return ts_node__prev_sibling(self, false); +TSNode ts_node_prev_named_sibling(TSNode self) { + return ts_node__prev_sibling(self, false); } -t_parse_node ts_node_first_child_for_byte(t_parse_node self, t_u32 byte) -{ - return ts_node__first_child_for_byte(self, byte, true); +TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) { + return ts_node__first_child_for_byte(self, byte, true); } -t_parse_node ts_node_first_named_child_for_byte(t_parse_node self, t_u32 byte) -{ - return ts_node__first_child_for_byte(self, byte, false); +TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) { + return ts_node__first_child_for_byte(self, byte, false); } -t_parse_node ts_node_descendant_for_byte_range(t_parse_node self, t_u32 start, - t_u32 end) -{ - return ts_node__descendant_for_byte_range(self, start, end, true); +TSNode ts_node_descendant_for_byte_range( + TSNode self, + uint32_t start, + uint32_t end +) { + return ts_node__descendant_for_byte_range(self, start, end, true); } -t_parse_node ts_node_named_descendant_for_byte_range(t_parse_node self, - t_u32 start, t_u32 end) -{ - return ts_node__descendant_for_byte_range(self, start, end, false); +TSNode ts_node_named_descendant_for_byte_range( + TSNode self, + uint32_t start, + uint32_t end +) { + return ts_node__descendant_for_byte_range(self, start, end, false); } -t_parse_node ts_node_descendant_for_point_range(t_parse_node self, - t_point start, t_point end) -{ - return ts_node__descendant_for_point_range(self, start, end, true); +TSNode ts_node_descendant_for_point_range( + TSNode self, + TSPoint start, 
+ TSPoint end +) { + return ts_node__descendant_for_point_range(self, start, end, true); } -t_parse_node ts_node_named_descendant_for_point_range(t_parse_node self, - t_point start, - t_point end) -{ - return ts_node__descendant_for_point_range(self, start, end, false); +TSNode ts_node_named_descendant_for_point_range( + TSNode self, + TSPoint start, + TSPoint end +) { + return ts_node__descendant_for_point_range(self, start, end, false); } -void ts_node_edit(t_parse_node *self, const t_input_edit *edit) -{ - t_u32 start_byte = ts_node_start_byte(*self); - t_point start_point = ts_node_start_point(*self); +void ts_node_edit(TSNode *self, const TSInputEdit *edit) { + uint32_t start_byte = ts_node_start_byte(*self); + TSPoint start_point = ts_node_start_point(*self); - if (start_byte >= edit->old_end_byte) - { - start_byte = edit->new_end_byte + (start_byte - edit->old_end_byte); - start_point = point_add(edit->new_end_point, - point_sub(start_point, edit->old_end_point)); - } - else if (start_byte > edit->start_byte) - { - start_byte = edit->new_end_byte; - start_point = edit->new_end_point; - } + if (start_byte >= edit->old_end_byte) { + start_byte = edit->new_end_byte + (start_byte - edit->old_end_byte); + start_point = point_add(edit->new_end_point, point_sub(start_point, edit->old_end_point)); + } else if (start_byte > edit->start_byte) { + start_byte = edit->new_end_byte; + start_point = edit->new_end_point; + } - self->context[0] = start_byte; - self->context[1] = start_point.row; - self->context[2] = start_point.column; + self->context[0] = start_byte; + self->context[1] = start_point.row; + self->context[2] = start_point.column; } diff --git a/parser/src/parser.c b/parser/src/parser.c index c4aa40ae..ac145cea 100644 --- a/parser/src/parser.c +++ b/parser/src/parser.c @@ -1,97 +1,75 @@ +#define _POSIX_C_SOURCE 200112L + +#include +#include +#include +#include +#include +#include +#include "./api.h" +#include "./alloc.h" +#include "./array.h" +#include 
"./atomic.h" +#include "./clock.h" +#include "./error_costs.h" +#include "./get_changed_ranges.h" #include "./language.h" +#include "./length.h" +#include "./lexer.h" +#include "./reduce_action.h" #include "./reusable_node.h" #include "./stack.h" #include "./subtree.h" #include "./tree.h" -#include -#include -#include -#include -#include -#include -#include "me/vec/vec_parser_range.h" -#include "me/vec/vec_reduce_action.h" -#include "parser/api.h" -#include "parser/error_costs.h" -#include "parser/lexer.h" -#include "parser/parser_length.h" -#include "parser/reduce_action.h" -#include "parser/types/types_language.h" +#define LOG(...) \ + if (self->lexer.logger.log || self->dot_graph_file) { \ + snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \ + ts_parser__log(self); \ + } -typedef t_u64 t_duration; -typedef t_u64 t_clock; +#define LOG_LOOKAHEAD(symbol_name, size) \ + if (self->lexer.logger.log || self->dot_graph_file) { \ + char *buf = self->lexer.debug_buffer; \ + const char *symbol = symbol_name; \ + int off = sprintf(buf, "lexed_lookahead sym:"); \ + for ( \ + int i = 0; \ + symbol[i] != '\0' \ + && off < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; \ + i++ \ + ) { \ + switch (symbol[i]) { \ + case '\t': buf[off++] = '\\'; buf[off++] = 't'; break; \ + case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break; \ + case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break; \ + case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break; \ + case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break; \ + case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \ + default: buf[off++] = symbol[i]; break; \ + } \ + } \ + snprintf( \ + buf + off, \ + TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off, \ + ", size:%u", \ + size \ + ); \ + ts_parser__log(self); \ + } -#define LOG(...) 
\ - if (self->lexer.logger.log || self->dot_graph_file) \ - { \ - snprintf(self->lexer.debug_buffer, \ - TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \ - ts_parser__log(self); \ - } +#define LOG_STACK() \ + if (self->dot_graph_file) { \ + ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \ + fputs("\n\n", self->dot_graph_file); \ + } -#define LOG_LOOKAHEAD(symbol_name, size) \ - if (self->lexer.logger.log || self->dot_graph_file) \ - { \ - char *buf = self->lexer.debug_buffer; \ - const char *symbol = symbol_name; \ - int off = sprintf(buf, "lexed_lookahead sym:"); \ - for (int i = 0; \ - symbol[i] != '\0' && off < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; \ - i++) \ - { \ - switch (symbol[i]) \ - { \ - case '\t': \ - buf[off++] = '\\'; \ - buf[off++] = 't'; \ - break; \ - case '\n': \ - buf[off++] = '\\'; \ - buf[off++] = 'n'; \ - break; \ - case '\v': \ - buf[off++] = '\\'; \ - buf[off++] = 'v'; \ - break; \ - case '\f': \ - buf[off++] = '\\'; \ - buf[off++] = 'f'; \ - break; \ - case '\r': \ - buf[off++] = '\\'; \ - buf[off++] = 'r'; \ - break; \ - case '\\': \ - buf[off++] = '\\'; \ - buf[off++] = '\\'; \ - break; \ - default: \ - buf[off++] = symbol[i]; \ - break; \ - } \ - } \ - snprintf(buf + off, TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off, \ - ", size:%u", size); \ - ts_parser__log(self); \ - } - -#define LOG_STACK() \ - if (self->dot_graph_file) \ - { \ - ts_stack_print_dot_graph(self->stack, self->language, \ - self->dot_graph_file); \ - fputs("\n\n", self->dot_graph_file); \ - } - -#define LOG_TREE(tree) \ - if (self->dot_graph_file) \ - { \ - ts_subtree_print_dot_graph(tree, self->language, \ - self->dot_graph_file); \ - fputs("\n", self->dot_graph_file); \ - } +#define LOG_TREE(tree) \ + if (self->dot_graph_file) { \ + ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \ + fputs("\n", self->dot_graph_file); \ + } #define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol) @@ -101,2178 
+79,2013 @@ static const unsigned MAX_VERSION_COUNT = 6; static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; static const unsigned MAX_SUMMARY_DEPTH = 16; static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; -// static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100; +static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100; -typedef struct s_token_cache -{ - Subtree token; - Subtree last_external_token; - t_u32 byte_index; -} t_token_cache; +typedef struct { + Subtree token; + Subtree last_external_token; + uint32_t byte_index; +} TokenCache; -typedef struct s_parser -{ - t_liblexer lexer; - Stack *stack; - SubtreePool tree_pool; - const t_language *language; - void *wasm_store; - t_vec_reduce_action reduce_actions; - Subtree finished_tree; - SubtreeArray trailing_extras; - SubtreeArray trailing_extras2; - SubtreeArray scratch_trees; - t_token_cache token_cache; - ReusableNode reusable_node; - void *external_scanner_payload; - FILE *dot_graph_file; - t_clock end_clock; - t_duration timeot_duration; - t_u32 accept_count; - t_u32 operation_count; - const volatile size_t *cancellation_flag; - Subtree old_tree; - t_vec_parser_range included_range_differences; - t_u32 included_range_difference_index; - bool has_scanner_error; -} t_parser; +struct TSParser { + Lexer lexer; + Stack *stack; + SubtreePool tree_pool; + const TSLanguage *language; + ReduceActionSet reduce_actions; + Subtree finished_tree; + SubtreeArray trailing_extras; + SubtreeArray trailing_extras2; + SubtreeArray scratch_trees; + TokenCache token_cache; + ReusableNode reusable_node; + void *external_scanner_payload; + FILE *dot_graph_file; + TSClock end_clock; + TSDuration timeout_duration; + unsigned accept_count; + unsigned operation_count; + const volatile size_t *cancellation_flag; + Subtree old_tree; + TSRangeArray included_range_differences; + unsigned included_range_difference_index; + bool has_scanner_error; +}; -typedef struct s_error_status -{ - t_u32 cost; - t_u32 
node_count; - t_i32 dynamic_precedence; - bool is_in_error; -} t_error_status; +typedef struct { + unsigned cost; + unsigned node_count; + int dynamic_precedence; + bool is_in_error; +} ErrorStatus; -typedef enum e_error_comparison -{ - ErrorComparisonTakeLeft, - ErrorComparisonPreferLeft, - ErrorComparisonNone, - ErrorComparisonPreferRight, - ErrorComparisonTakeRight, -} t_error_comparison; +typedef enum { + ErrorComparisonTakeLeft, + ErrorComparisonPreferLeft, + ErrorComparisonNone, + ErrorComparisonPreferRight, + ErrorComparisonTakeRight, +} ErrorComparison; -typedef struct s_string_input -{ - const char *string; - t_u32 length; -} t_string_input; +typedef struct { + const char *string; + uint32_t length; +} TSStringInput; // StringInput -static const char *ts_string_inpt_read(void *_self, t_u32 byte, t_point point, - t_u32 *length) -{ - (void)point; - t_string_input *self = (t_string_input *)_self; - if (byte >= self->length) - { - *length = 0; - return ""; - } - else - { - *length = self->length - byte; - return self->string + byte; - } +static const char *ts_string_input_read( + void *_self, + uint32_t byte, + TSPoint point, + uint32_t *length +) { + (void)point; + TSStringInput *self = (TSStringInput *)_self; + if (byte >= self->length) { + *length = 0; + return ""; + } else { + *length = self->length - byte; + return self->string + byte; + } } // Parser - Private -static void ts_parser__log(t_parser *self) -{ - if (self->lexer.logger.log) - { - self->lexer.logger.log(self->lexer.logger.payload, LogTypeParse, - self->lexer.debug_buffer); - } +static void ts_parser__log(TSParser *self) { + if (self->lexer.logger.log) { + self->lexer.logger.log( + self->lexer.logger.payload, + TSLogTypeParse, + self->lexer.debug_buffer + ); + } - if (self->dot_graph_file) - { - fprintf(self->dot_graph_file, "graph {\nlabel=\""); - for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) - { - if (*chr == '"' || *chr == '\\') - fputc('\\', self->dot_graph_file); - 
fputc(*chr, self->dot_graph_file); - } - fprintf(self->dot_graph_file, "\"\n}\n\n"); - } + if (self->dot_graph_file) { + fprintf(self->dot_graph_file, "graph {\nlabel=\""); + for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) { + if (*chr == '"' || *chr == '\\') fputc('\\', self->dot_graph_file); + fputc(*chr, self->dot_graph_file); + } + fprintf(self->dot_graph_file, "\"\n}\n\n"); + } } -static bool ts_parser__breakdown_top_of_stack(t_parser *self, - StackVersion version) -{ - bool did_break_down = false; - bool pending = false; +static bool ts_parser__breakdown_top_of_stack( + TSParser *self, + StackVersion version +) { + bool did_break_down = false; + bool pending = false; - do - { - StackSliceArray pop = ts_stack_pop_pending(self->stack, version); - if (!pop.size) - break; + do { + StackSliceArray pop = ts_stack_pop_pending(self->stack, version); + if (!pop.size) break; - did_break_down = true; - pending = false; - for (t_u32 i = 0; i < pop.size; i++) - { - StackSlice slice = pop.contents[i]; - t_state_id state = ts_stack_state(self->stack, slice.version); - Subtree parent = *array_front(&slice.subtrees); + did_break_down = true; + pending = false; + for (uint32_t i = 0; i < pop.size; i++) { + StackSlice slice = pop.contents[i]; + TSStateId state = ts_stack_state(self->stack, slice.version); + Subtree parent = *array_front(&slice.subtrees); - for (t_u32 j = 0, n = ts_subtree_child_count(parent); j < n; j++) - { - Subtree child = ts_subtree_children(parent)[j]; - pending = ts_subtree_child_count(child) > 0; + for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) { + Subtree child = ts_subtree_children(parent)[j]; + pending = ts_subtree_child_count(child) > 0; - if (ts_subtree_is_error(child)) - { - state = ERROR_STATE; - } - else if (!ts_subtree_extra(child)) - { - state = ts_language_next_state(self->language, state, - ts_subtree_symbol(child)); - } + if (ts_subtree_is_error(child)) { + state = ERROR_STATE; + } else if 
(!ts_subtree_extra(child)) { + state = ts_language_next_state(self->language, state, ts_subtree_symbol(child)); + } - ts_subtree_retain(child); - ts_stack_push(self->stack, slice.version, child, pending, - state); - } + ts_subtree_retain(child); + ts_stack_push(self->stack, slice.version, child, pending, state); + } - for (t_u32 j = 1; j < slice.subtrees.size; j++) - { - Subtree tree = slice.subtrees.contents[j]; - ts_stack_push(self->stack, slice.version, tree, false, state); - } + for (uint32_t j = 1; j < slice.subtrees.size; j++) { + Subtree tree = slice.subtrees.contents[j]; + ts_stack_push(self->stack, slice.version, tree, false, state); + } - ts_subtree_release(&self->tree_pool, parent); - array_delete(&slice.subtrees); + ts_subtree_release(&self->tree_pool, parent); + array_delete(&slice.subtrees); - LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent)); - LOG_STACK(); - } - } while (pending); + LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent)); + LOG_STACK(); + } + } while (pending); - return did_break_down; + return did_break_down; } -static void ts_parser__breakdown_lookahead(t_parser *self, Subtree *lookahead, - t_state_id state, - ReusableNode *reusable_node) -{ - bool did_descend = false; - Subtree tree = reusable_node_tree(reusable_node); - while (ts_subtree_child_count(tree) > 0 && - ts_subtree_parse_state(tree) != state) - { - LOG("state_mismatch sym:%s", TREE_NAME(tree)); - reusable_node_descend(reusable_node); - tree = reusable_node_tree(reusable_node); - did_descend = true; - } +static void ts_parser__breakdown_lookahead( + TSParser *self, + Subtree *lookahead, + TSStateId state, + ReusableNode *reusable_node +) { + bool did_descend = false; + Subtree tree = reusable_node_tree(reusable_node); + while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) { + LOG("state_mismatch sym:%s", TREE_NAME(tree)); + reusable_node_descend(reusable_node); + tree = reusable_node_tree(reusable_node); + did_descend = true; + } - 
if (did_descend) - { - ts_subtree_release(&self->tree_pool, *lookahead); - *lookahead = tree; - ts_subtree_retain(*lookahead); - } + if (did_descend) { + ts_subtree_release(&self->tree_pool, *lookahead); + *lookahead = tree; + ts_subtree_retain(*lookahead); + } } -static t_error_comparison ts_parser__compare_versions(t_parser *self, - t_error_status a, - t_error_status b) -{ - (void)self; - if (!a.is_in_error && b.is_in_error) - { - if (a.cost < b.cost) - { - return ErrorComparisonTakeLeft; - } - else - { - return ErrorComparisonPreferLeft; - } - } +static ErrorComparison ts_parser__compare_versions( + TSParser *self, + ErrorStatus a, + ErrorStatus b +) { + (void)self; + if (!a.is_in_error && b.is_in_error) { + if (a.cost < b.cost) { + return ErrorComparisonTakeLeft; + } else { + return ErrorComparisonPreferLeft; + } + } - if (a.is_in_error && !b.is_in_error) - { - if (b.cost < a.cost) - { - return ErrorComparisonTakeRight; - } - else - { - return ErrorComparisonPreferRight; - } - } + if (a.is_in_error && !b.is_in_error) { + if (b.cost < a.cost) { + return ErrorComparisonTakeRight; + } else { + return ErrorComparisonPreferRight; + } + } - if (a.cost < b.cost) - { - if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) - { - return ErrorComparisonTakeLeft; - } - else - { - return ErrorComparisonPreferLeft; - } - } + if (a.cost < b.cost) { + if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) { + return ErrorComparisonTakeLeft; + } else { + return ErrorComparisonPreferLeft; + } + } - if (b.cost < a.cost) - { - if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) - { - return ErrorComparisonTakeRight; - } - else - { - return ErrorComparisonPreferRight; - } - } + if (b.cost < a.cost) { + if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) { + return ErrorComparisonTakeRight; + } else { + return ErrorComparisonPreferRight; + } + } - if (a.dynamic_precedence > b.dynamic_precedence) - return ErrorComparisonPreferLeft; - 
if (b.dynamic_precedence > a.dynamic_precedence) - return ErrorComparisonPreferRight; - return ErrorComparisonNone; + if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft; + if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight; + return ErrorComparisonNone; } -static t_error_status ts_parser__version_status(t_parser *self, - StackVersion version) -{ - unsigned cost = ts_stack_error_cost(self->stack, version); - bool is_paused = ts_stack_is_paused(self->stack, version); - if (is_paused) - cost += ERROR_COST_PER_SKIPPED_TREE; - return (t_error_status){ - .cost = cost, - .node_count = ts_stack_node_count_since_error(self->stack, version), - .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), - .is_in_error = - is_paused || ts_stack_state(self->stack, version) == ERROR_STATE}; +static ErrorStatus ts_parser__version_status( + TSParser *self, + StackVersion version +) { + unsigned cost = ts_stack_error_cost(self->stack, version); + bool is_paused = ts_stack_is_paused(self->stack, version); + if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE; + return (ErrorStatus) { + .cost = cost, + .node_count = ts_stack_node_count_since_error(self->stack, version), + .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), + .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE + }; } -static bool ts_parser__better_version_exists(t_parser *self, - StackVersion version, - bool is_in_error, unsigned cost) -{ - if (self->finished_tree.ptr && - ts_subtree_error_cost(self->finished_tree) <= cost) - { - return true; - } +static bool ts_parser__better_version_exists( + TSParser *self, + StackVersion version, + bool is_in_error, + unsigned cost +) { + if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) { + return true; + } - t_parse_length position = ts_stack_position(self->stack, version); - t_error_status status = { - .cost = cost, - 
.is_in_error = is_in_error, - .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), - .node_count = ts_stack_node_count_since_error(self->stack, version), - }; + Length position = ts_stack_position(self->stack, version); + ErrorStatus status = { + .cost = cost, + .is_in_error = is_in_error, + .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), + .node_count = ts_stack_node_count_since_error(self->stack, version), + }; - for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; - i++) - { - if (i == version || !ts_stack_is_active(self->stack, i) || - ts_stack_position(self->stack, i).bytes < position.bytes) - continue; - t_error_status status_i = ts_parser__version_status(self, i); - switch (ts_parser__compare_versions(self, status, status_i)) - { - case ErrorComparisonTakeRight: - return true; - case ErrorComparisonPreferRight: - if (ts_stack_can_merge(self->stack, i, version)) - return true; - break; - default: - break; - } - } + for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { + if (i == version || + !ts_stack_is_active(self->stack, i) || + ts_stack_position(self->stack, i).bytes < position.bytes) continue; + ErrorStatus status_i = ts_parser__version_status(self, i); + switch (ts_parser__compare_versions(self, status, status_i)) { + case ErrorComparisonTakeRight: + return true; + case ErrorComparisonPreferRight: + if (ts_stack_can_merge(self->stack, i, version)) return true; + break; + default: + break; + } + } - return false; + return false; } -static bool ts_parser__call_main_lex_fn(t_parser *self, t_lex_modes lex_mode) -{ - (void)(lex_mode); - return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); +static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexMode lex_mode) { + + return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); + } -static bool ts_parser__call_keyword_lex_fn(t_parser *self, t_lex_modes lex_mode) -{ - (void)(lex_mode); - 
return self->language->keyword_lex_fn(&self->lexer.data, 0); +static bool ts_parser__call_keyword_lex_fn(TSParser *self, TSLexMode lex_mode) { + (void)(lex_mode); + + return self->language->keyword_lex_fn(&self->lexer.data, 0); + } -static void ts_parser__external_scanner_create(t_parser *self) -{ - if (self->language && self->language->external_scanner.states) - { - self->external_scanner_payload = - self->language->external_scanner.create(); - } +static void ts_parser__external_scanner_create( + TSParser *self +) { + if (self->language && self->language->external_scanner.states) { +if (self->language->external_scanner.create) { + self->external_scanner_payload = self->language->external_scanner.create(); + + } +}} + +static void ts_parser__external_scanner_destroy( + TSParser *self +) { + if ( + self->language && + self->external_scanner_payload && + self->language->external_scanner.destroy + ) { + self->language->external_scanner.destroy( + self->external_scanner_payload + ); + } + self->external_scanner_payload = NULL; } -static void ts_parser__external_scanner_destroy(t_parser *self) -{ - if (self->language && self->external_scanner_payload && - self->language->external_scanner.destroy) - { - self->language->external_scanner.destroy( - self->external_scanner_payload); - } - self->external_scanner_payload = NULL; +static unsigned ts_parser__external_scanner_serialize( + TSParser *self +) { + uint32_t length = self->language->external_scanner.serialize( + self->external_scanner_payload, + self->lexer.debug_buffer + ); + assert(length <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE); + return length; + } -static unsigned ts_parser__external_scanner_serialize(t_parser *self) -{ - return self->language->external_scanner.serialize( - self->external_scanner_payload, self->lexer.debug_buffer); +static void ts_parser__external_scanner_deserialize( + TSParser *self, + Subtree external_token +) { + const char *data = NULL; + uint32_t length = 0; + if (external_token.ptr) { + 
data = ts_external_scanner_state_data(&external_token.ptr->external_scanner_state); + length = external_token.ptr->external_scanner_state.length; + } + + + self->language->external_scanner.deserialize( + self->external_scanner_payload, + data, + length + ); + } -static void ts_parser__external_scanner_deserialize(t_parser *self, - Subtree external_token) -{ - const char *data = NULL; - t_u32 length = 0; - if (external_token.ptr) - { - data = ts_external_scanner_state_data( - &external_token.ptr->external_scanner_state); - length = external_token.ptr->external_scanner_state.length; - } - - self->language->external_scanner.deserialize(self->external_scanner_payload, - data, length); +static bool ts_parser__external_scanner_scan( + TSParser *self, + TSStateId external_lex_state +) { + + const bool *valid_external_tokens = ts_language_enabled_external_tokens( + self->language, + external_lex_state + ); + return self->language->external_scanner.scan( + self->external_scanner_payload, + &self->lexer.data, + valid_external_tokens + ); + } -static bool ts_parser__external_scanner_scan(t_parser *self, - t_state_id external_lex_state) -{ - const bool *valid_external_tokens = - ts_language_enabled_external_tokens(self->language, external_lex_state); - return self->language->external_scanner.scan(self->external_scanner_payload, - &self->lexer.data, - valid_external_tokens); +static bool ts_parser__can_reuse_first_leaf( + TSParser *self, + TSStateId state, + Subtree tree, + TableEntry *table_entry +) { + TSLexMode current_lex_mode = self->language->lex_modes[state]; + TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree); + TSStateId leaf_state = ts_subtree_leaf_parse_state(tree); + TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state]; + + // At the end of a non-terminal extra node, the lexer normally returns + // NULL, which indicates that the parser should look for a reduce action + // at symbol `0`. 
Avoid reusing tokens in this situation to ensure that + // the same thing happens when incrementally reparsing. + if (current_lex_mode.lex_state == (uint16_t)(-1)) return false; + + // If the token was created in a state with the same set of lookaheads, it is reusable. + if ( + table_entry->action_count > 0 && + memcmp(&leaf_lex_mode, ¤t_lex_mode, sizeof(TSLexMode)) == 0 && + ( + leaf_symbol != self->language->keyword_capture_token || + (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state) + ) + ) return true; + + // Empty tokens are not reusable in states with different lookaheads. + if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false; + + // If the current state allows external tokens or other tokens that conflict with this + // token, this token is not reusable. + return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable; } -static bool ts_parser__can_reuse_first_leaf(t_parser *self, t_state_id state, - Subtree tree, - t_table_entry *table_entry) -{ - t_lex_modes current_lex_mode = self->language->lex_modes[state]; - t_symbol leaf_symbol = ts_subtree_leaf_symbol(tree); - t_state_id leaf_state = ts_subtree_leaf_parse_state(tree); - t_lex_modes leaf_lex_mode = self->language->lex_modes[leaf_state]; +static Subtree ts_parser__lex( + TSParser *self, + StackVersion version, + TSStateId parse_state +) { + TSLexMode lex_mode = self->language->lex_modes[parse_state]; + if (lex_mode.lex_state == (uint16_t)-1) { + LOG("no_lookahead_after_non_terminal_extra"); + return NULL_SUBTREE; + } - // At the end of a non-terminal extra node, the lexer normally returns - // NULL, which indicates that the parser should look for a reduce action - // at symbol `0`. Avoid reusing tokens in this situation to ensure that - // the same thing happens when incrementally reparsing. 
- if (current_lex_mode.lex_state == (t_u16)(-1)) - return false; + const Length start_position = ts_stack_position(self->stack, version); + const Subtree external_token = ts_stack_last_external_token(self->stack, version); - // If the token was created in a state with the same set of lookaheads, it - // is reusable. - if (table_entry->action_count > 0 && - memcmp(&leaf_lex_mode, ¤t_lex_mode, sizeof(t_lex_modes)) == 0 && - (leaf_symbol != self->language->keyword_capture_token || - (!ts_subtree_is_keyword(tree) && - ts_subtree_parse_state(tree) == state))) - return true; + bool found_external_token = false; + bool error_mode = parse_state == ERROR_STATE; + bool skipped_error = false; + bool called_get_column = false; + int32_t first_error_character = 0; + Length error_start_position = length_zero(); + Length error_end_position = length_zero(); + uint32_t lookahead_end_byte = 0; + uint32_t external_scanner_state_len = 0; + bool external_scanner_state_changed = false; + ts_lexer_reset(&self->lexer, start_position); - // Empty tokens are not reusable in states with different lookaheads. - if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) - return false; + for (;;) { + bool found_token = false; + Length current_position = self->lexer.current_position; - // If the current state allows external tokens or other tokens that conflict - // with this token, this token is not reusable. 
- return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable; + if (lex_mode.external_lex_state != 0) { + LOG( + "lex_external state:%d, row:%u, column:%u", + lex_mode.external_lex_state, + current_position.extent.row, + current_position.extent.column + ); + ts_lexer_start(&self->lexer); + ts_parser__external_scanner_deserialize(self, external_token); + found_token = ts_parser__external_scanner_scan(self, lex_mode.external_lex_state); + if (self->has_scanner_error) return NULL_SUBTREE; + ts_lexer_finish(&self->lexer, &lookahead_end_byte); + + if (found_token) { + external_scanner_state_len = ts_parser__external_scanner_serialize(self); + external_scanner_state_changed = !ts_external_scanner_state_eq( + ts_subtree_external_scanner_state(external_token), + self->lexer.debug_buffer, + external_scanner_state_len + ); + + // When recovering from an error, ignore any zero-length external tokens + // unless they have changed the external scanner's state. This helps to + // avoid infinite loops which could otherwise occur, because the lexer is + // looking for any possible token, instead of looking for the specific set of + // tokens that are valid in some parse state. + // + // Note that it's possible that the token end position may be *before* the + // original position of the lexer because of the way that tokens are positioned + // at included range boundaries: when a token is terminated at the start of + // an included range, it is marked as ending at the *end* of the preceding + // included range. 
+ if ( + self->lexer.token_end_position.bytes <= current_position.bytes && + (error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) && + !external_scanner_state_changed + ) { + LOG( + "ignore_empty_external_token symbol:%s", + SYM_NAME(self->language->external_scanner.symbol_map[self->lexer.data.result_symbol]) + ) + found_token = false; + } + } + + if (found_token) { + found_external_token = true; + called_get_column = self->lexer.did_get_column; + break; + } + + ts_lexer_reset(&self->lexer, current_position); + } + + LOG( + "lex_internal state:%d, row:%u, column:%u", + lex_mode.lex_state, + current_position.extent.row, + current_position.extent.column + ); + ts_lexer_start(&self->lexer); + found_token = ts_parser__call_main_lex_fn(self, lex_mode); + ts_lexer_finish(&self->lexer, &lookahead_end_byte); + if (found_token) break; + + if (!error_mode) { + error_mode = true; + lex_mode = self->language->lex_modes[ERROR_STATE]; + ts_lexer_reset(&self->lexer, start_position); + continue; + } + + if (!skipped_error) { + LOG("skip_unrecognized_character"); + skipped_error = true; + error_start_position = self->lexer.token_start_position; + error_end_position = self->lexer.token_start_position; + first_error_character = self->lexer.data.lookahead; + } + + if (self->lexer.current_position.bytes == error_end_position.bytes) { + if (self->lexer.data.eof(&self->lexer.data)) { + self->lexer.data.result_symbol = ts_builtin_sym_error; + break; + } + self->lexer.data.advance(&self->lexer.data, false); + } + + error_end_position = self->lexer.current_position; + } + + Subtree result; + if (skipped_error) { + Length padding = length_sub(error_start_position, start_position); + Length size = length_sub(error_end_position, error_start_position); + uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes; + result = ts_subtree_new_error( + &self->tree_pool, + first_error_character, + padding, + size, + lookahead_bytes, + parse_state, + self->language + 
); + } else { + bool is_keyword = false; + TSSymbol symbol = self->lexer.data.result_symbol; + Length padding = length_sub(self->lexer.token_start_position, start_position); + Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position); + uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes; + + if (found_external_token) { + symbol = self->language->external_scanner.symbol_map[symbol]; + } else if (symbol == self->language->keyword_capture_token && symbol != 0) { + uint32_t end_byte = self->lexer.token_end_position.bytes; + ts_lexer_reset(&self->lexer, self->lexer.token_start_position); + ts_lexer_start(&self->lexer); + + is_keyword = ts_parser__call_keyword_lex_fn(self, lex_mode); + + if ( + is_keyword && + self->lexer.token_end_position.bytes == end_byte && + ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol) + ) { + symbol = self->lexer.data.result_symbol; + } + } + + result = ts_subtree_new_leaf( + &self->tree_pool, + symbol, + padding, + size, + lookahead_bytes, + parse_state, + found_external_token, + called_get_column, + is_keyword, + self->language + ); + + if (found_external_token) { + MutableSubtree mut_result = ts_subtree_to_mut_unsafe(result); + ts_external_scanner_state_init( + &mut_result.ptr->external_scanner_state, + self->lexer.debug_buffer, + external_scanner_state_len + ); + mut_result.ptr->has_external_scanner_state_change = external_scanner_state_changed; + } + } + + LOG_LOOKAHEAD( + SYM_NAME(ts_subtree_symbol(result)), + ts_subtree_total_size(result).bytes + ); + return result; } -static Subtree ts_parser__lex(t_parser *self, StackVersion version, - t_state_id parse_state) -{ - t_lex_modes lex_mode = self->language->lex_modes[parse_state]; - if (lex_mode.lex_state == (t_u16)-1) - { - LOG("no_lookahead_after_non_terminal_extra"); - return NULL_SUBTREE; - } - - const t_parse_length start_position = - ts_stack_position(self->stack, version); - const 
Subtree external_token = - ts_stack_last_external_token(self->stack, version); - - bool found_external_token = false; - bool error_mode = parse_state == ERROR_STATE; - bool skipped_error = false; - bool called_get_column = false; - t_i32 first_error_character = 0; - t_parse_length error_start_position = length_zero(); - t_parse_length error_end_position = length_zero(); - t_i32 lookahead_end_byte = 0; - t_i32 external_scanner_state_len = 0; - bool external_scanner_state_changed = false; - bool found_token; - ts_lexer_reset(&self->lexer, start_position); - - for (;;) - { - t_parse_length current_position = self->lexer.current_position; - - if (lex_mode.external_lex_state != 0) - { - LOG("lex_external state:%d, row:%u, column:%u", - lex_mode.external_lex_state, current_position.extent.row, - current_position.extent.column); - ts_lexer_start(&self->lexer); - ts_parser__external_scanner_deserialize(self, external_token); - found_token = ts_parser__external_scanner_scan( - self, lex_mode.external_lex_state); - if (self->has_scanner_error) - return NULL_SUBTREE; - ts_lexer_finish(&self->lexer, &lookahead_end_byte); - - if (found_token) - { - external_scanner_state_len = - ts_parser__external_scanner_serialize(self); - external_scanner_state_changed = !ts_external_scanner_state_eq( - ts_subtree_external_scanner_state(external_token), - self->lexer.debug_buffer, external_scanner_state_len); - - // When recovering from an error, ignore any zero-length - // external tokens unless they have changed the external - // scanner's state. This helps to avoid infinite loops which - // could otherwise occur, because the lexer is looking for any - // possible token, instead of looking for the specific set of - // tokens that are valid in some parse state. 
- // - // Note that it's possible that the token end position may be - // *before* the original position of the lexer because of the - // way that tokens are positioned at included range boundaries: - // when a token is terminated at the start of an included range, - // it is marked as ending at the *end* of the preceding included - // range. - if (self->lexer.token_end_position.bytes <= - current_position.bytes && - (error_mode || !ts_stack_has_advanced_since_error( - self->stack, version)) && - !external_scanner_state_changed) - { - LOG("ignore_empty_external_token symbol:%s", - SYM_NAME( - self->language->external_scanner - .symbol_map[self->lexer.data.result_symbol])) - found_token = false; - } - } - - if (found_token) - { - found_external_token = true; - called_get_column = self->lexer.did_get_column; - break; - } - - ts_lexer_reset(&self->lexer, current_position); - } - - LOG("lex_internal state:%d, row:%u, column:%u", lex_mode.lex_state, - current_position.extent.row, current_position.extent.column); - ts_lexer_start(&self->lexer); - found_token = ts_parser__call_main_lex_fn(self, lex_mode); - ts_lexer_finish(&self->lexer, &lookahead_end_byte); - if (found_token) - break; - - if (!error_mode) - { - error_mode = true; - lex_mode = self->language->lex_modes[ERROR_STATE]; - ts_lexer_reset(&self->lexer, start_position); - continue; - } - - if (!skipped_error) - { - LOG("skip_unrecognized_character"); - skipped_error = true; - error_start_position = self->lexer.token_start_position; - error_end_position = self->lexer.token_start_position; - first_error_character = self->lexer.data.lookahead; - } - - if (self->lexer.current_position.bytes == error_end_position.bytes) - { - if (self->lexer.data.eof(&self->lexer.data)) - { - self->lexer.data.result_symbol = ts_builtin_sym_error; - break; - } - self->lexer.data.advance(&self->lexer.data, false); - } - - error_end_position = self->lexer.current_position; - } - - Subtree result; - if (skipped_error) - { - 
t_parse_length padding = - length_sub(error_start_position, start_position); - t_parse_length size = - length_sub(error_end_position, error_start_position); - t_u32 lookahead_bytes = lookahead_end_byte - error_end_position.bytes; - result = ts_subtree_new_error(&self->tree_pool, first_error_character, - padding, size, lookahead_bytes, - parse_state, self->language); - } - else - { - bool is_keyword = false; - t_symbol symbol = self->lexer.data.result_symbol; - t_parse_length padding = - length_sub(self->lexer.token_start_position, start_position); - t_parse_length size = length_sub(self->lexer.token_end_position, - self->lexer.token_start_position); - t_u32 lookahead_bytes = - lookahead_end_byte - self->lexer.token_end_position.bytes; - - if (found_external_token) - { - symbol = self->language->external_scanner.symbol_map[symbol]; - } - else if (symbol == self->language->keyword_capture_token && symbol != 0) - { - t_u32 end_byte = self->lexer.token_end_position.bytes; - ts_lexer_reset(&self->lexer, self->lexer.token_start_position); - ts_lexer_start(&self->lexer); - - is_keyword = ts_parser__call_keyword_lex_fn(self, lex_mode); - - if (is_keyword && - self->lexer.token_end_position.bytes == end_byte && - ts_language_has_actions(self->language, parse_state, - self->lexer.data.result_symbol)) - { - symbol = self->lexer.data.result_symbol; - } - } - - result = ts_subtree_new_leaf(&self->tree_pool, symbol, padding, size, - lookahead_bytes, parse_state, - found_external_token, called_get_column, - is_keyword, self->language); - - if (found_external_token) - { - MutableSubtree mt_result = ts_subtree_to_mt_unsafe(result); - ts_external_scanner_state_init( - &mt_result.ptr->external_scanner_state, - self->lexer.debug_buffer, external_scanner_state_len); - mt_result.ptr->has_external_scanner_state_change = - external_scanner_state_changed; - } - } - - LOG_LOOKAHEAD(SYM_NAME(ts_subtree_symbol(result)), - ts_subtree_total_size(result).bytes); - return result; +static Subtree 
ts_parser__get_cached_token( + TSParser *self, + TSStateId state, + size_t position, + Subtree last_external_token, + TableEntry *table_entry +) { + TokenCache *cache = &self->token_cache; + if ( + cache->token.ptr && cache->byte_index == position && + ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token) + ) { + ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry); + if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) { + ts_subtree_retain(cache->token); + return cache->token; + } + } + return NULL_SUBTREE; } -static Subtree ts_parser__get_cached_token(t_parser *self, t_state_id state, - size_t position, - Subtree last_external_token, - t_table_entry *table_entry) -{ - t_token_cache *cache = &self->token_cache; - if (cache->token.ptr && cache->byte_index == position && - ts_subtree_external_scanner_state_eq(cache->last_external_token, - last_external_token)) - { - ts_language_table_entry(self->language, state, - ts_subtree_symbol(cache->token), table_entry); - if (ts_parser__can_reuse_first_leaf(self, state, cache->token, - table_entry)) - { - ts_subtree_retain(cache->token); - return cache->token; - } - } - return NULL_SUBTREE; +static void ts_parser__set_cached_token( + TSParser *self, + uint32_t byte_index, + Subtree last_external_token, + Subtree token +) { + TokenCache *cache = &self->token_cache; + if (token.ptr) ts_subtree_retain(token); + if (last_external_token.ptr) ts_subtree_retain(last_external_token); + if (cache->token.ptr) ts_subtree_release(&self->tree_pool, cache->token); + if (cache->last_external_token.ptr) ts_subtree_release(&self->tree_pool, cache->last_external_token); + cache->token = token; + cache->byte_index = byte_index; + cache->last_external_token = last_external_token; } -static void ts_parser__set_cached_token(t_parser *self, t_u32 byte_index, - Subtree last_external_token, - Subtree token) -{ - t_token_cache *cache = &self->token_cache; 
- if (token.ptr) - ts_subtree_retain(token); - if (last_external_token.ptr) - ts_subtree_retain(last_external_token); - if (cache->token.ptr) - ts_subtree_release(&self->tree_pool, cache->token); - if (cache->last_external_token.ptr) - ts_subtree_release(&self->tree_pool, cache->last_external_token); - cache->token = token; - cache->byte_index = byte_index; - cache->last_external_token = last_external_token; +static bool ts_parser__has_included_range_difference( + const TSParser *self, + uint32_t start_position, + uint32_t end_position +) { + return ts_range_array_intersects( + &self->included_range_differences, + self->included_range_difference_index, + start_position, + end_position + ); } -static Subtree ts_parser__reuse_node(t_parser *self, StackVersion version, - t_state_id *state, t_u32 position, - Subtree last_external_token, - t_table_entry *table_entry) -{ - Subtree result; - while ((result = reusable_node_tree(&self->reusable_node)).ptr) - { - t_u32 byte_offset = reusable_node_byte_offset(&self->reusable_node); - t_u32 end_byte_offset = byte_offset + ts_subtree_total_bytes(result); +static Subtree ts_parser__reuse_node( + TSParser *self, + StackVersion version, + TSStateId *state, + uint32_t position, + Subtree last_external_token, + TableEntry *table_entry +) { + Subtree result; + while ((result = reusable_node_tree(&self->reusable_node)).ptr) { + uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node); + uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result); - // Do not reuse an EOF node if the included ranges array has changes - // later on in the file. - if (ts_subtree_is_eof(result)) - end_byte_offset = UINT32_MAX; + // Do not reuse an EOF node if the included ranges array has changes + // later on in the file. 
+ if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX; - if (byte_offset > position) - { - LOG("before_reusable_node symbol:%s", TREE_NAME(result)); - break; - } + if (byte_offset > position) { + LOG("before_reusable_node symbol:%s", TREE_NAME(result)); + break; + } - if (byte_offset < position) - { - LOG("past_reusable_node symbol:%s", TREE_NAME(result)); - if (end_byte_offset <= position || - !reusable_node_descend(&self->reusable_node)) - { - reusable_node_advance(&self->reusable_node); - } - continue; - } + if (byte_offset < position) { + LOG("past_reusable_node symbol:%s", TREE_NAME(result)); + if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) { + reusable_node_advance(&self->reusable_node); + } + continue; + } - if (!ts_subtree_external_scanner_state_eq( - self->reusable_node.last_external_token, last_external_token)) - { - LOG("reusable_node_has_different_external_scanner_state symbol:%s", - TREE_NAME(result)); - reusable_node_advance(&self->reusable_node); - continue; - } + if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) { + LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result)); + reusable_node_advance(&self->reusable_node); + continue; + } - const char *reason = NULL; - if (ts_subtree_has_changes(result)) - { - reason = "has_changes"; - } - else if (ts_subtree_is_error(result)) - { - reason = "is_error"; - } - else if (ts_subtree_missing(result)) - { - reason = "is_missing"; - } - else if (ts_subtree_is_fragile(result)) - { - reason = "is_fragile"; - } + const char *reason = NULL; + if (ts_subtree_has_changes(result)) { + reason = "has_changes"; + } else if (ts_subtree_is_error(result)) { + reason = "is_error"; + } else if (ts_subtree_missing(result)) { + reason = "is_missing"; + } else if (ts_subtree_is_fragile(result)) { + reason = "is_fragile"; + } else if (ts_parser__has_included_range_difference(self, byte_offset, 
end_byte_offset)) { + reason = "contains_different_included_range"; + } - if (reason) - { - LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result)); - if (!reusable_node_descend(&self->reusable_node)) - { - reusable_node_advance(&self->reusable_node); - ts_parser__breakdown_top_of_stack(self, version); - *state = ts_stack_state(self->stack, version); - } - continue; - } + if (reason) { + LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result)); + if (!reusable_node_descend(&self->reusable_node)) { + reusable_node_advance(&self->reusable_node); + ts_parser__breakdown_top_of_stack(self, version); + *state = ts_stack_state(self->stack, version); + } + continue; + } - t_symbol leaf_symbol = ts_subtree_leaf_symbol(result); - ts_language_table_entry(self->language, *state, leaf_symbol, - table_entry); - if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) - { - LOG("cant_reuse_node symbol:%s, first_leaf_symbol:%s", - TREE_NAME(result), SYM_NAME(leaf_symbol)); - reusable_node_advance_past_leaf(&self->reusable_node); - break; - } + TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result); + ts_language_table_entry(self->language, *state, leaf_symbol, table_entry); + if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) { + LOG( + "cant_reuse_node symbol:%s, first_leaf_symbol:%s", + TREE_NAME(result), + SYM_NAME(leaf_symbol) + ); + reusable_node_advance_past_leaf(&self->reusable_node); + break; + } - LOG("reuse_node symbol:%s", TREE_NAME(result)); - ts_subtree_retain(result); - return result; - } + LOG("reuse_node symbol:%s", TREE_NAME(result)); + ts_subtree_retain(result); + return result; + } - return NULL_SUBTREE; + return NULL_SUBTREE; } // Determine if a given tree should be replaced by an alternative tree. // -// The decision is based on the trees' error costs (if any), their dynamic -// precedence, and finally, as a default, by a recursive comparison of the -// trees' symbols. 
-static bool ts_parser__select_parse_tree(t_parser *self, Subtree left, - Subtree right) -{ - if (!left.ptr) - return true; - if (!right.ptr) - return false; +// The decision is based on the trees' error costs (if any), their dynamic precedence, +// and finally, as a default, by a recursive comparison of the trees' symbols. +static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) { + if (!left.ptr) return true; + if (!right.ptr) return false; - if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) - { - LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), - TREE_NAME(left)); - return true; - } + if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) { + LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left)); + return true; + } - if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) - { - LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), - TREE_NAME(right)); - return false; - } + if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) { + LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); + return false; + } - if (ts_subtree_dynamic_precedence(right) > - ts_subtree_dynamic_precedence(left)) - { - LOG("select_higher_precedence symbol:%s, prec:%" PRId32 - ", over_symbol:%s, other_prec:%" PRId32, - TREE_NAME(right), ts_subtree_dynamic_precedence(right), - TREE_NAME(left), ts_subtree_dynamic_precedence(left)); - return true; - } + if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) { + LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32, + TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left), + ts_subtree_dynamic_precedence(left)); + return true; + } - if (ts_subtree_dynamic_precedence(left) > - ts_subtree_dynamic_precedence(right)) - { - LOG("select_higher_precedence symbol:%s, prec:%" PRId32 - ", 
over_symbol:%s, other_prec:%" PRId32, - TREE_NAME(left), ts_subtree_dynamic_precedence(left), - TREE_NAME(right), ts_subtree_dynamic_precedence(right)); - return false; - } + if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) { + LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32, + TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right), + ts_subtree_dynamic_precedence(right)); + return false; + } - if (ts_subtree_error_cost(left) > 0) - return true; + if (ts_subtree_error_cost(left) > 0) return true; - int comparison = ts_subtree_compare(left, right, &self->tree_pool); - switch (comparison) - { - case -1: - LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), - TREE_NAME(right)); - return false; - break; - case 1: - LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), - TREE_NAME(left)); - return true; - default: - LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), - TREE_NAME(right)); - return false; - } + int comparison = ts_subtree_compare(left, right, &self->tree_pool); + switch (comparison) { + case -1: + LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); + return false; + break; + case 1: + LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left)); + return true; + default: + LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); + return false; + } } // Determine if a given tree's children should be replaced by an alternative // array of children. -static bool ts_parser__select_children(t_parser *self, Subtree left, - const SubtreeArray *children) -{ - array_assign(&self->scratch_trees, children); +static bool ts_parser__select_children( + TSParser *self, + Subtree left, + const SubtreeArray *children +) { + array_assign(&self->scratch_trees, children); - // Create a temporary subtree using the scratch trees array. 
This node does - // not perform any allocation except for possibly growing the array to make - // room for its own heap data. The scratch tree is never explicitly - // released, so the same 'scratch trees' array can be reused again later. - MutableSubtree scratch_tree = ts_subtree_new_node( - ts_subtree_symbol(left), &self->scratch_trees, 0, self->language); + // Create a temporary subtree using the scratch trees array. This node does + // not perform any allocation except for possibly growing the array to make + // room for its own heap data. The scratch tree is never explicitly released, + // so the same 'scratch trees' array can be reused again later. + MutableSubtree scratch_tree = ts_subtree_new_node( + ts_subtree_symbol(left), + &self->scratch_trees, + 0, + self->language + ); - return ts_parser__select_parse_tree(self, left, - ts_subtree_from_mut(scratch_tree)); + return ts_parser__select_tree( + self, + left, + ts_subtree_from_mut(scratch_tree) + ); } -static void ts_parser__shift(t_parser *self, StackVersion version, - t_state_id state, Subtree lookahead, bool extra) -{ - bool is_leaf = ts_subtree_child_count(lookahead) == 0; - Subtree subtree_to_push = lookahead; - if (extra != ts_subtree_extra(lookahead) && is_leaf) - { - MutableSubtree result = - ts_subtree_make_mut(&self->tree_pool, lookahead); - ts_subtree_set_extra(&result, extra); - subtree_to_push = ts_subtree_from_mut(result); - } +static void ts_parser__shift( + TSParser *self, + StackVersion version, + TSStateId state, + Subtree lookahead, + bool extra +) { + bool is_leaf = ts_subtree_child_count(lookahead) == 0; + Subtree subtree_to_push = lookahead; + if (extra != ts_subtree_extra(lookahead) && is_leaf) { + MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead); + ts_subtree_set_extra(&result, extra); + subtree_to_push = ts_subtree_from_mut(result); + } - ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state); - if 
(ts_subtree_has_external_tokens(subtree_to_push)) - { - ts_stack_set_last_external_token( - self->stack, version, - ts_subtree_last_external_token(subtree_to_push)); - } + ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state); + if (ts_subtree_has_external_tokens(subtree_to_push)) { + ts_stack_set_last_external_token( + self->stack, version, ts_subtree_last_external_token(subtree_to_push) + ); + } } -static StackVersion ts_parser__reduce(t_parser *self, StackVersion version, - t_symbol symbol, t_u32 count, - int dynamic_precedence, - t_u16 production_id, bool is_fragile, - bool end_of_non_terminal_extra) -{ - t_u32 initial_version_count = ts_stack_version_count(self->stack); +static StackVersion ts_parser__reduce( + TSParser *self, + StackVersion version, + TSSymbol symbol, + uint32_t count, + int dynamic_precedence, + uint16_t production_id, + bool is_fragile, + bool end_of_non_terminal_extra +) { + uint32_t initial_version_count = ts_stack_version_count(self->stack); - // Pop the given number of nodes from the given version of the parse stack. - // If stack versions have previously merged, then there may be more than one - // path back through the stack. For each path, create a new parent node to - // contain the popped children, and push it onto the stack in place of the - // children. - StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); - t_u32 removed_version_count = 0; - for (t_u32 i = 0; i < pop.size; i++) - { - StackSlice slice = pop.contents[i]; - StackVersion slice_version = slice.version - removed_version_count; + // Pop the given number of nodes from the given version of the parse stack. + // If stack versions have previously merged, then there may be more than one + // path back through the stack. For each path, create a new parent node to + // contain the popped children, and push it onto the stack in place of the + // children. 
+ StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); + uint32_t removed_version_count = 0; + for (uint32_t i = 0; i < pop.size; i++) { + StackSlice slice = pop.contents[i]; + StackVersion slice_version = slice.version - removed_version_count; - // This is where new versions are added to the parse stack. The versions - // will all be sorted and truncated at the end of the outer parsing - // loop. Allow the maximum version count to be temporarily exceeded, but - // only by a limited threshold. - if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) - { - ts_stack_remove_version(self->stack, slice_version); - ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); - removed_version_count++; - while (i + 1 < pop.size) - { - StackSlice next_slice = pop.contents[i + 1]; - if (next_slice.version != slice.version) - break; - ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); - i++; - } - continue; - } + // This is where new versions are added to the parse stack. The versions + // will all be sorted and truncated at the end of the outer parsing loop. + // Allow the maximum version count to be temporarily exceeded, but only + // by a limited threshold. + if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) { + ts_stack_remove_version(self->stack, slice_version); + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); + removed_version_count++; + while (i + 1 < pop.size) { + StackSlice next_slice = pop.contents[i + 1]; + if (next_slice.version != slice.version) break; + ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); + i++; + } + continue; + } - // Extra tokens on top of the stack should not be included in this new - // parent node. They will be re-pushed onto the stack after the parent - // node is created and pushed. 
- SubtreeArray children = slice.subtrees; - ts_subtree_array_remove_trailing_extras(&children, - &self->trailing_extras); + // Extra tokens on top of the stack should not be included in this new parent + // node. They will be re-pushed onto the stack after the parent node is + // created and pushed. + SubtreeArray children = slice.subtrees; + ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras); - MutableSubtree parent = ts_subtree_new_node( - symbol, &children, production_id, self->language); + MutableSubtree parent = ts_subtree_new_node( + symbol, &children, production_id, self->language + ); - // This pop operation may have caused multiple stack versions to - // collapse into one, because they all diverged from a common state. In - // that case, choose one of the arrays of trees to be the parent node's - // children, and delete the rest of the tree arrays. - while (i + 1 < pop.size) - { - StackSlice next_slice = pop.contents[i + 1]; - if (next_slice.version != slice.version) - break; - i++; + // This pop operation may have caused multiple stack versions to collapse + // into one, because they all diverged from a common state. In that case, + // choose one of the arrays of trees to be the parent node's children, and + // delete the rest of the tree arrays. 
+ while (i + 1 < pop.size) { + StackSlice next_slice = pop.contents[i + 1]; + if (next_slice.version != slice.version) break; + i++; - SubtreeArray next_slice_children = next_slice.subtrees; - ts_subtree_array_remove_trailing_extras(&next_slice_children, - &self->trailing_extras2); + SubtreeArray next_slice_children = next_slice.subtrees; + ts_subtree_array_remove_trailing_extras(&next_slice_children, &self->trailing_extras2); - if (ts_parser__select_children(self, ts_subtree_from_mut(parent), - &next_slice_children)) - { - ts_subtree_array_clear(&self->tree_pool, - &self->trailing_extras); - ts_subtree_release(&self->tree_pool, - ts_subtree_from_mut(parent)); - array_swap(&self->trailing_extras, &self->trailing_extras2); - parent = ts_subtree_new_node(symbol, &next_slice_children, - production_id, self->language); - } - else - { - array_clear(&self->trailing_extras2); - ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); - } - } + if (ts_parser__select_children( + self, + ts_subtree_from_mut(parent), + &next_slice_children + )) { + ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras); + ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent)); + array_swap(&self->trailing_extras, &self->trailing_extras2); + parent = ts_subtree_new_node( + symbol, &next_slice_children, production_id, self->language + ); + } else { + array_clear(&self->trailing_extras2); + ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); + } + } - t_state_id state = ts_stack_state(self->stack, slice_version); - t_state_id next_state = - ts_language_next_state(self->language, state, symbol); - if (end_of_non_terminal_extra && next_state == state) - { - parent.ptr->extra = true; - } - if (is_fragile || pop.size > 1 || initial_version_count > 1) - { - parent.ptr->fragile_left = true; - parent.ptr->fragile_right = true; - parent.ptr->parse_state = TS_TREE_STATE_NONE; - } - else - { - parent.ptr->parse_state = state; - } - parent.ptr->dynamic_precedence 
+= dynamic_precedence; + TSStateId state = ts_stack_state(self->stack, slice_version); + TSStateId next_state = ts_language_next_state(self->language, state, symbol); + if (end_of_non_terminal_extra && next_state == state) { + parent.ptr->extra = true; + } + if (is_fragile || pop.size > 1 || initial_version_count > 1) { + parent.ptr->fragile_left = true; + parent.ptr->fragile_right = true; + parent.ptr->parse_state = TS_TREE_STATE_NONE; + } else { + parent.ptr->parse_state = state; + } + parent.ptr->dynamic_precedence += dynamic_precedence; - // Push the parent node onto the stack, along with any extra tokens that - // were previously on top of the stack. - ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), - false, next_state); - for (t_u32 j = 0; j < self->trailing_extras.size; j++) - { - ts_stack_push(self->stack, slice_version, - self->trailing_extras.contents[j], false, next_state); - } + // Push the parent node onto the stack, along with any extra tokens that + // were previously on top of the stack. + ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state); + for (uint32_t j = 0; j < self->trailing_extras.size; j++) { + ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state); + } - for (StackVersion j = 0; j < slice_version; j++) - { - if (j == version) - continue; - if (ts_stack_merge(self->stack, j, slice_version)) - { - removed_version_count++; - break; - } - } - } + for (StackVersion j = 0; j < slice_version; j++) { + if (j == version) continue; + if (ts_stack_merge(self->stack, j, slice_version)) { + removed_version_count++; + break; + } + } + } - // Return the first new stack version that was created. - return ts_stack_version_count(self->stack) > initial_version_count - ? initial_version_count - : STACK_VERSION_NONE; + // Return the first new stack version that was created. + return ts_stack_version_count(self->stack) > initial_version_count + ? 
initial_version_count + : STACK_VERSION_NONE; } -static void ts_parser__accept(t_parser *self, StackVersion version, - Subtree lookahead) -{ - assert(ts_subtree_is_eof(lookahead)); - ts_stack_push(self->stack, version, lookahead, false, 1); +static void ts_parser__accept( + TSParser *self, + StackVersion version, + Subtree lookahead +) { + assert(ts_subtree_is_eof(lookahead)); + ts_stack_push(self->stack, version, lookahead, false, 1); - StackSliceArray pop = ts_stack_pop_all(self->stack, version); - for (t_u32 i = 0; i < pop.size; i++) - { - SubtreeArray trees = pop.contents[i].subtrees; + StackSliceArray pop = ts_stack_pop_all(self->stack, version); + for (uint32_t i = 0; i < pop.size; i++) { + SubtreeArray trees = pop.contents[i].subtrees; - Subtree root = NULL_SUBTREE; - for (t_u32 j = trees.size - 1; j + 1 > 0; j--) - { - Subtree tree = trees.contents[j]; - if (!ts_subtree_extra(tree)) - { - assert(!tree.data.is_inline); - t_u32 child_count = ts_subtree_child_count(tree); - const Subtree *children = ts_subtree_children(tree); - for (t_u32 k = 0; k < child_count; k++) - { - ts_subtree_retain(children[k]); - } - array_splice(&trees, j, 1, child_count, children); - root = ts_subtree_from_mut(ts_subtree_new_node( - ts_subtree_symbol(tree), &trees, tree.ptr->production_id, - self->language)); - ts_subtree_release(&self->tree_pool, tree); - break; - } - } + Subtree root = NULL_SUBTREE; + for (uint32_t j = trees.size - 1; j + 1 > 0; j--) { + Subtree tree = trees.contents[j]; + if (!ts_subtree_extra(tree)) { + assert(!tree.data.is_inline); + uint32_t child_count = ts_subtree_child_count(tree); + const Subtree *children = ts_subtree_children(tree); + for (uint32_t k = 0; k < child_count; k++) { + ts_subtree_retain(children[k]); + } + array_splice(&trees, j, 1, child_count, children); + root = ts_subtree_from_mut(ts_subtree_new_node( + ts_subtree_symbol(tree), + &trees, + tree.ptr->production_id, + self->language + )); + ts_subtree_release(&self->tree_pool, tree); + 
break; + } + } - assert(root.ptr); - self->accept_count++; + assert(root.ptr); + self->accept_count++; - if (self->finished_tree.ptr) - { - if (ts_parser__select_parse_tree(self, self->finished_tree, root)) - { - ts_subtree_release(&self->tree_pool, self->finished_tree); - self->finished_tree = root; - } - else - { - ts_subtree_release(&self->tree_pool, root); - } - } - else - { - self->finished_tree = root; - } - } + if (self->finished_tree.ptr) { + if (ts_parser__select_tree(self, self->finished_tree, root)) { + ts_subtree_release(&self->tree_pool, self->finished_tree); + self->finished_tree = root; + } else { + ts_subtree_release(&self->tree_pool, root); + } + } else { + self->finished_tree = root; + } + } - ts_stack_remove_version(self->stack, pop.contents[0].version); - ts_stack_halt(self->stack, version); + ts_stack_remove_version(self->stack, pop.contents[0].version); + ts_stack_halt(self->stack, version); } static bool ts_parser__do_all_potential_reductions( - t_parser *self, StackVersion starting_version, t_symbol lookahead_symbol) -{ - t_u32 initial_version_count = ts_stack_version_count(self->stack); + TSParser *self, + StackVersion starting_version, + TSSymbol lookahead_symbol +) { + uint32_t initial_version_count = ts_stack_version_count(self->stack); - bool can_shift_lookahead_symbol = false; - StackVersion version = starting_version; - for (unsigned i = 0; true; i++) - { - t_u32 version_count = ts_stack_version_count(self->stack); - if (version >= version_count) - break; + bool can_shift_lookahead_symbol = false; + StackVersion version = starting_version; + for (unsigned i = 0; true; i++) { + uint32_t version_count = ts_stack_version_count(self->stack); + if (version >= version_count) break; - bool merged = false; - for (StackVersion j = initial_version_count; j < version; j++) - { - if (ts_stack_merge(self->stack, j, version)) - { - merged = true; - break; - } - } - if (merged) - continue; + bool merged = false; + for (StackVersion j = 
initial_version_count; j < version; j++) { + if (ts_stack_merge(self->stack, j, version)) { + merged = true; + break; + } + } + if (merged) continue; - t_state_id state = ts_stack_state(self->stack, version); - bool has_shift_action = false; - self->reduce_actions.len = 0; + TSStateId state = ts_stack_state(self->stack, version); + bool has_shift_action = false; + array_clear(&self->reduce_actions); - t_symbol first_symbol, end_symbol; - if (lookahead_symbol != 0) - { - first_symbol = lookahead_symbol; - end_symbol = lookahead_symbol + 1; - } - else - { - first_symbol = 1; - end_symbol = self->language->token_count; - } + TSSymbol first_symbol, end_symbol; + if (lookahead_symbol != 0) { + first_symbol = lookahead_symbol; + end_symbol = lookahead_symbol + 1; + } else { + first_symbol = 1; + end_symbol = self->language->token_count; + } - for (t_symbol symbol = first_symbol; symbol < end_symbol; symbol++) - { - t_table_entry entry; - ts_language_table_entry(self->language, state, symbol, &entry); - for (t_u32 j = 0; j < entry.action_count; j++) - { - t_parse_actions action = entry.actions[j]; - switch (action.type) - { - case ActionTypeShift: - case ActionTypeRecover: - if (!action.shift.extra && !action.shift.repetition) - has_shift_action = true; - break; - case ActionTypeReduce: - if (action.reduce.child_count > 0) - ts_reduce_action_set_add( - &self->reduce_actions, - (t_reduce_action){ - .symbol = action.reduce.symbol, - .count = action.reduce.child_count, - .dynamic_precedence = - action.reduce.dynamic_precedence, - .production_id = action.reduce.production_id, - }); - break; - default: - break; - } - } - } + for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) { + TableEntry entry; + ts_language_table_entry(self->language, state, symbol, &entry); + for (uint32_t j = 0; j < entry.action_count; j++) { + TSParseAction action = entry.actions[j]; + switch (action.type) { + case TSParseActionTypeShift: + case TSParseActionTypeRecover: + if 
(!action.shift.extra && !action.shift.repetition) has_shift_action = true; + break; + case TSParseActionTypeReduce: + if (action.reduce.child_count > 0) + ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction) { + .symbol = action.reduce.symbol, + .count = action.reduce.child_count, + .dynamic_precedence = action.reduce.dynamic_precedence, + .production_id = action.reduce.production_id, + }); + break; + default: + break; + } + } + } - StackVersion reduction_version = STACK_VERSION_NONE; - for (t_u32 j = 0; j < self->reduce_actions.len; j++) - { - t_reduce_action action = self->reduce_actions.buffer[j]; + StackVersion reduction_version = STACK_VERSION_NONE; + for (uint32_t j = 0; j < self->reduce_actions.size; j++) { + ReduceAction action = self->reduce_actions.contents[j]; - reduction_version = ts_parser__reduce( - self, version, action.symbol, action.count, - action.dynamic_precedence, action.production_id, true, false); - } + reduction_version = ts_parser__reduce( + self, version, action.symbol, action.count, + action.dynamic_precedence, action.production_id, + true, false + ); + } - if (has_shift_action) - { - can_shift_lookahead_symbol = true; - } - else if (reduction_version != STACK_VERSION_NONE && - i < MAX_VERSION_COUNT) - { - ts_stack_renumber_version(self->stack, reduction_version, version); - continue; - } - else if (lookahead_symbol != 0) - { - ts_stack_remove_version(self->stack, version); - } + if (has_shift_action) { + can_shift_lookahead_symbol = true; + } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) { + ts_stack_renumber_version(self->stack, reduction_version, version); + continue; + } else if (lookahead_symbol != 0) { + ts_stack_remove_version(self->stack, version); + } - if (version == starting_version) - { - version = version_count; - } - else - { - version++; - } - } + if (version == starting_version) { + version = version_count; + } else { + version++; + } + } - return can_shift_lookahead_symbol; + return 
can_shift_lookahead_symbol; } -static bool ts_parser__recover_to_state(t_parser *self, StackVersion version, - unsigned depth, t_state_id goal_state) -{ - StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth); - StackVersion previous_version = STACK_VERSION_NONE; +static bool ts_parser__recover_to_state( + TSParser *self, + StackVersion version, + unsigned depth, + TSStateId goal_state +) { + StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth); + StackVersion previous_version = STACK_VERSION_NONE; - for (unsigned i = 0; i < pop.size; i++) - { - StackSlice slice = pop.contents[i]; + for (unsigned i = 0; i < pop.size; i++) { + StackSlice slice = pop.contents[i]; - if (slice.version == previous_version) - { - ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); - array_erase(&pop, i--); - continue; - } + if (slice.version == previous_version) { + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); + array_erase(&pop, i--); + continue; + } - if (ts_stack_state(self->stack, slice.version) != goal_state) - { - ts_stack_halt(self->stack, slice.version); - ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); - array_erase(&pop, i--); - continue; - } + if (ts_stack_state(self->stack, slice.version) != goal_state) { + ts_stack_halt(self->stack, slice.version); + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); + array_erase(&pop, i--); + continue; + } - SubtreeArray error_trees = - ts_stack_pop_error(self->stack, slice.version); - if (error_trees.size > 0) - { - assert(error_trees.size == 1); - Subtree error_tree = error_trees.contents[0]; - t_u32 error_child_count = ts_subtree_child_count(error_tree); - if (error_child_count > 0) - { - array_splice(&slice.subtrees, 0, 0, error_child_count, - ts_subtree_children(error_tree)); - for (unsigned j = 0; j < error_child_count; j++) - { - ts_subtree_retain(slice.subtrees.contents[j]); - } - } - ts_subtree_array_delete(&self->tree_pool, &error_trees); - } + 
SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version); + if (error_trees.size > 0) { + assert(error_trees.size == 1); + Subtree error_tree = error_trees.contents[0]; + uint32_t error_child_count = ts_subtree_child_count(error_tree); + if (error_child_count > 0) { + array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree)); + for (unsigned j = 0; j < error_child_count; j++) { + ts_subtree_retain(slice.subtrees.contents[j]); + } + } + ts_subtree_array_delete(&self->tree_pool, &error_trees); + } - ts_subtree_array_remove_trailing_extras(&slice.subtrees, - &self->trailing_extras); + ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras); - if (slice.subtrees.size > 0) - { - Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, - self->language); - ts_stack_push(self->stack, slice.version, error, false, goal_state); - } - else - { - array_delete(&slice.subtrees); - } + if (slice.subtrees.size > 0) { + Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language); + ts_stack_push(self->stack, slice.version, error, false, goal_state); + } else { + array_delete(&slice.subtrees); + } - for (unsigned j = 0; j < self->trailing_extras.size; j++) - { - Subtree tree = self->trailing_extras.contents[j]; - ts_stack_push(self->stack, slice.version, tree, false, goal_state); - } + for (unsigned j = 0; j < self->trailing_extras.size; j++) { + Subtree tree = self->trailing_extras.contents[j]; + ts_stack_push(self->stack, slice.version, tree, false, goal_state); + } - previous_version = slice.version; - } + previous_version = slice.version; + } - return previous_version != STACK_VERSION_NONE; + return previous_version != STACK_VERSION_NONE; } -static void ts_parser__recover(t_parser *self, StackVersion version, - Subtree lookahead) -{ - bool did_recover = false; - unsigned previous_version_count = ts_stack_version_count(self->stack); - t_parse_length position = 
ts_stack_position(self->stack, version); - StackSummary *summary = ts_stack_get_summary(self->stack, version); - unsigned node_count_since_error = - ts_stack_node_count_since_error(self->stack, version); - unsigned current_error_cost = ts_stack_error_cost(self->stack, version); +static void ts_parser__recover( + TSParser *self, + StackVersion version, + Subtree lookahead +) { + bool did_recover = false; + unsigned previous_version_count = ts_stack_version_count(self->stack); + Length position = ts_stack_position(self->stack, version); + StackSummary *summary = ts_stack_get_summary(self->stack, version); + unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version); + unsigned current_error_cost = ts_stack_error_cost(self->stack, version); - // When the parser is in the error state, there are two strategies for - // recovering with a given lookahead token: - // 1. Find a previous state on the stack in which that lookahead token would - // be valid. Then, - // create a new stack version that is in that state again. This entails - // popping all of the subtrees that have been pushed onto the stack since - // that previous state, and wrapping them in an ERROR node. - // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto - // the stack, and - // move on to the next lookahead token, remaining in the error state. - // - // First, try the strategy 1. Upon entering the error state, the parser - // recorded a summary of the previous parse states and their depths. Look at - // each state in the summary, to see if the current lookahead token would be - // valid in that state. - if (summary && !ts_subtree_is_error(lookahead)) - { - for (unsigned i = 0; i < summary->size; i++) - { - StackSummaryEntry entry = summary->contents[i]; + // When the parser is in the error state, there are two strategies for recovering with a + // given lookahead token: + // 1. 
Find a previous state on the stack in which that lookahead token would be valid. Then, + // create a new stack version that is in that state again. This entails popping all of the + // subtrees that have been pushed onto the stack since that previous state, and wrapping + // them in an ERROR node. + // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and + // move on to the next lookahead token, remaining in the error state. + // + // First, try the strategy 1. Upon entering the error state, the parser recorded a summary + // of the previous parse states and their depths. Look at each state in the summary, to see + // if the current lookahead token would be valid in that state. + if (summary && !ts_subtree_is_error(lookahead)) { + for (unsigned i = 0; i < summary->size; i++) { + StackSummaryEntry entry = summary->contents[i]; - if (entry.state == ERROR_STATE) - continue; - if (entry.position.bytes == position.bytes) - continue; - unsigned depth = entry.depth; - if (node_count_since_error > 0) - depth++; + if (entry.state == ERROR_STATE) continue; + if (entry.position.bytes == position.bytes) continue; + unsigned depth = entry.depth; + if (node_count_since_error > 0) depth++; - // Do not recover in ways that create redundant stack versions. - bool would_merge = false; - for (unsigned j = 0; j < previous_version_count; j++) - { - if (ts_stack_state(self->stack, j) == entry.state && - ts_stack_position(self->stack, j).bytes == position.bytes) - { - would_merge = true; - break; - } - } - if (would_merge) - continue; + // Do not recover in ways that create redundant stack versions. 
+ bool would_merge = false; + for (unsigned j = 0; j < previous_version_count; j++) { + if ( + ts_stack_state(self->stack, j) == entry.state && + ts_stack_position(self->stack, j).bytes == position.bytes + ) { + would_merge = true; + break; + } + } + if (would_merge) continue; - // Do not recover if the result would clearly be worse than some - // existing stack version. - unsigned new_cost = - current_error_cost + entry.depth * ERROR_COST_PER_SKIPPED_TREE + - (position.bytes - entry.position.bytes) * - ERROR_COST_PER_SKIPPED_CHAR + - (position.extent.row - entry.position.extent.row) * - ERROR_COST_PER_SKIPPED_LINE; - if (ts_parser__better_version_exists(self, version, false, - new_cost)) - break; + // Do not recover if the result would clearly be worse than some existing stack version. + unsigned new_cost = + current_error_cost + + entry.depth * ERROR_COST_PER_SKIPPED_TREE + + (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR + + (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE; + if (ts_parser__better_version_exists(self, version, false, new_cost)) break; - // If the current lookahead token is valid in some previous state, - // recover to that state. Then stop looking for further recoveries. - if (ts_language_has_actions(self->language, entry.state, - ts_subtree_symbol(lookahead))) - { - if (ts_parser__recover_to_state(self, version, depth, - entry.state)) - { - did_recover = true; - LOG("recover_to_previous state:%u, depth:%u", entry.state, - depth); - LOG_STACK(); - break; - } - } - } - } + // If the current lookahead token is valid in some previous state, recover to that state. + // Then stop looking for further recoveries. 
+ if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) { + if (ts_parser__recover_to_state(self, version, depth, entry.state)) { + did_recover = true; + LOG("recover_to_previous state:%u, depth:%u", entry.state, depth); + LOG_STACK(); + break; + } + } + } + } - // In the process of attempting to recover, some stack versions may have - // been created and subsequently halted. Remove those versions. - for (unsigned i = previous_version_count; - i < ts_stack_version_count(self->stack); i++) - { - if (!ts_stack_is_active(self->stack, i)) - { - ts_stack_remove_version(self->stack, i--); - } - } + // In the process of attempting to recover, some stack versions may have been created + // and subsequently halted. Remove those versions. + for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) { + if (!ts_stack_is_active(self->stack, i)) { + ts_stack_remove_version(self->stack, i--); + } + } - // If strategy 1 succeeded, a new stack version will have been created which - // is able to handle the current lookahead token. Now, in addition, try - // strategy 2 described above: skip the current lookahead token by wrapping - // it in an ERROR node. + // If strategy 1 succeeded, a new stack version will have been created which is able to handle + // the current lookahead token. Now, in addition, try strategy 2 described above: skip the + // current lookahead token by wrapping it in an ERROR node. - // Don't pursue this additional strategy if there are already too many stack - // versions. - if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) - { - ts_stack_halt(self->stack, version); - ts_subtree_release(&self->tree_pool, lookahead); - return; - } + // Don't pursue this additional strategy if there are already too many stack versions. 
+ if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { + ts_stack_halt(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); + return; + } - if (did_recover && ts_subtree_has_external_scanner_state_change(lookahead)) - { - ts_stack_halt(self->stack, version); - ts_subtree_release(&self->tree_pool, lookahead); - return; - } + if ( + did_recover && + ts_subtree_has_external_scanner_state_change(lookahead) + ) { + ts_stack_halt(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); + return; + } - // If the parser is still in the error state at the end of the file, just - // wrap everything in an ERROR node and terminate. - if (ts_subtree_is_eof(lookahead)) - { - LOG("recover_eof"); - SubtreeArray children = array_new(); - Subtree parent = - ts_subtree_new_error_node(&children, false, self->language); - ts_stack_push(self->stack, version, parent, false, 1); - ts_parser__accept(self, version, lookahead); - return; - } + // If the parser is still in the error state at the end of the file, just wrap everything + // in an ERROR node and terminate. + if (ts_subtree_is_eof(lookahead)) { + LOG("recover_eof"); + SubtreeArray children = array_new(); + Subtree parent = ts_subtree_new_error_node(&children, false, self->language); + ts_stack_push(self->stack, version, parent, false, 1); + ts_parser__accept(self, version, lookahead); + return; + } - // Do not recover if the result would clearly be worse than some existing - // stack version. 
- unsigned new_cost = - current_error_cost + ERROR_COST_PER_SKIPPED_TREE + - ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR + - ts_subtree_total_size(lookahead).extent.row * - ERROR_COST_PER_SKIPPED_LINE; - if (ts_parser__better_version_exists(self, version, false, new_cost)) - { - ts_stack_halt(self->stack, version); - ts_subtree_release(&self->tree_pool, lookahead); - return; - } + // Do not recover if the result would clearly be worse than some existing stack version. + unsigned new_cost = + current_error_cost + ERROR_COST_PER_SKIPPED_TREE + + ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR + + ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE; + if (ts_parser__better_version_exists(self, version, false, new_cost)) { + ts_stack_halt(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); + return; + } - // If the current lookahead token is an extra token, mark it as extra. This - // means it won't be counted in error cost calculations. - unsigned n; - const t_parse_actions *actions = ts_language_actions( - self->language, 1, ts_subtree_symbol(lookahead), &n); - if (n > 0 && actions[n - 1].type == ActionTypeShift && - actions[n - 1].shift.extra) - { - MutableSubtree mutable_lookahead = - ts_subtree_make_mut(&self->tree_pool, lookahead); - ts_subtree_set_extra(&mutable_lookahead, true); - lookahead = ts_subtree_from_mut(mutable_lookahead); - } + // If the current lookahead token is an extra token, mark it as extra. This means it won't + // be counted in error cost calculations. 
+ unsigned n; + const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n); + if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) { + MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); + ts_subtree_set_extra(&mutable_lookahead, true); + lookahead = ts_subtree_from_mut(mutable_lookahead); + } - // Wrap the lookahead token in an ERROR. - LOG("skip_token symbol:%s", TREE_NAME(lookahead)); - SubtreeArray children = array_new(); - array_reserve(&children, 1); - array_push(&children, lookahead); - MutableSubtree error_repeat = ts_subtree_new_node( - ts_builtin_sym_error_repeat, &children, 0, self->language); + // Wrap the lookahead token in an ERROR. + LOG("skip_token symbol:%s", TREE_NAME(lookahead)); + SubtreeArray children = array_new(); + array_reserve(&children, 1); + array_push(&children, lookahead); + MutableSubtree error_repeat = ts_subtree_new_node( + ts_builtin_sym_error_repeat, + &children, + 0, + self->language + ); - // If other tokens have already been skipped, so there is already an ERROR - // at the top of the stack, then pop that ERROR off the stack and wrap the - // two ERRORs together into one larger ERROR. - if (node_count_since_error > 0) - { - StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1); + // If other tokens have already been skipped, so there is already an ERROR at the top of the + // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger + // ERROR. + if (node_count_since_error > 0) { + StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1); - // TODO: Figure out how to make this condition occur. - // See https://github.com/atom/atom/issues/18450#issuecomment-439579778 - // If multiple stack versions have merged at this point, just pick one - // of the errors arbitrarily and discard the rest. 
- if (pop.size > 1) - { - for (unsigned i = 1; i < pop.size; i++) - { - ts_subtree_array_delete(&self->tree_pool, - &pop.contents[i].subtrees); - } - while (ts_stack_version_count(self->stack) > - pop.contents[0].version + 1) - { - ts_stack_remove_version(self->stack, - pop.contents[0].version + 1); - } - } + // TODO: Figure out how to make this condition occur. + // See https://github.com/atom/atom/issues/18450#issuecomment-439579778 + // If multiple stack versions have merged at this point, just pick one of the errors + // arbitrarily and discard the rest. + if (pop.size > 1) { + for (unsigned i = 1; i < pop.size; i++) { + ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees); + } + while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) { + ts_stack_remove_version(self->stack, pop.contents[0].version + 1); + } + } - ts_stack_renumber_version(self->stack, pop.contents[0].version, - version); - array_push(&pop.contents[0].subtrees, - ts_subtree_from_mut(error_repeat)); - error_repeat = - ts_subtree_new_node(ts_builtin_sym_error_repeat, - &pop.contents[0].subtrees, 0, self->language); - } + ts_stack_renumber_version(self->stack, pop.contents[0].version, version); + array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat)); + error_repeat = ts_subtree_new_node( + ts_builtin_sym_error_repeat, + &pop.contents[0].subtrees, + 0, + self->language + ); + } - // Push the new ERROR onto the stack. - ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), - false, ERROR_STATE); - if (ts_subtree_has_external_tokens(lookahead)) - { - ts_stack_set_last_external_token( - self->stack, version, ts_subtree_last_external_token(lookahead)); - } + // Push the new ERROR onto the stack. 
+ ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE); + if (ts_subtree_has_external_tokens(lookahead)) { + ts_stack_set_last_external_token( + self->stack, version, ts_subtree_last_external_token(lookahead) + ); + } } -static void ts_parser__handle_error(t_parser *self, StackVersion version, - Subtree lookahead) -{ - t_u32 previous_version_count = ts_stack_version_count(self->stack); +static void ts_parser__handle_error( + TSParser *self, + StackVersion version, + Subtree lookahead +) { + uint32_t previous_version_count = ts_stack_version_count(self->stack); - // Perform any reductions that can happen in this state, regardless of the - // lookahead. After skipping one or more invalid tokens, the parser might - // find a token that would have allowed a reduction to take place. - ts_parser__do_all_potential_reductions(self, version, 0); - t_u32 version_count = ts_stack_version_count(self->stack); - t_parse_length position = ts_stack_position(self->stack, version); + // Perform any reductions that can happen in this state, regardless of the lookahead. After + // skipping one or more invalid tokens, the parser might find a token that would have allowed + // a reduction to take place. + ts_parser__do_all_potential_reductions(self, version, 0); + uint32_t version_count = ts_stack_version_count(self->stack); + Length position = ts_stack_position(self->stack, version); - // Push a discontinuity onto the stack. Merge all of the stack versions that - // were created in the previous step. 
- bool did_insert_missing_token = false; - for (StackVersion v = version; v < version_count;) - { - if (!did_insert_missing_token) - { - t_state_id state = ts_stack_state(self->stack, v); - for (t_symbol missing_symbol = 1; - missing_symbol < (t_u16)self->language->token_count; - missing_symbol++) - { - t_state_id state_after_missing_symbol = ts_language_next_state( - self->language, state, missing_symbol); - if (state_after_missing_symbol == 0 || - state_after_missing_symbol == state) - { - continue; - } + // Push a discontinuity onto the stack. Merge all of the stack versions that + // were created in the previous step. + bool did_insert_missing_token = false; + for (StackVersion v = version; v < version_count;) { + if (!did_insert_missing_token) { + TSStateId state = ts_stack_state(self->stack, v); + for ( + TSSymbol missing_symbol = 1; + missing_symbol < (uint16_t)self->language->token_count; + missing_symbol++ + ) { + TSStateId state_after_missing_symbol = ts_language_next_state( + self->language, state, missing_symbol + ); + if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) { + continue; + } - if (ts_language_has_reduce_action( - self->language, state_after_missing_symbol, - ts_subtree_leaf_symbol(lookahead))) - { - // In case the parser is currently outside of any included - // range, the lexer will snap to the beginning of the next - // included range. The missing token's padding must be - // assigned to position it within the next included range. 
- ts_lexer_reset(&self->lexer, position); - ts_lexer_mark_end(&self->lexer); - t_parse_length padding = - length_sub(self->lexer.token_end_position, position); - t_u32 lookahead_bytes = - ts_subtree_total_bytes(lookahead) + - ts_subtree_lookahead_bytes(lookahead); + if (ts_language_has_reduce_action( + self->language, + state_after_missing_symbol, + ts_subtree_leaf_symbol(lookahead) + )) { + // In case the parser is currently outside of any included range, the lexer will + // snap to the beginning of the next included range. The missing token's padding + // must be assigned to position it within the next included range. + ts_lexer_reset(&self->lexer, position); + ts_lexer_mark_end(&self->lexer); + Length padding = length_sub(self->lexer.token_end_position, position); + uint32_t lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead); - StackVersion version_with_missing_tree = - ts_stack_copy_version(self->stack, v); - Subtree missing_tree = ts_subtree_new_missing_leaf( - &self->tree_pool, missing_symbol, padding, - lookahead_bytes, self->language); - ts_stack_push(self->stack, version_with_missing_tree, - missing_tree, false, - state_after_missing_symbol); + StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v); + Subtree missing_tree = ts_subtree_new_missing_leaf( + &self->tree_pool, missing_symbol, + padding, lookahead_bytes, + self->language + ); + ts_stack_push( + self->stack, version_with_missing_tree, + missing_tree, false, + state_after_missing_symbol + ); - if (ts_parser__do_all_potential_reductions( - self, version_with_missing_tree, - ts_subtree_leaf_symbol(lookahead))) - { - LOG("recover_with_missing symbol:%s, state:%u", - SYM_NAME(missing_symbol), - ts_stack_state(self->stack, - version_with_missing_tree)); - did_insert_missing_token = true; - break; - } - } - } - } + if (ts_parser__do_all_potential_reductions( + self, version_with_missing_tree, + ts_subtree_leaf_symbol(lookahead) + )) { + 
LOG( + "recover_with_missing symbol:%s, state:%u", + SYM_NAME(missing_symbol), + ts_stack_state(self->stack, version_with_missing_tree) + ); + did_insert_missing_token = true; + break; + } + } + } + } - ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE); - v = (v == version) ? previous_version_count : v + 1; - } + ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE); + v = (v == version) ? previous_version_count : v + 1; + } - for (unsigned i = previous_version_count; i < version_count; i++) - { - bool did_merge = - ts_stack_merge(self->stack, version, previous_version_count); - assert(did_merge); - (void)did_merge; // fix warning/error with clang -Os - } + for (unsigned i = previous_version_count; i < version_count; i++) { + bool did_merge = ts_stack_merge(self->stack, version, previous_version_count); + assert(did_merge); + (void)did_merge; // fix warning/error with clang -Os + } - ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH); + ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH); - // Begin recovery with the current lookahead node, rather than waiting for - // the next turn of the parse loop. This ensures that the tree accounts for - // the current lookahead token's "lookahead bytes" value, which describes - // how far the lexer needed to look ahead beyond the content of the token in - // order to recognize it. - if (ts_subtree_child_count(lookahead) > 0) - { - ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, - &self->reusable_node); - } - ts_parser__recover(self, version, lookahead); + // Begin recovery with the current lookahead node, rather than waiting for the + // next turn of the parse loop. This ensures that the tree accounts for the + // current lookahead token's "lookahead bytes" value, which describes how far + // the lexer needed to look ahead beyond the content of the token in order to + // recognize it. 
+ if (ts_subtree_child_count(lookahead) > 0) { + ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node); + } + ts_parser__recover(self, version, lookahead); - LOG_STACK(); + LOG_STACK(); } -static bool ts_parser__advance(t_parser *self, StackVersion version, - bool allow_node_reuse) -{ - t_state_id state = ts_stack_state(self->stack, version); - t_u32 position = ts_stack_position(self->stack, version).bytes; - Subtree last_external_token = - ts_stack_last_external_token(self->stack, version); +static bool ts_parser__advance( + TSParser *self, + StackVersion version, + bool allow_node_reuse +) { + TSStateId state = ts_stack_state(self->stack, version); + uint32_t position = ts_stack_position(self->stack, version).bytes; + Subtree last_external_token = ts_stack_last_external_token(self->stack, version); - bool did_reuse = true; - Subtree lookahead = NULL_SUBTREE; - t_table_entry table_entry = {.action_count = 0}; + bool did_reuse = true; + Subtree lookahead = NULL_SUBTREE; + TableEntry table_entry = {.action_count = 0}; - // If possible, reuse a node from the previous syntax tree. - if (allow_node_reuse) - { - lookahead = ts_parser__reuse_node(self, version, &state, position, - last_external_token, &table_entry); - } + // If possible, reuse a node from the previous syntax tree. + if (allow_node_reuse) { + lookahead = ts_parser__reuse_node( + self, version, &state, position, last_external_token, &table_entry + ); + } - // If no node from the previous syntax tree could be reused, then try to - // reuse the token previously returned by the lexer. - if (!lookahead.ptr) - { - did_reuse = false; - lookahead = ts_parser__get_cached_token( - self, state, position, last_external_token, &table_entry); - } + // If no node from the previous syntax tree could be reused, then try to + // reuse the token previously returned by the lexer. 
+ if (!lookahead.ptr) { + did_reuse = false; + lookahead = ts_parser__get_cached_token( + self, state, position, last_external_token, &table_entry + ); + } - bool needs_lex = !lookahead.ptr; - for (;;) - { - // Otherwise, re-run the lexer. - if (needs_lex) - { - needs_lex = false; - lookahead = ts_parser__lex(self, version, state); - if (self->has_scanner_error) - return false; + bool needs_lex = !lookahead.ptr; + for (;;) { + // Otherwise, re-run the lexer. + if (needs_lex) { + needs_lex = false; + lookahead = ts_parser__lex(self, version, state); + if (self->has_scanner_error) return false; - if (lookahead.ptr) - { - ts_parser__set_cached_token(self, position, last_external_token, - lookahead); - ts_language_table_entry(self->language, state, - ts_subtree_symbol(lookahead), - &table_entry); - } + if (lookahead.ptr) { + ts_parser__set_cached_token(self, position, last_external_token, lookahead); + ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry); + } - // When parsing a non-terminal extra, a null lookahead indicates the - // end of the rule. The reduction is stored in the EOF table entry. - // After the reduction, the lexer needs to be run again. - else - { - ts_language_table_entry(self->language, state, - ts_builtin_sym_end, &table_entry); - } - } + // When parsing a non-terminal extra, a null lookahead indicates the + // end of the rule. The reduction is stored in the EOF table entry. + // After the reduction, the lexer needs to be run again. + else { + ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry); + } + } - // If a cancellation flag or a timeout was provided, then check every - // time a fixed number of parse actions has been processed. + // If a cancellation flag or a timeout was provided, then check every + // time a fixed number of parse actions has been processed. 
+ if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { + self->operation_count = 0; + } + if ( + self->operation_count == 0 && + ((self->cancellation_flag && atomic_load(self->cancellation_flag)) || + (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock))) + ) { + if (lookahead.ptr) { + ts_subtree_release(&self->tree_pool, lookahead); + } + return false; + } - // Process each parse action for the current lookahead token in - // the current state. If there are multiple actions, then this is - // an ambiguous state. REDUCE actions always create a new stack - // version, whereas SHIFT actions update the existing stack version - // and terminate this loop. - StackVersion last_reduction_version = STACK_VERSION_NONE; - for (t_u32 i = 0; i < table_entry.action_count; i++) - { - t_parse_actions action = table_entry.actions[i]; + // Process each parse action for the current lookahead token in + // the current state. If there are multiple actions, then this is + // an ambiguous state. REDUCE actions always create a new stack + // version, whereas SHIFT actions update the existing stack version + // and terminate this loop. 
+ StackVersion last_reduction_version = STACK_VERSION_NONE; + for (uint32_t i = 0; i < table_entry.action_count; i++) { + TSParseAction action = table_entry.actions[i]; - switch (action.type) - { - case ActionTypeShift: { - if (action.shift.repetition) - break; - t_state_id next_state; - if (action.shift.extra) - { - next_state = state; - LOG("shift_extra"); - } - else - { - next_state = action.shift.state; - LOG("shift state:%u", next_state); - } + switch (action.type) { + case TSParseActionTypeShift: { + if (action.shift.repetition) break; + TSStateId next_state; + if (action.shift.extra) { + next_state = state; + LOG("shift_extra"); + } else { + next_state = action.shift.state; + LOG("shift state:%u", next_state); + } - if (ts_subtree_child_count(lookahead) > 0) - { - ts_parser__breakdown_lookahead(self, &lookahead, state, - &self->reusable_node); - next_state = ts_language_next_state( - self->language, state, ts_subtree_symbol(lookahead)); - } + if (ts_subtree_child_count(lookahead) > 0) { + ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node); + next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead)); + } - ts_parser__shift(self, version, next_state, lookahead, - action.shift.extra); - if (did_reuse) - reusable_node_advance(&self->reusable_node); - return true; - } + ts_parser__shift(self, version, next_state, lookahead, action.shift.extra); + if (did_reuse) reusable_node_advance(&self->reusable_node); + return true; + } - case ActionTypeReduce: { - bool is_fragile = table_entry.action_count > 1; - bool end_of_non_terminal_extra = lookahead.ptr == NULL; - LOG("reduce sym:%s, child_count:%u", - SYM_NAME(action.reduce.symbol), action.reduce.child_count); - StackVersion reduction_version = ts_parser__reduce( - self, version, action.reduce.symbol, - action.reduce.child_count, action.reduce.dynamic_precedence, - action.reduce.production_id, is_fragile, - end_of_non_terminal_extra); - if (reduction_version != 
STACK_VERSION_NONE) - { - last_reduction_version = reduction_version; - } - break; - } + case TSParseActionTypeReduce: { + bool is_fragile = table_entry.action_count > 1; + bool end_of_non_terminal_extra = lookahead.ptr == NULL; + LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count); + StackVersion reduction_version = ts_parser__reduce( + self, version, action.reduce.symbol, action.reduce.child_count, + action.reduce.dynamic_precedence, action.reduce.production_id, + is_fragile, end_of_non_terminal_extra + ); + if (reduction_version != STACK_VERSION_NONE) { + last_reduction_version = reduction_version; + } + break; + } - case ActionTypeAccept: { - LOG("accept"); - ts_parser__accept(self, version, lookahead); - return true; - } + case TSParseActionTypeAccept: { + LOG("accept"); + ts_parser__accept(self, version, lookahead); + return true; + } - case ActionTypeRecover: { - if (ts_subtree_child_count(lookahead) > 0) - { - ts_parser__breakdown_lookahead( - self, &lookahead, ERROR_STATE, &self->reusable_node); - } + case TSParseActionTypeRecover: { + if (ts_subtree_child_count(lookahead) > 0) { + ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node); + } - ts_parser__recover(self, version, lookahead); - if (did_reuse) - reusable_node_advance(&self->reusable_node); - return true; - } - } - } + ts_parser__recover(self, version, lookahead); + if (did_reuse) reusable_node_advance(&self->reusable_node); + return true; + } + } + } - // If a reduction was performed, then replace the current stack version - // with one of the stack versions created by a reduction, and continue - // processing this version of the stack with the same lookahead symbol. 
- if (last_reduction_version != STACK_VERSION_NONE) - { - ts_stack_renumber_version(self->stack, last_reduction_version, - version); - LOG_STACK(); - state = ts_stack_state(self->stack, version); + // If a reduction was performed, then replace the current stack version + // with one of the stack versions created by a reduction, and continue + // processing this version of the stack with the same lookahead symbol. + if (last_reduction_version != STACK_VERSION_NONE) { + ts_stack_renumber_version(self->stack, last_reduction_version, version); + LOG_STACK(); + state = ts_stack_state(self->stack, version); - // At the end of a non-terminal extra rule, the lexer will return a - // null subtree, because the parser needs to perform a fixed - // reduction regardless of the lookahead node. After performing that - // reduction, (and completing the non-terminal extra rule) run the - // lexer again based on the current parse state. - if (!lookahead.ptr) - { - needs_lex = true; - } - else - { - ts_language_table_entry(self->language, state, - ts_subtree_leaf_symbol(lookahead), - &table_entry); - } + // At the end of a non-terminal extra rule, the lexer will return a + // null subtree, because the parser needs to perform a fixed reduction + // regardless of the lookahead node. After performing that reduction, + // (and completing the non-terminal extra rule) run the lexer again based + // on the current parse state. + if (!lookahead.ptr) { + needs_lex = true; + } else { + ts_language_table_entry( + self->language, + state, + ts_subtree_leaf_symbol(lookahead), + &table_entry + ); + } - continue; - } + continue; + } - // A non-terminal extra rule was reduced and merged into an existing - // stack version. This version can be discarded. - if (!lookahead.ptr) - { - ts_stack_halt(self->stack, version); - return true; - } + // A non-terminal extra rule was reduced and merged into an existing + // stack version. This version can be discarded. 
+ if (!lookahead.ptr) { + ts_stack_halt(self->stack, version); + return true; + } - // If there were no parse actions for the current lookahead token, then - // it is not valid in this state. If the current lookahead token is a - // keyword, then switch to treating it as the normal word token if that - // token is valid in this state. - if (ts_subtree_is_keyword(lookahead) && - ts_subtree_symbol(lookahead) != - self->language->keyword_capture_token) - { - ts_language_table_entry(self->language, state, - self->language->keyword_capture_token, - &table_entry); - if (table_entry.action_count > 0) - { - LOG("switch from_keyword:%s, to_word_token:%s", - TREE_NAME(lookahead), - SYM_NAME(self->language->keyword_capture_token)); + // If there were no parse actions for the current lookahead token, then + // it is not valid in this state. If the current lookahead token is a + // keyword, then switch to treating it as the normal word token if that + // token is valid in this state. + if ( + ts_subtree_is_keyword(lookahead) && + ts_subtree_symbol(lookahead) != self->language->keyword_capture_token + ) { + ts_language_table_entry(self->language, state, self->language->keyword_capture_token, &table_entry); + if (table_entry.action_count > 0) { + LOG( + "switch from_keyword:%s, to_word_token:%s", + TREE_NAME(lookahead), + SYM_NAME(self->language->keyword_capture_token) + ); - MutableSubtree mutable_lookahead = - ts_subtree_make_mut(&self->tree_pool, lookahead); - ts_subtree_set_symbol(&mutable_lookahead, - self->language->keyword_capture_token, - self->language); - lookahead = ts_subtree_from_mut(mutable_lookahead); - continue; - } - } + MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); + ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language); + lookahead = ts_subtree_from_mut(mutable_lookahead); + continue; + } + } - // If the current lookahead token is not valid and the parser is - // already in the 
error state, restart the error recovery process. - // TODO - can this be unified with the other `RECOVER` case above? - if (state == ERROR_STATE) - { - ts_parser__recover(self, version, lookahead); - return true; - } + // If the current lookahead token is not valid and the parser is + // already in the error state, restart the error recovery process. + // TODO - can this be unified with the other `RECOVER` case above? + if (state == ERROR_STATE) { + ts_parser__recover(self, version, lookahead); + return true; + } - // If the current lookahead token is not valid and the previous - // subtree on the stack was reused from an old tree, it isn't actually - // valid to reuse it. Remove it from the stack, and in its place, - // push each of its children. Then try again to process the current - // lookahead. - if (ts_parser__breakdown_top_of_stack(self, version)) - { - state = ts_stack_state(self->stack, version); - ts_subtree_release(&self->tree_pool, lookahead); - needs_lex = true; - continue; - } + // If the current lookahead token is not valid and the previous + // subtree on the stack was reused from an old tree, it isn't actually + // valid to reuse it. Remove it from the stack, and in its place, + // push each of its children. Then try again to process the current + // lookahead. + if (ts_parser__breakdown_top_of_stack(self, version)) { + state = ts_stack_state(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); + needs_lex = true; + continue; + } - // At this point, the current lookahead token is definitely not valid - // for this parse stack version. Mark this version as paused and - // continue processing any other stack versions that might exist. If - // some other version advances successfully, then this version can - // simply be removed. But if all versions end up paused, then error - // recovery is needed. 
- LOG("detect_error"); - ts_stack_pause(self->stack, version, lookahead); - return true; - } + // At this point, the current lookahead token is definitely not valid + // for this parse stack version. Mark this version as paused and continue + // processing any other stack versions that might exist. If some other + // version advances successfully, then this version can simply be removed. + // But if all versions end up paused, then error recovery is needed. + LOG("detect_error"); + ts_stack_pause(self->stack, version, lookahead); + return true; + } } -static unsigned ts_parser__condense_stack(t_parser *self) -{ - bool made_changes = false; - unsigned min_error_cost = UINT_MAX; - for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) - { - // Prune any versions that have been marked for removal. - if (ts_stack_is_halted(self->stack, i)) - { - ts_stack_remove_version(self->stack, i); - i--; - continue; - } +static unsigned ts_parser__condense_stack(TSParser *self) { + bool made_changes = false; + unsigned min_error_cost = UINT_MAX; + for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { + // Prune any versions that have been marked for removal. + if (ts_stack_is_halted(self->stack, i)) { + ts_stack_remove_version(self->stack, i); + i--; + continue; + } - // Keep track of the minimum error cost of any stack version so - // that it can be returned. - t_error_status status_i = ts_parser__version_status(self, i); - if (!status_i.is_in_error && status_i.cost < min_error_cost) - { - min_error_cost = status_i.cost; - } + // Keep track of the minimum error cost of any stack version so + // that it can be returned. + ErrorStatus status_i = ts_parser__version_status(self, i); + if (!status_i.is_in_error && status_i.cost < min_error_cost) { + min_error_cost = status_i.cost; + } - // Examine each pair of stack versions, removing any versions that - // are clearly worse than another version. 
Ensure that the versions - // are ordered from most promising to least promising. - for (StackVersion j = 0; j < i; j++) - { - t_error_status status_j = ts_parser__version_status(self, j); + // Examine each pair of stack versions, removing any versions that + // are clearly worse than another version. Ensure that the versions + // are ordered from most promising to least promising. + for (StackVersion j = 0; j < i; j++) { + ErrorStatus status_j = ts_parser__version_status(self, j); - switch (ts_parser__compare_versions(self, status_j, status_i)) - { - case ErrorComparisonTakeLeft: - made_changes = true; - ts_stack_remove_version(self->stack, i); - i--; - j = i; - break; + switch (ts_parser__compare_versions(self, status_j, status_i)) { + case ErrorComparisonTakeLeft: + made_changes = true; + ts_stack_remove_version(self->stack, i); + i--; + j = i; + break; - case ErrorComparisonPreferLeft: - case ErrorComparisonNone: - if (ts_stack_merge(self->stack, j, i)) - { - made_changes = true; - i--; - j = i; - } - break; + case ErrorComparisonPreferLeft: + case ErrorComparisonNone: + if (ts_stack_merge(self->stack, j, i)) { + made_changes = true; + i--; + j = i; + } + break; - case ErrorComparisonPreferRight: - made_changes = true; - if (ts_stack_merge(self->stack, j, i)) - { - i--; - j = i; - } - else - { - ts_stack_swap_versions(self->stack, i, j); - } - break; + case ErrorComparisonPreferRight: + made_changes = true; + if (ts_stack_merge(self->stack, j, i)) { + i--; + j = i; + } else { + ts_stack_swap_versions(self->stack, i, j); + } + break; - case ErrorComparisonTakeRight: - made_changes = true; - ts_stack_remove_version(self->stack, j); - i--; - j--; - break; - } - } - } + case ErrorComparisonTakeRight: + made_changes = true; + ts_stack_remove_version(self->stack, j); + i--; + j--; + break; + } + } + } - // Enforce a hard upper bound on the number of stack versions by - // discarding the least promising versions. 
- while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) - { - ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); - made_changes = true; - } + // Enforce a hard upper bound on the number of stack versions by + // discarding the least promising versions. + while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { + ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); + made_changes = true; + } - // If the best-performing stack version is currently paused, or all - // versions are paused, then resume the best paused version and begin - // the error recovery process. Otherwise, remove the paused versions. - if (ts_stack_version_count(self->stack) > 0) - { - bool has_unpaused_version = false; - for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; - i++) - { - if (ts_stack_is_paused(self->stack, i)) - { - if (!has_unpaused_version && - self->accept_count < MAX_VERSION_COUNT) - { - LOG("resume version:%u", i); - min_error_cost = ts_stack_error_cost(self->stack, i); - Subtree lookahead = ts_stack_resume(self->stack, i); - ts_parser__handle_error(self, i, lookahead); - has_unpaused_version = true; - } - else - { - ts_stack_remove_version(self->stack, i); - i--; - n--; - } - } - else - { - has_unpaused_version = true; - } - } - } + // If the best-performing stack version is currently paused, or all + // versions are paused, then resume the best paused version and begin + // the error recovery process. Otherwise, remove the paused versions. 
+ if (ts_stack_version_count(self->stack) > 0) { + bool has_unpaused_version = false; + for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { + if (ts_stack_is_paused(self->stack, i)) { + if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) { + LOG("resume version:%u", i); + min_error_cost = ts_stack_error_cost(self->stack, i); + Subtree lookahead = ts_stack_resume(self->stack, i); + ts_parser__handle_error(self, i, lookahead); + has_unpaused_version = true; + } else { + ts_stack_remove_version(self->stack, i); + i--; + n--; + } + } else { + has_unpaused_version = true; + } + } + } - if (made_changes) - { - LOG("condense"); - LOG_STACK(); - } + if (made_changes) { + LOG("condense"); + LOG_STACK(); + } - return min_error_cost; + return min_error_cost; } -static bool ts_parser_has_outstanding_parse(t_parser *self) -{ - return (self->external_scanner_payload || - ts_stack_state(self->stack, 0) != 1 || - ts_stack_node_count_since_error(self->stack, 0) != 0); +static bool ts_parser_has_outstanding_parse(TSParser *self) { + return ( + self->external_scanner_payload || + ts_stack_state(self->stack, 0) != 1 || + ts_stack_node_count_since_error(self->stack, 0) != 0 + ); } // Parser - Public -t_parser *ts_parser_new(void) -{ - t_parser *self = calloc(1, sizeof(t_parser)); - ts_lexer_init(&self->lexer); - self->reduce_actions = vec_reduce_action_new(4, NULL); - self->tree_pool = ts_subtree_pool_new(32); - self->stack = ts_stack_new(&self->tree_pool); - self->finished_tree = NULL_SUBTREE; - self->reusable_node = reusable_node_new(); - self->dot_graph_file = NULL; - self->cancellation_flag = NULL; - self->timeot_duration = 0; - self->language = NULL; - self->has_scanner_error = false; - self->external_scanner_payload = NULL; - self->end_clock = 0; - self->operation_count = 0; - self->old_tree = NULL_SUBTREE; - self->included_range_differences = vec_parser_range_new(0, NULL); - self->included_range_difference_index = 0; - 
ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); - return self; +TSParser *ts_parser_new(void) { + TSParser *self = ts_calloc(1, sizeof(TSParser)); + ts_lexer_init(&self->lexer); + array_init(&self->reduce_actions); + array_reserve(&self->reduce_actions, 4); + self->tree_pool = ts_subtree_pool_new(32); + self->stack = ts_stack_new(&self->tree_pool); + self->finished_tree = NULL_SUBTREE; + self->reusable_node = reusable_node_new(); + self->dot_graph_file = NULL; + self->cancellation_flag = NULL; + self->timeout_duration = 0; + self->language = NULL; + self->has_scanner_error = false; + self->external_scanner_payload = NULL; + self->end_clock = clock_null(); + self->operation_count = 0; + self->old_tree = NULL_SUBTREE; + self->included_range_differences = (TSRangeArray) array_new(); + self->included_range_difference_index = 0; + ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); + return self; } -void ts_parser_delete(t_parser *self) -{ - if (!self) - return; +void ts_parser_delete(TSParser *self) { + if (!self) return; - ts_parser_set_language(self, NULL); - ts_stack_delete(self->stack); - if (self->reduce_actions.buffer) - vec_reduce_action_free(self->reduce_actions); - if (self->included_range_differences.buffer) - array_delete(&self->included_range_differences); - if (self->old_tree.ptr) - { - ts_subtree_release(&self->tree_pool, self->old_tree); - self->old_tree = NULL_SUBTREE; - } - ts_lexer_delete(&self->lexer); - ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); - ts_subtree_pool_delete(&self->tree_pool); - reusable_node_delete(&self->reusable_node); - array_delete(&self->trailing_extras); - array_delete(&self->trailing_extras2); - array_delete(&self->scratch_trees); - free(self); + ts_parser_set_language(self, NULL); + ts_stack_delete(self->stack); + if (self->reduce_actions.contents) { + array_delete(&self->reduce_actions); + } + if (self->included_range_differences.contents) { + 
array_delete(&self->included_range_differences); + } + if (self->old_tree.ptr) { + ts_subtree_release(&self->tree_pool, self->old_tree); + self->old_tree = NULL_SUBTREE; + } + ts_lexer_delete(&self->lexer); + ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); + ts_subtree_pool_delete(&self->tree_pool); + reusable_node_delete(&self->reusable_node); + array_delete(&self->trailing_extras); + array_delete(&self->trailing_extras2); + array_delete(&self->scratch_trees); + ts_free(self); } -const t_language *ts_parser_language(const t_parser *self) -{ - return self->language; +const TSLanguage *ts_parser_language(const TSParser *self) { + return self->language; } -bool ts_parser_set_language(t_parser *self, const t_language *language) -{ - ts_parser_reset(self); - ts_language_delete(self->language); - self->language = NULL; +bool ts_parser_set_language(TSParser *self, const TSLanguage *language) { + ts_parser_reset(self); + ts_language_delete(self->language); + self->language = NULL; - if (language) - { - if (language->version > TREE_SITTER_LANGUAGE_VERSION || - language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION) - return false; - } + if (language) { + if ( + language->version > TREE_SITTER_LANGUAGE_VERSION || + language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION + ) return false; - self->language = ts_language_copy(language); - return true; + + } + + self->language = ts_language_copy(language); + return true; } -t_parse_logger ts_parser_logger(const t_parser *self) -{ - return self->lexer.logger; +TSLogger ts_parser_logger(const TSParser *self) { + return self->lexer.logger; } -void ts_parser_set_logger(t_parser *self, t_parse_logger logger) -{ - self->lexer.logger = logger; +void ts_parser_set_logger(TSParser *self, TSLogger logger) { + self->lexer.logger = logger; } -void ts_parser_print_dot_graphs(t_parser *self, int fd) -{ - if (self->dot_graph_file) - { - fclose(self->dot_graph_file); - } +void 
ts_parser_print_dot_graphs(TSParser *self, int fd) { + if (self->dot_graph_file) { + fclose(self->dot_graph_file); + } - if (fd >= 0) - { -#ifdef _WIN32 - self->dot_graph_file = _fdopen(fd, "a"); -#else - self->dot_graph_file = fdopen(fd, "a"); -#endif - } - else - { - self->dot_graph_file = NULL; - } + if (fd >= 0) { + #ifdef _WIN32 + self->dot_graph_file = _fdopen(fd, "a"); + #else + self->dot_graph_file = fdopen(fd, "a"); + #endif + } else { + self->dot_graph_file = NULL; + } } -const size_t *ts_parser_cancellation_flag(const t_parser *self) -{ - return (const size_t *)self->cancellation_flag; +const size_t *ts_parser_cancellation_flag(const TSParser *self) { + return (const size_t *)self->cancellation_flag; } -void ts_parser_set_cancellation_flag(t_parser *self, const size_t *flag) -{ - self->cancellation_flag = (const volatile size_t *)flag; +void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) { + self->cancellation_flag = (const volatile size_t *)flag; } -t_u64 ts_parser_timeot_micros(const t_parser *self) -{ - (void)(self); - return 0; +uint64_t ts_parser_timeout_micros(const TSParser *self) { + return duration_to_micros(self->timeout_duration); } -void ts_parser_set_timeot_micros(t_parser *self, t_u64 timeot_micros) -{ - (void)(timeot_micros); - self->timeot_duration = 0; +void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) { + self->timeout_duration = duration_from_micros(timeout_micros); } -bool ts_parser_set_included_ranges(t_parser *self, const t_parser_range *ranges, - t_u32 count) -{ - return ts_lexer_set_included_ranges(&self->lexer, ranges, count); +bool ts_parser_set_included_ranges( + TSParser *self, + const TSRange *ranges, + uint32_t count +) { + return ts_lexer_set_included_ranges(&self->lexer, ranges, count); } -const t_parser_range *ts_parser_included_ranges(const t_parser *self, - t_u32 *count) -{ - return ts_lexer_included_ranges(&self->lexer, count); +const TSRange 
*ts_parser_included_ranges(const TSParser *self, uint32_t *count) { + return ts_lexer_included_ranges(&self->lexer, count); } -void ts_parser_reset(t_parser *self) -{ - ts_parser__external_scanner_destroy(self); +void ts_parser_reset(TSParser *self) { + ts_parser__external_scanner_destroy(self); - if (self->old_tree.ptr) - { - ts_subtree_release(&self->tree_pool, self->old_tree); - self->old_tree = NULL_SUBTREE; - } + if (self->old_tree.ptr) { + ts_subtree_release(&self->tree_pool, self->old_tree); + self->old_tree = NULL_SUBTREE; + } - reusable_node_clear(&self->reusable_node); - ts_lexer_reset(&self->lexer, length_zero()); - ts_stack_clear(self->stack); - ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); - if (self->finished_tree.ptr) - { - ts_subtree_release(&self->tree_pool, self->finished_tree); - self->finished_tree = NULL_SUBTREE; - } - self->accept_count = 0; - self->has_scanner_error = false; + reusable_node_clear(&self->reusable_node); + ts_lexer_reset(&self->lexer, length_zero()); + ts_stack_clear(self->stack); + ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); + if (self->finished_tree.ptr) { + ts_subtree_release(&self->tree_pool, self->finished_tree); + self->finished_tree = NULL_SUBTREE; + } + self->accept_count = 0; + self->has_scanner_error = false; } -t_parse_tree *ts_parser_parse(t_parser *self, const t_parse_tree *old_tree, - t_parse_input input) -{ - t_parse_tree *result = NULL; - old_tree = NULL; - (void)(old_tree); - if (!self->language || !input.read) - return NULL; +TSTree *ts_parser_parse( + TSParser *self, + const TSTree *old_tree, + TSInput input +) { + TSTree *result = NULL; + if (!self->language || !input.read) return NULL; - ts_lexer_set_input(&self->lexer, input); - self->included_range_differences.len = 0; - self->included_range_difference_index = 0; + - if (ts_parser_has_outstanding_parse(self)) - { - LOG("resume_parsing"); - } - else - { - ts_parser__external_scanner_create(self); - if 
(self->has_scanner_error) - goto exit; + ts_lexer_set_input(&self->lexer, input); + array_clear(&self->included_range_differences); + self->included_range_difference_index = 0; - reusable_node_clear(&self->reusable_node); - LOG("new_parse"); - } + if (ts_parser_has_outstanding_parse(self)) { + LOG("resume_parsing"); + } else { + ts_parser__external_scanner_create(self); + if (self->has_scanner_error) goto exit; - self->operation_count = 0; + if (old_tree) { + ts_subtree_retain(old_tree->root); + self->old_tree = old_tree->root; + ts_range_array_get_changed_ranges( + old_tree->included_ranges, old_tree->included_range_count, + self->lexer.included_ranges, self->lexer.included_range_count, + &self->included_range_differences + ); + reusable_node_reset(&self->reusable_node, old_tree->root); + LOG("parse_after_edit"); + LOG_TREE(self->old_tree); + for (unsigned i = 0; i < self->included_range_differences.size; i++) { + TSRange *range = &self->included_range_differences.contents[i]; + LOG("different_included_range %u - %u", range->start_byte, range->end_byte); + } + } else { + reusable_node_clear(&self->reusable_node); + LOG("new_parse"); + } + } - t_u32 position = 0, last_position = 0, version_count = 0; - do - { - for (StackVersion version = 0; - version_count = ts_stack_version_count(self->stack), - version < version_count; - version++) - { - bool allow_node_reuse = version_count == 1; - while (ts_stack_is_active(self->stack, version)) - { - LOG("process version:%u, version_count:%u, state:%d, row:%u, " - "col:%u", - version, ts_stack_version_count(self->stack), - ts_stack_state(self->stack, version), - ts_stack_position(self->stack, version).extent.row, - ts_stack_position(self->stack, version).extent.column); + self->operation_count = 0; + if (self->timeout_duration) { + self->end_clock = clock_after(clock_now(), self->timeout_duration); + } else { + self->end_clock = clock_null(); + } - if (!ts_parser__advance(self, version, allow_node_reuse)) - { - if 
(self->has_scanner_error) - goto exit; - return NULL; - } + uint32_t position = 0, last_position = 0, version_count = 0; + do { + for ( + StackVersion version = 0; + version_count = ts_stack_version_count(self->stack), + version < version_count; + version++ + ) { + bool allow_node_reuse = version_count == 1; + while (ts_stack_is_active(self->stack, version)) { + LOG( + "process version:%u, version_count:%u, state:%d, row:%u, col:%u", + version, + ts_stack_version_count(self->stack), + ts_stack_state(self->stack, version), + ts_stack_position(self->stack, version).extent.row, + ts_stack_position(self->stack, version).extent.column + ); - LOG_STACK(); + if (!ts_parser__advance(self, version, allow_node_reuse)) { + if (self->has_scanner_error) goto exit; + return NULL; + } - position = ts_stack_position(self->stack, version).bytes; - if (position > last_position || - (version > 0 && position == last_position)) - { - last_position = position; - break; - } - } - } + LOG_STACK(); - // After advancing each version of the stack, re-sort the versions by - // their cost, removing any versions that are no longer worth pursuing. - unsigned min_error_cost = ts_parser__condense_stack(self); + position = ts_stack_position(self->stack, version).bytes; + if (position > last_position || (version > 0 && position == last_position)) { + last_position = position; + break; + } + } + } - // If there's already a finished parse tree that's better than any - // in-progress version, then terminate parsing. Clear the parse stack to - // remove any extra references to subtrees within the finished tree, - // ensuring that these subtrees can be safely mutated in-place for - // rebalancing. - if (self->finished_tree.ptr && - ts_subtree_error_cost(self->finished_tree) < min_error_cost) - { - ts_stack_clear(self->stack); - break; - } + // After advancing each version of the stack, re-sort the versions by their cost, + // removing any versions that are no longer worth pursuing. 
+ unsigned min_error_cost = ts_parser__condense_stack(self); - while (self->included_range_difference_index < - self->included_range_differences.len) - { - t_parser_range *range = - &self->included_range_differences - .buffer[self->included_range_difference_index]; - if (range->end_byte <= position) - { - self->included_range_difference_index++; - } - else - { - break; - } - } - } while (version_count != 0); + // If there's already a finished parse tree that's better than any in-progress version, + // then terminate parsing. Clear the parse stack to remove any extra references to subtrees + // within the finished tree, ensuring that these subtrees can be safely mutated in-place + // for rebalancing. + if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) { + ts_stack_clear(self->stack); + break; + } - assert(self->finished_tree.ptr); - ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language); - LOG("done"); - LOG_TREE(self->finished_tree); + while (self->included_range_difference_index < self->included_range_differences.size) { + TSRange *range = &self->included_range_differences.contents[self->included_range_difference_index]; + if (range->end_byte <= position) { + self->included_range_difference_index++; + } else { + break; + } + } + } while (version_count != 0); - result = ts_tree_new(self->finished_tree, self->language, - self->lexer.included_ranges, - self->lexer.included_range_count); - self->finished_tree = NULL_SUBTREE; + assert(self->finished_tree.ptr); + ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language); + LOG("done"); + LOG_TREE(self->finished_tree); + + result = ts_tree_new( + self->finished_tree, + self->language, + self->lexer.included_ranges, + self->lexer.included_range_count + ); + self->finished_tree = NULL_SUBTREE; exit: - ts_parser_reset(self); - return result; + ts_parser_reset(self); + return result; } -t_parse_tree *ts_parser_parse_string(t_parser *self, - const 
t_parse_tree *old_tree, - const char *string, t_u32 length) -{ - return ts_parser_parse_string_encoding(self, old_tree, string, length, - InputEncoding8); +TSTree *ts_parser_parse_string( + TSParser *self, + const TSTree *old_tree, + const char *string, + uint32_t length +) { + return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8); } -t_parse_tree *ts_parser_parse_string_encoding(t_parser *self, - const t_parse_tree *old_tree, - const char *string, t_u32 length, - t_input_encoding encoding) -{ - t_string_input input = {string, length}; - return ts_parser_parse(self, old_tree, - (t_parse_input){ - &input, - ts_string_inpt_read, - encoding, - }); +TSTree *ts_parser_parse_string_encoding( + TSParser *self, + const TSTree *old_tree, + const char *string, + uint32_t length, + TSInputEncoding encoding +) { + TSStringInput input = {string, length}; + return ts_parser_parse(self, old_tree, (TSInput) { + &input, + ts_string_input_read, + encoding, + }); } #undef LOG diff --git a/parser/src/parser.h b/parser/src/parser.h new file mode 100644 index 00000000..17f0e94b --- /dev/null +++ b/parser/src/parser.h @@ -0,0 +1,265 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +#ifndef TREE_SITTER_API_H_ +typedef uint16_t TSStateId; +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +typedef struct { + uint16_t index; + uint16_t length; +} TSFieldMapSlice; + +typedef struct { + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, 
bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); +}; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +typedef struct { + int32_t start; + int32_t end; +} TSCharacterRange; + +struct TSLanguage { + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char * const *symbol_names; + const char * const *field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); 
+ void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + const TSStateId *primary_state_ids; +}; + +static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { + uint32_t index = 0; + uint32_t size = len - index; + while (size > 1) { + uint32_t half_size = size / 2; + uint32_t mid_index = index + half_size; + TSCharacterRange *range = &ranges[mid_index]; + if (lookahead >= range->start && lookahead <= range->end) { + return true; + } else if (lookahead > range->end) { + index = mid_index; + } + size -= half_size; + } + TSCharacterRange *range = &ranges[index]; + return (lookahead >= range->start && lookahead <= range->end); +} + +/* + * Lexer Macros + */ + +#ifdef _MSC_VER +#define UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNUSED __attribute__((unused)) +#endif + +#define START_LEXER() \ + bool result = false; \ + bool skip = false; \ + UNUSED \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define ADVANCE_MAP(...) 
\ + { \ + static const uint16_t map[] = { __VA_ARGS__ }; \ + for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ + if (map[i] == lookahead) { \ + state = map[i + 1]; \ + goto next_state; \ + } \ + } \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value) \ + } \ + }} + +#define SHIFT_REPEAT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value), \ + .repetition = true \ + } \ + }} + +#define SHIFT_EXTRA() \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + }} + +#define REDUCE(symbol_name, children, precedence, prod_id) \ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_name, \ + .child_count = children, \ + .dynamic_precedence = precedence, \ + .production_id = prod_id \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_ diff --git a/parser/src/point.h b/parser/src/point.h new file mode 100644 index 00000000..1c8b7133 --- /dev/null +++ b/parser/src/point.h @@ -0,0 +1,62 @@ +#ifndef TREE_SITTER_POINT_H_ +#define TREE_SITTER_POINT_H_ + +#include "./api.h" + +#define POINT_ZERO ((TSPoint) {0, 0}) +#define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX}) + +static inline TSPoint point__new(unsigned row, unsigned column) { + TSPoint result = {row, column}; + return result; +} + +static inline TSPoint 
point_add(TSPoint a, TSPoint b) { + if (b.row > 0) + return point__new(a.row + b.row, b.column); + else + return point__new(a.row, a.column + b.column); +} + +static inline TSPoint point_sub(TSPoint a, TSPoint b) { + if (a.row > b.row) + return point__new(a.row - b.row, a.column); + else + return point__new(0, a.column - b.column); +} + +static inline bool point_lte(TSPoint a, TSPoint b) { + return (a.row < b.row) || (a.row == b.row && a.column <= b.column); +} + +static inline bool point_lt(TSPoint a, TSPoint b) { + return (a.row < b.row) || (a.row == b.row && a.column < b.column); +} + +static inline bool point_gt(TSPoint a, TSPoint b) { + return (a.row > b.row) || (a.row == b.row && a.column > b.column); +} + +static inline bool point_gte(TSPoint a, TSPoint b) { + return (a.row > b.row) || (a.row == b.row && a.column >= b.column); +} + +static inline bool point_eq(TSPoint a, TSPoint b) { + return a.row == b.row && a.column == b.column; +} + +static inline TSPoint point_min(TSPoint a, TSPoint b) { + if (a.row < b.row || (a.row == b.row && a.column < b.column)) + return a; + else + return b; +} + +static inline TSPoint point_max(TSPoint a, TSPoint b) { + if (a.row > b.row || (a.row == b.row && a.column > b.column)) + return a; + else + return b; +} + +#endif diff --git a/parser/src/query.c b/parser/src/query.c new file mode 100644 index 00000000..c75700b8 --- /dev/null +++ b/parser/src/query.c @@ -0,0 +1,4134 @@ +#include "./api.h" +#include "./alloc.h" +#include "./array.h" +#include "./language.h" +#include "./point.h" +#include "./tree_cursor.h" +// #include "./unicode.h" +#include + +// #define DEBUG_ANALYZE_QUERY +// #define DEBUG_EXECUTE_QUERY + +#define MAX_STEP_CAPTURE_COUNT 3 +#define MAX_NEGATED_FIELD_COUNT 8 +#define MAX_STATE_PREDECESSOR_COUNT 256 +#define MAX_ANALYSIS_STATE_DEPTH 8 +#define MAX_ANALYSIS_ITERATION_COUNT 256 + +/* + * Stream - A sequence of unicode characters derived from a UTF8 string. 
+ * This struct is used in parsing queries from S-expressions. + */ +typedef struct { + const char *input; + const char *start; + const char *end; + int32_t next; + uint8_t next_size; +} Stream; + +/* + * QueryStep - A step in the process of matching a query. Each node within + * a query S-expression corresponds to one of these steps. An entire pattern + * is represented as a sequence of these steps. The basic properties of a + * node are represented by these fields: + * - `symbol` - The grammar symbol to match. A zero value represents the + * wildcard symbol, '_'. + * - `field` - The field name to match. A zero value means that a field name + * was not specified. + * - `capture_ids` - An array of integers representing the names of captures + * associated with this node in the pattern, terminated by a `NONE` value. + * - `depth` - The depth where this node occurs in the pattern. The root node + * of the pattern has depth zero. + * - `negated_field_list_id` - An id representing a set of fields that must + * not be present on a node matching this step. + * + * Steps have some additional fields in order to handle the `.` (or "anchor") operator, + * which forbids additional child nodes: + * - `is_immediate` - Indicates that the node matching this step cannot be preceded + * by other sibling nodes that weren't specified in the pattern. + * - `is_last_child` - Indicates that the node matching this step cannot have any + * subsequent named siblings. + * + * For simple patterns, steps are matched in sequential order. But in order to + * handle alternative/repeated/optional sub-patterns, query steps are not always + * structured as a linear sequence; they sometimes need to split and merge. This + * is done using the following fields: + * - `alternative_index` - The index of a different query step that serves as + * an alternative to this step. A `NONE` value represents no alternative. 
+ * When a query state reaches a step with an alternative index, the state + * is duplicated, with one copy remaining at the original step, and one copy + * moving to the alternative step. The alternative may have its own alternative + * step, so this splitting is an iterative process. + * - `is_dead_end` - Indicates that this state cannot be passed directly, and + * exists only in order to redirect to an alternative index, with no splitting. + * - `is_pass_through` - Indicates that state has no matching logic of its own, + * and exists only to split a state. One copy of the state advances immediately + * to the next step, and one moves to the alternative step. + * - `alternative_is_immediate` - Indicates that this step's alternative step + * should be treated as if `is_immediate` is true. + * + * Steps also store some derived state that summarizes how they relate to other + * steps within the same pattern. This is used to optimize the matching process: + * - `contains_captures` - Indicates that this step or one of its child steps + * has a non-empty `capture_ids` list. + * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then + * it and all of its subsequent sibling steps within the same parent pattern + * are guaranteed to match. + * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but + * for the entire top-level pattern. When iterating through a query's + * captures using `ts_query_cursor_next_capture`, this field is used to + * detect that a capture can safely be returned from a match that has not + * even completed yet. 
+ */ +typedef struct { + TSSymbol symbol; + TSSymbol supertype_symbol; + TSFieldId field; + uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; + uint16_t depth; + uint16_t alternative_index; + uint16_t negated_field_list_id; + bool is_named: 1; + bool is_immediate: 1; + bool is_last_child: 1; + bool is_pass_through: 1; + bool is_dead_end: 1; + bool alternative_is_immediate: 1; + bool contains_captures: 1; + bool root_pattern_guaranteed: 1; + bool parent_pattern_guaranteed: 1; +} QueryStep; + +/* + * Slice - A slice of an external array. Within a query, capture names, + * literal string values, and predicate step information are stored in three + * contiguous arrays. Individual captures, string values, and predicates are + * represented as slices of these three arrays. + */ +typedef struct { + uint32_t offset; + uint32_t length; +} Slice; + +/* + * SymbolTable - a two-way mapping of strings to ids. + */ +typedef struct { + Array(char) characters; + Array(Slice) slices; +} SymbolTable; + +/** + * CaptureQuantififers - a data structure holding the quantifiers of pattern captures. + */ +typedef Array(uint8_t) CaptureQuantifiers; + +/* + * PatternEntry - Information about the starting point for matching a particular + * pattern. These entries are stored in a 'pattern map' - a sorted array that + * makes it possible to efficiently lookup patterns based on the symbol for their + * first step. The entry consists of the following fields: + * - `pattern_index` - the index of the pattern within the query + * - `step_index` - the index of the pattern's first step in the shared `steps` array + * - `is_rooted` - whether or not the pattern has a single root node. This property + * affects decisions about whether or not to start the pattern for nodes outside + * of a QueryCursor's range restriction. 
+ */ +typedef struct { + uint16_t step_index; + uint16_t pattern_index; + bool is_rooted; +} PatternEntry; + +typedef struct { + Slice steps; + Slice predicate_steps; + uint32_t start_byte; + bool is_non_local; +} QueryPattern; + +typedef struct { + uint32_t byte_offset; + uint16_t step_index; +} StepOffset; + +/* + * QueryState - The state of an in-progress match of a particular pattern + * in a query. While executing, a `TSQueryCursor` must keep track of a number + * of possible in-progress matches. Each of those possible matches is + * represented as one of these states. Fields: + * - `id` - A numeric id that is exposed to the public API. This allows the + * caller to remove a given match, preventing any more of its captures + * from being returned. + * - `start_depth` - The depth in the tree where the first step of the state's + * pattern was matched. + * - `pattern_index` - The pattern that the state is matching. + * - `consumed_capture_count` - The number of captures from this match that + * have already been returned. + * - `capture_list_id` - A numeric id that can be used to retrieve the state's + * list of captures from the `CaptureListPool`. + * - `seeking_immediate_match` - A flag that indicates that the state's next + * step must be matched by the very next sibling. This is used when + * processing repetitions. + * - `has_in_progress_alternatives` - A flag that indicates that there is are + * other states that have the same captures as this state, but are at + * different steps in their pattern. This means that in order to obey the + * 'longest-match' rule, this state should not be returned as a match until + * it is clear that there can be no other alternative match with more captures. 
+ */ +typedef struct { + uint32_t id; + uint32_t capture_list_id; + uint16_t start_depth; + uint16_t step_index; + uint16_t pattern_index; + uint16_t consumed_capture_count: 12; + bool seeking_immediate_match: 1; + bool has_in_progress_alternatives: 1; + bool dead: 1; + bool needs_parent: 1; +} QueryState; + +typedef Array(TSQueryCapture) CaptureList; + +/* + * CaptureListPool - A collection of *lists* of captures. Each query state needs + * to maintain its own list of captures. To avoid repeated allocations, this struct + * maintains a fixed set of capture lists, and keeps track of which ones are + * currently in use by a query state. + */ +typedef struct { + Array(CaptureList) list; + CaptureList empty_list; + // The maximum number of capture lists that we are allowed to allocate. We + // never allow `list` to allocate more entries than this, dropping pending + // matches if needed to stay under the limit. + uint32_t max_capture_list_count; + // The number of capture lists allocated in `list` that are not currently in + // use. We reuse those existing-but-unused capture lists before trying to + // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture + // list's length to indicate that it's not in use. + uint32_t free_capture_list_count; +} CaptureListPool; + +/* + * AnalysisState - The state needed for walking the parse table when analyzing + * a query pattern, to determine at which steps the pattern might fail to match. 
+ */ +typedef struct { + TSStateId parse_state; + TSSymbol parent_symbol; + uint16_t child_index; + TSFieldId field_id: 15; + bool done: 1; +} AnalysisStateEntry; + +typedef struct { + AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH]; + uint16_t depth; + uint16_t step_index; + TSSymbol root_symbol; +} AnalysisState; + +typedef Array(AnalysisState *) AnalysisStateSet; + +typedef struct { + AnalysisStateSet states; + AnalysisStateSet next_states; + AnalysisStateSet deeper_states; + AnalysisStateSet state_pool; + Array(uint16_t) final_step_indices; + Array(TSSymbol) finished_parent_symbols; + bool did_abort; +} QueryAnalysis; + +/* + * AnalysisSubgraph - A subset of the states in the parse table that are used + * in constructing nodes with a certain symbol. Each state is accompanied by + * some information about the possible node that could be produced in + * downstream states. + */ +typedef struct { + TSStateId state; + uint16_t production_id; + uint8_t child_index: 7; + bool done: 1; +} AnalysisSubgraphNode; + +typedef struct { + TSSymbol symbol; + Array(TSStateId) start_states; + Array(AnalysisSubgraphNode) nodes; +} AnalysisSubgraph; + +typedef Array(AnalysisSubgraph) AnalysisSubgraphArray; + +/* + * StatePredecessorMap - A map that stores the predecessors of each parse state. + * This is used during query analysis to determine which parse states can lead + * to which reduce actions. + */ +typedef struct { + TSStateId *contents; +} StatePredecessorMap; + +/* + * TSQuery - A tree query, compiled from a string of S-expressions. The query + * itself is immutable. The mutable state used in the process of executing the + * query is stored in a `TSQueryCursor`. 
+ */ +struct TSQuery { + SymbolTable captures; + SymbolTable predicate_values; + Array(CaptureQuantifiers) capture_quantifiers; + Array(QueryStep) steps; + Array(PatternEntry) pattern_map; + Array(TSQueryPredicateStep) predicate_steps; + Array(QueryPattern) patterns; + Array(StepOffset) step_offsets; + Array(TSFieldId) negated_fields; + Array(char) string_buffer; + Array(TSSymbol) repeat_symbols_with_rootless_patterns; + const TSLanguage *language; + uint16_t wildcard_root_pattern_count; +}; + +/* + * TSQueryCursor - A stateful struct used to execute a query on a tree. + */ +struct TSQueryCursor { + const TSQuery *query; + TSTreeCursor cursor; + Array(QueryState) states; + Array(QueryState) finished_states; + CaptureListPool capture_list_pool; + uint32_t depth; + uint32_t max_start_depth; + uint32_t start_byte; + uint32_t end_byte; + TSPoint start_point; + TSPoint end_point; + uint32_t next_state_id; + bool on_visible_node; + bool ascending; + bool halted; + bool did_exceed_match_limit; +}; + +static const TSQueryError PARENT_DONE = -1; +static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; +static const uint16_t NONE = UINT16_MAX; +static const TSSymbol WILDCARD_SYMBOL = 0; + +/********** + * Stream + **********/ + +// Advance to the next unicode code point in the stream. +static bool stream_advance(Stream *self) { + self->input += self->next_size; + if (self->input < self->end) { + uint32_t size = ts_decode_ascii( + (const uint8_t *)self->input, + (uint32_t)(self->end - self->input), + &self->next + ); + if (size > 0) { + self->next_size = size; + return true; + } + } else { + self->next_size = 0; + self->next = '\0'; + } + return false; +} + +// Reset the stream to the given input position, represented as a pointer +// into the input string. 
+static void stream_reset(Stream *self, const char *input) { + self->input = input; + self->next_size = 0; + stream_advance(self); +} + +static Stream stream_new(const char *string, uint32_t length) { + Stream self = { + .next = 0, + .input = string, + .start = string, + .end = string + length, + }; + stream_advance(&self); + return self; +} + +static void stream_skip_whitespace(Stream *self) { + for (;;) { + if (iswspace(self->next)) { + stream_advance(self); + } else if (self->next == ';') { + // skip over comments + stream_advance(self); + while (self->next && self->next != '\n') { + if (!stream_advance(self)) break; + } + } else { + break; + } + } +} + +static bool stream_is_ident_start(Stream *self) { + return iswalnum(self->next) || self->next == '_' || self->next == '-'; +} + +static void stream_scan_identifier(Stream *stream) { + do { + stream_advance(stream); + } while ( + iswalnum(stream->next) || + stream->next == '_' || + stream->next == '-' || + stream->next == '.' || + stream->next == '?' || + stream->next == '!' + ); +} + +static uint32_t stream_offset(Stream *self) { + return (uint32_t)(self->input - self->start); +} + +/****************** + * CaptureListPool + ******************/ + +static CaptureListPool capture_list_pool_new(void) { + return (CaptureListPool) { + .list = array_new(), + .empty_list = array_new(), + .max_capture_list_count = UINT32_MAX, + .free_capture_list_count = 0, + }; +} + +static void capture_list_pool_reset(CaptureListPool *self) { + for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { + // This invalid size means that the list is not in use. 
+ self->list.contents[i].size = UINT32_MAX; + } + self->free_capture_list_count = self->list.size; +} + +static void capture_list_pool_delete(CaptureListPool *self) { + for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { + array_delete(&self->list.contents[i]); + } + array_delete(&self->list); +} + +static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) { + if (id >= self->list.size) return &self->empty_list; + return &self->list.contents[id]; +} + +static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) { + assert(id < self->list.size); + return &self->list.contents[id]; +} + +static bool capture_list_pool_is_empty(const CaptureListPool *self) { + // The capture list pool is empty if all allocated lists are in use, and we + // have reached the maximum allowed number of allocated lists. + return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count; +} + +static uint16_t capture_list_pool_acquire(CaptureListPool *self) { + // First see if any already allocated capture list is currently unused. + if (self->free_capture_list_count > 0) { + for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { + if (self->list.contents[i].size == UINT32_MAX) { + array_clear(&self->list.contents[i]); + self->free_capture_list_count--; + return i; + } + } + } + + // Otherwise allocate and initialize a new capture list, as long as that + // doesn't put us over the requested maximum. 
+ uint32_t i = self->list.size; + if (i >= self->max_capture_list_count) { + return NONE; + } + CaptureList list; + array_init(&list); + array_push(&self->list, list); + return i; +} + +static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { + if (id >= self->list.size) return; + self->list.contents[id].size = UINT32_MAX; + self->free_capture_list_count++; +} + +/************** + * Quantifiers + **************/ + +static TSQuantifier quantifier_mul( + TSQuantifier left, + TSQuantifier right +) { + switch (left) + { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + }; + break; + case TSQuantifierZeroOrMore: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + }; + break; + case TSQuantifierOne: + return right; + case TSQuantifierOneOrMore: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + } + return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
+} + +static TSQuantifier quantifier_join( + TSQuantifier left, + TSQuantifier right +) { + switch (left) + { + case TSQuantifierZero: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + }; + break; + case TSQuantifierZeroOrOne: + switch (right) { + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + return TSQuantifierZeroOrOne; + break; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + break; + }; + break; + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + switch (right) { + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + return TSQuantifierOne; + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierOneOrMore: + switch (right) { + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + } + return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
+} + +static TSQuantifier quantifier_add( + TSQuantifier left, + TSQuantifier right +) { + switch (left) + { + case TSQuantifierZero: + return right; + case TSQuantifierZeroOrOne: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierZeroOrMore: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZeroOrMore; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierOne: + switch (right) { + case TSQuantifierZero: + return TSQuantifierOne; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + } + return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
+} + +// Create new capture quantifiers structure +static CaptureQuantifiers capture_quantifiers_new(void) { + return (CaptureQuantifiers) array_new(); +} + +// Delete capture quantifiers structure +static void capture_quantifiers_delete( + CaptureQuantifiers *self +) { + array_delete(self); +} + +// Clear capture quantifiers structure +static void capture_quantifiers_clear( + CaptureQuantifiers *self +) { + array_clear(self); +} + +// Replace capture quantifiers with the given quantifiers +static void capture_quantifiers_replace( + CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers +) { + array_clear(self); + array_push_all(self, quantifiers); +} + +// Return capture quantifier for the given capture id +static TSQuantifier capture_quantifier_for_id( + const CaptureQuantifiers *self, + uint16_t id +) { + return (self->size <= id) ? TSQuantifierZero : (TSQuantifier) *array_get(self, id); +} + +// Add the given quantifier to the current value for id +static void capture_quantifiers_add_for_id( + CaptureQuantifiers *self, + uint16_t id, + TSQuantifier quantifier +) { + if (self->size <= id) { + array_grow_by(self, id + 1 - self->size); + } + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier); +} + +// Point-wise add the given quantifiers to the current values +static void capture_quantifiers_add_all( + CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers +) { + if (self->size < quantifiers->size) { + array_grow_by(self, quantifiers->size - self->size); + } + for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) { + uint8_t *quantifier = array_get(quantifiers, id); + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); + } +} + +// Join the given quantifier with the current values +static void capture_quantifiers_mul( + CaptureQuantifiers *self, + TSQuantifier 
quantifier +) { + for (uint16_t id = 0; id < (uint16_t)self->size; id++) { + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier); + } +} + +// Point-wise join the quantifiers from a list of alternatives with the current values +static void capture_quantifiers_join_all( + CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers +) { + if (self->size < quantifiers->size) { + array_grow_by(self, quantifiers->size - self->size); + } + for (uint32_t id = 0; id < quantifiers->size; id++) { + uint8_t *quantifier = array_get(quantifiers, id); + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); + } + for (uint32_t id = quantifiers->size; id < self->size; id++) { + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero); + } +} + +/************** + * SymbolTable + **************/ + +static SymbolTable symbol_table_new(void) { + return (SymbolTable) { + .characters = array_new(), + .slices = array_new(), + }; +} + +static void symbol_table_delete(SymbolTable *self) { + array_delete(&self->characters); + array_delete(&self->slices); +} + +static int symbol_table_id_for_name( + const SymbolTable *self, + const char *name, + uint32_t length +) { + for (unsigned i = 0; i < self->slices.size; i++) { + Slice slice = self->slices.contents[i]; + if ( + slice.length == length && + !strncmp(&self->characters.contents[slice.offset], name, length) + ) return i; + } + return -1; +} + +static const char *symbol_table_name_for_id( + const SymbolTable *self, + uint16_t id, + uint32_t *length +) { + Slice slice = self->slices.contents[id]; + *length = slice.length; + return &self->characters.contents[slice.offset]; +} + +static uint16_t symbol_table_insert_name( + SymbolTable *self, + const char *name, + uint32_t length 
+) { + int id = symbol_table_id_for_name(self, name, length); + if (id >= 0) return (uint16_t)id; + Slice slice = { + .offset = self->characters.size, + .length = length, + }; + array_grow_by(&self->characters, length + 1); + memcpy(&self->characters.contents[slice.offset], name, length); + self->characters.contents[self->characters.size - 1] = 0; + array_push(&self->slices, slice); + return self->slices.size - 1; +} + +/************ + * QueryStep + ************/ + +static QueryStep query_step__new( + TSSymbol symbol, + uint16_t depth, + bool is_immediate +) { + QueryStep step = { + .symbol = symbol, + .depth = depth, + .field = 0, + .alternative_index = NONE, + .negated_field_list_id = 0, + .contains_captures = false, + .is_last_child = false, + .is_named = false, + .is_pass_through = false, + .is_dead_end = false, + .root_pattern_guaranteed = false, + .is_immediate = is_immediate, + .alternative_is_immediate = false, + }; + for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { + step.capture_ids[i] = NONE; + } + return step; +} + +static void query_step__add_capture(QueryStep *self, uint16_t capture_id) { + for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { + if (self->capture_ids[i] == NONE) { + self->capture_ids[i] = capture_id; + break; + } + } +} + +static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { + for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { + if (self->capture_ids[i] == capture_id) { + self->capture_ids[i] = NONE; + while (i + 1 < MAX_STEP_CAPTURE_COUNT) { + if (self->capture_ids[i + 1] == NONE) break; + self->capture_ids[i] = self->capture_ids[i + 1]; + self->capture_ids[i + 1] = NONE; + i++; + } + break; + } + } +} + +/********************** + * StatePredecessorMap + **********************/ + +static inline StatePredecessorMap state_predecessor_map_new( + const TSLanguage *language +) { + return (StatePredecessorMap) { + .contents = ts_calloc( + (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 
1), + sizeof(TSStateId) + ), + }; +} + +static inline void state_predecessor_map_delete(StatePredecessorMap *self) { + ts_free(self->contents); +} + +static inline void state_predecessor_map_add( + StatePredecessorMap *self, + TSStateId state, + TSStateId predecessor +) { + size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); + TSStateId *count = &self->contents[index]; + if ( + *count == 0 || + (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor) + ) { + (*count)++; + self->contents[index + *count] = predecessor; + } +} + +static inline const TSStateId *state_predecessor_map_get( + const StatePredecessorMap *self, + TSStateId state, + unsigned *count +) { + size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); + *count = self->contents[index]; + return &self->contents[index + 1]; +} + +/**************** + * AnalysisState + ****************/ + +static unsigned analysis_state__recursion_depth(const AnalysisState *self) { + unsigned result = 0; + for (unsigned i = 0; i < self->depth; i++) { + TSSymbol symbol = self->stack[i].parent_symbol; + for (unsigned j = 0; j < i; j++) { + if (self->stack[j].parent_symbol == symbol) { + result++; + break; + } + } + } + return result; +} + +static inline int analysis_state__compare_position( + AnalysisState *const *self, + AnalysisState *const *other +) { + for (unsigned i = 0; i < (*self)->depth; i++) { + if (i >= (*other)->depth) return -1; + if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1; + if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1; + } + if ((*self)->depth < (*other)->depth) return 1; + if ((*self)->step_index < (*other)->step_index) return -1; + if ((*self)->step_index > (*other)->step_index) return 1; + return 0; +} + +static inline int analysis_state__compare( + AnalysisState *const *self, + AnalysisState *const *other +) { + int result = analysis_state__compare_position(self, other); + if (result 
!= 0) return result; + for (unsigned i = 0; i < (*self)->depth; i++) { + if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1; + if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1; + if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1; + if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1; + if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1; + if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1; + } + return 0; +} + +static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { + if (self->depth == 0) { + return &self->stack[0]; + } + return &self->stack[self->depth - 1]; +} + +static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) { + for (unsigned i = 0; i < self->depth; i++) { + if (self->stack[i].parent_symbol == symbol) return true; + } + return false; +} + +/****************** + * AnalysisStateSet + ******************/ + +// Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by +// cloning one from scratch. +static inline AnalysisState *analysis_state_pool__clone_or_reuse( + AnalysisStateSet *self, + AnalysisState *borrowed_item +) { + AnalysisState *new_item; + if (self->size) { + new_item = array_pop(self); + } else { + new_item = ts_malloc(sizeof(AnalysisState)); + } + *new_item = *borrowed_item; + return new_item; +} + +// Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this +// set. The set does not contain duplicates, so if the item is already present, it will not be +// inserted, and no clone will be made. +// +// The caller retains ownership of the passed-in memory. However, the clone that is created by this +// function will be managed by the state set. 
+static inline void analysis_state_set__insert_sorted( + AnalysisStateSet *self, + AnalysisStateSet *pool, + AnalysisState *borrowed_item +) { + unsigned index, exists; + array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists); + if (!exists) { + AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); + array_insert(self, index, new_item); + } +} + +// Inserts a clone of the passed-in item at the end position of this list. +// +// IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function +// `analysis_state__compare`) than largest item already in this set. If items are inserted in the +// wrong order, the set will not function properly for future use. +// +// The caller retains ownership of the passed-in memory. However, the clone that is created by this +// function will be managed by the state set. +static inline void analysis_state_set__push( + AnalysisStateSet *self, + AnalysisStateSet *pool, + AnalysisState *borrowed_item +) { + AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); + array_push(self, new_item); +} + +// Removes all items from this set, returning it to an empty state. +static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) { + array_push_all(pool, self); + array_clear(self); +} + +// Releases all memory that is managed with this state set, including any items currently present. +// After calling this function, the set is no longer suitable for use. 
+static inline void analysis_state_set__delete(AnalysisStateSet *self) { + for (unsigned i = 0; i < self->size; i++) { + ts_free(self->contents[i]); + } + array_delete(self); +} + +/**************** + * QueryAnalyzer + ****************/ + +static inline QueryAnalysis query_analysis__new(void) { + return (QueryAnalysis) { + .states = array_new(), + .next_states = array_new(), + .deeper_states = array_new(), + .state_pool = array_new(), + .final_step_indices = array_new(), + .finished_parent_symbols = array_new(), + .did_abort = false, + }; +} + +static inline void query_analysis__delete(QueryAnalysis *self) { + analysis_state_set__delete(&self->states); + analysis_state_set__delete(&self->next_states); + analysis_state_set__delete(&self->deeper_states); + analysis_state_set__delete(&self->state_pool); + array_delete(&self->final_step_indices); + array_delete(&self->finished_parent_symbols); +} + +/*********************** + * AnalysisSubgraphNode + ***********************/ + +static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) { + if (self->state < other->state) return -1; + if (self->state > other->state) return 1; + if (self->child_index < other->child_index) return -1; + if (self->child_index > other->child_index) return 1; + if (self->done < other->done) return -1; + if (self->done > other->done) return 1; + if (self->production_id < other->production_id) return -1; + if (self->production_id > other->production_id) return 1; + return 0; +} + +/********* + * Query + *********/ + +// The `pattern_map` contains a mapping from TSSymbol values to indices in the +// `steps` array. For a given syntax node, the `pattern_map` makes it possible +// to quickly find the starting steps of all of the patterns whose root matches +// that node. 
Each entry has two fields: a `pattern_index`, which identifies one +// of the patterns in the query, and a `step_index`, which indicates the start +// offset of that pattern's steps within the `steps` array. +// +// The entries are sorted by the patterns' root symbols, and lookups use a +// binary search. This ensures that the cost of this initial lookup step +// scales logarithmically with the number of patterns in the query. +// +// This returns `true` if the symbol is present and `false` otherwise. +// If the symbol is not present `*result` is set to the index where the +// symbol should be inserted. +static inline bool ts_query__pattern_map_search( + const TSQuery *self, + TSSymbol needle, + uint32_t *result +) { + uint32_t base_index = self->wildcard_root_pattern_count; + uint32_t size = self->pattern_map.size - base_index; + if (size == 0) { + *result = base_index; + return false; + } + while (size > 1) { + uint32_t half_size = size / 2; + uint32_t mid_index = base_index + half_size; + TSSymbol mid_symbol = self->steps.contents[ + self->pattern_map.contents[mid_index].step_index + ].symbol; + if (needle > mid_symbol) base_index = mid_index; + size -= half_size; + } + + TSSymbol symbol = self->steps.contents[ + self->pattern_map.contents[base_index].step_index + ].symbol; + + if (needle > symbol) { + base_index++; + if (base_index < self->pattern_map.size) { + symbol = self->steps.contents[ + self->pattern_map.contents[base_index].step_index + ].symbol; + } + } + + *result = base_index; + return needle == symbol; +} + +// Insert a new pattern's start index into the pattern map, maintaining +// the pattern map's ordering invariant. +static inline void ts_query__pattern_map_insert( + TSQuery *self, + TSSymbol symbol, + PatternEntry new_entry +) { + uint32_t index; + ts_query__pattern_map_search(self, symbol, &index); + + // Ensure that the entries are sorted not only by symbol, but also + // by pattern_index. 
This way, states for earlier patterns will be + // initiated first, which allows the ordering of the states array + // to be maintained more efficiently. + while (index < self->pattern_map.size) { + PatternEntry *entry = &self->pattern_map.contents[index]; + if ( + self->steps.contents[entry->step_index].symbol == symbol && + entry->pattern_index < new_entry.pattern_index + ) { + index++; + } else { + break; + } + } + + array_insert(&self->pattern_map, index, new_entry); +} + +// Walk the subgraph for this non-terminal, tracking all of the possible +// sequences of progress within the pattern. +static void ts_query__perform_analysis( + TSQuery *self, + const AnalysisSubgraphArray *subgraphs, + QueryAnalysis *analysis +) { + unsigned recursion_depth_limit = 0; + unsigned prev_final_step_count = 0; + array_clear(&analysis->final_step_indices); + array_clear(&analysis->finished_parent_symbols); + + for (unsigned iteration = 0;; iteration++) { + if (iteration == MAX_ANALYSIS_ITERATION_COUNT) { + analysis->did_abort = true; + break; + } + + #ifdef DEBUG_ANALYZE_QUERY + printf("Iteration: %u. 
Final step indices:", iteration); + for (unsigned j = 0; j < analysis->final_step_indices.size; j++) { + printf(" %4u", analysis->final_step_indices.contents[j]); + } + printf("\n"); + for (unsigned j = 0; j < analysis->states.size; j++) { + AnalysisState *state = analysis->states.contents[j]; + printf(" %3u: step: %u, stack: [", j, state->step_index); + for (unsigned k = 0; k < state->depth; k++) { + printf( + " {%s, child: %u, state: %4u", + self->language->symbol_names[state->stack[k].parent_symbol], + state->stack[k].child_index, + state->stack[k].parse_state + ); + if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]); + if (state->stack[k].done) printf(", DONE"); + printf("}"); + } + printf(" ]\n"); + } + #endif + + // If no further progress can be made within the current recursion depth limit, then + // bump the depth limit by one, and continue to process the states the exceeded the + // limit. But only allow this if progress has been made since the last time the depth + // limit was increased. + if (analysis->states.size == 0) { + if ( + analysis->deeper_states.size > 0 && + analysis->final_step_indices.size > prev_final_step_count + ) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1); + #endif + + prev_final_step_count = analysis->final_step_indices.size; + recursion_depth_limit++; + AnalysisStateSet _states = analysis->states; + analysis->states = analysis->deeper_states; + analysis->deeper_states = _states; + continue; + } + + break; + } + + analysis_state_set__clear(&analysis->next_states, &analysis->state_pool); + for (unsigned j = 0; j < analysis->states.size; j++) { + AnalysisState * const state = analysis->states.contents[j]; + + // For efficiency, it's important to avoid processing the same analysis state more + // than once. To achieve this, keep the states in order of ascending position within + // their hypothetical syntax trees. 
In each iteration of this loop, start by advancing + // the states that have made the least progress. Avoid advancing states that have already + // made more progress. + if (analysis->next_states.size > 0) { + int comparison = analysis_state__compare_position( + &state, + array_back(&analysis->next_states) + ); + if (comparison == 0) { + analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state); + continue; + } else if (comparison > 0) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Terminate iteration at state %u\n", j); + #endif + while (j < analysis->states.size) { + analysis_state_set__push( + &analysis->next_states, + &analysis->state_pool, + analysis->states.contents[j] + ); + j++; + } + break; + } + } + + const TSStateId parse_state = analysis_state__top(state)->parse_state; + const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol; + const TSFieldId parent_field_id = analysis_state__top(state)->field_id; + const unsigned child_index = analysis_state__top(state)->child_index; + const QueryStep * const step = &self->steps.contents[state->step_index]; + + unsigned subgraph_index, exists; + array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); + if (!exists) continue; + const AnalysisSubgraph *subgraph = &subgraphs->contents[subgraph_index]; + + // Follow every possible path in the parse table, but only visit states that + // are part of the subgraph for the current symbol. 
+ LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state); + while (ts_lookahead_iterator__next(&lookahead_iterator)) { + TSSymbol sym = lookahead_iterator.symbol; + + AnalysisSubgraphNode successor = { + .state = parse_state, + .child_index = child_index, + }; + if (lookahead_iterator.action_count) { + const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1]; + if (action->type == TSParseActionTypeShift) { + if (!action->shift.extra) { + successor.state = action->shift.state; + successor.child_index++; + } + } else { + continue; + } + } else if (lookahead_iterator.next_state != 0) { + successor.state = lookahead_iterator.next_state; + successor.child_index++; + } else { + continue; + } + + unsigned node_index; + array_search_sorted_with( + &subgraph->nodes, + analysis_subgraph_node__compare, &successor, + &node_index, &exists + ); + while (node_index < subgraph->nodes.size) { + AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++]; + if (node->state != successor.state || node->child_index != successor.child_index) break; + + // Use the subgraph to determine what alias and field will eventually be applied + // to this child node. + TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); + TSSymbol visible_symbol = alias + ? alias + : self->language->symbol_metadata[sym].visible + ? self->language->public_symbol_map[sym] + : 0; + TSFieldId field_id = parent_field_id; + if (!field_id) { + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); + for (; field_map != field_map_end; field_map++) { + if (!field_map->inherited && field_map->child_index == child_index) { + field_id = field_map->field_id; + break; + } + } + } + + // Create a new state that has advanced past this hypothetical subtree. 
+ AnalysisState next_state = *state; + AnalysisStateEntry *next_state_top = analysis_state__top(&next_state); + next_state_top->child_index = successor.child_index; + next_state_top->parse_state = successor.state; + if (node->done) next_state_top->done = true; + + // Determine if this hypothetical child node would match the current step + // of the query pattern. + bool does_match = false; + if (visible_symbol) { + does_match = true; + if (step->symbol == WILDCARD_SYMBOL) { + if ( + step->is_named && + !self->language->symbol_metadata[visible_symbol].named + ) does_match = false; + } else if (step->symbol != visible_symbol) { + does_match = false; + } + if (step->field && step->field != field_id) { + does_match = false; + } + if ( + step->supertype_symbol && + !analysis_state__has_supertype(state, step->supertype_symbol) + ) does_match = false; + } + + // If this child is hidden, then descend into it and walk through its children. + // If the top entry of the stack is at the end of its rule, then that entry can + // be replaced. Otherwise, push a new entry onto the stack. + else if (sym >= self->language->token_count) { + if (!next_state_top->done) { + if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Exceeded depth limit for state %u\n", j); + #endif + + analysis->did_abort = true; + continue; + } + + next_state.depth++; + next_state_top = analysis_state__top(&next_state); + } + + *next_state_top = (AnalysisStateEntry) { + .parse_state = parse_state, + .parent_symbol = sym, + .child_index = 0, + .field_id = field_id, + .done = false, + }; + + if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) { + analysis_state_set__insert_sorted( + &analysis->deeper_states, + &analysis->state_pool, + &next_state + ); + continue; + } + } + + // Pop from the stack when this state reached the end of its current syntax node. 
+ while (next_state.depth > 0 && next_state_top->done) { + next_state.depth--; + next_state_top = analysis_state__top(&next_state); + } + + // If this hypothetical child did match the current step of the query pattern, + // then advance to the next step at the current depth. This involves skipping + // over any descendant steps of the current child. + const QueryStep *next_step = step; + if (does_match) { + for (;;) { + next_state.step_index++; + next_step = &self->steps.contents[next_state.step_index]; + if ( + next_step->depth == PATTERN_DONE_MARKER || + next_step->depth <= step->depth + ) break; + } + } else if (successor.state == parse_state) { + continue; + } + + for (;;) { + // Skip pass-through states. Although these states have alternatives, they are only + // used to implement repetitions, and query analysis does not need to process + // repetitions in order to determine whether steps are possible and definite. + if (next_step->is_pass_through) { + next_state.step_index++; + next_step++; + continue; + } + + // If the pattern is finished or hypothetical parent node is complete, then + // record that matching can terminate at this step of the pattern. Otherwise, + // add this state to the list of states to process on the next iteration. + if (!next_step->is_dead_end) { + bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth; + if (did_finish_pattern) { + array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol); + } else if (next_state.depth == 0) { + array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index); + } else { + analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state); + } + } + + // If the state has advanced to a step with an alternative step, then add another state + // at that alternative step. 
This process is simpler than the process of actually matching a + // pattern during query execution, because for the purposes of query analysis, there is no + // need to process repetitions. + if ( + does_match && + next_step->alternative_index != NONE && + next_step->alternative_index > next_state.step_index + ) { + next_state.step_index = next_step->alternative_index; + next_step = &self->steps.contents[next_state.step_index]; + } else { + break; + } + } + } + } + } + + AnalysisStateSet _states = analysis->states; + analysis->states = analysis->next_states; + analysis->next_states = _states; + } +} + +static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { + Array(uint16_t) non_rooted_pattern_start_steps = array_new(); + for (unsigned i = 0; i < self->pattern_map.size; i++) { + PatternEntry *pattern = &self->pattern_map.contents[i]; + if (!pattern->is_rooted) { + QueryStep *step = &self->steps.contents[pattern->step_index]; + if (step->symbol != WILDCARD_SYMBOL) { + array_push(&non_rooted_pattern_start_steps, i); + } + } + } + + // Walk forward through all of the steps in the query, computing some + // basic information about each step. Mark all of the steps that contain + // captures, and record the indices of all of the steps that have child steps. 
+ Array(uint32_t) parent_step_indices = array_new(); + for (unsigned i = 0; i < self->steps.size; i++) { + QueryStep *step = &self->steps.contents[i]; + if (step->depth == PATTERN_DONE_MARKER) { + step->parent_pattern_guaranteed = true; + step->root_pattern_guaranteed = true; + continue; + } + + bool has_children = false; + bool is_wildcard = step->symbol == WILDCARD_SYMBOL; + step->contains_captures = step->capture_ids[0] != NONE; + for (unsigned j = i + 1; j < self->steps.size; j++) { + QueryStep *next_step = &self->steps.contents[j]; + if ( + next_step->depth == PATTERN_DONE_MARKER || + next_step->depth <= step->depth + ) break; + if (next_step->capture_ids[0] != NONE) { + step->contains_captures = true; + } + if (!is_wildcard) { + next_step->root_pattern_guaranteed = true; + next_step->parent_pattern_guaranteed = true; + } + has_children = true; + } + + if (has_children && !is_wildcard) { + array_push(&parent_step_indices, i); + } + } + + // For every parent symbol in the query, initialize an 'analysis subgraph'. + // This subgraph lists all of the states in the parse table that are directly + // involved in building subtrees for this symbol. + // + // In addition to the parent symbols in the query, construct subgraphs for all + // of the hidden symbols in the grammar, because these might occur within + // one of the parent nodes, such that their children appear to belong to the + // parent. 
+ AnalysisSubgraphArray subgraphs = array_new(); + for (unsigned i = 0; i < parent_step_indices.size; i++) { + uint32_t parent_step_index = parent_step_indices.contents[i]; + TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; + AnalysisSubgraph subgraph = { .symbol = parent_symbol }; + array_insert_sorted_by(&subgraphs, .symbol, subgraph); + } + for (TSSymbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) { + if (!ts_language_symbol_metadata(self->language, sym).visible) { + AnalysisSubgraph subgraph = { .symbol = sym }; + array_insert_sorted_by(&subgraphs, .symbol, subgraph); + } + } + + // Scan the parse table to find the data needed to populate these subgraphs. + // Collect three things during this scan: + // 1) All of the parse states where one of these symbols can start. + // 2) All of the parse states where one of these symbols can end, along + // with information about the node that would be created. + // 3) A list of predecessor states for each state. 
+ StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language); + for (TSStateId state = 1; state < (uint16_t)self->language->state_count; state++) { + unsigned subgraph_index, exists; + LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state); + while (ts_lookahead_iterator__next(&lookahead_iterator)) { + if (lookahead_iterator.action_count) { + for (unsigned i = 0; i < lookahead_iterator.action_count; i++) { + const TSParseAction *action = &lookahead_iterator.actions[i]; + if (action->type == TSParseActionTypeReduce) { + const TSSymbol *aliases, *aliases_end; + ts_language_aliases_for_symbol( + self->language, + action->reduce.symbol, + &aliases, + &aliases_end + ); + for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { + array_search_sorted_by( + &subgraphs, + .symbol, + *symbol, + &subgraph_index, + &exists + ); + if (exists) { + AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { + array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { + .state = state, + .production_id = action->reduce.production_id, + .child_index = action->reduce.child_count, + .done = true, + })); + } + } + } + } else if (action->type == TSParseActionTypeShift && !action->shift.extra) { + TSStateId next_state = action->shift.state; + state_predecessor_map_add(&predecessor_map, next_state, state); + } + } + } else if (lookahead_iterator.next_state != 0) { + if (lookahead_iterator.next_state != state) { + state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); + } + if (ts_language_state_is_primary(self->language, state)) { + const TSSymbol *aliases, *aliases_end; + ts_language_aliases_for_symbol( + self->language, + lookahead_iterator.symbol, + &aliases, + &aliases_end + ); + for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { + array_search_sorted_by( + &subgraphs, + .symbol, + 
*symbol, + &subgraph_index, + &exists + ); + if (exists) { + AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + if ( + subgraph->start_states.size == 0 || + *array_back(&subgraph->start_states) != state + ) + array_push(&subgraph->start_states, state); + } + } + } + } + } + } + + // For each subgraph, compute the preceding states by walking backward + // from the end states using the predecessor map. + Array(AnalysisSubgraphNode) next_nodes = array_new(); + for (unsigned i = 0; i < subgraphs.size; i++) { + AnalysisSubgraph *subgraph = &subgraphs.contents[i]; + if (subgraph->nodes.size == 0) { + array_delete(&subgraph->start_states); + array_erase(&subgraphs, i); + i--; + continue; + } + array_assign(&next_nodes, &subgraph->nodes); + while (next_nodes.size > 0) { + AnalysisSubgraphNode node = array_pop(&next_nodes); + if (node.child_index > 1) { + unsigned predecessor_count; + const TSStateId *predecessors = state_predecessor_map_get( + &predecessor_map, + node.state, + &predecessor_count + ); + for (unsigned j = 0; j < predecessor_count; j++) { + AnalysisSubgraphNode predecessor_node = { + .state = predecessors[j], + .child_index = node.child_index - 1, + .production_id = node.production_id, + .done = false, + }; + unsigned index, exists; + array_search_sorted_with( + &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node, + &index, &exists + ); + if (!exists) { + array_insert(&subgraph->nodes, index, predecessor_node); + array_push(&next_nodes, predecessor_node); + } + } + } + } + } + + #ifdef DEBUG_ANALYZE_QUERY + printf("\nSubgraphs:\n"); + for (unsigned i = 0; i < subgraphs.size; i++) { + AnalysisSubgraph *subgraph = &subgraphs.contents[i]; + printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol)); + for (unsigned j = 0; j < subgraph->start_states.size; j++) { + printf( + " {state: %u}\n", + subgraph->start_states.contents[j] + ); + } + for (unsigned j = 0; j < subgraph->nodes.size; j++) { 
+ AnalysisSubgraphNode *node = &subgraph->nodes.contents[j]; + printf( + " {state: %u, child_index: %u, production_id: %u, done: %d}\n", + node->state, node->child_index, node->production_id, node->done + ); + } + printf("\n"); + } + #endif + + // For each non-terminal pattern, determine if the pattern can successfully match, + // and identify all of the possible children within the pattern where matching could fail. + bool all_patterns_are_valid = true; + QueryAnalysis analysis = query_analysis__new(); + for (unsigned i = 0; i < parent_step_indices.size; i++) { + uint16_t parent_step_index = parent_step_indices.contents[i]; + uint16_t parent_depth = self->steps.contents[parent_step_index].depth; + TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; + if (parent_symbol == ts_builtin_sym_error) continue; + + // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's + // root symbol is a terminal, then return an error. + unsigned subgraph_index, exists; + array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); + if (!exists) { + unsigned first_child_step_index = parent_step_index + 1; + uint32_t j, child_exists; + array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists); + assert(child_exists); + *error_offset = self->step_offsets.contents[j].byte_offset; + all_patterns_are_valid = false; + break; + } + + // Initialize an analysis state at every parse state in the table where + // this parent symbol can occur. 
+ AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + analysis_state_set__clear(&analysis.states, &analysis.state_pool); + analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); + for (unsigned j = 0; j < subgraph->start_states.size; j++) { + TSStateId parse_state = subgraph->start_states.contents[j]; + analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { + .step_index = parent_step_index + 1, + .stack = { + [0] = { + .parse_state = parse_state, + .parent_symbol = parent_symbol, + .child_index = 0, + .field_id = 0, + .done = false, + }, + }, + .depth = 1, + .root_symbol = parent_symbol, + })); + } + + #ifdef DEBUG_ANALYZE_QUERY + printf( + "\nWalk states for %s:\n", + ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol) + ); + #endif + + analysis.did_abort = false; + ts_query__perform_analysis(self, &subgraphs, &analysis); + + // If this pattern could not be fully analyzed, then every step should + // be considered fallible. + if (analysis.did_abort) { + for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) { + QueryStep *step = &self->steps.contents[j]; + if ( + step->depth <= parent_depth || + step->depth == PATTERN_DONE_MARKER + ) break; + if (!step->is_dead_end) { + step->parent_pattern_guaranteed = false; + step->root_pattern_guaranteed = false; + } + } + continue; + } + + // If this pattern cannot match, store the pattern index so that it can be + // returned to the caller. 
+ if (analysis.finished_parent_symbols.size == 0) { + assert(analysis.final_step_indices.size > 0); + uint16_t impossible_step_index = *array_back(&analysis.final_step_indices); + uint32_t j, impossible_exists; + array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists); + if (j >= self->step_offsets.size) j = self->step_offsets.size - 1; + *error_offset = self->step_offsets.contents[j].byte_offset; + all_patterns_are_valid = false; + break; + } + + // Mark as fallible any step where a match terminated. + // Later, this property will be propagated to all of the step's predecessors. + for (unsigned j = 0; j < analysis.final_step_indices.size; j++) { + uint32_t final_step_index = analysis.final_step_indices.contents[j]; + QueryStep *step = &self->steps.contents[final_step_index]; + if ( + step->depth != PATTERN_DONE_MARKER && + step->depth > parent_depth && + !step->is_dead_end + ) { + step->parent_pattern_guaranteed = false; + step->root_pattern_guaranteed = false; + } + } + } + + // Mark as indefinite any step with captures that are used in predicates. + Array(uint16_t) predicate_capture_ids = array_new(); + for (unsigned i = 0; i < self->patterns.size; i++) { + QueryPattern *pattern = &self->patterns.contents[i]; + + // Gather all of the captures that are used in predicates for this pattern. + array_clear(&predicate_capture_ids); + for ( + unsigned start = pattern->predicate_steps.offset, + end = start + pattern->predicate_steps.length, + j = start; j < end; j++ + ) { + TSQueryPredicateStep *step = &self->predicate_steps.contents[j]; + if (step->type == TSQueryPredicateStepTypeCapture) { + uint16_t value_id = step->value_id; + array_insert_sorted_by(&predicate_capture_ids, , value_id); + } + } + + // Find all of the steps that have these captures. 
+ for ( + unsigned start = pattern->steps.offset, + end = start + pattern->steps.length, + j = start; j < end; j++ + ) { + QueryStep *step = &self->steps.contents[j]; + for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) { + uint16_t capture_id = step->capture_ids[k]; + if (capture_id == NONE) break; + unsigned index, exists; + array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists); + if (exists) { + step->root_pattern_guaranteed = false; + break; + } + } + } + } + + // Propagate fallibility. If a pattern is fallible at a given step, then it is + // fallible at all of its preceding steps. + bool done = self->steps.size == 0; + while (!done) { + done = true; + for (unsigned i = self->steps.size - 1; i > 0; i--) { + QueryStep *step = &self->steps.contents[i]; + if (step->depth == PATTERN_DONE_MARKER) continue; + + // Determine if this step is definite or has definite alternatives. + bool parent_pattern_guaranteed = false; + for (;;) { + if (step->root_pattern_guaranteed) { + parent_pattern_guaranteed = true; + break; + } + if (step->alternative_index == NONE || step->alternative_index < i) { + break; + } + step = &self->steps.contents[step->alternative_index]; + } + + // If not, mark its predecessor as indefinite. + if (!parent_pattern_guaranteed) { + QueryStep *prev_step = &self->steps.contents[i - 1]; + if ( + !prev_step->is_dead_end && + prev_step->depth != PATTERN_DONE_MARKER && + prev_step->root_pattern_guaranteed + ) { + prev_step->root_pattern_guaranteed = false; + done = false; + } + } + } + } + + #ifdef DEBUG_ANALYZE_QUERY + printf("Steps:\n"); + for (unsigned i = 0; i < self->steps.size; i++) { + QueryStep *step = &self->steps.contents[i]; + if (step->depth == PATTERN_DONE_MARKER) { + printf(" %u: DONE\n", i); + } else { + printf( + " %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n", + i, + (step->symbol == WILDCARD_SYMBOL) + ? 
"ANY" + : ts_language_symbol_name(self->language, step->symbol), + (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"), + step->depth, + step->parent_pattern_guaranteed, + step->root_pattern_guaranteed + ); + } + } + #endif + + // Determine which repetition symbols in this language have the possibility + // of matching non-rooted patterns in this query. These repetition symbols + // prevent certain optimizations with range restrictions. + analysis.did_abort = false; + for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) { + uint16_t pattern_entry_index = non_rooted_pattern_start_steps.contents[i]; + PatternEntry *pattern_entry = &self->pattern_map.contents[pattern_entry_index]; + + analysis_state_set__clear(&analysis.states, &analysis.state_pool); + analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); + for (unsigned j = 0; j < subgraphs.size; j++) { + AnalysisSubgraph *subgraph = &subgraphs.contents[j]; + TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol); + if (metadata.visible || metadata.named) continue; + + for (uint32_t k = 0; k < subgraph->start_states.size; k++) { + TSStateId parse_state = subgraph->start_states.contents[k]; + analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { + .step_index = pattern_entry->step_index, + .stack = { + [0] = { + .parse_state = parse_state, + .parent_symbol = subgraph->symbol, + .child_index = 0, + .field_id = 0, + .done = false, + }, + }, + .root_symbol = subgraph->symbol, + .depth = 1, + })); + } + } + + #ifdef DEBUG_ANALYZE_QUERY + printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index); + #endif + + ts_query__perform_analysis( + self, + &subgraphs, + &analysis + ); + + if (analysis.finished_parent_symbols.size > 0) { + self->patterns.contents[pattern_entry->pattern_index].is_non_local = true; + } + + for (unsigned k = 0; k < 
analysis.finished_parent_symbols.size; k++) { + TSSymbol symbol = analysis.finished_parent_symbols.contents[k]; + array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol); + } + } + + #ifdef DEBUG_ANALYZE_QUERY + if (self->repeat_symbols_with_rootless_patterns.size > 0) { + printf("\nRepetition symbols with rootless patterns:\n"); + printf("aborted analysis: %d\n", analysis.did_abort); + for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) { + TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i]; + printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol)); + } + printf("\n"); + } + #endif + + // Cleanup + for (unsigned i = 0; i < subgraphs.size; i++) { + array_delete(&subgraphs.contents[i].start_states); + array_delete(&subgraphs.contents[i].nodes); + } + array_delete(&subgraphs); + query_analysis__delete(&analysis); + array_delete(&next_nodes); + array_delete(&non_rooted_pattern_start_steps); + array_delete(&parent_step_indices); + array_delete(&predicate_capture_ids); + state_predecessor_map_delete(&predecessor_map); + + return all_patterns_are_valid; +} + +static void ts_query__add_negated_fields( + TSQuery *self, + uint16_t step_index, + TSFieldId *field_ids, + uint16_t field_count +) { + QueryStep *step = &self->steps.contents[step_index]; + + // The negated field array stores a list of field lists, separated by zeros. + // Try to find the start index of an existing list that matches this new list. + bool failed_match = false; + unsigned match_count = 0; + unsigned start_i = 0; + for (unsigned i = 0; i < self->negated_fields.size; i++) { + TSFieldId existing_field_id = self->negated_fields.contents[i]; + + // At each zero value, terminate the match attempt. If we've exactly + // matched the new field list, then reuse this index. Otherwise, + // start over the matching process. 
+ if (existing_field_id == 0) { + if (match_count == field_count) { + step->negated_field_list_id = start_i; + return; + } else { + start_i = i + 1; + match_count = 0; + failed_match = false; + } + } + + // If the existing list matches our new list so far, then advance + // to the next element of the new list. + else if ( + match_count < field_count && + existing_field_id == field_ids[match_count] && + !failed_match + ) { + match_count++; + } + + // Otherwise, this existing list has failed to match. + else { + match_count = 0; + failed_match = true; + } + } + + step->negated_field_list_id = self->negated_fields.size; + array_extend(&self->negated_fields, field_count, field_ids); + array_push(&self->negated_fields, 0); +} + +static TSQueryError ts_query__parse_string_literal( + TSQuery *self, + Stream *stream +) { + const char *string_start = stream->input; + if (stream->next != '"') return TSQueryErrorSyntax; + stream_advance(stream); + const char *prev_position = stream->input; + + bool is_escaped = false; + array_clear(&self->string_buffer); + for (;;) { + if (is_escaped) { + is_escaped = false; + switch (stream->next) { + case 'n': + array_push(&self->string_buffer, '\n'); + break; + case 'r': + array_push(&self->string_buffer, '\r'); + break; + case 't': + array_push(&self->string_buffer, '\t'); + break; + case '0': + array_push(&self->string_buffer, '\0'); + break; + default: + array_extend(&self->string_buffer, stream->next_size, stream->input); + break; + } + prev_position = stream->input + stream->next_size; + } else { + if (stream->next == '\\') { + array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); + prev_position = stream->input + 1; + is_escaped = true; + } else if (stream->next == '"') { + array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); + stream_advance(stream); + return TSQueryErrorNone; + } else if (stream->next == '\n') { + stream_reset(stream, string_start); + 
return TSQueryErrorSyntax; + } + } + if (!stream_advance(stream)) { + stream_reset(stream, string_start); + return TSQueryErrorSyntax; + } + } +} + +// Parse a single predicate associated with a pattern, adding it to the +// query's internal `predicate_steps` array. Predicates are arbitrary +// S-expressions associated with a pattern which are meant to be handled at +// a higher level of abstraction, such as the Rust/JavaScript bindings. They +// can contain '@'-prefixed capture names, double-quoted strings, and bare +// symbols, which also represent strings. +static TSQueryError ts_query__parse_predicate( + TSQuery *self, + Stream *stream +) { + if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; + const char *predicate_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - predicate_name); + uint16_t id = symbol_table_insert_name( + &self->predicate_values, + predicate_name, + length + ); + array_push(&self->predicate_steps, ((TSQueryPredicateStep) { + .type = TSQueryPredicateStepTypeString, + .value_id = id, + })); + stream_skip_whitespace(stream); + + for (;;) { + if (stream->next == ')') { + stream_advance(stream); + stream_skip_whitespace(stream); + array_push(&self->predicate_steps, ((TSQueryPredicateStep) { + .type = TSQueryPredicateStepTypeDone, + .value_id = 0, + })); + break; + } + + // Parse an '@'-prefixed capture name + else if (stream->next == '@') { + stream_advance(stream); + + // Parse the capture name + if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; + const char *capture_name = stream->input; + stream_scan_identifier(stream); + uint32_t capture_length = (uint32_t)(stream->input - capture_name); + + // Add the capture id to the first step of the pattern + int capture_id = symbol_table_id_for_name( + &self->captures, + capture_name, + capture_length + ); + if (capture_id == -1) { + stream_reset(stream, capture_name); + return TSQueryErrorCapture; + } + + 
array_push(&self->predicate_steps, ((TSQueryPredicateStep) { + .type = TSQueryPredicateStepTypeCapture, + .value_id = capture_id, + })); + } + + // Parse a string literal + else if (stream->next == '"') { + TSQueryError e = ts_query__parse_string_literal(self, stream); + if (e) return e; + uint16_t query_id = symbol_table_insert_name( + &self->predicate_values, + self->string_buffer.contents, + self->string_buffer.size + ); + array_push(&self->predicate_steps, ((TSQueryPredicateStep) { + .type = TSQueryPredicateStepTypeString, + .value_id = query_id, + })); + } + + // Parse a bare symbol + else if (stream_is_ident_start(stream)) { + const char *symbol_start = stream->input; + stream_scan_identifier(stream); + uint32_t symbol_length = (uint32_t)(stream->input - symbol_start); + uint16_t query_id = symbol_table_insert_name( + &self->predicate_values, + symbol_start, + symbol_length + ); + array_push(&self->predicate_steps, ((TSQueryPredicateStep) { + .type = TSQueryPredicateStepTypeString, + .value_id = query_id, + })); + } + + else { + return TSQueryErrorSyntax; + } + + stream_skip_whitespace(stream); + } + + return 0; +} + +// Read one S-expression pattern from the stream, and incorporate it into +// the query's internal state machine representation. For nested patterns, +// this function calls itself recursively. +// +// The caller is responsible for passing in a dedicated CaptureQuantifiers. +// These should not be shared between different calls to ts_query__parse_pattern! +static TSQueryError ts_query__parse_pattern( + TSQuery *self, + Stream *stream, + uint32_t depth, + bool is_immediate, + CaptureQuantifiers *capture_quantifiers +) { + if (stream->next == 0) return TSQueryErrorSyntax; + if (stream->next == ')' || stream->next == ']') return PARENT_DONE; + + const uint32_t starting_step_index = self->steps.size; + + // Store the byte offset of each step in the query. 
+ if ( + self->step_offsets.size == 0 || + array_back(&self->step_offsets)->step_index != starting_step_index + ) { + array_push(&self->step_offsets, ((StepOffset) { + .step_index = starting_step_index, + .byte_offset = stream_offset(stream), + })); + } + + // An open bracket is the start of an alternation. + if (stream->next == '[') { + stream_advance(stream); + stream_skip_whitespace(stream); + + // Parse each branch, and add a placeholder step in between the branches. + Array(uint32_t) branch_step_indices = array_new(); + CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new(); + for (;;) { + uint32_t start_index = self->steps.size; + TSQueryError e = ts_query__parse_pattern( + self, + stream, + depth, + is_immediate, + &branch_capture_quantifiers + ); + + if (e == PARENT_DONE) { + if (stream->next == ']' && branch_step_indices.size > 0) { + stream_advance(stream); + break; + } + e = TSQueryErrorSyntax; + } + if (e) { + capture_quantifiers_delete(&branch_capture_quantifiers); + array_delete(&branch_step_indices); + return e; + } + + if (start_index == starting_step_index) { + capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers); + } else { + capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers); + } + + array_push(&branch_step_indices, start_index); + array_push(&self->steps, query_step__new(0, depth, false)); + capture_quantifiers_clear(&branch_capture_quantifiers); + } + (void)array_pop(&self->steps); + + // For all of the branches except for the last one, add the subsequent branch as an + // alternative, and link the end of the branch to the current end of the steps. 
+ for (unsigned i = 0; i < branch_step_indices.size - 1; i++) { + uint32_t step_index = branch_step_indices.contents[i]; + uint32_t next_step_index = branch_step_indices.contents[i + 1]; + QueryStep *start_step = &self->steps.contents[step_index]; + QueryStep *end_step = &self->steps.contents[next_step_index - 1]; + start_step->alternative_index = next_step_index; + end_step->alternative_index = self->steps.size; + end_step->is_dead_end = true; + } + + capture_quantifiers_delete(&branch_capture_quantifiers); + array_delete(&branch_step_indices); + } + + // An open parenthesis can be the start of three possible constructs: + // * A grouped sequence + // * A predicate + // * A named node + else if (stream->next == '(') { + stream_advance(stream); + stream_skip_whitespace(stream); + + // If this parenthesis is followed by a node, then it represents a grouped sequence. + if (stream->next == '(' || stream->next == '"' || stream->next == '[') { + bool child_is_immediate = is_immediate; + CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); + for (;;) { + if (stream->next == '.') { + child_is_immediate = true; + stream_advance(stream); + stream_skip_whitespace(stream); + } + TSQueryError e = ts_query__parse_pattern( + self, + stream, + depth, + child_is_immediate, + &child_capture_quantifiers + ); + if (e == PARENT_DONE) { + if (stream->next == ')') { + stream_advance(stream); + break; + } + e = TSQueryErrorSyntax; + } + if (e) { + capture_quantifiers_delete(&child_capture_quantifiers); + return e; + } + + capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); + capture_quantifiers_clear(&child_capture_quantifiers); + child_is_immediate = false; + } + + capture_quantifiers_delete(&child_capture_quantifiers); + } + + // A dot/pound character indicates the start of a predicate. + else if (stream->next == '.' 
|| stream->next == '#') { + stream_advance(stream); + return ts_query__parse_predicate(self, stream); + } + + // Otherwise, this parenthesis is the start of a named node. + else { + TSSymbol symbol; + + // Parse a normal node name + if (stream_is_ident_start(stream)) { + const char *node_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - node_name); + + // Parse the wildcard symbol + if (length == 1 && node_name[0] == '_') { + symbol = WILDCARD_SYMBOL; + } + + else { + symbol = ts_language_symbol_for_name( + self->language, + node_name, + length, + true + ); + if (!symbol) { + stream_reset(stream, node_name); + return TSQueryErrorNodeType; + } + } + } else { + return TSQueryErrorSyntax; + } + + // Add a step for the node. + array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); + QueryStep *step = array_back(&self->steps); + if (ts_language_symbol_metadata(self->language, symbol).supertype) { + step->supertype_symbol = step->symbol; + step->symbol = WILDCARD_SYMBOL; + } + if (symbol == WILDCARD_SYMBOL) { + step->is_named = true; + } + + stream_skip_whitespace(stream); + + if (stream->next == '/') { + stream_advance(stream); + if (!stream_is_ident_start(stream)) { + return TSQueryErrorSyntax; + } + + const char *node_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - node_name); + + step->symbol = ts_language_symbol_for_name( + self->language, + node_name, + length, + true + ); + if (!step->symbol) { + stream_reset(stream, node_name); + return TSQueryErrorNodeType; + } + + stream_skip_whitespace(stream); + } + + // Parse the child patterns + bool child_is_immediate = false; + uint16_t last_child_step_index = 0; + uint16_t negated_field_count = 0; + TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT]; + CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); + for (;;) { + // Parse a negated field assertion + if (stream->next == 
'!') { + stream_advance(stream); + stream_skip_whitespace(stream); + if (!stream_is_ident_start(stream)) { + capture_quantifiers_delete(&child_capture_quantifiers); + return TSQueryErrorSyntax; + } + const char *field_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - field_name); + stream_skip_whitespace(stream); + + TSFieldId field_id = ts_language_field_id_for_name( + self->language, + field_name, + length + ); + if (!field_id) { + stream->input = field_name; + capture_quantifiers_delete(&child_capture_quantifiers); + return TSQueryErrorField; + } + + // Keep the field ids sorted. + if (negated_field_count < MAX_NEGATED_FIELD_COUNT) { + negated_field_ids[negated_field_count] = field_id; + negated_field_count++; + } + + continue; + } + + // Parse a sibling anchor + if (stream->next == '.') { + child_is_immediate = true; + stream_advance(stream); + stream_skip_whitespace(stream); + } + + uint16_t step_index = self->steps.size; + TSQueryError e = ts_query__parse_pattern( + self, + stream, + depth + 1, + child_is_immediate, + &child_capture_quantifiers + ); + if (e == PARENT_DONE) { + if (stream->next == ')') { + if (child_is_immediate) { + if (last_child_step_index == 0) { + capture_quantifiers_delete(&child_capture_quantifiers); + return TSQueryErrorSyntax; + } + self->steps.contents[last_child_step_index].is_last_child = true; + } + + if (negated_field_count) { + ts_query__add_negated_fields( + self, + starting_step_index, + negated_field_ids, + negated_field_count + ); + } + + stream_advance(stream); + break; + } + e = TSQueryErrorSyntax; + } + if (e) { + capture_quantifiers_delete(&child_capture_quantifiers); + return e; + } + + capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); + + last_child_step_index = step_index; + child_is_immediate = false; + capture_quantifiers_clear(&child_capture_quantifiers); + } + capture_quantifiers_delete(&child_capture_quantifiers); + } + } + + // 
Parse a wildcard pattern + else if (stream->next == '_') { + stream_advance(stream); + stream_skip_whitespace(stream); + + // Add a step that matches any kind of node + array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); + } + + // Parse a double-quoted anonymous leaf node expression + else if (stream->next == '"') { + const char *string_start = stream->input; + TSQueryError e = ts_query__parse_string_literal(self, stream); + if (e) return e; + + // Add a step for the node + TSSymbol symbol = ts_language_symbol_for_name( + self->language, + self->string_buffer.contents, + self->string_buffer.size, + false + ); + if (!symbol) { + stream_reset(stream, string_start + 1); + return TSQueryErrorNodeType; + } + array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); + } + + // Parse a field-prefixed pattern + else if (stream_is_ident_start(stream)) { + // Parse the field name + const char *field_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - field_name); + stream_skip_whitespace(stream); + + if (stream->next != ':') { + stream_reset(stream, field_name); + return TSQueryErrorSyntax; + } + stream_advance(stream); + stream_skip_whitespace(stream); + + // Parse the pattern + CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new(); + TSQueryError e = ts_query__parse_pattern( + self, + stream, + depth, + is_immediate, + &field_capture_quantifiers + ); + if (e) { + capture_quantifiers_delete(&field_capture_quantifiers); + if (e == PARENT_DONE) e = TSQueryErrorSyntax; + return e; + } + + // Add the field name to the first step of the pattern + TSFieldId field_id = ts_language_field_id_for_name( + self->language, + field_name, + length + ); + if (!field_id) { + stream->input = field_name; + return TSQueryErrorField; + } + + uint32_t step_index = starting_step_index; + QueryStep *step = &self->steps.contents[step_index]; + for (;;) { + step->field = field_id; + if ( 
+ step->alternative_index != NONE && + step->alternative_index > step_index && + step->alternative_index < self->steps.size + ) { + step_index = step->alternative_index; + step = &self->steps.contents[step_index]; + } else { + break; + } + } + + capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers); + capture_quantifiers_delete(&field_capture_quantifiers); + } + + else { + return TSQueryErrorSyntax; + } + + stream_skip_whitespace(stream); + + // Parse suffixes modifiers for this pattern + TSQuantifier quantifier = TSQuantifierOne; + for (;;) { + // Parse the one-or-more operator. + if (stream->next == '+') { + quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier); + + stream_advance(stream); + stream_skip_whitespace(stream); + + QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); + repeat_step.alternative_index = starting_step_index; + repeat_step.is_pass_through = true; + repeat_step.alternative_is_immediate = true; + array_push(&self->steps, repeat_step); + } + + // Parse the zero-or-more repetition operator. + else if (stream->next == '*') { + quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier); + + stream_advance(stream); + stream_skip_whitespace(stream); + + QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); + repeat_step.alternative_index = starting_step_index; + repeat_step.is_pass_through = true; + repeat_step.alternative_is_immediate = true; + array_push(&self->steps, repeat_step); + + // Stop when `step->alternative_index` is `NONE` or it points to + // `repeat_step` or beyond. Note that having just been pushed, + // `repeat_step` occupies slot `self->steps.size - 1`. + QueryStep *step = &self->steps.contents[starting_step_index]; + while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) { + step = &self->steps.contents[step->alternative_index]; + } + step->alternative_index = self->steps.size; + } + + // Parse the optional operator. 
+ else if (stream->next == '?') { + quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier); + + stream_advance(stream); + stream_skip_whitespace(stream); + + QueryStep *step = &self->steps.contents[starting_step_index]; + while (step->alternative_index != NONE && step->alternative_index < self->steps.size) { + step = &self->steps.contents[step->alternative_index]; + } + step->alternative_index = self->steps.size; + } + + // Parse an '@'-prefixed capture pattern + else if (stream->next == '@') { + stream_advance(stream); + if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; + const char *capture_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - capture_name); + stream_skip_whitespace(stream); + + // Add the capture id to the first step of the pattern + uint16_t capture_id = symbol_table_insert_name( + &self->captures, + capture_name, + length + ); + + // Add the capture quantifier + capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne); + + uint32_t step_index = starting_step_index; + for (;;) { + QueryStep *step = &self->steps.contents[step_index]; + query_step__add_capture(step, capture_id); + if ( + step->alternative_index != NONE && + step->alternative_index > step_index && + step->alternative_index < self->steps.size + ) { + step_index = step->alternative_index; + } else { + break; + } + } + } + + // No more suffix modifiers + else { + break; + } + } + + capture_quantifiers_mul(capture_quantifiers, quantifier); + + return 0; +} + +TSQuery *ts_query_new( + const TSLanguage *language, + const char *source, + uint32_t source_len, + uint32_t *error_offset, + TSQueryError *error_type +) { + if ( + !language || + language->version > TREE_SITTER_LANGUAGE_VERSION || + language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION + ) { + *error_type = TSQueryErrorLanguage; + return NULL; + } + + TSQuery *self = ts_malloc(sizeof(TSQuery)); + *self = (TSQuery) { + .steps 
= array_new(), + .pattern_map = array_new(), + .captures = symbol_table_new(), + .capture_quantifiers = array_new(), + .predicate_values = symbol_table_new(), + .predicate_steps = array_new(), + .patterns = array_new(), + .step_offsets = array_new(), + .string_buffer = array_new(), + .negated_fields = array_new(), + .repeat_symbols_with_rootless_patterns = array_new(), + .wildcard_root_pattern_count = 0, + .language = ts_language_copy(language), + }; + + array_push(&self->negated_fields, 0); + + // Parse all of the S-expressions in the given string. + Stream stream = stream_new(source, source_len); + stream_skip_whitespace(&stream); + while (stream.input < stream.end) { + uint32_t pattern_index = self->patterns.size; + uint32_t start_step_index = self->steps.size; + uint32_t start_predicate_step_index = self->predicate_steps.size; + array_push(&self->patterns, ((QueryPattern) { + .steps = (Slice) {.offset = start_step_index}, + .predicate_steps = (Slice) {.offset = start_predicate_step_index}, + .start_byte = stream_offset(&stream), + .is_non_local = false, + })); + CaptureQuantifiers capture_quantifiers = capture_quantifiers_new(); + *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers); + array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); + + QueryPattern *pattern = array_back(&self->patterns); + pattern->steps.length = self->steps.size - start_step_index; + pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index; + + // If any pattern could not be parsed, then report the error information + // and terminate. 
+ if (*error_type) { + if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax; + *error_offset = stream_offset(&stream); + capture_quantifiers_delete(&capture_quantifiers); + ts_query_delete(self); + return NULL; + } + + // Maintain a list of capture quantifiers for each pattern + array_push(&self->capture_quantifiers, capture_quantifiers); + + // Maintain a map that can look up patterns for a given root symbol. + uint16_t wildcard_root_alternative_index = NONE; + for (;;) { + QueryStep *step = &self->steps.contents[start_step_index]; + + // If a pattern has a wildcard at its root, but it has a non-wildcard child, + // then optimize the matching process by skipping matching the wildcard. + // Later, during the matching process, the query cursor will check that + // there is a parent node, and capture it if necessary. + if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) { + QueryStep *second_step = &self->steps.contents[start_step_index + 1]; + if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) { + wildcard_root_alternative_index = step->alternative_index; + start_step_index += 1; + step = second_step; + } + } + + // Determine whether the pattern has a single root node. This affects + // decisions about whether or not to start matching the pattern when + // a query cursor has a range restriction or when immediately within an + // error node. 
+ uint32_t start_depth = step->depth; + bool is_rooted = start_depth == 0; + for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) { + QueryStep *child_step = &self->steps.contents[step_index]; + if (child_step->is_dead_end) break; + if (child_step->depth == start_depth) { + is_rooted = false; + break; + } + } + + ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) { + .step_index = start_step_index, + .pattern_index = pattern_index, + .is_rooted = is_rooted + }); + if (step->symbol == WILDCARD_SYMBOL) { + self->wildcard_root_pattern_count++; + } + + // If there are alternatives or options at the root of the pattern, + // then add multiple entries to the pattern map. + if (step->alternative_index != NONE) { + start_step_index = step->alternative_index; + } else if (wildcard_root_alternative_index != NONE) { + start_step_index = wildcard_root_alternative_index; + wildcard_root_alternative_index = NONE; + } else { + break; + } + } + } + + if (!ts_query__analyze_patterns(self, error_offset)) { + *error_type = TSQueryErrorStructure; + ts_query_delete(self); + return NULL; + } + + array_delete(&self->string_buffer); + return self; +} + +void ts_query_delete(TSQuery *self) { + if (self) { + array_delete(&self->steps); + array_delete(&self->pattern_map); + array_delete(&self->predicate_steps); + array_delete(&self->patterns); + array_delete(&self->step_offsets); + array_delete(&self->string_buffer); + array_delete(&self->negated_fields); + array_delete(&self->repeat_symbols_with_rootless_patterns); + ts_language_delete(self->language); + symbol_table_delete(&self->captures); + symbol_table_delete(&self->predicate_values); + for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) { + CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index); + capture_quantifiers_delete(capture_quantifiers); + } + array_delete(&self->capture_quantifiers); + ts_free(self); + } +} + +uint32_t 
ts_query_pattern_count(const TSQuery *self) { + return self->patterns.size; +} + +uint32_t ts_query_capture_count(const TSQuery *self) { + return self->captures.slices.size; +} + +uint32_t ts_query_string_count(const TSQuery *self) { + return self->predicate_values.slices.size; +} + +const char *ts_query_capture_name_for_id( + const TSQuery *self, + uint32_t index, + uint32_t *length +) { + return symbol_table_name_for_id(&self->captures, index, length); +} + +TSQuantifier ts_query_capture_quantifier_for_id( + const TSQuery *self, + uint32_t pattern_index, + uint32_t capture_index +) { + CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index); + return capture_quantifier_for_id(capture_quantifiers, capture_index); +} + +const char *ts_query_string_value_for_id( + const TSQuery *self, + uint32_t index, + uint32_t *length +) { + return symbol_table_name_for_id(&self->predicate_values, index, length); +} + +const TSQueryPredicateStep *ts_query_predicates_for_pattern( + const TSQuery *self, + uint32_t pattern_index, + uint32_t *step_count +) { + Slice slice = self->patterns.contents[pattern_index].predicate_steps; + *step_count = slice.length; + if (self->predicate_steps.contents == NULL) { + return NULL; + } + return &self->predicate_steps.contents[slice.offset]; +} + +uint32_t ts_query_start_byte_for_pattern( + const TSQuery *self, + uint32_t pattern_index +) { + return self->patterns.contents[pattern_index].start_byte; +} + +bool ts_query_is_pattern_rooted( + const TSQuery *self, + uint32_t pattern_index +) { + for (unsigned i = 0; i < self->pattern_map.size; i++) { + PatternEntry *entry = &self->pattern_map.contents[i]; + if (entry->pattern_index == pattern_index) { + if (!entry->is_rooted) return false; + } + } + return true; +} + +bool ts_query_is_pattern_non_local( + const TSQuery *self, + uint32_t pattern_index +) { + if (pattern_index < self->patterns.size) { + return self->patterns.contents[pattern_index].is_non_local; + 
} else { + return false; + } +} + +bool ts_query_is_pattern_guaranteed_at_step( + const TSQuery *self, + uint32_t byte_offset +) { + uint32_t step_index = UINT32_MAX; + for (unsigned i = 0; i < self->step_offsets.size; i++) { + StepOffset *step_offset = &self->step_offsets.contents[i]; + if (step_offset->byte_offset > byte_offset) break; + step_index = step_offset->step_index; + } + if (step_index < self->steps.size) { + return self->steps.contents[step_index].root_pattern_guaranteed; + } else { + return false; + } +} + +bool ts_query__step_is_fallible( + const TSQuery *self, + uint16_t step_index +) { + assert((uint32_t)step_index + 1 < self->steps.size); + QueryStep *step = &self->steps.contents[step_index]; + QueryStep *next_step = &self->steps.contents[step_index + 1]; + return ( + next_step->depth != PATTERN_DONE_MARKER && + next_step->depth > step->depth && + !next_step->parent_pattern_guaranteed + ); +} + +void ts_query_disable_capture( + TSQuery *self, + const char *name, + uint32_t length +) { + // Remove capture information for any pattern step that previously + // captured with the given name. + int id = symbol_table_id_for_name(&self->captures, name, length); + if (id != -1) { + for (unsigned i = 0; i < self->steps.size; i++) { + QueryStep *step = &self->steps.contents[i]; + query_step__remove_capture(step, id); + } + } +} + +void ts_query_disable_pattern( + TSQuery *self, + uint32_t pattern_index +) { + // Remove the given pattern from the pattern map. Its steps will still + // be in the `steps` array, but they will never be read. 
+ for (unsigned i = 0; i < self->pattern_map.size; i++) { + PatternEntry *pattern = &self->pattern_map.contents[i]; + if (pattern->pattern_index == pattern_index) { + array_erase(&self->pattern_map, i); + i--; + } + } +} + +/*************** + * QueryCursor + ***************/ + +TSQueryCursor *ts_query_cursor_new(void) { + TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor)); + *self = (TSQueryCursor) { + .did_exceed_match_limit = false, + .ascending = false, + .halted = false, + .states = array_new(), + .finished_states = array_new(), + .capture_list_pool = capture_list_pool_new(), + .start_byte = 0, + .end_byte = UINT32_MAX, + .start_point = {0, 0}, + .end_point = POINT_MAX, + .max_start_depth = UINT32_MAX, + }; + array_reserve(&self->states, 8); + array_reserve(&self->finished_states, 8); + return self; +} + +void ts_query_cursor_delete(TSQueryCursor *self) { + array_delete(&self->states); + array_delete(&self->finished_states); + ts_tree_cursor_delete(&self->cursor); + capture_list_pool_delete(&self->capture_list_pool); + ts_free(self); +} + +bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) { + return self->did_exceed_match_limit; +} + +uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) { + return self->capture_list_pool.max_capture_list_count; +} + +void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) { + self->capture_list_pool.max_capture_list_count = limit; +} + +#ifdef DEBUG_EXECUTE_QUERY +#define LOG(...) fprintf(stderr, __VA_ARGS__) +#else +#define LOG(...) 
+#endif + +void ts_query_cursor_exec( + TSQueryCursor *self, + const TSQuery *query, + TSNode node +) { + if (query) { + LOG("query steps:\n"); + for (unsigned i = 0; i < query->steps.size; i++) { + QueryStep *step = &query->steps.contents[i]; + LOG(" %u: {", i); + if (step->depth == PATTERN_DONE_MARKER) { + LOG("DONE"); + } else if (step->is_dead_end) { + LOG("dead_end"); + } else if (step->is_pass_through) { + LOG("pass_through"); + } else if (step->symbol != WILDCARD_SYMBOL) { + LOG("symbol: %s", query->language->symbol_names[step->symbol]); + } else { + LOG("symbol: *"); + } + if (step->field) { + LOG(", field: %s", query->language->field_names[step->field]); + } + if (step->alternative_index != NONE) { + LOG(", alternative: %u", step->alternative_index); + } + LOG("},\n"); + } + } + + array_clear(&self->states); + array_clear(&self->finished_states); + ts_tree_cursor_reset(&self->cursor, node); + capture_list_pool_reset(&self->capture_list_pool); + self->on_visible_node = true; + self->next_state_id = 0; + self->depth = 0; + self->ascending = false; + self->halted = false; + self->query = query; + self->did_exceed_match_limit = false; +} + +void ts_query_cursor_set_byte_range( + TSQueryCursor *self, + uint32_t start_byte, + uint32_t end_byte +) { + if (end_byte == 0) { + end_byte = UINT32_MAX; + } + self->start_byte = start_byte; + self->end_byte = end_byte; +} + +void ts_query_cursor_set_point_range( + TSQueryCursor *self, + TSPoint start_point, + TSPoint end_point +) { + if (end_point.row == 0 && end_point.column == 0) { + end_point = POINT_MAX; + } + self->start_point = start_point; + self->end_point = end_point; +} + +// Search through all of the in-progress states, and find the captured +// node that occurs earliest in the document. 
+static bool ts_query_cursor__first_in_progress_capture( + TSQueryCursor *self, + uint32_t *state_index, + uint32_t *byte_offset, + uint32_t *pattern_index, + bool *root_pattern_guaranteed +) { + bool result = false; + *state_index = UINT32_MAX; + *byte_offset = UINT32_MAX; + *pattern_index = UINT32_MAX; + for (unsigned i = 0; i < self->states.size; i++) { + QueryState *state = &self->states.contents[i]; + if (state->dead) continue; + + const CaptureList *captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + if (state->consumed_capture_count >= captures->size) { + continue; + } + + TSNode node = captures->contents[state->consumed_capture_count].node; + if ( + ts_node_end_byte(node) <= self->start_byte || + point_lte(ts_node_end_point(node), self->start_point) + ) { + state->consumed_capture_count++; + i--; + continue; + } + + uint32_t node_start_byte = ts_node_start_byte(node); + if ( + !result || + node_start_byte < *byte_offset || + (node_start_byte == *byte_offset && state->pattern_index < *pattern_index) + ) { + QueryStep *step = &self->query->steps.contents[state->step_index]; + if (root_pattern_guaranteed) { + *root_pattern_guaranteed = step->root_pattern_guaranteed; + } else if (step->root_pattern_guaranteed) { + continue; + } + + result = true; + *state_index = i; + *byte_offset = node_start_byte; + *pattern_index = state->pattern_index; + } + } + return result; +} + +// Determine which node is first in a depth-first traversal +int ts_query_cursor__compare_nodes(TSNode left, TSNode right) { + if (left.id != right.id) { + uint32_t left_start = ts_node_start_byte(left); + uint32_t right_start = ts_node_start_byte(right); + if (left_start < right_start) return -1; + if (left_start > right_start) return 1; + uint32_t left_node_count = ts_node_end_byte(left); + uint32_t right_node_count = ts_node_end_byte(right); + if (left_node_count > right_node_count) return -1; + if (left_node_count < right_node_count) return 1; + } + 
return 0; +} + +// Determine if either state contains a superset of the other state's captures. +void ts_query_cursor__compare_captures( + TSQueryCursor *self, + QueryState *left_state, + QueryState *right_state, + bool *left_contains_right, + bool *right_contains_left +) { + const CaptureList *left_captures = capture_list_pool_get( + &self->capture_list_pool, + left_state->capture_list_id + ); + const CaptureList *right_captures = capture_list_pool_get( + &self->capture_list_pool, + right_state->capture_list_id + ); + *left_contains_right = true; + *right_contains_left = true; + unsigned i = 0, j = 0; + for (;;) { + if (i < left_captures->size) { + if (j < right_captures->size) { + TSQueryCapture *left = &left_captures->contents[i]; + TSQueryCapture *right = &right_captures->contents[j]; + if (left->node.id == right->node.id && left->index == right->index) { + i++; + j++; + } else { + switch (ts_query_cursor__compare_nodes(left->node, right->node)) { + case -1: + *right_contains_left = false; + i++; + break; + case 1: + *left_contains_right = false; + j++; + break; + default: + *right_contains_left = false; + *left_contains_right = false; + i++; + j++; + break; + } + } + } else { + *right_contains_left = false; + break; + } + } else { + if (j < right_captures->size) { + *left_contains_right = false; + } + break; + } + } +} + +static void ts_query_cursor__add_state( + TSQueryCursor *self, + const PatternEntry *pattern +) { + QueryStep *step = &self->query->steps.contents[pattern->step_index]; + uint32_t start_depth = self->depth - step->depth; + + // Keep the states array in ascending order of start_depth and pattern_index, + // so that it can be processed more efficiently elsewhere. Usually, there is + // no work to do here because of two facts: + // * States with lower start_depth are naturally added first due to the + // order in which nodes are visited. 
+ // * Earlier patterns are naturally added first because of the ordering of the + // pattern_map data structure that's used to initiate matches. + // + // This loop is only needed in cases where two conditions hold: + // * A pattern consists of more than one sibling node, so that its states + // remain in progress after exiting the node that started the match. + // * The first node in the pattern matches against multiple nodes at the + // same depth. + // + // An example of this is the pattern '((comment)* (function))'. If multiple + // `comment` nodes appear in a row, then we may initiate a new state for this + // pattern while another state for the same pattern is already in progress. + // If there are multiple patterns like this in a query, then this loop will + // need to execute in order to keep the states ordered by pattern_index. + uint32_t index = self->states.size; + while (index > 0) { + QueryState *prev_state = &self->states.contents[index - 1]; + if (prev_state->start_depth < start_depth) break; + if (prev_state->start_depth == start_depth) { + // Avoid inserting an unnecessary duplicate state, which would be + // immediately pruned by the longest-match criteria. + if ( + prev_state->pattern_index == pattern->pattern_index && + prev_state->step_index == pattern->step_index + ) return; + if (prev_state->pattern_index <= pattern->pattern_index) break; + } + index--; + } + + LOG( + " start state. pattern:%u, step:%u\n", + pattern->pattern_index, + pattern->step_index + ); + array_insert(&self->states, index, ((QueryState) { + .id = UINT32_MAX, + .capture_list_id = NONE, + .step_index = pattern->step_index, + .pattern_index = pattern->pattern_index, + .start_depth = start_depth, + .consumed_capture_count = 0, + .seeking_immediate_match = true, + .has_in_progress_alternatives = false, + .needs_parent = step->depth == 1, + .dead = false, + })); +} + +// Acquire a capture list for this state. 
If there are no capture lists left in the +// pool, this will steal the capture list from another existing state, and mark that +// other state as 'dead'. +static CaptureList *ts_query_cursor__prepare_to_capture( + TSQueryCursor *self, + QueryState *state, + unsigned state_index_to_preserve +) { + if (state->capture_list_id == NONE) { + state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); + + // If there are no capture lists left in the pool, then terminate whichever + // state has captured the earliest node in the document, and steal its + // capture list. + if (state->capture_list_id == NONE) { + self->did_exceed_match_limit = true; + uint32_t state_index, byte_offset, pattern_index; + if ( + ts_query_cursor__first_in_progress_capture( + self, + &state_index, + &byte_offset, + &pattern_index, + NULL + ) && + state_index != state_index_to_preserve + ) { + LOG( + " abandon state. index:%u, pattern:%u, offset:%u.\n", + state_index, pattern_index, byte_offset + ); + QueryState *other_state = &self->states.contents[state_index]; + state->capture_list_id = other_state->capture_list_id; + other_state->capture_list_id = NONE; + other_state->dead = true; + CaptureList *list = capture_list_pool_get_mut( + &self->capture_list_pool, + state->capture_list_id + ); + array_clear(list); + return list; + } else { + LOG(" ran out of capture lists"); + return NULL; + } + } + } + return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id); +} + +static void ts_query_cursor__capture( + TSQueryCursor *self, + QueryState *state, + QueryStep *step, + TSNode node +) { + if (state->dead) return; + CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX); + if (!capture_list) { + state->dead = true; + return; + } + + for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { + uint16_t capture_id = step->capture_ids[j]; + if (step->capture_ids[j] == NONE) break; + array_push(capture_list, ((TSQueryCapture) { node, 
capture_id })); + LOG( + " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n", + ts_node_type(node), + state->pattern_index, + capture_id, + capture_list->size + ); + } +} + +// Duplicate the given state and insert the newly-created state immediately after +// the given state in the `states` array. Ensures that the given state reference is +// still valid, even if the states array is reallocated. +static QueryState *ts_query_cursor__copy_state( + TSQueryCursor *self, + QueryState **state_ref +) { + const QueryState *state = *state_ref; + uint32_t state_index = (uint32_t)(state - self->states.contents); + QueryState copy = *state; + copy.capture_list_id = NONE; + + // If the state has captures, copy its capture list. + if (state->capture_list_id != NONE) { + CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, ©, state_index); + if (!new_captures) return NULL; + const CaptureList *old_captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + array_push_all(new_captures, old_captures); + } + + array_insert(&self->states, state_index + 1, copy); + *state_ref = &self->states.contents[state_index]; + return &self->states.contents[state_index + 1]; +} + +static inline bool ts_query_cursor__should_descend( + TSQueryCursor *self, + bool node_intersects_range +) { + + if (node_intersects_range && self->depth < self->max_start_depth) { + return true; + } + + // If there are in-progress matches whose remaining steps occur + // deeper in the tree, then descend. 
+ for (unsigned i = 0; i < self->states.size; i++) { + QueryState *state = &self->states.contents[i];; + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if ( + next_step->depth != PATTERN_DONE_MARKER && + state->start_depth + next_step->depth > self->depth + ) { + return true; + } + } + + if (self->depth >= self->max_start_depth) { + return false; + } + + // If the current node is hidden, then a non-rooted pattern might match + // one if its roots inside of this node, and match another of its roots + // as part of a sibling node, so we may need to descend. + if (!self->on_visible_node) { + // Descending into a repetition node outside of the range can be + // expensive, because these nodes can have many visible children. + // Avoid descending into repetition nodes unless we have already + // determined that this query can match rootless patterns inside + // of this type of repetition node. + Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor); + if (ts_subtree_is_repetition(subtree)) { + bool exists; + uint32_t index; + array_search_sorted_by( + &self->query->repeat_symbols_with_rootless_patterns,, + ts_subtree_symbol(subtree), + &index, + &exists + ); + return exists; + } + + return true; + } + + return false; +} + +// Walk the tree, processing patterns until at least one pattern finishes, +// If one or more patterns finish, return `true` and store their states in the +// `finished_states` array. Multiple patterns can finish on the same node. If +// there are no more matches, return `false`. +static inline bool ts_query_cursor__advance( + TSQueryCursor *self, + bool stop_on_definite_step +) { + bool did_match = false; + for (;;) { + if (self->halted) { + while (self->states.size > 0) { + QueryState state = array_pop(&self->states); + capture_list_pool_release( + &self->capture_list_pool, + state.capture_list_id + ); + } + } + + if (did_match || self->halted) return did_match; + + // Exit the current node. 
+ if (self->ascending) { + if (self->on_visible_node) { + LOG( + "leave node. depth:%u, type:%s\n", + self->depth, + ts_node_type(ts_tree_cursor_current_node(&self->cursor)) + ); + + // After leaving a node, remove any states that cannot make further progress. + uint32_t deleted_count = 0; + for (unsigned i = 0, n = self->states.size; i < n; i++) { + QueryState *state = &self->states.contents[i]; + QueryStep *step = &self->query->steps.contents[state->step_index]; + + // If a state completed its pattern inside of this node, but was deferred from finishing + // in order to search for longer matches, mark it as finished. + if ( + step->depth == PATTERN_DONE_MARKER && + (state->start_depth > self->depth || self->depth == 0) + ) { + LOG(" finish pattern %u\n", state->pattern_index); + array_push(&self->finished_states, *state); + did_match = true; + deleted_count++; + } + + // If a state needed to match something within this node, then remove that state + // as it has failed to match. + else if ( + step->depth != PATTERN_DONE_MARKER && + (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth + ) { + LOG( + " failed to match. pattern:%u, step:%u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release( + &self->capture_list_pool, + state->capture_list_id + ); + deleted_count++; + } + + else if (deleted_count > 0) { + self->states.contents[i - deleted_count] = *state; + } + } + self->states.size -= deleted_count; + } + + // Leave this node by stepping to its next sibling or to its parent. 
+ switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) { + case TreeCursorStepVisible: + if (!self->on_visible_node) { + self->depth++; + self->on_visible_node = true; + } + self->ascending = false; + break; + case TreeCursorStepHidden: + if (self->on_visible_node) { + self->depth--; + self->on_visible_node = false; + } + self->ascending = false; + break; + default: + if (ts_tree_cursor_goto_parent(&self->cursor)) { + self->depth--; + } else { + LOG("halt at root\n"); + self->halted = true; + } + } + } + + // Enter a new node. + else { + // Get the properties of the current node. + TSNode node = ts_tree_cursor_current_node(&self->cursor); + TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor); + bool parent_precedes_range = !ts_node_is_null(parent_node) && ( + ts_node_end_byte(parent_node) <= self->start_byte || + point_lte(ts_node_end_point(parent_node), self->start_point) + ); + bool parent_follows_range = !ts_node_is_null(parent_node) && ( + ts_node_start_byte(parent_node) >= self->end_byte || + point_gte(ts_node_start_point(parent_node), self->end_point) + ); + bool node_precedes_range = parent_precedes_range || ( + ts_node_end_byte(node) <= self->start_byte || + point_lte(ts_node_end_point(node), self->start_point) + ); + bool node_follows_range = parent_follows_range || ( + ts_node_start_byte(node) >= self->end_byte || + point_gte(ts_node_start_point(node), self->end_point) + ); + bool parent_intersects_range = !parent_precedes_range && !parent_follows_range; + bool node_intersects_range = !node_precedes_range && !node_follows_range; + + if (self->on_visible_node) { + TSSymbol symbol = ts_node_symbol(node); + bool is_named = ts_node_is_named(node); + bool has_later_siblings; + bool has_later_named_siblings; + bool can_have_later_siblings_with_this_field; + TSFieldId field_id = 0; + TSSymbol supertypes[8] = {0}; + unsigned supertype_count = 8; + ts_tree_cursor_current_status( + &self->cursor, + &field_id, + &has_later_siblings, + 
&has_later_named_siblings, + &can_have_later_siblings_with_this_field, + supertypes, + &supertype_count + ); + LOG( + "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", + self->depth, + ts_node_type(node), + ts_language_field_name_for_id(self->query->language, field_id), + ts_node_start_point(node).row, + self->states.size, + self->finished_states.size + ); + + bool node_is_error = symbol == ts_builtin_sym_error; + bool parent_is_error = + !ts_node_is_null(parent_node) && + ts_node_symbol(parent_node) == ts_builtin_sym_error; + + // Add new states for any patterns whose root node is a wildcard. + if (!node_is_error) { + for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { + PatternEntry *pattern = &self->query->pattern_map.contents[i]; + + // If this node matches the first step of the pattern, then add a new + // state at the start of this pattern. + QueryStep *step = &self->query->steps.contents[pattern->step_index]; + uint32_t start_depth = self->depth - step->depth; + if ( + (pattern->is_rooted ? + node_intersects_range : + (parent_intersects_range && !parent_is_error)) && + (!step->field || field_id == step->field) && + (!step->supertype_symbol || supertype_count > 0) && + (start_depth <= self->max_start_depth) + ) { + ts_query_cursor__add_state(self, pattern); + } + } + } + + // Add new states for any patterns whose root node matches this node. + unsigned i; + if (ts_query__pattern_map_search(self->query, symbol, &i)) { + PatternEntry *pattern = &self->query->pattern_map.contents[i]; + + QueryStep *step = &self->query->steps.contents[pattern->step_index]; + uint32_t start_depth = self->depth - step->depth; + do { + // If this node matches the first step of the pattern, then add a new + // state at the start of this pattern. + if ( + (pattern->is_rooted ? 
+ node_intersects_range : + (parent_intersects_range && !parent_is_error)) && + (!step->field || field_id == step->field) && + (start_depth <= self->max_start_depth) + ) { + ts_query_cursor__add_state(self, pattern); + } + + // Advance to the next pattern whose root node matches this node. + i++; + if (i == self->query->pattern_map.size) break; + pattern = &self->query->pattern_map.contents[i]; + step = &self->query->steps.contents[pattern->step_index]; + } while (step->symbol == symbol); + } + + // Update all of the in-progress states with current node. + for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) { + QueryState *state = &self->states.contents[j]; + QueryStep *step = &self->query->steps.contents[state->step_index]; + state->has_in_progress_alternatives = false; + copy_count = 0; + + // Check that the node matches all of the criteria for the next + // step of the pattern. + if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue; + + // Determine if this node matches this step of the pattern, and also + // if this node can have later siblings that match this step of the + // pattern. 
+ bool node_does_match = false; + if (step->symbol == WILDCARD_SYMBOL) { + node_does_match = !node_is_error && (is_named || !step->is_named); + } else { + node_does_match = symbol == step->symbol; + } + bool later_sibling_can_match = has_later_siblings; + if ((step->is_immediate && is_named) || state->seeking_immediate_match) { + later_sibling_can_match = false; + } + if (step->is_last_child && has_later_named_siblings) { + node_does_match = false; + } + if (step->supertype_symbol) { + bool has_supertype = false; + for (unsigned k = 0; k < supertype_count; k++) { + if (supertypes[k] == step->supertype_symbol) { + has_supertype = true; + break; + } + } + if (!has_supertype) node_does_match = false; + } + if (step->field) { + if (step->field == field_id) { + if (!can_have_later_siblings_with_this_field) { + later_sibling_can_match = false; + } + } else { + node_does_match = false; + } + } + + if (step->negated_field_list_id) { + TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id]; + for (;;) { + TSFieldId negated_field_id = *negated_field_ids; + if (negated_field_id) { + negated_field_ids++; + if (ts_node_child_by_field_id(node, negated_field_id).id) { + node_does_match = false; + break; + } + } else { + break; + } + } + } + + // Remove states immediately if it is ever clear that they cannot match. + if (!node_does_match) { + if (!later_sibling_can_match) { + LOG( + " discard state. pattern:%u, step:%u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release( + &self->capture_list_pool, + state->capture_list_id + ); + array_erase(&self->states, j); + j--; + } + continue; + } + + // Some patterns can match their root node in multiple ways, capturing different + // children. If this pattern step could match later children within the same + // parent, then this query state cannot simply be updated in place. 
It must be + // split into two states: one that matches this node, and one which skips over + // this node, to preserve the possibility of matching later siblings. + if (later_sibling_can_match && ( + step->contains_captures || + ts_query__step_is_fallible(self->query, state->step_index) + )) { + if (ts_query_cursor__copy_state(self, &state)) { + LOG( + " split state for capture. pattern:%u, step:%u\n", + state->pattern_index, + state->step_index + ); + copy_count++; + } + } + + // If this pattern started with a wildcard, such that the pattern map + // actually points to the *second* step of the pattern, then check + // that the node has a parent, and capture the parent node if necessary. + if (state->needs_parent) { + TSNode parent = ts_tree_cursor_parent_node(&self->cursor); + if (ts_node_is_null(parent)) { + LOG(" missing parent node\n"); + state->dead = true; + } else { + state->needs_parent = false; + QueryStep *skipped_wildcard_step = step; + do { + skipped_wildcard_step--; + } while ( + skipped_wildcard_step->is_dead_end || + skipped_wildcard_step->is_pass_through || + skipped_wildcard_step->depth > 0 + ); + if (skipped_wildcard_step->capture_ids[0] != NONE) { + LOG(" capture wildcard parent\n"); + ts_query_cursor__capture( + self, + state, + skipped_wildcard_step, + parent + ); + } + } + } + + // If the current node is captured in this pattern, add it to the capture list. + if (step->capture_ids[0] != NONE) { + ts_query_cursor__capture(self, state, step, node); + } + + if (state->dead) { + array_erase(&self->states, j); + j--; + continue; + } + + // Advance this state to the next step of its pattern. + state->step_index++; + state->seeking_immediate_match = false; + LOG( + " advance state. 
pattern:%u, step:%u\n", + state->pattern_index, + state->step_index + ); + + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true; + + // If this state's next step has an alternative step, then copy the state in order + // to pursue both alternatives. The alternative step itself may have an alternative, + // so this is an interactive process. + unsigned end_index = j + 1; + for (unsigned k = j; k < end_index; k++) { + QueryState *child_state = &self->states.contents[k]; + QueryStep *child_step = &self->query->steps.contents[child_state->step_index]; + if (child_step->alternative_index != NONE) { + // A "dead-end" step exists only to add a non-sequential jump into the step sequence, + // via its alternative index. When a state reaches a dead-end step, it jumps straight + // to the step's alternative. + if (child_step->is_dead_end) { + child_state->step_index = child_step->alternative_index; + k--; + continue; + } + + // A "pass-through" step exists only to add a branch into the step sequence, + // via its alternative_index. When a state reaches a pass-through step, it splits + // in order to process the alternative step, and then it advances to the next step. + if (child_step->is_pass_through) { + child_state->step_index++; + k--; + } + + QueryState *copy = ts_query_cursor__copy_state(self, &child_state); + if (copy) { + LOG( + " split state for branch. 
pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", + copy->pattern_index, + copy->step_index, + next_step->alternative_index, + next_step->alternative_is_immediate, + capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size + ); + end_index++; + copy_count++; + copy->step_index = child_step->alternative_index; + if (child_step->alternative_is_immediate) { + copy->seeking_immediate_match = true; + } + } + } + } + } + + for (unsigned j = 0; j < self->states.size; j++) { + QueryState *state = &self->states.contents[j]; + if (state->dead) { + array_erase(&self->states, j); + j--; + continue; + } + + // Enforce the longest-match criteria. When a query pattern contains optional or + // repeated nodes, this is necessary to avoid multiple redundant states, where + // one state has a strict subset of another state's captures. + bool did_remove = false; + for (unsigned k = j + 1; k < self->states.size; k++) { + QueryState *other_state = &self->states.contents[k]; + + // Query states are kept in ascending order of start_depth and pattern_index. + // Since the longest-match criteria is only used for deduping matches of the same + // pattern and root node, we only need to perform pairwise comparisons within a + // small slice of the states array. + if ( + other_state->start_depth != state->start_depth || + other_state->pattern_index != state->pattern_index + ) break; + + bool left_contains_right, right_contains_left; + ts_query_cursor__compare_captures( + self, + state, + other_state, + &left_contains_right, + &right_contains_left + ); + if (left_contains_right) { + if (state->step_index == other_state->step_index) { + LOG( + " drop shorter state. 
pattern: %u, step_index: %u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); + array_erase(&self->states, k); + k--; + continue; + } + other_state->has_in_progress_alternatives = true; + } + if (right_contains_left) { + if (state->step_index == other_state->step_index) { + LOG( + " drop shorter state. pattern: %u, step_index: %u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); + array_erase(&self->states, j); + j--; + did_remove = true; + break; + } + state->has_in_progress_alternatives = true; + } + } + + // If the state is at the end of its pattern, remove it from the list + // of in-progress states and add it to the list of finished states. + if (!did_remove) { + LOG( + " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n", + state->pattern_index, + state->start_depth, + state->step_index, + capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size + ); + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if (next_step->depth == PATTERN_DONE_MARKER) { + if (state->has_in_progress_alternatives) { + LOG(" defer finishing pattern %u\n", state->pattern_index); + } else { + LOG(" finish pattern %u\n", state->pattern_index); + array_push(&self->finished_states, *state); + array_erase(&self->states, (uint32_t)(state - self->states.contents)); + did_match = true; + j--; + } + } + } + } + } + + if (ts_query_cursor__should_descend(self, node_intersects_range)) { + switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) { + case TreeCursorStepVisible: + self->depth++; + self->on_visible_node = true; + continue; + case TreeCursorStepHidden: + self->on_visible_node = false; + continue; + default: + break; + } + } + + self->ascending = true; + } + } +} + +bool ts_query_cursor_next_match( + TSQueryCursor *self, + TSQueryMatch 
*match +) { + if (self->finished_states.size == 0) { + if (!ts_query_cursor__advance(self, false)) { + return false; + } + } + + QueryState *state = &self->finished_states.contents[0]; + if (state->id == UINT32_MAX) state->id = self->next_state_id++; + match->id = state->id; + match->pattern_index = state->pattern_index; + const CaptureList *captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + match->captures = captures->contents; + match->capture_count = captures->size; + capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); + array_erase(&self->finished_states, 0); + return true; +} + +void ts_query_cursor_remove_match( + TSQueryCursor *self, + uint32_t match_id +) { + for (unsigned i = 0; i < self->finished_states.size; i++) { + const QueryState *state = &self->finished_states.contents[i]; + if (state->id == match_id) { + capture_list_pool_release( + &self->capture_list_pool, + state->capture_list_id + ); + array_erase(&self->finished_states, i); + return; + } + } + + // Remove unfinished query states as well to prevent future + // captures for a match being removed. + for (unsigned i = 0; i < self->states.size; i++) { + const QueryState *state = &self->states.contents[i]; + if (state->id == match_id) { + capture_list_pool_release( + &self->capture_list_pool, + state->capture_list_id + ); + array_erase(&self->states, i); + return; + } + } +} + +bool ts_query_cursor_next_capture( + TSQueryCursor *self, + TSQueryMatch *match, + uint32_t *capture_index +) { + // The goal here is to return captures in order, even though they may not + // be discovered in order, because patterns can overlap. Search for matches + // until there is a finished capture that is before any unfinished capture. + for (;;) { + // First, find the earliest capture in an unfinished match. 
+ uint32_t first_unfinished_capture_byte; + uint32_t first_unfinished_pattern_index; + uint32_t first_unfinished_state_index; + bool first_unfinished_state_is_definite = false; + ts_query_cursor__first_in_progress_capture( + self, + &first_unfinished_state_index, + &first_unfinished_capture_byte, + &first_unfinished_pattern_index, + &first_unfinished_state_is_definite + ); + + // Then find the earliest capture in a finished match. It must occur + // before the first capture in an *unfinished* match. + QueryState *first_finished_state = NULL; + uint32_t first_finished_capture_byte = first_unfinished_capture_byte; + uint32_t first_finished_pattern_index = first_unfinished_pattern_index; + for (unsigned i = 0; i < self->finished_states.size;) { + QueryState *state = &self->finished_states.contents[i]; + const CaptureList *captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + + // Remove states whose captures are all consumed. + if (state->consumed_capture_count >= captures->size) { + capture_list_pool_release( + &self->capture_list_pool, + state->capture_list_id + ); + array_erase(&self->finished_states, i); + continue; + } + + TSNode node = captures->contents[state->consumed_capture_count].node; + + bool node_precedes_range = ( + ts_node_end_byte(node) <= self->start_byte || + point_lte(ts_node_end_point(node), self->start_point) + ); + bool node_follows_range = ( + ts_node_start_byte(node) >= self->end_byte || + point_gte(ts_node_start_point(node), self->end_point) + ); + bool node_outside_of_range = node_precedes_range || node_follows_range; + + // Skip captures that are outside of the cursor's range. 
+ if (node_outside_of_range) { + state->consumed_capture_count++; + continue; + } + + uint32_t node_start_byte = ts_node_start_byte(node); + if ( + node_start_byte < first_finished_capture_byte || + ( + node_start_byte == first_finished_capture_byte && + state->pattern_index < first_finished_pattern_index + ) + ) { + first_finished_state = state; + first_finished_capture_byte = node_start_byte; + first_finished_pattern_index = state->pattern_index; + } + i++; + } + + // If there is finished capture that is clearly before any unfinished + // capture, then return its match, and its capture index. Internally + // record the fact that the capture has been 'consumed'. + QueryState *state; + if (first_finished_state) { + state = first_finished_state; + } else if (first_unfinished_state_is_definite) { + state = &self->states.contents[first_unfinished_state_index]; + } else { + state = NULL; + } + + if (state) { + if (state->id == UINT32_MAX) state->id = self->next_state_id++; + match->id = state->id; + match->pattern_index = state->pattern_index; + const CaptureList *captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + match->captures = captures->contents; + match->capture_count = captures->size; + *capture_index = state->consumed_capture_count; + state->consumed_capture_count++; + return true; + } + + if (capture_list_pool_is_empty(&self->capture_list_pool)) { + LOG( + " abandon state. index:%u, pattern:%u, offset:%u.\n", + first_unfinished_state_index, + first_unfinished_pattern_index, + first_unfinished_capture_byte + ); + capture_list_pool_release( + &self->capture_list_pool, + self->states.contents[first_unfinished_state_index].capture_list_id + ); + array_erase(&self->states, first_unfinished_state_index); + } + + // If there are no finished matches that are ready to be returned, then + // continue finding more matches. 
+ if ( + !ts_query_cursor__advance(self, true) && + self->finished_states.size == 0 + ) return false; + } +} + +void ts_query_cursor_set_max_start_depth( + TSQueryCursor *self, + uint32_t max_start_depth +) { + self->max_start_depth = max_start_depth; +} + +#undef LOG diff --git a/parser/src/reduce_action.h b/parser/src/reduce_action.h new file mode 100644 index 00000000..2d95b8bc --- /dev/null +++ b/parser/src/reduce_action.h @@ -0,0 +1,34 @@ +#ifndef TREE_SITTER_REDUCE_ACTION_H_ +#define TREE_SITTER_REDUCE_ACTION_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./array.h" +#include "./api.h" + +typedef struct { + uint32_t count; + TSSymbol symbol; + int dynamic_precedence; + unsigned short production_id; +} ReduceAction; + +typedef Array(ReduceAction) ReduceActionSet; + +static inline void ts_reduce_action_set_add(ReduceActionSet *self, + ReduceAction new_action) { + for (uint32_t i = 0; i < self->size; i++) { + ReduceAction action = self->contents[i]; + if (action.symbol == new_action.symbol && action.count == new_action.count) + return; + } + array_push(self, new_action); +} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_REDUCE_ACTION_H_ diff --git a/parser/src/reusable_node.h b/parser/src/reusable_node.h index 28268cdf..63fe3c1a 100644 --- a/parser/src/reusable_node.h +++ b/parser/src/reusable_node.h @@ -2,8 +2,8 @@ typedef struct { Subtree tree; - t_u32 child_index; - t_u32 byte_offset; + uint32_t child_index; + uint32_t byte_offset; } StackEntry; typedef struct { @@ -26,7 +26,7 @@ static inline Subtree reusable_node_tree(ReusableNode *self) { : NULL_SUBTREE; } -static inline t_u32 reusable_node_byte_offset(ReusableNode *self) { +static inline uint32_t reusable_node_byte_offset(ReusableNode *self) { return self->stack.size > 0 ? 
self->stack.contents[self->stack.size - 1].byte_offset : UINT32_MAX; @@ -38,13 +38,13 @@ static inline void reusable_node_delete(ReusableNode *self) { static inline void reusable_node_advance(ReusableNode *self) { StackEntry last_entry = *array_back(&self->stack); - t_u32 byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree); + uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree); if (ts_subtree_has_external_tokens(last_entry.tree)) { self->last_external_token = ts_subtree_last_external_token(last_entry.tree); } Subtree tree; - t_u32 next_index; + uint32_t next_index; do { StackEntry popped_entry = array_pop(&self->stack); next_index = popped_entry.child_index + 1; diff --git a/parser/src/scanner.c b/parser/src/scanner.c index 4657955c..a63963cf 100644 --- a/parser/src/scanner.c +++ b/parser/src/scanner.c @@ -1,181 +1,157 @@ -#include "me/mem/mem_alloc.h" -#include "me/types.h" -#include "me/vec/vec_parser_heredoc.h" -#include "parser/types/types_lexer.h" -#include "parser/types/types_scanner_ctx.h" +#include "./array.h" +#include "./parser.h" #include #include #include +#include -#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 - -enum TokenType -{ - HEREDOC_START, - SIMPLE_HEREDOC_BODY, - HEREDOC_BODY_BEGINNING, - HEREDOC_CONTENT, - HEREDOC_END, - FILE_DESCRIPTOR, - EMPTY_VALUE, - CONCAT, - VARIABLE_NAME, - TEST_OPERATOR, - REGEX, - REGEX_NO_SLASH, - REGEX_NO_SPACE, - EXPANSION_WORD, - EXTGLOB_PATTERN, - BARE_DOLLAR, - BRACE_START, - IMMEDIATE_DOUBLE_HASH, - EXTERNAL_EXPANSION_SYM_HASH, - EXTERNAL_EXPANSION_SYM_BANG, - EXTERNAL_EXPANSION_SYM_EQUAL, - CLOSING_BRACE, - CLOSING_BRACKET, - HEREDOC_ARROW, - HEREDOC_ARROW_DASH, - NEWLINE, - OPENING_PAREN, - ESAC, - ERROR_RECOVERY, +enum TokenType { + HEREDOC_START, + SIMPLE_HEREDOC_BODY, + HEREDOC_BODY_BEGINNING, + HEREDOC_CONTENT, + HEREDOC_END, + FILE_DESCRIPTOR, + EMPTY_VALUE, + CONCAT, + VARIABLE_NAME, + TEST_OPERATOR, + REGEX, + REGEX_NO_SLASH, + 
REGEX_NO_SPACE, + EXPANSION_WORD, + EXTGLOB_PATTERN, + BARE_DOLLAR, + BRACE_START, + IMMEDIATE_DOUBLE_HASH, + EXTERNAL_EXPANSION_SYM_HASH, + EXTERNAL_EXPANSION_SYM_BANG, + EXTERNAL_EXPANSION_SYM_EQUAL, + CLOSING_BRACE, + CLOSING_BRACKET, + HEREDOC_ARROW, + HEREDOC_ARROW_DASH, + NEWLINE, + OPENING_PAREN, + ESAC, + ERROR_RECOVERY, }; -static inline t_heredoc heredoc_new(void) -{ - return ((t_heredoc){ - .is_raw = false, - .started = false, - .allows_indent = false, - .delimiter = alloc_new_buffer(0), - .current_leading_word = alloc_new_buffer(0), - }); +typedef Array(char) String; + +typedef struct { + bool is_raw; + bool started; + bool allows_indent; + String delimiter; + String current_leading_word; +} Heredoc; + +#define heredoc_new() \ + { \ + .is_raw = false, \ + .started = false, \ + .allows_indent = false, \ + .delimiter = array_new(), \ + .current_leading_word = array_new(), \ + }; + +typedef struct { + uint8_t last_glob_paren_depth; + bool ext_was_in_double_quote; + bool ext_saw_outside_quote; + Array(Heredoc) heredocs; +} Scanner; + +static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + +static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } + +static inline bool in_error_recovery(const bool *valid_symbols) { return valid_symbols[ERROR_RECOVERY]; } + +static inline void reset_string(String *string) { + if (string->size > 0) { + memset(string->contents, 0, string->size); + array_clear(string); + } } -static inline void advance(t_lexer *lexer) -{ - lexer->advance(lexer, false); +static inline void reset_heredoc(Heredoc *heredoc) { + heredoc->is_raw = false; + heredoc->started = false; + heredoc->allows_indent = false; + reset_string(&heredoc->delimiter); } -static inline void skip(t_lexer *lexer) -{ - lexer->advance(lexer, true); +static inline void reset(Scanner *scanner) { + for (uint32_t i = 0; i < scanner->heredocs.size; i++) { + reset_heredoc(array_get(&scanner->heredocs, i)); + } } -static inline bool 
in_error_recovery(const bool *valid_symbols) -{ - return valid_symbols[ERROR_RECOVERY]; +static unsigned serialize(Scanner *scanner, char *buffer) { + uint32_t size = 0; + + buffer[size++] = (char)scanner->last_glob_paren_depth; + buffer[size++] = (char)scanner->ext_was_in_double_quote; + buffer[size++] = (char)scanner->ext_saw_outside_quote; + buffer[size++] = (char)scanner->heredocs.size; + + for (uint32_t i = 0; i < scanner->heredocs.size; i++) { + Heredoc *heredoc = array_get(&scanner->heredocs, i); + if (heredoc->delimiter.size + 3 + size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { + return 0; + } + + buffer[size++] = (char)heredoc->is_raw; + buffer[size++] = (char)heredoc->started; + buffer[size++] = (char)heredoc->allows_indent; + + memcpy(&buffer[size], &heredoc->delimiter.size, sizeof(uint32_t)); + size += sizeof(uint32_t); + if (heredoc->delimiter.size > 0) { + memcpy(&buffer[size], heredoc->delimiter.contents, heredoc->delimiter.size); + size += heredoc->delimiter.size; + } + } + return size; } -static inline void reset_string(t_buffer_str *string) -{ - if (string->len > 0) - { - memset(string->buf, 0, string->len); - string->len = 0; - } -} +static void deserialize(Scanner *scanner, const char *buffer, unsigned length) { + if (length == 0) { + reset(scanner); + } else { + uint32_t size = 0; + scanner->last_glob_paren_depth = buffer[size++]; + scanner->ext_was_in_double_quote = buffer[size++]; + scanner->ext_saw_outside_quote = buffer[size++]; + uint32_t heredoc_count = (unsigned char)buffer[size++]; + for (uint32_t i = 0; i < heredoc_count; i++) { + Heredoc *heredoc = NULL; + if (i < scanner->heredocs.size) { + heredoc = array_get(&scanner->heredocs, i); + } else { + Heredoc new_heredoc = heredoc_new(); + array_push(&scanner->heredocs, new_heredoc); + heredoc = array_back(&scanner->heredocs); + } -static inline void reset_heredoc(t_heredoc *heredoc) -{ - heredoc->is_raw = false; - heredoc->started = false; - heredoc->allows_indent = false; - 
reset_string(&heredoc->delimiter); -} + heredoc->is_raw = buffer[size++]; + heredoc->started = buffer[size++]; + heredoc->allows_indent = buffer[size++]; -static inline void reset(t_scanner_ctx *scanner) -{ - t_u32 i; + memcpy(&heredoc->delimiter.size, &buffer[size], sizeof(uint32_t)); + size += sizeof(uint32_t); + array_reserve(&heredoc->delimiter, heredoc->delimiter.size); - i = 0; - while (i < scanner->heredocs.len) - { - reset_heredoc(&scanner->heredocs.buffer[i]); - i++; - } -} - -static unsigned serialize(t_scanner_ctx *scanner, char *buffer) -{ - t_u32 size; - t_u32 i; - t_heredoc *heredoc; - - size = 0; - buffer[size++] = (char)scanner->last_glob_paren_depth; - buffer[size++] = (char)scanner->ext_was_in_double_quote; - buffer[size++] = (char)scanner->ext_saw_outside_quote; - buffer[size++] = (char)scanner->heredocs.len; - i = 0; - while (i < scanner->heredocs.len) - { - heredoc = &scanner->heredocs.buffer[i]; - if (heredoc->delimiter.len + 3 + size >= - TREE_SITTER_SERIALIZATION_BUFFER_SIZE) - return 0; - buffer[size++] = (char)heredoc->is_raw; - buffer[size++] = (char)heredoc->started; - buffer[size++] = (char)heredoc->allows_indent; - memcpy(&buffer[size], &heredoc->delimiter.len, sizeof(t_u32)); - size += sizeof(t_u32); - if (heredoc->delimiter.len > 0) - { - memcpy(&buffer[size], heredoc->delimiter.buf, - heredoc->delimiter.len); - size += heredoc->delimiter.len; - } - i++; - } - return size; -} - -static void deserialize(t_scanner_ctx *scanner, const char *buffer, - unsigned length) -{ - t_u32 size; - t_u32 heredoc_count; - t_heredoc *heredoc; - t_u32 i; - - size = 0; - if (length == 0) - reset(scanner); - else - { - scanner->last_glob_paren_depth = buffer[size++]; - scanner->ext_was_in_double_quote = buffer[size++]; - scanner->ext_saw_outside_quote = buffer[size++]; - heredoc_count = (unsigned char)buffer[size++]; - i = 0; - while (i < heredoc_count) - { - heredoc = NULL; - if (i < scanner->heredocs.len) - heredoc = &scanner->heredocs.buffer[i]; - 
else - { - vec_parser_heredoc_push(&scanner->heredocs, heredoc_new()); - heredoc = &scanner->heredocs.buffer[scanner->heredocs.len - 1]; - } - heredoc->is_raw = buffer[size++]; - heredoc->started = buffer[size++]; - heredoc->allows_indent = buffer[size++]; - memcpy(&heredoc->delimiter.len, &buffer[size], sizeof(t_u32)); - size += sizeof(t_u32); - str_reserve(&heredoc->delimiter, heredoc->delimiter.len); - if (heredoc->delimiter.len > 0) - { - memcpy(heredoc->delimiter.buf, &buffer[size], - heredoc->delimiter.len); - size += heredoc->delimiter.len; - } - i++; - } - assert(size == length); - } + if (heredoc->delimiter.size > 0) { + memcpy(heredoc->delimiter.contents, &buffer[size], heredoc->delimiter.size); + size += heredoc->delimiter.size; + } + } + assert(size == length); + } } /** @@ -185,1320 +161,1057 @@ static void deserialize(t_scanner_ctx *scanner, const char *buffer, * POSIX-mandated substitution, and assumes the default value for * IFS. */ -static bool advance_word(t_lexer *lexer, t_buffer_str *unquoted_word) -{ - bool empty; - t_i32 quote; +static bool advance_word(TSLexer *lexer, String *unquoted_word) { + bool empty = true; - quote = 0; - empty = true; - if (lexer->lookahead == '\'' || lexer->lookahead == '"') - (quote = lexer->lookahead, advance(lexer)); - while (lexer->lookahead && - !((quote ? lexer->lookahead == quote || lexer->lookahead == '\r' || - lexer->lookahead == '\n' - : isspace(lexer->lookahead)))) - { - if (lexer->lookahead == '\\') - { - advance(lexer); - if (!lexer->lookahead) - return (false); - } - empty = false; - push_str_char(unquoted_word, lexer->lookahead); - advance(lexer); - } - if (quote && lexer->lookahead == quote) - advance(lexer); - return (!empty); + int32_t quote = 0; + if (lexer->lookahead == '\'' || lexer->lookahead == '"') { + quote = lexer->lookahead; + advance(lexer); + } + + while (lexer->lookahead && + !(quote ? 
lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n' + : iswspace(lexer->lookahead))) { + if (lexer->lookahead == '\\') { + advance(lexer); + if (!lexer->lookahead) { + return false; + } + } + empty = false; + array_push(unquoted_word, lexer->lookahead); + advance(lexer); + } + array_push(unquoted_word, '\0'); + + if (quote && lexer->lookahead == quote) { + advance(lexer); + } + + return !empty; } -static inline bool scan_bare_dollar(t_lexer *lexer) -{ - while (isspace(lexer->lookahead) && lexer->lookahead != '\n' && - !lexer->eof(lexer)) - skip(lexer); +static inline bool scan_bare_dollar(TSLexer *lexer) { + while (iswspace(lexer->lookahead) && lexer->lookahead != '\n' && !lexer->eof(lexer)) { + skip(lexer); + } - if (lexer->lookahead == '$') - { - advance(lexer); - lexer->result_symbol = BARE_DOLLAR; - lexer->mark_end(lexer); - return (isspace(lexer->lookahead) || lexer->eof(lexer) || - lexer->lookahead == '\"'); - } + if (lexer->lookahead == '$') { + advance(lexer); + lexer->result_symbol = BARE_DOLLAR; + lexer->mark_end(lexer); + return iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"'; + } - return (false); + return false; } -static bool scan_heredoc_start(t_heredoc *heredoc, t_lexer *lexer) -{ - bool found_delimiter; +static bool scan_heredoc_start(Heredoc *heredoc, TSLexer *lexer) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } - while (isspace(lexer->lookahead)) - skip(lexer); - lexer->result_symbol = HEREDOC_START; - heredoc->is_raw = lexer->lookahead == '\'' || lexer->lookahead == '"' || - lexer->lookahead == '\\'; - found_delimiter = advance_word(lexer, &heredoc->delimiter); - if (!found_delimiter) - { - reset_string(&heredoc->delimiter); - return false; - } - return found_delimiter; + lexer->result_symbol = HEREDOC_START; + heredoc->is_raw = lexer->lookahead == '\'' || lexer->lookahead == '"' || lexer->lookahead == '\\'; + + bool found_delimiter = advance_word(lexer, 
&heredoc->delimiter); + if (!found_delimiter) { + reset_string(&heredoc->delimiter); + return false; + } + return found_delimiter; } -static bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer) -{ - reset_string(&heredoc->current_leading_word); - // Scan the first 'n' characters on this line, to see if they match the - // heredoc delimiter - t_i32 size; - - size = 0; - if (heredoc->delimiter.len > 0) - { - while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && - (t_i32) * (&heredoc->delimiter.buf[size]) == lexer->lookahead && - heredoc->current_leading_word.len < heredoc->delimiter.len) - { - push_str_char(&heredoc->current_leading_word, lexer->lookahead); - advance(lexer); - size++; - } - } - return heredoc->delimiter.len == 0 - ? false - : strcmp(heredoc->current_leading_word.buf, - heredoc->delimiter.buf) == 0; +static bool scan_heredoc_end_identifier(Heredoc *heredoc, TSLexer *lexer) { + reset_string(&heredoc->current_leading_word); + // Scan the first 'n' characters on this line, to see if they match the + // heredoc delimiter + int32_t size = 0; + if (heredoc->delimiter.size > 0) { + while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && + (int32_t)*array_get(&heredoc->delimiter, size) == lexer->lookahead && + heredoc->current_leading_word.size < heredoc->delimiter.size) { + array_push(&heredoc->current_leading_word, lexer->lookahead); + advance(lexer); + size++; + } + } + array_push(&heredoc->current_leading_word, '\0'); + return heredoc->delimiter.size == 0 + ? 
false + : strcmp(heredoc->current_leading_word.contents, heredoc->delimiter.contents) == 0; } -static bool scan_heredoc_content(t_scanner_ctx *scanner, t_lexer *lexer, - enum TokenType middle_type, - enum TokenType end_type) -{ - bool did_advance = false; - t_heredoc *heredoc = (&scanner->heredocs.buffer[scanner->heredocs.len - 1]); +static bool scan_heredoc_content(Scanner *scanner, TSLexer *lexer, enum TokenType middle_type, + enum TokenType end_type) { + bool did_advance = false; + Heredoc *heredoc = array_back(&scanner->heredocs); - for (;;) - { - switch (lexer->lookahead) - { - case '\0': { - if (lexer->eof(lexer) && did_advance) - { - reset_heredoc(heredoc); - lexer->result_symbol = end_type; - return true; - } - return false; - } + for (;;) { + switch (lexer->lookahead) { + case '\0': { + if (lexer->eof(lexer) && did_advance) { + reset_heredoc(heredoc); + lexer->result_symbol = end_type; + return true; + } + return false; + } - case '\\': { - did_advance = true; - advance(lexer); - advance(lexer); - break; - } + case '\\': { + did_advance = true; + advance(lexer); + advance(lexer); + break; + } - case '$': { - if (heredoc->is_raw) - { - did_advance = true; - advance(lexer); - break; - } - if (did_advance) - { - lexer->mark_end(lexer); - lexer->result_symbol = middle_type; - heredoc->started = true; - advance(lexer); - if (isalpha(lexer->lookahead) || lexer->lookahead == '{' || - lexer->lookahead == '(') - { - return true; - } - break; - } - if (middle_type == HEREDOC_BODY_BEGINNING && - lexer->get_column(lexer) == 0) - { - lexer->result_symbol = middle_type; - heredoc->started = true; - return true; - } - return false; - } + case '$': { + if (heredoc->is_raw) { + did_advance = true; + advance(lexer); + break; + } + if (did_advance) { + lexer->mark_end(lexer); + lexer->result_symbol = middle_type; + heredoc->started = true; + advance(lexer); + if (iswalpha(lexer->lookahead) || lexer->lookahead == '{' || lexer->lookahead == '(') { + return true; + } + break; + 
} + if (middle_type == HEREDOC_BODY_BEGINNING && lexer->get_column(lexer) == 0) { + lexer->result_symbol = middle_type; + heredoc->started = true; + return true; + } + return false; + } - case '\n': { - if (!did_advance) - { - skip(lexer); - } - else - { - advance(lexer); - } - did_advance = true; - if (heredoc->allows_indent) - { - while (isspace(lexer->lookahead)) - { - advance(lexer); - } - } - lexer->result_symbol = heredoc->started ? middle_type : end_type; - lexer->mark_end(lexer); - if (scan_heredoc_end_identifier(heredoc, lexer)) - { - if (lexer->result_symbol == HEREDOC_END) - vec_parser_heredoc_pop(&scanner->heredocs, NULL); - return true; - } - break; - } + case '\n': { + if (!did_advance) { + skip(lexer); + } else { + advance(lexer); + } + did_advance = true; + if (heredoc->allows_indent) { + while (iswspace(lexer->lookahead)) { + advance(lexer); + } + } + lexer->result_symbol = heredoc->started ? middle_type : end_type; + lexer->mark_end(lexer); + if (scan_heredoc_end_identifier(heredoc, lexer)) { + if (lexer->result_symbol == HEREDOC_END) { + array_pop(&scanner->heredocs); + } + return true; + } + break; + } - default: { - if (lexer->get_column(lexer) == 0) - { - // an alternative is to check the starting column of the - // heredoc body and track that statefully - while (isspace(lexer->lookahead)) - { - if (did_advance) - { - advance(lexer); - } - else - { - skip(lexer); - } - } - if (end_type != SIMPLE_HEREDOC_BODY) - { - lexer->result_symbol = middle_type; - if (scan_heredoc_end_identifier(heredoc, lexer)) - { - return true; - } - } - if (end_type == SIMPLE_HEREDOC_BODY) - { - lexer->result_symbol = end_type; - lexer->mark_end(lexer); - if (scan_heredoc_end_identifier(heredoc, lexer)) - { - return true; - } - } - } - did_advance = true; - advance(lexer); - break; - } - } - } -} -static bool regex_scan(t_scanner_ctx *scanner, t_lexer *lexer, - const bool *valid_symbols) -{ - (void)(scanner); - - if ((valid_symbols[REGEX] || 
valid_symbols[REGEX_NO_SLASH] || - valid_symbols[REGEX_NO_SPACE]) && - !in_error_recovery(valid_symbols)) - { - if (valid_symbols[REGEX] || valid_symbols[REGEX_NO_SPACE]) - { - while (isspace(lexer->lookahead)) - { - skip(lexer); - } - } - - if ((lexer->lookahead != '"' && lexer->lookahead != '\'') || - ((lexer->lookahead == '$' || lexer->lookahead == '\'') && - valid_symbols[REGEX_NO_SLASH]) || - (lexer->lookahead == '\'' && valid_symbols[REGEX_NO_SPACE])) - { - typedef struct - { - bool done; - bool advanced_once; - bool found_non_alnumdollarunderdash; - bool last_was_escape; - bool in_single_quote; - t_u32 paren_depth; - t_u32 bracket_depth; - t_u32 brace_depth; - } State; - - if (lexer->lookahead == '$' && valid_symbols[REGEX_NO_SLASH]) - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '(') - { - return false; - } - } - - lexer->mark_end(lexer); - - State state = {false, false, false, false, false, 0, 0, 0}; - while (!state.done) - { - if (state.in_single_quote) - { - if (lexer->lookahead == '\'') - { - state.in_single_quote = false; - advance(lexer); - lexer->mark_end(lexer); - } - } - switch (lexer->lookahead) - { - case '\\': - state.last_was_escape = true; - break; - case '\0': - return false; - case '(': - state.paren_depth++; - state.last_was_escape = false; - break; - case '[': - state.bracket_depth++; - state.last_was_escape = false; - break; - case '{': - if (!state.last_was_escape) - { - state.brace_depth++; - } - state.last_was_escape = false; - break; - case ')': - if (state.paren_depth == 0) - { - state.done = true; - } - state.paren_depth--; - state.last_was_escape = false; - break; - case ']': - if (state.bracket_depth == 0) - { - state.done = true; - } - state.bracket_depth--; - state.last_was_escape = false; - break; - case '}': - if (state.brace_depth == 0) - { - state.done = true; - } - state.brace_depth--; - state.last_was_escape = false; - break; - case '\'': - // Enter or exit a single-quoted string. 
- state.in_single_quote = !state.in_single_quote; - advance(lexer); - state.advanced_once = true; - state.last_was_escape = false; - continue; - default: - state.last_was_escape = false; - break; - } - - if (!state.done) - { - if (valid_symbols[REGEX]) - { - bool was_space = - !state.in_single_quote && isspace(lexer->lookahead); - advance(lexer); - state.advanced_once = true; - if (!was_space || state.paren_depth > 0) - { - lexer->mark_end(lexer); - } - } - else if (valid_symbols[REGEX_NO_SLASH]) - { - if (lexer->lookahead == '/') - { - lexer->mark_end(lexer); - lexer->result_symbol = REGEX_NO_SLASH; - return state.advanced_once; - } - if (lexer->lookahead == '\\') - { - advance(lexer); - state.advanced_once = true; - if (!lexer->eof(lexer) && lexer->lookahead != '[' && - lexer->lookahead != '/') - { - advance(lexer); - lexer->mark_end(lexer); - } - } - else - { - bool was_space = !state.in_single_quote && - isspace(lexer->lookahead); - advance(lexer); - state.advanced_once = true; - if (!was_space) - { - lexer->mark_end(lexer); - } - } - } - else if (valid_symbols[REGEX_NO_SPACE]) - { - if (lexer->lookahead == '\\') - { - state.found_non_alnumdollarunderdash = true; - advance(lexer); - if (!lexer->eof(lexer)) - { - advance(lexer); - } - } - else if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - advance(lexer); - // do not parse a command - // substitution - if (lexer->lookahead == '(') - { - return false; - } - // end $ always means regex, e.g. 
- // 99999999$ - if (isspace(lexer->lookahead)) - { - lexer->result_symbol = REGEX_NO_SPACE; - lexer->mark_end(lexer); - return true; - } - } - else - { - bool was_space = !state.in_single_quote && - isspace(lexer->lookahead); - if (was_space && state.paren_depth == 0) - { - lexer->mark_end(lexer); - lexer->result_symbol = REGEX_NO_SPACE; - return state.found_non_alnumdollarunderdash; - } - if (!isalnum(lexer->lookahead) && - lexer->lookahead != '$' && - lexer->lookahead != '-' && - lexer->lookahead != '_') - { - state.found_non_alnumdollarunderdash = true; - } - advance(lexer); - } - } - } - } - - lexer->result_symbol = - valid_symbols[REGEX_NO_SLASH] ? REGEX_NO_SLASH - : valid_symbols[REGEX_NO_SPACE] ? REGEX_NO_SPACE - : REGEX; - if (valid_symbols[REGEX] && !state.advanced_once) - return (false); - return (true); - } - } - return (false); + default: { + if (lexer->get_column(lexer) == 0) { + // an alternative is to check the starting column of the + // heredoc body and track that statefully + while (iswspace(lexer->lookahead)) { + if (did_advance) { + advance(lexer); + } else { + skip(lexer); + } + } + if (end_type != SIMPLE_HEREDOC_BODY) { + lexer->result_symbol = middle_type; + if (scan_heredoc_end_identifier(heredoc, lexer)) { + return true; + } + } + if (end_type == SIMPLE_HEREDOC_BODY) { + lexer->result_symbol = end_type; + lexer->mark_end(lexer); + if (scan_heredoc_end_identifier(heredoc, lexer)) { + return true; + } + } + } + did_advance = true; + advance(lexer); + break; + } + } + } } -static bool extglob_pattern_scan(t_scanner_ctx *scanner, t_lexer *lexer, - const bool *valid_symbols) -{ - if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols)) - { - // first skip ws, then check for ? * + @ ! 
- while (isspace(lexer->lookahead)) - { - skip(lexer); - } +static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) { + if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols)) { + if (!(lexer->lookahead == 0 || iswspace(lexer->lookahead) || lexer->lookahead == '>' || + lexer->lookahead == '<' || lexer->lookahead == ')' || lexer->lookahead == '(' || + lexer->lookahead == ';' || lexer->lookahead == '&' || lexer->lookahead == '|' || + (lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) || + (lexer->lookahead == ']' && valid_symbols[CLOSING_BRACKET]))) { + lexer->result_symbol = CONCAT; + // So for a`b`, we want to return a concat. We check if the + // 2nd backtick has whitespace after it, and if it does we + // return concat. + if (lexer->lookahead == '`') { + lexer->mark_end(lexer); + advance(lexer); + while (lexer->lookahead != '`' && !lexer->eof(lexer)) { + advance(lexer); + } + if (lexer->eof(lexer)) { + return false; + } + if (lexer->lookahead == '`') { + advance(lexer); + } + return iswspace(lexer->lookahead) || lexer->eof(lexer); + } + // strings w/ expansions that contains escaped quotes or + // backslashes need this to return a concat + if (lexer->lookahead == '\\') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '"' || lexer->lookahead == '\'' || lexer->lookahead == '\\') { + return true; + } + if (lexer->eof(lexer)) { + return false; + } + } else { + return true; + } + } + if (iswspace(lexer->lookahead) && valid_symbols[CLOSING_BRACE] && !valid_symbols[EXPANSION_WORD]) { + lexer->result_symbol = CONCAT; + return true; + } + } - if (lexer->lookahead == '?' || lexer->lookahead == '*' || - lexer->lookahead == '+' || lexer->lookahead == '@' || - lexer->lookahead == '!' || lexer->lookahead == '-' || - lexer->lookahead == ')' || lexer->lookahead == '\\' || - lexer->lookahead == '.' 
|| lexer->lookahead == '[' || - (isalpha(lexer->lookahead))) - { - if (lexer->lookahead == '\\') - { - advance(lexer); - if ((isspace(lexer->lookahead) || lexer->lookahead == '"') && - lexer->lookahead != '\r' && lexer->lookahead != '\n') - { - advance(lexer); - } - else - { - return false; - } - } + if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !in_error_recovery(valid_symbols)) { + // advance two # and ensure not } after + if (lexer->lookahead == '#') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '#') { + advance(lexer); + if (lexer->lookahead != '}') { + lexer->result_symbol = IMMEDIATE_DOUBLE_HASH; + lexer->mark_end(lexer); + return true; + } + } + } + } - if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) - { - lexer->mark_end(lexer); - advance(lexer); + if (valid_symbols[EXTERNAL_EXPANSION_SYM_HASH] && !in_error_recovery(valid_symbols)) { + if (lexer->lookahead == '#' || lexer->lookahead == '=' || lexer->lookahead == '!') { + lexer->result_symbol = lexer->lookahead == '#' ? EXTERNAL_EXPANSION_SYM_HASH + : lexer->lookahead == '!' ? 
EXTERNAL_EXPANSION_SYM_BANG + : EXTERNAL_EXPANSION_SYM_EQUAL; + advance(lexer); + lexer->mark_end(lexer); + while (lexer->lookahead == '#' || lexer->lookahead == '=' || lexer->lookahead == '!') { + advance(lexer); + } + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + if (lexer->lookahead == '}') { + return true; + } + return false; + } + } - if (isspace(lexer->lookahead)) - { - return false; - } - } + if (valid_symbols[EMPTY_VALUE]) { + if (iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == ';' || lexer->lookahead == '&') { + lexer->result_symbol = EMPTY_VALUE; + return true; + } + } - lexer->mark_end(lexer); - bool was_non_alpha = !isalpha(lexer->lookahead); - if (lexer->lookahead != '[') - { - // no esac - if (lexer->lookahead == 'e') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == 's') - { - advance(lexer); - if (lexer->lookahead == 'a') - { - advance(lexer); - if (lexer->lookahead == 'c') - { - advance(lexer); - if (isspace(lexer->lookahead)) - { - return false; - } - } - } - } - } - else - { - advance(lexer); - } - } + if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.size > 0 && + !array_back(&scanner->heredocs)->started && !in_error_recovery(valid_symbols)) { + return scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY); + } - // -\w is just a word, find something else special - if (lexer->lookahead == '-') - { - lexer->mark_end(lexer); - advance(lexer); - while (isalnum(lexer->lookahead)) - { - advance(lexer); - } + if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0) { + Heredoc *heredoc = array_back(&scanner->heredocs); + if (scan_heredoc_end_identifier(heredoc, lexer)) { + array_delete(&heredoc->current_leading_word); + array_delete(&heredoc->delimiter); + array_pop(&scanner->heredocs); + lexer->result_symbol = HEREDOC_END; + return true; + } + } - if (lexer->lookahead == ')' || lexer->lookahead == '\\' || - 
lexer->lookahead == '.') - { - return false; - } - lexer->mark_end(lexer); - } + if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 && array_back(&scanner->heredocs)->started && + !in_error_recovery(valid_symbols)) { + return scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END); + } - // case item -) or *) - if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) - { - lexer->mark_end(lexer); - advance(lexer); - if (isspace(lexer->lookahead)) - { - lexer->result_symbol = EXTGLOB_PATTERN; - return was_non_alpha; - } - } + if (valid_symbols[HEREDOC_START] && !in_error_recovery(valid_symbols) && scanner->heredocs.size > 0) { + return scan_heredoc_start(array_back(&scanner->heredocs), lexer); + } - if (isspace(lexer->lookahead)) - { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return true; - } + if (valid_symbols[TEST_OPERATOR] && !valid_symbols[EXPANSION_WORD]) { + while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') { + skip(lexer); + } - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '{' || lexer->lookahead == '(') - { - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - } + if (lexer->lookahead == '\\') { + if (valid_symbols[EXTGLOB_PATTERN]) { + goto extglob_pattern; + } + if (valid_symbols[REGEX_NO_SPACE]) { + goto regex; + } + skip(lexer); - if (lexer->lookahead == '|') - { - lexer->mark_end(lexer); - advance(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } + if (lexer->eof(lexer)) { + return false; + } - if (!isalnum(lexer->lookahead) && lexer->lookahead != '(' && - lexer->lookahead != '"' && lexer->lookahead != '[' && - lexer->lookahead != '?' 
&& lexer->lookahead != '/' && - lexer->lookahead != '\\' && lexer->lookahead != '_' && - lexer->lookahead != '*') - { - return false; - } + if (lexer->lookahead == '\r') { + skip(lexer); + if (lexer->lookahead == '\n') { + skip(lexer); + } + } else if (lexer->lookahead == '\n') { + skip(lexer); + } else { + return false; + } - typedef struct - { - bool done; - bool saw_non_alphadot; - t_u32 paren_depth; - t_u32 bracket_depth; - t_u32 brace_depth; - } State; + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + } - State state = {false, was_non_alpha, scanner->last_glob_paren_depth, - 0, 0}; - while (!state.done) - { - switch (lexer->lookahead) - { - case '\0': - return false; - case '(': - state.paren_depth++; - break; - case '[': - state.bracket_depth++; - break; - case '{': - state.brace_depth++; - break; - case ')': - if (state.paren_depth == 0) - { - state.done = true; - } - state.paren_depth--; - break; - case ']': - if (state.bracket_depth == 0) - { - state.done = true; - } - state.bracket_depth--; - break; - case '}': - if (state.brace_depth == 0) - { - state.done = true; - } - state.brace_depth--; - break; - } + if (lexer->lookahead == '\n' && !valid_symbols[NEWLINE]) { + skip(lexer); - if (lexer->lookahead == '|') - { - lexer->mark_end(lexer); - advance(lexer); - if (state.paren_depth == 0 && state.bracket_depth == 0 && - state.brace_depth == 0) - { - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - } + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + } - if (!state.done) - { - bool was_space = isspace(lexer->lookahead); - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - if (!isalpha(lexer->lookahead) && - lexer->lookahead != '.' 
&& lexer->lookahead != '\\') - { - state.saw_non_alphadot = true; - } - advance(lexer); - if (lexer->lookahead == '(' || lexer->lookahead == '{') - { - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = state.paren_depth; - return state.saw_non_alphadot; - } - } - if (was_space) - { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return state.saw_non_alphadot; - } - if (lexer->lookahead == '"') - { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return state.saw_non_alphadot; - } - if (lexer->lookahead == '\\') - { - if (!isalpha(lexer->lookahead) && - lexer->lookahead != '.' && lexer->lookahead != '\\') - { - state.saw_non_alphadot = true; - } - advance(lexer); - if (isspace(lexer->lookahead) || - lexer->lookahead == '"') - { - advance(lexer); - } - } - else - { - if (!isalpha(lexer->lookahead) && - lexer->lookahead != '.' && lexer->lookahead != '\\') - { - state.saw_non_alphadot = true; - } - advance(lexer); - } - if (!was_space) - { - lexer->mark_end(lexer); - } - } - } + if (lexer->lookahead == '-') { + advance(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return state.saw_non_alphadot; - } - scanner->last_glob_paren_depth = 0; + bool advanced_once = false; + while (iswalpha(lexer->lookahead)) { + advanced_once = true; + advance(lexer); + } - return false; - } - return (false); + if (iswspace(lexer->lookahead) && advanced_once) { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) { + if (valid_symbols[EXPANSION_WORD]) { + lexer->mark_end(lexer); + lexer->result_symbol = EXPANSION_WORD; + return true; + } + return false; + } + lexer->result_symbol = TEST_OPERATOR; + return true; + } + if (iswspace(lexer->lookahead) && valid_symbols[EXTGLOB_PATTERN]) { + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if 
(valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && scan_bare_dollar(lexer)) { + return true; + } + } + + if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) && + !valid_symbols[REGEX_NO_SLASH] && !in_error_recovery(valid_symbols)) { + for (;;) { + if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == '\r' || + (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) && + !valid_symbols[EXPANSION_WORD]) { + skip(lexer); + } else if (lexer->lookahead == '\\') { + skip(lexer); + + if (lexer->eof(lexer)) { + lexer->mark_end(lexer); + lexer->result_symbol = VARIABLE_NAME; + return true; + } + + if (lexer->lookahead == '\r') { + skip(lexer); + } + if (lexer->lookahead == '\n') { + skip(lexer); + } else { + if (lexer->lookahead == '\\' && valid_symbols[EXPANSION_WORD]) { + goto expansion_word; + } + return false; + } + } else { + break; + } + } + + // no '*', '@', '?', '-', '$', '0', '_' + if (!valid_symbols[EXPANSION_WORD] && + (lexer->lookahead == '*' || lexer->lookahead == '@' || lexer->lookahead == '?' 
|| lexer->lookahead == '-' || + lexer->lookahead == '0' || lexer->lookahead == '_')) { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '=' || lexer->lookahead == '[' || lexer->lookahead == ':' || + lexer->lookahead == '-' || lexer->lookahead == '%' || lexer->lookahead == '#' || + lexer->lookahead == '/') { + return false; + } + if (valid_symbols[EXTGLOB_PATTERN] && iswspace(lexer->lookahead)) { + lexer->mark_end(lexer); + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<') { + advance(lexer); + if (lexer->lookahead == '<') { + advance(lexer); + if (lexer->lookahead == '-') { + advance(lexer); + Heredoc heredoc = heredoc_new(); + heredoc.allows_indent = true; + array_push(&scanner->heredocs, heredoc); + lexer->result_symbol = HEREDOC_ARROW_DASH; + } else if (lexer->lookahead == '<' || lexer->lookahead == '=') { + return false; + } else { + Heredoc heredoc = heredoc_new(); + array_push(&scanner->heredocs, heredoc); + lexer->result_symbol = HEREDOC_ARROW; + } + return true; + } + return false; + } + + bool is_number = true; + if (iswdigit(lexer->lookahead)) { + advance(lexer); + } else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') { + is_number = false; + advance(lexer); + } else { + if (lexer->lookahead == '{') { + goto brace_start; + } + if (valid_symbols[EXPANSION_WORD]) { + goto expansion_word; + } + if (valid_symbols[EXTGLOB_PATTERN]) { + goto extglob_pattern; + } + return false; + } + + for (;;) { + if (iswdigit(lexer->lookahead)) { + advance(lexer); + } else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') { + is_number = false; + advance(lexer); + } else { + break; + } + } + + if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->lookahead == '>' || lexer->lookahead == '<')) { + lexer->result_symbol = FILE_DESCRIPTOR; + return true; + } + + if (valid_symbols[VARIABLE_NAME]) { + if (lexer->lookahead == '+') { + lexer->mark_end(lexer); + 
advance(lexer); + if (lexer->lookahead == '=' || lexer->lookahead == ':' || valid_symbols[CLOSING_BRACE]) { + lexer->result_symbol = VARIABLE_NAME; + return true; + } + return false; + } + if (lexer->lookahead == '/') { + return false; + } + if (lexer->lookahead == '=' || lexer->lookahead == '[' || + (lexer->lookahead == ':' && !valid_symbols[CLOSING_BRACE] && + !valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable + // names for function words, only handling : for now? #235 + lexer->lookahead == '%' || + (lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || + (lexer->lookahead == '-' && valid_symbols[CLOSING_BRACE])) { + lexer->mark_end(lexer); + lexer->result_symbol = VARIABLE_NAME; + return true; + } + + if (lexer->lookahead == '?') { + lexer->mark_end(lexer); + advance(lexer); + lexer->result_symbol = VARIABLE_NAME; + return iswalpha(lexer->lookahead); + } + } + + return false; + } + + if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && scan_bare_dollar(lexer)) { + return true; + } + +regex: + if ((valid_symbols[REGEX] || valid_symbols[REGEX_NO_SLASH] || valid_symbols[REGEX_NO_SPACE]) && + !in_error_recovery(valid_symbols)) { + if (valid_symbols[REGEX] || valid_symbols[REGEX_NO_SPACE]) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + } + + if ((lexer->lookahead != '"' && lexer->lookahead != '\'') || + ((lexer->lookahead == '$' || lexer->lookahead == '\'') && valid_symbols[REGEX_NO_SLASH]) || + (lexer->lookahead == '\'' && valid_symbols[REGEX_NO_SPACE])) { + typedef struct { + bool done; + bool advanced_once; + bool found_non_alnumdollarunderdash; + bool last_was_escape; + bool in_single_quote; + uint32_t paren_depth; + uint32_t bracket_depth; + uint32_t brace_depth; + } State; + + if (lexer->lookahead == '$' && valid_symbols[REGEX_NO_SLASH]) { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '(') { + return false; + } + } + + 
lexer->mark_end(lexer); + + State state = {false, false, false, false, false, 0, 0, 0}; + while (!state.done) { + if (state.in_single_quote) { + if (lexer->lookahead == '\'') { + state.in_single_quote = false; + advance(lexer); + lexer->mark_end(lexer); + } + } + switch (lexer->lookahead) { + case '\\': + state.last_was_escape = true; + break; + case '\0': + return false; + case '(': + state.paren_depth++; + state.last_was_escape = false; + break; + case '[': + state.bracket_depth++; + state.last_was_escape = false; + break; + case '{': + if (!state.last_was_escape) { + state.brace_depth++; + } + state.last_was_escape = false; + break; + case ')': + if (state.paren_depth == 0) { + state.done = true; + } + state.paren_depth--; + state.last_was_escape = false; + break; + case ']': + if (state.bracket_depth == 0) { + state.done = true; + } + state.bracket_depth--; + state.last_was_escape = false; + break; + case '}': + if (state.brace_depth == 0) { + state.done = true; + } + state.brace_depth--; + state.last_was_escape = false; + break; + case '\'': + // Enter or exit a single-quoted string. 
+ state.in_single_quote = !state.in_single_quote; + advance(lexer); + state.advanced_once = true; + state.last_was_escape = false; + continue; + default: + state.last_was_escape = false; + break; + } + + if (!state.done) { + if (valid_symbols[REGEX]) { + bool was_space = !state.in_single_quote && iswspace(lexer->lookahead); + advance(lexer); + state.advanced_once = true; + if (!was_space || state.paren_depth > 0) { + lexer->mark_end(lexer); + } + } else if (valid_symbols[REGEX_NO_SLASH]) { + if (lexer->lookahead == '/') { + lexer->mark_end(lexer); + lexer->result_symbol = REGEX_NO_SLASH; + return state.advanced_once; + } + if (lexer->lookahead == '\\') { + advance(lexer); + state.advanced_once = true; + if (!lexer->eof(lexer) && lexer->lookahead != '[' && lexer->lookahead != '/') { + advance(lexer); + lexer->mark_end(lexer); + } + } else { + bool was_space = !state.in_single_quote && iswspace(lexer->lookahead); + advance(lexer); + state.advanced_once = true; + if (!was_space) { + lexer->mark_end(lexer); + } + } + } else if (valid_symbols[REGEX_NO_SPACE]) { + if (lexer->lookahead == '\\') { + state.found_non_alnumdollarunderdash = true; + advance(lexer); + if (!lexer->eof(lexer)) { + advance(lexer); + } + } else if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + advance(lexer); + // do not parse a command + // substitution + if (lexer->lookahead == '(') { + return false; + } + // end $ always means regex, e.g. 
+ // 99999999$ + if (iswspace(lexer->lookahead)) { + lexer->result_symbol = REGEX_NO_SPACE; + lexer->mark_end(lexer); + return true; + } + } else { + bool was_space = !state.in_single_quote && iswspace(lexer->lookahead); + if (was_space && state.paren_depth == 0) { + lexer->mark_end(lexer); + lexer->result_symbol = REGEX_NO_SPACE; + return state.found_non_alnumdollarunderdash; + } + if (!iswalnum(lexer->lookahead) && lexer->lookahead != '$' && lexer->lookahead != '-' && + lexer->lookahead != '_') { + state.found_non_alnumdollarunderdash = true; + } + advance(lexer); + } + } + } + } + + lexer->result_symbol = valid_symbols[REGEX_NO_SLASH] ? REGEX_NO_SLASH + : valid_symbols[REGEX_NO_SPACE] ? REGEX_NO_SPACE + : REGEX; + if (valid_symbols[REGEX] && !state.advanced_once) { + return false; + } + return true; + } + } + +extglob_pattern: + if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols)) { + // first skip ws, then check for ? * + @ ! + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (lexer->lookahead == '?' || lexer->lookahead == '*' || lexer->lookahead == '+' || lexer->lookahead == '@' || + lexer->lookahead == '!' || lexer->lookahead == '-' || lexer->lookahead == ')' || lexer->lookahead == '\\' || + lexer->lookahead == '.' 
|| lexer->lookahead == '[' || (iswalpha(lexer->lookahead))) { + if (lexer->lookahead == '\\') { + advance(lexer); + if ((iswspace(lexer->lookahead) || lexer->lookahead == '"') && lexer->lookahead != '\r' && + lexer->lookahead != '\n') { + advance(lexer); + } else { + return false; + } + } + + if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) { + lexer->mark_end(lexer); + advance(lexer); + + if (iswspace(lexer->lookahead)) { + return false; + } + } + + lexer->mark_end(lexer); + bool was_non_alpha = !iswalpha(lexer->lookahead); + if (lexer->lookahead != '[') { + // no esac + if (lexer->lookahead == 'e') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == 's') { + advance(lexer); + if (lexer->lookahead == 'a') { + advance(lexer); + if (lexer->lookahead == 'c') { + advance(lexer); + if (iswspace(lexer->lookahead)) { + return false; + } + } + } + } + } else { + advance(lexer); + } + } + + // -\w is just a word, find something else special + if (lexer->lookahead == '-') { + lexer->mark_end(lexer); + advance(lexer); + while (iswalnum(lexer->lookahead)) { + advance(lexer); + } + + if (lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.') { + return false; + } + lexer->mark_end(lexer); + } + + // case item -) or *) + if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) { + lexer->mark_end(lexer); + advance(lexer); + if (iswspace(lexer->lookahead)) { + lexer->result_symbol = EXTGLOB_PATTERN; + return was_non_alpha; + } + } + + if (iswspace(lexer->lookahead)) { + lexer->mark_end(lexer); + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = 0; + return true; + } + + if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '{' || lexer->lookahead == '(') { + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if (lexer->lookahead == '|') { + lexer->mark_end(lexer); + advance(lexer); + lexer->result_symbol = 
EXTGLOB_PATTERN; + return true; + } + + if (!iswalnum(lexer->lookahead) && lexer->lookahead != '(' && lexer->lookahead != '"' && + lexer->lookahead != '[' && lexer->lookahead != '?' && lexer->lookahead != '/' && + lexer->lookahead != '\\' && lexer->lookahead != '_' && lexer->lookahead != '*') { + return false; + } + + typedef struct { + bool done; + bool saw_non_alphadot; + uint32_t paren_depth; + uint32_t bracket_depth; + uint32_t brace_depth; + } State; + + State state = {false, was_non_alpha, scanner->last_glob_paren_depth, 0, 0}; + while (!state.done) { + switch (lexer->lookahead) { + case '\0': + return false; + case '(': + state.paren_depth++; + break; + case '[': + state.bracket_depth++; + break; + case '{': + state.brace_depth++; + break; + case ')': + if (state.paren_depth == 0) { + state.done = true; + } + state.paren_depth--; + break; + case ']': + if (state.bracket_depth == 0) { + state.done = true; + } + state.bracket_depth--; + break; + case '}': + if (state.brace_depth == 0) { + state.done = true; + } + state.brace_depth--; + break; + } + + if (lexer->lookahead == '|') { + lexer->mark_end(lexer); + advance(lexer); + if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0) { + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if (!state.done) { + bool was_space = iswspace(lexer->lookahead); + if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' 
&& lexer->lookahead != '\\') { + state.saw_non_alphadot = true; + } + advance(lexer); + if (lexer->lookahead == '(' || lexer->lookahead == '{') { + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = state.paren_depth; + return state.saw_non_alphadot; + } + } + if (was_space) { + lexer->mark_end(lexer); + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = 0; + return state.saw_non_alphadot; + } + if (lexer->lookahead == '"') { + lexer->mark_end(lexer); + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = 0; + return state.saw_non_alphadot; + } + if (lexer->lookahead == '\\') { + if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') { + state.saw_non_alphadot = true; + } + advance(lexer); + if (iswspace(lexer->lookahead) || lexer->lookahead == '"') { + advance(lexer); + } + } else { + if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') { + state.saw_non_alphadot = true; + } + advance(lexer); + } + if (!was_space) { + lexer->mark_end(lexer); + } + } + } + + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = 0; + return state.saw_non_alphadot; + } + scanner->last_glob_paren_depth = 0; + + return false; + } + +expansion_word: + if (valid_symbols[EXPANSION_WORD]) { + bool advanced_once = false; + bool advance_once_space = false; + for (;;) { + if (lexer->lookahead == '\"') { + return false; + } + if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || + iswalnum(lexer->lookahead)) { + lexer->result_symbol = EXPANSION_WORD; + return advanced_once; + } + advanced_once = true; + } + + if (lexer->lookahead == '}') { + lexer->mark_end(lexer); + lexer->result_symbol = EXPANSION_WORD; + return advanced_once || advance_once_space; + } + + if (lexer->lookahead == '(' && !(advanced_once || advance_once_space)) { 
+ lexer->mark_end(lexer); + advance(lexer); + while (lexer->lookahead != ')' && !lexer->eof(lexer)) { + // if we find a $( or ${ assume this is valid and is + // a garbage concatenation of some weird word + an + // expansion + // I wonder where this can fail + if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || + iswalnum(lexer->lookahead)) { + lexer->result_symbol = EXPANSION_WORD; + return advanced_once; + } + advanced_once = true; + } else { + advanced_once = advanced_once || !iswspace(lexer->lookahead); + advance_once_space = advance_once_space || iswspace(lexer->lookahead); + advance(lexer); + } + } + lexer->mark_end(lexer); + if (lexer->lookahead == ')') { + advanced_once = true; + advance(lexer); + lexer->mark_end(lexer); + if (lexer->lookahead == '}') { + return false; + } + } else { + return false; + } + } + + if (lexer->lookahead == '\'') { + return false; + } + + if (lexer->eof(lexer)) { + return false; + } + advanced_once = advanced_once || !iswspace(lexer->lookahead); + advance_once_space = advance_once_space || iswspace(lexer->lookahead); + advance(lexer); + } + } + +brace_start: + if (valid_symbols[BRACE_START] && !in_error_recovery(valid_symbols)) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (lexer->lookahead != '{') { + return false; + } + + advance(lexer); + lexer->mark_end(lexer); + + while (isdigit(lexer->lookahead)) { + advance(lexer); + } + + if (lexer->lookahead != '.') { + return false; + } + advance(lexer); + + if (lexer->lookahead != '.') { + return false; + } + advance(lexer); + + while (isdigit(lexer->lookahead)) { + advance(lexer); + } + + if (lexer->lookahead != '}') { + return false; + } + + lexer->result_symbol = BRACE_START; + return true; + } + + return false; } -static bool expansion_word_scan(t_scanner_ctx *scanner, t_lexer *lexer, - const bool *valid_symbols) -{ - (void)(scanner); - - if 
(valid_symbols[EXPANSION_WORD]) - { - bool advanced_once = false; - bool advance_once_space = false; - for (;;) - { - if (lexer->lookahead == '\"') - { - return false; - } - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '{' || lexer->lookahead == '(' || - lexer->lookahead == '\'' || isalnum(lexer->lookahead)) - { - lexer->result_symbol = EXPANSION_WORD; - return advanced_once; - } - advanced_once = true; - } - - if (lexer->lookahead == '}') - { - lexer->mark_end(lexer); - lexer->result_symbol = EXPANSION_WORD; - return advanced_once || advance_once_space; - } - - if (lexer->lookahead == '(' && - !(advanced_once || advance_once_space)) - { - lexer->mark_end(lexer); - advance(lexer); - while (lexer->lookahead != ')' && !lexer->eof(lexer)) - { - // if we find a $( or ${ assume this is valid and is - // a garbage concatenation of some weird word + an - // expansion - // I wonder where this can fail - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '{' || - lexer->lookahead == '(' || - lexer->lookahead == '\'' || - isalnum(lexer->lookahead)) - { - lexer->result_symbol = EXPANSION_WORD; - return advanced_once; - } - advanced_once = true; - } - else - { - advanced_once = - advanced_once || !isspace(lexer->lookahead); - advance_once_space = - advance_once_space || isspace(lexer->lookahead); - advance(lexer); - } - } - lexer->mark_end(lexer); - if (lexer->lookahead == ')') - { - advanced_once = true; - advance(lexer); - lexer->mark_end(lexer); - if (lexer->lookahead == '}') - { - return false; - } - } - else - { - return false; - } - } - - if (lexer->lookahead == '\'') - { - return false; - } - - if (lexer->eof(lexer)) - { - return false; - } - advanced_once = advanced_once || !isspace(lexer->lookahead); - advance_once_space = - advance_once_space || isspace(lexer->lookahead); - advance(lexer); - } - } - return (false); +void 
*tree_sitter_bash_external_scanner_create() { + Scanner *scanner = calloc(1, sizeof(Scanner)); + array_init(&scanner->heredocs); + return scanner; } -static bool brace_start_scan(t_scanner_ctx *scanner, t_lexer *lexer, - const bool *valid_symbols) -{ - (void)(scanner); - - if (valid_symbols[BRACE_START] && !in_error_recovery(valid_symbols)) - { - while (isspace(lexer->lookahead)) - { - skip(lexer); - } - - if (lexer->lookahead != '{') - { - return false; - } - - advance(lexer); - lexer->mark_end(lexer); - - while (isdigit(lexer->lookahead)) - { - advance(lexer); - } - - if (lexer->lookahead != '.') - { - return false; - } - advance(lexer); - - if (lexer->lookahead != '.') - { - return false; - } - advance(lexer); - - while (isdigit(lexer->lookahead)) - { - advance(lexer); - } - - if (lexer->lookahead != '}') - { - return false; - } - - lexer->result_symbol = BRACE_START; - return true; - } - return (false); -} -static bool scan(t_scanner_ctx *scanner, t_lexer *lexer, - const bool *valid_symbols) -{ - if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols)) - { - if (!(lexer->lookahead == 0 || isspace(lexer->lookahead) || - lexer->lookahead == '>' || lexer->lookahead == '<' || - lexer->lookahead == ')' || lexer->lookahead == '(' || - lexer->lookahead == ';' || lexer->lookahead == '&' || - lexer->lookahead == '|' || - (lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) || - (lexer->lookahead == ']' && valid_symbols[CLOSING_BRACKET]))) - { - lexer->result_symbol = CONCAT; - // So for a`b`, we want to return a concat. We check if the - // 2nd backtick has whitespace after it, and if it does we - // return concat. 
- if (lexer->lookahead == '`') - { - lexer->mark_end(lexer); - advance(lexer); - while (lexer->lookahead != '`' && !lexer->eof(lexer)) - advance(lexer); - if (lexer->eof(lexer)) - return (false); - if (lexer->lookahead == '`') - advance(lexer); - return (isspace(lexer->lookahead) || lexer->eof(lexer)); - } - // strings w/ expansions that contains escaped quotes or - // backslashes need this to return a concat - if (lexer->lookahead == '\\') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '"' || lexer->lookahead == '\'' || - lexer->lookahead == '\\') - return (true); - if (lexer->eof(lexer)) - return (false); - } - else - return (true); - } - if (isspace(lexer->lookahead) && valid_symbols[CLOSING_BRACE] && - !valid_symbols[EXPANSION_WORD]) - { - lexer->result_symbol = CONCAT; - return (true); - } - } - - if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && - !in_error_recovery(valid_symbols)) - { - // advance two # and ensure not } after - if (lexer->lookahead == '#') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '#') - { - advance(lexer); - if (lexer->lookahead != '}') - { - lexer->result_symbol = IMMEDIATE_DOUBLE_HASH; - lexer->mark_end(lexer); - return (true); - } - } - } - } - - if (valid_symbols[EXTERNAL_EXPANSION_SYM_HASH] && - !in_error_recovery(valid_symbols)) - { - if (lexer->lookahead == '#' || lexer->lookahead == '=' || - lexer->lookahead == '!') - { - lexer->result_symbol = - lexer->lookahead == '#' ? EXTERNAL_EXPANSION_SYM_HASH - : lexer->lookahead == '!' ? 
EXTERNAL_EXPANSION_SYM_BANG - : EXTERNAL_EXPANSION_SYM_EQUAL; - advance(lexer); - lexer->mark_end(lexer); - while (lexer->lookahead == '#' || lexer->lookahead == '=' || - lexer->lookahead == '!') - advance(lexer); - while (isspace(lexer->lookahead)) - skip(lexer); - if (lexer->lookahead == '}') - return (true); - return (false); - } - } - - if (valid_symbols[EMPTY_VALUE]) - { - if (isspace(lexer->lookahead) || lexer->eof(lexer) || - lexer->lookahead == ';' || lexer->lookahead == '&') - { - lexer->result_symbol = EMPTY_VALUE; - return (true); - } - } - t_heredoc *back; - vec_parser_heredoc_back(&scanner->heredocs, &back); - if ((valid_symbols[HEREDOC_BODY_BEGINNING] || - valid_symbols[SIMPLE_HEREDOC_BODY]) && - scanner->heredocs.len > 0 && !back->started && - !in_error_recovery(valid_symbols)) - return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, - SIMPLE_HEREDOC_BODY)); - - if (valid_symbols[HEREDOC_END] && scanner->heredocs.len > 0) - { - t_heredoc *heredoc; - vec_parser_heredoc_back(&scanner->heredocs, &heredoc); - if (scan_heredoc_end_identifier(heredoc, lexer)) - { - str_free(heredoc->current_leading_word); - str_free(heredoc->delimiter); - scanner->heredocs.len -= 1; - lexer->result_symbol = HEREDOC_END; - return (true); - } - } - - vec_parser_heredoc_back(&scanner->heredocs, &back); - if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0 && - back->started && !in_error_recovery(valid_symbols)) - return ( - scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END)); - - if (valid_symbols[HEREDOC_START] && !in_error_recovery(valid_symbols) && - scanner->heredocs.len > 0) - { - vec_parser_heredoc_back(&scanner->heredocs, &back); - return (scan_heredoc_start(back, lexer)); - } - if (valid_symbols[TEST_OPERATOR] && !valid_symbols[EXPANSION_WORD]) - { - while (isspace(lexer->lookahead) && lexer->lookahead != '\n') - skip(lexer); - - if (lexer->lookahead == '\\') - { - if (valid_symbols[EXTGLOB_PATTERN]) - return 
(extglob_pattern_scan(scanner, lexer, valid_symbols)); - if (valid_symbols[REGEX_NO_SPACE]) - return (regex_scan(scanner, lexer, valid_symbols)); - skip(lexer); - - if (lexer->eof(lexer)) - return false; - - if (lexer->lookahead == '\r') - { - skip(lexer); - if (lexer->lookahead == '\n') - skip(lexer); - } - else if (lexer->lookahead == '\n') - skip(lexer); - else - return (false); - - while (isspace(lexer->lookahead)) - skip(lexer); - } - - if (lexer->lookahead == '\n' && !valid_symbols[NEWLINE]) - { - skip(lexer); - while (isspace(lexer->lookahead)) - skip(lexer); - } - - if (lexer->lookahead == '-') - { - advance(lexer); - - bool advanced_once = false; - while (isalpha(lexer->lookahead)) - { - advanced_once = true; - advance(lexer); - } - - if (isspace(lexer->lookahead) && advanced_once) - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) - { - if (valid_symbols[EXPANSION_WORD]) - { - lexer->mark_end(lexer); - lexer->result_symbol = EXPANSION_WORD; - return (true); - } - return (false); - } - lexer->result_symbol = TEST_OPERATOR; - return (true); - } - if (isspace(lexer->lookahead) && valid_symbols[EXTGLOB_PATTERN]) - { - lexer->result_symbol = EXTGLOB_PATTERN; - return (true); - } - } - - if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && - scan_bare_dollar(lexer)) - return (true); - } - - if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || - valid_symbols[HEREDOC_ARROW]) && - !valid_symbols[REGEX_NO_SLASH] && !in_error_recovery(valid_symbols)) - { - while (true) - { - if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || - lexer->lookahead == '\r' || - (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) && - !valid_symbols[EXPANSION_WORD]) - skip(lexer); - else if (lexer->lookahead == '\\') - { - skip(lexer); - - if (lexer->eof(lexer)) - { - lexer->mark_end(lexer); - lexer->result_symbol = VARIABLE_NAME; - return (true); - } - - if (lexer->lookahead == 
'\r') - - skip(lexer); - - if (lexer->lookahead == '\n') - - skip(lexer); - - else - { - if (lexer->lookahead == '\\' && - valid_symbols[EXPANSION_WORD]) - - return ( - expansion_word_scan(scanner, lexer, valid_symbols)); - - return (false); - } - } - else - - break; - } - - // no '*', '@', '?', '-', '$', '0', '_' - if (!valid_symbols[EXPANSION_WORD] && - (lexer->lookahead == '*' || lexer->lookahead == '@' || - lexer->lookahead == '?' || lexer->lookahead == '-' || - lexer->lookahead == '0' || lexer->lookahead == '_')) - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '=' || lexer->lookahead == '[' || - lexer->lookahead == ':' || lexer->lookahead == '-' || - lexer->lookahead == '%' || lexer->lookahead == '#' || - lexer->lookahead == '/') - - return (false); - - if (valid_symbols[EXTGLOB_PATTERN] && isspace(lexer->lookahead)) - { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - return (true); - } - } - - if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<') - { - advance(lexer); - if (lexer->lookahead == '<') - { - advance(lexer); - if (lexer->lookahead == '-') - { - advance(lexer); - t_heredoc heredoc = heredoc_new(); - heredoc.allows_indent = true; - vec_parser_heredoc_push(&scanner->heredocs, heredoc); - lexer->result_symbol = HEREDOC_ARROW_DASH; - } - else if (lexer->lookahead == '<' || lexer->lookahead == '=') - return (false); - else - { - t_heredoc heredoc = heredoc_new(); - vec_parser_heredoc_push(&scanner->heredocs, heredoc); - lexer->result_symbol = HEREDOC_ARROW; - } - return (true); - } - return (false); - } - - bool is_number = true; - if (isdigit(lexer->lookahead)) - advance(lexer); - else if (isalpha(lexer->lookahead) || lexer->lookahead == '_') - { - is_number = false; - advance(lexer); - } - else - { - if (lexer->lookahead == '{') - return (brace_start_scan(scanner, lexer, valid_symbols)); - if (valid_symbols[EXPANSION_WORD]) - return (expansion_word_scan(scanner, lexer, valid_symbols)); - if 
(valid_symbols[EXTGLOB_PATTERN]) - return (extglob_pattern_scan(scanner, lexer, valid_symbols)); - return false; - } - - while (true) - { - if (isdigit(lexer->lookahead)) - advance(lexer); - else if (isalpha(lexer->lookahead) || lexer->lookahead == '_') - (is_number = false, advance(lexer)); - else - break; - } - - if (is_number && valid_symbols[FILE_DESCRIPTOR] && - (lexer->lookahead == '>' || lexer->lookahead == '<')) - { - lexer->result_symbol = FILE_DESCRIPTOR; - return (true); - } - - if (valid_symbols[VARIABLE_NAME]) - { - if (lexer->lookahead == '+') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '=' || lexer->lookahead == ':' || - valid_symbols[CLOSING_BRACE]) - { - lexer->result_symbol = VARIABLE_NAME; - return (true); - } - return (false); - } - if (lexer->lookahead == '/') - { - return (false); - } - if (lexer->lookahead == '=' || lexer->lookahead == '[' || - (lexer->lookahead == ':' && !valid_symbols[CLOSING_BRACE] && - !valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases - // for regular word chars but - // not variable names for - // function words, only - // handling : for now? 
#235 - lexer->lookahead == '%' || - (lexer->lookahead == '#' && !is_number) || - lexer->lookahead == '@' || - (lexer->lookahead == '-' && valid_symbols[CLOSING_BRACE])) - { - lexer->mark_end(lexer); - lexer->result_symbol = VARIABLE_NAME; - return (true); - } - - if (lexer->lookahead == '?') - { - lexer->mark_end(lexer); - advance(lexer); - lexer->result_symbol = VARIABLE_NAME; - return (isalpha(lexer->lookahead)); - } - } - - return (false); - } - - if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && - scan_bare_dollar(lexer)) - return (true); - return (false); +bool tree_sitter_bash_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { + Scanner *scanner = (Scanner *)payload; + return scan(scanner, lexer, valid_symbols); } -void *tree_sitter_bash_external_scanner_create() -{ - t_scanner_ctx *scanner = mem_alloc(sizeof(t_scanner_ctx)); - scanner->heredocs = vec_parser_heredoc_new(5, NULL); - return (scanner); +unsigned tree_sitter_bash_external_scanner_serialize(void *payload, char *state) { + Scanner *scanner = (Scanner *)payload; + return serialize(scanner, state); } -bool tree_sitter_bash_external_scanner_scan(void *payload, t_lexer *lexer, - const bool *valid_symbols) -{ - t_scanner_ctx *scanner = (t_scanner_ctx *)payload; - return (scan(scanner, lexer, valid_symbols)); +void tree_sitter_bash_external_scanner_deserialize(void *payload, const char *state, unsigned length) { + Scanner *scanner = (Scanner *)payload; + deserialize(scanner, state, length); } -unsigned tree_sitter_bash_external_scanner_serialize(void *payload, char *state) -{ - t_scanner_ctx *scanner = (t_scanner_ctx *)payload; - return (serialize(scanner, state)); -} - -void tree_sitter_bash_external_scanner_deserialize(void *payload, - const char *state, - unsigned length) -{ - t_scanner_ctx *scanner = (t_scanner_ctx *)payload; - deserialize(scanner, state, length); -} - -void tree_sitter_bash_external_scanner_destroy(void *payload) -{ - t_scanner_ctx 
*scanner = (t_scanner_ctx *)payload; - for (size_t i = 0; i < scanner->heredocs.len; i++) - { - t_heredoc *heredoc = &scanner->heredocs.buffer[i]; - str_free(heredoc->current_leading_word); - str_free(heredoc->delimiter); - } - vec_parser_heredoc_free(scanner->heredocs); - free(scanner); +void tree_sitter_bash_external_scanner_destroy(void *payload) { + Scanner *scanner = (Scanner *)payload; + for (size_t i = 0; i < scanner->heredocs.size; i++) { + Heredoc *heredoc = array_get(&scanner->heredocs, i); + array_delete(&heredoc->current_leading_word); + array_delete(&heredoc->delimiter); + } + array_delete(&scanner->heredocs); + free(scanner); } diff --git a/parser/src/stack.c b/parser/src/stack.c index c02c6ad6..98d8c561 100644 --- a/parser/src/stack.c +++ b/parser/src/stack.c @@ -1,9 +1,9 @@ - +#include "./alloc.h" #include "./language.h" #include "./subtree.h" #include "./array.h" #include "./stack.h" -#include "parser/parser_length.h" +#include "./length.h" #include #include #include @@ -27,11 +27,11 @@ typedef struct { } StackLink; struct StackNode { - t_state_id state; - t_parse_length position; + TSStateId state; + Length position; StackLink links[MAX_LINK_COUNT]; short unsigned int link_count; - t_u32 ref_count; + uint32_t ref_count; unsigned error_cost; unsigned node_count; int dynamic_precedence; @@ -40,7 +40,7 @@ struct StackNode { typedef struct { StackNode *node; SubtreeArray subtrees; - t_u32 subtree_count; + uint32_t subtree_count; bool is_pending; } StackIterator; @@ -112,7 +112,7 @@ recur: if (pool->size < MAX_NODE_POOL_SIZE) { array_push(pool, self); } else { - free(self); + ts_free(self); } if (first_predecessor) { @@ -123,8 +123,8 @@ recur: /// Get the number of nodes in the subtree, for the purpose of measuring /// how much progress has been made by a given version of the stack. 
-static t_u32 stack__subtree_node_count(Subtree subtree) { - t_u32 count = ts_subtree_visible_descendant_count(subtree); +static uint32_t stack__subtree_node_count(Subtree subtree) { + uint32_t count = ts_subtree_visible_descendant_count(subtree); if (ts_subtree_visible(subtree)) count++; // Count intermediate error nodes even though they are not visible, @@ -139,12 +139,12 @@ static StackNode *stack_node_new( StackNode *previous_node, Subtree subtree, bool is_pending, - t_state_id state, + TSStateId state, StackNodeArray *pool ) { StackNode *node = pool->size > 0 ? array_pop(pool) - : malloc(sizeof(StackNode)); + : ts_malloc(sizeof(StackNode)); *node = (StackNode) { .ref_count = 1, .link_count = 0, @@ -234,7 +234,7 @@ static void stack_node_add_link( for (int j = 0; j < link.node->link_count; j++) { stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool); } - t_i32 dynamic_precedence = link.node->dynamic_precedence; + int32_t dynamic_precedence = link.node->dynamic_precedence; if (link.subtree.ptr) { dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); } @@ -277,7 +277,7 @@ static void stack_head_delete( } if (self->summary) { array_delete(self->summary); - free(self->summary); + ts_free(self->summary); } stack_node_release(self->node, pool, subtree_pool); } @@ -307,7 +307,7 @@ static void ts_stack__add_slice( StackNode *node, SubtreeArray *subtrees ) { - for (t_u32 i = self->slices.size - 1; i + 1 > 0; i--) { + for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { StackVersion version = self->slices.contents[i].version; if (self->heads.contents[version].node == node) { StackSlice slice = {*subtrees, version}; @@ -342,13 +342,13 @@ static StackSliceArray stack__iter( bool include_subtrees = false; if (goal_subtree_count >= 0) { include_subtrees = true; - array_reserve(&new_iterator.subtrees, (t_u32)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree)); + array_reserve(&new_iterator.subtrees, 
(uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree)); } array_push(&self->iterators, new_iterator); while (self->iterators.size > 0) { - for (t_u32 i = 0, size = self->iterators.size; i < size; i++) { + for (uint32_t i = 0, size = self->iterators.size; i < size; i++) { StackIterator *iterator = &self->iterators.contents[i]; StackNode *node = iterator->node; @@ -379,7 +379,7 @@ static StackSliceArray stack__iter( continue; } - for (t_u32 j = 1; j <= node->link_count; j++) { + for (uint32_t j = 1; j <= node->link_count; j++) { StackIterator *next_iterator; StackLink link; if (j == node->link_count) { @@ -419,7 +419,7 @@ static StackSliceArray stack__iter( } Stack *ts_stack_new(SubtreePool *subtree_pool) { - Stack *self = calloc(1, sizeof(Stack)); + Stack *self = ts_calloc(1, sizeof(Stack)); array_init(&self->heads); array_init(&self->slices); @@ -443,28 +443,28 @@ void ts_stack_delete(Stack *self) { if (self->iterators.contents) array_delete(&self->iterators); stack_node_release(self->base_node, &self->node_pool, self->subtree_pool); - for (t_u32 i = 0; i < self->heads.size; i++) { + for (uint32_t i = 0; i < self->heads.size; i++) { stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); } array_clear(&self->heads); if (self->node_pool.contents) { - for (t_u32 i = 0; i < self->node_pool.size; i++) - free(self->node_pool.contents[i]); + for (uint32_t i = 0; i < self->node_pool.size; i++) + ts_free(self->node_pool.contents[i]); array_delete(&self->node_pool); } array_delete(&self->heads); - free(self); + ts_free(self); } -t_u32 ts_stack_version_count(const Stack *self) { +uint32_t ts_stack_version_count(const Stack *self) { return self->heads.size; } -t_state_id ts_stack_state(const Stack *self, StackVersion version) { +TSStateId ts_stack_state(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->state; } -t_parse_length ts_stack_position(const Stack *self, StackVersion version) { +Length 
ts_stack_position(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->position; } @@ -503,7 +503,7 @@ void ts_stack_push( StackVersion version, Subtree subtree, bool pending, - t_state_id state + TSStateId state ) { StackHead *head = array_get(&self->heads, version); StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool); @@ -520,7 +520,7 @@ forceinline StackAction pop_count_callback(void *payload, const StackIterator *i } } -StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, t_u32 count) { +StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) { return stack__iter(self, version, pop_count_callback, &count, (int)count); } @@ -593,7 +593,7 @@ typedef struct { forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) { SummarizeStackSession *session = payload; - t_state_id state = iterator->node->state; + TSStateId state = iterator->node->state; unsigned depth = iterator->subtree_count; if (depth > session->max_depth) return StackActionStop; for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) { @@ -611,7 +611,7 @@ forceinline StackAction summarize_stack_callback(void *payload, const StackItera void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) { SummarizeStackSession session = { - .summary = malloc(sizeof(StackSummary)), + .summary = ts_malloc(sizeof(StackSummary)), .max_depth = max_depth }; array_init(session.summary); @@ -619,7 +619,7 @@ void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_dep StackHead *head = &self->heads.contents[version]; if (head->summary) { array_delete(head->summary); - free(head->summary); + ts_free(head->summary); } head->summary = session.summary; } @@ -664,7 +664,7 @@ void ts_stack_remove_version(Stack *self, StackVersion version) { void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) { 
if (v1 == v2) return; assert(v2 < v1); - assert((t_u32)v1 < self->heads.size); + assert((uint32_t)v1 < self->heads.size); StackHead *source_head = &self->heads.contents[v1]; StackHead *target_head = &self->heads.contents[v2]; if (target_head->summary && !source_head->summary) { @@ -696,7 +696,7 @@ bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) { if (!ts_stack_can_merge(self, version1, version2)) return false; StackHead *head1 = &self->heads.contents[version1]; StackHead *head2 = &self->heads.contents[version2]; - for (t_u32 i = 0; i < head2->node->link_count; i++) { + for (uint32_t i = 0; i < head2->node->link_count; i++) { stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool); } if (head1->node->state == ERROR_STATE) { @@ -752,7 +752,7 @@ Subtree ts_stack_resume(Stack *self, StackVersion version) { void ts_stack_clear(Stack *self) { stack_node_retain(self->base_node); - for (t_u32 i = 0; i < self->heads.size; i++) { + for (uint32_t i = 0; i < self->heads.size; i++) { stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); } array_clear(&self->heads); @@ -764,7 +764,7 @@ void ts_stack_clear(Stack *self) { })); } -bool ts_stack_print_dot_graph(Stack *self, const t_language *language, FILE *f) { +bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) { array_reserve(&self->iterators, 32); if (!f) f = stderr; @@ -775,7 +775,7 @@ bool ts_stack_print_dot_graph(Stack *self, const t_language *language, FILE *f) Array(StackNode *) visited_nodes = array_new(); array_clear(&self->iterators); - for (t_u32 i = 0; i < self->heads.size; i++) { + for (uint32_t i = 0; i < self->heads.size; i++) { StackHead *head = &self->heads.contents[i]; if (head->status == StackStatusHalted) continue; @@ -794,14 +794,14 @@ bool ts_stack_print_dot_graph(Stack *self, const t_language *language, FILE *f) if (head->summary) { fprintf(f, "\nsummary:"); - for (t_u32 j = 0; j < 
head->summary->size; j++) fprintf(f, " %u", head->summary->contents[j].state); + for (uint32_t j = 0; j < head->summary->size; j++) fprintf(f, " %u", head->summary->contents[j].state); } if (head->last_external_token.ptr) { const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state; const char *data = ts_external_scanner_state_data(state); fprintf(f, "\nexternal_scanner_state:"); - for (t_u32 j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]); + for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]); } fprintf(f, "\"]\n"); @@ -814,11 +814,11 @@ bool ts_stack_print_dot_graph(Stack *self, const t_language *language, FILE *f) while (!all_iterators_done) { all_iterators_done = true; - for (t_u32 i = 0; i < self->iterators.size; i++) { + for (uint32_t i = 0; i < self->iterators.size; i++) { StackIterator iterator = self->iterators.contents[i]; StackNode *node = iterator.node; - for (t_u32 j = 0; j < visited_nodes.size; j++) { + for (uint32_t j = 0; j < visited_nodes.size; j++) { if (visited_nodes.contents[j] == node) { node = NULL; break; diff --git a/parser/src/stack.h b/parser/src/stack.h index c9309303..86abbc9d 100644 --- a/parser/src/stack.h +++ b/parser/src/stack.h @@ -7,7 +7,7 @@ extern "C" { #include "./array.h" #include "./subtree.h" -#include "parser/error_costs.h" +#include "./error_costs.h" #include typedef struct Stack Stack; @@ -22,9 +22,9 @@ typedef struct { typedef Array(StackSlice) StackSliceArray; typedef struct { - t_parse_length position; + Length position; unsigned depth; - t_state_id state; + TSStateId state; } StackSummaryEntry; typedef Array(StackSummaryEntry) StackSummary; @@ -35,11 +35,11 @@ Stack *ts_stack_new(SubtreePool *); void ts_stack_delete(Stack *); // Get the stack's current number of versions. -t_u32 ts_stack_version_count(const Stack *); +uint32_t ts_stack_version_count(const Stack *); // Get the state at the top of the given version of the stack. 
If the stack is // empty, this returns the initial state, 0. -t_state_id ts_stack_state(const Stack *, StackVersion); +TSStateId ts_stack_state(const Stack *, StackVersion); // Get the last external token associated with a given version of the stack. Subtree ts_stack_last_external_token(const Stack *, StackVersion); @@ -48,21 +48,21 @@ Subtree ts_stack_last_external_token(const Stack *, StackVersion); void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree ); // Get the position of the given version of the stack within the document. -t_parse_length ts_stack_position(const Stack *, StackVersion); +Length ts_stack_position(const Stack *, StackVersion); // Push a tree and state onto the given version of the stack. // // This transfers ownership of the tree to the Stack. Callers that // need to retain ownership of the tree for their own purposes should // first retain the tree. -void ts_stack_push(Stack *, StackVersion, Subtree , bool, t_state_id); +void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId); // Pop the given number of entries from the given version of the stack. This // operation can increase the number of stack versions by revealing multiple // versions which had previously been merged. It returns an array that // specifies the index of each revealed version and the trees that were // removed from that version. -StackSliceArray ts_stack_pop_count(Stack *, StackVersion, t_u32 count); +StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count); // Remove an error at the top of the given version of the stack. 
SubtreeArray ts_stack_pop_error(Stack *, StackVersion); @@ -122,9 +122,9 @@ void ts_stack_remove_version(Stack *, StackVersion); void ts_stack_clear(Stack *); -bool ts_stack_print_dot_graph(Stack *, const t_language *, FILE *); +bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *); -typedef void (*StackIterateCallback)(void *, t_state_id, t_u32); +typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t); #ifdef __cplusplus } diff --git a/parser/src/subtree.c b/parser/src/subtree.c index 577a2bf7..4524e182 100644 --- a/parser/src/subtree.c +++ b/parser/src/subtree.c @@ -1,23 +1,22 @@ #include #include -#include #include -#include +#include #include - +#include +#include "./alloc.h" #include "./array.h" - -#include "parser/error_costs.h" -#include "./language.h" -#include "parser/parser_length.h" +#include "./atomic.h" #include "./subtree.h" +#include "./length.h" +#include "./language.h" +#include "./error_costs.h" #include -typedef struct -{ - t_parse_length start; - t_parse_length old_end; - t_parse_length new_end; +typedef struct { + Length start; + Length old_end; + Length new_end; } Edit; #define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX @@ -25,301 +24,255 @@ typedef struct // ExternalScannerState -void ts_external_scanner_state_init(ExternalScannerState *self, - const char *data, unsigned length) -{ - self->length = length; - if (length > sizeof(self->short_data)) - { - self->long_data = malloc(length); - memcpy(self->long_data, data, length); - } - else - { - memcpy(self->short_data, data, length); - } +void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) { + self->length = length; + if (length > sizeof(self->short_data)) { + self->long_data = ts_malloc(length); + memcpy(self->long_data, data, length); + } else { + memcpy(self->short_data, data, length); + } } -ExternalScannerState ts_external_scanner_state_copy( - const ExternalScannerState *self) -{ - ExternalScannerState result = *self; - if 
(self->length > sizeof(self->short_data)) - { - result.long_data = malloc(self->length); - memcpy(result.long_data, self->long_data, self->length); - } - return result; +ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) { + ExternalScannerState result = *self; + if (self->length > sizeof(self->short_data)) { + result.long_data = ts_malloc(self->length); + memcpy(result.long_data, self->long_data, self->length); + } + return result; } -void ts_external_scanner_state_delete(ExternalScannerState *self) -{ - if (self->length > sizeof(self->short_data)) - { - free(self->long_data); - } +void ts_external_scanner_state_delete(ExternalScannerState *self) { + if (self->length > sizeof(self->short_data)) { + ts_free(self->long_data); + } } -const char *ts_external_scanner_state_data(const ExternalScannerState *self) -{ - if (self->length > sizeof(self->short_data)) - { - return self->long_data; - } - else - { - return self->short_data; - } +const char *ts_external_scanner_state_data(const ExternalScannerState *self) { + if (self->length > sizeof(self->short_data)) { + return self->long_data; + } else { + return self->short_data; + } } -bool ts_external_scanner_state_eq(const ExternalScannerState *self, - const char *buffer, unsigned length) -{ - return self->length == length && - memcmp(ts_external_scanner_state_data(self), buffer, length) == 0; +bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length) { + return + self->length == length && + memcmp(ts_external_scanner_state_data(self), buffer, length) == 0; } // SubtreeArray -void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) -{ - dest->size = self.size; - dest->capacity = self.capacity; - dest->contents = self.contents; - if (self.capacity > 0) - { - dest->contents = calloc(self.capacity, sizeof(Subtree)); - memcpy(dest->contents, self.contents, self.size * sizeof(Subtree)); - for (t_u32 i = 0; i < self.size; i++) - { - 
ts_subtree_retain(dest->contents[i]); - } - } +void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) { + dest->size = self.size; + dest->capacity = self.capacity; + dest->contents = self.contents; + if (self.capacity > 0) { + dest->contents = ts_calloc(self.capacity, sizeof(Subtree)); + memcpy(dest->contents, self.contents, self.size * sizeof(Subtree)); + for (uint32_t i = 0; i < self.size; i++) { + ts_subtree_retain(dest->contents[i]); + } + } } -void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) -{ - for (t_u32 i = 0; i < self->size; i++) - { - ts_subtree_release(pool, self->contents[i]); - } - array_clear(self); +void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) { + for (uint32_t i = 0; i < self->size; i++) { + ts_subtree_release(pool, self->contents[i]); + } + array_clear(self); } -void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) -{ - ts_subtree_array_clear(pool, self); - array_delete(self); +void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) { + ts_subtree_array_clear(pool, self); + array_delete(self); } -void ts_subtree_array_remove_trailing_extras(SubtreeArray *self, - SubtreeArray *destination) -{ - array_clear(destination); - while (self->size > 0) - { - Subtree last = self->contents[self->size - 1]; - if (ts_subtree_extra(last)) - { - self->size--; - array_push(destination, last); - } - else - { - break; - } - } - ts_subtree_array_reverse(destination); +void ts_subtree_array_remove_trailing_extras( + SubtreeArray *self, + SubtreeArray *destination +) { + array_clear(destination); + while (self->size > 0) { + Subtree last = self->contents[self->size - 1]; + if (ts_subtree_extra(last)) { + self->size--; + array_push(destination, last); + } else { + break; + } + } + ts_subtree_array_reverse(destination); } -void ts_subtree_array_reverse(SubtreeArray *self) -{ - for (t_u32 i = 0, limit = self->size / 2; i < limit; i++) - { - size_t reverse_index = self->size - 1 - i; - Subtree 
swap = self->contents[i]; - self->contents[i] = self->contents[reverse_index]; - self->contents[reverse_index] = swap; - } +void ts_subtree_array_reverse(SubtreeArray *self) { + for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) { + size_t reverse_index = self->size - 1 - i; + Subtree swap = self->contents[i]; + self->contents[i] = self->contents[reverse_index]; + self->contents[reverse_index] = swap; + } } // SubtreePool -SubtreePool ts_subtree_pool_new(t_u32 capacity) -{ - SubtreePool self = {array_new(), array_new()}; - array_reserve(&self.free_trees, capacity); - return self; +SubtreePool ts_subtree_pool_new(uint32_t capacity) { + SubtreePool self = {array_new(), array_new()}; + array_reserve(&self.free_trees, capacity); + return self; } -void ts_subtree_pool_delete(SubtreePool *self) -{ - if (self->free_trees.contents) - { - for (unsigned i = 0; i < self->free_trees.size; i++) - { - free(self->free_trees.contents[i].ptr); - } - array_delete(&self->free_trees); - } - if (self->tree_stack.contents) - array_delete(&self->tree_stack); +void ts_subtree_pool_delete(SubtreePool *self) { + if (self->free_trees.contents) { + for (unsigned i = 0; i < self->free_trees.size; i++) { + ts_free(self->free_trees.contents[i].ptr); + } + array_delete(&self->free_trees); + } + if (self->tree_stack.contents) array_delete(&self->tree_stack); } -static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) -{ - if (self->free_trees.size > 0) - { - return array_pop(&self->free_trees).ptr; - } - else - { - return malloc(sizeof(SubtreeHeapData)); - } +static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) { + if (self->free_trees.size > 0) { + return array_pop(&self->free_trees).ptr; + } else { + return ts_malloc(sizeof(SubtreeHeapData)); + } } -static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) -{ - if (self->free_trees.capacity > 0 && - self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE) - { - array_push(&self->free_trees, 
(MutableSubtree){.ptr = tree}); - } - else - { - free(tree); - } +static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) { + if (self->free_trees.capacity > 0 && self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE) { + array_push(&self->free_trees, (MutableSubtree) {.ptr = tree}); + } else { + ts_free(tree); + } } // Subtree -static inline bool ts_subtree_can_inline(t_parse_length padding, t_parse_length size, - t_u32 lookahead_bytes) -{ - return padding.bytes < TS_MAX_INLINE_TREE_LENGTH && - padding.extent.row < 16 && - padding.extent.column < TS_MAX_INLINE_TREE_LENGTH && - size.extent.row == 0 && - size.extent.column < TS_MAX_INLINE_TREE_LENGTH && - lookahead_bytes < 16; +static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) { + return + padding.bytes < TS_MAX_INLINE_TREE_LENGTH && + padding.extent.row < 16 && + padding.extent.column < TS_MAX_INLINE_TREE_LENGTH && + size.extent.row == 0 && + size.extent.column < TS_MAX_INLINE_TREE_LENGTH && + lookahead_bytes < 16; } -Subtree ts_subtree_new_leaf(SubtreePool *pool, t_symbol symbol, t_parse_length padding, - t_parse_length size, t_u32 lookahead_bytes, - t_state_id parse_state, bool has_external_tokens, - bool depends_on_column, bool is_keyword, - const t_language *language) -{ - t_symbol_metadata metadata = ts_language_symbol_metadata(language, symbol); - bool extra = symbol == ts_builtin_sym_end; +Subtree ts_subtree_new_leaf( + SubtreePool *pool, TSSymbol symbol, Length padding, Length size, + uint32_t lookahead_bytes, TSStateId parse_state, + bool has_external_tokens, bool depends_on_column, + bool is_keyword, const TSLanguage *language +) { + TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); + bool extra = symbol == ts_builtin_sym_end; - bool is_inline = (symbol <= UINT8_MAX && !has_external_tokens && - ts_subtree_can_inline(padding, size, lookahead_bytes)); + bool is_inline = ( + symbol <= UINT8_MAX && + !has_external_tokens && 
+ ts_subtree_can_inline(padding, size, lookahead_bytes) + ); - if (is_inline) - { - return (Subtree){{ - .parse_state = parse_state, - .symbol = symbol, - .padding_bytes = padding.bytes, - .padding_rows = padding.extent.row, - .padding_columns = padding.extent.column, - .size_bytes = size.bytes, - .lookahead_bytes = lookahead_bytes, - .visible = metadata.visible, - .named = metadata.named, - .extra = extra, - .has_changes = false, - .is_missing = false, - .is_keyword = is_keyword, - .is_inline = true, - }}; - } - else - { - SubtreeHeapData *data = ts_subtree_pool_allocate(pool); - *data = (SubtreeHeapData){ - .ref_count = 1, - .padding = padding, - .size = size, - .lookahead_bytes = lookahead_bytes, - .error_cost = 0, - .child_count = 0, - .symbol = symbol, - .parse_state = parse_state, - .visible = metadata.visible, - .named = metadata.named, - .extra = extra, - .fragile_left = false, - .fragile_right = false, - .has_changes = false, - .has_external_tokens = has_external_tokens, - .has_external_scanner_state_change = false, - .depends_on_column = depends_on_column, - .is_missing = false, - .is_keyword = is_keyword, - {{.first_leaf = {.symbol = 0, .parse_state = 0}}}}; - return (Subtree){.ptr = data}; - } + if (is_inline) { + return (Subtree) {{ + .parse_state = parse_state, + .symbol = symbol, + .padding_bytes = padding.bytes, + .padding_rows = padding.extent.row, + .padding_columns = padding.extent.column, + .size_bytes = size.bytes, + .lookahead_bytes = lookahead_bytes, + .visible = metadata.visible, + .named = metadata.named, + .extra = extra, + .has_changes = false, + .is_missing = false, + .is_keyword = is_keyword, + .is_inline = true, + }}; + } else { + SubtreeHeapData *data = ts_subtree_pool_allocate(pool); + *data = (SubtreeHeapData) { + .ref_count = 1, + .padding = padding, + .size = size, + .lookahead_bytes = lookahead_bytes, + .error_cost = 0, + .child_count = 0, + .symbol = symbol, + .parse_state = parse_state, + .visible = metadata.visible, + .named = 
metadata.named, + .extra = extra, + .fragile_left = false, + .fragile_right = false, + .has_changes = false, + .has_external_tokens = has_external_tokens, + .has_external_scanner_state_change = false, + .depends_on_column = depends_on_column, + .is_missing = false, + .is_keyword = is_keyword, + {{.first_leaf = {.symbol = 0, .parse_state = 0}}} + }; + return (Subtree) {.ptr = data}; + } } -void ts_subtree_set_symbol(MutableSubtree *self, t_symbol symbol, - const t_language *language) -{ - t_symbol_metadata metadata = ts_language_symbol_metadata(language, symbol); - if (self->data.is_inline) - { - assert(symbol < UINT8_MAX); - self->data.symbol = symbol; - self->data.named = metadata.named; - self->data.visible = metadata.visible; - } - else - { - self->ptr->symbol = symbol; - self->ptr->named = metadata.named; - self->ptr->visible = metadata.visible; - } +void ts_subtree_set_symbol( + MutableSubtree *self, + TSSymbol symbol, + const TSLanguage *language +) { + TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); + if (self->data.is_inline) { + assert(symbol < UINT8_MAX); + self->data.symbol = symbol; + self->data.named = metadata.named; + self->data.visible = metadata.visible; + } else { + self->ptr->symbol = symbol; + self->ptr->named = metadata.named; + self->ptr->visible = metadata.visible; + } } -Subtree ts_subtree_new_error(SubtreePool *pool, t_i32 lookahead_char, - t_parse_length padding, t_parse_length size, - t_u32 bytes_scanned, t_state_id parse_state, - const t_language *language) -{ - Subtree result = ts_subtree_new_leaf(pool, ts_builtin_sym_error, padding, - size, bytes_scanned, parse_state, - false, false, false, language); - SubtreeHeapData *data = (SubtreeHeapData *)result.ptr; - data->fragile_left = true; - data->fragile_right = true; - data->lookahead_char = lookahead_char; - return result; +Subtree ts_subtree_new_error( + SubtreePool *pool, int32_t lookahead_char, Length padding, Length size, + uint32_t bytes_scanned, 
TSStateId parse_state, const TSLanguage *language +) { + Subtree result = ts_subtree_new_leaf( + pool, ts_builtin_sym_error, padding, size, bytes_scanned, + parse_state, false, false, false, language + ); + SubtreeHeapData *data = (SubtreeHeapData *)result.ptr; + data->fragile_left = true; + data->fragile_right = true; + data->lookahead_char = lookahead_char; + return result; } // Clone a subtree. -MutableSubtree ts_subtree_clone(Subtree self) -{ - size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); - Subtree *new_children = malloc(alloc_size); - Subtree *old_children = ts_subtree_children(self); - memcpy(new_children, old_children, alloc_size); - SubtreeHeapData *result = - (SubtreeHeapData *)&new_children[self.ptr->child_count]; - if (self.ptr->child_count > 0) - { - for (t_u32 i = 0; i < self.ptr->child_count; i++) - { - ts_subtree_retain(new_children[i]); - } - } - else if (self.ptr->has_external_tokens) - { - result->external_scanner_state = - ts_external_scanner_state_copy(&self.ptr->external_scanner_state); - } - result->ref_count = 1; - return (MutableSubtree){.ptr = result}; +MutableSubtree ts_subtree_clone(Subtree self) { + size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); + Subtree *new_children = ts_malloc(alloc_size); + Subtree *old_children = ts_subtree_children(self); + memcpy(new_children, old_children, alloc_size); + SubtreeHeapData *result = (SubtreeHeapData *)&new_children[self.ptr->child_count]; + if (self.ptr->child_count > 0) { + for (uint32_t i = 0; i < self.ptr->child_count; i++) { + ts_subtree_retain(new_children[i]); + } + } else if (self.ptr->has_external_tokens) { + result->external_scanner_state = ts_external_scanner_state_copy( + &self.ptr->external_scanner_state + ); + } + result->ref_count = 1; + return (MutableSubtree) {.ptr = result}; } // Get mutable version of a subtree. @@ -327,912 +280,781 @@ MutableSubtree ts_subtree_clone(Subtree self) // This takes ownership of the subtree. 
If the subtree has only one owner, // this will directly convert it into a mutable version. Otherwise, it will // perform a copy. -MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) -{ - if (self.data.is_inline) - return (MutableSubtree){self.data}; - if (self.ptr->ref_count == 1) - return ts_subtree_to_mt_unsafe(self); - MutableSubtree result = ts_subtree_clone(self); - ts_subtree_release(pool, self); - return result; +MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { + if (self.data.is_inline) return (MutableSubtree) {self.data}; + if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); + MutableSubtree result = ts_subtree_clone(self); + ts_subtree_release(pool, self); + return result; } -static void ts_subtree__compress(MutableSubtree self, unsigned count, - const t_language *language, - MutableSubtreeArray *stack) -{ - unsigned initial_stack_size = stack->size; +static void ts_subtree__compress( + MutableSubtree self, + unsigned count, + const TSLanguage *language, + MutableSubtreeArray *stack +) { + unsigned initial_stack_size = stack->size; - MutableSubtree tree = self; - t_symbol symbol = tree.ptr->symbol; - for (unsigned i = 0; i < count; i++) - { - if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) - break; + MutableSubtree tree = self; + TSSymbol symbol = tree.ptr->symbol; + for (unsigned i = 0; i < count; i++) { + if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break; - MutableSubtree child = - ts_subtree_to_mt_unsafe(ts_subtree_children(tree)[0]); - if (child.data.is_inline || child.ptr->child_count < 2 || - child.ptr->ref_count > 1 || child.ptr->symbol != symbol) - break; + MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); + if ( + child.data.is_inline || + child.ptr->child_count < 2 || + child.ptr->ref_count > 1 || + child.ptr->symbol != symbol + ) break; - MutableSubtree grandchild = - ts_subtree_to_mt_unsafe(ts_subtree_children(child)[0]); - if 
(grandchild.data.is_inline || grandchild.ptr->child_count < 2 || - grandchild.ptr->ref_count > 1 || grandchild.ptr->symbol != symbol) - break; + MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]); + if ( + grandchild.data.is_inline || + grandchild.ptr->child_count < 2 || + grandchild.ptr->ref_count > 1 || + grandchild.ptr->symbol != symbol + ) break; - ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild); - ts_subtree_children(child)[0] = - ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1]; - ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = - ts_subtree_from_mut(child); - array_push(stack, tree); - tree = grandchild; - } + ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild); + ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1]; + ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child); + array_push(stack, tree); + tree = grandchild; + } - while (stack->size > initial_stack_size) - { - tree = array_pop(stack); - MutableSubtree child = - ts_subtree_to_mt_unsafe(ts_subtree_children(tree)[0]); - MutableSubtree grandchild = ts_subtree_to_mt_unsafe( - ts_subtree_children(child)[child.ptr->child_count - 1]); - ts_subtree_summarize_children(grandchild, language); - ts_subtree_summarize_children(child, language); - ts_subtree_summarize_children(tree, language); - } + while (stack->size > initial_stack_size) { + tree = array_pop(stack); + MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); + MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]); + ts_subtree_summarize_children(grandchild, language); + ts_subtree_summarize_children(child, language); + ts_subtree_summarize_children(tree, language); + } } -void ts_subtree_balance(Subtree self, SubtreePool *pool, - const t_language *language) -{ - array_clear(&pool->tree_stack); 
+void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language) { + array_clear(&pool->tree_stack); - if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) - { - array_push(&pool->tree_stack, ts_subtree_to_mt_unsafe(self)); - } + if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) { + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); + } - while (pool->tree_stack.size > 0) - { - MutableSubtree tree = array_pop(&pool->tree_stack); + while (pool->tree_stack.size > 0) { + MutableSubtree tree = array_pop(&pool->tree_stack); - if (tree.ptr->repeat_depth > 0) - { - Subtree child1 = ts_subtree_children(tree)[0]; - Subtree child2 = - ts_subtree_children(tree)[tree.ptr->child_count - 1]; - long repeat_delta = (long)ts_subtree_repeat_depth(child1) - - (long)ts_subtree_repeat_depth(child2); - if (repeat_delta > 0) - { - unsigned n = (unsigned)repeat_delta; - for (unsigned i = n / 2; i > 0; i /= 2) - { - ts_subtree__compress(tree, i, language, &pool->tree_stack); - n -= i; - } - } - } + if (tree.ptr->repeat_depth > 0) { + Subtree child1 = ts_subtree_children(tree)[0]; + Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1]; + long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2); + if (repeat_delta > 0) { + unsigned n = (unsigned)repeat_delta; + for (unsigned i = n / 2; i > 0; i /= 2) { + ts_subtree__compress(tree, i, language, &pool->tree_stack); + n -= i; + } + } + } - for (t_u32 i = 0; i < tree.ptr->child_count; i++) - { - Subtree child = ts_subtree_children(tree)[i]; - if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) - { - array_push(&pool->tree_stack, ts_subtree_to_mt_unsafe(child)); - } - } - } + for (uint32_t i = 0; i < tree.ptr->child_count; i++) { + Subtree child = ts_subtree_children(tree)[i]; + if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) { + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); 
+ } + } + } } // Assign all of the node's properties that depend on its children. -void ts_subtree_summarize_children(MutableSubtree self, - const t_language *language) -{ - assert(!self.data.is_inline); +void ts_subtree_summarize_children( + MutableSubtree self, + const TSLanguage *language +) { + assert(!self.data.is_inline); - self.ptr->named_child_count = 0; - self.ptr->visible_child_count = 0; - self.ptr->error_cost = 0; - self.ptr->repeat_depth = 0; - self.ptr->visible_descendant_count = 0; - self.ptr->has_external_tokens = false; - self.ptr->depends_on_column = false; - self.ptr->has_external_scanner_state_change = false; - self.ptr->dynamic_precedence = 0; + self.ptr->named_child_count = 0; + self.ptr->visible_child_count = 0; + self.ptr->error_cost = 0; + self.ptr->repeat_depth = 0; + self.ptr->visible_descendant_count = 0; + self.ptr->has_external_tokens = false; + self.ptr->depends_on_column = false; + self.ptr->has_external_scanner_state_change = false; + self.ptr->dynamic_precedence = 0; - t_u32 structural_index = 0; - const t_symbol *alias_sequence = - ts_language_alias_sequence(language, self.ptr->production_id); - t_u32 lookahead_end_byte = 0; + uint32_t structural_index = 0; + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); + uint32_t lookahead_end_byte = 0; - const Subtree *children = ts_subtree_children(self); - for (t_u32 i = 0; i < self.ptr->child_count; i++) - { - Subtree child = children[i]; + const Subtree *children = ts_subtree_children(self); + for (uint32_t i = 0; i < self.ptr->child_count; i++) { + Subtree child = children[i]; - if (self.ptr->size.extent.row == 0 && - ts_subtree_depends_on_column(child)) - { - self.ptr->depends_on_column = true; - } + if ( + self.ptr->size.extent.row == 0 && + ts_subtree_depends_on_column(child) + ) { + self.ptr->depends_on_column = true; + } - if (ts_subtree_has_external_scanner_state_change(child)) - { - self.ptr->has_external_scanner_state_change = true; 
- } + if (ts_subtree_has_external_scanner_state_change(child)) { + self.ptr->has_external_scanner_state_change = true; + } - if (i == 0) - { - self.ptr->padding = ts_subtree_padding(child); - self.ptr->size = ts_subtree_size(child); - } - else - { - self.ptr->size = - length_add(self.ptr->size, ts_subtree_total_size(child)); - } + if (i == 0) { + self.ptr->padding = ts_subtree_padding(child); + self.ptr->size = ts_subtree_size(child); + } else { + self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child)); + } - t_u32 child_lookahead_end_byte = self.ptr->padding.bytes + - self.ptr->size.bytes + - ts_subtree_lookahead_bytes(child); - if (child_lookahead_end_byte > lookahead_end_byte) - { - lookahead_end_byte = child_lookahead_end_byte; - } + uint32_t child_lookahead_end_byte = + self.ptr->padding.bytes + + self.ptr->size.bytes + + ts_subtree_lookahead_bytes(child); + if (child_lookahead_end_byte > lookahead_end_byte) { + lookahead_end_byte = child_lookahead_end_byte; + } - if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) - { - self.ptr->error_cost += ts_subtree_error_cost(child); - } + if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) { + self.ptr->error_cost += ts_subtree_error_cost(child); + } - t_u32 grandchild_count = ts_subtree_child_count(child); - if (self.ptr->symbol == ts_builtin_sym_error || - self.ptr->symbol == ts_builtin_sym_error_repeat) - { - if (!ts_subtree_extra(child) && - !(ts_subtree_is_error(child) && grandchild_count == 0)) - { - if (ts_subtree_visible(child)) - { - self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; - } - else if (grandchild_count > 0) - { - self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * - child.ptr->visible_child_count; - } - } - } + uint32_t grandchild_count = ts_subtree_child_count(child); + if ( + self.ptr->symbol == ts_builtin_sym_error || + self.ptr->symbol == ts_builtin_sym_error_repeat + ) { + if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count 
== 0)) { + if (ts_subtree_visible(child)) { + self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; + } else if (grandchild_count > 0) { + self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count; + } + } + } - self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child); - self.ptr->visible_descendant_count += - ts_subtree_visible_descendant_count(child); + self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child); + self.ptr->visible_descendant_count += ts_subtree_visible_descendant_count(child); - if (alias_sequence && alias_sequence[structural_index] != 0 && - !ts_subtree_extra(child)) - { - self.ptr->visible_descendant_count++; - self.ptr->visible_child_count++; - if (ts_language_symbol_metadata(language, - alias_sequence[structural_index]) - .named) - { - self.ptr->named_child_count++; - } - } - else if (ts_subtree_visible(child)) - { - self.ptr->visible_descendant_count++; - self.ptr->visible_child_count++; - if (ts_subtree_named(child)) - self.ptr->named_child_count++; - } - else if (grandchild_count > 0) - { - self.ptr->visible_child_count += child.ptr->visible_child_count; - self.ptr->named_child_count += child.ptr->named_child_count; - } + if (alias_sequence && alias_sequence[structural_index] != 0 && !ts_subtree_extra(child)) { + self.ptr->visible_descendant_count++; + self.ptr->visible_child_count++; + if (ts_language_symbol_metadata(language, alias_sequence[structural_index]).named) { + self.ptr->named_child_count++; + } + } else if (ts_subtree_visible(child)) { + self.ptr->visible_descendant_count++; + self.ptr->visible_child_count++; + if (ts_subtree_named(child)) self.ptr->named_child_count++; + } else if (grandchild_count > 0) { + self.ptr->visible_child_count += child.ptr->visible_child_count; + self.ptr->named_child_count += child.ptr->named_child_count; + } - if (ts_subtree_has_external_tokens(child)) - self.ptr->has_external_tokens = true; + if (ts_subtree_has_external_tokens(child)) 
self.ptr->has_external_tokens = true; - if (ts_subtree_is_error(child)) - { - self.ptr->fragile_left = self.ptr->fragile_right = true; - self.ptr->parse_state = TS_TREE_STATE_NONE; - } + if (ts_subtree_is_error(child)) { + self.ptr->fragile_left = self.ptr->fragile_right = true; + self.ptr->parse_state = TS_TREE_STATE_NONE; + } - if (!ts_subtree_extra(child)) - structural_index++; - } + if (!ts_subtree_extra(child)) structural_index++; + } - self.ptr->lookahead_bytes = - lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes; + self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes; - if (self.ptr->symbol == ts_builtin_sym_error || - self.ptr->symbol == ts_builtin_sym_error_repeat) - { - self.ptr->error_cost += - ERROR_COST_PER_RECOVERY + - ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes + - ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row; - } + if ( + self.ptr->symbol == ts_builtin_sym_error || + self.ptr->symbol == ts_builtin_sym_error_repeat + ) { + self.ptr->error_cost += + ERROR_COST_PER_RECOVERY + + ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes + + ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row; + } - if (self.ptr->child_count > 0) - { - Subtree first_child = children[0]; - Subtree last_child = children[self.ptr->child_count - 1]; + if (self.ptr->child_count > 0) { + Subtree first_child = children[0]; + Subtree last_child = children[self.ptr->child_count - 1]; - self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child); - self.ptr->first_leaf.parse_state = - ts_subtree_leaf_parse_state(first_child); + self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child); + self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child); - if (ts_subtree_fragile_left(first_child)) - self.ptr->fragile_left = true; - if (ts_subtree_fragile_right(last_child)) - self.ptr->fragile_right = true; + if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true; + 
if (ts_subtree_fragile_right(last_child)) self.ptr->fragile_right = true; - if (self.ptr->child_count >= 2 && !self.ptr->visible && - !self.ptr->named && - ts_subtree_symbol(first_child) == self.ptr->symbol) - { - if (ts_subtree_repeat_depth(first_child) > - ts_subtree_repeat_depth(last_child)) - { - self.ptr->repeat_depth = - ts_subtree_repeat_depth(first_child) + 1; - } - else - { - self.ptr->repeat_depth = - ts_subtree_repeat_depth(last_child) + 1; - } - } - } + if ( + self.ptr->child_count >= 2 && + !self.ptr->visible && + !self.ptr->named && + ts_subtree_symbol(first_child) == self.ptr->symbol + ) { + if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) { + self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1; + } else { + self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1; + } + } + } } // Create a new parent node with the given children. // // This takes ownership of the children array. -MutableSubtree ts_subtree_new_node(t_symbol symbol, SubtreeArray *children, - unsigned production_id, - const t_language *language) -{ - t_symbol_metadata metadata = ts_language_symbol_metadata(language, symbol); - bool fragile = - symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; +MutableSubtree ts_subtree_new_node( + TSSymbol symbol, + SubtreeArray *children, + unsigned production_id, + const TSLanguage *language +) { + TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); + bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; - // Allocate the node's data at the end of the array of children. 
- size_t new_byte_size = ts_subtree_alloc_size(children->size); - if (children->capacity * sizeof(Subtree) < new_byte_size) - { - children->contents = realloc(children->contents, new_byte_size); - children->capacity = (t_u32)(new_byte_size / sizeof(Subtree)); - } - SubtreeHeapData *data = - (SubtreeHeapData *)&children->contents[children->size]; + // Allocate the node's data at the end of the array of children. + size_t new_byte_size = ts_subtree_alloc_size(children->size); + if (children->capacity * sizeof(Subtree) < new_byte_size) { + children->contents = ts_realloc(children->contents, new_byte_size); + children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree)); + } + SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size]; - *data = (SubtreeHeapData){.ref_count = 1, - .symbol = symbol, - .child_count = children->size, - .visible = metadata.visible, - .named = metadata.named, - .has_changes = false, - .has_external_scanner_state_change = false, - .fragile_left = fragile, - .fragile_right = fragile, - .is_keyword = false, - {{ - .visible_descendant_count = 0, - .production_id = production_id, - .first_leaf = {.symbol = 0, .parse_state = 0}, - }}}; - MutableSubtree result = {.ptr = data}; - ts_subtree_summarize_children(result, language); - return result; + *data = (SubtreeHeapData) { + .ref_count = 1, + .symbol = symbol, + .child_count = children->size, + .visible = metadata.visible, + .named = metadata.named, + .has_changes = false, + .has_external_scanner_state_change = false, + .fragile_left = fragile, + .fragile_right = fragile, + .is_keyword = false, + {{ + .visible_descendant_count = 0, + .production_id = production_id, + .first_leaf = {.symbol = 0, .parse_state = 0}, + }} + }; + MutableSubtree result = {.ptr = data}; + ts_subtree_summarize_children(result, language); + return result; } // Create a new error node containing the given children. // // This node is treated as 'extra'. 
Its children are prevented from having // having any effect on the parse state. -Subtree ts_subtree_new_error_node(SubtreeArray *children, bool extra, - const t_language *language) -{ - MutableSubtree result = - ts_subtree_new_node(ts_builtin_sym_error, children, 0, language); - result.ptr->extra = extra; - return ts_subtree_from_mut(result); +Subtree ts_subtree_new_error_node( + SubtreeArray *children, + bool extra, + const TSLanguage *language +) { + MutableSubtree result = ts_subtree_new_node( + ts_builtin_sym_error, children, 0, language + ); + result.ptr->extra = extra; + return ts_subtree_from_mut(result); } // Create a new 'missing leaf' node. // // This node is treated as 'extra'. Its children are prevented from having // having any effect on the parse state. -Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, t_symbol symbol, - t_parse_length padding, t_u32 lookahead_bytes, - const t_language *language) -{ - Subtree result = - ts_subtree_new_leaf(pool, symbol, padding, length_zero(), - lookahead_bytes, 0, false, false, false, language); - if (result.data.is_inline) - { - result.data.is_missing = true; - } - else - { - ((SubtreeHeapData *)result.ptr)->is_missing = true; - } - return result; +Subtree ts_subtree_new_missing_leaf( + SubtreePool *pool, + TSSymbol symbol, + Length padding, + uint32_t lookahead_bytes, + const TSLanguage *language +) { + Subtree result = ts_subtree_new_leaf( + pool, symbol, padding, length_zero(), lookahead_bytes, + 0, false, false, false, language + ); + if (result.data.is_inline) { + result.data.is_missing = true; + } else { + ((SubtreeHeapData *)result.ptr)->is_missing = true; + } + return result; } -void ts_subtree_retain(Subtree self) -{ - if (self.data.is_inline) - return; - assert(self.ptr->ref_count > 0); - *(t_u32 *)&self.ptr->ref_count += 1; - assert(self.ptr->ref_count != 0); +void ts_subtree_retain(Subtree self) { + if (self.data.is_inline) return; + assert(self.ptr->ref_count > 0); + atomic_inc((volatile uint32_t 
*)&self.ptr->ref_count); + assert(self.ptr->ref_count != 0); } -void ts_subtree_release(SubtreePool *pool, Subtree self) -{ - if (self.data.is_inline) - return; - array_clear(&pool->tree_stack); +void ts_subtree_release(SubtreePool *pool, Subtree self) { + if (self.data.is_inline) return; + array_clear(&pool->tree_stack); - assert(self.ptr->ref_count > 0); - if (--(*(t_u32 *)&self.ptr->ref_count) == 0) - { - array_push(&pool->tree_stack, ts_subtree_to_mt_unsafe(self)); - } + assert(self.ptr->ref_count > 0); + if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) { + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); + } - while (pool->tree_stack.size > 0) - { - MutableSubtree tree = array_pop(&pool->tree_stack); - if (tree.ptr->child_count > 0) - { - Subtree *children = ts_subtree_children(tree); - for (t_u32 i = 0; i < tree.ptr->child_count; i++) - { - Subtree child = children[i]; - if (child.data.is_inline) - continue; - assert(child.ptr->ref_count > 0); - if (--*(t_u32 *)&child.ptr->ref_count == 0) - { - array_push(&pool->tree_stack, - ts_subtree_to_mt_unsafe(child)); - } - } - free(children); - } - else - { - if (tree.ptr->has_external_tokens) - { - ts_external_scanner_state_delete( - &tree.ptr->external_scanner_state); - } - ts_subtree_pool_free(pool, tree.ptr); - } - } + while (pool->tree_stack.size > 0) { + MutableSubtree tree = array_pop(&pool->tree_stack); + if (tree.ptr->child_count > 0) { + Subtree *children = ts_subtree_children(tree); + for (uint32_t i = 0; i < tree.ptr->child_count; i++) { + Subtree child = children[i]; + if (child.data.is_inline) continue; + assert(child.ptr->ref_count > 0); + if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) { + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); + } + } + ts_free(children); + } else { + if (tree.ptr->has_external_tokens) { + ts_external_scanner_state_delete(&tree.ptr->external_scanner_state); + } + ts_subtree_pool_free(pool, tree.ptr); + } + } } -int 
ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool) -{ - array_push(&pool->tree_stack, ts_subtree_to_mt_unsafe(left)); - array_push(&pool->tree_stack, ts_subtree_to_mt_unsafe(right)); +int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool) { + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left)); + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right)); - while (pool->tree_stack.size > 0) - { - right = ts_subtree_from_mut(array_pop(&pool->tree_stack)); - left = ts_subtree_from_mut(array_pop(&pool->tree_stack)); + while (pool->tree_stack.size > 0) { + right = ts_subtree_from_mut(array_pop(&pool->tree_stack)); + left = ts_subtree_from_mut(array_pop(&pool->tree_stack)); - int result = 0; - if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) - result = -1; - else if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) - result = 1; - else if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) - result = -1; - else if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) - result = 1; - if (result != 0) - { - array_clear(&pool->tree_stack); - return result; - } + int result = 0; + if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) result = -1; + else if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) result = 1; + else if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) result = -1; + else if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) result = 1; + if (result != 0) { + array_clear(&pool->tree_stack); + return result; + } - for (t_u32 i = ts_subtree_child_count(left); i > 0; i--) - { - Subtree left_child = ts_subtree_children(left)[i - 1]; - Subtree right_child = ts_subtree_children(right)[i - 1]; - array_push(&pool->tree_stack, ts_subtree_to_mt_unsafe(left_child)); - array_push(&pool->tree_stack, - ts_subtree_to_mt_unsafe(right_child)); - } - } + for (uint32_t i = ts_subtree_child_count(left); i > 0; i--) { + Subtree left_child = 
ts_subtree_children(left)[i - 1]; + Subtree right_child = ts_subtree_children(right)[i - 1]; + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left_child)); + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right_child)); + } + } - return 0; + return 0; } -static inline void ts_subtree_set_has_changes(MutableSubtree *self) -{ - if (self->data.is_inline) - { - self->data.has_changes = true; - } - else - { - self->ptr->has_changes = true; - } +static inline void ts_subtree_set_has_changes(MutableSubtree *self) { + if (self->data.is_inline) { + self->data.has_changes = true; + } else { + self->ptr->has_changes = true; + } } -Subtree ts_subtree_edit(Subtree self, const t_input_edit *inpt_edit, - SubtreePool *pool) -{ - typedef struct - { - Subtree *tree; - Edit edit; - } EditEntry; +Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, SubtreePool *pool) { + typedef struct { + Subtree *tree; + Edit edit; + } EditEntry; - Array(EditEntry) stack = array_new(); - array_push( - &stack, - ((EditEntry){ - .tree = &self, - .edit = - (Edit){ - .start = {inpt_edit->start_byte, inpt_edit->start_point}, - .old_end = {inpt_edit->old_end_byte, - inpt_edit->old_end_point}, - .new_end = {inpt_edit->new_end_byte, - inpt_edit->new_end_point}, - }, - })); + Array(EditEntry) stack = array_new(); + array_push(&stack, ((EditEntry) { + .tree = &self, + .edit = (Edit) { + .start = {input_edit->start_byte, input_edit->start_point}, + .old_end = {input_edit->old_end_byte, input_edit->old_end_point}, + .new_end = {input_edit->new_end_byte, input_edit->new_end_point}, + }, + })); - while (stack.size) - { - EditEntry entry = array_pop(&stack); - Edit edit = entry.edit; - bool is_noop = edit.old_end.bytes == edit.start.bytes && - edit.new_end.bytes == edit.start.bytes; - bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; - bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree); + while (stack.size) { + EditEntry entry = array_pop(&stack); + 
Edit edit = entry.edit; + bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes; + bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; + bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree); - t_parse_length size = ts_subtree_size(*entry.tree); - t_parse_length padding = ts_subtree_padding(*entry.tree); - t_parse_length total_size = length_add(padding, size); - t_u32 lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); - t_u32 end_byte = total_size.bytes + lookahead_bytes; - if (edit.start.bytes > end_byte || - (is_noop && edit.start.bytes == end_byte)) - continue; + Length size = ts_subtree_size(*entry.tree); + Length padding = ts_subtree_padding(*entry.tree); + Length total_size = length_add(padding, size); + uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); + uint32_t end_byte = total_size.bytes + lookahead_bytes; + if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue; - // If the edit is entirely within the space before this subtree, then - // shift this subtree over according to the edit without changing its - // size. - if (edit.old_end.bytes <= padding.bytes) - { - padding = - length_add(edit.new_end, length_sub(padding, edit.old_end)); - } + // If the edit is entirely within the space before this subtree, then shift this + // subtree over according to the edit without changing its size. + if (edit.old_end.bytes <= padding.bytes) { + padding = length_add(edit.new_end, length_sub(padding, edit.old_end)); + } - // If the edit starts in the space before this subtree and extends into - // this subtree, shrink the subtree's content to compensate for the - // change in the space before it. 
- else if (edit.start.bytes < padding.bytes) - { - size = - length_saturating_sub(size, length_sub(edit.old_end, padding)); - padding = edit.new_end; - } + // If the edit starts in the space before this subtree and extends into this subtree, + // shrink the subtree's content to compensate for the change in the space before it. + else if (edit.start.bytes < padding.bytes) { + size = length_saturating_sub(size, length_sub(edit.old_end, padding)); + padding = edit.new_end; + } - // If the edit is a pure insertion right at the start of the subtree, - // shift the subtree over according to the insertion. - else if (edit.start.bytes == padding.bytes && is_pure_insertion) - { - padding = edit.new_end; - } + // If the edit is a pure insertion right at the start of the subtree, + // shift the subtree over according to the insertion. + else if (edit.start.bytes == padding.bytes && is_pure_insertion) { + padding = edit.new_end; + } - // If the edit is within this subtree, resize the subtree to reflect the - // edit. - else if (edit.start.bytes < total_size.bytes || - (edit.start.bytes == total_size.bytes && is_pure_insertion)) - { - size = length_add(length_sub(edit.new_end, padding), - length_saturating_sub(total_size, edit.old_end)); - } + // If the edit is within this subtree, resize the subtree to reflect the edit. 
+ else if ( + edit.start.bytes < total_size.bytes || + (edit.start.bytes == total_size.bytes && is_pure_insertion) + ) { + size = length_add( + length_sub(edit.new_end, padding), + length_saturating_sub(total_size, edit.old_end) + ); + } - MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree); + MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree); - if (result.data.is_inline) - { - if (ts_subtree_can_inline(padding, size, lookahead_bytes)) - { - result.data.padding_bytes = padding.bytes; - result.data.padding_rows = padding.extent.row; - result.data.padding_columns = padding.extent.column; - result.data.size_bytes = size.bytes; - } - else - { - SubtreeHeapData *data = ts_subtree_pool_allocate(pool); - data->ref_count = 1; - data->padding = padding; - data->size = size; - data->lookahead_bytes = lookahead_bytes; - data->error_cost = 0; - data->child_count = 0; - data->symbol = result.data.symbol; - data->parse_state = result.data.parse_state; - data->visible = result.data.visible; - data->named = result.data.named; - data->extra = result.data.extra; - data->fragile_left = false; - data->fragile_right = false; - data->has_changes = false; - data->has_external_tokens = false; - data->depends_on_column = false; - data->is_missing = result.data.is_missing; - data->is_keyword = result.data.is_keyword; - result.ptr = data; - } - } - else - { - result.ptr->padding = padding; - result.ptr->size = size; - } + if (result.data.is_inline) { + if (ts_subtree_can_inline(padding, size, lookahead_bytes)) { + result.data.padding_bytes = padding.bytes; + result.data.padding_rows = padding.extent.row; + result.data.padding_columns = padding.extent.column; + result.data.size_bytes = size.bytes; + } else { + SubtreeHeapData *data = ts_subtree_pool_allocate(pool); + data->ref_count = 1; + data->padding = padding; + data->size = size; + data->lookahead_bytes = lookahead_bytes; + data->error_cost = 0; + data->child_count = 0; + data->symbol = result.data.symbol; + 
data->parse_state = result.data.parse_state; + data->visible = result.data.visible; + data->named = result.data.named; + data->extra = result.data.extra; + data->fragile_left = false; + data->fragile_right = false; + data->has_changes = false; + data->has_external_tokens = false; + data->depends_on_column = false; + data->is_missing = result.data.is_missing; + data->is_keyword = result.data.is_keyword; + result.ptr = data; + } + } else { + result.ptr->padding = padding; + result.ptr->size = size; + } - ts_subtree_set_has_changes(&result); - *entry.tree = ts_subtree_from_mut(result); + ts_subtree_set_has_changes(&result); + *entry.tree = ts_subtree_from_mut(result); - t_parse_length child_left, child_right = length_zero(); - for (t_u32 i = 0, n = ts_subtree_child_count(*entry.tree); i < n; - i++) - { - Subtree *child = &ts_subtree_children(*entry.tree)[i]; - t_parse_length child_size = ts_subtree_total_size(*child); - child_left = child_right; - child_right = length_add(child_left, child_size); + Length child_left, child_right = length_zero(); + for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) { + Subtree *child = &ts_subtree_children(*entry.tree)[i]; + Length child_size = ts_subtree_total_size(*child); + child_left = child_right; + child_right = length_add(child_left, child_size); - // If this child ends before the edit, it is not affected. - if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < - edit.start.bytes) - continue; + // If this child ends before the edit, it is not affected. + if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue; - // Keep editing child nodes until a node is reached that starts - // after the edit. Also, if this node's validity depends on its - // column position, then continue invaliditing child nodes until - // reaching a line break. 
- if (((child_left.bytes > edit.old_end.bytes) || - (child_left.bytes == edit.old_end.bytes && - child_size.bytes > 0 && i > 0)) && - (!invalidate_first_row || - child_left.extent.row > entry.tree->ptr->padding.extent.row)) - { - break; - } + // Keep editing child nodes until a node is reached that starts after the edit. + // Also, if this node's validity depends on its column position, then continue + // invaliditing child nodes until reaching a line break. + if (( + (child_left.bytes > edit.old_end.bytes) || + (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0) + ) && ( + !invalidate_first_row || + child_left.extent.row > entry.tree->ptr->padding.extent.row + )) { + break; + } - // Transform edit into the child's coordinate space. - Edit child_edit = { - .start = length_saturating_sub(edit.start, child_left), - .old_end = length_saturating_sub(edit.old_end, child_left), - .new_end = length_saturating_sub(edit.new_end, child_left), - }; + // Transform edit into the child's coordinate space. + Edit child_edit = { + .start = length_saturating_sub(edit.start, child_left), + .old_end = length_saturating_sub(edit.old_end, child_left), + .new_end = length_saturating_sub(edit.new_end, child_left), + }; - // Interpret all inserted text as applying to the *first* child that - // touches the edit. Subsequent children are only never have any - // text inserted into them; they are only shrunk to compensate for - // the edit. - if (child_right.bytes > edit.start.bytes || - (child_right.bytes == edit.start.bytes && is_pure_insertion)) - { - edit.new_end = edit.start; - } + // Interpret all inserted text as applying to the *first* child that touches the edit. + // Subsequent children are only never have any text inserted into them; they are only + // shrunk to compensate for the edit. 
+ if ( + child_right.bytes > edit.start.bytes || + (child_right.bytes == edit.start.bytes && is_pure_insertion) + ) { + edit.new_end = edit.start; + } - // Children that occur before the edit are not reshaped by the edit. - else - { - child_edit.old_end = child_edit.start; - child_edit.new_end = child_edit.start; - } + // Children that occur before the edit are not reshaped by the edit. + else { + child_edit.old_end = child_edit.start; + child_edit.new_end = child_edit.start; + } - // Queue processing of this child's subtree. - array_push(&stack, ((EditEntry){ - .tree = child, - .edit = child_edit, - })); - } - } + // Queue processing of this child's subtree. + array_push(&stack, ((EditEntry) { + .tree = child, + .edit = child_edit, + })); + } + } - array_delete(&stack); - return self; + array_delete(&stack); + return self; } -Subtree ts_subtree_last_external_token(Subtree tree) -{ - if (!ts_subtree_has_external_tokens(tree)) - return NULL_SUBTREE; - while (tree.ptr->child_count > 0) - { - for (t_u32 i = tree.ptr->child_count - 1; i + 1 > 0; i--) - { - Subtree child = ts_subtree_children(tree)[i]; - if (ts_subtree_has_external_tokens(child)) - { - tree = child; - break; - } - } - } - return tree; +Subtree ts_subtree_last_external_token(Subtree tree) { + if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE; + while (tree.ptr->child_count > 0) { + for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) { + Subtree child = ts_subtree_children(tree)[i]; + if (ts_subtree_has_external_tokens(child)) { + tree = child; + break; + } + } + } + return tree; } -static size_t ts_subtree__write_char_to_string(char *str, size_t n, t_i32 chr) -{ - if (chr == -1) - return snprintf(str, n, "INVALID"); - else if (chr == '\0') - return snprintf(str, n, "'\\0'"); - else if (chr == '\n') - return snprintf(str, n, "'\\n'"); - else if (chr == '\t') - return snprintf(str, n, "'\\t'"); - else if (chr == '\r') - return snprintf(str, n, "'\\r'"); - else if (0 < chr && chr < 128 
&& isprint(chr)) - return snprintf(str, n, "'%c'", chr); - else - return snprintf(str, n, "%d", chr); +static size_t ts_subtree__write_char_to_string(char *str, size_t n, int32_t chr) { + if (chr == -1) + return snprintf(str, n, "INVALID"); + else if (chr == '\0') + return snprintf(str, n, "'\\0'"); + else if (chr == '\n') + return snprintf(str, n, "'\\n'"); + else if (chr == '\t') + return snprintf(str, n, "'\\t'"); + else if (chr == '\r') + return snprintf(str, n, "'\\r'"); + else if (0 < chr && chr < 128 && isprint(chr)) + return snprintf(str, n, "'%c'", chr); + else + return snprintf(str, n, "%d", chr); } static const char *const ROOT_FIELD = "__ROOT__"; static size_t ts_subtree__write_to_string( - Subtree self, char *string, size_t limit, const t_language *language, - bool include_all, t_symbol alias_symbol, bool alias_is_named, - const char *field_name) -{ - if (!self.ptr) - return snprintf(string, limit, "(NULL)"); + Subtree self, char *string, size_t limit, + const TSLanguage *language, bool include_all, + TSSymbol alias_symbol, bool alias_is_named, const char *field_name +) { + if (!self.ptr) return snprintf(string, limit, "(NULL)"); - char *cursor = string; - char **writer = (limit > 1) ? &cursor : &string; - bool is_root = field_name == ROOT_FIELD; - bool is_visible = - include_all || ts_subtree_missing(self) || - (alias_symbol ? alias_is_named - : ts_subtree_visible(self) && ts_subtree_named(self)); + char *cursor = string; + char **writer = (limit > 1) ? &cursor : &string; + bool is_root = field_name == ROOT_FIELD; + bool is_visible = + include_all || + ts_subtree_missing(self) || + ( + alias_symbol + ? 
alias_is_named + : ts_subtree_visible(self) && ts_subtree_named(self) + ); - if (is_visible) - { - if (!is_root) - { - cursor += snprintf(*writer, limit, " "); - if (field_name) - { - cursor += snprintf(*writer, limit, "%s: ", field_name); - } - } + if (is_visible) { + if (!is_root) { + cursor += snprintf(*writer, limit, " "); + if (field_name) { + cursor += snprintf(*writer, limit, "%s: ", field_name); + } + } - if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && - self.ptr->size.bytes > 0) - { - cursor += snprintf(*writer, limit, "(UNEXPECTED "); - cursor += ts_subtree__write_char_to_string( - *writer, limit, self.ptr->lookahead_char); - } - else - { - t_symbol symbol = - alias_symbol ? alias_symbol : ts_subtree_symbol(self); - const char *symbol_name = ts_language_symbol_name(language, symbol); - if (ts_subtree_missing(self)) - { - cursor += snprintf(*writer, limit, "(MISSING "); - if (alias_is_named || ts_subtree_named(self)) - { - cursor += snprintf(*writer, limit, "%s", symbol_name); - } - else - { - cursor += snprintf(*writer, limit, "\"%s\"", symbol_name); - } - } - else - { - cursor += snprintf(*writer, limit, "(%s", symbol_name); - } - } - } - else if (is_root) - { - t_symbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); - const char *symbol_name = ts_language_symbol_name(language, symbol); - if (ts_subtree_child_count(self) > 0) - { - cursor += snprintf(*writer, limit, "(%s", symbol_name); - } - else if (ts_subtree_named(self)) - { - cursor += snprintf(*writer, limit, "(%s)", symbol_name); - } - else - { - cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name); - } - } + if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) { + cursor += snprintf(*writer, limit, "(UNEXPECTED "); + cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char); + } else { + TSSymbol symbol = alias_symbol ? 
alias_symbol : ts_subtree_symbol(self); + const char *symbol_name = ts_language_symbol_name(language, symbol); + if (ts_subtree_missing(self)) { + cursor += snprintf(*writer, limit, "(MISSING "); + if (alias_is_named || ts_subtree_named(self)) { + cursor += snprintf(*writer, limit, "%s", symbol_name); + } else { + cursor += snprintf(*writer, limit, "\"%s\"", symbol_name); + } + } else { + cursor += snprintf(*writer, limit, "(%s", symbol_name); + } + } + } else if (is_root) { + TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); + const char *symbol_name = ts_language_symbol_name(language, symbol); + if (ts_subtree_child_count(self) > 0) { + cursor += snprintf(*writer, limit, "(%s", symbol_name); + } else if (ts_subtree_named(self)) { + cursor += snprintf(*writer, limit, "(%s)", symbol_name); + } else { + cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name); + } + } - if (ts_subtree_child_count(self)) - { - const t_symbol *alias_sequence = - ts_language_alias_sequence(language, self.ptr->production_id); - const t_field_map_entry *field_map, *field_map_end; - ts_language_field_map(language, self.ptr->production_id, &field_map, - &field_map_end); + if (ts_subtree_child_count(self)) { + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map( + language, + self.ptr->production_id, + &field_map, + &field_map_end + ); - t_u32 structural_child_index = 0; - for (t_u32 i = 0; i < self.ptr->child_count; i++) - { - Subtree child = ts_subtree_children(self)[i]; - if (ts_subtree_extra(child)) - { - cursor += - ts_subtree__write_to_string(child, *writer, limit, language, - include_all, 0, false, NULL); - } - else - { - t_symbol subtree_alias_symbol = - alias_sequence ? alias_sequence[structural_child_index] : 0; - bool subtree_alias_is_named = - subtree_alias_symbol ? 
ts_language_symbol_metadata( - language, subtree_alias_symbol) - .named - : false; + uint32_t structural_child_index = 0; + for (uint32_t i = 0; i < self.ptr->child_count; i++) { + Subtree child = ts_subtree_children(self)[i]; + if (ts_subtree_extra(child)) { + cursor += ts_subtree__write_to_string( + child, *writer, limit, + language, include_all, + 0, false, NULL + ); + } else { + TSSymbol subtree_alias_symbol = alias_sequence + ? alias_sequence[structural_child_index] + : 0; + bool subtree_alias_is_named = subtree_alias_symbol + ? ts_language_symbol_metadata(language, subtree_alias_symbol).named + : false; - const char *child_field_name = is_visible ? NULL : field_name; - for (const t_field_map_entry *map = field_map; - map < field_map_end; map++) - { - if (!map->inherited && - map->child_index == structural_child_index) - { - child_field_name = language->field_names[map->field_id]; - break; - } - } + const char *child_field_name = is_visible ? NULL : field_name; + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { + if (!map->inherited && map->child_index == structural_child_index) { + child_field_name = language->field_names[map->field_id]; + break; + } + } - cursor += ts_subtree__write_to_string( - child, *writer, limit, language, include_all, - subtree_alias_symbol, subtree_alias_is_named, - child_field_name); - structural_child_index++; - } - } - } + cursor += ts_subtree__write_to_string( + child, *writer, limit, + language, include_all, + subtree_alias_symbol, subtree_alias_is_named, child_field_name + ); + structural_child_index++; + } + } + } - if (is_visible) - cursor += snprintf(*writer, limit, ")"); + if (is_visible) cursor += snprintf(*writer, limit, ")"); - return cursor - string; + return cursor - string; } -char *ts_subtree_string(Subtree self, t_symbol alias_symbol, - bool alias_is_named, const t_language *language, - bool include_all) -{ - char scratch_string[1]; - size_t size = ts_subtree__write_to_string(self, 
scratch_string, 1, language, - include_all, alias_symbol, - alias_is_named, ROOT_FIELD) + - 1; - char *result = malloc(size * sizeof(char)); - ts_subtree__write_to_string(self, result, size, language, include_all, - alias_symbol, alias_is_named, ROOT_FIELD); - return result; +char *ts_subtree_string( + Subtree self, + TSSymbol alias_symbol, + bool alias_is_named, + const TSLanguage *language, + bool include_all +) { + char scratch_string[1]; + size_t size = ts_subtree__write_to_string( + self, scratch_string, 1, + language, include_all, + alias_symbol, alias_is_named, ROOT_FIELD + ) + 1; + char *result = ts_malloc(size * sizeof(char)); + ts_subtree__write_to_string( + self, result, size, + language, include_all, + alias_symbol, alias_is_named, ROOT_FIELD + ); + return result; } -void ts_subtree__print_dot_graph(const Subtree *self, t_u32 start_offset, - const t_language *language, - t_symbol alias_symbol, FILE *f) -{ - t_symbol subtree_symbol = ts_subtree_symbol(*self); - t_symbol symbol = alias_symbol ? alias_symbol : subtree_symbol; - t_u32 end_offset = start_offset + ts_subtree_total_bytes(*self); - fprintf(f, "tree_%p [label=\"", (void *)self); - ts_language_write_symbol_as_dot_string(language, f, symbol); - fprintf(f, "\""); +void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, + const TSLanguage *language, TSSymbol alias_symbol, + FILE *f) { + TSSymbol subtree_symbol = ts_subtree_symbol(*self); + TSSymbol symbol = alias_symbol ? 
alias_symbol : subtree_symbol; + uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self); + fprintf(f, "tree_%p [label=\"", (void *)self); + ts_language_write_symbol_as_dot_string(language, f, symbol); + fprintf(f, "\""); - if (ts_subtree_child_count(*self) == 0) - fprintf(f, ", shape=plaintext"); - if (ts_subtree_extra(*self)) - fprintf(f, ", fontcolor=gray"); + if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext"); + if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray"); - fprintf(f, - ", tooltip=\"" - "range: %u - %u\n" - "state: %d\n" - "error-cost: %u\n" - "has-changes: %u\n" - "depends-on-column: %u\n" - "descendant-count: %u\n" - "repeat-depth: %u\n" - "lookahead-bytes: %u", - start_offset, end_offset, ts_subtree_parse_state(*self), - ts_subtree_error_cost(*self), ts_subtree_has_changes(*self), - ts_subtree_depends_on_column(*self), - ts_subtree_visible_descendant_count(*self), - ts_subtree_repeat_depth(*self), ts_subtree_lookahead_bytes(*self)); + fprintf(f, ", tooltip=\"" + "range: %u - %u\n" + "state: %d\n" + "error-cost: %u\n" + "has-changes: %u\n" + "depends-on-column: %u\n" + "descendant-count: %u\n" + "repeat-depth: %u\n" + "lookahead-bytes: %u", + start_offset, end_offset, + ts_subtree_parse_state(*self), + ts_subtree_error_cost(*self), + ts_subtree_has_changes(*self), + ts_subtree_depends_on_column(*self), + ts_subtree_visible_descendant_count(*self), + ts_subtree_repeat_depth(*self), + ts_subtree_lookahead_bytes(*self) + ); - if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && - self->ptr->lookahead_char != 0) - { - fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char); - } + if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) { + fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char); + } - fprintf(f, "\"]\n"); + fprintf(f, "\"]\n"); - t_u32 child_start_offset = start_offset; - t_u32 child_info_offset = - 
language->max_alias_sequence_length * ts_subtree_production_id(*self); - for (t_u32 i = 0, n = ts_subtree_child_count(*self); i < n; i++) - { - const Subtree *child = &ts_subtree_children(*self)[i]; - t_symbol subtree_alias_symbol = 0; - if (!ts_subtree_extra(*child) && child_info_offset) - { - subtree_alias_symbol = language->alias_sequences[child_info_offset]; - child_info_offset++; - } - ts_subtree__print_dot_graph(child, child_start_offset, language, - subtree_alias_symbol, f); - fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, - (void *)child, i); - child_start_offset += ts_subtree_total_bytes(*child); - } + uint32_t child_start_offset = start_offset; + uint32_t child_info_offset = + language->max_alias_sequence_length * + ts_subtree_production_id(*self); + for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { + const Subtree *child = &ts_subtree_children(*self)[i]; + TSSymbol subtree_alias_symbol = 0; + if (!ts_subtree_extra(*child) && child_info_offset) { + subtree_alias_symbol = language->alias_sequences[child_info_offset]; + child_info_offset++; + } + ts_subtree__print_dot_graph(child, child_start_offset, language, subtree_alias_symbol, f); + fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i); + child_start_offset += ts_subtree_total_bytes(*child); + } } -void ts_subtree_print_dot_graph(Subtree self, const t_language *language, - FILE *f) -{ - fprintf(f, "digraph tree {\n"); - fprintf(f, "edge [arrowhead=none]\n"); - ts_subtree__print_dot_graph(&self, 0, language, 0, f); - fprintf(f, "}\n"); +void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) { + fprintf(f, "digraph tree {\n"); + fprintf(f, "edge [arrowhead=none]\n"); + ts_subtree__print_dot_graph(&self, 0, language, 0, f); + fprintf(f, "}\n"); } -const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) -{ - static const ExternalScannerState empty_state = {{.short_data = {0}}, - .length = 0}; - if 
(self.ptr && !self.data.is_inline && self.ptr->has_external_tokens && - self.ptr->child_count == 0) - { - return &self.ptr->external_scanner_state; - } - else - { - return &empty_state; - } +const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) { + static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0}; + if ( + self.ptr && + !self.data.is_inline && + self.ptr->has_external_tokens && + self.ptr->child_count == 0 + ) { + return &self.ptr->external_scanner_state; + } else { + return &empty_state; + } } -bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) -{ - const ExternalScannerState *state_self = - ts_subtree_external_scanner_state(self); - const ExternalScannerState *state_other = - ts_subtree_external_scanner_state(other); - return ts_external_scanner_state_eq( - state_self, ts_external_scanner_state_data(state_other), - state_other->length); +bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) { + const ExternalScannerState *state_self = ts_subtree_external_scanner_state(self); + const ExternalScannerState *state_other = ts_subtree_external_scanner_state(other); + return ts_external_scanner_state_eq( + state_self, + ts_external_scanner_state_data(state_other), + state_other->length + ); } diff --git a/parser/src/subtree.h b/parser/src/subtree.h index 3fbe3eb1..58c7e483 100644 --- a/parser/src/subtree.h +++ b/parser/src/subtree.h @@ -1,19 +1,22 @@ #ifndef TREE_SITTER_SUBTREE_H_ #define TREE_SITTER_SUBTREE_H_ -#include "me/types.h" -#include "parser/types/types_symbol.h" +#ifdef __cplusplus +extern "C" { +#endif -#include "./array.h" -#include "parser/error_costs.h" -#include "parser/parser_length.h" -#include "parser/api.h" #include #include #include +#include "./length.h" +#include "./array.h" +#include "./error_costs.h" +#include "./host.h" +#include "./api.h" +#include "./parser.h" #define TS_TREE_STATE_NONE USHRT_MAX -#define NULL_SUBTREE ((Subtree){.ptr = NULL}) +#define 
NULL_SUBTREE ((Subtree) {.ptr = NULL}) // The serialized state of an external scanner. // @@ -25,13 +28,12 @@ // // Small byte arrays are stored inline, and long ones are allocated // separately on the heap. -typedef struct -{ - union { - char *long_data; - char short_data[24]; - }; - t_u32 length; +typedef struct { + union { + char *long_data; + char short_data[24]; + }; + uint32_t length; } ExternalScannerState; // A compact representation of a subtree. @@ -45,108 +47,136 @@ typedef struct // Because of alignment, for any valid pointer this will be 0, giving // us the opportunity to make use of this bit to signify whether to use // the pointer or the inline struct. -typedef struct s_subtree_inline_data t_subtree_inline_data; +typedef struct SubtreeInlineData SubtreeInlineData; -struct s_subtree_inline_data -{ - bool is_inline : 1; - bool visible : 1; - bool named : 1; - bool extra : 1; - bool has_changes : 1; - bool is_missing : 1; - bool is_keyword : 1; - t_u8 symbol; - t_u16 parse_state; - t_u8 padding_columns; - t_u8 padding_rows : 4; - t_u8 lookahead_bytes : 4; - t_u8 padding_bytes; - t_u8 size_bytes; +#define SUBTREE_BITS \ + bool visible : 1; \ + bool named : 1; \ + bool extra : 1; \ + bool has_changes : 1; \ + bool is_missing : 1; \ + bool is_keyword : 1; + +#define SUBTREE_SIZE \ + uint8_t padding_columns; \ + uint8_t padding_rows : 4; \ + uint8_t lookahead_bytes : 4; \ + uint8_t padding_bytes; \ + uint8_t size_bytes; + +#if TS_BIG_ENDIAN +#if TS_PTR_SIZE == 32 + +struct SubtreeInlineData { + uint16_t parse_state; + uint8_t symbol; + SUBTREE_BITS + bool unused : 1; + bool is_inline : 1; + SUBTREE_SIZE }; +#else + +struct SubtreeInlineData { + SUBTREE_SIZE + uint16_t parse_state; + uint8_t symbol; + SUBTREE_BITS + bool unused : 1; + bool is_inline : 1; +}; + +#endif +#else + +struct SubtreeInlineData { + bool is_inline : 1; + SUBTREE_BITS + uint8_t symbol; + uint16_t parse_state; + SUBTREE_SIZE +}; + +#endif + +#undef SUBTREE_BITS +#undef SUBTREE_SIZE + // 
A heap-allocated representation of a subtree. // // This representation is used for parent nodes, external tokens, // errors, and other leaf nodes whose data is too large to fit into // the inline representation. -typedef struct -{ - volatile t_u32 ref_count; - t_parse_length padding; - t_parse_length size; - t_u32 lookahead_bytes; - t_u32 error_cost; - t_u32 child_count; - t_symbol symbol; - t_state_id parse_state; +typedef struct { + volatile uint32_t ref_count; + Length padding; + Length size; + uint32_t lookahead_bytes; + uint32_t error_cost; + uint32_t child_count; + TSSymbol symbol; + TSStateId parse_state; - bool visible : 1; - bool named : 1; - bool extra : 1; - bool fragile_left : 1; - bool fragile_right : 1; - bool has_changes : 1; - bool has_external_tokens : 1; - bool has_external_scanner_state_change : 1; - bool depends_on_column : 1; - bool is_missing : 1; - bool is_keyword : 1; + bool visible : 1; + bool named : 1; + bool extra : 1; + bool fragile_left : 1; + bool fragile_right : 1; + bool has_changes : 1; + bool has_external_tokens : 1; + bool has_external_scanner_state_change : 1; + bool depends_on_column: 1; + bool is_missing : 1; + bool is_keyword : 1; - union { - // Non-terminal subtrees (`child_count > 0`) - struct - { - t_u32 visible_child_count; - t_u32 named_child_count; - t_u32 visible_descendant_count; - t_i32 dynamic_precedence; - t_u16 repeat_depth; - t_u16 production_id; - struct - { - t_symbol symbol; - t_state_id parse_state; - } first_leaf; - }; + union { + // Non-terminal subtrees (`child_count > 0`) + struct { + uint32_t visible_child_count; + uint32_t named_child_count; + uint32_t visible_descendant_count; + int32_t dynamic_precedence; + uint16_t repeat_depth; + uint16_t production_id; + struct { + TSSymbol symbol; + TSStateId parse_state; + } first_leaf; + }; - // External terminal subtrees (`child_count == 0 && - // has_external_tokens`) - ExternalScannerState external_scanner_state; + // External terminal subtrees (`child_count 
== 0 && has_external_tokens`) + ExternalScannerState external_scanner_state; - // Error terminal subtrees (`child_count == 0 && symbol == - // ts_builtin_sym_error`) - t_i32 lookahead_char; - }; + // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`) + int32_t lookahead_char; + }; } SubtreeHeapData; // The fundamental building block of a syntax tree. typedef union { - t_subtree_inline_data data; - const SubtreeHeapData *ptr; + SubtreeInlineData data; + const SubtreeHeapData *ptr; } Subtree; // Like Subtree, but mutable. typedef union { - t_subtree_inline_data data; - SubtreeHeapData *ptr; + SubtreeInlineData data; + SubtreeHeapData *ptr; } MutableSubtree; typedef Array(Subtree) SubtreeArray; typedef Array(MutableSubtree) MutableSubtreeArray; -typedef struct -{ - MutableSubtreeArray free_trees; - MutableSubtreeArray tree_stack; +typedef struct { + MutableSubtreeArray free_trees; + MutableSubtreeArray tree_stack; } SubtreePool; -void ts_external_scanner_state_init(ExternalScannerState *, const char *, - unsigned); +void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned); const char *ts_external_scanner_state_data(const ExternalScannerState *); -bool ts_external_scanner_state_eq(const ExternalScannerState *self, - const char *, unsigned); -void ts_external_scanner_state_delete(ExternalScannerState *self); +bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *, unsigned); +void ts_external_scanner_state_delete(ExternalScannerState *self); void ts_subtree_array_copy(SubtreeArray, SubtreeArray *); void ts_subtree_array_clear(SubtreePool *, SubtreeArray *); @@ -154,280 +184,199 @@ void ts_subtree_array_delete(SubtreePool *, SubtreeArray *); void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *); void ts_subtree_array_reverse(SubtreeArray *); -SubtreePool ts_subtree_pool_new(t_u32 capacity); -void ts_subtree_pool_delete(SubtreePool *); +SubtreePool 
ts_subtree_pool_new(uint32_t capacity); +void ts_subtree_pool_delete(SubtreePool *); -Subtree ts_subtree_new_leaf(SubtreePool *, t_symbol, t_parse_length, t_parse_length, t_u32, - t_state_id, bool, bool, bool, const t_language *); -Subtree ts_subtree_new_error(SubtreePool *, t_i32, t_parse_length, t_parse_length, t_u32, - t_state_id, const t_language *); -MutableSubtree ts_subtree_new_node(t_symbol, SubtreeArray *, unsigned, - const t_language *); -Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const t_language *); -Subtree ts_subtree_new_missing_leaf(SubtreePool *, t_symbol, t_parse_length, t_u32, - const t_language *); +Subtree ts_subtree_new_leaf( + SubtreePool *, TSSymbol, Length, Length, uint32_t, + TSStateId, bool, bool, bool, const TSLanguage * +); +Subtree ts_subtree_new_error( + SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage * +); +MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *); +Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *); +Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, uint32_t, const TSLanguage *); MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree); -void ts_subtree_retain(Subtree); -void ts_subtree_release(SubtreePool *, Subtree); -int ts_subtree_compare(Subtree, Subtree, SubtreePool *); -void ts_subtree_set_symbol(MutableSubtree *, t_symbol, const t_language *); -void ts_subtree_summarize(MutableSubtree, const Subtree *, t_u32, - const t_language *); -void ts_subtree_summarize_children(MutableSubtree, const t_language *); -void ts_subtree_balance(Subtree, SubtreePool *, const t_language *); -Subtree ts_subtree_edit(Subtree, const t_input_edit *edit, SubtreePool *); -char *ts_subtree_string(Subtree, t_symbol, bool, const t_language *, - bool include_all); -void ts_subtree_print_dot_graph(Subtree, const t_language *, FILE *); +void ts_subtree_retain(Subtree); +void ts_subtree_release(SubtreePool *, Subtree); 
+int ts_subtree_compare(Subtree, Subtree, SubtreePool *); +void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *); +void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *); +void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *); +void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *); +Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *); +char *ts_subtree_string(Subtree, TSSymbol, bool, const TSLanguage *, bool include_all); +void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *); Subtree ts_subtree_last_external_token(Subtree); const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self); bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); -#define SUBTREE_GET(self, name) \ - ((self).data.is_inline ? (self).data.name : (self).ptr->name) +#define SUBTREE_GET(self, name) ((self).data.is_inline ? (self).data.name : (self).ptr->name) -static inline t_symbol ts_subtree_symbol(Subtree self) -{ - return SUBTREE_GET(self, symbol); -} -static inline bool ts_subtree_visible(Subtree self) -{ - return SUBTREE_GET(self, visible); -} -static inline bool ts_subtree_named(Subtree self) -{ - return SUBTREE_GET(self, named); -} -static inline bool ts_subtree_extra(Subtree self) -{ - return SUBTREE_GET(self, extra); -} -static inline bool ts_subtree_has_changes(Subtree self) -{ - return SUBTREE_GET(self, has_changes); -} -static inline bool ts_subtree_missing(Subtree self) -{ - return SUBTREE_GET(self, is_missing); -} -static inline bool ts_subtree_is_keyword(Subtree self) -{ - return SUBTREE_GET(self, is_keyword); -} -static inline t_state_id ts_subtree_parse_state(Subtree self) -{ - return SUBTREE_GET(self, parse_state); -} -static inline t_u32 ts_subtree_lookahead_bytes(Subtree self) -{ - return SUBTREE_GET(self, lookahead_bytes); -} +static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); } +static inline 
bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); } +static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); } +static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); } +static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); } +static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); } +static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); } +static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); } +static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); } #undef SUBTREE_GET // Get the size needed to store a heap-allocated subtree with the given // number of children. -static inline size_t ts_subtree_alloc_size(t_u32 child_count) -{ - return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); +static inline size_t ts_subtree_alloc_size(uint32_t child_count) { + return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); } // Get a subtree's children, which are allocated immediately before the // tree's own heap data. -#define ts_subtree_children(self) \ - ((self).data.is_inline \ - ? NULL \ - : (Subtree *)((self).ptr) - (self).ptr->child_count) +#define ts_subtree_children(self) \ + ((self).data.is_inline ? 
NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) -static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) -{ - if (self->data.is_inline) - { - self->data.extra = is_extra; - } - else - { - self->ptr->extra = is_extra; - } +static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) { + if (self->data.is_inline) { + self->data.extra = is_extra; + } else { + self->ptr->extra = is_extra; + } } -static inline t_symbol ts_subtree_leaf_symbol(Subtree self) -{ - if (self.data.is_inline) - return self.data.symbol; - if (self.ptr->child_count == 0) - return self.ptr->symbol; - return self.ptr->first_leaf.symbol; +static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) { + if (self.data.is_inline) return self.data.symbol; + if (self.ptr->child_count == 0) return self.ptr->symbol; + return self.ptr->first_leaf.symbol; } -static inline t_state_id ts_subtree_leaf_parse_state(Subtree self) -{ - if (self.data.is_inline) - return self.data.parse_state; - if (self.ptr->child_count == 0) - return self.ptr->parse_state; - return self.ptr->first_leaf.parse_state; +static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) { + if (self.data.is_inline) return self.data.parse_state; + if (self.ptr->child_count == 0) return self.ptr->parse_state; + return self.ptr->first_leaf.parse_state; } -static inline t_parse_length ts_subtree_padding(Subtree self) -{ - if (self.data.is_inline) - { - t_parse_length result = {self.data.padding_bytes, - {self.data.padding_rows, self.data.padding_columns}}; - return result; - } - else - { - return self.ptr->padding; - } +static inline Length ts_subtree_padding(Subtree self) { + if (self.data.is_inline) { + Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}}; + return result; + } else { + return self.ptr->padding; + } } -static inline t_parse_length ts_subtree_size(Subtree self) -{ - if (self.data.is_inline) - { - t_parse_length result = 
{self.data.size_bytes, {0, self.data.size_bytes}}; - return result; - } - else - { - return self.ptr->size; - } +static inline Length ts_subtree_size(Subtree self) { + if (self.data.is_inline) { + Length result = {self.data.size_bytes, {0, self.data.size_bytes}}; + return result; + } else { + return self.ptr->size; + } } -static inline t_parse_length ts_subtree_total_size(Subtree self) -{ - return length_add(ts_subtree_padding(self), ts_subtree_size(self)); +static inline Length ts_subtree_total_size(Subtree self) { + return length_add(ts_subtree_padding(self), ts_subtree_size(self)); } -static inline t_u32 ts_subtree_total_bytes(Subtree self) -{ - return ts_subtree_total_size(self).bytes; +static inline uint32_t ts_subtree_total_bytes(Subtree self) { + return ts_subtree_total_size(self).bytes; } -static inline t_u32 ts_subtree_child_count(Subtree self) -{ - return self.data.is_inline ? 0 : self.ptr->child_count; +static inline uint32_t ts_subtree_child_count(Subtree self) { + return self.data.is_inline ? 0 : self.ptr->child_count; } -static inline t_u32 ts_subtree_repeat_depth(Subtree self) -{ - return self.data.is_inline ? 0 : self.ptr->repeat_depth; +static inline uint32_t ts_subtree_repeat_depth(Subtree self) { + return self.data.is_inline ? 0 : self.ptr->repeat_depth; } -static inline t_u32 ts_subtree_is_repetition(Subtree self) -{ - return self.data.is_inline ? 0 - : !self.ptr->named && !self.ptr->visible && - self.ptr->child_count != 0; +static inline uint32_t ts_subtree_is_repetition(Subtree self) { + return self.data.is_inline + ? 0 + : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0; } -static inline t_u32 ts_subtree_visible_descendant_count(Subtree self) -{ - return (self.data.is_inline || self.ptr->child_count == 0) - ? 0 - : self.ptr->visible_descendant_count; +static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) { + return (self.data.is_inline || self.ptr->child_count == 0) + ? 
0 + : self.ptr->visible_descendant_count; } -static inline t_u32 ts_subtree_visible_child_count(Subtree self) -{ - if (ts_subtree_child_count(self) > 0) - { - return self.ptr->visible_child_count; - } - else - { - return 0; - } +static inline uint32_t ts_subtree_visible_child_count(Subtree self) { + if (ts_subtree_child_count(self) > 0) { + return self.ptr->visible_child_count; + } else { + return 0; + } } -static inline t_u32 ts_subtree_error_cost(Subtree self) -{ - if (ts_subtree_missing(self)) - { - return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; - } - else - { - return self.data.is_inline ? 0 : self.ptr->error_cost; - } +static inline uint32_t ts_subtree_error_cost(Subtree self) { + if (ts_subtree_missing(self)) { + return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; + } else { + return self.data.is_inline ? 0 : self.ptr->error_cost; + } } -static inline t_i32 ts_subtree_dynamic_precedence(Subtree self) -{ - return (self.data.is_inline || self.ptr->child_count == 0) - ? 0 - : self.ptr->dynamic_precedence; +static inline int32_t ts_subtree_dynamic_precedence(Subtree self) { + return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence; } -static inline t_u16 ts_subtree_production_id(Subtree self) -{ - if (ts_subtree_child_count(self) > 0) - { - return self.ptr->production_id; - } - else - { - return 0; - } +static inline uint16_t ts_subtree_production_id(Subtree self) { + if (ts_subtree_child_count(self) > 0) { + return self.ptr->production_id; + } else { + return 0; + } } -static inline bool ts_subtree_fragile_left(Subtree self) -{ - return self.data.is_inline ? false : self.ptr->fragile_left; +static inline bool ts_subtree_fragile_left(Subtree self) { + return self.data.is_inline ? false : self.ptr->fragile_left; } -static inline bool ts_subtree_fragile_right(Subtree self) -{ - return self.data.is_inline ? 
false : self.ptr->fragile_right; +static inline bool ts_subtree_fragile_right(Subtree self) { + return self.data.is_inline ? false : self.ptr->fragile_right; } -static inline bool ts_subtree_has_external_tokens(Subtree self) -{ - return self.data.is_inline ? false : self.ptr->has_external_tokens; +static inline bool ts_subtree_has_external_tokens(Subtree self) { + return self.data.is_inline ? false : self.ptr->has_external_tokens; } -static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) -{ - return self.data.is_inline ? false - : self.ptr->has_external_scanner_state_change; +static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) { + return self.data.is_inline ? false : self.ptr->has_external_scanner_state_change; } -static inline bool ts_subtree_depends_on_column(Subtree self) -{ - return self.data.is_inline ? false : self.ptr->depends_on_column; +static inline bool ts_subtree_depends_on_column(Subtree self) { + return self.data.is_inline ? false : self.ptr->depends_on_column; } -static inline bool ts_subtree_is_fragile(Subtree self) -{ - return self.data.is_inline - ? false - : (self.ptr->fragile_left || self.ptr->fragile_right); +static inline bool ts_subtree_is_fragile(Subtree self) { + return self.data.is_inline ? 
false : (self.ptr->fragile_left || self.ptr->fragile_right); } -static inline bool ts_subtree_is_error(Subtree self) -{ - return ts_subtree_symbol(self) == ts_builtin_sym_error; +static inline bool ts_subtree_is_error(Subtree self) { + return ts_subtree_symbol(self) == ts_builtin_sym_error; } -static inline bool ts_subtree_is_eof(Subtree self) -{ - return ts_subtree_symbol(self) == ts_builtin_sym_end; +static inline bool ts_subtree_is_eof(Subtree self) { + return ts_subtree_symbol(self) == ts_builtin_sym_end; } -static inline Subtree ts_subtree_from_mut(MutableSubtree self) -{ - Subtree result; - result.data = self.data; - return result; +static inline Subtree ts_subtree_from_mut(MutableSubtree self) { + Subtree result; + result.data = self.data; + return result; } -static inline MutableSubtree ts_subtree_to_mt_unsafe(Subtree self) -{ - MutableSubtree result; - result.data = self.data; - return result; +static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) { + MutableSubtree result; + result.data = self.data; + return result; } -#endif // TREE_SITTER_SUBTREE_H_ +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_SUBTREE_H_ diff --git a/parser/src/tree.c b/parser/src/tree.c index 6122ea08..328a7984 100644 --- a/parser/src/tree.c +++ b/parser/src/tree.c @@ -1,62 +1,62 @@ #define _POSIX_C_SOURCE 200112L -#include "parser/api.h" +#include "./api.h" #include "./array.h" - -#include "parser/parser_length.h" +#include "./get_changed_ranges.h" +#include "./length.h" #include "./subtree.h" #include "./tree_cursor.h" #include "./tree.h" -t_parse_tree *ts_tree_new( - Subtree root, const t_language *language, - const t_parser_range *included_ranges, unsigned included_range_count +TSTree *ts_tree_new( + Subtree root, const TSLanguage *language, + const TSRange *included_ranges, unsigned included_range_count ) { - t_parse_tree *result = malloc(sizeof(t_parse_tree)); + TSTree *result = ts_malloc(sizeof(TSTree)); result->root = root; result->language = 
ts_language_copy(language); - result->included_ranges = calloc(included_range_count, sizeof(t_parser_range)); - memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(t_parser_range)); + result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange)); + memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange)); result->included_range_count = included_range_count; return result; } -t_parse_tree *ts_tree_copy(const t_parse_tree *self) { +TSTree *ts_tree_copy(const TSTree *self) { ts_subtree_retain(self->root); return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count); } -void ts_tree_delete(t_parse_tree *self) { +void ts_tree_delete(TSTree *self) { if (!self) return; SubtreePool pool = ts_subtree_pool_new(0); ts_subtree_release(&pool, self->root); ts_subtree_pool_delete(&pool); ts_language_delete(self->language); - free(self->included_ranges); - free(self); + ts_free(self->included_ranges); + ts_free(self); } -t_parse_node ts_tree_root_node(const t_parse_tree *self) { +TSNode ts_tree_root_node(const TSTree *self) { return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0); } -t_parse_node ts_tree_root_node_with_offset( - const t_parse_tree *self, - t_u32 offset_bytes, - t_point offset_extent +TSNode ts_tree_root_node_with_offset( + const TSTree *self, + uint32_t offset_bytes, + TSPoint offset_extent ) { - t_parse_length offset = {offset_bytes, offset_extent}; + Length offset = {offset_bytes, offset_extent}; return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0); } -const t_language *ts_tree_language(const t_parse_tree *self) { +const TSLanguage *ts_tree_language(const TSTree *self) { return self->language; } -void ts_tree_edit(t_parse_tree *self, const t_input_edit *edit) { +void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { for (unsigned i = 0; i < self->included_range_count; i++) { - t_parser_range 
*range = &self->included_ranges[i]; + TSRange *range = &self->included_ranges[i]; if (range->end_byte >= edit->old_end_byte) { if (range->end_byte != UINT32_MAX) { range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte); @@ -66,7 +66,7 @@ void ts_tree_edit(t_parse_tree *self, const t_input_edit *edit) { ); if (range->end_byte < edit->new_end_byte) { range->end_byte = UINT32_MAX; - range->end_point = point_val_max(); + range->end_point = POINT_MAX; } } } else if (range->end_byte > edit->start_byte) { @@ -81,7 +81,7 @@ void ts_tree_edit(t_parse_tree *self, const t_input_edit *edit) { ); if (range->start_byte < edit->new_end_byte) { range->start_byte = UINT32_MAX; - range->start_point = point_val_max(); + range->start_point = POINT_MAX; } } else if (range->start_byte > edit->start_byte) { range->start_byte = edit->start_byte; @@ -94,13 +94,38 @@ void ts_tree_edit(t_parse_tree *self, const t_input_edit *edit) { ts_subtree_pool_delete(&pool); } -t_parser_range *ts_tree_included_ranges(const t_parse_tree *self, t_u32 *length) { +TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) { *length = self->included_range_count; - t_parser_range *ranges = calloc(self->included_range_count, sizeof(t_parser_range)); - memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(t_parser_range)); + TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange)); + memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange)); return ranges; } +TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) { + TreeCursor cursor1 = {NULL, array_new(), 0}; + TreeCursor cursor2 = {NULL, array_new(), 0}; + ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree)); + ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree)); + + TSRangeArray included_range_differences = array_new(); + ts_range_array_get_changed_ranges( + old_tree->included_ranges, 
old_tree->included_range_count, + new_tree->included_ranges, new_tree->included_range_count, + &included_range_differences + ); + + TSRange *result; + *length = ts_subtree_get_changed_ranges( + &old_tree->root, &new_tree->root, &cursor1, &cursor2, + old_tree->language, &included_range_differences, &result + ); + + array_delete(&included_range_differences); + array_delete(&cursor1.stack); + array_delete(&cursor2.stack); + return result; +} + #ifdef _WIN32 #include @@ -117,7 +142,7 @@ int _ts_dup(HANDLE handle) { return _open_osfhandle((intptr_t)dup_handle, 0); } -void ts_tree_print_dot_graph(const t_parse_tree *self, int fd) { +void ts_tree_print_dot_graph(const TSTree *self, int fd) { FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a"); ts_subtree_print_dot_graph(self->root, self->language, file); fclose(file); @@ -131,7 +156,7 @@ int _ts_dup(int file_descriptor) { return dup(file_descriptor); } -void ts_tree_print_dot_graph(const t_parse_tree *self, int file_descriptor) { +void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) { FILE *file = fdopen(_ts_dup(file_descriptor), "a"); ts_subtree_print_dot_graph(self->root, self->language, file); fclose(file); diff --git a/parser/src/tree.h b/parser/src/tree.h index 360974f0..f012f888 100644 --- a/parser/src/tree.h +++ b/parser/src/tree.h @@ -3,25 +3,29 @@ #include "./subtree.h" -typedef struct -{ - const Subtree *child; - const Subtree *parent; - t_parse_length position; - t_symbol alias_symbol; +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + const Subtree *child; + const Subtree *parent; + Length position; + TSSymbol alias_symbol; } ParentCacheEntry; -struct s_parse_tree -{ - Subtree root; - const t_language *language; - t_parser_range *included_ranges; - t_u32 included_range_count; +struct TSTree { + Subtree root; + const TSLanguage *language; + TSRange *included_ranges; + unsigned included_range_count; }; -t_parse_tree *ts_tree_new(Subtree root, const t_language *language, - 
const t_parser_range *, t_u32); -t_parse_node ts_node_new(const t_parse_tree *, const Subtree *, t_parse_length, - t_symbol); +TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned); +TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol); -#endif // TREE_SITTER_TREE_H_ +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_TREE_H_ diff --git a/parser/src/tree_cursor.c b/parser/src/tree_cursor.c index e863c961..0a351606 100644 --- a/parser/src/tree_cursor.c +++ b/parser/src/tree_cursor.c @@ -1,22 +1,22 @@ -#include "parser/api.h" - +#include "./api.h" +#include "./alloc.h" #include "./tree_cursor.h" #include "./language.h" #include "./tree.h" typedef struct { Subtree parent; - const t_parse_tree *tree; - t_parse_length position; - t_u32 child_index; - t_u32 structural_child_index; - t_u32 descendant_index; - const t_symbol *alias_sequence; + const TSTree *tree; + Length position; + uint32_t child_index; + uint32_t structural_child_index; + uint32_t descendant_index; + const TSSymbol *alias_sequence; } CursorChildIterator; // CursorChildIterator -static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, t_u32 index) { +static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) { TreeCursorEntry *entry = &self->stack.contents[index]; if (index == 0 || ts_subtree_visible(*entry->subtree)) { return true; @@ -37,12 +37,12 @@ static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCurs if (ts_subtree_child_count(*last_entry->subtree) == 0) { return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL}; } - const t_symbol *alias_sequence = ts_language_alias_sequence( + const TSSymbol *alias_sequence = ts_language_alias_sequence( self->tree->language, last_entry->subtree->ptr->production_id ); - t_u32 descendant_index = last_entry->descendant_index; + uint32_t descendant_index = last_entry->descendant_index; if 
(ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) { descendant_index += 1; } @@ -101,12 +101,12 @@ static inline bool ts_tree_cursor_child_iterator_next( // can only be computed if `b` has zero rows. Otherwise, this function // returns `LENGTH_UNDEFINED`, and the caller needs to recompute // the position some other way. -static inline t_parse_length length_backtrack(t_parse_length a, t_parse_length b) { +static inline Length length_backtrack(Length a, Length b) { if (length_is_undefined(a) || b.extent.row != 0) { return LENGTH_UNDEFINED; } - t_parse_length result; + Length result; result.bytes = a.bytes - b.bytes; result.extent.row = a.extent.row; result.extent.column = a.extent.column - b.extent.column; @@ -120,7 +120,7 @@ static inline bool ts_tree_cursor_child_iterator_previous( ) { // this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into // account unsigned underflow - if (!self->parent.ptr || (t_i8)self->child_index == -1) return false; + if (!self->parent.ptr || (int8_t)self->child_index == -1) return false; const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; *result = (TreeCursorEntry) { .subtree = child, @@ -141,26 +141,26 @@ static inline bool ts_tree_cursor_child_iterator_previous( // unsigned can underflow so compare it to child_count if (self->child_index < self->parent.ptr->child_count) { Subtree previous_child = ts_subtree_children(self->parent)[self->child_index]; - t_parse_length size = ts_subtree_size(previous_child); + Length size = ts_subtree_size(previous_child); self->position = length_backtrack(self->position, size); } return true; } -// t_parse_tree_cursor - lifecycle +// TSTreeCursor - lifecycle -t_parse_tree_cursor ts_tree_cursor_new(t_parse_node node) { - t_parse_tree_cursor self = {NULL, NULL, {0, 0, 0}}; +TSTreeCursor ts_tree_cursor_new(TSNode node) { + TSTreeCursor self = {NULL, NULL, {0, 0, 0}}; ts_tree_cursor_init((TreeCursor *)&self, node); return self; } -void 
ts_tree_cursor_reset(t_parse_tree_cursor *_self, t_parse_node node) { +void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) { ts_tree_cursor_init((TreeCursor *)_self, node); } -void ts_tree_cursor_init(TreeCursor *self, t_parse_node node) { +void ts_tree_cursor_init(TreeCursor *self, TSNode node) { self->tree = node.tree; self->root_alias_symbol = node.context[3]; array_clear(&self->stack); @@ -176,14 +176,14 @@ void ts_tree_cursor_init(TreeCursor *self, t_parse_node node) { })); } -void ts_tree_cursor_delete(t_parse_tree_cursor *_self) { +void ts_tree_cursor_delete(TSTreeCursor *_self) { TreeCursor *self = (TreeCursor *)_self; array_delete(&self->stack); } -// t_parse_tree_cursor - walking the tree +// TSTreeCursor - walking the tree -TreeCursorStep ts_tree_cursor_goto_first_child_internal(t_parse_tree_cursor *_self) { +TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) { TreeCursor *self = (TreeCursor *)_self; bool visible; TreeCursorEntry entry; @@ -201,7 +201,7 @@ TreeCursorStep ts_tree_cursor_goto_first_child_internal(t_parse_tree_cursor *_se return TreeCursorStepNone; } -bool ts_tree_cursor_goto_first_child(t_parse_tree_cursor *self) { +bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) { for (;;) { switch (ts_tree_cursor_goto_first_child_internal(self)) { case TreeCursorStepHidden: @@ -215,7 +215,7 @@ bool ts_tree_cursor_goto_first_child(t_parse_tree_cursor *self) { return false; } -TreeCursorStep ts_tree_cursor_goto_last_child_internal(t_parse_tree_cursor *_self) { +TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) { TreeCursor *self = (TreeCursor *)_self; bool visible; TreeCursorEntry entry; @@ -242,7 +242,7 @@ TreeCursorStep ts_tree_cursor_goto_last_child_internal(t_parse_tree_cursor *_sel return TreeCursorStepNone; } -bool ts_tree_cursor_goto_last_child(t_parse_tree_cursor *self) { +bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) { for (;;) { switch 
(ts_tree_cursor_goto_last_child_internal(self)) { case TreeCursorStepHidden: @@ -256,14 +256,14 @@ bool ts_tree_cursor_goto_last_child(t_parse_tree_cursor *self) { return false; } -static inline t_i64 ts_tree_cursor_goto_first_child_for_byte_and_point( - t_parse_tree_cursor *_self, - t_u32 goal_byte, - t_point goal_point +static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point( + TSTreeCursor *_self, + uint32_t goal_byte, + TSPoint goal_point ) { TreeCursor *self = (TreeCursor *)_self; - t_u32 initial_size = self->stack.size; - t_u32 visible_child_index = 0; + uint32_t initial_size = self->stack.size; + uint32_t visible_child_index = 0; bool did_descend; do { @@ -273,9 +273,9 @@ static inline t_i64 ts_tree_cursor_goto_first_child_for_byte_and_point( TreeCursorEntry entry; CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - t_parse_length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree)); + Length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree)); bool at_goal = entry_end.bytes >= goal_byte && point_gte(entry_end.extent, goal_point); - t_u32 visible_child_count = ts_subtree_visible_child_count(*entry.subtree); + uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree); if (at_goal) { if (visible) { array_push(&self->stack, entry); @@ -298,19 +298,19 @@ static inline t_i64 ts_tree_cursor_goto_first_child_for_byte_and_point( return -1; } -t_i64 ts_tree_cursor_goto_first_child_for_byte(t_parse_tree_cursor *self, t_u32 goal_byte) { - return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, point_val_zero()); +int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte) { + return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, POINT_ZERO); } -t_i64 ts_tree_cursor_goto_first_child_for_point(t_parse_tree_cursor *self, t_point 
goal_point) { +int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point) { return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point); } TreeCursorStep ts_tree_cursor_goto_sibling_internal( - t_parse_tree_cursor *_self, + TSTreeCursor *_self, bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) { TreeCursor *self = (TreeCursor *)_self; - t_u32 initial_size = self->stack.size; + uint32_t initial_size = self->stack.size; while (self->stack.size > 1) { TreeCursorEntry entry = array_pop(&self->stack); @@ -341,11 +341,11 @@ TreeCursorStep ts_tree_cursor_goto_sibling_internal( return TreeCursorStepNone; } -TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(t_parse_tree_cursor *_self) { +TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) { return ts_tree_cursor_goto_sibling_internal(_self, ts_tree_cursor_child_iterator_next); } -bool ts_tree_cursor_goto_next_sibling(t_parse_tree_cursor *self) { +bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) { switch (ts_tree_cursor_goto_next_sibling_internal(self)) { case TreeCursorStepHidden: ts_tree_cursor_goto_first_child(self); @@ -357,7 +357,7 @@ bool ts_tree_cursor_goto_next_sibling(t_parse_tree_cursor *self) { } } -TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(t_parse_tree_cursor *_self) { +TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self) { // since subtracting across row loses column information, we may have to // restore it TreeCursor *self = (TreeCursor *)_self; @@ -374,14 +374,14 @@ TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(t_parse_tree_cursor // restore position from the parent node const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2]; - t_parse_length position = parent->position; - t_u32 child_index = array_back(&self->stack)->child_index; + Length position = parent->position; + uint32_t child_index = 
array_back(&self->stack)->child_index; const Subtree *children = ts_subtree_children((*(parent->subtree))); if (child_index > 0) { // skip first child padding since its position should match the position of the parent position = length_add(position, ts_subtree_size(children[0])); - for (t_u32 i = 1; i < child_index; ++i) { + for (uint32_t i = 1; i < child_index; ++i) { position = length_add(position, ts_subtree_total_size(children[i])); } position = length_add(position, ts_subtree_padding(children[child_index])); @@ -392,7 +392,7 @@ TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(t_parse_tree_cursor return step; } -bool ts_tree_cursor_goto_previous_sibling(t_parse_tree_cursor *self) { +bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) { switch (ts_tree_cursor_goto_previous_sibling_internal(self)) { case TreeCursorStepHidden: ts_tree_cursor_goto_last_child(self); @@ -404,7 +404,7 @@ bool ts_tree_cursor_goto_previous_sibling(t_parse_tree_cursor *self) { } } -bool ts_tree_cursor_goto_parent(t_parse_tree_cursor *_self) { +bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { TreeCursor *self = (TreeCursor *)_self; for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { if (ts_tree_cursor_is_entry_visible(self, i)) { @@ -416,16 +416,16 @@ bool ts_tree_cursor_goto_parent(t_parse_tree_cursor *_self) { } void ts_tree_cursor_goto_descendant( - t_parse_tree_cursor *_self, - t_u32 goal_descendant_index + TSTreeCursor *_self, + uint32_t goal_descendant_index ) { TreeCursor *self = (TreeCursor *)_self; // Ascend to the lowest ancestor that contains the goal node. for (;;) { - t_u32 i = self->stack.size - 1; + uint32_t i = self->stack.size - 1; TreeCursorEntry *entry = &self->stack.contents[i]; - t_u32 next_descendant_index = + uint32_t next_descendant_index = entry->descendant_index + (ts_tree_cursor_is_entry_visible(self, i) ? 
1 : 0) + ts_subtree_visible_descendant_count(*entry->subtree); @@ -466,16 +466,16 @@ void ts_tree_cursor_goto_descendant( } while (did_descend); } -t_u32 ts_tree_cursor_current_descendant_index(const t_parse_tree_cursor *_self) { +uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; TreeCursorEntry *last_entry = array_back(&self->stack); return last_entry->descendant_index; } -t_parse_node ts_tree_cursor_current_node(const t_parse_tree_cursor *_self) { +TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; TreeCursorEntry *last_entry = array_back(&self->stack); - t_symbol alias_symbol = self->root_alias_symbol; + TSSymbol alias_symbol = self->root_alias_symbol; if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) { TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; alias_symbol = ts_language_alias_at( @@ -495,12 +495,12 @@ t_parse_node ts_tree_cursor_current_node(const t_parse_tree_cursor *_self) { // Private - Get various facts about the current node that are needed // when executing tree queries. 
void ts_tree_cursor_current_status( - const t_parse_tree_cursor *_self, - t_field_id *field_id, + const TSTreeCursor *_self, + TSFieldId *field_id, bool *has_later_siblings, bool *has_later_named_siblings, bool *can_have_later_siblings_with_this_field, - t_symbol *supertypes, + TSSymbol *supertypes, unsigned *supertype_count ) { const TreeCursor *self = (const TreeCursor *)_self; @@ -517,7 +517,7 @@ void ts_tree_cursor_current_status( TreeCursorEntry *entry = &self->stack.contents[i]; TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - const t_symbol *alias_sequence = ts_language_alias_sequence( + const TSSymbol *alias_sequence = ts_language_alias_sequence( self->tree->language, parent_entry->subtree->ptr->production_id ); @@ -532,11 +532,11 @@ void ts_tree_cursor_current_status( ts_subtree_symbol(subtree)) // Stop walking up when a visible ancestor is found. - t_symbol entry_symbol = subtree_symbol( + TSSymbol entry_symbol = subtree_symbol( *entry->subtree, entry->structural_child_index ); - t_symbol_metadata entry_metadata = ts_language_symbol_metadata( + TSSymbolMetadata entry_metadata = ts_language_symbol_metadata( self->tree->language, entry_symbol ); @@ -555,7 +555,7 @@ void ts_tree_cursor_current_status( if (!ts_subtree_extra(*entry->subtree)) structural_child_index++; for (unsigned j = entry->child_index + 1; j < sibling_count; j++) { Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j]; - t_symbol_metadata sibling_metadata = ts_language_symbol_metadata( + TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata( self->tree->language, subtree_symbol(sibling, structural_child_index) ); @@ -581,7 +581,7 @@ void ts_tree_cursor_current_status( #undef subtree_symbol if (!ts_subtree_extra(*entry->subtree)) { - const t_field_map_entry *field_map, *field_map_end; + const TSFieldMapEntry *field_map, *field_map_end; ts_language_field_map( self->tree->language, parent_entry->subtree->ptr->production_id, @@ -590,7 +590,7 @@ void 
ts_tree_cursor_current_status( // Look for a field name associated with the current node. if (!*field_id) { - for (const t_field_map_entry *map = field_map; map < field_map_end; map++) { + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { if (!map->inherited && map->child_index == entry->structural_child_index) { *field_id = map->field_id; break; @@ -600,7 +600,7 @@ void ts_tree_cursor_current_status( // Determine if the current node can have later siblings with the same field name. if (*field_id) { - for (const t_field_map_entry *map = field_map; map < field_map_end; map++) { + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { if ( map->field_id == *field_id && map->child_index > entry->structural_child_index @@ -614,9 +614,9 @@ void ts_tree_cursor_current_status( } } -t_u32 ts_tree_cursor_current_depth(const t_parse_tree_cursor *_self) { +uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; - t_u32 depth = 0; + uint32_t depth = 0; for (unsigned i = 1; i < self->stack.size; i++) { if (ts_tree_cursor_is_entry_visible(self, i)) { depth++; @@ -625,12 +625,12 @@ t_u32 ts_tree_cursor_current_depth(const t_parse_tree_cursor *_self) { return depth; } -t_parse_node ts_tree_cursor_parent_node(const t_parse_tree_cursor *_self) { +TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; for (int i = (int)self->stack.size - 2; i >= 0; i--) { TreeCursorEntry *entry = &self->stack.contents[i]; bool is_visible = true; - t_symbol alias_symbol = 0; + TSSymbol alias_symbol = 0; if (i > 0) { TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; alias_symbol = ts_language_alias_at( @@ -652,7 +652,7 @@ t_parse_node ts_tree_cursor_parent_node(const t_parse_tree_cursor *_self) { return ts_node_new(NULL, NULL, length_zero(), 0); } -t_field_id ts_tree_cursor_current_field_id(const t_parse_tree_cursor *_self) { 
+TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; // Walk up the tree, visiting the current node and its invisible ancestors. @@ -668,13 +668,13 @@ t_field_id ts_tree_cursor_current_field_id(const t_parse_tree_cursor *_self) { if (ts_subtree_extra(*entry->subtree)) break; - const t_field_map_entry *field_map, *field_map_end; + const TSFieldMapEntry *field_map, *field_map_end; ts_language_field_map( self->tree->language, parent_entry->subtree->ptr->production_id, &field_map, &field_map_end ); - for (const t_field_map_entry *map = field_map; map < field_map_end; map++) { + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { if (!map->inherited && map->child_index == entry->structural_child_index) { return map->field_id; } @@ -683,8 +683,8 @@ t_field_id ts_tree_cursor_current_field_id(const t_parse_tree_cursor *_self) { return 0; } -const char *ts_tree_cursor_current_field_name(const t_parse_tree_cursor *_self) { - t_field_id id = ts_tree_cursor_current_field_id(_self); +const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) { + TSFieldId id = ts_tree_cursor_current_field_id(_self); if (id) { const TreeCursor *self = (const TreeCursor *)_self; return self->tree->language->field_names[id]; @@ -693,9 +693,9 @@ const char *ts_tree_cursor_current_field_name(const t_parse_tree_cursor *_self) } } -t_parse_tree_cursor ts_tree_cursor_copy(const t_parse_tree_cursor *_cursor) { +TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) { const TreeCursor *cursor = (const TreeCursor *)_cursor; - t_parse_tree_cursor res = {NULL, NULL, {0, 0}}; + TSTreeCursor res = {NULL, NULL, {0, 0}}; TreeCursor *copy = (TreeCursor *)&res; copy->tree = cursor->tree; copy->root_alias_symbol = cursor->root_alias_symbol; @@ -704,7 +704,7 @@ t_parse_tree_cursor ts_tree_cursor_copy(const t_parse_tree_cursor *_cursor) { return res; } -void ts_tree_cursor_reset_to(t_parse_tree_cursor 
*_dst, const t_parse_tree_cursor *_src) { +void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) { const TreeCursor *cursor = (const TreeCursor *)_src; TreeCursor *copy = (TreeCursor *)_dst; copy->tree = cursor->tree; diff --git a/parser/src/tree_cursor.h b/parser/src/tree_cursor.h index e63c9757..96a386df 100644 --- a/parser/src/tree_cursor.h +++ b/parser/src/tree_cursor.h @@ -5,16 +5,16 @@ typedef struct { const Subtree *subtree; - t_parse_length position; - t_u32 child_index; - t_u32 structural_child_index; - t_u32 descendant_index; + Length position; + uint32_t child_index; + uint32_t structural_child_index; + uint32_t descendant_index; } TreeCursorEntry; typedef struct { - const t_parse_tree *tree; + const TSTree *tree; Array(TreeCursorEntry) stack; - t_symbol root_alias_symbol; + TSSymbol root_alias_symbol; } TreeCursor; typedef enum { @@ -23,26 +23,26 @@ typedef enum { TreeCursorStepVisible, } TreeCursorStep; -void ts_tree_cursor_init(TreeCursor *, t_parse_node); +void ts_tree_cursor_init(TreeCursor *, TSNode); void ts_tree_cursor_current_status( - const t_parse_tree_cursor *, - t_field_id *, + const TSTreeCursor *, + TSFieldId *, bool *, bool *, bool *, - t_symbol *, + TSSymbol *, unsigned * ); -TreeCursorStep ts_tree_cursor_goto_first_child_internal(t_parse_tree_cursor *); -TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(t_parse_tree_cursor *); +TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *); +TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *); -static inline Subtree ts_tree_cursor_current_subtree(const t_parse_tree_cursor *_self) { +static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; TreeCursorEntry *last_entry = array_back(&self->stack); return *last_entry->subtree; } -t_parse_node ts_tree_cursor_parent_node(const t_parse_tree_cursor *); +TSNode ts_tree_cursor_parent_node(const TSTreeCursor *); 
#endif // TREE_SITTER_TREE_CURSOR_H_ diff --git a/parser/src/unicode.h b/parser/src/unicode.h new file mode 100644 index 00000000..0fba56a6 --- /dev/null +++ b/parser/src/unicode.h @@ -0,0 +1,50 @@ +#ifndef TREE_SITTER_UNICODE_H_ +#define TREE_SITTER_UNICODE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#define U_EXPORT +#define U_EXPORT2 +#include "unicode/utf8.h" +#include "unicode/utf16.h" + +static const int32_t TS_DECODE_ERROR = U_SENTINEL; + +// These functions read one unicode code point from the given string, +// returning the number of bytes consumed. +typedef uint32_t (*UnicodeDecodeFunction)( + const uint8_t *string, + uint32_t length, + int32_t *code_point +); + +static inline uint32_t ts_decode_utf8( + const uint8_t *string, + uint32_t length, + int32_t *code_point +) { + uint32_t i = 0; + U8_NEXT(string, i, length, *code_point); + return i; +} + +static inline uint32_t ts_decode_utf16( + const uint8_t *string, + uint32_t length, + int32_t *code_point +) { + uint32_t i = 0; + U16_NEXT(((uint16_t *)string), i, length, *code_point); + return i * 2; +} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_UNICODE_H_ diff --git a/sources/main.c b/sources/main.c index 5727afa5..f5a7bb35 100644 --- a/sources/main.c +++ b/sources/main.c @@ -6,15 +6,14 @@ /* By: rparodi +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/03/28 14:40:38 by rparodi #+# #+# */ -/* Updated: 2024/04/30 16:15:53 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 16:43:14 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ +#include "../includes/minishell.h" #include "app/node.h" #include "me/string/str_len.h" #include "parser/api.h" -#include "parser/parser.h" -#include "../includes/minishell.h" void print_node_data(t_node *t, t_usize depth) { @@ -32,8 +31,8 @@ void print_node_data(t_node *t, t_usize depth) t_node parse_to_nodes(t_parser *parser, t_const_str input) { t_parse_tree 
*tree; - t_parse_node node; - t_node ret; + t_parse_node node; + t_node ret; tree = ts_parser_parse_string(parser, NULL, input, str_len(input)); node = ts_tree_root_node(tree); @@ -46,25 +45,27 @@ t_node parse_str(t_myparser *parser, t_const_str input) return (parse_to_nodes(parser->parser, input)); } -void ft_check(t_utils *shcat, char **input) { - t_usize i; - t_usize prev_i; +void ft_check(t_utils *shcat, char **input) +{ + t_usize i; + t_usize prev_i; - i = 0; - prev_i = 0; - while (input[i] != NULL) { - if (ft_strcmp(input[i], "exit") == 0) - ft_exit(shcat, 0); - else if (ft_strcmp(input[i], "pwd") == 0) - ft_pwd(); - else - ft_other_cmd(shcat, i, prev_i); - prev_i = i; - i++; - } + i = 0; + prev_i = 0; + while (input[i] != NULL) + { + if (ft_strcmp(input[i], "exit") == 0) + ft_exit(shcat, 0); + else if (ft_strcmp(input[i], "pwd") == 0) + ft_pwd(); + else + ft_other_cmd(shcat, i, prev_i); + prev_i = i; + i++; + } } -void exec_shcat(t_utils *shcat) +void exec_shcat(t_utils *shcat) { print_node_data(&shcat->current_node, 0); free_node(shcat->current_node); @@ -72,34 +73,36 @@ void exec_shcat(t_utils *shcat) void ft_take_args(t_utils *shcat) { - t_i32 i; + t_i32 i; - i = 0; - while (1) { - shcat->str_input = readline((t_const_str)shcat->name_shell); - if (!shcat->str_input) - ft_exit(shcat, 0); - shcat->current_node = parse_str(&shcat->parser, shcat->str_input); - exec_shcat(shcat); - add_history(shcat->str_input); - free(shcat->str_input); - i++; - } + i = 0; + while (1) + { + shcat->str_input = readline((t_const_str)shcat->name_shell); + if (!shcat->str_input) + ft_exit(shcat, 0); + shcat->current_node = parse_str(&shcat->parser, shcat->str_input); + exec_shcat(shcat); + add_history(shcat->str_input); + free(shcat->str_input); + i++; + } } -void ft_find_path(t_str arge[], t_utils *utils) +void ft_find_path(t_str arge[], t_utils *utils) { - t_i32 i; - t_u8 check; + t_i32 i; + t_u8 check; i = 0; check = 0; while (arge[i] != NULL) { - if (arge[i][0] == 'P' && 
arge[i][1] == 'A' && arge[i][2] == 'T' && arge[i][3] == 'H' && arge[i][4] == '=') + if (arge[i][0] == 'P' && arge[i][1] == 'A' && arge[i][2] == 'T' && + arge[i][3] == 'H' && arge[i][4] == '=') { utils->path = ft_split(arge[i] + 5, ':'); - return ; + return; } i++; } @@ -108,9 +111,6 @@ void ft_find_path(t_str arge[], t_utils *utils) t_language *tree_sitter_bash(void); - - - t_myparser create_myparser(void) { t_language *lang; @@ -127,10 +127,9 @@ void free_myparser(t_myparser self) ts_parser_delete(self.parser); } - -t_i32 main(t_i32 argc, t_str argv[], t_str arge[]) +t_i32 main(t_i32 argc, t_str argv[], t_str arge[]) { - t_utils utils; + t_utils utils; (void)argc; (void)argv; diff --git a/sources/node/node.c b/sources/node/node.c index a2b7ad8b..d36edc53 100644 --- a/sources/node/node.c +++ b/sources/node/node.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/04/28 18:36:40 by maiboyer #+# #+# */ -/* Updated: 2024/04/30 16:26:08 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 16:43:35 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ From c81cd0c2ea5c07053098828c16d4fc778d51941f Mon Sep 17 00:00:00 2001 From: Maix0 Date: Tue, 30 Apr 2024 21:31:08 +0200 Subject: [PATCH 08/14] flake devshell --- .clangd | 8 ++ .gitignore | 1 + flake.lock | 228 +++++++++++++++++++++++++++++++++++++++++ flake.nix | 16 +++ stdme/src/os/process.c | 4 +- 5 files changed, 254 insertions(+), 3 deletions(-) create mode 100644 flake.lock create mode 100644 flake.nix diff --git a/.clangd b/.clangd index 294e143f..86a1a788 100644 --- a/.clangd +++ b/.clangd @@ -9,4 +9,12 @@ CompileFlags: # Tweak the parse settings - "-I/home/maiboyer/Documents/ring-2/shcat/includes/" - "-I/home/maiboyer/Documents/ring-2/shcat/output/include/" - "-I/home/maiboyer/Documents/ring-2/shcat/generic_sources/header/" + - "-I/home/maix/school/shcat/vendor/" + - "-I/home/maix/school/shcat/stdme/include/" + - 
"-I/home/maix/school/shcat/stdme/vendor/" + - "-I/home/maix/school/shcat/stdme/output/include/" + - "-I/home/maix/school/shcat/stdme/generic_sources/header/" + - "-I/home/maix/school/shcat/includes/" + - "-I/home/maix/school/shcat/output/include/" + - "-I/home/maix/school/shcat/generic_sources/header/" diff --git a/.gitignore b/.gitignore index bc400656..10885b2d 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,4 @@ Mkfile.old dkms.conf objects/ minishell +.envrc diff --git a/flake.lock b/flake.lock new file mode 100644 index 00000000..28814d17 --- /dev/null +++ b/flake.lock @@ -0,0 +1,228 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1710146030, + "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "flake-utils_2": { + "inputs": { + "systems": "systems_2" + }, + "locked": { + "lastModified": 1705309234, + "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "flake-utils_3": { + "inputs": { + "systems": "systems_3" + }, + "locked": { + "lastModified": 1705309234, + "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "generic_c": { + "inputs": { + "flake-utils": "flake-utils_2", + "naersk": "naersk", + "nixpkgs": "nixpkgs_2", + "rust-overlay": "rust-overlay" + }, + "locked": { + "lastModified": 1706829609, + 
"narHash": "sha256-A0imQ9AIJafdL1/+j/1b3G7bm2j+N+VhzTsvKikKjz4=", + "owner": "Maix0", + "repo": "generic_c", + "rev": "a470c2c5a8c8aadc852a7a50d72853f2a3873595", + "type": "github" + }, + "original": { + "owner": "Maix0", + "repo": "generic_c", + "type": "github" + } + }, + "naersk": { + "inputs": { + "nixpkgs": "nixpkgs" + }, + "locked": { + "lastModified": 1698420672, + "narHash": "sha256-/TdeHMPRjjdJub7p7+w55vyABrsJlt5QkznPYy55vKA=", + "owner": "nix-community", + "repo": "naersk", + "rev": "aeb58d5e8faead8980a807c840232697982d47b9", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "naersk", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1706683685, + "narHash": "sha256-FtPPshEpxH/ewBOsdKBNhlsL2MLEFv1hEnQ19f/bFsQ=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "5ad9903c16126a7d949101687af0aa589b1d7d3d", + "type": "github" + }, + "original": { + "id": "nixpkgs", + "type": "indirect" + } + }, + "nixpkgs_2": { + "locked": { + "lastModified": 1706683685, + "narHash": "sha256-FtPPshEpxH/ewBOsdKBNhlsL2MLEFv1hEnQ19f/bFsQ=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "5ad9903c16126a7d949101687af0aa589b1d7d3d", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs_3": { + "locked": { + "lastModified": 1706487304, + "narHash": "sha256-LE8lVX28MV2jWJsidW13D2qrHU/RUUONendL2Q/WlJg=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "90f456026d284c22b3e3497be980b2e47d0b28ac", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs_4": { + "locked": { + "lastModified": 0, + "narHash": "sha256-XtTSSIB2DA6tOv+l0FhvfDMiyCmhoRbNB+0SeInZkbk=", + "path": "/nix/store/v4pcs3nzx54m5bmxd39win0rgl2d2hbx-source", + "type": "path" + }, + "original": { + "id": "nixpkgs", + "type": "indirect" + } + }, + "root": { + "inputs": { + 
"flake-utils": "flake-utils", + "generic_c": "generic_c", + "nixpkgs": "nixpkgs_4" + } + }, + "rust-overlay": { + "inputs": { + "flake-utils": "flake-utils_3", + "nixpkgs": "nixpkgs_3" + }, + "locked": { + "lastModified": 1706753617, + "narHash": "sha256-ZKqTFzhFwSWFEpQTJ0uXnfJBs5Y/po9/8TK4bzssdbs=", + "owner": "oxalica", + "repo": "rust-overlay", + "rev": "58be43ae223034217ea1bd58c73210644031b687", + "type": "github" + }, + "original": { + "owner": "oxalica", + "repo": "rust-overlay", + "type": "github" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + }, + "systems_2": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + }, + "systems_3": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 00000000..3069590a --- /dev/null +++ b/flake.nix @@ -0,0 +1,16 @@ +{ + description = "Flake utils demo"; + + inputs.flake-utils.url = "github:numtide/flake-utils"; + inputs.generic_c.url = "github:Maix0/generic_c"; + + outputs = { self, nixpkgs, flake-utils, generic_c }: + flake-utils.lib.eachDefaultSystem (system: + let pkgs = 
nixpkgs.legacyPackages.${system}; in + { + devShell = pkgs.mkShell { + packages = with pkgs; [clang valgrind gnumake readline.out readline.dev generic_c.packages.${system}.default]; + }; + } + ); +} diff --git a/stdme/src/os/process.c b/stdme/src/os/process.c index 55e0d394..70b6b5f4 100644 --- a/stdme/src/os/process.c +++ b/stdme/src/os/process.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/01/03 16:22:41 by maiboyer #+# #+# */ -/* Updated: 2024/01/06 17:50:47 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 21:17:22 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -81,11 +81,9 @@ t_error in_path(t_spawn_info *info, t_process *process, t_const_str path, t_error find_binary(t_spawn_info *info, t_process *process) { t_usize p_idx; - t_str *splitted_path; t_buffer_str s; (void)(process); - splitted_path = NULL; s = alloc_new_buffer(256); if (str_start_with(info->binary_path, "/") || str_find_chr(info->binary_path, '/') != NULL) From 3463af82957093bbdfad1ecd83351d49573cf241 Mon Sep 17 00:00:00 2001 From: Maix0 Date: Tue, 30 Apr 2024 21:35:28 +0200 Subject: [PATCH 09/14] Update so it works on XeMaix --- Minishell.mk | 8 ++++---- sources/ft_cmd.c | 4 +--- sources/main.c | 7 +------ 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/Minishell.mk b/Minishell.mk index f4ebf28a..d38fa064 100644 --- a/Minishell.mk +++ b/Minishell.mk @@ -6,7 +6,7 @@ # By: maiboyer +#+ +:+ +#+ # # +#+#+#+#+#+ +#+ # # Created: 2024/04/28 17:28:30 by maiboyer #+# #+# # -# Updated: 2024/04/30 14:19:46 by maiboyer ### ########.fr # +# Updated: 2024/04/30 21:32:49 by maiboyer ### ########.fr # # # # **************************************************************************** # @@ -63,7 +63,7 @@ bonus: $(OBJ) $(LIB_OBJ) $(OBJDIRNAME)/libme.a $(OBJDIRNAME)/libgmr.a @mkdir -p $(OBJDIRNAME) @mkdir -p $(OBJDIRNAME)/$(LIBDIRNAME) @mkdir -p $(OBJDIRNAME)/$(SRCDIRNAME) - 
@printf '$(GREY) Be Carefull ur in $(END)$(GREEN)Debug Mode$(END)\n' + @echo -e '$(GREY) Be Carefull ur in $(END)$(GREEN)Debug Mode$(END)' @cc $(CFLAGS) -D DEBUG=42 -o $(NAME) $(OBJ) -L$(OBJDIRNAME) -lgmr -lme # Dependences for all @@ -71,13 +71,13 @@ $(NAME): $(OBJ) $(LIB_OBJ) $(OBJDIRNAME)/libgmr.a $(OBJDIRNAME)/libme.a @mkdir -p $(OBJDIRNAME) @mkdir -p $(OBJDIRNAME)/$(LIBDIRNAME) @mkdir -p $(OBJDIRNAME)/$(SRCDIRNAME) - @echo "$(GREY) Linking $(END)$(GREEN)$(NAME)$(END)" + @echo -e "$(GREY) Linking $(END)$(GREEN)$(NAME)$(END)" @cc $(CFLAGS) -o $(NAME) $(OBJ) $(LIB_OBJ) -L$(OBJDIRNAME) -lgmr -lme # Creating the objects $(OBJDIRNAME)/%.o: %.c @mkdir -p $(dir $@) - @printf '$(GREY) Compiling $(END)$(GREEN)$<$(END)\n' + @echo -e '$(GREY) Compiling $(END)$(GREEN)$<$(END)' @cc $(CFLAGS) -o $@ -c $< -include ${OBJ:.o=.d} diff --git a/sources/ft_cmd.c b/sources/ft_cmd.c index 15401c08..766be78c 100644 --- a/sources/ft_cmd.c +++ b/sources/ft_cmd.c @@ -6,7 +6,7 @@ /* By: rparodi +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/04/13 20:26:13 by rparodi #+# #+# */ -/* Updated: 2024/04/30 15:30:52 by rparodi ### ########.fr */ +/* Updated: 2024/04/30 21:31:27 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -28,11 +28,9 @@ void ft_other_cmd(t_utils *shcat, t_usize i, t_usize prev_i) t_i32 options; t_str *args; t_usize k; - t_usize tmp; // t_str cmd; k = prev_i; - tmp = prev_i; args = (t_str *)malloc(sizeof(t_str) * (i + 2)); while (prev_i < i) { diff --git a/sources/main.c b/sources/main.c index f5a7bb35..37f6ddae 100644 --- a/sources/main.c +++ b/sources/main.c @@ -6,7 +6,7 @@ /* By: rparodi +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/03/28 14:40:38 by rparodi #+# #+# */ -/* Updated: 2024/04/30 16:43:14 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 21:32:13 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ 
-73,9 +73,7 @@ void exec_shcat(t_utils *shcat) void ft_take_args(t_utils *shcat) { - t_i32 i; - i = 0; while (1) { shcat->str_input = readline((t_const_str)shcat->name_shell); @@ -85,17 +83,14 @@ void ft_take_args(t_utils *shcat) exec_shcat(shcat); add_history(shcat->str_input); free(shcat->str_input); - i++; } } void ft_find_path(t_str arge[], t_utils *utils) { t_i32 i; - t_u8 check; i = 0; - check = 0; while (arge[i] != NULL) { if (arge[i][0] == 'P' && arge[i][1] == 'A' && arge[i][2] == 'T' && From 0eb89227708d5247b87023f8342cd10a646517e5 Mon Sep 17 00:00:00 2001 From: Maix0 Date: Tue, 30 Apr 2024 22:06:24 +0200 Subject: [PATCH 10/14] Main doesn't leak anymore --- flake.nix | 17 +++++++++++++---- sources/ft_exit.c | 4 +++- sources/main.c | 7 ++++--- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/flake.nix b/flake.nix index 3069590a..351bb4e9 100644 --- a/flake.nix +++ b/flake.nix @@ -4,12 +4,21 @@ inputs.flake-utils.url = "github:numtide/flake-utils"; inputs.generic_c.url = "github:Maix0/generic_c"; - outputs = { self, nixpkgs, flake-utils, generic_c }: - flake-utils.lib.eachDefaultSystem (system: - let pkgs = nixpkgs.legacyPackages.${system}; in - { + outputs = { + self, + nixpkgs, + flake-utils, + generic_c, + }: + flake-utils.lib.eachDefaultSystem ( + system: let + pkgs = nixpkgs.legacyPackages.${system}; + in { devShell = pkgs.mkShell { packages = with pkgs; [clang valgrind gnumake readline.out readline.dev generic_c.packages.${system}.default]; + shellHook = '' + alias 'valgrind'='valgrind --supressions=${./valgrind_readline_leaks_ignore.txt}'; + ''; }; } ); diff --git a/sources/ft_exit.c b/sources/ft_exit.c index 19a0d8ce..ace74c4b 100644 --- a/sources/ft_exit.c +++ b/sources/ft_exit.c @@ -6,7 +6,7 @@ /* By: rparodi +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/03/29 11:35:51 by rparodi #+# #+# */ -/* Updated: 2024/04/30 16:16:55 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 22:03:14 by maiboyer ### ########.fr */ /* */ /* 
************************************************************************** */ @@ -37,6 +37,8 @@ void ft_free_utils(t_utils *s) (void)(s); if (s->str_input) free(s->str_input); + if (s->path) + ft_free_strs(s->path); ts_parser_delete(s->parser.parser); } diff --git a/sources/main.c b/sources/main.c index 37f6ddae..227ce0f2 100644 --- a/sources/main.c +++ b/sources/main.c @@ -6,7 +6,7 @@ /* By: rparodi +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/03/28 14:40:38 by rparodi #+# #+# */ -/* Updated: 2024/04/30 21:32:13 by maiboyer ### ########.fr */ +/* Updated: 2024/04/30 22:05:49 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -122,14 +122,15 @@ void free_myparser(t_myparser self) ts_parser_delete(self.parser); } -t_i32 main(t_i32 argc, t_str argv[], t_str arge[]) +t_i32 main(t_i32 argc, t_str argv[], t_str envp[]) { t_utils utils; (void)argc; (void)argv; + (void)envp; utils.parser = create_myparser(); - ft_find_path(arge, &utils); + //ft_find_path(arge, &utils); utils.name_shell = "42sh > "; ft_take_args(&utils); } From 86b5025fb0b6561bf2fd335852d0a2309bee0e35 Mon Sep 17 00:00:00 2001 From: Maix0 Date: Tue, 30 Apr 2024 22:25:51 +0200 Subject: [PATCH 11/14] Start work AGAIN --- parser/Makefile | 4 +- parser/combined.c | 12273 ++++++++++++++++++++++++++++++ parser/src/api.h | 451 +- parser/src/get_changed_ranges.c | 30 +- parser/src/get_changed_ranges.h | 10 +- parser/src/language.c | 80 +- parser/src/language.h | 76 +- parser/src/length.h | 2 +- parser/src/lexer.c | 24 +- parser/src/lexer.h | 12 +- parser/src/node.c | 230 +- parser/src/parser.c | 180 +- parser/src/parser.h | 26 +- parser/src/point.h | 26 +- parser/src/query.c | 346 +- parser/src/reduce_action.h | 2 +- parser/src/stack.c | 12 +- parser/src/stack.h | 10 +- parser/src/subtree.c | 60 +- parser/src/subtree.h | 42 +- parser/src/tree.c | 42 +- parser/src/tree.h | 12 +- parser/src/tree_cursor.c | 80 +- parser/src/tree_cursor.h | 
20 +- 24 files changed, 13158 insertions(+), 892 deletions(-) create mode 100644 parser/combined.c diff --git a/parser/Makefile b/parser/Makefile index d0a4931c..9e9612e0 100644 --- a/parser/Makefile +++ b/parser/Makefile @@ -6,7 +6,7 @@ # By: maiboyer +#+ +:+ +#+ # # +#+#+#+#+#+ +#+ # # Created: 2023/11/03 13:20:01 by maiboyer #+# #+# # -# Updated: 2024/04/30 17:20:27 by maiboyer ### ########.fr # +# Updated: 2024/04/30 22:23:58 by maiboyer ### ########.fr # # # # **************************************************************************** # @@ -22,7 +22,7 @@ CFLAGS = -Wall -Wextra -Werror -MMD -I./includes -I../includes -I../output/inc include ./Filelist.mk -SRC_FILES += ./src/lib ./src/scanner +SRC_FILES += ./combined SRC = $(addsuffix .c,$(addprefix $(SRC_DIR)/,$(SRC_FILES))) OBJ = $(addsuffix .o,$(addprefix $(BUILD_DIR)/,$(SRC_FILES))) DEPS = $(addsuffix .d,$(addprefix $(BUILD_DIR)/,$(SRC_FILES))) diff --git a/parser/combined.c b/parser/combined.c new file mode 100644 index 00000000..5791ec0f --- /dev/null +++ b/parser/combined.c @@ -0,0 +1,12273 @@ +#include "./src/alloc.h" +#include "src/api.h" +#include + +static void *ts_malloc_default(size_t size) { + void *result = malloc(size); + if (size > 0 && !result) { + fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); + abort(); + } + return result; +} + +static void *ts_calloc_default(size_t count, size_t size) { + void *result = calloc(count, size); + if (count > 0 && !result) { + fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); + abort(); + } + return result; +} + +static void *ts_realloc_default(void *buffer, size_t size) { + void *result = realloc(buffer, size); + if (size > 0 && !result) { + fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); + abort(); + } + return result; +} + +// Allow clients to override allocation functions dynamically +TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default; +TS_PUBLIC void 
*(*ts_current_calloc)(size_t, size_t) = ts_calloc_default; +TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default; +TS_PUBLIC void (*ts_current_free)(void *) = free; + +void ts_set_allocator( + void *(*new_malloc)(size_t size), + void *(*new_calloc)(size_t count, size_t size), + void *(*new_realloc)(void *ptr, size_t size), + void (*new_free)(void *ptr) +) { + ts_current_malloc = new_malloc ? new_malloc : ts_malloc_default; + ts_current_calloc = new_calloc ? new_calloc : ts_calloc_default; + ts_current_realloc = new_realloc ? new_realloc : ts_realloc_default; + ts_current_free = new_free ? new_free : free; +} + +#include "src/get_changed_ranges.h" +#include "src/subtree.h" +#include "src/language.h" +#include "src/error_costs.h" +#include "src/tree_cursor.h" +#include + +// #define DEBUG_GET_CHANGED_RANGES + +static void ts_range_array_add( + TSRangeArray *self, + Length start, + Length end +) { + if (self->size > 0) { + t_range *last_range = array_back(self); + if (start.bytes <= last_range->end_byte) { + last_range->end_byte = end.bytes; + last_range->end_point = end.extent; + return; + } + } + + if (start.bytes < end.bytes) { + t_range range = { start.extent, end.extent, start.bytes, end.bytes }; + array_push(self, range); + } +} + +bool ts_range_array_intersects( + const TSRangeArray *self, + unsigned start_index, + uint32_t start_byte, + uint32_t end_byte +) { + for (unsigned i = start_index; i < self->size; i++) { + t_range *range = &self->contents[i]; + if (range->end_byte > start_byte) { + if (range->start_byte >= end_byte) break; + return true; + } + } + return false; +} + +void ts_range_array_get_changed_ranges( + const t_range *old_ranges, unsigned old_range_count, + const t_range *new_ranges, unsigned new_range_count, + TSRangeArray *differences +) { + unsigned new_index = 0; + unsigned old_index = 0; + Length current_position = length_zero(); + bool in_old_range = false; + bool in_new_range = false; + + while (old_index < 
old_range_count || new_index < new_range_count) { + const t_range *old_range = &old_ranges[old_index]; + const t_range *new_range = &new_ranges[new_index]; + + Length next_old_position; + if (in_old_range) { + next_old_position = (Length) {old_range->end_byte, old_range->end_point}; + } else if (old_index < old_range_count) { + next_old_position = (Length) {old_range->start_byte, old_range->start_point}; + } else { + next_old_position = LENGTH_MAX; + } + + Length next_new_position; + if (in_new_range) { + next_new_position = (Length) {new_range->end_byte, new_range->end_point}; + } else if (new_index < new_range_count) { + next_new_position = (Length) {new_range->start_byte, new_range->start_point}; + } else { + next_new_position = LENGTH_MAX; + } + + if (next_old_position.bytes < next_new_position.bytes) { + if (in_old_range != in_new_range) { + ts_range_array_add(differences, current_position, next_old_position); + } + if (in_old_range) old_index++; + current_position = next_old_position; + in_old_range = !in_old_range; + } else if (next_new_position.bytes < next_old_position.bytes) { + if (in_old_range != in_new_range) { + ts_range_array_add(differences, current_position, next_new_position); + } + if (in_new_range) new_index++; + current_position = next_new_position; + in_new_range = !in_new_range; + } else { + if (in_old_range != in_new_range) { + ts_range_array_add(differences, current_position, next_new_position); + } + if (in_old_range) old_index++; + if (in_new_range) new_index++; + in_old_range = !in_old_range; + in_new_range = !in_new_range; + current_position = next_new_position; + } + } +} + +typedef struct { + TreeCursor cursor; + const t_language *language; + unsigned visible_depth; + bool in_padding; +} Iterator; + +static Iterator iterator_new( + TreeCursor *cursor, + const Subtree *tree, + const t_language *language +) { + array_clear(&cursor->stack); + array_push(&cursor->stack, ((TreeCursorEntry) { + .subtree = tree, + .position = length_zero(), 
+ .child_index = 0, + .structural_child_index = 0, + })); + return (Iterator) { + .cursor = *cursor, + .language = language, + .visible_depth = 1, + .in_padding = false, + }; +} + +static bool iterator_done(Iterator *self) { + return self->cursor.stack.size == 0; +} + +static Length iterator_start_position(Iterator *self) { + TreeCursorEntry entry = *array_back(&self->cursor.stack); + if (self->in_padding) { + return entry.position; + } else { + return length_add(entry.position, ts_subtree_padding(*entry.subtree)); + } +} + +static Length iterator_end_position(Iterator *self) { + TreeCursorEntry entry = *array_back(&self->cursor.stack); + Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree)); + if (self->in_padding) { + return result; + } else { + return length_add(result, ts_subtree_size(*entry.subtree)); + } +} + +static bool iterator_tree_is_visible(const Iterator *self) { + TreeCursorEntry entry = *array_back(&self->cursor.stack); + if (ts_subtree_visible(*entry.subtree)) return true; + if (self->cursor.stack.size > 1) { + Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; + return ts_language_alias_at( + self->language, + parent.ptr->production_id, + entry.structural_child_index + ) != 0; + } + return false; +} + +static void iterator_get_visible_state( + const Iterator *self, + Subtree *tree, + t_symbol *alias_symbol, + uint32_t *start_byte +) { + uint32_t i = self->cursor.stack.size - 1; + + if (self->in_padding) { + if (i == 0) return; + i--; + } + + for (; i + 1 > 0; i--) { + TreeCursorEntry entry = self->cursor.stack.contents[i]; + + if (i > 0) { + const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; + *alias_symbol = ts_language_alias_at( + self->language, + parent->ptr->production_id, + entry.structural_child_index + ); + } + + if (ts_subtree_visible(*entry.subtree) || *alias_symbol) { + *tree = *entry.subtree; + *start_byte = entry.position.bytes; + break; + } + } +} + +static 
void iterator_ascend(Iterator *self) { + if (iterator_done(self)) return; + if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--; + if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false; + self->cursor.stack.size--; +} + +static bool iterator_descend(Iterator *self, uint32_t goal_position) { + if (self->in_padding) return false; + + bool did_descend = false; + do { + did_descend = false; + TreeCursorEntry entry = *array_back(&self->cursor.stack); + Length position = entry.position; + uint32_t structural_child_index = 0; + for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { + const Subtree *child = &ts_subtree_children(*entry.subtree)[i]; + Length child_left = length_add(position, ts_subtree_padding(*child)); + Length child_right = length_add(child_left, ts_subtree_size(*child)); + + if (child_right.bytes > goal_position) { + array_push(&self->cursor.stack, ((TreeCursorEntry) { + .subtree = child, + .position = position, + .child_index = i, + .structural_child_index = structural_child_index, + })); + + if (iterator_tree_is_visible(self)) { + if (child_left.bytes > goal_position) { + self->in_padding = true; + } else { + self->visible_depth++; + } + return true; + } + + did_descend = true; + break; + } + + position = child_right; + if (!ts_subtree_extra(*child)) structural_child_index++; + } + } while (did_descend); + + return false; +} + +static void iterator_advance(Iterator *self) { + if (self->in_padding) { + self->in_padding = false; + if (iterator_tree_is_visible(self)) { + self->visible_depth++; + } else { + iterator_descend(self, 0); + } + return; + } + + for (;;) { + if (iterator_tree_is_visible(self)) self->visible_depth--; + TreeCursorEntry entry = array_pop(&self->cursor.stack); + if (iterator_done(self)) return; + + const Subtree *parent = array_back(&self->cursor.stack)->subtree; + uint32_t child_index = entry.child_index + 1; + if (ts_subtree_child_count(*parent) > 
child_index) { + Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); + uint32_t structural_child_index = entry.structural_child_index; + if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; + const Subtree *next_child = &ts_subtree_children(*parent)[child_index]; + + array_push(&self->cursor.stack, ((TreeCursorEntry) { + .subtree = next_child, + .position = position, + .child_index = child_index, + .structural_child_index = structural_child_index, + })); + + if (iterator_tree_is_visible(self)) { + if (ts_subtree_padding(*next_child).bytes > 0) { + self->in_padding = true; + } else { + self->visible_depth++; + } + } else { + iterator_descend(self, 0); + } + break; + } + } +} + +typedef enum { + IteratorDiffers, + IteratorMayDiffer, + IteratorMatches, +} IteratorComparison; + +static IteratorComparison iterator_compare( + const Iterator *old_iter, + const Iterator *new_iter +) { + Subtree old_tree = NULL_SUBTREE; + Subtree new_tree = NULL_SUBTREE; + uint32_t old_start = 0; + uint32_t new_start = 0; + t_symbol old_alias_symbol = 0; + t_symbol new_alias_symbol = 0; + iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start); + iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start); + + if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches; + if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers; + + if ( + old_alias_symbol == new_alias_symbol && + ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree) + ) { + if (old_start == new_start && + !ts_subtree_has_changes(old_tree) && + ts_subtree_symbol(old_tree) != ts_builtin_sym_error && + ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes && + ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE && + ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE && + (ts_subtree_parse_state(old_tree) == ERROR_STATE) == + (ts_subtree_parse_state(new_tree) == ERROR_STATE)) { + return IteratorMatches; + } else { + 
return IteratorMayDiffer; + } + } + + return IteratorDiffers; +} + +#ifdef DEBUG_GET_CHANGED_RANGES +static inline void iterator_print_state(Iterator *self) { + TreeCursorEntry entry = *array_back(&self->cursor.stack); + TSPoint start = iterator_start_position(self).extent; + TSPoint end = iterator_end_position(self).extent; + const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree)); + printf( + "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", + name, self->in_padding ? "(p)" : " ", + self->visible_depth, + start.row + 1, start.column, + end.row + 1, end.column + ); +} +#endif + +unsigned ts_subtree_get_changed_ranges( + const Subtree *old_tree, const Subtree *new_tree, + TreeCursor *cursor1, TreeCursor *cursor2, + const t_language *language, + const TSRangeArray *included_range_differences, + t_range **ranges +) { + TSRangeArray results = array_new(); + + Iterator old_iter = iterator_new(cursor1, old_tree, language); + Iterator new_iter = iterator_new(cursor2, new_tree, language); + + unsigned included_range_difference_index = 0; + + Length position = iterator_start_position(&old_iter); + Length next_position = iterator_start_position(&new_iter); + if (position.bytes < next_position.bytes) { + ts_range_array_add(&results, position, next_position); + position = next_position; + } else if (position.bytes > next_position.bytes) { + ts_range_array_add(&results, next_position, position); + next_position = position; + } + + do { + #ifdef DEBUG_GET_CHANGED_RANGES + printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column); + iterator_print_state(&old_iter); + printf("\tvs\t"); + iterator_print_state(&new_iter); + puts(""); + #endif + + // Compare the old and new subtrees. 
+ IteratorComparison comparison = iterator_compare(&old_iter, &new_iter); + + // Even if the two subtrees appear to be identical, they could differ + // internally if they contain a range of text that was previously + // excluded from the parse, and is now included, or vice-versa. + if (comparison == IteratorMatches && ts_range_array_intersects( + included_range_differences, + included_range_difference_index, + position.bytes, + iterator_end_position(&old_iter).bytes + )) { + comparison = IteratorMayDiffer; + } + + bool is_changed = false; + switch (comparison) { + // If the subtrees are definitely identical, move to the end + // of both subtrees. + case IteratorMatches: + next_position = iterator_end_position(&old_iter); + break; + + // If the subtrees might differ internally, descend into both + // subtrees, finding the first child that spans the current position. + case IteratorMayDiffer: + if (iterator_descend(&old_iter, position.bytes)) { + if (!iterator_descend(&new_iter, position.bytes)) { + is_changed = true; + next_position = iterator_end_position(&old_iter); + } + } else if (iterator_descend(&new_iter, position.bytes)) { + is_changed = true; + next_position = iterator_end_position(&new_iter); + } else { + next_position = length_min( + iterator_end_position(&old_iter), + iterator_end_position(&new_iter) + ); + } + break; + + // If the subtrees are different, record a change and then move + // to the end of both subtrees. + case IteratorDiffers: + is_changed = true; + next_position = length_min( + iterator_end_position(&old_iter), + iterator_end_position(&new_iter) + ); + break; + } + + // Ensure that both iterators are caught up to the current position. 
+ while ( + !iterator_done(&old_iter) && + iterator_end_position(&old_iter).bytes <= next_position.bytes + ) iterator_advance(&old_iter); + while ( + !iterator_done(&new_iter) && + iterator_end_position(&new_iter).bytes <= next_position.bytes + ) iterator_advance(&new_iter); + + // Ensure that both iterators are at the same depth in the tree. + while (old_iter.visible_depth > new_iter.visible_depth) { + iterator_ascend(&old_iter); + } + while (new_iter.visible_depth > old_iter.visible_depth) { + iterator_ascend(&new_iter); + } + + if (is_changed) { + #ifdef DEBUG_GET_CHANGED_RANGES + printf( + " change: [[%u, %u] - [%u, %u]]\n", + position.extent.row + 1, position.extent.column, + next_position.extent.row + 1, next_position.extent.column + ); + #endif + + ts_range_array_add(&results, position, next_position); + } + + position = next_position; + + // Keep track of the current position in the included range differences + // array in order to avoid scanning the entire array on each iteration. 
+ while (included_range_difference_index < included_range_differences->size) { + const t_range *range = &included_range_differences->contents[ + included_range_difference_index + ]; + if (range->end_byte <= position.bytes) { + included_range_difference_index++; + } else { + break; + } + } + } while (!iterator_done(&old_iter) && !iterator_done(&new_iter)); + + Length old_size = ts_subtree_total_size(*old_tree); + Length new_size = ts_subtree_total_size(*new_tree); + if (old_size.bytes < new_size.bytes) { + ts_range_array_add(&results, old_size, new_size); + } else if (new_size.bytes < old_size.bytes) { + ts_range_array_add(&results, new_size, old_size); + } + + *cursor1 = old_iter.cursor; + *cursor2 = new_iter.cursor; + *ranges = results.contents; + return results.size; +} +#include "src/language.h" + +#include "src/api.h" +#include + +const t_language *ts_language_copy(const t_language *self) { + return self; +} + +void ts_language_delete(const t_language *self) { + (void)(self); +} + +uint32_t ts_language_symbol_count(const t_language *self) { + return self->symbol_count + self->alias_count; +} + +uint32_t ts_language_state_count(const t_language *self) { + return self->state_count; +} + +uint32_t ts_language_version(const t_language *self) { + return self->version; +} + +uint32_t ts_language_field_count(const t_language *self) { + return self->field_count; +} + +void ts_language_table_entry( + const t_language *self, + t_state_id state, + t_symbol symbol, + TableEntry *result +) { + if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { + result->action_count = 0; + result->is_reusable = false; + result->actions = NULL; + } else { + assert(symbol < self->token_count); + uint32_t action_index = ts_language_lookup(self, state, symbol); + const TSParseActionEntry *entry = &self->parse_actions[action_index]; + result->action_count = entry->entry.count; + result->is_reusable = entry->entry.reusable; + result->actions = (const TSParseAction 
*)(entry + 1); + } +} + +TSSymbolMetadata ts_language_symbol_metadata( + const t_language *self, + t_symbol symbol +) { + if (symbol == ts_builtin_sym_error) { + return (TSSymbolMetadata) {.visible = true, .named = true}; + } else if (symbol == ts_builtin_sym_error_repeat) { + return (TSSymbolMetadata) {.visible = false, .named = false}; + } else { + return self->symbol_metadata[symbol]; + } +} + +t_symbol ts_language_public_symbol( + const t_language *self, + t_symbol symbol +) { + if (symbol == ts_builtin_sym_error) return symbol; + return self->public_symbol_map[symbol]; +} + +t_state_id ts_language_next_state( + const t_language *self, + t_state_id state, + t_symbol symbol +) { + if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { + return 0; + } else if (symbol < self->token_count) { + uint32_t count; + const TSParseAction *actions = ts_language_actions(self, state, symbol, &count); + if (count > 0) { + TSParseAction action = actions[count - 1]; + if (action.type == TSParseActionTypeShift) { + return action.shift.extra ? 
state : action.shift.state; + } + } + return 0; + } else { + return ts_language_lookup(self, state, symbol); + } +} + +const char *ts_language_symbol_name( + const t_language *self, + t_symbol symbol +) { + if (symbol == ts_builtin_sym_error) { + return "ERROR"; + } else if (symbol == ts_builtin_sym_error_repeat) { + return "_ERROR"; + } else if (symbol < ts_language_symbol_count(self)) { + return self->symbol_names[symbol]; + } else { + return NULL; + } +} + +t_symbol ts_language_symbol_for_name( + const t_language *self, + const char *string, + uint32_t length, + bool is_named +) { + if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error; + uint16_t count = (uint16_t)ts_language_symbol_count(self); + for (t_symbol i = 0; i < count; i++) { + TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i); + if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; + const char *symbol_name = self->symbol_names[i]; + if (!strncmp(symbol_name, string, length) && !symbol_name[length]) { + return self->public_symbol_map[i]; + } + } + return 0; +} + +t_symbol_type ts_language_symbol_type( + const t_language *self, + t_symbol symbol +) { + TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol); + if (metadata.named && metadata.visible) { + return TSSymbolTypeRegular; + } else if (metadata.visible) { + return TSSymbolTypeAnonymous; + } else { + return TSSymbolTypeAuxiliary; + } +} + +const char *ts_language_field_name_for_id( + const t_language *self, + t_field_id id +) { + uint32_t count = ts_language_field_count(self); + if (count && id <= count) { + return self->field_names[id]; + } else { + return NULL; + } +} + +t_field_id ts_language_field_id_for_name( + const t_language *self, + const char *name, + uint32_t name_length +) { + uint16_t count = (uint16_t)ts_language_field_count(self); + for (t_symbol i = 1; i < count + 1; i++) { + switch (strncmp(name, self->field_names[i], name_length)) { + case 0: + if 
(self->field_names[i][name_length] == 0) return i; + break; + case -1: + return 0; + default: + break; + } + } + return 0; +} + +t_lookahead_iterator *ts_lookahead_iterator_new(const t_language *self, t_state_id state) { + if (state >= self->state_count) return NULL; + LookaheadIterator *iterator = ts_malloc(sizeof(LookaheadIterator)); + *iterator = ts_language_lookaheads(self, state); + return (t_lookahead_iterator *)iterator; +} + +void ts_lookahead_iterator_delete(t_lookahead_iterator *self) { + ts_free(self); +} + +bool ts_lookahead_iterator_reset_state(t_lookahead_iterator * self, t_state_id state) { + LookaheadIterator *iterator = (LookaheadIterator *)self; + if (state >= iterator->language->state_count) return false; + *iterator = ts_language_lookaheads(iterator->language, state); + return true; +} + +const t_language *ts_lookahead_iterator_language(const t_lookahead_iterator *self) { + const LookaheadIterator *iterator = (const LookaheadIterator *)self; + return iterator->language; +} + +bool ts_lookahead_iterator_reset(t_lookahead_iterator *self, const t_language *language, t_state_id state) { + if (state >= language->state_count) return false; + LookaheadIterator *iterator = (LookaheadIterator *)self; + *iterator = ts_language_lookaheads(language, state); + return true; +} + +bool ts_lookahead_iterator_next(t_lookahead_iterator *self) { + LookaheadIterator *iterator = (LookaheadIterator *)self; + return ts_lookahead_iterator__next(iterator); +} + +t_symbol ts_lookahead_iterator_current_symbol(const t_lookahead_iterator *self) { + const LookaheadIterator *iterator = (const LookaheadIterator *)self; + return iterator->symbol; +} + +const char *ts_lookahead_iterator_current_symbol_name(const t_lookahead_iterator *self) { + const LookaheadIterator *iterator = (const LookaheadIterator *)self; + return ts_language_symbol_name(iterator->language, iterator->symbol); +} +#include +#include "src/lexer.h" +#include "src/subtree.h" +#include "src/length.h" 
+//#include "src/unicode.h" + +#define LOG(message, character) \ + if (self->logger.log) { \ + snprintf( \ + self->debug_buffer, \ + TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \ + 32 <= character && character < 127 ? \ + message " character:'%c'" : \ + message " character:%d", \ + character \ + ); \ + self->logger.log( \ + self->logger.payload, \ + TSLogTypeLex, \ + self->debug_buffer \ + ); \ + } + +static const int32_t BYTE_ORDER_MARK = 0xFEFF; + +static const t_range DEFAULT_RANGE = { + .start_point = { + .row = 0, + .column = 0, + }, + .end_point = { + .row = UINT32_MAX, + .column = UINT32_MAX, + }, + .start_byte = 0, + .end_byte = UINT32_MAX +}; + +// Check if the lexer has reached EOF. This state is stored +// by setting the lexer's `current_included_range_index` such that +// it has consumed all of its available ranges. +static bool ts_lexer__eof(const TSLexer *_self) { + Lexer *self = (Lexer *)_self; + return self->current_included_range_index == self->included_range_count; +} + +// Clear the currently stored chunk of source code, because the lexer's +// position has changed. +static void ts_lexer__clear_chunk(Lexer *self) { + self->chunk = NULL; + self->chunk_size = 0; + self->chunk_start = 0; +} + +// Call the lexer's input callback to obtain a new chunk of source code +// for the current position. 
+static void ts_lexer__get_chunk(Lexer *self) { + self->chunk_start = self->current_position.bytes; + self->chunk = self->input.read( + self->input.payload, + self->current_position.bytes, + self->current_position.extent, + &self->chunk_size + ); + if (!self->chunk_size) { + self->current_included_range_index = self->included_range_count; + self->chunk = NULL; + } +} +typedef uint32_t (*DecodeFunc)( + const uint8_t *string, + uint32_t length, + int32_t *code_point +); + +static uint32_t ts_decode_ascii( + const uint8_t *string, + uint32_t length, + int32_t *code_point +) { + uint32_t i = 1; + (void)(length); + *code_point = *string; + return i; +} + +// Decode the next unicode character in the current chunk of source code. +// This assumes that the lexer has already retrieved a chunk of source +// code that spans the current position. +static void ts_lexer__get_lookahead(Lexer *self) { + uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start; + uint32_t size = self->chunk_size - position_in_chunk; + + if (size == 0) { + self->lookahead_size = 1; + self->data.lookahead = '\0'; + return; + } + + #define TS_DECODE_ERROR -1 + + const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk; + // UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8 + // ? ts_decode_utf8 + // : ts_decode_utf16; + + + self->lookahead_size = ts_decode_ascii(chunk, size, &self->data.lookahead); + + // If this chunk ended in the middle of a multi-byte character, + // try again with a fresh chunk. 
+ if (self->data.lookahead == TS_DECODE_ERROR && size < 4) { + ts_lexer__get_chunk(self); + chunk = (const uint8_t *)self->chunk; + size = self->chunk_size; + self->lookahead_size = ts_decode_ascii(chunk, size, &self->data.lookahead); + } + + if (self->data.lookahead == TS_DECODE_ERROR) { + self->lookahead_size = 1; + } +} + +static void ts_lexer_goto(Lexer *self, Length position) { + self->current_position = position; + + // Move to the first valid position at or after the given position. + bool found_included_range = false; + for (unsigned i = 0; i < self->included_range_count; i++) { + t_range *included_range = &self->included_ranges[i]; + if ( + included_range->end_byte > self->current_position.bytes && + included_range->end_byte > included_range->start_byte + ) { + if (included_range->start_byte >= self->current_position.bytes) { + self->current_position = (Length) { + .bytes = included_range->start_byte, + .extent = included_range->start_point, + }; + } + + self->current_included_range_index = i; + found_included_range = true; + break; + } + } + + if (found_included_range) { + // If the current position is outside of the current chunk of text, + // then clear out the current chunk of text. + if (self->chunk && ( + self->current_position.bytes < self->chunk_start || + self->current_position.bytes >= self->chunk_start + self->chunk_size + )) { + ts_lexer__clear_chunk(self); + } + + self->lookahead_size = 0; + self->data.lookahead = '\0'; + } + + // If the given position is beyond any of included ranges, move to the EOF + // state - past the end of the included ranges. 
+ else { + self->current_included_range_index = self->included_range_count; + t_range *last_included_range = &self->included_ranges[self->included_range_count - 1]; + self->current_position = (Length) { + .bytes = last_included_range->end_byte, + .extent = last_included_range->end_point, + }; + ts_lexer__clear_chunk(self); + self->lookahead_size = 1; + self->data.lookahead = '\0'; + } +} + +// Intended to be called only from functions that control logging. +static void ts_lexer__do_advance(Lexer *self, bool skip) { + if (self->lookahead_size) { + self->current_position.bytes += self->lookahead_size; + if (self->data.lookahead == '\n') { + self->current_position.extent.row++; + self->current_position.extent.column = 0; + } else { + self->current_position.extent.column += self->lookahead_size; + } + } + + const t_range *current_range = &self->included_ranges[self->current_included_range_index]; + while ( + self->current_position.bytes >= current_range->end_byte || + current_range->end_byte == current_range->start_byte + ) { + if (self->current_included_range_index < self->included_range_count) { + self->current_included_range_index++; + } + if (self->current_included_range_index < self->included_range_count) { + current_range++; + self->current_position = (Length) { + current_range->start_byte, + current_range->start_point, + }; + } else { + current_range = NULL; + break; + } + } + + if (skip) self->token_start_position = self->current_position; + + if (current_range) { + if ( + self->current_position.bytes < self->chunk_start || + self->current_position.bytes >= self->chunk_start + self->chunk_size + ) { + ts_lexer__get_chunk(self); + } + ts_lexer__get_lookahead(self); + } else { + ts_lexer__clear_chunk(self); + self->data.lookahead = '\0'; + self->lookahead_size = 1; + } +} + +// Advance to the next character in the source code, retrieving a new +// chunk of source code if needed. 
+static void ts_lexer__advance(TSLexer *_self, bool skip) { + Lexer *self = (Lexer *)_self; + if (!self->chunk) return; + + if (skip) { + LOG("skip", self->data.lookahead) + } else { + LOG("consume", self->data.lookahead) + } + + ts_lexer__do_advance(self, skip); +} + +// Mark that a token match has completed. This can be called multiple +// times if a longer match is found later. +static void ts_lexer__mark_end(TSLexer *_self) { + Lexer *self = (Lexer *)_self; + if (!ts_lexer__eof(&self->data)) { + // If the lexer is right at the beginning of included range, + // then the token should be considered to end at the *end* of the + // previous included range, rather than here. + t_range *current_included_range = &self->included_ranges[ + self->current_included_range_index + ]; + if ( + self->current_included_range_index > 0 && + self->current_position.bytes == current_included_range->start_byte + ) { + t_range *previous_included_range = current_included_range - 1; + self->token_end_position = (Length) { + previous_included_range->end_byte, + previous_included_range->end_point, + }; + return; + } + } + self->token_end_position = self->current_position; +} + +static uint32_t ts_lexer__get_column(TSLexer *_self) { + Lexer *self = (Lexer *)_self; + + uint32_t goal_byte = self->current_position.bytes; + + self->did_get_column = true; + self->current_position.bytes -= self->current_position.extent.column; + self->current_position.extent.column = 0; + + if (self->current_position.bytes < self->chunk_start) { + ts_lexer__get_chunk(self); + } + + uint32_t result = 0; + if (!ts_lexer__eof(_self)) { + ts_lexer__get_lookahead(self); + while (self->current_position.bytes < goal_byte && self->chunk) { + result++; + ts_lexer__do_advance(self, false); + if (ts_lexer__eof(_self)) break; + } + } + + return result; +} + +// Is the lexer at a boundary between two disjoint included ranges of +// source code? 
This is exposed as an API because some languages' external +// scanners need to perform custom actions at these boundaries. +static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) { + const Lexer *self = (const Lexer *)_self; + if (self->current_included_range_index < self->included_range_count) { + t_range *current_range = &self->included_ranges[self->current_included_range_index]; + return self->current_position.bytes == current_range->start_byte; + } else { + return false; + } +} + +void ts_lexer_init(Lexer *self) { + *self = (Lexer) { + .data = { + // The lexer's methods are stored as struct fields so that generated + // parsers can call them without needing to be linked against this + // library. + .advance = ts_lexer__advance, + .mark_end = ts_lexer__mark_end, + .get_column = ts_lexer__get_column, + .is_at_included_range_start = ts_lexer__is_at_included_range_start, + .eof = ts_lexer__eof, + .lookahead = 0, + .result_symbol = 0, + }, + .chunk = NULL, + .chunk_size = 0, + .chunk_start = 0, + .current_position = {0, {0, 0}}, + .logger = { + .payload = NULL, + .log = NULL + }, + .included_ranges = NULL, + .included_range_count = 0, + .current_included_range_index = 0, + }; + ts_lexer_set_included_ranges(self, NULL, 0); +} + +void ts_lexer_delete(Lexer *self) { + ts_free(self->included_ranges); +} + +void ts_lexer_set_input(Lexer *self, t_input input) { + self->input = input; + ts_lexer__clear_chunk(self); + ts_lexer_goto(self, self->current_position); +} + +// Move the lexer to the given position. This doesn't do any work +// if the parser is already at the given position. 
+void ts_lexer_reset(Lexer *self, Length position) { + if (position.bytes != self->current_position.bytes) { + ts_lexer_goto(self, position); + } +} + +void ts_lexer_start(Lexer *self) { + self->token_start_position = self->current_position; + self->token_end_position = LENGTH_UNDEFINED; + self->data.result_symbol = 0; + self->did_get_column = false; + if (!ts_lexer__eof(&self->data)) { + if (!self->chunk_size) ts_lexer__get_chunk(self); + if (!self->lookahead_size) ts_lexer__get_lookahead(self); + if ( + self->current_position.bytes == 0 && + self->data.lookahead == BYTE_ORDER_MARK + ) ts_lexer__advance(&self->data, true); + } +} + +void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) { + if (length_is_undefined(self->token_end_position)) { + ts_lexer__mark_end(&self->data); + } + + // If the token ended at an included range boundary, then its end position + // will have been reset to the end of the preceding range. Reset the start + // position to match. + if (self->token_end_position.bytes < self->token_start_position.bytes) { + self->token_start_position = self->token_end_position; + } + + uint32_t current_lookahead_end_byte = self->current_position.bytes + 1; + + // In order to determine that a byte sequence is invalid UTF8 or UTF16, + // the character decoding algorithm may have looked at the following byte. + // Therefore, the next byte *after* the current (invalid) character + // affects the interpretation of the current character. 
+ if (self->data.lookahead == TS_DECODE_ERROR) { + current_lookahead_end_byte++; + } + + if (current_lookahead_end_byte > *lookahead_end_byte) { + *lookahead_end_byte = current_lookahead_end_byte; + } +} + +void ts_lexer_advance_to_end(Lexer *self) { + while (self->chunk) { + ts_lexer__advance(&self->data, false); + } +} + +void ts_lexer_mark_end(Lexer *self) { + ts_lexer__mark_end(&self->data); +} + +bool ts_lexer_set_included_ranges( + Lexer *self, + const t_range *ranges, + uint32_t count +) { + if (count == 0 || !ranges) { + ranges = &DEFAULT_RANGE; + count = 1; + } else { + uint32_t previous_byte = 0; + for (unsigned i = 0; i < count; i++) { + const t_range *range = &ranges[i]; + if ( + range->start_byte < previous_byte || + range->end_byte < range->start_byte + ) return false; + previous_byte = range->end_byte; + } + } + + size_t size = count * sizeof(t_range); + self->included_ranges = ts_realloc(self->included_ranges, size); + memcpy(self->included_ranges, ranges, size); + self->included_range_count = count; + ts_lexer_goto(self, self->current_position); + return true; +} + +t_range *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) { + *count = self->included_range_count; + return self->included_ranges; +} + +#undef LOG + + + + + + + + + + + + + +#include +#include "src/subtree.h" +#include "src/tree.h" +#include "src/language.h" + +typedef struct { + Subtree parent; + const t_tree *tree; + Length position; + uint32_t child_index; + uint32_t structural_child_index; + const t_symbol *alias_sequence; +} NodeChildIterator; + +// TSNode - constructors + +t_parse_node ts_node_new( + const t_tree *tree, + const Subtree *subtree, + Length position, + t_symbol alias +) { + return (t_parse_node) { + {position.bytes, position.extent.row, position.extent.column, alias}, + subtree, + tree, + }; +} + +static inline t_parse_node ts_node__null(void) { + return ts_node_new(NULL, NULL, length_zero(), 0); +} + +// TSNode - accessors + +uint32_t 
ts_node_start_byte(t_parse_node self) { + return self.context[0]; +} + +t_point ts_node_start_point(t_parse_node self) { + return (t_point) {self.context[1], self.context[2]}; +} + +static inline uint32_t ts_node__alias(const t_parse_node *self) { + return self->context[3]; +} + +static inline Subtree ts_node__subtree(t_parse_node self) { + return *(const Subtree *)self.id; +} + +// NodeChildIterator + +static inline NodeChildIterator ts_node_iterate_children(const t_parse_node *node) { + Subtree subtree = ts_node__subtree(*node); + if (ts_subtree_child_count(subtree) == 0) { + return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; + } + const t_symbol *alias_sequence = ts_language_alias_sequence( + node->tree->language, + subtree.ptr->production_id + ); + return (NodeChildIterator) { + .tree = node->tree, + .parent = subtree, + .position = {ts_node_start_byte(*node), ts_node_start_point(*node)}, + .child_index = 0, + .structural_child_index = 0, + .alias_sequence = alias_sequence, + }; +} + +static inline bool ts_node_child_iterator_done(NodeChildIterator *self) { + return self->child_index == self->parent.ptr->child_count; +} + +static inline bool ts_node_child_iterator_next( + NodeChildIterator *self, + t_parse_node *result +) { + if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false; + const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; + t_symbol alias_symbol = 0; + if (!ts_subtree_extra(*child)) { + if (self->alias_sequence) { + alias_symbol = self->alias_sequence[self->structural_child_index]; + } + self->structural_child_index++; + } + if (self->child_index > 0) { + self->position = length_add(self->position, ts_subtree_padding(*child)); + } + *result = ts_node_new( + self->tree, + child, + self->position, + alias_symbol + ); + self->position = length_add(self->position, ts_subtree_size(*child)); + self->child_index++; + return true; +} + +// TSNode - private + +static inline bool 
ts_node__is_relevant(t_parse_node self, bool include_anonymous) { + Subtree tree = ts_node__subtree(self); + if (include_anonymous) { + return ts_subtree_visible(tree) || ts_node__alias(&self); + } else { + t_symbol alias = ts_node__alias(&self); + if (alias) { + return ts_language_symbol_metadata(self.tree->language, alias).named; + } else { + return ts_subtree_visible(tree) && ts_subtree_named(tree); + } + } +} + +static inline uint32_t ts_node__relevant_child_count( + t_parse_node self, + bool include_anonymous +) { + Subtree tree = ts_node__subtree(self); + if (ts_subtree_child_count(tree) > 0) { + if (include_anonymous) { + return tree.ptr->visible_child_count; + } else { + return tree.ptr->named_child_count; + } + } else { + return 0; + } +} + +static inline t_parse_node ts_node__child( + t_parse_node self, + uint32_t child_index, + bool include_anonymous +) { + t_parse_node result = self; + bool did_descend = true; + + while (did_descend) { + did_descend = false; + + t_parse_node child; + uint32_t index = 0; + NodeChildIterator iterator = ts_node_iterate_children(&result); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (ts_node__is_relevant(child, include_anonymous)) { + if (index == child_index) { + return child; + } + index++; + } else { + uint32_t grandchild_index = child_index - index; + uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous); + if (grandchild_index < grandchild_count) { + did_descend = true; + result = child; + child_index = grandchild_index; + break; + } + index += grandchild_count; + } + } + } + + return ts_node__null(); +} + +static bool ts_subtree_has_trailing_empty_descendant( + Subtree self, + Subtree other +) { + for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) { + Subtree child = ts_subtree_children(self)[i]; + if (ts_subtree_total_bytes(child) > 0) break; + if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) { + return true; + } + 
} + return false; +} + +static inline t_parse_node ts_node__prev_sibling(t_parse_node self, bool include_anonymous) { + Subtree self_subtree = ts_node__subtree(self); + bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0; + uint32_t target_end_byte = ts_node_end_byte(self); + + t_parse_node node = ts_node_parent(self); + t_parse_node earlier_node = ts_node__null(); + bool earlier_node_is_relevant = false; + + while (!ts_node_is_null(node)) { + t_parse_node earlier_child = ts_node__null(); + bool earlier_child_is_relevant = false; + bool found_child_containing_target = false; + + t_parse_node child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (child.id == self.id) break; + if (iterator.position.bytes > target_end_byte) { + found_child_containing_target = true; + break; + } + + if (iterator.position.bytes == target_end_byte && + (!self_is_empty || + ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) { + found_child_containing_target = true; + break; + } + + if (ts_node__is_relevant(child, include_anonymous)) { + earlier_child = child; + earlier_child_is_relevant = true; + } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { + earlier_child = child; + earlier_child_is_relevant = false; + } + } + + if (found_child_containing_target) { + if (!ts_node_is_null(earlier_child)) { + earlier_node = earlier_child; + earlier_node_is_relevant = earlier_child_is_relevant; + } + node = child; + } else if (earlier_child_is_relevant) { + return earlier_child; + } else if (!ts_node_is_null(earlier_child)) { + node = earlier_child; + } else if (earlier_node_is_relevant) { + return earlier_node; + } else { + node = earlier_node; + earlier_node = ts_node__null(); + earlier_node_is_relevant = false; + } + } + + return ts_node__null(); +} + +static inline t_parse_node ts_node__next_sibling(t_parse_node self, bool include_anonymous) { + uint32_t 
target_end_byte = ts_node_end_byte(self); + + t_parse_node node = ts_node_parent(self); + t_parse_node later_node = ts_node__null(); + bool later_node_is_relevant = false; + + while (!ts_node_is_null(node)) { + t_parse_node later_child = ts_node__null(); + bool later_child_is_relevant = false; + t_parse_node child_containing_target = ts_node__null(); + + t_parse_node child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (iterator.position.bytes < target_end_byte) continue; + if (ts_node_start_byte(child) <= ts_node_start_byte(self)) { + if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) { + child_containing_target = child; + } + } else if (ts_node__is_relevant(child, include_anonymous)) { + later_child = child; + later_child_is_relevant = true; + break; + } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { + later_child = child; + later_child_is_relevant = false; + break; + } + } + + if (!ts_node_is_null(child_containing_target)) { + if (!ts_node_is_null(later_child)) { + later_node = later_child; + later_node_is_relevant = later_child_is_relevant; + } + node = child_containing_target; + } else if (later_child_is_relevant) { + return later_child; + } else if (!ts_node_is_null(later_child)) { + node = later_child; + } else if (later_node_is_relevant) { + return later_node; + } else { + node = later_node; + } + } + + return ts_node__null(); +} + +static inline t_parse_node ts_node__first_child_for_byte( + t_parse_node self, + uint32_t goal, + bool include_anonymous +) { + t_parse_node node = self; + bool did_descend = true; + + while (did_descend) { + did_descend = false; + + t_parse_node child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (ts_node_end_byte(child) > goal) { + if (ts_node__is_relevant(child, include_anonymous)) { + return child; + } else if 
(ts_node_child_count(child) > 0) { + did_descend = true; + node = child; + break; + } + } + } + } + + return ts_node__null(); +} + +static inline t_parse_node ts_node__descendant_for_byte_range( + t_parse_node self, + uint32_t range_start, + uint32_t range_end, + bool include_anonymous +) { + t_parse_node node = self; + t_parse_node last_visible_node = self; + + bool did_descend = true; + while (did_descend) { + did_descend = false; + + t_parse_node child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + uint32_t node_end = iterator.position.bytes; + + // The end of this node must extend far enough forward to touch + // the end of the range and exceed the start of the range. + if (node_end < range_end) continue; + if (node_end <= range_start) continue; + + // The start of this node must extend far enough backward to + // touch the start of the range. + if (range_start < ts_node_start_byte(child)) break; + + node = child; + if (ts_node__is_relevant(node, include_anonymous)) { + last_visible_node = node; + } + did_descend = true; + break; + } + } + + return last_visible_node; +} + +static inline t_parse_node ts_node__descendant_for_point_range( + t_parse_node self, + t_point range_start, + t_point range_end, + bool include_anonymous +) { + t_parse_node node = self; + t_parse_node last_visible_node = self; + + bool did_descend = true; + while (did_descend) { + did_descend = false; + + t_parse_node child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) { + t_point node_end = iterator.position.extent; + + // The end of this node must extend far enough forward to touch + // the end of the range and exceed the start of the range. + if (point_lt(node_end, range_end)) continue; + if (point_lte(node_end, range_start)) continue; + + // The start of this node must extend far enough backward to + // touch the start of the range. 
+ if (point_lt(range_start, ts_node_start_point(child))) break; + + node = child; + if (ts_node__is_relevant(node, include_anonymous)) { + last_visible_node = node; + } + did_descend = true; + break; + } + } + + return last_visible_node; +} + +// TSNode - public + +uint32_t ts_node_end_byte(t_parse_node self) { + return ts_node_start_byte(self) + ts_subtree_size(ts_node__subtree(self)).bytes; +} + +t_point ts_node_end_point(t_parse_node self) { + return point_add(ts_node_start_point(self), ts_subtree_size(ts_node__subtree(self)).extent); +} + +t_symbol ts_node_symbol(t_parse_node self) { + t_symbol symbol = ts_node__alias(&self); + if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); + return ts_language_public_symbol(self.tree->language, symbol); +} + +const char *ts_node_type(t_parse_node self) { + t_symbol symbol = ts_node__alias(&self); + if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); + return ts_language_symbol_name(self.tree->language, symbol); +} + +const t_language *ts_node_language(t_parse_node self) { + return self.tree->language; +} + +t_symbol ts_node_grammar_symbol(t_parse_node self) { + return ts_subtree_symbol(ts_node__subtree(self)); +} + +const char *ts_node_grammar_type(t_parse_node self) { + t_symbol symbol = ts_subtree_symbol(ts_node__subtree(self)); + return ts_language_symbol_name(self.tree->language, symbol); +} + +char *ts_node_string(t_parse_node self) { + t_symbol alias_symbol = ts_node__alias(&self); + return ts_subtree_string( + ts_node__subtree(self), + alias_symbol, + ts_language_symbol_metadata(self.tree->language, alias_symbol).visible, + self.tree->language, + false + ); +} + +bool ts_node_eq(t_parse_node self, t_parse_node other) { + return self.tree == other.tree && self.id == other.id; +} + +bool ts_node_is_null(t_parse_node self) { + return self.id == 0; +} + +bool ts_node_is_extra(t_parse_node self) { + return ts_subtree_extra(ts_node__subtree(self)); +} + +bool ts_node_is_named(t_parse_node self) 
{ + t_symbol alias = ts_node__alias(&self); + return alias + ? ts_language_symbol_metadata(self.tree->language, alias).named + : ts_subtree_named(ts_node__subtree(self)); +} + +bool ts_node_is_missing(t_parse_node self) { + return ts_subtree_missing(ts_node__subtree(self)); +} + +bool ts_node_has_changes(t_parse_node self) { + return ts_subtree_has_changes(ts_node__subtree(self)); +} + +bool ts_node_has_error(t_parse_node self) { + return ts_subtree_error_cost(ts_node__subtree(self)) > 0; +} + +bool ts_node_is_error(t_parse_node self) { + t_symbol symbol = ts_node_symbol(self); + return symbol == ts_builtin_sym_error; +} + +uint32_t ts_node_descendant_count(t_parse_node self) { + return ts_subtree_visible_descendant_count(ts_node__subtree(self)) + 1; +} + +t_state_id ts_node_parse_state(t_parse_node self) { + return ts_subtree_parse_state(ts_node__subtree(self)); +} + +t_state_id ts_node_next_parse_state(t_parse_node self) { + const t_language *language = self.tree->language; + uint16_t state = ts_node_parse_state(self); + if (state == TS_TREE_STATE_NONE) { + return TS_TREE_STATE_NONE; + } + uint16_t symbol = ts_node_grammar_symbol(self); + return ts_language_next_state(language, state, symbol); +} + +t_parse_node ts_node_parent(t_parse_node self) { + t_parse_node node = ts_tree_root_node(self.tree); + if (node.id == self.id) return ts_node__null(); + + while (true) { + t_parse_node next_node = ts_node_child_containing_descendant(node, self); + if (ts_node_is_null(next_node)) break; + node = next_node; + } + + return node; +} + +t_parse_node ts_node_child_containing_descendant(t_parse_node self, t_parse_node subnode) { + uint32_t start_byte = ts_node_start_byte(subnode); + uint32_t end_byte = ts_node_end_byte(subnode); + + do { + NodeChildIterator iter = ts_node_iterate_children(&self); + do { + if ( + !ts_node_child_iterator_next(&iter, &self) + || ts_node_start_byte(self) > start_byte + || self.id == subnode.id + ) { + return ts_node__null(); + } + } while 
(iter.position.bytes < end_byte || ts_node_child_count(self) == 0); + } while (!ts_node__is_relevant(self, true)); + + return self; +} + +t_parse_node ts_node_child(t_parse_node self, uint32_t child_index) { + return ts_node__child(self, child_index, true); +} + +t_parse_node ts_node_named_child(t_parse_node self, uint32_t child_index) { + return ts_node__child(self, child_index, false); +} + +t_parse_node ts_node_child_by_field_id(t_parse_node self, t_field_id field_id) { +recur: + if (!field_id || ts_node_child_count(self) == 0) return ts_node__null(); + + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map( + self.tree->language, + ts_node__subtree(self).ptr->production_id, + &field_map, + &field_map_end + ); + if (field_map == field_map_end) return ts_node__null(); + + // The field mappings are sorted by their field id. Scan all + // the mappings to find the ones for the given field id. + while (field_map->field_id < field_id) { + field_map++; + if (field_map == field_map_end) return ts_node__null(); + } + while (field_map_end[-1].field_id > field_id) { + field_map_end--; + if (field_map == field_map_end) return ts_node__null(); + } + + t_parse_node child; + NodeChildIterator iterator = ts_node_iterate_children(&self); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (!ts_subtree_extra(ts_node__subtree(child))) { + uint32_t index = iterator.structural_child_index - 1; + if (index < field_map->child_index) continue; + + // Hidden nodes' fields are "inherited" by their visible parent. + if (field_map->inherited) { + + // If this is the *last* possible child node for this field, + // then perform a tail call to avoid recursion. + if (field_map + 1 == field_map_end) { + self = child; + goto recur; + } + + // Otherwise, descend into this child, but if it doesn't contain + // the field, continue searching subsequent children. 
+ else { + t_parse_node result = ts_node_child_by_field_id(child, field_id); + if (result.id) return result; + field_map++; + if (field_map == field_map_end) return ts_node__null(); + } + } + + else if (ts_node__is_relevant(child, true)) { + return child; + } + + // If the field refers to a hidden node with visible children, + // return the first visible child. + else if (ts_node_child_count(child) > 0 ) { + return ts_node_child(child, 0); + } + + // Otherwise, continue searching subsequent children. + else { + field_map++; + if (field_map == field_map_end) return ts_node__null(); + } + } + } + + return ts_node__null(); +} + +static inline const char *ts_node__field_name_from_language(t_parse_node self, uint32_t structural_child_index) { + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map( + self.tree->language, + ts_node__subtree(self).ptr->production_id, + &field_map, + &field_map_end + ); + for (; field_map != field_map_end; field_map++) { + if (!field_map->inherited && field_map->child_index == structural_child_index) { + return self.tree->language->field_names[field_map->field_id]; + } + } + return NULL; +} + +const char *ts_node_field_name_for_child(t_parse_node self, uint32_t child_index) { + t_parse_node result = self; + bool did_descend = true; + const char *inherited_field_name = NULL; + + while (did_descend) { + did_descend = false; + + t_parse_node child; + uint32_t index = 0; + NodeChildIterator iterator = ts_node_iterate_children(&result); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (ts_node__is_relevant(child, true)) { + if (index == child_index) { + const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); + if (field_name) return field_name; + return inherited_field_name; + } + index++; + } else { + uint32_t grandchild_index = child_index - index; + uint32_t grandchild_count = ts_node__relevant_child_count(child, true); + if (grandchild_index < 
grandchild_count) { + const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); + if (field_name) inherited_field_name = field_name; + + did_descend = true; + result = child; + child_index = grandchild_index; + break; + } + index += grandchild_count; + } + } + } + + return NULL; +} + +t_parse_node ts_node_child_by_field_name( + t_parse_node self, + const char *name, + uint32_t name_length +) { + t_field_id field_id = ts_language_field_id_for_name( + self.tree->language, + name, + name_length + ); + return ts_node_child_by_field_id(self, field_id); +} + +uint32_t ts_node_child_count(t_parse_node self) { + Subtree tree = ts_node__subtree(self); + if (ts_subtree_child_count(tree) > 0) { + return tree.ptr->visible_child_count; + } else { + return 0; + } +} + +uint32_t ts_node_named_child_count(t_parse_node self) { + Subtree tree = ts_node__subtree(self); + if (ts_subtree_child_count(tree) > 0) { + return tree.ptr->named_child_count; + } else { + return 0; + } +} + +t_parse_node ts_node_next_sibling(t_parse_node self) { + return ts_node__next_sibling(self, true); +} + +t_parse_node ts_node_next_named_sibling(t_parse_node self) { + return ts_node__next_sibling(self, false); +} + +t_parse_node ts_node_prev_sibling(t_parse_node self) { + return ts_node__prev_sibling(self, true); +} + +t_parse_node ts_node_prev_named_sibling(t_parse_node self) { + return ts_node__prev_sibling(self, false); +} + +t_parse_node ts_node_first_child_for_byte(t_parse_node self, uint32_t byte) { + return ts_node__first_child_for_byte(self, byte, true); +} + +t_parse_node ts_node_first_named_child_for_byte(t_parse_node self, uint32_t byte) { + return ts_node__first_child_for_byte(self, byte, false); +} + +t_parse_node ts_node_descendant_for_byte_range( + t_parse_node self, + uint32_t start, + uint32_t end +) { + return ts_node__descendant_for_byte_range(self, start, end, true); +} + +t_parse_node ts_node_named_descendant_for_byte_range( + t_parse_node 
self, + uint32_t start, + uint32_t end +) { + return ts_node__descendant_for_byte_range(self, start, end, false); +} + +t_parse_node ts_node_descendant_for_point_range( + t_parse_node self, + t_point start, + t_point end +) { + return ts_node__descendant_for_point_range(self, start, end, true); +} + +t_parse_node ts_node_named_descendant_for_point_range( + t_parse_node self, + t_point start, + t_point end +) { + return ts_node__descendant_for_point_range(self, start, end, false); +} + +void ts_node_edit(t_parse_node *self, const t_input_edit *edit) { + uint32_t start_byte = ts_node_start_byte(*self); + t_point start_point = ts_node_start_point(*self); + + if (start_byte >= edit->old_end_byte) { + start_byte = edit->new_end_byte + (start_byte - edit->old_end_byte); + start_point = point_add(edit->new_end_point, point_sub(start_point, edit->old_end_point)); + } else if (start_byte > edit->start_byte) { + start_byte = edit->new_end_byte; + start_point = edit->new_end_point; + } + + self->context[0] = start_byte; + self->context[1] = start_point.row; + self->context[2] = start_point.column; +} + + +#include +#include +#include +#include +#include +#include +#include "src/api.h" +#include "src/alloc.h" +#include "src/array.h" +#include "src/atomic.h" +#include "src/clock.h" +#include "src/error_costs.h" +#include "src/get_changed_ranges.h" +#include "src/language.h" +#include "src/length.h" +#include "src/lexer.h" +#include "src/reduce_action.h" +#include "src/reusable_node.h" +#include "src/stack.h" +#include "src/subtree.h" +#include "src/tree.h" + + +#define LOG(...) 
\ + if (self->lexer.logger.log || self->dot_graph_file) { \ + snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \ + ts_parser__log(self); \ + } + +#define LOG_LOOKAHEAD(symbol_name, size) \ + if (self->lexer.logger.log || self->dot_graph_file) { \ + char *buf = self->lexer.debug_buffer; \ + const char *symbol = symbol_name; \ + int off = sprintf(buf, "lexed_lookahead sym:"); \ + for ( \ + int i = 0; \ + symbol[i] != '\0' \ + && off < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; \ + i++ \ + ) { \ + switch (symbol[i]) { \ + case '\t': buf[off++] = '\\'; buf[off++] = 't'; break; \ + case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break; \ + case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break; \ + case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break; \ + case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break; \ + case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \ + default: buf[off++] = symbol[i]; break; \ + } \ + } \ + snprintf( \ + buf + off, \ + TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off, \ + ", size:%u", \ + size \ + ); \ + ts_parser__log(self); \ + } + +#define LOG_STACK() \ + if (self->dot_graph_file) { \ + ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \ + fputs("\n\n", self->dot_graph_file); \ + } + +#define LOG_TREE(tree) \ + if (self->dot_graph_file) { \ + ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \ + fputs("\n", self->dot_graph_file); \ + } + +#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol) + +#define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree)) + +static const unsigned MAX_VERSION_COUNT = 6; +static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; +static const unsigned MAX_SUMMARY_DEPTH = 16; +static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; +static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100; + +typedef struct { + Subtree token; + Subtree last_external_token; + uint32_t byte_index; +} 
TokenCache; + +struct t_parser { + Lexer lexer; + Stack *stack; + SubtreePool tree_pool; + const t_language *language; + ReduceActionSet reduce_actions; + Subtree finished_tree; + SubtreeArray trailing_extras; + SubtreeArray trailing_extras2; + SubtreeArray scratch_trees; + TokenCache token_cache; + ReusableNode reusable_node; + void *external_scanner_payload; + FILE *dot_graph_file; + TSClock end_clock; + TSDuration timeout_duration; + unsigned accept_count; + unsigned operation_count; + const volatile size_t *cancellation_flag; + Subtree old_tree; + TSRangeArray included_range_differences; + unsigned included_range_difference_index; + bool has_scanner_error; +}; + +typedef struct { + unsigned cost; + unsigned node_count; + int dynamic_precedence; + bool is_in_error; +} ErrorStatus; + +typedef enum { + ErrorComparisonTakeLeft, + ErrorComparisonPreferLeft, + ErrorComparisonNone, + ErrorComparisonPreferRight, + ErrorComparisonTakeRight, +} ErrorComparison; + +typedef struct { + const char *string; + uint32_t length; +} TSStringInput; + +// StringInput + +static const char *ts_string_input_read( + void *_self, + uint32_t byte, + t_point point, + uint32_t *length +) { + (void)point; + TSStringInput *self = (TSStringInput *)_self; + if (byte >= self->length) { + *length = 0; + return ""; + } else { + *length = self->length - byte; + return self->string + byte; + } +} + +// Parser - Private + +static void ts_parser__log(t_parser *self) { + if (self->lexer.logger.log) { + self->lexer.logger.log( + self->lexer.logger.payload, + TSLogTypeParse, + self->lexer.debug_buffer + ); + } + + if (self->dot_graph_file) { + fprintf(self->dot_graph_file, "graph {\nlabel=\""); + for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) { + if (*chr == '"' || *chr == '\\') fputc('\\', self->dot_graph_file); + fputc(*chr, self->dot_graph_file); + } + fprintf(self->dot_graph_file, "\"\n}\n\n"); + } +} + +static bool ts_parser__breakdown_top_of_stack( + t_parser *self, + 
StackVersion version +) { + bool did_break_down = false; + bool pending = false; + + do { + StackSliceArray pop = ts_stack_pop_pending(self->stack, version); + if (!pop.size) break; + + did_break_down = true; + pending = false; + for (uint32_t i = 0; i < pop.size; i++) { + StackSlice slice = pop.contents[i]; + t_state_id state = ts_stack_state(self->stack, slice.version); + Subtree parent = *array_front(&slice.subtrees); + + for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) { + Subtree child = ts_subtree_children(parent)[j]; + pending = ts_subtree_child_count(child) > 0; + + if (ts_subtree_is_error(child)) { + state = ERROR_STATE; + } else if (!ts_subtree_extra(child)) { + state = ts_language_next_state(self->language, state, ts_subtree_symbol(child)); + } + + ts_subtree_retain(child); + ts_stack_push(self->stack, slice.version, child, pending, state); + } + + for (uint32_t j = 1; j < slice.subtrees.size; j++) { + Subtree tree = slice.subtrees.contents[j]; + ts_stack_push(self->stack, slice.version, tree, false, state); + } + + ts_subtree_release(&self->tree_pool, parent); + array_delete(&slice.subtrees); + + LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent)); + LOG_STACK(); + } + } while (pending); + + return did_break_down; +} + +static void ts_parser__breakdown_lookahead( + t_parser *self, + Subtree *lookahead, + t_state_id state, + ReusableNode *reusable_node +) { + bool did_descend = false; + Subtree tree = reusable_node_tree(reusable_node); + while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) { + LOG("state_mismatch sym:%s", TREE_NAME(tree)); + reusable_node_descend(reusable_node); + tree = reusable_node_tree(reusable_node); + did_descend = true; + } + + if (did_descend) { + ts_subtree_release(&self->tree_pool, *lookahead); + *lookahead = tree; + ts_subtree_retain(*lookahead); + } +} + +static ErrorComparison ts_parser__compare_versions( + t_parser *self, + ErrorStatus a, + ErrorStatus b +) { + 
(void)self; + if (!a.is_in_error && b.is_in_error) { + if (a.cost < b.cost) { + return ErrorComparisonTakeLeft; + } else { + return ErrorComparisonPreferLeft; + } + } + + if (a.is_in_error && !b.is_in_error) { + if (b.cost < a.cost) { + return ErrorComparisonTakeRight; + } else { + return ErrorComparisonPreferRight; + } + } + + if (a.cost < b.cost) { + if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) { + return ErrorComparisonTakeLeft; + } else { + return ErrorComparisonPreferLeft; + } + } + + if (b.cost < a.cost) { + if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) { + return ErrorComparisonTakeRight; + } else { + return ErrorComparisonPreferRight; + } + } + + if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft; + if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight; + return ErrorComparisonNone; +} + +static ErrorStatus ts_parser__version_status( + t_parser *self, + StackVersion version +) { + unsigned cost = ts_stack_error_cost(self->stack, version); + bool is_paused = ts_stack_is_paused(self->stack, version); + if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE; + return (ErrorStatus) { + .cost = cost, + .node_count = ts_stack_node_count_since_error(self->stack, version), + .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), + .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE + }; +} + +static bool ts_parser__better_version_exists( + t_parser *self, + StackVersion version, + bool is_in_error, + unsigned cost +) { + if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) { + return true; + } + + Length position = ts_stack_position(self->stack, version); + ErrorStatus status = { + .cost = cost, + .is_in_error = is_in_error, + .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), + .node_count = ts_stack_node_count_since_error(self->stack, version), + }; + + for (StackVersion 
i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { + if (i == version || + !ts_stack_is_active(self->stack, i) || + ts_stack_position(self->stack, i).bytes < position.bytes) continue; + ErrorStatus status_i = ts_parser__version_status(self, i); + switch (ts_parser__compare_versions(self, status, status_i)) { + case ErrorComparisonTakeRight: + return true; + case ErrorComparisonPreferRight: + if (ts_stack_can_merge(self->stack, i, version)) return true; + break; + default: + break; + } + } + + return false; +} + +static bool ts_parser__call_main_lex_fn(t_parser *self, TSLexMode lex_mode) { + + return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); + +} + +static bool ts_parser__call_keyword_lex_fn(t_parser *self, TSLexMode lex_mode) { + (void)(lex_mode); + + return self->language->keyword_lex_fn(&self->lexer.data, 0); + +} + +static void ts_parser__external_scanner_create( + t_parser *self +) { + if (self->language && self->language->external_scanner.states) { +if (self->language->external_scanner.create) { + self->external_scanner_payload = self->language->external_scanner.create(); + + } +}} + +static void ts_parser__external_scanner_destroy( + t_parser *self +) { + if ( + self->language && + self->external_scanner_payload && + self->language->external_scanner.destroy + ) { + self->language->external_scanner.destroy( + self->external_scanner_payload + ); + } + self->external_scanner_payload = NULL; +} + +static unsigned ts_parser__external_scanner_serialize( + t_parser *self +) { + uint32_t length = self->language->external_scanner.serialize( + self->external_scanner_payload, + self->lexer.debug_buffer + ); + assert(length <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE); + return length; + +} + +static void ts_parser__external_scanner_deserialize( + t_parser *self, + Subtree external_token +) { + const char *data = NULL; + uint32_t length = 0; + if (external_token.ptr) { + data = 
ts_external_scanner_state_data(&external_token.ptr->external_scanner_state); + length = external_token.ptr->external_scanner_state.length; + } + + + self->language->external_scanner.deserialize( + self->external_scanner_payload, + data, + length + ); + +} + +static bool ts_parser__external_scanner_scan( + t_parser *self, + t_state_id external_lex_state +) { + + const bool *valid_external_tokens = ts_language_enabled_external_tokens( + self->language, + external_lex_state + ); + return self->language->external_scanner.scan( + self->external_scanner_payload, + &self->lexer.data, + valid_external_tokens + ); + +} + +static bool ts_parser__can_reuse_first_leaf( + t_parser *self, + t_state_id state, + Subtree tree, + TableEntry *table_entry +) { + TSLexMode current_lex_mode = self->language->lex_modes[state]; + t_symbol leaf_symbol = ts_subtree_leaf_symbol(tree); + t_state_id leaf_state = ts_subtree_leaf_parse_state(tree); + TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state]; + + // At the end of a non-terminal extra node, the lexer normally returns + // NULL, which indicates that the parser should look for a reduce action + // at symbol `0`. Avoid reusing tokens in this situation to ensure that + // the same thing happens when incrementally reparsing. + if (current_lex_mode.lex_state == (uint16_t)(-1)) return false; + + // If the token was created in a state with the same set of lookaheads, it is reusable. + if ( + table_entry->action_count > 0 && + memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexMode)) == 0 && + ( + leaf_symbol != self->language->keyword_capture_token || + (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state) + ) + ) return true; + + // Empty tokens are not reusable in states with different lookaheads. + if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false; + + // If the current state allows external tokens or other tokens that conflict with this + // token, this token is not reusable. 
+ return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable; +} + +static Subtree ts_parser__lex( + t_parser *self, + StackVersion version, + t_state_id parse_state +) { + TSLexMode lex_mode = self->language->lex_modes[parse_state]; + if (lex_mode.lex_state == (uint16_t)-1) { + LOG("no_lookahead_after_non_terminal_extra"); + return NULL_SUBTREE; + } + + const Length start_position = ts_stack_position(self->stack, version); + const Subtree external_token = ts_stack_last_external_token(self->stack, version); + + bool found_external_token = false; + bool error_mode = parse_state == ERROR_STATE; + bool skipped_error = false; + bool called_get_column = false; + int32_t first_error_character = 0; + Length error_start_position = length_zero(); + Length error_end_position = length_zero(); + uint32_t lookahead_end_byte = 0; + uint32_t external_scanner_state_len = 0; + bool external_scanner_state_changed = false; + ts_lexer_reset(&self->lexer, start_position); + + for (;;) { + bool found_token = false; + Length current_position = self->lexer.current_position; + + if (lex_mode.external_lex_state != 0) { + LOG( + "lex_external state:%d, row:%u, column:%u", + lex_mode.external_lex_state, + current_position.extent.row, + current_position.extent.column + ); + ts_lexer_start(&self->lexer); + ts_parser__external_scanner_deserialize(self, external_token); + found_token = ts_parser__external_scanner_scan(self, lex_mode.external_lex_state); + if (self->has_scanner_error) return NULL_SUBTREE; + ts_lexer_finish(&self->lexer, &lookahead_end_byte); + + if (found_token) { + external_scanner_state_len = ts_parser__external_scanner_serialize(self); + external_scanner_state_changed = !ts_external_scanner_state_eq( + ts_subtree_external_scanner_state(external_token), + self->lexer.debug_buffer, + external_scanner_state_len + ); + + // When recovering from an error, ignore any zero-length external tokens + // unless they have changed the external scanner's state. 
This helps to + // avoid infinite loops which could otherwise occur, because the lexer is + // looking for any possible token, instead of looking for the specific set of + // tokens that are valid in some parse state. + // + // Note that it's possible that the token end position may be *before* the + // original position of the lexer because of the way that tokens are positioned + // at included range boundaries: when a token is terminated at the start of + // an included range, it is marked as ending at the *end* of the preceding + // included range. + if ( + self->lexer.token_end_position.bytes <= current_position.bytes && + (error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) && + !external_scanner_state_changed + ) { + LOG( + "ignore_empty_external_token symbol:%s", + SYM_NAME(self->language->external_scanner.symbol_map[self->lexer.data.result_symbol]) + ) + found_token = false; + } + } + + if (found_token) { + found_external_token = true; + called_get_column = self->lexer.did_get_column; + break; + } + + ts_lexer_reset(&self->lexer, current_position); + } + + LOG( + "lex_internal state:%d, row:%u, column:%u", + lex_mode.lex_state, + current_position.extent.row, + current_position.extent.column + ); + ts_lexer_start(&self->lexer); + found_token = ts_parser__call_main_lex_fn(self, lex_mode); + ts_lexer_finish(&self->lexer, &lookahead_end_byte); + if (found_token) break; + + if (!error_mode) { + error_mode = true; + lex_mode = self->language->lex_modes[ERROR_STATE]; + ts_lexer_reset(&self->lexer, start_position); + continue; + } + + if (!skipped_error) { + LOG("skip_unrecognized_character"); + skipped_error = true; + error_start_position = self->lexer.token_start_position; + error_end_position = self->lexer.token_start_position; + first_error_character = self->lexer.data.lookahead; + } + + if (self->lexer.current_position.bytes == error_end_position.bytes) { + if (self->lexer.data.eof(&self->lexer.data)) { + self->lexer.data.result_symbol = 
ts_builtin_sym_error; + break; + } + self->lexer.data.advance(&self->lexer.data, false); + } + + error_end_position = self->lexer.current_position; + } + + Subtree result; + if (skipped_error) { + Length padding = length_sub(error_start_position, start_position); + Length size = length_sub(error_end_position, error_start_position); + uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes; + result = ts_subtree_new_error( + &self->tree_pool, + first_error_character, + padding, + size, + lookahead_bytes, + parse_state, + self->language + ); + } else { + bool is_keyword = false; + t_symbol symbol = self->lexer.data.result_symbol; + Length padding = length_sub(self->lexer.token_start_position, start_position); + Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position); + uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes; + + if (found_external_token) { + symbol = self->language->external_scanner.symbol_map[symbol]; + } else if (symbol == self->language->keyword_capture_token && symbol != 0) { + uint32_t end_byte = self->lexer.token_end_position.bytes; + ts_lexer_reset(&self->lexer, self->lexer.token_start_position); + ts_lexer_start(&self->lexer); + + is_keyword = ts_parser__call_keyword_lex_fn(self, lex_mode); + + if ( + is_keyword && + self->lexer.token_end_position.bytes == end_byte && + ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol) + ) { + symbol = self->lexer.data.result_symbol; + } + } + + result = ts_subtree_new_leaf( + &self->tree_pool, + symbol, + padding, + size, + lookahead_bytes, + parse_state, + found_external_token, + called_get_column, + is_keyword, + self->language + ); + + if (found_external_token) { + MutableSubtree mut_result = ts_subtree_to_mut_unsafe(result); + ts_external_scanner_state_init( + &mut_result.ptr->external_scanner_state, + self->lexer.debug_buffer, + external_scanner_state_len + ); + 
mut_result.ptr->has_external_scanner_state_change = external_scanner_state_changed; + } + } + + LOG_LOOKAHEAD( + SYM_NAME(ts_subtree_symbol(result)), + ts_subtree_total_size(result).bytes + ); + return result; +} + +static Subtree ts_parser__get_cached_token( + t_parser *self, + t_state_id state, + size_t position, + Subtree last_external_token, + TableEntry *table_entry +) { + TokenCache *cache = &self->token_cache; + if ( + cache->token.ptr && cache->byte_index == position && + ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token) + ) { + ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry); + if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) { + ts_subtree_retain(cache->token); + return cache->token; + } + } + return NULL_SUBTREE; +} + +static void ts_parser__set_cached_token( + t_parser *self, + uint32_t byte_index, + Subtree last_external_token, + Subtree token +) { + TokenCache *cache = &self->token_cache; + if (token.ptr) ts_subtree_retain(token); + if (last_external_token.ptr) ts_subtree_retain(last_external_token); + if (cache->token.ptr) ts_subtree_release(&self->tree_pool, cache->token); + if (cache->last_external_token.ptr) ts_subtree_release(&self->tree_pool, cache->last_external_token); + cache->token = token; + cache->byte_index = byte_index; + cache->last_external_token = last_external_token; +} + +static bool ts_parser__has_included_range_difference( + const t_parser *self, + uint32_t start_position, + uint32_t end_position +) { + return ts_range_array_intersects( + &self->included_range_differences, + self->included_range_difference_index, + start_position, + end_position + ); +} + +static Subtree ts_parser__reuse_node( + t_parser *self, + StackVersion version, + t_state_id *state, + uint32_t position, + Subtree last_external_token, + TableEntry *table_entry +) { + Subtree result; + while ((result = reusable_node_tree(&self->reusable_node)).ptr) { + 
uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node); + uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result); + + // Do not reuse an EOF node if the included ranges array has changes + // later on in the file. + if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX; + + if (byte_offset > position) { + LOG("before_reusable_node symbol:%s", TREE_NAME(result)); + break; + } + + if (byte_offset < position) { + LOG("past_reusable_node symbol:%s", TREE_NAME(result)); + if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) { + reusable_node_advance(&self->reusable_node); + } + continue; + } + + if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) { + LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result)); + reusable_node_advance(&self->reusable_node); + continue; + } + + const char *reason = NULL; + if (ts_subtree_has_changes(result)) { + reason = "has_changes"; + } else if (ts_subtree_is_error(result)) { + reason = "is_error"; + } else if (ts_subtree_missing(result)) { + reason = "is_missing"; + } else if (ts_subtree_is_fragile(result)) { + reason = "is_fragile"; + } else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) { + reason = "contains_different_included_range"; + } + + if (reason) { + LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result)); + if (!reusable_node_descend(&self->reusable_node)) { + reusable_node_advance(&self->reusable_node); + ts_parser__breakdown_top_of_stack(self, version); + *state = ts_stack_state(self->stack, version); + } + continue; + } + + t_symbol leaf_symbol = ts_subtree_leaf_symbol(result); + ts_language_table_entry(self->language, *state, leaf_symbol, table_entry); + if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) { + LOG( + "cant_reuse_node symbol:%s, first_leaf_symbol:%s", + TREE_NAME(result), + SYM_NAME(leaf_symbol) + ); 
+ reusable_node_advance_past_leaf(&self->reusable_node); + break; + } + + LOG("reuse_node symbol:%s", TREE_NAME(result)); + ts_subtree_retain(result); + return result; + } + + return NULL_SUBTREE; +} + +// Determine if a given tree should be replaced by an alternative tree. +// +// The decision is based on the trees' error costs (if any), their dynamic precedence, +// and finally, as a default, by a recursive comparison of the trees' symbols. +static bool ts_parser__select_tree(t_parser *self, Subtree left, Subtree right) { + if (!left.ptr) return true; + if (!right.ptr) return false; + + if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) { + LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left)); + return true; + } + + if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) { + LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); + return false; + } + + if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) { + LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32, + TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left), + ts_subtree_dynamic_precedence(left)); + return true; + } + + if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) { + LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32, + TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right), + ts_subtree_dynamic_precedence(right)); + return false; + } + + if (ts_subtree_error_cost(left) > 0) return true; + + int comparison = ts_subtree_compare(left, right, &self->tree_pool); + switch (comparison) { + case -1: + LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); + return false; + break; + case 1: + LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left)); + return true; + default: + 
LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); + return false; + } +} + +// Determine if a given tree's children should be replaced by an alternative +// array of children. +static bool ts_parser__select_children( + t_parser *self, + Subtree left, + const SubtreeArray *children +) { + array_assign(&self->scratch_trees, children); + + // Create a temporary subtree using the scratch trees array. This node does + // not perform any allocation except for possibly growing the array to make + // room for its own heap data. The scratch tree is never explicitly released, + // so the same 'scratch trees' array can be reused again later. + MutableSubtree scratch_tree = ts_subtree_new_node( + ts_subtree_symbol(left), + &self->scratch_trees, + 0, + self->language + ); + + return ts_parser__select_tree( + self, + left, + ts_subtree_from_mut(scratch_tree) + ); +} + +static void ts_parser__shift( + t_parser *self, + StackVersion version, + t_state_id state, + Subtree lookahead, + bool extra +) { + bool is_leaf = ts_subtree_child_count(lookahead) == 0; + Subtree subtree_to_push = lookahead; + if (extra != ts_subtree_extra(lookahead) && is_leaf) { + MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead); + ts_subtree_set_extra(&result, extra); + subtree_to_push = ts_subtree_from_mut(result); + } + + ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state); + if (ts_subtree_has_external_tokens(subtree_to_push)) { + ts_stack_set_last_external_token( + self->stack, version, ts_subtree_last_external_token(subtree_to_push) + ); + } +} + +static StackVersion ts_parser__reduce( + t_parser *self, + StackVersion version, + t_symbol symbol, + uint32_t count, + int dynamic_precedence, + uint16_t production_id, + bool is_fragile, + bool end_of_non_terminal_extra +) { + uint32_t initial_version_count = ts_stack_version_count(self->stack); + + // Pop the given number of nodes from the given version of the parse stack. 
+ // If stack versions have previously merged, then there may be more than one + // path back through the stack. For each path, create a new parent node to + // contain the popped children, and push it onto the stack in place of the + // children. + StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); + uint32_t removed_version_count = 0; + for (uint32_t i = 0; i < pop.size; i++) { + StackSlice slice = pop.contents[i]; + StackVersion slice_version = slice.version - removed_version_count; + + // This is where new versions are added to the parse stack. The versions + // will all be sorted and truncated at the end of the outer parsing loop. + // Allow the maximum version count to be temporarily exceeded, but only + // by a limited threshold. + if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) { + ts_stack_remove_version(self->stack, slice_version); + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); + removed_version_count++; + while (i + 1 < pop.size) { + StackSlice next_slice = pop.contents[i + 1]; + if (next_slice.version != slice.version) break; + ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); + i++; + } + continue; + } + + // Extra tokens on top of the stack should not be included in this new parent + // node. They will be re-pushed onto the stack after the parent node is + // created and pushed. + SubtreeArray children = slice.subtrees; + ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras); + + MutableSubtree parent = ts_subtree_new_node( + symbol, &children, production_id, self->language + ); + + // This pop operation may have caused multiple stack versions to collapse + // into one, because they all diverged from a common state. In that case, + // choose one of the arrays of trees to be the parent node's children, and + // delete the rest of the tree arrays. 
+ while (i + 1 < pop.size) { + StackSlice next_slice = pop.contents[i + 1]; + if (next_slice.version != slice.version) break; + i++; + + SubtreeArray next_slice_children = next_slice.subtrees; + ts_subtree_array_remove_trailing_extras(&next_slice_children, &self->trailing_extras2); + + if (ts_parser__select_children( + self, + ts_subtree_from_mut(parent), + &next_slice_children + )) { + ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras); + ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent)); + array_swap(&self->trailing_extras, &self->trailing_extras2); + parent = ts_subtree_new_node( + symbol, &next_slice_children, production_id, self->language + ); + } else { + array_clear(&self->trailing_extras2); + ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); + } + } + + t_state_id state = ts_stack_state(self->stack, slice_version); + t_state_id next_state = ts_language_next_state(self->language, state, symbol); + if (end_of_non_terminal_extra && next_state == state) { + parent.ptr->extra = true; + } + if (is_fragile || pop.size > 1 || initial_version_count > 1) { + parent.ptr->fragile_left = true; + parent.ptr->fragile_right = true; + parent.ptr->parse_state = TS_TREE_STATE_NONE; + } else { + parent.ptr->parse_state = state; + } + parent.ptr->dynamic_precedence += dynamic_precedence; + + // Push the parent node onto the stack, along with any extra tokens that + // were previously on top of the stack. + ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state); + for (uint32_t j = 0; j < self->trailing_extras.size; j++) { + ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state); + } + + for (StackVersion j = 0; j < slice_version; j++) { + if (j == version) continue; + if (ts_stack_merge(self->stack, j, slice_version)) { + removed_version_count++; + break; + } + } + } + + // Return the first new stack version that was created. 
+ return ts_stack_version_count(self->stack) > initial_version_count + ? initial_version_count + : STACK_VERSION_NONE; +} + +static void ts_parser__accept( + t_parser *self, + StackVersion version, + Subtree lookahead +) { + assert(ts_subtree_is_eof(lookahead)); + ts_stack_push(self->stack, version, lookahead, false, 1); + + StackSliceArray pop = ts_stack_pop_all(self->stack, version); + for (uint32_t i = 0; i < pop.size; i++) { + SubtreeArray trees = pop.contents[i].subtrees; + + Subtree root = NULL_SUBTREE; + for (uint32_t j = trees.size - 1; j + 1 > 0; j--) { + Subtree tree = trees.contents[j]; + if (!ts_subtree_extra(tree)) { + assert(!tree.data.is_inline); + uint32_t child_count = ts_subtree_child_count(tree); + const Subtree *children = ts_subtree_children(tree); + for (uint32_t k = 0; k < child_count; k++) { + ts_subtree_retain(children[k]); + } + array_splice(&trees, j, 1, child_count, children); + root = ts_subtree_from_mut(ts_subtree_new_node( + ts_subtree_symbol(tree), + &trees, + tree.ptr->production_id, + self->language + )); + ts_subtree_release(&self->tree_pool, tree); + break; + } + } + + assert(root.ptr); + self->accept_count++; + + if (self->finished_tree.ptr) { + if (ts_parser__select_tree(self, self->finished_tree, root)) { + ts_subtree_release(&self->tree_pool, self->finished_tree); + self->finished_tree = root; + } else { + ts_subtree_release(&self->tree_pool, root); + } + } else { + self->finished_tree = root; + } + } + + ts_stack_remove_version(self->stack, pop.contents[0].version); + ts_stack_halt(self->stack, version); +} + +static bool ts_parser__do_all_potential_reductions( + t_parser *self, + StackVersion starting_version, + t_symbol lookahead_symbol +) { + uint32_t initial_version_count = ts_stack_version_count(self->stack); + + bool can_shift_lookahead_symbol = false; + StackVersion version = starting_version; + for (unsigned i = 0; true; i++) { + uint32_t version_count = ts_stack_version_count(self->stack); + if (version >= 
version_count) break; + + bool merged = false; + for (StackVersion j = initial_version_count; j < version; j++) { + if (ts_stack_merge(self->stack, j, version)) { + merged = true; + break; + } + } + if (merged) continue; + + t_state_id state = ts_stack_state(self->stack, version); + bool has_shift_action = false; + array_clear(&self->reduce_actions); + + t_symbol first_symbol, end_symbol; + if (lookahead_symbol != 0) { + first_symbol = lookahead_symbol; + end_symbol = lookahead_symbol + 1; + } else { + first_symbol = 1; + end_symbol = self->language->token_count; + } + + for (t_symbol symbol = first_symbol; symbol < end_symbol; symbol++) { + TableEntry entry; + ts_language_table_entry(self->language, state, symbol, &entry); + for (uint32_t j = 0; j < entry.action_count; j++) { + TSParseAction action = entry.actions[j]; + switch (action.type) { + case TSParseActionTypeShift: + case TSParseActionTypeRecover: + if (!action.shift.extra && !action.shift.repetition) has_shift_action = true; + break; + case TSParseActionTypeReduce: + if (action.reduce.child_count > 0) + ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction) { + .symbol = action.reduce.symbol, + .count = action.reduce.child_count, + .dynamic_precedence = action.reduce.dynamic_precedence, + .production_id = action.reduce.production_id, + }); + break; + default: + break; + } + } + } + + StackVersion reduction_version = STACK_VERSION_NONE; + for (uint32_t j = 0; j < self->reduce_actions.size; j++) { + ReduceAction action = self->reduce_actions.contents[j]; + + reduction_version = ts_parser__reduce( + self, version, action.symbol, action.count, + action.dynamic_precedence, action.production_id, + true, false + ); + } + + if (has_shift_action) { + can_shift_lookahead_symbol = true; + } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) { + ts_stack_renumber_version(self->stack, reduction_version, version); + continue; + } else if (lookahead_symbol != 0) { + 
ts_stack_remove_version(self->stack, version); + } + + if (version == starting_version) { + version = version_count; + } else { + version++; + } + } + + return can_shift_lookahead_symbol; +} + +static bool ts_parser__recover_to_state( + t_parser *self, + StackVersion version, + unsigned depth, + t_state_id goal_state +) { + StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth); + StackVersion previous_version = STACK_VERSION_NONE; + + for (unsigned i = 0; i < pop.size; i++) { + StackSlice slice = pop.contents[i]; + + if (slice.version == previous_version) { + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); + array_erase(&pop, i--); + continue; + } + + if (ts_stack_state(self->stack, slice.version) != goal_state) { + ts_stack_halt(self->stack, slice.version); + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); + array_erase(&pop, i--); + continue; + } + + SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version); + if (error_trees.size > 0) { + assert(error_trees.size == 1); + Subtree error_tree = error_trees.contents[0]; + uint32_t error_child_count = ts_subtree_child_count(error_tree); + if (error_child_count > 0) { + array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree)); + for (unsigned j = 0; j < error_child_count; j++) { + ts_subtree_retain(slice.subtrees.contents[j]); + } + } + ts_subtree_array_delete(&self->tree_pool, &error_trees); + } + + ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras); + + if (slice.subtrees.size > 0) { + Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language); + ts_stack_push(self->stack, slice.version, error, false, goal_state); + } else { + array_delete(&slice.subtrees); + } + + for (unsigned j = 0; j < self->trailing_extras.size; j++) { + Subtree tree = self->trailing_extras.contents[j]; + ts_stack_push(self->stack, slice.version, tree, false, goal_state); + } + + previous_version = 
slice.version; + } + + return previous_version != STACK_VERSION_NONE; +} + +static void ts_parser__recover( + t_parser *self, + StackVersion version, + Subtree lookahead +) { + bool did_recover = false; + unsigned previous_version_count = ts_stack_version_count(self->stack); + Length position = ts_stack_position(self->stack, version); + StackSummary *summary = ts_stack_get_summary(self->stack, version); + unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version); + unsigned current_error_cost = ts_stack_error_cost(self->stack, version); + + // When the parser is in the error state, there are two strategies for recovering with a + // given lookahead token: + // 1. Find a previous state on the stack in which that lookahead token would be valid. Then, + // create a new stack version that is in that state again. This entails popping all of the + // subtrees that have been pushed onto the stack since that previous state, and wrapping + // them in an ERROR node. + // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and + // move on to the next lookahead token, remaining in the error state. + // + // First, try the strategy 1. Upon entering the error state, the parser recorded a summary + // of the previous parse states and their depths. Look at each state in the summary, to see + // if the current lookahead token would be valid in that state. + if (summary && !ts_subtree_is_error(lookahead)) { + for (unsigned i = 0; i < summary->size; i++) { + StackSummaryEntry entry = summary->contents[i]; + + if (entry.state == ERROR_STATE) continue; + if (entry.position.bytes == position.bytes) continue; + unsigned depth = entry.depth; + if (node_count_since_error > 0) depth++; + + // Do not recover in ways that create redundant stack versions. 
+ bool would_merge = false; + for (unsigned j = 0; j < previous_version_count; j++) { + if ( + ts_stack_state(self->stack, j) == entry.state && + ts_stack_position(self->stack, j).bytes == position.bytes + ) { + would_merge = true; + break; + } + } + if (would_merge) continue; + + // Do not recover if the result would clearly be worse than some existing stack version. + unsigned new_cost = + current_error_cost + + entry.depth * ERROR_COST_PER_SKIPPED_TREE + + (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR + + (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE; + if (ts_parser__better_version_exists(self, version, false, new_cost)) break; + + // If the current lookahead token is valid in some previous state, recover to that state. + // Then stop looking for further recoveries. + if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) { + if (ts_parser__recover_to_state(self, version, depth, entry.state)) { + did_recover = true; + LOG("recover_to_previous state:%u, depth:%u", entry.state, depth); + LOG_STACK(); + break; + } + } + } + } + + // In the process of attempting to recover, some stack versions may have been created + // and subsequently halted. Remove those versions. + for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) { + if (!ts_stack_is_active(self->stack, i)) { + ts_stack_remove_version(self->stack, i--); + } + } + + // If strategy 1 succeeded, a new stack version will have been created which is able to handle + // the current lookahead token. Now, in addition, try strategy 2 described above: skip the + // current lookahead token by wrapping it in an ERROR node. + + // Don't pursue this additional strategy if there are already too many stack versions. 
+ if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { + ts_stack_halt(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); + return; + } + + if ( + did_recover && + ts_subtree_has_external_scanner_state_change(lookahead) + ) { + ts_stack_halt(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); + return; + } + + // If the parser is still in the error state at the end of the file, just wrap everything + // in an ERROR node and terminate. + if (ts_subtree_is_eof(lookahead)) { + LOG("recover_eof"); + SubtreeArray children = array_new(); + Subtree parent = ts_subtree_new_error_node(&children, false, self->language); + ts_stack_push(self->stack, version, parent, false, 1); + ts_parser__accept(self, version, lookahead); + return; + } + + // Do not recover if the result would clearly be worse than some existing stack version. + unsigned new_cost = + current_error_cost + ERROR_COST_PER_SKIPPED_TREE + + ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR + + ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE; + if (ts_parser__better_version_exists(self, version, false, new_cost)) { + ts_stack_halt(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); + return; + } + + // If the current lookahead token is an extra token, mark it as extra. This means it won't + // be counted in error cost calculations. + unsigned n; + const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n); + if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) { + MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); + ts_subtree_set_extra(&mutable_lookahead, true); + lookahead = ts_subtree_from_mut(mutable_lookahead); + } + + // Wrap the lookahead token in an ERROR. 
+ LOG("skip_token symbol:%s", TREE_NAME(lookahead)); + SubtreeArray children = array_new(); + array_reserve(&children, 1); + array_push(&children, lookahead); + MutableSubtree error_repeat = ts_subtree_new_node( + ts_builtin_sym_error_repeat, + &children, + 0, + self->language + ); + + // If other tokens have already been skipped, so there is already an ERROR at the top of the + // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger + // ERROR. + if (node_count_since_error > 0) { + StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1); + + // TODO: Figure out how to make this condition occur. + // See https://github.com/atom/atom/issues/18450#issuecomment-439579778 + // If multiple stack versions have merged at this point, just pick one of the errors + // arbitrarily and discard the rest. + if (pop.size > 1) { + for (unsigned i = 1; i < pop.size; i++) { + ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees); + } + while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) { + ts_stack_remove_version(self->stack, pop.contents[0].version + 1); + } + } + + ts_stack_renumber_version(self->stack, pop.contents[0].version, version); + array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat)); + error_repeat = ts_subtree_new_node( + ts_builtin_sym_error_repeat, + &pop.contents[0].subtrees, + 0, + self->language + ); + } + + // Push the new ERROR onto the stack. 
+ ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE); + if (ts_subtree_has_external_tokens(lookahead)) { + ts_stack_set_last_external_token( + self->stack, version, ts_subtree_last_external_token(lookahead) + ); + } +} + +static void ts_parser__handle_error( + t_parser *self, + StackVersion version, + Subtree lookahead +) { + uint32_t previous_version_count = ts_stack_version_count(self->stack); + + // Perform any reductions that can happen in this state, regardless of the lookahead. After + // skipping one or more invalid tokens, the parser might find a token that would have allowed + // a reduction to take place. + ts_parser__do_all_potential_reductions(self, version, 0); + uint32_t version_count = ts_stack_version_count(self->stack); + Length position = ts_stack_position(self->stack, version); + + // Push a discontinuity onto the stack. Merge all of the stack versions that + // were created in the previous step. + bool did_insert_missing_token = false; + for (StackVersion v = version; v < version_count;) { + if (!did_insert_missing_token) { + t_state_id state = ts_stack_state(self->stack, v); + for ( + t_symbol missing_symbol = 1; + missing_symbol < (uint16_t)self->language->token_count; + missing_symbol++ + ) { + t_state_id state_after_missing_symbol = ts_language_next_state( + self->language, state, missing_symbol + ); + if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) { + continue; + } + + if (ts_language_has_reduce_action( + self->language, + state_after_missing_symbol, + ts_subtree_leaf_symbol(lookahead) + )) { + // In case the parser is currently outside of any included range, the lexer will + // snap to the beginning of the next included range. The missing token's padding + // must be assigned to position it within the next included range. 
+ ts_lexer_reset(&self->lexer, position); + ts_lexer_mark_end(&self->lexer); + Length padding = length_sub(self->lexer.token_end_position, position); + uint32_t lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead); + + StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v); + Subtree missing_tree = ts_subtree_new_missing_leaf( + &self->tree_pool, missing_symbol, + padding, lookahead_bytes, + self->language + ); + ts_stack_push( + self->stack, version_with_missing_tree, + missing_tree, false, + state_after_missing_symbol + ); + + if (ts_parser__do_all_potential_reductions( + self, version_with_missing_tree, + ts_subtree_leaf_symbol(lookahead) + )) { + LOG( + "recover_with_missing symbol:%s, state:%u", + SYM_NAME(missing_symbol), + ts_stack_state(self->stack, version_with_missing_tree) + ); + did_insert_missing_token = true; + break; + } + } + } + } + + ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE); + v = (v == version) ? previous_version_count : v + 1; + } + + for (unsigned i = previous_version_count; i < version_count; i++) { + bool did_merge = ts_stack_merge(self->stack, version, previous_version_count); + assert(did_merge); + (void)did_merge; // fix warning/error with clang -Os + } + + ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH); + + // Begin recovery with the current lookahead node, rather than waiting for the + // next turn of the parse loop. This ensures that the tree accounts for the + // current lookahead token's "lookahead bytes" value, which describes how far + // the lexer needed to look ahead beyond the content of the token in order to + // recognize it. 
+ if (ts_subtree_child_count(lookahead) > 0) { + ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node); + } + ts_parser__recover(self, version, lookahead); + + LOG_STACK(); +} + +static bool ts_parser__advance( + t_parser *self, + StackVersion version, + bool allow_node_reuse +) { + t_state_id state = ts_stack_state(self->stack, version); + uint32_t position = ts_stack_position(self->stack, version).bytes; + Subtree last_external_token = ts_stack_last_external_token(self->stack, version); + + bool did_reuse = true; + Subtree lookahead = NULL_SUBTREE; + TableEntry table_entry = {.action_count = 0}; + + // If possible, reuse a node from the previous syntax tree. + if (allow_node_reuse) { + lookahead = ts_parser__reuse_node( + self, version, &state, position, last_external_token, &table_entry + ); + } + + // If no node from the previous syntax tree could be reused, then try to + // reuse the token previously returned by the lexer. + if (!lookahead.ptr) { + did_reuse = false; + lookahead = ts_parser__get_cached_token( + self, state, position, last_external_token, &table_entry + ); + } + + bool needs_lex = !lookahead.ptr; + for (;;) { + // Otherwise, re-run the lexer. + if (needs_lex) { + needs_lex = false; + lookahead = ts_parser__lex(self, version, state); + if (self->has_scanner_error) return false; + + if (lookahead.ptr) { + ts_parser__set_cached_token(self, position, last_external_token, lookahead); + ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry); + } + + // When parsing a non-terminal extra, a null lookahead indicates the + // end of the rule. The reduction is stored in the EOF table entry. + // After the reduction, the lexer needs to be run again. + else { + ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry); + } + } + + // If a cancellation flag or a timeout was provided, then check every + // time a fixed number of parse actions has been processed. 
+ if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { + self->operation_count = 0; + } + if ( + self->operation_count == 0 && + ((self->cancellation_flag && atomic_load(self->cancellation_flag)) || + (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock))) + ) { + if (lookahead.ptr) { + ts_subtree_release(&self->tree_pool, lookahead); + } + return false; + } + + // Process each parse action for the current lookahead token in + // the current state. If there are multiple actions, then this is + // an ambiguous state. REDUCE actions always create a new stack + // version, whereas SHIFT actions update the existing stack version + // and terminate this loop. + StackVersion last_reduction_version = STACK_VERSION_NONE; + for (uint32_t i = 0; i < table_entry.action_count; i++) { + TSParseAction action = table_entry.actions[i]; + + switch (action.type) { + case TSParseActionTypeShift: { + if (action.shift.repetition) break; + t_state_id next_state; + if (action.shift.extra) { + next_state = state; + LOG("shift_extra"); + } else { + next_state = action.shift.state; + LOG("shift state:%u", next_state); + } + + if (ts_subtree_child_count(lookahead) > 0) { + ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node); + next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead)); + } + + ts_parser__shift(self, version, next_state, lookahead, action.shift.extra); + if (did_reuse) reusable_node_advance(&self->reusable_node); + return true; + } + + case TSParseActionTypeReduce: { + bool is_fragile = table_entry.action_count > 1; + bool end_of_non_terminal_extra = lookahead.ptr == NULL; + LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count); + StackVersion reduction_version = ts_parser__reduce( + self, version, action.reduce.symbol, action.reduce.child_count, + action.reduce.dynamic_precedence, action.reduce.production_id, + is_fragile, 
end_of_non_terminal_extra + ); + if (reduction_version != STACK_VERSION_NONE) { + last_reduction_version = reduction_version; + } + break; + } + + case TSParseActionTypeAccept: { + LOG("accept"); + ts_parser__accept(self, version, lookahead); + return true; + } + + case TSParseActionTypeRecover: { + if (ts_subtree_child_count(lookahead) > 0) { + ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node); + } + + ts_parser__recover(self, version, lookahead); + if (did_reuse) reusable_node_advance(&self->reusable_node); + return true; + } + } + } + + // If a reduction was performed, then replace the current stack version + // with one of the stack versions created by a reduction, and continue + // processing this version of the stack with the same lookahead symbol. + if (last_reduction_version != STACK_VERSION_NONE) { + ts_stack_renumber_version(self->stack, last_reduction_version, version); + LOG_STACK(); + state = ts_stack_state(self->stack, version); + + // At the end of a non-terminal extra rule, the lexer will return a + // null subtree, because the parser needs to perform a fixed reduction + // regardless of the lookahead node. After performing that reduction, + // (and completing the non-terminal extra rule) run the lexer again based + // on the current parse state. + if (!lookahead.ptr) { + needs_lex = true; + } else { + ts_language_table_entry( + self->language, + state, + ts_subtree_leaf_symbol(lookahead), + &table_entry + ); + } + + continue; + } + + // A non-terminal extra rule was reduced and merged into an existing + // stack version. This version can be discarded. + if (!lookahead.ptr) { + ts_stack_halt(self->stack, version); + return true; + } + + // If there were no parse actions for the current lookahead token, then + // it is not valid in this state. If the current lookahead token is a + // keyword, then switch to treating it as the normal word token if that + // token is valid in this state. 
+ if ( + ts_subtree_is_keyword(lookahead) && + ts_subtree_symbol(lookahead) != self->language->keyword_capture_token + ) { + ts_language_table_entry(self->language, state, self->language->keyword_capture_token, &table_entry); + if (table_entry.action_count > 0) { + LOG( + "switch from_keyword:%s, to_word_token:%s", + TREE_NAME(lookahead), + SYM_NAME(self->language->keyword_capture_token) + ); + + MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); + ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language); + lookahead = ts_subtree_from_mut(mutable_lookahead); + continue; + } + } + + // If the current lookahead token is not valid and the parser is + // already in the error state, restart the error recovery process. + // TODO - can this be unified with the other `RECOVER` case above? + if (state == ERROR_STATE) { + ts_parser__recover(self, version, lookahead); + return true; + } + + // If the current lookahead token is not valid and the previous + // subtree on the stack was reused from an old tree, it isn't actually + // valid to reuse it. Remove it from the stack, and in its place, + // push each of its children. Then try again to process the current + // lookahead. + if (ts_parser__breakdown_top_of_stack(self, version)) { + state = ts_stack_state(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); + needs_lex = true; + continue; + } + + // At this point, the current lookahead token is definitely not valid + // for this parse stack version. Mark this version as paused and continue + // processing any other stack versions that might exist. If some other + // version advances successfully, then this version can simply be removed. + // But if all versions end up paused, then error recovery is needed. 
+ LOG("detect_error"); + ts_stack_pause(self->stack, version, lookahead); + return true; + } +} + +static unsigned ts_parser__condense_stack(t_parser *self) { + bool made_changes = false; + unsigned min_error_cost = UINT_MAX; + for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { + // Prune any versions that have been marked for removal. + if (ts_stack_is_halted(self->stack, i)) { + ts_stack_remove_version(self->stack, i); + i--; + continue; + } + + // Keep track of the minimum error cost of any stack version so + // that it can be returned. + ErrorStatus status_i = ts_parser__version_status(self, i); + if (!status_i.is_in_error && status_i.cost < min_error_cost) { + min_error_cost = status_i.cost; + } + + // Examine each pair of stack versions, removing any versions that + // are clearly worse than another version. Ensure that the versions + // are ordered from most promising to least promising. + for (StackVersion j = 0; j < i; j++) { + ErrorStatus status_j = ts_parser__version_status(self, j); + + switch (ts_parser__compare_versions(self, status_j, status_i)) { + case ErrorComparisonTakeLeft: + made_changes = true; + ts_stack_remove_version(self->stack, i); + i--; + j = i; + break; + + case ErrorComparisonPreferLeft: + case ErrorComparisonNone: + if (ts_stack_merge(self->stack, j, i)) { + made_changes = true; + i--; + j = i; + } + break; + + case ErrorComparisonPreferRight: + made_changes = true; + if (ts_stack_merge(self->stack, j, i)) { + i--; + j = i; + } else { + ts_stack_swap_versions(self->stack, i, j); + } + break; + + case ErrorComparisonTakeRight: + made_changes = true; + ts_stack_remove_version(self->stack, j); + i--; + j--; + break; + } + } + } + + // Enforce a hard upper bound on the number of stack versions by + // discarding the least promising versions. 
+ while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { + ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); + made_changes = true; + } + + // If the best-performing stack version is currently paused, or all + // versions are paused, then resume the best paused version and begin + // the error recovery process. Otherwise, remove the paused versions. + if (ts_stack_version_count(self->stack) > 0) { + bool has_unpaused_version = false; + for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { + if (ts_stack_is_paused(self->stack, i)) { + if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) { + LOG("resume version:%u", i); + min_error_cost = ts_stack_error_cost(self->stack, i); + Subtree lookahead = ts_stack_resume(self->stack, i); + ts_parser__handle_error(self, i, lookahead); + has_unpaused_version = true; + } else { + ts_stack_remove_version(self->stack, i); + i--; + n--; + } + } else { + has_unpaused_version = true; + } + } + } + + if (made_changes) { + LOG("condense"); + LOG_STACK(); + } + + return min_error_cost; +} + +static bool ts_parser_has_outstanding_parse(t_parser *self) { + return ( + self->external_scanner_payload || + ts_stack_state(self->stack, 0) != 1 || + ts_stack_node_count_since_error(self->stack, 0) != 0 + ); +} + +// Parser - Public + +t_parser *ts_parser_new(void) { + t_parser *self = ts_calloc(1, sizeof(t_parser)); + ts_lexer_init(&self->lexer); + array_init(&self->reduce_actions); + array_reserve(&self->reduce_actions, 4); + self->tree_pool = ts_subtree_pool_new(32); + self->stack = ts_stack_new(&self->tree_pool); + self->finished_tree = NULL_SUBTREE; + self->reusable_node = reusable_node_new(); + self->dot_graph_file = NULL; + self->cancellation_flag = NULL; + self->timeout_duration = 0; + self->language = NULL; + self->has_scanner_error = false; + self->external_scanner_payload = NULL; + self->end_clock = clock_null(); + self->operation_count = 0; + self->old_tree = NULL_SUBTREE; + 
self->included_range_differences = (TSRangeArray) array_new(); + self->included_range_difference_index = 0; + ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); + return self; +} + +void ts_parser_delete(t_parser *self) { + if (!self) return; + + ts_parser_set_language(self, NULL); + ts_stack_delete(self->stack); + if (self->reduce_actions.contents) { + array_delete(&self->reduce_actions); + } + if (self->included_range_differences.contents) { + array_delete(&self->included_range_differences); + } + if (self->old_tree.ptr) { + ts_subtree_release(&self->tree_pool, self->old_tree); + self->old_tree = NULL_SUBTREE; + } + ts_lexer_delete(&self->lexer); + ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); + ts_subtree_pool_delete(&self->tree_pool); + reusable_node_delete(&self->reusable_node); + array_delete(&self->trailing_extras); + array_delete(&self->trailing_extras2); + array_delete(&self->scratch_trees); + ts_free(self); +} + +const t_language *ts_parser_language(const t_parser *self) { + return self->language; +} + +bool ts_parser_set_language(t_parser *self, const t_language *language) { + ts_parser_reset(self); + ts_language_delete(self->language); + self->language = NULL; + + if (language) { + if ( + language->version > TREE_SITTER_LANGUAGE_VERSION || + language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION + ) return false; + + + } + + self->language = ts_language_copy(language); + return true; +} + +t_logger ts_parser_logger(const t_parser *self) { + return self->lexer.logger; +} + +void ts_parser_set_logger(t_parser *self, t_logger logger) { + self->lexer.logger = logger; +} + +void ts_parser_print_dot_graphs(t_parser *self, int fd) { + if (self->dot_graph_file) { + fclose(self->dot_graph_file); + } + + if (fd >= 0) { + #ifdef _WIN32 + self->dot_graph_file = _fdopen(fd, "a"); + #else + self->dot_graph_file = fdopen(fd, "a"); + #endif + } else { + self->dot_graph_file = NULL; + } +} + +const size_t 
*ts_parser_cancellation_flag(const t_parser *self) { + return (const size_t *)self->cancellation_flag; +} + +void ts_parser_set_cancellation_flag(t_parser *self, const size_t *flag) { + self->cancellation_flag = (const volatile size_t *)flag; +} + +uint64_t ts_parser_timeout_micros(const t_parser *self) { + return duration_to_micros(self->timeout_duration); +} + +void ts_parser_set_timeout_micros(t_parser *self, uint64_t timeout_micros) { + self->timeout_duration = duration_from_micros(timeout_micros); +} + +bool ts_parser_set_included_ranges( + t_parser *self, + const t_range *ranges, + uint32_t count +) { + return ts_lexer_set_included_ranges(&self->lexer, ranges, count); +} + +const t_range *ts_parser_included_ranges(const t_parser *self, uint32_t *count) { + return ts_lexer_included_ranges(&self->lexer, count); +} + +void ts_parser_reset(t_parser *self) { + ts_parser__external_scanner_destroy(self); + + if (self->old_tree.ptr) { + ts_subtree_release(&self->tree_pool, self->old_tree); + self->old_tree = NULL_SUBTREE; + } + + reusable_node_clear(&self->reusable_node); + ts_lexer_reset(&self->lexer, length_zero()); + ts_stack_clear(self->stack); + ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); + if (self->finished_tree.ptr) { + ts_subtree_release(&self->tree_pool, self->finished_tree); + self->finished_tree = NULL_SUBTREE; + } + self->accept_count = 0; + self->has_scanner_error = false; +} + +t_tree *ts_parser_parse( + t_parser *self, + const t_tree *old_tree, + t_input input +) { + t_tree *result = NULL; + if (!self->language || !input.read) return NULL; + + + + ts_lexer_set_input(&self->lexer, input); + array_clear(&self->included_range_differences); + self->included_range_difference_index = 0; + + if (ts_parser_has_outstanding_parse(self)) { + LOG("resume_parsing"); + } else { + ts_parser__external_scanner_create(self); + if (self->has_scanner_error) goto exit; + + if (old_tree) { + ts_subtree_retain(old_tree->root); + self->old_tree = 
old_tree->root; + ts_range_array_get_changed_ranges( + old_tree->included_ranges, old_tree->included_range_count, + self->lexer.included_ranges, self->lexer.included_range_count, + &self->included_range_differences + ); + reusable_node_reset(&self->reusable_node, old_tree->root); + LOG("parse_after_edit"); + LOG_TREE(self->old_tree); + for (unsigned i = 0; i < self->included_range_differences.size; i++) { + t_range *range = &self->included_range_differences.contents[i]; + LOG("different_included_range %u - %u", range->start_byte, range->end_byte); + } + } else { + reusable_node_clear(&self->reusable_node); + LOG("new_parse"); + } + } + + self->operation_count = 0; + if (self->timeout_duration) { + self->end_clock = clock_after(clock_now(), self->timeout_duration); + } else { + self->end_clock = clock_null(); + } + + uint32_t position = 0, last_position = 0, version_count = 0; + do { + for ( + StackVersion version = 0; + version_count = ts_stack_version_count(self->stack), + version < version_count; + version++ + ) { + bool allow_node_reuse = version_count == 1; + while (ts_stack_is_active(self->stack, version)) { + LOG( + "process version:%u, version_count:%u, state:%d, row:%u, col:%u", + version, + ts_stack_version_count(self->stack), + ts_stack_state(self->stack, version), + ts_stack_position(self->stack, version).extent.row, + ts_stack_position(self->stack, version).extent.column + ); + + if (!ts_parser__advance(self, version, allow_node_reuse)) { + if (self->has_scanner_error) goto exit; + return NULL; + } + + LOG_STACK(); + + position = ts_stack_position(self->stack, version).bytes; + if (position > last_position || (version > 0 && position == last_position)) { + last_position = position; + break; + } + } + } + + // After advancing each version of the stack, re-sort the versions by their cost, + // removing any versions that are no longer worth pursuing. 
+ unsigned min_error_cost = ts_parser__condense_stack(self); + + // If there's already a finished parse tree that's better than any in-progress version, + // then terminate parsing. Clear the parse stack to remove any extra references to subtrees + // within the finished tree, ensuring that these subtrees can be safely mutated in-place + // for rebalancing. + if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) { + ts_stack_clear(self->stack); + break; + } + + while (self->included_range_difference_index < self->included_range_differences.size) { + t_range *range = &self->included_range_differences.contents[self->included_range_difference_index]; + if (range->end_byte <= position) { + self->included_range_difference_index++; + } else { + break; + } + } + } while (version_count != 0); + + assert(self->finished_tree.ptr); + ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language); + LOG("done"); + LOG_TREE(self->finished_tree); + + result = ts_tree_new( + self->finished_tree, + self->language, + self->lexer.included_ranges, + self->lexer.included_range_count + ); + self->finished_tree = NULL_SUBTREE; + +exit: + ts_parser_reset(self); + return result; +} + +t_tree *ts_parser_parse_string( + t_parser *self, + const t_tree *old_tree, + const char *string, + uint32_t length +) { + return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8); +} + +t_tree *ts_parser_parse_string_encoding( + t_parser *self, + const t_tree *old_tree, + const char *string, + uint32_t length, + t_input_encoding encoding +) { + TSStringInput input = {string, length}; + return ts_parser_parse(self, old_tree, (t_input) { + &input, + ts_string_input_read, + encoding, + }); +} + +#undef LOG +#include "src/api.h" +#include "src/alloc.h" +#include "src/array.h" +#include "src/language.h" +#include "src/point.h" +#include "src/tree_cursor.h" +// #include "src/unicode.h" +#include + +// #define DEBUG_ANALYZE_QUERY 
+// #define DEBUG_EXECUTE_QUERY + +#define MAX_STEP_CAPTURE_COUNT 3 +#define MAX_NEGATED_FIELD_COUNT 8 +#define MAX_STATE_PREDECESSOR_COUNT 256 +#define MAX_ANALYSIS_STATE_DEPTH 8 +#define MAX_ANALYSIS_ITERATION_COUNT 256 + +/* + * Stream - A sequence of unicode characters derived from a UTF8 string. + * This struct is used in parsing queries from S-expressions. + */ +typedef struct { + const char *input; + const char *start; + const char *end; + int32_t next; + uint8_t next_size; +} Stream; + +/* + * QueryStep - A step in the process of matching a query. Each node within + * a query S-expression corresponds to one of these steps. An entire pattern + * is represented as a sequence of these steps. The basic properties of a + * node are represented by these fields: + * - `symbol` - The grammar symbol to match. A zero value represents the + * wildcard symbol, '_'. + * - `field` - The field name to match. A zero value means that a field name + * was not specified. + * - `capture_ids` - An array of integers representing the names of captures + * associated with this node in the pattern, terminated by a `NONE` value. + * - `depth` - The depth where this node occurs in the pattern. The root node + * of the pattern has depth zero. + * - `negated_field_list_id` - An id representing a set of fields that must + * not be present on a node matching this step. + * + * Steps have some additional fields in order to handle the `.` (or "anchor") operator, + * which forbids additional child nodes: + * - `is_immediate` - Indicates that the node matching this step cannot be preceded + * by other sibling nodes that weren't specified in the pattern. + * - `is_last_child` - Indicates that the node matching this step cannot have any + * subsequent named siblings. + * + * For simple patterns, steps are matched in sequential order. 
But in order to + * handle alternative/repeated/optional sub-patterns, query steps are not always + * structured as a linear sequence; they sometimes need to split and merge. This + * is done using the following fields: + * - `alternative_index` - The index of a different query step that serves as + * an alternative to this step. A `NONE` value represents no alternative. + * When a query state reaches a step with an alternative index, the state + * is duplicated, with one copy remaining at the original step, and one copy + * moving to the alternative step. The alternative may have its own alternative + * step, so this splitting is an iterative process. + * - `is_dead_end` - Indicates that this state cannot be passed directly, and + * exists only in order to redirect to an alternative index, with no splitting. + * - `is_pass_through` - Indicates that state has no matching logic of its own, + * and exists only to split a state. One copy of the state advances immediately + * to the next step, and one moves to the alternative step. + * - `alternative_is_immediate` - Indicates that this step's alternative step + * should be treated as if `is_immediate` is true. + * + * Steps also store some derived state that summarizes how they relate to other + * steps within the same pattern. This is used to optimize the matching process: + * - `contains_captures` - Indicates that this step or one of its child steps + * has a non-empty `capture_ids` list. + * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then + * it and all of its subsequent sibling steps within the same parent pattern + * are guaranteed to match. + * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but + * for the entire top-level pattern. When iterating through a query's + * captures using `ts_query_cursor_next_capture`, this field is used to + * detect that a capture can safely be returned from a match that has not + * even completed yet. 
+ */ +typedef struct { + t_symbol symbol; + t_symbol supertype_symbol; + t_field_id field; + uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; + uint16_t depth; + uint16_t alternative_index; + uint16_t negated_field_list_id; + bool is_named: 1; + bool is_immediate: 1; + bool is_last_child: 1; + bool is_pass_through: 1; + bool is_dead_end: 1; + bool alternative_is_immediate: 1; + bool contains_captures: 1; + bool root_pattern_guaranteed: 1; + bool parent_pattern_guaranteed: 1; +} QueryStep; + +/* + * Slice - A slice of an external array. Within a query, capture names, + * literal string values, and predicate step information are stored in three + * contiguous arrays. Individual captures, string values, and predicates are + * represented as slices of these three arrays. + */ +typedef struct { + uint32_t offset; + uint32_t length; +} Slice; + +/* + * SymbolTable - a two-way mapping of strings to ids. + */ +typedef struct { + Array(char) characters; + Array(Slice) slices; +} SymbolTable; + +/** + * CaptureQuantifiers - a data structure holding the quantifiers of pattern captures. + */ +typedef Array(uint8_t) CaptureQuantifiers; + +/* + * PatternEntry - Information about the starting point for matching a particular + * pattern. These entries are stored in a 'pattern map' - a sorted array that + * makes it possible to efficiently look up patterns based on the symbol for their + * first step. The entry consists of the following fields: + * - `pattern_index` - the index of the pattern within the query + * - `step_index` - the index of the pattern's first step in the shared `steps` array + * - `is_rooted` - whether or not the pattern has a single root node. This property + * affects decisions about whether or not to start the pattern for nodes outside + * of a QueryCursor's range restriction. 
+ */ +typedef struct { + uint16_t step_index; + uint16_t pattern_index; + bool is_rooted; +} PatternEntry; + +typedef struct { + Slice steps; + Slice predicate_steps; + uint32_t start_byte; + bool is_non_local; +} QueryPattern; + +typedef struct { + uint32_t byte_offset; + uint16_t step_index; +} StepOffset; + +/* + * QueryState - The state of an in-progress match of a particular pattern + * in a query. While executing, a `TSQueryCursor` must keep track of a number + * of possible in-progress matches. Each of those possible matches is + * represented as one of these states. Fields: + * - `id` - A numeric id that is exposed to the public API. This allows the + * caller to remove a given match, preventing any more of its captures + * from being returned. + * - `start_depth` - The depth in the tree where the first step of the state's + * pattern was matched. + * - `pattern_index` - The pattern that the state is matching. + * - `consumed_capture_count` - The number of captures from this match that + * have already been returned. + * - `capture_list_id` - A numeric id that can be used to retrieve the state's + * list of captures from the `CaptureListPool`. + * - `seeking_immediate_match` - A flag that indicates that the state's next + * step must be matched by the very next sibling. This is used when + * processing repetitions. + * - `has_in_progress_alternatives` - A flag that indicates that there are + * other states that have the same captures as this state, but are at + * different steps in their pattern. This means that in order to obey the + * 'longest-match' rule, this state should not be returned as a match until + * it is clear that there can be no other alternative match with more captures. 
+ */ +typedef struct { + uint32_t id; + uint32_t capture_list_id; + uint16_t start_depth; + uint16_t step_index; + uint16_t pattern_index; + uint16_t consumed_capture_count: 12; + bool seeking_immediate_match: 1; + bool has_in_progress_alternatives: 1; + bool dead: 1; + bool needs_parent: 1; +} QueryState; + +typedef Array(t_query_capture) CaptureList; + +/* + * CaptureListPool - A collection of *lists* of captures. Each query state needs + * to maintain its own list of captures. To avoid repeated allocations, this struct + * maintains a fixed set of capture lists, and keeps track of which ones are + * currently in use by a query state. + */ +typedef struct { + Array(CaptureList) list; + CaptureList empty_list; + // The maximum number of capture lists that we are allowed to allocate. We + // never allow `list` to allocate more entries than this, dropping pending + // matches if needed to stay under the limit. + uint32_t max_capture_list_count; + // The number of capture lists allocated in `list` that are not currently in + // use. We reuse those existing-but-unused capture lists before trying to + // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture + // list's length to indicate that it's not in use. + uint32_t free_capture_list_count; +} CaptureListPool; + +/* + * AnalysisState - The state needed for walking the parse table when analyzing + * a query pattern, to determine at which steps the pattern might fail to match. 
+ */ +typedef struct { + t_state_id parse_state; + t_symbol parent_symbol; + uint16_t child_index; + t_field_id field_id: 15; + bool done: 1; +} AnalysisStateEntry; + +typedef struct { + AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH]; + uint16_t depth; + uint16_t step_index; + t_symbol root_symbol; +} AnalysisState; + +typedef Array(AnalysisState *) AnalysisStateSet; + +typedef struct { + AnalysisStateSet states; + AnalysisStateSet next_states; + AnalysisStateSet deeper_states; + AnalysisStateSet state_pool; + Array(uint16_t) final_step_indices; + Array(t_symbol) finished_parent_symbols; + bool did_abort; +} QueryAnalysis; + +/* + * AnalysisSubgraph - A subset of the states in the parse table that are used + * in constructing nodes with a certain symbol. Each state is accompanied by + * some information about the possible node that could be produced in + * downstream states. + */ +typedef struct { + t_state_id state; + uint16_t production_id; + uint8_t child_index: 7; + bool done: 1; +} AnalysisSubgraphNode; + +typedef struct { + t_symbol symbol; + Array(t_state_id) start_states; + Array(AnalysisSubgraphNode) nodes; +} AnalysisSubgraph; + +typedef Array(AnalysisSubgraph) AnalysisSubgraphArray; + +/* + * StatePredecessorMap - A map that stores the predecessors of each parse state. + * This is used during query analysis to determine which parse states can lead + * to which reduce actions. + */ +typedef struct { + t_state_id *contents; +} StatePredecessorMap; + +/* + * TSQuery - A tree query, compiled from a string of S-expressions. The query + * itself is immutable. The mutable state used in the process of executing the + * query is stored in a `TSQueryCursor`. 
+ */ +struct t_query { + SymbolTable captures; + SymbolTable predicate_values; + Array(CaptureQuantifiers) capture_quantifiers; + Array(QueryStep) steps; + Array(PatternEntry) pattern_map; + Array(t_query_predicate_step) predicate_steps; + Array(QueryPattern) patterns; + Array(StepOffset) step_offsets; + Array(t_field_id) negated_fields; + Array(char) string_buffer; + Array(t_symbol) repeat_symbols_with_rootless_patterns; + const t_language *language; + uint16_t wildcard_root_pattern_count; +}; + +/* + * TSQueryCursor - A stateful struct used to execute a query on a tree. + */ +struct t_query_cursor { + const t_query *query; + t_tree_cursor cursor; + Array(QueryState) states; + Array(QueryState) finished_states; + CaptureListPool capture_list_pool; + uint32_t depth; + uint32_t max_start_depth; + uint32_t start_byte; + uint32_t end_byte; + t_point start_point; + t_point end_point; + uint32_t next_state_id; + bool on_visible_node; + bool ascending; + bool halted; + bool did_exceed_match_limit; +}; + +static const t_query_error PARENT_DONE = -1; +static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; +static const uint16_t NONE = UINT16_MAX; +static const t_symbol WILDCARD_SYMBOL = 0; + +/********** + * Stream + **********/ + +// Advance to the next unicode code point in the stream. +static bool stream_advance(Stream *self) { + self->input += self->next_size; + if (self->input < self->end) { + uint32_t size = ts_decode_ascii( + (const uint8_t *)self->input, + (uint32_t)(self->end - self->input), + &self->next + ); + if (size > 0) { + self->next_size = size; + return true; + } + } else { + self->next_size = 0; + self->next = '\0'; + } + return false; +} + +// Reset the stream to the given input position, represented as a pointer +// into the input string. 
+static void stream_reset(Stream *self, const char *input) { + self->input = input; + self->next_size = 0; + stream_advance(self); +} + +static Stream stream_new(const char *string, uint32_t length) { + Stream self = { + .next = 0, + .input = string, + .start = string, + .end = string + length, + }; + stream_advance(&self); + return self; +} + +static void stream_skip_whitespace(Stream *self) { + for (;;) { + if (iswspace(self->next)) { + stream_advance(self); + } else if (self->next == ';') { + // skip over comments + stream_advance(self); + while (self->next && self->next != '\n') { + if (!stream_advance(self)) break; + } + } else { + break; + } + } +} + +static bool stream_is_ident_start(Stream *self) { + return iswalnum(self->next) || self->next == '_' || self->next == '-'; +} + +static void stream_scan_identifier(Stream *stream) { + do { + stream_advance(stream); + } while ( + iswalnum(stream->next) || + stream->next == '_' || + stream->next == '-' || + stream->next == '.' || + stream->next == '?' || + stream->next == '!' + ); +} + +static uint32_t stream_offset(Stream *self) { + return (uint32_t)(self->input - self->start); +} + +/****************** + * CaptureListPool + ******************/ + +static CaptureListPool capture_list_pool_new(void) { + return (CaptureListPool) { + .list = array_new(), + .empty_list = array_new(), + .max_capture_list_count = UINT32_MAX, + .free_capture_list_count = 0, + }; +} + +static void capture_list_pool_reset(CaptureListPool *self) { + for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { + // This invalid size means that the list is not in use. 
+ self->list.contents[i].size = UINT32_MAX; + } + self->free_capture_list_count = self->list.size; +} + +static void capture_list_pool_delete(CaptureListPool *self) { + for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { + array_delete(&self->list.contents[i]); + } + array_delete(&self->list); +} + +static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) { + if (id >= self->list.size) return &self->empty_list; + return &self->list.contents[id]; +} + +static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) { + assert(id < self->list.size); + return &self->list.contents[id]; +} + +static bool capture_list_pool_is_empty(const CaptureListPool *self) { + // The capture list pool is empty if all allocated lists are in use, and we + // have reached the maximum allowed number of allocated lists. + return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count; +} + +static uint16_t capture_list_pool_acquire(CaptureListPool *self) { + // First see if any already allocated capture list is currently unused. + if (self->free_capture_list_count > 0) { + for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { + if (self->list.contents[i].size == UINT32_MAX) { + array_clear(&self->list.contents[i]); + self->free_capture_list_count--; + return i; + } + } + } + + // Otherwise allocate and initialize a new capture list, as long as that + // doesn't put us over the requested maximum. 
+ uint32_t i = self->list.size; + if (i >= self->max_capture_list_count) { + return NONE; + } + CaptureList list; + array_init(&list); + array_push(&self->list, list); + return i; +} + +static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { + if (id >= self->list.size) return; + self->list.contents[id].size = UINT32_MAX; + self->free_capture_list_count++; +} + +/************** + * Quantifiers + **************/ + +static t_quantifier quantifier_mul( + t_quantifier left, + t_quantifier right +) { + switch (left) + { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + }; + break; + case TSQuantifierZeroOrMore: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + }; + break; + case TSQuantifierOne: + return right; + case TSQuantifierOneOrMore: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + } + return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
+} + +static t_quantifier quantifier_join( + t_quantifier left, + t_quantifier right +) { + switch (left) + { + case TSQuantifierZero: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + }; + break; + case TSQuantifierZeroOrOne: + switch (right) { + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + return TSQuantifierZeroOrOne; + break; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + break; + }; + break; + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + switch (right) { + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + return TSQuantifierOne; + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierOneOrMore: + switch (right) { + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + } + return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
+} + +static t_quantifier quantifier_add( + t_quantifier left, + t_quantifier right +) { + switch (left) + { + case TSQuantifierZero: + return right; + case TSQuantifierZeroOrOne: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierZeroOrMore: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZeroOrMore; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierOne: + switch (right) { + case TSQuantifierZero: + return TSQuantifierOne; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + } + return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
+} + +// Create new capture quantifiers structure +static CaptureQuantifiers capture_quantifiers_new(void) { + return (CaptureQuantifiers) array_new(); +} + +// Delete capture quantifiers structure +static void capture_quantifiers_delete( + CaptureQuantifiers *self +) { + array_delete(self); +} + +// Clear capture quantifiers structure +static void capture_quantifiers_clear( + CaptureQuantifiers *self +) { + array_clear(self); +} + +// Replace capture quantifiers with the given quantifiers +static void capture_quantifiers_replace( + CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers +) { + array_clear(self); + array_push_all(self, quantifiers); +} + +// Return capture quantifier for the given capture id +static t_quantifier capture_quantifier_for_id( + const CaptureQuantifiers *self, + uint16_t id +) { + return (self->size <= id) ? TSQuantifierZero : (t_quantifier) *array_get(self, id); +} + +// Add the given quantifier to the current value for id +static void capture_quantifiers_add_for_id( + CaptureQuantifiers *self, + uint16_t id, + t_quantifier quantifier +) { + if (self->size <= id) { + array_grow_by(self, id + 1 - self->size); + } + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_add((t_quantifier) *own_quantifier, quantifier); +} + +// Point-wise add the given quantifiers to the current values +static void capture_quantifiers_add_all( + CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers +) { + if (self->size < quantifiers->size) { + array_grow_by(self, quantifiers->size - self->size); + } + for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) { + uint8_t *quantifier = array_get(quantifiers, id); + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_add((t_quantifier) *own_quantifier, (t_quantifier) *quantifier); + } +} + +// Join the given quantifier with the current values +static void capture_quantifiers_mul( + CaptureQuantifiers *self, + t_quantifier 
quantifier +) { + for (uint16_t id = 0; id < (uint16_t)self->size; id++) { + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_mul((t_quantifier) *own_quantifier, quantifier); + } +} + +// Point-wise join the quantifiers from a list of alternatives with the current values +static void capture_quantifiers_join_all( + CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers +) { + if (self->size < quantifiers->size) { + array_grow_by(self, quantifiers->size - self->size); + } + for (uint32_t id = 0; id < quantifiers->size; id++) { + uint8_t *quantifier = array_get(quantifiers, id); + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_join((t_quantifier) *own_quantifier, (t_quantifier) *quantifier); + } + for (uint32_t id = quantifiers->size; id < self->size; id++) { + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_join((t_quantifier) *own_quantifier, TSQuantifierZero); + } +} + +/************** + * SymbolTable + **************/ + +static SymbolTable symbol_table_new(void) { + return (SymbolTable) { + .characters = array_new(), + .slices = array_new(), + }; +} + +static void symbol_table_delete(SymbolTable *self) { + array_delete(&self->characters); + array_delete(&self->slices); +} + +static int symbol_table_id_for_name( + const SymbolTable *self, + const char *name, + uint32_t length +) { + for (unsigned i = 0; i < self->slices.size; i++) { + Slice slice = self->slices.contents[i]; + if ( + slice.length == length && + !strncmp(&self->characters.contents[slice.offset], name, length) + ) return i; + } + return -1; +} + +static const char *symbol_table_name_for_id( + const SymbolTable *self, + uint16_t id, + uint32_t *length +) { + Slice slice = self->slices.contents[id]; + *length = slice.length; + return &self->characters.contents[slice.offset]; +} + +static uint16_t symbol_table_insert_name( + SymbolTable *self, + const char *name, + uint32_t length 
+) { + int id = symbol_table_id_for_name(self, name, length); + if (id >= 0) return (uint16_t)id; + Slice slice = { + .offset = self->characters.size, + .length = length, + }; + array_grow_by(&self->characters, length + 1); + memcpy(&self->characters.contents[slice.offset], name, length); + self->characters.contents[self->characters.size - 1] = 0; + array_push(&self->slices, slice); + return self->slices.size - 1; +} + +/************ + * QueryStep + ************/ + +static QueryStep query_step__new( + t_symbol symbol, + uint16_t depth, + bool is_immediate +) { + QueryStep step = { + .symbol = symbol, + .depth = depth, + .field = 0, + .alternative_index = NONE, + .negated_field_list_id = 0, + .contains_captures = false, + .is_last_child = false, + .is_named = false, + .is_pass_through = false, + .is_dead_end = false, + .root_pattern_guaranteed = false, + .is_immediate = is_immediate, + .alternative_is_immediate = false, + }; + for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { + step.capture_ids[i] = NONE; + } + return step; +} + +static void query_step__add_capture(QueryStep *self, uint16_t capture_id) { + for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { + if (self->capture_ids[i] == NONE) { + self->capture_ids[i] = capture_id; + break; + } + } +} + +static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { + for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { + if (self->capture_ids[i] == capture_id) { + self->capture_ids[i] = NONE; + while (i + 1 < MAX_STEP_CAPTURE_COUNT) { + if (self->capture_ids[i + 1] == NONE) break; + self->capture_ids[i] = self->capture_ids[i + 1]; + self->capture_ids[i + 1] = NONE; + i++; + } + break; + } + } +} + +/********************** + * StatePredecessorMap + **********************/ + +static inline StatePredecessorMap state_predecessor_map_new( + const t_language *language +) { + return (StatePredecessorMap) { + .contents = ts_calloc( + (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 
1), + sizeof(t_state_id) + ), + }; +} + +static inline void state_predecessor_map_delete(StatePredecessorMap *self) { + ts_free(self->contents); +} + +static inline void state_predecessor_map_add( + StatePredecessorMap *self, + t_state_id state, + t_state_id predecessor +) { + size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); + t_state_id *count = &self->contents[index]; + if ( + *count == 0 || + (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor) + ) { + (*count)++; + self->contents[index + *count] = predecessor; + } +} + +static inline const t_state_id *state_predecessor_map_get( + const StatePredecessorMap *self, + t_state_id state, + unsigned *count +) { + size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); + *count = self->contents[index]; + return &self->contents[index + 1]; +} + +/**************** + * AnalysisState + ****************/ + +static unsigned analysis_state__recursion_depth(const AnalysisState *self) { + unsigned result = 0; + for (unsigned i = 0; i < self->depth; i++) { + t_symbol symbol = self->stack[i].parent_symbol; + for (unsigned j = 0; j < i; j++) { + if (self->stack[j].parent_symbol == symbol) { + result++; + break; + } + } + } + return result; +} + +static inline int analysis_state__compare_position( + AnalysisState *const *self, + AnalysisState *const *other +) { + for (unsigned i = 0; i < (*self)->depth; i++) { + if (i >= (*other)->depth) return -1; + if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1; + if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1; + } + if ((*self)->depth < (*other)->depth) return 1; + if ((*self)->step_index < (*other)->step_index) return -1; + if ((*self)->step_index > (*other)->step_index) return 1; + return 0; +} + +static inline int analysis_state__compare( + AnalysisState *const *self, + AnalysisState *const *other +) { + int result = analysis_state__compare_position(self, other); + if 
(result != 0) return result; + for (unsigned i = 0; i < (*self)->depth; i++) { + if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1; + if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1; + if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1; + if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1; + if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1; + if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1; + } + return 0; +} + +static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { + if (self->depth == 0) { + return &self->stack[0]; + } + return &self->stack[self->depth - 1]; +} + +static inline bool analysis_state__has_supertype(AnalysisState *self, t_symbol symbol) { + for (unsigned i = 0; i < self->depth; i++) { + if (self->stack[i].parent_symbol == symbol) return true; + } + return false; +} + +/****************** + * AnalysisStateSet + ******************/ + +// Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by +// cloning one from scratch. +static inline AnalysisState *analysis_state_pool__clone_or_reuse( + AnalysisStateSet *self, + AnalysisState *borrowed_item +) { + AnalysisState *new_item; + if (self->size) { + new_item = array_pop(self); + } else { + new_item = ts_malloc(sizeof(AnalysisState)); + } + *new_item = *borrowed_item; + return new_item; +} + +// Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this +// set. The set does not contain duplicates, so if the item is already present, it will not be +// inserted, and no clone will be made. +// +// The caller retains ownership of the passed-in memory. However, the clone that is created by this +// function will be managed by the state set. 
+static inline void analysis_state_set__insert_sorted( + AnalysisStateSet *self, + AnalysisStateSet *pool, + AnalysisState *borrowed_item +) { + unsigned index, exists; + array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists); + if (!exists) { + AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); + array_insert(self, index, new_item); + } +} + +// Inserts a clone of the passed-in item at the end position of this list. +// +// IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function +// `analysis_state__compare`) than largest item already in this set. If items are inserted in the +// wrong order, the set will not function properly for future use. +// +// The caller retains ownership of the passed-in memory. However, the clone that is created by this +// function will be managed by the state set. +static inline void analysis_state_set__push( + AnalysisStateSet *self, + AnalysisStateSet *pool, + AnalysisState *borrowed_item +) { + AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); + array_push(self, new_item); +} + +// Removes all items from this set, returning it to an empty state. +static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) { + array_push_all(pool, self); + array_clear(self); +} + +// Releases all memory that is managed with this state set, including any items currently present. +// After calling this function, the set is no longer suitable for use. 
+static inline void analysis_state_set__delete(AnalysisStateSet *self) { + for (unsigned i = 0; i < self->size; i++) { + ts_free(self->contents[i]); + } + array_delete(self); +} + +/**************** + * QueryAnalyzer + ****************/ + +static inline QueryAnalysis query_analysis__new(void) { + return (QueryAnalysis) { + .states = array_new(), + .next_states = array_new(), + .deeper_states = array_new(), + .state_pool = array_new(), + .final_step_indices = array_new(), + .finished_parent_symbols = array_new(), + .did_abort = false, + }; +} + +static inline void query_analysis__delete(QueryAnalysis *self) { + analysis_state_set__delete(&self->states); + analysis_state_set__delete(&self->next_states); + analysis_state_set__delete(&self->deeper_states); + analysis_state_set__delete(&self->state_pool); + array_delete(&self->final_step_indices); + array_delete(&self->finished_parent_symbols); +} + +/*********************** + * AnalysisSubgraphNode + ***********************/ + +static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) { + if (self->state < other->state) return -1; + if (self->state > other->state) return 1; + if (self->child_index < other->child_index) return -1; + if (self->child_index > other->child_index) return 1; + if (self->done < other->done) return -1; + if (self->done > other->done) return 1; + if (self->production_id < other->production_id) return -1; + if (self->production_id > other->production_id) return 1; + return 0; +} + +/********* + * Query + *********/ + +// The `pattern_map` contains a mapping from TSSymbol values to indices in the +// `steps` array. For a given syntax node, the `pattern_map` makes it possible +// to quickly find the starting steps of all of the patterns whose root matches +// that node. 
Each entry has two fields: a `pattern_index`, which identifies one +// of the patterns in the query, and a `step_index`, which indicates the start +// offset of that pattern's steps within the `steps` array. +// +// The entries are sorted by the patterns' root symbols, and lookups use a +// binary search. This ensures that the cost of this initial lookup step +// scales logarithmically with the number of patterns in the query. +// +// This returns `true` if the symbol is present and `false` otherwise. +// If the symbol is not present `*result` is set to the index where the +// symbol should be inserted. +static inline bool ts_query__pattern_map_search( + const t_query *self, + t_symbol needle, + uint32_t *result +) { + uint32_t base_index = self->wildcard_root_pattern_count; + uint32_t size = self->pattern_map.size - base_index; + if (size == 0) { + *result = base_index; + return false; + } + while (size > 1) { + uint32_t half_size = size / 2; + uint32_t mid_index = base_index + half_size; + t_symbol mid_symbol = self->steps.contents[ + self->pattern_map.contents[mid_index].step_index + ].symbol; + if (needle > mid_symbol) base_index = mid_index; + size -= half_size; + } + + t_symbol symbol = self->steps.contents[ + self->pattern_map.contents[base_index].step_index + ].symbol; + + if (needle > symbol) { + base_index++; + if (base_index < self->pattern_map.size) { + symbol = self->steps.contents[ + self->pattern_map.contents[base_index].step_index + ].symbol; + } + } + + *result = base_index; + return needle == symbol; +} + +// Insert a new pattern's start index into the pattern map, maintaining +// the pattern map's ordering invariant. +static inline void ts_query__pattern_map_insert( + t_query *self, + t_symbol symbol, + PatternEntry new_entry +) { + uint32_t index; + ts_query__pattern_map_search(self, symbol, &index); + + // Ensure that the entries are sorted not only by symbol, but also + // by pattern_index. 
This way, states for earlier patterns will be + // initiated first, which allows the ordering of the states array + // to be maintained more efficiently. + while (index < self->pattern_map.size) { + PatternEntry *entry = &self->pattern_map.contents[index]; + if ( + self->steps.contents[entry->step_index].symbol == symbol && + entry->pattern_index < new_entry.pattern_index + ) { + index++; + } else { + break; + } + } + + array_insert(&self->pattern_map, index, new_entry); +} + +// Walk the subgraph for this non-terminal, tracking all of the possible +// sequences of progress within the pattern. +static void ts_query__perform_analysis( + t_query *self, + const AnalysisSubgraphArray *subgraphs, + QueryAnalysis *analysis +) { + unsigned recursion_depth_limit = 0; + unsigned prev_final_step_count = 0; + array_clear(&analysis->final_step_indices); + array_clear(&analysis->finished_parent_symbols); + + for (unsigned iteration = 0;; iteration++) { + if (iteration == MAX_ANALYSIS_ITERATION_COUNT) { + analysis->did_abort = true; + break; + } + + #ifdef DEBUG_ANALYZE_QUERY + printf("Iteration: %u. 
Final step indices:", iteration); + for (unsigned j = 0; j < analysis->final_step_indices.size; j++) { + printf(" %4u", analysis->final_step_indices.contents[j]); + } + printf("\n"); + for (unsigned j = 0; j < analysis->states.size; j++) { + AnalysisState *state = analysis->states.contents[j]; + printf(" %3u: step: %u, stack: [", j, state->step_index); + for (unsigned k = 0; k < state->depth; k++) { + printf( + " {%s, child: %u, state: %4u", + self->language->symbol_names[state->stack[k].parent_symbol], + state->stack[k].child_index, + state->stack[k].parse_state + ); + if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]); + if (state->stack[k].done) printf(", DONE"); + printf("}"); + } + printf(" ]\n"); + } + #endif + + // If no further progress can be made within the current recursion depth limit, then + // bump the depth limit by one, and continue to process the states the exceeded the + // limit. But only allow this if progress has been made since the last time the depth + // limit was increased. + if (analysis->states.size == 0) { + if ( + analysis->deeper_states.size > 0 && + analysis->final_step_indices.size > prev_final_step_count + ) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1); + #endif + + prev_final_step_count = analysis->final_step_indices.size; + recursion_depth_limit++; + AnalysisStateSet _states = analysis->states; + analysis->states = analysis->deeper_states; + analysis->deeper_states = _states; + continue; + } + + break; + } + + analysis_state_set__clear(&analysis->next_states, &analysis->state_pool); + for (unsigned j = 0; j < analysis->states.size; j++) { + AnalysisState * const state = analysis->states.contents[j]; + + // For efficiency, it's important to avoid processing the same analysis state more + // than once. To achieve this, keep the states in order of ascending position within + // their hypothetical syntax trees. 
In each iteration of this loop, start by advancing + // the states that have made the least progress. Avoid advancing states that have already + // made more progress. + if (analysis->next_states.size > 0) { + int comparison = analysis_state__compare_position( + &state, + array_back(&analysis->next_states) + ); + if (comparison == 0) { + analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state); + continue; + } else if (comparison > 0) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Terminate iteration at state %u\n", j); + #endif + while (j < analysis->states.size) { + analysis_state_set__push( + &analysis->next_states, + &analysis->state_pool, + analysis->states.contents[j] + ); + j++; + } + break; + } + } + + const t_state_id parse_state = analysis_state__top(state)->parse_state; + const t_symbol parent_symbol = analysis_state__top(state)->parent_symbol; + const t_field_id parent_field_id = analysis_state__top(state)->field_id; + const unsigned child_index = analysis_state__top(state)->child_index; + const QueryStep * const step = &self->steps.contents[state->step_index]; + + unsigned subgraph_index, exists; + array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); + if (!exists) continue; + const AnalysisSubgraph *subgraph = &subgraphs->contents[subgraph_index]; + + // Follow every possible path in the parse table, but only visit states that + // are part of the subgraph for the current symbol. 
+ LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state); + while (ts_lookahead_iterator__next(&lookahead_iterator)) { + t_symbol sym = lookahead_iterator.symbol; + + AnalysisSubgraphNode successor = { + .state = parse_state, + .child_index = child_index, + }; + if (lookahead_iterator.action_count) { + const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1]; + if (action->type == TSParseActionTypeShift) { + if (!action->shift.extra) { + successor.state = action->shift.state; + successor.child_index++; + } + } else { + continue; + } + } else if (lookahead_iterator.next_state != 0) { + successor.state = lookahead_iterator.next_state; + successor.child_index++; + } else { + continue; + } + + unsigned node_index; + array_search_sorted_with( + &subgraph->nodes, + analysis_subgraph_node__compare, &successor, + &node_index, &exists + ); + while (node_index < subgraph->nodes.size) { + AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++]; + if (node->state != successor.state || node->child_index != successor.child_index) break; + + // Use the subgraph to determine what alias and field will eventually be applied + // to this child node. + t_symbol alias = ts_language_alias_at(self->language, node->production_id, child_index); + t_symbol visible_symbol = alias + ? alias + : self->language->symbol_metadata[sym].visible + ? self->language->public_symbol_map[sym] + : 0; + t_field_id field_id = parent_field_id; + if (!field_id) { + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); + for (; field_map != field_map_end; field_map++) { + if (!field_map->inherited && field_map->child_index == child_index) { + field_id = field_map->field_id; + break; + } + } + } + + // Create a new state that has advanced past this hypothetical subtree. 
+ AnalysisState next_state = *state; + AnalysisStateEntry *next_state_top = analysis_state__top(&next_state); + next_state_top->child_index = successor.child_index; + next_state_top->parse_state = successor.state; + if (node->done) next_state_top->done = true; + + // Determine if this hypothetical child node would match the current step + // of the query pattern. + bool does_match = false; + if (visible_symbol) { + does_match = true; + if (step->symbol == WILDCARD_SYMBOL) { + if ( + step->is_named && + !self->language->symbol_metadata[visible_symbol].named + ) does_match = false; + } else if (step->symbol != visible_symbol) { + does_match = false; + } + if (step->field && step->field != field_id) { + does_match = false; + } + if ( + step->supertype_symbol && + !analysis_state__has_supertype(state, step->supertype_symbol) + ) does_match = false; + } + + // If this child is hidden, then descend into it and walk through its children. + // If the top entry of the stack is at the end of its rule, then that entry can + // be replaced. Otherwise, push a new entry onto the stack. + else if (sym >= self->language->token_count) { + if (!next_state_top->done) { + if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Exceeded depth limit for state %u\n", j); + #endif + + analysis->did_abort = true; + continue; + } + + next_state.depth++; + next_state_top = analysis_state__top(&next_state); + } + + *next_state_top = (AnalysisStateEntry) { + .parse_state = parse_state, + .parent_symbol = sym, + .child_index = 0, + .field_id = field_id, + .done = false, + }; + + if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) { + analysis_state_set__insert_sorted( + &analysis->deeper_states, + &analysis->state_pool, + &next_state + ); + continue; + } + } + + // Pop from the stack when this state reached the end of its current syntax node. 
+ while (next_state.depth > 0 && next_state_top->done) { + next_state.depth--; + next_state_top = analysis_state__top(&next_state); + } + + // If this hypothetical child did match the current step of the query pattern, + // then advance to the next step at the current depth. This involves skipping + // over any descendant steps of the current child. + const QueryStep *next_step = step; + if (does_match) { + for (;;) { + next_state.step_index++; + next_step = &self->steps.contents[next_state.step_index]; + if ( + next_step->depth == PATTERN_DONE_MARKER || + next_step->depth <= step->depth + ) break; + } + } else if (successor.state == parse_state) { + continue; + } + + for (;;) { + // Skip pass-through states. Although these states have alternatives, they are only + // used to implement repetitions, and query analysis does not need to process + // repetitions in order to determine whether steps are possible and definite. + if (next_step->is_pass_through) { + next_state.step_index++; + next_step++; + continue; + } + + // If the pattern is finished or hypothetical parent node is complete, then + // record that matching can terminate at this step of the pattern. Otherwise, + // add this state to the list of states to process on the next iteration. + if (!next_step->is_dead_end) { + bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth; + if (did_finish_pattern) { + array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol); + } else if (next_state.depth == 0) { + array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index); + } else { + analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state); + } + } + + // If the state has advanced to a step with an alternative step, then add another state + // at that alternative step. 
This process is simpler than the process of actually matching a + // pattern during query execution, because for the purposes of query analysis, there is no + // need to process repetitions. + if ( + does_match && + next_step->alternative_index != NONE && + next_step->alternative_index > next_state.step_index + ) { + next_state.step_index = next_step->alternative_index; + next_step = &self->steps.contents[next_state.step_index]; + } else { + break; + } + } + } + } + } + + AnalysisStateSet _states = analysis->states; + analysis->states = analysis->next_states; + analysis->next_states = _states; + } +} + +static bool ts_query__analyze_patterns(t_query *self, unsigned *error_offset) { + Array(uint16_t) non_rooted_pattern_start_steps = array_new(); + for (unsigned i = 0; i < self->pattern_map.size; i++) { + PatternEntry *pattern = &self->pattern_map.contents[i]; + if (!pattern->is_rooted) { + QueryStep *step = &self->steps.contents[pattern->step_index]; + if (step->symbol != WILDCARD_SYMBOL) { + array_push(&non_rooted_pattern_start_steps, i); + } + } + } + + // Walk forward through all of the steps in the query, computing some + // basic information about each step. Mark all of the steps that contain + // captures, and record the indices of all of the steps that have child steps. 
+ Array(uint32_t) parent_step_indices = array_new(); + for (unsigned i = 0; i < self->steps.size; i++) { + QueryStep *step = &self->steps.contents[i]; + if (step->depth == PATTERN_DONE_MARKER) { + step->parent_pattern_guaranteed = true; + step->root_pattern_guaranteed = true; + continue; + } + + bool has_children = false; + bool is_wildcard = step->symbol == WILDCARD_SYMBOL; + step->contains_captures = step->capture_ids[0] != NONE; + for (unsigned j = i + 1; j < self->steps.size; j++) { + QueryStep *next_step = &self->steps.contents[j]; + if ( + next_step->depth == PATTERN_DONE_MARKER || + next_step->depth <= step->depth + ) break; + if (next_step->capture_ids[0] != NONE) { + step->contains_captures = true; + } + if (!is_wildcard) { + next_step->root_pattern_guaranteed = true; + next_step->parent_pattern_guaranteed = true; + } + has_children = true; + } + + if (has_children && !is_wildcard) { + array_push(&parent_step_indices, i); + } + } + + // For every parent symbol in the query, initialize an 'analysis subgraph'. + // This subgraph lists all of the states in the parse table that are directly + // involved in building subtrees for this symbol. + // + // In addition to the parent symbols in the query, construct subgraphs for all + // of the hidden symbols in the grammar, because these might occur within + // one of the parent nodes, such that their children appear to belong to the + // parent. 
+ AnalysisSubgraphArray subgraphs = array_new(); + for (unsigned i = 0; i < parent_step_indices.size; i++) { + uint32_t parent_step_index = parent_step_indices.contents[i]; + t_symbol parent_symbol = self->steps.contents[parent_step_index].symbol; + AnalysisSubgraph subgraph = { .symbol = parent_symbol }; + array_insert_sorted_by(&subgraphs, .symbol, subgraph); + } + for (t_symbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) { + if (!ts_language_symbol_metadata(self->language, sym).visible) { + AnalysisSubgraph subgraph = { .symbol = sym }; + array_insert_sorted_by(&subgraphs, .symbol, subgraph); + } + } + + // Scan the parse table to find the data needed to populate these subgraphs. + // Collect three things during this scan: + // 1) All of the parse states where one of these symbols can start. + // 2) All of the parse states where one of these symbols can end, along + // with information about the node that would be created. + // 3) A list of predecessor states for each state. 
+ StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language); + for (t_state_id state = 1; state < (uint16_t)self->language->state_count; state++) { + unsigned subgraph_index, exists; + LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state); + while (ts_lookahead_iterator__next(&lookahead_iterator)) { + if (lookahead_iterator.action_count) { + for (unsigned i = 0; i < lookahead_iterator.action_count; i++) { + const TSParseAction *action = &lookahead_iterator.actions[i]; + if (action->type == TSParseActionTypeReduce) { + const t_symbol *aliases, *aliases_end; + ts_language_aliases_for_symbol( + self->language, + action->reduce.symbol, + &aliases, + &aliases_end + ); + for (const t_symbol *symbol = aliases; symbol < aliases_end; symbol++) { + array_search_sorted_by( + &subgraphs, + .symbol, + *symbol, + &subgraph_index, + &exists + ); + if (exists) { + AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { + array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { + .state = state, + .production_id = action->reduce.production_id, + .child_index = action->reduce.child_count, + .done = true, + })); + } + } + } + } else if (action->type == TSParseActionTypeShift && !action->shift.extra) { + t_state_id next_state = action->shift.state; + state_predecessor_map_add(&predecessor_map, next_state, state); + } + } + } else if (lookahead_iterator.next_state != 0) { + if (lookahead_iterator.next_state != state) { + state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); + } + if (ts_language_state_is_primary(self->language, state)) { + const t_symbol *aliases, *aliases_end; + ts_language_aliases_for_symbol( + self->language, + lookahead_iterator.symbol, + &aliases, + &aliases_end + ); + for (const t_symbol *symbol = aliases; symbol < aliases_end; symbol++) { + array_search_sorted_by( + &subgraphs, + .symbol, + 
*symbol, + &subgraph_index, + &exists + ); + if (exists) { + AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + if ( + subgraph->start_states.size == 0 || + *array_back(&subgraph->start_states) != state + ) + array_push(&subgraph->start_states, state); + } + } + } + } + } + } + + // For each subgraph, compute the preceding states by walking backward + // from the end states using the predecessor map. + Array(AnalysisSubgraphNode) next_nodes = array_new(); + for (unsigned i = 0; i < subgraphs.size; i++) { + AnalysisSubgraph *subgraph = &subgraphs.contents[i]; + if (subgraph->nodes.size == 0) { + array_delete(&subgraph->start_states); + array_erase(&subgraphs, i); + i--; + continue; + } + array_assign(&next_nodes, &subgraph->nodes); + while (next_nodes.size > 0) { + AnalysisSubgraphNode node = array_pop(&next_nodes); + if (node.child_index > 1) { + unsigned predecessor_count; + const t_state_id *predecessors = state_predecessor_map_get( + &predecessor_map, + node.state, + &predecessor_count + ); + for (unsigned j = 0; j < predecessor_count; j++) { + AnalysisSubgraphNode predecessor_node = { + .state = predecessors[j], + .child_index = node.child_index - 1, + .production_id = node.production_id, + .done = false, + }; + unsigned index, exists; + array_search_sorted_with( + &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node, + &index, &exists + ); + if (!exists) { + array_insert(&subgraph->nodes, index, predecessor_node); + array_push(&next_nodes, predecessor_node); + } + } + } + } + } + + #ifdef DEBUG_ANALYZE_QUERY + printf("\nSubgraphs:\n"); + for (unsigned i = 0; i < subgraphs.size; i++) { + AnalysisSubgraph *subgraph = &subgraphs.contents[i]; + printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol)); + for (unsigned j = 0; j < subgraph->start_states.size; j++) { + printf( + " {state: %u}\n", + subgraph->start_states.contents[j] + ); + } + for (unsigned j = 0; j < subgraph->nodes.size; j++) 
{ + AnalysisSubgraphNode *node = &subgraph->nodes.contents[j]; + printf( + " {state: %u, child_index: %u, production_id: %u, done: %d}\n", + node->state, node->child_index, node->production_id, node->done + ); + } + printf("\n"); + } + #endif + + // For each non-terminal pattern, determine if the pattern can successfully match, + // and identify all of the possible children within the pattern where matching could fail. + bool all_patterns_are_valid = true; + QueryAnalysis analysis = query_analysis__new(); + for (unsigned i = 0; i < parent_step_indices.size; i++) { + uint16_t parent_step_index = parent_step_indices.contents[i]; + uint16_t parent_depth = self->steps.contents[parent_step_index].depth; + t_symbol parent_symbol = self->steps.contents[parent_step_index].symbol; + if (parent_symbol == ts_builtin_sym_error) continue; + + // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's + // root symbol is a terminal, then return an error. + unsigned subgraph_index, exists; + array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); + if (!exists) { + unsigned first_child_step_index = parent_step_index + 1; + uint32_t j, child_exists; + array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists); + assert(child_exists); + *error_offset = self->step_offsets.contents[j].byte_offset; + all_patterns_are_valid = false; + break; + } + + // Initialize an analysis state at every parse state in the table where + // this parent symbol can occur. 
+ AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + analysis_state_set__clear(&analysis.states, &analysis.state_pool); + analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); + for (unsigned j = 0; j < subgraph->start_states.size; j++) { + t_state_id parse_state = subgraph->start_states.contents[j]; + analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { + .step_index = parent_step_index + 1, + .stack = { + [0] = { + .parse_state = parse_state, + .parent_symbol = parent_symbol, + .child_index = 0, + .field_id = 0, + .done = false, + }, + }, + .depth = 1, + .root_symbol = parent_symbol, + })); + } + + #ifdef DEBUG_ANALYZE_QUERY + printf( + "\nWalk states for %s:\n", + ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol) + ); + #endif + + analysis.did_abort = false; + ts_query__perform_analysis(self, &subgraphs, &analysis); + + // If this pattern could not be fully analyzed, then every step should + // be considered fallible. + if (analysis.did_abort) { + for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) { + QueryStep *step = &self->steps.contents[j]; + if ( + step->depth <= parent_depth || + step->depth == PATTERN_DONE_MARKER + ) break; + if (!step->is_dead_end) { + step->parent_pattern_guaranteed = false; + step->root_pattern_guaranteed = false; + } + } + continue; + } + + // If this pattern cannot match, store the pattern index so that it can be + // returned to the caller. 
+ if (analysis.finished_parent_symbols.size == 0) { + assert(analysis.final_step_indices.size > 0); + uint16_t impossible_step_index = *array_back(&analysis.final_step_indices); + uint32_t j, impossible_exists; + array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists); + if (j >= self->step_offsets.size) j = self->step_offsets.size - 1; + *error_offset = self->step_offsets.contents[j].byte_offset; + all_patterns_are_valid = false; + break; + } + + // Mark as fallible any step where a match terminated. + // Later, this property will be propagated to all of the step's predecessors. + for (unsigned j = 0; j < analysis.final_step_indices.size; j++) { + uint32_t final_step_index = analysis.final_step_indices.contents[j]; + QueryStep *step = &self->steps.contents[final_step_index]; + if ( + step->depth != PATTERN_DONE_MARKER && + step->depth > parent_depth && + !step->is_dead_end + ) { + step->parent_pattern_guaranteed = false; + step->root_pattern_guaranteed = false; + } + } + } + + // Mark as indefinite any step with captures that are used in predicates. + Array(uint16_t) predicate_capture_ids = array_new(); + for (unsigned i = 0; i < self->patterns.size; i++) { + QueryPattern *pattern = &self->patterns.contents[i]; + + // Gather all of the captures that are used in predicates for this pattern. + array_clear(&predicate_capture_ids); + for ( + unsigned start = pattern->predicate_steps.offset, + end = start + pattern->predicate_steps.length, + j = start; j < end; j++ + ) { + t_query_predicate_step *step = &self->predicate_steps.contents[j]; + if (step->type == TSQueryPredicateStepTypeCapture) { + uint16_t value_id = step->value_id; + array_insert_sorted_by(&predicate_capture_ids, , value_id); + } + } + + // Find all of the steps that have these captures. 
+ for ( + unsigned start = pattern->steps.offset, + end = start + pattern->steps.length, + j = start; j < end; j++ + ) { + QueryStep *step = &self->steps.contents[j]; + for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) { + uint16_t capture_id = step->capture_ids[k]; + if (capture_id == NONE) break; + unsigned index, exists; + array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists); + if (exists) { + step->root_pattern_guaranteed = false; + break; + } + } + } + } + + // Propagate fallibility. If a pattern is fallible at a given step, then it is + // fallible at all of its preceding steps. + bool done = self->steps.size == 0; + while (!done) { + done = true; + for (unsigned i = self->steps.size - 1; i > 0; i--) { + QueryStep *step = &self->steps.contents[i]; + if (step->depth == PATTERN_DONE_MARKER) continue; + + // Determine if this step is definite or has definite alternatives. + bool parent_pattern_guaranteed = false; + for (;;) { + if (step->root_pattern_guaranteed) { + parent_pattern_guaranteed = true; + break; + } + if (step->alternative_index == NONE || step->alternative_index < i) { + break; + } + step = &self->steps.contents[step->alternative_index]; + } + + // If not, mark its predecessor as indefinite. + if (!parent_pattern_guaranteed) { + QueryStep *prev_step = &self->steps.contents[i - 1]; + if ( + !prev_step->is_dead_end && + prev_step->depth != PATTERN_DONE_MARKER && + prev_step->root_pattern_guaranteed + ) { + prev_step->root_pattern_guaranteed = false; + done = false; + } + } + } + } + + #ifdef DEBUG_ANALYZE_QUERY + printf("Steps:\n"); + for (unsigned i = 0; i < self->steps.size; i++) { + QueryStep *step = &self->steps.contents[i]; + if (step->depth == PATTERN_DONE_MARKER) { + printf(" %u: DONE\n", i); + } else { + printf( + " %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n", + i, + (step->symbol == WILDCARD_SYMBOL) + ? 
"ANY" + : ts_language_symbol_name(self->language, step->symbol), + (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"), + step->depth, + step->parent_pattern_guaranteed, + step->root_pattern_guaranteed + ); + } + } + #endif + + // Determine which repetition symbols in this language have the possibility + // of matching non-rooted patterns in this query. These repetition symbols + // prevent certain optimizations with range restrictions. + analysis.did_abort = false; + for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) { + uint16_t pattern_entry_index = non_rooted_pattern_start_steps.contents[i]; + PatternEntry *pattern_entry = &self->pattern_map.contents[pattern_entry_index]; + + analysis_state_set__clear(&analysis.states, &analysis.state_pool); + analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); + for (unsigned j = 0; j < subgraphs.size; j++) { + AnalysisSubgraph *subgraph = &subgraphs.contents[j]; + TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol); + if (metadata.visible || metadata.named) continue; + + for (uint32_t k = 0; k < subgraph->start_states.size; k++) { + t_state_id parse_state = subgraph->start_states.contents[k]; + analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { + .step_index = pattern_entry->step_index, + .stack = { + [0] = { + .parse_state = parse_state, + .parent_symbol = subgraph->symbol, + .child_index = 0, + .field_id = 0, + .done = false, + }, + }, + .root_symbol = subgraph->symbol, + .depth = 1, + })); + } + } + + #ifdef DEBUG_ANALYZE_QUERY + printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index); + #endif + + ts_query__perform_analysis( + self, + &subgraphs, + &analysis + ); + + if (analysis.finished_parent_symbols.size > 0) { + self->patterns.contents[pattern_entry->pattern_index].is_non_local = true; + } + + for (unsigned k = 0; k < 
analysis.finished_parent_symbols.size; k++) { + t_symbol symbol = analysis.finished_parent_symbols.contents[k]; + array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol); + } + } + + #ifdef DEBUG_ANALYZE_QUERY + if (self->repeat_symbols_with_rootless_patterns.size > 0) { + printf("\nRepetition symbols with rootless patterns:\n"); + printf("aborted analysis: %d\n", analysis.did_abort); + for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) { + TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i]; + printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol)); + } + printf("\n"); + } + #endif + + // Cleanup + for (unsigned i = 0; i < subgraphs.size; i++) { + array_delete(&subgraphs.contents[i].start_states); + array_delete(&subgraphs.contents[i].nodes); + } + array_delete(&subgraphs); + query_analysis__delete(&analysis); + array_delete(&next_nodes); + array_delete(&non_rooted_pattern_start_steps); + array_delete(&parent_step_indices); + array_delete(&predicate_capture_ids); + state_predecessor_map_delete(&predecessor_map); + + return all_patterns_are_valid; +} + +static void ts_query__add_negated_fields( + t_query *self, + uint16_t step_index, + t_field_id *field_ids, + uint16_t field_count +) { + QueryStep *step = &self->steps.contents[step_index]; + + // The negated field array stores a list of field lists, separated by zeros. + // Try to find the start index of an existing list that matches this new list. + bool failed_match = false; + unsigned match_count = 0; + unsigned start_i = 0; + for (unsigned i = 0; i < self->negated_fields.size; i++) { + t_field_id existing_field_id = self->negated_fields.contents[i]; + + // At each zero value, terminate the match attempt. If we've exactly + // matched the new field list, then reuse this index. Otherwise, + // start over the matching process. 
+ if (existing_field_id == 0) { + if (match_count == field_count) { + step->negated_field_list_id = start_i; + return; + } else { + start_i = i + 1; + match_count = 0; + failed_match = false; + } + } + + // If the existing list matches our new list so far, then advance + // to the next element of the new list. + else if ( + match_count < field_count && + existing_field_id == field_ids[match_count] && + !failed_match + ) { + match_count++; + } + + // Otherwise, this existing list has failed to match. + else { + match_count = 0; + failed_match = true; + } + } + + step->negated_field_list_id = self->negated_fields.size; + array_extend(&self->negated_fields, field_count, field_ids); + array_push(&self->negated_fields, 0); +} + +static t_query_error ts_query__parse_string_literal( + t_query *self, + Stream *stream +) { + const char *string_start = stream->input; + if (stream->next != '"') return TSQueryErrorSyntax; + stream_advance(stream); + const char *prev_position = stream->input; + + bool is_escaped = false; + array_clear(&self->string_buffer); + for (;;) { + if (is_escaped) { + is_escaped = false; + switch (stream->next) { + case 'n': + array_push(&self->string_buffer, '\n'); + break; + case 'r': + array_push(&self->string_buffer, '\r'); + break; + case 't': + array_push(&self->string_buffer, '\t'); + break; + case '0': + array_push(&self->string_buffer, '\0'); + break; + default: + array_extend(&self->string_buffer, stream->next_size, stream->input); + break; + } + prev_position = stream->input + stream->next_size; + } else { + if (stream->next == '\\') { + array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); + prev_position = stream->input + 1; + is_escaped = true; + } else if (stream->next == '"') { + array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); + stream_advance(stream); + return TSQueryErrorNone; + } else if (stream->next == '\n') { + stream_reset(stream, string_start); + 
return TSQueryErrorSyntax; + } + } + if (!stream_advance(stream)) { + stream_reset(stream, string_start); + return TSQueryErrorSyntax; + } + } +} + +// Parse a single predicate associated with a pattern, adding it to the +// query's internal `predicate_steps` array. Predicates are arbitrary +// S-expressions associated with a pattern which are meant to be handled at +// a higher level of abstraction, such as the Rust/JavaScript bindings. They +// can contain '@'-prefixed capture names, double-quoted strings, and bare +// symbols, which also represent strings. +static t_query_error ts_query__parse_predicate( + t_query *self, + Stream *stream +) { + if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; + const char *predicate_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - predicate_name); + uint16_t id = symbol_table_insert_name( + &self->predicate_values, + predicate_name, + length + ); + array_push(&self->predicate_steps, ((t_query_predicate_step) { + .type = TSQueryPredicateStepTypeString, + .value_id = id, + })); + stream_skip_whitespace(stream); + + for (;;) { + if (stream->next == ')') { + stream_advance(stream); + stream_skip_whitespace(stream); + array_push(&self->predicate_steps, ((t_query_predicate_step) { + .type = TSQueryPredicateStepTypeDone, + .value_id = 0, + })); + break; + } + + // Parse an '@'-prefixed capture name + else if (stream->next == '@') { + stream_advance(stream); + + // Parse the capture name + if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; + const char *capture_name = stream->input; + stream_scan_identifier(stream); + uint32_t capture_length = (uint32_t)(stream->input - capture_name); + + // Add the capture id to the first step of the pattern + int capture_id = symbol_table_id_for_name( + &self->captures, + capture_name, + capture_length + ); + if (capture_id == -1) { + stream_reset(stream, capture_name); + return TSQueryErrorCapture; + } + + 
array_push(&self->predicate_steps, ((t_query_predicate_step) { + .type = TSQueryPredicateStepTypeCapture, + .value_id = capture_id, + })); + } + + // Parse a string literal + else if (stream->next == '"') { + t_query_error e = ts_query__parse_string_literal(self, stream); + if (e) return e; + uint16_t query_id = symbol_table_insert_name( + &self->predicate_values, + self->string_buffer.contents, + self->string_buffer.size + ); + array_push(&self->predicate_steps, ((t_query_predicate_step) { + .type = TSQueryPredicateStepTypeString, + .value_id = query_id, + })); + } + + // Parse a bare symbol + else if (stream_is_ident_start(stream)) { + const char *symbol_start = stream->input; + stream_scan_identifier(stream); + uint32_t symbol_length = (uint32_t)(stream->input - symbol_start); + uint16_t query_id = symbol_table_insert_name( + &self->predicate_values, + symbol_start, + symbol_length + ); + array_push(&self->predicate_steps, ((t_query_predicate_step) { + .type = TSQueryPredicateStepTypeString, + .value_id = query_id, + })); + } + + else { + return TSQueryErrorSyntax; + } + + stream_skip_whitespace(stream); + } + + return 0; +} + +// Read one S-expression pattern from the stream, and incorporate it into +// the query's internal state machine representation. For nested patterns, +// this function calls itself recursively. +// +// The caller is responsible for passing in a dedicated CaptureQuantifiers. +// These should not be shared between different calls to ts_query__parse_pattern! +static t_query_error ts_query__parse_pattern( + t_query *self, + Stream *stream, + uint32_t depth, + bool is_immediate, + CaptureQuantifiers *capture_quantifiers +) { + if (stream->next == 0) return TSQueryErrorSyntax; + if (stream->next == ')' || stream->next == ']') return PARENT_DONE; + + const uint32_t starting_step_index = self->steps.size; + + // Store the byte offset of each step in the query. 
+ if ( + self->step_offsets.size == 0 || + array_back(&self->step_offsets)->step_index != starting_step_index + ) { + array_push(&self->step_offsets, ((StepOffset) { + .step_index = starting_step_index, + .byte_offset = stream_offset(stream), + })); + } + + // An open bracket is the start of an alternation. + if (stream->next == '[') { + stream_advance(stream); + stream_skip_whitespace(stream); + + // Parse each branch, and add a placeholder step in between the branches. + Array(uint32_t) branch_step_indices = array_new(); + CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new(); + for (;;) { + uint32_t start_index = self->steps.size; + t_query_error e = ts_query__parse_pattern( + self, + stream, + depth, + is_immediate, + &branch_capture_quantifiers + ); + + if (e == PARENT_DONE) { + if (stream->next == ']' && branch_step_indices.size > 0) { + stream_advance(stream); + break; + } + e = TSQueryErrorSyntax; + } + if (e) { + capture_quantifiers_delete(&branch_capture_quantifiers); + array_delete(&branch_step_indices); + return e; + } + + if (start_index == starting_step_index) { + capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers); + } else { + capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers); + } + + array_push(&branch_step_indices, start_index); + array_push(&self->steps, query_step__new(0, depth, false)); + capture_quantifiers_clear(&branch_capture_quantifiers); + } + (void)array_pop(&self->steps); + + // For all of the branches except for the last one, add the subsequent branch as an + // alternative, and link the end of the branch to the current end of the steps. 
+ for (unsigned i = 0; i < branch_step_indices.size - 1; i++) { + uint32_t step_index = branch_step_indices.contents[i]; + uint32_t next_step_index = branch_step_indices.contents[i + 1]; + QueryStep *start_step = &self->steps.contents[step_index]; + QueryStep *end_step = &self->steps.contents[next_step_index - 1]; + start_step->alternative_index = next_step_index; + end_step->alternative_index = self->steps.size; + end_step->is_dead_end = true; + } + + capture_quantifiers_delete(&branch_capture_quantifiers); + array_delete(&branch_step_indices); + } + + // An open parenthesis can be the start of three possible constructs: + // * A grouped sequence + // * A predicate + // * A named node + else if (stream->next == '(') { + stream_advance(stream); + stream_skip_whitespace(stream); + + // If this parenthesis is followed by a node, then it represents a grouped sequence. + if (stream->next == '(' || stream->next == '"' || stream->next == '[') { + bool child_is_immediate = is_immediate; + CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); + for (;;) { + if (stream->next == '.') { + child_is_immediate = true; + stream_advance(stream); + stream_skip_whitespace(stream); + } + t_query_error e = ts_query__parse_pattern( + self, + stream, + depth, + child_is_immediate, + &child_capture_quantifiers + ); + if (e == PARENT_DONE) { + if (stream->next == ')') { + stream_advance(stream); + break; + } + e = TSQueryErrorSyntax; + } + if (e) { + capture_quantifiers_delete(&child_capture_quantifiers); + return e; + } + + capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); + capture_quantifiers_clear(&child_capture_quantifiers); + child_is_immediate = false; + } + + capture_quantifiers_delete(&child_capture_quantifiers); + } + + // A dot/pound character indicates the start of a predicate. + else if (stream->next == '.' 
|| stream->next == '#') { + stream_advance(stream); + return ts_query__parse_predicate(self, stream); + } + + // Otherwise, this parenthesis is the start of a named node. + else { + t_symbol symbol; + + // Parse a normal node name + if (stream_is_ident_start(stream)) { + const char *node_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - node_name); + + // Parse the wildcard symbol + if (length == 1 && node_name[0] == '_') { + symbol = WILDCARD_SYMBOL; + } + + else { + symbol = ts_language_symbol_for_name( + self->language, + node_name, + length, + true + ); + if (!symbol) { + stream_reset(stream, node_name); + return TSQueryErrorNodeType; + } + } + } else { + return TSQueryErrorSyntax; + } + + // Add a step for the node. + array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); + QueryStep *step = array_back(&self->steps); + if (ts_language_symbol_metadata(self->language, symbol).supertype) { + step->supertype_symbol = step->symbol; + step->symbol = WILDCARD_SYMBOL; + } + if (symbol == WILDCARD_SYMBOL) { + step->is_named = true; + } + + stream_skip_whitespace(stream); + + if (stream->next == '/') { + stream_advance(stream); + if (!stream_is_ident_start(stream)) { + return TSQueryErrorSyntax; + } + + const char *node_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - node_name); + + step->symbol = ts_language_symbol_for_name( + self->language, + node_name, + length, + true + ); + if (!step->symbol) { + stream_reset(stream, node_name); + return TSQueryErrorNodeType; + } + + stream_skip_whitespace(stream); + } + + // Parse the child patterns + bool child_is_immediate = false; + uint16_t last_child_step_index = 0; + uint16_t negated_field_count = 0; + t_field_id negated_field_ids[MAX_NEGATED_FIELD_COUNT]; + CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); + for (;;) { + // Parse a negated field assertion + if (stream->next == 
'!') { + stream_advance(stream); + stream_skip_whitespace(stream); + if (!stream_is_ident_start(stream)) { + capture_quantifiers_delete(&child_capture_quantifiers); + return TSQueryErrorSyntax; + } + const char *field_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - field_name); + stream_skip_whitespace(stream); + + t_field_id field_id = ts_language_field_id_for_name( + self->language, + field_name, + length + ); + if (!field_id) { + stream->input = field_name; + capture_quantifiers_delete(&child_capture_quantifiers); + return TSQueryErrorField; + } + + // Keep the field ids sorted. + if (negated_field_count < MAX_NEGATED_FIELD_COUNT) { + negated_field_ids[negated_field_count] = field_id; + negated_field_count++; + } + + continue; + } + + // Parse a sibling anchor + if (stream->next == '.') { + child_is_immediate = true; + stream_advance(stream); + stream_skip_whitespace(stream); + } + + uint16_t step_index = self->steps.size; + t_query_error e = ts_query__parse_pattern( + self, + stream, + depth + 1, + child_is_immediate, + &child_capture_quantifiers + ); + if (e == PARENT_DONE) { + if (stream->next == ')') { + if (child_is_immediate) { + if (last_child_step_index == 0) { + capture_quantifiers_delete(&child_capture_quantifiers); + return TSQueryErrorSyntax; + } + self->steps.contents[last_child_step_index].is_last_child = true; + } + + if (negated_field_count) { + ts_query__add_negated_fields( + self, + starting_step_index, + negated_field_ids, + negated_field_count + ); + } + + stream_advance(stream); + break; + } + e = TSQueryErrorSyntax; + } + if (e) { + capture_quantifiers_delete(&child_capture_quantifiers); + return e; + } + + capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); + + last_child_step_index = step_index; + child_is_immediate = false; + capture_quantifiers_clear(&child_capture_quantifiers); + } + capture_quantifiers_delete(&child_capture_quantifiers); + } + } + + // 
Parse a wildcard pattern + else if (stream->next == '_') { + stream_advance(stream); + stream_skip_whitespace(stream); + + // Add a step that matches any kind of node + array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); + } + + // Parse a double-quoted anonymous leaf node expression + else if (stream->next == '"') { + const char *string_start = stream->input; + t_query_error e = ts_query__parse_string_literal(self, stream); + if (e) return e; + + // Add a step for the node + t_symbol symbol = ts_language_symbol_for_name( + self->language, + self->string_buffer.contents, + self->string_buffer.size, + false + ); + if (!symbol) { + stream_reset(stream, string_start + 1); + return TSQueryErrorNodeType; + } + array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); + } + + // Parse a field-prefixed pattern + else if (stream_is_ident_start(stream)) { + // Parse the field name + const char *field_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - field_name); + stream_skip_whitespace(stream); + + if (stream->next != ':') { + stream_reset(stream, field_name); + return TSQueryErrorSyntax; + } + stream_advance(stream); + stream_skip_whitespace(stream); + + // Parse the pattern + CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new(); + t_query_error e = ts_query__parse_pattern( + self, + stream, + depth, + is_immediate, + &field_capture_quantifiers + ); + if (e) { + capture_quantifiers_delete(&field_capture_quantifiers); + if (e == PARENT_DONE) e = TSQueryErrorSyntax; + return e; + } + + // Add the field name to the first step of the pattern + t_field_id field_id = ts_language_field_id_for_name( + self->language, + field_name, + length + ); + if (!field_id) { + stream->input = field_name; + return TSQueryErrorField; + } + + uint32_t step_index = starting_step_index; + QueryStep *step = &self->steps.contents[step_index]; + for (;;) { + step->field = field_id; + 
if ( + step->alternative_index != NONE && + step->alternative_index > step_index && + step->alternative_index < self->steps.size + ) { + step_index = step->alternative_index; + step = &self->steps.contents[step_index]; + } else { + break; + } + } + + capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers); + capture_quantifiers_delete(&field_capture_quantifiers); + } + + else { + return TSQueryErrorSyntax; + } + + stream_skip_whitespace(stream); + + // Parse suffixes modifiers for this pattern + t_quantifier quantifier = TSQuantifierOne; + for (;;) { + // Parse the one-or-more operator. + if (stream->next == '+') { + quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier); + + stream_advance(stream); + stream_skip_whitespace(stream); + + QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); + repeat_step.alternative_index = starting_step_index; + repeat_step.is_pass_through = true; + repeat_step.alternative_is_immediate = true; + array_push(&self->steps, repeat_step); + } + + // Parse the zero-or-more repetition operator. + else if (stream->next == '*') { + quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier); + + stream_advance(stream); + stream_skip_whitespace(stream); + + QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); + repeat_step.alternative_index = starting_step_index; + repeat_step.is_pass_through = true; + repeat_step.alternative_is_immediate = true; + array_push(&self->steps, repeat_step); + + // Stop when `step->alternative_index` is `NONE` or it points to + // `repeat_step` or beyond. Note that having just been pushed, + // `repeat_step` occupies slot `self->steps.size - 1`. 
+ QueryStep *step = &self->steps.contents[starting_step_index]; + while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) { + step = &self->steps.contents[step->alternative_index]; + } + step->alternative_index = self->steps.size; + } + + // Parse the optional operator. + else if (stream->next == '?') { + quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier); + + stream_advance(stream); + stream_skip_whitespace(stream); + + QueryStep *step = &self->steps.contents[starting_step_index]; + while (step->alternative_index != NONE && step->alternative_index < self->steps.size) { + step = &self->steps.contents[step->alternative_index]; + } + step->alternative_index = self->steps.size; + } + + // Parse an '@'-prefixed capture pattern + else if (stream->next == '@') { + stream_advance(stream); + if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; + const char *capture_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - capture_name); + stream_skip_whitespace(stream); + + // Add the capture id to the first step of the pattern + uint16_t capture_id = symbol_table_insert_name( + &self->captures, + capture_name, + length + ); + + // Add the capture quantifier + capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne); + + uint32_t step_index = starting_step_index; + for (;;) { + QueryStep *step = &self->steps.contents[step_index]; + query_step__add_capture(step, capture_id); + if ( + step->alternative_index != NONE && + step->alternative_index > step_index && + step->alternative_index < self->steps.size + ) { + step_index = step->alternative_index; + } else { + break; + } + } + } + + // No more suffix modifiers + else { + break; + } + } + + capture_quantifiers_mul(capture_quantifiers, quantifier); + + return 0; +} + +t_query *ts_query_new( + const t_language *language, + const char *source, + uint32_t source_len, + uint32_t *error_offset, + 
t_query_error *error_type +) { + if ( + !language || + language->version > TREE_SITTER_LANGUAGE_VERSION || + language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION + ) { + *error_type = TSQueryErrorLanguage; + return NULL; + } + + t_query *self = ts_malloc(sizeof(t_query)); + *self = (t_query) { + .steps = array_new(), + .pattern_map = array_new(), + .captures = symbol_table_new(), + .capture_quantifiers = array_new(), + .predicate_values = symbol_table_new(), + .predicate_steps = array_new(), + .patterns = array_new(), + .step_offsets = array_new(), + .string_buffer = array_new(), + .negated_fields = array_new(), + .repeat_symbols_with_rootless_patterns = array_new(), + .wildcard_root_pattern_count = 0, + .language = ts_language_copy(language), + }; + + array_push(&self->negated_fields, 0); + + // Parse all of the S-expressions in the given string. + Stream stream = stream_new(source, source_len); + stream_skip_whitespace(&stream); + while (stream.input < stream.end) { + uint32_t pattern_index = self->patterns.size; + uint32_t start_step_index = self->steps.size; + uint32_t start_predicate_step_index = self->predicate_steps.size; + array_push(&self->patterns, ((QueryPattern) { + .steps = (Slice) {.offset = start_step_index}, + .predicate_steps = (Slice) {.offset = start_predicate_step_index}, + .start_byte = stream_offset(&stream), + .is_non_local = false, + })); + CaptureQuantifiers capture_quantifiers = capture_quantifiers_new(); + *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers); + array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); + + QueryPattern *pattern = array_back(&self->patterns); + pattern->steps.length = self->steps.size - start_step_index; + pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index; + + // If any pattern could not be parsed, then report the error information + // and terminate. 
+ if (*error_type) { + if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax; + *error_offset = stream_offset(&stream); + capture_quantifiers_delete(&capture_quantifiers); + ts_query_delete(self); + return NULL; + } + + // Maintain a list of capture quantifiers for each pattern + array_push(&self->capture_quantifiers, capture_quantifiers); + + // Maintain a map that can look up patterns for a given root symbol. + uint16_t wildcard_root_alternative_index = NONE; + for (;;) { + QueryStep *step = &self->steps.contents[start_step_index]; + + // If a pattern has a wildcard at its root, but it has a non-wildcard child, + // then optimize the matching process by skipping matching the wildcard. + // Later, during the matching process, the query cursor will check that + // there is a parent node, and capture it if necessary. + if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) { + QueryStep *second_step = &self->steps.contents[start_step_index + 1]; + if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) { + wildcard_root_alternative_index = step->alternative_index; + start_step_index += 1; + step = second_step; + } + } + + // Determine whether the pattern has a single root node. This affects + // decisions about whether or not to start matching the pattern when + // a query cursor has a range restriction or when immediately within an + // error node. 
+ uint32_t start_depth = step->depth; + bool is_rooted = start_depth == 0; + for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) { + QueryStep *child_step = &self->steps.contents[step_index]; + if (child_step->is_dead_end) break; + if (child_step->depth == start_depth) { + is_rooted = false; + break; + } + } + + ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) { + .step_index = start_step_index, + .pattern_index = pattern_index, + .is_rooted = is_rooted + }); + if (step->symbol == WILDCARD_SYMBOL) { + self->wildcard_root_pattern_count++; + } + + // If there are alternatives or options at the root of the pattern, + // then add multiple entries to the pattern map. + if (step->alternative_index != NONE) { + start_step_index = step->alternative_index; + } else if (wildcard_root_alternative_index != NONE) { + start_step_index = wildcard_root_alternative_index; + wildcard_root_alternative_index = NONE; + } else { + break; + } + } + } + + if (!ts_query__analyze_patterns(self, error_offset)) { + *error_type = TSQueryErrorStructure; + ts_query_delete(self); + return NULL; + } + + array_delete(&self->string_buffer); + return self; +} + +void ts_query_delete(t_query *self) { + if (self) { + array_delete(&self->steps); + array_delete(&self->pattern_map); + array_delete(&self->predicate_steps); + array_delete(&self->patterns); + array_delete(&self->step_offsets); + array_delete(&self->string_buffer); + array_delete(&self->negated_fields); + array_delete(&self->repeat_symbols_with_rootless_patterns); + ts_language_delete(self->language); + symbol_table_delete(&self->captures); + symbol_table_delete(&self->predicate_values); + for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) { + CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index); + capture_quantifiers_delete(capture_quantifiers); + } + array_delete(&self->capture_quantifiers); + ts_free(self); + } +} + +uint32_t 
ts_query_pattern_count(const t_query *self) { + return self->patterns.size; +} + +uint32_t ts_query_capture_count(const t_query *self) { + return self->captures.slices.size; +} + +uint32_t ts_query_string_count(const t_query *self) { + return self->predicate_values.slices.size; +} + +const char *ts_query_capture_name_for_id( + const t_query *self, + uint32_t index, + uint32_t *length +) { + return symbol_table_name_for_id(&self->captures, index, length); +} + +t_quantifier ts_query_capture_quantifier_for_id( + const t_query *self, + uint32_t pattern_index, + uint32_t capture_index +) { + CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index); + return capture_quantifier_for_id(capture_quantifiers, capture_index); +} + +const char *ts_query_string_value_for_id( + const t_query *self, + uint32_t index, + uint32_t *length +) { + return symbol_table_name_for_id(&self->predicate_values, index, length); +} + +const t_query_predicate_step *ts_query_predicates_for_pattern( + const t_query *self, + uint32_t pattern_index, + uint32_t *step_count +) { + Slice slice = self->patterns.contents[pattern_index].predicate_steps; + *step_count = slice.length; + if (self->predicate_steps.contents == NULL) { + return NULL; + } + return &self->predicate_steps.contents[slice.offset]; +} + +uint32_t ts_query_start_byte_for_pattern( + const t_query *self, + uint32_t pattern_index +) { + return self->patterns.contents[pattern_index].start_byte; +} + +bool ts_query_is_pattern_rooted( + const t_query *self, + uint32_t pattern_index +) { + for (unsigned i = 0; i < self->pattern_map.size; i++) { + PatternEntry *entry = &self->pattern_map.contents[i]; + if (entry->pattern_index == pattern_index) { + if (!entry->is_rooted) return false; + } + } + return true; +} + +bool ts_query_is_pattern_non_local( + const t_query *self, + uint32_t pattern_index +) { + if (pattern_index < self->patterns.size) { + return self->patterns.contents[pattern_index].is_non_local; 
+ } else { + return false; + } +} + +bool ts_query_is_pattern_guaranteed_at_step( + const t_query *self, + uint32_t byte_offset +) { + uint32_t step_index = UINT32_MAX; + for (unsigned i = 0; i < self->step_offsets.size; i++) { + StepOffset *step_offset = &self->step_offsets.contents[i]; + if (step_offset->byte_offset > byte_offset) break; + step_index = step_offset->step_index; + } + if (step_index < self->steps.size) { + return self->steps.contents[step_index].root_pattern_guaranteed; + } else { + return false; + } +} + +bool ts_query__step_is_fallible( + const t_query *self, + uint16_t step_index +) { + assert((uint32_t)step_index + 1 < self->steps.size); + QueryStep *step = &self->steps.contents[step_index]; + QueryStep *next_step = &self->steps.contents[step_index + 1]; + return ( + next_step->depth != PATTERN_DONE_MARKER && + next_step->depth > step->depth && + !next_step->parent_pattern_guaranteed + ); +} + +void ts_query_disable_capture( + t_query *self, + const char *name, + uint32_t length +) { + // Remove capture information for any pattern step that previously + // captured with the given name. + int id = symbol_table_id_for_name(&self->captures, name, length); + if (id != -1) { + for (unsigned i = 0; i < self->steps.size; i++) { + QueryStep *step = &self->steps.contents[i]; + query_step__remove_capture(step, id); + } + } +} + +void ts_query_disable_pattern( + t_query *self, + uint32_t pattern_index +) { + // Remove the given pattern from the pattern map. Its steps will still + // be in the `steps` array, but they will never be read. 
+ for (unsigned i = 0; i < self->pattern_map.size; i++) { + PatternEntry *pattern = &self->pattern_map.contents[i]; + if (pattern->pattern_index == pattern_index) { + array_erase(&self->pattern_map, i); + i--; + } + } +} + +/*************** + * QueryCursor + ***************/ + +t_query_cursor *ts_query_cursor_new(void) { + t_query_cursor *self = ts_malloc(sizeof(t_query_cursor)); + *self = (t_query_cursor) { + .did_exceed_match_limit = false, + .ascending = false, + .halted = false, + .states = array_new(), + .finished_states = array_new(), + .capture_list_pool = capture_list_pool_new(), + .start_byte = 0, + .end_byte = UINT32_MAX, + .start_point = {0, 0}, + .end_point = POINT_MAX, + .max_start_depth = UINT32_MAX, + }; + array_reserve(&self->states, 8); + array_reserve(&self->finished_states, 8); + return self; +} + +void ts_query_cursor_delete(t_query_cursor *self) { + array_delete(&self->states); + array_delete(&self->finished_states); + ts_tree_cursor_delete(&self->cursor); + capture_list_pool_delete(&self->capture_list_pool); + ts_free(self); +} + +bool ts_query_cursor_did_exceed_match_limit(const t_query_cursor *self) { + return self->did_exceed_match_limit; +} + +uint32_t ts_query_cursor_match_limit(const t_query_cursor *self) { + return self->capture_list_pool.max_capture_list_count; +} + +void ts_query_cursor_set_match_limit(t_query_cursor *self, uint32_t limit) { + self->capture_list_pool.max_capture_list_count = limit; +} + +#ifdef DEBUG_EXECUTE_QUERY +#define LOG(...) fprintf(stderr, __VA_ARGS__) +#else +#define LOG(...) 
+#endif + +void ts_query_cursor_exec( + t_query_cursor *self, + const t_query *query, + t_parse_node node +) { + if (query) { + LOG("query steps:\n"); + for (unsigned i = 0; i < query->steps.size; i++) { + QueryStep *step = &query->steps.contents[i]; + LOG(" %u: {", i); + if (step->depth == PATTERN_DONE_MARKER) { + LOG("DONE"); + } else if (step->is_dead_end) { + LOG("dead_end"); + } else if (step->is_pass_through) { + LOG("pass_through"); + } else if (step->symbol != WILDCARD_SYMBOL) { + LOG("symbol: %s", query->language->symbol_names[step->symbol]); + } else { + LOG("symbol: *"); + } + if (step->field) { + LOG(", field: %s", query->language->field_names[step->field]); + } + if (step->alternative_index != NONE) { + LOG(", alternative: %u", step->alternative_index); + } + LOG("},\n"); + } + } + + array_clear(&self->states); + array_clear(&self->finished_states); + ts_tree_cursor_reset(&self->cursor, node); + capture_list_pool_reset(&self->capture_list_pool); + self->on_visible_node = true; + self->next_state_id = 0; + self->depth = 0; + self->ascending = false; + self->halted = false; + self->query = query; + self->did_exceed_match_limit = false; +} + +void ts_query_cursor_set_byte_range( + t_query_cursor *self, + uint32_t start_byte, + uint32_t end_byte +) { + if (end_byte == 0) { + end_byte = UINT32_MAX; + } + self->start_byte = start_byte; + self->end_byte = end_byte; +} + +void ts_query_cursor_set_point_range( + t_query_cursor *self, + t_point start_point, + t_point end_point +) { + if (end_point.row == 0 && end_point.column == 0) { + end_point = POINT_MAX; + } + self->start_point = start_point; + self->end_point = end_point; +} + +// Search through all of the in-progress states, and find the captured +// node that occurs earliest in the document. 
+static bool ts_query_cursor__first_in_progress_capture( + t_query_cursor *self, + uint32_t *state_index, + uint32_t *byte_offset, + uint32_t *pattern_index, + bool *root_pattern_guaranteed +) { + bool result = false; + *state_index = UINT32_MAX; + *byte_offset = UINT32_MAX; + *pattern_index = UINT32_MAX; + for (unsigned i = 0; i < self->states.size; i++) { + QueryState *state = &self->states.contents[i]; + if (state->dead) continue; + + const CaptureList *captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + if (state->consumed_capture_count >= captures->size) { + continue; + } + + t_parse_node node = captures->contents[state->consumed_capture_count].node; + if ( + ts_node_end_byte(node) <= self->start_byte || + point_lte(ts_node_end_point(node), self->start_point) + ) { + state->consumed_capture_count++; + i--; + continue; + } + + uint32_t node_start_byte = ts_node_start_byte(node); + if ( + !result || + node_start_byte < *byte_offset || + (node_start_byte == *byte_offset && state->pattern_index < *pattern_index) + ) { + QueryStep *step = &self->query->steps.contents[state->step_index]; + if (root_pattern_guaranteed) { + *root_pattern_guaranteed = step->root_pattern_guaranteed; + } else if (step->root_pattern_guaranteed) { + continue; + } + + result = true; + *state_index = i; + *byte_offset = node_start_byte; + *pattern_index = state->pattern_index; + } + } + return result; +} + +// Determine which node is first in a depth-first traversal +int ts_query_cursor__compare_nodes(t_parse_node left, t_parse_node right) { + if (left.id != right.id) { + uint32_t left_start = ts_node_start_byte(left); + uint32_t right_start = ts_node_start_byte(right); + if (left_start < right_start) return -1; + if (left_start > right_start) return 1; + uint32_t left_node_count = ts_node_end_byte(left); + uint32_t right_node_count = ts_node_end_byte(right); + if (left_node_count > right_node_count) return -1; + if (left_node_count < 
right_node_count) return 1; + } + return 0; +} + +// Determine if either state contains a superset of the other state's captures. +void ts_query_cursor__compare_captures( + t_query_cursor *self, + QueryState *left_state, + QueryState *right_state, + bool *left_contains_right, + bool *right_contains_left +) { + const CaptureList *left_captures = capture_list_pool_get( + &self->capture_list_pool, + left_state->capture_list_id + ); + const CaptureList *right_captures = capture_list_pool_get( + &self->capture_list_pool, + right_state->capture_list_id + ); + *left_contains_right = true; + *right_contains_left = true; + unsigned i = 0, j = 0; + for (;;) { + if (i < left_captures->size) { + if (j < right_captures->size) { + t_query_capture *left = &left_captures->contents[i]; + t_query_capture *right = &right_captures->contents[j]; + if (left->node.id == right->node.id && left->index == right->index) { + i++; + j++; + } else { + switch (ts_query_cursor__compare_nodes(left->node, right->node)) { + case -1: + *right_contains_left = false; + i++; + break; + case 1: + *left_contains_right = false; + j++; + break; + default: + *right_contains_left = false; + *left_contains_right = false; + i++; + j++; + break; + } + } + } else { + *right_contains_left = false; + break; + } + } else { + if (j < right_captures->size) { + *left_contains_right = false; + } + break; + } + } +} + +static void ts_query_cursor__add_state( + t_query_cursor *self, + const PatternEntry *pattern +) { + QueryStep *step = &self->query->steps.contents[pattern->step_index]; + uint32_t start_depth = self->depth - step->depth; + + // Keep the states array in ascending order of start_depth and pattern_index, + // so that it can be processed more efficiently elsewhere. Usually, there is + // no work to do here because of two facts: + // * States with lower start_depth are naturally added first due to the + // order in which nodes are visited. 
+ // * Earlier patterns are naturally added first because of the ordering of the + // pattern_map data structure that's used to initiate matches. + // + // This loop is only needed in cases where two conditions hold: + // * A pattern consists of more than one sibling node, so that its states + // remain in progress after exiting the node that started the match. + // * The first node in the pattern matches against multiple nodes at the + // same depth. + // + // An example of this is the pattern '((comment)* (function))'. If multiple + // `comment` nodes appear in a row, then we may initiate a new state for this + // pattern while another state for the same pattern is already in progress. + // If there are multiple patterns like this in a query, then this loop will + // need to execute in order to keep the states ordered by pattern_index. + uint32_t index = self->states.size; + while (index > 0) { + QueryState *prev_state = &self->states.contents[index - 1]; + if (prev_state->start_depth < start_depth) break; + if (prev_state->start_depth == start_depth) { + // Avoid inserting an unnecessary duplicate state, which would be + // immediately pruned by the longest-match criteria. + if ( + prev_state->pattern_index == pattern->pattern_index && + prev_state->step_index == pattern->step_index + ) return; + if (prev_state->pattern_index <= pattern->pattern_index) break; + } + index--; + } + + LOG( + " start state. pattern:%u, step:%u\n", + pattern->pattern_index, + pattern->step_index + ); + array_insert(&self->states, index, ((QueryState) { + .id = UINT32_MAX, + .capture_list_id = NONE, + .step_index = pattern->step_index, + .pattern_index = pattern->pattern_index, + .start_depth = start_depth, + .consumed_capture_count = 0, + .seeking_immediate_match = true, + .has_in_progress_alternatives = false, + .needs_parent = step->depth == 1, + .dead = false, + })); +} + +// Acquire a capture list for this state. 
If there are no capture lists left in the +// pool, this will steal the capture list from another existing state, and mark that +// other state as 'dead'. +static CaptureList *ts_query_cursor__prepare_to_capture( + t_query_cursor *self, + QueryState *state, + unsigned state_index_to_preserve +) { + if (state->capture_list_id == NONE) { + state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); + + // If there are no capture lists left in the pool, then terminate whichever + // state has captured the earliest node in the document, and steal its + // capture list. + if (state->capture_list_id == NONE) { + self->did_exceed_match_limit = true; + uint32_t state_index, byte_offset, pattern_index; + if ( + ts_query_cursor__first_in_progress_capture( + self, + &state_index, + &byte_offset, + &pattern_index, + NULL + ) && + state_index != state_index_to_preserve + ) { + LOG( + " abandon state. index:%u, pattern:%u, offset:%u.\n", + state_index, pattern_index, byte_offset + ); + QueryState *other_state = &self->states.contents[state_index]; + state->capture_list_id = other_state->capture_list_id; + other_state->capture_list_id = NONE; + other_state->dead = true; + CaptureList *list = capture_list_pool_get_mut( + &self->capture_list_pool, + state->capture_list_id + ); + array_clear(list); + return list; + } else { + LOG(" ran out of capture lists"); + return NULL; + } + } + } + return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id); +} + +static void ts_query_cursor__capture( + t_query_cursor *self, + QueryState *state, + QueryStep *step, + t_parse_node node +) { + if (state->dead) return; + CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX); + if (!capture_list) { + state->dead = true; + return; + } + + for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { + uint16_t capture_id = step->capture_ids[j]; + if (step->capture_ids[j] == NONE) break; + array_push(capture_list, 
((t_query_capture) { node, capture_id })); + LOG( + " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n", + ts_node_type(node), + state->pattern_index, + capture_id, + capture_list->size + ); + } +} + +// Duplicate the given state and insert the newly-created state immediately after +// the given state in the `states` array. Ensures that the given state reference is +// still valid, even if the states array is reallocated. +static QueryState *ts_query_cursor__copy_state( + t_query_cursor *self, + QueryState **state_ref +) { + const QueryState *state = *state_ref; + uint32_t state_index = (uint32_t)(state - self->states.contents); + QueryState copy = *state; + copy.capture_list_id = NONE; + + // If the state has captures, copy its capture list. + if (state->capture_list_id != NONE) { + CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, &copy, state_index); + if (!new_captures) return NULL; + const CaptureList *old_captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + array_push_all(new_captures, old_captures); + } + + array_insert(&self->states, state_index + 1, copy); + *state_ref = &self->states.contents[state_index]; + return &self->states.contents[state_index + 1]; +} + +static inline bool ts_query_cursor__should_descend( + t_query_cursor *self, + bool node_intersects_range +) { + + if (node_intersects_range && self->depth < self->max_start_depth) { + return true; + } + + // If there are in-progress matches whose remaining steps occur + // deeper in the tree, then descend. 
+ for (unsigned i = 0; i < self->states.size; i++) { + QueryState *state = &self->states.contents[i];; + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if ( + next_step->depth != PATTERN_DONE_MARKER && + state->start_depth + next_step->depth > self->depth + ) { + return true; + } + } + + if (self->depth >= self->max_start_depth) { + return false; + } + + // If the current node is hidden, then a non-rooted pattern might match + // one if its roots inside of this node, and match another of its roots + // as part of a sibling node, so we may need to descend. + if (!self->on_visible_node) { + // Descending into a repetition node outside of the range can be + // expensive, because these nodes can have many visible children. + // Avoid descending into repetition nodes unless we have already + // determined that this query can match rootless patterns inside + // of this type of repetition node. + Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor); + if (ts_subtree_is_repetition(subtree)) { + bool exists; + uint32_t index; + array_search_sorted_by( + &self->query->repeat_symbols_with_rootless_patterns,, + ts_subtree_symbol(subtree), + &index, + &exists + ); + return exists; + } + + return true; + } + + return false; +} + +// Walk the tree, processing patterns until at least one pattern finishes, +// If one or more patterns finish, return `true` and store their states in the +// `finished_states` array. Multiple patterns can finish on the same node. If +// there are no more matches, return `false`. +static inline bool ts_query_cursor__advance( + t_query_cursor *self, + bool stop_on_definite_step +) { + bool did_match = false; + for (;;) { + if (self->halted) { + while (self->states.size > 0) { + QueryState state = array_pop(&self->states); + capture_list_pool_release( + &self->capture_list_pool, + state.capture_list_id + ); + } + } + + if (did_match || self->halted) return did_match; + + // Exit the current node. 
+ if (self->ascending) { + if (self->on_visible_node) { + LOG( + "leave node. depth:%u, type:%s\n", + self->depth, + ts_node_type(ts_tree_cursor_current_node(&self->cursor)) + ); + + // After leaving a node, remove any states that cannot make further progress. + uint32_t deleted_count = 0; + for (unsigned i = 0, n = self->states.size; i < n; i++) { + QueryState *state = &self->states.contents[i]; + QueryStep *step = &self->query->steps.contents[state->step_index]; + + // If a state completed its pattern inside of this node, but was deferred from finishing + // in order to search for longer matches, mark it as finished. + if ( + step->depth == PATTERN_DONE_MARKER && + (state->start_depth > self->depth || self->depth == 0) + ) { + LOG(" finish pattern %u\n", state->pattern_index); + array_push(&self->finished_states, *state); + did_match = true; + deleted_count++; + } + + // If a state needed to match something within this node, then remove that state + // as it has failed to match. + else if ( + step->depth != PATTERN_DONE_MARKER && + (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth + ) { + LOG( + " failed to match. pattern:%u, step:%u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release( + &self->capture_list_pool, + state->capture_list_id + ); + deleted_count++; + } + + else if (deleted_count > 0) { + self->states.contents[i - deleted_count] = *state; + } + } + self->states.size -= deleted_count; + } + + // Leave this node by stepping to its next sibling or to its parent. 
+ switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) { + case TreeCursorStepVisible: + if (!self->on_visible_node) { + self->depth++; + self->on_visible_node = true; + } + self->ascending = false; + break; + case TreeCursorStepHidden: + if (self->on_visible_node) { + self->depth--; + self->on_visible_node = false; + } + self->ascending = false; + break; + default: + if (ts_tree_cursor_goto_parent(&self->cursor)) { + self->depth--; + } else { + LOG("halt at root\n"); + self->halted = true; + } + } + } + + // Enter a new node. + else { + // Get the properties of the current node. + t_parse_node node = ts_tree_cursor_current_node(&self->cursor); + t_parse_node parent_node = ts_tree_cursor_parent_node(&self->cursor); + bool parent_precedes_range = !ts_node_is_null(parent_node) && ( + ts_node_end_byte(parent_node) <= self->start_byte || + point_lte(ts_node_end_point(parent_node), self->start_point) + ); + bool parent_follows_range = !ts_node_is_null(parent_node) && ( + ts_node_start_byte(parent_node) >= self->end_byte || + point_gte(ts_node_start_point(parent_node), self->end_point) + ); + bool node_precedes_range = parent_precedes_range || ( + ts_node_end_byte(node) <= self->start_byte || + point_lte(ts_node_end_point(node), self->start_point) + ); + bool node_follows_range = parent_follows_range || ( + ts_node_start_byte(node) >= self->end_byte || + point_gte(ts_node_start_point(node), self->end_point) + ); + bool parent_intersects_range = !parent_precedes_range && !parent_follows_range; + bool node_intersects_range = !node_precedes_range && !node_follows_range; + + if (self->on_visible_node) { + t_symbol symbol = ts_node_symbol(node); + bool is_named = ts_node_is_named(node); + bool has_later_siblings; + bool has_later_named_siblings; + bool can_have_later_siblings_with_this_field; + t_field_id field_id = 0; + t_symbol supertypes[8] = {0}; + unsigned supertype_count = 8; + ts_tree_cursor_current_status( + &self->cursor, + &field_id, + 
&has_later_siblings, + &has_later_named_siblings, + &can_have_later_siblings_with_this_field, + supertypes, + &supertype_count + ); + LOG( + "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", + self->depth, + ts_node_type(node), + ts_language_field_name_for_id(self->query->language, field_id), + ts_node_start_point(node).row, + self->states.size, + self->finished_states.size + ); + + bool node_is_error = symbol == ts_builtin_sym_error; + bool parent_is_error = + !ts_node_is_null(parent_node) && + ts_node_symbol(parent_node) == ts_builtin_sym_error; + + // Add new states for any patterns whose root node is a wildcard. + if (!node_is_error) { + for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { + PatternEntry *pattern = &self->query->pattern_map.contents[i]; + + // If this node matches the first step of the pattern, then add a new + // state at the start of this pattern. + QueryStep *step = &self->query->steps.contents[pattern->step_index]; + uint32_t start_depth = self->depth - step->depth; + if ( + (pattern->is_rooted ? + node_intersects_range : + (parent_intersects_range && !parent_is_error)) && + (!step->field || field_id == step->field) && + (!step->supertype_symbol || supertype_count > 0) && + (start_depth <= self->max_start_depth) + ) { + ts_query_cursor__add_state(self, pattern); + } + } + } + + // Add new states for any patterns whose root node matches this node. + unsigned i; + if (ts_query__pattern_map_search(self->query, symbol, &i)) { + PatternEntry *pattern = &self->query->pattern_map.contents[i]; + + QueryStep *step = &self->query->steps.contents[pattern->step_index]; + uint32_t start_depth = self->depth - step->depth; + do { + // If this node matches the first step of the pattern, then add a new + // state at the start of this pattern. + if ( + (pattern->is_rooted ? 
+ node_intersects_range : + (parent_intersects_range && !parent_is_error)) && + (!step->field || field_id == step->field) && + (start_depth <= self->max_start_depth) + ) { + ts_query_cursor__add_state(self, pattern); + } + + // Advance to the next pattern whose root node matches this node. + i++; + if (i == self->query->pattern_map.size) break; + pattern = &self->query->pattern_map.contents[i]; + step = &self->query->steps.contents[pattern->step_index]; + } while (step->symbol == symbol); + } + + // Update all of the in-progress states with current node. + for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) { + QueryState *state = &self->states.contents[j]; + QueryStep *step = &self->query->steps.contents[state->step_index]; + state->has_in_progress_alternatives = false; + copy_count = 0; + + // Check that the node matches all of the criteria for the next + // step of the pattern. + if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue; + + // Determine if this node matches this step of the pattern, and also + // if this node can have later siblings that match this step of the + // pattern. 
+ bool node_does_match = false; + if (step->symbol == WILDCARD_SYMBOL) { + node_does_match = !node_is_error && (is_named || !step->is_named); + } else { + node_does_match = symbol == step->symbol; + } + bool later_sibling_can_match = has_later_siblings; + if ((step->is_immediate && is_named) || state->seeking_immediate_match) { + later_sibling_can_match = false; + } + if (step->is_last_child && has_later_named_siblings) { + node_does_match = false; + } + if (step->supertype_symbol) { + bool has_supertype = false; + for (unsigned k = 0; k < supertype_count; k++) { + if (supertypes[k] == step->supertype_symbol) { + has_supertype = true; + break; + } + } + if (!has_supertype) node_does_match = false; + } + if (step->field) { + if (step->field == field_id) { + if (!can_have_later_siblings_with_this_field) { + later_sibling_can_match = false; + } + } else { + node_does_match = false; + } + } + + if (step->negated_field_list_id) { + t_field_id *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id]; + for (;;) { + t_field_id negated_field_id = *negated_field_ids; + if (negated_field_id) { + negated_field_ids++; + if (ts_node_child_by_field_id(node, negated_field_id).id) { + node_does_match = false; + break; + } + } else { + break; + } + } + } + + // Remove states immediately if it is ever clear that they cannot match. + if (!node_does_match) { + if (!later_sibling_can_match) { + LOG( + " discard state. pattern:%u, step:%u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release( + &self->capture_list_pool, + state->capture_list_id + ); + array_erase(&self->states, j); + j--; + } + continue; + } + + // Some patterns can match their root node in multiple ways, capturing different + // children. If this pattern step could match later children within the same + // parent, then this query state cannot simply be updated in place. 
It must be + // split into two states: one that matches this node, and one which skips over + // this node, to preserve the possibility of matching later siblings. + if (later_sibling_can_match && ( + step->contains_captures || + ts_query__step_is_fallible(self->query, state->step_index) + )) { + if (ts_query_cursor__copy_state(self, &state)) { + LOG( + " split state for capture. pattern:%u, step:%u\n", + state->pattern_index, + state->step_index + ); + copy_count++; + } + } + + // If this pattern started with a wildcard, such that the pattern map + // actually points to the *second* step of the pattern, then check + // that the node has a parent, and capture the parent node if necessary. + if (state->needs_parent) { + t_parse_node parent = ts_tree_cursor_parent_node(&self->cursor); + if (ts_node_is_null(parent)) { + LOG(" missing parent node\n"); + state->dead = true; + } else { + state->needs_parent = false; + QueryStep *skipped_wildcard_step = step; + do { + skipped_wildcard_step--; + } while ( + skipped_wildcard_step->is_dead_end || + skipped_wildcard_step->is_pass_through || + skipped_wildcard_step->depth > 0 + ); + if (skipped_wildcard_step->capture_ids[0] != NONE) { + LOG(" capture wildcard parent\n"); + ts_query_cursor__capture( + self, + state, + skipped_wildcard_step, + parent + ); + } + } + } + + // If the current node is captured in this pattern, add it to the capture list. + if (step->capture_ids[0] != NONE) { + ts_query_cursor__capture(self, state, step, node); + } + + if (state->dead) { + array_erase(&self->states, j); + j--; + continue; + } + + // Advance this state to the next step of its pattern. + state->step_index++; + state->seeking_immediate_match = false; + LOG( + " advance state. 
pattern:%u, step:%u\n", + state->pattern_index, + state->step_index + ); + + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true; + + // If this state's next step has an alternative step, then copy the state in order + // to pursue both alternatives. The alternative step itself may have an alternative, + // so this is an interactive process. + unsigned end_index = j + 1; + for (unsigned k = j; k < end_index; k++) { + QueryState *child_state = &self->states.contents[k]; + QueryStep *child_step = &self->query->steps.contents[child_state->step_index]; + if (child_step->alternative_index != NONE) { + // A "dead-end" step exists only to add a non-sequential jump into the step sequence, + // via its alternative index. When a state reaches a dead-end step, it jumps straight + // to the step's alternative. + if (child_step->is_dead_end) { + child_state->step_index = child_step->alternative_index; + k--; + continue; + } + + // A "pass-through" step exists only to add a branch into the step sequence, + // via its alternative_index. When a state reaches a pass-through step, it splits + // in order to process the alternative step, and then it advances to the next step. + if (child_step->is_pass_through) { + child_state->step_index++; + k--; + } + + QueryState *copy = ts_query_cursor__copy_state(self, &child_state); + if (copy) { + LOG( + " split state for branch. 
pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", + copy->pattern_index, + copy->step_index, + next_step->alternative_index, + next_step->alternative_is_immediate, + capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size + ); + end_index++; + copy_count++; + copy->step_index = child_step->alternative_index; + if (child_step->alternative_is_immediate) { + copy->seeking_immediate_match = true; + } + } + } + } + } + + for (unsigned j = 0; j < self->states.size; j++) { + QueryState *state = &self->states.contents[j]; + if (state->dead) { + array_erase(&self->states, j); + j--; + continue; + } + + // Enforce the longest-match criteria. When a query pattern contains optional or + // repeated nodes, this is necessary to avoid multiple redundant states, where + // one state has a strict subset of another state's captures. + bool did_remove = false; + for (unsigned k = j + 1; k < self->states.size; k++) { + QueryState *other_state = &self->states.contents[k]; + + // Query states are kept in ascending order of start_depth and pattern_index. + // Since the longest-match criteria is only used for deduping matches of the same + // pattern and root node, we only need to perform pairwise comparisons within a + // small slice of the states array. + if ( + other_state->start_depth != state->start_depth || + other_state->pattern_index != state->pattern_index + ) break; + + bool left_contains_right, right_contains_left; + ts_query_cursor__compare_captures( + self, + state, + other_state, + &left_contains_right, + &right_contains_left + ); + if (left_contains_right) { + if (state->step_index == other_state->step_index) { + LOG( + " drop shorter state. 
pattern: %u, step_index: %u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); + array_erase(&self->states, k); + k--; + continue; + } + other_state->has_in_progress_alternatives = true; + } + if (right_contains_left) { + if (state->step_index == other_state->step_index) { + LOG( + " drop shorter state. pattern: %u, step_index: %u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); + array_erase(&self->states, j); + j--; + did_remove = true; + break; + } + state->has_in_progress_alternatives = true; + } + } + + // If the state is at the end of its pattern, remove it from the list + // of in-progress states and add it to the list of finished states. + if (!did_remove) { + LOG( + " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n", + state->pattern_index, + state->start_depth, + state->step_index, + capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size + ); + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if (next_step->depth == PATTERN_DONE_MARKER) { + if (state->has_in_progress_alternatives) { + LOG(" defer finishing pattern %u\n", state->pattern_index); + } else { + LOG(" finish pattern %u\n", state->pattern_index); + array_push(&self->finished_states, *state); + array_erase(&self->states, (uint32_t)(state - self->states.contents)); + did_match = true; + j--; + } + } + } + } + } + + if (ts_query_cursor__should_descend(self, node_intersects_range)) { + switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) { + case TreeCursorStepVisible: + self->depth++; + self->on_visible_node = true; + continue; + case TreeCursorStepHidden: + self->on_visible_node = false; + continue; + default: + break; + } + } + + self->ascending = true; + } + } +} + +bool ts_query_cursor_next_match( + t_query_cursor *self, + t_query_match 
*match +) { + if (self->finished_states.size == 0) { + if (!ts_query_cursor__advance(self, false)) { + return false; + } + } + + QueryState *state = &self->finished_states.contents[0]; + if (state->id == UINT32_MAX) state->id = self->next_state_id++; + match->id = state->id; + match->pattern_index = state->pattern_index; + const CaptureList *captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + match->captures = captures->contents; + match->capture_count = captures->size; + capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); + array_erase(&self->finished_states, 0); + return true; +} + +void ts_query_cursor_remove_match( + t_query_cursor *self, + uint32_t match_id +) { + for (unsigned i = 0; i < self->finished_states.size; i++) { + const QueryState *state = &self->finished_states.contents[i]; + if (state->id == match_id) { + capture_list_pool_release( + &self->capture_list_pool, + state->capture_list_id + ); + array_erase(&self->finished_states, i); + return; + } + } + + // Remove unfinished query states as well to prevent future + // captures for a match being removed. + for (unsigned i = 0; i < self->states.size; i++) { + const QueryState *state = &self->states.contents[i]; + if (state->id == match_id) { + capture_list_pool_release( + &self->capture_list_pool, + state->capture_list_id + ); + array_erase(&self->states, i); + return; + } + } +} + +bool ts_query_cursor_next_capture( + t_query_cursor *self, + t_query_match *match, + uint32_t *capture_index +) { + // The goal here is to return captures in order, even though they may not + // be discovered in order, because patterns can overlap. Search for matches + // until there is a finished capture that is before any unfinished capture. + for (;;) { + // First, find the earliest capture in an unfinished match. 
+ uint32_t first_unfinished_capture_byte; + uint32_t first_unfinished_pattern_index; + uint32_t first_unfinished_state_index; + bool first_unfinished_state_is_definite = false; + ts_query_cursor__first_in_progress_capture( + self, + &first_unfinished_state_index, + &first_unfinished_capture_byte, + &first_unfinished_pattern_index, + &first_unfinished_state_is_definite + ); + + // Then find the earliest capture in a finished match. It must occur + // before the first capture in an *unfinished* match. + QueryState *first_finished_state = NULL; + uint32_t first_finished_capture_byte = first_unfinished_capture_byte; + uint32_t first_finished_pattern_index = first_unfinished_pattern_index; + for (unsigned i = 0; i < self->finished_states.size;) { + QueryState *state = &self->finished_states.contents[i]; + const CaptureList *captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + + // Remove states whose captures are all consumed. + if (state->consumed_capture_count >= captures->size) { + capture_list_pool_release( + &self->capture_list_pool, + state->capture_list_id + ); + array_erase(&self->finished_states, i); + continue; + } + + t_parse_node node = captures->contents[state->consumed_capture_count].node; + + bool node_precedes_range = ( + ts_node_end_byte(node) <= self->start_byte || + point_lte(ts_node_end_point(node), self->start_point) + ); + bool node_follows_range = ( + ts_node_start_byte(node) >= self->end_byte || + point_gte(ts_node_start_point(node), self->end_point) + ); + bool node_outside_of_range = node_precedes_range || node_follows_range; + + // Skip captures that are outside of the cursor's range. 
+ if (node_outside_of_range) { + state->consumed_capture_count++; + continue; + } + + uint32_t node_start_byte = ts_node_start_byte(node); + if ( + node_start_byte < first_finished_capture_byte || + ( + node_start_byte == first_finished_capture_byte && + state->pattern_index < first_finished_pattern_index + ) + ) { + first_finished_state = state; + first_finished_capture_byte = node_start_byte; + first_finished_pattern_index = state->pattern_index; + } + i++; + } + + // If there is finished capture that is clearly before any unfinished + // capture, then return its match, and its capture index. Internally + // record the fact that the capture has been 'consumed'. + QueryState *state; + if (first_finished_state) { + state = first_finished_state; + } else if (first_unfinished_state_is_definite) { + state = &self->states.contents[first_unfinished_state_index]; + } else { + state = NULL; + } + + if (state) { + if (state->id == UINT32_MAX) state->id = self->next_state_id++; + match->id = state->id; + match->pattern_index = state->pattern_index; + const CaptureList *captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + match->captures = captures->contents; + match->capture_count = captures->size; + *capture_index = state->consumed_capture_count; + state->consumed_capture_count++; + return true; + } + + if (capture_list_pool_is_empty(&self->capture_list_pool)) { + LOG( + " abandon state. index:%u, pattern:%u, offset:%u.\n", + first_unfinished_state_index, + first_unfinished_pattern_index, + first_unfinished_capture_byte + ); + capture_list_pool_release( + &self->capture_list_pool, + self->states.contents[first_unfinished_state_index].capture_list_id + ); + array_erase(&self->states, first_unfinished_state_index); + } + + // If there are no finished matches that are ready to be returned, then + // continue finding more matches. 
+ if ( + !ts_query_cursor__advance(self, true) && + self->finished_states.size == 0 + ) return false; + } +} + +void ts_query_cursor_set_max_start_depth( + t_query_cursor *self, + uint32_t max_start_depth +) { + self->max_start_depth = max_start_depth; +} + +#undef LOG +#include "src/array.h" +#include "src/parser.h" + +#include +#include +#include +#include + +enum TokenType { + HEREDOC_START, + SIMPLE_HEREDOC_BODY, + HEREDOC_BODY_BEGINNING, + HEREDOC_CONTENT, + HEREDOC_END, + FILE_DESCRIPTOR, + EMPTY_VALUE, + CONCAT, + VARIABLE_NAME, + TEST_OPERATOR, + REGEX, + REGEX_NO_SLASH, + REGEX_NO_SPACE, + EXPANSION_WORD, + EXTGLOB_PATTERN, + BARE_DOLLAR, + BRACE_START, + IMMEDIATE_DOUBLE_HASH, + EXTERNAL_EXPANSION_SYM_HASH, + EXTERNAL_EXPANSION_SYM_BANG, + EXTERNAL_EXPANSION_SYM_EQUAL, + CLOSING_BRACE, + CLOSING_BRACKET, + HEREDOC_ARROW, + HEREDOC_ARROW_DASH, + NEWLINE, + OPENING_PAREN, + ESAC, + ERROR_RECOVERY, +}; + +typedef Array(char) String; + +typedef struct { + bool is_raw; + bool started; + bool allows_indent; + String delimiter; + String current_leading_word; +} Heredoc; + +#define heredoc_new() \ + { \ + .is_raw = false, \ + .started = false, \ + .allows_indent = false, \ + .delimiter = array_new(), \ + .current_leading_word = array_new(), \ + }; + +typedef struct { + uint8_t last_glob_paren_depth; + bool ext_was_in_double_quote; + bool ext_saw_outside_quote; + Array(Heredoc) heredocs; +} Scanner; + +static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + +static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } + +static inline bool in_error_recovery(const bool *valid_symbols) { return valid_symbols[ERROR_RECOVERY]; } + +static inline void reset_string(String *string) { + if (string->size > 0) { + memset(string->contents, 0, string->size); + array_clear(string); + } +} + +static inline void reset_heredoc(Heredoc *heredoc) { + heredoc->is_raw = false; + heredoc->started = false; + heredoc->allows_indent = false; + 
reset_string(&heredoc->delimiter); +} + +static inline void reset(Scanner *scanner) { + for (uint32_t i = 0; i < scanner->heredocs.size; i++) { + reset_heredoc(array_get(&scanner->heredocs, i)); + } +} + +static unsigned serialize(Scanner *scanner, char *buffer) { + uint32_t size = 0; + + buffer[size++] = (char)scanner->last_glob_paren_depth; + buffer[size++] = (char)scanner->ext_was_in_double_quote; + buffer[size++] = (char)scanner->ext_saw_outside_quote; + buffer[size++] = (char)scanner->heredocs.size; + + for (uint32_t i = 0; i < scanner->heredocs.size; i++) { + Heredoc *heredoc = array_get(&scanner->heredocs, i); + if (heredoc->delimiter.size + 3 + size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { + return 0; + } + + buffer[size++] = (char)heredoc->is_raw; + buffer[size++] = (char)heredoc->started; + buffer[size++] = (char)heredoc->allows_indent; + + memcpy(&buffer[size], &heredoc->delimiter.size, sizeof(uint32_t)); + size += sizeof(uint32_t); + if (heredoc->delimiter.size > 0) { + memcpy(&buffer[size], heredoc->delimiter.contents, heredoc->delimiter.size); + size += heredoc->delimiter.size; + } + } + return size; +} + +static void deserialize(Scanner *scanner, const char *buffer, unsigned length) { + if (length == 0) { + reset(scanner); + } else { + uint32_t size = 0; + scanner->last_glob_paren_depth = buffer[size++]; + scanner->ext_was_in_double_quote = buffer[size++]; + scanner->ext_saw_outside_quote = buffer[size++]; + uint32_t heredoc_count = (unsigned char)buffer[size++]; + for (uint32_t i = 0; i < heredoc_count; i++) { + Heredoc *heredoc = NULL; + if (i < scanner->heredocs.size) { + heredoc = array_get(&scanner->heredocs, i); + } else { + Heredoc new_heredoc = heredoc_new(); + array_push(&scanner->heredocs, new_heredoc); + heredoc = array_back(&scanner->heredocs); + } + + heredoc->is_raw = buffer[size++]; + heredoc->started = buffer[size++]; + heredoc->allows_indent = buffer[size++]; + + memcpy(&heredoc->delimiter.size, &buffer[size], sizeof(uint32_t)); + 
size += sizeof(uint32_t); + array_reserve(&heredoc->delimiter, heredoc->delimiter.size); + + if (heredoc->delimiter.size > 0) { + memcpy(heredoc->delimiter.contents, &buffer[size], heredoc->delimiter.size); + size += heredoc->delimiter.size; + } + } + assert(size == length); + } +} + +/** + * Consume a "word" in POSIX parlance, and returns it unquoted. + * + * This is an approximate implementation that doesn't deal with any + * POSIX-mandated substitution, and assumes the default value for + * IFS. + */ +static bool advance_word(TSLexer *lexer, String *unquoted_word) { + bool empty = true; + + int32_t quote = 0; + if (lexer->lookahead == '\'' || lexer->lookahead == '"') { + quote = lexer->lookahead; + advance(lexer); + } + + while (lexer->lookahead && + !(quote ? lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n' + : iswspace(lexer->lookahead))) { + if (lexer->lookahead == '\\') { + advance(lexer); + if (!lexer->lookahead) { + return false; + } + } + empty = false; + array_push(unquoted_word, lexer->lookahead); + advance(lexer); + } + array_push(unquoted_word, '\0'); + + if (quote && lexer->lookahead == quote) { + advance(lexer); + } + + return !empty; +} + +static inline bool scan_bare_dollar(TSLexer *lexer) { + while (iswspace(lexer->lookahead) && lexer->lookahead != '\n' && !lexer->eof(lexer)) { + skip(lexer); + } + + if (lexer->lookahead == '$') { + advance(lexer); + lexer->result_symbol = BARE_DOLLAR; + lexer->mark_end(lexer); + return iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"'; + } + + return false; +} + +static bool scan_heredoc_start(Heredoc *heredoc, TSLexer *lexer) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + lexer->result_symbol = HEREDOC_START; + heredoc->is_raw = lexer->lookahead == '\'' || lexer->lookahead == '"' || lexer->lookahead == '\\'; + + bool found_delimiter = advance_word(lexer, &heredoc->delimiter); + if (!found_delimiter) { + 
reset_string(&heredoc->delimiter); + return false; + } + return found_delimiter; +} + +static bool scan_heredoc_end_identifier(Heredoc *heredoc, TSLexer *lexer) { + reset_string(&heredoc->current_leading_word); + // Scan the first 'n' characters on this line, to see if they match the + // heredoc delimiter + int32_t size = 0; + if (heredoc->delimiter.size > 0) { + while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && + (int32_t)*array_get(&heredoc->delimiter, size) == lexer->lookahead && + heredoc->current_leading_word.size < heredoc->delimiter.size) { + array_push(&heredoc->current_leading_word, lexer->lookahead); + advance(lexer); + size++; + } + } + array_push(&heredoc->current_leading_word, '\0'); + return heredoc->delimiter.size == 0 + ? false + : strcmp(heredoc->current_leading_word.contents, heredoc->delimiter.contents) == 0; +} + +static bool scan_heredoc_content(Scanner *scanner, TSLexer *lexer, enum TokenType middle_type, + enum TokenType end_type) { + bool did_advance = false; + Heredoc *heredoc = array_back(&scanner->heredocs); + + for (;;) { + switch (lexer->lookahead) { + case '\0': { + if (lexer->eof(lexer) && did_advance) { + reset_heredoc(heredoc); + lexer->result_symbol = end_type; + return true; + } + return false; + } + + case '\\': { + did_advance = true; + advance(lexer); + advance(lexer); + break; + } + + case '$': { + if (heredoc->is_raw) { + did_advance = true; + advance(lexer); + break; + } + if (did_advance) { + lexer->mark_end(lexer); + lexer->result_symbol = middle_type; + heredoc->started = true; + advance(lexer); + if (iswalpha(lexer->lookahead) || lexer->lookahead == '{' || lexer->lookahead == '(') { + return true; + } + break; + } + if (middle_type == HEREDOC_BODY_BEGINNING && lexer->get_column(lexer) == 0) { + lexer->result_symbol = middle_type; + heredoc->started = true; + return true; + } + return false; + } + + case '\n': { + if (!did_advance) { + skip(lexer); + } else { + advance(lexer); + } + did_advance = true; + if 
(heredoc->allows_indent) { + while (iswspace(lexer->lookahead)) { + advance(lexer); + } + } + lexer->result_symbol = heredoc->started ? middle_type : end_type; + lexer->mark_end(lexer); + if (scan_heredoc_end_identifier(heredoc, lexer)) { + if (lexer->result_symbol == HEREDOC_END) { + array_pop(&scanner->heredocs); + } + return true; + } + break; + } + + default: { + if (lexer->get_column(lexer) == 0) { + // an alternative is to check the starting column of the + // heredoc body and track that statefully + while (iswspace(lexer->lookahead)) { + if (did_advance) { + advance(lexer); + } else { + skip(lexer); + } + } + if (end_type != SIMPLE_HEREDOC_BODY) { + lexer->result_symbol = middle_type; + if (scan_heredoc_end_identifier(heredoc, lexer)) { + return true; + } + } + if (end_type == SIMPLE_HEREDOC_BODY) { + lexer->result_symbol = end_type; + lexer->mark_end(lexer); + if (scan_heredoc_end_identifier(heredoc, lexer)) { + return true; + } + } + } + did_advance = true; + advance(lexer); + break; + } + } + } +} + +static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) { + if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols)) { + if (!(lexer->lookahead == 0 || iswspace(lexer->lookahead) || lexer->lookahead == '>' || + lexer->lookahead == '<' || lexer->lookahead == ')' || lexer->lookahead == '(' || + lexer->lookahead == ';' || lexer->lookahead == '&' || lexer->lookahead == '|' || + (lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) || + (lexer->lookahead == ']' && valid_symbols[CLOSING_BRACKET]))) { + lexer->result_symbol = CONCAT; + // So for a`b`, we want to return a concat. We check if the + // 2nd backtick has whitespace after it, and if it does we + // return concat. 
+ if (lexer->lookahead == '`') { + lexer->mark_end(lexer); + advance(lexer); + while (lexer->lookahead != '`' && !lexer->eof(lexer)) { + advance(lexer); + } + if (lexer->eof(lexer)) { + return false; + } + if (lexer->lookahead == '`') { + advance(lexer); + } + return iswspace(lexer->lookahead) || lexer->eof(lexer); + } + // strings w/ expansions that contains escaped quotes or + // backslashes need this to return a concat + if (lexer->lookahead == '\\') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '"' || lexer->lookahead == '\'' || lexer->lookahead == '\\') { + return true; + } + if (lexer->eof(lexer)) { + return false; + } + } else { + return true; + } + } + if (iswspace(lexer->lookahead) && valid_symbols[CLOSING_BRACE] && !valid_symbols[EXPANSION_WORD]) { + lexer->result_symbol = CONCAT; + return true; + } + } + + if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !in_error_recovery(valid_symbols)) { + // advance two # and ensure not } after + if (lexer->lookahead == '#') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '#') { + advance(lexer); + if (lexer->lookahead != '}') { + lexer->result_symbol = IMMEDIATE_DOUBLE_HASH; + lexer->mark_end(lexer); + return true; + } + } + } + } + + if (valid_symbols[EXTERNAL_EXPANSION_SYM_HASH] && !in_error_recovery(valid_symbols)) { + if (lexer->lookahead == '#' || lexer->lookahead == '=' || lexer->lookahead == '!') { + lexer->result_symbol = lexer->lookahead == '#' ? EXTERNAL_EXPANSION_SYM_HASH + : lexer->lookahead == '!' ? 
EXTERNAL_EXPANSION_SYM_BANG + : EXTERNAL_EXPANSION_SYM_EQUAL; + advance(lexer); + lexer->mark_end(lexer); + while (lexer->lookahead == '#' || lexer->lookahead == '=' || lexer->lookahead == '!') { + advance(lexer); + } + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + if (lexer->lookahead == '}') { + return true; + } + return false; + } + } + + if (valid_symbols[EMPTY_VALUE]) { + if (iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == ';' || lexer->lookahead == '&') { + lexer->result_symbol = EMPTY_VALUE; + return true; + } + } + + if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.size > 0 && + !array_back(&scanner->heredocs)->started && !in_error_recovery(valid_symbols)) { + return scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY); + } + + if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0) { + Heredoc *heredoc = array_back(&scanner->heredocs); + if (scan_heredoc_end_identifier(heredoc, lexer)) { + array_delete(&heredoc->current_leading_word); + array_delete(&heredoc->delimiter); + array_pop(&scanner->heredocs); + lexer->result_symbol = HEREDOC_END; + return true; + } + } + + if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 && array_back(&scanner->heredocs)->started && + !in_error_recovery(valid_symbols)) { + return scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END); + } + + if (valid_symbols[HEREDOC_START] && !in_error_recovery(valid_symbols) && scanner->heredocs.size > 0) { + return scan_heredoc_start(array_back(&scanner->heredocs), lexer); + } + + if (valid_symbols[TEST_OPERATOR] && !valid_symbols[EXPANSION_WORD]) { + while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') { + skip(lexer); + } + + if (lexer->lookahead == '\\') { + if (valid_symbols[EXTGLOB_PATTERN]) { + goto extglob_pattern; + } + if (valid_symbols[REGEX_NO_SPACE]) { + goto regex; + } + skip(lexer); + + if (lexer->eof(lexer)) { + 
return false; + } + + if (lexer->lookahead == '\r') { + skip(lexer); + if (lexer->lookahead == '\n') { + skip(lexer); + } + } else if (lexer->lookahead == '\n') { + skip(lexer); + } else { + return false; + } + + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + } + + if (lexer->lookahead == '\n' && !valid_symbols[NEWLINE]) { + skip(lexer); + + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + } + + if (lexer->lookahead == '-') { + advance(lexer); + + bool advanced_once = false; + while (iswalpha(lexer->lookahead)) { + advanced_once = true; + advance(lexer); + } + + if (iswspace(lexer->lookahead) && advanced_once) { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) { + if (valid_symbols[EXPANSION_WORD]) { + lexer->mark_end(lexer); + lexer->result_symbol = EXPANSION_WORD; + return true; + } + return false; + } + lexer->result_symbol = TEST_OPERATOR; + return true; + } + if (iswspace(lexer->lookahead) && valid_symbols[EXTGLOB_PATTERN]) { + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && scan_bare_dollar(lexer)) { + return true; + } + } + + if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) && + !valid_symbols[REGEX_NO_SLASH] && !in_error_recovery(valid_symbols)) { + for (;;) { + if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == '\r' || + (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) && + !valid_symbols[EXPANSION_WORD]) { + skip(lexer); + } else if (lexer->lookahead == '\\') { + skip(lexer); + + if (lexer->eof(lexer)) { + lexer->mark_end(lexer); + lexer->result_symbol = VARIABLE_NAME; + return true; + } + + if (lexer->lookahead == '\r') { + skip(lexer); + } + if (lexer->lookahead == '\n') { + skip(lexer); + } else { + if (lexer->lookahead == '\\' && valid_symbols[EXPANSION_WORD]) { + goto expansion_word; + } + return 
false; + } + } else { + break; + } + } + + // no '*', '@', '?', '-', '$', '0', '_' + if (!valid_symbols[EXPANSION_WORD] && + (lexer->lookahead == '*' || lexer->lookahead == '@' || lexer->lookahead == '?' || lexer->lookahead == '-' || + lexer->lookahead == '0' || lexer->lookahead == '_')) { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '=' || lexer->lookahead == '[' || lexer->lookahead == ':' || + lexer->lookahead == '-' || lexer->lookahead == '%' || lexer->lookahead == '#' || + lexer->lookahead == '/') { + return false; + } + if (valid_symbols[EXTGLOB_PATTERN] && iswspace(lexer->lookahead)) { + lexer->mark_end(lexer); + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<') { + advance(lexer); + if (lexer->lookahead == '<') { + advance(lexer); + if (lexer->lookahead == '-') { + advance(lexer); + Heredoc heredoc = heredoc_new(); + heredoc.allows_indent = true; + array_push(&scanner->heredocs, heredoc); + lexer->result_symbol = HEREDOC_ARROW_DASH; + } else if (lexer->lookahead == '<' || lexer->lookahead == '=') { + return false; + } else { + Heredoc heredoc = heredoc_new(); + array_push(&scanner->heredocs, heredoc); + lexer->result_symbol = HEREDOC_ARROW; + } + return true; + } + return false; + } + + bool is_number = true; + if (iswdigit(lexer->lookahead)) { + advance(lexer); + } else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') { + is_number = false; + advance(lexer); + } else { + if (lexer->lookahead == '{') { + goto brace_start; + } + if (valid_symbols[EXPANSION_WORD]) { + goto expansion_word; + } + if (valid_symbols[EXTGLOB_PATTERN]) { + goto extglob_pattern; + } + return false; + } + + for (;;) { + if (iswdigit(lexer->lookahead)) { + advance(lexer); + } else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') { + is_number = false; + advance(lexer); + } else { + break; + } + } + + if (is_number && valid_symbols[FILE_DESCRIPTOR] && 
(lexer->lookahead == '>' || lexer->lookahead == '<')) { + lexer->result_symbol = FILE_DESCRIPTOR; + return true; + } + + if (valid_symbols[VARIABLE_NAME]) { + if (lexer->lookahead == '+') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '=' || lexer->lookahead == ':' || valid_symbols[CLOSING_BRACE]) { + lexer->result_symbol = VARIABLE_NAME; + return true; + } + return false; + } + if (lexer->lookahead == '/') { + return false; + } + if (lexer->lookahead == '=' || lexer->lookahead == '[' || + (lexer->lookahead == ':' && !valid_symbols[CLOSING_BRACE] && + !valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable + // names for function words, only handling : for now? #235 + lexer->lookahead == '%' || + (lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || + (lexer->lookahead == '-' && valid_symbols[CLOSING_BRACE])) { + lexer->mark_end(lexer); + lexer->result_symbol = VARIABLE_NAME; + return true; + } + + if (lexer->lookahead == '?') { + lexer->mark_end(lexer); + advance(lexer); + lexer->result_symbol = VARIABLE_NAME; + return iswalpha(lexer->lookahead); + } + } + + return false; + } + + if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && scan_bare_dollar(lexer)) { + return true; + } + +regex: + if ((valid_symbols[REGEX] || valid_symbols[REGEX_NO_SLASH] || valid_symbols[REGEX_NO_SPACE]) && + !in_error_recovery(valid_symbols)) { + if (valid_symbols[REGEX] || valid_symbols[REGEX_NO_SPACE]) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + } + + if ((lexer->lookahead != '"' && lexer->lookahead != '\'') || + ((lexer->lookahead == '$' || lexer->lookahead == '\'') && valid_symbols[REGEX_NO_SLASH]) || + (lexer->lookahead == '\'' && valid_symbols[REGEX_NO_SPACE])) { + typedef struct { + bool done; + bool advanced_once; + bool found_non_alnumdollarunderdash; + bool last_was_escape; + bool in_single_quote; + uint32_t paren_depth; + uint32_t bracket_depth; + uint32_t 
brace_depth; + } State; + + if (lexer->lookahead == '$' && valid_symbols[REGEX_NO_SLASH]) { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '(') { + return false; + } + } + + lexer->mark_end(lexer); + + State state = {false, false, false, false, false, 0, 0, 0}; + while (!state.done) { + if (state.in_single_quote) { + if (lexer->lookahead == '\'') { + state.in_single_quote = false; + advance(lexer); + lexer->mark_end(lexer); + } + } + switch (lexer->lookahead) { + case '\\': + state.last_was_escape = true; + break; + case '\0': + return false; + case '(': + state.paren_depth++; + state.last_was_escape = false; + break; + case '[': + state.bracket_depth++; + state.last_was_escape = false; + break; + case '{': + if (!state.last_was_escape) { + state.brace_depth++; + } + state.last_was_escape = false; + break; + case ')': + if (state.paren_depth == 0) { + state.done = true; + } + state.paren_depth--; + state.last_was_escape = false; + break; + case ']': + if (state.bracket_depth == 0) { + state.done = true; + } + state.bracket_depth--; + state.last_was_escape = false; + break; + case '}': + if (state.brace_depth == 0) { + state.done = true; + } + state.brace_depth--; + state.last_was_escape = false; + break; + case '\'': + // Enter or exit a single-quoted string. 
+ state.in_single_quote = !state.in_single_quote; + advance(lexer); + state.advanced_once = true; + state.last_was_escape = false; + continue; + default: + state.last_was_escape = false; + break; + } + + if (!state.done) { + if (valid_symbols[REGEX]) { + bool was_space = !state.in_single_quote && iswspace(lexer->lookahead); + advance(lexer); + state.advanced_once = true; + if (!was_space || state.paren_depth > 0) { + lexer->mark_end(lexer); + } + } else if (valid_symbols[REGEX_NO_SLASH]) { + if (lexer->lookahead == '/') { + lexer->mark_end(lexer); + lexer->result_symbol = REGEX_NO_SLASH; + return state.advanced_once; + } + if (lexer->lookahead == '\\') { + advance(lexer); + state.advanced_once = true; + if (!lexer->eof(lexer) && lexer->lookahead != '[' && lexer->lookahead != '/') { + advance(lexer); + lexer->mark_end(lexer); + } + } else { + bool was_space = !state.in_single_quote && iswspace(lexer->lookahead); + advance(lexer); + state.advanced_once = true; + if (!was_space) { + lexer->mark_end(lexer); + } + } + } else if (valid_symbols[REGEX_NO_SPACE]) { + if (lexer->lookahead == '\\') { + state.found_non_alnumdollarunderdash = true; + advance(lexer); + if (!lexer->eof(lexer)) { + advance(lexer); + } + } else if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + advance(lexer); + // do not parse a command + // substitution + if (lexer->lookahead == '(') { + return false; + } + // end $ always means regex, e.g. 
+ // 99999999$ + if (iswspace(lexer->lookahead)) { + lexer->result_symbol = REGEX_NO_SPACE; + lexer->mark_end(lexer); + return true; + } + } else { + bool was_space = !state.in_single_quote && iswspace(lexer->lookahead); + if (was_space && state.paren_depth == 0) { + lexer->mark_end(lexer); + lexer->result_symbol = REGEX_NO_SPACE; + return state.found_non_alnumdollarunderdash; + } + if (!iswalnum(lexer->lookahead) && lexer->lookahead != '$' && lexer->lookahead != '-' && + lexer->lookahead != '_') { + state.found_non_alnumdollarunderdash = true; + } + advance(lexer); + } + } + } + } + + lexer->result_symbol = valid_symbols[REGEX_NO_SLASH] ? REGEX_NO_SLASH + : valid_symbols[REGEX_NO_SPACE] ? REGEX_NO_SPACE + : REGEX; + if (valid_symbols[REGEX] && !state.advanced_once) { + return false; + } + return true; + } + } + +extglob_pattern: + if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols)) { + // first skip ws, then check for ? * + @ ! + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (lexer->lookahead == '?' || lexer->lookahead == '*' || lexer->lookahead == '+' || lexer->lookahead == '@' || + lexer->lookahead == '!' || lexer->lookahead == '-' || lexer->lookahead == ')' || lexer->lookahead == '\\' || + lexer->lookahead == '.' 
|| lexer->lookahead == '[' || (iswalpha(lexer->lookahead))) { + if (lexer->lookahead == '\\') { + advance(lexer); + if ((iswspace(lexer->lookahead) || lexer->lookahead == '"') && lexer->lookahead != '\r' && + lexer->lookahead != '\n') { + advance(lexer); + } else { + return false; + } + } + + if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) { + lexer->mark_end(lexer); + advance(lexer); + + if (iswspace(lexer->lookahead)) { + return false; + } + } + + lexer->mark_end(lexer); + bool was_non_alpha = !iswalpha(lexer->lookahead); + if (lexer->lookahead != '[') { + // no esac + if (lexer->lookahead == 'e') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == 's') { + advance(lexer); + if (lexer->lookahead == 'a') { + advance(lexer); + if (lexer->lookahead == 'c') { + advance(lexer); + if (iswspace(lexer->lookahead)) { + return false; + } + } + } + } + } else { + advance(lexer); + } + } + + // -\w is just a word, find something else special + if (lexer->lookahead == '-') { + lexer->mark_end(lexer); + advance(lexer); + while (iswalnum(lexer->lookahead)) { + advance(lexer); + } + + if (lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.') { + return false; + } + lexer->mark_end(lexer); + } + + // case item -) or *) + if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) { + lexer->mark_end(lexer); + advance(lexer); + if (iswspace(lexer->lookahead)) { + lexer->result_symbol = EXTGLOB_PATTERN; + return was_non_alpha; + } + } + + if (iswspace(lexer->lookahead)) { + lexer->mark_end(lexer); + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = 0; + return true; + } + + if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '{' || lexer->lookahead == '(') { + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if (lexer->lookahead == '|') { + lexer->mark_end(lexer); + advance(lexer); + lexer->result_symbol = 
EXTGLOB_PATTERN; + return true; + } + + if (!iswalnum(lexer->lookahead) && lexer->lookahead != '(' && lexer->lookahead != '"' && + lexer->lookahead != '[' && lexer->lookahead != '?' && lexer->lookahead != '/' && + lexer->lookahead != '\\' && lexer->lookahead != '_' && lexer->lookahead != '*') { + return false; + } + + typedef struct { + bool done; + bool saw_non_alphadot; + uint32_t paren_depth; + uint32_t bracket_depth; + uint32_t brace_depth; + } State; + + State state = {false, was_non_alpha, scanner->last_glob_paren_depth, 0, 0}; + while (!state.done) { + switch (lexer->lookahead) { + case '\0': + return false; + case '(': + state.paren_depth++; + break; + case '[': + state.bracket_depth++; + break; + case '{': + state.brace_depth++; + break; + case ')': + if (state.paren_depth == 0) { + state.done = true; + } + state.paren_depth--; + break; + case ']': + if (state.bracket_depth == 0) { + state.done = true; + } + state.bracket_depth--; + break; + case '}': + if (state.brace_depth == 0) { + state.done = true; + } + state.brace_depth--; + break; + } + + if (lexer->lookahead == '|') { + lexer->mark_end(lexer); + advance(lexer); + if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0) { + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if (!state.done) { + bool was_space = iswspace(lexer->lookahead); + if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' 
&& lexer->lookahead != '\\') { + state.saw_non_alphadot = true; + } + advance(lexer); + if (lexer->lookahead == '(' || lexer->lookahead == '{') { + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = state.paren_depth; + return state.saw_non_alphadot; + } + } + if (was_space) { + lexer->mark_end(lexer); + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = 0; + return state.saw_non_alphadot; + } + if (lexer->lookahead == '"') { + lexer->mark_end(lexer); + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = 0; + return state.saw_non_alphadot; + } + if (lexer->lookahead == '\\') { + if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') { + state.saw_non_alphadot = true; + } + advance(lexer); + if (iswspace(lexer->lookahead) || lexer->lookahead == '"') { + advance(lexer); + } + } else { + if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') { + state.saw_non_alphadot = true; + } + advance(lexer); + } + if (!was_space) { + lexer->mark_end(lexer); + } + } + } + + lexer->result_symbol = EXTGLOB_PATTERN; + scanner->last_glob_paren_depth = 0; + return state.saw_non_alphadot; + } + scanner->last_glob_paren_depth = 0; + + return false; + } + +expansion_word: + if (valid_symbols[EXPANSION_WORD]) { + bool advanced_once = false; + bool advance_once_space = false; + for (;;) { + if (lexer->lookahead == '\"') { + return false; + } + if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || + iswalnum(lexer->lookahead)) { + lexer->result_symbol = EXPANSION_WORD; + return advanced_once; + } + advanced_once = true; + } + + if (lexer->lookahead == '}') { + lexer->mark_end(lexer); + lexer->result_symbol = EXPANSION_WORD; + return advanced_once || advance_once_space; + } + + if (lexer->lookahead == '(' && !(advanced_once || advance_once_space)) { 
+ lexer->mark_end(lexer); + advance(lexer); + while (lexer->lookahead != ')' && !lexer->eof(lexer)) { + // if we find a $( or ${ assume this is valid and is + // a garbage concatenation of some weird word + an + // expansion + // I wonder where this can fail + if (lexer->lookahead == '$') { + lexer->mark_end(lexer); + advance(lexer); + if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || + iswalnum(lexer->lookahead)) { + lexer->result_symbol = EXPANSION_WORD; + return advanced_once; + } + advanced_once = true; + } else { + advanced_once = advanced_once || !iswspace(lexer->lookahead); + advance_once_space = advance_once_space || iswspace(lexer->lookahead); + advance(lexer); + } + } + lexer->mark_end(lexer); + if (lexer->lookahead == ')') { + advanced_once = true; + advance(lexer); + lexer->mark_end(lexer); + if (lexer->lookahead == '}') { + return false; + } + } else { + return false; + } + } + + if (lexer->lookahead == '\'') { + return false; + } + + if (lexer->eof(lexer)) { + return false; + } + advanced_once = advanced_once || !iswspace(lexer->lookahead); + advance_once_space = advance_once_space || iswspace(lexer->lookahead); + advance(lexer); + } + } + +brace_start: + if (valid_symbols[BRACE_START] && !in_error_recovery(valid_symbols)) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (lexer->lookahead != '{') { + return false; + } + + advance(lexer); + lexer->mark_end(lexer); + + while (isdigit(lexer->lookahead)) { + advance(lexer); + } + + if (lexer->lookahead != '.') { + return false; + } + advance(lexer); + + if (lexer->lookahead != '.') { + return false; + } + advance(lexer); + + while (isdigit(lexer->lookahead)) { + advance(lexer); + } + + if (lexer->lookahead != '}') { + return false; + } + + lexer->result_symbol = BRACE_START; + return true; + } + + return false; +} + +void *tree_sitter_bash_external_scanner_create() { + Scanner *scanner = calloc(1, sizeof(Scanner)); + array_init(&scanner->heredocs); + 
return scanner; +} + +bool tree_sitter_bash_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { + Scanner *scanner = (Scanner *)payload; + return scan(scanner, lexer, valid_symbols); +} + +unsigned tree_sitter_bash_external_scanner_serialize(void *payload, char *state) { + Scanner *scanner = (Scanner *)payload; + return serialize(scanner, state); +} + +void tree_sitter_bash_external_scanner_deserialize(void *payload, const char *state, unsigned length) { + Scanner *scanner = (Scanner *)payload; + deserialize(scanner, state, length); +} + +void tree_sitter_bash_external_scanner_destroy(void *payload) { + Scanner *scanner = (Scanner *)payload; + for (size_t i = 0; i < scanner->heredocs.size; i++) { + Heredoc *heredoc = array_get(&scanner->heredocs, i); + array_delete(&heredoc->current_leading_word); + array_delete(&heredoc->delimiter); + } + array_delete(&scanner->heredocs); + free(scanner); +} +#include "src/alloc.h" +#include "src/language.h" +#include "src/subtree.h" +#include "src/array.h" +#include "src/stack.h" +#include "src/length.h" +#include +#include +#include + +#define MAX_LINK_COUNT 8 +#define MAX_NODE_POOL_SIZE 50 +#define MAX_ITERATOR_COUNT 64 + +#if defined _WIN32 && !defined __GNUC__ +#define forceinline __forceinline +#else +#define forceinline static inline __attribute__((always_inline)) +#endif + +typedef struct StackNode StackNode; + +typedef struct { + StackNode *node; + Subtree subtree; + bool is_pending; +} StackLink; + +struct StackNode { + t_state_id state; + Length position; + StackLink links[MAX_LINK_COUNT]; + short unsigned int link_count; + uint32_t ref_count; + unsigned error_cost; + unsigned node_count; + int dynamic_precedence; +}; + +typedef struct { + StackNode *node; + SubtreeArray subtrees; + uint32_t subtree_count; + bool is_pending; +} StackIterator; + +typedef Array(StackNode *) StackNodeArray; + +typedef enum { + StackStatusActive, + StackStatusPaused, + StackStatusHalted, +} StackStatus; + 
+typedef struct { + StackNode *node; + StackSummary *summary; + unsigned node_count_at_last_error; + Subtree last_external_token; + Subtree lookahead_when_paused; + StackStatus status; +} StackHead; + +struct Stack { + Array(StackHead) heads; + StackSliceArray slices; + Array(StackIterator) iterators; + StackNodeArray node_pool; + StackNode *base_node; + SubtreePool *subtree_pool; +}; + +typedef unsigned StackAction; +enum { + StackActionNone, + StackActionStop = 1, + StackActionPop = 2, +}; + +typedef StackAction (*StackCallback)(void *, const StackIterator *); + +static void stack_node_retain(StackNode *self) { + if (!self) + return; + assert(self->ref_count > 0); + self->ref_count++; + assert(self->ref_count != 0); +} + +static void stack_node_release( + StackNode *self, + StackNodeArray *pool, + SubtreePool *subtree_pool +) { +recur: + assert(self->ref_count != 0); + self->ref_count--; + if (self->ref_count > 0) return; + + StackNode *first_predecessor = NULL; + if (self->link_count > 0) { + for (unsigned i = self->link_count - 1; i > 0; i--) { + StackLink link = self->links[i]; + if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree); + stack_node_release(link.node, pool, subtree_pool); + } + StackLink link = self->links[0]; + if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree); + first_predecessor = self->links[0].node; + } + + if (pool->size < MAX_NODE_POOL_SIZE) { + array_push(pool, self); + } else { + ts_free(self); + } + + if (first_predecessor) { + self = first_predecessor; + goto recur; + } +} + +/// Get the number of nodes in the subtree, for the purpose of measuring +/// how much progress has been made by a given version of the stack. 
+static uint32_t stack__subtree_node_count(Subtree subtree) { + uint32_t count = ts_subtree_visible_descendant_count(subtree); + if (ts_subtree_visible(subtree)) count++; + + // Count intermediate error nodes even though they are not visible, + // because a stack version's node count is used to check whether it + // has made any progress since the last time it encountered an error. + if (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) count++; + + return count; +} + +static StackNode *stack_node_new( + StackNode *previous_node, + Subtree subtree, + bool is_pending, + t_state_id state, + StackNodeArray *pool +) { + StackNode *node = pool->size > 0 + ? array_pop(pool) + : ts_malloc(sizeof(StackNode)); + *node = (StackNode) { + .ref_count = 1, + .link_count = 0, + .state = state + }; + + if (previous_node) { + node->link_count = 1; + node->links[0] = (StackLink) { + .node = previous_node, + .subtree = subtree, + .is_pending = is_pending, + }; + + node->position = previous_node->position; + node->error_cost = previous_node->error_cost; + node->dynamic_precedence = previous_node->dynamic_precedence; + node->node_count = previous_node->node_count; + + if (subtree.ptr) { + node->error_cost += ts_subtree_error_cost(subtree); + node->position = length_add(node->position, ts_subtree_total_size(subtree)); + node->node_count += stack__subtree_node_count(subtree); + node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree); + } + } else { + node->position = length_zero(); + node->error_cost = 0; + } + + return node; +} + +static bool stack__subtree_is_equivalent(Subtree left, Subtree right) { + if (left.ptr == right.ptr) return true; + if (!left.ptr || !right.ptr) return false; + + // Symbols must match + if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false; + + // If both have errors, don't bother keeping both. 
+ if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) return true; + + return ( + ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes && + ts_subtree_size(left).bytes == ts_subtree_size(right).bytes && + ts_subtree_child_count(left) == ts_subtree_child_count(right) && + ts_subtree_extra(left) == ts_subtree_extra(right) && + ts_subtree_external_scanner_state_eq(left, right) + ); +} + +static void stack_node_add_link( + StackNode *self, + StackLink link, + SubtreePool *subtree_pool +) { + if (link.node == self) return; + + for (int i = 0; i < self->link_count; i++) { + StackLink *existing_link = &self->links[i]; + if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) { + // In general, we preserve ambiguities until they are removed from the stack + // during a pop operation where multiple paths lead to the same node. But in + // the special case where two links directly connect the same pair of nodes, + // we can safely remove the ambiguity ahead of time without changing behavior. + if (existing_link->node == link.node) { + if ( + ts_subtree_dynamic_precedence(link.subtree) > + ts_subtree_dynamic_precedence(existing_link->subtree) + ) { + ts_subtree_retain(link.subtree); + ts_subtree_release(subtree_pool, existing_link->subtree); + existing_link->subtree = link.subtree; + self->dynamic_precedence = + link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree); + } + return; + } + + // If the previous nodes are mergeable, merge them recursively. 
+ if ( + existing_link->node->state == link.node->state && + existing_link->node->position.bytes == link.node->position.bytes && + existing_link->node->error_cost == link.node->error_cost + ) { + for (int j = 0; j < link.node->link_count; j++) { + stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool); + } + int32_t dynamic_precedence = link.node->dynamic_precedence; + if (link.subtree.ptr) { + dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); + } + if (dynamic_precedence > self->dynamic_precedence) { + self->dynamic_precedence = dynamic_precedence; + } + return; + } + } + } + + if (self->link_count == MAX_LINK_COUNT) return; + + stack_node_retain(link.node); + unsigned node_count = link.node->node_count; + int dynamic_precedence = link.node->dynamic_precedence; + self->links[self->link_count++] = link; + + if (link.subtree.ptr) { + ts_subtree_retain(link.subtree); + node_count += stack__subtree_node_count(link.subtree); + dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); + } + + if (node_count > self->node_count) self->node_count = node_count; + if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence; +} + +static void stack_head_delete( + StackHead *self, + StackNodeArray *pool, + SubtreePool *subtree_pool +) { + if (self->node) { + if (self->last_external_token.ptr) { + ts_subtree_release(subtree_pool, self->last_external_token); + } + if (self->lookahead_when_paused.ptr) { + ts_subtree_release(subtree_pool, self->lookahead_when_paused); + } + if (self->summary) { + array_delete(self->summary); + ts_free(self->summary); + } + stack_node_release(self->node, pool, subtree_pool); + } +} + +static StackVersion ts_stack__add_version( + Stack *self, + StackVersion original_version, + StackNode *node +) { + StackHead head = { + .node = node, + .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error, + .last_external_token = 
self->heads.contents[original_version].last_external_token, + .status = StackStatusActive, + .lookahead_when_paused = NULL_SUBTREE, + }; + array_push(&self->heads, head); + stack_node_retain(node); + if (head.last_external_token.ptr) ts_subtree_retain(head.last_external_token); + return (StackVersion)(self->heads.size - 1); +} + +static void ts_stack__add_slice( + Stack *self, + StackVersion original_version, + StackNode *node, + SubtreeArray *subtrees +) { + for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { + StackVersion version = self->slices.contents[i].version; + if (self->heads.contents[version].node == node) { + StackSlice slice = {*subtrees, version}; + array_insert(&self->slices, i + 1, slice); + return; + } + } + + StackVersion version = ts_stack__add_version(self, original_version, node); + StackSlice slice = { *subtrees, version }; + array_push(&self->slices, slice); +} + +static StackSliceArray stack__iter( + Stack *self, + StackVersion version, + StackCallback callback, + void *payload, + int goal_subtree_count +) { + array_clear(&self->slices); + array_clear(&self->iterators); + + StackHead *head = array_get(&self->heads, version); + StackIterator new_iterator = { + .node = head->node, + .subtrees = array_new(), + .subtree_count = 0, + .is_pending = true, + }; + + bool include_subtrees = false; + if (goal_subtree_count >= 0) { + include_subtrees = true; + array_reserve(&new_iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree)); + } + + array_push(&self->iterators, new_iterator); + + while (self->iterators.size > 0) { + for (uint32_t i = 0, size = self->iterators.size; i < size; i++) { + StackIterator *iterator = &self->iterators.contents[i]; + StackNode *node = iterator->node; + + StackAction action = callback(payload, iterator); + bool should_pop = action & StackActionPop; + bool should_stop = action & StackActionStop || node->link_count == 0; + + if (should_pop) { + SubtreeArray subtrees = 
iterator->subtrees; + if (!should_stop) { + ts_subtree_array_copy(subtrees, &subtrees); + } + ts_subtree_array_reverse(&subtrees); + ts_stack__add_slice( + self, + version, + node, + &subtrees + ); + } + + if (should_stop) { + if (!should_pop) { + ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees); + } + array_erase(&self->iterators, i); + i--, size--; + continue; + } + + for (uint32_t j = 1; j <= node->link_count; j++) { + StackIterator *next_iterator; + StackLink link; + if (j == node->link_count) { + link = node->links[0]; + next_iterator = &self->iterators.contents[i]; + } else { + if (self->iterators.size >= MAX_ITERATOR_COUNT) continue; + link = node->links[j]; + StackIterator current_iterator = self->iterators.contents[i]; + array_push(&self->iterators, current_iterator); + next_iterator = array_back(&self->iterators); + ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees); + } + + next_iterator->node = link.node; + if (link.subtree.ptr) { + if (include_subtrees) { + array_push(&next_iterator->subtrees, link.subtree); + ts_subtree_retain(link.subtree); + } + + if (!ts_subtree_extra(link.subtree)) { + next_iterator->subtree_count++; + if (!link.is_pending) { + next_iterator->is_pending = false; + } + } + } else { + next_iterator->subtree_count++; + next_iterator->is_pending = false; + } + } + } + } + + return self->slices; +} + +Stack *ts_stack_new(SubtreePool *subtree_pool) { + Stack *self = ts_calloc(1, sizeof(Stack)); + + array_init(&self->heads); + array_init(&self->slices); + array_init(&self->iterators); + array_init(&self->node_pool); + array_reserve(&self->heads, 4); + array_reserve(&self->slices, 4); + array_reserve(&self->iterators, 4); + array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE); + + self->subtree_pool = subtree_pool; + self->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &self->node_pool); + ts_stack_clear(self); + + return self; +} + +void ts_stack_delete(Stack *self) { + if 
(self->slices.contents) + array_delete(&self->slices); + if (self->iterators.contents) + array_delete(&self->iterators); + stack_node_release(self->base_node, &self->node_pool, self->subtree_pool); + for (uint32_t i = 0; i < self->heads.size; i++) { + stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); + } + array_clear(&self->heads); + if (self->node_pool.contents) { + for (uint32_t i = 0; i < self->node_pool.size; i++) + ts_free(self->node_pool.contents[i]); + array_delete(&self->node_pool); + } + array_delete(&self->heads); + ts_free(self); +} + +uint32_t ts_stack_version_count(const Stack *self) { + return self->heads.size; +} + +t_state_id ts_stack_state(const Stack *self, StackVersion version) { + return array_get(&self->heads, version)->node->state; +} + +Length ts_stack_position(const Stack *self, StackVersion version) { + return array_get(&self->heads, version)->node->position; +} + +Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) { + return array_get(&self->heads, version)->last_external_token; +} + +void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) { + StackHead *head = array_get(&self->heads, version); + if (token.ptr) ts_subtree_retain(token); + if (head->last_external_token.ptr) ts_subtree_release(self->subtree_pool, head->last_external_token); + head->last_external_token = token; +} + +unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { + StackHead *head = array_get(&self->heads, version); + unsigned result = head->node->error_cost; + if ( + head->status == StackStatusPaused || + (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr)) { + result += ERROR_COST_PER_RECOVERY; + } + return result; +} + +unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) { + StackHead *head = array_get(&self->heads, version); + if (head->node->node_count < head->node_count_at_last_error) { + 
head->node_count_at_last_error = head->node->node_count; + } + return head->node->node_count - head->node_count_at_last_error; +} + +void ts_stack_push( + Stack *self, + StackVersion version, + Subtree subtree, + bool pending, + t_state_id state +) { + StackHead *head = array_get(&self->heads, version); + StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool); + if (!subtree.ptr) head->node_count_at_last_error = new_node->node_count; + head->node = new_node; +} + +forceinline StackAction pop_count_callback(void *payload, const StackIterator *iterator) { + unsigned *goal_subtree_count = payload; + if (iterator->subtree_count == *goal_subtree_count) { + return StackActionPop | StackActionStop; + } else { + return StackActionNone; + } +} + +StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) { + return stack__iter(self, version, pop_count_callback, &count, (int)count); +} + +forceinline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) { + (void)payload; + if (iterator->subtree_count >= 1) { + if (iterator->is_pending) { + return StackActionPop | StackActionStop; + } else { + return StackActionStop; + } + } else { + return StackActionNone; + } +} + +StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) { + StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, 0); + if (pop.size > 0) { + ts_stack_renumber_version(self, pop.contents[0].version, version); + pop.contents[0].version = version; + } + return pop; +} + +forceinline StackAction pop_error_callback(void *payload, const StackIterator *iterator) { + if (iterator->subtrees.size > 0) { + bool *found_error = payload; + if (!*found_error && ts_subtree_is_error(iterator->subtrees.contents[0])) { + *found_error = true; + return StackActionPop | StackActionStop; + } else { + return StackActionStop; + } + } else { + return StackActionNone; + } +} + +SubtreeArray 
ts_stack_pop_error(Stack *self, StackVersion version) { + StackNode *node = array_get(&self->heads, version)->node; + for (unsigned i = 0; i < node->link_count; i++) { + if (node->links[i].subtree.ptr && ts_subtree_is_error(node->links[i].subtree)) { + bool found_error = false; + StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1); + if (pop.size > 0) { + assert(pop.size == 1); + ts_stack_renumber_version(self, pop.contents[0].version, version); + return pop.contents[0].subtrees; + } + break; + } + } + return (SubtreeArray) {.size = 0}; +} + +forceinline StackAction pop_all_callback(void *payload, const StackIterator *iterator) { + (void)payload; + return iterator->node->link_count == 0 ? StackActionPop : StackActionNone; +} + +StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) { + return stack__iter(self, version, pop_all_callback, NULL, 0); +} + +typedef struct { + StackSummary *summary; + unsigned max_depth; +} SummarizeStackSession; + +forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) { + SummarizeStackSession *session = payload; + t_state_id state = iterator->node->state; + unsigned depth = iterator->subtree_count; + if (depth > session->max_depth) return StackActionStop; + for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) { + StackSummaryEntry entry = session->summary->contents[i]; + if (entry.depth < depth) break; + if (entry.depth == depth && entry.state == state) return StackActionNone; + } + array_push(session->summary, ((StackSummaryEntry) { + .position = iterator->node->position, + .depth = depth, + .state = state, + })); + return StackActionNone; +} + +void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) { + SummarizeStackSession session = { + .summary = ts_malloc(sizeof(StackSummary)), + .max_depth = max_depth + }; + array_init(session.summary); + stack__iter(self, version, summarize_stack_callback, &session, -1); 
+ StackHead *head = &self->heads.contents[version]; + if (head->summary) { + array_delete(head->summary); + ts_free(head->summary); + } + head->summary = session.summary; +} + +StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) { + return array_get(&self->heads, version)->summary; +} + +int ts_stack_dynamic_precedence(Stack *self, StackVersion version) { + return array_get(&self->heads, version)->node->dynamic_precedence; +} + +bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) { + const StackHead *head = array_get(&self->heads, version); + const StackNode *node = head->node; + if (node->error_cost == 0) return true; + while (node) { + if (node->link_count > 0) { + Subtree subtree = node->links[0].subtree; + if (subtree.ptr) { + if (ts_subtree_total_bytes(subtree) > 0) { + return true; + } else if ( + node->node_count > head->node_count_at_last_error && + ts_subtree_error_cost(subtree) == 0 + ) { + node = node->links[0].node; + continue; + } + } + } + break; + } + return false; +} + +void ts_stack_remove_version(Stack *self, StackVersion version) { + stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool); + array_erase(&self->heads, version); +} + +void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) { + if (v1 == v2) return; + assert(v2 < v1); + assert((uint32_t)v1 < self->heads.size); + StackHead *source_head = &self->heads.contents[v1]; + StackHead *target_head = &self->heads.contents[v2]; + if (target_head->summary && !source_head->summary) { + source_head->summary = target_head->summary; + target_head->summary = NULL; + } + stack_head_delete(target_head, &self->node_pool, self->subtree_pool); + *target_head = *source_head; + array_erase(&self->heads, v1); +} + +void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) { + StackHead temporary_head = self->heads.contents[v1]; + self->heads.contents[v1] = self->heads.contents[v2]; + 
self->heads.contents[v2] = temporary_head; +} + +StackVersion ts_stack_copy_version(Stack *self, StackVersion version) { + assert(version < self->heads.size); + array_push(&self->heads, self->heads.contents[version]); + StackHead *head = array_back(&self->heads); + stack_node_retain(head->node); + if (head->last_external_token.ptr) ts_subtree_retain(head->last_external_token); + head->summary = NULL; + return self->heads.size - 1; +} + +bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) { + if (!ts_stack_can_merge(self, version1, version2)) return false; + StackHead *head1 = &self->heads.contents[version1]; + StackHead *head2 = &self->heads.contents[version2]; + for (uint32_t i = 0; i < head2->node->link_count; i++) { + stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool); + } + if (head1->node->state == ERROR_STATE) { + head1->node_count_at_last_error = head1->node->node_count; + } + ts_stack_remove_version(self, version2); + return true; +} + +bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) { + StackHead *head1 = &self->heads.contents[version1]; + StackHead *head2 = &self->heads.contents[version2]; + return + head1->status == StackStatusActive && + head2->status == StackStatusActive && + head1->node->state == head2->node->state && + head1->node->position.bytes == head2->node->position.bytes && + head1->node->error_cost == head2->node->error_cost && + ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token); +} + +void ts_stack_halt(Stack *self, StackVersion version) { + array_get(&self->heads, version)->status = StackStatusHalted; +} + +void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead) { + StackHead *head = array_get(&self->heads, version); + head->status = StackStatusPaused; + head->lookahead_when_paused = lookahead; + head->node_count_at_last_error = head->node->node_count; +} + +bool ts_stack_is_active(const Stack 
*self, StackVersion version) { + return array_get(&self->heads, version)->status == StackStatusActive; +} + +bool ts_stack_is_halted(const Stack *self, StackVersion version) { + return array_get(&self->heads, version)->status == StackStatusHalted; +} + +bool ts_stack_is_paused(const Stack *self, StackVersion version) { + return array_get(&self->heads, version)->status == StackStatusPaused; +} + +Subtree ts_stack_resume(Stack *self, StackVersion version) { + StackHead *head = array_get(&self->heads, version); + assert(head->status == StackStatusPaused); + Subtree result = head->lookahead_when_paused; + head->status = StackStatusActive; + head->lookahead_when_paused = NULL_SUBTREE; + return result; +} + +void ts_stack_clear(Stack *self) { + stack_node_retain(self->base_node); + for (uint32_t i = 0; i < self->heads.size; i++) { + stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); + } + array_clear(&self->heads); + array_push(&self->heads, ((StackHead) { + .node = self->base_node, + .status = StackStatusActive, + .last_external_token = NULL_SUBTREE, + .lookahead_when_paused = NULL_SUBTREE, + })); +} + +bool ts_stack_print_dot_graph(Stack *self, const t_language *language, FILE *f) { + array_reserve(&self->iterators, 32); + if (!f) f = stderr; + + fprintf(f, "digraph stack {\n"); + fprintf(f, "rankdir=\"RL\";\n"); + fprintf(f, "edge [arrowhead=none]\n"); + + Array(StackNode *) visited_nodes = array_new(); + + array_clear(&self->iterators); + for (uint32_t i = 0; i < self->heads.size; i++) { + StackHead *head = &self->heads.contents[i]; + if (head->status == StackStatusHalted) continue; + + fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i); + fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node); + + if (head->status == StackStatusPaused) { + fprintf(f, "color=red "); + } + fprintf(f, + "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u", + i, + ts_stack_node_count_since_error(self, i), + 
ts_stack_error_cost(self, i) + ); + + if (head->summary) { + fprintf(f, "\nsummary:"); + for (uint32_t j = 0; j < head->summary->size; j++) fprintf(f, " %u", head->summary->contents[j].state); + } + + if (head->last_external_token.ptr) { + const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state; + const char *data = ts_external_scanner_state_data(state); + fprintf(f, "\nexternal_scanner_state:"); + for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]); + } + + fprintf(f, "\"]\n"); + array_push(&self->iterators, ((StackIterator) { + .node = head->node + })); + } + + bool all_iterators_done = false; + while (!all_iterators_done) { + all_iterators_done = true; + + for (uint32_t i = 0; i < self->iterators.size; i++) { + StackIterator iterator = self->iterators.contents[i]; + StackNode *node = iterator.node; + + for (uint32_t j = 0; j < visited_nodes.size; j++) { + if (visited_nodes.contents[j] == node) { + node = NULL; + break; + } + } + + if (!node) continue; + all_iterators_done = false; + + fprintf(f, "node_%p [", (void *)node); + if (node->state == ERROR_STATE) { + fprintf(f, "label=\"?\""); + } else if ( + node->link_count == 1 && + node->links[0].subtree.ptr && + ts_subtree_extra(node->links[0].subtree) + ) { + fprintf(f, "shape=point margin=0 label=\"\""); + } else { + fprintf(f, "label=\"%d\"", node->state); + } + + fprintf( + f, + " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n", + node->position.extent.row + 1, + node->position.extent.column, + node->node_count, + node->error_cost, + node->dynamic_precedence + ); + + for (int j = 0; j < node->link_count; j++) { + StackLink link = node->links[j]; + fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node); + if (link.is_pending) fprintf(f, "style=dashed "); + if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray "); + + if (!link.subtree.ptr) { + fprintf(f, "color=red"); + } 
else { + fprintf(f, "label=\""); + bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree); + if (quoted) fprintf(f, "'"); + ts_language_write_symbol_as_dot_string(language, f, ts_subtree_symbol(link.subtree)); + if (quoted) fprintf(f, "'"); + fprintf(f, "\""); + fprintf( + f, + "labeltooltip=\"error_cost: %u\ndynamic_precedence: %" PRId32 "\"", + ts_subtree_error_cost(link.subtree), + ts_subtree_dynamic_precedence(link.subtree) + ); + } + + fprintf(f, "];\n"); + + StackIterator *next_iterator; + if (j == 0) { + next_iterator = &self->iterators.contents[i]; + } else { + array_push(&self->iterators, iterator); + next_iterator = array_back(&self->iterators); + } + next_iterator->node = link.node; + } + + array_push(&visited_nodes, node); + } + } + + fprintf(f, "}\n"); + + array_delete(&visited_nodes); + return true; +} + +#undef forceinline +#include +#include +#include +#include +#include +#include +#include "src/alloc.h" +#include "src/array.h" +#include "src/atomic.h" +#include "src/subtree.h" +#include "src/length.h" +#include "src/language.h" +#include "src/error_costs.h" +#include + +typedef struct { + Length start; + Length old_end; + Length new_end; +} Edit; + +#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX +#define TS_MAX_TREE_POOL_SIZE 32 + +// ExternalScannerState + +void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) { + self->length = length; + if (length > sizeof(self->short_data)) { + self->long_data = ts_malloc(length); + memcpy(self->long_data, data, length); + } else { + memcpy(self->short_data, data, length); + } +} + +ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) { + ExternalScannerState result = *self; + if (self->length > sizeof(self->short_data)) { + result.long_data = ts_malloc(self->length); + memcpy(result.long_data, self->long_data, self->length); + } + return result; +} + +void ts_external_scanner_state_delete(ExternalScannerState 
*self) { + if (self->length > sizeof(self->short_data)) { + ts_free(self->long_data); + } +} + +const char *ts_external_scanner_state_data(const ExternalScannerState *self) { + if (self->length > sizeof(self->short_data)) { + return self->long_data; + } else { + return self->short_data; + } +} + +bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length) { + return + self->length == length && + memcmp(ts_external_scanner_state_data(self), buffer, length) == 0; +} + +// SubtreeArray + +void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) { + dest->size = self.size; + dest->capacity = self.capacity; + dest->contents = self.contents; + if (self.capacity > 0) { + dest->contents = ts_calloc(self.capacity, sizeof(Subtree)); + memcpy(dest->contents, self.contents, self.size * sizeof(Subtree)); + for (uint32_t i = 0; i < self.size; i++) { + ts_subtree_retain(dest->contents[i]); + } + } +} + +void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) { + for (uint32_t i = 0; i < self->size; i++) { + ts_subtree_release(pool, self->contents[i]); + } + array_clear(self); +} + +void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) { + ts_subtree_array_clear(pool, self); + array_delete(self); +} + +void ts_subtree_array_remove_trailing_extras( + SubtreeArray *self, + SubtreeArray *destination +) { + array_clear(destination); + while (self->size > 0) { + Subtree last = self->contents[self->size - 1]; + if (ts_subtree_extra(last)) { + self->size--; + array_push(destination, last); + } else { + break; + } + } + ts_subtree_array_reverse(destination); +} + +void ts_subtree_array_reverse(SubtreeArray *self) { + for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) { + size_t reverse_index = self->size - 1 - i; + Subtree swap = self->contents[i]; + self->contents[i] = self->contents[reverse_index]; + self->contents[reverse_index] = swap; + } +} + +// SubtreePool + +SubtreePool 
ts_subtree_pool_new(uint32_t capacity) { + SubtreePool self = {array_new(), array_new()}; + array_reserve(&self.free_trees, capacity); + return self; +} + +void ts_subtree_pool_delete(SubtreePool *self) { + if (self->free_trees.contents) { + for (unsigned i = 0; i < self->free_trees.size; i++) { + ts_free(self->free_trees.contents[i].ptr); + } + array_delete(&self->free_trees); + } + if (self->tree_stack.contents) array_delete(&self->tree_stack); +} + +static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) { + if (self->free_trees.size > 0) { + return array_pop(&self->free_trees).ptr; + } else { + return ts_malloc(sizeof(SubtreeHeapData)); + } +} + +static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) { + if (self->free_trees.capacity > 0 && self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE) { + array_push(&self->free_trees, (MutableSubtree) {.ptr = tree}); + } else { + ts_free(tree); + } +} + +// Subtree + +static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) { + return + padding.bytes < TS_MAX_INLINE_TREE_LENGTH && + padding.extent.row < 16 && + padding.extent.column < TS_MAX_INLINE_TREE_LENGTH && + size.extent.row == 0 && + size.extent.column < TS_MAX_INLINE_TREE_LENGTH && + lookahead_bytes < 16; +} + +Subtree ts_subtree_new_leaf( + SubtreePool *pool, t_symbol symbol, Length padding, Length size, + uint32_t lookahead_bytes, t_state_id parse_state, + bool has_external_tokens, bool depends_on_column, + bool is_keyword, const t_language *language +) { + TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); + bool extra = symbol == ts_builtin_sym_end; + + bool is_inline = ( + symbol <= UINT8_MAX && + !has_external_tokens && + ts_subtree_can_inline(padding, size, lookahead_bytes) + ); + + if (is_inline) { + return (Subtree) {{ + .parse_state = parse_state, + .symbol = symbol, + .padding_bytes = padding.bytes, + .padding_rows = padding.extent.row, + .padding_columns = 
padding.extent.column, + .size_bytes = size.bytes, + .lookahead_bytes = lookahead_bytes, + .visible = metadata.visible, + .named = metadata.named, + .extra = extra, + .has_changes = false, + .is_missing = false, + .is_keyword = is_keyword, + .is_inline = true, + }}; + } else { + SubtreeHeapData *data = ts_subtree_pool_allocate(pool); + *data = (SubtreeHeapData) { + .ref_count = 1, + .padding = padding, + .size = size, + .lookahead_bytes = lookahead_bytes, + .error_cost = 0, + .child_count = 0, + .symbol = symbol, + .parse_state = parse_state, + .visible = metadata.visible, + .named = metadata.named, + .extra = extra, + .fragile_left = false, + .fragile_right = false, + .has_changes = false, + .has_external_tokens = has_external_tokens, + .has_external_scanner_state_change = false, + .depends_on_column = depends_on_column, + .is_missing = false, + .is_keyword = is_keyword, + {{.first_leaf = {.symbol = 0, .parse_state = 0}}} + }; + return (Subtree) {.ptr = data}; + } +} + +void ts_subtree_set_symbol( + MutableSubtree *self, + t_symbol symbol, + const t_language *language +) { + TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); + if (self->data.is_inline) { + assert(symbol < UINT8_MAX); + self->data.symbol = symbol; + self->data.named = metadata.named; + self->data.visible = metadata.visible; + } else { + self->ptr->symbol = symbol; + self->ptr->named = metadata.named; + self->ptr->visible = metadata.visible; + } +} + +Subtree ts_subtree_new_error( + SubtreePool *pool, int32_t lookahead_char, Length padding, Length size, + uint32_t bytes_scanned, t_state_id parse_state, const t_language *language +) { + Subtree result = ts_subtree_new_leaf( + pool, ts_builtin_sym_error, padding, size, bytes_scanned, + parse_state, false, false, false, language + ); + SubtreeHeapData *data = (SubtreeHeapData *)result.ptr; + data->fragile_left = true; + data->fragile_right = true; + data->lookahead_char = lookahead_char; + return result; +} + +// Clone a 
subtree. +MutableSubtree ts_subtree_clone(Subtree self) { + size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); + Subtree *new_children = ts_malloc(alloc_size); + Subtree *old_children = ts_subtree_children(self); + memcpy(new_children, old_children, alloc_size); + SubtreeHeapData *result = (SubtreeHeapData *)&new_children[self.ptr->child_count]; + if (self.ptr->child_count > 0) { + for (uint32_t i = 0; i < self.ptr->child_count; i++) { + ts_subtree_retain(new_children[i]); + } + } else if (self.ptr->has_external_tokens) { + result->external_scanner_state = ts_external_scanner_state_copy( + &self.ptr->external_scanner_state + ); + } + result->ref_count = 1; + return (MutableSubtree) {.ptr = result}; +} + +// Get mutable version of a subtree. +// +// This takes ownership of the subtree. If the subtree has only one owner, +// this will directly convert it into a mutable version. Otherwise, it will +// perform a copy. +MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { + if (self.data.is_inline) return (MutableSubtree) {self.data}; + if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); + MutableSubtree result = ts_subtree_clone(self); + ts_subtree_release(pool, self); + return result; +} + +static void ts_subtree__compress( + MutableSubtree self, + unsigned count, + const t_language *language, + MutableSubtreeArray *stack +) { + unsigned initial_stack_size = stack->size; + + MutableSubtree tree = self; + t_symbol symbol = tree.ptr->symbol; + for (unsigned i = 0; i < count; i++) { + if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break; + + MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); + if ( + child.data.is_inline || + child.ptr->child_count < 2 || + child.ptr->ref_count > 1 || + child.ptr->symbol != symbol + ) break; + + MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]); + if ( + grandchild.data.is_inline || + grandchild.ptr->child_count < 2 
|| + grandchild.ptr->ref_count > 1 || + grandchild.ptr->symbol != symbol + ) break; + + ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild); + ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1]; + ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child); + array_push(stack, tree); + tree = grandchild; + } + + while (stack->size > initial_stack_size) { + tree = array_pop(stack); + MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); + MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]); + ts_subtree_summarize_children(grandchild, language); + ts_subtree_summarize_children(child, language); + ts_subtree_summarize_children(tree, language); + } +} + +void ts_subtree_balance(Subtree self, SubtreePool *pool, const t_language *language) { + array_clear(&pool->tree_stack); + + if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) { + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); + } + + while (pool->tree_stack.size > 0) { + MutableSubtree tree = array_pop(&pool->tree_stack); + + if (tree.ptr->repeat_depth > 0) { + Subtree child1 = ts_subtree_children(tree)[0]; + Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1]; + long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2); + if (repeat_delta > 0) { + unsigned n = (unsigned)repeat_delta; + for (unsigned i = n / 2; i > 0; i /= 2) { + ts_subtree__compress(tree, i, language, &pool->tree_stack); + n -= i; + } + } + } + + for (uint32_t i = 0; i < tree.ptr->child_count; i++) { + Subtree child = ts_subtree_children(tree)[i]; + if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) { + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); + } + } + } +} + +// Assign all of the node's properties that depend on its children. 
+void ts_subtree_summarize_children( + MutableSubtree self, + const t_language *language +) { + assert(!self.data.is_inline); + + self.ptr->named_child_count = 0; + self.ptr->visible_child_count = 0; + self.ptr->error_cost = 0; + self.ptr->repeat_depth = 0; + self.ptr->visible_descendant_count = 0; + self.ptr->has_external_tokens = false; + self.ptr->depends_on_column = false; + self.ptr->has_external_scanner_state_change = false; + self.ptr->dynamic_precedence = 0; + + uint32_t structural_index = 0; + const t_symbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); + uint32_t lookahead_end_byte = 0; + + const Subtree *children = ts_subtree_children(self); + for (uint32_t i = 0; i < self.ptr->child_count; i++) { + Subtree child = children[i]; + + if ( + self.ptr->size.extent.row == 0 && + ts_subtree_depends_on_column(child) + ) { + self.ptr->depends_on_column = true; + } + + if (ts_subtree_has_external_scanner_state_change(child)) { + self.ptr->has_external_scanner_state_change = true; + } + + if (i == 0) { + self.ptr->padding = ts_subtree_padding(child); + self.ptr->size = ts_subtree_size(child); + } else { + self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child)); + } + + uint32_t child_lookahead_end_byte = + self.ptr->padding.bytes + + self.ptr->size.bytes + + ts_subtree_lookahead_bytes(child); + if (child_lookahead_end_byte > lookahead_end_byte) { + lookahead_end_byte = child_lookahead_end_byte; + } + + if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) { + self.ptr->error_cost += ts_subtree_error_cost(child); + } + + uint32_t grandchild_count = ts_subtree_child_count(child); + if ( + self.ptr->symbol == ts_builtin_sym_error || + self.ptr->symbol == ts_builtin_sym_error_repeat + ) { + if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) { + if (ts_subtree_visible(child)) { + self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; + } else if (grandchild_count > 0) { + 
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count; + } + } + } + + self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child); + self.ptr->visible_descendant_count += ts_subtree_visible_descendant_count(child); + + if (alias_sequence && alias_sequence[structural_index] != 0 && !ts_subtree_extra(child)) { + self.ptr->visible_descendant_count++; + self.ptr->visible_child_count++; + if (ts_language_symbol_metadata(language, alias_sequence[structural_index]).named) { + self.ptr->named_child_count++; + } + } else if (ts_subtree_visible(child)) { + self.ptr->visible_descendant_count++; + self.ptr->visible_child_count++; + if (ts_subtree_named(child)) self.ptr->named_child_count++; + } else if (grandchild_count > 0) { + self.ptr->visible_child_count += child.ptr->visible_child_count; + self.ptr->named_child_count += child.ptr->named_child_count; + } + + if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true; + + if (ts_subtree_is_error(child)) { + self.ptr->fragile_left = self.ptr->fragile_right = true; + self.ptr->parse_state = TS_TREE_STATE_NONE; + } + + if (!ts_subtree_extra(child)) structural_index++; + } + + self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes; + + if ( + self.ptr->symbol == ts_builtin_sym_error || + self.ptr->symbol == ts_builtin_sym_error_repeat + ) { + self.ptr->error_cost += + ERROR_COST_PER_RECOVERY + + ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes + + ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row; + } + + if (self.ptr->child_count > 0) { + Subtree first_child = children[0]; + Subtree last_child = children[self.ptr->child_count - 1]; + + self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child); + self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child); + + if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true; + if (ts_subtree_fragile_right(last_child)) 
self.ptr->fragile_right = true; + + if ( + self.ptr->child_count >= 2 && + !self.ptr->visible && + !self.ptr->named && + ts_subtree_symbol(first_child) == self.ptr->symbol + ) { + if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) { + self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1; + } else { + self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1; + } + } + } +} + +// Create a new parent node with the given children. +// +// This takes ownership of the children array. +MutableSubtree ts_subtree_new_node( + t_symbol symbol, + SubtreeArray *children, + unsigned production_id, + const t_language *language +) { + TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); + bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; + + // Allocate the node's data at the end of the array of children. + size_t new_byte_size = ts_subtree_alloc_size(children->size); + if (children->capacity * sizeof(Subtree) < new_byte_size) { + children->contents = ts_realloc(children->contents, new_byte_size); + children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree)); + } + SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size]; + + *data = (SubtreeHeapData) { + .ref_count = 1, + .symbol = symbol, + .child_count = children->size, + .visible = metadata.visible, + .named = metadata.named, + .has_changes = false, + .has_external_scanner_state_change = false, + .fragile_left = fragile, + .fragile_right = fragile, + .is_keyword = false, + {{ + .visible_descendant_count = 0, + .production_id = production_id, + .first_leaf = {.symbol = 0, .parse_state = 0}, + }} + }; + MutableSubtree result = {.ptr = data}; + ts_subtree_summarize_children(result, language); + return result; +} + +// Create a new error node containing the given children. +// +// This node is treated as 'extra'. Its children are prevented from having +// having any effect on the parse state. 
+Subtree ts_subtree_new_error_node( + SubtreeArray *children, + bool extra, + const t_language *language +) { + MutableSubtree result = ts_subtree_new_node( + ts_builtin_sym_error, children, 0, language + ); + result.ptr->extra = extra; + return ts_subtree_from_mut(result); +} + +// Create a new 'missing leaf' node. +// +// This node is treated as 'extra'. Its children are prevented from having +// having any effect on the parse state. +Subtree ts_subtree_new_missing_leaf( + SubtreePool *pool, + t_symbol symbol, + Length padding, + uint32_t lookahead_bytes, + const t_language *language +) { + Subtree result = ts_subtree_new_leaf( + pool, symbol, padding, length_zero(), lookahead_bytes, + 0, false, false, false, language + ); + if (result.data.is_inline) { + result.data.is_missing = true; + } else { + ((SubtreeHeapData *)result.ptr)->is_missing = true; + } + return result; +} + +void ts_subtree_retain(Subtree self) { + if (self.data.is_inline) return; + assert(self.ptr->ref_count > 0); + atomic_inc((volatile uint32_t *)&self.ptr->ref_count); + assert(self.ptr->ref_count != 0); +} + +void ts_subtree_release(SubtreePool *pool, Subtree self) { + if (self.data.is_inline) return; + array_clear(&pool->tree_stack); + + assert(self.ptr->ref_count > 0); + if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) { + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); + } + + while (pool->tree_stack.size > 0) { + MutableSubtree tree = array_pop(&pool->tree_stack); + if (tree.ptr->child_count > 0) { + Subtree *children = ts_subtree_children(tree); + for (uint32_t i = 0; i < tree.ptr->child_count; i++) { + Subtree child = children[i]; + if (child.data.is_inline) continue; + assert(child.ptr->ref_count > 0); + if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) { + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); + } + } + ts_free(children); + } else { + if (tree.ptr->has_external_tokens) { + 
ts_external_scanner_state_delete(&tree.ptr->external_scanner_state); + } + ts_subtree_pool_free(pool, tree.ptr); + } + } +} + +int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool) { + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left)); + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right)); + + while (pool->tree_stack.size > 0) { + right = ts_subtree_from_mut(array_pop(&pool->tree_stack)); + left = ts_subtree_from_mut(array_pop(&pool->tree_stack)); + + int result = 0; + if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) result = -1; + else if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) result = 1; + else if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) result = -1; + else if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) result = 1; + if (result != 0) { + array_clear(&pool->tree_stack); + return result; + } + + for (uint32_t i = ts_subtree_child_count(left); i > 0; i--) { + Subtree left_child = ts_subtree_children(left)[i - 1]; + Subtree right_child = ts_subtree_children(right)[i - 1]; + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left_child)); + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right_child)); + } + } + + return 0; +} + +static inline void ts_subtree_set_has_changes(MutableSubtree *self) { + if (self->data.is_inline) { + self->data.has_changes = true; + } else { + self->ptr->has_changes = true; + } +} + +Subtree ts_subtree_edit(Subtree self, const t_input_edit *input_edit, SubtreePool *pool) { + typedef struct { + Subtree *tree; + Edit edit; + } EditEntry; + + Array(EditEntry) stack = array_new(); + array_push(&stack, ((EditEntry) { + .tree = &self, + .edit = (Edit) { + .start = {input_edit->start_byte, input_edit->start_point}, + .old_end = {input_edit->old_end_byte, input_edit->old_end_point}, + .new_end = {input_edit->new_end_byte, input_edit->new_end_point}, + }, + })); + + while (stack.size) { + EditEntry entry = array_pop(&stack); + Edit edit 
= entry.edit; + bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes; + bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; + bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree); + + Length size = ts_subtree_size(*entry.tree); + Length padding = ts_subtree_padding(*entry.tree); + Length total_size = length_add(padding, size); + uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); + uint32_t end_byte = total_size.bytes + lookahead_bytes; + if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue; + + // If the edit is entirely within the space before this subtree, then shift this + // subtree over according to the edit without changing its size. + if (edit.old_end.bytes <= padding.bytes) { + padding = length_add(edit.new_end, length_sub(padding, edit.old_end)); + } + + // If the edit starts in the space before this subtree and extends into this subtree, + // shrink the subtree's content to compensate for the change in the space before it. + else if (edit.start.bytes < padding.bytes) { + size = length_saturating_sub(size, length_sub(edit.old_end, padding)); + padding = edit.new_end; + } + + // If the edit is a pure insertion right at the start of the subtree, + // shift the subtree over according to the insertion. + else if (edit.start.bytes == padding.bytes && is_pure_insertion) { + padding = edit.new_end; + } + + // If the edit is within this subtree, resize the subtree to reflect the edit. 
+ else if ( + edit.start.bytes < total_size.bytes || + (edit.start.bytes == total_size.bytes && is_pure_insertion) + ) { + size = length_add( + length_sub(edit.new_end, padding), + length_saturating_sub(total_size, edit.old_end) + ); + } + + MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree); + + if (result.data.is_inline) { + if (ts_subtree_can_inline(padding, size, lookahead_bytes)) { + result.data.padding_bytes = padding.bytes; + result.data.padding_rows = padding.extent.row; + result.data.padding_columns = padding.extent.column; + result.data.size_bytes = size.bytes; + } else { + SubtreeHeapData *data = ts_subtree_pool_allocate(pool); + data->ref_count = 1; + data->padding = padding; + data->size = size; + data->lookahead_bytes = lookahead_bytes; + data->error_cost = 0; + data->child_count = 0; + data->symbol = result.data.symbol; + data->parse_state = result.data.parse_state; + data->visible = result.data.visible; + data->named = result.data.named; + data->extra = result.data.extra; + data->fragile_left = false; + data->fragile_right = false; + data->has_changes = false; + data->has_external_tokens = false; + data->depends_on_column = false; + data->is_missing = result.data.is_missing; + data->is_keyword = result.data.is_keyword; + result.ptr = data; + } + } else { + result.ptr->padding = padding; + result.ptr->size = size; + } + + ts_subtree_set_has_changes(&result); + *entry.tree = ts_subtree_from_mut(result); + + Length child_left, child_right = length_zero(); + for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) { + Subtree *child = &ts_subtree_children(*entry.tree)[i]; + Length child_size = ts_subtree_total_size(*child); + child_left = child_right; + child_right = length_add(child_left, child_size); + + // If this child ends before the edit, it is not affected. 
+ if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue; + + // Keep editing child nodes until a node is reached that starts after the edit. + // Also, if this node's validity depends on its column position, then continue + // invaliditing child nodes until reaching a line break. + if (( + (child_left.bytes > edit.old_end.bytes) || + (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0) + ) && ( + !invalidate_first_row || + child_left.extent.row > entry.tree->ptr->padding.extent.row + )) { + break; + } + + // Transform edit into the child's coordinate space. + Edit child_edit = { + .start = length_saturating_sub(edit.start, child_left), + .old_end = length_saturating_sub(edit.old_end, child_left), + .new_end = length_saturating_sub(edit.new_end, child_left), + }; + + // Interpret all inserted text as applying to the *first* child that touches the edit. + // Subsequent children are only never have any text inserted into them; they are only + // shrunk to compensate for the edit. + if ( + child_right.bytes > edit.start.bytes || + (child_right.bytes == edit.start.bytes && is_pure_insertion) + ) { + edit.new_end = edit.start; + } + + // Children that occur before the edit are not reshaped by the edit. + else { + child_edit.old_end = child_edit.start; + child_edit.new_end = child_edit.start; + } + + // Queue processing of this child's subtree. 
+ array_push(&stack, ((EditEntry) { + .tree = child, + .edit = child_edit, + })); + } + } + + array_delete(&stack); + return self; +} + +Subtree ts_subtree_last_external_token(Subtree tree) { + if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE; + while (tree.ptr->child_count > 0) { + for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) { + Subtree child = ts_subtree_children(tree)[i]; + if (ts_subtree_has_external_tokens(child)) { + tree = child; + break; + } + } + } + return tree; +} + +static size_t ts_subtree__write_char_to_string(char *str, size_t n, int32_t chr) { + if (chr == -1) + return snprintf(str, n, "INVALID"); + else if (chr == '\0') + return snprintf(str, n, "'\\0'"); + else if (chr == '\n') + return snprintf(str, n, "'\\n'"); + else if (chr == '\t') + return snprintf(str, n, "'\\t'"); + else if (chr == '\r') + return snprintf(str, n, "'\\r'"); + else if (0 < chr && chr < 128 && isprint(chr)) + return snprintf(str, n, "'%c'", chr); + else + return snprintf(str, n, "%d", chr); +} + +static const char *const ROOT_FIELD = "__ROOT__"; + +static size_t ts_subtree__write_to_string( + Subtree self, char *string, size_t limit, + const t_language *language, bool include_all, + t_symbol alias_symbol, bool alias_is_named, const char *field_name +) { + if (!self.ptr) return snprintf(string, limit, "(NULL)"); + + char *cursor = string; + char **writer = (limit > 1) ? &cursor : &string; + bool is_root = field_name == ROOT_FIELD; + bool is_visible = + include_all || + ts_subtree_missing(self) || + ( + alias_symbol + ? 
alias_is_named + : ts_subtree_visible(self) && ts_subtree_named(self) + ); + + if (is_visible) { + if (!is_root) { + cursor += snprintf(*writer, limit, " "); + if (field_name) { + cursor += snprintf(*writer, limit, "%s: ", field_name); + } + } + + if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) { + cursor += snprintf(*writer, limit, "(UNEXPECTED "); + cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char); + } else { + t_symbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); + const char *symbol_name = ts_language_symbol_name(language, symbol); + if (ts_subtree_missing(self)) { + cursor += snprintf(*writer, limit, "(MISSING "); + if (alias_is_named || ts_subtree_named(self)) { + cursor += snprintf(*writer, limit, "%s", symbol_name); + } else { + cursor += snprintf(*writer, limit, "\"%s\"", symbol_name); + } + } else { + cursor += snprintf(*writer, limit, "(%s", symbol_name); + } + } + } else if (is_root) { + t_symbol symbol = alias_symbol ? 
alias_symbol : ts_subtree_symbol(self); + const char *symbol_name = ts_language_symbol_name(language, symbol); + if (ts_subtree_child_count(self) > 0) { + cursor += snprintf(*writer, limit, "(%s", symbol_name); + } else if (ts_subtree_named(self)) { + cursor += snprintf(*writer, limit, "(%s)", symbol_name); + } else { + cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name); + } + } + + if (ts_subtree_child_count(self)) { + const t_symbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map( + language, + self.ptr->production_id, + &field_map, + &field_map_end + ); + + uint32_t structural_child_index = 0; + for (uint32_t i = 0; i < self.ptr->child_count; i++) { + Subtree child = ts_subtree_children(self)[i]; + if (ts_subtree_extra(child)) { + cursor += ts_subtree__write_to_string( + child, *writer, limit, + language, include_all, + 0, false, NULL + ); + } else { + t_symbol subtree_alias_symbol = alias_sequence + ? alias_sequence[structural_child_index] + : 0; + bool subtree_alias_is_named = subtree_alias_symbol + ? ts_language_symbol_metadata(language, subtree_alias_symbol).named + : false; + + const char *child_field_name = is_visible ? 
NULL : field_name; + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { + if (!map->inherited && map->child_index == structural_child_index) { + child_field_name = language->field_names[map->field_id]; + break; + } + } + + cursor += ts_subtree__write_to_string( + child, *writer, limit, + language, include_all, + subtree_alias_symbol, subtree_alias_is_named, child_field_name + ); + structural_child_index++; + } + } + } + + if (is_visible) cursor += snprintf(*writer, limit, ")"); + + return cursor - string; +} + +char *ts_subtree_string( + Subtree self, + t_symbol alias_symbol, + bool alias_is_named, + const t_language *language, + bool include_all +) { + char scratch_string[1]; + size_t size = ts_subtree__write_to_string( + self, scratch_string, 1, + language, include_all, + alias_symbol, alias_is_named, ROOT_FIELD + ) + 1; + char *result = ts_malloc(size * sizeof(char)); + ts_subtree__write_to_string( + self, result, size, + language, include_all, + alias_symbol, alias_is_named, ROOT_FIELD + ); + return result; +} + +void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, + const t_language *language, t_symbol alias_symbol, + FILE *f) { + t_symbol subtree_symbol = ts_subtree_symbol(*self); + t_symbol symbol = alias_symbol ? 
alias_symbol : subtree_symbol; + uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self); + fprintf(f, "tree_%p [label=\"", (void *)self); + ts_language_write_symbol_as_dot_string(language, f, symbol); + fprintf(f, "\""); + + if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext"); + if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray"); + + fprintf(f, ", tooltip=\"" + "range: %u - %u\n" + "state: %d\n" + "error-cost: %u\n" + "has-changes: %u\n" + "depends-on-column: %u\n" + "descendant-count: %u\n" + "repeat-depth: %u\n" + "lookahead-bytes: %u", + start_offset, end_offset, + ts_subtree_parse_state(*self), + ts_subtree_error_cost(*self), + ts_subtree_has_changes(*self), + ts_subtree_depends_on_column(*self), + ts_subtree_visible_descendant_count(*self), + ts_subtree_repeat_depth(*self), + ts_subtree_lookahead_bytes(*self) + ); + + if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) { + fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char); + } + + fprintf(f, "\"]\n"); + + uint32_t child_start_offset = start_offset; + uint32_t child_info_offset = + language->max_alias_sequence_length * + ts_subtree_production_id(*self); + for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { + const Subtree *child = &ts_subtree_children(*self)[i]; + t_symbol subtree_alias_symbol = 0; + if (!ts_subtree_extra(*child) && child_info_offset) { + subtree_alias_symbol = language->alias_sequences[child_info_offset]; + child_info_offset++; + } + ts_subtree__print_dot_graph(child, child_start_offset, language, subtree_alias_symbol, f); + fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i); + child_start_offset += ts_subtree_total_bytes(*child); + } +} + +void ts_subtree_print_dot_graph(Subtree self, const t_language *language, FILE *f) { + fprintf(f, "digraph tree {\n"); + fprintf(f, "edge [arrowhead=none]\n"); + ts_subtree__print_dot_graph(&self, 0, language, 
0, f); + fprintf(f, "}\n"); +} + +const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) { + static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0}; + if ( + self.ptr && + !self.data.is_inline && + self.ptr->has_external_tokens && + self.ptr->child_count == 0 + ) { + return &self.ptr->external_scanner_state; + } else { + return &empty_state; + } +} + +bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) { + const ExternalScannerState *state_self = ts_subtree_external_scanner_state(self); + const ExternalScannerState *state_other = ts_subtree_external_scanner_state(other); + return ts_external_scanner_state_eq( + state_self, + ts_external_scanner_state_data(state_other), + state_other->length + ); +} + + +#include "src/api.h" +#include "src/array.h" +#include "src/get_changed_ranges.h" +#include "src/length.h" +#include "src/subtree.h" +#include "src/tree_cursor.h" +#include "src/tree.h" + +t_tree *ts_tree_new( + Subtree root, const t_language *language, + const t_range *included_ranges, unsigned included_range_count +) { + t_tree *result = ts_malloc(sizeof(t_tree)); + result->root = root; + result->language = ts_language_copy(language); + result->included_ranges = ts_calloc(included_range_count, sizeof(t_range)); + memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(t_range)); + result->included_range_count = included_range_count; + return result; +} + +t_tree *ts_tree_copy(const t_tree *self) { + ts_subtree_retain(self->root); + return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count); +} + +void ts_tree_delete(t_tree *self) { + if (!self) return; + + SubtreePool pool = ts_subtree_pool_new(0); + ts_subtree_release(&pool, self->root); + ts_subtree_pool_delete(&pool); + ts_language_delete(self->language); + ts_free(self->included_ranges); + ts_free(self); +} + +t_parse_node ts_tree_root_node(const t_tree *self) { + return 
ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0); +} + +t_parse_node ts_tree_root_node_with_offset( + const t_tree *self, + uint32_t offset_bytes, + t_point offset_extent +) { + Length offset = {offset_bytes, offset_extent}; + return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0); +} + +const t_language *ts_tree_language(const t_tree *self) { + return self->language; +} + +void ts_tree_edit(t_tree *self, const t_input_edit *edit) { + for (unsigned i = 0; i < self->included_range_count; i++) { + t_range *range = &self->included_ranges[i]; + if (range->end_byte >= edit->old_end_byte) { + if (range->end_byte != UINT32_MAX) { + range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte); + range->end_point = point_add( + edit->new_end_point, + point_sub(range->end_point, edit->old_end_point) + ); + if (range->end_byte < edit->new_end_byte) { + range->end_byte = UINT32_MAX; + range->end_point = POINT_MAX; + } + } + } else if (range->end_byte > edit->start_byte) { + range->end_byte = edit->start_byte; + range->end_point = edit->start_point; + } + if (range->start_byte >= edit->old_end_byte) { + range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte); + range->start_point = point_add( + edit->new_end_point, + point_sub(range->start_point, edit->old_end_point) + ); + if (range->start_byte < edit->new_end_byte) { + range->start_byte = UINT32_MAX; + range->start_point = POINT_MAX; + } + } else if (range->start_byte > edit->start_byte) { + range->start_byte = edit->start_byte; + range->start_point = edit->start_point; + } + } + + SubtreePool pool = ts_subtree_pool_new(0); + self->root = ts_subtree_edit(self->root, edit, &pool); + ts_subtree_pool_delete(&pool); +} + +t_range *ts_tree_included_ranges(const t_tree *self, uint32_t *length) { + *length = self->included_range_count; + t_range *ranges = ts_calloc(self->included_range_count, sizeof(t_range)); + memcpy(ranges, 
self->included_ranges, self->included_range_count * sizeof(t_range)); + return ranges; +} + +t_range *ts_tree_get_changed_ranges(const t_tree *old_tree, const t_tree *new_tree, uint32_t *length) { + TreeCursor cursor1 = {NULL, array_new(), 0}; + TreeCursor cursor2 = {NULL, array_new(), 0}; + ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree)); + ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree)); + + TSRangeArray included_range_differences = array_new(); + ts_range_array_get_changed_ranges( + old_tree->included_ranges, old_tree->included_range_count, + new_tree->included_ranges, new_tree->included_range_count, + &included_range_differences + ); + + t_range *result; + *length = ts_subtree_get_changed_ranges( + &old_tree->root, &new_tree->root, &cursor1, &cursor2, + old_tree->language, &included_range_differences, &result + ); + + array_delete(&included_range_differences); + array_delete(&cursor1.stack); + array_delete(&cursor2.stack); + return result; +} + +#ifdef _WIN32 + +#include +#include + +int _ts_dup(HANDLE handle) { + HANDLE dup_handle; + if (!DuplicateHandle( + GetCurrentProcess(), handle, + GetCurrentProcess(), &dup_handle, + 0, FALSE, DUPLICATE_SAME_ACCESS + )) return -1; + + return _open_osfhandle((intptr_t)dup_handle, 0); +} + +void ts_tree_print_dot_graph(const TSTree *self, int fd) { + FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a"); + ts_subtree_print_dot_graph(self->root, self->language, file); + fclose(file); +} + +#else + +#include + +int _ts_dup(int file_descriptor) { + return dup(file_descriptor); +} + +void ts_tree_print_dot_graph(const t_tree *self, int file_descriptor) { + FILE *file = fdopen(_ts_dup(file_descriptor), "a"); + ts_subtree_print_dot_graph(self->root, self->language, file); + fclose(file); +} + +#endif +#include "src/api.h" +#include "src/alloc.h" +#include "src/tree_cursor.h" +#include "src/language.h" +#include "src/tree.h" + +typedef struct { + Subtree parent; + const t_tree *tree; + Length 
position; + uint32_t child_index; + uint32_t structural_child_index; + uint32_t descendant_index; + const t_symbol *alias_sequence; +} CursorChildIterator; + +// CursorChildIterator + +static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) { + TreeCursorEntry *entry = &self->stack.contents[index]; + if (index == 0 || ts_subtree_visible(*entry->subtree)) { + return true; + } else if (!ts_subtree_extra(*entry->subtree)) { + TreeCursorEntry *parent_entry = &self->stack.contents[index - 1]; + return ts_language_alias_at( + self->tree->language, + parent_entry->subtree->ptr->production_id, + entry->structural_child_index + ); + } else { + return false; + } +} + +static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) { + TreeCursorEntry *last_entry = array_back(&self->stack); + if (ts_subtree_child_count(*last_entry->subtree) == 0) { + return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL}; + } + const t_symbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, + last_entry->subtree->ptr->production_id + ); + + uint32_t descendant_index = last_entry->descendant_index; + if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) { + descendant_index += 1; + } + + return (CursorChildIterator) { + .tree = self->tree, + .parent = *last_entry->subtree, + .position = last_entry->position, + .child_index = 0, + .structural_child_index = 0, + .descendant_index = descendant_index, + .alias_sequence = alias_sequence, + }; +} + +static inline bool ts_tree_cursor_child_iterator_next( + CursorChildIterator *self, + TreeCursorEntry *result, + bool *visible +) { + if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; + const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; + *result = (TreeCursorEntry) { + .subtree = child, + .position = self->position, + .child_index = self->child_index, + 
.structural_child_index = self->structural_child_index, + .descendant_index = self->descendant_index, + }; + *visible = ts_subtree_visible(*child); + bool extra = ts_subtree_extra(*child); + if (!extra) { + if (self->alias_sequence) { + *visible |= self->alias_sequence[self->structural_child_index]; + } + self->structural_child_index++; + } + + self->descendant_index += ts_subtree_visible_descendant_count(*child); + if (*visible) { + self->descendant_index += 1; + } + + self->position = length_add(self->position, ts_subtree_size(*child)); + self->child_index++; + + if (self->child_index < self->parent.ptr->child_count) { + Subtree next_child = ts_subtree_children(self->parent)[self->child_index]; + self->position = length_add(self->position, ts_subtree_padding(next_child)); + } + + return true; +} + +// Return a position that, when `b` is added to it, yields `a`. This +// can only be computed if `b` has zero rows. Otherwise, this function +// returns `LENGTH_UNDEFINED`, and the caller needs to recompute +// the position some other way. 
+static inline Length length_backtrack(Length a, Length b) {
+  if (length_is_undefined(a) || b.extent.row != 0) {
+    return LENGTH_UNDEFINED;
+  }
+
+  Length result;
+  result.bytes = a.bytes - b.bytes;
+  result.extent.row = a.extent.row;
+  result.extent.column = a.extent.column - b.extent.column;
+  return result;
+}
+
+// Step a child iterator backwards.
+//
+// Writes the entry for the child at the iterator's current index into
+// `result` (its `descendant_index` is left zero-initialised), stores in
+// `*visible` whether that child is visible either directly or through an
+// alias, then moves the iterator to the preceding child. Returns false when
+// the iterator has no parent or has already stepped before the first child.
+static inline bool ts_tree_cursor_child_iterator_previous(
+  CursorChildIterator *self,
+  TreeCursorEntry *result,
+  bool *visible
+) {
+  // this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into
+  // account unsigned underflow: decrementing past child 0 wraps `child_index`
+  // to a value that is >= child_count. Compare against child_count directly;
+  // truncating to int8_t would also reject valid indices such as 255.
+  if (!self->parent.ptr || self->child_index >= self->parent.ptr->child_count) return false;
+  const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
+  *result = (TreeCursorEntry) {
+    .subtree = child,
+    .position = self->position,
+    .child_index = self->child_index,
+    .structural_child_index = self->structural_child_index,
+  };
+  *visible = ts_subtree_visible(*child);
+  bool extra = ts_subtree_extra(*child);
+  if (!extra && self->alias_sequence) {
+    *visible |= self->alias_sequence[self->structural_child_index];
+    self->structural_child_index--;
+  }
+
+  // Undo this child's padding; the result is undefined if the padding spans rows.
+  self->position = length_backtrack(self->position, ts_subtree_padding(*child));
+  self->child_index--;
+
+  // unsigned can underflow so compare it to child_count
+  if (self->child_index < self->parent.ptr->child_count) {
+    Subtree previous_child = ts_subtree_children(self->parent)[self->child_index];
+    Length size = ts_subtree_size(previous_child);
+    self->position = length_backtrack(self->position, size);
+  }
+
+  return true;
+}
+
+// TSTreeCursor - lifecycle
+
+// Build a zero-initialised cursor and initialise it on `node`.
+t_tree_cursor ts_tree_cursor_new(t_parse_node node) {
+  t_tree_cursor self = {NULL, NULL, {0, 0, 0}};
+  ts_tree_cursor_init((TreeCursor *)&self, node);
+  return self;
+}
+
+// Re-initialise an existing cursor on `node`.
+void ts_tree_cursor_reset(t_tree_cursor *_self, t_parse_node node) {
+  ts_tree_cursor_init((TreeCursor *)_self, node);
+}
+
+// Bind the cursor to `node`'s tree, take the root alias symbol from
+// `node.context[3]`, and replace the stack contents with a single entry for
+// `node` located at its start byte / start point.
+void ts_tree_cursor_init(TreeCursor *self, t_parse_node node) {
+  self->tree = node.tree;
+  self->root_alias_symbol = node.context[3];
+  array_clear(&self->stack);
+  array_push(&self->stack, ((TreeCursorEntry) {
+    .subtree = (const Subtree *)node.id,
+    .position = {
+      ts_node_start_byte(node),
+      ts_node_start_point(node)
+    },
+    .child_index = 0,
+    .structural_child_index = 0,
+    .descendant_index = 0,
+  }));
+}
+
+// Dispose of the cursor's entry stack.
+void ts_tree_cursor_delete(t_tree_cursor *_self) {
+  TreeCursor *self = (TreeCursor *)_self;
+  array_delete(&self->stack);
+}
+
+// TSTreeCursor - walking the tree
+
+// Push the first child of the current entry that is either visible
+// (-> TreeCursorStepVisible) or hidden but with visible children
+// (-> TreeCursorStepHidden). Returns TreeCursorStepNone, pushing nothing,
+// when no child qualifies.
+TreeCursorStep ts_tree_cursor_goto_first_child_internal(t_tree_cursor *_self) {
+  TreeCursor *self = (TreeCursor *)_self;
+  bool visible;
+  TreeCursorEntry entry;
+  CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
+  while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
+    if (visible) {
+      array_push(&self->stack, entry);
+      return TreeCursorStepVisible;
+    }
+    if (ts_subtree_visible_child_count(*entry.subtree) > 0) {
+      array_push(&self->stack, entry);
+      return TreeCursorStepHidden;
+    }
+  }
+  return TreeCursorStepNone;
+}
+
+// Move to the first visible child, descending through hidden nodes for as
+// long as the internal step reports TreeCursorStepHidden.
+bool ts_tree_cursor_goto_first_child(t_tree_cursor *self) {
+  for (;;) {
+    switch (ts_tree_cursor_goto_first_child_internal(self)) {
+      case TreeCursorStepHidden:
+        continue;
+      case TreeCursorStepVisible:
+        return true;
+      default:
+        return false;
+    }
+  }
+  return false;
+}
+
+// Like `ts_tree_cursor_goto_first_child_internal`, but scans every child and
+// pushes the *last* one that is visible or has visible children.
+TreeCursorStep ts_tree_cursor_goto_last_child_internal(t_tree_cursor *_self) {
+  TreeCursor *self = (TreeCursor *)_self;
+  bool visible;
+  TreeCursorEntry entry;
+  CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
+  if (!iterator.parent.ptr || iterator.parent.ptr->child_count == 0) return TreeCursorStepNone;
+
+  // `last_entry.subtree` stays NULL until a qualifying child has been seen.
+  TreeCursorEntry last_entry = {0};
+  TreeCursorStep last_step = TreeCursorStepNone;
+  while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
+    if (visible) {
+      last_entry = entry;
+      last_step = TreeCursorStepVisible;
+    }
+    else if (ts_subtree_visible_child_count(*entry.subtree) > 0) {
+      last_entry = entry;
+      last_step = TreeCursorStepHidden;
+    }
+  }
+  if (last_entry.subtree) {
+    array_push(&self->stack, last_entry);
+    return last_step;
+  }
+
+  return TreeCursorStepNone;
+}
+
+// Move to the last visible child, descending through hidden nodes for as
+// long as the internal step reports TreeCursorStepHidden.
+bool ts_tree_cursor_goto_last_child(t_tree_cursor *self) {
+  for (;;) {
+    switch (ts_tree_cursor_goto_last_child_internal(self)) {
+      case TreeCursorStepHidden:
+        continue;
+      case TreeCursorStepVisible:
+        return true;
+      default:
+        return false;
+    }
+  }
+  return false;
+}
+
+// Move to the first visible child whose end is at or beyond both `goal_byte`
+// and `goal_point`, descending through hidden children that have visible
+// children. Returns the running count of visible children skipped before the
+// match, or -1 (with the stack restored to its initial size) if none matches.
+static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point(
+  t_tree_cursor *_self,
+  uint32_t goal_byte,
+  t_point goal_point
+) {
+  TreeCursor *self = (TreeCursor *)_self;
+  uint32_t initial_size = self->stack.size;
+  uint32_t visible_child_index = 0;
+
+  bool did_descend;
+  do {
+    did_descend = false;
+
+    bool visible;
+    TreeCursorEntry entry;
+    CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
+    while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
+      Length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree));
+      bool at_goal = entry_end.bytes >= goal_byte && point_gte(entry_end.extent, goal_point);
+      uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree);
+      if (at_goal) {
+        if (visible) {
+          array_push(&self->stack, entry);
+          return visible_child_index;
+        }
+        if (visible_child_count > 0) {
+          array_push(&self->stack, entry);
+          did_descend = true;
+          break;
+        }
+      } else if (visible) {
+        visible_child_index++;
+      } else {
+        visible_child_index += visible_child_count;
+      }
+    }
+  } while (did_descend);
+
+  self->stack.size = initial_size;
+  return -1;
+}
+
+// Byte-only variant: the point goal is POINT_ZERO.
+int64_t ts_tree_cursor_goto_first_child_for_byte(t_tree_cursor *self, uint32_t goal_byte) {
+  return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, POINT_ZERO);
+}
+
+// Point-only variant: the byte goal is 0.
+int64_t ts_tree_cursor_goto_first_child_for_point(t_tree_cursor *self, t_point goal_point) {
+  return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point);
+}
+
+// Shared implementation of next/previous sibling; `advance` is the child
+// iterator step to use. Entries are popped one at a time; for each, the
+// parent's iterator is re-seeded from the popped entry and advanced once to
+// step over that entry itself, after which the remaining children are scanned
+// for a visible node or a hidden node with visible children. If nothing is
+// found the stack is restored and TreeCursorStepNone is returned.
+TreeCursorStep ts_tree_cursor_goto_sibling_internal(
+    t_tree_cursor *_self,
+    bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) {
+  TreeCursor *self = (TreeCursor *)_self;
+  uint32_t initial_size = self->stack.size;
+
+  while (self->stack.size > 1) {
+    TreeCursorEntry entry = array_pop(&self->stack);
+    CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
+    iterator.child_index = entry.child_index;
+    iterator.structural_child_index = entry.structural_child_index;
+    iterator.position = entry.position;
+    iterator.descendant_index = entry.descendant_index;
+
+    bool visible = false;
+    advance(&iterator, &entry, &visible);
+    // A visible entry below the original top of the stack is a visible
+    // ancestor: stop climbing there.
+    if (visible && self->stack.size + 1 < initial_size) break;
+
+    while (advance(&iterator, &entry, &visible)) {
+      if (visible) {
+        array_push(&self->stack, entry);
+        return TreeCursorStepVisible;
+      }
+
+      if (ts_subtree_visible_child_count(*entry.subtree)) {
+        array_push(&self->stack, entry);
+        return TreeCursorStepHidden;
+      }
+    }
+  }
+
+  self->stack.size = initial_size;
+  return TreeCursorStepNone;
+}
+
+// Sibling walk in the forward direction.
+TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(t_tree_cursor *_self) {
+  return ts_tree_cursor_goto_sibling_internal(_self, ts_tree_cursor_child_iterator_next);
+}
+
+// Move to the next visible sibling; when the step lands on a hidden node,
+// continue into its first visible child.
+bool ts_tree_cursor_goto_next_sibling(t_tree_cursor *self) {
+  switch (ts_tree_cursor_goto_next_sibling_internal(self)) {
+    case TreeCursorStepHidden:
+      ts_tree_cursor_goto_first_child(self);
+      return true;
+    case TreeCursorStepVisible:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Sibling walk in the backward direction, followed by a position repair when
+// backtracking produced an undefined position.
+TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(t_tree_cursor *_self) {
+  // since subtracting across row loses column information, we may have to
+  // restore it
+  TreeCursor *self = (TreeCursor *)_self;
+
+  // run the generic sibling walk first; the position is repaired below if needed
+  TreeCursorStep step = ts_tree_cursor_goto_sibling_internal(
+      _self, ts_tree_cursor_child_iterator_previous);
+  if (step == TreeCursorStepNone)
+    return step;
+
+  // if length is already valid, there's no need to recompute it
+  if (!length_is_undefined(array_back(&self->stack)->position))
+    return step;
+
+  // restore position from the parent node
+  const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2];
+  Length position = parent->position;
+  uint32_t child_index = array_back(&self->stack)->child_index;
+  const Subtree *children = ts_subtree_children((*(parent->subtree)));
+
+  if (child_index > 0) {
+    // skip first child padding since its position should match the position of the parent
+    position = length_add(position, ts_subtree_size(children[0]));
+    for (uint32_t i = 1; i < child_index; ++i) {
+      position = length_add(position, ts_subtree_total_size(children[i]));
+    }
+    position = length_add(position, ts_subtree_padding(children[child_index]));
+  }
+
+  array_back(&self->stack)->position = position;
+
+  return step;
+}
+
+// Move to the previous visible sibling; when the step lands on a hidden node,
+// continue into its last visible child.
+bool ts_tree_cursor_goto_previous_sibling(t_tree_cursor *self) {
+  switch (ts_tree_cursor_goto_previous_sibling_internal(self)) {
+    case TreeCursorStepHidden:
+      ts_tree_cursor_goto_last_child(self);
+      return true;
+    case TreeCursorStepVisible:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Truncate the stack to the nearest visible ancestor entry. Returns false,
+// leaving the stack untouched, when there is none.
+bool ts_tree_cursor_goto_parent(t_tree_cursor *_self) {
+  TreeCursor *self = (TreeCursor *)_self;
+  // `i + 1 > 0` terminates the unsigned countdown after index 0.
+  for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) {
+    if (ts_tree_cursor_is_entry_visible(self, i)) {
+      self->stack.size = i + 1;
+      return true;
+    }
+  }
+  return false;
+}
+
+// Reposition the cursor on the node whose descendant index is
+// `goal_descendant_index`, first climbing to an ancestor that contains it and
+// then descending.
+void ts_tree_cursor_goto_descendant(
+  t_tree_cursor *_self,
+  uint32_t goal_descendant_index
+) {
+  TreeCursor *self = (TreeCursor *)_self;
+
+  // Ascend to the lowest ancestor that contains the goal node.
+  for (;;) {
+    uint32_t i = self->stack.size - 1;
+    TreeCursorEntry *entry = &self->stack.contents[i];
+    uint32_t next_descendant_index =
+      entry->descendant_index +
+      (ts_tree_cursor_is_entry_visible(self, i) ? 1 : 0) +
+      ts_subtree_visible_descendant_count(*entry->subtree);
+    if (
+      (entry->descendant_index <= goal_descendant_index) &&
+      (next_descendant_index > goal_descendant_index)
+    ) {
+      break;
+    } else if (self->stack.size <= 1) {
+      return;
+    } else {
+      self->stack.size--;
+    }
+  }
+
+  // Descend to the goal node.
+  bool did_descend = true;
+  do {
+    did_descend = false;
+    bool visible;
+    TreeCursorEntry entry;
+    CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
+    if (iterator.descendant_index > goal_descendant_index) {
+      return;
+    }
+
+    while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
+      if (iterator.descendant_index > goal_descendant_index) {
+        array_push(&self->stack, entry);
+        if (visible && entry.descendant_index == goal_descendant_index) {
+          return;
+        } else {
+          did_descend = true;
+          break;
+        }
+      }
+    }
+  } while (did_descend);
+}
+
+// Descendant index recorded on the entry at the top of the stack.
+uint32_t ts_tree_cursor_current_descendant_index(const t_tree_cursor *_self) {
+  const TreeCursor *self = (const TreeCursor *)_self;
+  TreeCursorEntry *last_entry = array_back(&self->stack);
+  return last_entry->descendant_index;
+}
+
+// Build a node for the entry at the top of the stack. The alias is the root
+// alias symbol unless the entry has a parent on the stack and is not an
+// extra, in which case it is looked up in the parent's production.
+t_parse_node ts_tree_cursor_current_node(const t_tree_cursor *_self) {
+  const TreeCursor *self = (const TreeCursor *)_self;
+  TreeCursorEntry *last_entry = array_back(&self->stack);
+  t_symbol alias_symbol = self->root_alias_symbol;
+  if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) {
+    TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2];
+    alias_symbol = ts_language_alias_at(
+      self->tree->language,
+      parent_entry->subtree->ptr->production_id,
+      last_entry->structural_child_index
+    );
+  }
+  return ts_node_new(
+    self->tree,
+    last_entry->subtree,
+    last_entry->position,
+    alias_symbol
+  );
+}
+
+// Private - Get various facts about the current node that are needed
+// when executing tree queries.
+void ts_tree_cursor_current_status( + const t_tree_cursor *_self, + t_field_id *field_id, + bool *has_later_siblings, + bool *has_later_named_siblings, + bool *can_have_later_siblings_with_this_field, + t_symbol *supertypes, + unsigned *supertype_count +) { + const TreeCursor *self = (const TreeCursor *)_self; + unsigned max_supertypes = *supertype_count; + *field_id = 0; + *supertype_count = 0; + *has_later_siblings = false; + *has_later_named_siblings = false; + *can_have_later_siblings_with_this_field = false; + + // Walk up the tree, visiting the current node and its invisible ancestors, + // because fields can refer to nodes through invisible *wrapper* nodes, + for (unsigned i = self->stack.size - 1; i > 0; i--) { + TreeCursorEntry *entry = &self->stack.contents[i]; + TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; + + const t_symbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, + parent_entry->subtree->ptr->production_id + ); + + #define subtree_symbol(subtree, structural_child_index) \ + (( \ + !ts_subtree_extra(subtree) && \ + alias_sequence && \ + alias_sequence[structural_child_index] \ + ) ? \ + alias_sequence[structural_child_index] : \ + ts_subtree_symbol(subtree)) + + // Stop walking up when a visible ancestor is found. + t_symbol entry_symbol = subtree_symbol( + *entry->subtree, + entry->structural_child_index + ); + TSSymbolMetadata entry_metadata = ts_language_symbol_metadata( + self->tree->language, + entry_symbol + ); + if (i != self->stack.size - 1 && entry_metadata.visible) break; + + // Record any supertypes + if (entry_metadata.supertype && *supertype_count < max_supertypes) { + supertypes[*supertype_count] = entry_symbol; + (*supertype_count)++; + } + + // Determine if the current node has later siblings. 
+ if (!*has_later_siblings) { + unsigned sibling_count = parent_entry->subtree->ptr->child_count; + unsigned structural_child_index = entry->structural_child_index; + if (!ts_subtree_extra(*entry->subtree)) structural_child_index++; + for (unsigned j = entry->child_index + 1; j < sibling_count; j++) { + Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j]; + TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata( + self->tree->language, + subtree_symbol(sibling, structural_child_index) + ); + if (sibling_metadata.visible) { + *has_later_siblings = true; + if (*has_later_named_siblings) break; + if (sibling_metadata.named) { + *has_later_named_siblings = true; + break; + } + } else if (ts_subtree_visible_child_count(sibling) > 0) { + *has_later_siblings = true; + if (*has_later_named_siblings) break; + if (sibling.ptr->named_child_count > 0) { + *has_later_named_siblings = true; + break; + } + } + if (!ts_subtree_extra(sibling)) structural_child_index++; + } + } + + #undef subtree_symbol + + if (!ts_subtree_extra(*entry->subtree)) { + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map( + self->tree->language, + parent_entry->subtree->ptr->production_id, + &field_map, &field_map_end + ); + + // Look for a field name associated with the current node. + if (!*field_id) { + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { + if (!map->inherited && map->child_index == entry->structural_child_index) { + *field_id = map->field_id; + break; + } + } + } + + // Determine if the current node can have later siblings with the same field name. 
+ if (*field_id) { + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { + if ( + map->field_id == *field_id && + map->child_index > entry->structural_child_index + ) { + *can_have_later_siblings_with_this_field = true; + break; + } + } + } + } + } +} + +uint32_t ts_tree_cursor_current_depth(const t_tree_cursor *_self) { + const TreeCursor *self = (const TreeCursor *)_self; + uint32_t depth = 0; + for (unsigned i = 1; i < self->stack.size; i++) { + if (ts_tree_cursor_is_entry_visible(self, i)) { + depth++; + } + } + return depth; +} + +t_parse_node ts_tree_cursor_parent_node(const t_tree_cursor *_self) { + const TreeCursor *self = (const TreeCursor *)_self; + for (int i = (int)self->stack.size - 2; i >= 0; i--) { + TreeCursorEntry *entry = &self->stack.contents[i]; + bool is_visible = true; + t_symbol alias_symbol = 0; + if (i > 0) { + TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; + alias_symbol = ts_language_alias_at( + self->tree->language, + parent_entry->subtree->ptr->production_id, + entry->structural_child_index + ); + is_visible = (alias_symbol != 0) || ts_subtree_visible(*entry->subtree); + } + if (is_visible) { + return ts_node_new( + self->tree, + entry->subtree, + entry->position, + alias_symbol + ); + } + } + return ts_node_new(NULL, NULL, length_zero(), 0); +} + +t_field_id ts_tree_cursor_current_field_id(const t_tree_cursor *_self) { + const TreeCursor *self = (const TreeCursor *)_self; + + // Walk up the tree, visiting the current node and its invisible ancestors. + for (unsigned i = self->stack.size - 1; i > 0; i--) { + TreeCursorEntry *entry = &self->stack.contents[i]; + TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; + + // Stop walking up when another visible node is found. 
+ if ( + i != self->stack.size - 1 && + ts_tree_cursor_is_entry_visible(self, i) + ) break; + + if (ts_subtree_extra(*entry->subtree)) break; + + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map( + self->tree->language, + parent_entry->subtree->ptr->production_id, + &field_map, &field_map_end + ); + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { + if (!map->inherited && map->child_index == entry->structural_child_index) { + return map->field_id; + } + } + } + return 0; +} + +const char *ts_tree_cursor_current_field_name(const t_tree_cursor *_self) { + t_field_id id = ts_tree_cursor_current_field_id(_self); + if (id) { + const TreeCursor *self = (const TreeCursor *)_self; + return self->tree->language->field_names[id]; + } else { + return NULL; + } +} + +t_tree_cursor ts_tree_cursor_copy(const t_tree_cursor *_cursor) { + const TreeCursor *cursor = (const TreeCursor *)_cursor; + t_tree_cursor res = {NULL, NULL, {0, 0}}; + TreeCursor *copy = (TreeCursor *)&res; + copy->tree = cursor->tree; + copy->root_alias_symbol = cursor->root_alias_symbol; + array_init(&copy->stack); + array_push_all(&copy->stack, &cursor->stack); + return res; +} + +void ts_tree_cursor_reset_to(t_tree_cursor *_dst, const t_tree_cursor *_src) { + const TreeCursor *cursor = (const TreeCursor *)_src; + TreeCursor *copy = (TreeCursor *)_dst; + copy->tree = cursor->tree; + copy->root_alias_symbol = cursor->root_alias_symbol; + array_clear(&copy->stack); + array_push_all(&copy->stack, &cursor->stack); +} diff --git a/parser/src/api.h b/parser/src/api.h index deb2364e..d423b229 100644 --- a/parser/src/api.h +++ b/parser/src/api.h @@ -7,10 +7,6 @@ #endif #endif -#ifdef __cplusplus -extern "C" { -#endif - #include #include #include @@ -38,108 +34,108 @@ extern "C" { /* Section - Types */ /*******************/ -typedef uint16_t TSStateId; -typedef uint16_t TSSymbol; -typedef uint16_t TSFieldId; -typedef struct TSLanguage TSLanguage; -typedef struct TSParser TSParser;
-typedef struct TSTree TSTree; -typedef struct TSQuery TSQuery; -typedef struct TSQueryCursor TSQueryCursor; -typedef struct TSLookaheadIterator TSLookaheadIterator; +typedef uint16_t t_state_id; +typedef uint16_t t_symbol; +typedef uint16_t t_field_id; +typedef struct t_language t_language; +typedef struct t_parser t_parser; +typedef struct t_tree t_tree; +typedef struct t_query t_query; +typedef struct t_query_cursor t_query_cursor; +typedef struct t_lookahead_iterator t_lookahead_iterator; -typedef enum TSInputEncoding { +typedef enum t_input_encoding { TSInputEncodingUTF8, TSInputEncodingUTF16, -} TSInputEncoding; +} t_input_encoding; -typedef enum TSSymbolType { +typedef enum t_symbol_type { TSSymbolTypeRegular, TSSymbolTypeAnonymous, TSSymbolTypeAuxiliary, -} TSSymbolType; +} t_symbol_type; -typedef struct TSPoint { +typedef struct t_point { uint32_t row; uint32_t column; -} TSPoint; +} t_point; -typedef struct TSRange { - TSPoint start_point; - TSPoint end_point; +typedef struct t_range { + t_point start_point; + t_point end_point; uint32_t start_byte; uint32_t end_byte; -} TSRange; +} t_range; -typedef struct TSInput { +typedef struct t_input { void *payload; - const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read); - TSInputEncoding encoding; -} TSInput; + const char *(*read)(void *payload, uint32_t byte_index, t_point position, uint32_t *bytes_read); + t_input_encoding encoding; +} t_input; -typedef enum TSLogType { +typedef enum t_log_type { TSLogTypeParse, TSLogTypeLex, -} TSLogType; +} t_log_type; -typedef struct TSLogger { +typedef struct t_logger { void *payload; - void (*log)(void *payload, TSLogType log_type, const char *buffer); -} TSLogger; + void (*log)(void *payload, t_log_type log_type, const char *buffer); +} t_logger; -typedef struct TSInputEdit { +typedef struct t_input_edit { uint32_t start_byte; uint32_t old_end_byte; uint32_t new_end_byte; - TSPoint start_point; - TSPoint old_end_point; - TSPoint 
new_end_point; -} TSInputEdit; + t_point start_point; + t_point old_end_point; + t_point new_end_point; +} t_input_edit; -typedef struct TSNode { +typedef struct t_parse_node { uint32_t context[4]; const void *id; - const TSTree *tree; -} TSNode; + const t_tree *tree; +} t_parse_node; -typedef struct TSTreeCursor { +typedef struct t_tree_cursor { const void *tree; const void *id; uint32_t context[3]; -} TSTreeCursor; +} t_tree_cursor; -typedef struct TSQueryCapture { - TSNode node; +typedef struct t_query_capture { + t_parse_node node; uint32_t index; -} TSQueryCapture; +} t_query_capture; -typedef enum TSQuantifier { +typedef enum t_quantifier { TSQuantifierZero = 0, // must match the array initialization value TSQuantifierZeroOrOne, TSQuantifierZeroOrMore, TSQuantifierOne, TSQuantifierOneOrMore, -} TSQuantifier; +} t_quantifier; -typedef struct TSQueryMatch { +typedef struct t_query_match { uint32_t id; uint16_t pattern_index; uint16_t capture_count; - const TSQueryCapture *captures; -} TSQueryMatch; + const t_query_capture *captures; +} t_query_match; -typedef enum TSQueryPredicateStepType { +typedef enum t_query_predicate_step_type { TSQueryPredicateStepTypeDone, TSQueryPredicateStepTypeCapture, TSQueryPredicateStepTypeString, -} TSQueryPredicateStepType; +} t_query_predicate_step_type; -typedef struct TSQueryPredicateStep { - TSQueryPredicateStepType type; +typedef struct t_query_predicate_step { + t_query_predicate_step_type type; uint32_t value_id; -} TSQueryPredicateStep; +} t_query_predicate_step; -typedef enum TSQueryError { +typedef enum t_query_error { TSQueryErrorNone = 0, TSQueryErrorSyntax, TSQueryErrorNodeType, @@ -147,7 +143,7 @@ typedef enum TSQueryError { TSQueryErrorCapture, TSQueryErrorStructure, TSQueryErrorLanguage, -} TSQueryError; +} t_query_error; /********************/ /* Section - Parser */ @@ -156,17 +152,17 @@ typedef enum TSQueryError { /** * Create a new parser. 
*/ -TSParser *ts_parser_new(void); +t_parser *ts_parser_new(void); /** * Delete the parser, freeing all of the memory that it used. */ -void ts_parser_delete(TSParser *self); +void ts_parser_delete(t_parser *self); /** * Get the parser's current language. */ -const TSLanguage *ts_parser_language(const TSParser *self); +const t_language *ts_parser_language(const t_parser *self); /** * Set the language that the parser should use for parsing. @@ -178,7 +174,7 @@ const TSLanguage *ts_parser_language(const TSParser *self); * and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and * [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants. */ -bool ts_parser_set_language(TSParser *self, const TSLanguage *language); +bool ts_parser_set_language(t_parser *self, const t_language *language); /** * Set the ranges of text that the parser should include when parsing. @@ -203,8 +199,8 @@ bool ts_parser_set_language(TSParser *self, const TSLanguage *language); * this function returns `true` */ bool ts_parser_set_included_ranges( - TSParser *self, - const TSRange *ranges, + t_parser *self, + const t_range *ranges, uint32_t count ); @@ -215,8 +211,8 @@ bool ts_parser_set_included_ranges( * or write to it. The length of the array will be written to the given * `count` pointer. */ -const TSRange *ts_parser_included_ranges( - const TSParser *self, +const t_range *ts_parser_included_ranges( + const t_parser *self, uint32_t *count ); @@ -263,10 +259,10 @@ const TSRange *ts_parser_included_ranges( * [`encoding`]: TSInput::encoding * [`bytes_read`]: TSInput::read */ -TSTree *ts_parser_parse( - TSParser *self, - const TSTree *old_tree, - TSInput input +t_tree *ts_parser_parse( + t_parser *self, + const t_tree *old_tree, + t_input input ); /** @@ -275,9 +271,9 @@ TSTree *ts_parser_parse( * above. The second two parameters indicate the location of the buffer and its * length in bytes. 
*/ -TSTree *ts_parser_parse_string( - TSParser *self, - const TSTree *old_tree, +t_tree *ts_parser_parse_string( + t_parser *self, + const t_tree *old_tree, const char *string, uint32_t length ); @@ -288,12 +284,12 @@ TSTree *ts_parser_parse_string( * [`ts_parser_parse_string`] method above. The final parameter indicates whether * the text is encoded as UTF8 or UTF16. */ -TSTree *ts_parser_parse_string_encoding( - TSParser *self, - const TSTree *old_tree, +t_tree *ts_parser_parse_string_encoding( + t_parser *self, + const t_tree *old_tree, const char *string, uint32_t length, - TSInputEncoding encoding + t_input_encoding encoding ); /** @@ -305,7 +301,7 @@ TSTree *ts_parser_parse_string_encoding( * and instead intend to use this parser to parse some other document, you must * call [`ts_parser_reset`] first. */ -void ts_parser_reset(TSParser *self); +void ts_parser_reset(t_parser *self); /** * Set the maximum duration in microseconds that parsing should be allowed to @@ -314,12 +310,12 @@ void ts_parser_reset(TSParser *self); * If parsing takes longer than this, it will halt early, returning NULL. * See [`ts_parser_parse`] for more information. */ -void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros); +void ts_parser_set_timeout_micros(t_parser *self, uint64_t timeout_micros); /** * Get the duration in microseconds that parsing is allowed to take. */ -uint64_t ts_parser_timeout_micros(const TSParser *self); +uint64_t ts_parser_timeout_micros(const t_parser *self); /** * Set the parser's current cancellation flag pointer. @@ -328,12 +324,12 @@ uint64_t ts_parser_timeout_micros(const TSParser *self); * from this pointer during parsing. If it reads a non-zero value, it will * halt early, returning NULL. See [`ts_parser_parse`] for more information. 
*/ -void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag); +void ts_parser_set_cancellation_flag(t_parser *self, const size_t *flag); /** * Get the parser's current cancellation flag pointer. */ -const size_t *ts_parser_cancellation_flag(const TSParser *self); +const size_t *ts_parser_cancellation_flag(const t_parser *self); /** * Set the logger that a parser should use during parsing. @@ -342,12 +338,12 @@ const size_t *ts_parser_cancellation_flag(const TSParser *self); * previously assigned, the caller is responsible for releasing any memory * owned by the previous logger. */ -void ts_parser_set_logger(TSParser *self, TSLogger logger); +void ts_parser_set_logger(t_parser *self, t_logger logger); /** * Get the parser's current logger. */ -TSLogger ts_parser_logger(const TSParser *self); +t_logger ts_parser_logger(const t_parser *self); /** * Set the file descriptor to which the parser should write debugging graphs @@ -355,7 +351,7 @@ TSLogger ts_parser_logger(const TSParser *self); * to pipe these graphs directly to a `dot(1)` process in order to generate * SVG output. You can turn off this logging by passing a negative number. */ -void ts_parser_print_dot_graphs(TSParser *self, int fd); +void ts_parser_print_dot_graphs(t_parser *self, int fd); /******************/ /* Section - Tree */ @@ -367,39 +363,39 @@ void ts_parser_print_dot_graphs(TSParser *self, int fd); * You need to copy a syntax tree in order to use it on more than one thread at * a time, as syntax trees are not thread safe. */ -TSTree *ts_tree_copy(const TSTree *self); +t_tree *ts_tree_copy(const t_tree *self); /** * Delete the syntax tree, freeing all of the memory that it used. */ -void ts_tree_delete(TSTree *self); +void ts_tree_delete(t_tree *self); /** * Get the root node of the syntax tree. 
*/ -TSNode ts_tree_root_node(const TSTree *self); +t_parse_node ts_tree_root_node(const t_tree *self); /** * Get the root node of the syntax tree, but with its position * shifted forward by the given offset. */ -TSNode ts_tree_root_node_with_offset( - const TSTree *self, +t_parse_node ts_tree_root_node_with_offset( + const t_tree *self, uint32_t offset_bytes, - TSPoint offset_extent + t_point offset_extent ); /** * Get the language that was used to parse the syntax tree. */ -const TSLanguage *ts_tree_language(const TSTree *self); +const t_language *ts_tree_language(const t_tree *self); /** * Get the array of included ranges that was used to parse the syntax tree. * * The returned pointer must be freed by the caller. */ -TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length); +t_range *ts_tree_included_ranges(const t_tree *self, uint32_t *length); /** * Edit the syntax tree to keep it in sync with source code that has been @@ -408,7 +404,7 @@ TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length); * You must describe the edit both in terms of byte offsets and in terms of * (row, column) coordinates. */ -void ts_tree_edit(TSTree *self, const TSInputEdit *edit); +void ts_tree_edit(t_tree *self, const t_input_edit *edit); /** * Compare an old edited syntax tree to a new syntax tree representing the same @@ -424,16 +420,16 @@ void ts_tree_edit(TSTree *self, const TSInputEdit *edit); * for freeing it using `free`. The length of the array will be written to the * given `length` pointer. */ -TSRange *ts_tree_get_changed_ranges( - const TSTree *old_tree, - const TSTree *new_tree, +t_range *ts_tree_get_changed_ranges( + const t_tree *old_tree, + const t_tree *new_tree, uint32_t *length ); /** * Write a DOT graph describing the syntax tree to the given file. 
*/ -void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor); +void ts_tree_print_dot_graph(const t_tree *self, int file_descriptor); /******************/ /* Section - Node */ @@ -442,50 +438,50 @@ void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor); /** * Get the node's type as a null-terminated string. */ -const char *ts_node_type(TSNode self); +const char *ts_node_type(t_parse_node self); /** * Get the node's type as a numerical id. */ -TSSymbol ts_node_symbol(TSNode self); +t_symbol ts_node_symbol(t_parse_node self); /** * Get the node's language. */ -const TSLanguage *ts_node_language(TSNode self); +const t_language *ts_node_language(t_parse_node self); /** * Get the node's type as it appears in the grammar ignoring aliases as a * null-terminated string. */ -const char *ts_node_grammar_type(TSNode self); +const char *ts_node_grammar_type(t_parse_node self); /** * Get the node's type as a numerical id as it appears in the grammar ignoring * aliases. This should be used in [`ts_language_next_state`] instead of * [`ts_node_symbol`]. */ -TSSymbol ts_node_grammar_symbol(TSNode self); +t_symbol ts_node_grammar_symbol(t_parse_node self); /** * Get the node's start byte. */ -uint32_t ts_node_start_byte(TSNode self); +uint32_t ts_node_start_byte(t_parse_node self); /** * Get the node's start position in terms of rows and columns. */ -TSPoint ts_node_start_point(TSNode self); +t_point ts_node_start_point(t_parse_node self); /** * Get the node's end byte. */ -uint32_t ts_node_end_byte(TSNode self); +uint32_t ts_node_end_byte(t_parse_node self); /** * Get the node's end position in terms of rows and columns. */ -TSPoint ts_node_end_point(TSNode self); +t_point ts_node_end_point(t_parse_node self); /** * Get an S-expression representing the node as a string. @@ -493,107 +489,107 @@ TSPoint ts_node_end_point(TSNode self); * This string is allocated with `malloc` and the caller is responsible for * freeing it using `free`. 
*/ -char *ts_node_string(TSNode self); +char *ts_node_string(t_parse_node self); /** * Check if the node is null. Functions like [`ts_node_child`] and * [`ts_node_next_sibling`] will return a null node to indicate that no such node * was found. */ -bool ts_node_is_null(TSNode self); +bool ts_node_is_null(t_parse_node self); /** * Check if the node is *named*. Named nodes correspond to named rules in the * grammar, whereas *anonymous* nodes correspond to string literals in the * grammar. */ -bool ts_node_is_named(TSNode self); +bool ts_node_is_named(t_parse_node self); /** * Check if the node is *missing*. Missing nodes are inserted by the parser in * order to recover from certain kinds of syntax errors. */ -bool ts_node_is_missing(TSNode self); +bool ts_node_is_missing(t_parse_node self); /** * Check if the node is *extra*. Extra nodes represent things like comments, * which are not required the grammar, but can appear anywhere. */ -bool ts_node_is_extra(TSNode self); +bool ts_node_is_extra(t_parse_node self); /** * Check if a syntax node has been edited. */ -bool ts_node_has_changes(TSNode self); +bool ts_node_has_changes(t_parse_node self); /** * Check if the node is a syntax error or contains any syntax errors. */ -bool ts_node_has_error(TSNode self); +bool ts_node_has_error(t_parse_node self); /** * Check if the node is a syntax error. */ -bool ts_node_is_error(TSNode self); +bool ts_node_is_error(t_parse_node self); /** * Get this node's parse state. */ -TSStateId ts_node_parse_state(TSNode self); +t_state_id ts_node_parse_state(t_parse_node self); /** * Get the parse state after this node. */ -TSStateId ts_node_next_parse_state(TSNode self); +t_state_id ts_node_next_parse_state(t_parse_node self); /** * Get the node's immediate parent. * Prefer [`ts_node_child_containing_descendant`] for * iterating over the node's ancestors. 
*/ -TSNode ts_node_parent(TSNode self); +t_parse_node ts_node_parent(t_parse_node self); /** * Get the node's child that contains `descendant`. */ -TSNode ts_node_child_containing_descendant(TSNode self, TSNode descendant); +t_parse_node ts_node_child_containing_descendant(t_parse_node self, t_parse_node descendant); /** * Get the node's child at the given index, where zero represents the first * child. */ -TSNode ts_node_child(TSNode self, uint32_t child_index); +t_parse_node ts_node_child(t_parse_node self, uint32_t child_index); /** * Get the field name for node's child at the given index, where zero represents * the first child. Returns NULL, if no field is found. */ -const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index); +const char *ts_node_field_name_for_child(t_parse_node self, uint32_t child_index); /** * Get the node's number of children. */ -uint32_t ts_node_child_count(TSNode self); +uint32_t ts_node_child_count(t_parse_node self); /** * Get the node's *named* child at the given index. * * See also [`ts_node_is_named`]. */ -TSNode ts_node_named_child(TSNode self, uint32_t child_index); +t_parse_node ts_node_named_child(t_parse_node self, uint32_t child_index); /** * Get the node's number of *named* children. * * See also [`ts_node_is_named`]. */ -uint32_t ts_node_named_child_count(TSNode self); +uint32_t ts_node_named_child_count(t_parse_node self); /** * Get the node's child with the given field name. */ -TSNode ts_node_child_by_field_name( - TSNode self, +t_parse_node ts_node_child_by_field_name( + t_parse_node self, const char *name, uint32_t name_length ); @@ -604,48 +600,48 @@ TSNode ts_node_child_by_field_name( * You can convert a field name to an id using the * [`ts_language_field_id_for_name`] function. */ -TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id); +t_parse_node ts_node_child_by_field_id(t_parse_node self, t_field_id field_id); /** * Get the node's next / previous sibling. 
*/ -TSNode ts_node_next_sibling(TSNode self); -TSNode ts_node_prev_sibling(TSNode self); +t_parse_node ts_node_next_sibling(t_parse_node self); +t_parse_node ts_node_prev_sibling(t_parse_node self); /** * Get the node's next / previous *named* sibling. */ -TSNode ts_node_next_named_sibling(TSNode self); -TSNode ts_node_prev_named_sibling(TSNode self); +t_parse_node ts_node_next_named_sibling(t_parse_node self); +t_parse_node ts_node_prev_named_sibling(t_parse_node self); /** * Get the node's first child that extends beyond the given byte offset. */ -TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte); +t_parse_node ts_node_first_child_for_byte(t_parse_node self, uint32_t byte); /** * Get the node's first named child that extends beyond the given byte offset. */ -TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte); +t_parse_node ts_node_first_named_child_for_byte(t_parse_node self, uint32_t byte); /** * Get the node's number of descendants, including one for the node itself. */ -uint32_t ts_node_descendant_count(TSNode self); +uint32_t ts_node_descendant_count(t_parse_node self); /** * Get the smallest node within this node that spans the given range of bytes * or (row, column) positions. */ -TSNode ts_node_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end); -TSNode ts_node_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end); +t_parse_node ts_node_descendant_for_byte_range(t_parse_node self, uint32_t start, uint32_t end); +t_parse_node ts_node_descendant_for_point_range(t_parse_node self, t_point start, t_point end); /** * Get the smallest named node within this node that spans the given range of * bytes or (row, column) positions. 
*/ -TSNode ts_node_named_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end); -TSNode ts_node_named_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end); +t_parse_node ts_node_named_descendant_for_byte_range(t_parse_node self, uint32_t start, uint32_t end); +t_parse_node ts_node_named_descendant_for_point_range(t_parse_node self, t_point start, t_point end); /** * Edit the node to keep it in-sync with source code that has been edited. @@ -656,12 +652,12 @@ TSNode ts_node_named_descendant_for_point_range(TSNode self, TSPoint start, TSPo * when you have a [`TSNode`] instance that you want to keep and continue to use * after an edit. */ -void ts_node_edit(TSNode *self, const TSInputEdit *edit); +void ts_node_edit(t_parse_node *self, const t_input_edit *edit); /** * Check if two nodes are identical. */ -bool ts_node_eq(TSNode self, TSNode other); +bool ts_node_eq(t_parse_node self, t_parse_node other); /************************/ /* Section - TreeCursor */ @@ -674,17 +670,17 @@ bool ts_node_eq(TSNode self, TSNode other); * possible using the [`TSNode`] functions. It is a mutable object that is always * on a certain syntax node, and can be moved imperatively to different nodes. */ -TSTreeCursor ts_tree_cursor_new(TSNode node); +t_tree_cursor ts_tree_cursor_new(t_parse_node node); /** * Delete a tree cursor, freeing all of the memory that it used. */ -void ts_tree_cursor_delete(TSTreeCursor *self); +void ts_tree_cursor_delete(t_tree_cursor *self); /** * Re-initialize a tree cursor to start at a different node. */ -void ts_tree_cursor_reset(TSTreeCursor *self, TSNode node); +void ts_tree_cursor_reset(t_tree_cursor *self, t_parse_node node); /** * Re-initialize a tree cursor to the same position as another cursor. @@ -692,12 +688,12 @@ void ts_tree_cursor_reset(TSTreeCursor *self, TSNode node); * Unlike [`ts_tree_cursor_reset`], this will not lose parent information and * allows reusing already created cursors. 
*/ -void ts_tree_cursor_reset_to(TSTreeCursor *dst, const TSTreeCursor *src); +void ts_tree_cursor_reset_to(t_tree_cursor *dst, const t_tree_cursor *src); /** * Get the tree cursor's current node. */ -TSNode ts_tree_cursor_current_node(const TSTreeCursor *self); +t_parse_node ts_tree_cursor_current_node(const t_tree_cursor *self); /** * Get the field name of the tree cursor's current node. @@ -705,7 +701,7 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *self); * This returns `NULL` if the current node doesn't have a field. * See also [`ts_node_child_by_field_name`]. */ -const char *ts_tree_cursor_current_field_name(const TSTreeCursor *self); +const char *ts_tree_cursor_current_field_name(const t_tree_cursor *self); /** * Get the field id of the tree cursor's current node. @@ -713,7 +709,7 @@ const char *ts_tree_cursor_current_field_name(const TSTreeCursor *self); * This returns zero if the current node doesn't have a field. * See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`]. */ -TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *self); +t_field_id ts_tree_cursor_current_field_id(const t_tree_cursor *self); /** * Move the cursor to the parent of its current node. @@ -721,7 +717,7 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *self); * This returns `true` if the cursor successfully moved, and returns `false` * if there was no parent node (the cursor was already on the root node). */ -bool ts_tree_cursor_goto_parent(TSTreeCursor *self); +bool ts_tree_cursor_goto_parent(t_tree_cursor *self); /** * Move the cursor to the next sibling of its current node. @@ -729,7 +725,7 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *self); * This returns `true` if the cursor successfully moved, and returns `false` * if there was no next sibling node. 
*/ -bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self); +bool ts_tree_cursor_goto_next_sibling(t_tree_cursor *self); /** * Move the cursor to the previous sibling of its current node. @@ -742,7 +738,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self); * the worst case, this will need to iterate through all the children upto the * previous sibling node to recalculate its position. */ -bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self); +bool ts_tree_cursor_goto_previous_sibling(t_tree_cursor *self); /** * Move the cursor to the first child of its current node. @@ -750,7 +746,7 @@ bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self); * This returns `true` if the cursor successfully moved, and returns `false` * if there were no children. */ -bool ts_tree_cursor_goto_first_child(TSTreeCursor *self); +bool ts_tree_cursor_goto_first_child(t_tree_cursor *self); /** * Move the cursor to the last child of its current node. @@ -762,26 +758,26 @@ bool ts_tree_cursor_goto_first_child(TSTreeCursor *self); * because it needs to iterate through all the children to compute the child's * position. */ -bool ts_tree_cursor_goto_last_child(TSTreeCursor *self); +bool ts_tree_cursor_goto_last_child(t_tree_cursor *self); /** * Move the cursor to the node that is the nth descendant of * the original node that the cursor was constructed with, where * zero represents the original node itself. */ -void ts_tree_cursor_goto_descendant(TSTreeCursor *self, uint32_t goal_descendant_index); +void ts_tree_cursor_goto_descendant(t_tree_cursor *self, uint32_t goal_descendant_index); /** * Get the index of the cursor's current node out of all of the * descendants of the original node that the cursor was constructed with. 
*/ -uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *self); +uint32_t ts_tree_cursor_current_descendant_index(const t_tree_cursor *self); /** * Get the depth of the cursor's current node relative to the original * node that the cursor was constructed with. */ -uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *self); +uint32_t ts_tree_cursor_current_depth(const t_tree_cursor *self); /** * Move the cursor to the first child of its current node that extends beyond @@ -790,10 +786,10 @@ uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *self); * This returns the index of the child node if one was found, and returns -1 * if no such child was found. */ -int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte); -int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point); +int64_t ts_tree_cursor_goto_first_child_for_byte(t_tree_cursor *self, uint32_t goal_byte); +int64_t ts_tree_cursor_goto_first_child_for_point(t_tree_cursor *self, t_point goal_point); -TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *cursor); +t_tree_cursor ts_tree_cursor_copy(const t_tree_cursor *cursor); /*******************/ /* Section - Query */ @@ -810,25 +806,25 @@ TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *cursor); * 1. The byte offset of the error is written to the `error_offset` parameter. * 2. The type of error is written to the `error_type` parameter. */ -TSQuery *ts_query_new( - const TSLanguage *language, +t_query *ts_query_new( + const t_language *language, const char *source, uint32_t source_len, uint32_t *error_offset, - TSQueryError *error_type + t_query_error *error_type ); /** * Delete a query, freeing all of the memory that it used. */ -void ts_query_delete(TSQuery *self); +void ts_query_delete(t_query *self); /** * Get the number of patterns, captures, or string literals in the query. 
*/ -uint32_t ts_query_pattern_count(const TSQuery *self); -uint32_t ts_query_capture_count(const TSQuery *self); -uint32_t ts_query_string_count(const TSQuery *self); +uint32_t ts_query_pattern_count(const t_query *self); +uint32_t ts_query_capture_count(const t_query *self); +uint32_t ts_query_string_count(const t_query *self); /** * Get the byte offset where the given pattern starts in the query's source. @@ -836,7 +832,7 @@ uint32_t ts_query_string_count(const TSQuery *self); * This can be useful when combining queries by concatenating their source * code strings. */ -uint32_t ts_query_start_byte_for_pattern(const TSQuery *self, uint32_t pattern_index); +uint32_t ts_query_start_byte_for_pattern(const t_query *self, uint32_t pattern_index); /** * Get all of the predicates for the given pattern in the query. @@ -854,8 +850,8 @@ uint32_t ts_query_start_byte_for_pattern(const TSQuery *self, uint32_t pattern_i * that represent the end of an individual predicate. If a pattern has two * predicates, then there will be two steps with this `type` in the array. */ -const TSQueryPredicateStep *ts_query_predicates_for_pattern( - const TSQuery *self, +const t_query_predicate_step *ts_query_predicates_for_pattern( + const t_query *self, uint32_t pattern_index, uint32_t *step_count ); @@ -863,7 +859,7 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern( /* * Check if the given pattern in the query has a single root node. */ -bool ts_query_is_pattern_rooted(const TSQuery *self, uint32_t pattern_index); +bool ts_query_is_pattern_rooted(const t_query *self, uint32_t pattern_index); /* * Check if the given pattern in the query is 'non local'. @@ -873,13 +869,13 @@ bool ts_query_is_pattern_rooted(const TSQuery *self, uint32_t pattern_index); * patterns disable certain optimizations that would otherwise be possible * when executing a query on a specific range of a syntax tree. 
*/ -bool ts_query_is_pattern_non_local(const TSQuery *self, uint32_t pattern_index); +bool ts_query_is_pattern_non_local(const t_query *self, uint32_t pattern_index); /* * Check if a given pattern is guaranteed to match once a given step is reached. * The step is specified by its byte offset in the query's source code. */ -bool ts_query_is_pattern_guaranteed_at_step(const TSQuery *self, uint32_t byte_offset); +bool ts_query_is_pattern_guaranteed_at_step(const t_query *self, uint32_t byte_offset); /** * Get the name and length of one of the query's captures, or one of the @@ -887,7 +883,7 @@ bool ts_query_is_pattern_guaranteed_at_step(const TSQuery *self, uint32_t byte_o * numeric id based on the order that it appeared in the query's source. */ const char *ts_query_capture_name_for_id( - const TSQuery *self, + const t_query *self, uint32_t index, uint32_t *length ); @@ -896,14 +892,14 @@ const char *ts_query_capture_name_for_id( * Get the quantifier of the query's captures. Each capture is * associated * with a numeric id based on the order that it appeared in the query's source. */ -TSQuantifier ts_query_capture_quantifier_for_id( - const TSQuery *self, +t_quantifier ts_query_capture_quantifier_for_id( + const t_query *self, uint32_t pattern_index, uint32_t capture_index ); const char *ts_query_string_value_for_id( - const TSQuery *self, + const t_query *self, uint32_t index, uint32_t *length ); @@ -915,7 +911,7 @@ const char *ts_query_string_value_for_id( * any resource usage associated with recording the capture. Currently, there * is no way to undo this. */ -void ts_query_disable_capture(TSQuery *self, const char *name, uint32_t length); +void ts_query_disable_capture(t_query *self, const char *name, uint32_t length); /** * Disable a certain pattern within a query. 
@@ -923,7 +919,7 @@ void ts_query_disable_capture(TSQuery *self, const char *name, uint32_t length); * This prevents the pattern from matching and removes most of the overhead * associated with the pattern. Currently, there is no way to undo this. */ -void ts_query_disable_pattern(TSQuery *self, uint32_t pattern_index); +void ts_query_disable_pattern(t_query *self, uint32_t pattern_index); /** * Create a new cursor for executing a given query. @@ -947,17 +943,17 @@ void ts_query_disable_pattern(TSQuery *self, uint32_t pattern_index); * You can then start executing another query on another node by calling * [`ts_query_cursor_exec`] again. */ -TSQueryCursor *ts_query_cursor_new(void); +t_query_cursor *ts_query_cursor_new(void); /** * Delete a query cursor, freeing all of the memory that it used. */ -void ts_query_cursor_delete(TSQueryCursor *self); +void ts_query_cursor_delete(t_query_cursor *self); /** * Start running a given query on a given node. */ -void ts_query_cursor_exec(TSQueryCursor *self, const TSQuery *query, TSNode node); +void ts_query_cursor_exec(t_query_cursor *self, const t_query *query, t_parse_node node); /** * Manage the maximum number of in-progress matches allowed by this query @@ -970,16 +966,16 @@ void ts_query_cursor_exec(TSQueryCursor *self, const TSQuery *query, TSNode node * any number of pending matches, dynamically allocating new space for them as * needed as the query is executed. */ -bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self); -uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self); -void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit); +bool ts_query_cursor_did_exceed_match_limit(const t_query_cursor *self); +uint32_t ts_query_cursor_match_limit(const t_query_cursor *self); +void ts_query_cursor_set_match_limit(t_query_cursor *self, uint32_t limit); /** * Set the range of bytes or (row, column) positions in which the query * will be executed. 
*/ -void ts_query_cursor_set_byte_range(TSQueryCursor *self, uint32_t start_byte, uint32_t end_byte); -void ts_query_cursor_set_point_range(TSQueryCursor *self, TSPoint start_point, TSPoint end_point); +void ts_query_cursor_set_byte_range(t_query_cursor *self, uint32_t start_byte, uint32_t end_byte); +void ts_query_cursor_set_point_range(t_query_cursor *self, t_point start_point, t_point end_point); /** * Advance to the next match of the currently running query. @@ -987,8 +983,8 @@ void ts_query_cursor_set_point_range(TSQueryCursor *self, TSPoint start_point, T * If there is a match, write it to `*match` and return `true`. * Otherwise, return `false`. */ -bool ts_query_cursor_next_match(TSQueryCursor *self, TSQueryMatch *match); -void ts_query_cursor_remove_match(TSQueryCursor *self, uint32_t match_id); +bool ts_query_cursor_next_match(t_query_cursor *self, t_query_match *match); +void ts_query_cursor_remove_match(t_query_cursor *self, uint32_t match_id); /** * Advance to the next capture of the currently running query. @@ -997,8 +993,8 @@ void ts_query_cursor_remove_match(TSQueryCursor *self, uint32_t match_id); * the matche's capture list to `*capture_index`. Otherwise, return `false`. */ bool ts_query_cursor_next_capture( - TSQueryCursor *self, - TSQueryMatch *match, + t_query_cursor *self, + t_query_match *match, uint32_t *capture_index ); @@ -1016,7 +1012,7 @@ bool ts_query_cursor_next_capture( * * Set to `UINT32_MAX` to remove the maximum start depth. */ -void ts_query_cursor_set_max_start_depth(TSQueryCursor *self, uint32_t max_start_depth); +void ts_query_cursor_set_max_start_depth(t_query_cursor *self, uint32_t max_start_depth); /**********************/ /* Section - Language */ @@ -1025,34 +1021,34 @@ void ts_query_cursor_set_max_start_depth(TSQueryCursor *self, uint32_t max_start /** * Get another reference to the given language. 
*/ -const TSLanguage *ts_language_copy(const TSLanguage *self); +const t_language *ts_language_copy(const t_language *self); /** * Free any dynamically-allocated resources for this language, if * this is the last reference. */ -void ts_language_delete(const TSLanguage *self); +void ts_language_delete(const t_language *self); /** * Get the number of distinct node types in the language. */ -uint32_t ts_language_symbol_count(const TSLanguage *self); +uint32_t ts_language_symbol_count(const t_language *self); /** * Get the number of valid states in this language. */ -uint32_t ts_language_state_count(const TSLanguage *self); +uint32_t ts_language_state_count(const t_language *self); /** * Get a node type string for the given numerical id. */ -const char *ts_language_symbol_name(const TSLanguage *self, TSSymbol symbol); +const char *ts_language_symbol_name(const t_language *self, t_symbol symbol); /** * Get the numerical id for the given node type string. */ -TSSymbol ts_language_symbol_for_name( - const TSLanguage *self, +t_symbol ts_language_symbol_for_name( + const t_language *self, const char *string, uint32_t length, bool is_named @@ -1061,17 +1057,17 @@ TSSymbol ts_language_symbol_for_name( /** * Get the number of distinct field names in the language. */ -uint32_t ts_language_field_count(const TSLanguage *self); +uint32_t ts_language_field_count(const t_language *self); /** * Get the field name string for the given numerical id. */ -const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id); +const char *ts_language_field_name_for_id(const t_language *self, t_field_id id); /** * Get the numerical id for the given field name string. 
*/ -TSFieldId ts_language_field_id_for_name(const TSLanguage *self, const char *name, uint32_t name_length); +t_field_id ts_language_field_id_for_name(const t_language *self, const char *name, uint32_t name_length); /** * Check whether the given node type id belongs to named nodes, anonymous nodes, @@ -1079,7 +1075,7 @@ TSFieldId ts_language_field_id_for_name(const TSLanguage *self, const char *name * * See also [`ts_node_is_named`]. Hidden nodes are never returned from the API. */ -TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol); +t_symbol_type ts_language_symbol_type(const t_language *self, t_symbol symbol); /** * Get the ABI version number for this language. This version number is used @@ -1088,14 +1084,14 @@ TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol); * * See also [`ts_parser_set_language`]. */ -uint32_t ts_language_version(const TSLanguage *self); +uint32_t ts_language_version(const t_language *self); /** * Get the next parse state. Combine this with lookahead iterators to generate * completion suggestions or valid symbols in error nodes. Use * [`ts_node_grammar_symbol`] for valid symbols. */ -TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol); +t_state_id ts_language_next_state(const t_language *self, t_state_id state, t_symbol symbol); /********************************/ /* Section - Lookahead Iterator */ @@ -1116,12 +1112,12 @@ TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymb * iterator on its first leaf node state. For `MISSING` nodes, a lookahead * iterator created on the previous non-extra leaf node may be appropriate. */ -TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state); +t_lookahead_iterator *ts_lookahead_iterator_new(const t_language *self, t_state_id state); /** * Delete a lookahead iterator freeing all the memory used. 
*/ -void ts_lookahead_iterator_delete(TSLookaheadIterator *self); +void ts_lookahead_iterator_delete(t_lookahead_iterator *self); /** * Reset the lookahead iterator to another state. @@ -1129,7 +1125,7 @@ void ts_lookahead_iterator_delete(TSLookaheadIterator *self); * This returns `true` if the iterator was reset to the given state and `false` * otherwise. */ -bool ts_lookahead_iterator_reset_state(TSLookaheadIterator *self, TSStateId state); +bool ts_lookahead_iterator_reset_state(t_lookahead_iterator *self, t_state_id state); /** * Reset the lookahead iterator. @@ -1137,37 +1133,37 @@ bool ts_lookahead_iterator_reset_state(TSLookaheadIterator *self, TSStateId stat * This returns `true` if the language was set successfully and `false` * otherwise. */ -bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state); +bool ts_lookahead_iterator_reset(t_lookahead_iterator *self, const t_language *language, t_state_id state); /** * Get the current language of the lookahead iterator. */ -const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self); +const t_language *ts_lookahead_iterator_language(const t_lookahead_iterator *self); /** * Advance the lookahead iterator to the next symbol. * * This returns `true` if there is a new symbol and `false` otherwise. */ -bool ts_lookahead_iterator_next(TSLookaheadIterator *self); +bool ts_lookahead_iterator_next(t_lookahead_iterator *self); /** * Get the current symbol of the lookahead iterator; */ -TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self); +t_symbol ts_lookahead_iterator_current_symbol(const t_lookahead_iterator *self); /** * Get the current symbol type of the lookahead iterator as a null terminated * string. 
*/ -const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self); +const char *ts_lookahead_iterator_current_symbol_name(const t_lookahead_iterator *self); /*************************************/ /* Section - WebAssembly Integration */ /************************************/ -typedef struct wasm_engine_t TSWasmEngine; -typedef struct TSWasmStore TSWasmStore; +typedef struct wasm_engine_t t_wasm_engine; +typedef struct t_wasm_store t_wasm_store; typedef enum { TSWasmErrorKindNone = 0, @@ -1175,25 +1171,25 @@ typedef enum { TSWasmErrorKindCompile, TSWasmErrorKindInstantiate, TSWasmErrorKindAllocate, -} TSWasmErrorKind; +} t_wasm_error_kind; typedef struct { - TSWasmErrorKind kind; + t_wasm_error_kind kind; char *message; -} TSWasmError; +} t_wasm_error; /** * Create a Wasm store. */ -TSWasmStore *ts_wasm_store_new( - TSWasmEngine *engine, - TSWasmError *error +t_wasm_store *ts_wasm_store_new( + t_wasm_engine *engine, + t_wasm_error *error ); /** * Free the memory associated with the given Wasm store. */ -void ts_wasm_store_delete(TSWasmStore *); +void ts_wasm_store_delete(t_wasm_store *); /** * Create a language from a buffer of Wasm. The resulting language behaves @@ -1202,36 +1198,36 @@ void ts_wasm_store_delete(TSWasmStore *); * can be used with any Wasm store, it doesn't need to be the same store that * was used to originally load it. */ -const TSLanguage *ts_wasm_store_load_language( - TSWasmStore *, +const t_language *ts_wasm_store_load_language( + t_wasm_store *, const char *name, const char *wasm, uint32_t wasm_len, - TSWasmError *error + t_wasm_error *error ); /** * Get the number of languages instantiated in the given wasm store. */ -size_t ts_wasm_store_language_count(const TSWasmStore *); +size_t ts_wasm_store_language_count(const t_wasm_store *); /** * Check if the language came from a Wasm module. If so, then in order to use * this language with a Parser, that parser must have a Wasm store assigned. 
*/ -bool ts_language_is_wasm(const TSLanguage *); +bool ts_language_is_wasm(const t_language *); /** * Assign the given Wasm store to the parser. A parser must have a Wasm store * in order to use Wasm languages. */ -void ts_parser_set_wasm_store(TSParser *, TSWasmStore *); +void ts_parser_set_wasm_store(t_parser *, t_wasm_store *); /** * Remove the parser's current Wasm store and return it. This returns NULL if * the parser doesn't have a Wasm store. */ -TSWasmStore *ts_parser_take_wasm_store(TSParser *); +t_wasm_store *ts_parser_take_wasm_store(t_parser *); /**********************************/ /* Section - Global Configuration */ @@ -1260,9 +1256,6 @@ void ts_set_allocator( void (*new_free)(void *) ); -#ifdef __cplusplus -} -#endif #ifndef TREE_SITTER_HIDE_SYMBOLS #if defined(__GNUC__) || defined(__clang__) diff --git a/parser/src/get_changed_ranges.c b/parser/src/get_changed_ranges.c index bcf8da94..902e1a4f 100644 --- a/parser/src/get_changed_ranges.c +++ b/parser/src/get_changed_ranges.c @@ -13,7 +13,7 @@ static void ts_range_array_add( Length end ) { if (self->size > 0) { - TSRange *last_range = array_back(self); + t_range *last_range = array_back(self); if (start.bytes <= last_range->end_byte) { last_range->end_byte = end.bytes; last_range->end_point = end.extent; @@ -22,7 +22,7 @@ static void ts_range_array_add( } if (start.bytes < end.bytes) { - TSRange range = { start.extent, end.extent, start.bytes, end.bytes }; + t_range range = { start.extent, end.extent, start.bytes, end.bytes }; array_push(self, range); } } @@ -34,7 +34,7 @@ bool ts_range_array_intersects( uint32_t end_byte ) { for (unsigned i = start_index; i < self->size; i++) { - TSRange *range = &self->contents[i]; + t_range *range = &self->contents[i]; if (range->end_byte > start_byte) { if (range->start_byte >= end_byte) break; return true; @@ -44,8 +44,8 @@ bool ts_range_array_intersects( } void ts_range_array_get_changed_ranges( - const TSRange *old_ranges, unsigned old_range_count, - const 
TSRange *new_ranges, unsigned new_range_count, + const t_range *old_ranges, unsigned old_range_count, + const t_range *new_ranges, unsigned new_range_count, TSRangeArray *differences ) { unsigned new_index = 0; @@ -55,8 +55,8 @@ void ts_range_array_get_changed_ranges( bool in_new_range = false; while (old_index < old_range_count || new_index < new_range_count) { - const TSRange *old_range = &old_ranges[old_index]; - const TSRange *new_range = &new_ranges[new_index]; + const t_range *old_range = &old_ranges[old_index]; + const t_range *new_range = &new_ranges[new_index]; Length next_old_position; if (in_old_range) { @@ -105,7 +105,7 @@ void ts_range_array_get_changed_ranges( typedef struct { TreeCursor cursor; - const TSLanguage *language; + const t_language *language; unsigned visible_depth; bool in_padding; } Iterator; @@ -113,7 +113,7 @@ typedef struct { static Iterator iterator_new( TreeCursor *cursor, const Subtree *tree, - const TSLanguage *language + const t_language *language ) { array_clear(&cursor->stack); array_push(&cursor->stack, ((TreeCursorEntry) { @@ -170,7 +170,7 @@ static bool iterator_tree_is_visible(const Iterator *self) { static void iterator_get_visible_state( const Iterator *self, Subtree *tree, - TSSymbol *alias_symbol, + t_symbol *alias_symbol, uint32_t *start_byte ) { uint32_t i = self->cursor.stack.size - 1; @@ -309,8 +309,8 @@ static IteratorComparison iterator_compare( Subtree new_tree = NULL_SUBTREE; uint32_t old_start = 0; uint32_t new_start = 0; - TSSymbol old_alias_symbol = 0; - TSSymbol new_alias_symbol = 0; + t_symbol old_alias_symbol = 0; + t_symbol new_alias_symbol = 0; iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start); iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start); @@ -357,9 +357,9 @@ static inline void iterator_print_state(Iterator *self) { unsigned ts_subtree_get_changed_ranges( const Subtree *old_tree, const Subtree *new_tree, TreeCursor *cursor1, TreeCursor 
*cursor2, - const TSLanguage *language, + const t_language *language, const TSRangeArray *included_range_differences, - TSRange **ranges + t_range **ranges ) { TSRangeArray results = array_new(); @@ -475,7 +475,7 @@ unsigned ts_subtree_get_changed_ranges( // Keep track of the current position in the included range differences // array in order to avoid scanning the entire array on each iteration. while (included_range_difference_index < included_range_differences->size) { - const TSRange *range = &included_range_differences->contents[ + const t_range *range = &included_range_differences->contents[ included_range_difference_index ]; if (range->end_byte <= position.bytes) { diff --git a/parser/src/get_changed_ranges.h b/parser/src/get_changed_ranges.h index a1f1dbb4..982a7047 100644 --- a/parser/src/get_changed_ranges.h +++ b/parser/src/get_changed_ranges.h @@ -8,11 +8,11 @@ extern "C" { #include "./tree_cursor.h" #include "./subtree.h" -typedef Array(TSRange) TSRangeArray; +typedef Array(t_range) TSRangeArray; void ts_range_array_get_changed_ranges( - const TSRange *old_ranges, unsigned old_range_count, - const TSRange *new_ranges, unsigned new_range_count, + const t_range *old_ranges, unsigned old_range_count, + const t_range *new_ranges, unsigned new_range_count, TSRangeArray *differences ); @@ -24,9 +24,9 @@ bool ts_range_array_intersects( unsigned ts_subtree_get_changed_ranges( const Subtree *old_tree, const Subtree *new_tree, TreeCursor *cursor1, TreeCursor *cursor2, - const TSLanguage *language, + const t_language *language, const TSRangeArray *included_range_differences, - TSRange **ranges + t_range **ranges ); #ifdef __cplusplus diff --git a/parser/src/language.c b/parser/src/language.c index d3d6ef5e..5cf86906 100644 --- a/parser/src/language.c +++ b/parser/src/language.c @@ -3,34 +3,34 @@ #include "./api.h" #include -const TSLanguage *ts_language_copy(const TSLanguage *self) { +const t_language *ts_language_copy(const t_language *self) { return self; } 
-void ts_language_delete(const TSLanguage *self) { +void ts_language_delete(const t_language *self) { (void)(self); } -uint32_t ts_language_symbol_count(const TSLanguage *self) { +uint32_t ts_language_symbol_count(const t_language *self) { return self->symbol_count + self->alias_count; } -uint32_t ts_language_state_count(const TSLanguage *self) { +uint32_t ts_language_state_count(const t_language *self) { return self->state_count; } -uint32_t ts_language_version(const TSLanguage *self) { +uint32_t ts_language_version(const t_language *self) { return self->version; } -uint32_t ts_language_field_count(const TSLanguage *self) { +uint32_t ts_language_field_count(const t_language *self) { return self->field_count; } void ts_language_table_entry( - const TSLanguage *self, - TSStateId state, - TSSymbol symbol, + const t_language *self, + t_state_id state, + t_symbol symbol, TableEntry *result ) { if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { @@ -48,8 +48,8 @@ void ts_language_table_entry( } TSSymbolMetadata ts_language_symbol_metadata( - const TSLanguage *self, - TSSymbol symbol + const t_language *self, + t_symbol symbol ) { if (symbol == ts_builtin_sym_error) { return (TSSymbolMetadata) {.visible = true, .named = true}; @@ -60,18 +60,18 @@ TSSymbolMetadata ts_language_symbol_metadata( } } -TSSymbol ts_language_public_symbol( - const TSLanguage *self, - TSSymbol symbol +t_symbol ts_language_public_symbol( + const t_language *self, + t_symbol symbol ) { if (symbol == ts_builtin_sym_error) return symbol; return self->public_symbol_map[symbol]; } -TSStateId ts_language_next_state( - const TSLanguage *self, - TSStateId state, - TSSymbol symbol +t_state_id ts_language_next_state( + const t_language *self, + t_state_id state, + t_symbol symbol ) { if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { return 0; @@ -91,8 +91,8 @@ TSStateId ts_language_next_state( } const char *ts_language_symbol_name( - const TSLanguage 
*self, - TSSymbol symbol + const t_language *self, + t_symbol symbol ) { if (symbol == ts_builtin_sym_error) { return "ERROR"; @@ -105,15 +105,15 @@ const char *ts_language_symbol_name( } } -TSSymbol ts_language_symbol_for_name( - const TSLanguage *self, +t_symbol ts_language_symbol_for_name( + const t_language *self, const char *string, uint32_t length, bool is_named ) { if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error; uint16_t count = (uint16_t)ts_language_symbol_count(self); - for (TSSymbol i = 0; i < count; i++) { + for (t_symbol i = 0; i < count; i++) { TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i); if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; const char *symbol_name = self->symbol_names[i]; @@ -124,9 +124,9 @@ TSSymbol ts_language_symbol_for_name( return 0; } -TSSymbolType ts_language_symbol_type( - const TSLanguage *self, - TSSymbol symbol +t_symbol_type ts_language_symbol_type( + const t_language *self, + t_symbol symbol ) { TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol); if (metadata.named && metadata.visible) { @@ -139,8 +139,8 @@ TSSymbolType ts_language_symbol_type( } const char *ts_language_field_name_for_id( - const TSLanguage *self, - TSFieldId id + const t_language *self, + t_field_id id ) { uint32_t count = ts_language_field_count(self); if (count && id <= count) { @@ -150,13 +150,13 @@ const char *ts_language_field_name_for_id( } } -TSFieldId ts_language_field_id_for_name( - const TSLanguage *self, +t_field_id ts_language_field_id_for_name( + const t_language *self, const char *name, uint32_t name_length ) { uint16_t count = (uint16_t)ts_language_field_count(self); - for (TSSymbol i = 1; i < count + 1; i++) { + for (t_symbol i = 1; i < count + 1; i++) { switch (strncmp(name, self->field_names[i], name_length)) { case 0: if (self->field_names[i][name_length] == 0) return i; @@ -170,47 +170,47 @@ TSFieldId ts_language_field_id_for_name( return 0; 
} -TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) { +t_lookahead_iterator *ts_lookahead_iterator_new(const t_language *self, t_state_id state) { if (state >= self->state_count) return NULL; LookaheadIterator *iterator = ts_malloc(sizeof(LookaheadIterator)); *iterator = ts_language_lookaheads(self, state); - return (TSLookaheadIterator *)iterator; + return (t_lookahead_iterator *)iterator; } -void ts_lookahead_iterator_delete(TSLookaheadIterator *self) { +void ts_lookahead_iterator_delete(t_lookahead_iterator *self) { ts_free(self); } -bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) { +bool ts_lookahead_iterator_reset_state(t_lookahead_iterator * self, t_state_id state) { LookaheadIterator *iterator = (LookaheadIterator *)self; if (state >= iterator->language->state_count) return false; *iterator = ts_language_lookaheads(iterator->language, state); return true; } -const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) { +const t_language *ts_lookahead_iterator_language(const t_lookahead_iterator *self) { const LookaheadIterator *iterator = (const LookaheadIterator *)self; return iterator->language; } -bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) { +bool ts_lookahead_iterator_reset(t_lookahead_iterator *self, const t_language *language, t_state_id state) { if (state >= language->state_count) return false; LookaheadIterator *iterator = (LookaheadIterator *)self; *iterator = ts_language_lookaheads(language, state); return true; } -bool ts_lookahead_iterator_next(TSLookaheadIterator *self) { +bool ts_lookahead_iterator_next(t_lookahead_iterator *self) { LookaheadIterator *iterator = (LookaheadIterator *)self; return ts_lookahead_iterator__next(iterator); } -TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) { +t_symbol ts_lookahead_iterator_current_symbol(const t_lookahead_iterator 
*self) { const LookaheadIterator *iterator = (const LookaheadIterator *)self; return iterator->symbol; } -const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) { +const char *ts_lookahead_iterator_current_symbol_name(const t_lookahead_iterator *self) { const LookaheadIterator *iterator = (const LookaheadIterator *)self; return ts_language_symbol_name(iterator->language, iterator->symbol); } diff --git a/parser/src/language.h b/parser/src/language.h index 4e2769b4..94167b45 100644 --- a/parser/src/language.h +++ b/parser/src/language.h @@ -20,37 +20,37 @@ typedef struct { } TableEntry; typedef struct { - const TSLanguage *language; + const t_language *language; const uint16_t *data; const uint16_t *group_end; - TSStateId state; + t_state_id state; uint16_t table_value; uint16_t section_index; uint16_t group_count; bool is_small_state; const TSParseAction *actions; - TSSymbol symbol; - TSStateId next_state; + t_symbol symbol; + t_state_id next_state; uint16_t action_count; } LookaheadIterator; -void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *); +void ts_language_table_entry(const t_language *, t_state_id, t_symbol, TableEntry *); -TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol); +TSSymbolMetadata ts_language_symbol_metadata(const t_language *, t_symbol); -TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol); +t_symbol ts_language_public_symbol(const t_language *, t_symbol); -TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol); +t_state_id ts_language_next_state(const t_language *self, t_state_id state, t_symbol symbol); -static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) { +static inline bool ts_language_is_symbol_external(const t_language *self, t_symbol symbol) { return 0 < symbol && symbol < self->external_token_count + 1; } static inline const TSParseAction *ts_language_actions( - 
const TSLanguage *self, - TSStateId state, - TSSymbol symbol, + const t_language *self, + t_state_id state, + t_symbol symbol, uint32_t *count ) { TableEntry entry; @@ -60,9 +60,9 @@ static inline const TSParseAction *ts_language_actions( } static inline bool ts_language_has_reduce_action( - const TSLanguage *self, - TSStateId state, - TSSymbol symbol + const t_language *self, + t_state_id state, + t_symbol symbol ) { TableEntry entry; ts_language_table_entry(self, state, symbol, &entry); @@ -77,9 +77,9 @@ static inline bool ts_language_has_reduce_action( // states, this requires searching through the symbol groups to find // the given symbol. static inline uint16_t ts_language_lookup( - const TSLanguage *self, - TSStateId state, - TSSymbol symbol + const t_language *self, + t_state_id state, + t_symbol symbol ) { if (state >= self->large_state_count) { uint32_t index = self->small_parse_table_map[state - self->large_state_count]; @@ -99,9 +99,9 @@ static inline uint16_t ts_language_lookup( } static inline bool ts_language_has_actions( - const TSLanguage *self, - TSStateId state, - TSSymbol symbol + const t_language *self, + t_state_id state, + t_symbol symbol ) { return ts_language_lookup(self, state, symbol) != 0; } @@ -113,8 +113,8 @@ static inline bool ts_language_has_actions( // For 'small' parse states, this exploits the structure of the // table to only visit the valid symbols. static inline LookaheadIterator ts_language_lookaheads( - const TSLanguage *self, - TSStateId state + const t_language *self, + t_state_id state ) { bool is_small_state = state >= self->large_state_count; const uint16_t *data; @@ -186,8 +186,8 @@ static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) { // Whether the state is a "primary state". If this returns false, it indicates that there exists // another state that behaves identically to this one with respect to query analysis. 
static inline bool ts_language_state_is_primary( - const TSLanguage *self, - TSStateId state + const t_language *self, + t_state_id state ) { if (self->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) { return state == self->primary_state_ids[state]; @@ -197,7 +197,7 @@ static inline bool ts_language_state_is_primary( } static inline const bool *ts_language_enabled_external_tokens( - const TSLanguage *self, + const t_language *self, unsigned external_scanner_state ) { if (external_scanner_state == 0) { @@ -207,8 +207,8 @@ static inline const bool *ts_language_enabled_external_tokens( } } -static inline const TSSymbol *ts_language_alias_sequence( - const TSLanguage *self, +static inline const t_symbol *ts_language_alias_sequence( + const t_language *self, uint32_t production_id ) { return production_id ? @@ -216,8 +216,8 @@ static inline const TSSymbol *ts_language_alias_sequence( NULL; } -static inline TSSymbol ts_language_alias_at( - const TSLanguage *self, +static inline t_symbol ts_language_alias_at( + const t_language *self, uint32_t production_id, uint32_t child_index ) { @@ -227,7 +227,7 @@ static inline TSSymbol ts_language_alias_at( } static inline void ts_language_field_map( - const TSLanguage *self, + const t_language *self, uint32_t production_id, const TSFieldMapEntry **start, const TSFieldMapEntry **end @@ -244,17 +244,17 @@ static inline void ts_language_field_map( } static inline void ts_language_aliases_for_symbol( - const TSLanguage *self, - TSSymbol original_symbol, - const TSSymbol **start, - const TSSymbol **end + const t_language *self, + t_symbol original_symbol, + const t_symbol **start, + const t_symbol **end ) { *start = &self->public_symbol_map[original_symbol]; *end = *start + 1; unsigned idx = 0; for (;;) { - TSSymbol symbol = self->alias_map[idx++]; + t_symbol symbol = self->alias_map[idx++]; if (symbol == 0 || symbol > original_symbol) break; uint16_t count = self->alias_map[idx++]; if (symbol == original_symbol) { @@ -267,9 +267,9 @@ 
static inline void ts_language_aliases_for_symbol( } static inline void ts_language_write_symbol_as_dot_string( - const TSLanguage *self, + const t_language *self, FILE *f, - TSSymbol symbol + t_symbol symbol ) { const char *name = ts_language_symbol_name(self, symbol); for (const char *chr = name; *chr; chr++) { diff --git a/parser/src/length.h b/parser/src/length.h index 82003c02..83a947d1 100644 --- a/parser/src/length.h +++ b/parser/src/length.h @@ -8,7 +8,7 @@ typedef struct { uint32_t bytes; - TSPoint extent; + t_point extent; } Length; static const Length LENGTH_UNDEFINED = {0, {0, 1}}; diff --git a/parser/src/lexer.c b/parser/src/lexer.c index b6c19cf1..daf62f3d 100644 --- a/parser/src/lexer.c +++ b/parser/src/lexer.c @@ -23,7 +23,7 @@ static const int32_t BYTE_ORDER_MARK = 0xFEFF; -static const TSRange DEFAULT_RANGE = { +static const t_range DEFAULT_RANGE = { .start_point = { .row = 0, .column = 0, @@ -127,7 +127,7 @@ static void ts_lexer_goto(Lexer *self, Length position) { // Move to the first valid position at or after the given position. bool found_included_range = false; for (unsigned i = 0; i < self->included_range_count; i++) { - TSRange *included_range = &self->included_ranges[i]; + t_range *included_range = &self->included_ranges[i]; if ( included_range->end_byte > self->current_position.bytes && included_range->end_byte > included_range->start_byte @@ -163,7 +163,7 @@ static void ts_lexer_goto(Lexer *self, Length position) { // state - past the end of the included ranges. 
else { self->current_included_range_index = self->included_range_count; - TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1]; + t_range *last_included_range = &self->included_ranges[self->included_range_count - 1]; self->current_position = (Length) { .bytes = last_included_range->end_byte, .extent = last_included_range->end_point, @@ -186,7 +186,7 @@ static void ts_lexer__do_advance(Lexer *self, bool skip) { } } - const TSRange *current_range = &self->included_ranges[self->current_included_range_index]; + const t_range *current_range = &self->included_ranges[self->current_included_range_index]; while ( self->current_position.bytes >= current_range->end_byte || current_range->end_byte == current_range->start_byte @@ -246,14 +246,14 @@ static void ts_lexer__mark_end(TSLexer *_self) { // If the lexer is right at the beginning of included range, // then the token should be considered to end at the *end* of the // previous included range, rather than here. - TSRange *current_included_range = &self->included_ranges[ + t_range *current_included_range = &self->included_ranges[ self->current_included_range_index ]; if ( self->current_included_range_index > 0 && self->current_position.bytes == current_included_range->start_byte ) { - TSRange *previous_included_range = current_included_range - 1; + t_range *previous_included_range = current_included_range - 1; self->token_end_position = (Length) { previous_included_range->end_byte, previous_included_range->end_point, @@ -296,7 +296,7 @@ static uint32_t ts_lexer__get_column(TSLexer *_self) { static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) { const Lexer *self = (const Lexer *)_self; if (self->current_included_range_index < self->included_range_count) { - TSRange *current_range = &self->included_ranges[self->current_included_range_index]; + t_range *current_range = &self->included_ranges[self->current_included_range_index]; return self->current_position.bytes == 
current_range->start_byte; } else { return false; @@ -336,7 +336,7 @@ void ts_lexer_delete(Lexer *self) { ts_free(self->included_ranges); } -void ts_lexer_set_input(Lexer *self, TSInput input) { +void ts_lexer_set_input(Lexer *self, t_input input) { self->input = input; ts_lexer__clear_chunk(self); ts_lexer_goto(self, self->current_position); @@ -404,7 +404,7 @@ void ts_lexer_mark_end(Lexer *self) { bool ts_lexer_set_included_ranges( Lexer *self, - const TSRange *ranges, + const t_range *ranges, uint32_t count ) { if (count == 0 || !ranges) { @@ -413,7 +413,7 @@ bool ts_lexer_set_included_ranges( } else { uint32_t previous_byte = 0; for (unsigned i = 0; i < count; i++) { - const TSRange *range = &ranges[i]; + const t_range *range = &ranges[i]; if ( range->start_byte < previous_byte || range->end_byte < range->start_byte @@ -422,7 +422,7 @@ bool ts_lexer_set_included_ranges( } } - size_t size = count * sizeof(TSRange); + size_t size = count * sizeof(t_range); self->included_ranges = ts_realloc(self->included_ranges, size); memcpy(self->included_ranges, ranges, size); self->included_range_count = count; @@ -430,7 +430,7 @@ bool ts_lexer_set_included_ranges( return true; } -TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) { +t_range *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) { *count = self->included_range_count; return self->included_ranges; } diff --git a/parser/src/lexer.h b/parser/src/lexer.h index 1d9482b5..4b63629b 100644 --- a/parser/src/lexer.h +++ b/parser/src/lexer.h @@ -16,10 +16,10 @@ typedef struct { Length token_start_position; Length token_end_position; - TSRange *included_ranges; + t_range *included_ranges; const char *chunk; - TSInput input; - TSLogger logger; + t_input input; + t_logger logger; uint32_t included_range_count; uint32_t current_included_range_index; @@ -33,14 +33,14 @@ typedef struct { void ts_lexer_init(Lexer *); void ts_lexer_delete(Lexer *); -void ts_lexer_set_input(Lexer *, TSInput); +void 
ts_lexer_set_input(Lexer *, t_input); void ts_lexer_reset(Lexer *, Length); void ts_lexer_start(Lexer *); void ts_lexer_finish(Lexer *, uint32_t *); void ts_lexer_advance_to_end(Lexer *); void ts_lexer_mark_end(Lexer *); -bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count); -TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count); +bool ts_lexer_set_included_ranges(Lexer *self, const t_range *ranges, uint32_t count); +t_range *ts_lexer_included_ranges(const Lexer *self, uint32_t *count); #ifdef __cplusplus } diff --git a/parser/src/node.c b/parser/src/node.c index 203d79b2..b1582632 100644 --- a/parser/src/node.c +++ b/parser/src/node.c @@ -5,58 +5,58 @@ typedef struct { Subtree parent; - const TSTree *tree; + const t_tree *tree; Length position; uint32_t child_index; uint32_t structural_child_index; - const TSSymbol *alias_sequence; + const t_symbol *alias_sequence; } NodeChildIterator; // TSNode - constructors -TSNode ts_node_new( - const TSTree *tree, +t_parse_node ts_node_new( + const t_tree *tree, const Subtree *subtree, Length position, - TSSymbol alias + t_symbol alias ) { - return (TSNode) { + return (t_parse_node) { {position.bytes, position.extent.row, position.extent.column, alias}, subtree, tree, }; } -static inline TSNode ts_node__null(void) { +static inline t_parse_node ts_node__null(void) { return ts_node_new(NULL, NULL, length_zero(), 0); } // TSNode - accessors -uint32_t ts_node_start_byte(TSNode self) { +uint32_t ts_node_start_byte(t_parse_node self) { return self.context[0]; } -TSPoint ts_node_start_point(TSNode self) { - return (TSPoint) {self.context[1], self.context[2]}; +t_point ts_node_start_point(t_parse_node self) { + return (t_point) {self.context[1], self.context[2]}; } -static inline uint32_t ts_node__alias(const TSNode *self) { +static inline uint32_t ts_node__alias(const t_parse_node *self) { return self->context[3]; } -static inline Subtree ts_node__subtree(TSNode self) { +static inline 
Subtree ts_node__subtree(t_parse_node self) { return *(const Subtree *)self.id; } // NodeChildIterator -static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) { +static inline NodeChildIterator ts_node_iterate_children(const t_parse_node *node) { Subtree subtree = ts_node__subtree(*node); if (ts_subtree_child_count(subtree) == 0) { return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; } - const TSSymbol *alias_sequence = ts_language_alias_sequence( + const t_symbol *alias_sequence = ts_language_alias_sequence( node->tree->language, subtree.ptr->production_id ); @@ -76,11 +76,11 @@ static inline bool ts_node_child_iterator_done(NodeChildIterator *self) { static inline bool ts_node_child_iterator_next( NodeChildIterator *self, - TSNode *result + t_parse_node *result ) { if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false; const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; - TSSymbol alias_symbol = 0; + t_symbol alias_symbol = 0; if (!ts_subtree_extra(*child)) { if (self->alias_sequence) { alias_symbol = self->alias_sequence[self->structural_child_index]; @@ -103,12 +103,12 @@ static inline bool ts_node_child_iterator_next( // TSNode - private -static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { +static inline bool ts_node__is_relevant(t_parse_node self, bool include_anonymous) { Subtree tree = ts_node__subtree(self); if (include_anonymous) { return ts_subtree_visible(tree) || ts_node__alias(&self); } else { - TSSymbol alias = ts_node__alias(&self); + t_symbol alias = ts_node__alias(&self); if (alias) { return ts_language_symbol_metadata(self.tree->language, alias).named; } else { @@ -118,7 +118,7 @@ static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { } static inline uint32_t ts_node__relevant_child_count( - TSNode self, + t_parse_node self, bool include_anonymous ) { Subtree tree = ts_node__subtree(self); @@ -133,18 
+133,18 @@ static inline uint32_t ts_node__relevant_child_count( } } -static inline TSNode ts_node__child( - TSNode self, +static inline t_parse_node ts_node__child( + t_parse_node self, uint32_t child_index, bool include_anonymous ) { - TSNode result = self; + t_parse_node result = self; bool did_descend = true; while (did_descend) { did_descend = false; - TSNode child; + t_parse_node child; uint32_t index = 0; NodeChildIterator iterator = ts_node_iterate_children(&result); while (ts_node_child_iterator_next(&iterator, &child)) { @@ -184,21 +184,21 @@ static bool ts_subtree_has_trailing_empty_descendant( return false; } -static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) { +static inline t_parse_node ts_node__prev_sibling(t_parse_node self, bool include_anonymous) { Subtree self_subtree = ts_node__subtree(self); bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0; uint32_t target_end_byte = ts_node_end_byte(self); - TSNode node = ts_node_parent(self); - TSNode earlier_node = ts_node__null(); + t_parse_node node = ts_node_parent(self); + t_parse_node earlier_node = ts_node__null(); bool earlier_node_is_relevant = false; while (!ts_node_is_null(node)) { - TSNode earlier_child = ts_node__null(); + t_parse_node earlier_child = ts_node__null(); bool earlier_child_is_relevant = false; bool found_child_containing_target = false; - TSNode child; + t_parse_node child; NodeChildIterator iterator = ts_node_iterate_children(&node); while (ts_node_child_iterator_next(&iterator, &child)) { if (child.id == self.id) break; @@ -245,19 +245,19 @@ static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) return ts_node__null(); } -static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) { +static inline t_parse_node ts_node__next_sibling(t_parse_node self, bool include_anonymous) { uint32_t target_end_byte = ts_node_end_byte(self); - TSNode node = ts_node_parent(self); - TSNode later_node = 
ts_node__null(); + t_parse_node node = ts_node_parent(self); + t_parse_node later_node = ts_node__null(); bool later_node_is_relevant = false; while (!ts_node_is_null(node)) { - TSNode later_child = ts_node__null(); + t_parse_node later_child = ts_node__null(); bool later_child_is_relevant = false; - TSNode child_containing_target = ts_node__null(); + t_parse_node child_containing_target = ts_node__null(); - TSNode child; + t_parse_node child; NodeChildIterator iterator = ts_node_iterate_children(&node); while (ts_node_child_iterator_next(&iterator, &child)) { if (iterator.position.bytes < target_end_byte) continue; @@ -296,18 +296,18 @@ static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) return ts_node__null(); } -static inline TSNode ts_node__first_child_for_byte( - TSNode self, +static inline t_parse_node ts_node__first_child_for_byte( + t_parse_node self, uint32_t goal, bool include_anonymous ) { - TSNode node = self; + t_parse_node node = self; bool did_descend = true; while (did_descend) { did_descend = false; - TSNode child; + t_parse_node child; NodeChildIterator iterator = ts_node_iterate_children(&node); while (ts_node_child_iterator_next(&iterator, &child)) { if (ts_node_end_byte(child) > goal) { @@ -325,20 +325,20 @@ static inline TSNode ts_node__first_child_for_byte( return ts_node__null(); } -static inline TSNode ts_node__descendant_for_byte_range( - TSNode self, +static inline t_parse_node ts_node__descendant_for_byte_range( + t_parse_node self, uint32_t range_start, uint32_t range_end, bool include_anonymous ) { - TSNode node = self; - TSNode last_visible_node = self; + t_parse_node node = self; + t_parse_node last_visible_node = self; bool did_descend = true; while (did_descend) { did_descend = false; - TSNode child; + t_parse_node child; NodeChildIterator iterator = ts_node_iterate_children(&node); while (ts_node_child_iterator_next(&iterator, &child)) { uint32_t node_end = iterator.position.bytes; @@ -364,23 +364,23 @@ 
static inline TSNode ts_node__descendant_for_byte_range( return last_visible_node; } -static inline TSNode ts_node__descendant_for_point_range( - TSNode self, - TSPoint range_start, - TSPoint range_end, +static inline t_parse_node ts_node__descendant_for_point_range( + t_parse_node self, + t_point range_start, + t_point range_end, bool include_anonymous ) { - TSNode node = self; - TSNode last_visible_node = self; + t_parse_node node = self; + t_parse_node last_visible_node = self; bool did_descend = true; while (did_descend) { did_descend = false; - TSNode child; + t_parse_node child; NodeChildIterator iterator = ts_node_iterate_children(&node); while (ts_node_child_iterator_next(&iterator, &child)) { - TSPoint node_end = iterator.position.extent; + t_point node_end = iterator.position.extent; // The end of this node must extend far enough forward to touch // the end of the range and exceed the start of the range. @@ -405,41 +405,41 @@ static inline TSNode ts_node__descendant_for_point_range( // TSNode - public -uint32_t ts_node_end_byte(TSNode self) { +uint32_t ts_node_end_byte(t_parse_node self) { return ts_node_start_byte(self) + ts_subtree_size(ts_node__subtree(self)).bytes; } -TSPoint ts_node_end_point(TSNode self) { +t_point ts_node_end_point(t_parse_node self) { return point_add(ts_node_start_point(self), ts_subtree_size(ts_node__subtree(self)).extent); } -TSSymbol ts_node_symbol(TSNode self) { - TSSymbol symbol = ts_node__alias(&self); +t_symbol ts_node_symbol(t_parse_node self) { + t_symbol symbol = ts_node__alias(&self); if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); return ts_language_public_symbol(self.tree->language, symbol); } -const char *ts_node_type(TSNode self) { - TSSymbol symbol = ts_node__alias(&self); +const char *ts_node_type(t_parse_node self) { + t_symbol symbol = ts_node__alias(&self); if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); return ts_language_symbol_name(self.tree->language, symbol); } -const 
TSLanguage *ts_node_language(TSNode self) { +const t_language *ts_node_language(t_parse_node self) { return self.tree->language; } -TSSymbol ts_node_grammar_symbol(TSNode self) { +t_symbol ts_node_grammar_symbol(t_parse_node self) { return ts_subtree_symbol(ts_node__subtree(self)); } -const char *ts_node_grammar_type(TSNode self) { - TSSymbol symbol = ts_subtree_symbol(ts_node__subtree(self)); +const char *ts_node_grammar_type(t_parse_node self) { + t_symbol symbol = ts_subtree_symbol(ts_node__subtree(self)); return ts_language_symbol_name(self.tree->language, symbol); } -char *ts_node_string(TSNode self) { - TSSymbol alias_symbol = ts_node__alias(&self); +char *ts_node_string(t_parse_node self) { + t_symbol alias_symbol = ts_node__alias(&self); return ts_subtree_string( ts_node__subtree(self), alias_symbol, @@ -449,52 +449,52 @@ char *ts_node_string(TSNode self) { ); } -bool ts_node_eq(TSNode self, TSNode other) { +bool ts_node_eq(t_parse_node self, t_parse_node other) { return self.tree == other.tree && self.id == other.id; } -bool ts_node_is_null(TSNode self) { +bool ts_node_is_null(t_parse_node self) { return self.id == 0; } -bool ts_node_is_extra(TSNode self) { +bool ts_node_is_extra(t_parse_node self) { return ts_subtree_extra(ts_node__subtree(self)); } -bool ts_node_is_named(TSNode self) { - TSSymbol alias = ts_node__alias(&self); +bool ts_node_is_named(t_parse_node self) { + t_symbol alias = ts_node__alias(&self); return alias ? 
ts_language_symbol_metadata(self.tree->language, alias).named : ts_subtree_named(ts_node__subtree(self)); } -bool ts_node_is_missing(TSNode self) { +bool ts_node_is_missing(t_parse_node self) { return ts_subtree_missing(ts_node__subtree(self)); } -bool ts_node_has_changes(TSNode self) { +bool ts_node_has_changes(t_parse_node self) { return ts_subtree_has_changes(ts_node__subtree(self)); } -bool ts_node_has_error(TSNode self) { +bool ts_node_has_error(t_parse_node self) { return ts_subtree_error_cost(ts_node__subtree(self)) > 0; } -bool ts_node_is_error(TSNode self) { - TSSymbol symbol = ts_node_symbol(self); +bool ts_node_is_error(t_parse_node self) { + t_symbol symbol = ts_node_symbol(self); return symbol == ts_builtin_sym_error; } -uint32_t ts_node_descendant_count(TSNode self) { +uint32_t ts_node_descendant_count(t_parse_node self) { return ts_subtree_visible_descendant_count(ts_node__subtree(self)) + 1; } -TSStateId ts_node_parse_state(TSNode self) { +t_state_id ts_node_parse_state(t_parse_node self) { return ts_subtree_parse_state(ts_node__subtree(self)); } -TSStateId ts_node_next_parse_state(TSNode self) { - const TSLanguage *language = self.tree->language; +t_state_id ts_node_next_parse_state(t_parse_node self) { + const t_language *language = self.tree->language; uint16_t state = ts_node_parse_state(self); if (state == TS_TREE_STATE_NONE) { return TS_TREE_STATE_NONE; @@ -503,12 +503,12 @@ TSStateId ts_node_next_parse_state(TSNode self) { return ts_language_next_state(language, state, symbol); } -TSNode ts_node_parent(TSNode self) { - TSNode node = ts_tree_root_node(self.tree); +t_parse_node ts_node_parent(t_parse_node self) { + t_parse_node node = ts_tree_root_node(self.tree); if (node.id == self.id) return ts_node__null(); while (true) { - TSNode next_node = ts_node_child_containing_descendant(node, self); + t_parse_node next_node = ts_node_child_containing_descendant(node, self); if (ts_node_is_null(next_node)) break; node = next_node; } @@ -516,7 +516,7 
@@ TSNode ts_node_parent(TSNode self) { return node; } -TSNode ts_node_child_containing_descendant(TSNode self, TSNode subnode) { +t_parse_node ts_node_child_containing_descendant(t_parse_node self, t_parse_node subnode) { uint32_t start_byte = ts_node_start_byte(subnode); uint32_t end_byte = ts_node_end_byte(subnode); @@ -536,15 +536,15 @@ TSNode ts_node_child_containing_descendant(TSNode self, TSNode subnode) { return self; } -TSNode ts_node_child(TSNode self, uint32_t child_index) { +t_parse_node ts_node_child(t_parse_node self, uint32_t child_index) { return ts_node__child(self, child_index, true); } -TSNode ts_node_named_child(TSNode self, uint32_t child_index) { +t_parse_node ts_node_named_child(t_parse_node self, uint32_t child_index) { return ts_node__child(self, child_index, false); } -TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) { +t_parse_node ts_node_child_by_field_id(t_parse_node self, t_field_id field_id) { recur: if (!field_id || ts_node_child_count(self) == 0) return ts_node__null(); @@ -568,7 +568,7 @@ recur: if (field_map == field_map_end) return ts_node__null(); } - TSNode child; + t_parse_node child; NodeChildIterator iterator = ts_node_iterate_children(&self); while (ts_node_child_iterator_next(&iterator, &child)) { if (!ts_subtree_extra(ts_node__subtree(child))) { @@ -588,7 +588,7 @@ recur: // Otherwise, descend into this child, but if it doesn't contain // the field, continue searching subsequent children. 
else { - TSNode result = ts_node_child_by_field_id(child, field_id); + t_parse_node result = ts_node_child_by_field_id(child, field_id); if (result.id) return result; field_map++; if (field_map == field_map_end) return ts_node__null(); @@ -616,7 +616,7 @@ recur: return ts_node__null(); } -static inline const char *ts_node__field_name_from_language(TSNode self, uint32_t structural_child_index) { +static inline const char *ts_node__field_name_from_language(t_parse_node self, uint32_t structural_child_index) { const TSFieldMapEntry *field_map, *field_map_end; ts_language_field_map( self.tree->language, @@ -632,15 +632,15 @@ static inline const char *ts_node__field_name_from_language(TSNode self, uint32_ return NULL; } -const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) { - TSNode result = self; +const char *ts_node_field_name_for_child(t_parse_node self, uint32_t child_index) { + t_parse_node result = self; bool did_descend = true; const char *inherited_field_name = NULL; while (did_descend) { did_descend = false; - TSNode child; + t_parse_node child; uint32_t index = 0; NodeChildIterator iterator = ts_node_iterate_children(&result); while (ts_node_child_iterator_next(&iterator, &child)) { @@ -671,12 +671,12 @@ const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) { return NULL; } -TSNode ts_node_child_by_field_name( - TSNode self, +t_parse_node ts_node_child_by_field_name( + t_parse_node self, const char *name, uint32_t name_length ) { - TSFieldId field_id = ts_language_field_id_for_name( + t_field_id field_id = ts_language_field_id_for_name( self.tree->language, name, name_length @@ -684,7 +684,7 @@ TSNode ts_node_child_by_field_name( return ts_node_child_by_field_id(self, field_id); } -uint32_t ts_node_child_count(TSNode self) { +uint32_t ts_node_child_count(t_parse_node self) { Subtree tree = ts_node__subtree(self); if (ts_subtree_child_count(tree) > 0) { return tree.ptr->visible_child_count; @@ -693,7 +693,7 @@ 
uint32_t ts_node_child_count(TSNode self) { } } -uint32_t ts_node_named_child_count(TSNode self) { +uint32_t ts_node_named_child_count(t_parse_node self) { Subtree tree = ts_node__subtree(self); if (ts_subtree_child_count(tree) > 0) { return tree.ptr->named_child_count; @@ -702,65 +702,65 @@ uint32_t ts_node_named_child_count(TSNode self) { } } -TSNode ts_node_next_sibling(TSNode self) { +t_parse_node ts_node_next_sibling(t_parse_node self) { return ts_node__next_sibling(self, true); } -TSNode ts_node_next_named_sibling(TSNode self) { +t_parse_node ts_node_next_named_sibling(t_parse_node self) { return ts_node__next_sibling(self, false); } -TSNode ts_node_prev_sibling(TSNode self) { +t_parse_node ts_node_prev_sibling(t_parse_node self) { return ts_node__prev_sibling(self, true); } -TSNode ts_node_prev_named_sibling(TSNode self) { +t_parse_node ts_node_prev_named_sibling(t_parse_node self) { return ts_node__prev_sibling(self, false); } -TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) { +t_parse_node ts_node_first_child_for_byte(t_parse_node self, uint32_t byte) { return ts_node__first_child_for_byte(self, byte, true); } -TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) { +t_parse_node ts_node_first_named_child_for_byte(t_parse_node self, uint32_t byte) { return ts_node__first_child_for_byte(self, byte, false); } -TSNode ts_node_descendant_for_byte_range( - TSNode self, +t_parse_node ts_node_descendant_for_byte_range( + t_parse_node self, uint32_t start, uint32_t end ) { return ts_node__descendant_for_byte_range(self, start, end, true); } -TSNode ts_node_named_descendant_for_byte_range( - TSNode self, +t_parse_node ts_node_named_descendant_for_byte_range( + t_parse_node self, uint32_t start, uint32_t end ) { return ts_node__descendant_for_byte_range(self, start, end, false); } -TSNode ts_node_descendant_for_point_range( - TSNode self, - TSPoint start, - TSPoint end +t_parse_node ts_node_descendant_for_point_range( + t_parse_node 
self, + t_point start, + t_point end ) { return ts_node__descendant_for_point_range(self, start, end, true); } -TSNode ts_node_named_descendant_for_point_range( - TSNode self, - TSPoint start, - TSPoint end +t_parse_node ts_node_named_descendant_for_point_range( + t_parse_node self, + t_point start, + t_point end ) { return ts_node__descendant_for_point_range(self, start, end, false); } -void ts_node_edit(TSNode *self, const TSInputEdit *edit) { +void ts_node_edit(t_parse_node *self, const t_input_edit *edit) { uint32_t start_byte = ts_node_start_byte(*self); - TSPoint start_point = ts_node_start_point(*self); + t_point start_point = ts_node_start_point(*self); if (start_byte >= edit->old_end_byte) { start_byte = edit->new_end_byte + (start_byte - edit->old_end_byte); diff --git a/parser/src/parser.c b/parser/src/parser.c index ac145cea..7ad2d788 100644 --- a/parser/src/parser.c +++ b/parser/src/parser.c @@ -87,11 +87,11 @@ typedef struct { uint32_t byte_index; } TokenCache; -struct TSParser { +struct t_parser { Lexer lexer; Stack *stack; SubtreePool tree_pool; - const TSLanguage *language; + const t_language *language; ReduceActionSet reduce_actions; Subtree finished_tree; SubtreeArray trailing_extras; @@ -137,7 +137,7 @@ typedef struct { static const char *ts_string_input_read( void *_self, uint32_t byte, - TSPoint point, + t_point point, uint32_t *length ) { (void)point; @@ -153,7 +153,7 @@ static const char *ts_string_input_read( // Parser - Private -static void ts_parser__log(TSParser *self) { +static void ts_parser__log(t_parser *self) { if (self->lexer.logger.log) { self->lexer.logger.log( self->lexer.logger.payload, @@ -173,7 +173,7 @@ static void ts_parser__log(TSParser *self) { } static bool ts_parser__breakdown_top_of_stack( - TSParser *self, + t_parser *self, StackVersion version ) { bool did_break_down = false; @@ -187,7 +187,7 @@ static bool ts_parser__breakdown_top_of_stack( pending = false; for (uint32_t i = 0; i < pop.size; i++) { StackSlice slice 
= pop.contents[i]; - TSStateId state = ts_stack_state(self->stack, slice.version); + t_state_id state = ts_stack_state(self->stack, slice.version); Subtree parent = *array_front(&slice.subtrees); for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) { @@ -221,9 +221,9 @@ static bool ts_parser__breakdown_top_of_stack( } static void ts_parser__breakdown_lookahead( - TSParser *self, + t_parser *self, Subtree *lookahead, - TSStateId state, + t_state_id state, ReusableNode *reusable_node ) { bool did_descend = false; @@ -243,7 +243,7 @@ static void ts_parser__breakdown_lookahead( } static ErrorComparison ts_parser__compare_versions( - TSParser *self, + t_parser *self, ErrorStatus a, ErrorStatus b ) { @@ -286,7 +286,7 @@ static ErrorComparison ts_parser__compare_versions( } static ErrorStatus ts_parser__version_status( - TSParser *self, + t_parser *self, StackVersion version ) { unsigned cost = ts_stack_error_cost(self->stack, version); @@ -301,7 +301,7 @@ static ErrorStatus ts_parser__version_status( } static bool ts_parser__better_version_exists( - TSParser *self, + t_parser *self, StackVersion version, bool is_in_error, unsigned cost @@ -337,13 +337,13 @@ static bool ts_parser__better_version_exists( return false; } -static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexMode lex_mode) { +static bool ts_parser__call_main_lex_fn(t_parser *self, TSLexMode lex_mode) { return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); } -static bool ts_parser__call_keyword_lex_fn(TSParser *self, TSLexMode lex_mode) { +static bool ts_parser__call_keyword_lex_fn(t_parser *self, TSLexMode lex_mode) { (void)(lex_mode); return self->language->keyword_lex_fn(&self->lexer.data, 0); @@ -351,7 +351,7 @@ static bool ts_parser__call_keyword_lex_fn(TSParser *self, TSLexMode lex_mode) { } static void ts_parser__external_scanner_create( - TSParser *self + t_parser *self ) { if (self->language && self->language->external_scanner.states) { if 
(self->language->external_scanner.create) { @@ -361,7 +361,7 @@ if (self->language->external_scanner.create) { }} static void ts_parser__external_scanner_destroy( - TSParser *self + t_parser *self ) { if ( self->language && @@ -376,7 +376,7 @@ static void ts_parser__external_scanner_destroy( } static unsigned ts_parser__external_scanner_serialize( - TSParser *self + t_parser *self ) { uint32_t length = self->language->external_scanner.serialize( self->external_scanner_payload, @@ -388,7 +388,7 @@ static unsigned ts_parser__external_scanner_serialize( } static void ts_parser__external_scanner_deserialize( - TSParser *self, + t_parser *self, Subtree external_token ) { const char *data = NULL; @@ -408,8 +408,8 @@ static void ts_parser__external_scanner_deserialize( } static bool ts_parser__external_scanner_scan( - TSParser *self, - TSStateId external_lex_state + t_parser *self, + t_state_id external_lex_state ) { const bool *valid_external_tokens = ts_language_enabled_external_tokens( @@ -425,14 +425,14 @@ static bool ts_parser__external_scanner_scan( } static bool ts_parser__can_reuse_first_leaf( - TSParser *self, - TSStateId state, + t_parser *self, + t_state_id state, Subtree tree, TableEntry *table_entry ) { TSLexMode current_lex_mode = self->language->lex_modes[state]; - TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree); - TSStateId leaf_state = ts_subtree_leaf_parse_state(tree); + t_symbol leaf_symbol = ts_subtree_leaf_symbol(tree); + t_state_id leaf_state = ts_subtree_leaf_parse_state(tree); TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state]; // At the end of a non-terminal extra node, the lexer normally returns @@ -460,9 +460,9 @@ static bool ts_parser__can_reuse_first_leaf( } static Subtree ts_parser__lex( - TSParser *self, + t_parser *self, StackVersion version, - TSStateId parse_state + t_state_id parse_state ) { TSLexMode lex_mode = self->language->lex_modes[parse_state]; if (lex_mode.lex_state == (uint16_t)-1) { @@ -596,7 +596,7 @@ static 
Subtree ts_parser__lex( ); } else { bool is_keyword = false; - TSSymbol symbol = self->lexer.data.result_symbol; + t_symbol symbol = self->lexer.data.result_symbol; Length padding = length_sub(self->lexer.token_start_position, start_position); Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position); uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes; @@ -651,8 +651,8 @@ static Subtree ts_parser__lex( } static Subtree ts_parser__get_cached_token( - TSParser *self, - TSStateId state, + t_parser *self, + t_state_id state, size_t position, Subtree last_external_token, TableEntry *table_entry @@ -672,7 +672,7 @@ static Subtree ts_parser__get_cached_token( } static void ts_parser__set_cached_token( - TSParser *self, + t_parser *self, uint32_t byte_index, Subtree last_external_token, Subtree token @@ -688,7 +688,7 @@ static void ts_parser__set_cached_token( } static bool ts_parser__has_included_range_difference( - const TSParser *self, + const t_parser *self, uint32_t start_position, uint32_t end_position ) { @@ -701,9 +701,9 @@ static bool ts_parser__has_included_range_difference( } static Subtree ts_parser__reuse_node( - TSParser *self, + t_parser *self, StackVersion version, - TSStateId *state, + t_state_id *state, uint32_t position, Subtree last_external_token, TableEntry *table_entry @@ -759,7 +759,7 @@ static Subtree ts_parser__reuse_node( continue; } - TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result); + t_symbol leaf_symbol = ts_subtree_leaf_symbol(result); ts_language_table_entry(self->language, *state, leaf_symbol, table_entry); if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) { LOG( @@ -783,7 +783,7 @@ static Subtree ts_parser__reuse_node( // // The decision is based on the trees' error costs (if any), their dynamic precedence, // and finally, as a default, by a recursive comparison of the trees' symbols. 
-static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) { +static bool ts_parser__select_tree(t_parser *self, Subtree left, Subtree right) { if (!left.ptr) return true; if (!right.ptr) return false; @@ -831,7 +831,7 @@ static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) // Determine if a given tree's children should be replaced by an alternative // array of children. static bool ts_parser__select_children( - TSParser *self, + t_parser *self, Subtree left, const SubtreeArray *children ) { @@ -856,9 +856,9 @@ static bool ts_parser__select_children( } static void ts_parser__shift( - TSParser *self, + t_parser *self, StackVersion version, - TSStateId state, + t_state_id state, Subtree lookahead, bool extra ) { @@ -879,9 +879,9 @@ static void ts_parser__shift( } static StackVersion ts_parser__reduce( - TSParser *self, + t_parser *self, StackVersion version, - TSSymbol symbol, + t_symbol symbol, uint32_t count, int dynamic_precedence, uint16_t production_id, @@ -957,8 +957,8 @@ static StackVersion ts_parser__reduce( } } - TSStateId state = ts_stack_state(self->stack, slice_version); - TSStateId next_state = ts_language_next_state(self->language, state, symbol); + t_state_id state = ts_stack_state(self->stack, slice_version); + t_state_id next_state = ts_language_next_state(self->language, state, symbol); if (end_of_non_terminal_extra && next_state == state) { parent.ptr->extra = true; } @@ -994,7 +994,7 @@ static StackVersion ts_parser__reduce( } static void ts_parser__accept( - TSParser *self, + t_parser *self, StackVersion version, Subtree lookahead ) { @@ -1047,9 +1047,9 @@ static void ts_parser__accept( } static bool ts_parser__do_all_potential_reductions( - TSParser *self, + t_parser *self, StackVersion starting_version, - TSSymbol lookahead_symbol + t_symbol lookahead_symbol ) { uint32_t initial_version_count = ts_stack_version_count(self->stack); @@ -1068,11 +1068,11 @@ static bool 
ts_parser__do_all_potential_reductions( } if (merged) continue; - TSStateId state = ts_stack_state(self->stack, version); + t_state_id state = ts_stack_state(self->stack, version); bool has_shift_action = false; array_clear(&self->reduce_actions); - TSSymbol first_symbol, end_symbol; + t_symbol first_symbol, end_symbol; if (lookahead_symbol != 0) { first_symbol = lookahead_symbol; end_symbol = lookahead_symbol + 1; @@ -1081,7 +1081,7 @@ static bool ts_parser__do_all_potential_reductions( end_symbol = self->language->token_count; } - for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) { + for (t_symbol symbol = first_symbol; symbol < end_symbol; symbol++) { TableEntry entry; ts_language_table_entry(self->language, state, symbol, &entry); for (uint32_t j = 0; j < entry.action_count; j++) { @@ -1137,10 +1137,10 @@ static bool ts_parser__do_all_potential_reductions( } static bool ts_parser__recover_to_state( - TSParser *self, + t_parser *self, StackVersion version, unsigned depth, - TSStateId goal_state + t_state_id goal_state ) { StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth); StackVersion previous_version = STACK_VERSION_NONE; @@ -1196,7 +1196,7 @@ static bool ts_parser__recover_to_state( } static void ts_parser__recover( - TSParser *self, + t_parser *self, StackVersion version, Subtree lookahead ) { @@ -1373,7 +1373,7 @@ static void ts_parser__recover( } static void ts_parser__handle_error( - TSParser *self, + t_parser *self, StackVersion version, Subtree lookahead ) { @@ -1391,13 +1391,13 @@ static void ts_parser__handle_error( bool did_insert_missing_token = false; for (StackVersion v = version; v < version_count;) { if (!did_insert_missing_token) { - TSStateId state = ts_stack_state(self->stack, v); + t_state_id state = ts_stack_state(self->stack, v); for ( - TSSymbol missing_symbol = 1; + t_symbol missing_symbol = 1; missing_symbol < (uint16_t)self->language->token_count; missing_symbol++ ) { - TSStateId 
state_after_missing_symbol = ts_language_next_state( + t_state_id state_after_missing_symbol = ts_language_next_state( self->language, state, missing_symbol ); if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) { @@ -1471,11 +1471,11 @@ static void ts_parser__handle_error( } static bool ts_parser__advance( - TSParser *self, + t_parser *self, StackVersion version, bool allow_node_reuse ) { - TSStateId state = ts_stack_state(self->stack, version); + t_state_id state = ts_stack_state(self->stack, version); uint32_t position = ts_stack_position(self->stack, version).bytes; Subtree last_external_token = ts_stack_last_external_token(self->stack, version); @@ -1548,7 +1548,7 @@ static bool ts_parser__advance( switch (action.type) { case TSParseActionTypeShift: { if (action.shift.repetition) break; - TSStateId next_state; + t_state_id next_state; if (action.shift.extra) { next_state = state; LOG("shift_extra"); @@ -1688,7 +1688,7 @@ static bool ts_parser__advance( } } -static unsigned ts_parser__condense_stack(TSParser *self) { +static unsigned ts_parser__condense_stack(t_parser *self) { bool made_changes = false; unsigned min_error_cost = UINT_MAX; for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { @@ -1788,7 +1788,7 @@ static unsigned ts_parser__condense_stack(TSParser *self) { return min_error_cost; } -static bool ts_parser_has_outstanding_parse(TSParser *self) { +static bool ts_parser_has_outstanding_parse(t_parser *self) { return ( self->external_scanner_payload || ts_stack_state(self->stack, 0) != 1 || @@ -1798,8 +1798,8 @@ static bool ts_parser_has_outstanding_parse(TSParser *self) { // Parser - Public -TSParser *ts_parser_new(void) { - TSParser *self = ts_calloc(1, sizeof(TSParser)); +t_parser *ts_parser_new(void) { + t_parser *self = ts_calloc(1, sizeof(t_parser)); ts_lexer_init(&self->lexer); array_init(&self->reduce_actions); array_reserve(&self->reduce_actions, 4); @@ -1822,7 +1822,7 @@ TSParser 
*ts_parser_new(void) { return self; } -void ts_parser_delete(TSParser *self) { +void ts_parser_delete(t_parser *self) { if (!self) return; ts_parser_set_language(self, NULL); @@ -1847,11 +1847,11 @@ void ts_parser_delete(TSParser *self) { ts_free(self); } -const TSLanguage *ts_parser_language(const TSParser *self) { +const t_language *ts_parser_language(const t_parser *self) { return self->language; } -bool ts_parser_set_language(TSParser *self, const TSLanguage *language) { +bool ts_parser_set_language(t_parser *self, const t_language *language) { ts_parser_reset(self); ts_language_delete(self->language); self->language = NULL; @@ -1869,15 +1869,15 @@ bool ts_parser_set_language(TSParser *self, const TSLanguage *language) { return true; } -TSLogger ts_parser_logger(const TSParser *self) { +t_logger ts_parser_logger(const t_parser *self) { return self->lexer.logger; } -void ts_parser_set_logger(TSParser *self, TSLogger logger) { +void ts_parser_set_logger(t_parser *self, t_logger logger) { self->lexer.logger = logger; } -void ts_parser_print_dot_graphs(TSParser *self, int fd) { +void ts_parser_print_dot_graphs(t_parser *self, int fd) { if (self->dot_graph_file) { fclose(self->dot_graph_file); } @@ -1893,35 +1893,35 @@ void ts_parser_print_dot_graphs(TSParser *self, int fd) { } } -const size_t *ts_parser_cancellation_flag(const TSParser *self) { +const size_t *ts_parser_cancellation_flag(const t_parser *self) { return (const size_t *)self->cancellation_flag; } -void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) { +void ts_parser_set_cancellation_flag(t_parser *self, const size_t *flag) { self->cancellation_flag = (const volatile size_t *)flag; } -uint64_t ts_parser_timeout_micros(const TSParser *self) { +uint64_t ts_parser_timeout_micros(const t_parser *self) { return duration_to_micros(self->timeout_duration); } -void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) { +void ts_parser_set_timeout_micros(t_parser *self, 
uint64_t timeout_micros) { self->timeout_duration = duration_from_micros(timeout_micros); } bool ts_parser_set_included_ranges( - TSParser *self, - const TSRange *ranges, + t_parser *self, + const t_range *ranges, uint32_t count ) { return ts_lexer_set_included_ranges(&self->lexer, ranges, count); } -const TSRange *ts_parser_included_ranges(const TSParser *self, uint32_t *count) { +const t_range *ts_parser_included_ranges(const t_parser *self, uint32_t *count) { return ts_lexer_included_ranges(&self->lexer, count); } -void ts_parser_reset(TSParser *self) { +void ts_parser_reset(t_parser *self) { ts_parser__external_scanner_destroy(self); if (self->old_tree.ptr) { @@ -1941,12 +1941,12 @@ void ts_parser_reset(TSParser *self) { self->has_scanner_error = false; } -TSTree *ts_parser_parse( - TSParser *self, - const TSTree *old_tree, - TSInput input +t_tree *ts_parser_parse( + t_parser *self, + const t_tree *old_tree, + t_input input ) { - TSTree *result = NULL; + t_tree *result = NULL; if (!self->language || !input.read) return NULL; @@ -1973,7 +1973,7 @@ TSTree *ts_parser_parse( LOG("parse_after_edit"); LOG_TREE(self->old_tree); for (unsigned i = 0; i < self->included_range_differences.size; i++) { - TSRange *range = &self->included_range_differences.contents[i]; + t_range *range = &self->included_range_differences.contents[i]; LOG("different_included_range %u - %u", range->start_byte, range->end_byte); } } else { @@ -2037,7 +2037,7 @@ TSTree *ts_parser_parse( } while (self->included_range_difference_index < self->included_range_differences.size) { - TSRange *range = &self->included_range_differences.contents[self->included_range_difference_index]; + t_range *range = &self->included_range_differences.contents[self->included_range_difference_index]; if (range->end_byte <= position) { self->included_range_difference_index++; } else { @@ -2064,24 +2064,24 @@ exit: return result; } -TSTree *ts_parser_parse_string( - TSParser *self, - const TSTree *old_tree, +t_tree 
*ts_parser_parse_string( + t_parser *self, + const t_tree *old_tree, const char *string, uint32_t length ) { return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8); } -TSTree *ts_parser_parse_string_encoding( - TSParser *self, - const TSTree *old_tree, +t_tree *ts_parser_parse_string_encoding( + t_parser *self, + const t_tree *old_tree, const char *string, uint32_t length, - TSInputEncoding encoding + t_input_encoding encoding ) { TSStringInput input = {string, length}; - return ts_parser_parse(self, old_tree, (TSInput) { + return ts_parser_parse(self, old_tree, (t_input) { &input, ts_string_input_read, encoding, diff --git a/parser/src/parser.h b/parser/src/parser.h index 17f0e94b..fa49485b 100644 --- a/parser/src/parser.h +++ b/parser/src/parser.h @@ -9,7 +9,7 @@ extern "C" { #include #include -#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_error ((t_symbol)-1) #define ts_builtin_sym_end 0 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 @@ -21,7 +21,7 @@ typedef struct TSLanguage TSLanguage; #endif typedef struct { - TSFieldId field_id; + t_field_id field_id; uint8_t child_index; bool inherited; } TSFieldMapEntry; @@ -41,7 +41,7 @@ typedef struct TSLexer TSLexer; struct TSLexer { int32_t lookahead; - TSSymbol result_symbol; + t_symbol result_symbol; void (*advance)(TSLexer *, bool); void (*mark_end)(TSLexer *); uint32_t (*get_column)(TSLexer *); @@ -59,14 +59,14 @@ typedef enum { typedef union { struct { uint8_t type; - TSStateId state; + t_state_id state; bool extra; bool repetition; } shift; struct { uint8_t type; uint8_t child_count; - TSSymbol symbol; + t_symbol symbol; int16_t dynamic_precedence; uint16_t production_id; } reduce; @@ -91,7 +91,7 @@ typedef struct { int32_t end; } TSCharacterRange; -struct TSLanguage { +struct t_language { uint32_t version; uint32_t symbol_count; uint32_t alias_count; @@ -111,23 +111,23 @@ struct TSLanguage { const TSFieldMapSlice *field_map_slices; const 
TSFieldMapEntry *field_map_entries; const TSSymbolMetadata *symbol_metadata; - const TSSymbol *public_symbol_map; + const t_symbol *public_symbol_map; const uint16_t *alias_map; - const TSSymbol *alias_sequences; + const t_symbol *alias_sequences; const TSLexMode *lex_modes; - bool (*lex_fn)(TSLexer *, TSStateId); - bool (*keyword_lex_fn)(TSLexer *, TSStateId); - TSSymbol keyword_capture_token; + bool (*lex_fn)(TSLexer *, t_state_id); + bool (*keyword_lex_fn)(TSLexer *, t_state_id); + t_symbol keyword_capture_token; struct { const bool *states; - const TSSymbol *symbol_map; + const t_symbol *symbol_map; void *(*create)(void); void (*destroy)(void *); bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); unsigned (*serialize)(void *, char *); void (*deserialize)(void *, const char *, unsigned); } external_scanner; - const TSStateId *primary_state_ids; + const t_state_id *primary_state_ids; }; static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { diff --git a/parser/src/point.h b/parser/src/point.h index 1c8b7133..3723299e 100644 --- a/parser/src/point.h +++ b/parser/src/point.h @@ -3,56 +3,56 @@ #include "./api.h" -#define POINT_ZERO ((TSPoint) {0, 0}) -#define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX}) +#define POINT_ZERO ((t_point) {0, 0}) +#define POINT_MAX ((t_point) {UINT32_MAX, UINT32_MAX}) -static inline TSPoint point__new(unsigned row, unsigned column) { - TSPoint result = {row, column}; +static inline t_point point__new(unsigned row, unsigned column) { + t_point result = {row, column}; return result; } -static inline TSPoint point_add(TSPoint a, TSPoint b) { +static inline t_point point_add(t_point a, t_point b) { if (b.row > 0) return point__new(a.row + b.row, b.column); else return point__new(a.row, a.column + b.column); } -static inline TSPoint point_sub(TSPoint a, TSPoint b) { +static inline t_point point_sub(t_point a, t_point b) { if (a.row > b.row) return point__new(a.row - b.row, a.column); else 
return point__new(0, a.column - b.column); } -static inline bool point_lte(TSPoint a, TSPoint b) { +static inline bool point_lte(t_point a, t_point b) { return (a.row < b.row) || (a.row == b.row && a.column <= b.column); } -static inline bool point_lt(TSPoint a, TSPoint b) { +static inline bool point_lt(t_point a, t_point b) { return (a.row < b.row) || (a.row == b.row && a.column < b.column); } -static inline bool point_gt(TSPoint a, TSPoint b) { +static inline bool point_gt(t_point a, t_point b) { return (a.row > b.row) || (a.row == b.row && a.column > b.column); } -static inline bool point_gte(TSPoint a, TSPoint b) { +static inline bool point_gte(t_point a, t_point b) { return (a.row > b.row) || (a.row == b.row && a.column >= b.column); } -static inline bool point_eq(TSPoint a, TSPoint b) { +static inline bool point_eq(t_point a, t_point b) { return a.row == b.row && a.column == b.column; } -static inline TSPoint point_min(TSPoint a, TSPoint b) { +static inline t_point point_min(t_point a, t_point b) { if (a.row < b.row || (a.row == b.row && a.column < b.column)) return a; else return b; } -static inline TSPoint point_max(TSPoint a, TSPoint b) { +static inline t_point point_max(t_point a, t_point b) { if (a.row > b.row || (a.row == b.row && a.column > b.column)) return a; else diff --git a/parser/src/query.c b/parser/src/query.c index c75700b8..ffded551 100644 --- a/parser/src/query.c +++ b/parser/src/query.c @@ -83,9 +83,9 @@ typedef struct { * even completed yet. */ typedef struct { - TSSymbol symbol; - TSSymbol supertype_symbol; - TSFieldId field; + t_symbol symbol; + t_symbol supertype_symbol; + t_field_id field; uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; uint16_t depth; uint16_t alternative_index; @@ -191,7 +191,7 @@ typedef struct { bool needs_parent: 1; } QueryState; -typedef Array(TSQueryCapture) CaptureList; +typedef Array(t_query_capture) CaptureList; /* * CaptureListPool - A collection of *lists* of captures. 
Each query state needs @@ -218,10 +218,10 @@ typedef struct { * a query pattern, to determine at which steps the pattern might fail to match. */ typedef struct { - TSStateId parse_state; - TSSymbol parent_symbol; + t_state_id parse_state; + t_symbol parent_symbol; uint16_t child_index; - TSFieldId field_id: 15; + t_field_id field_id: 15; bool done: 1; } AnalysisStateEntry; @@ -229,7 +229,7 @@ typedef struct { AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH]; uint16_t depth; uint16_t step_index; - TSSymbol root_symbol; + t_symbol root_symbol; } AnalysisState; typedef Array(AnalysisState *) AnalysisStateSet; @@ -240,7 +240,7 @@ typedef struct { AnalysisStateSet deeper_states; AnalysisStateSet state_pool; Array(uint16_t) final_step_indices; - Array(TSSymbol) finished_parent_symbols; + Array(t_symbol) finished_parent_symbols; bool did_abort; } QueryAnalysis; @@ -251,15 +251,15 @@ typedef struct { * downstream states. */ typedef struct { - TSStateId state; + t_state_id state; uint16_t production_id; uint8_t child_index: 7; bool done: 1; } AnalysisSubgraphNode; typedef struct { - TSSymbol symbol; - Array(TSStateId) start_states; + t_symbol symbol; + Array(t_state_id) start_states; Array(AnalysisSubgraphNode) nodes; } AnalysisSubgraph; @@ -271,7 +271,7 @@ typedef Array(AnalysisSubgraph) AnalysisSubgraphArray; * to which reduce actions. */ typedef struct { - TSStateId *contents; + t_state_id *contents; } StatePredecessorMap; /* @@ -279,28 +279,28 @@ typedef struct { * itself is immutable. The mutable state used in the process of executing the * query is stored in a `TSQueryCursor`. 
*/ -struct TSQuery { +struct t_query { SymbolTable captures; SymbolTable predicate_values; Array(CaptureQuantifiers) capture_quantifiers; Array(QueryStep) steps; Array(PatternEntry) pattern_map; - Array(TSQueryPredicateStep) predicate_steps; + Array(t_query_predicate_step) predicate_steps; Array(QueryPattern) patterns; Array(StepOffset) step_offsets; - Array(TSFieldId) negated_fields; + Array(t_field_id) negated_fields; Array(char) string_buffer; - Array(TSSymbol) repeat_symbols_with_rootless_patterns; - const TSLanguage *language; + Array(t_symbol) repeat_symbols_with_rootless_patterns; + const t_language *language; uint16_t wildcard_root_pattern_count; }; /* * TSQueryCursor - A stateful struct used to execute a query on a tree. */ -struct TSQueryCursor { - const TSQuery *query; - TSTreeCursor cursor; +struct t_query_cursor { + const t_query *query; + t_tree_cursor cursor; Array(QueryState) states; Array(QueryState) finished_states; CaptureListPool capture_list_pool; @@ -308,8 +308,8 @@ struct TSQueryCursor { uint32_t max_start_depth; uint32_t start_byte; uint32_t end_byte; - TSPoint start_point; - TSPoint end_point; + t_point start_point; + t_point end_point; uint32_t next_state_id; bool on_visible_node; bool ascending; @@ -317,10 +317,10 @@ struct TSQueryCursor { bool did_exceed_match_limit; }; -static const TSQueryError PARENT_DONE = -1; +static const t_query_error PARENT_DONE = -1; static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; static const uint16_t NONE = UINT16_MAX; -static const TSSymbol WILDCARD_SYMBOL = 0; +static const t_symbol WILDCARD_SYMBOL = 0; /********** * Stream @@ -480,9 +480,9 @@ static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { * Quantifiers **************/ -static TSQuantifier quantifier_mul( - TSQuantifier left, - TSQuantifier right +static t_quantifier quantifier_mul( + t_quantifier left, + t_quantifier right ) { switch (left) { @@ -529,9 +529,9 @@ static TSQuantifier quantifier_mul( return 
TSQuantifierZero; // to make compiler happy, but all cases should be covered above! } -static TSQuantifier quantifier_join( - TSQuantifier left, - TSQuantifier right +static t_quantifier quantifier_join( + t_quantifier left, + t_quantifier right ) { switch (left) { @@ -590,9 +590,9 @@ static TSQuantifier quantifier_join( return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! } -static TSQuantifier quantifier_add( - TSQuantifier left, - TSQuantifier right +static t_quantifier quantifier_add( + t_quantifier left, + t_quantifier right ) { switch (left) { @@ -668,24 +668,24 @@ static void capture_quantifiers_replace( } // Return capture quantifier for the given capture id -static TSQuantifier capture_quantifier_for_id( +static t_quantifier capture_quantifier_for_id( const CaptureQuantifiers *self, uint16_t id ) { - return (self->size <= id) ? TSQuantifierZero : (TSQuantifier) *array_get(self, id); + return (self->size <= id) ? TSQuantifierZero : (t_quantifier) *array_get(self, id); } // Add the given quantifier to the current value for id static void capture_quantifiers_add_for_id( CaptureQuantifiers *self, uint16_t id, - TSQuantifier quantifier + t_quantifier quantifier ) { if (self->size <= id) { array_grow_by(self, id + 1 - self->size); } uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier); + *own_quantifier = (uint8_t) quantifier_add((t_quantifier) *own_quantifier, quantifier); } // Point-wise add the given quantifiers to the current values @@ -699,18 +699,18 @@ static void capture_quantifiers_add_all( for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) { uint8_t *quantifier = array_get(quantifiers, id); uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); + *own_quantifier = (uint8_t) quantifier_add((t_quantifier) *own_quantifier, 
(t_quantifier) *quantifier); } } // Join the given quantifier with the current values static void capture_quantifiers_mul( CaptureQuantifiers *self, - TSQuantifier quantifier + t_quantifier quantifier ) { for (uint16_t id = 0; id < (uint16_t)self->size; id++) { uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier); + *own_quantifier = (uint8_t) quantifier_mul((t_quantifier) *own_quantifier, quantifier); } } @@ -725,11 +725,11 @@ static void capture_quantifiers_join_all( for (uint32_t id = 0; id < quantifiers->size; id++) { uint8_t *quantifier = array_get(quantifiers, id); uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); + *own_quantifier = (uint8_t) quantifier_join((t_quantifier) *own_quantifier, (t_quantifier) *quantifier); } for (uint32_t id = quantifiers->size; id < self->size; id++) { uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero); + *own_quantifier = (uint8_t) quantifier_join((t_quantifier) *own_quantifier, TSQuantifierZero); } } @@ -797,7 +797,7 @@ static uint16_t symbol_table_insert_name( ************/ static QueryStep query_step__new( - TSSymbol symbol, + t_symbol symbol, uint16_t depth, bool is_immediate ) { @@ -851,12 +851,12 @@ static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { **********************/ static inline StatePredecessorMap state_predecessor_map_new( - const TSLanguage *language + const t_language *language ) { return (StatePredecessorMap) { .contents = ts_calloc( (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), - sizeof(TSStateId) + sizeof(t_state_id) ), }; } @@ -867,11 +867,11 @@ static inline void state_predecessor_map_delete(StatePredecessorMap *self) { static inline void state_predecessor_map_add( 
StatePredecessorMap *self, - TSStateId state, - TSStateId predecessor + t_state_id state, + t_state_id predecessor ) { size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); - TSStateId *count = &self->contents[index]; + t_state_id *count = &self->contents[index]; if ( *count == 0 || (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor) @@ -881,9 +881,9 @@ static inline void state_predecessor_map_add( } } -static inline const TSStateId *state_predecessor_map_get( +static inline const t_state_id *state_predecessor_map_get( const StatePredecessorMap *self, - TSStateId state, + t_state_id state, unsigned *count ) { size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); @@ -898,7 +898,7 @@ static inline const TSStateId *state_predecessor_map_get( static unsigned analysis_state__recursion_depth(const AnalysisState *self) { unsigned result = 0; for (unsigned i = 0; i < self->depth; i++) { - TSSymbol symbol = self->stack[i].parent_symbol; + t_symbol symbol = self->stack[i].parent_symbol; for (unsigned j = 0; j < i; j++) { if (self->stack[j].parent_symbol == symbol) { result++; @@ -948,7 +948,7 @@ static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { return &self->stack[self->depth - 1]; } -static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) { +static inline bool analysis_state__has_supertype(AnalysisState *self, t_symbol symbol) { for (unsigned i = 0; i < self->depth; i++) { if (self->stack[i].parent_symbol == symbol) return true; } @@ -1086,8 +1086,8 @@ static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *se // If the symbol is not present `*result` is set to the index where the // symbol should be inserted. 
static inline bool ts_query__pattern_map_search( - const TSQuery *self, - TSSymbol needle, + const t_query *self, + t_symbol needle, uint32_t *result ) { uint32_t base_index = self->wildcard_root_pattern_count; @@ -1099,14 +1099,14 @@ static inline bool ts_query__pattern_map_search( while (size > 1) { uint32_t half_size = size / 2; uint32_t mid_index = base_index + half_size; - TSSymbol mid_symbol = self->steps.contents[ + t_symbol mid_symbol = self->steps.contents[ self->pattern_map.contents[mid_index].step_index ].symbol; if (needle > mid_symbol) base_index = mid_index; size -= half_size; } - TSSymbol symbol = self->steps.contents[ + t_symbol symbol = self->steps.contents[ self->pattern_map.contents[base_index].step_index ].symbol; @@ -1126,8 +1126,8 @@ static inline bool ts_query__pattern_map_search( // Insert a new pattern's start index into the pattern map, maintaining // the pattern map's ordering invariant. static inline void ts_query__pattern_map_insert( - TSQuery *self, - TSSymbol symbol, + t_query *self, + t_symbol symbol, PatternEntry new_entry ) { uint32_t index; @@ -1155,7 +1155,7 @@ static inline void ts_query__pattern_map_insert( // Walk the subgraph for this non-terminal, tracking all of the possible // sequences of progress within the pattern. 
static void ts_query__perform_analysis( - TSQuery *self, + t_query *self, const AnalysisSubgraphArray *subgraphs, QueryAnalysis *analysis ) { @@ -1251,9 +1251,9 @@ static void ts_query__perform_analysis( } } - const TSStateId parse_state = analysis_state__top(state)->parse_state; - const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol; - const TSFieldId parent_field_id = analysis_state__top(state)->field_id; + const t_state_id parse_state = analysis_state__top(state)->parse_state; + const t_symbol parent_symbol = analysis_state__top(state)->parent_symbol; + const t_field_id parent_field_id = analysis_state__top(state)->field_id; const unsigned child_index = analysis_state__top(state)->child_index; const QueryStep * const step = &self->steps.contents[state->step_index]; @@ -1266,7 +1266,7 @@ static void ts_query__perform_analysis( // are part of the subgraph for the current symbol. LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state); while (ts_lookahead_iterator__next(&lookahead_iterator)) { - TSSymbol sym = lookahead_iterator.symbol; + t_symbol sym = lookahead_iterator.symbol; AnalysisSubgraphNode successor = { .state = parse_state, @@ -1301,13 +1301,13 @@ static void ts_query__perform_analysis( // Use the subgraph to determine what alias and field will eventually be applied // to this child node. - TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); - TSSymbol visible_symbol = alias + t_symbol alias = ts_language_alias_at(self->language, node->production_id, child_index); + t_symbol visible_symbol = alias ? alias : self->language->symbol_metadata[sym].visible ? 
self->language->public_symbol_map[sym] : 0; - TSFieldId field_id = parent_field_id; + t_field_id field_id = parent_field_id; if (!field_id) { const TSFieldMapEntry *field_map, *field_map_end; ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); @@ -1456,7 +1456,7 @@ static void ts_query__perform_analysis( } } -static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { +static bool ts_query__analyze_patterns(t_query *self, unsigned *error_offset) { Array(uint16_t) non_rooted_pattern_start_steps = array_new(); for (unsigned i = 0; i < self->pattern_map.size; i++) { PatternEntry *pattern = &self->pattern_map.contents[i]; @@ -1515,11 +1515,11 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { AnalysisSubgraphArray subgraphs = array_new(); for (unsigned i = 0; i < parent_step_indices.size; i++) { uint32_t parent_step_index = parent_step_indices.contents[i]; - TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; + t_symbol parent_symbol = self->steps.contents[parent_step_index].symbol; AnalysisSubgraph subgraph = { .symbol = parent_symbol }; array_insert_sorted_by(&subgraphs, .symbol, subgraph); } - for (TSSymbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) { + for (t_symbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) { if (!ts_language_symbol_metadata(self->language, sym).visible) { AnalysisSubgraph subgraph = { .symbol = sym }; array_insert_sorted_by(&subgraphs, .symbol, subgraph); @@ -1533,7 +1533,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // with information about the node that would be created. // 3) A list of predecessor states for each state. 
StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language); - for (TSStateId state = 1; state < (uint16_t)self->language->state_count; state++) { + for (t_state_id state = 1; state < (uint16_t)self->language->state_count; state++) { unsigned subgraph_index, exists; LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state); while (ts_lookahead_iterator__next(&lookahead_iterator)) { @@ -1541,14 +1541,14 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { for (unsigned i = 0; i < lookahead_iterator.action_count; i++) { const TSParseAction *action = &lookahead_iterator.actions[i]; if (action->type == TSParseActionTypeReduce) { - const TSSymbol *aliases, *aliases_end; + const t_symbol *aliases, *aliases_end; ts_language_aliases_for_symbol( self->language, action->reduce.symbol, &aliases, &aliases_end ); - for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { + for (const t_symbol *symbol = aliases; symbol < aliases_end; symbol++) { array_search_sorted_by( &subgraphs, .symbol, @@ -1569,7 +1569,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } } } else if (action->type == TSParseActionTypeShift && !action->shift.extra) { - TSStateId next_state = action->shift.state; + t_state_id next_state = action->shift.state; state_predecessor_map_add(&predecessor_map, next_state, state); } } @@ -1578,14 +1578,14 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); } if (ts_language_state_is_primary(self->language, state)) { - const TSSymbol *aliases, *aliases_end; + const t_symbol *aliases, *aliases_end; ts_language_aliases_for_symbol( self->language, lookahead_iterator.symbol, &aliases, &aliases_end ); - for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { + for (const t_symbol *symbol = aliases; symbol < aliases_end; 
symbol++) { array_search_sorted_by( &subgraphs, .symbol, @@ -1623,7 +1623,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { AnalysisSubgraphNode node = array_pop(&next_nodes); if (node.child_index > 1) { unsigned predecessor_count; - const TSStateId *predecessors = state_predecessor_map_get( + const t_state_id *predecessors = state_predecessor_map_get( &predecessor_map, node.state, &predecessor_count @@ -1678,7 +1678,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { for (unsigned i = 0; i < parent_step_indices.size; i++) { uint16_t parent_step_index = parent_step_indices.contents[i]; uint16_t parent_depth = self->steps.contents[parent_step_index].depth; - TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; + t_symbol parent_symbol = self->steps.contents[parent_step_index].symbol; if (parent_symbol == ts_builtin_sym_error) continue; // Find the subgraph that corresponds to this pattern's root symbol. 
If the pattern's @@ -1701,7 +1701,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { analysis_state_set__clear(&analysis.states, &analysis.state_pool); analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); for (unsigned j = 0; j < subgraph->start_states.size; j++) { - TSStateId parse_state = subgraph->start_states.contents[j]; + t_state_id parse_state = subgraph->start_states.contents[j]; analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { .step_index = parent_step_index + 1, .stack = { @@ -1786,7 +1786,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { end = start + pattern->predicate_steps.length, j = start; j < end; j++ ) { - TSQueryPredicateStep *step = &self->predicate_steps.contents[j]; + t_query_predicate_step *step = &self->predicate_steps.contents[j]; if (step->type == TSQueryPredicateStepTypeCapture) { uint16_t value_id = step->value_id; array_insert_sorted_by(&predicate_capture_ids, , value_id); @@ -1888,7 +1888,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { if (metadata.visible || metadata.named) continue; for (uint32_t k = 0; k < subgraph->start_states.size; k++) { - TSStateId parse_state = subgraph->start_states.contents[k]; + t_state_id parse_state = subgraph->start_states.contents[k]; analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { .step_index = pattern_entry->step_index, .stack = { @@ -1921,7 +1921,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } for (unsigned k = 0; k < analysis.finished_parent_symbols.size; k++) { - TSSymbol symbol = analysis.finished_parent_symbols.contents[k]; + t_symbol symbol = analysis.finished_parent_symbols.contents[k]; array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol); } } @@ -1955,9 +1955,9 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned 
*error_offset) { } static void ts_query__add_negated_fields( - TSQuery *self, + t_query *self, uint16_t step_index, - TSFieldId *field_ids, + t_field_id *field_ids, uint16_t field_count ) { QueryStep *step = &self->steps.contents[step_index]; @@ -1968,7 +1968,7 @@ static void ts_query__add_negated_fields( unsigned match_count = 0; unsigned start_i = 0; for (unsigned i = 0; i < self->negated_fields.size; i++) { - TSFieldId existing_field_id = self->negated_fields.contents[i]; + t_field_id existing_field_id = self->negated_fields.contents[i]; // At each zero value, terminate the match attempt. If we've exactly // matched the new field list, then reuse this index. Otherwise, @@ -2006,8 +2006,8 @@ static void ts_query__add_negated_fields( array_push(&self->negated_fields, 0); } -static TSQueryError ts_query__parse_string_literal( - TSQuery *self, +static t_query_error ts_query__parse_string_literal( + t_query *self, Stream *stream ) { const char *string_start = stream->input; @@ -2065,8 +2065,8 @@ static TSQueryError ts_query__parse_string_literal( // a higher level of abstraction, such as the Rust/JavaScript bindings. They // can contain '@'-prefixed capture names, double-quoted strings, and bare // symbols, which also represent strings. 
-static TSQueryError ts_query__parse_predicate( - TSQuery *self, +static t_query_error ts_query__parse_predicate( + t_query *self, Stream *stream ) { if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; @@ -2078,7 +2078,7 @@ static TSQueryError ts_query__parse_predicate( predicate_name, length ); - array_push(&self->predicate_steps, ((TSQueryPredicateStep) { + array_push(&self->predicate_steps, ((t_query_predicate_step) { .type = TSQueryPredicateStepTypeString, .value_id = id, })); @@ -2088,7 +2088,7 @@ static TSQueryError ts_query__parse_predicate( if (stream->next == ')') { stream_advance(stream); stream_skip_whitespace(stream); - array_push(&self->predicate_steps, ((TSQueryPredicateStep) { + array_push(&self->predicate_steps, ((t_query_predicate_step) { .type = TSQueryPredicateStepTypeDone, .value_id = 0, })); @@ -2116,7 +2116,7 @@ static TSQueryError ts_query__parse_predicate( return TSQueryErrorCapture; } - array_push(&self->predicate_steps, ((TSQueryPredicateStep) { + array_push(&self->predicate_steps, ((t_query_predicate_step) { .type = TSQueryPredicateStepTypeCapture, .value_id = capture_id, })); @@ -2124,14 +2124,14 @@ static TSQueryError ts_query__parse_predicate( // Parse a string literal else if (stream->next == '"') { - TSQueryError e = ts_query__parse_string_literal(self, stream); + t_query_error e = ts_query__parse_string_literal(self, stream); if (e) return e; uint16_t query_id = symbol_table_insert_name( &self->predicate_values, self->string_buffer.contents, self->string_buffer.size ); - array_push(&self->predicate_steps, ((TSQueryPredicateStep) { + array_push(&self->predicate_steps, ((t_query_predicate_step) { .type = TSQueryPredicateStepTypeString, .value_id = query_id, })); @@ -2147,7 +2147,7 @@ static TSQueryError ts_query__parse_predicate( symbol_start, symbol_length ); - array_push(&self->predicate_steps, ((TSQueryPredicateStep) { + array_push(&self->predicate_steps, ((t_query_predicate_step) { .type = 
TSQueryPredicateStepTypeString, .value_id = query_id, })); @@ -2169,8 +2169,8 @@ static TSQueryError ts_query__parse_predicate( // // The caller is responsible for passing in a dedicated CaptureQuantifiers. // These should not be shared between different calls to ts_query__parse_pattern! -static TSQueryError ts_query__parse_pattern( - TSQuery *self, +static t_query_error ts_query__parse_pattern( + t_query *self, Stream *stream, uint32_t depth, bool is_immediate, @@ -2202,7 +2202,7 @@ static TSQueryError ts_query__parse_pattern( CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new(); for (;;) { uint32_t start_index = self->steps.size; - TSQueryError e = ts_query__parse_pattern( + t_query_error e = ts_query__parse_pattern( self, stream, depth, @@ -2269,7 +2269,7 @@ static TSQueryError ts_query__parse_pattern( stream_advance(stream); stream_skip_whitespace(stream); } - TSQueryError e = ts_query__parse_pattern( + t_query_error e = ts_query__parse_pattern( self, stream, depth, @@ -2304,7 +2304,7 @@ static TSQueryError ts_query__parse_pattern( // Otherwise, this parenthesis is the start of a named node. 
else { - TSSymbol symbol; + t_symbol symbol; // Parse a normal node name if (stream_is_ident_start(stream)) { @@ -2374,7 +2374,7 @@ static TSQueryError ts_query__parse_pattern( bool child_is_immediate = false; uint16_t last_child_step_index = 0; uint16_t negated_field_count = 0; - TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT]; + t_field_id negated_field_ids[MAX_NEGATED_FIELD_COUNT]; CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); for (;;) { // Parse a negated field assertion @@ -2390,7 +2390,7 @@ static TSQueryError ts_query__parse_pattern( uint32_t length = (uint32_t)(stream->input - field_name); stream_skip_whitespace(stream); - TSFieldId field_id = ts_language_field_id_for_name( + t_field_id field_id = ts_language_field_id_for_name( self->language, field_name, length @@ -2418,7 +2418,7 @@ static TSQueryError ts_query__parse_pattern( } uint16_t step_index = self->steps.size; - TSQueryError e = ts_query__parse_pattern( + t_query_error e = ts_query__parse_pattern( self, stream, depth + 1, @@ -2476,11 +2476,11 @@ static TSQueryError ts_query__parse_pattern( // Parse a double-quoted anonymous leaf node expression else if (stream->next == '"') { const char *string_start = stream->input; - TSQueryError e = ts_query__parse_string_literal(self, stream); + t_query_error e = ts_query__parse_string_literal(self, stream); if (e) return e; // Add a step for the node - TSSymbol symbol = ts_language_symbol_for_name( + t_symbol symbol = ts_language_symbol_for_name( self->language, self->string_buffer.contents, self->string_buffer.size, @@ -2510,7 +2510,7 @@ static TSQueryError ts_query__parse_pattern( // Parse the pattern CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new(); - TSQueryError e = ts_query__parse_pattern( + t_query_error e = ts_query__parse_pattern( self, stream, depth, @@ -2524,7 +2524,7 @@ static TSQueryError ts_query__parse_pattern( } // Add the field name to the first step of the pattern - TSFieldId field_id = 
ts_language_field_id_for_name( + t_field_id field_id = ts_language_field_id_for_name( self->language, field_name, length @@ -2561,7 +2561,7 @@ static TSQueryError ts_query__parse_pattern( stream_skip_whitespace(stream); // Parse suffixes modifiers for this pattern - TSQuantifier quantifier = TSQuantifierOne; + t_quantifier quantifier = TSQuantifierOne; for (;;) { // Parse the one-or-more operator. if (stream->next == '+') { @@ -2660,12 +2660,12 @@ static TSQueryError ts_query__parse_pattern( return 0; } -TSQuery *ts_query_new( - const TSLanguage *language, +t_query *ts_query_new( + const t_language *language, const char *source, uint32_t source_len, uint32_t *error_offset, - TSQueryError *error_type + t_query_error *error_type ) { if ( !language || @@ -2676,8 +2676,8 @@ TSQuery *ts_query_new( return NULL; } - TSQuery *self = ts_malloc(sizeof(TSQuery)); - *self = (TSQuery) { + t_query *self = ts_malloc(sizeof(t_query)); + *self = (t_query) { .steps = array_new(), .pattern_map = array_new(), .captures = symbol_table_new(), @@ -2794,7 +2794,7 @@ TSQuery *ts_query_new( return self; } -void ts_query_delete(TSQuery *self) { +void ts_query_delete(t_query *self) { if (self) { array_delete(&self->steps); array_delete(&self->pattern_map); @@ -2816,28 +2816,28 @@ void ts_query_delete(TSQuery *self) { } } -uint32_t ts_query_pattern_count(const TSQuery *self) { +uint32_t ts_query_pattern_count(const t_query *self) { return self->patterns.size; } -uint32_t ts_query_capture_count(const TSQuery *self) { +uint32_t ts_query_capture_count(const t_query *self) { return self->captures.slices.size; } -uint32_t ts_query_string_count(const TSQuery *self) { +uint32_t ts_query_string_count(const t_query *self) { return self->predicate_values.slices.size; } const char *ts_query_capture_name_for_id( - const TSQuery *self, + const t_query *self, uint32_t index, uint32_t *length ) { return symbol_table_name_for_id(&self->captures, index, length); } -TSQuantifier 
ts_query_capture_quantifier_for_id( - const TSQuery *self, +t_quantifier ts_query_capture_quantifier_for_id( + const t_query *self, uint32_t pattern_index, uint32_t capture_index ) { @@ -2846,15 +2846,15 @@ TSQuantifier ts_query_capture_quantifier_for_id( } const char *ts_query_string_value_for_id( - const TSQuery *self, + const t_query *self, uint32_t index, uint32_t *length ) { return symbol_table_name_for_id(&self->predicate_values, index, length); } -const TSQueryPredicateStep *ts_query_predicates_for_pattern( - const TSQuery *self, +const t_query_predicate_step *ts_query_predicates_for_pattern( + const t_query *self, uint32_t pattern_index, uint32_t *step_count ) { @@ -2867,14 +2867,14 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern( } uint32_t ts_query_start_byte_for_pattern( - const TSQuery *self, + const t_query *self, uint32_t pattern_index ) { return self->patterns.contents[pattern_index].start_byte; } bool ts_query_is_pattern_rooted( - const TSQuery *self, + const t_query *self, uint32_t pattern_index ) { for (unsigned i = 0; i < self->pattern_map.size; i++) { @@ -2887,7 +2887,7 @@ bool ts_query_is_pattern_rooted( } bool ts_query_is_pattern_non_local( - const TSQuery *self, + const t_query *self, uint32_t pattern_index ) { if (pattern_index < self->patterns.size) { @@ -2898,7 +2898,7 @@ bool ts_query_is_pattern_non_local( } bool ts_query_is_pattern_guaranteed_at_step( - const TSQuery *self, + const t_query *self, uint32_t byte_offset ) { uint32_t step_index = UINT32_MAX; @@ -2915,7 +2915,7 @@ bool ts_query_is_pattern_guaranteed_at_step( } bool ts_query__step_is_fallible( - const TSQuery *self, + const t_query *self, uint16_t step_index ) { assert((uint32_t)step_index + 1 < self->steps.size); @@ -2929,7 +2929,7 @@ bool ts_query__step_is_fallible( } void ts_query_disable_capture( - TSQuery *self, + t_query *self, const char *name, uint32_t length ) { @@ -2945,7 +2945,7 @@ void ts_query_disable_capture( } void ts_query_disable_pattern( - 
TSQuery *self, + t_query *self, uint32_t pattern_index ) { // Remove the given pattern from the pattern map. Its steps will still @@ -2963,9 +2963,9 @@ void ts_query_disable_pattern( * QueryCursor ***************/ -TSQueryCursor *ts_query_cursor_new(void) { - TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor)); - *self = (TSQueryCursor) { +t_query_cursor *ts_query_cursor_new(void) { + t_query_cursor *self = ts_malloc(sizeof(t_query_cursor)); + *self = (t_query_cursor) { .did_exceed_match_limit = false, .ascending = false, .halted = false, @@ -2983,7 +2983,7 @@ TSQueryCursor *ts_query_cursor_new(void) { return self; } -void ts_query_cursor_delete(TSQueryCursor *self) { +void ts_query_cursor_delete(t_query_cursor *self) { array_delete(&self->states); array_delete(&self->finished_states); ts_tree_cursor_delete(&self->cursor); @@ -2991,15 +2991,15 @@ void ts_query_cursor_delete(TSQueryCursor *self) { ts_free(self); } -bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) { +bool ts_query_cursor_did_exceed_match_limit(const t_query_cursor *self) { return self->did_exceed_match_limit; } -uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) { +uint32_t ts_query_cursor_match_limit(const t_query_cursor *self) { return self->capture_list_pool.max_capture_list_count; } -void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) { +void ts_query_cursor_set_match_limit(t_query_cursor *self, uint32_t limit) { self->capture_list_pool.max_capture_list_count = limit; } @@ -3010,9 +3010,9 @@ void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) { #endif void ts_query_cursor_exec( - TSQueryCursor *self, - const TSQuery *query, - TSNode node + t_query_cursor *self, + const t_query *query, + t_parse_node node ) { if (query) { LOG("query steps:\n"); @@ -3054,7 +3054,7 @@ void ts_query_cursor_exec( } void ts_query_cursor_set_byte_range( - TSQueryCursor *self, + t_query_cursor *self, uint32_t start_byte, uint32_t end_byte ) 
{ @@ -3066,9 +3066,9 @@ void ts_query_cursor_set_byte_range( } void ts_query_cursor_set_point_range( - TSQueryCursor *self, - TSPoint start_point, - TSPoint end_point + t_query_cursor *self, + t_point start_point, + t_point end_point ) { if (end_point.row == 0 && end_point.column == 0) { end_point = POINT_MAX; @@ -3080,7 +3080,7 @@ void ts_query_cursor_set_point_range( // Search through all of the in-progress states, and find the captured // node that occurs earliest in the document. static bool ts_query_cursor__first_in_progress_capture( - TSQueryCursor *self, + t_query_cursor *self, uint32_t *state_index, uint32_t *byte_offset, uint32_t *pattern_index, @@ -3102,7 +3102,7 @@ static bool ts_query_cursor__first_in_progress_capture( continue; } - TSNode node = captures->contents[state->consumed_capture_count].node; + t_parse_node node = captures->contents[state->consumed_capture_count].node; if ( ts_node_end_byte(node) <= self->start_byte || point_lte(ts_node_end_point(node), self->start_point) @@ -3135,7 +3135,7 @@ static bool ts_query_cursor__first_in_progress_capture( } // Determine which node is first in a depth-first traversal -int ts_query_cursor__compare_nodes(TSNode left, TSNode right) { +int ts_query_cursor__compare_nodes(t_parse_node left, t_parse_node right) { if (left.id != right.id) { uint32_t left_start = ts_node_start_byte(left); uint32_t right_start = ts_node_start_byte(right); @@ -3151,7 +3151,7 @@ int ts_query_cursor__compare_nodes(TSNode left, TSNode right) { // Determine if either state contains a superset of the other state's captures. 
void ts_query_cursor__compare_captures( - TSQueryCursor *self, + t_query_cursor *self, QueryState *left_state, QueryState *right_state, bool *left_contains_right, @@ -3171,8 +3171,8 @@ void ts_query_cursor__compare_captures( for (;;) { if (i < left_captures->size) { if (j < right_captures->size) { - TSQueryCapture *left = &left_captures->contents[i]; - TSQueryCapture *right = &right_captures->contents[j]; + t_query_capture *left = &left_captures->contents[i]; + t_query_capture *right = &right_captures->contents[j]; if (left->node.id == right->node.id && left->index == right->index) { i++; j++; @@ -3208,7 +3208,7 @@ void ts_query_cursor__compare_captures( } static void ts_query_cursor__add_state( - TSQueryCursor *self, + t_query_cursor *self, const PatternEntry *pattern ) { QueryStep *step = &self->query->steps.contents[pattern->step_index]; @@ -3272,7 +3272,7 @@ static void ts_query_cursor__add_state( // pool, this will steal the capture list from another existing state, and mark that // other state as 'dead'. static CaptureList *ts_query_cursor__prepare_to_capture( - TSQueryCursor *self, + t_query_cursor *self, QueryState *state, unsigned state_index_to_preserve ) { @@ -3319,10 +3319,10 @@ static CaptureList *ts_query_cursor__prepare_to_capture( } static void ts_query_cursor__capture( - TSQueryCursor *self, + t_query_cursor *self, QueryState *state, QueryStep *step, - TSNode node + t_parse_node node ) { if (state->dead) return; CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX); @@ -3334,7 +3334,7 @@ static void ts_query_cursor__capture( for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { uint16_t capture_id = step->capture_ids[j]; if (step->capture_ids[j] == NONE) break; - array_push(capture_list, ((TSQueryCapture) { node, capture_id })); + array_push(capture_list, ((t_query_capture) { node, capture_id })); LOG( " capture node. 
type:%s, pattern:%u, capture_id:%u, capture_count:%u\n", ts_node_type(node), @@ -3349,7 +3349,7 @@ static void ts_query_cursor__capture( // the given state in the `states` array. Ensures that the given state reference is // still valid, even if the states array is reallocated. static QueryState *ts_query_cursor__copy_state( - TSQueryCursor *self, + t_query_cursor *self, QueryState **state_ref ) { const QueryState *state = *state_ref; @@ -3374,7 +3374,7 @@ static QueryState *ts_query_cursor__copy_state( } static inline bool ts_query_cursor__should_descend( - TSQueryCursor *self, + t_query_cursor *self, bool node_intersects_range ) { @@ -3432,7 +3432,7 @@ static inline bool ts_query_cursor__should_descend( // `finished_states` array. Multiple patterns can finish on the same node. If // there are no more matches, return `false`. static inline bool ts_query_cursor__advance( - TSQueryCursor *self, + t_query_cursor *self, bool stop_on_definite_step ) { bool did_match = false; @@ -3530,8 +3530,8 @@ static inline bool ts_query_cursor__advance( // Enter a new node. else { // Get the properties of the current node. 
- TSNode node = ts_tree_cursor_current_node(&self->cursor); - TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor); + t_parse_node node = ts_tree_cursor_current_node(&self->cursor); + t_parse_node parent_node = ts_tree_cursor_parent_node(&self->cursor); bool parent_precedes_range = !ts_node_is_null(parent_node) && ( ts_node_end_byte(parent_node) <= self->start_byte || point_lte(ts_node_end_point(parent_node), self->start_point) @@ -3552,13 +3552,13 @@ static inline bool ts_query_cursor__advance( bool node_intersects_range = !node_precedes_range && !node_follows_range; if (self->on_visible_node) { - TSSymbol symbol = ts_node_symbol(node); + t_symbol symbol = ts_node_symbol(node); bool is_named = ts_node_is_named(node); bool has_later_siblings; bool has_later_named_siblings; bool can_have_later_siblings_with_this_field; - TSFieldId field_id = 0; - TSSymbol supertypes[8] = {0}; + t_field_id field_id = 0; + t_symbol supertypes[8] = {0}; unsigned supertype_count = 8; ts_tree_cursor_current_status( &self->cursor, @@ -3682,9 +3682,9 @@ static inline bool ts_query_cursor__advance( } if (step->negated_field_list_id) { - TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id]; + t_field_id *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id]; for (;;) { - TSFieldId negated_field_id = *negated_field_ids; + t_field_id negated_field_id = *negated_field_ids; if (negated_field_id) { negated_field_ids++; if (ts_node_child_by_field_id(node, negated_field_id).id) { @@ -3738,7 +3738,7 @@ static inline bool ts_query_cursor__advance( // actually points to the *second* step of the pattern, then check // that the node has a parent, and capture the parent node if necessary. 
if (state->needs_parent) { - TSNode parent = ts_tree_cursor_parent_node(&self->cursor); + t_parse_node parent = ts_tree_cursor_parent_node(&self->cursor); if (ts_node_is_null(parent)) { LOG(" missing parent node\n"); state->dead = true; @@ -3942,8 +3942,8 @@ static inline bool ts_query_cursor__advance( } bool ts_query_cursor_next_match( - TSQueryCursor *self, - TSQueryMatch *match + t_query_cursor *self, + t_query_match *match ) { if (self->finished_states.size == 0) { if (!ts_query_cursor__advance(self, false)) { @@ -3967,7 +3967,7 @@ bool ts_query_cursor_next_match( } void ts_query_cursor_remove_match( - TSQueryCursor *self, + t_query_cursor *self, uint32_t match_id ) { for (unsigned i = 0; i < self->finished_states.size; i++) { @@ -3998,8 +3998,8 @@ void ts_query_cursor_remove_match( } bool ts_query_cursor_next_capture( - TSQueryCursor *self, - TSQueryMatch *match, + t_query_cursor *self, + t_query_match *match, uint32_t *capture_index ) { // The goal here is to return captures in order, even though they may not @@ -4041,7 +4041,7 @@ bool ts_query_cursor_next_capture( continue; } - TSNode node = captures->contents[state->consumed_capture_count].node; + t_parse_node node = captures->contents[state->consumed_capture_count].node; bool node_precedes_range = ( ts_node_end_byte(node) <= self->start_byte || @@ -4125,7 +4125,7 @@ bool ts_query_cursor_next_capture( } void ts_query_cursor_set_max_start_depth( - TSQueryCursor *self, + t_query_cursor *self, uint32_t max_start_depth ) { self->max_start_depth = max_start_depth; diff --git a/parser/src/reduce_action.h b/parser/src/reduce_action.h index 2d95b8bc..fbc6c1d3 100644 --- a/parser/src/reduce_action.h +++ b/parser/src/reduce_action.h @@ -10,7 +10,7 @@ extern "C" { typedef struct { uint32_t count; - TSSymbol symbol; + t_symbol symbol; int dynamic_precedence; unsigned short production_id; } ReduceAction; diff --git a/parser/src/stack.c b/parser/src/stack.c index 98d8c561..8f7816ca 100644 --- a/parser/src/stack.c +++ 
b/parser/src/stack.c @@ -27,7 +27,7 @@ typedef struct { } StackLink; struct StackNode { - TSStateId state; + t_state_id state; Length position; StackLink links[MAX_LINK_COUNT]; short unsigned int link_count; @@ -139,7 +139,7 @@ static StackNode *stack_node_new( StackNode *previous_node, Subtree subtree, bool is_pending, - TSStateId state, + t_state_id state, StackNodeArray *pool ) { StackNode *node = pool->size > 0 @@ -460,7 +460,7 @@ uint32_t ts_stack_version_count(const Stack *self) { return self->heads.size; } -TSStateId ts_stack_state(const Stack *self, StackVersion version) { +t_state_id ts_stack_state(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->node->state; } @@ -503,7 +503,7 @@ void ts_stack_push( StackVersion version, Subtree subtree, bool pending, - TSStateId state + t_state_id state ) { StackHead *head = array_get(&self->heads, version); StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool); @@ -593,7 +593,7 @@ typedef struct { forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) { SummarizeStackSession *session = payload; - TSStateId state = iterator->node->state; + t_state_id state = iterator->node->state; unsigned depth = iterator->subtree_count; if (depth > session->max_depth) return StackActionStop; for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) { @@ -764,7 +764,7 @@ void ts_stack_clear(Stack *self) { })); } -bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) { +bool ts_stack_print_dot_graph(Stack *self, const t_language *language, FILE *f) { array_reserve(&self->iterators, 32); if (!f) f = stderr; diff --git a/parser/src/stack.h b/parser/src/stack.h index 86abbc9d..1f40c25a 100644 --- a/parser/src/stack.h +++ b/parser/src/stack.h @@ -24,7 +24,7 @@ typedef Array(StackSlice) StackSliceArray; typedef struct { Length position; unsigned depth; - TSStateId state; + t_state_id state; } 
StackSummaryEntry; typedef Array(StackSummaryEntry) StackSummary; @@ -39,7 +39,7 @@ uint32_t ts_stack_version_count(const Stack *); // Get the state at the top of the given version of the stack. If the stack is // empty, this returns the initial state, 0. -TSStateId ts_stack_state(const Stack *, StackVersion); +t_state_id ts_stack_state(const Stack *, StackVersion); // Get the last external token associated with a given version of the stack. Subtree ts_stack_last_external_token(const Stack *, StackVersion); @@ -55,7 +55,7 @@ Length ts_stack_position(const Stack *, StackVersion); // This transfers ownership of the tree to the Stack. Callers that // need to retain ownership of the tree for their own purposes should // first retain the tree. -void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId); +void ts_stack_push(Stack *, StackVersion, Subtree , bool, t_state_id); // Pop the given number of entries from the given version of the stack. This // operation can increase the number of stack versions by revealing multiple @@ -122,9 +122,9 @@ void ts_stack_remove_version(Stack *, StackVersion); void ts_stack_clear(Stack *); -bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *); +bool ts_stack_print_dot_graph(Stack *, const t_language *, FILE *); -typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t); +typedef void (*StackIterateCallback)(void *, t_state_id, uint32_t); #ifdef __cplusplus } diff --git a/parser/src/subtree.c b/parser/src/subtree.c index 4524e182..e3a33ce7 100644 --- a/parser/src/subtree.c +++ b/parser/src/subtree.c @@ -163,10 +163,10 @@ static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t l } Subtree ts_subtree_new_leaf( - SubtreePool *pool, TSSymbol symbol, Length padding, Length size, - uint32_t lookahead_bytes, TSStateId parse_state, + SubtreePool *pool, t_symbol symbol, Length padding, Length size, + uint32_t lookahead_bytes, t_state_id parse_state, bool has_external_tokens, bool 
depends_on_column, - bool is_keyword, const TSLanguage *language + bool is_keyword, const t_language *language ) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); bool extra = symbol == ts_builtin_sym_end; @@ -224,8 +224,8 @@ Subtree ts_subtree_new_leaf( void ts_subtree_set_symbol( MutableSubtree *self, - TSSymbol symbol, - const TSLanguage *language + t_symbol symbol, + const t_language *language ) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); if (self->data.is_inline) { @@ -242,7 +242,7 @@ void ts_subtree_set_symbol( Subtree ts_subtree_new_error( SubtreePool *pool, int32_t lookahead_char, Length padding, Length size, - uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language + uint32_t bytes_scanned, t_state_id parse_state, const t_language *language ) { Subtree result = ts_subtree_new_leaf( pool, ts_builtin_sym_error, padding, size, bytes_scanned, @@ -291,13 +291,13 @@ MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { static void ts_subtree__compress( MutableSubtree self, unsigned count, - const TSLanguage *language, + const t_language *language, MutableSubtreeArray *stack ) { unsigned initial_stack_size = stack->size; MutableSubtree tree = self; - TSSymbol symbol = tree.ptr->symbol; + t_symbol symbol = tree.ptr->symbol; for (unsigned i = 0; i < count; i++) { if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break; @@ -334,7 +334,7 @@ static void ts_subtree__compress( } } -void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language) { +void ts_subtree_balance(Subtree self, SubtreePool *pool, const t_language *language) { array_clear(&pool->tree_stack); if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) { @@ -369,7 +369,7 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu // Assign all of the node's properties that depend on its children. 
void ts_subtree_summarize_children( MutableSubtree self, - const TSLanguage *language + const t_language *language ) { assert(!self.data.is_inline); @@ -384,7 +384,7 @@ void ts_subtree_summarize_children( self.ptr->dynamic_precedence = 0; uint32_t structural_index = 0; - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); + const t_symbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); uint32_t lookahead_end_byte = 0; const Subtree *children = ts_subtree_children(self); @@ -504,10 +504,10 @@ void ts_subtree_summarize_children( // // This takes ownership of the children array. MutableSubtree ts_subtree_new_node( - TSSymbol symbol, + t_symbol symbol, SubtreeArray *children, unsigned production_id, - const TSLanguage *language + const t_language *language ) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; @@ -549,7 +549,7 @@ MutableSubtree ts_subtree_new_node( Subtree ts_subtree_new_error_node( SubtreeArray *children, bool extra, - const TSLanguage *language + const t_language *language ) { MutableSubtree result = ts_subtree_new_node( ts_builtin_sym_error, children, 0, language @@ -564,10 +564,10 @@ Subtree ts_subtree_new_error_node( // having any effect on the parse state. 
Subtree ts_subtree_new_missing_leaf( SubtreePool *pool, - TSSymbol symbol, + t_symbol symbol, Length padding, uint32_t lookahead_bytes, - const TSLanguage *language + const t_language *language ) { Subtree result = ts_subtree_new_leaf( pool, symbol, padding, length_zero(), lookahead_bytes, @@ -656,7 +656,7 @@ static inline void ts_subtree_set_has_changes(MutableSubtree *self) { } } -Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, SubtreePool *pool) { +Subtree ts_subtree_edit(Subtree self, const t_input_edit *input_edit, SubtreePool *pool) { typedef struct { Subtree *tree; Edit edit; @@ -847,8 +847,8 @@ static const char *const ROOT_FIELD = "__ROOT__"; static size_t ts_subtree__write_to_string( Subtree self, char *string, size_t limit, - const TSLanguage *language, bool include_all, - TSSymbol alias_symbol, bool alias_is_named, const char *field_name + const t_language *language, bool include_all, + t_symbol alias_symbol, bool alias_is_named, const char *field_name ) { if (!self.ptr) return snprintf(string, limit, "(NULL)"); @@ -876,7 +876,7 @@ static size_t ts_subtree__write_to_string( cursor += snprintf(*writer, limit, "(UNEXPECTED "); cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char); } else { - TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); + t_symbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); const char *symbol_name = ts_language_symbol_name(language, symbol); if (ts_subtree_missing(self)) { cursor += snprintf(*writer, limit, "(MISSING "); @@ -890,7 +890,7 @@ static size_t ts_subtree__write_to_string( } } } else if (is_root) { - TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); + t_symbol symbol = alias_symbol ? 
alias_symbol : ts_subtree_symbol(self); const char *symbol_name = ts_language_symbol_name(language, symbol); if (ts_subtree_child_count(self) > 0) { cursor += snprintf(*writer, limit, "(%s", symbol_name); @@ -902,7 +902,7 @@ static size_t ts_subtree__write_to_string( } if (ts_subtree_child_count(self)) { - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); + const t_symbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); const TSFieldMapEntry *field_map, *field_map_end; ts_language_field_map( language, @@ -921,7 +921,7 @@ static size_t ts_subtree__write_to_string( 0, false, NULL ); } else { - TSSymbol subtree_alias_symbol = alias_sequence + t_symbol subtree_alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0; bool subtree_alias_is_named = subtree_alias_symbol @@ -953,9 +953,9 @@ static size_t ts_subtree__write_to_string( char *ts_subtree_string( Subtree self, - TSSymbol alias_symbol, + t_symbol alias_symbol, bool alias_is_named, - const TSLanguage *language, + const t_language *language, bool include_all ) { char scratch_string[1]; @@ -974,10 +974,10 @@ char *ts_subtree_string( } void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, - const TSLanguage *language, TSSymbol alias_symbol, + const t_language *language, t_symbol alias_symbol, FILE *f) { - TSSymbol subtree_symbol = ts_subtree_symbol(*self); - TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol; + t_symbol subtree_symbol = ts_subtree_symbol(*self); + t_symbol symbol = alias_symbol ? 
alias_symbol : subtree_symbol; uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self); fprintf(f, "tree_%p [label=\"", (void *)self); ts_language_write_symbol_as_dot_string(language, f, symbol); @@ -1017,7 +1017,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, ts_subtree_production_id(*self); for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { const Subtree *child = &ts_subtree_children(*self)[i]; - TSSymbol subtree_alias_symbol = 0; + t_symbol subtree_alias_symbol = 0; if (!ts_subtree_extra(*child) && child_info_offset) { subtree_alias_symbol = language->alias_sequences[child_info_offset]; child_info_offset++; @@ -1028,7 +1028,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, } } -void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) { +void ts_subtree_print_dot_graph(Subtree self, const t_language *language, FILE *f) { fprintf(f, "digraph tree {\n"); fprintf(f, "edge [arrowhead=none]\n"); ts_subtree__print_dot_graph(&self, 0, language, 0, f); diff --git a/parser/src/subtree.h b/parser/src/subtree.h index 58c7e483..4b218fa6 100644 --- a/parser/src/subtree.h +++ b/parser/src/subtree.h @@ -115,8 +115,8 @@ typedef struct { uint32_t lookahead_bytes; uint32_t error_cost; uint32_t child_count; - TSSymbol symbol; - TSStateId parse_state; + t_symbol symbol; + t_state_id parse_state; bool visible : 1; bool named : 1; @@ -140,8 +140,8 @@ typedef struct { uint16_t repeat_depth; uint16_t production_id; struct { - TSSymbol symbol; - TSStateId parse_state; + t_symbol symbol; + t_state_id parse_state; } first_leaf; }; @@ -188,40 +188,40 @@ SubtreePool ts_subtree_pool_new(uint32_t capacity); void ts_subtree_pool_delete(SubtreePool *); Subtree ts_subtree_new_leaf( - SubtreePool *, TSSymbol, Length, Length, uint32_t, - TSStateId, bool, bool, bool, const TSLanguage * + SubtreePool *, t_symbol, Length, Length, uint32_t, + t_state_id, bool, bool, bool, const 
t_language * ); Subtree ts_subtree_new_error( - SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage * + SubtreePool *, int32_t, Length, Length, uint32_t, t_state_id, const t_language * ); -MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *); -Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *); -Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, uint32_t, const TSLanguage *); +MutableSubtree ts_subtree_new_node(t_symbol, SubtreeArray *, unsigned, const t_language *); +Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const t_language *); +Subtree ts_subtree_new_missing_leaf(SubtreePool *, t_symbol, Length, uint32_t, const t_language *); MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree); void ts_subtree_retain(Subtree); void ts_subtree_release(SubtreePool *, Subtree); int ts_subtree_compare(Subtree, Subtree, SubtreePool *); -void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *); -void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *); -void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *); -void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *); -Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *); -char *ts_subtree_string(Subtree, TSSymbol, bool, const TSLanguage *, bool include_all); -void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *); +void ts_subtree_set_symbol(MutableSubtree *, t_symbol, const t_language *); +void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const t_language *); +void ts_subtree_summarize_children(MutableSubtree, const t_language *); +void ts_subtree_balance(Subtree, SubtreePool *, const t_language *); +Subtree ts_subtree_edit(Subtree, const t_input_edit *edit, SubtreePool *); +char *ts_subtree_string(Subtree, t_symbol, bool, const t_language *, bool include_all); +void 
ts_subtree_print_dot_graph(Subtree, const t_language *, FILE *); Subtree ts_subtree_last_external_token(Subtree); const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self); bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); #define SUBTREE_GET(self, name) ((self).data.is_inline ? (self).data.name : (self).ptr->name) -static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); } +static inline t_symbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); } static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); } static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); } static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); } static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); } static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); } static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); } -static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); } +static inline t_state_id ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); } static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); } #undef SUBTREE_GET @@ -245,13 +245,13 @@ static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) { } } -static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) { +static inline t_symbol ts_subtree_leaf_symbol(Subtree self) { if (self.data.is_inline) return self.data.symbol; if (self.ptr->child_count == 0) return self.ptr->symbol; return self.ptr->first_leaf.symbol; } -static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) { +static inline t_state_id ts_subtree_leaf_parse_state(Subtree self) { if (self.data.is_inline) return self.data.parse_state; 
if (self.ptr->child_count == 0) return self.ptr->parse_state; return self.ptr->first_leaf.parse_state; diff --git a/parser/src/tree.c b/parser/src/tree.c index 328a7984..fd25e53f 100644 --- a/parser/src/tree.c +++ b/parser/src/tree.c @@ -8,25 +8,25 @@ #include "./tree_cursor.h" #include "./tree.h" -TSTree *ts_tree_new( - Subtree root, const TSLanguage *language, - const TSRange *included_ranges, unsigned included_range_count +t_tree *ts_tree_new( + Subtree root, const t_language *language, + const t_range *included_ranges, unsigned included_range_count ) { - TSTree *result = ts_malloc(sizeof(TSTree)); + t_tree *result = ts_malloc(sizeof(t_tree)); result->root = root; result->language = ts_language_copy(language); - result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange)); - memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange)); + result->included_ranges = ts_calloc(included_range_count, sizeof(t_range)); + memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(t_range)); result->included_range_count = included_range_count; return result; } -TSTree *ts_tree_copy(const TSTree *self) { +t_tree *ts_tree_copy(const t_tree *self) { ts_subtree_retain(self->root); return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count); } -void ts_tree_delete(TSTree *self) { +void ts_tree_delete(t_tree *self) { if (!self) return; SubtreePool pool = ts_subtree_pool_new(0); @@ -37,26 +37,26 @@ void ts_tree_delete(TSTree *self) { ts_free(self); } -TSNode ts_tree_root_node(const TSTree *self) { +t_parse_node ts_tree_root_node(const t_tree *self) { return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0); } -TSNode ts_tree_root_node_with_offset( - const TSTree *self, +t_parse_node ts_tree_root_node_with_offset( + const t_tree *self, uint32_t offset_bytes, - TSPoint offset_extent + t_point offset_extent ) { Length offset = {offset_bytes, offset_extent}; return 
ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0); } -const TSLanguage *ts_tree_language(const TSTree *self) { +const t_language *ts_tree_language(const t_tree *self) { return self->language; } -void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { +void ts_tree_edit(t_tree *self, const t_input_edit *edit) { for (unsigned i = 0; i < self->included_range_count; i++) { - TSRange *range = &self->included_ranges[i]; + t_range *range = &self->included_ranges[i]; if (range->end_byte >= edit->old_end_byte) { if (range->end_byte != UINT32_MAX) { range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte); @@ -94,14 +94,14 @@ void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { ts_subtree_pool_delete(&pool); } -TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) { +t_range *ts_tree_included_ranges(const t_tree *self, uint32_t *length) { *length = self->included_range_count; - TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange)); - memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange)); + t_range *ranges = ts_calloc(self->included_range_count, sizeof(t_range)); + memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(t_range)); return ranges; } -TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) { +t_range *ts_tree_get_changed_ranges(const t_tree *old_tree, const t_tree *new_tree, uint32_t *length) { TreeCursor cursor1 = {NULL, array_new(), 0}; TreeCursor cursor2 = {NULL, array_new(), 0}; ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree)); @@ -114,7 +114,7 @@ TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tr &included_range_differences ); - TSRange *result; + t_range *result; *length = ts_subtree_get_changed_ranges( &old_tree->root, &new_tree->root, &cursor1, &cursor2, old_tree->language, &included_range_differences, &result @@ -156,7 
+156,7 @@ int _ts_dup(int file_descriptor) { return dup(file_descriptor); } -void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) { +void ts_tree_print_dot_graph(const t_tree *self, int file_descriptor) { FILE *file = fdopen(_ts_dup(file_descriptor), "a"); ts_subtree_print_dot_graph(self->root, self->language, file); fclose(file); diff --git a/parser/src/tree.h b/parser/src/tree.h index f012f888..9488a40b 100644 --- a/parser/src/tree.h +++ b/parser/src/tree.h @@ -11,18 +11,18 @@ typedef struct { const Subtree *child; const Subtree *parent; Length position; - TSSymbol alias_symbol; + t_symbol alias_symbol; } ParentCacheEntry; -struct TSTree { +struct t_tree { Subtree root; - const TSLanguage *language; - TSRange *included_ranges; + const t_language *language; + t_range *included_ranges; unsigned included_range_count; }; -TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned); -TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol); +t_tree *ts_tree_new(Subtree root, const t_language *language, const t_range *, unsigned); +t_parse_node ts_node_new(const t_tree *, const Subtree *, Length, t_symbol); #ifdef __cplusplus } diff --git a/parser/src/tree_cursor.c b/parser/src/tree_cursor.c index 0a351606..7c36317b 100644 --- a/parser/src/tree_cursor.c +++ b/parser/src/tree_cursor.c @@ -6,12 +6,12 @@ typedef struct { Subtree parent; - const TSTree *tree; + const t_tree *tree; Length position; uint32_t child_index; uint32_t structural_child_index; uint32_t descendant_index; - const TSSymbol *alias_sequence; + const t_symbol *alias_sequence; } CursorChildIterator; // CursorChildIterator @@ -37,7 +37,7 @@ static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCurs if (ts_subtree_child_count(*last_entry->subtree) == 0) { return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL}; } - const TSSymbol *alias_sequence = ts_language_alias_sequence( + const t_symbol 
*alias_sequence = ts_language_alias_sequence( self->tree->language, last_entry->subtree->ptr->production_id ); @@ -150,17 +150,17 @@ static inline bool ts_tree_cursor_child_iterator_previous( // TSTreeCursor - lifecycle -TSTreeCursor ts_tree_cursor_new(TSNode node) { - TSTreeCursor self = {NULL, NULL, {0, 0, 0}}; +t_tree_cursor ts_tree_cursor_new(t_parse_node node) { + t_tree_cursor self = {NULL, NULL, {0, 0, 0}}; ts_tree_cursor_init((TreeCursor *)&self, node); return self; } -void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) { +void ts_tree_cursor_reset(t_tree_cursor *_self, t_parse_node node) { ts_tree_cursor_init((TreeCursor *)_self, node); } -void ts_tree_cursor_init(TreeCursor *self, TSNode node) { +void ts_tree_cursor_init(TreeCursor *self, t_parse_node node) { self->tree = node.tree; self->root_alias_symbol = node.context[3]; array_clear(&self->stack); @@ -176,14 +176,14 @@ void ts_tree_cursor_init(TreeCursor *self, TSNode node) { })); } -void ts_tree_cursor_delete(TSTreeCursor *_self) { +void ts_tree_cursor_delete(t_tree_cursor *_self) { TreeCursor *self = (TreeCursor *)_self; array_delete(&self->stack); } // TSTreeCursor - walking the tree -TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) { +TreeCursorStep ts_tree_cursor_goto_first_child_internal(t_tree_cursor *_self) { TreeCursor *self = (TreeCursor *)_self; bool visible; TreeCursorEntry entry; @@ -201,7 +201,7 @@ TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) { return TreeCursorStepNone; } -bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) { +bool ts_tree_cursor_goto_first_child(t_tree_cursor *self) { for (;;) { switch (ts_tree_cursor_goto_first_child_internal(self)) { case TreeCursorStepHidden: @@ -215,7 +215,7 @@ bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) { return false; } -TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) { +TreeCursorStep 
ts_tree_cursor_goto_last_child_internal(t_tree_cursor *_self) { TreeCursor *self = (TreeCursor *)_self; bool visible; TreeCursorEntry entry; @@ -242,7 +242,7 @@ TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) { return TreeCursorStepNone; } -bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) { +bool ts_tree_cursor_goto_last_child(t_tree_cursor *self) { for (;;) { switch (ts_tree_cursor_goto_last_child_internal(self)) { case TreeCursorStepHidden: @@ -257,9 +257,9 @@ bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) { } static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point( - TSTreeCursor *_self, + t_tree_cursor *_self, uint32_t goal_byte, - TSPoint goal_point + t_point goal_point ) { TreeCursor *self = (TreeCursor *)_self; uint32_t initial_size = self->stack.size; @@ -298,16 +298,16 @@ static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point( return -1; } -int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte) { +int64_t ts_tree_cursor_goto_first_child_for_byte(t_tree_cursor *self, uint32_t goal_byte) { return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, POINT_ZERO); } -int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point) { +int64_t ts_tree_cursor_goto_first_child_for_point(t_tree_cursor *self, t_point goal_point) { return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point); } TreeCursorStep ts_tree_cursor_goto_sibling_internal( - TSTreeCursor *_self, + t_tree_cursor *_self, bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) { TreeCursor *self = (TreeCursor *)_self; uint32_t initial_size = self->stack.size; @@ -341,11 +341,11 @@ TreeCursorStep ts_tree_cursor_goto_sibling_internal( return TreeCursorStepNone; } -TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) { +TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(t_tree_cursor *_self) 
{ return ts_tree_cursor_goto_sibling_internal(_self, ts_tree_cursor_child_iterator_next); } -bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) { +bool ts_tree_cursor_goto_next_sibling(t_tree_cursor *self) { switch (ts_tree_cursor_goto_next_sibling_internal(self)) { case TreeCursorStepHidden: ts_tree_cursor_goto_first_child(self); @@ -357,7 +357,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) { } } -TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self) { +TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(t_tree_cursor *_self) { // since subtracting across row loses column information, we may have to // restore it TreeCursor *self = (TreeCursor *)_self; @@ -392,7 +392,7 @@ TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self return step; } -bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) { +bool ts_tree_cursor_goto_previous_sibling(t_tree_cursor *self) { switch (ts_tree_cursor_goto_previous_sibling_internal(self)) { case TreeCursorStepHidden: ts_tree_cursor_goto_last_child(self); @@ -404,7 +404,7 @@ bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) { } } -bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { +bool ts_tree_cursor_goto_parent(t_tree_cursor *_self) { TreeCursor *self = (TreeCursor *)_self; for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { if (ts_tree_cursor_is_entry_visible(self, i)) { @@ -416,7 +416,7 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { } void ts_tree_cursor_goto_descendant( - TSTreeCursor *_self, + t_tree_cursor *_self, uint32_t goal_descendant_index ) { TreeCursor *self = (TreeCursor *)_self; @@ -466,16 +466,16 @@ void ts_tree_cursor_goto_descendant( } while (did_descend); } -uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) { +uint32_t ts_tree_cursor_current_descendant_index(const t_tree_cursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; TreeCursorEntry 
*last_entry = array_back(&self->stack); return last_entry->descendant_index; } -TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { +t_parse_node ts_tree_cursor_current_node(const t_tree_cursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; TreeCursorEntry *last_entry = array_back(&self->stack); - TSSymbol alias_symbol = self->root_alias_symbol; + t_symbol alias_symbol = self->root_alias_symbol; if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) { TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; alias_symbol = ts_language_alias_at( @@ -495,12 +495,12 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { // Private - Get various facts about the current node that are needed // when executing tree queries. void ts_tree_cursor_current_status( - const TSTreeCursor *_self, - TSFieldId *field_id, + const t_tree_cursor *_self, + t_field_id *field_id, bool *has_later_siblings, bool *has_later_named_siblings, bool *can_have_later_siblings_with_this_field, - TSSymbol *supertypes, + t_symbol *supertypes, unsigned *supertype_count ) { const TreeCursor *self = (const TreeCursor *)_self; @@ -517,7 +517,7 @@ void ts_tree_cursor_current_status( TreeCursorEntry *entry = &self->stack.contents[i]; TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - const TSSymbol *alias_sequence = ts_language_alias_sequence( + const t_symbol *alias_sequence = ts_language_alias_sequence( self->tree->language, parent_entry->subtree->ptr->production_id ); @@ -532,7 +532,7 @@ void ts_tree_cursor_current_status( ts_subtree_symbol(subtree)) // Stop walking up when a visible ancestor is found. 
- TSSymbol entry_symbol = subtree_symbol( + t_symbol entry_symbol = subtree_symbol( *entry->subtree, entry->structural_child_index ); @@ -614,7 +614,7 @@ void ts_tree_cursor_current_status( } } -uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) { +uint32_t ts_tree_cursor_current_depth(const t_tree_cursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; uint32_t depth = 0; for (unsigned i = 1; i < self->stack.size; i++) { @@ -625,12 +625,12 @@ uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) { return depth; } -TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) { +t_parse_node ts_tree_cursor_parent_node(const t_tree_cursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; for (int i = (int)self->stack.size - 2; i >= 0; i--) { TreeCursorEntry *entry = &self->stack.contents[i]; bool is_visible = true; - TSSymbol alias_symbol = 0; + t_symbol alias_symbol = 0; if (i > 0) { TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; alias_symbol = ts_language_alias_at( @@ -652,7 +652,7 @@ TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) { return ts_node_new(NULL, NULL, length_zero(), 0); } -TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { +t_field_id ts_tree_cursor_current_field_id(const t_tree_cursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; // Walk up the tree, visiting the current node and its invisible ancestors. 
@@ -683,8 +683,8 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { return 0; } -const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) { - TSFieldId id = ts_tree_cursor_current_field_id(_self); +const char *ts_tree_cursor_current_field_name(const t_tree_cursor *_self) { + t_field_id id = ts_tree_cursor_current_field_id(_self); if (id) { const TreeCursor *self = (const TreeCursor *)_self; return self->tree->language->field_names[id]; @@ -693,9 +693,9 @@ const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) { } } -TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) { +t_tree_cursor ts_tree_cursor_copy(const t_tree_cursor *_cursor) { const TreeCursor *cursor = (const TreeCursor *)_cursor; - TSTreeCursor res = {NULL, NULL, {0, 0}}; + t_tree_cursor res = {NULL, NULL, {0, 0}}; TreeCursor *copy = (TreeCursor *)&res; copy->tree = cursor->tree; copy->root_alias_symbol = cursor->root_alias_symbol; @@ -704,7 +704,7 @@ TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) { return res; } -void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) { +void ts_tree_cursor_reset_to(t_tree_cursor *_dst, const t_tree_cursor *_src) { const TreeCursor *cursor = (const TreeCursor *)_src; TreeCursor *copy = (TreeCursor *)_dst; copy->tree = cursor->tree; diff --git a/parser/src/tree_cursor.h b/parser/src/tree_cursor.h index 96a386df..39edd0e0 100644 --- a/parser/src/tree_cursor.h +++ b/parser/src/tree_cursor.h @@ -12,9 +12,9 @@ typedef struct { } TreeCursorEntry; typedef struct { - const TSTree *tree; + const t_tree *tree; Array(TreeCursorEntry) stack; - TSSymbol root_alias_symbol; + t_symbol root_alias_symbol; } TreeCursor; typedef enum { @@ -23,26 +23,26 @@ typedef enum { TreeCursorStepVisible, } TreeCursorStep; -void ts_tree_cursor_init(TreeCursor *, TSNode); +void ts_tree_cursor_init(TreeCursor *, t_parse_node); void ts_tree_cursor_current_status( - const TSTreeCursor *, - TSFieldId *, + 
const t_tree_cursor *, + t_field_id *, bool *, bool *, bool *, - TSSymbol *, + t_symbol *, unsigned * ); -TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *); -TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *); +TreeCursorStep ts_tree_cursor_goto_first_child_internal(t_tree_cursor *); +TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(t_tree_cursor *); -static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) { +static inline Subtree ts_tree_cursor_current_subtree(const t_tree_cursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; TreeCursorEntry *last_entry = array_back(&self->stack); return *last_entry->subtree; } -TSNode ts_tree_cursor_parent_node(const TSTreeCursor *); +t_parse_node ts_tree_cursor_parent_node(const t_tree_cursor *); #endif // TREE_SITTER_TREE_CURSOR_H_ From 39c1c5026a8fde08d333dc58d9caef74ad44a745 Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Wed, 1 May 2024 17:07:19 +0200 Subject: [PATCH 12/14] updating to work on header+source combnined combo --- Minishell.mk | 7 +- parser/Filelist.mk | 4 +- parser/Makefile | 4 +- parser/combined.c | 12273 --------------------------- parser/src/alloc.c | 48 - parser/src/alloc.h | 41 - parser/src/api.h | 3131 ++++--- parser/src/array.h | 290 - parser/src/atomic.h | 68 - parser/src/clock.h | 146 - parser/src/combined.c | 11843 ++++++++++++++++++++++++++ parser/{ => src}/create_language.c | 8 +- parser/src/error_costs.h | 11 - parser/src/get_changed_ranges.c | 501 -- parser/src/get_changed_ranges.h | 36 - parser/src/host.h | 21 - parser/src/language.c | 216 - parser/src/language.h | 299 - parser/src/length.h | 52 - parser/src/lexer.c | 438 - parser/src/lexer.h | 49 - parser/src/lib.c | 13 - parser/src/node.c | 776 -- parser/src/parser.c | 2091 ----- parser/src/parser.h | 265 - parser/src/point.h | 62 - parser/src/query.c | 4134 --------- parser/src/reduce_action.h | 34 - parser/src/reusable_node.h | 95 - 
parser/src/scanner.c | 3 +- parser/src/stack.c | 899 -- parser/src/stack.h | 133 - parser/src/subtree.c | 1060 --- parser/src/subtree.h | 382 - parser/src/tree.c | 165 - parser/src/tree.h | 31 - parser/src/tree_cursor.c | 714 -- parser/src/tree_cursor.h | 48 - parser/src/unicode.h | 50 - sources/main.c | 3 +- 40 files changed, 13728 insertions(+), 26716 deletions(-) delete mode 100644 parser/combined.c delete mode 100644 parser/src/alloc.c delete mode 100644 parser/src/alloc.h delete mode 100644 parser/src/array.h delete mode 100644 parser/src/atomic.h delete mode 100644 parser/src/clock.h create mode 100644 parser/src/combined.c rename parser/{ => src}/create_language.c (96%) delete mode 100644 parser/src/error_costs.h delete mode 100644 parser/src/get_changed_ranges.c delete mode 100644 parser/src/get_changed_ranges.h delete mode 100644 parser/src/host.h delete mode 100644 parser/src/language.c delete mode 100644 parser/src/language.h delete mode 100644 parser/src/length.h delete mode 100644 parser/src/lexer.c delete mode 100644 parser/src/lexer.h delete mode 100644 parser/src/lib.c delete mode 100644 parser/src/node.c delete mode 100644 parser/src/parser.c delete mode 100644 parser/src/parser.h delete mode 100644 parser/src/point.h delete mode 100644 parser/src/query.c delete mode 100644 parser/src/reduce_action.h delete mode 100644 parser/src/reusable_node.h delete mode 100644 parser/src/stack.c delete mode 100644 parser/src/stack.h delete mode 100644 parser/src/subtree.c delete mode 100644 parser/src/subtree.h delete mode 100644 parser/src/tree.c delete mode 100644 parser/src/tree.h delete mode 100644 parser/src/tree_cursor.c delete mode 100644 parser/src/tree_cursor.h delete mode 100644 parser/src/unicode.h diff --git a/Minishell.mk b/Minishell.mk index d38fa064..597a0e09 100644 --- a/Minishell.mk +++ b/Minishell.mk @@ -6,7 +6,7 @@ # By: maiboyer +#+ +:+ +#+ # # +#+#+#+#+#+ +#+ # # Created: 2024/04/28 17:28:30 by maiboyer #+# #+# # -# Updated: 2024/04/30 
21:32:49 by maiboyer ### ########.fr # +# Updated: 2024/05/01 10:34:21 by maiboyer ### ########.fr # # # # **************************************************************************** # @@ -64,6 +64,7 @@ bonus: $(OBJ) $(LIB_OBJ) $(OBJDIRNAME)/libme.a $(OBJDIRNAME)/libgmr.a @mkdir -p $(OBJDIRNAME)/$(LIBDIRNAME) @mkdir -p $(OBJDIRNAME)/$(SRCDIRNAME) @echo -e '$(GREY) Be Carefull ur in $(END)$(GREEN)Debug Mode$(END)' + @echo -e '$(GREY) Linking\t$(END)$(GREEN)$(NAME)$(END)' @cc $(CFLAGS) -D DEBUG=42 -o $(NAME) $(OBJ) -L$(OBJDIRNAME) -lgmr -lme # Dependences for all @@ -71,13 +72,13 @@ $(NAME): $(OBJ) $(LIB_OBJ) $(OBJDIRNAME)/libgmr.a $(OBJDIRNAME)/libme.a @mkdir -p $(OBJDIRNAME) @mkdir -p $(OBJDIRNAME)/$(LIBDIRNAME) @mkdir -p $(OBJDIRNAME)/$(SRCDIRNAME) - @echo -e "$(GREY) Linking $(END)$(GREEN)$(NAME)$(END)" + @echo -e '$(GREY) Linking\t$(END)$(GREEN)$(NAME)$(END)' @cc $(CFLAGS) -o $(NAME) $(OBJ) $(LIB_OBJ) -L$(OBJDIRNAME) -lgmr -lme # Creating the objects $(OBJDIRNAME)/%.o: %.c @mkdir -p $(dir $@) - @echo -e '$(GREY) Compiling $(END)$(GREEN)$<$(END)' + @echo -e '$(GREY) Compiling\t$(END)$(GREEN)$<$(END)' @cc $(CFLAGS) -o $@ -c $< -include ${OBJ:.o=.d} diff --git a/parser/Filelist.mk b/parser/Filelist.mk index b9e4a64b..56b2018c 100644 --- a/parser/Filelist.mk +++ b/parser/Filelist.mk @@ -1,5 +1,5 @@ SRC_FILES = \ -create_language \ +src/create_language \ static/alias_sequences/alias_sequences_0 \ static/create/create_alias_sequences \ static/create/create_external_scanner_states \ @@ -4376,4 +4376,4 @@ static/unique_symbols_map/unique_symbols_map_2 \ static/lex_funcs/lex_normal/state_helper \ static/lex_funcs/lex_normal/state_helper2 \ static/lex_funcs/lex_keywords/state_0_bis \ -static/lex_funcs/lex_keywords/state_4_bis \ No newline at end of file +static/lex_funcs/lex_keywords/state_4_bis diff --git a/parser/Makefile b/parser/Makefile index 9e9612e0..c4ff27d9 100644 --- a/parser/Makefile +++ b/parser/Makefile @@ -6,7 +6,7 @@ # By: maiboyer +#+ +:+ +#+ # # 
+#+#+#+#+#+ +#+ # # Created: 2023/11/03 13:20:01 by maiboyer #+# #+# # -# Updated: 2024/04/30 22:23:58 by maiboyer ### ########.fr # +# Updated: 2024/05/01 15:52:16 by maiboyer ### ########.fr # # # # **************************************************************************** # @@ -22,7 +22,7 @@ CFLAGS = -Wall -Wextra -Werror -MMD -I./includes -I../includes -I../output/inc include ./Filelist.mk -SRC_FILES += ./combined +SRC_FILES += ./src/combined ./src/scanner SRC = $(addsuffix .c,$(addprefix $(SRC_DIR)/,$(SRC_FILES))) OBJ = $(addsuffix .o,$(addprefix $(BUILD_DIR)/,$(SRC_FILES))) DEPS = $(addsuffix .d,$(addprefix $(BUILD_DIR)/,$(SRC_FILES))) diff --git a/parser/combined.c b/parser/combined.c deleted file mode 100644 index 5791ec0f..00000000 --- a/parser/combined.c +++ /dev/null @@ -1,12273 +0,0 @@ -#include "./src/alloc.h" -#include "src/api.h" -#include - -static void *ts_malloc_default(size_t size) { - void *result = malloc(size); - if (size > 0 && !result) { - fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); - abort(); - } - return result; -} - -static void *ts_calloc_default(size_t count, size_t size) { - void *result = calloc(count, size); - if (count > 0 && !result) { - fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); - abort(); - } - return result; -} - -static void *ts_realloc_default(void *buffer, size_t size) { - void *result = realloc(buffer, size); - if (size > 0 && !result) { - fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); - abort(); - } - return result; -} - -// Allow clients to override allocation functions dynamically -TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default; -TS_PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default; -TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default; -TS_PUBLIC void (*ts_current_free)(void *) = free; - -void ts_set_allocator( - void *(*new_malloc)(size_t size), - void *(*new_calloc)(size_t count, 
size_t size), - void *(*new_realloc)(void *ptr, size_t size), - void (*new_free)(void *ptr) -) { - ts_current_malloc = new_malloc ? new_malloc : ts_malloc_default; - ts_current_calloc = new_calloc ? new_calloc : ts_calloc_default; - ts_current_realloc = new_realloc ? new_realloc : ts_realloc_default; - ts_current_free = new_free ? new_free : free; -} - -#include "src/get_changed_ranges.h" -#include "src/subtree.h" -#include "src/language.h" -#include "src/error_costs.h" -#include "src/tree_cursor.h" -#include - -// #define DEBUG_GET_CHANGED_RANGES - -static void ts_range_array_add( - TSRangeArray *self, - Length start, - Length end -) { - if (self->size > 0) { - t_range *last_range = array_back(self); - if (start.bytes <= last_range->end_byte) { - last_range->end_byte = end.bytes; - last_range->end_point = end.extent; - return; - } - } - - if (start.bytes < end.bytes) { - t_range range = { start.extent, end.extent, start.bytes, end.bytes }; - array_push(self, range); - } -} - -bool ts_range_array_intersects( - const TSRangeArray *self, - unsigned start_index, - uint32_t start_byte, - uint32_t end_byte -) { - for (unsigned i = start_index; i < self->size; i++) { - t_range *range = &self->contents[i]; - if (range->end_byte > start_byte) { - if (range->start_byte >= end_byte) break; - return true; - } - } - return false; -} - -void ts_range_array_get_changed_ranges( - const t_range *old_ranges, unsigned old_range_count, - const t_range *new_ranges, unsigned new_range_count, - TSRangeArray *differences -) { - unsigned new_index = 0; - unsigned old_index = 0; - Length current_position = length_zero(); - bool in_old_range = false; - bool in_new_range = false; - - while (old_index < old_range_count || new_index < new_range_count) { - const t_range *old_range = &old_ranges[old_index]; - const t_range *new_range = &new_ranges[new_index]; - - Length next_old_position; - if (in_old_range) { - next_old_position = (Length) {old_range->end_byte, old_range->end_point}; - } else 
if (old_index < old_range_count) { - next_old_position = (Length) {old_range->start_byte, old_range->start_point}; - } else { - next_old_position = LENGTH_MAX; - } - - Length next_new_position; - if (in_new_range) { - next_new_position = (Length) {new_range->end_byte, new_range->end_point}; - } else if (new_index < new_range_count) { - next_new_position = (Length) {new_range->start_byte, new_range->start_point}; - } else { - next_new_position = LENGTH_MAX; - } - - if (next_old_position.bytes < next_new_position.bytes) { - if (in_old_range != in_new_range) { - ts_range_array_add(differences, current_position, next_old_position); - } - if (in_old_range) old_index++; - current_position = next_old_position; - in_old_range = !in_old_range; - } else if (next_new_position.bytes < next_old_position.bytes) { - if (in_old_range != in_new_range) { - ts_range_array_add(differences, current_position, next_new_position); - } - if (in_new_range) new_index++; - current_position = next_new_position; - in_new_range = !in_new_range; - } else { - if (in_old_range != in_new_range) { - ts_range_array_add(differences, current_position, next_new_position); - } - if (in_old_range) old_index++; - if (in_new_range) new_index++; - in_old_range = !in_old_range; - in_new_range = !in_new_range; - current_position = next_new_position; - } - } -} - -typedef struct { - TreeCursor cursor; - const t_language *language; - unsigned visible_depth; - bool in_padding; -} Iterator; - -static Iterator iterator_new( - TreeCursor *cursor, - const Subtree *tree, - const t_language *language -) { - array_clear(&cursor->stack); - array_push(&cursor->stack, ((TreeCursorEntry) { - .subtree = tree, - .position = length_zero(), - .child_index = 0, - .structural_child_index = 0, - })); - return (Iterator) { - .cursor = *cursor, - .language = language, - .visible_depth = 1, - .in_padding = false, - }; -} - -static bool iterator_done(Iterator *self) { - return self->cursor.stack.size == 0; -} - -static Length 
iterator_start_position(Iterator *self) { - TreeCursorEntry entry = *array_back(&self->cursor.stack); - if (self->in_padding) { - return entry.position; - } else { - return length_add(entry.position, ts_subtree_padding(*entry.subtree)); - } -} - -static Length iterator_end_position(Iterator *self) { - TreeCursorEntry entry = *array_back(&self->cursor.stack); - Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree)); - if (self->in_padding) { - return result; - } else { - return length_add(result, ts_subtree_size(*entry.subtree)); - } -} - -static bool iterator_tree_is_visible(const Iterator *self) { - TreeCursorEntry entry = *array_back(&self->cursor.stack); - if (ts_subtree_visible(*entry.subtree)) return true; - if (self->cursor.stack.size > 1) { - Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; - return ts_language_alias_at( - self->language, - parent.ptr->production_id, - entry.structural_child_index - ) != 0; - } - return false; -} - -static void iterator_get_visible_state( - const Iterator *self, - Subtree *tree, - t_symbol *alias_symbol, - uint32_t *start_byte -) { - uint32_t i = self->cursor.stack.size - 1; - - if (self->in_padding) { - if (i == 0) return; - i--; - } - - for (; i + 1 > 0; i--) { - TreeCursorEntry entry = self->cursor.stack.contents[i]; - - if (i > 0) { - const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; - *alias_symbol = ts_language_alias_at( - self->language, - parent->ptr->production_id, - entry.structural_child_index - ); - } - - if (ts_subtree_visible(*entry.subtree) || *alias_symbol) { - *tree = *entry.subtree; - *start_byte = entry.position.bytes; - break; - } - } -} - -static void iterator_ascend(Iterator *self) { - if (iterator_done(self)) return; - if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--; - if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false; - self->cursor.stack.size--; -} - -static bool 
iterator_descend(Iterator *self, uint32_t goal_position) { - if (self->in_padding) return false; - - bool did_descend = false; - do { - did_descend = false; - TreeCursorEntry entry = *array_back(&self->cursor.stack); - Length position = entry.position; - uint32_t structural_child_index = 0; - for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { - const Subtree *child = &ts_subtree_children(*entry.subtree)[i]; - Length child_left = length_add(position, ts_subtree_padding(*child)); - Length child_right = length_add(child_left, ts_subtree_size(*child)); - - if (child_right.bytes > goal_position) { - array_push(&self->cursor.stack, ((TreeCursorEntry) { - .subtree = child, - .position = position, - .child_index = i, - .structural_child_index = structural_child_index, - })); - - if (iterator_tree_is_visible(self)) { - if (child_left.bytes > goal_position) { - self->in_padding = true; - } else { - self->visible_depth++; - } - return true; - } - - did_descend = true; - break; - } - - position = child_right; - if (!ts_subtree_extra(*child)) structural_child_index++; - } - } while (did_descend); - - return false; -} - -static void iterator_advance(Iterator *self) { - if (self->in_padding) { - self->in_padding = false; - if (iterator_tree_is_visible(self)) { - self->visible_depth++; - } else { - iterator_descend(self, 0); - } - return; - } - - for (;;) { - if (iterator_tree_is_visible(self)) self->visible_depth--; - TreeCursorEntry entry = array_pop(&self->cursor.stack); - if (iterator_done(self)) return; - - const Subtree *parent = array_back(&self->cursor.stack)->subtree; - uint32_t child_index = entry.child_index + 1; - if (ts_subtree_child_count(*parent) > child_index) { - Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); - uint32_t structural_child_index = entry.structural_child_index; - if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; - const Subtree *next_child = 
&ts_subtree_children(*parent)[child_index]; - - array_push(&self->cursor.stack, ((TreeCursorEntry) { - .subtree = next_child, - .position = position, - .child_index = child_index, - .structural_child_index = structural_child_index, - })); - - if (iterator_tree_is_visible(self)) { - if (ts_subtree_padding(*next_child).bytes > 0) { - self->in_padding = true; - } else { - self->visible_depth++; - } - } else { - iterator_descend(self, 0); - } - break; - } - } -} - -typedef enum { - IteratorDiffers, - IteratorMayDiffer, - IteratorMatches, -} IteratorComparison; - -static IteratorComparison iterator_compare( - const Iterator *old_iter, - const Iterator *new_iter -) { - Subtree old_tree = NULL_SUBTREE; - Subtree new_tree = NULL_SUBTREE; - uint32_t old_start = 0; - uint32_t new_start = 0; - t_symbol old_alias_symbol = 0; - t_symbol new_alias_symbol = 0; - iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start); - iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start); - - if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches; - if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers; - - if ( - old_alias_symbol == new_alias_symbol && - ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree) - ) { - if (old_start == new_start && - !ts_subtree_has_changes(old_tree) && - ts_subtree_symbol(old_tree) != ts_builtin_sym_error && - ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes && - ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE && - ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE && - (ts_subtree_parse_state(old_tree) == ERROR_STATE) == - (ts_subtree_parse_state(new_tree) == ERROR_STATE)) { - return IteratorMatches; - } else { - return IteratorMayDiffer; - } - } - - return IteratorDiffers; -} - -#ifdef DEBUG_GET_CHANGED_RANGES -static inline void iterator_print_state(Iterator *self) { - TreeCursorEntry entry = *array_back(&self->cursor.stack); - TSPoint start = 
iterator_start_position(self).extent; - TSPoint end = iterator_end_position(self).extent; - const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree)); - printf( - "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", - name, self->in_padding ? "(p)" : " ", - self->visible_depth, - start.row + 1, start.column, - end.row + 1, end.column - ); -} -#endif - -unsigned ts_subtree_get_changed_ranges( - const Subtree *old_tree, const Subtree *new_tree, - TreeCursor *cursor1, TreeCursor *cursor2, - const t_language *language, - const TSRangeArray *included_range_differences, - t_range **ranges -) { - TSRangeArray results = array_new(); - - Iterator old_iter = iterator_new(cursor1, old_tree, language); - Iterator new_iter = iterator_new(cursor2, new_tree, language); - - unsigned included_range_difference_index = 0; - - Length position = iterator_start_position(&old_iter); - Length next_position = iterator_start_position(&new_iter); - if (position.bytes < next_position.bytes) { - ts_range_array_add(&results, position, next_position); - position = next_position; - } else if (position.bytes > next_position.bytes) { - ts_range_array_add(&results, next_position, position); - next_position = position; - } - - do { - #ifdef DEBUG_GET_CHANGED_RANGES - printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column); - iterator_print_state(&old_iter); - printf("\tvs\t"); - iterator_print_state(&new_iter); - puts(""); - #endif - - // Compare the old and new subtrees. - IteratorComparison comparison = iterator_compare(&old_iter, &new_iter); - - // Even if the two subtrees appear to be identical, they could differ - // internally if they contain a range of text that was previously - // excluded from the parse, and is now included, or vice-versa. 
- if (comparison == IteratorMatches && ts_range_array_intersects( - included_range_differences, - included_range_difference_index, - position.bytes, - iterator_end_position(&old_iter).bytes - )) { - comparison = IteratorMayDiffer; - } - - bool is_changed = false; - switch (comparison) { - // If the subtrees are definitely identical, move to the end - // of both subtrees. - case IteratorMatches: - next_position = iterator_end_position(&old_iter); - break; - - // If the subtrees might differ internally, descend into both - // subtrees, finding the first child that spans the current position. - case IteratorMayDiffer: - if (iterator_descend(&old_iter, position.bytes)) { - if (!iterator_descend(&new_iter, position.bytes)) { - is_changed = true; - next_position = iterator_end_position(&old_iter); - } - } else if (iterator_descend(&new_iter, position.bytes)) { - is_changed = true; - next_position = iterator_end_position(&new_iter); - } else { - next_position = length_min( - iterator_end_position(&old_iter), - iterator_end_position(&new_iter) - ); - } - break; - - // If the subtrees are different, record a change and then move - // to the end of both subtrees. - case IteratorDiffers: - is_changed = true; - next_position = length_min( - iterator_end_position(&old_iter), - iterator_end_position(&new_iter) - ); - break; - } - - // Ensure that both iterators are caught up to the current position. - while ( - !iterator_done(&old_iter) && - iterator_end_position(&old_iter).bytes <= next_position.bytes - ) iterator_advance(&old_iter); - while ( - !iterator_done(&new_iter) && - iterator_end_position(&new_iter).bytes <= next_position.bytes - ) iterator_advance(&new_iter); - - // Ensure that both iterators are at the same depth in the tree. 
- while (old_iter.visible_depth > new_iter.visible_depth) { - iterator_ascend(&old_iter); - } - while (new_iter.visible_depth > old_iter.visible_depth) { - iterator_ascend(&new_iter); - } - - if (is_changed) { - #ifdef DEBUG_GET_CHANGED_RANGES - printf( - " change: [[%u, %u] - [%u, %u]]\n", - position.extent.row + 1, position.extent.column, - next_position.extent.row + 1, next_position.extent.column - ); - #endif - - ts_range_array_add(&results, position, next_position); - } - - position = next_position; - - // Keep track of the current position in the included range differences - // array in order to avoid scanning the entire array on each iteration. - while (included_range_difference_index < included_range_differences->size) { - const t_range *range = &included_range_differences->contents[ - included_range_difference_index - ]; - if (range->end_byte <= position.bytes) { - included_range_difference_index++; - } else { - break; - } - } - } while (!iterator_done(&old_iter) && !iterator_done(&new_iter)); - - Length old_size = ts_subtree_total_size(*old_tree); - Length new_size = ts_subtree_total_size(*new_tree); - if (old_size.bytes < new_size.bytes) { - ts_range_array_add(&results, old_size, new_size); - } else if (new_size.bytes < old_size.bytes) { - ts_range_array_add(&results, new_size, old_size); - } - - *cursor1 = old_iter.cursor; - *cursor2 = new_iter.cursor; - *ranges = results.contents; - return results.size; -} -#include "src/language.h" - -#include "src/api.h" -#include - -const t_language *ts_language_copy(const t_language *self) { - return self; -} - -void ts_language_delete(const t_language *self) { - (void)(self); -} - -uint32_t ts_language_symbol_count(const t_language *self) { - return self->symbol_count + self->alias_count; -} - -uint32_t ts_language_state_count(const t_language *self) { - return self->state_count; -} - -uint32_t ts_language_version(const t_language *self) { - return self->version; -} - -uint32_t ts_language_field_count(const 
t_language *self) { - return self->field_count; -} - -void ts_language_table_entry( - const t_language *self, - t_state_id state, - t_symbol symbol, - TableEntry *result -) { - if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { - result->action_count = 0; - result->is_reusable = false; - result->actions = NULL; - } else { - assert(symbol < self->token_count); - uint32_t action_index = ts_language_lookup(self, state, symbol); - const TSParseActionEntry *entry = &self->parse_actions[action_index]; - result->action_count = entry->entry.count; - result->is_reusable = entry->entry.reusable; - result->actions = (const TSParseAction *)(entry + 1); - } -} - -TSSymbolMetadata ts_language_symbol_metadata( - const t_language *self, - t_symbol symbol -) { - if (symbol == ts_builtin_sym_error) { - return (TSSymbolMetadata) {.visible = true, .named = true}; - } else if (symbol == ts_builtin_sym_error_repeat) { - return (TSSymbolMetadata) {.visible = false, .named = false}; - } else { - return self->symbol_metadata[symbol]; - } -} - -t_symbol ts_language_public_symbol( - const t_language *self, - t_symbol symbol -) { - if (symbol == ts_builtin_sym_error) return symbol; - return self->public_symbol_map[symbol]; -} - -t_state_id ts_language_next_state( - const t_language *self, - t_state_id state, - t_symbol symbol -) { - if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { - return 0; - } else if (symbol < self->token_count) { - uint32_t count; - const TSParseAction *actions = ts_language_actions(self, state, symbol, &count); - if (count > 0) { - TSParseAction action = actions[count - 1]; - if (action.type == TSParseActionTypeShift) { - return action.shift.extra ? 
state : action.shift.state; - } - } - return 0; - } else { - return ts_language_lookup(self, state, symbol); - } -} - -const char *ts_language_symbol_name( - const t_language *self, - t_symbol symbol -) { - if (symbol == ts_builtin_sym_error) { - return "ERROR"; - } else if (symbol == ts_builtin_sym_error_repeat) { - return "_ERROR"; - } else if (symbol < ts_language_symbol_count(self)) { - return self->symbol_names[symbol]; - } else { - return NULL; - } -} - -t_symbol ts_language_symbol_for_name( - const t_language *self, - const char *string, - uint32_t length, - bool is_named -) { - if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error; - uint16_t count = (uint16_t)ts_language_symbol_count(self); - for (t_symbol i = 0; i < count; i++) { - TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i); - if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; - const char *symbol_name = self->symbol_names[i]; - if (!strncmp(symbol_name, string, length) && !symbol_name[length]) { - return self->public_symbol_map[i]; - } - } - return 0; -} - -t_symbol_type ts_language_symbol_type( - const t_language *self, - t_symbol symbol -) { - TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol); - if (metadata.named && metadata.visible) { - return TSSymbolTypeRegular; - } else if (metadata.visible) { - return TSSymbolTypeAnonymous; - } else { - return TSSymbolTypeAuxiliary; - } -} - -const char *ts_language_field_name_for_id( - const t_language *self, - t_field_id id -) { - uint32_t count = ts_language_field_count(self); - if (count && id <= count) { - return self->field_names[id]; - } else { - return NULL; - } -} - -t_field_id ts_language_field_id_for_name( - const t_language *self, - const char *name, - uint32_t name_length -) { - uint16_t count = (uint16_t)ts_language_field_count(self); - for (t_symbol i = 1; i < count + 1; i++) { - switch (strncmp(name, self->field_names[i], name_length)) { - case 0: - if 
(self->field_names[i][name_length] == 0) return i; - break; - case -1: - return 0; - default: - break; - } - } - return 0; -} - -t_lookahead_iterator *ts_lookahead_iterator_new(const t_language *self, t_state_id state) { - if (state >= self->state_count) return NULL; - LookaheadIterator *iterator = ts_malloc(sizeof(LookaheadIterator)); - *iterator = ts_language_lookaheads(self, state); - return (t_lookahead_iterator *)iterator; -} - -void ts_lookahead_iterator_delete(t_lookahead_iterator *self) { - ts_free(self); -} - -bool ts_lookahead_iterator_reset_state(t_lookahead_iterator * self, t_state_id state) { - LookaheadIterator *iterator = (LookaheadIterator *)self; - if (state >= iterator->language->state_count) return false; - *iterator = ts_language_lookaheads(iterator->language, state); - return true; -} - -const t_language *ts_lookahead_iterator_language(const t_lookahead_iterator *self) { - const LookaheadIterator *iterator = (const LookaheadIterator *)self; - return iterator->language; -} - -bool ts_lookahead_iterator_reset(t_lookahead_iterator *self, const t_language *language, t_state_id state) { - if (state >= language->state_count) return false; - LookaheadIterator *iterator = (LookaheadIterator *)self; - *iterator = ts_language_lookaheads(language, state); - return true; -} - -bool ts_lookahead_iterator_next(t_lookahead_iterator *self) { - LookaheadIterator *iterator = (LookaheadIterator *)self; - return ts_lookahead_iterator__next(iterator); -} - -t_symbol ts_lookahead_iterator_current_symbol(const t_lookahead_iterator *self) { - const LookaheadIterator *iterator = (const LookaheadIterator *)self; - return iterator->symbol; -} - -const char *ts_lookahead_iterator_current_symbol_name(const t_lookahead_iterator *self) { - const LookaheadIterator *iterator = (const LookaheadIterator *)self; - return ts_language_symbol_name(iterator->language, iterator->symbol); -} -#include -#include "src/lexer.h" -#include "src/subtree.h" -#include "src/length.h" 
-//#include "src/unicode.h" - -#define LOG(message, character) \ - if (self->logger.log) { \ - snprintf( \ - self->debug_buffer, \ - TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \ - 32 <= character && character < 127 ? \ - message " character:'%c'" : \ - message " character:%d", \ - character \ - ); \ - self->logger.log( \ - self->logger.payload, \ - TSLogTypeLex, \ - self->debug_buffer \ - ); \ - } - -static const int32_t BYTE_ORDER_MARK = 0xFEFF; - -static const t_range DEFAULT_RANGE = { - .start_point = { - .row = 0, - .column = 0, - }, - .end_point = { - .row = UINT32_MAX, - .column = UINT32_MAX, - }, - .start_byte = 0, - .end_byte = UINT32_MAX -}; - -// Check if the lexer has reached EOF. This state is stored -// by setting the lexer's `current_included_range_index` such that -// it has consumed all of its available ranges. -static bool ts_lexer__eof(const TSLexer *_self) { - Lexer *self = (Lexer *)_self; - return self->current_included_range_index == self->included_range_count; -} - -// Clear the currently stored chunk of source code, because the lexer's -// position has changed. -static void ts_lexer__clear_chunk(Lexer *self) { - self->chunk = NULL; - self->chunk_size = 0; - self->chunk_start = 0; -} - -// Call the lexer's input callback to obtain a new chunk of source code -// for the current position. 
-static void ts_lexer__get_chunk(Lexer *self) { - self->chunk_start = self->current_position.bytes; - self->chunk = self->input.read( - self->input.payload, - self->current_position.bytes, - self->current_position.extent, - &self->chunk_size - ); - if (!self->chunk_size) { - self->current_included_range_index = self->included_range_count; - self->chunk = NULL; - } -} -typedef uint32_t (*DecodeFunc)( - const uint8_t *string, - uint32_t length, - int32_t *code_point -); - -static uint32_t ts_decode_ascii( - const uint8_t *string, - uint32_t length, - int32_t *code_point -) { - uint32_t i = 1; - (void)(length); - *code_point = *string; - return i; -} - -// Decode the next unicode character in the current chunk of source code. -// This assumes that the lexer has already retrieved a chunk of source -// code that spans the current position. -static void ts_lexer__get_lookahead(Lexer *self) { - uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start; - uint32_t size = self->chunk_size - position_in_chunk; - - if (size == 0) { - self->lookahead_size = 1; - self->data.lookahead = '\0'; - return; - } - - #define TS_DECODE_ERROR -1 - - const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk; - // UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8 - // ? ts_decode_utf8 - // : ts_decode_utf16; - - - self->lookahead_size = ts_decode_ascii(chunk, size, &self->data.lookahead); - - // If this chunk ended in the middle of a multi-byte character, - // try again with a fresh chunk. 
- if (self->data.lookahead == TS_DECODE_ERROR && size < 4) { - ts_lexer__get_chunk(self); - chunk = (const uint8_t *)self->chunk; - size = self->chunk_size; - self->lookahead_size = ts_decode_ascii(chunk, size, &self->data.lookahead); - } - - if (self->data.lookahead == TS_DECODE_ERROR) { - self->lookahead_size = 1; - } -} - -static void ts_lexer_goto(Lexer *self, Length position) { - self->current_position = position; - - // Move to the first valid position at or after the given position. - bool found_included_range = false; - for (unsigned i = 0; i < self->included_range_count; i++) { - t_range *included_range = &self->included_ranges[i]; - if ( - included_range->end_byte > self->current_position.bytes && - included_range->end_byte > included_range->start_byte - ) { - if (included_range->start_byte >= self->current_position.bytes) { - self->current_position = (Length) { - .bytes = included_range->start_byte, - .extent = included_range->start_point, - }; - } - - self->current_included_range_index = i; - found_included_range = true; - break; - } - } - - if (found_included_range) { - // If the current position is outside of the current chunk of text, - // then clear out the current chunk of text. - if (self->chunk && ( - self->current_position.bytes < self->chunk_start || - self->current_position.bytes >= self->chunk_start + self->chunk_size - )) { - ts_lexer__clear_chunk(self); - } - - self->lookahead_size = 0; - self->data.lookahead = '\0'; - } - - // If the given position is beyond any of included ranges, move to the EOF - // state - past the end of the included ranges. 
- else { - self->current_included_range_index = self->included_range_count; - t_range *last_included_range = &self->included_ranges[self->included_range_count - 1]; - self->current_position = (Length) { - .bytes = last_included_range->end_byte, - .extent = last_included_range->end_point, - }; - ts_lexer__clear_chunk(self); - self->lookahead_size = 1; - self->data.lookahead = '\0'; - } -} - -// Intended to be called only from functions that control logging. -static void ts_lexer__do_advance(Lexer *self, bool skip) { - if (self->lookahead_size) { - self->current_position.bytes += self->lookahead_size; - if (self->data.lookahead == '\n') { - self->current_position.extent.row++; - self->current_position.extent.column = 0; - } else { - self->current_position.extent.column += self->lookahead_size; - } - } - - const t_range *current_range = &self->included_ranges[self->current_included_range_index]; - while ( - self->current_position.bytes >= current_range->end_byte || - current_range->end_byte == current_range->start_byte - ) { - if (self->current_included_range_index < self->included_range_count) { - self->current_included_range_index++; - } - if (self->current_included_range_index < self->included_range_count) { - current_range++; - self->current_position = (Length) { - current_range->start_byte, - current_range->start_point, - }; - } else { - current_range = NULL; - break; - } - } - - if (skip) self->token_start_position = self->current_position; - - if (current_range) { - if ( - self->current_position.bytes < self->chunk_start || - self->current_position.bytes >= self->chunk_start + self->chunk_size - ) { - ts_lexer__get_chunk(self); - } - ts_lexer__get_lookahead(self); - } else { - ts_lexer__clear_chunk(self); - self->data.lookahead = '\0'; - self->lookahead_size = 1; - } -} - -// Advance to the next character in the source code, retrieving a new -// chunk of source code if needed. 
-static void ts_lexer__advance(TSLexer *_self, bool skip) { - Lexer *self = (Lexer *)_self; - if (!self->chunk) return; - - if (skip) { - LOG("skip", self->data.lookahead) - } else { - LOG("consume", self->data.lookahead) - } - - ts_lexer__do_advance(self, skip); -} - -// Mark that a token match has completed. This can be called multiple -// times if a longer match is found later. -static void ts_lexer__mark_end(TSLexer *_self) { - Lexer *self = (Lexer *)_self; - if (!ts_lexer__eof(&self->data)) { - // If the lexer is right at the beginning of included range, - // then the token should be considered to end at the *end* of the - // previous included range, rather than here. - t_range *current_included_range = &self->included_ranges[ - self->current_included_range_index - ]; - if ( - self->current_included_range_index > 0 && - self->current_position.bytes == current_included_range->start_byte - ) { - t_range *previous_included_range = current_included_range - 1; - self->token_end_position = (Length) { - previous_included_range->end_byte, - previous_included_range->end_point, - }; - return; - } - } - self->token_end_position = self->current_position; -} - -static uint32_t ts_lexer__get_column(TSLexer *_self) { - Lexer *self = (Lexer *)_self; - - uint32_t goal_byte = self->current_position.bytes; - - self->did_get_column = true; - self->current_position.bytes -= self->current_position.extent.column; - self->current_position.extent.column = 0; - - if (self->current_position.bytes < self->chunk_start) { - ts_lexer__get_chunk(self); - } - - uint32_t result = 0; - if (!ts_lexer__eof(_self)) { - ts_lexer__get_lookahead(self); - while (self->current_position.bytes < goal_byte && self->chunk) { - result++; - ts_lexer__do_advance(self, false); - if (ts_lexer__eof(_self)) break; - } - } - - return result; -} - -// Is the lexer at a boundary between two disjoint included ranges of -// source code? 
This is exposed as an API because some languages' external -// scanners need to perform custom actions at these boundaries. -static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) { - const Lexer *self = (const Lexer *)_self; - if (self->current_included_range_index < self->included_range_count) { - t_range *current_range = &self->included_ranges[self->current_included_range_index]; - return self->current_position.bytes == current_range->start_byte; - } else { - return false; - } -} - -void ts_lexer_init(Lexer *self) { - *self = (Lexer) { - .data = { - // The lexer's methods are stored as struct fields so that generated - // parsers can call them without needing to be linked against this - // library. - .advance = ts_lexer__advance, - .mark_end = ts_lexer__mark_end, - .get_column = ts_lexer__get_column, - .is_at_included_range_start = ts_lexer__is_at_included_range_start, - .eof = ts_lexer__eof, - .lookahead = 0, - .result_symbol = 0, - }, - .chunk = NULL, - .chunk_size = 0, - .chunk_start = 0, - .current_position = {0, {0, 0}}, - .logger = { - .payload = NULL, - .log = NULL - }, - .included_ranges = NULL, - .included_range_count = 0, - .current_included_range_index = 0, - }; - ts_lexer_set_included_ranges(self, NULL, 0); -} - -void ts_lexer_delete(Lexer *self) { - ts_free(self->included_ranges); -} - -void ts_lexer_set_input(Lexer *self, t_input input) { - self->input = input; - ts_lexer__clear_chunk(self); - ts_lexer_goto(self, self->current_position); -} - -// Move the lexer to the given position. This doesn't do any work -// if the parser is already at the given position. 
-void ts_lexer_reset(Lexer *self, Length position) { - if (position.bytes != self->current_position.bytes) { - ts_lexer_goto(self, position); - } -} - -void ts_lexer_start(Lexer *self) { - self->token_start_position = self->current_position; - self->token_end_position = LENGTH_UNDEFINED; - self->data.result_symbol = 0; - self->did_get_column = false; - if (!ts_lexer__eof(&self->data)) { - if (!self->chunk_size) ts_lexer__get_chunk(self); - if (!self->lookahead_size) ts_lexer__get_lookahead(self); - if ( - self->current_position.bytes == 0 && - self->data.lookahead == BYTE_ORDER_MARK - ) ts_lexer__advance(&self->data, true); - } -} - -void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) { - if (length_is_undefined(self->token_end_position)) { - ts_lexer__mark_end(&self->data); - } - - // If the token ended at an included range boundary, then its end position - // will have been reset to the end of the preceding range. Reset the start - // position to match. - if (self->token_end_position.bytes < self->token_start_position.bytes) { - self->token_start_position = self->token_end_position; - } - - uint32_t current_lookahead_end_byte = self->current_position.bytes + 1; - - // In order to determine that a byte sequence is invalid UTF8 or UTF16, - // the character decoding algorithm may have looked at the following byte. - // Therefore, the next byte *after* the current (invalid) character - // affects the interpretation of the current character. 
- if (self->data.lookahead == TS_DECODE_ERROR) { - current_lookahead_end_byte++; - } - - if (current_lookahead_end_byte > *lookahead_end_byte) { - *lookahead_end_byte = current_lookahead_end_byte; - } -} - -void ts_lexer_advance_to_end(Lexer *self) { - while (self->chunk) { - ts_lexer__advance(&self->data, false); - } -} - -void ts_lexer_mark_end(Lexer *self) { - ts_lexer__mark_end(&self->data); -} - -bool ts_lexer_set_included_ranges( - Lexer *self, - const t_range *ranges, - uint32_t count -) { - if (count == 0 || !ranges) { - ranges = &DEFAULT_RANGE; - count = 1; - } else { - uint32_t previous_byte = 0; - for (unsigned i = 0; i < count; i++) { - const t_range *range = &ranges[i]; - if ( - range->start_byte < previous_byte || - range->end_byte < range->start_byte - ) return false; - previous_byte = range->end_byte; - } - } - - size_t size = count * sizeof(t_range); - self->included_ranges = ts_realloc(self->included_ranges, size); - memcpy(self->included_ranges, ranges, size); - self->included_range_count = count; - ts_lexer_goto(self, self->current_position); - return true; -} - -t_range *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) { - *count = self->included_range_count; - return self->included_ranges; -} - -#undef LOG - - - - - - - - - - - - - -#include -#include "src/subtree.h" -#include "src/tree.h" -#include "src/language.h" - -typedef struct { - Subtree parent; - const t_tree *tree; - Length position; - uint32_t child_index; - uint32_t structural_child_index; - const t_symbol *alias_sequence; -} NodeChildIterator; - -// TSNode - constructors - -t_parse_node ts_node_new( - const t_tree *tree, - const Subtree *subtree, - Length position, - t_symbol alias -) { - return (t_parse_node) { - {position.bytes, position.extent.row, position.extent.column, alias}, - subtree, - tree, - }; -} - -static inline t_parse_node ts_node__null(void) { - return ts_node_new(NULL, NULL, length_zero(), 0); -} - -// TSNode - accessors - -uint32_t 
ts_node_start_byte(t_parse_node self) { - return self.context[0]; -} - -t_point ts_node_start_point(t_parse_node self) { - return (t_point) {self.context[1], self.context[2]}; -} - -static inline uint32_t ts_node__alias(const t_parse_node *self) { - return self->context[3]; -} - -static inline Subtree ts_node__subtree(t_parse_node self) { - return *(const Subtree *)self.id; -} - -// NodeChildIterator - -static inline NodeChildIterator ts_node_iterate_children(const t_parse_node *node) { - Subtree subtree = ts_node__subtree(*node); - if (ts_subtree_child_count(subtree) == 0) { - return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; - } - const t_symbol *alias_sequence = ts_language_alias_sequence( - node->tree->language, - subtree.ptr->production_id - ); - return (NodeChildIterator) { - .tree = node->tree, - .parent = subtree, - .position = {ts_node_start_byte(*node), ts_node_start_point(*node)}, - .child_index = 0, - .structural_child_index = 0, - .alias_sequence = alias_sequence, - }; -} - -static inline bool ts_node_child_iterator_done(NodeChildIterator *self) { - return self->child_index == self->parent.ptr->child_count; -} - -static inline bool ts_node_child_iterator_next( - NodeChildIterator *self, - t_parse_node *result -) { - if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false; - const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; - t_symbol alias_symbol = 0; - if (!ts_subtree_extra(*child)) { - if (self->alias_sequence) { - alias_symbol = self->alias_sequence[self->structural_child_index]; - } - self->structural_child_index++; - } - if (self->child_index > 0) { - self->position = length_add(self->position, ts_subtree_padding(*child)); - } - *result = ts_node_new( - self->tree, - child, - self->position, - alias_symbol - ); - self->position = length_add(self->position, ts_subtree_size(*child)); - self->child_index++; - return true; -} - -// TSNode - private - -static inline bool 
ts_node__is_relevant(t_parse_node self, bool include_anonymous) { - Subtree tree = ts_node__subtree(self); - if (include_anonymous) { - return ts_subtree_visible(tree) || ts_node__alias(&self); - } else { - t_symbol alias = ts_node__alias(&self); - if (alias) { - return ts_language_symbol_metadata(self.tree->language, alias).named; - } else { - return ts_subtree_visible(tree) && ts_subtree_named(tree); - } - } -} - -static inline uint32_t ts_node__relevant_child_count( - t_parse_node self, - bool include_anonymous -) { - Subtree tree = ts_node__subtree(self); - if (ts_subtree_child_count(tree) > 0) { - if (include_anonymous) { - return tree.ptr->visible_child_count; - } else { - return tree.ptr->named_child_count; - } - } else { - return 0; - } -} - -static inline t_parse_node ts_node__child( - t_parse_node self, - uint32_t child_index, - bool include_anonymous -) { - t_parse_node result = self; - bool did_descend = true; - - while (did_descend) { - did_descend = false; - - t_parse_node child; - uint32_t index = 0; - NodeChildIterator iterator = ts_node_iterate_children(&result); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (ts_node__is_relevant(child, include_anonymous)) { - if (index == child_index) { - return child; - } - index++; - } else { - uint32_t grandchild_index = child_index - index; - uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous); - if (grandchild_index < grandchild_count) { - did_descend = true; - result = child; - child_index = grandchild_index; - break; - } - index += grandchild_count; - } - } - } - - return ts_node__null(); -} - -static bool ts_subtree_has_trailing_empty_descendant( - Subtree self, - Subtree other -) { - for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) { - Subtree child = ts_subtree_children(self)[i]; - if (ts_subtree_total_bytes(child) > 0) break; - if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) { - return true; - } - 
} - return false; -} - -static inline t_parse_node ts_node__prev_sibling(t_parse_node self, bool include_anonymous) { - Subtree self_subtree = ts_node__subtree(self); - bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0; - uint32_t target_end_byte = ts_node_end_byte(self); - - t_parse_node node = ts_node_parent(self); - t_parse_node earlier_node = ts_node__null(); - bool earlier_node_is_relevant = false; - - while (!ts_node_is_null(node)) { - t_parse_node earlier_child = ts_node__null(); - bool earlier_child_is_relevant = false; - bool found_child_containing_target = false; - - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (child.id == self.id) break; - if (iterator.position.bytes > target_end_byte) { - found_child_containing_target = true; - break; - } - - if (iterator.position.bytes == target_end_byte && - (!self_is_empty || - ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) { - found_child_containing_target = true; - break; - } - - if (ts_node__is_relevant(child, include_anonymous)) { - earlier_child = child; - earlier_child_is_relevant = true; - } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { - earlier_child = child; - earlier_child_is_relevant = false; - } - } - - if (found_child_containing_target) { - if (!ts_node_is_null(earlier_child)) { - earlier_node = earlier_child; - earlier_node_is_relevant = earlier_child_is_relevant; - } - node = child; - } else if (earlier_child_is_relevant) { - return earlier_child; - } else if (!ts_node_is_null(earlier_child)) { - node = earlier_child; - } else if (earlier_node_is_relevant) { - return earlier_node; - } else { - node = earlier_node; - earlier_node = ts_node__null(); - earlier_node_is_relevant = false; - } - } - - return ts_node__null(); -} - -static inline t_parse_node ts_node__next_sibling(t_parse_node self, bool include_anonymous) { - uint32_t 
target_end_byte = ts_node_end_byte(self); - - t_parse_node node = ts_node_parent(self); - t_parse_node later_node = ts_node__null(); - bool later_node_is_relevant = false; - - while (!ts_node_is_null(node)) { - t_parse_node later_child = ts_node__null(); - bool later_child_is_relevant = false; - t_parse_node child_containing_target = ts_node__null(); - - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (iterator.position.bytes < target_end_byte) continue; - if (ts_node_start_byte(child) <= ts_node_start_byte(self)) { - if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) { - child_containing_target = child; - } - } else if (ts_node__is_relevant(child, include_anonymous)) { - later_child = child; - later_child_is_relevant = true; - break; - } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { - later_child = child; - later_child_is_relevant = false; - break; - } - } - - if (!ts_node_is_null(child_containing_target)) { - if (!ts_node_is_null(later_child)) { - later_node = later_child; - later_node_is_relevant = later_child_is_relevant; - } - node = child_containing_target; - } else if (later_child_is_relevant) { - return later_child; - } else if (!ts_node_is_null(later_child)) { - node = later_child; - } else if (later_node_is_relevant) { - return later_node; - } else { - node = later_node; - } - } - - return ts_node__null(); -} - -static inline t_parse_node ts_node__first_child_for_byte( - t_parse_node self, - uint32_t goal, - bool include_anonymous -) { - t_parse_node node = self; - bool did_descend = true; - - while (did_descend) { - did_descend = false; - - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (ts_node_end_byte(child) > goal) { - if (ts_node__is_relevant(child, include_anonymous)) { - return child; - } else if 
(ts_node_child_count(child) > 0) { - did_descend = true; - node = child; - break; - } - } - } - } - - return ts_node__null(); -} - -static inline t_parse_node ts_node__descendant_for_byte_range( - t_parse_node self, - uint32_t range_start, - uint32_t range_end, - bool include_anonymous -) { - t_parse_node node = self; - t_parse_node last_visible_node = self; - - bool did_descend = true; - while (did_descend) { - did_descend = false; - - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) { - uint32_t node_end = iterator.position.bytes; - - // The end of this node must extend far enough forward to touch - // the end of the range and exceed the start of the range. - if (node_end < range_end) continue; - if (node_end <= range_start) continue; - - // The start of this node must extend far enough backward to - // touch the start of the range. - if (range_start < ts_node_start_byte(child)) break; - - node = child; - if (ts_node__is_relevant(node, include_anonymous)) { - last_visible_node = node; - } - did_descend = true; - break; - } - } - - return last_visible_node; -} - -static inline t_parse_node ts_node__descendant_for_point_range( - t_parse_node self, - t_point range_start, - t_point range_end, - bool include_anonymous -) { - t_parse_node node = self; - t_parse_node last_visible_node = self; - - bool did_descend = true; - while (did_descend) { - did_descend = false; - - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) { - t_point node_end = iterator.position.extent; - - // The end of this node must extend far enough forward to touch - // the end of the range and exceed the start of the range. - if (point_lt(node_end, range_end)) continue; - if (point_lte(node_end, range_start)) continue; - - // The start of this node must extend far enough backward to - // touch the start of the range. 
- if (point_lt(range_start, ts_node_start_point(child))) break; - - node = child; - if (ts_node__is_relevant(node, include_anonymous)) { - last_visible_node = node; - } - did_descend = true; - break; - } - } - - return last_visible_node; -} - -// TSNode - public - -uint32_t ts_node_end_byte(t_parse_node self) { - return ts_node_start_byte(self) + ts_subtree_size(ts_node__subtree(self)).bytes; -} - -t_point ts_node_end_point(t_parse_node self) { - return point_add(ts_node_start_point(self), ts_subtree_size(ts_node__subtree(self)).extent); -} - -t_symbol ts_node_symbol(t_parse_node self) { - t_symbol symbol = ts_node__alias(&self); - if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); - return ts_language_public_symbol(self.tree->language, symbol); -} - -const char *ts_node_type(t_parse_node self) { - t_symbol symbol = ts_node__alias(&self); - if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); - return ts_language_symbol_name(self.tree->language, symbol); -} - -const t_language *ts_node_language(t_parse_node self) { - return self.tree->language; -} - -t_symbol ts_node_grammar_symbol(t_parse_node self) { - return ts_subtree_symbol(ts_node__subtree(self)); -} - -const char *ts_node_grammar_type(t_parse_node self) { - t_symbol symbol = ts_subtree_symbol(ts_node__subtree(self)); - return ts_language_symbol_name(self.tree->language, symbol); -} - -char *ts_node_string(t_parse_node self) { - t_symbol alias_symbol = ts_node__alias(&self); - return ts_subtree_string( - ts_node__subtree(self), - alias_symbol, - ts_language_symbol_metadata(self.tree->language, alias_symbol).visible, - self.tree->language, - false - ); -} - -bool ts_node_eq(t_parse_node self, t_parse_node other) { - return self.tree == other.tree && self.id == other.id; -} - -bool ts_node_is_null(t_parse_node self) { - return self.id == 0; -} - -bool ts_node_is_extra(t_parse_node self) { - return ts_subtree_extra(ts_node__subtree(self)); -} - -bool ts_node_is_named(t_parse_node self) 
{ - t_symbol alias = ts_node__alias(&self); - return alias - ? ts_language_symbol_metadata(self.tree->language, alias).named - : ts_subtree_named(ts_node__subtree(self)); -} - -bool ts_node_is_missing(t_parse_node self) { - return ts_subtree_missing(ts_node__subtree(self)); -} - -bool ts_node_has_changes(t_parse_node self) { - return ts_subtree_has_changes(ts_node__subtree(self)); -} - -bool ts_node_has_error(t_parse_node self) { - return ts_subtree_error_cost(ts_node__subtree(self)) > 0; -} - -bool ts_node_is_error(t_parse_node self) { - t_symbol symbol = ts_node_symbol(self); - return symbol == ts_builtin_sym_error; -} - -uint32_t ts_node_descendant_count(t_parse_node self) { - return ts_subtree_visible_descendant_count(ts_node__subtree(self)) + 1; -} - -t_state_id ts_node_parse_state(t_parse_node self) { - return ts_subtree_parse_state(ts_node__subtree(self)); -} - -t_state_id ts_node_next_parse_state(t_parse_node self) { - const t_language *language = self.tree->language; - uint16_t state = ts_node_parse_state(self); - if (state == TS_TREE_STATE_NONE) { - return TS_TREE_STATE_NONE; - } - uint16_t symbol = ts_node_grammar_symbol(self); - return ts_language_next_state(language, state, symbol); -} - -t_parse_node ts_node_parent(t_parse_node self) { - t_parse_node node = ts_tree_root_node(self.tree); - if (node.id == self.id) return ts_node__null(); - - while (true) { - t_parse_node next_node = ts_node_child_containing_descendant(node, self); - if (ts_node_is_null(next_node)) break; - node = next_node; - } - - return node; -} - -t_parse_node ts_node_child_containing_descendant(t_parse_node self, t_parse_node subnode) { - uint32_t start_byte = ts_node_start_byte(subnode); - uint32_t end_byte = ts_node_end_byte(subnode); - - do { - NodeChildIterator iter = ts_node_iterate_children(&self); - do { - if ( - !ts_node_child_iterator_next(&iter, &self) - || ts_node_start_byte(self) > start_byte - || self.id == subnode.id - ) { - return ts_node__null(); - } - } while 
(iter.position.bytes < end_byte || ts_node_child_count(self) == 0); - } while (!ts_node__is_relevant(self, true)); - - return self; -} - -t_parse_node ts_node_child(t_parse_node self, uint32_t child_index) { - return ts_node__child(self, child_index, true); -} - -t_parse_node ts_node_named_child(t_parse_node self, uint32_t child_index) { - return ts_node__child(self, child_index, false); -} - -t_parse_node ts_node_child_by_field_id(t_parse_node self, t_field_id field_id) { -recur: - if (!field_id || ts_node_child_count(self) == 0) return ts_node__null(); - - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map( - self.tree->language, - ts_node__subtree(self).ptr->production_id, - &field_map, - &field_map_end - ); - if (field_map == field_map_end) return ts_node__null(); - - // The field mappings are sorted by their field id. Scan all - // the mappings to find the ones for the given field id. - while (field_map->field_id < field_id) { - field_map++; - if (field_map == field_map_end) return ts_node__null(); - } - while (field_map_end[-1].field_id > field_id) { - field_map_end--; - if (field_map == field_map_end) return ts_node__null(); - } - - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&self); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (!ts_subtree_extra(ts_node__subtree(child))) { - uint32_t index = iterator.structural_child_index - 1; - if (index < field_map->child_index) continue; - - // Hidden nodes' fields are "inherited" by their visible parent. - if (field_map->inherited) { - - // If this is the *last* possible child node for this field, - // then perform a tail call to avoid recursion. - if (field_map + 1 == field_map_end) { - self = child; - goto recur; - } - - // Otherwise, descend into this child, but if it doesn't contain - // the field, continue searching subsequent children. 
- else { - t_parse_node result = ts_node_child_by_field_id(child, field_id); - if (result.id) return result; - field_map++; - if (field_map == field_map_end) return ts_node__null(); - } - } - - else if (ts_node__is_relevant(child, true)) { - return child; - } - - // If the field refers to a hidden node with visible children, - // return the first visible child. - else if (ts_node_child_count(child) > 0 ) { - return ts_node_child(child, 0); - } - - // Otherwise, continue searching subsequent children. - else { - field_map++; - if (field_map == field_map_end) return ts_node__null(); - } - } - } - - return ts_node__null(); -} - -static inline const char *ts_node__field_name_from_language(t_parse_node self, uint32_t structural_child_index) { - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map( - self.tree->language, - ts_node__subtree(self).ptr->production_id, - &field_map, - &field_map_end - ); - for (; field_map != field_map_end; field_map++) { - if (!field_map->inherited && field_map->child_index == structural_child_index) { - return self.tree->language->field_names[field_map->field_id]; - } - } - return NULL; -} - -const char *ts_node_field_name_for_child(t_parse_node self, uint32_t child_index) { - t_parse_node result = self; - bool did_descend = true; - const char *inherited_field_name = NULL; - - while (did_descend) { - did_descend = false; - - t_parse_node child; - uint32_t index = 0; - NodeChildIterator iterator = ts_node_iterate_children(&result); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (ts_node__is_relevant(child, true)) { - if (index == child_index) { - const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); - if (field_name) return field_name; - return inherited_field_name; - } - index++; - } else { - uint32_t grandchild_index = child_index - index; - uint32_t grandchild_count = ts_node__relevant_child_count(child, true); - if (grandchild_index < 
grandchild_count) { - const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); - if (field_name) inherited_field_name = field_name; - - did_descend = true; - result = child; - child_index = grandchild_index; - break; - } - index += grandchild_count; - } - } - } - - return NULL; -} - -t_parse_node ts_node_child_by_field_name( - t_parse_node self, - const char *name, - uint32_t name_length -) { - t_field_id field_id = ts_language_field_id_for_name( - self.tree->language, - name, - name_length - ); - return ts_node_child_by_field_id(self, field_id); -} - -uint32_t ts_node_child_count(t_parse_node self) { - Subtree tree = ts_node__subtree(self); - if (ts_subtree_child_count(tree) > 0) { - return tree.ptr->visible_child_count; - } else { - return 0; - } -} - -uint32_t ts_node_named_child_count(t_parse_node self) { - Subtree tree = ts_node__subtree(self); - if (ts_subtree_child_count(tree) > 0) { - return tree.ptr->named_child_count; - } else { - return 0; - } -} - -t_parse_node ts_node_next_sibling(t_parse_node self) { - return ts_node__next_sibling(self, true); -} - -t_parse_node ts_node_next_named_sibling(t_parse_node self) { - return ts_node__next_sibling(self, false); -} - -t_parse_node ts_node_prev_sibling(t_parse_node self) { - return ts_node__prev_sibling(self, true); -} - -t_parse_node ts_node_prev_named_sibling(t_parse_node self) { - return ts_node__prev_sibling(self, false); -} - -t_parse_node ts_node_first_child_for_byte(t_parse_node self, uint32_t byte) { - return ts_node__first_child_for_byte(self, byte, true); -} - -t_parse_node ts_node_first_named_child_for_byte(t_parse_node self, uint32_t byte) { - return ts_node__first_child_for_byte(self, byte, false); -} - -t_parse_node ts_node_descendant_for_byte_range( - t_parse_node self, - uint32_t start, - uint32_t end -) { - return ts_node__descendant_for_byte_range(self, start, end, true); -} - -t_parse_node ts_node_named_descendant_for_byte_range( - t_parse_node 
self, - uint32_t start, - uint32_t end -) { - return ts_node__descendant_for_byte_range(self, start, end, false); -} - -t_parse_node ts_node_descendant_for_point_range( - t_parse_node self, - t_point start, - t_point end -) { - return ts_node__descendant_for_point_range(self, start, end, true); -} - -t_parse_node ts_node_named_descendant_for_point_range( - t_parse_node self, - t_point start, - t_point end -) { - return ts_node__descendant_for_point_range(self, start, end, false); -} - -void ts_node_edit(t_parse_node *self, const t_input_edit *edit) { - uint32_t start_byte = ts_node_start_byte(*self); - t_point start_point = ts_node_start_point(*self); - - if (start_byte >= edit->old_end_byte) { - start_byte = edit->new_end_byte + (start_byte - edit->old_end_byte); - start_point = point_add(edit->new_end_point, point_sub(start_point, edit->old_end_point)); - } else if (start_byte > edit->start_byte) { - start_byte = edit->new_end_byte; - start_point = edit->new_end_point; - } - - self->context[0] = start_byte; - self->context[1] = start_point.row; - self->context[2] = start_point.column; -} - - -#include -#include -#include -#include -#include -#include -#include "src/api.h" -#include "src/alloc.h" -#include "src/array.h" -#include "src/atomic.h" -#include "src/clock.h" -#include "src/error_costs.h" -#include "src/get_changed_ranges.h" -#include "src/language.h" -#include "src/length.h" -#include "src/lexer.h" -#include "src/reduce_action.h" -#include "src/reusable_node.h" -#include "src/stack.h" -#include "src/subtree.h" -#include "src/tree.h" - - -#define LOG(...) 
\ - if (self->lexer.logger.log || self->dot_graph_file) { \ - snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \ - ts_parser__log(self); \ - } - -#define LOG_LOOKAHEAD(symbol_name, size) \ - if (self->lexer.logger.log || self->dot_graph_file) { \ - char *buf = self->lexer.debug_buffer; \ - const char *symbol = symbol_name; \ - int off = sprintf(buf, "lexed_lookahead sym:"); \ - for ( \ - int i = 0; \ - symbol[i] != '\0' \ - && off < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; \ - i++ \ - ) { \ - switch (symbol[i]) { \ - case '\t': buf[off++] = '\\'; buf[off++] = 't'; break; \ - case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break; \ - case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break; \ - case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break; \ - case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break; \ - case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \ - default: buf[off++] = symbol[i]; break; \ - } \ - } \ - snprintf( \ - buf + off, \ - TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off, \ - ", size:%u", \ - size \ - ); \ - ts_parser__log(self); \ - } - -#define LOG_STACK() \ - if (self->dot_graph_file) { \ - ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \ - fputs("\n\n", self->dot_graph_file); \ - } - -#define LOG_TREE(tree) \ - if (self->dot_graph_file) { \ - ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \ - fputs("\n", self->dot_graph_file); \ - } - -#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol) - -#define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree)) - -static const unsigned MAX_VERSION_COUNT = 6; -static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; -static const unsigned MAX_SUMMARY_DEPTH = 16; -static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; -static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100; - -typedef struct { - Subtree token; - Subtree last_external_token; - uint32_t byte_index; -} 
TokenCache; - -struct t_parser { - Lexer lexer; - Stack *stack; - SubtreePool tree_pool; - const t_language *language; - ReduceActionSet reduce_actions; - Subtree finished_tree; - SubtreeArray trailing_extras; - SubtreeArray trailing_extras2; - SubtreeArray scratch_trees; - TokenCache token_cache; - ReusableNode reusable_node; - void *external_scanner_payload; - FILE *dot_graph_file; - TSClock end_clock; - TSDuration timeout_duration; - unsigned accept_count; - unsigned operation_count; - const volatile size_t *cancellation_flag; - Subtree old_tree; - TSRangeArray included_range_differences; - unsigned included_range_difference_index; - bool has_scanner_error; -}; - -typedef struct { - unsigned cost; - unsigned node_count; - int dynamic_precedence; - bool is_in_error; -} ErrorStatus; - -typedef enum { - ErrorComparisonTakeLeft, - ErrorComparisonPreferLeft, - ErrorComparisonNone, - ErrorComparisonPreferRight, - ErrorComparisonTakeRight, -} ErrorComparison; - -typedef struct { - const char *string; - uint32_t length; -} TSStringInput; - -// StringInput - -static const char *ts_string_input_read( - void *_self, - uint32_t byte, - t_point point, - uint32_t *length -) { - (void)point; - TSStringInput *self = (TSStringInput *)_self; - if (byte >= self->length) { - *length = 0; - return ""; - } else { - *length = self->length - byte; - return self->string + byte; - } -} - -// Parser - Private - -static void ts_parser__log(t_parser *self) { - if (self->lexer.logger.log) { - self->lexer.logger.log( - self->lexer.logger.payload, - TSLogTypeParse, - self->lexer.debug_buffer - ); - } - - if (self->dot_graph_file) { - fprintf(self->dot_graph_file, "graph {\nlabel=\""); - for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) { - if (*chr == '"' || *chr == '\\') fputc('\\', self->dot_graph_file); - fputc(*chr, self->dot_graph_file); - } - fprintf(self->dot_graph_file, "\"\n}\n\n"); - } -} - -static bool ts_parser__breakdown_top_of_stack( - t_parser *self, - 
StackVersion version -) { - bool did_break_down = false; - bool pending = false; - - do { - StackSliceArray pop = ts_stack_pop_pending(self->stack, version); - if (!pop.size) break; - - did_break_down = true; - pending = false; - for (uint32_t i = 0; i < pop.size; i++) { - StackSlice slice = pop.contents[i]; - t_state_id state = ts_stack_state(self->stack, slice.version); - Subtree parent = *array_front(&slice.subtrees); - - for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) { - Subtree child = ts_subtree_children(parent)[j]; - pending = ts_subtree_child_count(child) > 0; - - if (ts_subtree_is_error(child)) { - state = ERROR_STATE; - } else if (!ts_subtree_extra(child)) { - state = ts_language_next_state(self->language, state, ts_subtree_symbol(child)); - } - - ts_subtree_retain(child); - ts_stack_push(self->stack, slice.version, child, pending, state); - } - - for (uint32_t j = 1; j < slice.subtrees.size; j++) { - Subtree tree = slice.subtrees.contents[j]; - ts_stack_push(self->stack, slice.version, tree, false, state); - } - - ts_subtree_release(&self->tree_pool, parent); - array_delete(&slice.subtrees); - - LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent)); - LOG_STACK(); - } - } while (pending); - - return did_break_down; -} - -static void ts_parser__breakdown_lookahead( - t_parser *self, - Subtree *lookahead, - t_state_id state, - ReusableNode *reusable_node -) { - bool did_descend = false; - Subtree tree = reusable_node_tree(reusable_node); - while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) { - LOG("state_mismatch sym:%s", TREE_NAME(tree)); - reusable_node_descend(reusable_node); - tree = reusable_node_tree(reusable_node); - did_descend = true; - } - - if (did_descend) { - ts_subtree_release(&self->tree_pool, *lookahead); - *lookahead = tree; - ts_subtree_retain(*lookahead); - } -} - -static ErrorComparison ts_parser__compare_versions( - t_parser *self, - ErrorStatus a, - ErrorStatus b -) { - 
(void)self; - if (!a.is_in_error && b.is_in_error) { - if (a.cost < b.cost) { - return ErrorComparisonTakeLeft; - } else { - return ErrorComparisonPreferLeft; - } - } - - if (a.is_in_error && !b.is_in_error) { - if (b.cost < a.cost) { - return ErrorComparisonTakeRight; - } else { - return ErrorComparisonPreferRight; - } - } - - if (a.cost < b.cost) { - if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) { - return ErrorComparisonTakeLeft; - } else { - return ErrorComparisonPreferLeft; - } - } - - if (b.cost < a.cost) { - if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) { - return ErrorComparisonTakeRight; - } else { - return ErrorComparisonPreferRight; - } - } - - if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft; - if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight; - return ErrorComparisonNone; -} - -static ErrorStatus ts_parser__version_status( - t_parser *self, - StackVersion version -) { - unsigned cost = ts_stack_error_cost(self->stack, version); - bool is_paused = ts_stack_is_paused(self->stack, version); - if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE; - return (ErrorStatus) { - .cost = cost, - .node_count = ts_stack_node_count_since_error(self->stack, version), - .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), - .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE - }; -} - -static bool ts_parser__better_version_exists( - t_parser *self, - StackVersion version, - bool is_in_error, - unsigned cost -) { - if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) { - return true; - } - - Length position = ts_stack_position(self->stack, version); - ErrorStatus status = { - .cost = cost, - .is_in_error = is_in_error, - .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), - .node_count = ts_stack_node_count_since_error(self->stack, version), - }; - - for (StackVersion 
i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { - if (i == version || - !ts_stack_is_active(self->stack, i) || - ts_stack_position(self->stack, i).bytes < position.bytes) continue; - ErrorStatus status_i = ts_parser__version_status(self, i); - switch (ts_parser__compare_versions(self, status, status_i)) { - case ErrorComparisonTakeRight: - return true; - case ErrorComparisonPreferRight: - if (ts_stack_can_merge(self->stack, i, version)) return true; - break; - default: - break; - } - } - - return false; -} - -static bool ts_parser__call_main_lex_fn(t_parser *self, TSLexMode lex_mode) { - - return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); - -} - -static bool ts_parser__call_keyword_lex_fn(t_parser *self, TSLexMode lex_mode) { - (void)(lex_mode); - - return self->language->keyword_lex_fn(&self->lexer.data, 0); - -} - -static void ts_parser__external_scanner_create( - t_parser *self -) { - if (self->language && self->language->external_scanner.states) { -if (self->language->external_scanner.create) { - self->external_scanner_payload = self->language->external_scanner.create(); - - } -}} - -static void ts_parser__external_scanner_destroy( - t_parser *self -) { - if ( - self->language && - self->external_scanner_payload && - self->language->external_scanner.destroy - ) { - self->language->external_scanner.destroy( - self->external_scanner_payload - ); - } - self->external_scanner_payload = NULL; -} - -static unsigned ts_parser__external_scanner_serialize( - t_parser *self -) { - uint32_t length = self->language->external_scanner.serialize( - self->external_scanner_payload, - self->lexer.debug_buffer - ); - assert(length <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE); - return length; - -} - -static void ts_parser__external_scanner_deserialize( - t_parser *self, - Subtree external_token -) { - const char *data = NULL; - uint32_t length = 0; - if (external_token.ptr) { - data = 
ts_external_scanner_state_data(&external_token.ptr->external_scanner_state); - length = external_token.ptr->external_scanner_state.length; - } - - - self->language->external_scanner.deserialize( - self->external_scanner_payload, - data, - length - ); - -} - -static bool ts_parser__external_scanner_scan( - t_parser *self, - t_state_id external_lex_state -) { - - const bool *valid_external_tokens = ts_language_enabled_external_tokens( - self->language, - external_lex_state - ); - return self->language->external_scanner.scan( - self->external_scanner_payload, - &self->lexer.data, - valid_external_tokens - ); - -} - -static bool ts_parser__can_reuse_first_leaf( - t_parser *self, - t_state_id state, - Subtree tree, - TableEntry *table_entry -) { - TSLexMode current_lex_mode = self->language->lex_modes[state]; - t_symbol leaf_symbol = ts_subtree_leaf_symbol(tree); - t_state_id leaf_state = ts_subtree_leaf_parse_state(tree); - TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state]; - - // At the end of a non-terminal extra node, the lexer normally returns - // NULL, which indicates that the parser should look for a reduce action - // at symbol `0`. Avoid reusing tokens in this situation to ensure that - // the same thing happens when incrementally reparsing. - if (current_lex_mode.lex_state == (uint16_t)(-1)) return false; - - // If the token was created in a state with the same set of lookaheads, it is reusable. - if ( - table_entry->action_count > 0 && - memcmp(&leaf_lex_mode, ¤t_lex_mode, sizeof(TSLexMode)) == 0 && - ( - leaf_symbol != self->language->keyword_capture_token || - (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state) - ) - ) return true; - - // Empty tokens are not reusable in states with different lookaheads. - if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false; - - // If the current state allows external tokens or other tokens that conflict with this - // token, this token is not reusable. 
- return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable; -} - -static Subtree ts_parser__lex( - t_parser *self, - StackVersion version, - t_state_id parse_state -) { - TSLexMode lex_mode = self->language->lex_modes[parse_state]; - if (lex_mode.lex_state == (uint16_t)-1) { - LOG("no_lookahead_after_non_terminal_extra"); - return NULL_SUBTREE; - } - - const Length start_position = ts_stack_position(self->stack, version); - const Subtree external_token = ts_stack_last_external_token(self->stack, version); - - bool found_external_token = false; - bool error_mode = parse_state == ERROR_STATE; - bool skipped_error = false; - bool called_get_column = false; - int32_t first_error_character = 0; - Length error_start_position = length_zero(); - Length error_end_position = length_zero(); - uint32_t lookahead_end_byte = 0; - uint32_t external_scanner_state_len = 0; - bool external_scanner_state_changed = false; - ts_lexer_reset(&self->lexer, start_position); - - for (;;) { - bool found_token = false; - Length current_position = self->lexer.current_position; - - if (lex_mode.external_lex_state != 0) { - LOG( - "lex_external state:%d, row:%u, column:%u", - lex_mode.external_lex_state, - current_position.extent.row, - current_position.extent.column - ); - ts_lexer_start(&self->lexer); - ts_parser__external_scanner_deserialize(self, external_token); - found_token = ts_parser__external_scanner_scan(self, lex_mode.external_lex_state); - if (self->has_scanner_error) return NULL_SUBTREE; - ts_lexer_finish(&self->lexer, &lookahead_end_byte); - - if (found_token) { - external_scanner_state_len = ts_parser__external_scanner_serialize(self); - external_scanner_state_changed = !ts_external_scanner_state_eq( - ts_subtree_external_scanner_state(external_token), - self->lexer.debug_buffer, - external_scanner_state_len - ); - - // When recovering from an error, ignore any zero-length external tokens - // unless they have changed the external scanner's state. 
This helps to - // avoid infinite loops which could otherwise occur, because the lexer is - // looking for any possible token, instead of looking for the specific set of - // tokens that are valid in some parse state. - // - // Note that it's possible that the token end position may be *before* the - // original position of the lexer because of the way that tokens are positioned - // at included range boundaries: when a token is terminated at the start of - // an included range, it is marked as ending at the *end* of the preceding - // included range. - if ( - self->lexer.token_end_position.bytes <= current_position.bytes && - (error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) && - !external_scanner_state_changed - ) { - LOG( - "ignore_empty_external_token symbol:%s", - SYM_NAME(self->language->external_scanner.symbol_map[self->lexer.data.result_symbol]) - ) - found_token = false; - } - } - - if (found_token) { - found_external_token = true; - called_get_column = self->lexer.did_get_column; - break; - } - - ts_lexer_reset(&self->lexer, current_position); - } - - LOG( - "lex_internal state:%d, row:%u, column:%u", - lex_mode.lex_state, - current_position.extent.row, - current_position.extent.column - ); - ts_lexer_start(&self->lexer); - found_token = ts_parser__call_main_lex_fn(self, lex_mode); - ts_lexer_finish(&self->lexer, &lookahead_end_byte); - if (found_token) break; - - if (!error_mode) { - error_mode = true; - lex_mode = self->language->lex_modes[ERROR_STATE]; - ts_lexer_reset(&self->lexer, start_position); - continue; - } - - if (!skipped_error) { - LOG("skip_unrecognized_character"); - skipped_error = true; - error_start_position = self->lexer.token_start_position; - error_end_position = self->lexer.token_start_position; - first_error_character = self->lexer.data.lookahead; - } - - if (self->lexer.current_position.bytes == error_end_position.bytes) { - if (self->lexer.data.eof(&self->lexer.data)) { - self->lexer.data.result_symbol = 
ts_builtin_sym_error; - break; - } - self->lexer.data.advance(&self->lexer.data, false); - } - - error_end_position = self->lexer.current_position; - } - - Subtree result; - if (skipped_error) { - Length padding = length_sub(error_start_position, start_position); - Length size = length_sub(error_end_position, error_start_position); - uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes; - result = ts_subtree_new_error( - &self->tree_pool, - first_error_character, - padding, - size, - lookahead_bytes, - parse_state, - self->language - ); - } else { - bool is_keyword = false; - t_symbol symbol = self->lexer.data.result_symbol; - Length padding = length_sub(self->lexer.token_start_position, start_position); - Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position); - uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes; - - if (found_external_token) { - symbol = self->language->external_scanner.symbol_map[symbol]; - } else if (symbol == self->language->keyword_capture_token && symbol != 0) { - uint32_t end_byte = self->lexer.token_end_position.bytes; - ts_lexer_reset(&self->lexer, self->lexer.token_start_position); - ts_lexer_start(&self->lexer); - - is_keyword = ts_parser__call_keyword_lex_fn(self, lex_mode); - - if ( - is_keyword && - self->lexer.token_end_position.bytes == end_byte && - ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol) - ) { - symbol = self->lexer.data.result_symbol; - } - } - - result = ts_subtree_new_leaf( - &self->tree_pool, - symbol, - padding, - size, - lookahead_bytes, - parse_state, - found_external_token, - called_get_column, - is_keyword, - self->language - ); - - if (found_external_token) { - MutableSubtree mut_result = ts_subtree_to_mut_unsafe(result); - ts_external_scanner_state_init( - &mut_result.ptr->external_scanner_state, - self->lexer.debug_buffer, - external_scanner_state_len - ); - 
mut_result.ptr->has_external_scanner_state_change = external_scanner_state_changed; - } - } - - LOG_LOOKAHEAD( - SYM_NAME(ts_subtree_symbol(result)), - ts_subtree_total_size(result).bytes - ); - return result; -} - -static Subtree ts_parser__get_cached_token( - t_parser *self, - t_state_id state, - size_t position, - Subtree last_external_token, - TableEntry *table_entry -) { - TokenCache *cache = &self->token_cache; - if ( - cache->token.ptr && cache->byte_index == position && - ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token) - ) { - ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry); - if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) { - ts_subtree_retain(cache->token); - return cache->token; - } - } - return NULL_SUBTREE; -} - -static void ts_parser__set_cached_token( - t_parser *self, - uint32_t byte_index, - Subtree last_external_token, - Subtree token -) { - TokenCache *cache = &self->token_cache; - if (token.ptr) ts_subtree_retain(token); - if (last_external_token.ptr) ts_subtree_retain(last_external_token); - if (cache->token.ptr) ts_subtree_release(&self->tree_pool, cache->token); - if (cache->last_external_token.ptr) ts_subtree_release(&self->tree_pool, cache->last_external_token); - cache->token = token; - cache->byte_index = byte_index; - cache->last_external_token = last_external_token; -} - -static bool ts_parser__has_included_range_difference( - const t_parser *self, - uint32_t start_position, - uint32_t end_position -) { - return ts_range_array_intersects( - &self->included_range_differences, - self->included_range_difference_index, - start_position, - end_position - ); -} - -static Subtree ts_parser__reuse_node( - t_parser *self, - StackVersion version, - t_state_id *state, - uint32_t position, - Subtree last_external_token, - TableEntry *table_entry -) { - Subtree result; - while ((result = reusable_node_tree(&self->reusable_node)).ptr) { - 
uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node); - uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result); - - // Do not reuse an EOF node if the included ranges array has changes - // later on in the file. - if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX; - - if (byte_offset > position) { - LOG("before_reusable_node symbol:%s", TREE_NAME(result)); - break; - } - - if (byte_offset < position) { - LOG("past_reusable_node symbol:%s", TREE_NAME(result)); - if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) { - reusable_node_advance(&self->reusable_node); - } - continue; - } - - if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) { - LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result)); - reusable_node_advance(&self->reusable_node); - continue; - } - - const char *reason = NULL; - if (ts_subtree_has_changes(result)) { - reason = "has_changes"; - } else if (ts_subtree_is_error(result)) { - reason = "is_error"; - } else if (ts_subtree_missing(result)) { - reason = "is_missing"; - } else if (ts_subtree_is_fragile(result)) { - reason = "is_fragile"; - } else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) { - reason = "contains_different_included_range"; - } - - if (reason) { - LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result)); - if (!reusable_node_descend(&self->reusable_node)) { - reusable_node_advance(&self->reusable_node); - ts_parser__breakdown_top_of_stack(self, version); - *state = ts_stack_state(self->stack, version); - } - continue; - } - - t_symbol leaf_symbol = ts_subtree_leaf_symbol(result); - ts_language_table_entry(self->language, *state, leaf_symbol, table_entry); - if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) { - LOG( - "cant_reuse_node symbol:%s, first_leaf_symbol:%s", - TREE_NAME(result), - SYM_NAME(leaf_symbol) - ); 
- reusable_node_advance_past_leaf(&self->reusable_node); - break; - } - - LOG("reuse_node symbol:%s", TREE_NAME(result)); - ts_subtree_retain(result); - return result; - } - - return NULL_SUBTREE; -} - -// Determine if a given tree should be replaced by an alternative tree. -// -// The decision is based on the trees' error costs (if any), their dynamic precedence, -// and finally, as a default, by a recursive comparison of the trees' symbols. -static bool ts_parser__select_tree(t_parser *self, Subtree left, Subtree right) { - if (!left.ptr) return true; - if (!right.ptr) return false; - - if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) { - LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left)); - return true; - } - - if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) { - LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); - return false; - } - - if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) { - LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32, - TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left), - ts_subtree_dynamic_precedence(left)); - return true; - } - - if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) { - LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32, - TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right), - ts_subtree_dynamic_precedence(right)); - return false; - } - - if (ts_subtree_error_cost(left) > 0) return true; - - int comparison = ts_subtree_compare(left, right, &self->tree_pool); - switch (comparison) { - case -1: - LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); - return false; - break; - case 1: - LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left)); - return true; - default: - 
LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); - return false; - } -} - -// Determine if a given tree's children should be replaced by an alternative -// array of children. -static bool ts_parser__select_children( - t_parser *self, - Subtree left, - const SubtreeArray *children -) { - array_assign(&self->scratch_trees, children); - - // Create a temporary subtree using the scratch trees array. This node does - // not perform any allocation except for possibly growing the array to make - // room for its own heap data. The scratch tree is never explicitly released, - // so the same 'scratch trees' array can be reused again later. - MutableSubtree scratch_tree = ts_subtree_new_node( - ts_subtree_symbol(left), - &self->scratch_trees, - 0, - self->language - ); - - return ts_parser__select_tree( - self, - left, - ts_subtree_from_mut(scratch_tree) - ); -} - -static void ts_parser__shift( - t_parser *self, - StackVersion version, - t_state_id state, - Subtree lookahead, - bool extra -) { - bool is_leaf = ts_subtree_child_count(lookahead) == 0; - Subtree subtree_to_push = lookahead; - if (extra != ts_subtree_extra(lookahead) && is_leaf) { - MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead); - ts_subtree_set_extra(&result, extra); - subtree_to_push = ts_subtree_from_mut(result); - } - - ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state); - if (ts_subtree_has_external_tokens(subtree_to_push)) { - ts_stack_set_last_external_token( - self->stack, version, ts_subtree_last_external_token(subtree_to_push) - ); - } -} - -static StackVersion ts_parser__reduce( - t_parser *self, - StackVersion version, - t_symbol symbol, - uint32_t count, - int dynamic_precedence, - uint16_t production_id, - bool is_fragile, - bool end_of_non_terminal_extra -) { - uint32_t initial_version_count = ts_stack_version_count(self->stack); - - // Pop the given number of nodes from the given version of the parse stack. 
- // If stack versions have previously merged, then there may be more than one - // path back through the stack. For each path, create a new parent node to - // contain the popped children, and push it onto the stack in place of the - // children. - StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); - uint32_t removed_version_count = 0; - for (uint32_t i = 0; i < pop.size; i++) { - StackSlice slice = pop.contents[i]; - StackVersion slice_version = slice.version - removed_version_count; - - // This is where new versions are added to the parse stack. The versions - // will all be sorted and truncated at the end of the outer parsing loop. - // Allow the maximum version count to be temporarily exceeded, but only - // by a limited threshold. - if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) { - ts_stack_remove_version(self->stack, slice_version); - ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); - removed_version_count++; - while (i + 1 < pop.size) { - StackSlice next_slice = pop.contents[i + 1]; - if (next_slice.version != slice.version) break; - ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); - i++; - } - continue; - } - - // Extra tokens on top of the stack should not be included in this new parent - // node. They will be re-pushed onto the stack after the parent node is - // created and pushed. - SubtreeArray children = slice.subtrees; - ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras); - - MutableSubtree parent = ts_subtree_new_node( - symbol, &children, production_id, self->language - ); - - // This pop operation may have caused multiple stack versions to collapse - // into one, because they all diverged from a common state. In that case, - // choose one of the arrays of trees to be the parent node's children, and - // delete the rest of the tree arrays. 
- while (i + 1 < pop.size) { - StackSlice next_slice = pop.contents[i + 1]; - if (next_slice.version != slice.version) break; - i++; - - SubtreeArray next_slice_children = next_slice.subtrees; - ts_subtree_array_remove_trailing_extras(&next_slice_children, &self->trailing_extras2); - - if (ts_parser__select_children( - self, - ts_subtree_from_mut(parent), - &next_slice_children - )) { - ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras); - ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent)); - array_swap(&self->trailing_extras, &self->trailing_extras2); - parent = ts_subtree_new_node( - symbol, &next_slice_children, production_id, self->language - ); - } else { - array_clear(&self->trailing_extras2); - ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); - } - } - - t_state_id state = ts_stack_state(self->stack, slice_version); - t_state_id next_state = ts_language_next_state(self->language, state, symbol); - if (end_of_non_terminal_extra && next_state == state) { - parent.ptr->extra = true; - } - if (is_fragile || pop.size > 1 || initial_version_count > 1) { - parent.ptr->fragile_left = true; - parent.ptr->fragile_right = true; - parent.ptr->parse_state = TS_TREE_STATE_NONE; - } else { - parent.ptr->parse_state = state; - } - parent.ptr->dynamic_precedence += dynamic_precedence; - - // Push the parent node onto the stack, along with any extra tokens that - // were previously on top of the stack. - ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state); - for (uint32_t j = 0; j < self->trailing_extras.size; j++) { - ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state); - } - - for (StackVersion j = 0; j < slice_version; j++) { - if (j == version) continue; - if (ts_stack_merge(self->stack, j, slice_version)) { - removed_version_count++; - break; - } - } - } - - // Return the first new stack version that was created. 
- return ts_stack_version_count(self->stack) > initial_version_count - ? initial_version_count - : STACK_VERSION_NONE; -} - -static void ts_parser__accept( - t_parser *self, - StackVersion version, - Subtree lookahead -) { - assert(ts_subtree_is_eof(lookahead)); - ts_stack_push(self->stack, version, lookahead, false, 1); - - StackSliceArray pop = ts_stack_pop_all(self->stack, version); - for (uint32_t i = 0; i < pop.size; i++) { - SubtreeArray trees = pop.contents[i].subtrees; - - Subtree root = NULL_SUBTREE; - for (uint32_t j = trees.size - 1; j + 1 > 0; j--) { - Subtree tree = trees.contents[j]; - if (!ts_subtree_extra(tree)) { - assert(!tree.data.is_inline); - uint32_t child_count = ts_subtree_child_count(tree); - const Subtree *children = ts_subtree_children(tree); - for (uint32_t k = 0; k < child_count; k++) { - ts_subtree_retain(children[k]); - } - array_splice(&trees, j, 1, child_count, children); - root = ts_subtree_from_mut(ts_subtree_new_node( - ts_subtree_symbol(tree), - &trees, - tree.ptr->production_id, - self->language - )); - ts_subtree_release(&self->tree_pool, tree); - break; - } - } - - assert(root.ptr); - self->accept_count++; - - if (self->finished_tree.ptr) { - if (ts_parser__select_tree(self, self->finished_tree, root)) { - ts_subtree_release(&self->tree_pool, self->finished_tree); - self->finished_tree = root; - } else { - ts_subtree_release(&self->tree_pool, root); - } - } else { - self->finished_tree = root; - } - } - - ts_stack_remove_version(self->stack, pop.contents[0].version); - ts_stack_halt(self->stack, version); -} - -static bool ts_parser__do_all_potential_reductions( - t_parser *self, - StackVersion starting_version, - t_symbol lookahead_symbol -) { - uint32_t initial_version_count = ts_stack_version_count(self->stack); - - bool can_shift_lookahead_symbol = false; - StackVersion version = starting_version; - for (unsigned i = 0; true; i++) { - uint32_t version_count = ts_stack_version_count(self->stack); - if (version >= 
version_count) break; - - bool merged = false; - for (StackVersion j = initial_version_count; j < version; j++) { - if (ts_stack_merge(self->stack, j, version)) { - merged = true; - break; - } - } - if (merged) continue; - - t_state_id state = ts_stack_state(self->stack, version); - bool has_shift_action = false; - array_clear(&self->reduce_actions); - - t_symbol first_symbol, end_symbol; - if (lookahead_symbol != 0) { - first_symbol = lookahead_symbol; - end_symbol = lookahead_symbol + 1; - } else { - first_symbol = 1; - end_symbol = self->language->token_count; - } - - for (t_symbol symbol = first_symbol; symbol < end_symbol; symbol++) { - TableEntry entry; - ts_language_table_entry(self->language, state, symbol, &entry); - for (uint32_t j = 0; j < entry.action_count; j++) { - TSParseAction action = entry.actions[j]; - switch (action.type) { - case TSParseActionTypeShift: - case TSParseActionTypeRecover: - if (!action.shift.extra && !action.shift.repetition) has_shift_action = true; - break; - case TSParseActionTypeReduce: - if (action.reduce.child_count > 0) - ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction) { - .symbol = action.reduce.symbol, - .count = action.reduce.child_count, - .dynamic_precedence = action.reduce.dynamic_precedence, - .production_id = action.reduce.production_id, - }); - break; - default: - break; - } - } - } - - StackVersion reduction_version = STACK_VERSION_NONE; - for (uint32_t j = 0; j < self->reduce_actions.size; j++) { - ReduceAction action = self->reduce_actions.contents[j]; - - reduction_version = ts_parser__reduce( - self, version, action.symbol, action.count, - action.dynamic_precedence, action.production_id, - true, false - ); - } - - if (has_shift_action) { - can_shift_lookahead_symbol = true; - } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) { - ts_stack_renumber_version(self->stack, reduction_version, version); - continue; - } else if (lookahead_symbol != 0) { - 
ts_stack_remove_version(self->stack, version); - } - - if (version == starting_version) { - version = version_count; - } else { - version++; - } - } - - return can_shift_lookahead_symbol; -} - -static bool ts_parser__recover_to_state( - t_parser *self, - StackVersion version, - unsigned depth, - t_state_id goal_state -) { - StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth); - StackVersion previous_version = STACK_VERSION_NONE; - - for (unsigned i = 0; i < pop.size; i++) { - StackSlice slice = pop.contents[i]; - - if (slice.version == previous_version) { - ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); - array_erase(&pop, i--); - continue; - } - - if (ts_stack_state(self->stack, slice.version) != goal_state) { - ts_stack_halt(self->stack, slice.version); - ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); - array_erase(&pop, i--); - continue; - } - - SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version); - if (error_trees.size > 0) { - assert(error_trees.size == 1); - Subtree error_tree = error_trees.contents[0]; - uint32_t error_child_count = ts_subtree_child_count(error_tree); - if (error_child_count > 0) { - array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree)); - for (unsigned j = 0; j < error_child_count; j++) { - ts_subtree_retain(slice.subtrees.contents[j]); - } - } - ts_subtree_array_delete(&self->tree_pool, &error_trees); - } - - ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras); - - if (slice.subtrees.size > 0) { - Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language); - ts_stack_push(self->stack, slice.version, error, false, goal_state); - } else { - array_delete(&slice.subtrees); - } - - for (unsigned j = 0; j < self->trailing_extras.size; j++) { - Subtree tree = self->trailing_extras.contents[j]; - ts_stack_push(self->stack, slice.version, tree, false, goal_state); - } - - previous_version = 
slice.version; - } - - return previous_version != STACK_VERSION_NONE; -} - -static void ts_parser__recover( - t_parser *self, - StackVersion version, - Subtree lookahead -) { - bool did_recover = false; - unsigned previous_version_count = ts_stack_version_count(self->stack); - Length position = ts_stack_position(self->stack, version); - StackSummary *summary = ts_stack_get_summary(self->stack, version); - unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version); - unsigned current_error_cost = ts_stack_error_cost(self->stack, version); - - // When the parser is in the error state, there are two strategies for recovering with a - // given lookahead token: - // 1. Find a previous state on the stack in which that lookahead token would be valid. Then, - // create a new stack version that is in that state again. This entails popping all of the - // subtrees that have been pushed onto the stack since that previous state, and wrapping - // them in an ERROR node. - // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and - // move on to the next lookahead token, remaining in the error state. - // - // First, try the strategy 1. Upon entering the error state, the parser recorded a summary - // of the previous parse states and their depths. Look at each state in the summary, to see - // if the current lookahead token would be valid in that state. - if (summary && !ts_subtree_is_error(lookahead)) { - for (unsigned i = 0; i < summary->size; i++) { - StackSummaryEntry entry = summary->contents[i]; - - if (entry.state == ERROR_STATE) continue; - if (entry.position.bytes == position.bytes) continue; - unsigned depth = entry.depth; - if (node_count_since_error > 0) depth++; - - // Do not recover in ways that create redundant stack versions. 
- bool would_merge = false; - for (unsigned j = 0; j < previous_version_count; j++) { - if ( - ts_stack_state(self->stack, j) == entry.state && - ts_stack_position(self->stack, j).bytes == position.bytes - ) { - would_merge = true; - break; - } - } - if (would_merge) continue; - - // Do not recover if the result would clearly be worse than some existing stack version. - unsigned new_cost = - current_error_cost + - entry.depth * ERROR_COST_PER_SKIPPED_TREE + - (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR + - (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE; - if (ts_parser__better_version_exists(self, version, false, new_cost)) break; - - // If the current lookahead token is valid in some previous state, recover to that state. - // Then stop looking for further recoveries. - if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) { - if (ts_parser__recover_to_state(self, version, depth, entry.state)) { - did_recover = true; - LOG("recover_to_previous state:%u, depth:%u", entry.state, depth); - LOG_STACK(); - break; - } - } - } - } - - // In the process of attempting to recover, some stack versions may have been created - // and subsequently halted. Remove those versions. - for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) { - if (!ts_stack_is_active(self->stack, i)) { - ts_stack_remove_version(self->stack, i--); - } - } - - // If strategy 1 succeeded, a new stack version will have been created which is able to handle - // the current lookahead token. Now, in addition, try strategy 2 described above: skip the - // current lookahead token by wrapping it in an ERROR node. - - // Don't pursue this additional strategy if there are already too many stack versions. 
- if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { - ts_stack_halt(self->stack, version); - ts_subtree_release(&self->tree_pool, lookahead); - return; - } - - if ( - did_recover && - ts_subtree_has_external_scanner_state_change(lookahead) - ) { - ts_stack_halt(self->stack, version); - ts_subtree_release(&self->tree_pool, lookahead); - return; - } - - // If the parser is still in the error state at the end of the file, just wrap everything - // in an ERROR node and terminate. - if (ts_subtree_is_eof(lookahead)) { - LOG("recover_eof"); - SubtreeArray children = array_new(); - Subtree parent = ts_subtree_new_error_node(&children, false, self->language); - ts_stack_push(self->stack, version, parent, false, 1); - ts_parser__accept(self, version, lookahead); - return; - } - - // Do not recover if the result would clearly be worse than some existing stack version. - unsigned new_cost = - current_error_cost + ERROR_COST_PER_SKIPPED_TREE + - ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR + - ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE; - if (ts_parser__better_version_exists(self, version, false, new_cost)) { - ts_stack_halt(self->stack, version); - ts_subtree_release(&self->tree_pool, lookahead); - return; - } - - // If the current lookahead token is an extra token, mark it as extra. This means it won't - // be counted in error cost calculations. - unsigned n; - const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n); - if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) { - MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); - ts_subtree_set_extra(&mutable_lookahead, true); - lookahead = ts_subtree_from_mut(mutable_lookahead); - } - - // Wrap the lookahead token in an ERROR. 
- LOG("skip_token symbol:%s", TREE_NAME(lookahead)); - SubtreeArray children = array_new(); - array_reserve(&children, 1); - array_push(&children, lookahead); - MutableSubtree error_repeat = ts_subtree_new_node( - ts_builtin_sym_error_repeat, - &children, - 0, - self->language - ); - - // If other tokens have already been skipped, so there is already an ERROR at the top of the - // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger - // ERROR. - if (node_count_since_error > 0) { - StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1); - - // TODO: Figure out how to make this condition occur. - // See https://github.com/atom/atom/issues/18450#issuecomment-439579778 - // If multiple stack versions have merged at this point, just pick one of the errors - // arbitrarily and discard the rest. - if (pop.size > 1) { - for (unsigned i = 1; i < pop.size; i++) { - ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees); - } - while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) { - ts_stack_remove_version(self->stack, pop.contents[0].version + 1); - } - } - - ts_stack_renumber_version(self->stack, pop.contents[0].version, version); - array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat)); - error_repeat = ts_subtree_new_node( - ts_builtin_sym_error_repeat, - &pop.contents[0].subtrees, - 0, - self->language - ); - } - - // Push the new ERROR onto the stack. 
- ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE); - if (ts_subtree_has_external_tokens(lookahead)) { - ts_stack_set_last_external_token( - self->stack, version, ts_subtree_last_external_token(lookahead) - ); - } -} - -static void ts_parser__handle_error( - t_parser *self, - StackVersion version, - Subtree lookahead -) { - uint32_t previous_version_count = ts_stack_version_count(self->stack); - - // Perform any reductions that can happen in this state, regardless of the lookahead. After - // skipping one or more invalid tokens, the parser might find a token that would have allowed - // a reduction to take place. - ts_parser__do_all_potential_reductions(self, version, 0); - uint32_t version_count = ts_stack_version_count(self->stack); - Length position = ts_stack_position(self->stack, version); - - // Push a discontinuity onto the stack. Merge all of the stack versions that - // were created in the previous step. - bool did_insert_missing_token = false; - for (StackVersion v = version; v < version_count;) { - if (!did_insert_missing_token) { - t_state_id state = ts_stack_state(self->stack, v); - for ( - t_symbol missing_symbol = 1; - missing_symbol < (uint16_t)self->language->token_count; - missing_symbol++ - ) { - t_state_id state_after_missing_symbol = ts_language_next_state( - self->language, state, missing_symbol - ); - if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) { - continue; - } - - if (ts_language_has_reduce_action( - self->language, - state_after_missing_symbol, - ts_subtree_leaf_symbol(lookahead) - )) { - // In case the parser is currently outside of any included range, the lexer will - // snap to the beginning of the next included range. The missing token's padding - // must be assigned to position it within the next included range. 
- ts_lexer_reset(&self->lexer, position); - ts_lexer_mark_end(&self->lexer); - Length padding = length_sub(self->lexer.token_end_position, position); - uint32_t lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead); - - StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v); - Subtree missing_tree = ts_subtree_new_missing_leaf( - &self->tree_pool, missing_symbol, - padding, lookahead_bytes, - self->language - ); - ts_stack_push( - self->stack, version_with_missing_tree, - missing_tree, false, - state_after_missing_symbol - ); - - if (ts_parser__do_all_potential_reductions( - self, version_with_missing_tree, - ts_subtree_leaf_symbol(lookahead) - )) { - LOG( - "recover_with_missing symbol:%s, state:%u", - SYM_NAME(missing_symbol), - ts_stack_state(self->stack, version_with_missing_tree) - ); - did_insert_missing_token = true; - break; - } - } - } - } - - ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE); - v = (v == version) ? previous_version_count : v + 1; - } - - for (unsigned i = previous_version_count; i < version_count; i++) { - bool did_merge = ts_stack_merge(self->stack, version, previous_version_count); - assert(did_merge); - (void)did_merge; // fix warning/error with clang -Os - } - - ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH); - - // Begin recovery with the current lookahead node, rather than waiting for the - // next turn of the parse loop. This ensures that the tree accounts for the - // current lookahead token's "lookahead bytes" value, which describes how far - // the lexer needed to look ahead beyond the content of the token in order to - // recognize it. 
- if (ts_subtree_child_count(lookahead) > 0) { - ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node); - } - ts_parser__recover(self, version, lookahead); - - LOG_STACK(); -} - -static bool ts_parser__advance( - t_parser *self, - StackVersion version, - bool allow_node_reuse -) { - t_state_id state = ts_stack_state(self->stack, version); - uint32_t position = ts_stack_position(self->stack, version).bytes; - Subtree last_external_token = ts_stack_last_external_token(self->stack, version); - - bool did_reuse = true; - Subtree lookahead = NULL_SUBTREE; - TableEntry table_entry = {.action_count = 0}; - - // If possible, reuse a node from the previous syntax tree. - if (allow_node_reuse) { - lookahead = ts_parser__reuse_node( - self, version, &state, position, last_external_token, &table_entry - ); - } - - // If no node from the previous syntax tree could be reused, then try to - // reuse the token previously returned by the lexer. - if (!lookahead.ptr) { - did_reuse = false; - lookahead = ts_parser__get_cached_token( - self, state, position, last_external_token, &table_entry - ); - } - - bool needs_lex = !lookahead.ptr; - for (;;) { - // Otherwise, re-run the lexer. - if (needs_lex) { - needs_lex = false; - lookahead = ts_parser__lex(self, version, state); - if (self->has_scanner_error) return false; - - if (lookahead.ptr) { - ts_parser__set_cached_token(self, position, last_external_token, lookahead); - ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry); - } - - // When parsing a non-terminal extra, a null lookahead indicates the - // end of the rule. The reduction is stored in the EOF table entry. - // After the reduction, the lexer needs to be run again. - else { - ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry); - } - } - - // If a cancellation flag or a timeout was provided, then check every - // time a fixed number of parse actions has been processed. 
- if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { - self->operation_count = 0; - } - if ( - self->operation_count == 0 && - ((self->cancellation_flag && atomic_load(self->cancellation_flag)) || - (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock))) - ) { - if (lookahead.ptr) { - ts_subtree_release(&self->tree_pool, lookahead); - } - return false; - } - - // Process each parse action for the current lookahead token in - // the current state. If there are multiple actions, then this is - // an ambiguous state. REDUCE actions always create a new stack - // version, whereas SHIFT actions update the existing stack version - // and terminate this loop. - StackVersion last_reduction_version = STACK_VERSION_NONE; - for (uint32_t i = 0; i < table_entry.action_count; i++) { - TSParseAction action = table_entry.actions[i]; - - switch (action.type) { - case TSParseActionTypeShift: { - if (action.shift.repetition) break; - t_state_id next_state; - if (action.shift.extra) { - next_state = state; - LOG("shift_extra"); - } else { - next_state = action.shift.state; - LOG("shift state:%u", next_state); - } - - if (ts_subtree_child_count(lookahead) > 0) { - ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node); - next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead)); - } - - ts_parser__shift(self, version, next_state, lookahead, action.shift.extra); - if (did_reuse) reusable_node_advance(&self->reusable_node); - return true; - } - - case TSParseActionTypeReduce: { - bool is_fragile = table_entry.action_count > 1; - bool end_of_non_terminal_extra = lookahead.ptr == NULL; - LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count); - StackVersion reduction_version = ts_parser__reduce( - self, version, action.reduce.symbol, action.reduce.child_count, - action.reduce.dynamic_precedence, action.reduce.production_id, - is_fragile, 
end_of_non_terminal_extra - ); - if (reduction_version != STACK_VERSION_NONE) { - last_reduction_version = reduction_version; - } - break; - } - - case TSParseActionTypeAccept: { - LOG("accept"); - ts_parser__accept(self, version, lookahead); - return true; - } - - case TSParseActionTypeRecover: { - if (ts_subtree_child_count(lookahead) > 0) { - ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node); - } - - ts_parser__recover(self, version, lookahead); - if (did_reuse) reusable_node_advance(&self->reusable_node); - return true; - } - } - } - - // If a reduction was performed, then replace the current stack version - // with one of the stack versions created by a reduction, and continue - // processing this version of the stack with the same lookahead symbol. - if (last_reduction_version != STACK_VERSION_NONE) { - ts_stack_renumber_version(self->stack, last_reduction_version, version); - LOG_STACK(); - state = ts_stack_state(self->stack, version); - - // At the end of a non-terminal extra rule, the lexer will return a - // null subtree, because the parser needs to perform a fixed reduction - // regardless of the lookahead node. After performing that reduction, - // (and completing the non-terminal extra rule) run the lexer again based - // on the current parse state. - if (!lookahead.ptr) { - needs_lex = true; - } else { - ts_language_table_entry( - self->language, - state, - ts_subtree_leaf_symbol(lookahead), - &table_entry - ); - } - - continue; - } - - // A non-terminal extra rule was reduced and merged into an existing - // stack version. This version can be discarded. - if (!lookahead.ptr) { - ts_stack_halt(self->stack, version); - return true; - } - - // If there were no parse actions for the current lookahead token, then - // it is not valid in this state. If the current lookahead token is a - // keyword, then switch to treating it as the normal word token if that - // token is valid in this state. 
- if ( - ts_subtree_is_keyword(lookahead) && - ts_subtree_symbol(lookahead) != self->language->keyword_capture_token - ) { - ts_language_table_entry(self->language, state, self->language->keyword_capture_token, &table_entry); - if (table_entry.action_count > 0) { - LOG( - "switch from_keyword:%s, to_word_token:%s", - TREE_NAME(lookahead), - SYM_NAME(self->language->keyword_capture_token) - ); - - MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); - ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language); - lookahead = ts_subtree_from_mut(mutable_lookahead); - continue; - } - } - - // If the current lookahead token is not valid and the parser is - // already in the error state, restart the error recovery process. - // TODO - can this be unified with the other `RECOVER` case above? - if (state == ERROR_STATE) { - ts_parser__recover(self, version, lookahead); - return true; - } - - // If the current lookahead token is not valid and the previous - // subtree on the stack was reused from an old tree, it isn't actually - // valid to reuse it. Remove it from the stack, and in its place, - // push each of its children. Then try again to process the current - // lookahead. - if (ts_parser__breakdown_top_of_stack(self, version)) { - state = ts_stack_state(self->stack, version); - ts_subtree_release(&self->tree_pool, lookahead); - needs_lex = true; - continue; - } - - // At this point, the current lookahead token is definitely not valid - // for this parse stack version. Mark this version as paused and continue - // processing any other stack versions that might exist. If some other - // version advances successfully, then this version can simply be removed. - // But if all versions end up paused, then error recovery is needed. 
- LOG("detect_error"); - ts_stack_pause(self->stack, version, lookahead); - return true; - } -} - -static unsigned ts_parser__condense_stack(t_parser *self) { - bool made_changes = false; - unsigned min_error_cost = UINT_MAX; - for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { - // Prune any versions that have been marked for removal. - if (ts_stack_is_halted(self->stack, i)) { - ts_stack_remove_version(self->stack, i); - i--; - continue; - } - - // Keep track of the minimum error cost of any stack version so - // that it can be returned. - ErrorStatus status_i = ts_parser__version_status(self, i); - if (!status_i.is_in_error && status_i.cost < min_error_cost) { - min_error_cost = status_i.cost; - } - - // Examine each pair of stack versions, removing any versions that - // are clearly worse than another version. Ensure that the versions - // are ordered from most promising to least promising. - for (StackVersion j = 0; j < i; j++) { - ErrorStatus status_j = ts_parser__version_status(self, j); - - switch (ts_parser__compare_versions(self, status_j, status_i)) { - case ErrorComparisonTakeLeft: - made_changes = true; - ts_stack_remove_version(self->stack, i); - i--; - j = i; - break; - - case ErrorComparisonPreferLeft: - case ErrorComparisonNone: - if (ts_stack_merge(self->stack, j, i)) { - made_changes = true; - i--; - j = i; - } - break; - - case ErrorComparisonPreferRight: - made_changes = true; - if (ts_stack_merge(self->stack, j, i)) { - i--; - j = i; - } else { - ts_stack_swap_versions(self->stack, i, j); - } - break; - - case ErrorComparisonTakeRight: - made_changes = true; - ts_stack_remove_version(self->stack, j); - i--; - j--; - break; - } - } - } - - // Enforce a hard upper bound on the number of stack versions by - // discarding the least promising versions. 
- while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { - ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); - made_changes = true; - } - - // If the best-performing stack version is currently paused, or all - // versions are paused, then resume the best paused version and begin - // the error recovery process. Otherwise, remove the paused versions. - if (ts_stack_version_count(self->stack) > 0) { - bool has_unpaused_version = false; - for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { - if (ts_stack_is_paused(self->stack, i)) { - if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) { - LOG("resume version:%u", i); - min_error_cost = ts_stack_error_cost(self->stack, i); - Subtree lookahead = ts_stack_resume(self->stack, i); - ts_parser__handle_error(self, i, lookahead); - has_unpaused_version = true; - } else { - ts_stack_remove_version(self->stack, i); - i--; - n--; - } - } else { - has_unpaused_version = true; - } - } - } - - if (made_changes) { - LOG("condense"); - LOG_STACK(); - } - - return min_error_cost; -} - -static bool ts_parser_has_outstanding_parse(t_parser *self) { - return ( - self->external_scanner_payload || - ts_stack_state(self->stack, 0) != 1 || - ts_stack_node_count_since_error(self->stack, 0) != 0 - ); -} - -// Parser - Public - -t_parser *ts_parser_new(void) { - t_parser *self = ts_calloc(1, sizeof(t_parser)); - ts_lexer_init(&self->lexer); - array_init(&self->reduce_actions); - array_reserve(&self->reduce_actions, 4); - self->tree_pool = ts_subtree_pool_new(32); - self->stack = ts_stack_new(&self->tree_pool); - self->finished_tree = NULL_SUBTREE; - self->reusable_node = reusable_node_new(); - self->dot_graph_file = NULL; - self->cancellation_flag = NULL; - self->timeout_duration = 0; - self->language = NULL; - self->has_scanner_error = false; - self->external_scanner_payload = NULL; - self->end_clock = clock_null(); - self->operation_count = 0; - self->old_tree = NULL_SUBTREE; - 
self->included_range_differences = (TSRangeArray) array_new(); - self->included_range_difference_index = 0; - ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); - return self; -} - -void ts_parser_delete(t_parser *self) { - if (!self) return; - - ts_parser_set_language(self, NULL); - ts_stack_delete(self->stack); - if (self->reduce_actions.contents) { - array_delete(&self->reduce_actions); - } - if (self->included_range_differences.contents) { - array_delete(&self->included_range_differences); - } - if (self->old_tree.ptr) { - ts_subtree_release(&self->tree_pool, self->old_tree); - self->old_tree = NULL_SUBTREE; - } - ts_lexer_delete(&self->lexer); - ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); - ts_subtree_pool_delete(&self->tree_pool); - reusable_node_delete(&self->reusable_node); - array_delete(&self->trailing_extras); - array_delete(&self->trailing_extras2); - array_delete(&self->scratch_trees); - ts_free(self); -} - -const t_language *ts_parser_language(const t_parser *self) { - return self->language; -} - -bool ts_parser_set_language(t_parser *self, const t_language *language) { - ts_parser_reset(self); - ts_language_delete(self->language); - self->language = NULL; - - if (language) { - if ( - language->version > TREE_SITTER_LANGUAGE_VERSION || - language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION - ) return false; - - - } - - self->language = ts_language_copy(language); - return true; -} - -t_logger ts_parser_logger(const t_parser *self) { - return self->lexer.logger; -} - -void ts_parser_set_logger(t_parser *self, t_logger logger) { - self->lexer.logger = logger; -} - -void ts_parser_print_dot_graphs(t_parser *self, int fd) { - if (self->dot_graph_file) { - fclose(self->dot_graph_file); - } - - if (fd >= 0) { - #ifdef _WIN32 - self->dot_graph_file = _fdopen(fd, "a"); - #else - self->dot_graph_file = fdopen(fd, "a"); - #endif - } else { - self->dot_graph_file = NULL; - } -} - -const size_t 
*ts_parser_cancellation_flag(const t_parser *self) { - return (const size_t *)self->cancellation_flag; -} - -void ts_parser_set_cancellation_flag(t_parser *self, const size_t *flag) { - self->cancellation_flag = (const volatile size_t *)flag; -} - -uint64_t ts_parser_timeout_micros(const t_parser *self) { - return duration_to_micros(self->timeout_duration); -} - -void ts_parser_set_timeout_micros(t_parser *self, uint64_t timeout_micros) { - self->timeout_duration = duration_from_micros(timeout_micros); -} - -bool ts_parser_set_included_ranges( - t_parser *self, - const t_range *ranges, - uint32_t count -) { - return ts_lexer_set_included_ranges(&self->lexer, ranges, count); -} - -const t_range *ts_parser_included_ranges(const t_parser *self, uint32_t *count) { - return ts_lexer_included_ranges(&self->lexer, count); -} - -void ts_parser_reset(t_parser *self) { - ts_parser__external_scanner_destroy(self); - - if (self->old_tree.ptr) { - ts_subtree_release(&self->tree_pool, self->old_tree); - self->old_tree = NULL_SUBTREE; - } - - reusable_node_clear(&self->reusable_node); - ts_lexer_reset(&self->lexer, length_zero()); - ts_stack_clear(self->stack); - ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); - if (self->finished_tree.ptr) { - ts_subtree_release(&self->tree_pool, self->finished_tree); - self->finished_tree = NULL_SUBTREE; - } - self->accept_count = 0; - self->has_scanner_error = false; -} - -t_tree *ts_parser_parse( - t_parser *self, - const t_tree *old_tree, - t_input input -) { - t_tree *result = NULL; - if (!self->language || !input.read) return NULL; - - - - ts_lexer_set_input(&self->lexer, input); - array_clear(&self->included_range_differences); - self->included_range_difference_index = 0; - - if (ts_parser_has_outstanding_parse(self)) { - LOG("resume_parsing"); - } else { - ts_parser__external_scanner_create(self); - if (self->has_scanner_error) goto exit; - - if (old_tree) { - ts_subtree_retain(old_tree->root); - self->old_tree = 
old_tree->root; - ts_range_array_get_changed_ranges( - old_tree->included_ranges, old_tree->included_range_count, - self->lexer.included_ranges, self->lexer.included_range_count, - &self->included_range_differences - ); - reusable_node_reset(&self->reusable_node, old_tree->root); - LOG("parse_after_edit"); - LOG_TREE(self->old_tree); - for (unsigned i = 0; i < self->included_range_differences.size; i++) { - t_range *range = &self->included_range_differences.contents[i]; - LOG("different_included_range %u - %u", range->start_byte, range->end_byte); - } - } else { - reusable_node_clear(&self->reusable_node); - LOG("new_parse"); - } - } - - self->operation_count = 0; - if (self->timeout_duration) { - self->end_clock = clock_after(clock_now(), self->timeout_duration); - } else { - self->end_clock = clock_null(); - } - - uint32_t position = 0, last_position = 0, version_count = 0; - do { - for ( - StackVersion version = 0; - version_count = ts_stack_version_count(self->stack), - version < version_count; - version++ - ) { - bool allow_node_reuse = version_count == 1; - while (ts_stack_is_active(self->stack, version)) { - LOG( - "process version:%u, version_count:%u, state:%d, row:%u, col:%u", - version, - ts_stack_version_count(self->stack), - ts_stack_state(self->stack, version), - ts_stack_position(self->stack, version).extent.row, - ts_stack_position(self->stack, version).extent.column - ); - - if (!ts_parser__advance(self, version, allow_node_reuse)) { - if (self->has_scanner_error) goto exit; - return NULL; - } - - LOG_STACK(); - - position = ts_stack_position(self->stack, version).bytes; - if (position > last_position || (version > 0 && position == last_position)) { - last_position = position; - break; - } - } - } - - // After advancing each version of the stack, re-sort the versions by their cost, - // removing any versions that are no longer worth pursuing. 
- unsigned min_error_cost = ts_parser__condense_stack(self); - - // If there's already a finished parse tree that's better than any in-progress version, - // then terminate parsing. Clear the parse stack to remove any extra references to subtrees - // within the finished tree, ensuring that these subtrees can be safely mutated in-place - // for rebalancing. - if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) { - ts_stack_clear(self->stack); - break; - } - - while (self->included_range_difference_index < self->included_range_differences.size) { - t_range *range = &self->included_range_differences.contents[self->included_range_difference_index]; - if (range->end_byte <= position) { - self->included_range_difference_index++; - } else { - break; - } - } - } while (version_count != 0); - - assert(self->finished_tree.ptr); - ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language); - LOG("done"); - LOG_TREE(self->finished_tree); - - result = ts_tree_new( - self->finished_tree, - self->language, - self->lexer.included_ranges, - self->lexer.included_range_count - ); - self->finished_tree = NULL_SUBTREE; - -exit: - ts_parser_reset(self); - return result; -} - -t_tree *ts_parser_parse_string( - t_parser *self, - const t_tree *old_tree, - const char *string, - uint32_t length -) { - return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8); -} - -t_tree *ts_parser_parse_string_encoding( - t_parser *self, - const t_tree *old_tree, - const char *string, - uint32_t length, - t_input_encoding encoding -) { - TSStringInput input = {string, length}; - return ts_parser_parse(self, old_tree, (t_input) { - &input, - ts_string_input_read, - encoding, - }); -} - -#undef LOG -#include "src/api.h" -#include "src/alloc.h" -#include "src/array.h" -#include "src/language.h" -#include "src/point.h" -#include "src/tree_cursor.h" -// #include "src/unicode.h" -#include - -// #define DEBUG_ANALYZE_QUERY 
-// #define DEBUG_EXECUTE_QUERY - -#define MAX_STEP_CAPTURE_COUNT 3 -#define MAX_NEGATED_FIELD_COUNT 8 -#define MAX_STATE_PREDECESSOR_COUNT 256 -#define MAX_ANALYSIS_STATE_DEPTH 8 -#define MAX_ANALYSIS_ITERATION_COUNT 256 - -/* - * Stream - A sequence of unicode characters derived from a UTF8 string. - * This struct is used in parsing queries from S-expressions. - */ -typedef struct { - const char *input; - const char *start; - const char *end; - int32_t next; - uint8_t next_size; -} Stream; - -/* - * QueryStep - A step in the process of matching a query. Each node within - * a query S-expression corresponds to one of these steps. An entire pattern - * is represented as a sequence of these steps. The basic properties of a - * node are represented by these fields: - * - `symbol` - The grammar symbol to match. A zero value represents the - * wildcard symbol, '_'. - * - `field` - The field name to match. A zero value means that a field name - * was not specified. - * - `capture_ids` - An array of integers representing the names of captures - * associated with this node in the pattern, terminated by a `NONE` value. - * - `depth` - The depth where this node occurs in the pattern. The root node - * of the pattern has depth zero. - * - `negated_field_list_id` - An id representing a set of fields that must - * not be present on a node matching this step. - * - * Steps have some additional fields in order to handle the `.` (or "anchor") operator, - * which forbids additional child nodes: - * - `is_immediate` - Indicates that the node matching this step cannot be preceded - * by other sibling nodes that weren't specified in the pattern. - * - `is_last_child` - Indicates that the node matching this step cannot have any - * subsequent named siblings. - * - * For simple patterns, steps are matched in sequential order. 
But in order to - * handle alternative/repeated/optional sub-patterns, query steps are not always - * structured as a linear sequence; they sometimes need to split and merge. This - * is done using the following fields: - * - `alternative_index` - The index of a different query step that serves as - * an alternative to this step. A `NONE` value represents no alternative. - * When a query state reaches a step with an alternative index, the state - * is duplicated, with one copy remaining at the original step, and one copy - * moving to the alternative step. The alternative may have its own alternative - * step, so this splitting is an iterative process. - * - `is_dead_end` - Indicates that this state cannot be passed directly, and - * exists only in order to redirect to an alternative index, with no splitting. - * - `is_pass_through` - Indicates that state has no matching logic of its own, - * and exists only to split a state. One copy of the state advances immediately - * to the next step, and one moves to the alternative step. - * - `alternative_is_immediate` - Indicates that this step's alternative step - * should be treated as if `is_immediate` is true. - * - * Steps also store some derived state that summarizes how they relate to other - * steps within the same pattern. This is used to optimize the matching process: - * - `contains_captures` - Indicates that this step or one of its child steps - * has a non-empty `capture_ids` list. - * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then - * it and all of its subsequent sibling steps within the same parent pattern - * are guaranteed to match. - * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but - * for the entire top-level pattern. When iterating through a query's - * captures using `ts_query_cursor_next_capture`, this field is used to - * detect that a capture can safely be returned from a match that has not - * even completed yet. 
- */ -typedef struct { - t_symbol symbol; - t_symbol supertype_symbol; - t_field_id field; - uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; - uint16_t depth; - uint16_t alternative_index; - uint16_t negated_field_list_id; - bool is_named: 1; - bool is_immediate: 1; - bool is_last_child: 1; - bool is_pass_through: 1; - bool is_dead_end: 1; - bool alternative_is_immediate: 1; - bool contains_captures: 1; - bool root_pattern_guaranteed: 1; - bool parent_pattern_guaranteed: 1; -} QueryStep; - -/* - * Slice - A slice of an external array. Within a query, capture names, - * literal string values, and predicate step information are stored in three - * contiguous arrays. Individual captures, string values, and predicates are - * represented as slices of these three arrays. - */ -typedef struct { - uint32_t offset; - uint32_t length; -} Slice; - -/* - * SymbolTable - a two-way mapping of strings to ids. - */ -typedef struct { - Array(char) characters; - Array(Slice) slices; -} SymbolTable; - -/** - * CaptureQuantififers - a data structure holding the quantifiers of pattern captures. - */ -typedef Array(uint8_t) CaptureQuantifiers; - -/* - * PatternEntry - Information about the starting point for matching a particular - * pattern. These entries are stored in a 'pattern map' - a sorted array that - * makes it possible to efficiently lookup patterns based on the symbol for their - * first step. The entry consists of the following fields: - * - `pattern_index` - the index of the pattern within the query - * - `step_index` - the index of the pattern's first step in the shared `steps` array - * - `is_rooted` - whether or not the pattern has a single root node. This property - * affects decisions about whether or not to start the pattern for nodes outside - * of a QueryCursor's range restriction. 
- */ -typedef struct { - uint16_t step_index; - uint16_t pattern_index; - bool is_rooted; -} PatternEntry; - -typedef struct { - Slice steps; - Slice predicate_steps; - uint32_t start_byte; - bool is_non_local; -} QueryPattern; - -typedef struct { - uint32_t byte_offset; - uint16_t step_index; -} StepOffset; - -/* - * QueryState - The state of an in-progress match of a particular pattern - * in a query. While executing, a `TSQueryCursor` must keep track of a number - * of possible in-progress matches. Each of those possible matches is - * represented as one of these states. Fields: - * - `id` - A numeric id that is exposed to the public API. This allows the - * caller to remove a given match, preventing any more of its captures - * from being returned. - * - `start_depth` - The depth in the tree where the first step of the state's - * pattern was matched. - * - `pattern_index` - The pattern that the state is matching. - * - `consumed_capture_count` - The number of captures from this match that - * have already been returned. - * - `capture_list_id` - A numeric id that can be used to retrieve the state's - * list of captures from the `CaptureListPool`. - * - `seeking_immediate_match` - A flag that indicates that the state's next - * step must be matched by the very next sibling. This is used when - * processing repetitions. - * - `has_in_progress_alternatives` - A flag that indicates that there is are - * other states that have the same captures as this state, but are at - * different steps in their pattern. This means that in order to obey the - * 'longest-match' rule, this state should not be returned as a match until - * it is clear that there can be no other alternative match with more captures. 
- */ -typedef struct { - uint32_t id; - uint32_t capture_list_id; - uint16_t start_depth; - uint16_t step_index; - uint16_t pattern_index; - uint16_t consumed_capture_count: 12; - bool seeking_immediate_match: 1; - bool has_in_progress_alternatives: 1; - bool dead: 1; - bool needs_parent: 1; -} QueryState; - -typedef Array(t_query_capture) CaptureList; - -/* - * CaptureListPool - A collection of *lists* of captures. Each query state needs - * to maintain its own list of captures. To avoid repeated allocations, this struct - * maintains a fixed set of capture lists, and keeps track of which ones are - * currently in use by a query state. - */ -typedef struct { - Array(CaptureList) list; - CaptureList empty_list; - // The maximum number of capture lists that we are allowed to allocate. We - // never allow `list` to allocate more entries than this, dropping pending - // matches if needed to stay under the limit. - uint32_t max_capture_list_count; - // The number of capture lists allocated in `list` that are not currently in - // use. We reuse those existing-but-unused capture lists before trying to - // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture - // list's length to indicate that it's not in use. - uint32_t free_capture_list_count; -} CaptureListPool; - -/* - * AnalysisState - The state needed for walking the parse table when analyzing - * a query pattern, to determine at which steps the pattern might fail to match. 
- */ -typedef struct { - t_state_id parse_state; - t_symbol parent_symbol; - uint16_t child_index; - t_field_id field_id: 15; - bool done: 1; -} AnalysisStateEntry; - -typedef struct { - AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH]; - uint16_t depth; - uint16_t step_index; - t_symbol root_symbol; -} AnalysisState; - -typedef Array(AnalysisState *) AnalysisStateSet; - -typedef struct { - AnalysisStateSet states; - AnalysisStateSet next_states; - AnalysisStateSet deeper_states; - AnalysisStateSet state_pool; - Array(uint16_t) final_step_indices; - Array(t_symbol) finished_parent_symbols; - bool did_abort; -} QueryAnalysis; - -/* - * AnalysisSubgraph - A subset of the states in the parse table that are used - * in constructing nodes with a certain symbol. Each state is accompanied by - * some information about the possible node that could be produced in - * downstream states. - */ -typedef struct { - t_state_id state; - uint16_t production_id; - uint8_t child_index: 7; - bool done: 1; -} AnalysisSubgraphNode; - -typedef struct { - t_symbol symbol; - Array(t_state_id) start_states; - Array(AnalysisSubgraphNode) nodes; -} AnalysisSubgraph; - -typedef Array(AnalysisSubgraph) AnalysisSubgraphArray; - -/* - * StatePredecessorMap - A map that stores the predecessors of each parse state. - * This is used during query analysis to determine which parse states can lead - * to which reduce actions. - */ -typedef struct { - t_state_id *contents; -} StatePredecessorMap; - -/* - * TSQuery - A tree query, compiled from a string of S-expressions. The query - * itself is immutable. The mutable state used in the process of executing the - * query is stored in a `TSQueryCursor`. 
- */ -struct t_query { - SymbolTable captures; - SymbolTable predicate_values; - Array(CaptureQuantifiers) capture_quantifiers; - Array(QueryStep) steps; - Array(PatternEntry) pattern_map; - Array(t_query_predicate_step) predicate_steps; - Array(QueryPattern) patterns; - Array(StepOffset) step_offsets; - Array(t_field_id) negated_fields; - Array(char) string_buffer; - Array(t_symbol) repeat_symbols_with_rootless_patterns; - const t_language *language; - uint16_t wildcard_root_pattern_count; -}; - -/* - * TSQueryCursor - A stateful struct used to execute a query on a tree. - */ -struct t_query_cursor { - const t_query *query; - t_tree_cursor cursor; - Array(QueryState) states; - Array(QueryState) finished_states; - CaptureListPool capture_list_pool; - uint32_t depth; - uint32_t max_start_depth; - uint32_t start_byte; - uint32_t end_byte; - t_point start_point; - t_point end_point; - uint32_t next_state_id; - bool on_visible_node; - bool ascending; - bool halted; - bool did_exceed_match_limit; -}; - -static const t_query_error PARENT_DONE = -1; -static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; -static const uint16_t NONE = UINT16_MAX; -static const t_symbol WILDCARD_SYMBOL = 0; - -/********** - * Stream - **********/ - -// Advance to the next unicode code point in the stream. -static bool stream_advance(Stream *self) { - self->input += self->next_size; - if (self->input < self->end) { - uint32_t size = ts_decode_ascii( - (const uint8_t *)self->input, - (uint32_t)(self->end - self->input), - &self->next - ); - if (size > 0) { - self->next_size = size; - return true; - } - } else { - self->next_size = 0; - self->next = '\0'; - } - return false; -} - -// Reset the stream to the given input position, represented as a pointer -// into the input string. 
-static void stream_reset(Stream *self, const char *input) { - self->input = input; - self->next_size = 0; - stream_advance(self); -} - -static Stream stream_new(const char *string, uint32_t length) { - Stream self = { - .next = 0, - .input = string, - .start = string, - .end = string + length, - }; - stream_advance(&self); - return self; -} - -static void stream_skip_whitespace(Stream *self) { - for (;;) { - if (iswspace(self->next)) { - stream_advance(self); - } else if (self->next == ';') { - // skip over comments - stream_advance(self); - while (self->next && self->next != '\n') { - if (!stream_advance(self)) break; - } - } else { - break; - } - } -} - -static bool stream_is_ident_start(Stream *self) { - return iswalnum(self->next) || self->next == '_' || self->next == '-'; -} - -static void stream_scan_identifier(Stream *stream) { - do { - stream_advance(stream); - } while ( - iswalnum(stream->next) || - stream->next == '_' || - stream->next == '-' || - stream->next == '.' || - stream->next == '?' || - stream->next == '!' - ); -} - -static uint32_t stream_offset(Stream *self) { - return (uint32_t)(self->input - self->start); -} - -/****************** - * CaptureListPool - ******************/ - -static CaptureListPool capture_list_pool_new(void) { - return (CaptureListPool) { - .list = array_new(), - .empty_list = array_new(), - .max_capture_list_count = UINT32_MAX, - .free_capture_list_count = 0, - }; -} - -static void capture_list_pool_reset(CaptureListPool *self) { - for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { - // This invalid size means that the list is not in use. 
- self->list.contents[i].size = UINT32_MAX; - } - self->free_capture_list_count = self->list.size; -} - -static void capture_list_pool_delete(CaptureListPool *self) { - for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { - array_delete(&self->list.contents[i]); - } - array_delete(&self->list); -} - -static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) { - if (id >= self->list.size) return &self->empty_list; - return &self->list.contents[id]; -} - -static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) { - assert(id < self->list.size); - return &self->list.contents[id]; -} - -static bool capture_list_pool_is_empty(const CaptureListPool *self) { - // The capture list pool is empty if all allocated lists are in use, and we - // have reached the maximum allowed number of allocated lists. - return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count; -} - -static uint16_t capture_list_pool_acquire(CaptureListPool *self) { - // First see if any already allocated capture list is currently unused. - if (self->free_capture_list_count > 0) { - for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { - if (self->list.contents[i].size == UINT32_MAX) { - array_clear(&self->list.contents[i]); - self->free_capture_list_count--; - return i; - } - } - } - - // Otherwise allocate and initialize a new capture list, as long as that - // doesn't put us over the requested maximum. 
- uint32_t i = self->list.size; - if (i >= self->max_capture_list_count) { - return NONE; - } - CaptureList list; - array_init(&list); - array_push(&self->list, list); - return i; -} - -static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { - if (id >= self->list.size) return; - self->list.contents[id].size = UINT32_MAX; - self->free_capture_list_count++; -} - -/************** - * Quantifiers - **************/ - -static t_quantifier quantifier_mul( - t_quantifier left, - t_quantifier right -) { - switch (left) - { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - case TSQuantifierOne: - return TSQuantifierZeroOrOne; - case TSQuantifierZeroOrMore: - case TSQuantifierOneOrMore: - return TSQuantifierZeroOrMore; - }; - break; - case TSQuantifierZeroOrMore: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierZeroOrMore; - }; - break; - case TSQuantifierOne: - return right; - case TSQuantifierOneOrMore: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - } - return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
-} - -static t_quantifier quantifier_join( - t_quantifier left, - t_quantifier right -) { - switch (left) - { - case TSQuantifierZero: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - case TSQuantifierOne: - return TSQuantifierZeroOrOne; - case TSQuantifierZeroOrMore: - case TSQuantifierOneOrMore: - return TSQuantifierZeroOrMore; - }; - break; - case TSQuantifierZeroOrOne: - switch (right) { - case TSQuantifierZero: - case TSQuantifierZeroOrOne: - case TSQuantifierOne: - return TSQuantifierZeroOrOne; - break; - case TSQuantifierZeroOrMore: - case TSQuantifierOneOrMore: - return TSQuantifierZeroOrMore; - break; - }; - break; - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - switch (right) { - case TSQuantifierZero: - case TSQuantifierZeroOrOne: - return TSQuantifierZeroOrOne; - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - return TSQuantifierOne; - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - case TSQuantifierOneOrMore: - switch (right) { - case TSQuantifierZero: - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - } - return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
-} - -static t_quantifier quantifier_add( - t_quantifier left, - t_quantifier right -) { - switch (left) - { - case TSQuantifierZero: - return right; - case TSQuantifierZeroOrOne: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZeroOrOne; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - case TSQuantifierZeroOrMore: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZeroOrMore; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - case TSQuantifierOne: - switch (right) { - case TSQuantifierZero: - return TSQuantifierOne; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - } - return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
-} - -// Create new capture quantifiers structure -static CaptureQuantifiers capture_quantifiers_new(void) { - return (CaptureQuantifiers) array_new(); -} - -// Delete capture quantifiers structure -static void capture_quantifiers_delete( - CaptureQuantifiers *self -) { - array_delete(self); -} - -// Clear capture quantifiers structure -static void capture_quantifiers_clear( - CaptureQuantifiers *self -) { - array_clear(self); -} - -// Replace capture quantifiers with the given quantifiers -static void capture_quantifiers_replace( - CaptureQuantifiers *self, - CaptureQuantifiers *quantifiers -) { - array_clear(self); - array_push_all(self, quantifiers); -} - -// Return capture quantifier for the given capture id -static t_quantifier capture_quantifier_for_id( - const CaptureQuantifiers *self, - uint16_t id -) { - return (self->size <= id) ? TSQuantifierZero : (t_quantifier) *array_get(self, id); -} - -// Add the given quantifier to the current value for id -static void capture_quantifiers_add_for_id( - CaptureQuantifiers *self, - uint16_t id, - t_quantifier quantifier -) { - if (self->size <= id) { - array_grow_by(self, id + 1 - self->size); - } - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_add((t_quantifier) *own_quantifier, quantifier); -} - -// Point-wise add the given quantifiers to the current values -static void capture_quantifiers_add_all( - CaptureQuantifiers *self, - CaptureQuantifiers *quantifiers -) { - if (self->size < quantifiers->size) { - array_grow_by(self, quantifiers->size - self->size); - } - for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) { - uint8_t *quantifier = array_get(quantifiers, id); - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_add((t_quantifier) *own_quantifier, (t_quantifier) *quantifier); - } -} - -// Join the given quantifier with the current values -static void capture_quantifiers_mul( - CaptureQuantifiers *self, - t_quantifier 
quantifier -) { - for (uint16_t id = 0; id < (uint16_t)self->size; id++) { - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_mul((t_quantifier) *own_quantifier, quantifier); - } -} - -// Point-wise join the quantifiers from a list of alternatives with the current values -static void capture_quantifiers_join_all( - CaptureQuantifiers *self, - CaptureQuantifiers *quantifiers -) { - if (self->size < quantifiers->size) { - array_grow_by(self, quantifiers->size - self->size); - } - for (uint32_t id = 0; id < quantifiers->size; id++) { - uint8_t *quantifier = array_get(quantifiers, id); - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_join((t_quantifier) *own_quantifier, (t_quantifier) *quantifier); - } - for (uint32_t id = quantifiers->size; id < self->size; id++) { - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_join((t_quantifier) *own_quantifier, TSQuantifierZero); - } -} - -/************** - * SymbolTable - **************/ - -static SymbolTable symbol_table_new(void) { - return (SymbolTable) { - .characters = array_new(), - .slices = array_new(), - }; -} - -static void symbol_table_delete(SymbolTable *self) { - array_delete(&self->characters); - array_delete(&self->slices); -} - -static int symbol_table_id_for_name( - const SymbolTable *self, - const char *name, - uint32_t length -) { - for (unsigned i = 0; i < self->slices.size; i++) { - Slice slice = self->slices.contents[i]; - if ( - slice.length == length && - !strncmp(&self->characters.contents[slice.offset], name, length) - ) return i; - } - return -1; -} - -static const char *symbol_table_name_for_id( - const SymbolTable *self, - uint16_t id, - uint32_t *length -) { - Slice slice = self->slices.contents[id]; - *length = slice.length; - return &self->characters.contents[slice.offset]; -} - -static uint16_t symbol_table_insert_name( - SymbolTable *self, - const char *name, - uint32_t length 
-) { - int id = symbol_table_id_for_name(self, name, length); - if (id >= 0) return (uint16_t)id; - Slice slice = { - .offset = self->characters.size, - .length = length, - }; - array_grow_by(&self->characters, length + 1); - memcpy(&self->characters.contents[slice.offset], name, length); - self->characters.contents[self->characters.size - 1] = 0; - array_push(&self->slices, slice); - return self->slices.size - 1; -} - -/************ - * QueryStep - ************/ - -static QueryStep query_step__new( - t_symbol symbol, - uint16_t depth, - bool is_immediate -) { - QueryStep step = { - .symbol = symbol, - .depth = depth, - .field = 0, - .alternative_index = NONE, - .negated_field_list_id = 0, - .contains_captures = false, - .is_last_child = false, - .is_named = false, - .is_pass_through = false, - .is_dead_end = false, - .root_pattern_guaranteed = false, - .is_immediate = is_immediate, - .alternative_is_immediate = false, - }; - for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { - step.capture_ids[i] = NONE; - } - return step; -} - -static void query_step__add_capture(QueryStep *self, uint16_t capture_id) { - for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { - if (self->capture_ids[i] == NONE) { - self->capture_ids[i] = capture_id; - break; - } - } -} - -static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { - for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { - if (self->capture_ids[i] == capture_id) { - self->capture_ids[i] = NONE; - while (i + 1 < MAX_STEP_CAPTURE_COUNT) { - if (self->capture_ids[i + 1] == NONE) break; - self->capture_ids[i] = self->capture_ids[i + 1]; - self->capture_ids[i + 1] = NONE; - i++; - } - break; - } - } -} - -/********************** - * StatePredecessorMap - **********************/ - -static inline StatePredecessorMap state_predecessor_map_new( - const t_language *language -) { - return (StatePredecessorMap) { - .contents = ts_calloc( - (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 
1), - sizeof(t_state_id) - ), - }; -} - -static inline void state_predecessor_map_delete(StatePredecessorMap *self) { - ts_free(self->contents); -} - -static inline void state_predecessor_map_add( - StatePredecessorMap *self, - t_state_id state, - t_state_id predecessor -) { - size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); - t_state_id *count = &self->contents[index]; - if ( - *count == 0 || - (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor) - ) { - (*count)++; - self->contents[index + *count] = predecessor; - } -} - -static inline const t_state_id *state_predecessor_map_get( - const StatePredecessorMap *self, - t_state_id state, - unsigned *count -) { - size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); - *count = self->contents[index]; - return &self->contents[index + 1]; -} - -/**************** - * AnalysisState - ****************/ - -static unsigned analysis_state__recursion_depth(const AnalysisState *self) { - unsigned result = 0; - for (unsigned i = 0; i < self->depth; i++) { - t_symbol symbol = self->stack[i].parent_symbol; - for (unsigned j = 0; j < i; j++) { - if (self->stack[j].parent_symbol == symbol) { - result++; - break; - } - } - } - return result; -} - -static inline int analysis_state__compare_position( - AnalysisState *const *self, - AnalysisState *const *other -) { - for (unsigned i = 0; i < (*self)->depth; i++) { - if (i >= (*other)->depth) return -1; - if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1; - if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1; - } - if ((*self)->depth < (*other)->depth) return 1; - if ((*self)->step_index < (*other)->step_index) return -1; - if ((*self)->step_index > (*other)->step_index) return 1; - return 0; -} - -static inline int analysis_state__compare( - AnalysisState *const *self, - AnalysisState *const *other -) { - int result = analysis_state__compare_position(self, other); - if 
(result != 0) return result; - for (unsigned i = 0; i < (*self)->depth; i++) { - if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1; - if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1; - if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1; - if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1; - if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1; - if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1; - } - return 0; -} - -static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { - if (self->depth == 0) { - return &self->stack[0]; - } - return &self->stack[self->depth - 1]; -} - -static inline bool analysis_state__has_supertype(AnalysisState *self, t_symbol symbol) { - for (unsigned i = 0; i < self->depth; i++) { - if (self->stack[i].parent_symbol == symbol) return true; - } - return false; -} - -/****************** - * AnalysisStateSet - ******************/ - -// Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by -// cloning one from scratch. -static inline AnalysisState *analysis_state_pool__clone_or_reuse( - AnalysisStateSet *self, - AnalysisState *borrowed_item -) { - AnalysisState *new_item; - if (self->size) { - new_item = array_pop(self); - } else { - new_item = ts_malloc(sizeof(AnalysisState)); - } - *new_item = *borrowed_item; - return new_item; -} - -// Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this -// set. The set does not contain duplicates, so if the item is already present, it will not be -// inserted, and no clone will be made. -// -// The caller retains ownership of the passed-in memory. However, the clone that is created by this -// function will be managed by the state set. 
-static inline void analysis_state_set__insert_sorted( - AnalysisStateSet *self, - AnalysisStateSet *pool, - AnalysisState *borrowed_item -) { - unsigned index, exists; - array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists); - if (!exists) { - AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); - array_insert(self, index, new_item); - } -} - -// Inserts a clone of the passed-in item at the end position of this list. -// -// IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function -// `analysis_state__compare`) than largest item already in this set. If items are inserted in the -// wrong order, the set will not function properly for future use. -// -// The caller retains ownership of the passed-in memory. However, the clone that is created by this -// function will be managed by the state set. -static inline void analysis_state_set__push( - AnalysisStateSet *self, - AnalysisStateSet *pool, - AnalysisState *borrowed_item -) { - AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); - array_push(self, new_item); -} - -// Removes all items from this set, returning it to an empty state. -static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) { - array_push_all(pool, self); - array_clear(self); -} - -// Releases all memory that is managed with this state set, including any items currently present. -// After calling this function, the set is no longer suitable for use. 
-static inline void analysis_state_set__delete(AnalysisStateSet *self) { - for (unsigned i = 0; i < self->size; i++) { - ts_free(self->contents[i]); - } - array_delete(self); -} - -/**************** - * QueryAnalyzer - ****************/ - -static inline QueryAnalysis query_analysis__new(void) { - return (QueryAnalysis) { - .states = array_new(), - .next_states = array_new(), - .deeper_states = array_new(), - .state_pool = array_new(), - .final_step_indices = array_new(), - .finished_parent_symbols = array_new(), - .did_abort = false, - }; -} - -static inline void query_analysis__delete(QueryAnalysis *self) { - analysis_state_set__delete(&self->states); - analysis_state_set__delete(&self->next_states); - analysis_state_set__delete(&self->deeper_states); - analysis_state_set__delete(&self->state_pool); - array_delete(&self->final_step_indices); - array_delete(&self->finished_parent_symbols); -} - -/*********************** - * AnalysisSubgraphNode - ***********************/ - -static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) { - if (self->state < other->state) return -1; - if (self->state > other->state) return 1; - if (self->child_index < other->child_index) return -1; - if (self->child_index > other->child_index) return 1; - if (self->done < other->done) return -1; - if (self->done > other->done) return 1; - if (self->production_id < other->production_id) return -1; - if (self->production_id > other->production_id) return 1; - return 0; -} - -/********* - * Query - *********/ - -// The `pattern_map` contains a mapping from TSSymbol values to indices in the -// `steps` array. For a given syntax node, the `pattern_map` makes it possible -// to quickly find the starting steps of all of the patterns whose root matches -// that node. 
Each entry has two fields: a `pattern_index`, which identifies one -// of the patterns in the query, and a `step_index`, which indicates the start -// offset of that pattern's steps within the `steps` array. -// -// The entries are sorted by the patterns' root symbols, and lookups use a -// binary search. This ensures that the cost of this initial lookup step -// scales logarithmically with the number of patterns in the query. -// -// This returns `true` if the symbol is present and `false` otherwise. -// If the symbol is not present `*result` is set to the index where the -// symbol should be inserted. -static inline bool ts_query__pattern_map_search( - const t_query *self, - t_symbol needle, - uint32_t *result -) { - uint32_t base_index = self->wildcard_root_pattern_count; - uint32_t size = self->pattern_map.size - base_index; - if (size == 0) { - *result = base_index; - return false; - } - while (size > 1) { - uint32_t half_size = size / 2; - uint32_t mid_index = base_index + half_size; - t_symbol mid_symbol = self->steps.contents[ - self->pattern_map.contents[mid_index].step_index - ].symbol; - if (needle > mid_symbol) base_index = mid_index; - size -= half_size; - } - - t_symbol symbol = self->steps.contents[ - self->pattern_map.contents[base_index].step_index - ].symbol; - - if (needle > symbol) { - base_index++; - if (base_index < self->pattern_map.size) { - symbol = self->steps.contents[ - self->pattern_map.contents[base_index].step_index - ].symbol; - } - } - - *result = base_index; - return needle == symbol; -} - -// Insert a new pattern's start index into the pattern map, maintaining -// the pattern map's ordering invariant. -static inline void ts_query__pattern_map_insert( - t_query *self, - t_symbol symbol, - PatternEntry new_entry -) { - uint32_t index; - ts_query__pattern_map_search(self, symbol, &index); - - // Ensure that the entries are sorted not only by symbol, but also - // by pattern_index. 
This way, states for earlier patterns will be - // initiated first, which allows the ordering of the states array - // to be maintained more efficiently. - while (index < self->pattern_map.size) { - PatternEntry *entry = &self->pattern_map.contents[index]; - if ( - self->steps.contents[entry->step_index].symbol == symbol && - entry->pattern_index < new_entry.pattern_index - ) { - index++; - } else { - break; - } - } - - array_insert(&self->pattern_map, index, new_entry); -} - -// Walk the subgraph for this non-terminal, tracking all of the possible -// sequences of progress within the pattern. -static void ts_query__perform_analysis( - t_query *self, - const AnalysisSubgraphArray *subgraphs, - QueryAnalysis *analysis -) { - unsigned recursion_depth_limit = 0; - unsigned prev_final_step_count = 0; - array_clear(&analysis->final_step_indices); - array_clear(&analysis->finished_parent_symbols); - - for (unsigned iteration = 0;; iteration++) { - if (iteration == MAX_ANALYSIS_ITERATION_COUNT) { - analysis->did_abort = true; - break; - } - - #ifdef DEBUG_ANALYZE_QUERY - printf("Iteration: %u. 
Final step indices:", iteration); - for (unsigned j = 0; j < analysis->final_step_indices.size; j++) { - printf(" %4u", analysis->final_step_indices.contents[j]); - } - printf("\n"); - for (unsigned j = 0; j < analysis->states.size; j++) { - AnalysisState *state = analysis->states.contents[j]; - printf(" %3u: step: %u, stack: [", j, state->step_index); - for (unsigned k = 0; k < state->depth; k++) { - printf( - " {%s, child: %u, state: %4u", - self->language->symbol_names[state->stack[k].parent_symbol], - state->stack[k].child_index, - state->stack[k].parse_state - ); - if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]); - if (state->stack[k].done) printf(", DONE"); - printf("}"); - } - printf(" ]\n"); - } - #endif - - // If no further progress can be made within the current recursion depth limit, then - // bump the depth limit by one, and continue to process the states the exceeded the - // limit. But only allow this if progress has been made since the last time the depth - // limit was increased. - if (analysis->states.size == 0) { - if ( - analysis->deeper_states.size > 0 && - analysis->final_step_indices.size > prev_final_step_count - ) { - #ifdef DEBUG_ANALYZE_QUERY - printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1); - #endif - - prev_final_step_count = analysis->final_step_indices.size; - recursion_depth_limit++; - AnalysisStateSet _states = analysis->states; - analysis->states = analysis->deeper_states; - analysis->deeper_states = _states; - continue; - } - - break; - } - - analysis_state_set__clear(&analysis->next_states, &analysis->state_pool); - for (unsigned j = 0; j < analysis->states.size; j++) { - AnalysisState * const state = analysis->states.contents[j]; - - // For efficiency, it's important to avoid processing the same analysis state more - // than once. To achieve this, keep the states in order of ascending position within - // their hypothetical syntax trees. 
In each iteration of this loop, start by advancing - // the states that have made the least progress. Avoid advancing states that have already - // made more progress. - if (analysis->next_states.size > 0) { - int comparison = analysis_state__compare_position( - &state, - array_back(&analysis->next_states) - ); - if (comparison == 0) { - analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state); - continue; - } else if (comparison > 0) { - #ifdef DEBUG_ANALYZE_QUERY - printf("Terminate iteration at state %u\n", j); - #endif - while (j < analysis->states.size) { - analysis_state_set__push( - &analysis->next_states, - &analysis->state_pool, - analysis->states.contents[j] - ); - j++; - } - break; - } - } - - const t_state_id parse_state = analysis_state__top(state)->parse_state; - const t_symbol parent_symbol = analysis_state__top(state)->parent_symbol; - const t_field_id parent_field_id = analysis_state__top(state)->field_id; - const unsigned child_index = analysis_state__top(state)->child_index; - const QueryStep * const step = &self->steps.contents[state->step_index]; - - unsigned subgraph_index, exists; - array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); - if (!exists) continue; - const AnalysisSubgraph *subgraph = &subgraphs->contents[subgraph_index]; - - // Follow every possible path in the parse table, but only visit states that - // are part of the subgraph for the current symbol. 
- LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state); - while (ts_lookahead_iterator__next(&lookahead_iterator)) { - t_symbol sym = lookahead_iterator.symbol; - - AnalysisSubgraphNode successor = { - .state = parse_state, - .child_index = child_index, - }; - if (lookahead_iterator.action_count) { - const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1]; - if (action->type == TSParseActionTypeShift) { - if (!action->shift.extra) { - successor.state = action->shift.state; - successor.child_index++; - } - } else { - continue; - } - } else if (lookahead_iterator.next_state != 0) { - successor.state = lookahead_iterator.next_state; - successor.child_index++; - } else { - continue; - } - - unsigned node_index; - array_search_sorted_with( - &subgraph->nodes, - analysis_subgraph_node__compare, &successor, - &node_index, &exists - ); - while (node_index < subgraph->nodes.size) { - AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++]; - if (node->state != successor.state || node->child_index != successor.child_index) break; - - // Use the subgraph to determine what alias and field will eventually be applied - // to this child node. - t_symbol alias = ts_language_alias_at(self->language, node->production_id, child_index); - t_symbol visible_symbol = alias - ? alias - : self->language->symbol_metadata[sym].visible - ? self->language->public_symbol_map[sym] - : 0; - t_field_id field_id = parent_field_id; - if (!field_id) { - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); - for (; field_map != field_map_end; field_map++) { - if (!field_map->inherited && field_map->child_index == child_index) { - field_id = field_map->field_id; - break; - } - } - } - - // Create a new state that has advanced past this hypothetical subtree. 
- AnalysisState next_state = *state; - AnalysisStateEntry *next_state_top = analysis_state__top(&next_state); - next_state_top->child_index = successor.child_index; - next_state_top->parse_state = successor.state; - if (node->done) next_state_top->done = true; - - // Determine if this hypothetical child node would match the current step - // of the query pattern. - bool does_match = false; - if (visible_symbol) { - does_match = true; - if (step->symbol == WILDCARD_SYMBOL) { - if ( - step->is_named && - !self->language->symbol_metadata[visible_symbol].named - ) does_match = false; - } else if (step->symbol != visible_symbol) { - does_match = false; - } - if (step->field && step->field != field_id) { - does_match = false; - } - if ( - step->supertype_symbol && - !analysis_state__has_supertype(state, step->supertype_symbol) - ) does_match = false; - } - - // If this child is hidden, then descend into it and walk through its children. - // If the top entry of the stack is at the end of its rule, then that entry can - // be replaced. Otherwise, push a new entry onto the stack. - else if (sym >= self->language->token_count) { - if (!next_state_top->done) { - if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) { - #ifdef DEBUG_ANALYZE_QUERY - printf("Exceeded depth limit for state %u\n", j); - #endif - - analysis->did_abort = true; - continue; - } - - next_state.depth++; - next_state_top = analysis_state__top(&next_state); - } - - *next_state_top = (AnalysisStateEntry) { - .parse_state = parse_state, - .parent_symbol = sym, - .child_index = 0, - .field_id = field_id, - .done = false, - }; - - if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) { - analysis_state_set__insert_sorted( - &analysis->deeper_states, - &analysis->state_pool, - &next_state - ); - continue; - } - } - - // Pop from the stack when this state reached the end of its current syntax node. 
- while (next_state.depth > 0 && next_state_top->done) { - next_state.depth--; - next_state_top = analysis_state__top(&next_state); - } - - // If this hypothetical child did match the current step of the query pattern, - // then advance to the next step at the current depth. This involves skipping - // over any descendant steps of the current child. - const QueryStep *next_step = step; - if (does_match) { - for (;;) { - next_state.step_index++; - next_step = &self->steps.contents[next_state.step_index]; - if ( - next_step->depth == PATTERN_DONE_MARKER || - next_step->depth <= step->depth - ) break; - } - } else if (successor.state == parse_state) { - continue; - } - - for (;;) { - // Skip pass-through states. Although these states have alternatives, they are only - // used to implement repetitions, and query analysis does not need to process - // repetitions in order to determine whether steps are possible and definite. - if (next_step->is_pass_through) { - next_state.step_index++; - next_step++; - continue; - } - - // If the pattern is finished or hypothetical parent node is complete, then - // record that matching can terminate at this step of the pattern. Otherwise, - // add this state to the list of states to process on the next iteration. - if (!next_step->is_dead_end) { - bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth; - if (did_finish_pattern) { - array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol); - } else if (next_state.depth == 0) { - array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index); - } else { - analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state); - } - } - - // If the state has advanced to a step with an alternative step, then add another state - // at that alternative step. 
This process is simpler than the process of actually matching a - // pattern during query execution, because for the purposes of query analysis, there is no - // need to process repetitions. - if ( - does_match && - next_step->alternative_index != NONE && - next_step->alternative_index > next_state.step_index - ) { - next_state.step_index = next_step->alternative_index; - next_step = &self->steps.contents[next_state.step_index]; - } else { - break; - } - } - } - } - } - - AnalysisStateSet _states = analysis->states; - analysis->states = analysis->next_states; - analysis->next_states = _states; - } -} - -static bool ts_query__analyze_patterns(t_query *self, unsigned *error_offset) { - Array(uint16_t) non_rooted_pattern_start_steps = array_new(); - for (unsigned i = 0; i < self->pattern_map.size; i++) { - PatternEntry *pattern = &self->pattern_map.contents[i]; - if (!pattern->is_rooted) { - QueryStep *step = &self->steps.contents[pattern->step_index]; - if (step->symbol != WILDCARD_SYMBOL) { - array_push(&non_rooted_pattern_start_steps, i); - } - } - } - - // Walk forward through all of the steps in the query, computing some - // basic information about each step. Mark all of the steps that contain - // captures, and record the indices of all of the steps that have child steps. 
- Array(uint32_t) parent_step_indices = array_new(); - for (unsigned i = 0; i < self->steps.size; i++) { - QueryStep *step = &self->steps.contents[i]; - if (step->depth == PATTERN_DONE_MARKER) { - step->parent_pattern_guaranteed = true; - step->root_pattern_guaranteed = true; - continue; - } - - bool has_children = false; - bool is_wildcard = step->symbol == WILDCARD_SYMBOL; - step->contains_captures = step->capture_ids[0] != NONE; - for (unsigned j = i + 1; j < self->steps.size; j++) { - QueryStep *next_step = &self->steps.contents[j]; - if ( - next_step->depth == PATTERN_DONE_MARKER || - next_step->depth <= step->depth - ) break; - if (next_step->capture_ids[0] != NONE) { - step->contains_captures = true; - } - if (!is_wildcard) { - next_step->root_pattern_guaranteed = true; - next_step->parent_pattern_guaranteed = true; - } - has_children = true; - } - - if (has_children && !is_wildcard) { - array_push(&parent_step_indices, i); - } - } - - // For every parent symbol in the query, initialize an 'analysis subgraph'. - // This subgraph lists all of the states in the parse table that are directly - // involved in building subtrees for this symbol. - // - // In addition to the parent symbols in the query, construct subgraphs for all - // of the hidden symbols in the grammar, because these might occur within - // one of the parent nodes, such that their children appear to belong to the - // parent. 
- AnalysisSubgraphArray subgraphs = array_new(); - for (unsigned i = 0; i < parent_step_indices.size; i++) { - uint32_t parent_step_index = parent_step_indices.contents[i]; - t_symbol parent_symbol = self->steps.contents[parent_step_index].symbol; - AnalysisSubgraph subgraph = { .symbol = parent_symbol }; - array_insert_sorted_by(&subgraphs, .symbol, subgraph); - } - for (t_symbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) { - if (!ts_language_symbol_metadata(self->language, sym).visible) { - AnalysisSubgraph subgraph = { .symbol = sym }; - array_insert_sorted_by(&subgraphs, .symbol, subgraph); - } - } - - // Scan the parse table to find the data needed to populate these subgraphs. - // Collect three things during this scan: - // 1) All of the parse states where one of these symbols can start. - // 2) All of the parse states where one of these symbols can end, along - // with information about the node that would be created. - // 3) A list of predecessor states for each state. 
- StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language); - for (t_state_id state = 1; state < (uint16_t)self->language->state_count; state++) { - unsigned subgraph_index, exists; - LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state); - while (ts_lookahead_iterator__next(&lookahead_iterator)) { - if (lookahead_iterator.action_count) { - for (unsigned i = 0; i < lookahead_iterator.action_count; i++) { - const TSParseAction *action = &lookahead_iterator.actions[i]; - if (action->type == TSParseActionTypeReduce) { - const t_symbol *aliases, *aliases_end; - ts_language_aliases_for_symbol( - self->language, - action->reduce.symbol, - &aliases, - &aliases_end - ); - for (const t_symbol *symbol = aliases; symbol < aliases_end; symbol++) { - array_search_sorted_by( - &subgraphs, - .symbol, - *symbol, - &subgraph_index, - &exists - ); - if (exists) { - AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { - array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { - .state = state, - .production_id = action->reduce.production_id, - .child_index = action->reduce.child_count, - .done = true, - })); - } - } - } - } else if (action->type == TSParseActionTypeShift && !action->shift.extra) { - t_state_id next_state = action->shift.state; - state_predecessor_map_add(&predecessor_map, next_state, state); - } - } - } else if (lookahead_iterator.next_state != 0) { - if (lookahead_iterator.next_state != state) { - state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); - } - if (ts_language_state_is_primary(self->language, state)) { - const t_symbol *aliases, *aliases_end; - ts_language_aliases_for_symbol( - self->language, - lookahead_iterator.symbol, - &aliases, - &aliases_end - ); - for (const t_symbol *symbol = aliases; symbol < aliases_end; symbol++) { - array_search_sorted_by( - &subgraphs, - .symbol, - 
*symbol, - &subgraph_index, - &exists - ); - if (exists) { - AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - if ( - subgraph->start_states.size == 0 || - *array_back(&subgraph->start_states) != state - ) - array_push(&subgraph->start_states, state); - } - } - } - } - } - } - - // For each subgraph, compute the preceding states by walking backward - // from the end states using the predecessor map. - Array(AnalysisSubgraphNode) next_nodes = array_new(); - for (unsigned i = 0; i < subgraphs.size; i++) { - AnalysisSubgraph *subgraph = &subgraphs.contents[i]; - if (subgraph->nodes.size == 0) { - array_delete(&subgraph->start_states); - array_erase(&subgraphs, i); - i--; - continue; - } - array_assign(&next_nodes, &subgraph->nodes); - while (next_nodes.size > 0) { - AnalysisSubgraphNode node = array_pop(&next_nodes); - if (node.child_index > 1) { - unsigned predecessor_count; - const t_state_id *predecessors = state_predecessor_map_get( - &predecessor_map, - node.state, - &predecessor_count - ); - for (unsigned j = 0; j < predecessor_count; j++) { - AnalysisSubgraphNode predecessor_node = { - .state = predecessors[j], - .child_index = node.child_index - 1, - .production_id = node.production_id, - .done = false, - }; - unsigned index, exists; - array_search_sorted_with( - &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node, - &index, &exists - ); - if (!exists) { - array_insert(&subgraph->nodes, index, predecessor_node); - array_push(&next_nodes, predecessor_node); - } - } - } - } - } - - #ifdef DEBUG_ANALYZE_QUERY - printf("\nSubgraphs:\n"); - for (unsigned i = 0; i < subgraphs.size; i++) { - AnalysisSubgraph *subgraph = &subgraphs.contents[i]; - printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol)); - for (unsigned j = 0; j < subgraph->start_states.size; j++) { - printf( - " {state: %u}\n", - subgraph->start_states.contents[j] - ); - } - for (unsigned j = 0; j < subgraph->nodes.size; j++) 
{ - AnalysisSubgraphNode *node = &subgraph->nodes.contents[j]; - printf( - " {state: %u, child_index: %u, production_id: %u, done: %d}\n", - node->state, node->child_index, node->production_id, node->done - ); - } - printf("\n"); - } - #endif - - // For each non-terminal pattern, determine if the pattern can successfully match, - // and identify all of the possible children within the pattern where matching could fail. - bool all_patterns_are_valid = true; - QueryAnalysis analysis = query_analysis__new(); - for (unsigned i = 0; i < parent_step_indices.size; i++) { - uint16_t parent_step_index = parent_step_indices.contents[i]; - uint16_t parent_depth = self->steps.contents[parent_step_index].depth; - t_symbol parent_symbol = self->steps.contents[parent_step_index].symbol; - if (parent_symbol == ts_builtin_sym_error) continue; - - // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's - // root symbol is a terminal, then return an error. - unsigned subgraph_index, exists; - array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); - if (!exists) { - unsigned first_child_step_index = parent_step_index + 1; - uint32_t j, child_exists; - array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists); - assert(child_exists); - *error_offset = self->step_offsets.contents[j].byte_offset; - all_patterns_are_valid = false; - break; - } - - // Initialize an analysis state at every parse state in the table where - // this parent symbol can occur. 
- AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - analysis_state_set__clear(&analysis.states, &analysis.state_pool); - analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); - for (unsigned j = 0; j < subgraph->start_states.size; j++) { - t_state_id parse_state = subgraph->start_states.contents[j]; - analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { - .step_index = parent_step_index + 1, - .stack = { - [0] = { - .parse_state = parse_state, - .parent_symbol = parent_symbol, - .child_index = 0, - .field_id = 0, - .done = false, - }, - }, - .depth = 1, - .root_symbol = parent_symbol, - })); - } - - #ifdef DEBUG_ANALYZE_QUERY - printf( - "\nWalk states for %s:\n", - ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol) - ); - #endif - - analysis.did_abort = false; - ts_query__perform_analysis(self, &subgraphs, &analysis); - - // If this pattern could not be fully analyzed, then every step should - // be considered fallible. - if (analysis.did_abort) { - for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) { - QueryStep *step = &self->steps.contents[j]; - if ( - step->depth <= parent_depth || - step->depth == PATTERN_DONE_MARKER - ) break; - if (!step->is_dead_end) { - step->parent_pattern_guaranteed = false; - step->root_pattern_guaranteed = false; - } - } - continue; - } - - // If this pattern cannot match, store the pattern index so that it can be - // returned to the caller. 
- if (analysis.finished_parent_symbols.size == 0) { - assert(analysis.final_step_indices.size > 0); - uint16_t impossible_step_index = *array_back(&analysis.final_step_indices); - uint32_t j, impossible_exists; - array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists); - if (j >= self->step_offsets.size) j = self->step_offsets.size - 1; - *error_offset = self->step_offsets.contents[j].byte_offset; - all_patterns_are_valid = false; - break; - } - - // Mark as fallible any step where a match terminated. - // Later, this property will be propagated to all of the step's predecessors. - for (unsigned j = 0; j < analysis.final_step_indices.size; j++) { - uint32_t final_step_index = analysis.final_step_indices.contents[j]; - QueryStep *step = &self->steps.contents[final_step_index]; - if ( - step->depth != PATTERN_DONE_MARKER && - step->depth > parent_depth && - !step->is_dead_end - ) { - step->parent_pattern_guaranteed = false; - step->root_pattern_guaranteed = false; - } - } - } - - // Mark as indefinite any step with captures that are used in predicates. - Array(uint16_t) predicate_capture_ids = array_new(); - for (unsigned i = 0; i < self->patterns.size; i++) { - QueryPattern *pattern = &self->patterns.contents[i]; - - // Gather all of the captures that are used in predicates for this pattern. - array_clear(&predicate_capture_ids); - for ( - unsigned start = pattern->predicate_steps.offset, - end = start + pattern->predicate_steps.length, - j = start; j < end; j++ - ) { - t_query_predicate_step *step = &self->predicate_steps.contents[j]; - if (step->type == TSQueryPredicateStepTypeCapture) { - uint16_t value_id = step->value_id; - array_insert_sorted_by(&predicate_capture_ids, , value_id); - } - } - - // Find all of the steps that have these captures. 
- for ( - unsigned start = pattern->steps.offset, - end = start + pattern->steps.length, - j = start; j < end; j++ - ) { - QueryStep *step = &self->steps.contents[j]; - for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) { - uint16_t capture_id = step->capture_ids[k]; - if (capture_id == NONE) break; - unsigned index, exists; - array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists); - if (exists) { - step->root_pattern_guaranteed = false; - break; - } - } - } - } - - // Propagate fallibility. If a pattern is fallible at a given step, then it is - // fallible at all of its preceding steps. - bool done = self->steps.size == 0; - while (!done) { - done = true; - for (unsigned i = self->steps.size - 1; i > 0; i--) { - QueryStep *step = &self->steps.contents[i]; - if (step->depth == PATTERN_DONE_MARKER) continue; - - // Determine if this step is definite or has definite alternatives. - bool parent_pattern_guaranteed = false; - for (;;) { - if (step->root_pattern_guaranteed) { - parent_pattern_guaranteed = true; - break; - } - if (step->alternative_index == NONE || step->alternative_index < i) { - break; - } - step = &self->steps.contents[step->alternative_index]; - } - - // If not, mark its predecessor as indefinite. - if (!parent_pattern_guaranteed) { - QueryStep *prev_step = &self->steps.contents[i - 1]; - if ( - !prev_step->is_dead_end && - prev_step->depth != PATTERN_DONE_MARKER && - prev_step->root_pattern_guaranteed - ) { - prev_step->root_pattern_guaranteed = false; - done = false; - } - } - } - } - - #ifdef DEBUG_ANALYZE_QUERY - printf("Steps:\n"); - for (unsigned i = 0; i < self->steps.size; i++) { - QueryStep *step = &self->steps.contents[i]; - if (step->depth == PATTERN_DONE_MARKER) { - printf(" %u: DONE\n", i); - } else { - printf( - " %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n", - i, - (step->symbol == WILDCARD_SYMBOL) - ? 
"ANY" - : ts_language_symbol_name(self->language, step->symbol), - (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"), - step->depth, - step->parent_pattern_guaranteed, - step->root_pattern_guaranteed - ); - } - } - #endif - - // Determine which repetition symbols in this language have the possibility - // of matching non-rooted patterns in this query. These repetition symbols - // prevent certain optimizations with range restrictions. - analysis.did_abort = false; - for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) { - uint16_t pattern_entry_index = non_rooted_pattern_start_steps.contents[i]; - PatternEntry *pattern_entry = &self->pattern_map.contents[pattern_entry_index]; - - analysis_state_set__clear(&analysis.states, &analysis.state_pool); - analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); - for (unsigned j = 0; j < subgraphs.size; j++) { - AnalysisSubgraph *subgraph = &subgraphs.contents[j]; - TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol); - if (metadata.visible || metadata.named) continue; - - for (uint32_t k = 0; k < subgraph->start_states.size; k++) { - t_state_id parse_state = subgraph->start_states.contents[k]; - analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { - .step_index = pattern_entry->step_index, - .stack = { - [0] = { - .parse_state = parse_state, - .parent_symbol = subgraph->symbol, - .child_index = 0, - .field_id = 0, - .done = false, - }, - }, - .root_symbol = subgraph->symbol, - .depth = 1, - })); - } - } - - #ifdef DEBUG_ANALYZE_QUERY - printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index); - #endif - - ts_query__perform_analysis( - self, - &subgraphs, - &analysis - ); - - if (analysis.finished_parent_symbols.size > 0) { - self->patterns.contents[pattern_entry->pattern_index].is_non_local = true; - } - - for (unsigned k = 0; k < 
analysis.finished_parent_symbols.size; k++) { - t_symbol symbol = analysis.finished_parent_symbols.contents[k]; - array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol); - } - } - - #ifdef DEBUG_ANALYZE_QUERY - if (self->repeat_symbols_with_rootless_patterns.size > 0) { - printf("\nRepetition symbols with rootless patterns:\n"); - printf("aborted analysis: %d\n", analysis.did_abort); - for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) { - TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i]; - printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol)); - } - printf("\n"); - } - #endif - - // Cleanup - for (unsigned i = 0; i < subgraphs.size; i++) { - array_delete(&subgraphs.contents[i].start_states); - array_delete(&subgraphs.contents[i].nodes); - } - array_delete(&subgraphs); - query_analysis__delete(&analysis); - array_delete(&next_nodes); - array_delete(&non_rooted_pattern_start_steps); - array_delete(&parent_step_indices); - array_delete(&predicate_capture_ids); - state_predecessor_map_delete(&predecessor_map); - - return all_patterns_are_valid; -} - -static void ts_query__add_negated_fields( - t_query *self, - uint16_t step_index, - t_field_id *field_ids, - uint16_t field_count -) { - QueryStep *step = &self->steps.contents[step_index]; - - // The negated field array stores a list of field lists, separated by zeros. - // Try to find the start index of an existing list that matches this new list. - bool failed_match = false; - unsigned match_count = 0; - unsigned start_i = 0; - for (unsigned i = 0; i < self->negated_fields.size; i++) { - t_field_id existing_field_id = self->negated_fields.contents[i]; - - // At each zero value, terminate the match attempt. If we've exactly - // matched the new field list, then reuse this index. Otherwise, - // start over the matching process. 
- if (existing_field_id == 0) { - if (match_count == field_count) { - step->negated_field_list_id = start_i; - return; - } else { - start_i = i + 1; - match_count = 0; - failed_match = false; - } - } - - // If the existing list matches our new list so far, then advance - // to the next element of the new list. - else if ( - match_count < field_count && - existing_field_id == field_ids[match_count] && - !failed_match - ) { - match_count++; - } - - // Otherwise, this existing list has failed to match. - else { - match_count = 0; - failed_match = true; - } - } - - step->negated_field_list_id = self->negated_fields.size; - array_extend(&self->negated_fields, field_count, field_ids); - array_push(&self->negated_fields, 0); -} - -static t_query_error ts_query__parse_string_literal( - t_query *self, - Stream *stream -) { - const char *string_start = stream->input; - if (stream->next != '"') return TSQueryErrorSyntax; - stream_advance(stream); - const char *prev_position = stream->input; - - bool is_escaped = false; - array_clear(&self->string_buffer); - for (;;) { - if (is_escaped) { - is_escaped = false; - switch (stream->next) { - case 'n': - array_push(&self->string_buffer, '\n'); - break; - case 'r': - array_push(&self->string_buffer, '\r'); - break; - case 't': - array_push(&self->string_buffer, '\t'); - break; - case '0': - array_push(&self->string_buffer, '\0'); - break; - default: - array_extend(&self->string_buffer, stream->next_size, stream->input); - break; - } - prev_position = stream->input + stream->next_size; - } else { - if (stream->next == '\\') { - array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); - prev_position = stream->input + 1; - is_escaped = true; - } else if (stream->next == '"') { - array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); - stream_advance(stream); - return TSQueryErrorNone; - } else if (stream->next == '\n') { - stream_reset(stream, string_start); - 
return TSQueryErrorSyntax; - } - } - if (!stream_advance(stream)) { - stream_reset(stream, string_start); - return TSQueryErrorSyntax; - } - } -} - -// Parse a single predicate associated with a pattern, adding it to the -// query's internal `predicate_steps` array. Predicates are arbitrary -// S-expressions associated with a pattern which are meant to be handled at -// a higher level of abstraction, such as the Rust/JavaScript bindings. They -// can contain '@'-prefixed capture names, double-quoted strings, and bare -// symbols, which also represent strings. -static t_query_error ts_query__parse_predicate( - t_query *self, - Stream *stream -) { - if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; - const char *predicate_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - predicate_name); - uint16_t id = symbol_table_insert_name( - &self->predicate_values, - predicate_name, - length - ); - array_push(&self->predicate_steps, ((t_query_predicate_step) { - .type = TSQueryPredicateStepTypeString, - .value_id = id, - })); - stream_skip_whitespace(stream); - - for (;;) { - if (stream->next == ')') { - stream_advance(stream); - stream_skip_whitespace(stream); - array_push(&self->predicate_steps, ((t_query_predicate_step) { - .type = TSQueryPredicateStepTypeDone, - .value_id = 0, - })); - break; - } - - // Parse an '@'-prefixed capture name - else if (stream->next == '@') { - stream_advance(stream); - - // Parse the capture name - if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; - const char *capture_name = stream->input; - stream_scan_identifier(stream); - uint32_t capture_length = (uint32_t)(stream->input - capture_name); - - // Add the capture id to the first step of the pattern - int capture_id = symbol_table_id_for_name( - &self->captures, - capture_name, - capture_length - ); - if (capture_id == -1) { - stream_reset(stream, capture_name); - return TSQueryErrorCapture; - } - - 
array_push(&self->predicate_steps, ((t_query_predicate_step) { - .type = TSQueryPredicateStepTypeCapture, - .value_id = capture_id, - })); - } - - // Parse a string literal - else if (stream->next == '"') { - t_query_error e = ts_query__parse_string_literal(self, stream); - if (e) return e; - uint16_t query_id = symbol_table_insert_name( - &self->predicate_values, - self->string_buffer.contents, - self->string_buffer.size - ); - array_push(&self->predicate_steps, ((t_query_predicate_step) { - .type = TSQueryPredicateStepTypeString, - .value_id = query_id, - })); - } - - // Parse a bare symbol - else if (stream_is_ident_start(stream)) { - const char *symbol_start = stream->input; - stream_scan_identifier(stream); - uint32_t symbol_length = (uint32_t)(stream->input - symbol_start); - uint16_t query_id = symbol_table_insert_name( - &self->predicate_values, - symbol_start, - symbol_length - ); - array_push(&self->predicate_steps, ((t_query_predicate_step) { - .type = TSQueryPredicateStepTypeString, - .value_id = query_id, - })); - } - - else { - return TSQueryErrorSyntax; - } - - stream_skip_whitespace(stream); - } - - return 0; -} - -// Read one S-expression pattern from the stream, and incorporate it into -// the query's internal state machine representation. For nested patterns, -// this function calls itself recursively. -// -// The caller is responsible for passing in a dedicated CaptureQuantifiers. -// These should not be shared between different calls to ts_query__parse_pattern! -static t_query_error ts_query__parse_pattern( - t_query *self, - Stream *stream, - uint32_t depth, - bool is_immediate, - CaptureQuantifiers *capture_quantifiers -) { - if (stream->next == 0) return TSQueryErrorSyntax; - if (stream->next == ')' || stream->next == ']') return PARENT_DONE; - - const uint32_t starting_step_index = self->steps.size; - - // Store the byte offset of each step in the query. 
- if ( - self->step_offsets.size == 0 || - array_back(&self->step_offsets)->step_index != starting_step_index - ) { - array_push(&self->step_offsets, ((StepOffset) { - .step_index = starting_step_index, - .byte_offset = stream_offset(stream), - })); - } - - // An open bracket is the start of an alternation. - if (stream->next == '[') { - stream_advance(stream); - stream_skip_whitespace(stream); - - // Parse each branch, and add a placeholder step in between the branches. - Array(uint32_t) branch_step_indices = array_new(); - CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new(); - for (;;) { - uint32_t start_index = self->steps.size; - t_query_error e = ts_query__parse_pattern( - self, - stream, - depth, - is_immediate, - &branch_capture_quantifiers - ); - - if (e == PARENT_DONE) { - if (stream->next == ']' && branch_step_indices.size > 0) { - stream_advance(stream); - break; - } - e = TSQueryErrorSyntax; - } - if (e) { - capture_quantifiers_delete(&branch_capture_quantifiers); - array_delete(&branch_step_indices); - return e; - } - - if (start_index == starting_step_index) { - capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers); - } else { - capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers); - } - - array_push(&branch_step_indices, start_index); - array_push(&self->steps, query_step__new(0, depth, false)); - capture_quantifiers_clear(&branch_capture_quantifiers); - } - (void)array_pop(&self->steps); - - // For all of the branches except for the last one, add the subsequent branch as an - // alternative, and link the end of the branch to the current end of the steps. 
- for (unsigned i = 0; i < branch_step_indices.size - 1; i++) { - uint32_t step_index = branch_step_indices.contents[i]; - uint32_t next_step_index = branch_step_indices.contents[i + 1]; - QueryStep *start_step = &self->steps.contents[step_index]; - QueryStep *end_step = &self->steps.contents[next_step_index - 1]; - start_step->alternative_index = next_step_index; - end_step->alternative_index = self->steps.size; - end_step->is_dead_end = true; - } - - capture_quantifiers_delete(&branch_capture_quantifiers); - array_delete(&branch_step_indices); - } - - // An open parenthesis can be the start of three possible constructs: - // * A grouped sequence - // * A predicate - // * A named node - else if (stream->next == '(') { - stream_advance(stream); - stream_skip_whitespace(stream); - - // If this parenthesis is followed by a node, then it represents a grouped sequence. - if (stream->next == '(' || stream->next == '"' || stream->next == '[') { - bool child_is_immediate = is_immediate; - CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); - for (;;) { - if (stream->next == '.') { - child_is_immediate = true; - stream_advance(stream); - stream_skip_whitespace(stream); - } - t_query_error e = ts_query__parse_pattern( - self, - stream, - depth, - child_is_immediate, - &child_capture_quantifiers - ); - if (e == PARENT_DONE) { - if (stream->next == ')') { - stream_advance(stream); - break; - } - e = TSQueryErrorSyntax; - } - if (e) { - capture_quantifiers_delete(&child_capture_quantifiers); - return e; - } - - capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); - capture_quantifiers_clear(&child_capture_quantifiers); - child_is_immediate = false; - } - - capture_quantifiers_delete(&child_capture_quantifiers); - } - - // A dot/pound character indicates the start of a predicate. - else if (stream->next == '.' 
|| stream->next == '#') { - stream_advance(stream); - return ts_query__parse_predicate(self, stream); - } - - // Otherwise, this parenthesis is the start of a named node. - else { - t_symbol symbol; - - // Parse a normal node name - if (stream_is_ident_start(stream)) { - const char *node_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - node_name); - - // Parse the wildcard symbol - if (length == 1 && node_name[0] == '_') { - symbol = WILDCARD_SYMBOL; - } - - else { - symbol = ts_language_symbol_for_name( - self->language, - node_name, - length, - true - ); - if (!symbol) { - stream_reset(stream, node_name); - return TSQueryErrorNodeType; - } - } - } else { - return TSQueryErrorSyntax; - } - - // Add a step for the node. - array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); - QueryStep *step = array_back(&self->steps); - if (ts_language_symbol_metadata(self->language, symbol).supertype) { - step->supertype_symbol = step->symbol; - step->symbol = WILDCARD_SYMBOL; - } - if (symbol == WILDCARD_SYMBOL) { - step->is_named = true; - } - - stream_skip_whitespace(stream); - - if (stream->next == '/') { - stream_advance(stream); - if (!stream_is_ident_start(stream)) { - return TSQueryErrorSyntax; - } - - const char *node_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - node_name); - - step->symbol = ts_language_symbol_for_name( - self->language, - node_name, - length, - true - ); - if (!step->symbol) { - stream_reset(stream, node_name); - return TSQueryErrorNodeType; - } - - stream_skip_whitespace(stream); - } - - // Parse the child patterns - bool child_is_immediate = false; - uint16_t last_child_step_index = 0; - uint16_t negated_field_count = 0; - t_field_id negated_field_ids[MAX_NEGATED_FIELD_COUNT]; - CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); - for (;;) { - // Parse a negated field assertion - if (stream->next == 
'!') { - stream_advance(stream); - stream_skip_whitespace(stream); - if (!stream_is_ident_start(stream)) { - capture_quantifiers_delete(&child_capture_quantifiers); - return TSQueryErrorSyntax; - } - const char *field_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - field_name); - stream_skip_whitespace(stream); - - t_field_id field_id = ts_language_field_id_for_name( - self->language, - field_name, - length - ); - if (!field_id) { - stream->input = field_name; - capture_quantifiers_delete(&child_capture_quantifiers); - return TSQueryErrorField; - } - - // Keep the field ids sorted. - if (negated_field_count < MAX_NEGATED_FIELD_COUNT) { - negated_field_ids[negated_field_count] = field_id; - negated_field_count++; - } - - continue; - } - - // Parse a sibling anchor - if (stream->next == '.') { - child_is_immediate = true; - stream_advance(stream); - stream_skip_whitespace(stream); - } - - uint16_t step_index = self->steps.size; - t_query_error e = ts_query__parse_pattern( - self, - stream, - depth + 1, - child_is_immediate, - &child_capture_quantifiers - ); - if (e == PARENT_DONE) { - if (stream->next == ')') { - if (child_is_immediate) { - if (last_child_step_index == 0) { - capture_quantifiers_delete(&child_capture_quantifiers); - return TSQueryErrorSyntax; - } - self->steps.contents[last_child_step_index].is_last_child = true; - } - - if (negated_field_count) { - ts_query__add_negated_fields( - self, - starting_step_index, - negated_field_ids, - negated_field_count - ); - } - - stream_advance(stream); - break; - } - e = TSQueryErrorSyntax; - } - if (e) { - capture_quantifiers_delete(&child_capture_quantifiers); - return e; - } - - capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); - - last_child_step_index = step_index; - child_is_immediate = false; - capture_quantifiers_clear(&child_capture_quantifiers); - } - capture_quantifiers_delete(&child_capture_quantifiers); - } - } - - // 
Parse a wildcard pattern - else if (stream->next == '_') { - stream_advance(stream); - stream_skip_whitespace(stream); - - // Add a step that matches any kind of node - array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); - } - - // Parse a double-quoted anonymous leaf node expression - else if (stream->next == '"') { - const char *string_start = stream->input; - t_query_error e = ts_query__parse_string_literal(self, stream); - if (e) return e; - - // Add a step for the node - t_symbol symbol = ts_language_symbol_for_name( - self->language, - self->string_buffer.contents, - self->string_buffer.size, - false - ); - if (!symbol) { - stream_reset(stream, string_start + 1); - return TSQueryErrorNodeType; - } - array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); - } - - // Parse a field-prefixed pattern - else if (stream_is_ident_start(stream)) { - // Parse the field name - const char *field_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - field_name); - stream_skip_whitespace(stream); - - if (stream->next != ':') { - stream_reset(stream, field_name); - return TSQueryErrorSyntax; - } - stream_advance(stream); - stream_skip_whitespace(stream); - - // Parse the pattern - CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new(); - t_query_error e = ts_query__parse_pattern( - self, - stream, - depth, - is_immediate, - &field_capture_quantifiers - ); - if (e) { - capture_quantifiers_delete(&field_capture_quantifiers); - if (e == PARENT_DONE) e = TSQueryErrorSyntax; - return e; - } - - // Add the field name to the first step of the pattern - t_field_id field_id = ts_language_field_id_for_name( - self->language, - field_name, - length - ); - if (!field_id) { - stream->input = field_name; - return TSQueryErrorField; - } - - uint32_t step_index = starting_step_index; - QueryStep *step = &self->steps.contents[step_index]; - for (;;) { - step->field = field_id; - 
if ( - step->alternative_index != NONE && - step->alternative_index > step_index && - step->alternative_index < self->steps.size - ) { - step_index = step->alternative_index; - step = &self->steps.contents[step_index]; - } else { - break; - } - } - - capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers); - capture_quantifiers_delete(&field_capture_quantifiers); - } - - else { - return TSQueryErrorSyntax; - } - - stream_skip_whitespace(stream); - - // Parse suffixes modifiers for this pattern - t_quantifier quantifier = TSQuantifierOne; - for (;;) { - // Parse the one-or-more operator. - if (stream->next == '+') { - quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier); - - stream_advance(stream); - stream_skip_whitespace(stream); - - QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); - repeat_step.alternative_index = starting_step_index; - repeat_step.is_pass_through = true; - repeat_step.alternative_is_immediate = true; - array_push(&self->steps, repeat_step); - } - - // Parse the zero-or-more repetition operator. - else if (stream->next == '*') { - quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier); - - stream_advance(stream); - stream_skip_whitespace(stream); - - QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); - repeat_step.alternative_index = starting_step_index; - repeat_step.is_pass_through = true; - repeat_step.alternative_is_immediate = true; - array_push(&self->steps, repeat_step); - - // Stop when `step->alternative_index` is `NONE` or it points to - // `repeat_step` or beyond. Note that having just been pushed, - // `repeat_step` occupies slot `self->steps.size - 1`. 
- QueryStep *step = &self->steps.contents[starting_step_index]; - while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) { - step = &self->steps.contents[step->alternative_index]; - } - step->alternative_index = self->steps.size; - } - - // Parse the optional operator. - else if (stream->next == '?') { - quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier); - - stream_advance(stream); - stream_skip_whitespace(stream); - - QueryStep *step = &self->steps.contents[starting_step_index]; - while (step->alternative_index != NONE && step->alternative_index < self->steps.size) { - step = &self->steps.contents[step->alternative_index]; - } - step->alternative_index = self->steps.size; - } - - // Parse an '@'-prefixed capture pattern - else if (stream->next == '@') { - stream_advance(stream); - if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; - const char *capture_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - capture_name); - stream_skip_whitespace(stream); - - // Add the capture id to the first step of the pattern - uint16_t capture_id = symbol_table_insert_name( - &self->captures, - capture_name, - length - ); - - // Add the capture quantifier - capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne); - - uint32_t step_index = starting_step_index; - for (;;) { - QueryStep *step = &self->steps.contents[step_index]; - query_step__add_capture(step, capture_id); - if ( - step->alternative_index != NONE && - step->alternative_index > step_index && - step->alternative_index < self->steps.size - ) { - step_index = step->alternative_index; - } else { - break; - } - } - } - - // No more suffix modifiers - else { - break; - } - } - - capture_quantifiers_mul(capture_quantifiers, quantifier); - - return 0; -} - -t_query *ts_query_new( - const t_language *language, - const char *source, - uint32_t source_len, - uint32_t *error_offset, - 
t_query_error *error_type -) { - if ( - !language || - language->version > TREE_SITTER_LANGUAGE_VERSION || - language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION - ) { - *error_type = TSQueryErrorLanguage; - return NULL; - } - - t_query *self = ts_malloc(sizeof(t_query)); - *self = (t_query) { - .steps = array_new(), - .pattern_map = array_new(), - .captures = symbol_table_new(), - .capture_quantifiers = array_new(), - .predicate_values = symbol_table_new(), - .predicate_steps = array_new(), - .patterns = array_new(), - .step_offsets = array_new(), - .string_buffer = array_new(), - .negated_fields = array_new(), - .repeat_symbols_with_rootless_patterns = array_new(), - .wildcard_root_pattern_count = 0, - .language = ts_language_copy(language), - }; - - array_push(&self->negated_fields, 0); - - // Parse all of the S-expressions in the given string. - Stream stream = stream_new(source, source_len); - stream_skip_whitespace(&stream); - while (stream.input < stream.end) { - uint32_t pattern_index = self->patterns.size; - uint32_t start_step_index = self->steps.size; - uint32_t start_predicate_step_index = self->predicate_steps.size; - array_push(&self->patterns, ((QueryPattern) { - .steps = (Slice) {.offset = start_step_index}, - .predicate_steps = (Slice) {.offset = start_predicate_step_index}, - .start_byte = stream_offset(&stream), - .is_non_local = false, - })); - CaptureQuantifiers capture_quantifiers = capture_quantifiers_new(); - *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers); - array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); - - QueryPattern *pattern = array_back(&self->patterns); - pattern->steps.length = self->steps.size - start_step_index; - pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index; - - // If any pattern could not be parsed, then report the error information - // and terminate. 
- if (*error_type) { - if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax; - *error_offset = stream_offset(&stream); - capture_quantifiers_delete(&capture_quantifiers); - ts_query_delete(self); - return NULL; - } - - // Maintain a list of capture quantifiers for each pattern - array_push(&self->capture_quantifiers, capture_quantifiers); - - // Maintain a map that can look up patterns for a given root symbol. - uint16_t wildcard_root_alternative_index = NONE; - for (;;) { - QueryStep *step = &self->steps.contents[start_step_index]; - - // If a pattern has a wildcard at its root, but it has a non-wildcard child, - // then optimize the matching process by skipping matching the wildcard. - // Later, during the matching process, the query cursor will check that - // there is a parent node, and capture it if necessary. - if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) { - QueryStep *second_step = &self->steps.contents[start_step_index + 1]; - if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) { - wildcard_root_alternative_index = step->alternative_index; - start_step_index += 1; - step = second_step; - } - } - - // Determine whether the pattern has a single root node. This affects - // decisions about whether or not to start matching the pattern when - // a query cursor has a range restriction or when immediately within an - // error node. 
- uint32_t start_depth = step->depth; - bool is_rooted = start_depth == 0; - for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) { - QueryStep *child_step = &self->steps.contents[step_index]; - if (child_step->is_dead_end) break; - if (child_step->depth == start_depth) { - is_rooted = false; - break; - } - } - - ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) { - .step_index = start_step_index, - .pattern_index = pattern_index, - .is_rooted = is_rooted - }); - if (step->symbol == WILDCARD_SYMBOL) { - self->wildcard_root_pattern_count++; - } - - // If there are alternatives or options at the root of the pattern, - // then add multiple entries to the pattern map. - if (step->alternative_index != NONE) { - start_step_index = step->alternative_index; - } else if (wildcard_root_alternative_index != NONE) { - start_step_index = wildcard_root_alternative_index; - wildcard_root_alternative_index = NONE; - } else { - break; - } - } - } - - if (!ts_query__analyze_patterns(self, error_offset)) { - *error_type = TSQueryErrorStructure; - ts_query_delete(self); - return NULL; - } - - array_delete(&self->string_buffer); - return self; -} - -void ts_query_delete(t_query *self) { - if (self) { - array_delete(&self->steps); - array_delete(&self->pattern_map); - array_delete(&self->predicate_steps); - array_delete(&self->patterns); - array_delete(&self->step_offsets); - array_delete(&self->string_buffer); - array_delete(&self->negated_fields); - array_delete(&self->repeat_symbols_with_rootless_patterns); - ts_language_delete(self->language); - symbol_table_delete(&self->captures); - symbol_table_delete(&self->predicate_values); - for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) { - CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index); - capture_quantifiers_delete(capture_quantifiers); - } - array_delete(&self->capture_quantifiers); - ts_free(self); - } -} - -uint32_t 
ts_query_pattern_count(const t_query *self) { - return self->patterns.size; -} - -uint32_t ts_query_capture_count(const t_query *self) { - return self->captures.slices.size; -} - -uint32_t ts_query_string_count(const t_query *self) { - return self->predicate_values.slices.size; -} - -const char *ts_query_capture_name_for_id( - const t_query *self, - uint32_t index, - uint32_t *length -) { - return symbol_table_name_for_id(&self->captures, index, length); -} - -t_quantifier ts_query_capture_quantifier_for_id( - const t_query *self, - uint32_t pattern_index, - uint32_t capture_index -) { - CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index); - return capture_quantifier_for_id(capture_quantifiers, capture_index); -} - -const char *ts_query_string_value_for_id( - const t_query *self, - uint32_t index, - uint32_t *length -) { - return symbol_table_name_for_id(&self->predicate_values, index, length); -} - -const t_query_predicate_step *ts_query_predicates_for_pattern( - const t_query *self, - uint32_t pattern_index, - uint32_t *step_count -) { - Slice slice = self->patterns.contents[pattern_index].predicate_steps; - *step_count = slice.length; - if (self->predicate_steps.contents == NULL) { - return NULL; - } - return &self->predicate_steps.contents[slice.offset]; -} - -uint32_t ts_query_start_byte_for_pattern( - const t_query *self, - uint32_t pattern_index -) { - return self->patterns.contents[pattern_index].start_byte; -} - -bool ts_query_is_pattern_rooted( - const t_query *self, - uint32_t pattern_index -) { - for (unsigned i = 0; i < self->pattern_map.size; i++) { - PatternEntry *entry = &self->pattern_map.contents[i]; - if (entry->pattern_index == pattern_index) { - if (!entry->is_rooted) return false; - } - } - return true; -} - -bool ts_query_is_pattern_non_local( - const t_query *self, - uint32_t pattern_index -) { - if (pattern_index < self->patterns.size) { - return self->patterns.contents[pattern_index].is_non_local; 
- } else { - return false; - } -} - -bool ts_query_is_pattern_guaranteed_at_step( - const t_query *self, - uint32_t byte_offset -) { - uint32_t step_index = UINT32_MAX; - for (unsigned i = 0; i < self->step_offsets.size; i++) { - StepOffset *step_offset = &self->step_offsets.contents[i]; - if (step_offset->byte_offset > byte_offset) break; - step_index = step_offset->step_index; - } - if (step_index < self->steps.size) { - return self->steps.contents[step_index].root_pattern_guaranteed; - } else { - return false; - } -} - -bool ts_query__step_is_fallible( - const t_query *self, - uint16_t step_index -) { - assert((uint32_t)step_index + 1 < self->steps.size); - QueryStep *step = &self->steps.contents[step_index]; - QueryStep *next_step = &self->steps.contents[step_index + 1]; - return ( - next_step->depth != PATTERN_DONE_MARKER && - next_step->depth > step->depth && - !next_step->parent_pattern_guaranteed - ); -} - -void ts_query_disable_capture( - t_query *self, - const char *name, - uint32_t length -) { - // Remove capture information for any pattern step that previously - // captured with the given name. - int id = symbol_table_id_for_name(&self->captures, name, length); - if (id != -1) { - for (unsigned i = 0; i < self->steps.size; i++) { - QueryStep *step = &self->steps.contents[i]; - query_step__remove_capture(step, id); - } - } -} - -void ts_query_disable_pattern( - t_query *self, - uint32_t pattern_index -) { - // Remove the given pattern from the pattern map. Its steps will still - // be in the `steps` array, but they will never be read. 
- for (unsigned i = 0; i < self->pattern_map.size; i++) { - PatternEntry *pattern = &self->pattern_map.contents[i]; - if (pattern->pattern_index == pattern_index) { - array_erase(&self->pattern_map, i); - i--; - } - } -} - -/*************** - * QueryCursor - ***************/ - -t_query_cursor *ts_query_cursor_new(void) { - t_query_cursor *self = ts_malloc(sizeof(t_query_cursor)); - *self = (t_query_cursor) { - .did_exceed_match_limit = false, - .ascending = false, - .halted = false, - .states = array_new(), - .finished_states = array_new(), - .capture_list_pool = capture_list_pool_new(), - .start_byte = 0, - .end_byte = UINT32_MAX, - .start_point = {0, 0}, - .end_point = POINT_MAX, - .max_start_depth = UINT32_MAX, - }; - array_reserve(&self->states, 8); - array_reserve(&self->finished_states, 8); - return self; -} - -void ts_query_cursor_delete(t_query_cursor *self) { - array_delete(&self->states); - array_delete(&self->finished_states); - ts_tree_cursor_delete(&self->cursor); - capture_list_pool_delete(&self->capture_list_pool); - ts_free(self); -} - -bool ts_query_cursor_did_exceed_match_limit(const t_query_cursor *self) { - return self->did_exceed_match_limit; -} - -uint32_t ts_query_cursor_match_limit(const t_query_cursor *self) { - return self->capture_list_pool.max_capture_list_count; -} - -void ts_query_cursor_set_match_limit(t_query_cursor *self, uint32_t limit) { - self->capture_list_pool.max_capture_list_count = limit; -} - -#ifdef DEBUG_EXECUTE_QUERY -#define LOG(...) fprintf(stderr, __VA_ARGS__) -#else -#define LOG(...) 
-#endif - -void ts_query_cursor_exec( - t_query_cursor *self, - const t_query *query, - t_parse_node node -) { - if (query) { - LOG("query steps:\n"); - for (unsigned i = 0; i < query->steps.size; i++) { - QueryStep *step = &query->steps.contents[i]; - LOG(" %u: {", i); - if (step->depth == PATTERN_DONE_MARKER) { - LOG("DONE"); - } else if (step->is_dead_end) { - LOG("dead_end"); - } else if (step->is_pass_through) { - LOG("pass_through"); - } else if (step->symbol != WILDCARD_SYMBOL) { - LOG("symbol: %s", query->language->symbol_names[step->symbol]); - } else { - LOG("symbol: *"); - } - if (step->field) { - LOG(", field: %s", query->language->field_names[step->field]); - } - if (step->alternative_index != NONE) { - LOG(", alternative: %u", step->alternative_index); - } - LOG("},\n"); - } - } - - array_clear(&self->states); - array_clear(&self->finished_states); - ts_tree_cursor_reset(&self->cursor, node); - capture_list_pool_reset(&self->capture_list_pool); - self->on_visible_node = true; - self->next_state_id = 0; - self->depth = 0; - self->ascending = false; - self->halted = false; - self->query = query; - self->did_exceed_match_limit = false; -} - -void ts_query_cursor_set_byte_range( - t_query_cursor *self, - uint32_t start_byte, - uint32_t end_byte -) { - if (end_byte == 0) { - end_byte = UINT32_MAX; - } - self->start_byte = start_byte; - self->end_byte = end_byte; -} - -void ts_query_cursor_set_point_range( - t_query_cursor *self, - t_point start_point, - t_point end_point -) { - if (end_point.row == 0 && end_point.column == 0) { - end_point = POINT_MAX; - } - self->start_point = start_point; - self->end_point = end_point; -} - -// Search through all of the in-progress states, and find the captured -// node that occurs earliest in the document. 
-static bool ts_query_cursor__first_in_progress_capture( - t_query_cursor *self, - uint32_t *state_index, - uint32_t *byte_offset, - uint32_t *pattern_index, - bool *root_pattern_guaranteed -) { - bool result = false; - *state_index = UINT32_MAX; - *byte_offset = UINT32_MAX; - *pattern_index = UINT32_MAX; - for (unsigned i = 0; i < self->states.size; i++) { - QueryState *state = &self->states.contents[i]; - if (state->dead) continue; - - const CaptureList *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - if (state->consumed_capture_count >= captures->size) { - continue; - } - - t_parse_node node = captures->contents[state->consumed_capture_count].node; - if ( - ts_node_end_byte(node) <= self->start_byte || - point_lte(ts_node_end_point(node), self->start_point) - ) { - state->consumed_capture_count++; - i--; - continue; - } - - uint32_t node_start_byte = ts_node_start_byte(node); - if ( - !result || - node_start_byte < *byte_offset || - (node_start_byte == *byte_offset && state->pattern_index < *pattern_index) - ) { - QueryStep *step = &self->query->steps.contents[state->step_index]; - if (root_pattern_guaranteed) { - *root_pattern_guaranteed = step->root_pattern_guaranteed; - } else if (step->root_pattern_guaranteed) { - continue; - } - - result = true; - *state_index = i; - *byte_offset = node_start_byte; - *pattern_index = state->pattern_index; - } - } - return result; -} - -// Determine which node is first in a depth-first traversal -int ts_query_cursor__compare_nodes(t_parse_node left, t_parse_node right) { - if (left.id != right.id) { - uint32_t left_start = ts_node_start_byte(left); - uint32_t right_start = ts_node_start_byte(right); - if (left_start < right_start) return -1; - if (left_start > right_start) return 1; - uint32_t left_node_count = ts_node_end_byte(left); - uint32_t right_node_count = ts_node_end_byte(right); - if (left_node_count > right_node_count) return -1; - if (left_node_count < 
right_node_count) return 1; - } - return 0; -} - -// Determine if either state contains a superset of the other state's captures. -void ts_query_cursor__compare_captures( - t_query_cursor *self, - QueryState *left_state, - QueryState *right_state, - bool *left_contains_right, - bool *right_contains_left -) { - const CaptureList *left_captures = capture_list_pool_get( - &self->capture_list_pool, - left_state->capture_list_id - ); - const CaptureList *right_captures = capture_list_pool_get( - &self->capture_list_pool, - right_state->capture_list_id - ); - *left_contains_right = true; - *right_contains_left = true; - unsigned i = 0, j = 0; - for (;;) { - if (i < left_captures->size) { - if (j < right_captures->size) { - t_query_capture *left = &left_captures->contents[i]; - t_query_capture *right = &right_captures->contents[j]; - if (left->node.id == right->node.id && left->index == right->index) { - i++; - j++; - } else { - switch (ts_query_cursor__compare_nodes(left->node, right->node)) { - case -1: - *right_contains_left = false; - i++; - break; - case 1: - *left_contains_right = false; - j++; - break; - default: - *right_contains_left = false; - *left_contains_right = false; - i++; - j++; - break; - } - } - } else { - *right_contains_left = false; - break; - } - } else { - if (j < right_captures->size) { - *left_contains_right = false; - } - break; - } - } -} - -static void ts_query_cursor__add_state( - t_query_cursor *self, - const PatternEntry *pattern -) { - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - uint32_t start_depth = self->depth - step->depth; - - // Keep the states array in ascending order of start_depth and pattern_index, - // so that it can be processed more efficiently elsewhere. Usually, there is - // no work to do here because of two facts: - // * States with lower start_depth are naturally added first due to the - // order in which nodes are visited. 
- // * Earlier patterns are naturally added first because of the ordering of the - // pattern_map data structure that's used to initiate matches. - // - // This loop is only needed in cases where two conditions hold: - // * A pattern consists of more than one sibling node, so that its states - // remain in progress after exiting the node that started the match. - // * The first node in the pattern matches against multiple nodes at the - // same depth. - // - // An example of this is the pattern '((comment)* (function))'. If multiple - // `comment` nodes appear in a row, then we may initiate a new state for this - // pattern while another state for the same pattern is already in progress. - // If there are multiple patterns like this in a query, then this loop will - // need to execute in order to keep the states ordered by pattern_index. - uint32_t index = self->states.size; - while (index > 0) { - QueryState *prev_state = &self->states.contents[index - 1]; - if (prev_state->start_depth < start_depth) break; - if (prev_state->start_depth == start_depth) { - // Avoid inserting an unnecessary duplicate state, which would be - // immediately pruned by the longest-match criteria. - if ( - prev_state->pattern_index == pattern->pattern_index && - prev_state->step_index == pattern->step_index - ) return; - if (prev_state->pattern_index <= pattern->pattern_index) break; - } - index--; - } - - LOG( - " start state. pattern:%u, step:%u\n", - pattern->pattern_index, - pattern->step_index - ); - array_insert(&self->states, index, ((QueryState) { - .id = UINT32_MAX, - .capture_list_id = NONE, - .step_index = pattern->step_index, - .pattern_index = pattern->pattern_index, - .start_depth = start_depth, - .consumed_capture_count = 0, - .seeking_immediate_match = true, - .has_in_progress_alternatives = false, - .needs_parent = step->depth == 1, - .dead = false, - })); -} - -// Acquire a capture list for this state. 
If there are no capture lists left in the -// pool, this will steal the capture list from another existing state, and mark that -// other state as 'dead'. -static CaptureList *ts_query_cursor__prepare_to_capture( - t_query_cursor *self, - QueryState *state, - unsigned state_index_to_preserve -) { - if (state->capture_list_id == NONE) { - state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); - - // If there are no capture lists left in the pool, then terminate whichever - // state has captured the earliest node in the document, and steal its - // capture list. - if (state->capture_list_id == NONE) { - self->did_exceed_match_limit = true; - uint32_t state_index, byte_offset, pattern_index; - if ( - ts_query_cursor__first_in_progress_capture( - self, - &state_index, - &byte_offset, - &pattern_index, - NULL - ) && - state_index != state_index_to_preserve - ) { - LOG( - " abandon state. index:%u, pattern:%u, offset:%u.\n", - state_index, pattern_index, byte_offset - ); - QueryState *other_state = &self->states.contents[state_index]; - state->capture_list_id = other_state->capture_list_id; - other_state->capture_list_id = NONE; - other_state->dead = true; - CaptureList *list = capture_list_pool_get_mut( - &self->capture_list_pool, - state->capture_list_id - ); - array_clear(list); - return list; - } else { - LOG(" ran out of capture lists"); - return NULL; - } - } - } - return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id); -} - -static void ts_query_cursor__capture( - t_query_cursor *self, - QueryState *state, - QueryStep *step, - t_parse_node node -) { - if (state->dead) return; - CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX); - if (!capture_list) { - state->dead = true; - return; - } - - for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { - uint16_t capture_id = step->capture_ids[j]; - if (step->capture_ids[j] == NONE) break; - array_push(capture_list, 
((t_query_capture) { node, capture_id })); - LOG( - " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n", - ts_node_type(node), - state->pattern_index, - capture_id, - capture_list->size - ); - } -} - -// Duplicate the given state and insert the newly-created state immediately after -// the given state in the `states` array. Ensures that the given state reference is -// still valid, even if the states array is reallocated. -static QueryState *ts_query_cursor__copy_state( - t_query_cursor *self, - QueryState **state_ref -) { - const QueryState *state = *state_ref; - uint32_t state_index = (uint32_t)(state - self->states.contents); - QueryState copy = *state; - copy.capture_list_id = NONE; - - // If the state has captures, copy its capture list. - if (state->capture_list_id != NONE) { - CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, ©, state_index); - if (!new_captures) return NULL; - const CaptureList *old_captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - array_push_all(new_captures, old_captures); - } - - array_insert(&self->states, state_index + 1, copy); - *state_ref = &self->states.contents[state_index]; - return &self->states.contents[state_index + 1]; -} - -static inline bool ts_query_cursor__should_descend( - t_query_cursor *self, - bool node_intersects_range -) { - - if (node_intersects_range && self->depth < self->max_start_depth) { - return true; - } - - // If there are in-progress matches whose remaining steps occur - // deeper in the tree, then descend. 
- for (unsigned i = 0; i < self->states.size; i++) { - QueryState *state = &self->states.contents[i];; - QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if ( - next_step->depth != PATTERN_DONE_MARKER && - state->start_depth + next_step->depth > self->depth - ) { - return true; - } - } - - if (self->depth >= self->max_start_depth) { - return false; - } - - // If the current node is hidden, then a non-rooted pattern might match - // one if its roots inside of this node, and match another of its roots - // as part of a sibling node, so we may need to descend. - if (!self->on_visible_node) { - // Descending into a repetition node outside of the range can be - // expensive, because these nodes can have many visible children. - // Avoid descending into repetition nodes unless we have already - // determined that this query can match rootless patterns inside - // of this type of repetition node. - Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor); - if (ts_subtree_is_repetition(subtree)) { - bool exists; - uint32_t index; - array_search_sorted_by( - &self->query->repeat_symbols_with_rootless_patterns,, - ts_subtree_symbol(subtree), - &index, - &exists - ); - return exists; - } - - return true; - } - - return false; -} - -// Walk the tree, processing patterns until at least one pattern finishes, -// If one or more patterns finish, return `true` and store their states in the -// `finished_states` array. Multiple patterns can finish on the same node. If -// there are no more matches, return `false`. -static inline bool ts_query_cursor__advance( - t_query_cursor *self, - bool stop_on_definite_step -) { - bool did_match = false; - for (;;) { - if (self->halted) { - while (self->states.size > 0) { - QueryState state = array_pop(&self->states); - capture_list_pool_release( - &self->capture_list_pool, - state.capture_list_id - ); - } - } - - if (did_match || self->halted) return did_match; - - // Exit the current node. 
- if (self->ascending) { - if (self->on_visible_node) { - LOG( - "leave node. depth:%u, type:%s\n", - self->depth, - ts_node_type(ts_tree_cursor_current_node(&self->cursor)) - ); - - // After leaving a node, remove any states that cannot make further progress. - uint32_t deleted_count = 0; - for (unsigned i = 0, n = self->states.size; i < n; i++) { - QueryState *state = &self->states.contents[i]; - QueryStep *step = &self->query->steps.contents[state->step_index]; - - // If a state completed its pattern inside of this node, but was deferred from finishing - // in order to search for longer matches, mark it as finished. - if ( - step->depth == PATTERN_DONE_MARKER && - (state->start_depth > self->depth || self->depth == 0) - ) { - LOG(" finish pattern %u\n", state->pattern_index); - array_push(&self->finished_states, *state); - did_match = true; - deleted_count++; - } - - // If a state needed to match something within this node, then remove that state - // as it has failed to match. - else if ( - step->depth != PATTERN_DONE_MARKER && - (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth - ) { - LOG( - " failed to match. pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - deleted_count++; - } - - else if (deleted_count > 0) { - self->states.contents[i - deleted_count] = *state; - } - } - self->states.size -= deleted_count; - } - - // Leave this node by stepping to its next sibling or to its parent. 
- switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) { - case TreeCursorStepVisible: - if (!self->on_visible_node) { - self->depth++; - self->on_visible_node = true; - } - self->ascending = false; - break; - case TreeCursorStepHidden: - if (self->on_visible_node) { - self->depth--; - self->on_visible_node = false; - } - self->ascending = false; - break; - default: - if (ts_tree_cursor_goto_parent(&self->cursor)) { - self->depth--; - } else { - LOG("halt at root\n"); - self->halted = true; - } - } - } - - // Enter a new node. - else { - // Get the properties of the current node. - t_parse_node node = ts_tree_cursor_current_node(&self->cursor); - t_parse_node parent_node = ts_tree_cursor_parent_node(&self->cursor); - bool parent_precedes_range = !ts_node_is_null(parent_node) && ( - ts_node_end_byte(parent_node) <= self->start_byte || - point_lte(ts_node_end_point(parent_node), self->start_point) - ); - bool parent_follows_range = !ts_node_is_null(parent_node) && ( - ts_node_start_byte(parent_node) >= self->end_byte || - point_gte(ts_node_start_point(parent_node), self->end_point) - ); - bool node_precedes_range = parent_precedes_range || ( - ts_node_end_byte(node) <= self->start_byte || - point_lte(ts_node_end_point(node), self->start_point) - ); - bool node_follows_range = parent_follows_range || ( - ts_node_start_byte(node) >= self->end_byte || - point_gte(ts_node_start_point(node), self->end_point) - ); - bool parent_intersects_range = !parent_precedes_range && !parent_follows_range; - bool node_intersects_range = !node_precedes_range && !node_follows_range; - - if (self->on_visible_node) { - t_symbol symbol = ts_node_symbol(node); - bool is_named = ts_node_is_named(node); - bool has_later_siblings; - bool has_later_named_siblings; - bool can_have_later_siblings_with_this_field; - t_field_id field_id = 0; - t_symbol supertypes[8] = {0}; - unsigned supertype_count = 8; - ts_tree_cursor_current_status( - &self->cursor, - &field_id, - 
&has_later_siblings, - &has_later_named_siblings, - &can_have_later_siblings_with_this_field, - supertypes, - &supertype_count - ); - LOG( - "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", - self->depth, - ts_node_type(node), - ts_language_field_name_for_id(self->query->language, field_id), - ts_node_start_point(node).row, - self->states.size, - self->finished_states.size - ); - - bool node_is_error = symbol == ts_builtin_sym_error; - bool parent_is_error = - !ts_node_is_null(parent_node) && - ts_node_symbol(parent_node) == ts_builtin_sym_error; - - // Add new states for any patterns whose root node is a wildcard. - if (!node_is_error) { - for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { - PatternEntry *pattern = &self->query->pattern_map.contents[i]; - - // If this node matches the first step of the pattern, then add a new - // state at the start of this pattern. - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - uint32_t start_depth = self->depth - step->depth; - if ( - (pattern->is_rooted ? - node_intersects_range : - (parent_intersects_range && !parent_is_error)) && - (!step->field || field_id == step->field) && - (!step->supertype_symbol || supertype_count > 0) && - (start_depth <= self->max_start_depth) - ) { - ts_query_cursor__add_state(self, pattern); - } - } - } - - // Add new states for any patterns whose root node matches this node. - unsigned i; - if (ts_query__pattern_map_search(self->query, symbol, &i)) { - PatternEntry *pattern = &self->query->pattern_map.contents[i]; - - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - uint32_t start_depth = self->depth - step->depth; - do { - // If this node matches the first step of the pattern, then add a new - // state at the start of this pattern. - if ( - (pattern->is_rooted ? 
- node_intersects_range : - (parent_intersects_range && !parent_is_error)) && - (!step->field || field_id == step->field) && - (start_depth <= self->max_start_depth) - ) { - ts_query_cursor__add_state(self, pattern); - } - - // Advance to the next pattern whose root node matches this node. - i++; - if (i == self->query->pattern_map.size) break; - pattern = &self->query->pattern_map.contents[i]; - step = &self->query->steps.contents[pattern->step_index]; - } while (step->symbol == symbol); - } - - // Update all of the in-progress states with current node. - for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) { - QueryState *state = &self->states.contents[j]; - QueryStep *step = &self->query->steps.contents[state->step_index]; - state->has_in_progress_alternatives = false; - copy_count = 0; - - // Check that the node matches all of the criteria for the next - // step of the pattern. - if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue; - - // Determine if this node matches this step of the pattern, and also - // if this node can have later siblings that match this step of the - // pattern. 
- bool node_does_match = false; - if (step->symbol == WILDCARD_SYMBOL) { - node_does_match = !node_is_error && (is_named || !step->is_named); - } else { - node_does_match = symbol == step->symbol; - } - bool later_sibling_can_match = has_later_siblings; - if ((step->is_immediate && is_named) || state->seeking_immediate_match) { - later_sibling_can_match = false; - } - if (step->is_last_child && has_later_named_siblings) { - node_does_match = false; - } - if (step->supertype_symbol) { - bool has_supertype = false; - for (unsigned k = 0; k < supertype_count; k++) { - if (supertypes[k] == step->supertype_symbol) { - has_supertype = true; - break; - } - } - if (!has_supertype) node_does_match = false; - } - if (step->field) { - if (step->field == field_id) { - if (!can_have_later_siblings_with_this_field) { - later_sibling_can_match = false; - } - } else { - node_does_match = false; - } - } - - if (step->negated_field_list_id) { - t_field_id *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id]; - for (;;) { - t_field_id negated_field_id = *negated_field_ids; - if (negated_field_id) { - negated_field_ids++; - if (ts_node_child_by_field_id(node, negated_field_id).id) { - node_does_match = false; - break; - } - } else { - break; - } - } - } - - // Remove states immediately if it is ever clear that they cannot match. - if (!node_does_match) { - if (!later_sibling_can_match) { - LOG( - " discard state. pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - array_erase(&self->states, j); - j--; - } - continue; - } - - // Some patterns can match their root node in multiple ways, capturing different - // children. If this pattern step could match later children within the same - // parent, then this query state cannot simply be updated in place. 
It must be - // split into two states: one that matches this node, and one which skips over - // this node, to preserve the possibility of matching later siblings. - if (later_sibling_can_match && ( - step->contains_captures || - ts_query__step_is_fallible(self->query, state->step_index) - )) { - if (ts_query_cursor__copy_state(self, &state)) { - LOG( - " split state for capture. pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - copy_count++; - } - } - - // If this pattern started with a wildcard, such that the pattern map - // actually points to the *second* step of the pattern, then check - // that the node has a parent, and capture the parent node if necessary. - if (state->needs_parent) { - t_parse_node parent = ts_tree_cursor_parent_node(&self->cursor); - if (ts_node_is_null(parent)) { - LOG(" missing parent node\n"); - state->dead = true; - } else { - state->needs_parent = false; - QueryStep *skipped_wildcard_step = step; - do { - skipped_wildcard_step--; - } while ( - skipped_wildcard_step->is_dead_end || - skipped_wildcard_step->is_pass_through || - skipped_wildcard_step->depth > 0 - ); - if (skipped_wildcard_step->capture_ids[0] != NONE) { - LOG(" capture wildcard parent\n"); - ts_query_cursor__capture( - self, - state, - skipped_wildcard_step, - parent - ); - } - } - } - - // If the current node is captured in this pattern, add it to the capture list. - if (step->capture_ids[0] != NONE) { - ts_query_cursor__capture(self, state, step, node); - } - - if (state->dead) { - array_erase(&self->states, j); - j--; - continue; - } - - // Advance this state to the next step of its pattern. - state->step_index++; - state->seeking_immediate_match = false; - LOG( - " advance state. 
pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - - QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true; - - // If this state's next step has an alternative step, then copy the state in order - // to pursue both alternatives. The alternative step itself may have an alternative, - // so this is an interactive process. - unsigned end_index = j + 1; - for (unsigned k = j; k < end_index; k++) { - QueryState *child_state = &self->states.contents[k]; - QueryStep *child_step = &self->query->steps.contents[child_state->step_index]; - if (child_step->alternative_index != NONE) { - // A "dead-end" step exists only to add a non-sequential jump into the step sequence, - // via its alternative index. When a state reaches a dead-end step, it jumps straight - // to the step's alternative. - if (child_step->is_dead_end) { - child_state->step_index = child_step->alternative_index; - k--; - continue; - } - - // A "pass-through" step exists only to add a branch into the step sequence, - // via its alternative_index. When a state reaches a pass-through step, it splits - // in order to process the alternative step, and then it advances to the next step. - if (child_step->is_pass_through) { - child_state->step_index++; - k--; - } - - QueryState *copy = ts_query_cursor__copy_state(self, &child_state); - if (copy) { - LOG( - " split state for branch. 
pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", - copy->pattern_index, - copy->step_index, - next_step->alternative_index, - next_step->alternative_is_immediate, - capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size - ); - end_index++; - copy_count++; - copy->step_index = child_step->alternative_index; - if (child_step->alternative_is_immediate) { - copy->seeking_immediate_match = true; - } - } - } - } - } - - for (unsigned j = 0; j < self->states.size; j++) { - QueryState *state = &self->states.contents[j]; - if (state->dead) { - array_erase(&self->states, j); - j--; - continue; - } - - // Enforce the longest-match criteria. When a query pattern contains optional or - // repeated nodes, this is necessary to avoid multiple redundant states, where - // one state has a strict subset of another state's captures. - bool did_remove = false; - for (unsigned k = j + 1; k < self->states.size; k++) { - QueryState *other_state = &self->states.contents[k]; - - // Query states are kept in ascending order of start_depth and pattern_index. - // Since the longest-match criteria is only used for deduping matches of the same - // pattern and root node, we only need to perform pairwise comparisons within a - // small slice of the states array. - if ( - other_state->start_depth != state->start_depth || - other_state->pattern_index != state->pattern_index - ) break; - - bool left_contains_right, right_contains_left; - ts_query_cursor__compare_captures( - self, - state, - other_state, - &left_contains_right, - &right_contains_left - ); - if (left_contains_right) { - if (state->step_index == other_state->step_index) { - LOG( - " drop shorter state. 
pattern: %u, step_index: %u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); - array_erase(&self->states, k); - k--; - continue; - } - other_state->has_in_progress_alternatives = true; - } - if (right_contains_left) { - if (state->step_index == other_state->step_index) { - LOG( - " drop shorter state. pattern: %u, step_index: %u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); - array_erase(&self->states, j); - j--; - did_remove = true; - break; - } - state->has_in_progress_alternatives = true; - } - } - - // If the state is at the end of its pattern, remove it from the list - // of in-progress states and add it to the list of finished states. - if (!did_remove) { - LOG( - " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n", - state->pattern_index, - state->start_depth, - state->step_index, - capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size - ); - QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if (next_step->depth == PATTERN_DONE_MARKER) { - if (state->has_in_progress_alternatives) { - LOG(" defer finishing pattern %u\n", state->pattern_index); - } else { - LOG(" finish pattern %u\n", state->pattern_index); - array_push(&self->finished_states, *state); - array_erase(&self->states, (uint32_t)(state - self->states.contents)); - did_match = true; - j--; - } - } - } - } - } - - if (ts_query_cursor__should_descend(self, node_intersects_range)) { - switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) { - case TreeCursorStepVisible: - self->depth++; - self->on_visible_node = true; - continue; - case TreeCursorStepHidden: - self->on_visible_node = false; - continue; - default: - break; - } - } - - self->ascending = true; - } - } -} - -bool ts_query_cursor_next_match( - t_query_cursor *self, - t_query_match 
*match -) { - if (self->finished_states.size == 0) { - if (!ts_query_cursor__advance(self, false)) { - return false; - } - } - - QueryState *state = &self->finished_states.contents[0]; - if (state->id == UINT32_MAX) state->id = self->next_state_id++; - match->id = state->id; - match->pattern_index = state->pattern_index; - const CaptureList *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - match->captures = captures->contents; - match->capture_count = captures->size; - capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); - array_erase(&self->finished_states, 0); - return true; -} - -void ts_query_cursor_remove_match( - t_query_cursor *self, - uint32_t match_id -) { - for (unsigned i = 0; i < self->finished_states.size; i++) { - const QueryState *state = &self->finished_states.contents[i]; - if (state->id == match_id) { - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - array_erase(&self->finished_states, i); - return; - } - } - - // Remove unfinished query states as well to prevent future - // captures for a match being removed. - for (unsigned i = 0; i < self->states.size; i++) { - const QueryState *state = &self->states.contents[i]; - if (state->id == match_id) { - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - array_erase(&self->states, i); - return; - } - } -} - -bool ts_query_cursor_next_capture( - t_query_cursor *self, - t_query_match *match, - uint32_t *capture_index -) { - // The goal here is to return captures in order, even though they may not - // be discovered in order, because patterns can overlap. Search for matches - // until there is a finished capture that is before any unfinished capture. - for (;;) { - // First, find the earliest capture in an unfinished match. 
- uint32_t first_unfinished_capture_byte; - uint32_t first_unfinished_pattern_index; - uint32_t first_unfinished_state_index; - bool first_unfinished_state_is_definite = false; - ts_query_cursor__first_in_progress_capture( - self, - &first_unfinished_state_index, - &first_unfinished_capture_byte, - &first_unfinished_pattern_index, - &first_unfinished_state_is_definite - ); - - // Then find the earliest capture in a finished match. It must occur - // before the first capture in an *unfinished* match. - QueryState *first_finished_state = NULL; - uint32_t first_finished_capture_byte = first_unfinished_capture_byte; - uint32_t first_finished_pattern_index = first_unfinished_pattern_index; - for (unsigned i = 0; i < self->finished_states.size;) { - QueryState *state = &self->finished_states.contents[i]; - const CaptureList *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - - // Remove states whose captures are all consumed. - if (state->consumed_capture_count >= captures->size) { - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - array_erase(&self->finished_states, i); - continue; - } - - t_parse_node node = captures->contents[state->consumed_capture_count].node; - - bool node_precedes_range = ( - ts_node_end_byte(node) <= self->start_byte || - point_lte(ts_node_end_point(node), self->start_point) - ); - bool node_follows_range = ( - ts_node_start_byte(node) >= self->end_byte || - point_gte(ts_node_start_point(node), self->end_point) - ); - bool node_outside_of_range = node_precedes_range || node_follows_range; - - // Skip captures that are outside of the cursor's range. 
- if (node_outside_of_range) { - state->consumed_capture_count++; - continue; - } - - uint32_t node_start_byte = ts_node_start_byte(node); - if ( - node_start_byte < first_finished_capture_byte || - ( - node_start_byte == first_finished_capture_byte && - state->pattern_index < first_finished_pattern_index - ) - ) { - first_finished_state = state; - first_finished_capture_byte = node_start_byte; - first_finished_pattern_index = state->pattern_index; - } - i++; - } - - // If there is finished capture that is clearly before any unfinished - // capture, then return its match, and its capture index. Internally - // record the fact that the capture has been 'consumed'. - QueryState *state; - if (first_finished_state) { - state = first_finished_state; - } else if (first_unfinished_state_is_definite) { - state = &self->states.contents[first_unfinished_state_index]; - } else { - state = NULL; - } - - if (state) { - if (state->id == UINT32_MAX) state->id = self->next_state_id++; - match->id = state->id; - match->pattern_index = state->pattern_index; - const CaptureList *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - match->captures = captures->contents; - match->capture_count = captures->size; - *capture_index = state->consumed_capture_count; - state->consumed_capture_count++; - return true; - } - - if (capture_list_pool_is_empty(&self->capture_list_pool)) { - LOG( - " abandon state. index:%u, pattern:%u, offset:%u.\n", - first_unfinished_state_index, - first_unfinished_pattern_index, - first_unfinished_capture_byte - ); - capture_list_pool_release( - &self->capture_list_pool, - self->states.contents[first_unfinished_state_index].capture_list_id - ); - array_erase(&self->states, first_unfinished_state_index); - } - - // If there are no finished matches that are ready to be returned, then - // continue finding more matches. 
- if ( - !ts_query_cursor__advance(self, true) && - self->finished_states.size == 0 - ) return false; - } -} - -void ts_query_cursor_set_max_start_depth( - t_query_cursor *self, - uint32_t max_start_depth -) { - self->max_start_depth = max_start_depth; -} - -#undef LOG -#include "src/array.h" -#include "src/parser.h" - -#include -#include -#include -#include - -enum TokenType { - HEREDOC_START, - SIMPLE_HEREDOC_BODY, - HEREDOC_BODY_BEGINNING, - HEREDOC_CONTENT, - HEREDOC_END, - FILE_DESCRIPTOR, - EMPTY_VALUE, - CONCAT, - VARIABLE_NAME, - TEST_OPERATOR, - REGEX, - REGEX_NO_SLASH, - REGEX_NO_SPACE, - EXPANSION_WORD, - EXTGLOB_PATTERN, - BARE_DOLLAR, - BRACE_START, - IMMEDIATE_DOUBLE_HASH, - EXTERNAL_EXPANSION_SYM_HASH, - EXTERNAL_EXPANSION_SYM_BANG, - EXTERNAL_EXPANSION_SYM_EQUAL, - CLOSING_BRACE, - CLOSING_BRACKET, - HEREDOC_ARROW, - HEREDOC_ARROW_DASH, - NEWLINE, - OPENING_PAREN, - ESAC, - ERROR_RECOVERY, -}; - -typedef Array(char) String; - -typedef struct { - bool is_raw; - bool started; - bool allows_indent; - String delimiter; - String current_leading_word; -} Heredoc; - -#define heredoc_new() \ - { \ - .is_raw = false, \ - .started = false, \ - .allows_indent = false, \ - .delimiter = array_new(), \ - .current_leading_word = array_new(), \ - }; - -typedef struct { - uint8_t last_glob_paren_depth; - bool ext_was_in_double_quote; - bool ext_saw_outside_quote; - Array(Heredoc) heredocs; -} Scanner; - -static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } - -static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } - -static inline bool in_error_recovery(const bool *valid_symbols) { return valid_symbols[ERROR_RECOVERY]; } - -static inline void reset_string(String *string) { - if (string->size > 0) { - memset(string->contents, 0, string->size); - array_clear(string); - } -} - -static inline void reset_heredoc(Heredoc *heredoc) { - heredoc->is_raw = false; - heredoc->started = false; - heredoc->allows_indent = false; - 
reset_string(&heredoc->delimiter); -} - -static inline void reset(Scanner *scanner) { - for (uint32_t i = 0; i < scanner->heredocs.size; i++) { - reset_heredoc(array_get(&scanner->heredocs, i)); - } -} - -static unsigned serialize(Scanner *scanner, char *buffer) { - uint32_t size = 0; - - buffer[size++] = (char)scanner->last_glob_paren_depth; - buffer[size++] = (char)scanner->ext_was_in_double_quote; - buffer[size++] = (char)scanner->ext_saw_outside_quote; - buffer[size++] = (char)scanner->heredocs.size; - - for (uint32_t i = 0; i < scanner->heredocs.size; i++) { - Heredoc *heredoc = array_get(&scanner->heredocs, i); - if (heredoc->delimiter.size + 3 + size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { - return 0; - } - - buffer[size++] = (char)heredoc->is_raw; - buffer[size++] = (char)heredoc->started; - buffer[size++] = (char)heredoc->allows_indent; - - memcpy(&buffer[size], &heredoc->delimiter.size, sizeof(uint32_t)); - size += sizeof(uint32_t); - if (heredoc->delimiter.size > 0) { - memcpy(&buffer[size], heredoc->delimiter.contents, heredoc->delimiter.size); - size += heredoc->delimiter.size; - } - } - return size; -} - -static void deserialize(Scanner *scanner, const char *buffer, unsigned length) { - if (length == 0) { - reset(scanner); - } else { - uint32_t size = 0; - scanner->last_glob_paren_depth = buffer[size++]; - scanner->ext_was_in_double_quote = buffer[size++]; - scanner->ext_saw_outside_quote = buffer[size++]; - uint32_t heredoc_count = (unsigned char)buffer[size++]; - for (uint32_t i = 0; i < heredoc_count; i++) { - Heredoc *heredoc = NULL; - if (i < scanner->heredocs.size) { - heredoc = array_get(&scanner->heredocs, i); - } else { - Heredoc new_heredoc = heredoc_new(); - array_push(&scanner->heredocs, new_heredoc); - heredoc = array_back(&scanner->heredocs); - } - - heredoc->is_raw = buffer[size++]; - heredoc->started = buffer[size++]; - heredoc->allows_indent = buffer[size++]; - - memcpy(&heredoc->delimiter.size, &buffer[size], sizeof(uint32_t)); - 
size += sizeof(uint32_t); - array_reserve(&heredoc->delimiter, heredoc->delimiter.size); - - if (heredoc->delimiter.size > 0) { - memcpy(heredoc->delimiter.contents, &buffer[size], heredoc->delimiter.size); - size += heredoc->delimiter.size; - } - } - assert(size == length); - } -} - -/** - * Consume a "word" in POSIX parlance, and returns it unquoted. - * - * This is an approximate implementation that doesn't deal with any - * POSIX-mandated substitution, and assumes the default value for - * IFS. - */ -static bool advance_word(TSLexer *lexer, String *unquoted_word) { - bool empty = true; - - int32_t quote = 0; - if (lexer->lookahead == '\'' || lexer->lookahead == '"') { - quote = lexer->lookahead; - advance(lexer); - } - - while (lexer->lookahead && - !(quote ? lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n' - : iswspace(lexer->lookahead))) { - if (lexer->lookahead == '\\') { - advance(lexer); - if (!lexer->lookahead) { - return false; - } - } - empty = false; - array_push(unquoted_word, lexer->lookahead); - advance(lexer); - } - array_push(unquoted_word, '\0'); - - if (quote && lexer->lookahead == quote) { - advance(lexer); - } - - return !empty; -} - -static inline bool scan_bare_dollar(TSLexer *lexer) { - while (iswspace(lexer->lookahead) && lexer->lookahead != '\n' && !lexer->eof(lexer)) { - skip(lexer); - } - - if (lexer->lookahead == '$') { - advance(lexer); - lexer->result_symbol = BARE_DOLLAR; - lexer->mark_end(lexer); - return iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"'; - } - - return false; -} - -static bool scan_heredoc_start(Heredoc *heredoc, TSLexer *lexer) { - while (iswspace(lexer->lookahead)) { - skip(lexer); - } - - lexer->result_symbol = HEREDOC_START; - heredoc->is_raw = lexer->lookahead == '\'' || lexer->lookahead == '"' || lexer->lookahead == '\\'; - - bool found_delimiter = advance_word(lexer, &heredoc->delimiter); - if (!found_delimiter) { - 
reset_string(&heredoc->delimiter); - return false; - } - return found_delimiter; -} - -static bool scan_heredoc_end_identifier(Heredoc *heredoc, TSLexer *lexer) { - reset_string(&heredoc->current_leading_word); - // Scan the first 'n' characters on this line, to see if they match the - // heredoc delimiter - int32_t size = 0; - if (heredoc->delimiter.size > 0) { - while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && - (int32_t)*array_get(&heredoc->delimiter, size) == lexer->lookahead && - heredoc->current_leading_word.size < heredoc->delimiter.size) { - array_push(&heredoc->current_leading_word, lexer->lookahead); - advance(lexer); - size++; - } - } - array_push(&heredoc->current_leading_word, '\0'); - return heredoc->delimiter.size == 0 - ? false - : strcmp(heredoc->current_leading_word.contents, heredoc->delimiter.contents) == 0; -} - -static bool scan_heredoc_content(Scanner *scanner, TSLexer *lexer, enum TokenType middle_type, - enum TokenType end_type) { - bool did_advance = false; - Heredoc *heredoc = array_back(&scanner->heredocs); - - for (;;) { - switch (lexer->lookahead) { - case '\0': { - if (lexer->eof(lexer) && did_advance) { - reset_heredoc(heredoc); - lexer->result_symbol = end_type; - return true; - } - return false; - } - - case '\\': { - did_advance = true; - advance(lexer); - advance(lexer); - break; - } - - case '$': { - if (heredoc->is_raw) { - did_advance = true; - advance(lexer); - break; - } - if (did_advance) { - lexer->mark_end(lexer); - lexer->result_symbol = middle_type; - heredoc->started = true; - advance(lexer); - if (iswalpha(lexer->lookahead) || lexer->lookahead == '{' || lexer->lookahead == '(') { - return true; - } - break; - } - if (middle_type == HEREDOC_BODY_BEGINNING && lexer->get_column(lexer) == 0) { - lexer->result_symbol = middle_type; - heredoc->started = true; - return true; - } - return false; - } - - case '\n': { - if (!did_advance) { - skip(lexer); - } else { - advance(lexer); - } - did_advance = true; - if 
(heredoc->allows_indent) { - while (iswspace(lexer->lookahead)) { - advance(lexer); - } - } - lexer->result_symbol = heredoc->started ? middle_type : end_type; - lexer->mark_end(lexer); - if (scan_heredoc_end_identifier(heredoc, lexer)) { - if (lexer->result_symbol == HEREDOC_END) { - array_pop(&scanner->heredocs); - } - return true; - } - break; - } - - default: { - if (lexer->get_column(lexer) == 0) { - // an alternative is to check the starting column of the - // heredoc body and track that statefully - while (iswspace(lexer->lookahead)) { - if (did_advance) { - advance(lexer); - } else { - skip(lexer); - } - } - if (end_type != SIMPLE_HEREDOC_BODY) { - lexer->result_symbol = middle_type; - if (scan_heredoc_end_identifier(heredoc, lexer)) { - return true; - } - } - if (end_type == SIMPLE_HEREDOC_BODY) { - lexer->result_symbol = end_type; - lexer->mark_end(lexer); - if (scan_heredoc_end_identifier(heredoc, lexer)) { - return true; - } - } - } - did_advance = true; - advance(lexer); - break; - } - } - } -} - -static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) { - if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols)) { - if (!(lexer->lookahead == 0 || iswspace(lexer->lookahead) || lexer->lookahead == '>' || - lexer->lookahead == '<' || lexer->lookahead == ')' || lexer->lookahead == '(' || - lexer->lookahead == ';' || lexer->lookahead == '&' || lexer->lookahead == '|' || - (lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) || - (lexer->lookahead == ']' && valid_symbols[CLOSING_BRACKET]))) { - lexer->result_symbol = CONCAT; - // So for a`b`, we want to return a concat. We check if the - // 2nd backtick has whitespace after it, and if it does we - // return concat. 
- if (lexer->lookahead == '`') { - lexer->mark_end(lexer); - advance(lexer); - while (lexer->lookahead != '`' && !lexer->eof(lexer)) { - advance(lexer); - } - if (lexer->eof(lexer)) { - return false; - } - if (lexer->lookahead == '`') { - advance(lexer); - } - return iswspace(lexer->lookahead) || lexer->eof(lexer); - } - // strings w/ expansions that contains escaped quotes or - // backslashes need this to return a concat - if (lexer->lookahead == '\\') { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '"' || lexer->lookahead == '\'' || lexer->lookahead == '\\') { - return true; - } - if (lexer->eof(lexer)) { - return false; - } - } else { - return true; - } - } - if (iswspace(lexer->lookahead) && valid_symbols[CLOSING_BRACE] && !valid_symbols[EXPANSION_WORD]) { - lexer->result_symbol = CONCAT; - return true; - } - } - - if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !in_error_recovery(valid_symbols)) { - // advance two # and ensure not } after - if (lexer->lookahead == '#') { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '#') { - advance(lexer); - if (lexer->lookahead != '}') { - lexer->result_symbol = IMMEDIATE_DOUBLE_HASH; - lexer->mark_end(lexer); - return true; - } - } - } - } - - if (valid_symbols[EXTERNAL_EXPANSION_SYM_HASH] && !in_error_recovery(valid_symbols)) { - if (lexer->lookahead == '#' || lexer->lookahead == '=' || lexer->lookahead == '!') { - lexer->result_symbol = lexer->lookahead == '#' ? EXTERNAL_EXPANSION_SYM_HASH - : lexer->lookahead == '!' ? 
EXTERNAL_EXPANSION_SYM_BANG - : EXTERNAL_EXPANSION_SYM_EQUAL; - advance(lexer); - lexer->mark_end(lexer); - while (lexer->lookahead == '#' || lexer->lookahead == '=' || lexer->lookahead == '!') { - advance(lexer); - } - while (iswspace(lexer->lookahead)) { - skip(lexer); - } - if (lexer->lookahead == '}') { - return true; - } - return false; - } - } - - if (valid_symbols[EMPTY_VALUE]) { - if (iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == ';' || lexer->lookahead == '&') { - lexer->result_symbol = EMPTY_VALUE; - return true; - } - } - - if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.size > 0 && - !array_back(&scanner->heredocs)->started && !in_error_recovery(valid_symbols)) { - return scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY); - } - - if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0) { - Heredoc *heredoc = array_back(&scanner->heredocs); - if (scan_heredoc_end_identifier(heredoc, lexer)) { - array_delete(&heredoc->current_leading_word); - array_delete(&heredoc->delimiter); - array_pop(&scanner->heredocs); - lexer->result_symbol = HEREDOC_END; - return true; - } - } - - if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 && array_back(&scanner->heredocs)->started && - !in_error_recovery(valid_symbols)) { - return scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END); - } - - if (valid_symbols[HEREDOC_START] && !in_error_recovery(valid_symbols) && scanner->heredocs.size > 0) { - return scan_heredoc_start(array_back(&scanner->heredocs), lexer); - } - - if (valid_symbols[TEST_OPERATOR] && !valid_symbols[EXPANSION_WORD]) { - while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') { - skip(lexer); - } - - if (lexer->lookahead == '\\') { - if (valid_symbols[EXTGLOB_PATTERN]) { - goto extglob_pattern; - } - if (valid_symbols[REGEX_NO_SPACE]) { - goto regex; - } - skip(lexer); - - if (lexer->eof(lexer)) { - 
return false; - } - - if (lexer->lookahead == '\r') { - skip(lexer); - if (lexer->lookahead == '\n') { - skip(lexer); - } - } else if (lexer->lookahead == '\n') { - skip(lexer); - } else { - return false; - } - - while (iswspace(lexer->lookahead)) { - skip(lexer); - } - } - - if (lexer->lookahead == '\n' && !valid_symbols[NEWLINE]) { - skip(lexer); - - while (iswspace(lexer->lookahead)) { - skip(lexer); - } - } - - if (lexer->lookahead == '-') { - advance(lexer); - - bool advanced_once = false; - while (iswalpha(lexer->lookahead)) { - advanced_once = true; - advance(lexer); - } - - if (iswspace(lexer->lookahead) && advanced_once) { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) { - if (valid_symbols[EXPANSION_WORD]) { - lexer->mark_end(lexer); - lexer->result_symbol = EXPANSION_WORD; - return true; - } - return false; - } - lexer->result_symbol = TEST_OPERATOR; - return true; - } - if (iswspace(lexer->lookahead) && valid_symbols[EXTGLOB_PATTERN]) { - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - } - - if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && scan_bare_dollar(lexer)) { - return true; - } - } - - if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) && - !valid_symbols[REGEX_NO_SLASH] && !in_error_recovery(valid_symbols)) { - for (;;) { - if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == '\r' || - (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) && - !valid_symbols[EXPANSION_WORD]) { - skip(lexer); - } else if (lexer->lookahead == '\\') { - skip(lexer); - - if (lexer->eof(lexer)) { - lexer->mark_end(lexer); - lexer->result_symbol = VARIABLE_NAME; - return true; - } - - if (lexer->lookahead == '\r') { - skip(lexer); - } - if (lexer->lookahead == '\n') { - skip(lexer); - } else { - if (lexer->lookahead == '\\' && valid_symbols[EXPANSION_WORD]) { - goto expansion_word; - } - return 
false; - } - } else { - break; - } - } - - // no '*', '@', '?', '-', '$', '0', '_' - if (!valid_symbols[EXPANSION_WORD] && - (lexer->lookahead == '*' || lexer->lookahead == '@' || lexer->lookahead == '?' || lexer->lookahead == '-' || - lexer->lookahead == '0' || lexer->lookahead == '_')) { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '=' || lexer->lookahead == '[' || lexer->lookahead == ':' || - lexer->lookahead == '-' || lexer->lookahead == '%' || lexer->lookahead == '#' || - lexer->lookahead == '/') { - return false; - } - if (valid_symbols[EXTGLOB_PATTERN] && iswspace(lexer->lookahead)) { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - } - - if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<') { - advance(lexer); - if (lexer->lookahead == '<') { - advance(lexer); - if (lexer->lookahead == '-') { - advance(lexer); - Heredoc heredoc = heredoc_new(); - heredoc.allows_indent = true; - array_push(&scanner->heredocs, heredoc); - lexer->result_symbol = HEREDOC_ARROW_DASH; - } else if (lexer->lookahead == '<' || lexer->lookahead == '=') { - return false; - } else { - Heredoc heredoc = heredoc_new(); - array_push(&scanner->heredocs, heredoc); - lexer->result_symbol = HEREDOC_ARROW; - } - return true; - } - return false; - } - - bool is_number = true; - if (iswdigit(lexer->lookahead)) { - advance(lexer); - } else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') { - is_number = false; - advance(lexer); - } else { - if (lexer->lookahead == '{') { - goto brace_start; - } - if (valid_symbols[EXPANSION_WORD]) { - goto expansion_word; - } - if (valid_symbols[EXTGLOB_PATTERN]) { - goto extglob_pattern; - } - return false; - } - - for (;;) { - if (iswdigit(lexer->lookahead)) { - advance(lexer); - } else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') { - is_number = false; - advance(lexer); - } else { - break; - } - } - - if (is_number && valid_symbols[FILE_DESCRIPTOR] && 
(lexer->lookahead == '>' || lexer->lookahead == '<')) { - lexer->result_symbol = FILE_DESCRIPTOR; - return true; - } - - if (valid_symbols[VARIABLE_NAME]) { - if (lexer->lookahead == '+') { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '=' || lexer->lookahead == ':' || valid_symbols[CLOSING_BRACE]) { - lexer->result_symbol = VARIABLE_NAME; - return true; - } - return false; - } - if (lexer->lookahead == '/') { - return false; - } - if (lexer->lookahead == '=' || lexer->lookahead == '[' || - (lexer->lookahead == ':' && !valid_symbols[CLOSING_BRACE] && - !valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable - // names for function words, only handling : for now? #235 - lexer->lookahead == '%' || - (lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || - (lexer->lookahead == '-' && valid_symbols[CLOSING_BRACE])) { - lexer->mark_end(lexer); - lexer->result_symbol = VARIABLE_NAME; - return true; - } - - if (lexer->lookahead == '?') { - lexer->mark_end(lexer); - advance(lexer); - lexer->result_symbol = VARIABLE_NAME; - return iswalpha(lexer->lookahead); - } - } - - return false; - } - - if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && scan_bare_dollar(lexer)) { - return true; - } - -regex: - if ((valid_symbols[REGEX] || valid_symbols[REGEX_NO_SLASH] || valid_symbols[REGEX_NO_SPACE]) && - !in_error_recovery(valid_symbols)) { - if (valid_symbols[REGEX] || valid_symbols[REGEX_NO_SPACE]) { - while (iswspace(lexer->lookahead)) { - skip(lexer); - } - } - - if ((lexer->lookahead != '"' && lexer->lookahead != '\'') || - ((lexer->lookahead == '$' || lexer->lookahead == '\'') && valid_symbols[REGEX_NO_SLASH]) || - (lexer->lookahead == '\'' && valid_symbols[REGEX_NO_SPACE])) { - typedef struct { - bool done; - bool advanced_once; - bool found_non_alnumdollarunderdash; - bool last_was_escape; - bool in_single_quote; - uint32_t paren_depth; - uint32_t bracket_depth; - uint32_t 
brace_depth; - } State; - - if (lexer->lookahead == '$' && valid_symbols[REGEX_NO_SLASH]) { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '(') { - return false; - } - } - - lexer->mark_end(lexer); - - State state = {false, false, false, false, false, 0, 0, 0}; - while (!state.done) { - if (state.in_single_quote) { - if (lexer->lookahead == '\'') { - state.in_single_quote = false; - advance(lexer); - lexer->mark_end(lexer); - } - } - switch (lexer->lookahead) { - case '\\': - state.last_was_escape = true; - break; - case '\0': - return false; - case '(': - state.paren_depth++; - state.last_was_escape = false; - break; - case '[': - state.bracket_depth++; - state.last_was_escape = false; - break; - case '{': - if (!state.last_was_escape) { - state.brace_depth++; - } - state.last_was_escape = false; - break; - case ')': - if (state.paren_depth == 0) { - state.done = true; - } - state.paren_depth--; - state.last_was_escape = false; - break; - case ']': - if (state.bracket_depth == 0) { - state.done = true; - } - state.bracket_depth--; - state.last_was_escape = false; - break; - case '}': - if (state.brace_depth == 0) { - state.done = true; - } - state.brace_depth--; - state.last_was_escape = false; - break; - case '\'': - // Enter or exit a single-quoted string. 
- state.in_single_quote = !state.in_single_quote; - advance(lexer); - state.advanced_once = true; - state.last_was_escape = false; - continue; - default: - state.last_was_escape = false; - break; - } - - if (!state.done) { - if (valid_symbols[REGEX]) { - bool was_space = !state.in_single_quote && iswspace(lexer->lookahead); - advance(lexer); - state.advanced_once = true; - if (!was_space || state.paren_depth > 0) { - lexer->mark_end(lexer); - } - } else if (valid_symbols[REGEX_NO_SLASH]) { - if (lexer->lookahead == '/') { - lexer->mark_end(lexer); - lexer->result_symbol = REGEX_NO_SLASH; - return state.advanced_once; - } - if (lexer->lookahead == '\\') { - advance(lexer); - state.advanced_once = true; - if (!lexer->eof(lexer) && lexer->lookahead != '[' && lexer->lookahead != '/') { - advance(lexer); - lexer->mark_end(lexer); - } - } else { - bool was_space = !state.in_single_quote && iswspace(lexer->lookahead); - advance(lexer); - state.advanced_once = true; - if (!was_space) { - lexer->mark_end(lexer); - } - } - } else if (valid_symbols[REGEX_NO_SPACE]) { - if (lexer->lookahead == '\\') { - state.found_non_alnumdollarunderdash = true; - advance(lexer); - if (!lexer->eof(lexer)) { - advance(lexer); - } - } else if (lexer->lookahead == '$') { - lexer->mark_end(lexer); - advance(lexer); - // do not parse a command - // substitution - if (lexer->lookahead == '(') { - return false; - } - // end $ always means regex, e.g. 
- // 99999999$ - if (iswspace(lexer->lookahead)) { - lexer->result_symbol = REGEX_NO_SPACE; - lexer->mark_end(lexer); - return true; - } - } else { - bool was_space = !state.in_single_quote && iswspace(lexer->lookahead); - if (was_space && state.paren_depth == 0) { - lexer->mark_end(lexer); - lexer->result_symbol = REGEX_NO_SPACE; - return state.found_non_alnumdollarunderdash; - } - if (!iswalnum(lexer->lookahead) && lexer->lookahead != '$' && lexer->lookahead != '-' && - lexer->lookahead != '_') { - state.found_non_alnumdollarunderdash = true; - } - advance(lexer); - } - } - } - } - - lexer->result_symbol = valid_symbols[REGEX_NO_SLASH] ? REGEX_NO_SLASH - : valid_symbols[REGEX_NO_SPACE] ? REGEX_NO_SPACE - : REGEX; - if (valid_symbols[REGEX] && !state.advanced_once) { - return false; - } - return true; - } - } - -extglob_pattern: - if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols)) { - // first skip ws, then check for ? * + @ ! - while (iswspace(lexer->lookahead)) { - skip(lexer); - } - - if (lexer->lookahead == '?' || lexer->lookahead == '*' || lexer->lookahead == '+' || lexer->lookahead == '@' || - lexer->lookahead == '!' || lexer->lookahead == '-' || lexer->lookahead == ')' || lexer->lookahead == '\\' || - lexer->lookahead == '.' 
|| lexer->lookahead == '[' || (iswalpha(lexer->lookahead))) { - if (lexer->lookahead == '\\') { - advance(lexer); - if ((iswspace(lexer->lookahead) || lexer->lookahead == '"') && lexer->lookahead != '\r' && - lexer->lookahead != '\n') { - advance(lexer); - } else { - return false; - } - } - - if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) { - lexer->mark_end(lexer); - advance(lexer); - - if (iswspace(lexer->lookahead)) { - return false; - } - } - - lexer->mark_end(lexer); - bool was_non_alpha = !iswalpha(lexer->lookahead); - if (lexer->lookahead != '[') { - // no esac - if (lexer->lookahead == 'e') { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == 's') { - advance(lexer); - if (lexer->lookahead == 'a') { - advance(lexer); - if (lexer->lookahead == 'c') { - advance(lexer); - if (iswspace(lexer->lookahead)) { - return false; - } - } - } - } - } else { - advance(lexer); - } - } - - // -\w is just a word, find something else special - if (lexer->lookahead == '-') { - lexer->mark_end(lexer); - advance(lexer); - while (iswalnum(lexer->lookahead)) { - advance(lexer); - } - - if (lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.') { - return false; - } - lexer->mark_end(lexer); - } - - // case item -) or *) - if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) { - lexer->mark_end(lexer); - advance(lexer); - if (iswspace(lexer->lookahead)) { - lexer->result_symbol = EXTGLOB_PATTERN; - return was_non_alpha; - } - } - - if (iswspace(lexer->lookahead)) { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return true; - } - - if (lexer->lookahead == '$') { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '{' || lexer->lookahead == '(') { - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - } - - if (lexer->lookahead == '|') { - lexer->mark_end(lexer); - advance(lexer); - lexer->result_symbol = 
EXTGLOB_PATTERN; - return true; - } - - if (!iswalnum(lexer->lookahead) && lexer->lookahead != '(' && lexer->lookahead != '"' && - lexer->lookahead != '[' && lexer->lookahead != '?' && lexer->lookahead != '/' && - lexer->lookahead != '\\' && lexer->lookahead != '_' && lexer->lookahead != '*') { - return false; - } - - typedef struct { - bool done; - bool saw_non_alphadot; - uint32_t paren_depth; - uint32_t bracket_depth; - uint32_t brace_depth; - } State; - - State state = {false, was_non_alpha, scanner->last_glob_paren_depth, 0, 0}; - while (!state.done) { - switch (lexer->lookahead) { - case '\0': - return false; - case '(': - state.paren_depth++; - break; - case '[': - state.bracket_depth++; - break; - case '{': - state.brace_depth++; - break; - case ')': - if (state.paren_depth == 0) { - state.done = true; - } - state.paren_depth--; - break; - case ']': - if (state.bracket_depth == 0) { - state.done = true; - } - state.bracket_depth--; - break; - case '}': - if (state.brace_depth == 0) { - state.done = true; - } - state.brace_depth--; - break; - } - - if (lexer->lookahead == '|') { - lexer->mark_end(lexer); - advance(lexer); - if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0) { - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - } - - if (!state.done) { - bool was_space = iswspace(lexer->lookahead); - if (lexer->lookahead == '$') { - lexer->mark_end(lexer); - if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' 
&& lexer->lookahead != '\\') { - state.saw_non_alphadot = true; - } - advance(lexer); - if (lexer->lookahead == '(' || lexer->lookahead == '{') { - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = state.paren_depth; - return state.saw_non_alphadot; - } - } - if (was_space) { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return state.saw_non_alphadot; - } - if (lexer->lookahead == '"') { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return state.saw_non_alphadot; - } - if (lexer->lookahead == '\\') { - if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') { - state.saw_non_alphadot = true; - } - advance(lexer); - if (iswspace(lexer->lookahead) || lexer->lookahead == '"') { - advance(lexer); - } - } else { - if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') { - state.saw_non_alphadot = true; - } - advance(lexer); - } - if (!was_space) { - lexer->mark_end(lexer); - } - } - } - - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return state.saw_non_alphadot; - } - scanner->last_glob_paren_depth = 0; - - return false; - } - -expansion_word: - if (valid_symbols[EXPANSION_WORD]) { - bool advanced_once = false; - bool advance_once_space = false; - for (;;) { - if (lexer->lookahead == '\"') { - return false; - } - if (lexer->lookahead == '$') { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || - iswalnum(lexer->lookahead)) { - lexer->result_symbol = EXPANSION_WORD; - return advanced_once; - } - advanced_once = true; - } - - if (lexer->lookahead == '}') { - lexer->mark_end(lexer); - lexer->result_symbol = EXPANSION_WORD; - return advanced_once || advance_once_space; - } - - if (lexer->lookahead == '(' && !(advanced_once || advance_once_space)) { 
- lexer->mark_end(lexer); - advance(lexer); - while (lexer->lookahead != ')' && !lexer->eof(lexer)) { - // if we find a $( or ${ assume this is valid and is - // a garbage concatenation of some weird word + an - // expansion - // I wonder where this can fail - if (lexer->lookahead == '$') { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || - iswalnum(lexer->lookahead)) { - lexer->result_symbol = EXPANSION_WORD; - return advanced_once; - } - advanced_once = true; - } else { - advanced_once = advanced_once || !iswspace(lexer->lookahead); - advance_once_space = advance_once_space || iswspace(lexer->lookahead); - advance(lexer); - } - } - lexer->mark_end(lexer); - if (lexer->lookahead == ')') { - advanced_once = true; - advance(lexer); - lexer->mark_end(lexer); - if (lexer->lookahead == '}') { - return false; - } - } else { - return false; - } - } - - if (lexer->lookahead == '\'') { - return false; - } - - if (lexer->eof(lexer)) { - return false; - } - advanced_once = advanced_once || !iswspace(lexer->lookahead); - advance_once_space = advance_once_space || iswspace(lexer->lookahead); - advance(lexer); - } - } - -brace_start: - if (valid_symbols[BRACE_START] && !in_error_recovery(valid_symbols)) { - while (iswspace(lexer->lookahead)) { - skip(lexer); - } - - if (lexer->lookahead != '{') { - return false; - } - - advance(lexer); - lexer->mark_end(lexer); - - while (isdigit(lexer->lookahead)) { - advance(lexer); - } - - if (lexer->lookahead != '.') { - return false; - } - advance(lexer); - - if (lexer->lookahead != '.') { - return false; - } - advance(lexer); - - while (isdigit(lexer->lookahead)) { - advance(lexer); - } - - if (lexer->lookahead != '}') { - return false; - } - - lexer->result_symbol = BRACE_START; - return true; - } - - return false; -} - -void *tree_sitter_bash_external_scanner_create() { - Scanner *scanner = calloc(1, sizeof(Scanner)); - array_init(&scanner->heredocs); - 
return scanner; -} - -bool tree_sitter_bash_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { - Scanner *scanner = (Scanner *)payload; - return scan(scanner, lexer, valid_symbols); -} - -unsigned tree_sitter_bash_external_scanner_serialize(void *payload, char *state) { - Scanner *scanner = (Scanner *)payload; - return serialize(scanner, state); -} - -void tree_sitter_bash_external_scanner_deserialize(void *payload, const char *state, unsigned length) { - Scanner *scanner = (Scanner *)payload; - deserialize(scanner, state, length); -} - -void tree_sitter_bash_external_scanner_destroy(void *payload) { - Scanner *scanner = (Scanner *)payload; - for (size_t i = 0; i < scanner->heredocs.size; i++) { - Heredoc *heredoc = array_get(&scanner->heredocs, i); - array_delete(&heredoc->current_leading_word); - array_delete(&heredoc->delimiter); - } - array_delete(&scanner->heredocs); - free(scanner); -} -#include "src/alloc.h" -#include "src/language.h" -#include "src/subtree.h" -#include "src/array.h" -#include "src/stack.h" -#include "src/length.h" -#include -#include -#include - -#define MAX_LINK_COUNT 8 -#define MAX_NODE_POOL_SIZE 50 -#define MAX_ITERATOR_COUNT 64 - -#if defined _WIN32 && !defined __GNUC__ -#define forceinline __forceinline -#else -#define forceinline static inline __attribute__((always_inline)) -#endif - -typedef struct StackNode StackNode; - -typedef struct { - StackNode *node; - Subtree subtree; - bool is_pending; -} StackLink; - -struct StackNode { - t_state_id state; - Length position; - StackLink links[MAX_LINK_COUNT]; - short unsigned int link_count; - uint32_t ref_count; - unsigned error_cost; - unsigned node_count; - int dynamic_precedence; -}; - -typedef struct { - StackNode *node; - SubtreeArray subtrees; - uint32_t subtree_count; - bool is_pending; -} StackIterator; - -typedef Array(StackNode *) StackNodeArray; - -typedef enum { - StackStatusActive, - StackStatusPaused, - StackStatusHalted, -} StackStatus; - 
-typedef struct { - StackNode *node; - StackSummary *summary; - unsigned node_count_at_last_error; - Subtree last_external_token; - Subtree lookahead_when_paused; - StackStatus status; -} StackHead; - -struct Stack { - Array(StackHead) heads; - StackSliceArray slices; - Array(StackIterator) iterators; - StackNodeArray node_pool; - StackNode *base_node; - SubtreePool *subtree_pool; -}; - -typedef unsigned StackAction; -enum { - StackActionNone, - StackActionStop = 1, - StackActionPop = 2, -}; - -typedef StackAction (*StackCallback)(void *, const StackIterator *); - -static void stack_node_retain(StackNode *self) { - if (!self) - return; - assert(self->ref_count > 0); - self->ref_count++; - assert(self->ref_count != 0); -} - -static void stack_node_release( - StackNode *self, - StackNodeArray *pool, - SubtreePool *subtree_pool -) { -recur: - assert(self->ref_count != 0); - self->ref_count--; - if (self->ref_count > 0) return; - - StackNode *first_predecessor = NULL; - if (self->link_count > 0) { - for (unsigned i = self->link_count - 1; i > 0; i--) { - StackLink link = self->links[i]; - if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree); - stack_node_release(link.node, pool, subtree_pool); - } - StackLink link = self->links[0]; - if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree); - first_predecessor = self->links[0].node; - } - - if (pool->size < MAX_NODE_POOL_SIZE) { - array_push(pool, self); - } else { - ts_free(self); - } - - if (first_predecessor) { - self = first_predecessor; - goto recur; - } -} - -/// Get the number of nodes in the subtree, for the purpose of measuring -/// how much progress has been made by a given version of the stack. 
-static uint32_t stack__subtree_node_count(Subtree subtree) { - uint32_t count = ts_subtree_visible_descendant_count(subtree); - if (ts_subtree_visible(subtree)) count++; - - // Count intermediate error nodes even though they are not visible, - // because a stack version's node count is used to check whether it - // has made any progress since the last time it encountered an error. - if (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) count++; - - return count; -} - -static StackNode *stack_node_new( - StackNode *previous_node, - Subtree subtree, - bool is_pending, - t_state_id state, - StackNodeArray *pool -) { - StackNode *node = pool->size > 0 - ? array_pop(pool) - : ts_malloc(sizeof(StackNode)); - *node = (StackNode) { - .ref_count = 1, - .link_count = 0, - .state = state - }; - - if (previous_node) { - node->link_count = 1; - node->links[0] = (StackLink) { - .node = previous_node, - .subtree = subtree, - .is_pending = is_pending, - }; - - node->position = previous_node->position; - node->error_cost = previous_node->error_cost; - node->dynamic_precedence = previous_node->dynamic_precedence; - node->node_count = previous_node->node_count; - - if (subtree.ptr) { - node->error_cost += ts_subtree_error_cost(subtree); - node->position = length_add(node->position, ts_subtree_total_size(subtree)); - node->node_count += stack__subtree_node_count(subtree); - node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree); - } - } else { - node->position = length_zero(); - node->error_cost = 0; - } - - return node; -} - -static bool stack__subtree_is_equivalent(Subtree left, Subtree right) { - if (left.ptr == right.ptr) return true; - if (!left.ptr || !right.ptr) return false; - - // Symbols must match - if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false; - - // If both have errors, don't bother keeping both. 
- if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) return true; - - return ( - ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes && - ts_subtree_size(left).bytes == ts_subtree_size(right).bytes && - ts_subtree_child_count(left) == ts_subtree_child_count(right) && - ts_subtree_extra(left) == ts_subtree_extra(right) && - ts_subtree_external_scanner_state_eq(left, right) - ); -} - -static void stack_node_add_link( - StackNode *self, - StackLink link, - SubtreePool *subtree_pool -) { - if (link.node == self) return; - - for (int i = 0; i < self->link_count; i++) { - StackLink *existing_link = &self->links[i]; - if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) { - // In general, we preserve ambiguities until they are removed from the stack - // during a pop operation where multiple paths lead to the same node. But in - // the special case where two links directly connect the same pair of nodes, - // we can safely remove the ambiguity ahead of time without changing behavior. - if (existing_link->node == link.node) { - if ( - ts_subtree_dynamic_precedence(link.subtree) > - ts_subtree_dynamic_precedence(existing_link->subtree) - ) { - ts_subtree_retain(link.subtree); - ts_subtree_release(subtree_pool, existing_link->subtree); - existing_link->subtree = link.subtree; - self->dynamic_precedence = - link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree); - } - return; - } - - // If the previous nodes are mergeable, merge them recursively. 
- if ( - existing_link->node->state == link.node->state && - existing_link->node->position.bytes == link.node->position.bytes && - existing_link->node->error_cost == link.node->error_cost - ) { - for (int j = 0; j < link.node->link_count; j++) { - stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool); - } - int32_t dynamic_precedence = link.node->dynamic_precedence; - if (link.subtree.ptr) { - dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); - } - if (dynamic_precedence > self->dynamic_precedence) { - self->dynamic_precedence = dynamic_precedence; - } - return; - } - } - } - - if (self->link_count == MAX_LINK_COUNT) return; - - stack_node_retain(link.node); - unsigned node_count = link.node->node_count; - int dynamic_precedence = link.node->dynamic_precedence; - self->links[self->link_count++] = link; - - if (link.subtree.ptr) { - ts_subtree_retain(link.subtree); - node_count += stack__subtree_node_count(link.subtree); - dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); - } - - if (node_count > self->node_count) self->node_count = node_count; - if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence; -} - -static void stack_head_delete( - StackHead *self, - StackNodeArray *pool, - SubtreePool *subtree_pool -) { - if (self->node) { - if (self->last_external_token.ptr) { - ts_subtree_release(subtree_pool, self->last_external_token); - } - if (self->lookahead_when_paused.ptr) { - ts_subtree_release(subtree_pool, self->lookahead_when_paused); - } - if (self->summary) { - array_delete(self->summary); - ts_free(self->summary); - } - stack_node_release(self->node, pool, subtree_pool); - } -} - -static StackVersion ts_stack__add_version( - Stack *self, - StackVersion original_version, - StackNode *node -) { - StackHead head = { - .node = node, - .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error, - .last_external_token = 
self->heads.contents[original_version].last_external_token, - .status = StackStatusActive, - .lookahead_when_paused = NULL_SUBTREE, - }; - array_push(&self->heads, head); - stack_node_retain(node); - if (head.last_external_token.ptr) ts_subtree_retain(head.last_external_token); - return (StackVersion)(self->heads.size - 1); -} - -static void ts_stack__add_slice( - Stack *self, - StackVersion original_version, - StackNode *node, - SubtreeArray *subtrees -) { - for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { - StackVersion version = self->slices.contents[i].version; - if (self->heads.contents[version].node == node) { - StackSlice slice = {*subtrees, version}; - array_insert(&self->slices, i + 1, slice); - return; - } - } - - StackVersion version = ts_stack__add_version(self, original_version, node); - StackSlice slice = { *subtrees, version }; - array_push(&self->slices, slice); -} - -static StackSliceArray stack__iter( - Stack *self, - StackVersion version, - StackCallback callback, - void *payload, - int goal_subtree_count -) { - array_clear(&self->slices); - array_clear(&self->iterators); - - StackHead *head = array_get(&self->heads, version); - StackIterator new_iterator = { - .node = head->node, - .subtrees = array_new(), - .subtree_count = 0, - .is_pending = true, - }; - - bool include_subtrees = false; - if (goal_subtree_count >= 0) { - include_subtrees = true; - array_reserve(&new_iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree)); - } - - array_push(&self->iterators, new_iterator); - - while (self->iterators.size > 0) { - for (uint32_t i = 0, size = self->iterators.size; i < size; i++) { - StackIterator *iterator = &self->iterators.contents[i]; - StackNode *node = iterator->node; - - StackAction action = callback(payload, iterator); - bool should_pop = action & StackActionPop; - bool should_stop = action & StackActionStop || node->link_count == 0; - - if (should_pop) { - SubtreeArray subtrees = 
iterator->subtrees; - if (!should_stop) { - ts_subtree_array_copy(subtrees, &subtrees); - } - ts_subtree_array_reverse(&subtrees); - ts_stack__add_slice( - self, - version, - node, - &subtrees - ); - } - - if (should_stop) { - if (!should_pop) { - ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees); - } - array_erase(&self->iterators, i); - i--, size--; - continue; - } - - for (uint32_t j = 1; j <= node->link_count; j++) { - StackIterator *next_iterator; - StackLink link; - if (j == node->link_count) { - link = node->links[0]; - next_iterator = &self->iterators.contents[i]; - } else { - if (self->iterators.size >= MAX_ITERATOR_COUNT) continue; - link = node->links[j]; - StackIterator current_iterator = self->iterators.contents[i]; - array_push(&self->iterators, current_iterator); - next_iterator = array_back(&self->iterators); - ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees); - } - - next_iterator->node = link.node; - if (link.subtree.ptr) { - if (include_subtrees) { - array_push(&next_iterator->subtrees, link.subtree); - ts_subtree_retain(link.subtree); - } - - if (!ts_subtree_extra(link.subtree)) { - next_iterator->subtree_count++; - if (!link.is_pending) { - next_iterator->is_pending = false; - } - } - } else { - next_iterator->subtree_count++; - next_iterator->is_pending = false; - } - } - } - } - - return self->slices; -} - -Stack *ts_stack_new(SubtreePool *subtree_pool) { - Stack *self = ts_calloc(1, sizeof(Stack)); - - array_init(&self->heads); - array_init(&self->slices); - array_init(&self->iterators); - array_init(&self->node_pool); - array_reserve(&self->heads, 4); - array_reserve(&self->slices, 4); - array_reserve(&self->iterators, 4); - array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE); - - self->subtree_pool = subtree_pool; - self->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &self->node_pool); - ts_stack_clear(self); - - return self; -} - -void ts_stack_delete(Stack *self) { - if 
(self->slices.contents) - array_delete(&self->slices); - if (self->iterators.contents) - array_delete(&self->iterators); - stack_node_release(self->base_node, &self->node_pool, self->subtree_pool); - for (uint32_t i = 0; i < self->heads.size; i++) { - stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); - } - array_clear(&self->heads); - if (self->node_pool.contents) { - for (uint32_t i = 0; i < self->node_pool.size; i++) - ts_free(self->node_pool.contents[i]); - array_delete(&self->node_pool); - } - array_delete(&self->heads); - ts_free(self); -} - -uint32_t ts_stack_version_count(const Stack *self) { - return self->heads.size; -} - -t_state_id ts_stack_state(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->node->state; -} - -Length ts_stack_position(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->node->position; -} - -Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->last_external_token; -} - -void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) { - StackHead *head = array_get(&self->heads, version); - if (token.ptr) ts_subtree_retain(token); - if (head->last_external_token.ptr) ts_subtree_release(self->subtree_pool, head->last_external_token); - head->last_external_token = token; -} - -unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { - StackHead *head = array_get(&self->heads, version); - unsigned result = head->node->error_cost; - if ( - head->status == StackStatusPaused || - (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr)) { - result += ERROR_COST_PER_RECOVERY; - } - return result; -} - -unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) { - StackHead *head = array_get(&self->heads, version); - if (head->node->node_count < head->node_count_at_last_error) { - 
head->node_count_at_last_error = head->node->node_count; - } - return head->node->node_count - head->node_count_at_last_error; -} - -void ts_stack_push( - Stack *self, - StackVersion version, - Subtree subtree, - bool pending, - t_state_id state -) { - StackHead *head = array_get(&self->heads, version); - StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool); - if (!subtree.ptr) head->node_count_at_last_error = new_node->node_count; - head->node = new_node; -} - -forceinline StackAction pop_count_callback(void *payload, const StackIterator *iterator) { - unsigned *goal_subtree_count = payload; - if (iterator->subtree_count == *goal_subtree_count) { - return StackActionPop | StackActionStop; - } else { - return StackActionNone; - } -} - -StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) { - return stack__iter(self, version, pop_count_callback, &count, (int)count); -} - -forceinline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) { - (void)payload; - if (iterator->subtree_count >= 1) { - if (iterator->is_pending) { - return StackActionPop | StackActionStop; - } else { - return StackActionStop; - } - } else { - return StackActionNone; - } -} - -StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) { - StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, 0); - if (pop.size > 0) { - ts_stack_renumber_version(self, pop.contents[0].version, version); - pop.contents[0].version = version; - } - return pop; -} - -forceinline StackAction pop_error_callback(void *payload, const StackIterator *iterator) { - if (iterator->subtrees.size > 0) { - bool *found_error = payload; - if (!*found_error && ts_subtree_is_error(iterator->subtrees.contents[0])) { - *found_error = true; - return StackActionPop | StackActionStop; - } else { - return StackActionStop; - } - } else { - return StackActionNone; - } -} - -SubtreeArray 
ts_stack_pop_error(Stack *self, StackVersion version) { - StackNode *node = array_get(&self->heads, version)->node; - for (unsigned i = 0; i < node->link_count; i++) { - if (node->links[i].subtree.ptr && ts_subtree_is_error(node->links[i].subtree)) { - bool found_error = false; - StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1); - if (pop.size > 0) { - assert(pop.size == 1); - ts_stack_renumber_version(self, pop.contents[0].version, version); - return pop.contents[0].subtrees; - } - break; - } - } - return (SubtreeArray) {.size = 0}; -} - -forceinline StackAction pop_all_callback(void *payload, const StackIterator *iterator) { - (void)payload; - return iterator->node->link_count == 0 ? StackActionPop : StackActionNone; -} - -StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) { - return stack__iter(self, version, pop_all_callback, NULL, 0); -} - -typedef struct { - StackSummary *summary; - unsigned max_depth; -} SummarizeStackSession; - -forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) { - SummarizeStackSession *session = payload; - t_state_id state = iterator->node->state; - unsigned depth = iterator->subtree_count; - if (depth > session->max_depth) return StackActionStop; - for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) { - StackSummaryEntry entry = session->summary->contents[i]; - if (entry.depth < depth) break; - if (entry.depth == depth && entry.state == state) return StackActionNone; - } - array_push(session->summary, ((StackSummaryEntry) { - .position = iterator->node->position, - .depth = depth, - .state = state, - })); - return StackActionNone; -} - -void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) { - SummarizeStackSession session = { - .summary = ts_malloc(sizeof(StackSummary)), - .max_depth = max_depth - }; - array_init(session.summary); - stack__iter(self, version, summarize_stack_callback, &session, -1); 
- StackHead *head = &self->heads.contents[version]; - if (head->summary) { - array_delete(head->summary); - ts_free(head->summary); - } - head->summary = session.summary; -} - -StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) { - return array_get(&self->heads, version)->summary; -} - -int ts_stack_dynamic_precedence(Stack *self, StackVersion version) { - return array_get(&self->heads, version)->node->dynamic_precedence; -} - -bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) { - const StackHead *head = array_get(&self->heads, version); - const StackNode *node = head->node; - if (node->error_cost == 0) return true; - while (node) { - if (node->link_count > 0) { - Subtree subtree = node->links[0].subtree; - if (subtree.ptr) { - if (ts_subtree_total_bytes(subtree) > 0) { - return true; - } else if ( - node->node_count > head->node_count_at_last_error && - ts_subtree_error_cost(subtree) == 0 - ) { - node = node->links[0].node; - continue; - } - } - } - break; - } - return false; -} - -void ts_stack_remove_version(Stack *self, StackVersion version) { - stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool); - array_erase(&self->heads, version); -} - -void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) { - if (v1 == v2) return; - assert(v2 < v1); - assert((uint32_t)v1 < self->heads.size); - StackHead *source_head = &self->heads.contents[v1]; - StackHead *target_head = &self->heads.contents[v2]; - if (target_head->summary && !source_head->summary) { - source_head->summary = target_head->summary; - target_head->summary = NULL; - } - stack_head_delete(target_head, &self->node_pool, self->subtree_pool); - *target_head = *source_head; - array_erase(&self->heads, v1); -} - -void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) { - StackHead temporary_head = self->heads.contents[v1]; - self->heads.contents[v1] = self->heads.contents[v2]; - 
self->heads.contents[v2] = temporary_head; -} - -StackVersion ts_stack_copy_version(Stack *self, StackVersion version) { - assert(version < self->heads.size); - array_push(&self->heads, self->heads.contents[version]); - StackHead *head = array_back(&self->heads); - stack_node_retain(head->node); - if (head->last_external_token.ptr) ts_subtree_retain(head->last_external_token); - head->summary = NULL; - return self->heads.size - 1; -} - -bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) { - if (!ts_stack_can_merge(self, version1, version2)) return false; - StackHead *head1 = &self->heads.contents[version1]; - StackHead *head2 = &self->heads.contents[version2]; - for (uint32_t i = 0; i < head2->node->link_count; i++) { - stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool); - } - if (head1->node->state == ERROR_STATE) { - head1->node_count_at_last_error = head1->node->node_count; - } - ts_stack_remove_version(self, version2); - return true; -} - -bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) { - StackHead *head1 = &self->heads.contents[version1]; - StackHead *head2 = &self->heads.contents[version2]; - return - head1->status == StackStatusActive && - head2->status == StackStatusActive && - head1->node->state == head2->node->state && - head1->node->position.bytes == head2->node->position.bytes && - head1->node->error_cost == head2->node->error_cost && - ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token); -} - -void ts_stack_halt(Stack *self, StackVersion version) { - array_get(&self->heads, version)->status = StackStatusHalted; -} - -void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead) { - StackHead *head = array_get(&self->heads, version); - head->status = StackStatusPaused; - head->lookahead_when_paused = lookahead; - head->node_count_at_last_error = head->node->node_count; -} - -bool ts_stack_is_active(const Stack 
*self, StackVersion version) { - return array_get(&self->heads, version)->status == StackStatusActive; -} - -bool ts_stack_is_halted(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->status == StackStatusHalted; -} - -bool ts_stack_is_paused(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->status == StackStatusPaused; -} - -Subtree ts_stack_resume(Stack *self, StackVersion version) { - StackHead *head = array_get(&self->heads, version); - assert(head->status == StackStatusPaused); - Subtree result = head->lookahead_when_paused; - head->status = StackStatusActive; - head->lookahead_when_paused = NULL_SUBTREE; - return result; -} - -void ts_stack_clear(Stack *self) { - stack_node_retain(self->base_node); - for (uint32_t i = 0; i < self->heads.size; i++) { - stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); - } - array_clear(&self->heads); - array_push(&self->heads, ((StackHead) { - .node = self->base_node, - .status = StackStatusActive, - .last_external_token = NULL_SUBTREE, - .lookahead_when_paused = NULL_SUBTREE, - })); -} - -bool ts_stack_print_dot_graph(Stack *self, const t_language *language, FILE *f) { - array_reserve(&self->iterators, 32); - if (!f) f = stderr; - - fprintf(f, "digraph stack {\n"); - fprintf(f, "rankdir=\"RL\";\n"); - fprintf(f, "edge [arrowhead=none]\n"); - - Array(StackNode *) visited_nodes = array_new(); - - array_clear(&self->iterators); - for (uint32_t i = 0; i < self->heads.size; i++) { - StackHead *head = &self->heads.contents[i]; - if (head->status == StackStatusHalted) continue; - - fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i); - fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node); - - if (head->status == StackStatusPaused) { - fprintf(f, "color=red "); - } - fprintf(f, - "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u", - i, - ts_stack_node_count_since_error(self, i), - 
ts_stack_error_cost(self, i) - ); - - if (head->summary) { - fprintf(f, "\nsummary:"); - for (uint32_t j = 0; j < head->summary->size; j++) fprintf(f, " %u", head->summary->contents[j].state); - } - - if (head->last_external_token.ptr) { - const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state; - const char *data = ts_external_scanner_state_data(state); - fprintf(f, "\nexternal_scanner_state:"); - for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]); - } - - fprintf(f, "\"]\n"); - array_push(&self->iterators, ((StackIterator) { - .node = head->node - })); - } - - bool all_iterators_done = false; - while (!all_iterators_done) { - all_iterators_done = true; - - for (uint32_t i = 0; i < self->iterators.size; i++) { - StackIterator iterator = self->iterators.contents[i]; - StackNode *node = iterator.node; - - for (uint32_t j = 0; j < visited_nodes.size; j++) { - if (visited_nodes.contents[j] == node) { - node = NULL; - break; - } - } - - if (!node) continue; - all_iterators_done = false; - - fprintf(f, "node_%p [", (void *)node); - if (node->state == ERROR_STATE) { - fprintf(f, "label=\"?\""); - } else if ( - node->link_count == 1 && - node->links[0].subtree.ptr && - ts_subtree_extra(node->links[0].subtree) - ) { - fprintf(f, "shape=point margin=0 label=\"\""); - } else { - fprintf(f, "label=\"%d\"", node->state); - } - - fprintf( - f, - " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n", - node->position.extent.row + 1, - node->position.extent.column, - node->node_count, - node->error_cost, - node->dynamic_precedence - ); - - for (int j = 0; j < node->link_count; j++) { - StackLink link = node->links[j]; - fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node); - if (link.is_pending) fprintf(f, "style=dashed "); - if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray "); - - if (!link.subtree.ptr) { - fprintf(f, "color=red"); - } 
else { - fprintf(f, "label=\""); - bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree); - if (quoted) fprintf(f, "'"); - ts_language_write_symbol_as_dot_string(language, f, ts_subtree_symbol(link.subtree)); - if (quoted) fprintf(f, "'"); - fprintf(f, "\""); - fprintf( - f, - "labeltooltip=\"error_cost: %u\ndynamic_precedence: %" PRId32 "\"", - ts_subtree_error_cost(link.subtree), - ts_subtree_dynamic_precedence(link.subtree) - ); - } - - fprintf(f, "];\n"); - - StackIterator *next_iterator; - if (j == 0) { - next_iterator = &self->iterators.contents[i]; - } else { - array_push(&self->iterators, iterator); - next_iterator = array_back(&self->iterators); - } - next_iterator->node = link.node; - } - - array_push(&visited_nodes, node); - } - } - - fprintf(f, "}\n"); - - array_delete(&visited_nodes); - return true; -} - -#undef forceinline -#include -#include -#include -#include -#include -#include -#include "src/alloc.h" -#include "src/array.h" -#include "src/atomic.h" -#include "src/subtree.h" -#include "src/length.h" -#include "src/language.h" -#include "src/error_costs.h" -#include - -typedef struct { - Length start; - Length old_end; - Length new_end; -} Edit; - -#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX -#define TS_MAX_TREE_POOL_SIZE 32 - -// ExternalScannerState - -void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) { - self->length = length; - if (length > sizeof(self->short_data)) { - self->long_data = ts_malloc(length); - memcpy(self->long_data, data, length); - } else { - memcpy(self->short_data, data, length); - } -} - -ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) { - ExternalScannerState result = *self; - if (self->length > sizeof(self->short_data)) { - result.long_data = ts_malloc(self->length); - memcpy(result.long_data, self->long_data, self->length); - } - return result; -} - -void ts_external_scanner_state_delete(ExternalScannerState 
*self) { - if (self->length > sizeof(self->short_data)) { - ts_free(self->long_data); - } -} - -const char *ts_external_scanner_state_data(const ExternalScannerState *self) { - if (self->length > sizeof(self->short_data)) { - return self->long_data; - } else { - return self->short_data; - } -} - -bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length) { - return - self->length == length && - memcmp(ts_external_scanner_state_data(self), buffer, length) == 0; -} - -// SubtreeArray - -void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) { - dest->size = self.size; - dest->capacity = self.capacity; - dest->contents = self.contents; - if (self.capacity > 0) { - dest->contents = ts_calloc(self.capacity, sizeof(Subtree)); - memcpy(dest->contents, self.contents, self.size * sizeof(Subtree)); - for (uint32_t i = 0; i < self.size; i++) { - ts_subtree_retain(dest->contents[i]); - } - } -} - -void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) { - for (uint32_t i = 0; i < self->size; i++) { - ts_subtree_release(pool, self->contents[i]); - } - array_clear(self); -} - -void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) { - ts_subtree_array_clear(pool, self); - array_delete(self); -} - -void ts_subtree_array_remove_trailing_extras( - SubtreeArray *self, - SubtreeArray *destination -) { - array_clear(destination); - while (self->size > 0) { - Subtree last = self->contents[self->size - 1]; - if (ts_subtree_extra(last)) { - self->size--; - array_push(destination, last); - } else { - break; - } - } - ts_subtree_array_reverse(destination); -} - -void ts_subtree_array_reverse(SubtreeArray *self) { - for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) { - size_t reverse_index = self->size - 1 - i; - Subtree swap = self->contents[i]; - self->contents[i] = self->contents[reverse_index]; - self->contents[reverse_index] = swap; - } -} - -// SubtreePool - -SubtreePool 
ts_subtree_pool_new(uint32_t capacity) { - SubtreePool self = {array_new(), array_new()}; - array_reserve(&self.free_trees, capacity); - return self; -} - -void ts_subtree_pool_delete(SubtreePool *self) { - if (self->free_trees.contents) { - for (unsigned i = 0; i < self->free_trees.size; i++) { - ts_free(self->free_trees.contents[i].ptr); - } - array_delete(&self->free_trees); - } - if (self->tree_stack.contents) array_delete(&self->tree_stack); -} - -static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) { - if (self->free_trees.size > 0) { - return array_pop(&self->free_trees).ptr; - } else { - return ts_malloc(sizeof(SubtreeHeapData)); - } -} - -static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) { - if (self->free_trees.capacity > 0 && self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE) { - array_push(&self->free_trees, (MutableSubtree) {.ptr = tree}); - } else { - ts_free(tree); - } -} - -// Subtree - -static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) { - return - padding.bytes < TS_MAX_INLINE_TREE_LENGTH && - padding.extent.row < 16 && - padding.extent.column < TS_MAX_INLINE_TREE_LENGTH && - size.extent.row == 0 && - size.extent.column < TS_MAX_INLINE_TREE_LENGTH && - lookahead_bytes < 16; -} - -Subtree ts_subtree_new_leaf( - SubtreePool *pool, t_symbol symbol, Length padding, Length size, - uint32_t lookahead_bytes, t_state_id parse_state, - bool has_external_tokens, bool depends_on_column, - bool is_keyword, const t_language *language -) { - TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); - bool extra = symbol == ts_builtin_sym_end; - - bool is_inline = ( - symbol <= UINT8_MAX && - !has_external_tokens && - ts_subtree_can_inline(padding, size, lookahead_bytes) - ); - - if (is_inline) { - return (Subtree) {{ - .parse_state = parse_state, - .symbol = symbol, - .padding_bytes = padding.bytes, - .padding_rows = padding.extent.row, - .padding_columns = 
padding.extent.column, - .size_bytes = size.bytes, - .lookahead_bytes = lookahead_bytes, - .visible = metadata.visible, - .named = metadata.named, - .extra = extra, - .has_changes = false, - .is_missing = false, - .is_keyword = is_keyword, - .is_inline = true, - }}; - } else { - SubtreeHeapData *data = ts_subtree_pool_allocate(pool); - *data = (SubtreeHeapData) { - .ref_count = 1, - .padding = padding, - .size = size, - .lookahead_bytes = lookahead_bytes, - .error_cost = 0, - .child_count = 0, - .symbol = symbol, - .parse_state = parse_state, - .visible = metadata.visible, - .named = metadata.named, - .extra = extra, - .fragile_left = false, - .fragile_right = false, - .has_changes = false, - .has_external_tokens = has_external_tokens, - .has_external_scanner_state_change = false, - .depends_on_column = depends_on_column, - .is_missing = false, - .is_keyword = is_keyword, - {{.first_leaf = {.symbol = 0, .parse_state = 0}}} - }; - return (Subtree) {.ptr = data}; - } -} - -void ts_subtree_set_symbol( - MutableSubtree *self, - t_symbol symbol, - const t_language *language -) { - TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); - if (self->data.is_inline) { - assert(symbol < UINT8_MAX); - self->data.symbol = symbol; - self->data.named = metadata.named; - self->data.visible = metadata.visible; - } else { - self->ptr->symbol = symbol; - self->ptr->named = metadata.named; - self->ptr->visible = metadata.visible; - } -} - -Subtree ts_subtree_new_error( - SubtreePool *pool, int32_t lookahead_char, Length padding, Length size, - uint32_t bytes_scanned, t_state_id parse_state, const t_language *language -) { - Subtree result = ts_subtree_new_leaf( - pool, ts_builtin_sym_error, padding, size, bytes_scanned, - parse_state, false, false, false, language - ); - SubtreeHeapData *data = (SubtreeHeapData *)result.ptr; - data->fragile_left = true; - data->fragile_right = true; - data->lookahead_char = lookahead_char; - return result; -} - -// Clone a 
subtree. -MutableSubtree ts_subtree_clone(Subtree self) { - size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); - Subtree *new_children = ts_malloc(alloc_size); - Subtree *old_children = ts_subtree_children(self); - memcpy(new_children, old_children, alloc_size); - SubtreeHeapData *result = (SubtreeHeapData *)&new_children[self.ptr->child_count]; - if (self.ptr->child_count > 0) { - for (uint32_t i = 0; i < self.ptr->child_count; i++) { - ts_subtree_retain(new_children[i]); - } - } else if (self.ptr->has_external_tokens) { - result->external_scanner_state = ts_external_scanner_state_copy( - &self.ptr->external_scanner_state - ); - } - result->ref_count = 1; - return (MutableSubtree) {.ptr = result}; -} - -// Get mutable version of a subtree. -// -// This takes ownership of the subtree. If the subtree has only one owner, -// this will directly convert it into a mutable version. Otherwise, it will -// perform a copy. -MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { - if (self.data.is_inline) return (MutableSubtree) {self.data}; - if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); - MutableSubtree result = ts_subtree_clone(self); - ts_subtree_release(pool, self); - return result; -} - -static void ts_subtree__compress( - MutableSubtree self, - unsigned count, - const t_language *language, - MutableSubtreeArray *stack -) { - unsigned initial_stack_size = stack->size; - - MutableSubtree tree = self; - t_symbol symbol = tree.ptr->symbol; - for (unsigned i = 0; i < count; i++) { - if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break; - - MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); - if ( - child.data.is_inline || - child.ptr->child_count < 2 || - child.ptr->ref_count > 1 || - child.ptr->symbol != symbol - ) break; - - MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]); - if ( - grandchild.data.is_inline || - grandchild.ptr->child_count < 2 
|| - grandchild.ptr->ref_count > 1 || - grandchild.ptr->symbol != symbol - ) break; - - ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild); - ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1]; - ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child); - array_push(stack, tree); - tree = grandchild; - } - - while (stack->size > initial_stack_size) { - tree = array_pop(stack); - MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); - MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]); - ts_subtree_summarize_children(grandchild, language); - ts_subtree_summarize_children(child, language); - ts_subtree_summarize_children(tree, language); - } -} - -void ts_subtree_balance(Subtree self, SubtreePool *pool, const t_language *language) { - array_clear(&pool->tree_stack); - - if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) { - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); - } - - while (pool->tree_stack.size > 0) { - MutableSubtree tree = array_pop(&pool->tree_stack); - - if (tree.ptr->repeat_depth > 0) { - Subtree child1 = ts_subtree_children(tree)[0]; - Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1]; - long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2); - if (repeat_delta > 0) { - unsigned n = (unsigned)repeat_delta; - for (unsigned i = n / 2; i > 0; i /= 2) { - ts_subtree__compress(tree, i, language, &pool->tree_stack); - n -= i; - } - } - } - - for (uint32_t i = 0; i < tree.ptr->child_count; i++) { - Subtree child = ts_subtree_children(tree)[i]; - if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) { - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); - } - } - } -} - -// Assign all of the node's properties that depend on its children. 
-void ts_subtree_summarize_children( - MutableSubtree self, - const t_language *language -) { - assert(!self.data.is_inline); - - self.ptr->named_child_count = 0; - self.ptr->visible_child_count = 0; - self.ptr->error_cost = 0; - self.ptr->repeat_depth = 0; - self.ptr->visible_descendant_count = 0; - self.ptr->has_external_tokens = false; - self.ptr->depends_on_column = false; - self.ptr->has_external_scanner_state_change = false; - self.ptr->dynamic_precedence = 0; - - uint32_t structural_index = 0; - const t_symbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); - uint32_t lookahead_end_byte = 0; - - const Subtree *children = ts_subtree_children(self); - for (uint32_t i = 0; i < self.ptr->child_count; i++) { - Subtree child = children[i]; - - if ( - self.ptr->size.extent.row == 0 && - ts_subtree_depends_on_column(child) - ) { - self.ptr->depends_on_column = true; - } - - if (ts_subtree_has_external_scanner_state_change(child)) { - self.ptr->has_external_scanner_state_change = true; - } - - if (i == 0) { - self.ptr->padding = ts_subtree_padding(child); - self.ptr->size = ts_subtree_size(child); - } else { - self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child)); - } - - uint32_t child_lookahead_end_byte = - self.ptr->padding.bytes + - self.ptr->size.bytes + - ts_subtree_lookahead_bytes(child); - if (child_lookahead_end_byte > lookahead_end_byte) { - lookahead_end_byte = child_lookahead_end_byte; - } - - if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) { - self.ptr->error_cost += ts_subtree_error_cost(child); - } - - uint32_t grandchild_count = ts_subtree_child_count(child); - if ( - self.ptr->symbol == ts_builtin_sym_error || - self.ptr->symbol == ts_builtin_sym_error_repeat - ) { - if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) { - if (ts_subtree_visible(child)) { - self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; - } else if (grandchild_count > 0) { - 
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count; - } - } - } - - self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child); - self.ptr->visible_descendant_count += ts_subtree_visible_descendant_count(child); - - if (alias_sequence && alias_sequence[structural_index] != 0 && !ts_subtree_extra(child)) { - self.ptr->visible_descendant_count++; - self.ptr->visible_child_count++; - if (ts_language_symbol_metadata(language, alias_sequence[structural_index]).named) { - self.ptr->named_child_count++; - } - } else if (ts_subtree_visible(child)) { - self.ptr->visible_descendant_count++; - self.ptr->visible_child_count++; - if (ts_subtree_named(child)) self.ptr->named_child_count++; - } else if (grandchild_count > 0) { - self.ptr->visible_child_count += child.ptr->visible_child_count; - self.ptr->named_child_count += child.ptr->named_child_count; - } - - if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true; - - if (ts_subtree_is_error(child)) { - self.ptr->fragile_left = self.ptr->fragile_right = true; - self.ptr->parse_state = TS_TREE_STATE_NONE; - } - - if (!ts_subtree_extra(child)) structural_index++; - } - - self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes; - - if ( - self.ptr->symbol == ts_builtin_sym_error || - self.ptr->symbol == ts_builtin_sym_error_repeat - ) { - self.ptr->error_cost += - ERROR_COST_PER_RECOVERY + - ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes + - ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row; - } - - if (self.ptr->child_count > 0) { - Subtree first_child = children[0]; - Subtree last_child = children[self.ptr->child_count - 1]; - - self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child); - self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child); - - if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true; - if (ts_subtree_fragile_right(last_child)) 
self.ptr->fragile_right = true; - - if ( - self.ptr->child_count >= 2 && - !self.ptr->visible && - !self.ptr->named && - ts_subtree_symbol(first_child) == self.ptr->symbol - ) { - if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) { - self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1; - } else { - self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1; - } - } - } -} - -// Create a new parent node with the given children. -// -// This takes ownership of the children array. -MutableSubtree ts_subtree_new_node( - t_symbol symbol, - SubtreeArray *children, - unsigned production_id, - const t_language *language -) { - TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); - bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; - - // Allocate the node's data at the end of the array of children. - size_t new_byte_size = ts_subtree_alloc_size(children->size); - if (children->capacity * sizeof(Subtree) < new_byte_size) { - children->contents = ts_realloc(children->contents, new_byte_size); - children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree)); - } - SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size]; - - *data = (SubtreeHeapData) { - .ref_count = 1, - .symbol = symbol, - .child_count = children->size, - .visible = metadata.visible, - .named = metadata.named, - .has_changes = false, - .has_external_scanner_state_change = false, - .fragile_left = fragile, - .fragile_right = fragile, - .is_keyword = false, - {{ - .visible_descendant_count = 0, - .production_id = production_id, - .first_leaf = {.symbol = 0, .parse_state = 0}, - }} - }; - MutableSubtree result = {.ptr = data}; - ts_subtree_summarize_children(result, language); - return result; -} - -// Create a new error node containing the given children. -// -// This node is treated as 'extra'. Its children are prevented from having -// having any effect on the parse state. 
-Subtree ts_subtree_new_error_node( - SubtreeArray *children, - bool extra, - const t_language *language -) { - MutableSubtree result = ts_subtree_new_node( - ts_builtin_sym_error, children, 0, language - ); - result.ptr->extra = extra; - return ts_subtree_from_mut(result); -} - -// Create a new 'missing leaf' node. -// -// This node is treated as 'extra'. Its children are prevented from having -// having any effect on the parse state. -Subtree ts_subtree_new_missing_leaf( - SubtreePool *pool, - t_symbol symbol, - Length padding, - uint32_t lookahead_bytes, - const t_language *language -) { - Subtree result = ts_subtree_new_leaf( - pool, symbol, padding, length_zero(), lookahead_bytes, - 0, false, false, false, language - ); - if (result.data.is_inline) { - result.data.is_missing = true; - } else { - ((SubtreeHeapData *)result.ptr)->is_missing = true; - } - return result; -} - -void ts_subtree_retain(Subtree self) { - if (self.data.is_inline) return; - assert(self.ptr->ref_count > 0); - atomic_inc((volatile uint32_t *)&self.ptr->ref_count); - assert(self.ptr->ref_count != 0); -} - -void ts_subtree_release(SubtreePool *pool, Subtree self) { - if (self.data.is_inline) return; - array_clear(&pool->tree_stack); - - assert(self.ptr->ref_count > 0); - if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) { - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); - } - - while (pool->tree_stack.size > 0) { - MutableSubtree tree = array_pop(&pool->tree_stack); - if (tree.ptr->child_count > 0) { - Subtree *children = ts_subtree_children(tree); - for (uint32_t i = 0; i < tree.ptr->child_count; i++) { - Subtree child = children[i]; - if (child.data.is_inline) continue; - assert(child.ptr->ref_count > 0); - if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) { - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); - } - } - ts_free(children); - } else { - if (tree.ptr->has_external_tokens) { - 
ts_external_scanner_state_delete(&tree.ptr->external_scanner_state); - } - ts_subtree_pool_free(pool, tree.ptr); - } - } -} - -int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool) { - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left)); - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right)); - - while (pool->tree_stack.size > 0) { - right = ts_subtree_from_mut(array_pop(&pool->tree_stack)); - left = ts_subtree_from_mut(array_pop(&pool->tree_stack)); - - int result = 0; - if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) result = -1; - else if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) result = 1; - else if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) result = -1; - else if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) result = 1; - if (result != 0) { - array_clear(&pool->tree_stack); - return result; - } - - for (uint32_t i = ts_subtree_child_count(left); i > 0; i--) { - Subtree left_child = ts_subtree_children(left)[i - 1]; - Subtree right_child = ts_subtree_children(right)[i - 1]; - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left_child)); - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right_child)); - } - } - - return 0; -} - -static inline void ts_subtree_set_has_changes(MutableSubtree *self) { - if (self->data.is_inline) { - self->data.has_changes = true; - } else { - self->ptr->has_changes = true; - } -} - -Subtree ts_subtree_edit(Subtree self, const t_input_edit *input_edit, SubtreePool *pool) { - typedef struct { - Subtree *tree; - Edit edit; - } EditEntry; - - Array(EditEntry) stack = array_new(); - array_push(&stack, ((EditEntry) { - .tree = &self, - .edit = (Edit) { - .start = {input_edit->start_byte, input_edit->start_point}, - .old_end = {input_edit->old_end_byte, input_edit->old_end_point}, - .new_end = {input_edit->new_end_byte, input_edit->new_end_point}, - }, - })); - - while (stack.size) { - EditEntry entry = array_pop(&stack); - Edit edit 
= entry.edit; - bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes; - bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; - bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree); - - Length size = ts_subtree_size(*entry.tree); - Length padding = ts_subtree_padding(*entry.tree); - Length total_size = length_add(padding, size); - uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); - uint32_t end_byte = total_size.bytes + lookahead_bytes; - if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue; - - // If the edit is entirely within the space before this subtree, then shift this - // subtree over according to the edit without changing its size. - if (edit.old_end.bytes <= padding.bytes) { - padding = length_add(edit.new_end, length_sub(padding, edit.old_end)); - } - - // If the edit starts in the space before this subtree and extends into this subtree, - // shrink the subtree's content to compensate for the change in the space before it. - else if (edit.start.bytes < padding.bytes) { - size = length_saturating_sub(size, length_sub(edit.old_end, padding)); - padding = edit.new_end; - } - - // If the edit is a pure insertion right at the start of the subtree, - // shift the subtree over according to the insertion. - else if (edit.start.bytes == padding.bytes && is_pure_insertion) { - padding = edit.new_end; - } - - // If the edit is within this subtree, resize the subtree to reflect the edit. 
- else if ( - edit.start.bytes < total_size.bytes || - (edit.start.bytes == total_size.bytes && is_pure_insertion) - ) { - size = length_add( - length_sub(edit.new_end, padding), - length_saturating_sub(total_size, edit.old_end) - ); - } - - MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree); - - if (result.data.is_inline) { - if (ts_subtree_can_inline(padding, size, lookahead_bytes)) { - result.data.padding_bytes = padding.bytes; - result.data.padding_rows = padding.extent.row; - result.data.padding_columns = padding.extent.column; - result.data.size_bytes = size.bytes; - } else { - SubtreeHeapData *data = ts_subtree_pool_allocate(pool); - data->ref_count = 1; - data->padding = padding; - data->size = size; - data->lookahead_bytes = lookahead_bytes; - data->error_cost = 0; - data->child_count = 0; - data->symbol = result.data.symbol; - data->parse_state = result.data.parse_state; - data->visible = result.data.visible; - data->named = result.data.named; - data->extra = result.data.extra; - data->fragile_left = false; - data->fragile_right = false; - data->has_changes = false; - data->has_external_tokens = false; - data->depends_on_column = false; - data->is_missing = result.data.is_missing; - data->is_keyword = result.data.is_keyword; - result.ptr = data; - } - } else { - result.ptr->padding = padding; - result.ptr->size = size; - } - - ts_subtree_set_has_changes(&result); - *entry.tree = ts_subtree_from_mut(result); - - Length child_left, child_right = length_zero(); - for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) { - Subtree *child = &ts_subtree_children(*entry.tree)[i]; - Length child_size = ts_subtree_total_size(*child); - child_left = child_right; - child_right = length_add(child_left, child_size); - - // If this child ends before the edit, it is not affected. 
- if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue; - - // Keep editing child nodes until a node is reached that starts after the edit. - // Also, if this node's validity depends on its column position, then continue - // invaliditing child nodes until reaching a line break. - if (( - (child_left.bytes > edit.old_end.bytes) || - (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0) - ) && ( - !invalidate_first_row || - child_left.extent.row > entry.tree->ptr->padding.extent.row - )) { - break; - } - - // Transform edit into the child's coordinate space. - Edit child_edit = { - .start = length_saturating_sub(edit.start, child_left), - .old_end = length_saturating_sub(edit.old_end, child_left), - .new_end = length_saturating_sub(edit.new_end, child_left), - }; - - // Interpret all inserted text as applying to the *first* child that touches the edit. - // Subsequent children are only never have any text inserted into them; they are only - // shrunk to compensate for the edit. - if ( - child_right.bytes > edit.start.bytes || - (child_right.bytes == edit.start.bytes && is_pure_insertion) - ) { - edit.new_end = edit.start; - } - - // Children that occur before the edit are not reshaped by the edit. - else { - child_edit.old_end = child_edit.start; - child_edit.new_end = child_edit.start; - } - - // Queue processing of this child's subtree. 
- array_push(&stack, ((EditEntry) { - .tree = child, - .edit = child_edit, - })); - } - } - - array_delete(&stack); - return self; -} - -Subtree ts_subtree_last_external_token(Subtree tree) { - if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE; - while (tree.ptr->child_count > 0) { - for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) { - Subtree child = ts_subtree_children(tree)[i]; - if (ts_subtree_has_external_tokens(child)) { - tree = child; - break; - } - } - } - return tree; -} - -static size_t ts_subtree__write_char_to_string(char *str, size_t n, int32_t chr) { - if (chr == -1) - return snprintf(str, n, "INVALID"); - else if (chr == '\0') - return snprintf(str, n, "'\\0'"); - else if (chr == '\n') - return snprintf(str, n, "'\\n'"); - else if (chr == '\t') - return snprintf(str, n, "'\\t'"); - else if (chr == '\r') - return snprintf(str, n, "'\\r'"); - else if (0 < chr && chr < 128 && isprint(chr)) - return snprintf(str, n, "'%c'", chr); - else - return snprintf(str, n, "%d", chr); -} - -static const char *const ROOT_FIELD = "__ROOT__"; - -static size_t ts_subtree__write_to_string( - Subtree self, char *string, size_t limit, - const t_language *language, bool include_all, - t_symbol alias_symbol, bool alias_is_named, const char *field_name -) { - if (!self.ptr) return snprintf(string, limit, "(NULL)"); - - char *cursor = string; - char **writer = (limit > 1) ? &cursor : &string; - bool is_root = field_name == ROOT_FIELD; - bool is_visible = - include_all || - ts_subtree_missing(self) || - ( - alias_symbol - ? 
alias_is_named - : ts_subtree_visible(self) && ts_subtree_named(self) - ); - - if (is_visible) { - if (!is_root) { - cursor += snprintf(*writer, limit, " "); - if (field_name) { - cursor += snprintf(*writer, limit, "%s: ", field_name); - } - } - - if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) { - cursor += snprintf(*writer, limit, "(UNEXPECTED "); - cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char); - } else { - t_symbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); - const char *symbol_name = ts_language_symbol_name(language, symbol); - if (ts_subtree_missing(self)) { - cursor += snprintf(*writer, limit, "(MISSING "); - if (alias_is_named || ts_subtree_named(self)) { - cursor += snprintf(*writer, limit, "%s", symbol_name); - } else { - cursor += snprintf(*writer, limit, "\"%s\"", symbol_name); - } - } else { - cursor += snprintf(*writer, limit, "(%s", symbol_name); - } - } - } else if (is_root) { - t_symbol symbol = alias_symbol ? 
alias_symbol : ts_subtree_symbol(self); - const char *symbol_name = ts_language_symbol_name(language, symbol); - if (ts_subtree_child_count(self) > 0) { - cursor += snprintf(*writer, limit, "(%s", symbol_name); - } else if (ts_subtree_named(self)) { - cursor += snprintf(*writer, limit, "(%s)", symbol_name); - } else { - cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name); - } - } - - if (ts_subtree_child_count(self)) { - const t_symbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map( - language, - self.ptr->production_id, - &field_map, - &field_map_end - ); - - uint32_t structural_child_index = 0; - for (uint32_t i = 0; i < self.ptr->child_count; i++) { - Subtree child = ts_subtree_children(self)[i]; - if (ts_subtree_extra(child)) { - cursor += ts_subtree__write_to_string( - child, *writer, limit, - language, include_all, - 0, false, NULL - ); - } else { - t_symbol subtree_alias_symbol = alias_sequence - ? alias_sequence[structural_child_index] - : 0; - bool subtree_alias_is_named = subtree_alias_symbol - ? ts_language_symbol_metadata(language, subtree_alias_symbol).named - : false; - - const char *child_field_name = is_visible ? 
NULL : field_name; - for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - if (!map->inherited && map->child_index == structural_child_index) { - child_field_name = language->field_names[map->field_id]; - break; - } - } - - cursor += ts_subtree__write_to_string( - child, *writer, limit, - language, include_all, - subtree_alias_symbol, subtree_alias_is_named, child_field_name - ); - structural_child_index++; - } - } - } - - if (is_visible) cursor += snprintf(*writer, limit, ")"); - - return cursor - string; -} - -char *ts_subtree_string( - Subtree self, - t_symbol alias_symbol, - bool alias_is_named, - const t_language *language, - bool include_all -) { - char scratch_string[1]; - size_t size = ts_subtree__write_to_string( - self, scratch_string, 1, - language, include_all, - alias_symbol, alias_is_named, ROOT_FIELD - ) + 1; - char *result = ts_malloc(size * sizeof(char)); - ts_subtree__write_to_string( - self, result, size, - language, include_all, - alias_symbol, alias_is_named, ROOT_FIELD - ); - return result; -} - -void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, - const t_language *language, t_symbol alias_symbol, - FILE *f) { - t_symbol subtree_symbol = ts_subtree_symbol(*self); - t_symbol symbol = alias_symbol ? 
alias_symbol : subtree_symbol; - uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self); - fprintf(f, "tree_%p [label=\"", (void *)self); - ts_language_write_symbol_as_dot_string(language, f, symbol); - fprintf(f, "\""); - - if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext"); - if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray"); - - fprintf(f, ", tooltip=\"" - "range: %u - %u\n" - "state: %d\n" - "error-cost: %u\n" - "has-changes: %u\n" - "depends-on-column: %u\n" - "descendant-count: %u\n" - "repeat-depth: %u\n" - "lookahead-bytes: %u", - start_offset, end_offset, - ts_subtree_parse_state(*self), - ts_subtree_error_cost(*self), - ts_subtree_has_changes(*self), - ts_subtree_depends_on_column(*self), - ts_subtree_visible_descendant_count(*self), - ts_subtree_repeat_depth(*self), - ts_subtree_lookahead_bytes(*self) - ); - - if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) { - fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char); - } - - fprintf(f, "\"]\n"); - - uint32_t child_start_offset = start_offset; - uint32_t child_info_offset = - language->max_alias_sequence_length * - ts_subtree_production_id(*self); - for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { - const Subtree *child = &ts_subtree_children(*self)[i]; - t_symbol subtree_alias_symbol = 0; - if (!ts_subtree_extra(*child) && child_info_offset) { - subtree_alias_symbol = language->alias_sequences[child_info_offset]; - child_info_offset++; - } - ts_subtree__print_dot_graph(child, child_start_offset, language, subtree_alias_symbol, f); - fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i); - child_start_offset += ts_subtree_total_bytes(*child); - } -} - -void ts_subtree_print_dot_graph(Subtree self, const t_language *language, FILE *f) { - fprintf(f, "digraph tree {\n"); - fprintf(f, "edge [arrowhead=none]\n"); - ts_subtree__print_dot_graph(&self, 0, language, 
0, f); - fprintf(f, "}\n"); -} - -const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) { - static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0}; - if ( - self.ptr && - !self.data.is_inline && - self.ptr->has_external_tokens && - self.ptr->child_count == 0 - ) { - return &self.ptr->external_scanner_state; - } else { - return &empty_state; - } -} - -bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) { - const ExternalScannerState *state_self = ts_subtree_external_scanner_state(self); - const ExternalScannerState *state_other = ts_subtree_external_scanner_state(other); - return ts_external_scanner_state_eq( - state_self, - ts_external_scanner_state_data(state_other), - state_other->length - ); -} - - -#include "src/api.h" -#include "src/array.h" -#include "src/get_changed_ranges.h" -#include "src/length.h" -#include "src/subtree.h" -#include "src/tree_cursor.h" -#include "src/tree.h" - -t_tree *ts_tree_new( - Subtree root, const t_language *language, - const t_range *included_ranges, unsigned included_range_count -) { - t_tree *result = ts_malloc(sizeof(t_tree)); - result->root = root; - result->language = ts_language_copy(language); - result->included_ranges = ts_calloc(included_range_count, sizeof(t_range)); - memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(t_range)); - result->included_range_count = included_range_count; - return result; -} - -t_tree *ts_tree_copy(const t_tree *self) { - ts_subtree_retain(self->root); - return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count); -} - -void ts_tree_delete(t_tree *self) { - if (!self) return; - - SubtreePool pool = ts_subtree_pool_new(0); - ts_subtree_release(&pool, self->root); - ts_subtree_pool_delete(&pool); - ts_language_delete(self->language); - ts_free(self->included_ranges); - ts_free(self); -} - -t_parse_node ts_tree_root_node(const t_tree *self) { - return 
ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0); -} - -t_parse_node ts_tree_root_node_with_offset( - const t_tree *self, - uint32_t offset_bytes, - t_point offset_extent -) { - Length offset = {offset_bytes, offset_extent}; - return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0); -} - -const t_language *ts_tree_language(const t_tree *self) { - return self->language; -} - -void ts_tree_edit(t_tree *self, const t_input_edit *edit) { - for (unsigned i = 0; i < self->included_range_count; i++) { - t_range *range = &self->included_ranges[i]; - if (range->end_byte >= edit->old_end_byte) { - if (range->end_byte != UINT32_MAX) { - range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte); - range->end_point = point_add( - edit->new_end_point, - point_sub(range->end_point, edit->old_end_point) - ); - if (range->end_byte < edit->new_end_byte) { - range->end_byte = UINT32_MAX; - range->end_point = POINT_MAX; - } - } - } else if (range->end_byte > edit->start_byte) { - range->end_byte = edit->start_byte; - range->end_point = edit->start_point; - } - if (range->start_byte >= edit->old_end_byte) { - range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte); - range->start_point = point_add( - edit->new_end_point, - point_sub(range->start_point, edit->old_end_point) - ); - if (range->start_byte < edit->new_end_byte) { - range->start_byte = UINT32_MAX; - range->start_point = POINT_MAX; - } - } else if (range->start_byte > edit->start_byte) { - range->start_byte = edit->start_byte; - range->start_point = edit->start_point; - } - } - - SubtreePool pool = ts_subtree_pool_new(0); - self->root = ts_subtree_edit(self->root, edit, &pool); - ts_subtree_pool_delete(&pool); -} - -t_range *ts_tree_included_ranges(const t_tree *self, uint32_t *length) { - *length = self->included_range_count; - t_range *ranges = ts_calloc(self->included_range_count, sizeof(t_range)); - memcpy(ranges, 
self->included_ranges, self->included_range_count * sizeof(t_range)); - return ranges; -} - -t_range *ts_tree_get_changed_ranges(const t_tree *old_tree, const t_tree *new_tree, uint32_t *length) { - TreeCursor cursor1 = {NULL, array_new(), 0}; - TreeCursor cursor2 = {NULL, array_new(), 0}; - ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree)); - ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree)); - - TSRangeArray included_range_differences = array_new(); - ts_range_array_get_changed_ranges( - old_tree->included_ranges, old_tree->included_range_count, - new_tree->included_ranges, new_tree->included_range_count, - &included_range_differences - ); - - t_range *result; - *length = ts_subtree_get_changed_ranges( - &old_tree->root, &new_tree->root, &cursor1, &cursor2, - old_tree->language, &included_range_differences, &result - ); - - array_delete(&included_range_differences); - array_delete(&cursor1.stack); - array_delete(&cursor2.stack); - return result; -} - -#ifdef _WIN32 - -#include -#include - -int _ts_dup(HANDLE handle) { - HANDLE dup_handle; - if (!DuplicateHandle( - GetCurrentProcess(), handle, - GetCurrentProcess(), &dup_handle, - 0, FALSE, DUPLICATE_SAME_ACCESS - )) return -1; - - return _open_osfhandle((intptr_t)dup_handle, 0); -} - -void ts_tree_print_dot_graph(const TSTree *self, int fd) { - FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a"); - ts_subtree_print_dot_graph(self->root, self->language, file); - fclose(file); -} - -#else - -#include - -int _ts_dup(int file_descriptor) { - return dup(file_descriptor); -} - -void ts_tree_print_dot_graph(const t_tree *self, int file_descriptor) { - FILE *file = fdopen(_ts_dup(file_descriptor), "a"); - ts_subtree_print_dot_graph(self->root, self->language, file); - fclose(file); -} - -#endif -#include "src/api.h" -#include "src/alloc.h" -#include "src/tree_cursor.h" -#include "src/language.h" -#include "src/tree.h" - -typedef struct { - Subtree parent; - const t_tree *tree; - Length 
position; - uint32_t child_index; - uint32_t structural_child_index; - uint32_t descendant_index; - const t_symbol *alias_sequence; -} CursorChildIterator; - -// CursorChildIterator - -static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) { - TreeCursorEntry *entry = &self->stack.contents[index]; - if (index == 0 || ts_subtree_visible(*entry->subtree)) { - return true; - } else if (!ts_subtree_extra(*entry->subtree)) { - TreeCursorEntry *parent_entry = &self->stack.contents[index - 1]; - return ts_language_alias_at( - self->tree->language, - parent_entry->subtree->ptr->production_id, - entry->structural_child_index - ); - } else { - return false; - } -} - -static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) { - TreeCursorEntry *last_entry = array_back(&self->stack); - if (ts_subtree_child_count(*last_entry->subtree) == 0) { - return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL}; - } - const t_symbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - last_entry->subtree->ptr->production_id - ); - - uint32_t descendant_index = last_entry->descendant_index; - if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) { - descendant_index += 1; - } - - return (CursorChildIterator) { - .tree = self->tree, - .parent = *last_entry->subtree, - .position = last_entry->position, - .child_index = 0, - .structural_child_index = 0, - .descendant_index = descendant_index, - .alias_sequence = alias_sequence, - }; -} - -static inline bool ts_tree_cursor_child_iterator_next( - CursorChildIterator *self, - TreeCursorEntry *result, - bool *visible -) { - if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; - const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; - *result = (TreeCursorEntry) { - .subtree = child, - .position = self->position, - .child_index = self->child_index, - 
.structural_child_index = self->structural_child_index, - .descendant_index = self->descendant_index, - }; - *visible = ts_subtree_visible(*child); - bool extra = ts_subtree_extra(*child); - if (!extra) { - if (self->alias_sequence) { - *visible |= self->alias_sequence[self->structural_child_index]; - } - self->structural_child_index++; - } - - self->descendant_index += ts_subtree_visible_descendant_count(*child); - if (*visible) { - self->descendant_index += 1; - } - - self->position = length_add(self->position, ts_subtree_size(*child)); - self->child_index++; - - if (self->child_index < self->parent.ptr->child_count) { - Subtree next_child = ts_subtree_children(self->parent)[self->child_index]; - self->position = length_add(self->position, ts_subtree_padding(next_child)); - } - - return true; -} - -// Return a position that, when `b` is added to it, yields `a`. This -// can only be computed if `b` has zero rows. Otherwise, this function -// returns `LENGTH_UNDEFINED`, and the caller needs to recompute -// the position some other way. 
-static inline Length length_backtrack(Length a, Length b) { - if (length_is_undefined(a) || b.extent.row != 0) { - return LENGTH_UNDEFINED; - } - - Length result; - result.bytes = a.bytes - b.bytes; - result.extent.row = a.extent.row; - result.extent.column = a.extent.column - b.extent.column; - return result; -} - -static inline bool ts_tree_cursor_child_iterator_previous( - CursorChildIterator *self, - TreeCursorEntry *result, - bool *visible -) { - // this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into - // account unsigned underflow - if (!self->parent.ptr || (int8_t)self->child_index == -1) return false; - const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; - *result = (TreeCursorEntry) { - .subtree = child, - .position = self->position, - .child_index = self->child_index, - .structural_child_index = self->structural_child_index, - }; - *visible = ts_subtree_visible(*child); - bool extra = ts_subtree_extra(*child); - if (!extra && self->alias_sequence) { - *visible |= self->alias_sequence[self->structural_child_index]; - self->structural_child_index--; - } - - self->position = length_backtrack(self->position, ts_subtree_padding(*child)); - self->child_index--; - - // unsigned can underflow so compare it to child_count - if (self->child_index < self->parent.ptr->child_count) { - Subtree previous_child = ts_subtree_children(self->parent)[self->child_index]; - Length size = ts_subtree_size(previous_child); - self->position = length_backtrack(self->position, size); - } - - return true; -} - -// TSTreeCursor - lifecycle - -t_tree_cursor ts_tree_cursor_new(t_parse_node node) { - t_tree_cursor self = {NULL, NULL, {0, 0, 0}}; - ts_tree_cursor_init((TreeCursor *)&self, node); - return self; -} - -void ts_tree_cursor_reset(t_tree_cursor *_self, t_parse_node node) { - ts_tree_cursor_init((TreeCursor *)_self, node); -} - -void ts_tree_cursor_init(TreeCursor *self, t_parse_node node) { - self->tree = node.tree; - 
self->root_alias_symbol = node.context[3]; - array_clear(&self->stack); - array_push(&self->stack, ((TreeCursorEntry) { - .subtree = (const Subtree *)node.id, - .position = { - ts_node_start_byte(node), - ts_node_start_point(node) - }, - .child_index = 0, - .structural_child_index = 0, - .descendant_index = 0, - })); -} - -void ts_tree_cursor_delete(t_tree_cursor *_self) { - TreeCursor *self = (TreeCursor *)_self; - array_delete(&self->stack); -} - -// TSTreeCursor - walking the tree - -TreeCursorStep ts_tree_cursor_goto_first_child_internal(t_tree_cursor *_self) { - TreeCursor *self = (TreeCursor *)_self; - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - if (visible) { - array_push(&self->stack, entry); - return TreeCursorStepVisible; - } - if (ts_subtree_visible_child_count(*entry.subtree) > 0) { - array_push(&self->stack, entry); - return TreeCursorStepHidden; - } - } - return TreeCursorStepNone; -} - -bool ts_tree_cursor_goto_first_child(t_tree_cursor *self) { - for (;;) { - switch (ts_tree_cursor_goto_first_child_internal(self)) { - case TreeCursorStepHidden: - continue; - case TreeCursorStepVisible: - return true; - default: - return false; - } - } - return false; -} - -TreeCursorStep ts_tree_cursor_goto_last_child_internal(t_tree_cursor *_self) { - TreeCursor *self = (TreeCursor *)_self; - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - if (!iterator.parent.ptr || iterator.parent.ptr->child_count == 0) return TreeCursorStepNone; - - TreeCursorEntry last_entry = {0}; - TreeCursorStep last_step = TreeCursorStepNone; - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - if (visible) { - last_entry = entry; - last_step = TreeCursorStepVisible; - } - else if (ts_subtree_visible_child_count(*entry.subtree) > 0) { - last_entry = entry; - 
last_step = TreeCursorStepHidden; - } - } - if (last_entry.subtree) { - array_push(&self->stack, last_entry); - return last_step; - } - - return TreeCursorStepNone; -} - -bool ts_tree_cursor_goto_last_child(t_tree_cursor *self) { - for (;;) { - switch (ts_tree_cursor_goto_last_child_internal(self)) { - case TreeCursorStepHidden: - continue; - case TreeCursorStepVisible: - return true; - default: - return false; - } - } - return false; -} - -static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point( - t_tree_cursor *_self, - uint32_t goal_byte, - t_point goal_point -) { - TreeCursor *self = (TreeCursor *)_self; - uint32_t initial_size = self->stack.size; - uint32_t visible_child_index = 0; - - bool did_descend; - do { - did_descend = false; - - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - Length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree)); - bool at_goal = entry_end.bytes >= goal_byte && point_gte(entry_end.extent, goal_point); - uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree); - if (at_goal) { - if (visible) { - array_push(&self->stack, entry); - return visible_child_index; - } - if (visible_child_count > 0) { - array_push(&self->stack, entry); - did_descend = true; - break; - } - } else if (visible) { - visible_child_index++; - } else { - visible_child_index += visible_child_count; - } - } - } while (did_descend); - - self->stack.size = initial_size; - return -1; -} - -int64_t ts_tree_cursor_goto_first_child_for_byte(t_tree_cursor *self, uint32_t goal_byte) { - return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, POINT_ZERO); -} - -int64_t ts_tree_cursor_goto_first_child_for_point(t_tree_cursor *self, t_point goal_point) { - return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point); -} - -TreeCursorStep 
ts_tree_cursor_goto_sibling_internal( - t_tree_cursor *_self, - bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) { - TreeCursor *self = (TreeCursor *)_self; - uint32_t initial_size = self->stack.size; - - while (self->stack.size > 1) { - TreeCursorEntry entry = array_pop(&self->stack); - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - iterator.child_index = entry.child_index; - iterator.structural_child_index = entry.structural_child_index; - iterator.position = entry.position; - iterator.descendant_index = entry.descendant_index; - - bool visible = false; - advance(&iterator, &entry, &visible); - if (visible && self->stack.size + 1 < initial_size) break; - - while (advance(&iterator, &entry, &visible)) { - if (visible) { - array_push(&self->stack, entry); - return TreeCursorStepVisible; - } - - if (ts_subtree_visible_child_count(*entry.subtree)) { - array_push(&self->stack, entry); - return TreeCursorStepHidden; - } - } - } - - self->stack.size = initial_size; - return TreeCursorStepNone; -} - -TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(t_tree_cursor *_self) { - return ts_tree_cursor_goto_sibling_internal(_self, ts_tree_cursor_child_iterator_next); -} - -bool ts_tree_cursor_goto_next_sibling(t_tree_cursor *self) { - switch (ts_tree_cursor_goto_next_sibling_internal(self)) { - case TreeCursorStepHidden: - ts_tree_cursor_goto_first_child(self); - return true; - case TreeCursorStepVisible: - return true; - default: - return false; - } -} - -TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(t_tree_cursor *_self) { - // since subtracting across row loses column information, we may have to - // restore it - TreeCursor *self = (TreeCursor *)_self; - - // for that, save current position before traversing - TreeCursorStep step = ts_tree_cursor_goto_sibling_internal( - _self, ts_tree_cursor_child_iterator_previous); - if (step == TreeCursorStepNone) - return step; - - // if length is already valid, there's 
no need to recompute it - if (!length_is_undefined(array_back(&self->stack)->position)) - return step; - - // restore position from the parent node - const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2]; - Length position = parent->position; - uint32_t child_index = array_back(&self->stack)->child_index; - const Subtree *children = ts_subtree_children((*(parent->subtree))); - - if (child_index > 0) { - // skip first child padding since its position should match the position of the parent - position = length_add(position, ts_subtree_size(children[0])); - for (uint32_t i = 1; i < child_index; ++i) { - position = length_add(position, ts_subtree_total_size(children[i])); - } - position = length_add(position, ts_subtree_padding(children[child_index])); - } - - array_back(&self->stack)->position = position; - - return step; -} - -bool ts_tree_cursor_goto_previous_sibling(t_tree_cursor *self) { - switch (ts_tree_cursor_goto_previous_sibling_internal(self)) { - case TreeCursorStepHidden: - ts_tree_cursor_goto_last_child(self); - return true; - case TreeCursorStepVisible: - return true; - default: - return false; - } -} - -bool ts_tree_cursor_goto_parent(t_tree_cursor *_self) { - TreeCursor *self = (TreeCursor *)_self; - for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { - if (ts_tree_cursor_is_entry_visible(self, i)) { - self->stack.size = i + 1; - return true; - } - } - return false; -} - -void ts_tree_cursor_goto_descendant( - t_tree_cursor *_self, - uint32_t goal_descendant_index -) { - TreeCursor *self = (TreeCursor *)_self; - - // Ascend to the lowest ancestor that contains the goal node. - for (;;) { - uint32_t i = self->stack.size - 1; - TreeCursorEntry *entry = &self->stack.contents[i]; - uint32_t next_descendant_index = - entry->descendant_index + - (ts_tree_cursor_is_entry_visible(self, i) ? 
1 : 0) + - ts_subtree_visible_descendant_count(*entry->subtree); - if ( - (entry->descendant_index <= goal_descendant_index) && - (next_descendant_index > goal_descendant_index) - ) { - break; - } else if (self->stack.size <= 1) { - return; - } else { - self->stack.size--; - } - } - - // Descend to the goal node. - bool did_descend = true; - do { - did_descend = false; - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - if (iterator.descendant_index > goal_descendant_index) { - return; - } - - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - if (iterator.descendant_index > goal_descendant_index) { - array_push(&self->stack, entry); - if (visible && entry.descendant_index == goal_descendant_index) { - return; - } else { - did_descend = true; - break; - } - } - } - } while (did_descend); -} - -uint32_t ts_tree_cursor_current_descendant_index(const t_tree_cursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - TreeCursorEntry *last_entry = array_back(&self->stack); - return last_entry->descendant_index; -} - -t_parse_node ts_tree_cursor_current_node(const t_tree_cursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - TreeCursorEntry *last_entry = array_back(&self->stack); - t_symbol alias_symbol = self->root_alias_symbol; - if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) { - TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; - alias_symbol = ts_language_alias_at( - self->tree->language, - parent_entry->subtree->ptr->production_id, - last_entry->structural_child_index - ); - } - return ts_node_new( - self->tree, - last_entry->subtree, - last_entry->position, - alias_symbol - ); -} - -// Private - Get various facts about the current node that are needed -// when executing tree queries. 
-void ts_tree_cursor_current_status( - const t_tree_cursor *_self, - t_field_id *field_id, - bool *has_later_siblings, - bool *has_later_named_siblings, - bool *can_have_later_siblings_with_this_field, - t_symbol *supertypes, - unsigned *supertype_count -) { - const TreeCursor *self = (const TreeCursor *)_self; - unsigned max_supertypes = *supertype_count; - *field_id = 0; - *supertype_count = 0; - *has_later_siblings = false; - *has_later_named_siblings = false; - *can_have_later_siblings_with_this_field = false; - - // Walk up the tree, visiting the current node and its invisible ancestors, - // because fields can refer to nodes through invisible *wrapper* nodes, - for (unsigned i = self->stack.size - 1; i > 0; i--) { - TreeCursorEntry *entry = &self->stack.contents[i]; - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - - const t_symbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - parent_entry->subtree->ptr->production_id - ); - - #define subtree_symbol(subtree, structural_child_index) \ - (( \ - !ts_subtree_extra(subtree) && \ - alias_sequence && \ - alias_sequence[structural_child_index] \ - ) ? \ - alias_sequence[structural_child_index] : \ - ts_subtree_symbol(subtree)) - - // Stop walking up when a visible ancestor is found. - t_symbol entry_symbol = subtree_symbol( - *entry->subtree, - entry->structural_child_index - ); - TSSymbolMetadata entry_metadata = ts_language_symbol_metadata( - self->tree->language, - entry_symbol - ); - if (i != self->stack.size - 1 && entry_metadata.visible) break; - - // Record any supertypes - if (entry_metadata.supertype && *supertype_count < max_supertypes) { - supertypes[*supertype_count] = entry_symbol; - (*supertype_count)++; - } - - // Determine if the current node has later siblings. 
- if (!*has_later_siblings) { - unsigned sibling_count = parent_entry->subtree->ptr->child_count; - unsigned structural_child_index = entry->structural_child_index; - if (!ts_subtree_extra(*entry->subtree)) structural_child_index++; - for (unsigned j = entry->child_index + 1; j < sibling_count; j++) { - Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j]; - TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata( - self->tree->language, - subtree_symbol(sibling, structural_child_index) - ); - if (sibling_metadata.visible) { - *has_later_siblings = true; - if (*has_later_named_siblings) break; - if (sibling_metadata.named) { - *has_later_named_siblings = true; - break; - } - } else if (ts_subtree_visible_child_count(sibling) > 0) { - *has_later_siblings = true; - if (*has_later_named_siblings) break; - if (sibling.ptr->named_child_count > 0) { - *has_later_named_siblings = true; - break; - } - } - if (!ts_subtree_extra(sibling)) structural_child_index++; - } - } - - #undef subtree_symbol - - if (!ts_subtree_extra(*entry->subtree)) { - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map( - self->tree->language, - parent_entry->subtree->ptr->production_id, - &field_map, &field_map_end - ); - - // Look for a field name associated with the current node. - if (!*field_id) { - for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - if (!map->inherited && map->child_index == entry->structural_child_index) { - *field_id = map->field_id; - break; - } - } - } - - // Determine if the current node can have later siblings with the same field name. 
- if (*field_id) { - for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - if ( - map->field_id == *field_id && - map->child_index > entry->structural_child_index - ) { - *can_have_later_siblings_with_this_field = true; - break; - } - } - } - } - } -} - -uint32_t ts_tree_cursor_current_depth(const t_tree_cursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - uint32_t depth = 0; - for (unsigned i = 1; i < self->stack.size; i++) { - if (ts_tree_cursor_is_entry_visible(self, i)) { - depth++; - } - } - return depth; -} - -t_parse_node ts_tree_cursor_parent_node(const t_tree_cursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - for (int i = (int)self->stack.size - 2; i >= 0; i--) { - TreeCursorEntry *entry = &self->stack.contents[i]; - bool is_visible = true; - t_symbol alias_symbol = 0; - if (i > 0) { - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - alias_symbol = ts_language_alias_at( - self->tree->language, - parent_entry->subtree->ptr->production_id, - entry->structural_child_index - ); - is_visible = (alias_symbol != 0) || ts_subtree_visible(*entry->subtree); - } - if (is_visible) { - return ts_node_new( - self->tree, - entry->subtree, - entry->position, - alias_symbol - ); - } - } - return ts_node_new(NULL, NULL, length_zero(), 0); -} - -t_field_id ts_tree_cursor_current_field_id(const t_tree_cursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - - // Walk up the tree, visiting the current node and its invisible ancestors. - for (unsigned i = self->stack.size - 1; i > 0; i--) { - TreeCursorEntry *entry = &self->stack.contents[i]; - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - - // Stop walking up when another visible node is found. 
- if ( - i != self->stack.size - 1 && - ts_tree_cursor_is_entry_visible(self, i) - ) break; - - if (ts_subtree_extra(*entry->subtree)) break; - - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map( - self->tree->language, - parent_entry->subtree->ptr->production_id, - &field_map, &field_map_end - ); - for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - if (!map->inherited && map->child_index == entry->structural_child_index) { - return map->field_id; - } - } - } - return 0; -} - -const char *ts_tree_cursor_current_field_name(const t_tree_cursor *_self) { - t_field_id id = ts_tree_cursor_current_field_id(_self); - if (id) { - const TreeCursor *self = (const TreeCursor *)_self; - return self->tree->language->field_names[id]; - } else { - return NULL; - } -} - -t_tree_cursor ts_tree_cursor_copy(const t_tree_cursor *_cursor) { - const TreeCursor *cursor = (const TreeCursor *)_cursor; - t_tree_cursor res = {NULL, NULL, {0, 0}}; - TreeCursor *copy = (TreeCursor *)&res; - copy->tree = cursor->tree; - copy->root_alias_symbol = cursor->root_alias_symbol; - array_init(©->stack); - array_push_all(©->stack, &cursor->stack); - return res; -} - -void ts_tree_cursor_reset_to(t_tree_cursor *_dst, const t_tree_cursor *_src) { - const TreeCursor *cursor = (const TreeCursor *)_src; - TreeCursor *copy = (TreeCursor *)_dst; - copy->tree = cursor->tree; - copy->root_alias_symbol = cursor->root_alias_symbol; - array_clear(©->stack); - array_push_all(©->stack, &cursor->stack); -} diff --git a/parser/src/alloc.c b/parser/src/alloc.c deleted file mode 100644 index 79844287..00000000 --- a/parser/src/alloc.c +++ /dev/null @@ -1,48 +0,0 @@ -#include "alloc.h" -#include "./api.h" -#include - -static void *ts_malloc_default(size_t size) { - void *result = malloc(size); - if (size > 0 && !result) { - fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); - abort(); - } - return result; -} - -static void *ts_calloc_default(size_t count, 
size_t size) { - void *result = calloc(count, size); - if (count > 0 && !result) { - fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); - abort(); - } - return result; -} - -static void *ts_realloc_default(void *buffer, size_t size) { - void *result = realloc(buffer, size); - if (size > 0 && !result) { - fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); - abort(); - } - return result; -} - -// Allow clients to override allocation functions dynamically -TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default; -TS_PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default; -TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default; -TS_PUBLIC void (*ts_current_free)(void *) = free; - -void ts_set_allocator( - void *(*new_malloc)(size_t size), - void *(*new_calloc)(size_t count, size_t size), - void *(*new_realloc)(void *ptr, size_t size), - void (*new_free)(void *ptr) -) { - ts_current_malloc = new_malloc ? new_malloc : ts_malloc_default; - ts_current_calloc = new_calloc ? new_calloc : ts_calloc_default; - ts_current_realloc = new_realloc ? new_realloc : ts_realloc_default; - ts_current_free = new_free ? 
new_free : free; -} diff --git a/parser/src/alloc.h b/parser/src/alloc.h deleted file mode 100644 index a0eadb7a..00000000 --- a/parser/src/alloc.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef TREE_SITTER_ALLOC_H_ -#define TREE_SITTER_ALLOC_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -#if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32) -#define TS_PUBLIC -#else -#define TS_PUBLIC __attribute__((visibility("default"))) -#endif - -TS_PUBLIC extern void *(*ts_current_malloc)(size_t); -TS_PUBLIC extern void *(*ts_current_calloc)(size_t, size_t); -TS_PUBLIC extern void *(*ts_current_realloc)(void *, size_t); -TS_PUBLIC extern void (*ts_current_free)(void *); - -// Allow clients to override allocation functions -#ifndef ts_malloc -#define ts_malloc ts_current_malloc -#endif -#ifndef ts_calloc -#define ts_calloc ts_current_calloc -#endif -#ifndef ts_realloc -#define ts_realloc ts_current_realloc -#endif -#ifndef ts_free -#define ts_free ts_current_free -#endif - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_ALLOC_H_ diff --git a/parser/src/api.h b/parser/src/api.h index d423b229..2f0ec90f 100644 --- a/parser/src/api.h +++ b/parser/src/api.h @@ -1,1266 +1,1875 @@ -#ifndef TREE_SITTER_API_H_ -#define TREE_SITTER_API_H_ +#ifndef TREE_SITTER_ARRAY_H_ +#define TREE_SITTER_ARRAY_H_ -#ifndef TREE_SITTER_HIDE_SYMBOLS -#if defined(__GNUC__) || defined(__clang__) -#pragma GCC visibility push(default) -#endif -#endif - -#include -#include +#include #include - -/****************************/ -/* Section - ABI Versioning */ -/****************************/ - -/** - * The latest ABI version that is supported by the current version of the - * library. When Languages are generated by the Tree-sitter CLI, they are - * assigned an ABI version number that corresponds to the current CLI version. - * The Tree-sitter library is generally backwards-compatible with languages - * generated using older CLI versions, but is not forwards-compatible. 
- */ -#define TREE_SITTER_LANGUAGE_VERSION 14 - -/** - * The earliest ABI version that is supported by the current version of the - * library. - */ -#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 13 - -/*******************/ -/* Section - Types */ -/*******************/ - -typedef uint16_t t_state_id; -typedef uint16_t t_symbol; -typedef uint16_t t_field_id; -typedef struct t_language t_language; -typedef struct t_parser t_parser; -typedef struct t_tree t_tree; -typedef struct t_query t_query; -typedef struct t_query_cursor t_query_cursor; -typedef struct t_lookahead_iterator t_lookahead_iterator; - -typedef enum t_input_encoding { - TSInputEncodingUTF8, - TSInputEncodingUTF16, -} t_input_encoding; - -typedef enum t_symbol_type { - TSSymbolTypeRegular, - TSSymbolTypeAnonymous, - TSSymbolTypeAuxiliary, -} t_symbol_type; - -typedef struct t_point { - uint32_t row; - uint32_t column; -} t_point; - -typedef struct t_range { - t_point start_point; - t_point end_point; - uint32_t start_byte; - uint32_t end_byte; -} t_range; - -typedef struct t_input { - void *payload; - const char *(*read)(void *payload, uint32_t byte_index, t_point position, uint32_t *bytes_read); - t_input_encoding encoding; -} t_input; - -typedef enum t_log_type { - TSLogTypeParse, - TSLogTypeLex, -} t_log_type; - -typedef struct t_logger { - void *payload; - void (*log)(void *payload, t_log_type log_type, const char *buffer); -} t_logger; - -typedef struct t_input_edit { - uint32_t start_byte; - uint32_t old_end_byte; - uint32_t new_end_byte; - t_point start_point; - t_point old_end_point; - t_point new_end_point; -} t_input_edit; - -typedef struct t_parse_node { - uint32_t context[4]; - const void *id; - const t_tree *tree; -} t_parse_node; - -typedef struct t_tree_cursor { - const void *tree; - const void *id; - uint32_t context[3]; -} t_tree_cursor; - -typedef struct t_query_capture { - t_parse_node node; - uint32_t index; -} t_query_capture; - -typedef enum t_quantifier { - TSQuantifierZero = 
0, // must match the array initialization value - TSQuantifierZeroOrOne, - TSQuantifierZeroOrMore, - TSQuantifierOne, - TSQuantifierOneOrMore, -} t_quantifier; - -typedef struct t_query_match { - uint32_t id; - uint16_t pattern_index; - uint16_t capture_count; - const t_query_capture *captures; -} t_query_match; - -typedef enum t_query_predicate_step_type { - TSQueryPredicateStepTypeDone, - TSQueryPredicateStepTypeCapture, - TSQueryPredicateStepTypeString, -} t_query_predicate_step_type; - -typedef struct t_query_predicate_step { - t_query_predicate_step_type type; - uint32_t value_id; -} t_query_predicate_step; - -typedef enum t_query_error { - TSQueryErrorNone = 0, - TSQueryErrorSyntax, - TSQueryErrorNodeType, - TSQueryErrorField, - TSQueryErrorCapture, - TSQueryErrorStructure, - TSQueryErrorLanguage, -} t_query_error; - -/********************/ -/* Section - Parser */ -/********************/ - -/** - * Create a new parser. - */ -t_parser *ts_parser_new(void); - -/** - * Delete the parser, freeing all of the memory that it used. - */ -void ts_parser_delete(t_parser *self); - -/** - * Get the parser's current language. - */ -const t_language *ts_parser_language(const t_parser *self); - -/** - * Set the language that the parser should use for parsing. - * - * Returns a boolean indicating whether or not the language was successfully - * assigned. True means assignment succeeded. False means there was a version - * mismatch: the language was generated with an incompatible version of the - * Tree-sitter CLI. Check the language's version using [`ts_language_version`] - * and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and - * [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants. - */ -bool ts_parser_set_language(t_parser *self, const t_language *language); - -/** - * Set the ranges of text that the parser should include when parsing. - * - * By default, the parser will always include entire documents. 
This function - * allows you to parse only a *portion* of a document but still return a syntax - * tree whose ranges match up with the document as a whole. You can also pass - * multiple disjoint ranges. - * - * The second and third parameters specify the location and length of an array - * of ranges. The parser does *not* take ownership of these ranges; it copies - * the data, so it doesn't matter how these ranges are allocated. - * - * If `count` is zero, then the entire document will be parsed. Otherwise, - * the given ranges must be ordered from earliest to latest in the document, - * and they must not overlap. That is, the following must hold for all: - * - * `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte` - * - * If this requirement is not satisfied, the operation will fail, the ranges - * will not be assigned, and this function will return `false`. On success, - * this function returns `true` - */ -bool ts_parser_set_included_ranges( - t_parser *self, - const t_range *ranges, - uint32_t count -); - -/** - * Get the ranges of text that the parser will include when parsing. - * - * The returned pointer is owned by the parser. The caller should not free it - * or write to it. The length of the array will be written to the given - * `count` pointer. - */ -const t_range *ts_parser_included_ranges( - const t_parser *self, - uint32_t *count -); - -/** - * Use the parser to parse some source code and create a syntax tree. - * - * If you are parsing this document for the first time, pass `NULL` for the - * `old_tree` parameter. Otherwise, if you have already parsed an earlier - * version of this document and the document has since been edited, pass the - * previous syntax tree so that the unchanged parts of it can be reused. - * This will save time and memory. For this to work correctly, you must have - * already edited the old syntax tree using the [`ts_tree_edit`] function in a - * way that exactly matches the source code changes. 
- * - * The [`TSInput`] parameter lets you specify how to read the text. It has the - * following three fields: - * 1. [`read`]: A function to retrieve a chunk of text at a given byte offset - * and (row, column) position. The function should return a pointer to the - * text and write its length to the [`bytes_read`] pointer. The parser does - * not take ownership of this buffer; it just borrows it until it has - * finished reading it. The function should write a zero value to the - * [`bytes_read`] pointer to indicate the end of the document. - * 2. [`payload`]: An arbitrary pointer that will be passed to each invocation - * of the [`read`] function. - * 3. [`encoding`]: An indication of how the text is encoded. Either - * `TSInputEncodingUTF8` or `TSInputEncodingUTF16`. - * - * This function returns a syntax tree on success, and `NULL` on failure. There - * are three possible reasons for failure: - * 1. The parser does not have a language assigned. Check for this using the - [`ts_parser_language`] function. - * 2. Parsing was cancelled due to a timeout that was set by an earlier call to - * the [`ts_parser_set_timeout_micros`] function. You can resume parsing from - * where the parser left out by calling [`ts_parser_parse`] again with the - * same arguments. Or you can start parsing from scratch by first calling - * [`ts_parser_reset`]. - * 3. Parsing was cancelled using a cancellation flag that was set by an - * earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing - * from where the parser left out by calling [`ts_parser_parse`] again with - * the same arguments. - * - * [`read`]: TSInput::read - * [`payload`]: TSInput::payload - * [`encoding`]: TSInput::encoding - * [`bytes_read`]: TSInput::read - */ -t_tree *ts_parser_parse( - t_parser *self, - const t_tree *old_tree, - t_input input -); - -/** - * Use the parser to parse some source code stored in one contiguous buffer. 
- * The first two parameters are the same as in the [`ts_parser_parse`] function - * above. The second two parameters indicate the location of the buffer and its - * length in bytes. - */ -t_tree *ts_parser_parse_string( - t_parser *self, - const t_tree *old_tree, - const char *string, - uint32_t length -); - -/** - * Use the parser to parse some source code stored in one contiguous buffer with - * a given encoding. The first four parameters work the same as in the - * [`ts_parser_parse_string`] method above. The final parameter indicates whether - * the text is encoded as UTF8 or UTF16. - */ -t_tree *ts_parser_parse_string_encoding( - t_parser *self, - const t_tree *old_tree, - const char *string, - uint32_t length, - t_input_encoding encoding -); - -/** - * Instruct the parser to start the next parse from the beginning. - * - * If the parser previously failed because of a timeout or a cancellation, then - * by default, it will resume where it left off on the next call to - * [`ts_parser_parse`] or other parsing functions. If you don't want to resume, - * and instead intend to use this parser to parse some other document, you must - * call [`ts_parser_reset`] first. - */ -void ts_parser_reset(t_parser *self); - -/** - * Set the maximum duration in microseconds that parsing should be allowed to - * take before halting. - * - * If parsing takes longer than this, it will halt early, returning NULL. - * See [`ts_parser_parse`] for more information. - */ -void ts_parser_set_timeout_micros(t_parser *self, uint64_t timeout_micros); - -/** - * Get the duration in microseconds that parsing is allowed to take. - */ -uint64_t ts_parser_timeout_micros(const t_parser *self); - -/** - * Set the parser's current cancellation flag pointer. - * - * If a non-null pointer is assigned, then the parser will periodically read - * from this pointer during parsing. If it reads a non-zero value, it will - * halt early, returning NULL. See [`ts_parser_parse`] for more information. 
- */ -void ts_parser_set_cancellation_flag(t_parser *self, const size_t *flag); - -/** - * Get the parser's current cancellation flag pointer. - */ -const size_t *ts_parser_cancellation_flag(const t_parser *self); - -/** - * Set the logger that a parser should use during parsing. - * - * The parser does not take ownership over the logger payload. If a logger was - * previously assigned, the caller is responsible for releasing any memory - * owned by the previous logger. - */ -void ts_parser_set_logger(t_parser *self, t_logger logger); - -/** - * Get the parser's current logger. - */ -t_logger ts_parser_logger(const t_parser *self); - -/** - * Set the file descriptor to which the parser should write debugging graphs - * during parsing. The graphs are formatted in the DOT language. You may want - * to pipe these graphs directly to a `dot(1)` process in order to generate - * SVG output. You can turn off this logging by passing a negative number. - */ -void ts_parser_print_dot_graphs(t_parser *self, int fd); - -/******************/ -/* Section - Tree */ -/******************/ - -/** - * Create a shallow copy of the syntax tree. This is very fast. - * - * You need to copy a syntax tree in order to use it on more than one thread at - * a time, as syntax trees are not thread safe. - */ -t_tree *ts_tree_copy(const t_tree *self); - -/** - * Delete the syntax tree, freeing all of the memory that it used. - */ -void ts_tree_delete(t_tree *self); - -/** - * Get the root node of the syntax tree. - */ -t_parse_node ts_tree_root_node(const t_tree *self); - -/** - * Get the root node of the syntax tree, but with its position - * shifted forward by the given offset. - */ -t_parse_node ts_tree_root_node_with_offset( - const t_tree *self, - uint32_t offset_bytes, - t_point offset_extent -); - -/** - * Get the language that was used to parse the syntax tree. 
- */ -const t_language *ts_tree_language(const t_tree *self); - -/** - * Get the array of included ranges that was used to parse the syntax tree. - * - * The returned pointer must be freed by the caller. - */ -t_range *ts_tree_included_ranges(const t_tree *self, uint32_t *length); - -/** - * Edit the syntax tree to keep it in sync with source code that has been - * edited. - * - * You must describe the edit both in terms of byte offsets and in terms of - * (row, column) coordinates. - */ -void ts_tree_edit(t_tree *self, const t_input_edit *edit); - -/** - * Compare an old edited syntax tree to a new syntax tree representing the same - * document, returning an array of ranges whose syntactic structure has changed. - * - * For this to work correctly, the old syntax tree must have been edited such - * that its ranges match up to the new tree. Generally, you'll want to call - * this function right after calling one of the [`ts_parser_parse`] functions. - * You need to pass the old tree that was passed to parse, as well as the new - * tree that was returned from that function. - * - * The returned array is allocated using `malloc` and the caller is responsible - * for freeing it using `free`. The length of the array will be written to the - * given `length` pointer. - */ -t_range *ts_tree_get_changed_ranges( - const t_tree *old_tree, - const t_tree *new_tree, - uint32_t *length -); - -/** - * Write a DOT graph describing the syntax tree to the given file. - */ -void ts_tree_print_dot_graph(const t_tree *self, int file_descriptor); - -/******************/ -/* Section - Node */ -/******************/ - -/** - * Get the node's type as a null-terminated string. - */ -const char *ts_node_type(t_parse_node self); - -/** - * Get the node's type as a numerical id. - */ -t_symbol ts_node_symbol(t_parse_node self); - -/** - * Get the node's language. 
- */ -const t_language *ts_node_language(t_parse_node self); - -/** - * Get the node's type as it appears in the grammar ignoring aliases as a - * null-terminated string. - */ -const char *ts_node_grammar_type(t_parse_node self); - -/** - * Get the node's type as a numerical id as it appears in the grammar ignoring - * aliases. This should be used in [`ts_language_next_state`] instead of - * [`ts_node_symbol`]. - */ -t_symbol ts_node_grammar_symbol(t_parse_node self); - -/** - * Get the node's start byte. - */ -uint32_t ts_node_start_byte(t_parse_node self); - -/** - * Get the node's start position in terms of rows and columns. - */ -t_point ts_node_start_point(t_parse_node self); - -/** - * Get the node's end byte. - */ -uint32_t ts_node_end_byte(t_parse_node self); - -/** - * Get the node's end position in terms of rows and columns. - */ -t_point ts_node_end_point(t_parse_node self); - -/** - * Get an S-expression representing the node as a string. - * - * This string is allocated with `malloc` and the caller is responsible for - * freeing it using `free`. - */ -char *ts_node_string(t_parse_node self); - -/** - * Check if the node is null. Functions like [`ts_node_child`] and - * [`ts_node_next_sibling`] will return a null node to indicate that no such node - * was found. - */ -bool ts_node_is_null(t_parse_node self); - -/** - * Check if the node is *named*. Named nodes correspond to named rules in the - * grammar, whereas *anonymous* nodes correspond to string literals in the - * grammar. - */ -bool ts_node_is_named(t_parse_node self); - -/** - * Check if the node is *missing*. Missing nodes are inserted by the parser in - * order to recover from certain kinds of syntax errors. - */ -bool ts_node_is_missing(t_parse_node self); - -/** - * Check if the node is *extra*. Extra nodes represent things like comments, - * which are not required the grammar, but can appear anywhere. 
- */ -bool ts_node_is_extra(t_parse_node self); - -/** - * Check if a syntax node has been edited. - */ -bool ts_node_has_changes(t_parse_node self); - -/** - * Check if the node is a syntax error or contains any syntax errors. - */ -bool ts_node_has_error(t_parse_node self); - -/** - * Check if the node is a syntax error. -*/ -bool ts_node_is_error(t_parse_node self); - -/** - * Get this node's parse state. -*/ -t_state_id ts_node_parse_state(t_parse_node self); - -/** - * Get the parse state after this node. -*/ -t_state_id ts_node_next_parse_state(t_parse_node self); - -/** - * Get the node's immediate parent. - * Prefer [`ts_node_child_containing_descendant`] for - * iterating over the node's ancestors. - */ -t_parse_node ts_node_parent(t_parse_node self); - -/** - * Get the node's child that contains `descendant`. - */ -t_parse_node ts_node_child_containing_descendant(t_parse_node self, t_parse_node descendant); - -/** - * Get the node's child at the given index, where zero represents the first - * child. - */ -t_parse_node ts_node_child(t_parse_node self, uint32_t child_index); - -/** - * Get the field name for node's child at the given index, where zero represents - * the first child. Returns NULL, if no field is found. - */ -const char *ts_node_field_name_for_child(t_parse_node self, uint32_t child_index); - -/** - * Get the node's number of children. - */ -uint32_t ts_node_child_count(t_parse_node self); - -/** - * Get the node's *named* child at the given index. - * - * See also [`ts_node_is_named`]. - */ -t_parse_node ts_node_named_child(t_parse_node self, uint32_t child_index); - -/** - * Get the node's number of *named* children. - * - * See also [`ts_node_is_named`]. - */ -uint32_t ts_node_named_child_count(t_parse_node self); - -/** - * Get the node's child with the given field name. 
- */ -t_parse_node ts_node_child_by_field_name( - t_parse_node self, - const char *name, - uint32_t name_length -); - -/** - * Get the node's child with the given numerical field id. - * - * You can convert a field name to an id using the - * [`ts_language_field_id_for_name`] function. - */ -t_parse_node ts_node_child_by_field_id(t_parse_node self, t_field_id field_id); - -/** - * Get the node's next / previous sibling. - */ -t_parse_node ts_node_next_sibling(t_parse_node self); -t_parse_node ts_node_prev_sibling(t_parse_node self); - -/** - * Get the node's next / previous *named* sibling. - */ -t_parse_node ts_node_next_named_sibling(t_parse_node self); -t_parse_node ts_node_prev_named_sibling(t_parse_node self); - -/** - * Get the node's first child that extends beyond the given byte offset. - */ -t_parse_node ts_node_first_child_for_byte(t_parse_node self, uint32_t byte); - -/** - * Get the node's first named child that extends beyond the given byte offset. - */ -t_parse_node ts_node_first_named_child_for_byte(t_parse_node self, uint32_t byte); - -/** - * Get the node's number of descendants, including one for the node itself. - */ -uint32_t ts_node_descendant_count(t_parse_node self); - -/** - * Get the smallest node within this node that spans the given range of bytes - * or (row, column) positions. - */ -t_parse_node ts_node_descendant_for_byte_range(t_parse_node self, uint32_t start, uint32_t end); -t_parse_node ts_node_descendant_for_point_range(t_parse_node self, t_point start, t_point end); - -/** - * Get the smallest named node within this node that spans the given range of - * bytes or (row, column) positions. - */ -t_parse_node ts_node_named_descendant_for_byte_range(t_parse_node self, uint32_t start, uint32_t end); -t_parse_node ts_node_named_descendant_for_point_range(t_parse_node self, t_point start, t_point end); - -/** - * Edit the node to keep it in-sync with source code that has been edited. - * - * This function is only rarely needed. 
When you edit a syntax tree with the - * [`ts_tree_edit`] function, all of the nodes that you retrieve from the tree - * afterward will already reflect the edit. You only need to use [`ts_node_edit`] - * when you have a [`TSNode`] instance that you want to keep and continue to use - * after an edit. - */ -void ts_node_edit(t_parse_node *self, const t_input_edit *edit); - -/** - * Check if two nodes are identical. - */ -bool ts_node_eq(t_parse_node self, t_parse_node other); - -/************************/ -/* Section - TreeCursor */ -/************************/ - -/** - * Create a new tree cursor starting from the given node. - * - * A tree cursor allows you to walk a syntax tree more efficiently than is - * possible using the [`TSNode`] functions. It is a mutable object that is always - * on a certain syntax node, and can be moved imperatively to different nodes. - */ -t_tree_cursor ts_tree_cursor_new(t_parse_node node); - -/** - * Delete a tree cursor, freeing all of the memory that it used. - */ -void ts_tree_cursor_delete(t_tree_cursor *self); - -/** - * Re-initialize a tree cursor to start at a different node. - */ -void ts_tree_cursor_reset(t_tree_cursor *self, t_parse_node node); - -/** - * Re-initialize a tree cursor to the same position as another cursor. - * - * Unlike [`ts_tree_cursor_reset`], this will not lose parent information and - * allows reusing already created cursors. -*/ -void ts_tree_cursor_reset_to(t_tree_cursor *dst, const t_tree_cursor *src); - -/** - * Get the tree cursor's current node. - */ -t_parse_node ts_tree_cursor_current_node(const t_tree_cursor *self); - -/** - * Get the field name of the tree cursor's current node. - * - * This returns `NULL` if the current node doesn't have a field. - * See also [`ts_node_child_by_field_name`]. - */ -const char *ts_tree_cursor_current_field_name(const t_tree_cursor *self); - -/** - * Get the field id of the tree cursor's current node. 
- * - * This returns zero if the current node doesn't have a field. - * See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`]. - */ -t_field_id ts_tree_cursor_current_field_id(const t_tree_cursor *self); - -/** - * Move the cursor to the parent of its current node. - * - * This returns `true` if the cursor successfully moved, and returns `false` - * if there was no parent node (the cursor was already on the root node). - */ -bool ts_tree_cursor_goto_parent(t_tree_cursor *self); - -/** - * Move the cursor to the next sibling of its current node. - * - * This returns `true` if the cursor successfully moved, and returns `false` - * if there was no next sibling node. - */ -bool ts_tree_cursor_goto_next_sibling(t_tree_cursor *self); - -/** - * Move the cursor to the previous sibling of its current node. - * - * This returns `true` if the cursor successfully moved, and returns `false` if - * there was no previous sibling node. - * - * Note, that this function may be slower than - * [`ts_tree_cursor_goto_next_sibling`] due to how node positions are stored. In - * the worst case, this will need to iterate through all the children upto the - * previous sibling node to recalculate its position. - */ -bool ts_tree_cursor_goto_previous_sibling(t_tree_cursor *self); - -/** - * Move the cursor to the first child of its current node. - * - * This returns `true` if the cursor successfully moved, and returns `false` - * if there were no children. - */ -bool ts_tree_cursor_goto_first_child(t_tree_cursor *self); - -/** - * Move the cursor to the last child of its current node. - * - * This returns `true` if the cursor successfully moved, and returns `false` if - * there were no children. - * - * Note that this function may be slower than [`ts_tree_cursor_goto_first_child`] - * because it needs to iterate through all the children to compute the child's - * position. 
- */ -bool ts_tree_cursor_goto_last_child(t_tree_cursor *self); - -/** - * Move the cursor to the node that is the nth descendant of - * the original node that the cursor was constructed with, where - * zero represents the original node itself. - */ -void ts_tree_cursor_goto_descendant(t_tree_cursor *self, uint32_t goal_descendant_index); - -/** - * Get the index of the cursor's current node out of all of the - * descendants of the original node that the cursor was constructed with. - */ -uint32_t ts_tree_cursor_current_descendant_index(const t_tree_cursor *self); - -/** - * Get the depth of the cursor's current node relative to the original - * node that the cursor was constructed with. - */ -uint32_t ts_tree_cursor_current_depth(const t_tree_cursor *self); - -/** - * Move the cursor to the first child of its current node that extends beyond - * the given byte offset or point. - * - * This returns the index of the child node if one was found, and returns -1 - * if no such child was found. - */ -int64_t ts_tree_cursor_goto_first_child_for_byte(t_tree_cursor *self, uint32_t goal_byte); -int64_t ts_tree_cursor_goto_first_child_for_point(t_tree_cursor *self, t_point goal_point); - -t_tree_cursor ts_tree_cursor_copy(const t_tree_cursor *cursor); - -/*******************/ -/* Section - Query */ -/*******************/ - -/** - * Create a new query from a string containing one or more S-expression - * patterns. The query is associated with a particular language, and can - * only be run on syntax nodes parsed with that language. - * - * If all of the given patterns are valid, this returns a [`TSQuery`]. - * If a pattern is invalid, this returns `NULL`, and provides two pieces - * of information about the problem: - * 1. The byte offset of the error is written to the `error_offset` parameter. - * 2. The type of error is written to the `error_type` parameter. 
- */ -t_query *ts_query_new( - const t_language *language, - const char *source, - uint32_t source_len, - uint32_t *error_offset, - t_query_error *error_type -); - -/** - * Delete a query, freeing all of the memory that it used. - */ -void ts_query_delete(t_query *self); - -/** - * Get the number of patterns, captures, or string literals in the query. - */ -uint32_t ts_query_pattern_count(const t_query *self); -uint32_t ts_query_capture_count(const t_query *self); -uint32_t ts_query_string_count(const t_query *self); - -/** - * Get the byte offset where the given pattern starts in the query's source. - * - * This can be useful when combining queries by concatenating their source - * code strings. - */ -uint32_t ts_query_start_byte_for_pattern(const t_query *self, uint32_t pattern_index); - -/** - * Get all of the predicates for the given pattern in the query. - * - * The predicates are represented as a single array of steps. There are three - * types of steps in this array, which correspond to the three legal values for - * the `type` field: - * - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names - * of captures. Their `value_id` can be used with the - * [`ts_query_capture_name_for_id`] function to obtain the name of the capture. - * - `TSQueryPredicateStepTypeString` - Steps with this type represent literal - * strings. Their `value_id` can be used with the - * [`ts_query_string_value_for_id`] function to obtain their string value. - * - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels* - * that represent the end of an individual predicate. If a pattern has two - * predicates, then there will be two steps with this `type` in the array. - */ -const t_query_predicate_step *ts_query_predicates_for_pattern( - const t_query *self, - uint32_t pattern_index, - uint32_t *step_count -); - -/* - * Check if the given pattern in the query has a single root node. 
- */ -bool ts_query_is_pattern_rooted(const t_query *self, uint32_t pattern_index); - -/* - * Check if the given pattern in the query is 'non local'. - * - * A non-local pattern has multiple root nodes and can match within a - * repeating sequence of nodes, as specified by the grammar. Non-local - * patterns disable certain optimizations that would otherwise be possible - * when executing a query on a specific range of a syntax tree. - */ -bool ts_query_is_pattern_non_local(const t_query *self, uint32_t pattern_index); - -/* - * Check if a given pattern is guaranteed to match once a given step is reached. - * The step is specified by its byte offset in the query's source code. - */ -bool ts_query_is_pattern_guaranteed_at_step(const t_query *self, uint32_t byte_offset); - -/** - * Get the name and length of one of the query's captures, or one of the - * query's string literals. Each capture and string is associated with a - * numeric id based on the order that it appeared in the query's source. - */ -const char *ts_query_capture_name_for_id( - const t_query *self, - uint32_t index, - uint32_t *length -); - -/** - * Get the quantifier of the query's captures. Each capture is * associated - * with a numeric id based on the order that it appeared in the query's source. - */ -t_quantifier ts_query_capture_quantifier_for_id( - const t_query *self, - uint32_t pattern_index, - uint32_t capture_index -); - -const char *ts_query_string_value_for_id( - const t_query *self, - uint32_t index, - uint32_t *length -); - -/** - * Disable a certain capture within a query. - * - * This prevents the capture from being returned in matches, and also avoids - * any resource usage associated with recording the capture. Currently, there - * is no way to undo this. - */ -void ts_query_disable_capture(t_query *self, const char *name, uint32_t length); - -/** - * Disable a certain pattern within a query. 
- * - * This prevents the pattern from matching and removes most of the overhead - * associated with the pattern. Currently, there is no way to undo this. - */ -void ts_query_disable_pattern(t_query *self, uint32_t pattern_index); - -/** - * Create a new cursor for executing a given query. - * - * The cursor stores the state that is needed to iteratively search - * for matches. To use the query cursor, first call [`ts_query_cursor_exec`] - * to start running a given query on a given syntax node. Then, there are - * two options for consuming the results of the query: - * 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the - * *matches* in the order that they were found. Each match contains the - * index of the pattern that matched, and an array of captures. Because - * multiple patterns can match the same set of nodes, one match may contain - * captures that appear *before* some of the captures from a previous match. - * 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the - * individual *captures* in the order that they appear. This is useful if - * don't care about which pattern matched, and just want a single ordered - * sequence of captures. - * - * If you don't care about consuming all of the results, you can stop calling - * [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point. - * You can then start executing another query on another node by calling - * [`ts_query_cursor_exec`] again. - */ -t_query_cursor *ts_query_cursor_new(void); - -/** - * Delete a query cursor, freeing all of the memory that it used. - */ -void ts_query_cursor_delete(t_query_cursor *self); - -/** - * Start running a given query on a given node. - */ -void ts_query_cursor_exec(t_query_cursor *self, const t_query *query, t_parse_node node); - -/** - * Manage the maximum number of in-progress matches allowed by this query - * cursor. 
- * - * Query cursors have an optional maximum capacity for storing lists of - * in-progress captures. If this capacity is exceeded, then the - * earliest-starting match will silently be dropped to make room for further - * matches. This maximum capacity is optional — by default, query cursors allow - * any number of pending matches, dynamically allocating new space for them as - * needed as the query is executed. - */ -bool ts_query_cursor_did_exceed_match_limit(const t_query_cursor *self); -uint32_t ts_query_cursor_match_limit(const t_query_cursor *self); -void ts_query_cursor_set_match_limit(t_query_cursor *self, uint32_t limit); - -/** - * Set the range of bytes or (row, column) positions in which the query - * will be executed. - */ -void ts_query_cursor_set_byte_range(t_query_cursor *self, uint32_t start_byte, uint32_t end_byte); -void ts_query_cursor_set_point_range(t_query_cursor *self, t_point start_point, t_point end_point); - -/** - * Advance to the next match of the currently running query. - * - * If there is a match, write it to `*match` and return `true`. - * Otherwise, return `false`. - */ -bool ts_query_cursor_next_match(t_query_cursor *self, t_query_match *match); -void ts_query_cursor_remove_match(t_query_cursor *self, uint32_t match_id); - -/** - * Advance to the next capture of the currently running query. - * - * If there is a capture, write its match to `*match` and its index within - * the matche's capture list to `*capture_index`. Otherwise, return `false`. - */ -bool ts_query_cursor_next_capture( - t_query_cursor *self, - t_query_match *match, - uint32_t *capture_index -); - -/** - * Set the maximum start depth for a query cursor. - * - * This prevents cursors from exploring children nodes at a certain depth. - * Note if a pattern includes many children, then they will still be checked. 
- * - * The zero max start depth value can be used as a special behavior and - * it helps to destructure a subtree by staying on a node and using captures - * for interested parts. Note that the zero max start depth only limit a search - * depth for a pattern's root node but other nodes that are parts of the pattern - * may be searched at any depth what defined by the pattern structure. - * - * Set to `UINT32_MAX` to remove the maximum start depth. - */ -void ts_query_cursor_set_max_start_depth(t_query_cursor *self, uint32_t max_start_depth); - -/**********************/ -/* Section - Language */ -/**********************/ - -/** - * Get another reference to the given language. - */ -const t_language *ts_language_copy(const t_language *self); - -/** - * Free any dynamically-allocated resources for this language, if - * this is the last reference. - */ -void ts_language_delete(const t_language *self); - -/** - * Get the number of distinct node types in the language. - */ -uint32_t ts_language_symbol_count(const t_language *self); - -/** - * Get the number of valid states in this language. -*/ -uint32_t ts_language_state_count(const t_language *self); - -/** - * Get a node type string for the given numerical id. - */ -const char *ts_language_symbol_name(const t_language *self, t_symbol symbol); - -/** - * Get the numerical id for the given node type string. - */ -t_symbol ts_language_symbol_for_name( - const t_language *self, - const char *string, - uint32_t length, - bool is_named -); - -/** - * Get the number of distinct field names in the language. - */ -uint32_t ts_language_field_count(const t_language *self); - -/** - * Get the field name string for the given numerical id. - */ -const char *ts_language_field_name_for_id(const t_language *self, t_field_id id); - -/** - * Get the numerical id for the given field name string. 
- */ -t_field_id ts_language_field_id_for_name(const t_language *self, const char *name, uint32_t name_length); - -/** - * Check whether the given node type id belongs to named nodes, anonymous nodes, - * or a hidden nodes. - * - * See also [`ts_node_is_named`]. Hidden nodes are never returned from the API. - */ -t_symbol_type ts_language_symbol_type(const t_language *self, t_symbol symbol); - -/** - * Get the ABI version number for this language. This version number is used - * to ensure that languages were generated by a compatible version of - * Tree-sitter. - * - * See also [`ts_parser_set_language`]. - */ -uint32_t ts_language_version(const t_language *self); - -/** - * Get the next parse state. Combine this with lookahead iterators to generate - * completion suggestions or valid symbols in error nodes. Use - * [`ts_node_grammar_symbol`] for valid symbols. -*/ -t_state_id ts_language_next_state(const t_language *self, t_state_id state, t_symbol symbol); - -/********************************/ -/* Section - Lookahead Iterator */ -/********************************/ - -/** - * Create a new lookahead iterator for the given language and parse state. - * - * This returns `NULL` if state is invalid for the language. - * - * Repeatedly using [`ts_lookahead_iterator_next`] and - * [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in the - * given parse state. Newly created lookahead iterators will contain the `ERROR` - * symbol. - * - * Lookahead iterators can be useful to generate suggestions and improve syntax - * error diagnostics. To get symbols valid in an ERROR node, use the lookahead - * iterator on its first leaf node state. For `MISSING` nodes, a lookahead - * iterator created on the previous non-extra leaf node may be appropriate. -*/ -t_lookahead_iterator *ts_lookahead_iterator_new(const t_language *self, t_state_id state); - -/** - * Delete a lookahead iterator freeing all the memory used. 
-*/ -void ts_lookahead_iterator_delete(t_lookahead_iterator *self); - -/** - * Reset the lookahead iterator to another state. - * - * This returns `true` if the iterator was reset to the given state and `false` - * otherwise. -*/ -bool ts_lookahead_iterator_reset_state(t_lookahead_iterator *self, t_state_id state); - -/** - * Reset the lookahead iterator. - * - * This returns `true` if the language was set successfully and `false` - * otherwise. -*/ -bool ts_lookahead_iterator_reset(t_lookahead_iterator *self, const t_language *language, t_state_id state); - -/** - * Get the current language of the lookahead iterator. -*/ -const t_language *ts_lookahead_iterator_language(const t_lookahead_iterator *self); - -/** - * Advance the lookahead iterator to the next symbol. - * - * This returns `true` if there is a new symbol and `false` otherwise. -*/ -bool ts_lookahead_iterator_next(t_lookahead_iterator *self); - -/** - * Get the current symbol of the lookahead iterator; -*/ -t_symbol ts_lookahead_iterator_current_symbol(const t_lookahead_iterator *self); - -/** - * Get the current symbol type of the lookahead iterator as a null terminated - * string. -*/ -const char *ts_lookahead_iterator_current_symbol_name(const t_lookahead_iterator *self); - -/*************************************/ -/* Section - WebAssembly Integration */ -/************************************/ - -typedef struct wasm_engine_t t_wasm_engine; -typedef struct t_wasm_store t_wasm_store; - -typedef enum { - TSWasmErrorKindNone = 0, - TSWasmErrorKindParse, - TSWasmErrorKindCompile, - TSWasmErrorKindInstantiate, - TSWasmErrorKindAllocate, -} t_wasm_error_kind; - -typedef struct { - t_wasm_error_kind kind; - char *message; -} t_wasm_error; - -/** - * Create a Wasm store. - */ -t_wasm_store *ts_wasm_store_new( - t_wasm_engine *engine, - t_wasm_error *error -); - -/** - * Free the memory associated with the given Wasm store. 
- */ -void ts_wasm_store_delete(t_wasm_store *); - -/** - * Create a language from a buffer of Wasm. The resulting language behaves - * like any other Tree-sitter language, except that in order to use it with - * a parser, that parser must have a Wasm store. Note that the language - * can be used with any Wasm store, it doesn't need to be the same store that - * was used to originally load it. - */ -const t_language *ts_wasm_store_load_language( - t_wasm_store *, - const char *name, - const char *wasm, - uint32_t wasm_len, - t_wasm_error *error -); - -/** - * Get the number of languages instantiated in the given wasm store. - */ -size_t ts_wasm_store_language_count(const t_wasm_store *); - -/** - * Check if the language came from a Wasm module. If so, then in order to use - * this language with a Parser, that parser must have a Wasm store assigned. - */ -bool ts_language_is_wasm(const t_language *); - -/** - * Assign the given Wasm store to the parser. A parser must have a Wasm store - * in order to use Wasm languages. - */ -void ts_parser_set_wasm_store(t_parser *, t_wasm_store *); - -/** - * Remove the parser's current Wasm store and return it. This returns NULL if - * the parser doesn't have a Wasm store. - */ -t_wasm_store *ts_parser_take_wasm_store(t_parser *); - -/**********************************/ -/* Section - Global Configuration */ -/**********************************/ - -/** - * Set the allocation functions used by the library. - * - * By default, Tree-sitter uses the standard libc allocation functions, - * but aborts the process when an allocation fails. This function lets - * you supply alternative allocation functions at runtime. - * - * If you pass `NULL` for any parameter, Tree-sitter will switch back to - * its default implementation of that function. - * - * If you call this function after the library has already been used, then - * you must ensure that either: - * 1. All the existing objects have been freed. - * 2. 
The new allocator shares its state with the old one, so it is capable - * of freeing memory that was allocated by the old allocator. - */ -void ts_set_allocator( - void *(*new_malloc)(size_t), - void *(*new_calloc)(size_t, size_t), - void *(*new_realloc)(void *, size_t), - void (*new_free)(void *) -); - - -#ifndef TREE_SITTER_HIDE_SYMBOLS -#if defined(__GNUC__) || defined(__clang__) -#pragma GCC visibility pop +#include +#include +#include +#include +#include + +#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) +#define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14 +#define LANGUAGE_VERSION_USABLE_VIA_WASM 13 +#define ERROR_STATE 0 +#define ERROR_COST_PER_RECOVERY 500 +#define ERROR_COST_PER_MISSING_TREE 110 +#define ERROR_COST_PER_SKIPPED_TREE 100 +#define ERROR_COST_PER_SKIPPED_LINE 30 +#define ERROR_COST_PER_SKIPPED_CHAR 1 +#define MAX_STEP_CAPTURE_COUNT 3 +#define MAX_NEGATED_FIELD_COUNT 8 +#define MAX_STATE_PREDECESSOR_COUNT 256 +#define MAX_ANALYSIS_STATE_DEPTH 8 +#define MAX_ANALYSIS_ITERATION_COUNT 256 +#define MAX_LINK_COUNT 8 +#define MAX_NODE_POOL_SIZE 50 +#define MAX_ITERATOR_COUNT 64 +#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX +#define TS_MAX_TREE_POOL_SIZE 32 +#define ts_builtin_sym_error ((TSSymbol) - 1) +#define ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 +#define POINT_ZERO ((TSPoint){0, 0}) +#define POINT_MAX ((TSPoint){UINT32_MAX, UINT32_MAX}) +#define TS_TREE_STATE_NONE USHRT_MAX +#define NULL_SUBTREE ((Subtree){.ptr = NULL}) +#define STACK_VERSION_NONE ((StackVersion) - 1) +#define TS_DECODE_ERROR (-1) + +// Get a subtree's children, which are allocated immediately before the +// tree's own heap data. +#define ts_subtree_children(self) \ + ((self).data.is_inline \ + ? 
NULL \ + : (Subtree *)((self).ptr) - (self).ptr->child_count) + +typedef uint16_t TSStateId; +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +typedef struct TSParser TSParser; +typedef struct TSTree TSTree; +typedef struct TSQuery TSQuery; +typedef struct TSQueryCursor TSQueryCursor; +typedef struct TSLookaheadIterator TSLookaheadIterator; + +typedef struct TSPoint +{ + uint32_t row; + uint32_t column; +} TSPoint; + +typedef struct +{ + uint32_t bytes; + TSPoint extent; +} Length; + +typedef enum TSInputEncoding +{ + TSInputEncodingUTF8, + TSInputEncodingUTF16, +} TSInputEncoding; + +typedef enum TSSymbolType +{ + TSSymbolTypeRegular, + TSSymbolTypeAnonymous, + TSSymbolTypeAuxiliary, +} TSSymbolType; + + +typedef struct TSRange +{ + TSPoint start_point; + TSPoint end_point; + uint32_t start_byte; + uint32_t end_byte; +} TSRange; + +typedef struct TSInput +{ + void *payload; + const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, + uint32_t *bytes_read); + TSInputEncoding encoding; +} TSInput; + +typedef enum TSLogType +{ + TSLogTypeParse, + TSLogTypeLex, +} TSLogType; + +typedef struct TSLogger +{ + void *payload; + void (*log)(void *payload, TSLogType log_type, const char *buffer); +} TSLogger; + +typedef struct TSInputEdit +{ + uint32_t start_byte; + uint32_t old_end_byte; + uint32_t new_end_byte; + TSPoint start_point; + TSPoint old_end_point; + TSPoint new_end_point; +} TSInputEdit; + +typedef struct TSNode +{ + uint32_t context[4]; + const void *id; + const TSTree *tree; +} TSNode; + +typedef struct TSTreeCursor +{ + const void *tree; + const void *id; + uint32_t context[3]; +} TSTreeCursor; + +typedef struct TSQueryCapture +{ + TSNode node; + uint32_t index; +} TSQueryCapture; + +typedef enum TSQuantifier +{ + TSQuantifierZero = 0, // must match the array initialization value + TSQuantifierZeroOrOne, + TSQuantifierZeroOrMore, + TSQuantifierOne, + TSQuantifierOneOrMore, +} TSQuantifier; + 
+typedef struct TSQueryMatch +{ + uint32_t id; + uint16_t pattern_index; + uint16_t capture_count; + const TSQueryCapture *captures; +} TSQueryMatch; + +typedef enum TSQueryPredicateStepType +{ + TSQueryPredicateStepTypeDone, + TSQueryPredicateStepTypeCapture, + TSQueryPredicateStepTypeString, +} TSQueryPredicateStepType; + +typedef struct TSQueryPredicateStep +{ + TSQueryPredicateStepType type; + uint32_t value_id; +} TSQueryPredicateStep; + +typedef enum TSQueryError +{ + TSQueryErrorNone = 0, + TSQueryErrorSyntax, + TSQueryErrorNodeType, + TSQueryErrorField, + TSQueryErrorCapture, + TSQueryErrorStructure, + TSQueryErrorLanguage, +} TSQueryError; + +#define Array(T) \ + struct \ + { \ + T *contents; \ + uint32_t size; \ + uint32_t capacity; \ + } + +/// Initialize an array. +#define array_init(self) \ + ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) + +/// Create an empty array. +#define array_new() \ + { \ + NULL, 0, 0 \ + } + +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) + +/// Get a pointer to the first element in the array. +#define array_front(self) array_get(self, 0) + +/// Get a pointer to the last element in the array. +#define array_back(self) array_get(self, (self)->size - 1) + +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. +#define array_clear(self) ((self)->size = 0) + +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. +#define array_reserve(self, new_capacity) \ + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) + +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. 
+#define array_delete(self) _array__delete((Array *)(self)) + +/// Push a new `element` onto the end of the array. +#define array_push(self, element) \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ + (self)->contents[(self)->size++] = (element)) + +/// Increase the array's size by `count` elements. +/// New elements are zero-initialized. +#define array_grow_by(self, count) \ + do \ + { \ + if ((count) == 0) \ + break; \ + _array__grow((Array *)(self), count, array_elem_size(self)); \ + memset((self)->contents + (self)->size, 0, \ + (count) * array_elem_size(self)); \ + (self)->size += (count); \ + } while (0) + +/// Append all elements from one array to the end of another. +#define array_push_all(self, other) \ + array_extend((self), (other)->size, (other)->contents) + +/// Append `count` elements to the end of the array, reading their values from +/// the `contents` pointer. +#define array_extend(self, count, contents) \ + _array__splice((Array *)(self), array_elem_size(self), (self)->size, 0, \ + count, contents) + +/// Remove `old_count` elements from the array starting at the given `index`. At +/// the same index, insert `new_count` new elements, reading their values from +/// the `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents) \ + _array__splice((Array *)(self), array_elem_size(self), _index, old_count, \ + new_count, new_contents) + +/// Insert one `element` into the array at the given `index`. +#define array_insert(self, _index, element) \ + _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, \ + &(element)) + +/// Remove one element from the array at the given `index`. +#define array_erase(self, _index) \ + _array__erase((Array *)(self), array_elem_size(self), _index) + +/// Pop the last element off the array, returning the element by value. +#define array_pop(self) ((self)->contents[--(self)->size]) + +/// Assign the contents of one array to another, reallocating if necessary. 
+#define array_assign(self, other) \ + _array__assign((Array *)(self), (const Array *)(other), \ + array_elem_size(self)) + +/// Swap one array with another +#define array_swap(self, other) _array__swap((Array *)(self), (Array *)(other)) + +/// Get the size of the array contents +#define array_elem_size(self) (sizeof *(self)->contents) + +/// Search a sorted array for a given `needle` value, using the given `compare` +/// callback to determine the order. +/// +/// If an existing element is found to be equal to `needle`, then the `index` +/// out-parameter is set to the existing value's index, and the `exists` +/// out-parameter is set to true. Otherwise, `index` is set to an index where +/// `needle` should be inserted in order to preserve the sorting, and `exists` +/// is set to false. +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ + _array__search_sorted(self, 0, compare, , needle, _index, _exists) + +/// Search a sorted array for a given `needle` value, using integer comparisons +/// of a given struct field (specified with a leading dot) to determine the +/// order. +/// +/// See also `array_search_sorted_with`. +#define array_search_sorted_by(self, field, needle, _index, _exists) \ + _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) + +/// Insert a given `value` into a sorted array, using the given `compare` +/// callback to determine the order. +#define array_insert_sorted_with(self, compare, value) \ + do \ + { \ + unsigned _index, _exists; \ + array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ + if (!_exists) \ + array_insert(self, _index, value); \ + } while (0) + +/// Insert a given `value` into a sorted array, using integer comparisons of +/// a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_by`. 
+#define array_insert_sorted_by(self, field, value) \ + do \ + { \ + unsigned _index, _exists; \ + array_search_sorted_by(self, field, (value)field, &_index, &_exists); \ + if (!_exists) \ + array_insert(self, _index, value); \ + } while (0) + +// Private + +typedef Array(void) Array; + +/// This is not what you're looking for, see `array_delete`. +static inline void _array__delete(Array *self) +{ + if (self->contents) + { + free(self->contents); + self->contents = NULL; + self->size = 0; + self->capacity = 0; + } +} + +/// This is not what you're looking for, see `array_erase`. +static inline void _array__erase(Array *self, size_t element_size, + uint32_t index) +{ + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * element_size, + contents + (index + 1) * element_size, + (self->size - index - 1) * element_size); + self->size--; +} + +/// This is not what you're looking for, see `array_reserve`. +static inline void _array__reserve(Array *self, size_t element_size, + uint32_t new_capacity) +{ + if (new_capacity > self->capacity) + { + if (self->contents) + { + self->contents = + realloc(self->contents, new_capacity * element_size); + } + else + { + self->contents = malloc(new_capacity * element_size); + } + self->capacity = new_capacity; + } +} + +/// This is not what you're looking for, see `array_assign`. +static inline void _array__assign(Array *self, const Array *other, + size_t element_size) +{ + _array__reserve(self, element_size, other->size); + self->size = other->size; + memcpy(self->contents, other->contents, self->size * element_size); +} + +/// This is not what you're looking for, see `array_swap`. +static inline void _array__swap(Array *self, Array *other) +{ + Array swap = *other; + *other = *self; + *self = swap; +} + +/// This is not what you're looking for, see `array_push` or `array_grow_by`. 
+static inline void _array__grow(Array *self, uint32_t count, + size_t element_size) +{ + uint32_t new_size = self->size + count; + if (new_size > self->capacity) + { + uint32_t new_capacity = self->capacity * 2; + if (new_capacity < 8) + new_capacity = 8; + if (new_capacity < new_size) + new_capacity = new_size; + _array__reserve(self, element_size, new_capacity); + } +} + +/// This is not what you're looking for, see `array_splice`. +static inline void _array__splice(Array *self, size_t element_size, + uint32_t index, uint32_t old_count, + uint32_t new_count, const void *elements) +{ + uint32_t new_size = self->size + new_count - old_count; + uint32_t old_end = index + old_count; + uint32_t new_end = index + new_count; + assert(old_end <= self->size); + + _array__reserve(self, element_size, new_size); + + char *contents = (char *)self->contents; + if (self->size > old_end) + { + memmove(contents + new_end * element_size, + contents + old_end * element_size, + (self->size - old_end) * element_size); + } + if (new_count > 0) + { + if (elements) + { + memcpy((contents + index * element_size), elements, + new_count * element_size); + } + else + { + memset((contents + index * element_size), 0, + new_count * element_size); + } + } + self->size += new_count - old_count; +} + +/// A binary search routine, based on Rust's `std::slice::binary_search_by`. +/// This is not what you're looking for, see `array_search_sorted_with` or +/// `array_search_sorted_by`. 
#define _array__search_sorted(self, start, compare, suffix, needle, _index, \
                              _exists)                                      \
    do                                                                      \
    {                                                                       \
        *(_index) = start;                                                  \
        *(_exists) = false;                                                 \
        uint32_t size = (self)->size - *(_index);                           \
        if (size == 0)                                                      \
            break;                                                          \
        int comparison;                                                     \
        while (size > 1)                                                    \
        {                                                                   \
            uint32_t half_size = size / 2;                                  \
            uint32_t mid_index = *(_index) + half_size;                     \
            comparison =                                                    \
                compare(&((self)->contents[mid_index] suffix), (needle));   \
            if (comparison <= 0)                                            \
                *(_index) = mid_index;                                      \
            size -= half_size;                                              \
        }                                                                   \
        comparison =                                                        \
            compare(&((self)->contents[*(_index)] suffix), (needle));       \
        if (comparison == 0)                                                \
            *(_exists) = true;                                              \
        else if (comparison < 0)                                            \
            *(_index) += 1;                                                 \
    } while (0)

/// Helper macro for the `_sorted_by` routines below. This takes the left
/// (existing) parameter by reference in order to work with the generic sorting
/// function above. The result is negative, zero or positive like a `compare`
/// callback.
#define _compare_int(a, b) ((int)*(a) - (int)(b))

// The header names of these three directives were lost; they are restored to
// the headers that provide `bool`, `size_t` and `uint32_t` used below.
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/// Read `*p` atomically. Uses a relaxed `__atomic` load when that builtin
/// family is available, otherwise a `__sync` fetch-and-add of zero.
static inline size_t atomic_load(const volatile size_t *p)
{
#ifdef __ATOMIC_RELAXED
    return __atomic_load_n(p, __ATOMIC_RELAXED);
#else
    return __sync_fetch_and_add((volatile size_t *)p, 0);
#endif
}

/// Atomically add one to `*p` and return the incremented value.
static inline uint32_t atomic_inc(volatile uint32_t *p)
{
#ifdef __ATOMIC_RELAXED
    return __atomic_add_fetch(p, 1U, __ATOMIC_SEQ_CST);
#else
    return __sync_add_and_fetch(p, 1U);
#endif
}

/// Atomically subtract one from `*p` and return the decremented value.
static inline uint32_t atomic_dec(volatile uint32_t *p)
{
#ifdef __ATOMIC_RELAXED
    return __atomic_sub_fetch(p, 1U, __ATOMIC_SEQ_CST);
#else
    return __sync_sub_and_fetch(p, 1U);
#endif
}

// The serialized state of an external scanner.
//
// Every time an external token subtree is created after a call to an
// external scanner, the scanner's `serialize` function is called to
// retrieve a serialized copy of its state. The bytes are then copied
// onto the subtree itself so that the scanner's state can later be
// restored using its `deserialize` function.
//
// Small byte arrays are stored inline, and long ones are allocated
// separately on the heap.
typedef struct
{
    union {
        char *long_data;     // heap buffer, used for long states
        char short_data[24]; // inline buffer, used for small states
    };
    uint32_t length; // number of serialized bytes
} ExternalScannerState;

// The compact form of a subtree.
//
// Small leaf nodes that are neither errors nor products of an external
// scanner are stored directly in this struct instead of on the heap.
//
// The field order is chosen so that the `is_inline` bit lands on the same
// storage bit as the least significant bit of the pointer held by `Subtree`
// and `MutableSubtree`. Alignment keeps that bit at 0 for every valid
// pointer, so it can be used to tell the pointer and the inline struct
// apart.
typedef struct SubtreeInlineData SubtreeInlineData;

// Flag bits shared by every layout variant below.
#define SUBTREE_BITS      \
    bool visible : 1;     \
    bool named : 1;       \
    bool extra : 1;       \
    bool has_changes : 1; \
    bool is_missing : 1;  \
    bool is_keyword : 1;

// Padding and size fields shared by every layout variant below.
#define SUBTREE_SIZE             \
    uint8_t padding_columns;     \
    uint8_t padding_rows : 4;    \
    uint8_t lookahead_bytes : 4; \
    uint8_t padding_bytes;       \
    uint8_t size_bytes;

#if TS_BIG_ENDIAN
# if TS_PTR_SIZE == 32

// Big-endian layout, 32-bit pointers.
struct SubtreeInlineData
{
    uint16_t parse_state;
    uint8_t symbol;
    SUBTREE_BITS
    bool unused : 1;
    bool is_inline : 1;
    SUBTREE_SIZE
};

# else

// Big-endian layout, other pointer sizes.
struct SubtreeInlineData
{
    SUBTREE_SIZE
    uint16_t parse_state;
    uint8_t symbol;
    SUBTREE_BITS
    bool unused : 1;
    bool is_inline : 1;
};

# endif
#else

// Layout used when TS_BIG_ENDIAN is not set.
struct SubtreeInlineData
{
    bool is_inline : 1;
    SUBTREE_BITS
    uint8_t symbol;
    uint16_t parse_state;
    SUBTREE_SIZE
};

#endif

#undef SUBTREE_BITS
#undef SUBTREE_SIZE

// A heap-allocated representation of a subtree.
//
// This representation is used for parent nodes, external tokens,
// errors, and other leaf nodes whose data is too large to fit into
// the inline representation.
+typedef struct +{ + volatile uint32_t ref_count; + Length padding; + Length size; + uint32_t lookahead_bytes; + uint32_t error_cost; + uint32_t child_count; + TSSymbol symbol; + TSStateId parse_state; + + bool visible : 1; + bool named : 1; + bool extra : 1; + bool fragile_left : 1; + bool fragile_right : 1; + bool has_changes : 1; + bool has_external_tokens : 1; + bool has_external_scanner_state_change : 1; + bool depends_on_column : 1; + bool is_missing : 1; + bool is_keyword : 1; + + union { + // Non-terminal subtrees (`child_count > 0`) + struct + { + uint32_t visible_child_count; + uint32_t named_child_count; + uint32_t visible_descendant_count; + int32_t dynamic_precedence; + uint16_t repeat_depth; + uint16_t production_id; + struct + { + TSSymbol symbol; + TSStateId parse_state; + } first_leaf; + }; + + // External terminal subtrees (`child_count == 0 && + // has_external_tokens`) + ExternalScannerState external_scanner_state; + + // Error terminal subtrees (`child_count == 0 && symbol == + // ts_builtin_sym_error`) + int32_t lookahead_char; + }; +} SubtreeHeapData; + +// The fundamental building block of a syntax tree. +typedef union { + SubtreeInlineData data; + const SubtreeHeapData *ptr; +} Subtree; + +// Like Subtree, but mutable. 
+typedef union { + SubtreeInlineData data; + SubtreeHeapData *ptr; +} MutableSubtree; + +typedef Array(Subtree) SubtreeArray; +typedef Array(MutableSubtree) MutableSubtreeArray; + +typedef struct +{ + MutableSubtreeArray free_trees; + MutableSubtreeArray tree_stack; +} SubtreePool; + +typedef Array(TSRange) TSRangeArray; + +typedef struct +{ + const Subtree *subtree; + Length position; + uint32_t child_index; + uint32_t structural_child_index; + uint32_t descendant_index; +} TreeCursorEntry; + +typedef struct +{ + const TSTree *tree; + Array(TreeCursorEntry) stack; + TSSymbol root_alias_symbol; +} TreeCursor; + +typedef union { + struct + { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct + { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +void ts_range_array_get_changed_ranges(const TSRange *old_ranges, + unsigned old_range_count, + const TSRange *new_ranges, + unsigned new_range_count, + TSRangeArray *differences); + +bool ts_range_array_intersects(const TSRangeArray *self, unsigned start_index, + uint32_t start_byte, uint32_t end_byte); + +unsigned ts_subtree_get_changed_ranges( + const Subtree *old_tree, const Subtree *new_tree, TreeCursor *cursor1, + TreeCursor *cursor2, const TSLanguage *language, + const TSRangeArray *included_range_differences, TSRange **ranges); + +typedef struct +{ + const TSParseAction *actions; + uint32_t action_count; + bool is_reusable; +} TableEntry; + +typedef struct +{ + const TSLanguage *language; + const uint16_t *data; + const uint16_t *group_end; + TSStateId state; + uint16_t table_value; + uint16_t section_index; + uint16_t group_count; + bool is_small_state; + + const TSParseAction *actions; + TSSymbol symbol; + TSStateId next_state; + uint16_t action_count; +} LookaheadIterator; + +typedef struct +{ + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + 
+typedef enum +{ + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + TSParseAction action; + struct + { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +typedef struct +{ + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +typedef struct +{ + uint16_t index; + uint16_t length; +} TSFieldMapSlice; + +typedef struct TSLexer TSLexer; + +struct TSLexer +{ + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); +}; + +typedef struct +{ + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef struct +{ + int32_t start; + int32_t end; +} TSCharacterRange; + +struct TSLanguage +{ + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char *const *symbol_names; + const char *const *field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct + { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + 
unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + const TSStateId *primary_state_ids; +}; + +void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, + TableEntry *); + +TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol); + +TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol); + +TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, + TSSymbol symbol); + +static inline bool ts_language_is_symbol_external(const TSLanguage *self, + TSSymbol symbol) +{ + return 0 < symbol && symbol < self->external_token_count + 1; +} + +static inline const TSParseAction *ts_language_actions(const TSLanguage *self, + TSStateId state, + TSSymbol symbol, + uint32_t *count) +{ + TableEntry entry; + ts_language_table_entry(self, state, symbol, &entry); + *count = entry.action_count; + return entry.actions; +} + +static inline bool ts_language_has_reduce_action(const TSLanguage *self, + TSStateId state, + TSSymbol symbol) +{ + TableEntry entry; + ts_language_table_entry(self, state, symbol, &entry); + return entry.action_count > 0 && + entry.actions[0].type == TSParseActionTypeReduce; +} + +// Lookup the table value for a given symbol and state. +// +// For non-terminal symbols, the table value represents a successor state. +// For terminal symbols, it represents an index in the actions table. +// For 'large' parse states, this is a direct lookup. For 'small' parse +// states, this requires searching through the symbol groups to find +// the given symbol. 
+static inline uint16_t ts_language_lookup(const TSLanguage *self, + TSStateId state, TSSymbol symbol) +{ + if (state >= self->large_state_count) + { + uint32_t index = + self->small_parse_table_map[state - self->large_state_count]; + const uint16_t *data = &self->small_parse_table[index]; + uint16_t group_count = *(data++); + for (unsigned i = 0; i < group_count; i++) + { + uint16_t section_value = *(data++); + uint16_t symbol_count = *(data++); + for (unsigned j = 0; j < symbol_count; j++) + { + if (*(data++) == symbol) + return section_value; + } + } + return 0; + } + else + { + return self->parse_table[state * self->symbol_count + symbol]; + } +} + +static inline bool ts_language_has_actions(const TSLanguage *self, + TSStateId state, TSSymbol symbol) +{ + return ts_language_lookup(self, state, symbol) != 0; +} + +// Iterate over all of the symbols that are valid in the given state. +// +// For 'large' parse states, this just requires iterating through +// all possible symbols and checking the parse table for each one. +// For 'small' parse states, this exploits the structure of the +// table to only visit the valid symbols. 
+static inline LookaheadIterator ts_language_lookaheads(const TSLanguage *self, + TSStateId state) +{ + bool is_small_state = state >= self->large_state_count; + const uint16_t *data; + const uint16_t *group_end = NULL; + uint16_t group_count = 0; + if (is_small_state) + { + uint32_t index = + self->small_parse_table_map[state - self->large_state_count]; + data = &self->small_parse_table[index]; + group_end = data + 1; + group_count = *data; + } + else + { + data = &self->parse_table[state * self->symbol_count] - 1; + } + return (LookaheadIterator){ + .language = self, + .data = data, + .group_end = group_end, + .group_count = group_count, + .is_small_state = is_small_state, + .symbol = UINT16_MAX, + .next_state = 0, + }; +} + +static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) +{ + // For small parse states, valid symbols are listed explicitly, + // grouped by their value. There's no need to look up the actions + // again until moving to the next group. + if (self->is_small_state) + { + self->data++; + if (self->data == self->group_end) + { + if (self->group_count == 0) + return false; + self->group_count--; + self->table_value = *(self->data++); + unsigned symbol_count = *(self->data++); + self->group_end = self->data + symbol_count; + self->symbol = *self->data; + } + else + { + self->symbol = *self->data; + return true; + } + } + + // For large parse states, iterate through every symbol until one + // is found that has valid actions. + else + { + do + { + self->data++; + self->symbol++; + if (self->symbol >= self->language->symbol_count) + return false; + self->table_value = *self->data; + } while (!self->table_value); + } + + // Depending on if the symbols is terminal or non-terminal, the table value + // either represents a list of actions or a successor state. 
+ if (self->symbol < self->language->token_count) + { + const TSParseActionEntry *entry = + &self->language->parse_actions[self->table_value]; + self->action_count = entry->entry.count; + self->actions = (const TSParseAction *)(entry + 1); + self->next_state = 0; + } + else + { + self->action_count = 0; + self->next_state = self->table_value; + } + return true; +} + +// Whether the state is a "primary state". If this returns false, it indicates +// that there exists another state that behaves identically to this one with +// respect to query analysis. +static inline bool ts_language_state_is_primary(const TSLanguage *self, + TSStateId state) +{ + if (self->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) + { + return state == self->primary_state_ids[state]; + } + else + { + return true; + } +} + +static inline const bool *ts_language_enabled_external_tokens( + const TSLanguage *self, unsigned external_scanner_state) +{ + if (external_scanner_state == 0) + { + return NULL; + } + else + { + return self->external_scanner.states + + self->external_token_count * external_scanner_state; + } +} + +static inline const TSSymbol *ts_language_alias_sequence(const TSLanguage *self, + uint32_t production_id) +{ + return production_id + ? &self->alias_sequences[production_id * + self->max_alias_sequence_length] + : NULL; +} + +static inline TSSymbol ts_language_alias_at(const TSLanguage *self, + uint32_t production_id, + uint32_t child_index) +{ + return production_id + ? 
self->alias_sequences[production_id * + self->max_alias_sequence_length + + child_index] + : 0; +} + +static inline void ts_language_field_map(const TSLanguage *self, + uint32_t production_id, + const TSFieldMapEntry **start, + const TSFieldMapEntry **end) +{ + if (self->field_count == 0) + { + *start = NULL; + *end = NULL; + return; + } + + TSFieldMapSlice slice = self->field_map_slices[production_id]; + *start = &self->field_map_entries[slice.index]; + *end = &self->field_map_entries[slice.index] + slice.length; +} + +static inline void ts_language_aliases_for_symbol(const TSLanguage *self, + TSSymbol original_symbol, + const TSSymbol **start, + const TSSymbol **end) +{ + *start = &self->public_symbol_map[original_symbol]; + *end = *start + 1; + + unsigned idx = 0; + for (;;) + { + TSSymbol symbol = self->alias_map[idx++]; + if (symbol == 0 || symbol > original_symbol) + break; + uint16_t count = self->alias_map[idx++]; + if (symbol == original_symbol) + { + *start = &self->alias_map[idx]; + *end = &self->alias_map[idx + count]; + break; + } + idx += count; + } +} + +static const Length LENGTH_UNDEFINED = {0, {0, 1}}; +static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}}; + +static TSPoint point_add(TSPoint a, TSPoint b); +static TSPoint point_sub(TSPoint a, TSPoint b); + +static inline bool length_is_undefined(Length length) +{ + return length.bytes == 0 && length.extent.column != 0; +} + +static inline Length length_min(Length len1, Length len2) +{ + return (len1.bytes < len2.bytes) ? 
len1 : len2; +} + +static inline Length length_add(Length len1, Length len2) +{ + Length result; + result.bytes = len1.bytes + len2.bytes; + result.extent = point_add(len1.extent, len2.extent); + return result; +} + +static inline Length length_sub(Length len1, Length len2) +{ + Length result; + result.bytes = len1.bytes - len2.bytes; + result.extent = point_sub(len1.extent, len2.extent); + return result; +} + +static inline Length length_zero(void) +{ + Length result = {0, {0, 0}}; + return result; +} + +static inline Length length_saturating_sub(Length len1, Length len2) +{ + if (len1.bytes > len2.bytes) + { + return length_sub(len1, len2); + } + else + { + return length_zero(); + } +} + +typedef struct +{ + TSLexer data; + Length current_position; + Length token_start_position; + Length token_end_position; + + TSRange *included_ranges; + const char *chunk; + TSInput input; + TSLogger logger; + + uint32_t included_range_count; + uint32_t current_included_range_index; + uint32_t chunk_start; + uint32_t chunk_size; + uint32_t lookahead_size; + bool did_get_column; + + char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; +} Lexer; + +void ts_lexer_init(Lexer *); +void ts_lexer_delete(Lexer *); +void ts_lexer_set_input(Lexer *, TSInput); +void ts_lexer_reset(Lexer *, Length); +void ts_lexer_start(Lexer *); +void ts_lexer_finish(Lexer *, uint32_t *); +void ts_lexer_advance_to_end(Lexer *); +void ts_lexer_mark_end(Lexer *); +bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, + uint32_t count); +TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count); + +static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, + int32_t lookahead) +{ + uint32_t index = 0; + uint32_t size = len - index; + while (size > 1) + { + uint32_t half_size = size / 2; + uint32_t mid_index = index + half_size; + TSCharacterRange *range = &ranges[mid_index]; + if (lookahead >= range->start && lookahead <= range->end) + { + return true; + } + else 
if (lookahead > range->end) + { + index = mid_index; + } + size -= half_size; + } + TSCharacterRange *range = &ranges[index]; + return (lookahead >= range->start && lookahead <= range->end); +} + +static inline TSPoint point__new(unsigned row, unsigned column) +{ + TSPoint result = {row, column}; + return result; +} + +static inline TSPoint point_add(TSPoint a, TSPoint b) +{ + if (b.row > 0) + return point__new(a.row + b.row, b.column); + else + return point__new(a.row, a.column + b.column); +} + +static inline TSPoint point_sub(TSPoint a, TSPoint b) +{ + if (a.row > b.row) + return point__new(a.row - b.row, a.column); + else + return point__new(0, a.column - b.column); +} + +static inline bool point_lte(TSPoint a, TSPoint b) +{ + return (a.row < b.row) || (a.row == b.row && a.column <= b.column); +} + +static inline bool point_lt(TSPoint a, TSPoint b) +{ + return (a.row < b.row) || (a.row == b.row && a.column < b.column); +} + +static inline bool point_gt(TSPoint a, TSPoint b) +{ + return (a.row > b.row) || (a.row == b.row && a.column > b.column); +} + +static inline bool point_gte(TSPoint a, TSPoint b) +{ + return (a.row > b.row) || (a.row == b.row && a.column >= b.column); +} + +static inline bool point_eq(TSPoint a, TSPoint b) +{ + return a.row == b.row && a.column == b.column; +} + +static inline TSPoint point_min(TSPoint a, TSPoint b) +{ + if (a.row < b.row || (a.row == b.row && a.column < b.column)) + return a; + else + return b; +} + +static inline TSPoint point_max(TSPoint a, TSPoint b) +{ + if (a.row > b.row || (a.row == b.row && a.column > b.column)) + return a; + else + return b; +} + +typedef struct +{ + uint32_t count; + TSSymbol symbol; + int dynamic_precedence; + unsigned short production_id; +} ReduceAction; + +typedef Array(ReduceAction) ReduceActionSet; + +static inline void ts_reduce_action_set_add(ReduceActionSet *self, + ReduceAction new_action) +{ + for (uint32_t i = 0; i < self->size; i++) + { + ReduceAction action = self->contents[i]; + if 
(action.symbol == new_action.symbol && + action.count == new_action.count) + return; + } + array_push(self, new_action); +} + +typedef struct +{ + Subtree tree; + uint32_t child_index; + uint32_t byte_offset; +} StackEntry; + +typedef struct +{ + Array(StackEntry) stack; + Subtree last_external_token; +} ReusableNode; + +static inline ReusableNode reusable_node_new(void) +{ + return (ReusableNode){array_new(), NULL_SUBTREE}; +} + +static inline void reusable_node_clear(ReusableNode *self) +{ + array_clear(&self->stack); + self->last_external_token = NULL_SUBTREE; +} + +static inline Subtree reusable_node_tree(ReusableNode *self) +{ + return self->stack.size > 0 + ? self->stack.contents[self->stack.size - 1].tree + : NULL_SUBTREE; +} + +static inline uint32_t reusable_node_byte_offset(ReusableNode *self) +{ + return self->stack.size > 0 + ? self->stack.contents[self->stack.size - 1].byte_offset + : UINT32_MAX; +} + +static inline void reusable_node_delete(ReusableNode *self) +{ + array_delete(&self->stack); +} + + +static inline uint32_t ts_subtree_total_bytes(Subtree self); +static inline bool ts_subtree_has_external_tokens(Subtree self); +Subtree ts_subtree_last_external_token(Subtree self); +static inline uint32_t ts_subtree_child_count(Subtree self); + +static inline void reusable_node_advance(ReusableNode *self) +{ + StackEntry last_entry = *array_back(&self->stack); + uint32_t byte_offset = + last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree); + if (ts_subtree_has_external_tokens(last_entry.tree)) + { + self->last_external_token = + ts_subtree_last_external_token(last_entry.tree); + } + + Subtree tree; + uint32_t next_index; + do + { + StackEntry popped_entry = array_pop(&self->stack); + next_index = popped_entry.child_index + 1; + if (self->stack.size == 0) + return; + tree = array_back(&self->stack)->tree; + } while (ts_subtree_child_count(tree) <= next_index); + + array_push(&self->stack, ((StackEntry){ + .tree = 
ts_subtree_children(tree)[next_index], + .child_index = next_index, + .byte_offset = byte_offset, + })); +} + +static inline bool reusable_node_descend(ReusableNode *self) +{ + StackEntry last_entry = *array_back(&self->stack); + if (ts_subtree_child_count(last_entry.tree) > 0) + { + array_push(&self->stack, + ((StackEntry){ + .tree = ts_subtree_children(last_entry.tree)[0], + .child_index = 0, + .byte_offset = last_entry.byte_offset, + })); + return true; + } + else + { + return false; + } +} + +static inline void reusable_node_advance_past_leaf(ReusableNode *self) +{ + while (reusable_node_descend(self)) + { + } + reusable_node_advance(self); +} + +static inline void reusable_node_reset(ReusableNode *self, Subtree tree) +{ + reusable_node_clear(self); + array_push(&self->stack, ((StackEntry){ + .tree = tree, + .child_index = 0, + .byte_offset = 0, + })); + + // Never reuse the root node, because it has a non-standard internal + // structure due to transformations that are applied when it is accepted: + // adding the EOF child and any extra children. + if (!reusable_node_descend(self)) + { + reusable_node_clear(self); + } +} + +typedef struct Stack Stack; + +typedef unsigned StackVersion; + +typedef struct +{ + SubtreeArray subtrees; + StackVersion version; +} StackSlice; +typedef Array(StackSlice) StackSliceArray; + +typedef struct +{ + Length position; + unsigned depth; + TSStateId state; +} StackSummaryEntry; +typedef Array(StackSummaryEntry) StackSummary; + +// Create a stack. +Stack *ts_stack_new(SubtreePool *); + +// Release the memory reserved for a given stack. +void ts_stack_delete(Stack *); + +// Get the stack's current number of versions. +uint32_t ts_stack_version_count(const Stack *); + +// Get the state at the top of the given version of the stack. If the stack is +// empty, this returns the initial state, 0. +TSStateId ts_stack_state(const Stack *, StackVersion); + +// Get the last external token associated with a given version of the stack. 
+Subtree ts_stack_last_external_token(const Stack *, StackVersion); + +// Set the last external token associated with a given version of the stack. +void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree); + +// Get the position of the given version of the stack within the document. +Length ts_stack_position(const Stack *, StackVersion); + +// Push a tree and state onto the given version of the stack. +// +// This transfers ownership of the tree to the Stack. Callers that +// need to retain ownership of the tree for their own purposes should +// first retain the tree. +void ts_stack_push(Stack *, StackVersion, Subtree, bool, TSStateId); + +// Pop the given number of entries from the given version of the stack. This +// operation can increase the number of stack versions by revealing multiple +// versions which had previously been merged. It returns an array that +// specifies the index of each revealed version and the trees that were +// removed from that version. +StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count); + +// Remove an error at the top of the given version of the stack. +SubtreeArray ts_stack_pop_error(Stack *, StackVersion); + +// Remove any pending trees from the top of the given version of the stack. +StackSliceArray ts_stack_pop_pending(Stack *, StackVersion); + +// Remove any all trees from the given version of the stack. +StackSliceArray ts_stack_pop_all(Stack *, StackVersion); + +// Get the maximum number of tree nodes reachable from this version of the stack +// since the last error was detected. +unsigned ts_stack_node_count_since_error(const Stack *, StackVersion); + +int ts_stack_dynamic_precedence(Stack *, StackVersion); + +bool ts_stack_has_advanced_since_error(const Stack *, StackVersion); + +// Compute a summary of all the parse states near the top of the given +// version of the stack and store the summary for later retrieval. 
+void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth); + +// Retrieve a summary of all the parse states near the top of the +// given version of the stack. +StackSummary *ts_stack_get_summary(Stack *, StackVersion); + +// Get the total cost of all errors on the given version of the stack. +unsigned ts_stack_error_cost(const Stack *, StackVersion version); + +// Merge the given two stack versions if possible, returning true +// if they were successfully merged and false otherwise. +bool ts_stack_merge(Stack *, StackVersion, StackVersion); + +// Determine whether the given two stack versions can be merged. +bool ts_stack_can_merge(Stack *, StackVersion, StackVersion); + +Subtree ts_stack_resume(Stack *, StackVersion); + +void ts_stack_pause(Stack *, StackVersion, Subtree); + +void ts_stack_halt(Stack *, StackVersion); + +bool ts_stack_is_active(const Stack *, StackVersion); + +bool ts_stack_is_paused(const Stack *, StackVersion); + +bool ts_stack_is_halted(const Stack *, StackVersion); + +void ts_stack_renumber_version(Stack *, StackVersion, StackVersion); + +void ts_stack_swap_versions(Stack *, StackVersion, StackVersion); + +StackVersion ts_stack_copy_version(Stack *, StackVersion); + +// Remove the given version from the stack. 
+void ts_stack_remove_version(Stack *, StackVersion); + +void ts_stack_clear(Stack *); + +typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t); + +void ts_external_scanner_state_init(ExternalScannerState *, const char *, + unsigned); +const char *ts_external_scanner_state_data(const ExternalScannerState *); +bool ts_external_scanner_state_eq(const ExternalScannerState *self, + const char *, unsigned); +void ts_external_scanner_state_delete(ExternalScannerState *self); + +void ts_subtree_array_copy(SubtreeArray, SubtreeArray *); +void ts_subtree_array_clear(SubtreePool *, SubtreeArray *); +void ts_subtree_array_delete(SubtreePool *, SubtreeArray *); +void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *); +void ts_subtree_array_reverse(SubtreeArray *); + +SubtreePool ts_subtree_pool_new(uint32_t capacity); +void ts_subtree_pool_delete(SubtreePool *); + +Subtree ts_subtree_new_leaf(SubtreePool *, TSSymbol, Length, Length, uint32_t, + TSStateId, bool, bool, bool, const TSLanguage *); +Subtree ts_subtree_new_error(SubtreePool *, int32_t, Length, Length, uint32_t, + TSStateId, const TSLanguage *); +MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, + const TSLanguage *); +Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *); +Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, uint32_t, + const TSLanguage *); +MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree); +void ts_subtree_retain(Subtree); +void ts_subtree_release(SubtreePool *, Subtree); +int ts_subtree_compare(Subtree, Subtree, SubtreePool *); +void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *); +void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, + const TSLanguage *); +void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *); +void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *); +Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, 
SubtreePool *); +char *ts_subtree_string(Subtree, TSSymbol, bool, const TSLanguage *, + bool include_all); +Subtree ts_subtree_last_external_token(Subtree); +const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self); +bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); + +#define SUBTREE_GET(self, name) \ + ((self).data.is_inline ? (self).data.name : (self).ptr->name) + +static inline TSSymbol ts_subtree_symbol(Subtree self) +{ + return SUBTREE_GET(self, symbol); +} +static inline bool ts_subtree_visible(Subtree self) +{ + return SUBTREE_GET(self, visible); +} +static inline bool ts_subtree_named(Subtree self) +{ + return SUBTREE_GET(self, named); +} +static inline bool ts_subtree_extra(Subtree self) +{ + return SUBTREE_GET(self, extra); +} +static inline bool ts_subtree_has_changes(Subtree self) +{ + return SUBTREE_GET(self, has_changes); +} +static inline bool ts_subtree_missing(Subtree self) +{ + return SUBTREE_GET(self, is_missing); +} +static inline bool ts_subtree_is_keyword(Subtree self) +{ + return SUBTREE_GET(self, is_keyword); +} +static inline TSStateId ts_subtree_parse_state(Subtree self) +{ + return SUBTREE_GET(self, parse_state); +} +static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) +{ + return SUBTREE_GET(self, lookahead_bytes); +} + +#undef SUBTREE_GET + +// Get the size needed to store a heap-allocated subtree with the given +// number of children. 
+static inline size_t ts_subtree_alloc_size(uint32_t child_count) +{ + return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); +} + + + +static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) +{ + if (self->data.is_inline) + { + self->data.extra = is_extra; + } + else + { + self->ptr->extra = is_extra; + } +} + +static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) +{ + if (self.data.is_inline) + return self.data.symbol; + if (self.ptr->child_count == 0) + return self.ptr->symbol; + return self.ptr->first_leaf.symbol; +} + +static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) +{ + if (self.data.is_inline) + return self.data.parse_state; + if (self.ptr->child_count == 0) + return self.ptr->parse_state; + return self.ptr->first_leaf.parse_state; +} + +static inline Length ts_subtree_padding(Subtree self) +{ + if (self.data.is_inline) + { + Length result = {self.data.padding_bytes, + {self.data.padding_rows, self.data.padding_columns}}; + return result; + } + else + { + return self.ptr->padding; + } +} + +static inline Length ts_subtree_size(Subtree self) +{ + if (self.data.is_inline) + { + Length result = {self.data.size_bytes, {0, self.data.size_bytes}}; + return result; + } + else + { + return self.ptr->size; + } +} + +static inline Length ts_subtree_total_size(Subtree self) +{ + return length_add(ts_subtree_padding(self), ts_subtree_size(self)); +} + +static inline uint32_t ts_subtree_total_bytes(Subtree self) +{ + return ts_subtree_total_size(self).bytes; +} + +static inline uint32_t ts_subtree_child_count(Subtree self) +{ + return self.data.is_inline ? 0 : self.ptr->child_count; +} + +static inline uint32_t ts_subtree_repeat_depth(Subtree self) +{ + return self.data.is_inline ? 0 : self.ptr->repeat_depth; +} + +static inline uint32_t ts_subtree_is_repetition(Subtree self) +{ + return self.data.is_inline ? 
0 + : !self.ptr->named && !self.ptr->visible && + self.ptr->child_count != 0; +} + +static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) +{ + return (self.data.is_inline || self.ptr->child_count == 0) + ? 0 + : self.ptr->visible_descendant_count; +} + +static inline uint32_t ts_subtree_visible_child_count(Subtree self) +{ + if (ts_subtree_child_count(self) > 0) + { + return self.ptr->visible_child_count; + } + else + { + return 0; + } +} + +static inline uint32_t ts_subtree_error_cost(Subtree self) +{ + if (ts_subtree_missing(self)) + { + return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; + } + else + { + return self.data.is_inline ? 0 : self.ptr->error_cost; + } +} + +static inline int32_t ts_subtree_dynamic_precedence(Subtree self) +{ + return (self.data.is_inline || self.ptr->child_count == 0) + ? 0 + : self.ptr->dynamic_precedence; +} + +static inline uint16_t ts_subtree_production_id(Subtree self) +{ + if (ts_subtree_child_count(self) > 0) + { + return self.ptr->production_id; + } + else + { + return 0; + } +} + +static inline bool ts_subtree_fragile_left(Subtree self) +{ + return self.data.is_inline ? false : self.ptr->fragile_left; +} + +static inline bool ts_subtree_fragile_right(Subtree self) +{ + return self.data.is_inline ? false : self.ptr->fragile_right; +} + +static inline bool ts_subtree_has_external_tokens(Subtree self) +{ + return self.data.is_inline ? false : self.ptr->has_external_tokens; +} + +static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) +{ + return self.data.is_inline ? false + : self.ptr->has_external_scanner_state_change; +} + +static inline bool ts_subtree_depends_on_column(Subtree self) +{ + return self.data.is_inline ? false : self.ptr->depends_on_column; +} + +static inline bool ts_subtree_is_fragile(Subtree self) +{ + return self.data.is_inline + ? 
false + : (self.ptr->fragile_left || self.ptr->fragile_right); +} + +static inline bool ts_subtree_is_error(Subtree self) +{ + return ts_subtree_symbol(self) == ts_builtin_sym_error; +} + +static inline bool ts_subtree_is_eof(Subtree self) +{ + return ts_subtree_symbol(self) == ts_builtin_sym_end; +} + +static inline Subtree ts_subtree_from_mut(MutableSubtree self) +{ + Subtree result; + result.data = self.data; + return result; +} + +static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) +{ + MutableSubtree result; + result.data = self.data; + return result; +} + +typedef enum +{ + TreeCursorStepNone, + TreeCursorStepHidden, + TreeCursorStepVisible, +} TreeCursorStep; + +void ts_tree_cursor_init(TreeCursor *, TSNode); +void ts_tree_cursor_current_status(const TSTreeCursor *, TSFieldId *, bool *, + bool *, bool *, TSSymbol *, unsigned *); + +TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *); +TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *); + +static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) +{ + const TreeCursor *self = (const TreeCursor *)_self; + TreeCursorEntry *last_entry = array_back(&self->stack); + return *last_entry->subtree; +} + +TSNode ts_tree_cursor_parent_node(const TSTreeCursor *); + +typedef struct +{ + const Subtree *child; + const Subtree *parent; + Length position; + TSSymbol alias_symbol; +} ParentCacheEntry; + +struct TSTree +{ + Subtree root; + const TSLanguage *language; + TSRange *included_ranges; + unsigned included_range_count; +}; + +TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, + unsigned); +TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol); + +typedef uint64_t TSClock; +typedef uint64_t TSDuration; + +#endif // TREE_SITTER_TREE_H_ diff --git a/parser/src/array.h b/parser/src/array.h deleted file mode 100644 index 15a3b233..00000000 --- a/parser/src/array.h +++ /dev/null @@ -1,290 +0,0 @@ 
-#ifndef TREE_SITTER_ARRAY_H_ -#define TREE_SITTER_ARRAY_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./alloc.h" - -#include -#include -#include -#include -#include - -#ifdef _MSC_VER -#pragma warning(disable : 4101) -#elif defined(__GNUC__) || defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-variable" -#endif - -#define Array(T) \ - struct { \ - T *contents; \ - uint32_t size; \ - uint32_t capacity; \ - } - -/// Initialize an array. -#define array_init(self) \ - ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) - -/// Create an empty array. -#define array_new() \ - { NULL, 0, 0 } - -/// Get a pointer to the element at a given `index` in the array. -#define array_get(self, _index) \ - (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) - -/// Get a pointer to the first element in the array. -#define array_front(self) array_get(self, 0) - -/// Get a pointer to the last element in the array. -#define array_back(self) array_get(self, (self)->size - 1) - -/// Clear the array, setting its size to zero. Note that this does not free any -/// memory allocated for the array's contents. -#define array_clear(self) ((self)->size = 0) - -/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is -/// less than the array's current capacity, this function has no effect. -#define array_reserve(self, new_capacity) \ - _array__reserve((Array *)(self), array_elem_size(self), new_capacity) - -/// Free any memory allocated for this array. Note that this does not free any -/// memory allocated for the array's contents. -#define array_delete(self) _array__delete((Array *)(self)) - -/// Push a new `element` onto the end of the array. -#define array_push(self, element) \ - (_array__grow((Array *)(self), 1, array_elem_size(self)), \ - (self)->contents[(self)->size++] = (element)) - -/// Increase the array's size by `count` elements. -/// New elements are zero-initialized. 
-#define array_grow_by(self, count) \ - do { \ - if ((count) == 0) break; \ - _array__grow((Array *)(self), count, array_elem_size(self)); \ - memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ - (self)->size += (count); \ - } while (0) - -/// Append all elements from one array to the end of another. -#define array_push_all(self, other) \ - array_extend((self), (other)->size, (other)->contents) - -/// Append `count` elements to the end of the array, reading their values from the -/// `contents` pointer. -#define array_extend(self, count, contents) \ - _array__splice( \ - (Array *)(self), array_elem_size(self), (self)->size, \ - 0, count, contents \ - ) - -/// Remove `old_count` elements from the array starting at the given `index`. At -/// the same index, insert `new_count` new elements, reading their values from the -/// `new_contents` pointer. -#define array_splice(self, _index, old_count, new_count, new_contents) \ - _array__splice( \ - (Array *)(self), array_elem_size(self), _index, \ - old_count, new_count, new_contents \ - ) - -/// Insert one `element` into the array at the given `index`. -#define array_insert(self, _index, element) \ - _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) - -/// Remove one element from the array at the given `index`. -#define array_erase(self, _index) \ - _array__erase((Array *)(self), array_elem_size(self), _index) - -/// Pop the last element off the array, returning the element by value. -#define array_pop(self) ((self)->contents[--(self)->size]) - -/// Assign the contents of one array to another, reallocating if necessary. 
-#define array_assign(self, other) \ - _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) - -/// Swap one array with another -#define array_swap(self, other) \ - _array__swap((Array *)(self), (Array *)(other)) - -/// Get the size of the array contents -#define array_elem_size(self) (sizeof *(self)->contents) - -/// Search a sorted array for a given `needle` value, using the given `compare` -/// callback to determine the order. -/// -/// If an existing element is found to be equal to `needle`, then the `index` -/// out-parameter is set to the existing value's index, and the `exists` -/// out-parameter is set to true. Otherwise, `index` is set to an index where -/// `needle` should be inserted in order to preserve the sorting, and `exists` -/// is set to false. -#define array_search_sorted_with(self, compare, needle, _index, _exists) \ - _array__search_sorted(self, 0, compare, , needle, _index, _exists) - -/// Search a sorted array for a given `needle` value, using integer comparisons -/// of a given struct field (specified with a leading dot) to determine the order. -/// -/// See also `array_search_sorted_with`. -#define array_search_sorted_by(self, field, needle, _index, _exists) \ - _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) - -/// Insert a given `value` into a sorted array, using the given `compare` -/// callback to determine the order. -#define array_insert_sorted_with(self, compare, value) \ - do { \ - unsigned _index, _exists; \ - array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ - if (!_exists) array_insert(self, _index, value); \ - } while (0) - -/// Insert a given `value` into a sorted array, using integer comparisons of -/// a given struct field (specified with a leading dot) to determine the order. -/// -/// See also `array_search_sorted_by`. 
-#define array_insert_sorted_by(self, field, value) \ - do { \ - unsigned _index, _exists; \ - array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ - if (!_exists) array_insert(self, _index, value); \ - } while (0) - -// Private - -typedef Array(void) Array; - -/// This is not what you're looking for, see `array_delete`. -static inline void _array__delete(Array *self) { - if (self->contents) { - ts_free(self->contents); - self->contents = NULL; - self->size = 0; - self->capacity = 0; - } -} - -/// This is not what you're looking for, see `array_erase`. -static inline void _array__erase(Array *self, size_t element_size, - uint32_t index) { - assert(index < self->size); - char *contents = (char *)self->contents; - memmove(contents + index * element_size, contents + (index + 1) * element_size, - (self->size - index - 1) * element_size); - self->size--; -} - -/// This is not what you're looking for, see `array_reserve`. -static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { - if (new_capacity > self->capacity) { - if (self->contents) { - self->contents = ts_realloc(self->contents, new_capacity * element_size); - } else { - self->contents = ts_malloc(new_capacity * element_size); - } - self->capacity = new_capacity; - } -} - -/// This is not what you're looking for, see `array_assign`. -static inline void _array__assign(Array *self, const Array *other, size_t element_size) { - _array__reserve(self, element_size, other->size); - self->size = other->size; - memcpy(self->contents, other->contents, self->size * element_size); -} - -/// This is not what you're looking for, see `array_swap`. -static inline void _array__swap(Array *self, Array *other) { - Array swap = *other; - *other = *self; - *self = swap; -} - -/// This is not what you're looking for, see `array_push` or `array_grow_by`. 
-static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { - uint32_t new_size = self->size + count; - if (new_size > self->capacity) { - uint32_t new_capacity = self->capacity * 2; - if (new_capacity < 8) new_capacity = 8; - if (new_capacity < new_size) new_capacity = new_size; - _array__reserve(self, element_size, new_capacity); - } -} - -/// This is not what you're looking for, see `array_splice`. -static inline void _array__splice(Array *self, size_t element_size, - uint32_t index, uint32_t old_count, - uint32_t new_count, const void *elements) { - uint32_t new_size = self->size + new_count - old_count; - uint32_t old_end = index + old_count; - uint32_t new_end = index + new_count; - assert(old_end <= self->size); - - _array__reserve(self, element_size, new_size); - - char *contents = (char *)self->contents; - if (self->size > old_end) { - memmove( - contents + new_end * element_size, - contents + old_end * element_size, - (self->size - old_end) * element_size - ); - } - if (new_count > 0) { - if (elements) { - memcpy( - (contents + index * element_size), - elements, - new_count * element_size - ); - } else { - memset( - (contents + index * element_size), - 0, - new_count * element_size - ); - } - } - self->size += new_count - old_count; -} - -/// A binary search routine, based on Rust's `std::slice::binary_search_by`. -/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. 
-#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ - do { \ - *(_index) = start; \ - *(_exists) = false; \ - uint32_t size = (self)->size - *(_index); \ - if (size == 0) break; \ - int comparison; \ - while (size > 1) { \ - uint32_t half_size = size / 2; \ - uint32_t mid_index = *(_index) + half_size; \ - comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ - if (comparison <= 0) *(_index) = mid_index; \ - size -= half_size; \ - } \ - comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ - if (comparison == 0) *(_exists) = true; \ - else if (comparison < 0) *(_index) += 1; \ - } while (0) - -/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) -/// parameter by reference in order to work with the generic sorting function above. -#define _compare_int(a, b) ((int)*(a) - (int)(b)) - -#ifdef _MSC_VER -#pragma warning(default : 4101) -#elif defined(__GNUC__) || defined(__clang__) -#pragma GCC diagnostic pop -#endif - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_ARRAY_H_ diff --git a/parser/src/atomic.h b/parser/src/atomic.h deleted file mode 100644 index e680b60e..00000000 --- a/parser/src/atomic.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef TREE_SITTER_ATOMIC_H_ -#define TREE_SITTER_ATOMIC_H_ - -#include -#include -#include - -#ifdef __TINYC__ - -static inline size_t atomic_load(const volatile size_t *p) { - return *p; -} - -static inline uint32_t atomic_inc(volatile uint32_t *p) { - *p += 1; - return *p; -} - -static inline uint32_t atomic_dec(volatile uint32_t *p) { - *p-= 1; - return *p; -} - -#elif defined(_WIN32) - -#include - -static inline size_t atomic_load(const volatile size_t *p) { - return *p; -} - -static inline uint32_t atomic_inc(volatile uint32_t *p) { - return InterlockedIncrement((long volatile *)p); -} - -static inline uint32_t atomic_dec(volatile uint32_t *p) { - return InterlockedDecrement((long volatile *)p); -} - -#else - -static 
inline size_t atomic_load(const volatile size_t *p) { -#ifdef __ATOMIC_RELAXED - return __atomic_load_n(p, __ATOMIC_RELAXED); -#else - return __sync_fetch_and_add((volatile size_t *)p, 0); -#endif -} - -static inline uint32_t atomic_inc(volatile uint32_t *p) { - #ifdef __ATOMIC_RELAXED - return __atomic_add_fetch(p, 1U, __ATOMIC_SEQ_CST); - #else - return __sync_add_and_fetch(p, 1U); - #endif -} - -static inline uint32_t atomic_dec(volatile uint32_t *p) { - #ifdef __ATOMIC_RELAXED - return __atomic_sub_fetch(p, 1U, __ATOMIC_SEQ_CST); - #else - return __sync_sub_and_fetch(p, 1U); - #endif -} - -#endif - -#endif // TREE_SITTER_ATOMIC_H_ diff --git a/parser/src/clock.h b/parser/src/clock.h deleted file mode 100644 index 6e75729e..00000000 --- a/parser/src/clock.h +++ /dev/null @@ -1,146 +0,0 @@ -#ifndef TREE_SITTER_CLOCK_H_ -#define TREE_SITTER_CLOCK_H_ - -#include -#include - -typedef uint64_t TSDuration; - -#ifdef _WIN32 - -// Windows: -// * Represent a time as a performance counter value. -// * Represent a duration as a number of performance counter ticks. 
- -#include -typedef uint64_t TSClock; - -static inline TSDuration duration_from_micros(uint64_t micros) { - LARGE_INTEGER frequency; - QueryPerformanceFrequency(&frequency); - return micros * (uint64_t)frequency.QuadPart / 1000000; -} - -static inline uint64_t duration_to_micros(TSDuration self) { - LARGE_INTEGER frequency; - QueryPerformanceFrequency(&frequency); - return self * 1000000 / (uint64_t)frequency.QuadPart; -} - -static inline TSClock clock_null(void) { - return 0; -} - -static inline TSClock clock_now(void) { - LARGE_INTEGER result; - QueryPerformanceCounter(&result); - return (uint64_t)result.QuadPart; -} - -static inline TSClock clock_after(TSClock base, TSDuration duration) { - return base + duration; -} - -static inline bool clock_is_null(TSClock self) { - return !self; -} - -static inline bool clock_is_gt(TSClock self, TSClock other) { - return self > other; -} - -#elif defined(CLOCK_MONOTONIC) && !defined(__APPLE__) - -// POSIX with monotonic clock support (Linux) -// * Represent a time as a monotonic (seconds, nanoseconds) pair. -// * Represent a duration as a number of microseconds. -// -// On these platforms, parse timeouts will correspond accurately to -// real time, regardless of what other processes are running. 
- -#include -typedef struct timespec TSClock; - -static inline TSDuration duration_from_micros(uint64_t micros) { - return micros; -} - -static inline uint64_t duration_to_micros(TSDuration self) { - return self; -} - -static inline TSClock clock_now(void) { - TSClock result; - clock_gettime(CLOCK_MONOTONIC, &result); - return result; -} - -static inline TSClock clock_null(void) { - return (TSClock) {0, 0}; -} - -static inline TSClock clock_after(TSClock base, TSDuration duration) { - TSClock result = base; - result.tv_sec += duration / 1000000; - result.tv_nsec += (duration % 1000000) * 1000; - if (result.tv_nsec >= 1000000000) { - result.tv_nsec -= 1000000000; - ++(result.tv_sec); - } - return result; -} - -static inline bool clock_is_null(TSClock self) { - return !self.tv_sec; -} - -static inline bool clock_is_gt(TSClock self, TSClock other) { - if (self.tv_sec > other.tv_sec) return true; - if (self.tv_sec < other.tv_sec) return false; - return self.tv_nsec > other.tv_nsec; -} - -#else - -// macOS or POSIX without monotonic clock support -// * Represent a time as a process clock value. -// * Represent a duration as a number of process clock ticks. -// -// On these platforms, parse timeouts may be affected by other processes, -// which is not ideal, but is better than using a non-monotonic time API -// like `gettimeofday`. 
-
-#include
-typedef uint64_t TSClock;
-
-static inline TSDuration duration_from_micros(uint64_t micros) {
-  return micros * (uint64_t)CLOCKS_PER_SEC / 1000000;
-}
-
-static inline uint64_t duration_to_micros(TSDuration self) {
-  return self * 1000000 / (uint64_t)CLOCKS_PER_SEC;
-}
-
-static inline TSClock clock_null(void) {
-  return 0;
-}
-
-static inline TSClock clock_now(void) {
-  return (uint64_t)clock();
-}
-
-static inline TSClock clock_after(TSClock base, TSDuration duration) {
-  return base + duration;
-}
-
-static inline bool clock_is_null(TSClock self) {
-  return !self;
-}
-
-static inline bool clock_is_gt(TSClock self, TSClock other) {
-  return self > other;
-}
-
-#endif
-
-#endif // TREE_SITTER_CLOCK_H_
diff --git a/parser/src/combined.c b/parser/src/combined.c
new file mode 100644
index 00000000..7a582f28
--- /dev/null
+++ b/parser/src/combined.c
@@ -0,0 +1,11843 @@
+#include "./api.h"
+
+// Forward declarations of functions used before their definitions in this
+// combined translation unit.
+uint32_t ts_node_end_byte(TSNode self);
+TSNode ts_node_parent(TSNode self);
+bool ts_node_is_null(TSNode self);
+uint32_t ts_node_child_count(TSNode self);
+TSNode ts_tree_root_node(const TSTree *self);
+TSNode ts_node_child_containing_descendant(TSNode self, TSNode subnode);
+void ts_parser_reset(TSParser *self);
+bool ts_parser_set_language(TSParser *self, const TSLanguage *language);
+void ts_query_delete(TSQuery *self);
+void ts_tree_cursor_delete(TSTreeCursor *_self);
+void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node);
+bool ts_tree_cursor_goto_parent(TSTreeCursor *_self);
+TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self);
+
+// #define DEBUG_GET_CHANGED_RANGES
+
+// Record the range [start, end) in `self`.
+// If `start` begins at or before the end byte of the last stored range, that
+// last range is extended to `end` instead of pushing a new one. Otherwise a
+// new range is pushed, but only when it is non-empty (start.bytes <
+// end.bytes).
+static void ts_range_array_add(TSRangeArray *self, Length start, Length end)
+{
+	if (self->size > 0)
+	{
+		TSRange *last_range = array_back(self);
+		if (start.bytes <= last_range->end_byte)
+		{
+			last_range->end_byte = end.bytes;
+			last_range->end_point = end.extent;
+			return;
+		}
+	}
+
+	if (start.bytes < end.bytes)
+	{
+		// Positional initializer: start point, end point, start byte, end byte.
+		TSRange range = {start.extent, end.extent, start.bytes, end.bytes};
+		array_push(self, range);
+	}
+}
+
+// Report whether a range at index >= start_index overlaps the byte span
+// [start_byte, end_byte).
+// The first range whose end lies past `start_byte` decides the answer: true
+// unless that range begins at or after `end_byte`.
+// NOTE(review): this early exit presumably relies on the ranges being sorted
+// by position -- confirm with the callers.
+bool ts_range_array_intersects(const TSRangeArray *self, unsigned start_index,
+							   uint32_t start_byte, uint32_t end_byte)
+{
+	for (unsigned i = start_index; i < self->size; i++)
+	{
+		TSRange *range = &self->contents[i];
+		if (range->end_byte > start_byte)
+		{
+			if (range->start_byte >= end_byte)
+				break;
+			return true;
+		}
+	}
+	return false;
+}
+
+// Compare two lists of ranges and append to `differences` every span that is
+// covered by exactly one of the two lists.
+// Both lists are walked in parallel by byte position. `in_old_range` and
+// `in_new_range` are toggled each time a range boundary is crossed, and a
+// span is recorded whenever the two flags disagree.
+void ts_range_array_get_changed_ranges(const TSRange *old_ranges,
+									   unsigned old_range_count,
+									   const TSRange *new_ranges,
+									   unsigned new_range_count,
+									   TSRangeArray *differences)
+{
+	unsigned new_index = 0;
+	unsigned old_index = 0;
+	Length current_position = length_zero();
+	bool in_old_range = false;
+	bool in_new_range = false;
+
+	while (old_index < old_range_count || new_index < new_range_count)
+	{
+		// These may point one past the end of a list; they are only
+		// dereferenced in the branches guarded by the flag/index checks below.
+		const TSRange *old_range = &old_ranges[old_index];
+		const TSRange *new_range = &new_ranges[new_index];
+
+		// Next boundary in the old list: end of the current range when inside
+		// one, start of the next range otherwise, LENGTH_MAX when exhausted.
+		Length next_old_position;
+		if (in_old_range)
+		{
+			next_old_position =
+				(Length){old_range->end_byte, old_range->end_point};
+		}
+		else if (old_index < old_range_count)
+		{
+			next_old_position =
+				(Length){old_range->start_byte, old_range->start_point};
+		}
+		else
+		{
+			next_old_position = LENGTH_MAX;
+		}
+
+		// Same computation for the new list.
+		Length next_new_position;
+		if (in_new_range)
+		{
+			next_new_position =
+				(Length){new_range->end_byte, new_range->end_point};
+		}
+		else if (new_index < new_range_count)
+		{
+			next_new_position =
+				(Length){new_range->start_byte, new_range->start_point};
+		}
+		else
+		{
+			next_new_position = LENGTH_MAX;
+		}
+
+		// Advance to whichever boundary comes first; on a tie both lists
+		// cross their boundary at once.
+		if (next_old_position.bytes < next_new_position.bytes)
+		{
+			if (in_old_range != in_new_range)
+			{
+				ts_range_array_add(differences, current_position,
+								   next_old_position);
+			}
+			if (in_old_range)
+				old_index++;
+			current_position = next_old_position;
+			in_old_range = !in_old_range;
+		}
+		else if (next_new_position.bytes < next_old_position.bytes)
+		{
+			if (in_old_range != in_new_range)
+			{
+				ts_range_array_add(differences, current_position,
+								   next_new_position);
+			}
+			if (in_new_range)
+				new_index++;
+			current_position = next_new_position;
+			in_new_range = !in_new_range;
+		}
+		else
+		{
+			if (in_old_range != in_new_range)
+			{
+				ts_range_array_add(differences, current_position,
+								   next_new_position);
+			}
+			if (in_old_range)
+				old_index++;
+			if (in_new_range)
+				new_index++;
+			in_old_range = !in_old_range;
+			in_new_range = !in_new_range;
+			current_position = next_new_position;
+		}
+	}
+}
+
+// Traversal state used while diffing two trees: a tree cursor, the language
+// used for alias lookups, a counter of visible nodes on the current path,
+// and a flag telling whether the iterator currently stands on the padding in
+// front of its node rather than on the node itself.
+typedef struct
+{
+	TreeCursor cursor;
+	const TSLanguage *language;
+	unsigned visible_depth;
+	bool in_padding;
+} Iterator;
+
+// Build an iterator positioned on `tree`.
+// The caller's cursor stack is cleared and seeded with a single entry for
+// `tree` at position zero, then the cursor struct is copied by value into
+// the returned iterator.
+static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree,
+							 const TSLanguage *language)
+{
+	array_clear(&cursor->stack);
+	array_push(&cursor->stack, ((TreeCursorEntry){
+								   .subtree = tree,
+								   .position = length_zero(),
+								   .child_index = 0,
+								   .structural_child_index = 0,
+							   }));
+	return (Iterator){
+		.cursor = *cursor,
+		.language = language,
+		.visible_depth = 1,
+		.in_padding = false,
+	};
+}
+
+// The iterator is finished once its cursor stack is empty.
+static bool iterator_done(Iterator *self)
+{
+	return self->cursor.stack.size == 0;
+}
+
+// Start of the span the iterator currently covers: the entry position while
+// in padding, otherwise the entry position plus the node's padding.
+static Length iterator_start_position(Iterator *self)
+{
+	TreeCursorEntry entry = *array_back(&self->cursor.stack);
+	if (self->in_padding)
+	{
+		return entry.position;
+	}
+	else
+	{
+		return length_add(entry.position, ts_subtree_padding(*entry.subtree));
+	}
+}
+
+// End of the span the iterator currently covers: the end of the padding
+// while in padding, otherwise the end of the node itself.
+static Length iterator_end_position(Iterator *self)
+{
+	TreeCursorEntry entry = *array_back(&self->cursor.stack);
+	Length result =
+		length_add(entry.position, ts_subtree_padding(*entry.subtree));
+	if (self->in_padding)
+	{
+		return result;
+	}
+	else
+	{
+		return length_add(result, ts_subtree_size(*entry.subtree));
+	}
+}
+
+// A node counts as visible when its own `visible` flag is set, or when its
+// parent's production gives it a non-zero alias at its structural child
+// index.
+static bool iterator_tree_is_visible(const Iterator *self)
+{
+	TreeCursorEntry entry = *array_back(&self->cursor.stack);
+	if (ts_subtree_visible(*entry.subtree))
+		return true;
+	if (self->cursor.stack.size > 1)
+	{
+		// The parent is read through `ptr`; presumably safe because a node
+		// with children is never stored inline -- TODO confirm.
+		Subtree parent =
+			*self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
+		return ts_language_alias_at(self->language, parent.ptr->production_id,
+									entry.structural_child_index) != 0;
+	}
+	return false;
+}
+
+// Find the innermost visible (or aliased) node on the iterator's path and
+// report it through the out-parameters. The outputs are left untouched when
+// no such node exists.
+// Callers must initialize `*alias_symbol` beforehand: for the root entry
+// (i == 0) it is tested without having been assigned here.
+static void iterator_get_visible_state(const Iterator *self, Subtree *tree,
+									   TSSymbol *alias_symbol,
+									   uint32_t *start_byte)
+{
+	uint32_t i = self->cursor.stack.size - 1;
+
+	// While in padding the top entry has not been entered yet, so start the
+	// search from its parent.
+	if (self->in_padding)
+	{
+		if (i == 0)
+			return;
+		i--;
+	}
+
+	// `i + 1 > 0` lets the unsigned index reach 0 and then stop on wraparound.
+	for (; i + 1 > 0; i--)
+	{
+		TreeCursorEntry entry = self->cursor.stack.contents[i];
+
+		if (i > 0)
+		{
+			const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
+			*alias_symbol =
+				ts_language_alias_at(self->language, parent->ptr->production_id,
+									 entry.structural_child_index);
+		}
+
+		if (ts_subtree_visible(*entry.subtree) || *alias_symbol)
+		{
+			*tree = *entry.subtree;
+			*start_byte = entry.position.bytes;
+			break;
+		}
+	}
+}
+
+// Pop one entry from the path. The bookkeeping is done before the pop
+// because the visibility test inspects the entry on top of the stack.
+static void iterator_ascend(Iterator *self)
+{
+	if (iterator_done(self))
+		return;
+	if (iterator_tree_is_visible(self) && !self->in_padding)
+		self->visible_depth--;
+	if (array_back(&self->cursor.stack)->child_index > 0)
+		self->in_padding = false;
+	self->cursor.stack.size--;
+}
+
+// Descend towards the first child whose end lies past `goal_position`,
+// continuing through invisible nodes until a visible one is reached.
+// Returns true when a visible node was entered (or its padding, in which
+// case `in_padding` is set), false when in padding already or when no child
+// extends past the goal.
+static bool iterator_descend(Iterator *self, uint32_t goal_position)
+{
+	if (self->in_padding)
+		return false;
+
+	bool did_descend = false;
+	do
+	{
+		did_descend = false;
+		TreeCursorEntry entry = *array_back(&self->cursor.stack);
+		Length position = entry.position;
+		uint32_t structural_child_index = 0;
+		for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n;
+			 i++)
+		{
+			const Subtree *child = &ts_subtree_children(*entry.subtree)[i];
+			Length child_left =
+				length_add(position, ts_subtree_padding(*child));
+			Length child_right =
+				length_add(child_left, ts_subtree_size(*child));
+
+			if (child_right.bytes > goal_position)
+			{
+				// The push must happen before the visibility test, which
+				// looks at the top of the stack.
+				array_push(&self->cursor.stack,
+						   ((TreeCursorEntry){
+							   .subtree = child,
+							   .position = position,
+							   .child_index = i,
+							   .structural_child_index = structural_child_index,
+						   }));
+
+				if (iterator_tree_is_visible(self))
+				{
+					// The goal falls before the child's content starts, so
+					// the iterator stops on its padding.
+					if (child_left.bytes > goal_position)
+					{
+						self->in_padding = true;
+					}
+					else
+					{
+						self->visible_depth++;
+					}
+					return true;
+				}
+
+				// Invisible child: keep descending from it.
+				did_descend = true;
+				break;
+			}
+
+			position = child_right;
+			// Only non-extra children advance the structural index.
+			if (!ts_subtree_extra(*child))
+				structural_child_index++;
+		}
+	} while (did_descend);
+
+	return false;
+}
+
+// Move to the next span in document order.
+// When standing on padding, step onto the node itself (or descend into it
+// if it is not visible). Otherwise pop entries until one has a following
+// sibling, move to that sibling, and stop on its padding if it has any.
+static void iterator_advance(Iterator *self)
+{
+	if (self->in_padding)
+	{
+		self->in_padding = false;
+		if (iterator_tree_is_visible(self))
+		{
+			self->visible_depth++;
+		}
+		else
+		{
+			iterator_descend(self, 0);
+		}
+		return;
+	}
+
+	for (;;)
+	{
+		if (iterator_tree_is_visible(self))
+			self->visible_depth--;
+		TreeCursorEntry entry = array_pop(&self->cursor.stack);
+		if (iterator_done(self))
+			return;
+
+		const Subtree *parent = array_back(&self->cursor.stack)->subtree;
+		uint32_t child_index = entry.child_index + 1;
+		if (ts_subtree_child_count(*parent) > child_index)
+		{
+			// The sibling starts where the popped entry (padding + size) ends.
+			Length position = length_add(entry.position,
+										 ts_subtree_total_size(*entry.subtree));
+			uint32_t structural_child_index = entry.structural_child_index;
+			if (!ts_subtree_extra(*entry.subtree))
+				structural_child_index++;
+			const Subtree *next_child =
+				&ts_subtree_children(*parent)[child_index];
+
+			array_push(&self->cursor.stack,
+					   ((TreeCursorEntry){
+						   .subtree = next_child,
+						   .position = position,
+						   .child_index = child_index,
+						   .structural_child_index = structural_child_index,
+					   }));
+
+			if (iterator_tree_is_visible(self))
+			{
+				if (ts_subtree_padding(*next_child).bytes > 0)
+				{
+					self->in_padding = true;
+				}
+				else
+				{
+					self->visible_depth++;
+				}
+			}
+			else
+			{
+				iterator_descend(self, 0);
+			}
+			break;
+		}
+	}
+}
+
+// Outcome of comparing the nodes two iterators stand on.
+typedef enum
+{
+	IteratorDiffers,
+	IteratorMayDiffer,
+	IteratorMatches,
+} IteratorComparison;
+
+// Compare the innermost visible nodes of both iterators.
+// - Matches: neither iterator has a visible node, or symbol, alias, start
+//   byte and size agree and none of the disqualifying conditions in the
+//   inner `if` hold (changes flagged on the old node, error symbol,
+//   TS_TREE_STATE_NONE parse state, differing ERROR_STATE status).
+// - MayDiffer: symbol and alias agree but one of those checks fails.
+// - Differs: only one side has a visible node, or symbol/alias disagree.
+static IteratorComparison iterator_compare(const Iterator *old_iter,
+										   const Iterator *new_iter)
+{
+	Subtree old_tree = NULL_SUBTREE;
+	Subtree new_tree = NULL_SUBTREE;
+	uint32_t old_start = 0;
+	uint32_t new_start = 0;
+	TSSymbol old_alias_symbol = 0;
+	TSSymbol new_alias_symbol = 0;
+	iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol,
+							   &old_start);
+	iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol,
+							   &new_start);
+
+	if (!old_tree.ptr && !new_tree.ptr)
+		return IteratorMatches;
+	if (!old_tree.ptr || !new_tree.ptr)
+		return IteratorDiffers;
+
+	if (old_alias_symbol == new_alias_symbol &&
+		ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree))
+	{
+		if (old_start == new_start && !ts_subtree_has_changes(old_tree) &&
+			ts_subtree_symbol(old_tree) != ts_builtin_sym_error &&
+			ts_subtree_size(old_tree).bytes ==
+				ts_subtree_size(new_tree).bytes &&
+			ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE &&
+			ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE &&
+			(ts_subtree_parse_state(old_tree) == ERROR_STATE) ==
+				(ts_subtree_parse_state(new_tree) == ERROR_STATE))
+		{
+			return IteratorMatches;
+		}
+		else
+		{
+			return IteratorMayDiffer;
+		}
+	}
+
+	return IteratorDiffers;
+}
+
+#ifdef DEBUG_GET_CHANGED_RANGES
+// Debug helper: print the symbol name, padding marker, visible depth and
+// start/end points of the node the iterator stands on. Only compiled when
+// DEBUG_GET_CHANGED_RANGES is defined.
+static inline void iterator_print_state(Iterator *self)
+{
+	TreeCursorEntry entry = *array_back(&self->cursor.stack);
+	TSPoint start = iterator_start_position(self).extent;
+	TSPoint end = iterator_end_position(self).extent;
+	const char *name = ts_language_symbol_name(
+		self->language, ts_subtree_symbol(*entry.subtree));
+	printf("(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", name,
+		   self->in_padding ? "(p)" : " ", self->visible_depth, start.row + 1,
+		   start.column, end.row + 1, end.column);
+}
+#endif
+
+// Walk `old_tree` and `new_tree` side by side and collect the ranges in
+// which they differ.
+// `cursor1` and `cursor2` provide the traversal stacks; the iterators' final
+// cursor states are written back to them before returning.
+// `included_range_differences` forces a closer look at nodes that overlap
+// one of its ranges even when they otherwise appear identical.
+// On return `*ranges` receives the contents pointer of the result array and
+// the function returns its element count.
+// NOTE(review): the result array is not deleted here, so the caller
+// presumably takes ownership of `*ranges` -- confirm.
+unsigned ts_subtree_get_changed_ranges(
+	const Subtree *old_tree, const Subtree *new_tree, TreeCursor *cursor1,
+	TreeCursor *cursor2, const TSLanguage *language,
+	const TSRangeArray *included_range_differences, TSRange **ranges)
+{
+	TSRangeArray results = array_new();
+
+	Iterator old_iter = iterator_new(cursor1, old_tree, language);
+	Iterator new_iter = iterator_new(cursor2, new_tree, language);
+
+	unsigned included_range_difference_index = 0;
+
+	// If the two trees do not start at the same byte, the gap between the
+	// two start positions is itself a change.
+	Length position = iterator_start_position(&old_iter);
+	Length next_position = iterator_start_position(&new_iter);
+	if (position.bytes < next_position.bytes)
+	{
+		ts_range_array_add(&results, position, next_position);
+		position = next_position;
+	}
+	else if (position.bytes > next_position.bytes)
+	{
+		ts_range_array_add(&results, next_position, position);
+		next_position = position;
+	}
+
+	do
+	{
+#ifdef DEBUG_GET_CHANGED_RANGES
+		printf("At [%-2u, %-2u] Compare ", position.extent.row + 1,
+			   position.extent.column);
+		iterator_print_state(&old_iter);
+		printf("\tvs\t");
+		iterator_print_state(&new_iter);
+		puts("");
+#endif
+
+		// Compare the old and new subtrees.
+		IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
+
+		// Even if the two subtrees appear to be identical, they could differ
+		// internally if they contain a range of text that was previously
+		// excluded from the parse, and is now included, or vice-versa.
+		if (comparison == IteratorMatches &&
+			ts_range_array_intersects(
+				included_range_differences, included_range_difference_index,
+				position.bytes, iterator_end_position(&old_iter).bytes))
+		{
+			comparison = IteratorMayDiffer;
+		}
+
+		bool is_changed = false;
+		switch (comparison)
+		{
+		// If the subtrees are definitely identical, move to the end
+		// of both subtrees.
+		case IteratorMatches:
+			next_position = iterator_end_position(&old_iter);
+			break;
+
+		// If the subtrees might differ internally, descend into both
+		// subtrees, finding the first child that spans the current position.
+		// When both iterators descend, next_position is left unchanged and
+		// the children are compared on the next loop iteration.
+		case IteratorMayDiffer:
+			if (iterator_descend(&old_iter, position.bytes))
+			{
+				if (!iterator_descend(&new_iter, position.bytes))
+				{
+					is_changed = true;
+					next_position = iterator_end_position(&old_iter);
+				}
+			}
+			else if (iterator_descend(&new_iter, position.bytes))
+			{
+				is_changed = true;
+				next_position = iterator_end_position(&new_iter);
+			}
+			else
+			{
+				next_position = length_min(iterator_end_position(&old_iter),
+										   iterator_end_position(&new_iter));
+			}
+			break;
+
+		// If the subtrees are different, record a change and then move
+		// to the end of both subtrees.
+		case IteratorDiffers:
+			is_changed = true;
+			next_position = length_min(iterator_end_position(&old_iter),
+									   iterator_end_position(&new_iter));
+			break;
+		}
+
+		// Ensure that both iterators are caught up to the current position.
+		while (!iterator_done(&old_iter) &&
+			   iterator_end_position(&old_iter).bytes <= next_position.bytes)
+			iterator_advance(&old_iter);
+		while (!iterator_done(&new_iter) &&
+			   iterator_end_position(&new_iter).bytes <= next_position.bytes)
+			iterator_advance(&new_iter);
+
+		// Ensure that both iterators are at the same depth in the tree.
+		while (old_iter.visible_depth > new_iter.visible_depth)
+		{
+			iterator_ascend(&old_iter);
+		}
+		while (new_iter.visible_depth > old_iter.visible_depth)
+		{
+			iterator_ascend(&new_iter);
+		}
+
+		if (is_changed)
+		{
+#ifdef DEBUG_GET_CHANGED_RANGES
+			printf(" change: [[%u, %u] - [%u, %u]]\n", position.extent.row + 1,
+				   position.extent.column, next_position.extent.row + 1,
+				   next_position.extent.column);
+#endif
+
+			ts_range_array_add(&results, position, next_position);
+		}
+
+		position = next_position;
+
+		// Keep track of the current position in the included range differences
+		// array in order to avoid scanning the entire array on each iteration.
+		while (included_range_difference_index <
+			   included_range_differences->size)
+		{
+			const TSRange *range =
+				&included_range_differences
+					 ->contents[included_range_difference_index];
+			if (range->end_byte <= position.bytes)
+			{
+				included_range_difference_index++;
+			}
+			else
+			{
+				break;
+			}
+		}
+	} while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
+
+	// Whatever one tree has beyond the end of the other is a change as well.
+	Length old_size = ts_subtree_total_size(*old_tree);
+	Length new_size = ts_subtree_total_size(*new_tree);
+	if (old_size.bytes < new_size.bytes)
+	{
+		ts_range_array_add(&results, old_size, new_size);
+	}
+	else if (new_size.bytes < old_size.bytes)
+	{
+		ts_range_array_add(&results, new_size, old_size);
+	}
+
+	// Hand the (possibly reallocated) cursor stacks back to the caller.
+	*cursor1 = old_iter.cursor;
+	*cursor2 = new_iter.cursor;
+	*ranges = results.contents;
+	return results.size;
+}
+
+// Returns the same pointer it was given; no copy is made.
+const TSLanguage *ts_language_copy(const TSLanguage *self)
+{
+	return self;
+}
+
+// Intentionally does nothing; the cast only silences the unused-parameter
+// warning.
+void ts_language_delete(const TSLanguage *self)
+{
+	(void)(self);
+}
+
+// Symbol count including aliases (symbol_count + alias_count).
+uint32_t ts_language_symbol_count(const TSLanguage *self)
+{
+	return self->symbol_count + self->alias_count;
+}
+
+// Value of the language's `state_count` field.
+uint32_t ts_language_state_count(const TSLanguage *self)
+{
+	return self->state_count;
+}
+
+// Value of the language's `version` field.
+uint32_t ts_language_version(const TSLanguage *self)
+{
+	return self->version;
+}
+
+uint32_t ts_language_field_count(const TSLanguage *self)
+{
+	return self->field_count;
+} + +void ts_language_table_entry(const TSLanguage *self, TSStateId state, + TSSymbol symbol, TableEntry *result) +{ + if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) + { + result->action_count = 0; + result->is_reusable = false; + result->actions = NULL; + } + else + { + assert(symbol < self->token_count); + uint32_t action_index = ts_language_lookup(self, state, symbol); + const TSParseActionEntry *entry = &self->parse_actions[action_index]; + result->action_count = entry->entry.count; + result->is_reusable = entry->entry.reusable; + result->actions = (const TSParseAction *)(entry + 1); + } +} + +TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *self, + TSSymbol symbol) +{ + if (symbol == ts_builtin_sym_error) + { + return (TSSymbolMetadata){.visible = true, .named = true}; + } + else if (symbol == ts_builtin_sym_error_repeat) + { + return (TSSymbolMetadata){.visible = false, .named = false}; + } + else + { + return self->symbol_metadata[symbol]; + } +} + +TSSymbol ts_language_public_symbol(const TSLanguage *self, TSSymbol symbol) +{ + if (symbol == ts_builtin_sym_error) + return symbol; + return self->public_symbol_map[symbol]; +} + +TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, + TSSymbol symbol) +{ + if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) + { + return 0; + } + else if (symbol < self->token_count) + { + uint32_t count; + const TSParseAction *actions = + ts_language_actions(self, state, symbol, &count); + if (count > 0) + { + TSParseAction action = actions[count - 1]; + if (action.type == TSParseActionTypeShift) + { + return action.shift.extra ? 
state : action.shift.state; + } + } + return 0; + } + else + { + return ts_language_lookup(self, state, symbol); + } +} + +const char *ts_language_symbol_name(const TSLanguage *self, TSSymbol symbol) +{ + if (symbol == ts_builtin_sym_error) + { + return "ERROR"; + } + else if (symbol == ts_builtin_sym_error_repeat) + { + return "_ERROR"; + } + else if (symbol < ts_language_symbol_count(self)) + { + return self->symbol_names[symbol]; + } + else + { + return NULL; + } +} + +TSSymbol ts_language_symbol_for_name(const TSLanguage *self, const char *string, + uint32_t length, bool is_named) +{ + if (!strncmp(string, "ERROR", length)) + return ts_builtin_sym_error; + uint16_t count = (uint16_t)ts_language_symbol_count(self); + for (TSSymbol i = 0; i < count; i++) + { + TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i); + if ((!metadata.visible && !metadata.supertype) || + metadata.named != is_named) + continue; + const char *symbol_name = self->symbol_names[i]; + if (!strncmp(symbol_name, string, length) && !symbol_name[length]) + { + return self->public_symbol_map[i]; + } + } + return 0; +} + +TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol) +{ + TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol); + if (metadata.named && metadata.visible) + { + return TSSymbolTypeRegular; + } + else if (metadata.visible) + { + return TSSymbolTypeAnonymous; + } + else + { + return TSSymbolTypeAuxiliary; + } +} + +const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id) +{ + uint32_t count = ts_language_field_count(self); + if (count && id <= count) + { + return self->field_names[id]; + } + else + { + return NULL; + } +} + +TSFieldId ts_language_field_id_for_name(const TSLanguage *self, + const char *name, uint32_t name_length) +{ + uint16_t count = (uint16_t)ts_language_field_count(self); + for (TSSymbol i = 1; i < count + 1; i++) + { + switch (strncmp(name, self->field_names[i], name_length)) + { + case 
0: + if (self->field_names[i][name_length] == 0) + return i; + break; + case -1: + return 0; + default: + break; + } + } + return 0; +} + +TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, + TSStateId state) +{ + if (state >= self->state_count) + return NULL; + LookaheadIterator *iterator = malloc(sizeof(LookaheadIterator)); + *iterator = ts_language_lookaheads(self, state); + return (TSLookaheadIterator *)iterator; +} + +void ts_lookahead_iterator_delete(TSLookaheadIterator *self) +{ + free(self); +} + +bool ts_lookahead_iterator_reset_state(TSLookaheadIterator *self, + TSStateId state) +{ + LookaheadIterator *iterator = (LookaheadIterator *)self; + if (state >= iterator->language->state_count) + return false; + *iterator = ts_language_lookaheads(iterator->language, state); + return true; +} + +const TSLanguage *ts_lookahead_iterator_language( + const TSLookaheadIterator *self) +{ + const LookaheadIterator *iterator = (const LookaheadIterator *)self; + return iterator->language; +} + +bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, + const TSLanguage *language, TSStateId state) +{ + if (state >= language->state_count) + return false; + LookaheadIterator *iterator = (LookaheadIterator *)self; + *iterator = ts_language_lookaheads(language, state); + return true; +} + +bool ts_lookahead_iterator_next(TSLookaheadIterator *self) +{ + LookaheadIterator *iterator = (LookaheadIterator *)self; + return ts_lookahead_iterator__next(iterator); +} + +TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) +{ + const LookaheadIterator *iterator = (const LookaheadIterator *)self; + return iterator->symbol; +} + +const char *ts_lookahead_iterator_current_symbol_name( + const TSLookaheadIterator *self) +{ + const LookaheadIterator *iterator = (const LookaheadIterator *)self; + return ts_language_symbol_name(iterator->language, iterator->symbol); +} + +static const int32_t BYTE_ORDER_MARK = 0xFEFF; + +static const TSRange 
DEFAULT_RANGE = {.start_point = + { + .row = 0, + .column = 0, + }, + .end_point = + { + .row = UINT32_MAX, + .column = UINT32_MAX, + }, + .start_byte = 0, + .end_byte = UINT32_MAX}; + +// Check if the lexer has reached EOF. This state is stored +// by setting the lexer's `current_included_range_index` such that +// it has consumed all of its available ranges. +static bool ts_lexer__eof(const TSLexer *_self) +{ + Lexer *self = (Lexer *)_self; + return self->current_included_range_index == self->included_range_count; +} + +// Clear the currently stored chunk of source code, because the lexer's +// position has changed. +static void ts_lexer__clear_chunk(Lexer *self) +{ + self->chunk = NULL; + self->chunk_size = 0; + self->chunk_start = 0; +} + +// Call the lexer's input callback to obtain a new chunk of source code +// for the current position. +static void ts_lexer__get_chunk(Lexer *self) +{ + self->chunk_start = self->current_position.bytes; + self->chunk = + self->input.read(self->input.payload, self->current_position.bytes, + self->current_position.extent, &self->chunk_size); + if (!self->chunk_size) + { + self->current_included_range_index = self->included_range_count; + self->chunk = NULL; + } +} + +uint32_t ascii_decode(const uint8_t *chunk, uint32_t size, int32_t *codepoint) +{ + (void)(size); + *(uint8_t *)codepoint = *chunk; + return (1); +} + +typedef uint32_t (*UnicodeDecodeFunction)(const uint8_t *chunk, uint32_t size, + int32_t *codepoint); + +// Decode the next unicode character in the current chunk of source code. +// This assumes that the lexer has already retrieved a chunk of source +// code that spans the current position. 
+static void ts_lexer__get_lookahead(Lexer *self) +{ + uint32_t position_in_chunk = + self->current_position.bytes - self->chunk_start; + uint32_t size = self->chunk_size - position_in_chunk; + + if (size == 0) + { + self->lookahead_size = 1; + self->data.lookahead = '\0'; + return; + } + + const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk; + UnicodeDecodeFunction decode = ascii_decode; + + self->lookahead_size = decode(chunk, size, &self->data.lookahead); + + // If this chunk ended in the middle of a multi-byte character, + // try again with a fresh chunk. + if (self->data.lookahead == TS_DECODE_ERROR && size < 4) + { + ts_lexer__get_chunk(self); + chunk = (const uint8_t *)self->chunk; + size = self->chunk_size; + self->lookahead_size = decode(chunk, size, &self->data.lookahead); + } + + if (self->data.lookahead == TS_DECODE_ERROR) + { + self->lookahead_size = 1; + } +} + +static void ts_lexer_goto(Lexer *self, Length position) +{ + self->current_position = position; + + // Move to the first valid position at or after the given position. + bool found_included_range = false; + for (unsigned i = 0; i < self->included_range_count; i++) + { + TSRange *included_range = &self->included_ranges[i]; + if (included_range->end_byte > self->current_position.bytes && + included_range->end_byte > included_range->start_byte) + { + if (included_range->start_byte >= self->current_position.bytes) + { + self->current_position = (Length){ + .bytes = included_range->start_byte, + .extent = included_range->start_point, + }; + } + + self->current_included_range_index = i; + found_included_range = true; + break; + } + } + + if (found_included_range) + { + // If the current position is outside of the current chunk of text, + // then clear out the current chunk of text. 
+ if (self->chunk && (self->current_position.bytes < self->chunk_start || + self->current_position.bytes >= + self->chunk_start + self->chunk_size)) + { + ts_lexer__clear_chunk(self); + } + + self->lookahead_size = 0; + self->data.lookahead = '\0'; + } + + // If the given position is beyond any of included ranges, move to the EOF + // state - past the end of the included ranges. + else + { + self->current_included_range_index = self->included_range_count; + TSRange *last_included_range = + &self->included_ranges[self->included_range_count - 1]; + self->current_position = (Length){ + .bytes = last_included_range->end_byte, + .extent = last_included_range->end_point, + }; + ts_lexer__clear_chunk(self); + self->lookahead_size = 1; + self->data.lookahead = '\0'; + } +} + +// Intended to be called only from functions that control logging. +static void ts_lexer__do_advance(Lexer *self, bool skip) +{ + if (self->lookahead_size) + { + self->current_position.bytes += self->lookahead_size; + if (self->data.lookahead == '\n') + { + self->current_position.extent.row++; + self->current_position.extent.column = 0; + } + else + { + self->current_position.extent.column += self->lookahead_size; + } + } + + const TSRange *current_range = + &self->included_ranges[self->current_included_range_index]; + while (self->current_position.bytes >= current_range->end_byte || + current_range->end_byte == current_range->start_byte) + { + if (self->current_included_range_index < self->included_range_count) + { + self->current_included_range_index++; + } + if (self->current_included_range_index < self->included_range_count) + { + current_range++; + self->current_position = (Length){ + current_range->start_byte, + current_range->start_point, + }; + } + else + { + current_range = NULL; + break; + } + } + + if (skip) + self->token_start_position = self->current_position; + + if (current_range) + { + if (self->current_position.bytes < self->chunk_start || + self->current_position.bytes >= + 
self->chunk_start + self->chunk_size) + { + ts_lexer__get_chunk(self); + } + ts_lexer__get_lookahead(self); + } + else + { + ts_lexer__clear_chunk(self); + self->data.lookahead = '\0'; + self->lookahead_size = 1; + } +} + +// Advance to the next character in the source code, retrieving a new +// chunk of source code if needed. +static void ts_lexer__advance(TSLexer *_self, bool skip) +{ + Lexer *self = (Lexer *)_self; + if (!self->chunk) + return; + + if (skip) + { + } + else + { + } + + ts_lexer__do_advance(self, skip); +} + +// Mark that a token match has completed. This can be called multiple +// times if a longer match is found later. +static void ts_lexer__mark_end(TSLexer *_self) +{ + Lexer *self = (Lexer *)_self; + if (!ts_lexer__eof(&self->data)) + { + // If the lexer is right at the beginning of included range, + // then the token should be considered to end at the *end* of the + // previous included range, rather than here. + TSRange *current_included_range = + &self->included_ranges[self->current_included_range_index]; + if (self->current_included_range_index > 0 && + self->current_position.bytes == current_included_range->start_byte) + { + TSRange *previous_included_range = current_included_range - 1; + self->token_end_position = (Length){ + previous_included_range->end_byte, + previous_included_range->end_point, + }; + return; + } + } + self->token_end_position = self->current_position; +} + +static uint32_t ts_lexer__get_column(TSLexer *_self) +{ + Lexer *self = (Lexer *)_self; + + uint32_t goal_byte = self->current_position.bytes; + + self->did_get_column = true; + self->current_position.bytes -= self->current_position.extent.column; + self->current_position.extent.column = 0; + + if (self->current_position.bytes < self->chunk_start) + { + ts_lexer__get_chunk(self); + } + + uint32_t result = 0; + if (!ts_lexer__eof(_self)) + { + ts_lexer__get_lookahead(self); + while (self->current_position.bytes < goal_byte && self->chunk) + { + result++; + 
ts_lexer__do_advance(self, false); + if (ts_lexer__eof(_self)) + break; + } + } + + return result; +} + +// Is the lexer at a boundary between two disjoint included ranges of +// source code? This is exposed as an API because some languages' external +// scanners need to perform custom actions at these boundaries. +static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) +{ + const Lexer *self = (const Lexer *)_self; + if (self->current_included_range_index < self->included_range_count) + { + TSRange *current_range = + &self->included_ranges[self->current_included_range_index]; + return self->current_position.bytes == current_range->start_byte; + } + else + { + return false; + } +} + +void ts_lexer_init(Lexer *self) +{ + *self = (Lexer){ + .data = + { + // The lexer's methods are stored as struct fields so that + // generated + // parsers can call them without needing to be linked against + // this + // library. + .advance = ts_lexer__advance, + .mark_end = ts_lexer__mark_end, + .get_column = ts_lexer__get_column, + .is_at_included_range_start = + ts_lexer__is_at_included_range_start, + .eof = ts_lexer__eof, + .lookahead = 0, + .result_symbol = 0, + }, + .chunk = NULL, + .chunk_size = 0, + .chunk_start = 0, + .current_position = {0, {0, 0}}, + .logger = {.payload = NULL, .log = NULL}, + .included_ranges = NULL, + .included_range_count = 0, + .current_included_range_index = 0, + }; + ts_lexer_set_included_ranges(self, NULL, 0); +} + +void ts_lexer_delete(Lexer *self) +{ + free(self->included_ranges); +} + +void ts_lexer_set_input(Lexer *self, TSInput input) +{ + self->input = input; + ts_lexer__clear_chunk(self); + ts_lexer_goto(self, self->current_position); +} + +// Move the lexer to the given position. This doesn't do any work +// if the parser is already at the given position. 
+void ts_lexer_reset(Lexer *self, Length position) +{ + if (position.bytes != self->current_position.bytes) + { + ts_lexer_goto(self, position); + } +} + +void ts_lexer_start(Lexer *self) +{ + self->token_start_position = self->current_position; + self->token_end_position = LENGTH_UNDEFINED; + self->data.result_symbol = 0; + self->did_get_column = false; + if (!ts_lexer__eof(&self->data)) + { + if (!self->chunk_size) + ts_lexer__get_chunk(self); + if (!self->lookahead_size) + ts_lexer__get_lookahead(self); + if (self->current_position.bytes == 0 && + self->data.lookahead == BYTE_ORDER_MARK) + ts_lexer__advance(&self->data, true); + } +} + +void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) +{ + if (length_is_undefined(self->token_end_position)) + { + ts_lexer__mark_end(&self->data); + } + + // If the token ended at an included range boundary, then its end position + // will have been reset to the end of the preceding range. Reset the start + // position to match. + if (self->token_end_position.bytes < self->token_start_position.bytes) + { + self->token_start_position = self->token_end_position; + } + + uint32_t current_lookahead_end_byte = self->current_position.bytes + 1; + + // In order to determine that a byte sequence is invalid UTF8 or UTF16, + // the character decoding algorithm may have looked at the following byte. + // Therefore, the next byte *after* the current (invalid) character + // affects the interpretation of the current character. 
+ if (self->data.lookahead == TS_DECODE_ERROR) + { + current_lookahead_end_byte += 4; // the maximum number of bytes read to + // identify an invalid code point + } + + if (current_lookahead_end_byte > *lookahead_end_byte) + { + *lookahead_end_byte = current_lookahead_end_byte; + } +} + +void ts_lexer_advance_to_end(Lexer *self) +{ + while (self->chunk) + { + ts_lexer__advance(&self->data, false); + } +} + +void ts_lexer_mark_end(Lexer *self) +{ + ts_lexer__mark_end(&self->data); +} + +bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, + uint32_t count) +{ + if (count == 0 || !ranges) + { + ranges = &DEFAULT_RANGE; + count = 1; + } + else + { + uint32_t previous_byte = 0; + for (unsigned i = 0; i < count; i++) + { + const TSRange *range = &ranges[i]; + if (range->start_byte < previous_byte || + range->end_byte < range->start_byte) + return false; + previous_byte = range->end_byte; + } + } + + size_t size = count * sizeof(TSRange); + self->included_ranges = realloc(self->included_ranges, size); + memcpy(self->included_ranges, ranges, size); + self->included_range_count = count; + ts_lexer_goto(self, self->current_position); + return true; +} + +TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) +{ + *count = self->included_range_count; + return self->included_ranges; +} + +#undef LOG + +typedef struct +{ + Subtree parent; + const TSTree *tree; + Length position; + uint32_t child_index; + uint32_t structural_child_index; + const TSSymbol *alias_sequence; +} NodeChildIterator; + +// TSNode - constructors + +TSNode ts_node_new(const TSTree *tree, const Subtree *subtree, Length position, + TSSymbol alias) +{ + return (TSNode){ + {position.bytes, position.extent.row, position.extent.column, alias}, + subtree, + tree, + }; +} + +static inline TSNode ts_node__null(void) +{ + return ts_node_new(NULL, NULL, length_zero(), 0); +} + +// TSNode - accessors + +uint32_t ts_node_start_byte(TSNode self) +{ + return self.context[0]; +} + 
+TSPoint ts_node_start_point(TSNode self) +{ + return (TSPoint){self.context[1], self.context[2]}; +} + +static inline uint32_t ts_node__alias(const TSNode *self) +{ + return self->context[3]; +} + +static inline Subtree ts_node__subtree(TSNode self) +{ + return *(const Subtree *)self.id; +} + +// NodeChildIterator + +static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) +{ + Subtree subtree = ts_node__subtree(*node); + if (ts_subtree_child_count(subtree) == 0) + { + return (NodeChildIterator){ + NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; + } + const TSSymbol *alias_sequence = ts_language_alias_sequence( + node->tree->language, subtree.ptr->production_id); + return (NodeChildIterator){ + .tree = node->tree, + .parent = subtree, + .position = {ts_node_start_byte(*node), ts_node_start_point(*node)}, + .child_index = 0, + .structural_child_index = 0, + .alias_sequence = alias_sequence, + }; +} + +static inline bool ts_node_child_iterator_done(NodeChildIterator *self) +{ + return self->child_index == self->parent.ptr->child_count; +} + +static inline bool ts_node_child_iterator_next(NodeChildIterator *self, + TSNode *result) +{ + if (!self->parent.ptr || ts_node_child_iterator_done(self)) + return false; + const Subtree *child = + &ts_subtree_children(self->parent)[self->child_index]; + TSSymbol alias_symbol = 0; + if (!ts_subtree_extra(*child)) + { + if (self->alias_sequence) + { + alias_symbol = self->alias_sequence[self->structural_child_index]; + } + self->structural_child_index++; + } + if (self->child_index > 0) + { + self->position = length_add(self->position, ts_subtree_padding(*child)); + } + *result = ts_node_new(self->tree, child, self->position, alias_symbol); + self->position = length_add(self->position, ts_subtree_size(*child)); + self->child_index++; + return true; +} + +// TSNode - private + +static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) +{ + Subtree tree = ts_node__subtree(self); + if 
(include_anonymous) + { + return ts_subtree_visible(tree) || ts_node__alias(&self); + } + else + { + TSSymbol alias = ts_node__alias(&self); + if (alias) + { + return ts_language_symbol_metadata(self.tree->language, alias) + .named; + } + else + { + return ts_subtree_visible(tree) && ts_subtree_named(tree); + } + } +} + +static inline uint32_t ts_node__relevant_child_count(TSNode self, + bool include_anonymous) +{ + Subtree tree = ts_node__subtree(self); + if (ts_subtree_child_count(tree) > 0) + { + if (include_anonymous) + { + return tree.ptr->visible_child_count; + } + else + { + return tree.ptr->named_child_count; + } + } + else + { + return 0; + } +} + +static inline TSNode ts_node__child(TSNode self, uint32_t child_index, + bool include_anonymous) +{ + TSNode result = self; + bool did_descend = true; + + while (did_descend) + { + did_descend = false; + + TSNode child; + uint32_t index = 0; + NodeChildIterator iterator = ts_node_iterate_children(&result); + while (ts_node_child_iterator_next(&iterator, &child)) + { + if (ts_node__is_relevant(child, include_anonymous)) + { + if (index == child_index) + { + return child; + } + index++; + } + else + { + uint32_t grandchild_index = child_index - index; + uint32_t grandchild_count = + ts_node__relevant_child_count(child, include_anonymous); + if (grandchild_index < grandchild_count) + { + did_descend = true; + result = child; + child_index = grandchild_index; + break; + } + index += grandchild_count; + } + } + } + + return ts_node__null(); +} + +static bool ts_subtree_has_trailing_empty_descendant(Subtree self, + Subtree other) +{ + for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) + { + Subtree child = ts_subtree_children(self)[i]; + if (ts_subtree_total_bytes(child) > 0) + break; + if (child.ptr == other.ptr || + ts_subtree_has_trailing_empty_descendant(child, other)) + { + return true; + } + } + return false; +} + +static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) 
+{ + Subtree self_subtree = ts_node__subtree(self); + bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0; + uint32_t target_end_byte = ts_node_end_byte(self); + + TSNode node = ts_node_parent(self); + TSNode earlier_node = ts_node__null(); + bool earlier_node_is_relevant = false; + + while (!ts_node_is_null(node)) + { + TSNode earlier_child = ts_node__null(); + bool earlier_child_is_relevant = false; + bool found_child_containing_target = false; + + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) + { + if (child.id == self.id) + break; + if (iterator.position.bytes > target_end_byte) + { + found_child_containing_target = true; + break; + } + + if (iterator.position.bytes == target_end_byte && + (!self_is_empty || ts_subtree_has_trailing_empty_descendant( + ts_node__subtree(child), self_subtree))) + { + found_child_containing_target = true; + break; + } + + if (ts_node__is_relevant(child, include_anonymous)) + { + earlier_child = child; + earlier_child_is_relevant = true; + } + else if (ts_node__relevant_child_count(child, include_anonymous) > + 0) + { + earlier_child = child; + earlier_child_is_relevant = false; + } + } + + if (found_child_containing_target) + { + if (!ts_node_is_null(earlier_child)) + { + earlier_node = earlier_child; + earlier_node_is_relevant = earlier_child_is_relevant; + } + node = child; + } + else if (earlier_child_is_relevant) + { + return earlier_child; + } + else if (!ts_node_is_null(earlier_child)) + { + node = earlier_child; + } + else if (earlier_node_is_relevant) + { + return earlier_node; + } + else + { + node = earlier_node; + earlier_node = ts_node__null(); + earlier_node_is_relevant = false; + } + } + + return ts_node__null(); +} + +static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) +{ + uint32_t target_end_byte = ts_node_end_byte(self); + + TSNode node = ts_node_parent(self); + TSNode later_node = 
ts_node__null(); + bool later_node_is_relevant = false; + + while (!ts_node_is_null(node)) + { + TSNode later_child = ts_node__null(); + bool later_child_is_relevant = false; + TSNode child_containing_target = ts_node__null(); + + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) + { + if (iterator.position.bytes < target_end_byte) + continue; + if (ts_node_start_byte(child) <= ts_node_start_byte(self)) + { + if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) + { + child_containing_target = child; + } + } + else if (ts_node__is_relevant(child, include_anonymous)) + { + later_child = child; + later_child_is_relevant = true; + break; + } + else if (ts_node__relevant_child_count(child, include_anonymous) > + 0) + { + later_child = child; + later_child_is_relevant = false; + break; + } + } + + if (!ts_node_is_null(child_containing_target)) + { + if (!ts_node_is_null(later_child)) + { + later_node = later_child; + later_node_is_relevant = later_child_is_relevant; + } + node = child_containing_target; + } + else if (later_child_is_relevant) + { + return later_child; + } + else if (!ts_node_is_null(later_child)) + { + node = later_child; + } + else if (later_node_is_relevant) + { + return later_node; + } + else + { + node = later_node; + } + } + + return ts_node__null(); +} + +static inline TSNode ts_node__first_child_for_byte(TSNode self, uint32_t goal, + bool include_anonymous) +{ + TSNode node = self; + bool did_descend = true; + + while (did_descend) + { + did_descend = false; + + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) + { + if (ts_node_end_byte(child) > goal) + { + if (ts_node__is_relevant(child, include_anonymous)) + { + return child; + } + else if (ts_node_child_count(child) > 0) + { + did_descend = true; + node = child; + break; + } + } + } + } + + return ts_node__null(); +} 
+ +static inline TSNode ts_node__descendant_for_byte_range(TSNode self, + uint32_t range_start, + uint32_t range_end, + bool include_anonymous) +{ + TSNode node = self; + TSNode last_visible_node = self; + + bool did_descend = true; + while (did_descend) + { + did_descend = false; + + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) + { + uint32_t node_end = iterator.position.bytes; + + // The end of this node must extend far enough forward to touch + // the end of the range and exceed the start of the range. + if (node_end < range_end) + continue; + if (node_end <= range_start) + continue; + + // The start of this node must extend far enough backward to + // touch the start of the range. + if (range_start < ts_node_start_byte(child)) + break; + + node = child; + if (ts_node__is_relevant(node, include_anonymous)) + { + last_visible_node = node; + } + did_descend = true; + break; + } + } + + return last_visible_node; +} + +static inline TSNode ts_node__descendant_for_point_range(TSNode self, + TSPoint range_start, + TSPoint range_end, + bool include_anonymous) +{ + TSNode node = self; + TSNode last_visible_node = self; + + bool did_descend = true; + while (did_descend) + { + did_descend = false; + + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&node); + while (ts_node_child_iterator_next(&iterator, &child)) + { + TSPoint node_end = iterator.position.extent; + + // The end of this node must extend far enough forward to touch + // the end of the range and exceed the start of the range. + if (point_lt(node_end, range_end)) + continue; + if (point_lte(node_end, range_start)) + continue; + + // The start of this node must extend far enough backward to + // touch the start of the range. 
+ if (point_lt(range_start, ts_node_start_point(child))) + break; + + node = child; + if (ts_node__is_relevant(node, include_anonymous)) + { + last_visible_node = node; + } + did_descend = true; + break; + } + } + + return last_visible_node; +} + +// TSNode - public + +uint32_t ts_node_end_byte(TSNode self) +{ + return ts_node_start_byte(self) + + ts_subtree_size(ts_node__subtree(self)).bytes; +} + +TSPoint ts_node_end_point(TSNode self) +{ + return point_add(ts_node_start_point(self), + ts_subtree_size(ts_node__subtree(self)).extent); +} + +TSSymbol ts_node_symbol(TSNode self) +{ + TSSymbol symbol = ts_node__alias(&self); + if (!symbol) + symbol = ts_subtree_symbol(ts_node__subtree(self)); + return ts_language_public_symbol(self.tree->language, symbol); +} + +const char *ts_node_type(TSNode self) +{ + TSSymbol symbol = ts_node__alias(&self); + if (!symbol) + symbol = ts_subtree_symbol(ts_node__subtree(self)); + return ts_language_symbol_name(self.tree->language, symbol); +} + +const TSLanguage *ts_node_language(TSNode self) +{ + return self.tree->language; +} + +TSSymbol ts_node_grammar_symbol(TSNode self) +{ + return ts_subtree_symbol(ts_node__subtree(self)); +} + +const char *ts_node_grammar_type(TSNode self) +{ + TSSymbol symbol = ts_subtree_symbol(ts_node__subtree(self)); + return ts_language_symbol_name(self.tree->language, symbol); +} + +char *ts_node_string(TSNode self) +{ + TSSymbol alias_symbol = ts_node__alias(&self); + return ts_subtree_string( + ts_node__subtree(self), alias_symbol, + ts_language_symbol_metadata(self.tree->language, alias_symbol).visible, + self.tree->language, false); +} + +bool ts_node_eq(TSNode self, TSNode other) +{ + return self.tree == other.tree && self.id == other.id; +} + +bool ts_node_is_null(TSNode self) +{ + return self.id == 0; +} + +bool ts_node_is_extra(TSNode self) +{ + return ts_subtree_extra(ts_node__subtree(self)); +} + +bool ts_node_is_named(TSNode self) +{ + TSSymbol alias = ts_node__alias(&self); + return alias ? 
ts_language_symbol_metadata(self.tree->language, alias).named + : ts_subtree_named(ts_node__subtree(self)); +} + +bool ts_node_is_missing(TSNode self) +{ + return ts_subtree_missing(ts_node__subtree(self)); +} + +bool ts_node_has_changes(TSNode self) +{ + return ts_subtree_has_changes(ts_node__subtree(self)); +} + +bool ts_node_has_error(TSNode self) +{ + return ts_subtree_error_cost(ts_node__subtree(self)) > 0; +} + +bool ts_node_is_error(TSNode self) +{ + TSSymbol symbol = ts_node_symbol(self); + return symbol == ts_builtin_sym_error; +} + +uint32_t ts_node_descendant_count(TSNode self) +{ + return ts_subtree_visible_descendant_count(ts_node__subtree(self)) + 1; +} + +TSStateId ts_node_parse_state(TSNode self) +{ + return ts_subtree_parse_state(ts_node__subtree(self)); +} + +TSStateId ts_node_next_parse_state(TSNode self) +{ + const TSLanguage *language = self.tree->language; + uint16_t state = ts_node_parse_state(self); + if (state == TS_TREE_STATE_NONE) + { + return TS_TREE_STATE_NONE; + } + uint16_t symbol = ts_node_grammar_symbol(self); + return ts_language_next_state(language, state, symbol); +} + +TSNode ts_node_parent(TSNode self) +{ + TSNode node = ts_tree_root_node(self.tree); + if (node.id == self.id) + return ts_node__null(); + + while (true) + { + TSNode next_node = ts_node_child_containing_descendant(node, self); + if (ts_node_is_null(next_node)) + break; + node = next_node; + } + + return node; +} + +TSNode ts_node_child_containing_descendant(TSNode self, TSNode subnode) +{ + uint32_t start_byte = ts_node_start_byte(subnode); + uint32_t end_byte = ts_node_end_byte(subnode); + + do + { + NodeChildIterator iter = ts_node_iterate_children(&self); + do + { + if (!ts_node_child_iterator_next(&iter, &self) || + ts_node_start_byte(self) > start_byte || self.id == subnode.id) + { + return ts_node__null(); + } + } while (iter.position.bytes < end_byte || + ts_node_child_count(self) == 0); + } while (!ts_node__is_relevant(self, true)); + + return self; +} + 
+TSNode ts_node_child(TSNode self, uint32_t child_index) +{ + return ts_node__child(self, child_index, true); +} + +TSNode ts_node_named_child(TSNode self, uint32_t child_index) +{ + return ts_node__child(self, child_index, false); +} + +TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) +{ +recur: + if (!field_id || ts_node_child_count(self) == 0) + return ts_node__null(); + + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map(self.tree->language, + ts_node__subtree(self).ptr->production_id, &field_map, + &field_map_end); + if (field_map == field_map_end) + return ts_node__null(); + + // The field mappings are sorted by their field id. Scan all + // the mappings to find the ones for the given field id. + while (field_map->field_id < field_id) + { + field_map++; + if (field_map == field_map_end) + return ts_node__null(); + } + while (field_map_end[-1].field_id > field_id) + { + field_map_end--; + if (field_map == field_map_end) + return ts_node__null(); + } + + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&self); + while (ts_node_child_iterator_next(&iterator, &child)) + { + if (!ts_subtree_extra(ts_node__subtree(child))) + { + uint32_t index = iterator.structural_child_index - 1; + if (index < field_map->child_index) + continue; + + // Hidden nodes' fields are "inherited" by their visible parent. + if (field_map->inherited) + { + + // If this is the *last* possible child node for this field, + // then perform a tail call to avoid recursion. + if (field_map + 1 == field_map_end) + { + self = child; + goto recur; + } + + // Otherwise, descend into this child, but if it doesn't contain + // the field, continue searching subsequent children. 
+ else + { + TSNode result = ts_node_child_by_field_id(child, field_id); + if (result.id) + return result; + field_map++; + if (field_map == field_map_end) + return ts_node__null(); + } + } + + else if (ts_node__is_relevant(child, true)) + { + return child; + } + + // If the field refers to a hidden node with visible children, + // return the first visible child. + else if (ts_node_child_count(child) > 0) + { + return ts_node_child(child, 0); + } + + // Otherwise, continue searching subsequent children. + else + { + field_map++; + if (field_map == field_map_end) + return ts_node__null(); + } + } + } + + return ts_node__null(); +} + +static inline const char *ts_node__field_name_from_language( + TSNode self, uint32_t structural_child_index) +{ + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map(self.tree->language, + ts_node__subtree(self).ptr->production_id, &field_map, + &field_map_end); + for (; field_map != field_map_end; field_map++) + { + if (!field_map->inherited && + field_map->child_index == structural_child_index) + { + return self.tree->language->field_names[field_map->field_id]; + } + } + return NULL; +} + +const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) +{ + TSNode result = self; + bool did_descend = true; + const char *inherited_field_name = NULL; + + while (did_descend) + { + did_descend = false; + + TSNode child; + uint32_t index = 0; + NodeChildIterator iterator = ts_node_iterate_children(&result); + while (ts_node_child_iterator_next(&iterator, &child)) + { + if (ts_node__is_relevant(child, true)) + { + if (index == child_index) + { + if (ts_node_is_extra(child)) + { + return NULL; + } + const char *field_name = ts_node__field_name_from_language( + result, iterator.structural_child_index - 1); + if (field_name) + return field_name; + return inherited_field_name; + } + index++; + } + else + { + uint32_t grandchild_index = child_index - index; + uint32_t grandchild_count = + 
ts_node__relevant_child_count(child, true); + if (grandchild_index < grandchild_count) + { + const char *field_name = ts_node__field_name_from_language( + result, iterator.structural_child_index - 1); + if (field_name) + inherited_field_name = field_name; + + did_descend = true; + result = child; + child_index = grandchild_index; + break; + } + index += grandchild_count; + } + } + } + + return NULL; +} + +TSNode ts_node_child_by_field_name(TSNode self, const char *name, + uint32_t name_length) +{ + TSFieldId field_id = + ts_language_field_id_for_name(self.tree->language, name, name_length); + return ts_node_child_by_field_id(self, field_id); +} + +uint32_t ts_node_child_count(TSNode self) +{ + Subtree tree = ts_node__subtree(self); + if (ts_subtree_child_count(tree) > 0) + { + return tree.ptr->visible_child_count; + } + else + { + return 0; + } +} + +uint32_t ts_node_named_child_count(TSNode self) +{ + Subtree tree = ts_node__subtree(self); + if (ts_subtree_child_count(tree) > 0) + { + return tree.ptr->named_child_count; + } + else + { + return 0; + } +} + +TSNode ts_node_next_sibling(TSNode self) +{ + return ts_node__next_sibling(self, true); +} + +TSNode ts_node_next_named_sibling(TSNode self) +{ + return ts_node__next_sibling(self, false); +} + +TSNode ts_node_prev_sibling(TSNode self) +{ + return ts_node__prev_sibling(self, true); +} + +TSNode ts_node_prev_named_sibling(TSNode self) +{ + return ts_node__prev_sibling(self, false); +} + +TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) +{ + return ts_node__first_child_for_byte(self, byte, true); +} + +TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) +{ + return ts_node__first_child_for_byte(self, byte, false); +} + +TSNode ts_node_descendant_for_byte_range(TSNode self, uint32_t start, + uint32_t end) +{ + return ts_node__descendant_for_byte_range(self, start, end, true); +} + +TSNode ts_node_named_descendant_for_byte_range(TSNode self, uint32_t start, + uint32_t end) +{ + 
return ts_node__descendant_for_byte_range(self, start, end, false); +} + +TSNode ts_node_descendant_for_point_range(TSNode self, TSPoint start, + TSPoint end) +{ + return ts_node__descendant_for_point_range(self, start, end, true); +} + +TSNode ts_node_named_descendant_for_point_range(TSNode self, TSPoint start, + TSPoint end) +{ + return ts_node__descendant_for_point_range(self, start, end, false); +} + +void ts_node_edit(TSNode *self, const TSInputEdit *edit) +{ + uint32_t start_byte = ts_node_start_byte(*self); + TSPoint start_point = ts_node_start_point(*self); + + if (start_byte >= edit->old_end_byte) + { + start_byte = edit->new_end_byte + (start_byte - edit->old_end_byte); + start_point = point_add(edit->new_end_point, + point_sub(start_point, edit->old_end_point)); + } + else if (start_byte > edit->start_byte) + { + start_byte = edit->new_end_byte; + start_point = edit->new_end_point; + } + + self->context[0] = start_byte; + self->context[1] = start_point.row; + self->context[2] = start_point.column; +} + +#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol) + +#define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree)) + +static const unsigned MAX_VERSION_COUNT = 6; +static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; +static const unsigned MAX_SUMMARY_DEPTH = 16; +static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; +static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100; + +typedef struct +{ + Subtree token; + Subtree last_external_token; + uint32_t byte_index; +} TokenCache; + +struct TSParser +{ + Lexer lexer; + Stack *stack; + SubtreePool tree_pool; + const TSLanguage *language; + ReduceActionSet reduce_actions; + Subtree finished_tree; + SubtreeArray trailing_extras; + SubtreeArray trailing_extras2; + SubtreeArray scratch_trees; + TokenCache token_cache; + ReusableNode reusable_node; + void *external_scanner_payload; + TSClock end_clock; + TSDuration timeout_duration; + unsigned accept_count; + unsigned 
operation_count; + const volatile size_t *cancellation_flag; + Subtree old_tree; + TSRangeArray included_range_differences; + unsigned included_range_difference_index; + bool has_scanner_error; +}; + +typedef struct +{ + unsigned cost; + unsigned node_count; + int dynamic_precedence; + bool is_in_error; +} ErrorStatus; + +typedef enum +{ + ErrorComparisonTakeLeft, + ErrorComparisonPreferLeft, + ErrorComparisonNone, + ErrorComparisonPreferRight, + ErrorComparisonTakeRight, +} ErrorComparison; + +typedef struct +{ + const char *string; + uint32_t length; +} TSStringInput; + +// StringInput + +static const char *ts_string_input_read(void *_self, uint32_t byte, + TSPoint point, uint32_t *length) +{ + (void)point; + TSStringInput *self = (TSStringInput *)_self; + if (byte >= self->length) + { + *length = 0; + return ""; + } + else + { + *length = self->length - byte; + return self->string + byte; + } +} + +// Parser - Private + +static bool ts_parser__breakdown_top_of_stack(TSParser *self, + StackVersion version) +{ + bool did_break_down = false; + bool pending = false; + + do + { + StackSliceArray pop = ts_stack_pop_pending(self->stack, version); + if (!pop.size) + break; + + did_break_down = true; + pending = false; + for (uint32_t i = 0; i < pop.size; i++) + { + StackSlice slice = pop.contents[i]; + TSStateId state = ts_stack_state(self->stack, slice.version); + Subtree parent = *array_front(&slice.subtrees); + + for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) + { + Subtree child = ts_subtree_children(parent)[j]; + pending = ts_subtree_child_count(child) > 0; + + if (ts_subtree_is_error(child)) + { + state = ERROR_STATE; + } + else if (!ts_subtree_extra(child)) + { + state = ts_language_next_state(self->language, state, + ts_subtree_symbol(child)); + } + + ts_subtree_retain(child); + ts_stack_push(self->stack, slice.version, child, pending, + state); + } + + for (uint32_t j = 1; j < slice.subtrees.size; j++) + { + Subtree tree = 
slice.subtrees.contents[j]; + ts_stack_push(self->stack, slice.version, tree, false, state); + } + + ts_subtree_release(&self->tree_pool, parent); + array_delete(&slice.subtrees); + } + } while (pending); + + return did_break_down; +} + +static void ts_parser__breakdown_lookahead(TSParser *self, Subtree *lookahead, + TSStateId state, + ReusableNode *reusable_node) +{ + bool did_descend = false; + Subtree tree = reusable_node_tree(reusable_node); + while (ts_subtree_child_count(tree) > 0 && + ts_subtree_parse_state(tree) != state) + { + reusable_node_descend(reusable_node); + tree = reusable_node_tree(reusable_node); + did_descend = true; + } + + if (did_descend) + { + ts_subtree_release(&self->tree_pool, *lookahead); + *lookahead = tree; + ts_subtree_retain(*lookahead); + } +} + +static ErrorComparison ts_parser__compare_versions(TSParser *self, + ErrorStatus a, ErrorStatus b) +{ + (void)self; + if (!a.is_in_error && b.is_in_error) + { + if (a.cost < b.cost) + { + return ErrorComparisonTakeLeft; + } + else + { + return ErrorComparisonPreferLeft; + } + } + + if (a.is_in_error && !b.is_in_error) + { + if (b.cost < a.cost) + { + return ErrorComparisonTakeRight; + } + else + { + return ErrorComparisonPreferRight; + } + } + + if (a.cost < b.cost) + { + if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) + { + return ErrorComparisonTakeLeft; + } + else + { + return ErrorComparisonPreferLeft; + } + } + + if (b.cost < a.cost) + { + if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) + { + return ErrorComparisonTakeRight; + } + else + { + return ErrorComparisonPreferRight; + } + } + + if (a.dynamic_precedence > b.dynamic_precedence) + return ErrorComparisonPreferLeft; + if (b.dynamic_precedence > a.dynamic_precedence) + return ErrorComparisonPreferRight; + return ErrorComparisonNone; +} + +static ErrorStatus ts_parser__version_status(TSParser *self, + StackVersion version) +{ + unsigned cost = ts_stack_error_cost(self->stack, version); + bool 
is_paused = ts_stack_is_paused(self->stack, version); + if (is_paused) + cost += ERROR_COST_PER_SKIPPED_TREE; + return (ErrorStatus){ + .cost = cost, + .node_count = ts_stack_node_count_since_error(self->stack, version), + .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), + .is_in_error = + is_paused || ts_stack_state(self->stack, version) == ERROR_STATE}; +} + +static bool ts_parser__better_version_exists(TSParser *self, + StackVersion version, + bool is_in_error, unsigned cost) +{ + if (self->finished_tree.ptr && + ts_subtree_error_cost(self->finished_tree) <= cost) + { + return true; + } + + Length position = ts_stack_position(self->stack, version); + ErrorStatus status = { + .cost = cost, + .is_in_error = is_in_error, + .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), + .node_count = ts_stack_node_count_since_error(self->stack, version), + }; + + for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; + i++) + { + if (i == version || !ts_stack_is_active(self->stack, i) || + ts_stack_position(self->stack, i).bytes < position.bytes) + continue; + ErrorStatus status_i = ts_parser__version_status(self, i); + switch (ts_parser__compare_versions(self, status, status_i)) + { + case ErrorComparisonTakeRight: + return true; + case ErrorComparisonPreferRight: + if (ts_stack_can_merge(self->stack, i, version)) + return true; + break; + default: + break; + } + } + + return false; +} + +static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexMode lex_mode) +{ + return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); +} + +static bool ts_parser__call_keyword_lex_fn(TSParser *self, TSLexMode lex_mode) +{ + + (void)(lex_mode); + return self->language->keyword_lex_fn(&self->lexer.data, 0); +} + +static void ts_parser__external_scanner_create(TSParser *self) +{ + if (self->language && self->language->external_scanner.states) + { + if (self->language->external_scanner.create) + { + 
self->external_scanner_payload = + self->language->external_scanner.create(); + } + } +} + +static void ts_parser__external_scanner_destroy(TSParser *self) +{ + if (self->language && self->external_scanner_payload && + self->language->external_scanner.destroy) + { + self->language->external_scanner.destroy( + self->external_scanner_payload); + } + self->external_scanner_payload = NULL; +} + +static unsigned ts_parser__external_scanner_serialize(TSParser *self) +{ + + uint32_t length = self->language->external_scanner.serialize( + self->external_scanner_payload, self->lexer.debug_buffer); + assert(length <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE); + return length; +} + +static void ts_parser__external_scanner_deserialize(TSParser *self, + Subtree external_token) +{ + const char *data = NULL; + uint32_t length = 0; + if (external_token.ptr) + { + data = ts_external_scanner_state_data( + &external_token.ptr->external_scanner_state); + length = external_token.ptr->external_scanner_state.length; + } + + self->language->external_scanner.deserialize(self->external_scanner_payload, + data, length); +} + +static bool ts_parser__external_scanner_scan(TSParser *self, + TSStateId external_lex_state) +{ + + const bool *valid_external_tokens = + ts_language_enabled_external_tokens(self->language, external_lex_state); + return self->language->external_scanner.scan(self->external_scanner_payload, + &self->lexer.data, + valid_external_tokens); +} + +static bool ts_parser__can_reuse_first_leaf(TSParser *self, TSStateId state, + Subtree tree, + TableEntry *table_entry) +{ + TSLexMode current_lex_mode = self->language->lex_modes[state]; + TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree); + TSStateId leaf_state = ts_subtree_leaf_parse_state(tree); + TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state]; + + // At the end of a non-terminal extra node, the lexer normally returns + // NULL, which indicates that the parser should look for a reduce action + // at symbol `0`. 
Avoid reusing tokens in this situation to ensure that + // the same thing happens when incrementally reparsing. + if (current_lex_mode.lex_state == (uint16_t)(-1)) + return false; + + // If the token was created in a state with the same set of lookaheads, it + // is reusable. + if (table_entry->action_count > 0 && + memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexMode)) == 0 && + (leaf_symbol != self->language->keyword_capture_token || + (!ts_subtree_is_keyword(tree) && + ts_subtree_parse_state(tree) == state))) + return true; + + // Empty tokens are not reusable in states with different lookaheads. + if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) + return false; + + // If the current state allows external tokens or other tokens that conflict + // with this token, this token is not reusable. + return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable; +} + +const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) { + static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0}; + if ( + self.ptr && + !self.data.is_inline && + self.ptr->has_external_tokens && + self.ptr->child_count == 0 + ) { + return &self.ptr->external_scanner_state; + } else { + return &empty_state; + } +} + +static Subtree ts_parser__lex(TSParser *self, StackVersion version, + TSStateId parse_state) +{ + TSLexMode lex_mode = self->language->lex_modes[parse_state]; + if (lex_mode.lex_state == (uint16_t)-1) + { + return NULL_SUBTREE; + } + + const Length start_position = ts_stack_position(self->stack, version); + const Subtree external_token = + ts_stack_last_external_token(self->stack, version); + + bool found_external_token = false; + bool error_mode = parse_state == ERROR_STATE; + bool skipped_error = false; + bool called_get_column = false; + int32_t first_error_character = 0; + Length error_start_position = length_zero(); + Length error_end_position = length_zero(); + uint32_t lookahead_end_byte = 0; + 
uint32_t external_scanner_state_len = 0; + bool external_scanner_state_changed = false; + ts_lexer_reset(&self->lexer, start_position); + + for (;;) + { + bool found_token = false; + Length current_position = self->lexer.current_position; + + if (lex_mode.external_lex_state != 0) + { + ts_lexer_start(&self->lexer); + ts_parser__external_scanner_deserialize(self, external_token); + found_token = ts_parser__external_scanner_scan( + self, lex_mode.external_lex_state); + if (self->has_scanner_error) + return NULL_SUBTREE; + ts_lexer_finish(&self->lexer, &lookahead_end_byte); + + if (found_token) + { + external_scanner_state_len = + ts_parser__external_scanner_serialize(self); + external_scanner_state_changed = !ts_external_scanner_state_eq( + ts_subtree_external_scanner_state(external_token), + self->lexer.debug_buffer, external_scanner_state_len); + + // When recovering from an error, ignore any zero-length + // external tokens unless they have changed the external + // scanner's state. This helps to avoid infinite loops which + // could otherwise occur, because the lexer is looking for any + // possible token, instead of looking for the specific set of + // tokens that are valid in some parse state. + // + // Note that it's possible that the token end position may be + // *before* the original position of the lexer because of the + // way that tokens are positioned at included range boundaries: + // when a token is terminated at the start of an included range, + // it is marked as ending at the *end* of the preceding included + // range. 
+ if (self->lexer.token_end_position.bytes <= + current_position.bytes && + (error_mode || !ts_stack_has_advanced_since_error( + self->stack, version)) && + !external_scanner_state_changed) + { + found_token = false; + } + } + + if (found_token) + { + found_external_token = true; + called_get_column = self->lexer.did_get_column; + break; + } + + ts_lexer_reset(&self->lexer, current_position); + } + ts_lexer_start(&self->lexer); + found_token = ts_parser__call_main_lex_fn(self, lex_mode); + ts_lexer_finish(&self->lexer, &lookahead_end_byte); + if (found_token) + break; + + if (!error_mode) + { + error_mode = true; + lex_mode = self->language->lex_modes[ERROR_STATE]; + ts_lexer_reset(&self->lexer, start_position); + continue; + } + + if (!skipped_error) + { + skipped_error = true; + error_start_position = self->lexer.token_start_position; + error_end_position = self->lexer.token_start_position; + first_error_character = self->lexer.data.lookahead; + } + + if (self->lexer.current_position.bytes == error_end_position.bytes) + { + if (self->lexer.data.eof(&self->lexer.data)) + { + self->lexer.data.result_symbol = ts_builtin_sym_error; + break; + } + self->lexer.data.advance(&self->lexer.data, false); + } + + error_end_position = self->lexer.current_position; + } + + Subtree result; + if (skipped_error) + { + Length padding = length_sub(error_start_position, start_position); + Length size = length_sub(error_end_position, error_start_position); + uint32_t lookahead_bytes = + lookahead_end_byte - error_end_position.bytes; + result = ts_subtree_new_error(&self->tree_pool, first_error_character, + padding, size, lookahead_bytes, + parse_state, self->language); + } + else + { + bool is_keyword = false; + TSSymbol symbol = self->lexer.data.result_symbol; + Length padding = + length_sub(self->lexer.token_start_position, start_position); + Length size = length_sub(self->lexer.token_end_position, + self->lexer.token_start_position); + uint32_t lookahead_bytes = + 
lookahead_end_byte - self->lexer.token_end_position.bytes; + + if (found_external_token) + { + symbol = self->language->external_scanner.symbol_map[symbol]; + } + else if (symbol == self->language->keyword_capture_token && symbol != 0) + { + uint32_t end_byte = self->lexer.token_end_position.bytes; + ts_lexer_reset(&self->lexer, self->lexer.token_start_position); + ts_lexer_start(&self->lexer); + + is_keyword = ts_parser__call_keyword_lex_fn(self, lex_mode); + + if (is_keyword && + self->lexer.token_end_position.bytes == end_byte && + ts_language_has_actions(self->language, parse_state, + self->lexer.data.result_symbol)) + { + symbol = self->lexer.data.result_symbol; + } + } + + result = ts_subtree_new_leaf(&self->tree_pool, symbol, padding, size, + lookahead_bytes, parse_state, + found_external_token, called_get_column, + is_keyword, self->language); + + if (found_external_token) + { + MutableSubtree mut_result = ts_subtree_to_mut_unsafe(result); + ts_external_scanner_state_init( + &mut_result.ptr->external_scanner_state, + self->lexer.debug_buffer, external_scanner_state_len); + mut_result.ptr->has_external_scanner_state_change = + external_scanner_state_changed; + } + } + return result; +} + +static Subtree ts_parser__get_cached_token(TSParser *self, TSStateId state, + size_t position, + Subtree last_external_token, + TableEntry *table_entry) +{ + TokenCache *cache = &self->token_cache; + if (cache->token.ptr && cache->byte_index == position && + ts_subtree_external_scanner_state_eq(cache->last_external_token, + last_external_token)) + { + ts_language_table_entry(self->language, state, + ts_subtree_symbol(cache->token), table_entry); + if (ts_parser__can_reuse_first_leaf(self, state, cache->token, + table_entry)) + { + ts_subtree_retain(cache->token); + return cache->token; + } + } + return NULL_SUBTREE; +} + +static void ts_parser__set_cached_token(TSParser *self, uint32_t byte_index, + Subtree last_external_token, + Subtree token) +{ + TokenCache *cache = 
&self->token_cache; + if (token.ptr) + ts_subtree_retain(token); + if (last_external_token.ptr) + ts_subtree_retain(last_external_token); + if (cache->token.ptr) + ts_subtree_release(&self->tree_pool, cache->token); + if (cache->last_external_token.ptr) + ts_subtree_release(&self->tree_pool, cache->last_external_token); + cache->token = token; + cache->byte_index = byte_index; + cache->last_external_token = last_external_token; +} + +static bool ts_parser__has_included_range_difference(const TSParser *self, + uint32_t start_position, + uint32_t end_position) +{ + return ts_range_array_intersects(&self->included_range_differences, + self->included_range_difference_index, + start_position, end_position); +} + +static Subtree ts_parser__reuse_node(TSParser *self, StackVersion version, + TSStateId *state, uint32_t position, + Subtree last_external_token, + TableEntry *table_entry) +{ + Subtree result; + while ((result = reusable_node_tree(&self->reusable_node)).ptr) + { + uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node); + uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result); + + // Do not reuse an EOF node if the included ranges array has changes + // later on in the file. 
+ if (ts_subtree_is_eof(result)) + end_byte_offset = UINT32_MAX; + + if (byte_offset > position) + { + + break; + } + + if (byte_offset < position) + { + + if (end_byte_offset <= position || + !reusable_node_descend(&self->reusable_node)) + { + reusable_node_advance(&self->reusable_node); + } + continue; + } + + if (!ts_subtree_external_scanner_state_eq( + self->reusable_node.last_external_token, last_external_token)) + { + reusable_node_advance(&self->reusable_node); + continue; + } + + const char *reason = NULL; + if (ts_subtree_has_changes(result)) + { + reason = "has_changes"; + } + else if (ts_subtree_is_error(result)) + { + reason = "is_error"; + } + else if (ts_subtree_missing(result)) + { + reason = "is_missing"; + } + else if (ts_subtree_is_fragile(result)) + { + reason = "is_fragile"; + } + else if (ts_parser__has_included_range_difference(self, byte_offset, + end_byte_offset)) + { + reason = "contains_different_included_range"; + } + + if (reason) + { + if (!reusable_node_descend(&self->reusable_node)) + { + reusable_node_advance(&self->reusable_node); + ts_parser__breakdown_top_of_stack(self, version); + *state = ts_stack_state(self->stack, version); + } + continue; + } + + TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result); + ts_language_table_entry(self->language, *state, leaf_symbol, + table_entry); + if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) + { + reusable_node_advance_past_leaf(&self->reusable_node); + break; + } + ts_subtree_retain(result); + return result; + } + + return NULL_SUBTREE; +} + +// Determine if a given tree should be replaced by an alternative tree. +// +// The decision is based on the trees' error costs (if any), their dynamic +// precedence, and finally, as a default, by a recursive comparison of the +// trees' symbols. 
+static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) +{ + if (!left.ptr) + return true; + if (!right.ptr) + return false; + + if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) + { + return true; + } + + if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) + { + return false; + } + + if (ts_subtree_dynamic_precedence(right) > + ts_subtree_dynamic_precedence(left)) + { + return true; + } + + if (ts_subtree_dynamic_precedence(left) > + ts_subtree_dynamic_precedence(right)) + { + return false; + } + + if (ts_subtree_error_cost(left) > 0) + return true; + + int comparison = ts_subtree_compare(left, right, &self->tree_pool); + switch (comparison) + { + case -1: + return false; + break; + case 1: + return true; + default: + return false; + } +} + +// Determine if a given tree's children should be replaced by an alternative +// array of children. +static bool ts_parser__select_children(TSParser *self, Subtree left, + const SubtreeArray *children) +{ + array_assign(&self->scratch_trees, children); + + // Create a temporary subtree using the scratch trees array. This node does + // not perform any allocation except for possibly growing the array to make + // room for its own heap data. The scratch tree is never explicitly + // released, so the same 'scratch trees' array can be reused again later. 
+ MutableSubtree scratch_tree = ts_subtree_new_node( + ts_subtree_symbol(left), &self->scratch_trees, 0, self->language); + + return ts_parser__select_tree(self, left, + ts_subtree_from_mut(scratch_tree)); +} + +static void ts_parser__shift(TSParser *self, StackVersion version, + TSStateId state, Subtree lookahead, bool extra) +{ + bool is_leaf = ts_subtree_child_count(lookahead) == 0; + Subtree subtree_to_push = lookahead; + if (extra != ts_subtree_extra(lookahead) && is_leaf) + { + MutableSubtree result = + ts_subtree_make_mut(&self->tree_pool, lookahead); + ts_subtree_set_extra(&result, extra); + subtree_to_push = ts_subtree_from_mut(result); + } + + ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state); + if (ts_subtree_has_external_tokens(subtree_to_push)) + { + ts_stack_set_last_external_token( + self->stack, version, + ts_subtree_last_external_token(subtree_to_push)); + } +} + +static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, + TSSymbol symbol, uint32_t count, + int dynamic_precedence, + uint16_t production_id, bool is_fragile, + bool end_of_non_terminal_extra) +{ + uint32_t initial_version_count = ts_stack_version_count(self->stack); + + // Pop the given number of nodes from the given version of the parse stack. + // If stack versions have previously merged, then there may be more than one + // path back through the stack. For each path, create a new parent node to + // contain the popped children, and push it onto the stack in place of the + // children. + StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); + uint32_t removed_version_count = 0; + for (uint32_t i = 0; i < pop.size; i++) + { + StackSlice slice = pop.contents[i]; + StackVersion slice_version = slice.version - removed_version_count; + + // This is where new versions are added to the parse stack. The versions + // will all be sorted and truncated at the end of the outer parsing + // loop. 
Allow the maximum version count to be temporarily exceeded, but + // only by a limited threshold. + if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) + { + ts_stack_remove_version(self->stack, slice_version); + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); + removed_version_count++; + while (i + 1 < pop.size) + { + StackSlice next_slice = pop.contents[i + 1]; + if (next_slice.version != slice.version) + break; + ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); + i++; + } + continue; + } + + // Extra tokens on top of the stack should not be included in this new + // parent node. They will be re-pushed onto the stack after the parent + // node is created and pushed. + SubtreeArray children = slice.subtrees; + ts_subtree_array_remove_trailing_extras(&children, + &self->trailing_extras); + + MutableSubtree parent = ts_subtree_new_node( + symbol, &children, production_id, self->language); + + // This pop operation may have caused multiple stack versions to + // collapse into one, because they all diverged from a common state. In + // that case, choose one of the arrays of trees to be the parent node's + // children, and delete the rest of the tree arrays. 
+ while (i + 1 < pop.size) + { + StackSlice next_slice = pop.contents[i + 1]; + if (next_slice.version != slice.version) + break; + i++; + + SubtreeArray next_slice_children = next_slice.subtrees; + ts_subtree_array_remove_trailing_extras(&next_slice_children, + &self->trailing_extras2); + + if (ts_parser__select_children(self, ts_subtree_from_mut(parent), + &next_slice_children)) + { + ts_subtree_array_clear(&self->tree_pool, + &self->trailing_extras); + ts_subtree_release(&self->tree_pool, + ts_subtree_from_mut(parent)); + array_swap(&self->trailing_extras, &self->trailing_extras2); + parent = ts_subtree_new_node(symbol, &next_slice_children, + production_id, self->language); + } + else + { + array_clear(&self->trailing_extras2); + ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); + } + } + + TSStateId state = ts_stack_state(self->stack, slice_version); + TSStateId next_state = + ts_language_next_state(self->language, state, symbol); + if (end_of_non_terminal_extra && next_state == state) + { + parent.ptr->extra = true; + } + if (is_fragile || pop.size > 1 || initial_version_count > 1) + { + parent.ptr->fragile_left = true; + parent.ptr->fragile_right = true; + parent.ptr->parse_state = TS_TREE_STATE_NONE; + } + else + { + parent.ptr->parse_state = state; + } + parent.ptr->dynamic_precedence += dynamic_precedence; + + // Push the parent node onto the stack, along with any extra tokens that + // were previously on top of the stack. + ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), + false, next_state); + for (uint32_t j = 0; j < self->trailing_extras.size; j++) + { + ts_stack_push(self->stack, slice_version, + self->trailing_extras.contents[j], false, next_state); + } + + for (StackVersion j = 0; j < slice_version; j++) + { + if (j == version) + continue; + if (ts_stack_merge(self->stack, j, slice_version)) + { + removed_version_count++; + break; + } + } + } + + // Return the first new stack version that was created. 
+ return ts_stack_version_count(self->stack) > initial_version_count + ? initial_version_count + : STACK_VERSION_NONE; +} + +static void ts_parser__accept(TSParser *self, StackVersion version, + Subtree lookahead) +{ + assert(ts_subtree_is_eof(lookahead)); + ts_stack_push(self->stack, version, lookahead, false, 1); + + StackSliceArray pop = ts_stack_pop_all(self->stack, version); + for (uint32_t i = 0; i < pop.size; i++) + { + SubtreeArray trees = pop.contents[i].subtrees; + + Subtree root = NULL_SUBTREE; + for (uint32_t j = trees.size - 1; j + 1 > 0; j--) + { + Subtree tree = trees.contents[j]; + if (!ts_subtree_extra(tree)) + { + assert(!tree.data.is_inline); + uint32_t child_count = ts_subtree_child_count(tree); + const Subtree *children = ts_subtree_children(tree); + for (uint32_t k = 0; k < child_count; k++) + { + ts_subtree_retain(children[k]); + } + array_splice(&trees, j, 1, child_count, children); + root = ts_subtree_from_mut(ts_subtree_new_node( + ts_subtree_symbol(tree), &trees, tree.ptr->production_id, + self->language)); + ts_subtree_release(&self->tree_pool, tree); + break; + } + } + + assert(root.ptr); + self->accept_count++; + + if (self->finished_tree.ptr) + { + if (ts_parser__select_tree(self, self->finished_tree, root)) + { + ts_subtree_release(&self->tree_pool, self->finished_tree); + self->finished_tree = root; + } + else + { + ts_subtree_release(&self->tree_pool, root); + } + } + else + { + self->finished_tree = root; + } + } + + ts_stack_remove_version(self->stack, pop.contents[0].version); + ts_stack_halt(self->stack, version); +} + +static bool ts_parser__do_all_potential_reductions( + TSParser *self, StackVersion starting_version, TSSymbol lookahead_symbol) +{ + uint32_t initial_version_count = ts_stack_version_count(self->stack); + + bool can_shift_lookahead_symbol = false; + StackVersion version = starting_version; + for (unsigned i = 0; true; i++) + { + uint32_t version_count = ts_stack_version_count(self->stack); + if (version >= 
version_count) + break; + + bool merged = false; + for (StackVersion j = initial_version_count; j < version; j++) + { + if (ts_stack_merge(self->stack, j, version)) + { + merged = true; + break; + } + } + if (merged) + continue; + + TSStateId state = ts_stack_state(self->stack, version); + bool has_shift_action = false; + array_clear(&self->reduce_actions); + + TSSymbol first_symbol, end_symbol; + if (lookahead_symbol != 0) + { + first_symbol = lookahead_symbol; + end_symbol = lookahead_symbol + 1; + } + else + { + first_symbol = 1; + end_symbol = self->language->token_count; + } + + for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) + { + TableEntry entry; + ts_language_table_entry(self->language, state, symbol, &entry); + for (uint32_t j = 0; j < entry.action_count; j++) + { + TSParseAction action = entry.actions[j]; + switch (action.type) + { + case TSParseActionTypeShift: + case TSParseActionTypeRecover: + if (!action.shift.extra && !action.shift.repetition) + has_shift_action = true; + break; + case TSParseActionTypeReduce: + if (action.reduce.child_count > 0) + ts_reduce_action_set_add( + &self->reduce_actions, + (ReduceAction){ + .symbol = action.reduce.symbol, + .count = action.reduce.child_count, + .dynamic_precedence = + action.reduce.dynamic_precedence, + .production_id = action.reduce.production_id, + }); + break; + default: + break; + } + } + } + + StackVersion reduction_version = STACK_VERSION_NONE; + for (uint32_t j = 0; j < self->reduce_actions.size; j++) + { + ReduceAction action = self->reduce_actions.contents[j]; + + reduction_version = ts_parser__reduce( + self, version, action.symbol, action.count, + action.dynamic_precedence, action.production_id, true, false); + } + + if (has_shift_action) + { + can_shift_lookahead_symbol = true; + } + else if (reduction_version != STACK_VERSION_NONE && + i < MAX_VERSION_COUNT) + { + ts_stack_renumber_version(self->stack, reduction_version, version); + continue; + } + else if 
(lookahead_symbol != 0) + { + ts_stack_remove_version(self->stack, version); + } + + if (version == starting_version) + { + version = version_count; + } + else + { + version++; + } + } + + return can_shift_lookahead_symbol; +} + +static bool ts_parser__recover_to_state(TSParser *self, StackVersion version, + unsigned depth, TSStateId goal_state) +{ + StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth); + StackVersion previous_version = STACK_VERSION_NONE; + + for (unsigned i = 0; i < pop.size; i++) + { + StackSlice slice = pop.contents[i]; + + if (slice.version == previous_version) + { + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); + array_erase(&pop, i--); + continue; + } + + if (ts_stack_state(self->stack, slice.version) != goal_state) + { + ts_stack_halt(self->stack, slice.version); + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); + array_erase(&pop, i--); + continue; + } + + SubtreeArray error_trees = + ts_stack_pop_error(self->stack, slice.version); + if (error_trees.size > 0) + { + assert(error_trees.size == 1); + Subtree error_tree = error_trees.contents[0]; + uint32_t error_child_count = ts_subtree_child_count(error_tree); + if (error_child_count > 0) + { + array_splice(&slice.subtrees, 0, 0, error_child_count, + ts_subtree_children(error_tree)); + for (unsigned j = 0; j < error_child_count; j++) + { + ts_subtree_retain(slice.subtrees.contents[j]); + } + } + ts_subtree_array_delete(&self->tree_pool, &error_trees); + } + + ts_subtree_array_remove_trailing_extras(&slice.subtrees, + &self->trailing_extras); + + if (slice.subtrees.size > 0) + { + Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, + self->language); + ts_stack_push(self->stack, slice.version, error, false, goal_state); + } + else + { + array_delete(&slice.subtrees); + } + + for (unsigned j = 0; j < self->trailing_extras.size; j++) + { + Subtree tree = self->trailing_extras.contents[j]; + ts_stack_push(self->stack, slice.version, tree, 
false, goal_state); + } + + previous_version = slice.version; + } + + return previous_version != STACK_VERSION_NONE; +} + +static void ts_parser__recover(TSParser *self, StackVersion version, + Subtree lookahead) +{ + bool did_recover = false; + unsigned previous_version_count = ts_stack_version_count(self->stack); + Length position = ts_stack_position(self->stack, version); + StackSummary *summary = ts_stack_get_summary(self->stack, version); + unsigned node_count_since_error = + ts_stack_node_count_since_error(self->stack, version); + unsigned current_error_cost = ts_stack_error_cost(self->stack, version); + + // When the parser is in the error state, there are two strategies for + // recovering with a given lookahead token: + // 1. Find a previous state on the stack in which that lookahead token would + // be valid. Then, + // create a new stack version that is in that state again. This entails + // popping all of the subtrees that have been pushed onto the stack since + // that previous state, and wrapping them in an ERROR node. + // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto + // the stack, and + // move on to the next lookahead token, remaining in the error state. + // + // First, try the strategy 1. Upon entering the error state, the parser + // recorded a summary of the previous parse states and their depths. Look at + // each state in the summary, to see if the current lookahead token would be + // valid in that state. + if (summary && !ts_subtree_is_error(lookahead)) + { + for (unsigned i = 0; i < summary->size; i++) + { + StackSummaryEntry entry = summary->contents[i]; + + if (entry.state == ERROR_STATE) + continue; + if (entry.position.bytes == position.bytes) + continue; + unsigned depth = entry.depth; + if (node_count_since_error > 0) + depth++; + + // Do not recover in ways that create redundant stack versions. 
+ bool would_merge = false; + for (unsigned j = 0; j < previous_version_count; j++) + { + if (ts_stack_state(self->stack, j) == entry.state && + ts_stack_position(self->stack, j).bytes == position.bytes) + { + would_merge = true; + break; + } + } + if (would_merge) + continue; + + // Do not recover if the result would clearly be worse than some + // existing stack version. + unsigned new_cost = + current_error_cost + entry.depth * ERROR_COST_PER_SKIPPED_TREE + + (position.bytes - entry.position.bytes) * + ERROR_COST_PER_SKIPPED_CHAR + + (position.extent.row - entry.position.extent.row) * + ERROR_COST_PER_SKIPPED_LINE; + if (ts_parser__better_version_exists(self, version, false, + new_cost)) + break; + + // If the current lookahead token is valid in some previous state, + // recover to that state. Then stop looking for further recoveries. + if (ts_language_has_actions(self->language, entry.state, + ts_subtree_symbol(lookahead))) + { + if (ts_parser__recover_to_state(self, version, depth, + entry.state)) + { + did_recover = true; + break; + } + } + } + } + + // In the process of attempting to recover, some stack versions may have + // been created and subsequently halted. Remove those versions. + for (unsigned i = previous_version_count; + i < ts_stack_version_count(self->stack); i++) + { + if (!ts_stack_is_active(self->stack, i)) + { + ts_stack_remove_version(self->stack, i--); + } + } + + // If strategy 1 succeeded, a new stack version will have been created which + // is able to handle the current lookahead token. Now, in addition, try + // strategy 2 described above: skip the current lookahead token by wrapping + // it in an ERROR node. + + // Don't pursue this additional strategy if there are already too many stack + // versions. 
+ if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) + { + ts_stack_halt(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); + return; + } + + if (did_recover && ts_subtree_has_external_scanner_state_change(lookahead)) + { + ts_stack_halt(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); + return; + } + + // If the parser is still in the error state at the end of the file, just + // wrap everything in an ERROR node and terminate. + if (ts_subtree_is_eof(lookahead)) + { + SubtreeArray children = array_new(); + Subtree parent = + ts_subtree_new_error_node(&children, false, self->language); + ts_stack_push(self->stack, version, parent, false, 1); + ts_parser__accept(self, version, lookahead); + return; + } + + // Do not recover if the result would clearly be worse than some existing + // stack version. + unsigned new_cost = + current_error_cost + ERROR_COST_PER_SKIPPED_TREE + + ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR + + ts_subtree_total_size(lookahead).extent.row * + ERROR_COST_PER_SKIPPED_LINE; + if (ts_parser__better_version_exists(self, version, false, new_cost)) + { + ts_stack_halt(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); + return; + } + + // If the current lookahead token is an extra token, mark it as extra. This + // means it won't be counted in error cost calculations. + unsigned n; + const TSParseAction *actions = ts_language_actions( + self->language, 1, ts_subtree_symbol(lookahead), &n); + if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && + actions[n - 1].shift.extra) + { + MutableSubtree mutable_lookahead = + ts_subtree_make_mut(&self->tree_pool, lookahead); + ts_subtree_set_extra(&mutable_lookahead, true); + lookahead = ts_subtree_from_mut(mutable_lookahead); + } + + // Wrap the lookahead token in an ERROR. 
+ SubtreeArray children = array_new(); + array_reserve(&children, 1); + array_push(&children, lookahead); + MutableSubtree error_repeat = ts_subtree_new_node( + ts_builtin_sym_error_repeat, &children, 0, self->language); + + // If other tokens have already been skipped, so there is already an ERROR + // at the top of the stack, then pop that ERROR off the stack and wrap the + // two ERRORs together into one larger ERROR. + if (node_count_since_error > 0) + { + StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1); + + // TODO: Figure out how to make this condition occur. + // See https://github.com/atom/atom/issues/18450#issuecomment-439579778 + // If multiple stack versions have merged at this point, just pick one + // of the errors arbitrarily and discard the rest. + if (pop.size > 1) + { + for (unsigned i = 1; i < pop.size; i++) + { + ts_subtree_array_delete(&self->tree_pool, + &pop.contents[i].subtrees); + } + while (ts_stack_version_count(self->stack) > + pop.contents[0].version + 1) + { + ts_stack_remove_version(self->stack, + pop.contents[0].version + 1); + } + } + + ts_stack_renumber_version(self->stack, pop.contents[0].version, + version); + array_push(&pop.contents[0].subtrees, + ts_subtree_from_mut(error_repeat)); + error_repeat = + ts_subtree_new_node(ts_builtin_sym_error_repeat, + &pop.contents[0].subtrees, 0, self->language); + } + + // Push the new ERROR onto the stack. + ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), + false, ERROR_STATE); + if (ts_subtree_has_external_tokens(lookahead)) + { + ts_stack_set_last_external_token( + self->stack, version, ts_subtree_last_external_token(lookahead)); + } +} + +static void ts_parser__handle_error(TSParser *self, StackVersion version, + Subtree lookahead) +{ + uint32_t previous_version_count = ts_stack_version_count(self->stack); + + // Perform any reductions that can happen in this state, regardless of the + // lookahead. 
After skipping one or more invalid tokens, the parser might + // find a token that would have allowed a reduction to take place. + ts_parser__do_all_potential_reductions(self, version, 0); + uint32_t version_count = ts_stack_version_count(self->stack); + Length position = ts_stack_position(self->stack, version); + + // Push a discontinuity onto the stack. Merge all of the stack versions that + // were created in the previous step. + bool did_insert_missing_token = false; + for (StackVersion v = version; v < version_count;) + { + if (!did_insert_missing_token) + { + TSStateId state = ts_stack_state(self->stack, v); + for (TSSymbol missing_symbol = 1; + missing_symbol < (uint16_t)self->language->token_count; + missing_symbol++) + { + TSStateId state_after_missing_symbol = ts_language_next_state( + self->language, state, missing_symbol); + if (state_after_missing_symbol == 0 || + state_after_missing_symbol == state) + { + continue; + } + + if (ts_language_has_reduce_action( + self->language, state_after_missing_symbol, + ts_subtree_leaf_symbol(lookahead))) + { + // In case the parser is currently outside of any included + // range, the lexer will snap to the beginning of the next + // included range. The missing token's padding must be + // assigned to position it within the next included range. 
+ ts_lexer_reset(&self->lexer, position); + ts_lexer_mark_end(&self->lexer); + Length padding = + length_sub(self->lexer.token_end_position, position); + uint32_t lookahead_bytes = + ts_subtree_total_bytes(lookahead) + + ts_subtree_lookahead_bytes(lookahead); + + StackVersion version_with_missing_tree = + ts_stack_copy_version(self->stack, v); + Subtree missing_tree = ts_subtree_new_missing_leaf( + &self->tree_pool, missing_symbol, padding, + lookahead_bytes, self->language); + ts_stack_push(self->stack, version_with_missing_tree, + missing_tree, false, + state_after_missing_symbol); + + if (ts_parser__do_all_potential_reductions( + self, version_with_missing_tree, + ts_subtree_leaf_symbol(lookahead))) + { + did_insert_missing_token = true; + break; + } + } + } + } + + ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE); + v = (v == version) ? previous_version_count : v + 1; + } + + for (unsigned i = previous_version_count; i < version_count; i++) + { + bool did_merge = + ts_stack_merge(self->stack, version, previous_version_count); + assert(did_merge); + (void)did_merge; // fix warning/error with clang -Os + } + + ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH); + + // Begin recovery with the current lookahead node, rather than waiting for + // the next turn of the parse loop. This ensures that the tree accounts for + // the current lookahead token's "lookahead bytes" value, which describes + // how far the lexer needed to look ahead beyond the content of the token in + // order to recognize it. 
+ if (ts_subtree_child_count(lookahead) > 0) + { + ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, + &self->reusable_node); + } + ts_parser__recover(self, version, lookahead); +} + +static bool ts_parser__advance(TSParser *self, StackVersion version, + bool allow_node_reuse) +{ + TSStateId state = ts_stack_state(self->stack, version); + uint32_t position = ts_stack_position(self->stack, version).bytes; + Subtree last_external_token = + ts_stack_last_external_token(self->stack, version); + + bool did_reuse = true; + Subtree lookahead = NULL_SUBTREE; + TableEntry table_entry = {.action_count = 0}; + + // If possible, reuse a node from the previous syntax tree. + if (allow_node_reuse) + { + lookahead = ts_parser__reuse_node(self, version, &state, position, + last_external_token, &table_entry); + } + + // If no node from the previous syntax tree could be reused, then try to + // reuse the token previously returned by the lexer. + if (!lookahead.ptr) + { + did_reuse = false; + lookahead = ts_parser__get_cached_token( + self, state, position, last_external_token, &table_entry); + } + + bool needs_lex = !lookahead.ptr; + for (;;) + { + // Otherwise, re-run the lexer. + if (needs_lex) + { + needs_lex = false; + lookahead = ts_parser__lex(self, version, state); + if (self->has_scanner_error) + return false; + + if (lookahead.ptr) + { + ts_parser__set_cached_token(self, position, last_external_token, + lookahead); + ts_language_table_entry(self->language, state, + ts_subtree_symbol(lookahead), + &table_entry); + } + + // When parsing a non-terminal extra, a null lookahead indicates the + // end of the rule. The reduction is stored in the EOF table entry. + // After the reduction, the lexer needs to be run again. + else + { + ts_language_table_entry(self->language, state, + ts_builtin_sym_end, &table_entry); + } + } + + // If a cancellation flag or a timeout was provided, then check every + // time a fixed number of parse actions has been processed. 
+ if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) + { + self->operation_count = 0; + } + + // Process each parse action for the current lookahead token in + // the current state. If there are multiple actions, then this is + // an ambiguous state. REDUCE actions always create a new stack + // version, whereas SHIFT actions update the existing stack version + // and terminate this loop. + StackVersion last_reduction_version = STACK_VERSION_NONE; + for (uint32_t i = 0; i < table_entry.action_count; i++) + { + TSParseAction action = table_entry.actions[i]; + + switch (action.type) + { + case TSParseActionTypeShift: { + if (action.shift.repetition) + break; + TSStateId next_state; + if (action.shift.extra) + { + next_state = state; + } + else + { + next_state = action.shift.state; + } + + if (ts_subtree_child_count(lookahead) > 0) + { + ts_parser__breakdown_lookahead(self, &lookahead, state, + &self->reusable_node); + next_state = ts_language_next_state( + self->language, state, ts_subtree_symbol(lookahead)); + } + + ts_parser__shift(self, version, next_state, lookahead, + action.shift.extra); + if (did_reuse) + reusable_node_advance(&self->reusable_node); + return true; + } + + case TSParseActionTypeReduce: { + bool is_fragile = table_entry.action_count > 1; + bool end_of_non_terminal_extra = lookahead.ptr == NULL; + StackVersion reduction_version = ts_parser__reduce( + self, version, action.reduce.symbol, + action.reduce.child_count, action.reduce.dynamic_precedence, + action.reduce.production_id, is_fragile, + end_of_non_terminal_extra); + if (reduction_version != STACK_VERSION_NONE) + { + last_reduction_version = reduction_version; + } + break; + } + + case TSParseActionTypeAccept: { + ts_parser__accept(self, version, lookahead); + return true; + } + + case TSParseActionTypeRecover: { + if (ts_subtree_child_count(lookahead) > 0) + { + ts_parser__breakdown_lookahead( + self, &lookahead, ERROR_STATE, &self->reusable_node); + } + + ts_parser__recover(self, 
version, lookahead); + if (did_reuse) + reusable_node_advance(&self->reusable_node); + return true; + } + } + } + + // If a reduction was performed, then replace the current stack version + // with one of the stack versions created by a reduction, and continue + // processing this version of the stack with the same lookahead symbol. + if (last_reduction_version != STACK_VERSION_NONE) + { + ts_stack_renumber_version(self->stack, last_reduction_version, + version); + state = ts_stack_state(self->stack, version); + + // At the end of a non-terminal extra rule, the lexer will return a + // null subtree, because the parser needs to perform a fixed + // reduction regardless of the lookahead node. After performing that + // reduction, (and completing the non-terminal extra rule) run the + // lexer again based on the current parse state. + if (!lookahead.ptr) + { + needs_lex = true; + } + else + { + ts_language_table_entry(self->language, state, + ts_subtree_leaf_symbol(lookahead), + &table_entry); + } + + continue; + } + + // A non-terminal extra rule was reduced and merged into an existing + // stack version. This version can be discarded. + if (!lookahead.ptr) + { + ts_stack_halt(self->stack, version); + return true; + } + + // If there were no parse actions for the current lookahead token, then + // it is not valid in this state. If the current lookahead token is a + // keyword, then switch to treating it as the normal word token if that + // token is valid in this state. 
+ if (ts_subtree_is_keyword(lookahead) && + ts_subtree_symbol(lookahead) != + self->language->keyword_capture_token) + { + ts_language_table_entry(self->language, state, + self->language->keyword_capture_token, + &table_entry); + if (table_entry.action_count > 0) + { + + MutableSubtree mutable_lookahead = + ts_subtree_make_mut(&self->tree_pool, lookahead); + ts_subtree_set_symbol(&mutable_lookahead, + self->language->keyword_capture_token, + self->language); + lookahead = ts_subtree_from_mut(mutable_lookahead); + continue; + } + } + + // If the current lookahead token is not valid and the parser is + // already in the error state, restart the error recovery process. + // TODO - can this be unified with the other `RECOVER` case above? + if (state == ERROR_STATE) + { + ts_parser__recover(self, version, lookahead); + return true; + } + + // If the current lookahead token is not valid and the previous + // subtree on the stack was reused from an old tree, it isn't actually + // valid to reuse it. Remove it from the stack, and in its place, + // push each of its children. Then try again to process the current + // lookahead. + if (ts_parser__breakdown_top_of_stack(self, version)) + { + state = ts_stack_state(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); + needs_lex = true; + continue; + } + + // At this point, the current lookahead token is definitely not valid + // for this parse stack version. Mark this version as paused and + // continue processing any other stack versions that might exist. If + // some other version advances successfully, then this version can + // simply be removed. But if all versions end up paused, then error + // recovery is needed. 
+ ts_stack_pause(self->stack, version, lookahead); + return true; + } +} + +static unsigned ts_parser__condense_stack(TSParser *self) +{ + bool made_changes = false; + unsigned min_error_cost = UINT_MAX; + for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) + { + // Prune any versions that have been marked for removal. + if (ts_stack_is_halted(self->stack, i)) + { + ts_stack_remove_version(self->stack, i); + i--; + continue; + } + + // Keep track of the minimum error cost of any stack version so + // that it can be returned. + ErrorStatus status_i = ts_parser__version_status(self, i); + if (!status_i.is_in_error && status_i.cost < min_error_cost) + { + min_error_cost = status_i.cost; + } + + // Examine each pair of stack versions, removing any versions that + // are clearly worse than another version. Ensure that the versions + // are ordered from most promising to least promising. + for (StackVersion j = 0; j < i; j++) + { + ErrorStatus status_j = ts_parser__version_status(self, j); + + switch (ts_parser__compare_versions(self, status_j, status_i)) + { + case ErrorComparisonTakeLeft: + made_changes = true; + ts_stack_remove_version(self->stack, i); + i--; + j = i; + break; + + case ErrorComparisonPreferLeft: + case ErrorComparisonNone: + if (ts_stack_merge(self->stack, j, i)) + { + made_changes = true; + i--; + j = i; + } + break; + + case ErrorComparisonPreferRight: + made_changes = true; + if (ts_stack_merge(self->stack, j, i)) + { + i--; + j = i; + } + else + { + ts_stack_swap_versions(self->stack, i, j); + } + break; + + case ErrorComparisonTakeRight: + made_changes = true; + ts_stack_remove_version(self->stack, j); + i--; + j--; + break; + } + } + } + + // Enforce a hard upper bound on the number of stack versions by + // discarding the least promising versions. 
+ while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) + { + ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); + made_changes = true; + } + + // If the best-performing stack version is currently paused, or all + // versions are paused, then resume the best paused version and begin + // the error recovery process. Otherwise, remove the paused versions. + if (ts_stack_version_count(self->stack) > 0) + { + bool has_unpaused_version = false; + for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; + i++) + { + if (ts_stack_is_paused(self->stack, i)) + { + if (!has_unpaused_version && + self->accept_count < MAX_VERSION_COUNT) + { + min_error_cost = ts_stack_error_cost(self->stack, i); + Subtree lookahead = ts_stack_resume(self->stack, i); + ts_parser__handle_error(self, i, lookahead); + has_unpaused_version = true; + } + else + { + ts_stack_remove_version(self->stack, i); + i--; + n--; + } + } + else + { + has_unpaused_version = true; + } + } + } + return min_error_cost; +} + +static bool ts_parser_has_outstanding_parse(TSParser *self) +{ + return (self->external_scanner_payload || + ts_stack_state(self->stack, 0) != 1 || + ts_stack_node_count_since_error(self->stack, 0) != 0); +} + +// Parser - Public + +TSParser *ts_parser_new(void) +{ + TSParser *self = calloc(1, sizeof(TSParser)); + ts_lexer_init(&self->lexer); + array_init(&self->reduce_actions); + array_reserve(&self->reduce_actions, 4); + self->tree_pool = ts_subtree_pool_new(32); + self->stack = ts_stack_new(&self->tree_pool); + self->finished_tree = NULL_SUBTREE; + self->reusable_node = reusable_node_new(); + self->cancellation_flag = NULL; + self->timeout_duration = 0; + self->language = NULL; + self->has_scanner_error = false; + self->external_scanner_payload = NULL; + self->end_clock = 0; + self->operation_count = 0; + self->old_tree = NULL_SUBTREE; + self->included_range_differences = (TSRangeArray)array_new(); + self->included_range_difference_index = 0; + 
ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); + return self; +} + +void ts_parser_delete(TSParser *self) +{ + if (!self) + return; + + ts_parser_set_language(self, NULL); + ts_stack_delete(self->stack); + if (self->reduce_actions.contents) + { + array_delete(&self->reduce_actions); + } + if (self->included_range_differences.contents) + { + array_delete(&self->included_range_differences); + } + if (self->old_tree.ptr) + { + ts_subtree_release(&self->tree_pool, self->old_tree); + self->old_tree = NULL_SUBTREE; + } + ts_lexer_delete(&self->lexer); + ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); + ts_subtree_pool_delete(&self->tree_pool); + reusable_node_delete(&self->reusable_node); + array_delete(&self->trailing_extras); + array_delete(&self->trailing_extras2); + array_delete(&self->scratch_trees); + free(self); +} + +const TSLanguage *ts_parser_language(const TSParser *self) +{ + return self->language; +} + +bool ts_parser_set_language(TSParser *self, const TSLanguage *language) +{ + ts_parser_reset(self); + ts_language_delete(self->language); + self->language = NULL; + + self->language = ts_language_copy(language); + return true; +} + +TSLogger ts_parser_logger(const TSParser *self) +{ + return self->lexer.logger; +} + +void ts_parser_set_logger(TSParser *self, TSLogger logger) +{ + self->lexer.logger = logger; +} + +void ts_parser_print_dot_graphs(TSParser *self, int fd) +{ + (void)(self); + (void)(fd); +} + +const size_t *ts_parser_cancellation_flag(const TSParser *self) +{ + return (const size_t *)self->cancellation_flag; +} + +void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) +{ + self->cancellation_flag = (const volatile size_t *)flag; +} + +uint64_t ts_parser_timeout_micros(const TSParser *self) +{ + (void)(self); + return 0; +} + +void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) +{ + (void)(timeout_micros); + self->timeout_duration = 0; +} + +bool 
ts_parser_set_included_ranges(TSParser *self, const TSRange *ranges, + uint32_t count) +{ + return ts_lexer_set_included_ranges(&self->lexer, ranges, count); +} + +const TSRange *ts_parser_included_ranges(const TSParser *self, uint32_t *count) +{ + return ts_lexer_included_ranges(&self->lexer, count); +} + +void ts_parser_reset(TSParser *self) +{ + ts_parser__external_scanner_destroy(self); + if (self->old_tree.ptr) + { + ts_subtree_release(&self->tree_pool, self->old_tree); + self->old_tree = NULL_SUBTREE; + } + + reusable_node_clear(&self->reusable_node); + ts_lexer_reset(&self->lexer, length_zero()); + ts_stack_clear(self->stack); + ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); + if (self->finished_tree.ptr) + { + ts_subtree_release(&self->tree_pool, self->finished_tree); + self->finished_tree = NULL_SUBTREE; + } + self->accept_count = 0; + self->has_scanner_error = false; +} + +TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) +{ + TSTree *result = NULL; + if (!self->language || !input.read) + return NULL; + + ts_lexer_set_input(&self->lexer, input); + array_clear(&self->included_range_differences); + self->included_range_difference_index = 0; + + if (ts_parser_has_outstanding_parse(self)) + { + } + else + { + ts_parser__external_scanner_create(self); + if (self->has_scanner_error) + goto exit; + + if (old_tree) + { + ts_subtree_retain(old_tree->root); + self->old_tree = old_tree->root; + ts_range_array_get_changed_ranges( + old_tree->included_ranges, old_tree->included_range_count, + self->lexer.included_ranges, self->lexer.included_range_count, + &self->included_range_differences); + reusable_node_reset(&self->reusable_node, old_tree->root); + } + else + { + reusable_node_clear(&self->reusable_node); + } + } + + self->operation_count = 0; + + uint32_t position = 0, last_position = 0, version_count = 0; + do + { + for (StackVersion version = 0; + version_count = ts_stack_version_count(self->stack), + version < 
version_count; + version++) + { + bool allow_node_reuse = version_count == 1; + while (ts_stack_is_active(self->stack, version)) + { + + if (!ts_parser__advance(self, version, allow_node_reuse)) + { + if (self->has_scanner_error) + goto exit; + return NULL; + } + + position = ts_stack_position(self->stack, version).bytes; + if (position > last_position || + (version > 0 && position == last_position)) + { + last_position = position; + break; + } + } + } + + // After advancing each version of the stack, re-sort the versions by + // their cost, removing any versions that are no longer worth pursuing. + unsigned min_error_cost = ts_parser__condense_stack(self); + + // If there's already a finished parse tree that's better than any + // in-progress version, then terminate parsing. Clear the parse stack to + // remove any extra references to subtrees within the finished tree, + // ensuring that these subtrees can be safely mutated in-place for + // rebalancing. + if (self->finished_tree.ptr && + ts_subtree_error_cost(self->finished_tree) < min_error_cost) + { + ts_stack_clear(self->stack); + break; + } + + while (self->included_range_difference_index < + self->included_range_differences.size) + { + TSRange *range = + &self->included_range_differences + .contents[self->included_range_difference_index]; + if (range->end_byte <= position) + { + self->included_range_difference_index++; + } + else + { + break; + } + } + } while (version_count != 0); + + assert(self->finished_tree.ptr); + ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language); + + result = ts_tree_new(self->finished_tree, self->language, + self->lexer.included_ranges, + self->lexer.included_range_count); + self->finished_tree = NULL_SUBTREE; + +exit: + ts_parser_reset(self); + return result; +} + +TSTree *ts_parser_parse_string_encoding(TSParser *self, const TSTree *old_tree, + const char *string, uint32_t length, + TSInputEncoding encoding); + +TSTree *ts_parser_parse_string(TSParser *self, 
const TSTree *old_tree, + const char *string, uint32_t length) +{ + return ts_parser_parse_string_encoding(self, old_tree, string, length, + TSInputEncodingUTF8); +} + +TSTree *ts_parser_parse_string_encoding(TSParser *self, const TSTree *old_tree, + const char *string, uint32_t length, + TSInputEncoding encoding) +{ + TSStringInput input = {string, length}; + return ts_parser_parse(self, old_tree, + (TSInput){ + &input, + ts_string_input_read, + encoding, + }); +} + +/* + * Stream - A sequence of unicode characters derived from a UTF8 string. + * This struct is used in parsing queries from S-expressions. + */ +typedef struct +{ + const char *input; + const char *start; + const char *end; + int32_t next; + uint8_t next_size; +} Stream; + +/* + * QueryStep - A step in the process of matching a query. Each node within + * a query S-expression corresponds to one of these steps. An entire pattern + * is represented as a sequence of these steps. The basic properties of a + * node are represented by these fields: + * - `symbol` - The grammar symbol to match. A zero value represents the + * wildcard symbol, '_'. + * - `field` - The field name to match. A zero value means that a field name + * was not specified. + * - `capture_ids` - An array of integers representing the names of captures + * associated with this node in the pattern, terminated by a `NONE` value. + * - `depth` - The depth where this node occurs in the pattern. The root node + * of the pattern has depth zero. + * - `negated_field_list_id` - An id representing a set of fields that must + * not be present on a node matching this step. + * + * Steps have some additional fields in order to handle the `.` (or "anchor") + * operator, which forbids additional child nodes: + * - `is_immediate` - Indicates that the node matching this step cannot be + * preceded by other sibling nodes that weren't specified in the pattern. 
+ * - `is_last_child` - Indicates that the node matching this step cannot have + * any subsequent named siblings. + * + * For simple patterns, steps are matched in sequential order. But in order to + * handle alternative/repeated/optional sub-patterns, query steps are not always + * structured as a linear sequence; they sometimes need to split and merge. This + * is done using the following fields: + * - `alternative_index` - The index of a different query step that serves as + * an alternative to this step. A `NONE` value represents no alternative. + * When a query state reaches a step with an alternative index, the state + * is duplicated, with one copy remaining at the original step, and one copy + * moving to the alternative step. The alternative may have its own + * alternative step, so this splitting is an iterative process. + * - `is_dead_end` - Indicates that this state cannot be passed directly, and + * exists only in order to redirect to an alternative index, with no + * splitting. + * - `is_pass_through` - Indicates that state has no matching logic of its own, + * and exists only to split a state. One copy of the state advances + * immediately to the next step, and one moves to the alternative step. + * - `alternative_is_immediate` - Indicates that this step's alternative step + * should be treated as if `is_immediate` is true. + * + * Steps also store some derived state that summarizes how they relate to other + * steps within the same pattern. This is used to optimize the matching process: + * - `contains_captures` - Indicates that this step or one of its child steps + * has a non-empty `capture_ids` list. + * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then + * it and all of its subsequent sibling steps within the same parent pattern + * are guaranteed to match. + * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but + * for the entire top-level pattern. 
When iterating through a query's + * captures using `ts_query_cursor_next_capture`, this field is used to + * detect that a capture can safely be returned from a match that has not + * even completed yet. + */ +typedef struct +{ + TSSymbol symbol; + TSSymbol supertype_symbol; + TSFieldId field; + uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; + uint16_t depth; + uint16_t alternative_index; + uint16_t negated_field_list_id; + bool is_named : 1; + bool is_immediate : 1; + bool is_last_child : 1; + bool is_pass_through : 1; + bool is_dead_end : 1; + bool alternative_is_immediate : 1; + bool contains_captures : 1; + bool root_pattern_guaranteed : 1; + bool parent_pattern_guaranteed : 1; +} QueryStep; + +/* + * Slice - A slice of an external array. Within a query, capture names, + * literal string values, and predicate step information are stored in three + * contiguous arrays. Individual captures, string values, and predicates are + * represented as slices of these three arrays. + */ +typedef struct +{ + uint32_t offset; + uint32_t length; +} Slice; + +/* + * SymbolTable - a two-way mapping of strings to ids. + */ +typedef struct +{ + Array(char) characters; + Array(Slice) slices; +} SymbolTable; + +/** + * CaptureQuantififers - a data structure holding the quantifiers of pattern + * captures. + */ +typedef Array(uint8_t) CaptureQuantifiers; + +/* + * PatternEntry - Information about the starting point for matching a particular + * pattern. These entries are stored in a 'pattern map' - a sorted array that + * makes it possible to efficiently lookup patterns based on the symbol for + * their first step. The entry consists of the following fields: + * - `pattern_index` - the index of the pattern within the query + * - `step_index` - the index of the pattern's first step in the shared `steps` + * array + * - `is_rooted` - whether or not the pattern has a single root node. 
This + * property affects decisions about whether or not to start the pattern for + * nodes outside of a QueryCursor's range restriction. + */ +typedef struct +{ + uint16_t step_index; + uint16_t pattern_index; + bool is_rooted; +} PatternEntry; + +typedef struct +{ + Slice steps; + Slice predicate_steps; + uint32_t start_byte; + bool is_non_local; +} QueryPattern; + +typedef struct +{ + uint32_t byte_offset; + uint16_t step_index; +} StepOffset; + +/* + * QueryState - The state of an in-progress match of a particular pattern + * in a query. While executing, a `TSQueryCursor` must keep track of a number + * of possible in-progress matches. Each of those possible matches is + * represented as one of these states. Fields: + * - `id` - A numeric id that is exposed to the public API. This allows the + * caller to remove a given match, preventing any more of its captures + * from being returned. + * - `start_depth` - The depth in the tree where the first step of the state's + * pattern was matched. + * - `pattern_index` - The pattern that the state is matching. + * - `consumed_capture_count` - The number of captures from this match that + * have already been returned. + * - `capture_list_id` - A numeric id that can be used to retrieve the state's + * list of captures from the `CaptureListPool`. + * - `seeking_immediate_match` - A flag that indicates that the state's next + * step must be matched by the very next sibling. This is used when + * processing repetitions. + * - `has_in_progress_alternatives` - A flag that indicates that there is are + * other states that have the same captures as this state, but are at + * different steps in their pattern. This means that in order to obey the + * 'longest-match' rule, this state should not be returned as a match until + * it is clear that there can be no other alternative match with more + * captures. 
+ */ +typedef struct +{ + uint32_t id; + uint32_t capture_list_id; + uint16_t start_depth; + uint16_t step_index; + uint16_t pattern_index; + uint16_t consumed_capture_count : 12; + bool seeking_immediate_match : 1; + bool has_in_progress_alternatives : 1; + bool dead : 1; + bool needs_parent : 1; +} QueryState; + +typedef Array(TSQueryCapture) CaptureList; + +/* + * CaptureListPool - A collection of *lists* of captures. Each query state needs + * to maintain its own list of captures. To avoid repeated allocations, this + * struct maintains a fixed set of capture lists, and keeps track of which ones + * are currently in use by a query state. + */ +typedef struct +{ + Array(CaptureList) list; + CaptureList empty_list; + // The maximum number of capture lists that we are allowed to allocate. We + // never allow `list` to allocate more entries than this, dropping pending + // matches if needed to stay under the limit. + uint32_t max_capture_list_count; + // The number of capture lists allocated in `list` that are not currently in + // use. We reuse those existing-but-unused capture lists before trying to + // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture + // list's length to indicate that it's not in use. + uint32_t free_capture_list_count; +} CaptureListPool; + +/* + * AnalysisState - The state needed for walking the parse table when analyzing + * a query pattern, to determine at which steps the pattern might fail to match. 
+ */ +typedef struct +{ + TSStateId parse_state; + TSSymbol parent_symbol; + uint16_t child_index; + TSFieldId field_id : 15; + bool done : 1; +} AnalysisStateEntry; + +typedef struct +{ + AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH]; + uint16_t depth; + uint16_t step_index; + TSSymbol root_symbol; +} AnalysisState; + +typedef Array(AnalysisState *) AnalysisStateSet; + +typedef struct +{ + AnalysisStateSet states; + AnalysisStateSet next_states; + AnalysisStateSet deeper_states; + AnalysisStateSet state_pool; + Array(uint16_t) final_step_indices; + Array(TSSymbol) finished_parent_symbols; + bool did_abort; +} QueryAnalysis; + +/* + * AnalysisSubgraph - A subset of the states in the parse table that are used + * in constructing nodes with a certain symbol. Each state is accompanied by + * some information about the possible node that could be produced in + * downstream states. + */ +typedef struct +{ + TSStateId state; + uint16_t production_id; + uint8_t child_index : 7; + bool done : 1; +} AnalysisSubgraphNode; + +typedef struct +{ + TSSymbol symbol; + Array(TSStateId) start_states; + Array(AnalysisSubgraphNode) nodes; +} AnalysisSubgraph; + +typedef Array(AnalysisSubgraph) AnalysisSubgraphArray; + +/* + * StatePredecessorMap - A map that stores the predecessors of each parse state. + * This is used during query analysis to determine which parse states can lead + * to which reduce actions. + */ +typedef struct +{ + TSStateId *contents; +} StatePredecessorMap; + +/* + * TSQuery - A tree query, compiled from a string of S-expressions. The query + * itself is immutable. The mutable state used in the process of executing the + * query is stored in a `TSQueryCursor`. 
+ */ +struct TSQuery +{ + SymbolTable captures; + SymbolTable predicate_values; + Array(CaptureQuantifiers) capture_quantifiers; + Array(QueryStep) steps; + Array(PatternEntry) pattern_map; + Array(TSQueryPredicateStep) predicate_steps; + Array(QueryPattern) patterns; + Array(StepOffset) step_offsets; + Array(TSFieldId) negated_fields; + Array(char) string_buffer; + Array(TSSymbol) repeat_symbols_with_rootless_patterns; + const TSLanguage *language; + uint16_t wildcard_root_pattern_count; +}; + +/* + * TSQueryCursor - A stateful struct used to execute a query on a tree. + */ +struct TSQueryCursor +{ + const TSQuery *query; + TSTreeCursor cursor; + Array(QueryState) states; + Array(QueryState) finished_states; + CaptureListPool capture_list_pool; + uint32_t depth; + uint32_t max_start_depth; + uint32_t start_byte; + uint32_t end_byte; + TSPoint start_point; + TSPoint end_point; + uint32_t next_state_id; + bool on_visible_node; + bool ascending; + bool halted; + bool did_exceed_match_limit; +}; + +static const TSQueryError PARENT_DONE = -1; +static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; +static const uint16_t NONE = UINT16_MAX; +static const TSSymbol WILDCARD_SYMBOL = 0; + +/********** + * Stream + **********/ + +// Advance to the next unicode code point in the stream. +static bool stream_advance(Stream *self) +{ + self->input += self->next_size; + if (self->input < self->end) + { + uint32_t size = + ascii_decode((const uint8_t *)self->input, + (uint32_t)(self->end - self->input), &self->next); + if (size > 0) + { + self->next_size = size; + return true; + } + } + else + { + self->next_size = 0; + self->next = '\0'; + } + return false; +} + +// Reset the stream to the given input position, represented as a pointer +// into the input string. 
+static void stream_reset(Stream *self, const char *input) +{ + self->input = input; + self->next_size = 0; + stream_advance(self); +} + +static Stream stream_new(const char *string, uint32_t length) +{ + Stream self = { + .next = 0, + .input = string, + .start = string, + .end = string + length, + }; + stream_advance(&self); + return self; +} + +static void stream_skip_whitespace(Stream *self) +{ + for (;;) + { + if (isspace(self->next)) + { + stream_advance(self); + } + else if (self->next == ';') + { + // skip over comments + stream_advance(self); + while (self->next && self->next != '\n') + { + if (!stream_advance(self)) + break; + } + } + else + { + break; + } + } +} + +static bool stream_is_ident_start(Stream *self) +{ + return isalnum(self->next) || self->next == '_' || self->next == '-'; +} + +static void stream_scan_identifier(Stream *stream) +{ + do + { + stream_advance(stream); + } while (isalnum(stream->next) || stream->next == '_' || + stream->next == '-' || stream->next == '.' || + stream->next == '?' || stream->next == '!'); +} + +static uint32_t stream_offset(Stream *self) +{ + return (uint32_t)(self->input - self->start); +} + +/****************** + * CaptureListPool + ******************/ + +static CaptureListPool capture_list_pool_new(void) +{ + return (CaptureListPool){ + .list = array_new(), + .empty_list = array_new(), + .max_capture_list_count = UINT32_MAX, + .free_capture_list_count = 0, + }; +} + +static void capture_list_pool_reset(CaptureListPool *self) +{ + for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) + { + // This invalid size means that the list is not in use. 
+ self->list.contents[i].size = UINT32_MAX; + } + self->free_capture_list_count = self->list.size; +} + +static void capture_list_pool_delete(CaptureListPool *self) +{ + for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) + { + array_delete(&self->list.contents[i]); + } + array_delete(&self->list); +} + +static const CaptureList *capture_list_pool_get(const CaptureListPool *self, + uint16_t id) +{ + if (id >= self->list.size) + return &self->empty_list; + return &self->list.contents[id]; +} + +static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, + uint16_t id) +{ + assert(id < self->list.size); + return &self->list.contents[id]; +} + +static bool capture_list_pool_is_empty(const CaptureListPool *self) +{ + // The capture list pool is empty if all allocated lists are in use, and we + // have reached the maximum allowed number of allocated lists. + return self->free_capture_list_count == 0 && + self->list.size >= self->max_capture_list_count; +} + +static uint16_t capture_list_pool_acquire(CaptureListPool *self) +{ + // First see if any already allocated capture list is currently unused. + if (self->free_capture_list_count > 0) + { + for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) + { + if (self->list.contents[i].size == UINT32_MAX) + { + array_clear(&self->list.contents[i]); + self->free_capture_list_count--; + return i; + } + } + } + + // Otherwise allocate and initialize a new capture list, as long as that + // doesn't put us over the requested maximum. 
+ uint32_t i = self->list.size; + if (i >= self->max_capture_list_count) + { + return NONE; + } + CaptureList list; + array_init(&list); + array_push(&self->list, list); + return i; +} + +static void capture_list_pool_release(CaptureListPool *self, uint16_t id) +{ + if (id >= self->list.size) + return; + self->list.contents[id].size = UINT32_MAX; + self->free_capture_list_count++; +} + +/************** + * Quantifiers + **************/ + +static TSQuantifier quantifier_mul(TSQuantifier left, TSQuantifier right) +{ + switch (left) + { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + switch (right) + { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + }; + break; + case TSQuantifierZeroOrMore: + switch (right) + { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + }; + break; + case TSQuantifierOne: + return right; + case TSQuantifierOneOrMore: + switch (right) + { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + } + return TSQuantifierZero; // to make compiler happy, but all cases should be + // covered above! 
+} + +static TSQuantifier quantifier_join(TSQuantifier left, TSQuantifier right) +{ + switch (left) + { + case TSQuantifierZero: + switch (right) + { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + }; + break; + case TSQuantifierZeroOrOne: + switch (right) + { + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + return TSQuantifierZeroOrOne; + break; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + break; + }; + break; + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + switch (right) + { + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + return TSQuantifierOne; + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierOneOrMore: + switch (right) + { + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + } + return TSQuantifierZero; // to make compiler happy, but all cases should be + // covered above! 
+} + +static TSQuantifier quantifier_add(TSQuantifier left, TSQuantifier right) +{ + switch (left) + { + case TSQuantifierZero: + return right; + case TSQuantifierZeroOrOne: + switch (right) + { + case TSQuantifierZero: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierZeroOrMore: + switch (right) + { + case TSQuantifierZero: + return TSQuantifierZeroOrMore; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierOne: + switch (right) + { + case TSQuantifierZero: + return TSQuantifierOne; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + } + return TSQuantifierZero; // to make compiler happy, but all cases should be + // covered above! 
+} + +// Create new capture quantifiers structure +static CaptureQuantifiers capture_quantifiers_new(void) +{ + return (CaptureQuantifiers)array_new(); +} + +// Delete capture quantifiers structure +static void capture_quantifiers_delete(CaptureQuantifiers *self) +{ + array_delete(self); +} + +// Clear capture quantifiers structure +static void capture_quantifiers_clear(CaptureQuantifiers *self) +{ + array_clear(self); +} + +// Replace capture quantifiers with the given quantifiers +static void capture_quantifiers_replace(CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers) +{ + array_clear(self); + array_push_all(self, quantifiers); +} + +// Return capture quantifier for the given capture id +static TSQuantifier capture_quantifier_for_id(const CaptureQuantifiers *self, + uint16_t id) +{ + return (self->size <= id) ? TSQuantifierZero + : (TSQuantifier)*array_get(self, id); +} + +// Add the given quantifier to the current value for id +static void capture_quantifiers_add_for_id(CaptureQuantifiers *self, + uint16_t id, TSQuantifier quantifier) +{ + if (self->size <= id) + { + array_grow_by(self, id + 1 - self->size); + } + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = + (uint8_t)quantifier_add((TSQuantifier)*own_quantifier, quantifier); +} + +// Point-wise add the given quantifiers to the current values +static void capture_quantifiers_add_all(CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers) +{ + if (self->size < quantifiers->size) + { + array_grow_by(self, quantifiers->size - self->size); + } + for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) + { + uint8_t *quantifier = array_get(quantifiers, id); + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t)quantifier_add((TSQuantifier)*own_quantifier, + (TSQuantifier)*quantifier); + } +} + +// Join the given quantifier with the current values +static void capture_quantifiers_mul(CaptureQuantifiers *self, + TSQuantifier quantifier) +{ + for 
(uint16_t id = 0; id < (uint16_t)self->size; id++) + { + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = + (uint8_t)quantifier_mul((TSQuantifier)*own_quantifier, quantifier); + } +} + +// Point-wise join the quantifiers from a list of alternatives with the current +// values +static void capture_quantifiers_join_all(CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers) +{ + if (self->size < quantifiers->size) + { + array_grow_by(self, quantifiers->size - self->size); + } + for (uint32_t id = 0; id < quantifiers->size; id++) + { + uint8_t *quantifier = array_get(quantifiers, id); + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t)quantifier_join( + (TSQuantifier)*own_quantifier, (TSQuantifier)*quantifier); + } + for (uint32_t id = quantifiers->size; id < self->size; id++) + { + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t)quantifier_join( + (TSQuantifier)*own_quantifier, TSQuantifierZero); + } +} + +/************** + * SymbolTable + **************/ + +static SymbolTable symbol_table_new(void) +{ + return (SymbolTable){ + .characters = array_new(), + .slices = array_new(), + }; +} + +static void symbol_table_delete(SymbolTable *self) +{ + array_delete(&self->characters); + array_delete(&self->slices); +} + +static int symbol_table_id_for_name(const SymbolTable *self, const char *name, + uint32_t length) +{ + for (unsigned i = 0; i < self->slices.size; i++) + { + Slice slice = self->slices.contents[i]; + if (slice.length == length && + !strncmp(&self->characters.contents[slice.offset], name, length)) + return i; + } + return -1; +} + +static const char *symbol_table_name_for_id(const SymbolTable *self, + uint16_t id, uint32_t *length) +{ + Slice slice = self->slices.contents[id]; + *length = slice.length; + return &self->characters.contents[slice.offset]; +} + +static uint16_t symbol_table_insert_name(SymbolTable *self, const char *name, + uint32_t length) +{ + int id = 
symbol_table_id_for_name(self, name, length); + if (id >= 0) + return (uint16_t)id; + Slice slice = { + .offset = self->characters.size, + .length = length, + }; + array_grow_by(&self->characters, length + 1); + memcpy(&self->characters.contents[slice.offset], name, length); + self->characters.contents[self->characters.size - 1] = 0; + array_push(&self->slices, slice); + return self->slices.size - 1; +} + +/************ + * QueryStep + ************/ + +static QueryStep query_step__new(TSSymbol symbol, uint16_t depth, + bool is_immediate) +{ + QueryStep step = { + .symbol = symbol, + .depth = depth, + .field = 0, + .alternative_index = NONE, + .negated_field_list_id = 0, + .contains_captures = false, + .is_last_child = false, + .is_named = false, + .is_pass_through = false, + .is_dead_end = false, + .root_pattern_guaranteed = false, + .is_immediate = is_immediate, + .alternative_is_immediate = false, + }; + for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) + { + step.capture_ids[i] = NONE; + } + return step; +} + +static void query_step__add_capture(QueryStep *self, uint16_t capture_id) +{ + for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) + { + if (self->capture_ids[i] == NONE) + { + self->capture_ids[i] = capture_id; + break; + } + } +} + +static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) +{ + for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) + { + if (self->capture_ids[i] == capture_id) + { + self->capture_ids[i] = NONE; + while (i + 1 < MAX_STEP_CAPTURE_COUNT) + { + if (self->capture_ids[i + 1] == NONE) + break; + self->capture_ids[i] = self->capture_ids[i + 1]; + self->capture_ids[i + 1] = NONE; + i++; + } + break; + } + } +} + +/********************** + * StatePredecessorMap + **********************/ + +static inline StatePredecessorMap state_predecessor_map_new( + const TSLanguage *language) +{ + return (StatePredecessorMap){ + .contents = calloc((size_t)language->state_count * + (MAX_STATE_PREDECESSOR_COUNT + 1), + 
sizeof(TSStateId)), + }; +} + +static inline void state_predecessor_map_delete(StatePredecessorMap *self) +{ + free(self->contents); +} + +static inline void state_predecessor_map_add(StatePredecessorMap *self, + TSStateId state, + TSStateId predecessor) +{ + size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); + TSStateId *count = &self->contents[index]; + if (*count == 0 || (*count < MAX_STATE_PREDECESSOR_COUNT && + self->contents[index + *count] != predecessor)) + { + (*count)++; + self->contents[index + *count] = predecessor; + } +} + +static inline const TSStateId *state_predecessor_map_get( + const StatePredecessorMap *self, TSStateId state, unsigned *count) +{ + size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); + *count = self->contents[index]; + return &self->contents[index + 1]; +} + +/**************** + * AnalysisState + ****************/ + +static unsigned analysis_state__recursion_depth(const AnalysisState *self) +{ + unsigned result = 0; + for (unsigned i = 0; i < self->depth; i++) + { + TSSymbol symbol = self->stack[i].parent_symbol; + for (unsigned j = 0; j < i; j++) + { + if (self->stack[j].parent_symbol == symbol) + { + result++; + break; + } + } + } + return result; +} + +static inline int analysis_state__compare_position(AnalysisState *const *self, + AnalysisState *const *other) +{ + for (unsigned i = 0; i < (*self)->depth; i++) + { + if (i >= (*other)->depth) + return -1; + if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) + return -1; + if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) + return 1; + } + if ((*self)->depth < (*other)->depth) + return 1; + if ((*self)->step_index < (*other)->step_index) + return -1; + if ((*self)->step_index > (*other)->step_index) + return 1; + return 0; +} + +static inline int analysis_state__compare(AnalysisState *const *self, + AnalysisState *const *other) +{ + int result = analysis_state__compare_position(self, other); + if (result != 0) + 
return result; + for (unsigned i = 0; i < (*self)->depth; i++) + { + if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) + return -1; + if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) + return 1; + if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) + return -1; + if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) + return 1; + if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) + return -1; + if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) + return 1; + } + return 0; +} + +static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) +{ + if (self->depth == 0) + { + return &self->stack[0]; + } + return &self->stack[self->depth - 1]; +} + +static inline bool analysis_state__has_supertype(AnalysisState *self, + TSSymbol symbol) +{ + for (unsigned i = 0; i < self->depth; i++) + { + if (self->stack[i].parent_symbol == symbol) + return true; + } + return false; +} + +/****************** + * AnalysisStateSet + ******************/ + +// Obtains an `AnalysisState` instance, either by consuming one from this set's +// object pool, or by cloning one from scratch. +static inline AnalysisState *analysis_state_pool__clone_or_reuse( + AnalysisStateSet *self, AnalysisState *borrowed_item) +{ + AnalysisState *new_item; + if (self->size) + { + new_item = array_pop(self); + } + else + { + new_item = malloc(sizeof(AnalysisState)); + } + *new_item = *borrowed_item; + return new_item; +} + +// Inserts a clone of the passed-in item at the appropriate position to maintain +// ordering in this set. The set does not contain duplicates, so if the item is +// already present, it will not be inserted, and no clone will be made. +// +// The caller retains ownership of the passed-in memory. However, the clone that +// is created by this function will be managed by the state set. 
+static inline void analysis_state_set__insert_sorted( + AnalysisStateSet *self, AnalysisStateSet *pool, + AnalysisState *borrowed_item) +{ + unsigned index, exists; + array_search_sorted_with(self, analysis_state__compare, &borrowed_item, + &index, &exists); + if (!exists) + { + AnalysisState *new_item = + analysis_state_pool__clone_or_reuse(pool, borrowed_item); + array_insert(self, index, new_item); + } +} + +// Inserts a clone of the passed-in item at the end position of this list. +// +// IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison +// function `analysis_state__compare`) than largest item already in this set. If +// items are inserted in the wrong order, the set will not function properly for +// future use. +// +// The caller retains ownership of the passed-in memory. However, the clone that +// is created by this function will be managed by the state set. +static inline void analysis_state_set__push(AnalysisStateSet *self, + AnalysisStateSet *pool, + AnalysisState *borrowed_item) +{ + AnalysisState *new_item = + analysis_state_pool__clone_or_reuse(pool, borrowed_item); + array_push(self, new_item); +} + +// Removes all items from this set, returning it to an empty state. +static inline void analysis_state_set__clear(AnalysisStateSet *self, + AnalysisStateSet *pool) +{ + array_push_all(pool, self); + array_clear(self); +} + +// Releases all memory that is managed with this state set, including any items +// currently present. After calling this function, the set is no longer suitable +// for use. 
+static inline void analysis_state_set__delete(AnalysisStateSet *self) +{ + for (unsigned i = 0; i < self->size; i++) + { + free(self->contents[i]); + } + array_delete(self); +} + +/**************** + * QueryAnalyzer + ****************/ + +static inline QueryAnalysis query_analysis__new(void) +{ + return (QueryAnalysis){ + .states = array_new(), + .next_states = array_new(), + .deeper_states = array_new(), + .state_pool = array_new(), + .final_step_indices = array_new(), + .finished_parent_symbols = array_new(), + .did_abort = false, + }; +} + +static inline void query_analysis__delete(QueryAnalysis *self) +{ + analysis_state_set__delete(&self->states); + analysis_state_set__delete(&self->next_states); + analysis_state_set__delete(&self->deeper_states); + analysis_state_set__delete(&self->state_pool); + array_delete(&self->final_step_indices); + array_delete(&self->finished_parent_symbols); +} + +/*********************** + * AnalysisSubgraphNode + ***********************/ + +static inline int analysis_subgraph_node__compare( + const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) +{ + if (self->state < other->state) + return -1; + if (self->state > other->state) + return 1; + if (self->child_index < other->child_index) + return -1; + if (self->child_index > other->child_index) + return 1; + if (self->done < other->done) + return -1; + if (self->done > other->done) + return 1; + if (self->production_id < other->production_id) + return -1; + if (self->production_id > other->production_id) + return 1; + return 0; +} + +/********* + * Query + *********/ + +// The `pattern_map` contains a mapping from TSSymbol values to indices in the +// `steps` array. For a given syntax node, the `pattern_map` makes it possible +// to quickly find the starting steps of all of the patterns whose root matches +// that node. 
Each entry has two fields: a `pattern_index`, which identifies one +// of the patterns in the query, and a `step_index`, which indicates the start +// offset of that pattern's steps within the `steps` array. +// +// The entries are sorted by the patterns' root symbols, and lookups use a +// binary search. This ensures that the cost of this initial lookup step +// scales logarithmically with the number of patterns in the query. +// +// This returns `true` if the symbol is present and `false` otherwise. +// If the symbol is not present `*result` is set to the index where the +// symbol should be inserted. +static inline bool ts_query__pattern_map_search(const TSQuery *self, + TSSymbol needle, + uint32_t *result) +{ + uint32_t base_index = self->wildcard_root_pattern_count; + uint32_t size = self->pattern_map.size - base_index; + if (size == 0) + { + *result = base_index; + return false; + } + while (size > 1) + { + uint32_t half_size = size / 2; + uint32_t mid_index = base_index + half_size; + TSSymbol mid_symbol = + self->steps + .contents[self->pattern_map.contents[mid_index].step_index] + .symbol; + if (needle > mid_symbol) + base_index = mid_index; + size -= half_size; + } + + TSSymbol symbol = + self->steps.contents[self->pattern_map.contents[base_index].step_index] + .symbol; + + if (needle > symbol) + { + base_index++; + if (base_index < self->pattern_map.size) + { + symbol = + self->steps + .contents[self->pattern_map.contents[base_index].step_index] + .symbol; + } + } + + *result = base_index; + return needle == symbol; +} + +// Insert a new pattern's start index into the pattern map, maintaining +// the pattern map's ordering invariant. +static inline void ts_query__pattern_map_insert(TSQuery *self, TSSymbol symbol, + PatternEntry new_entry) +{ + uint32_t index; + ts_query__pattern_map_search(self, symbol, &index); + + // Ensure that the entries are sorted not only by symbol, but also + // by pattern_index. 
This way, states for earlier patterns will be + // initiated first, which allows the ordering of the states array + // to be maintained more efficiently. + while (index < self->pattern_map.size) + { + PatternEntry *entry = &self->pattern_map.contents[index]; + if (self->steps.contents[entry->step_index].symbol == symbol && + entry->pattern_index < new_entry.pattern_index) + { + index++; + } + else + { + break; + } + } + + array_insert(&self->pattern_map, index, new_entry); +} + +// Walk the subgraph for this non-terminal, tracking all of the possible +// sequences of progress within the pattern. +static void ts_query__perform_analysis(TSQuery *self, + const AnalysisSubgraphArray *subgraphs, + QueryAnalysis *analysis) +{ + unsigned recursion_depth_limit = 0; + unsigned prev_final_step_count = 0; + array_clear(&analysis->final_step_indices); + array_clear(&analysis->finished_parent_symbols); + + for (unsigned iteration = 0;; iteration++) + { + if (iteration == MAX_ANALYSIS_ITERATION_COUNT) + { + analysis->did_abort = true; + break; + } + +#ifdef DEBUG_ANALYZE_QUERY + printf("Iteration: %u. 
Final step indices:", iteration); + for (unsigned j = 0; j < analysis->final_step_indices.size; j++) + { + printf(" %4u", analysis->final_step_indices.contents[j]); + } + printf("\n"); + for (unsigned j = 0; j < analysis->states.size; j++) + { + AnalysisState *state = analysis->states.contents[j]; + printf(" %3u: step: %u, stack: [", j, state->step_index); + for (unsigned k = 0; k < state->depth; k++) + { + printf( + " {%s, child: %u, state: %4u", + self->language->symbol_names[state->stack[k].parent_symbol], + state->stack[k].child_index, state->stack[k].parse_state); + if (state->stack[k].field_id) + printf( + ", field: %s", + self->language->field_names[state->stack[k].field_id]); + if (state->stack[k].done) + printf(", DONE"); + printf("}"); + } + printf(" ]\n"); + } +#endif + + // If no further progress can be made within the current recursion depth + // limit, then bump the depth limit by one, and continue to process the + // states the exceeded the limit. But only allow this if progress has + // been made since the last time the depth limit was increased. + if (analysis->states.size == 0) + { + if (analysis->deeper_states.size > 0 && + analysis->final_step_indices.size > prev_final_step_count) + { +#ifdef DEBUG_ANALYZE_QUERY + printf("Increase recursion depth limit to %u\n", + recursion_depth_limit + 1); +#endif + + prev_final_step_count = analysis->final_step_indices.size; + recursion_depth_limit++; + AnalysisStateSet _states = analysis->states; + analysis->states = analysis->deeper_states; + analysis->deeper_states = _states; + continue; + } + + break; + } + + analysis_state_set__clear(&analysis->next_states, + &analysis->state_pool); + for (unsigned j = 0; j < analysis->states.size; j++) + { + AnalysisState *const state = analysis->states.contents[j]; + + // For efficiency, it's important to avoid processing the same + // analysis state more than once. 
To achieve this, keep the states + // in order of ascending position within their hypothetical syntax + // trees. In each iteration of this loop, start by advancing the + // states that have made the least progress. Avoid advancing states + // that have already made more progress. + if (analysis->next_states.size > 0) + { + int comparison = analysis_state__compare_position( + &state, array_back(&analysis->next_states)); + if (comparison == 0) + { + analysis_state_set__insert_sorted( + &analysis->next_states, &analysis->state_pool, state); + continue; + } + else if (comparison > 0) + { +#ifdef DEBUG_ANALYZE_QUERY + printf("Terminate iteration at state %u\n", j); +#endif + while (j < analysis->states.size) + { + analysis_state_set__push(&analysis->next_states, + &analysis->state_pool, + analysis->states.contents[j]); + j++; + } + break; + } + } + + const TSStateId parse_state = + analysis_state__top(state)->parse_state; + const TSSymbol parent_symbol = + analysis_state__top(state)->parent_symbol; + const TSFieldId parent_field_id = + analysis_state__top(state)->field_id; + const unsigned child_index = + analysis_state__top(state)->child_index; + const QueryStep *const step = + &self->steps.contents[state->step_index]; + + unsigned subgraph_index, exists; + array_search_sorted_by(subgraphs, .symbol, parent_symbol, + &subgraph_index, &exists); + if (!exists) + continue; + const AnalysisSubgraph *subgraph = + &subgraphs->contents[subgraph_index]; + + // Follow every possible path in the parse table, but only visit + // states that are part of the subgraph for the current symbol. 
+ LookaheadIterator lookahead_iterator = + ts_language_lookaheads(self->language, parse_state); + while (ts_lookahead_iterator__next(&lookahead_iterator)) + { + TSSymbol sym = lookahead_iterator.symbol; + + AnalysisSubgraphNode successor = { + .state = parse_state, + .child_index = child_index, + }; + if (lookahead_iterator.action_count) + { + const TSParseAction *action = + &lookahead_iterator + .actions[lookahead_iterator.action_count - 1]; + if (action->type == TSParseActionTypeShift) + { + if (!action->shift.extra) + { + successor.state = action->shift.state; + successor.child_index++; + } + } + else + { + continue; + } + } + else if (lookahead_iterator.next_state != 0) + { + successor.state = lookahead_iterator.next_state; + successor.child_index++; + } + else + { + continue; + } + + unsigned node_index; + array_search_sorted_with(&subgraph->nodes, + analysis_subgraph_node__compare, + &successor, &node_index, &exists); + while (node_index < subgraph->nodes.size) + { + AnalysisSubgraphNode *node = + &subgraph->nodes.contents[node_index++]; + if (node->state != successor.state || + node->child_index != successor.child_index) + break; + + // Use the subgraph to determine what alias and field will + // eventually be applied to this child node. + TSSymbol alias = ts_language_alias_at( + self->language, node->production_id, child_index); + TSSymbol visible_symbol = + alias ? alias + : self->language->symbol_metadata[sym].visible + ? self->language->public_symbol_map[sym] + : 0; + TSFieldId field_id = parent_field_id; + if (!field_id) + { + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map(self->language, + node->production_id, &field_map, + &field_map_end); + for (; field_map != field_map_end; field_map++) + { + if (!field_map->inherited && + field_map->child_index == child_index) + { + field_id = field_map->field_id; + break; + } + } + } + + // Create a new state that has advanced past this + // hypothetical subtree. 
+ AnalysisState next_state = *state; + AnalysisStateEntry *next_state_top = + analysis_state__top(&next_state); + next_state_top->child_index = successor.child_index; + next_state_top->parse_state = successor.state; + if (node->done) + next_state_top->done = true; + + // Determine if this hypothetical child node would match the + // current step of the query pattern. + bool does_match = false; + if (visible_symbol) + { + does_match = true; + if (step->symbol == WILDCARD_SYMBOL) + { + if (step->is_named && + !self->language->symbol_metadata[visible_symbol] + .named) + does_match = false; + } + else if (step->symbol != visible_symbol) + { + does_match = false; + } + if (step->field && step->field != field_id) + { + does_match = false; + } + if (step->supertype_symbol && + !analysis_state__has_supertype( + state, step->supertype_symbol)) + does_match = false; + } + + // If this child is hidden, then descend into it and walk + // through its children. If the top entry of the stack is at + // the end of its rule, then that entry can be replaced. + // Otherwise, push a new entry onto the stack. + else if (sym >= self->language->token_count) + { + if (!next_state_top->done) + { + if (next_state.depth + 1 >= + MAX_ANALYSIS_STATE_DEPTH) + { +#ifdef DEBUG_ANALYZE_QUERY + printf("Exceeded depth limit for state %u\n", + j); +#endif + + analysis->did_abort = true; + continue; + } + + next_state.depth++; + next_state_top = analysis_state__top(&next_state); + } + + *next_state_top = (AnalysisStateEntry){ + .parse_state = parse_state, + .parent_symbol = sym, + .child_index = 0, + .field_id = field_id, + .done = false, + }; + + if (analysis_state__recursion_depth(&next_state) > + recursion_depth_limit) + { + analysis_state_set__insert_sorted( + &analysis->deeper_states, &analysis->state_pool, + &next_state); + continue; + } + } + + // Pop from the stack when this state reached the end of its + // current syntax node. 
+ while (next_state.depth > 0 && next_state_top->done) + { + next_state.depth--; + next_state_top = analysis_state__top(&next_state); + } + + // If this hypothetical child did match the current step of + // the query pattern, then advance to the next step at the + // current depth. This involves skipping over any descendant + // steps of the current child. + const QueryStep *next_step = step; + if (does_match) + { + for (;;) + { + next_state.step_index++; + next_step = + &self->steps.contents[next_state.step_index]; + if (next_step->depth == PATTERN_DONE_MARKER || + next_step->depth <= step->depth) + break; + } + } + else if (successor.state == parse_state) + { + continue; + } + + for (;;) + { + // Skip pass-through states. Although these states have + // alternatives, they are only used to implement + // repetitions, and query analysis does not need to + // process repetitions in order to determine whether + // steps are possible and definite. + if (next_step->is_pass_through) + { + next_state.step_index++; + next_step++; + continue; + } + + // If the pattern is finished or hypothetical parent + // node is complete, then record that matching can + // terminate at this step of the pattern. Otherwise, add + // this state to the list of states to process on the + // next iteration. + if (!next_step->is_dead_end) + { + bool did_finish_pattern = + self->steps.contents[next_state.step_index] + .depth != step->depth; + if (did_finish_pattern) + { + array_insert_sorted_by( + &analysis->finished_parent_symbols, , + state->root_symbol); + } + else if (next_state.depth == 0) + { + array_insert_sorted_by( + &analysis->final_step_indices, , + next_state.step_index); + } + else + { + analysis_state_set__insert_sorted( + &analysis->next_states, + &analysis->state_pool, &next_state); + } + } + + // If the state has advanced to a step with an + // alternative step, then add another state at that + // alternative step. 
This process is simpler than the + // process of actually matching a pattern during query + // execution, because for the purposes of query + // analysis, there is no need to process repetitions. + if (does_match && + next_step->alternative_index != NONE && + next_step->alternative_index > + next_state.step_index) + { + next_state.step_index = + next_step->alternative_index; + next_step = + &self->steps.contents[next_state.step_index]; + } + else + { + break; + } + } + } + } + } + + AnalysisStateSet _states = analysis->states; + analysis->states = analysis->next_states; + analysis->next_states = _states; + } +} + +static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) +{ + Array(uint16_t) non_rooted_pattern_start_steps = array_new(); + for (unsigned i = 0; i < self->pattern_map.size; i++) + { + PatternEntry *pattern = &self->pattern_map.contents[i]; + if (!pattern->is_rooted) + { + QueryStep *step = &self->steps.contents[pattern->step_index]; + if (step->symbol != WILDCARD_SYMBOL) + { + array_push(&non_rooted_pattern_start_steps, i); + } + } + } + + // Walk forward through all of the steps in the query, computing some + // basic information about each step. Mark all of the steps that contain + // captures, and record the indices of all of the steps that have child + // steps. 
+ Array(uint32_t) parent_step_indices = array_new(); + for (unsigned i = 0; i < self->steps.size; i++) + { + QueryStep *step = &self->steps.contents[i]; + if (step->depth == PATTERN_DONE_MARKER) + { + step->parent_pattern_guaranteed = true; + step->root_pattern_guaranteed = true; + continue; + } + + bool has_children = false; + bool is_wildcard = step->symbol == WILDCARD_SYMBOL; + step->contains_captures = step->capture_ids[0] != NONE; + for (unsigned j = i + 1; j < self->steps.size; j++) + { + QueryStep *next_step = &self->steps.contents[j]; + if (next_step->depth == PATTERN_DONE_MARKER || + next_step->depth <= step->depth) + break; + if (next_step->capture_ids[0] != NONE) + { + step->contains_captures = true; + } + if (!is_wildcard) + { + next_step->root_pattern_guaranteed = true; + next_step->parent_pattern_guaranteed = true; + } + has_children = true; + } + + if (has_children && !is_wildcard) + { + array_push(&parent_step_indices, i); + } + } + + // For every parent symbol in the query, initialize an 'analysis subgraph'. + // This subgraph lists all of the states in the parse table that are + // directly involved in building subtrees for this symbol. + // + // In addition to the parent symbols in the query, construct subgraphs for + // all of the hidden symbols in the grammar, because these might occur + // within one of the parent nodes, such that their children appear to belong + // to the parent. 
+ AnalysisSubgraphArray subgraphs = array_new(); + for (unsigned i = 0; i < parent_step_indices.size; i++) + { + uint32_t parent_step_index = parent_step_indices.contents[i]; + TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; + AnalysisSubgraph subgraph = {.symbol = parent_symbol}; + array_insert_sorted_by(&subgraphs, .symbol, subgraph); + } + for (TSSymbol sym = (uint16_t)self->language->token_count; + sym < (uint16_t)self->language->symbol_count; sym++) + { + if (!ts_language_symbol_metadata(self->language, sym).visible) + { + AnalysisSubgraph subgraph = {.symbol = sym}; + array_insert_sorted_by(&subgraphs, .symbol, subgraph); + } + } + + // Scan the parse table to find the data needed to populate these subgraphs. + // Collect three things during this scan: + // 1) All of the parse states where one of these symbols can start. + // 2) All of the parse states where one of these symbols can end, along + // with information about the node that would be created. + // 3) A list of predecessor states for each state. 
+ StatePredecessorMap predecessor_map = + state_predecessor_map_new(self->language); + for (TSStateId state = 1; state < (uint16_t)self->language->state_count; + state++) + { + unsigned subgraph_index, exists; + LookaheadIterator lookahead_iterator = + ts_language_lookaheads(self->language, state); + while (ts_lookahead_iterator__next(&lookahead_iterator)) + { + if (lookahead_iterator.action_count) + { + for (unsigned i = 0; i < lookahead_iterator.action_count; i++) + { + const TSParseAction *action = + &lookahead_iterator.actions[i]; + if (action->type == TSParseActionTypeReduce) + { + const TSSymbol *aliases, *aliases_end; + ts_language_aliases_for_symbol(self->language, + action->reduce.symbol, + &aliases, &aliases_end); + for (const TSSymbol *symbol = aliases; + symbol < aliases_end; symbol++) + { + array_search_sorted_by(&subgraphs, .symbol, *symbol, + &subgraph_index, &exists); + if (exists) + { + AnalysisSubgraph *subgraph = + &subgraphs.contents[subgraph_index]; + if (subgraph->nodes.size == 0 || + array_back(&subgraph->nodes)->state != + state) + { + array_push( + &subgraph->nodes, + ((AnalysisSubgraphNode){ + .state = state, + .production_id = + action->reduce.production_id, + .child_index = + action->reduce.child_count, + .done = true, + })); + } + } + } + } + else if (action->type == TSParseActionTypeShift && + !action->shift.extra) + { + TSStateId next_state = action->shift.state; + state_predecessor_map_add(&predecessor_map, next_state, + state); + } + } + } + else if (lookahead_iterator.next_state != 0) + { + if (lookahead_iterator.next_state != state) + { + state_predecessor_map_add( + &predecessor_map, lookahead_iterator.next_state, state); + } + if (ts_language_state_is_primary(self->language, state)) + { + const TSSymbol *aliases, *aliases_end; + ts_language_aliases_for_symbol(self->language, + lookahead_iterator.symbol, + &aliases, &aliases_end); + for (const TSSymbol *symbol = aliases; symbol < aliases_end; + symbol++) + { + 
array_search_sorted_by(&subgraphs, .symbol, *symbol, + &subgraph_index, &exists); + if (exists) + { + AnalysisSubgraph *subgraph = + &subgraphs.contents[subgraph_index]; + if (subgraph->start_states.size == 0 || + *array_back(&subgraph->start_states) != state) + array_push(&subgraph->start_states, state); + } + } + } + } + } + } + + // For each subgraph, compute the preceding states by walking backward + // from the end states using the predecessor map. + Array(AnalysisSubgraphNode) next_nodes = array_new(); + for (unsigned i = 0; i < subgraphs.size; i++) + { + AnalysisSubgraph *subgraph = &subgraphs.contents[i]; + if (subgraph->nodes.size == 0) + { + array_delete(&subgraph->start_states); + array_erase(&subgraphs, i); + i--; + continue; + } + array_assign(&next_nodes, &subgraph->nodes); + while (next_nodes.size > 0) + { + AnalysisSubgraphNode node = array_pop(&next_nodes); + if (node.child_index > 1) + { + unsigned predecessor_count; + const TSStateId *predecessors = state_predecessor_map_get( + &predecessor_map, node.state, &predecessor_count); + for (unsigned j = 0; j < predecessor_count; j++) + { + AnalysisSubgraphNode predecessor_node = { + .state = predecessors[j], + .child_index = node.child_index - 1, + .production_id = node.production_id, + .done = false, + }; + unsigned index, exists; + array_search_sorted_with( + &subgraph->nodes, analysis_subgraph_node__compare, + &predecessor_node, &index, &exists); + if (!exists) + { + array_insert(&subgraph->nodes, index, predecessor_node); + array_push(&next_nodes, predecessor_node); + } + } + } + } + } + +#ifdef DEBUG_ANALYZE_QUERY + printf("\nSubgraphs:\n"); + for (unsigned i = 0; i < subgraphs.size; i++) + { + AnalysisSubgraph *subgraph = &subgraphs.contents[i]; + printf(" %u, %s:\n", subgraph->symbol, + ts_language_symbol_name(self->language, subgraph->symbol)); + for (unsigned j = 0; j < subgraph->start_states.size; j++) + { + printf(" {state: %u}\n", subgraph->start_states.contents[j]); + } + for (unsigned j = 
0; j < subgraph->nodes.size; j++) + { + AnalysisSubgraphNode *node = &subgraph->nodes.contents[j]; + printf(" {state: %u, child_index: %u, production_id: %u, done: " + "%d}\n", + node->state, node->child_index, node->production_id, + node->done); + } + printf("\n"); + } +#endif + + // For each non-terminal pattern, determine if the pattern can successfully + // match, and identify all of the possible children within the pattern where + // matching could fail. + bool all_patterns_are_valid = true; + QueryAnalysis analysis = query_analysis__new(); + for (unsigned i = 0; i < parent_step_indices.size; i++) + { + uint16_t parent_step_index = parent_step_indices.contents[i]; + uint16_t parent_depth = self->steps.contents[parent_step_index].depth; + TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; + if (parent_symbol == ts_builtin_sym_error) + continue; + + // Find the subgraph that corresponds to this pattern's root symbol. If + // the pattern's root symbol is a terminal, then return an error. + unsigned subgraph_index, exists; + array_search_sorted_by(&subgraphs, .symbol, parent_symbol, + &subgraph_index, &exists); + if (!exists) + { + unsigned first_child_step_index = parent_step_index + 1; + uint32_t j, child_exists; + array_search_sorted_by(&self->step_offsets, .step_index, + first_child_step_index, &j, &child_exists); + assert(child_exists); + *error_offset = self->step_offsets.contents[j].byte_offset; + all_patterns_are_valid = false; + break; + } + + // Initialize an analysis state at every parse state in the table where + // this parent symbol can occur. 
+ AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + analysis_state_set__clear(&analysis.states, &analysis.state_pool); + analysis_state_set__clear(&analysis.deeper_states, + &analysis.state_pool); + for (unsigned j = 0; j < subgraph->start_states.size; j++) + { + TSStateId parse_state = subgraph->start_states.contents[j]; + analysis_state_set__push( + &analysis.states, &analysis.state_pool, + &((AnalysisState){ + .step_index = parent_step_index + 1, + .stack = + { + [0] = + { + .parse_state = parse_state, + .parent_symbol = parent_symbol, + .child_index = 0, + .field_id = 0, + .done = false, + }, + }, + .depth = 1, + .root_symbol = parent_symbol, + })); + } + +#ifdef DEBUG_ANALYZE_QUERY + printf("\nWalk states for %s:\n", + ts_language_symbol_name( + self->language, + analysis.states.contents[0]->stack[0].parent_symbol)); +#endif + + analysis.did_abort = false; + ts_query__perform_analysis(self, &subgraphs, &analysis); + + // If this pattern could not be fully analyzed, then every step should + // be considered fallible. + if (analysis.did_abort) + { + for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) + { + QueryStep *step = &self->steps.contents[j]; + if (step->depth <= parent_depth || + step->depth == PATTERN_DONE_MARKER) + break; + if (!step->is_dead_end) + { + step->parent_pattern_guaranteed = false; + step->root_pattern_guaranteed = false; + } + } + continue; + } + + // If this pattern cannot match, store the pattern index so that it can + // be returned to the caller. 
+ if (analysis.finished_parent_symbols.size == 0) + { + assert(analysis.final_step_indices.size > 0); + uint16_t impossible_step_index = + *array_back(&analysis.final_step_indices); + uint32_t j, impossible_exists; + array_search_sorted_by(&self->step_offsets, .step_index, + impossible_step_index, &j, + &impossible_exists); + if (j >= self->step_offsets.size) + j = self->step_offsets.size - 1; + *error_offset = self->step_offsets.contents[j].byte_offset; + all_patterns_are_valid = false; + break; + } + + // Mark as fallible any step where a match terminated. + // Later, this property will be propagated to all of the step's + // predecessors. + for (unsigned j = 0; j < analysis.final_step_indices.size; j++) + { + uint32_t final_step_index = analysis.final_step_indices.contents[j]; + QueryStep *step = &self->steps.contents[final_step_index]; + if (step->depth != PATTERN_DONE_MARKER && + step->depth > parent_depth && !step->is_dead_end) + { + step->parent_pattern_guaranteed = false; + step->root_pattern_guaranteed = false; + } + } + } + + // Mark as indefinite any step with captures that are used in predicates. + Array(uint16_t) predicate_capture_ids = array_new(); + for (unsigned i = 0; i < self->patterns.size; i++) + { + QueryPattern *pattern = &self->patterns.contents[i]; + + // Gather all of the captures that are used in predicates for this + // pattern. + array_clear(&predicate_capture_ids); + for (unsigned start = pattern->predicate_steps.offset, + end = start + pattern->predicate_steps.length, j = start; + j < end; j++) + { + TSQueryPredicateStep *step = &self->predicate_steps.contents[j]; + if (step->type == TSQueryPredicateStepTypeCapture) + { + uint16_t value_id = step->value_id; + array_insert_sorted_by(&predicate_capture_ids, , value_id); + } + } + + // Find all of the steps that have these captures. 
+ for (unsigned start = pattern->steps.offset, + end = start + pattern->steps.length, j = start; + j < end; j++) + { + QueryStep *step = &self->steps.contents[j]; + for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) + { + uint16_t capture_id = step->capture_ids[k]; + if (capture_id == NONE) + break; + unsigned index, exists; + array_search_sorted_by(&predicate_capture_ids, , capture_id, + &index, &exists); + if (exists) + { + step->root_pattern_guaranteed = false; + break; + } + } + } + } + + // Propagate fallibility. If a pattern is fallible at a given step, then it + // is fallible at all of its preceding steps. + bool done = self->steps.size == 0; + while (!done) + { + done = true; + for (unsigned i = self->steps.size - 1; i > 0; i--) + { + QueryStep *step = &self->steps.contents[i]; + if (step->depth == PATTERN_DONE_MARKER) + continue; + + // Determine if this step is definite or has definite alternatives. + bool parent_pattern_guaranteed = false; + for (;;) + { + if (step->root_pattern_guaranteed) + { + parent_pattern_guaranteed = true; + break; + } + if (step->alternative_index == NONE || + step->alternative_index < i) + { + break; + } + step = &self->steps.contents[step->alternative_index]; + } + + // If not, mark its predecessor as indefinite. + if (!parent_pattern_guaranteed) + { + QueryStep *prev_step = &self->steps.contents[i - 1]; + if (!prev_step->is_dead_end && + prev_step->depth != PATTERN_DONE_MARKER && + prev_step->root_pattern_guaranteed) + { + prev_step->root_pattern_guaranteed = false; + done = false; + } + } + } + } + +#ifdef DEBUG_ANALYZE_QUERY + printf("Steps:\n"); + for (unsigned i = 0; i < self->steps.size; i++) + { + QueryStep *step = &self->steps.contents[i]; + if (step->depth == PATTERN_DONE_MARKER) + { + printf(" %u: DONE\n", i); + } + else + { + printf( + " %u: {symbol: %s, field: %s, depth: %u, " + "parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n", + i, + (step->symbol == WILDCARD_SYMBOL) + ? 
"ANY" + : ts_language_symbol_name(self->language, step->symbol), + (step->field ? ts_language_field_name_for_id(self->language, + step->field) + : "-"), + step->depth, step->parent_pattern_guaranteed, + step->root_pattern_guaranteed); + } + } +#endif + + // Determine which repetition symbols in this language have the possibility + // of matching non-rooted patterns in this query. These repetition symbols + // prevent certain optimizations with range restrictions. + analysis.did_abort = false; + for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) + { + uint16_t pattern_entry_index = + non_rooted_pattern_start_steps.contents[i]; + PatternEntry *pattern_entry = + &self->pattern_map.contents[pattern_entry_index]; + + analysis_state_set__clear(&analysis.states, &analysis.state_pool); + analysis_state_set__clear(&analysis.deeper_states, + &analysis.state_pool); + for (unsigned j = 0; j < subgraphs.size; j++) + { + AnalysisSubgraph *subgraph = &subgraphs.contents[j]; + TSSymbolMetadata metadata = + ts_language_symbol_metadata(self->language, subgraph->symbol); + if (metadata.visible || metadata.named) + continue; + + for (uint32_t k = 0; k < subgraph->start_states.size; k++) + { + TSStateId parse_state = subgraph->start_states.contents[k]; + analysis_state_set__push( + &analysis.states, &analysis.state_pool, + &((AnalysisState){ + .step_index = pattern_entry->step_index, + .stack = + { + [0] = + { + .parse_state = parse_state, + .parent_symbol = subgraph->symbol, + .child_index = 0, + .field_id = 0, + .done = false, + }, + }, + .root_symbol = subgraph->symbol, + .depth = 1, + })); + } + } + +#ifdef DEBUG_ANALYZE_QUERY + printf("\nWalk states for rootless pattern step %u:\n", + pattern_entry->step_index); +#endif + + ts_query__perform_analysis(self, &subgraphs, &analysis); + + if (analysis.finished_parent_symbols.size > 0) + { + self->patterns.contents[pattern_entry->pattern_index].is_non_local = + true; + } + + for (unsigned k = 0; k < 
analysis.finished_parent_symbols.size; k++) + { + TSSymbol symbol = analysis.finished_parent_symbols.contents[k]; + array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, + , symbol); + } + } + +#ifdef DEBUG_ANALYZE_QUERY + if (self->repeat_symbols_with_rootless_patterns.size > 0) + { + printf("\nRepetition symbols with rootless patterns:\n"); + printf("aborted analysis: %d\n", analysis.did_abort); + for (unsigned i = 0; + i < self->repeat_symbols_with_rootless_patterns.size; i++) + { + TSSymbol symbol = + self->repeat_symbols_with_rootless_patterns.contents[i]; + printf(" %u, %s\n", symbol, + ts_language_symbol_name(self->language, symbol)); + } + printf("\n"); + } +#endif + + // Cleanup + for (unsigned i = 0; i < subgraphs.size; i++) + { + array_delete(&subgraphs.contents[i].start_states); + array_delete(&subgraphs.contents[i].nodes); + } + array_delete(&subgraphs); + query_analysis__delete(&analysis); + array_delete(&next_nodes); + array_delete(&non_rooted_pattern_start_steps); + array_delete(&parent_step_indices); + array_delete(&predicate_capture_ids); + state_predecessor_map_delete(&predecessor_map); + + return all_patterns_are_valid; +} + +static void ts_query__add_negated_fields(TSQuery *self, uint16_t step_index, + TSFieldId *field_ids, + uint16_t field_count) +{ + QueryStep *step = &self->steps.contents[step_index]; + + // The negated field array stores a list of field lists, separated by zeros. + // Try to find the start index of an existing list that matches this new + // list. + bool failed_match = false; + unsigned match_count = 0; + unsigned start_i = 0; + for (unsigned i = 0; i < self->negated_fields.size; i++) + { + TSFieldId existing_field_id = self->negated_fields.contents[i]; + + // At each zero value, terminate the match attempt. If we've exactly + // matched the new field list, then reuse this index. Otherwise, + // start over the matching process. 
+ if (existing_field_id == 0) + { + if (match_count == field_count) + { + step->negated_field_list_id = start_i; + return; + } + else + { + start_i = i + 1; + match_count = 0; + failed_match = false; + } + } + + // If the existing list matches our new list so far, then advance + // to the next element of the new list. + else if (match_count < field_count && + existing_field_id == field_ids[match_count] && !failed_match) + { + match_count++; + } + + // Otherwise, this existing list has failed to match. + else + { + match_count = 0; + failed_match = true; + } + } + + step->negated_field_list_id = self->negated_fields.size; + array_extend(&self->negated_fields, field_count, field_ids); + array_push(&self->negated_fields, 0); +} + +static TSQueryError ts_query__parse_string_literal(TSQuery *self, + Stream *stream) +{ + const char *string_start = stream->input; + if (stream->next != '"') + return TSQueryErrorSyntax; + stream_advance(stream); + const char *prev_position = stream->input; + + bool is_escaped = false; + array_clear(&self->string_buffer); + for (;;) + { + if (is_escaped) + { + is_escaped = false; + switch (stream->next) + { + case 'n': + array_push(&self->string_buffer, '\n'); + break; + case 'r': + array_push(&self->string_buffer, '\r'); + break; + case 't': + array_push(&self->string_buffer, '\t'); + break; + case '0': + array_push(&self->string_buffer, '\0'); + break; + default: + array_extend(&self->string_buffer, stream->next_size, + stream->input); + break; + } + prev_position = stream->input + stream->next_size; + } + else + { + if (stream->next == '\\') + { + array_extend(&self->string_buffer, + (uint32_t)(stream->input - prev_position), + prev_position); + prev_position = stream->input + 1; + is_escaped = true; + } + else if (stream->next == '"') + { + array_extend(&self->string_buffer, + (uint32_t)(stream->input - prev_position), + prev_position); + stream_advance(stream); + return TSQueryErrorNone; + } + else if (stream->next == '\n') + { + 
stream_reset(stream, string_start); + return TSQueryErrorSyntax; + } + } + if (!stream_advance(stream)) + { + stream_reset(stream, string_start); + return TSQueryErrorSyntax; + } + } +} + +// Parse a single predicate associated with a pattern, adding it to the +// query's internal `predicate_steps` array. Predicates are arbitrary +// S-expressions associated with a pattern which are meant to be handled at +// a higher level of abstraction, such as the Rust/JavaScript bindings. They +// can contain '@'-prefixed capture names, double-quoted strings, and bare +// symbols, which also represent strings. +static TSQueryError ts_query__parse_predicate(TSQuery *self, Stream *stream) +{ + if (!stream_is_ident_start(stream)) + return TSQueryErrorSyntax; + const char *predicate_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - predicate_name); + uint16_t id = symbol_table_insert_name(&self->predicate_values, + predicate_name, length); + array_push(&self->predicate_steps, + ((TSQueryPredicateStep){ + .type = TSQueryPredicateStepTypeString, + .value_id = id, + })); + stream_skip_whitespace(stream); + + for (;;) + { + if (stream->next == ')') + { + stream_advance(stream); + stream_skip_whitespace(stream); + array_push(&self->predicate_steps, + ((TSQueryPredicateStep){ + .type = TSQueryPredicateStepTypeDone, + .value_id = 0, + })); + break; + } + + // Parse an '@'-prefixed capture name + else if (stream->next == '@') + { + stream_advance(stream); + + // Parse the capture name + if (!stream_is_ident_start(stream)) + return TSQueryErrorSyntax; + const char *capture_name = stream->input; + stream_scan_identifier(stream); + uint32_t capture_length = (uint32_t)(stream->input - capture_name); + + // Add the capture id to the first step of the pattern + int capture_id = symbol_table_id_for_name( + &self->captures, capture_name, capture_length); + if (capture_id == -1) + { + stream_reset(stream, capture_name); + return 
TSQueryErrorCapture; + } + + array_push(&self->predicate_steps, + ((TSQueryPredicateStep){ + .type = TSQueryPredicateStepTypeCapture, + .value_id = capture_id, + })); + } + + // Parse a string literal + else if (stream->next == '"') + { + TSQueryError e = ts_query__parse_string_literal(self, stream); + if (e) + return e; + uint16_t query_id = symbol_table_insert_name( + &self->predicate_values, self->string_buffer.contents, + self->string_buffer.size); + array_push(&self->predicate_steps, + ((TSQueryPredicateStep){ + .type = TSQueryPredicateStepTypeString, + .value_id = query_id, + })); + } + + // Parse a bare symbol + else if (stream_is_ident_start(stream)) + { + const char *symbol_start = stream->input; + stream_scan_identifier(stream); + uint32_t symbol_length = (uint32_t)(stream->input - symbol_start); + uint16_t query_id = symbol_table_insert_name( + &self->predicate_values, symbol_start, symbol_length); + array_push(&self->predicate_steps, + ((TSQueryPredicateStep){ + .type = TSQueryPredicateStepTypeString, + .value_id = query_id, + })); + } + + else + { + return TSQueryErrorSyntax; + } + + stream_skip_whitespace(stream); + } + + return 0; +} + +// Read one S-expression pattern from the stream, and incorporate it into +// the query's internal state machine representation. For nested patterns, +// this function calls itself recursively. +// +// The caller is responsible for passing in a dedicated CaptureQuantifiers. +// These should not be shared between different calls to +// ts_query__parse_pattern! +static TSQueryError ts_query__parse_pattern( + TSQuery *self, Stream *stream, uint32_t depth, bool is_immediate, + CaptureQuantifiers *capture_quantifiers) +{ + if (stream->next == 0) + return TSQueryErrorSyntax; + if (stream->next == ')' || stream->next == ']') + return PARENT_DONE; + + const uint32_t starting_step_index = self->steps.size; + + // Store the byte offset of each step in the query. 
+ if (self->step_offsets.size == 0 || + array_back(&self->step_offsets)->step_index != starting_step_index) + { + array_push(&self->step_offsets, + ((StepOffset){ + .step_index = starting_step_index, + .byte_offset = stream_offset(stream), + })); + } + + // An open bracket is the start of an alternation. + if (stream->next == '[') + { + stream_advance(stream); + stream_skip_whitespace(stream); + + // Parse each branch, and add a placeholder step in between the + // branches. + Array(uint32_t) branch_step_indices = array_new(); + CaptureQuantifiers branch_capture_quantifiers = + capture_quantifiers_new(); + for (;;) + { + uint32_t start_index = self->steps.size; + TSQueryError e = ts_query__parse_pattern( + self, stream, depth, is_immediate, &branch_capture_quantifiers); + + if (e == PARENT_DONE) + { + if (stream->next == ']' && branch_step_indices.size > 0) + { + stream_advance(stream); + break; + } + e = TSQueryErrorSyntax; + } + if (e) + { + capture_quantifiers_delete(&branch_capture_quantifiers); + array_delete(&branch_step_indices); + return e; + } + + if (start_index == starting_step_index) + { + capture_quantifiers_replace(capture_quantifiers, + &branch_capture_quantifiers); + } + else + { + capture_quantifiers_join_all(capture_quantifiers, + &branch_capture_quantifiers); + } + + array_push(&branch_step_indices, start_index); + array_push(&self->steps, query_step__new(0, depth, false)); + capture_quantifiers_clear(&branch_capture_quantifiers); + } + (void)array_pop(&self->steps); + + // For all of the branches except for the last one, add the subsequent + // branch as an alternative, and link the end of the branch to the + // current end of the steps. 
+ for (unsigned i = 0; i < branch_step_indices.size - 1; i++) + { + uint32_t step_index = branch_step_indices.contents[i]; + uint32_t next_step_index = branch_step_indices.contents[i + 1]; + QueryStep *start_step = &self->steps.contents[step_index]; + QueryStep *end_step = &self->steps.contents[next_step_index - 1]; + start_step->alternative_index = next_step_index; + end_step->alternative_index = self->steps.size; + end_step->is_dead_end = true; + } + + capture_quantifiers_delete(&branch_capture_quantifiers); + array_delete(&branch_step_indices); + } + + // An open parenthesis can be the start of three possible constructs: + // * A grouped sequence + // * A predicate + // * A named node + else if (stream->next == '(') + { + stream_advance(stream); + stream_skip_whitespace(stream); + + // If this parenthesis is followed by a node, then it represents a + // grouped sequence. + if (stream->next == '(' || stream->next == '"' || stream->next == '[') + { + bool child_is_immediate = is_immediate; + CaptureQuantifiers child_capture_quantifiers = + capture_quantifiers_new(); + for (;;) + { + if (stream->next == '.') + { + child_is_immediate = true; + stream_advance(stream); + stream_skip_whitespace(stream); + } + TSQueryError e = ts_query__parse_pattern( + self, stream, depth, child_is_immediate, + &child_capture_quantifiers); + if (e == PARENT_DONE) + { + if (stream->next == ')') + { + stream_advance(stream); + break; + } + e = TSQueryErrorSyntax; + } + if (e) + { + capture_quantifiers_delete(&child_capture_quantifiers); + return e; + } + + capture_quantifiers_add_all(capture_quantifiers, + &child_capture_quantifiers); + capture_quantifiers_clear(&child_capture_quantifiers); + child_is_immediate = false; + } + + capture_quantifiers_delete(&child_capture_quantifiers); + } + + // A dot/pound character indicates the start of a predicate. + else if (stream->next == '.' 
|| stream->next == '#') + { + stream_advance(stream); + return ts_query__parse_predicate(self, stream); + } + + // Otherwise, this parenthesis is the start of a named node. + else + { + TSSymbol symbol; + + // Parse a normal node name + if (stream_is_ident_start(stream)) + { + const char *node_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - node_name); + + // Parse the wildcard symbol + if (length == 1 && node_name[0] == '_') + { + symbol = WILDCARD_SYMBOL; + } + + else + { + symbol = ts_language_symbol_for_name( + self->language, node_name, length, true); + if (!symbol) + { + stream_reset(stream, node_name); + return TSQueryErrorNodeType; + } + } + } + else + { + return TSQueryErrorSyntax; + } + + // Add a step for the node. + array_push(&self->steps, + query_step__new(symbol, depth, is_immediate)); + QueryStep *step = array_back(&self->steps); + if (ts_language_symbol_metadata(self->language, symbol).supertype) + { + step->supertype_symbol = step->symbol; + step->symbol = WILDCARD_SYMBOL; + } + if (symbol == WILDCARD_SYMBOL) + { + step->is_named = true; + } + + stream_skip_whitespace(stream); + + if (stream->next == '/') + { + stream_advance(stream); + if (!stream_is_ident_start(stream)) + { + return TSQueryErrorSyntax; + } + + const char *node_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - node_name); + + step->symbol = ts_language_symbol_for_name( + self->language, node_name, length, true); + if (!step->symbol) + { + stream_reset(stream, node_name); + return TSQueryErrorNodeType; + } + + stream_skip_whitespace(stream); + } + + // Parse the child patterns + bool child_is_immediate = false; + uint16_t last_child_step_index = 0; + uint16_t negated_field_count = 0; + TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT]; + CaptureQuantifiers child_capture_quantifiers = + capture_quantifiers_new(); + for (;;) + { + // Parse a negated field assertion + if 
(stream->next == '!') + { + stream_advance(stream); + stream_skip_whitespace(stream); + if (!stream_is_ident_start(stream)) + { + capture_quantifiers_delete(&child_capture_quantifiers); + return TSQueryErrorSyntax; + } + const char *field_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - field_name); + stream_skip_whitespace(stream); + + TSFieldId field_id = ts_language_field_id_for_name( + self->language, field_name, length); + if (!field_id) + { + stream->input = field_name; + capture_quantifiers_delete(&child_capture_quantifiers); + return TSQueryErrorField; + } + + // Keep the field ids sorted. + if (negated_field_count < MAX_NEGATED_FIELD_COUNT) + { + negated_field_ids[negated_field_count] = field_id; + negated_field_count++; + } + + continue; + } + + // Parse a sibling anchor + if (stream->next == '.') + { + child_is_immediate = true; + stream_advance(stream); + stream_skip_whitespace(stream); + } + + uint16_t step_index = self->steps.size; + TSQueryError e = ts_query__parse_pattern( + self, stream, depth + 1, child_is_immediate, + &child_capture_quantifiers); + if (e == PARENT_DONE) + { + if (stream->next == ')') + { + if (child_is_immediate) + { + if (last_child_step_index == 0) + { + capture_quantifiers_delete( + &child_capture_quantifiers); + return TSQueryErrorSyntax; + } + self->steps.contents[last_child_step_index] + .is_last_child = true; + } + + if (negated_field_count) + { + ts_query__add_negated_fields( + self, starting_step_index, negated_field_ids, + negated_field_count); + } + + stream_advance(stream); + break; + } + e = TSQueryErrorSyntax; + } + if (e) + { + capture_quantifiers_delete(&child_capture_quantifiers); + return e; + } + + capture_quantifiers_add_all(capture_quantifiers, + &child_capture_quantifiers); + + last_child_step_index = step_index; + child_is_immediate = false; + capture_quantifiers_clear(&child_capture_quantifiers); + } + 
capture_quantifiers_delete(&child_capture_quantifiers); + } + } + + // Parse a wildcard pattern + else if (stream->next == '_') + { + stream_advance(stream); + stream_skip_whitespace(stream); + + // Add a step that matches any kind of node + array_push(&self->steps, + query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); + } + + // Parse a double-quoted anonymous leaf node expression + else if (stream->next == '"') + { + const char *string_start = stream->input; + TSQueryError e = ts_query__parse_string_literal(self, stream); + if (e) + return e; + + // Add a step for the node + TSSymbol symbol = ts_language_symbol_for_name( + self->language, self->string_buffer.contents, + self->string_buffer.size, false); + if (!symbol) + { + stream_reset(stream, string_start + 1); + return TSQueryErrorNodeType; + } + array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); + } + + // Parse a field-prefixed pattern + else if (stream_is_ident_start(stream)) + { + // Parse the field name + const char *field_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - field_name); + stream_skip_whitespace(stream); + + if (stream->next != ':') + { + stream_reset(stream, field_name); + return TSQueryErrorSyntax; + } + stream_advance(stream); + stream_skip_whitespace(stream); + + // Parse the pattern + CaptureQuantifiers field_capture_quantifiers = + capture_quantifiers_new(); + TSQueryError e = ts_query__parse_pattern( + self, stream, depth, is_immediate, &field_capture_quantifiers); + if (e) + { + capture_quantifiers_delete(&field_capture_quantifiers); + if (e == PARENT_DONE) + e = TSQueryErrorSyntax; + return e; + } + + // Add the field name to the first step of the pattern + TSFieldId field_id = + ts_language_field_id_for_name(self->language, field_name, length); + if (!field_id) + { + stream->input = field_name; + return TSQueryErrorField; + } + + uint32_t step_index = starting_step_index; + QueryStep *step = 
&self->steps.contents[step_index]; + for (;;) + { + step->field = field_id; + if (step->alternative_index != NONE && + step->alternative_index > step_index && + step->alternative_index < self->steps.size) + { + step_index = step->alternative_index; + step = &self->steps.contents[step_index]; + } + else + { + break; + } + } + + capture_quantifiers_add_all(capture_quantifiers, + &field_capture_quantifiers); + capture_quantifiers_delete(&field_capture_quantifiers); + } + + else + { + return TSQueryErrorSyntax; + } + + stream_skip_whitespace(stream); + + // Parse suffixes modifiers for this pattern + TSQuantifier quantifier = TSQuantifierOne; + for (;;) + { + // Parse the one-or-more operator. + if (stream->next == '+') + { + quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier); + + stream_advance(stream); + stream_skip_whitespace(stream); + + QueryStep repeat_step = + query_step__new(WILDCARD_SYMBOL, depth, false); + repeat_step.alternative_index = starting_step_index; + repeat_step.is_pass_through = true; + repeat_step.alternative_is_immediate = true; + array_push(&self->steps, repeat_step); + } + + // Parse the zero-or-more repetition operator. + else if (stream->next == '*') + { + quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier); + + stream_advance(stream); + stream_skip_whitespace(stream); + + QueryStep repeat_step = + query_step__new(WILDCARD_SYMBOL, depth, false); + repeat_step.alternative_index = starting_step_index; + repeat_step.is_pass_through = true; + repeat_step.alternative_is_immediate = true; + array_push(&self->steps, repeat_step); + + // Stop when `step->alternative_index` is `NONE` or it points to + // `repeat_step` or beyond. Note that having just been pushed, + // `repeat_step` occupies slot `self->steps.size - 1`. 
+ QueryStep *step = &self->steps.contents[starting_step_index]; + while (step->alternative_index != NONE && + step->alternative_index < self->steps.size - 1) + { + step = &self->steps.contents[step->alternative_index]; + } + step->alternative_index = self->steps.size; + } + + // Parse the optional operator. + else if (stream->next == '?') + { + quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier); + + stream_advance(stream); + stream_skip_whitespace(stream); + + QueryStep *step = &self->steps.contents[starting_step_index]; + while (step->alternative_index != NONE && + step->alternative_index < self->steps.size) + { + step = &self->steps.contents[step->alternative_index]; + } + step->alternative_index = self->steps.size; + } + + // Parse an '@'-prefixed capture pattern + else if (stream->next == '@') + { + stream_advance(stream); + if (!stream_is_ident_start(stream)) + return TSQueryErrorSyntax; + const char *capture_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = (uint32_t)(stream->input - capture_name); + stream_skip_whitespace(stream); + + // Add the capture id to the first step of the pattern + uint16_t capture_id = + symbol_table_insert_name(&self->captures, capture_name, length); + + // Add the capture quantifier + capture_quantifiers_add_for_id(capture_quantifiers, capture_id, + TSQuantifierOne); + + uint32_t step_index = starting_step_index; + for (;;) + { + QueryStep *step = &self->steps.contents[step_index]; + query_step__add_capture(step, capture_id); + if (step->alternative_index != NONE && + step->alternative_index > step_index && + step->alternative_index < self->steps.size) + { + step_index = step->alternative_index; + } + else + { + break; + } + } + } + + // No more suffix modifiers + else + { + break; + } + } + + capture_quantifiers_mul(capture_quantifiers, quantifier); + + return 0; +} + +TSQuery *ts_query_new(const TSLanguage *language, const char *source, + uint32_t source_len, uint32_t *error_offset, + 
TSQueryError *error_type) +{ + + TSQuery *self = malloc(sizeof(TSQuery)); + *self = (TSQuery){ + .steps = array_new(), + .pattern_map = array_new(), + .captures = symbol_table_new(), + .capture_quantifiers = array_new(), + .predicate_values = symbol_table_new(), + .predicate_steps = array_new(), + .patterns = array_new(), + .step_offsets = array_new(), + .string_buffer = array_new(), + .negated_fields = array_new(), + .repeat_symbols_with_rootless_patterns = array_new(), + .wildcard_root_pattern_count = 0, + .language = ts_language_copy(language), + }; + + array_push(&self->negated_fields, 0); + + // Parse all of the S-expressions in the given string. + Stream stream = stream_new(source, source_len); + stream_skip_whitespace(&stream); + while (stream.input < stream.end) + { + uint32_t pattern_index = self->patterns.size; + uint32_t start_step_index = self->steps.size; + uint32_t start_predicate_step_index = self->predicate_steps.size; + array_push(&self->patterns, + ((QueryPattern){ + .steps = (Slice){.offset = start_step_index}, + .predicate_steps = + (Slice){.offset = start_predicate_step_index}, + .start_byte = stream_offset(&stream), + .is_non_local = false, + })); + CaptureQuantifiers capture_quantifiers = capture_quantifiers_new(); + *error_type = ts_query__parse_pattern(self, &stream, 0, false, + &capture_quantifiers); + array_push(&self->steps, + query_step__new(0, PATTERN_DONE_MARKER, false)); + + QueryPattern *pattern = array_back(&self->patterns); + pattern->steps.length = self->steps.size - start_step_index; + pattern->predicate_steps.length = + self->predicate_steps.size - start_predicate_step_index; + + // If any pattern could not be parsed, then report the error information + // and terminate. 
+ if (*error_type) + { + if (*error_type == PARENT_DONE) + *error_type = TSQueryErrorSyntax; + *error_offset = stream_offset(&stream); + capture_quantifiers_delete(&capture_quantifiers); + ts_query_delete(self); + return NULL; + } + + // Maintain a list of capture quantifiers for each pattern + array_push(&self->capture_quantifiers, capture_quantifiers); + + // Maintain a map that can look up patterns for a given root symbol. + uint16_t wildcard_root_alternative_index = NONE; + for (;;) + { + QueryStep *step = &self->steps.contents[start_step_index]; + + // If a pattern has a wildcard at its root, but it has a + // non-wildcard child, then optimize the matching process by + // skipping matching the wildcard. Later, during the matching + // process, the query cursor will check that there is a parent node, + // and capture it if necessary. + if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && + !step->field) + { + QueryStep *second_step = + &self->steps.contents[start_step_index + 1]; + if (second_step->symbol != WILDCARD_SYMBOL && + second_step->depth == 1) + { + wildcard_root_alternative_index = step->alternative_index; + start_step_index += 1; + step = second_step; + } + } + + // Determine whether the pattern has a single root node. This + // affects decisions about whether or not to start matching the + // pattern when a query cursor has a range restriction or when + // immediately within an error node. 
+ uint32_t start_depth = step->depth; + bool is_rooted = start_depth == 0; + for (uint32_t step_index = start_step_index + 1; + step_index < self->steps.size; step_index++) + { + QueryStep *child_step = &self->steps.contents[step_index]; + if (child_step->is_dead_end) + break; + if (child_step->depth == start_depth) + { + is_rooted = false; + break; + } + } + + ts_query__pattern_map_insert( + self, step->symbol, + (PatternEntry){.step_index = start_step_index, + .pattern_index = pattern_index, + .is_rooted = is_rooted}); + if (step->symbol == WILDCARD_SYMBOL) + { + self->wildcard_root_pattern_count++; + } + + // If there are alternatives or options at the root of the pattern, + // then add multiple entries to the pattern map. + if (step->alternative_index != NONE) + { + start_step_index = step->alternative_index; + } + else if (wildcard_root_alternative_index != NONE) + { + start_step_index = wildcard_root_alternative_index; + wildcard_root_alternative_index = NONE; + } + else + { + break; + } + } + } + + if (!ts_query__analyze_patterns(self, error_offset)) + { + *error_type = TSQueryErrorStructure; + ts_query_delete(self); + return NULL; + } + + array_delete(&self->string_buffer); + return self; +} + +void ts_query_delete(TSQuery *self) +{ + if (self) + { + array_delete(&self->steps); + array_delete(&self->pattern_map); + array_delete(&self->predicate_steps); + array_delete(&self->patterns); + array_delete(&self->step_offsets); + array_delete(&self->string_buffer); + array_delete(&self->negated_fields); + array_delete(&self->repeat_symbols_with_rootless_patterns); + ts_language_delete(self->language); + symbol_table_delete(&self->captures); + symbol_table_delete(&self->predicate_values); + for (uint32_t index = 0; index < self->capture_quantifiers.size; + index++) + { + CaptureQuantifiers *capture_quantifiers = + array_get(&self->capture_quantifiers, index); + capture_quantifiers_delete(capture_quantifiers); + } + array_delete(&self->capture_quantifiers); + 
free(self); + } +} + +uint32_t ts_query_pattern_count(const TSQuery *self) +{ + return self->patterns.size; +} + +uint32_t ts_query_capture_count(const TSQuery *self) +{ + return self->captures.slices.size; +} + +uint32_t ts_query_string_count(const TSQuery *self) +{ + return self->predicate_values.slices.size; +} + +const char *ts_query_capture_name_for_id(const TSQuery *self, uint32_t index, + uint32_t *length) +{ + return symbol_table_name_for_id(&self->captures, index, length); +} + +TSQuantifier ts_query_capture_quantifier_for_id(const TSQuery *self, + uint32_t pattern_index, + uint32_t capture_index) +{ + CaptureQuantifiers *capture_quantifiers = + array_get(&self->capture_quantifiers, pattern_index); + return capture_quantifier_for_id(capture_quantifiers, capture_index); +} + +const char *ts_query_string_value_for_id(const TSQuery *self, uint32_t index, + uint32_t *length) +{ + return symbol_table_name_for_id(&self->predicate_values, index, length); +} + +const TSQueryPredicateStep *ts_query_predicates_for_pattern( + const TSQuery *self, uint32_t pattern_index, uint32_t *step_count) +{ + Slice slice = self->patterns.contents[pattern_index].predicate_steps; + *step_count = slice.length; + if (self->predicate_steps.contents == NULL) + { + return NULL; + } + return &self->predicate_steps.contents[slice.offset]; +} + +uint32_t ts_query_start_byte_for_pattern(const TSQuery *self, + uint32_t pattern_index) +{ + return self->patterns.contents[pattern_index].start_byte; +} + +bool ts_query_is_pattern_rooted(const TSQuery *self, uint32_t pattern_index) +{ + for (unsigned i = 0; i < self->pattern_map.size; i++) + { + PatternEntry *entry = &self->pattern_map.contents[i]; + if (entry->pattern_index == pattern_index) + { + if (!entry->is_rooted) + return false; + } + } + return true; +} + +bool ts_query_is_pattern_non_local(const TSQuery *self, uint32_t pattern_index) +{ + if (pattern_index < self->patterns.size) + { + return 
self->patterns.contents[pattern_index].is_non_local; + } + else + { + return false; + } +} + +bool ts_query_is_pattern_guaranteed_at_step(const TSQuery *self, + uint32_t byte_offset) +{ + uint32_t step_index = UINT32_MAX; + for (unsigned i = 0; i < self->step_offsets.size; i++) + { + StepOffset *step_offset = &self->step_offsets.contents[i]; + if (step_offset->byte_offset > byte_offset) + break; + step_index = step_offset->step_index; + } + if (step_index < self->steps.size) + { + return self->steps.contents[step_index].root_pattern_guaranteed; + } + else + { + return false; + } +} + +bool ts_query__step_is_fallible(const TSQuery *self, uint16_t step_index) +{ + assert((uint32_t)step_index + 1 < self->steps.size); + QueryStep *step = &self->steps.contents[step_index]; + QueryStep *next_step = &self->steps.contents[step_index + 1]; + return (next_step->depth != PATTERN_DONE_MARKER && + next_step->depth > step->depth && + !next_step->parent_pattern_guaranteed); +} + +void ts_query_disable_capture(TSQuery *self, const char *name, uint32_t length) +{ + // Remove capture information for any pattern step that previously + // captured with the given name. + int id = symbol_table_id_for_name(&self->captures, name, length); + if (id != -1) + { + for (unsigned i = 0; i < self->steps.size; i++) + { + QueryStep *step = &self->steps.contents[i]; + query_step__remove_capture(step, id); + } + } +} + +void ts_query_disable_pattern(TSQuery *self, uint32_t pattern_index) +{ + // Remove the given pattern from the pattern map. Its steps will still + // be in the `steps` array, but they will never be read. 
+ for (unsigned i = 0; i < self->pattern_map.size; i++) + { + PatternEntry *pattern = &self->pattern_map.contents[i]; + if (pattern->pattern_index == pattern_index) + { + array_erase(&self->pattern_map, i); + i--; + } + } +} + +/*************** + * QueryCursor + ***************/ + +TSQueryCursor *ts_query_cursor_new(void) +{ + TSQueryCursor *self = malloc(sizeof(TSQueryCursor)); + *self = (TSQueryCursor){ + .did_exceed_match_limit = false, + .ascending = false, + .halted = false, + .states = array_new(), + .finished_states = array_new(), + .capture_list_pool = capture_list_pool_new(), + .start_byte = 0, + .end_byte = UINT32_MAX, + .start_point = {0, 0}, + .end_point = POINT_MAX, + .max_start_depth = UINT32_MAX, + }; + array_reserve(&self->states, 8); + array_reserve(&self->finished_states, 8); + return self; +} + +void ts_query_cursor_delete(TSQueryCursor *self) +{ + array_delete(&self->states); + array_delete(&self->finished_states); + ts_tree_cursor_delete(&self->cursor); + capture_list_pool_delete(&self->capture_list_pool); + free(self); +} + +bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) +{ + return self->did_exceed_match_limit; +} + +uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) +{ + return self->capture_list_pool.max_capture_list_count; +} + +void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) +{ + self->capture_list_pool.max_capture_list_count = limit; +} + +#ifdef DEBUG_EXECUTE_QUERY +# define LOG(...) fprintf(stderr, __VA_ARGS__) +#else +# define LOG(...) 
+#endif + +void ts_query_cursor_exec(TSQueryCursor *self, const TSQuery *query, + TSNode node) +{ + if (query) + { + LOG("query steps:\n"); + for (unsigned i = 0; i < query->steps.size; i++) + { + QueryStep *step = &query->steps.contents[i]; + LOG(" %u: {", i); + if (step->depth == PATTERN_DONE_MARKER) + { + LOG("DONE"); + } + else if (step->is_dead_end) + { + LOG("dead_end"); + } + else if (step->is_pass_through) + { + LOG("pass_through"); + } + else if (step->symbol != WILDCARD_SYMBOL) + { + LOG("symbol: %s", query->language->symbol_names[step->symbol]); + } + else + { + LOG("symbol: *"); + } + if (step->field) + { + LOG(", field: %s", query->language->field_names[step->field]); + } + if (step->alternative_index != NONE) + { + LOG(", alternative: %u", step->alternative_index); + } + LOG("},\n"); + } + } + + array_clear(&self->states); + array_clear(&self->finished_states); + ts_tree_cursor_reset(&self->cursor, node); + capture_list_pool_reset(&self->capture_list_pool); + self->on_visible_node = true; + self->next_state_id = 0; + self->depth = 0; + self->ascending = false; + self->halted = false; + self->query = query; + self->did_exceed_match_limit = false; +} + +void ts_query_cursor_set_byte_range(TSQueryCursor *self, uint32_t start_byte, + uint32_t end_byte) +{ + if (end_byte == 0) + { + end_byte = UINT32_MAX; + } + self->start_byte = start_byte; + self->end_byte = end_byte; +} + +void ts_query_cursor_set_point_range(TSQueryCursor *self, TSPoint start_point, + TSPoint end_point) +{ + if (end_point.row == 0 && end_point.column == 0) + { + end_point = POINT_MAX; + } + self->start_point = start_point; + self->end_point = end_point; +} + +// Search through all of the in-progress states, and find the captured +// node that occurs earliest in the document. 
+// Find the in-progress state whose next unconsumed capture starts earliest.
+//
+// The three index/offset out-parameters are first reset to UINT32_MAX and
+// then receive the index of the selected state in `self->states`, the start
+// byte of its next capture, and its pattern index. Ties on the start byte
+// are broken in favour of the lower pattern index.
+//
+// `root_pattern_guaranteed` is optional. When non-NULL it receives the
+// `root_pattern_guaranteed` flag of the selected state's current step. When
+// NULL, states whose current step has that flag set are never selected.
+//
+// Returns true if a state was selected.
+static bool ts_query_cursor__first_in_progress_capture(
+	TSQueryCursor *self, uint32_t *state_index, uint32_t *byte_offset,
+	uint32_t *pattern_index, bool *root_pattern_guaranteed)
+{
+	bool result = false;
+	*state_index = UINT32_MAX;
+	*byte_offset = UINT32_MAX;
+	*pattern_index = UINT32_MAX;
+	for (unsigned i = 0; i < self->states.size; i++)
+	{
+		QueryState *state = &self->states.contents[i];
+		if (state->dead)
+			continue;
+
+		// States with no unconsumed capture have nothing to offer.
+		const CaptureList *captures = capture_list_pool_get(
+			&self->capture_list_pool, state->capture_list_id);
+		if (state->consumed_capture_count >= captures->size)
+		{
+			continue;
+		}
+
+		// A capture that ends at or before the start of the cursor's range is
+		// consumed on the spot. The `i--` cancels the loop's `i++` so that the
+		// same state is examined again with its next capture.
+		TSNode node = captures->contents[state->consumed_capture_count].node;
+		if (ts_node_end_byte(node) <= self->start_byte ||
+			point_lte(ts_node_end_point(node), self->start_point))
+		{
+			state->consumed_capture_count++;
+			i--;
+			continue;
+		}
+
+		uint32_t node_start_byte = ts_node_start_byte(node);
+		if (!result || node_start_byte < *byte_offset ||
+			(node_start_byte == *byte_offset &&
+			 state->pattern_index < *pattern_index))
+		{
+			QueryStep *step = &self->query->steps.contents[state->step_index];
+			if (root_pattern_guaranteed)
+			{
+				*root_pattern_guaranteed = step->root_pattern_guaranteed;
+			}
+			else if (step->root_pattern_guaranteed)
+			{
+				continue;
+			}
+
+			result = true;
+			*state_index = i;
+			*byte_offset = node_start_byte;
+			*pattern_index = state->pattern_index;
+		}
+	}
+	return result;
+}
+
+// Determine which node is first in a depth-first traversal
+//
+// Returns -1 if `left` comes first, 1 if `right` comes first, and 0 if both
+// have the same id or the same start and end bytes. The node that starts
+// earlier comes first; when both start at the same byte, the node that ends
+// later comes first.
+int ts_query_cursor__compare_nodes(TSNode left, TSNode right)
+{
+	if (left.id != right.id)
+	{
+		uint32_t left_start = ts_node_start_byte(left);
+		uint32_t right_start = ts_node_start_byte(right);
+		if (left_start < right_start)
+			return -1;
+		if (left_start > right_start)
+			return 1;
+		// Despite their names, these two variables hold the end bytes.
+		uint32_t left_node_count = ts_node_end_byte(left);
+		uint32_t right_node_count = ts_node_end_byte(right);
+		if (left_node_count > right_node_count)
+			return -1;
+		if (left_node_count < right_node_count)
+			return 1;
+	}
+	return 0;
+}
+
+// Determine if either state contains a superset of the other state's captures.
+//
+// Both capture lists are walked in parallel. Each output flag starts out true
+// and is cleared as soon as the other side is found to hold a capture that
+// this side lacks. Two captures are the same when both the node id and the
+// capture index are equal.
+void ts_query_cursor__compare_captures(TSQueryCursor *self,
+	QueryState *left_state,
+	QueryState *right_state,
+	bool *left_contains_right,
+	bool *right_contains_left)
+{
+	const CaptureList *left_captures = capture_list_pool_get(
+		&self->capture_list_pool, left_state->capture_list_id);
+	const CaptureList *right_captures = capture_list_pool_get(
+		&self->capture_list_pool, right_state->capture_list_id);
+	*left_contains_right = true;
+	*right_contains_left = true;
+	unsigned i = 0, j = 0;
+	for (;;)
+	{
+		if (i < left_captures->size)
+		{
+			if (j < right_captures->size)
+			{
+				TSQueryCapture *left = &left_captures->contents[i];
+				TSQueryCapture *right = &right_captures->contents[j];
+				if (left->node.id == right->node.id &&
+					left->index == right->index)
+				{
+					i++;
+					j++;
+				}
+				else
+				{
+					// Advance whichever side holds the earlier node; that
+					// capture is missing from the other side.
+					switch (
+						ts_query_cursor__compare_nodes(left->node, right->node))
+					{
+					case -1:
+						*right_contains_left = false;
+						i++;
+						break;
+					case 1:
+						*left_contains_right = false;
+						j++;
+						break;
+					default:
+						*right_contains_left = false;
+						*left_contains_right = false;
+						i++;
+						j++;
+						break;
+					}
+				}
+			}
+			else
+			{
+				// Left still has captures but right is exhausted.
+				*right_contains_left = false;
+				break;
+			}
+		}
+		else
+		{
+			// Left is exhausted; any remaining right capture is missing
+			// from left.
+			if (j < right_captures->size)
+			{
+				*left_contains_right = false;
+			}
+			break;
+		}
+	}
+}
+
+// Start a new in-progress state for `pattern`, rooted relative to the current
+// depth. The state is inserted so that `self->states` stays ordered by
+// start_depth and then pattern_index. Nothing is inserted when a state with
+// the same start_depth, pattern_index and step_index is already encountered
+// while searching for the insertion point.
+static void ts_query_cursor__add_state(TSQueryCursor *self,
+	const PatternEntry *pattern)
+{
+	QueryStep *step = &self->query->steps.contents[pattern->step_index];
+	uint32_t start_depth = self->depth - step->depth;
+
+	// Keep the states array in ascending order of start_depth and
+	// pattern_index, so that it can be processed more efficiently elsewhere.
+	// Usually, there is no work to do here because of two facts:
+	// * States with lower start_depth are naturally added first due to the
+	//   order in which nodes are visited.
+	// * Earlier patterns are naturally added first because of the ordering of
+	//   the pattern_map data structure that's used to initiate matches.
+	//
+	// This loop is only needed in cases where two conditions hold:
+	// * A pattern consists of more than one sibling node, so that its states
+	//   remain in progress after exiting the node that started the match.
+	// * The first node in the pattern matches against multiple nodes at the
+	//   same depth.
+	//
+	// An example of this is the pattern '((comment)* (function))'. If multiple
+	// `comment` nodes appear in a row, then we may initiate a new state for
+	// this pattern while another state for the same pattern is already in
+	// progress. If there are multiple patterns like this in a query, then this
+	// loop will need to execute in order to keep the states ordered by
+	// pattern_index.
+	uint32_t index = self->states.size;
+	while (index > 0)
+	{
+		QueryState *prev_state = &self->states.contents[index - 1];
+		if (prev_state->start_depth < start_depth)
+			break;
+		if (prev_state->start_depth == start_depth)
+		{
+			// Avoid inserting an unnecessary duplicate state, which would be
+			// immediately pruned by the longest-match criteria.
+			if (prev_state->pattern_index == pattern->pattern_index &&
+				prev_state->step_index == pattern->step_index)
+				return;
+			if (prev_state->pattern_index <= pattern->pattern_index)
+				break;
+		}
+		index--;
+	}
+
+	LOG(" start state. pattern:%u, step:%u\n", pattern->pattern_index,
+		pattern->step_index);
+	array_insert(&self->states, index,
+		((QueryState){
+			.id = UINT32_MAX,
+			.capture_list_id = NONE,
+			.step_index = pattern->step_index,
+			.pattern_index = pattern->pattern_index,
+			.start_depth = start_depth,
+			.consumed_capture_count = 0,
+			.seeking_immediate_match = true,
+			.has_in_progress_alternatives = false,
+			.needs_parent = step->depth == 1,
+			.dead = false,
+		}));
+}
+
+// Acquire a capture list for this state.
+// If there are no capture lists left in
+// the pool, this will steal the capture list from another existing state, and
+// mark that other state as 'dead'.
+//
+// `state_index_to_preserve` is the index in `self->states` of a state whose
+// capture list must not be stolen (pass UINT32_MAX to protect none).
+//
+// Returns the state's mutable capture list, or NULL when the pool is
+// exhausted and no other state's list could be taken. Whenever the pool is
+// exhausted, `self->did_exceed_match_limit` is set.
+static CaptureList *ts_query_cursor__prepare_to_capture(
+	TSQueryCursor *self, QueryState *state, unsigned state_index_to_preserve)
+{
+	if (state->capture_list_id == NONE)
+	{
+		state->capture_list_id =
+			capture_list_pool_acquire(&self->capture_list_pool);
+
+		// If there are no capture lists left in the pool, then terminate
+		// whichever state has captured the earliest node in the document, and
+		// steal its capture list.
+		if (state->capture_list_id == NONE)
+		{
+			self->did_exceed_match_limit = true;
+			uint32_t state_index, byte_offset, pattern_index;
+			if (ts_query_cursor__first_in_progress_capture(
+					self, &state_index, &byte_offset, &pattern_index, NULL) &&
+				state_index != state_index_to_preserve)
+			{
+				LOG(" abandon state. index:%u, pattern:%u, offset:%u.\n",
+					state_index, pattern_index, byte_offset);
+				QueryState *other_state = &self->states.contents[state_index];
+				state->capture_list_id = other_state->capture_list_id;
+				other_state->capture_list_id = NONE;
+				other_state->dead = true;
+				// The stolen list still holds the other state's captures.
+				CaptureList *list = capture_list_pool_get_mut(
+					&self->capture_list_pool, state->capture_list_id);
+				array_clear(list);
+				return list;
+			}
+			else
+			{
+				LOG(" ran out of capture lists");
+				return NULL;
+			}
+		}
+	}
+	return capture_list_pool_get_mut(&self->capture_list_pool,
+		state->capture_list_id);
+}
+
+// Record `node` in the capture list of `state`, once for every capture id
+// listed in `step->capture_ids` (the list ends at the first NONE entry or
+// after MAX_STEP_CAPTURE_COUNT entries). Does nothing for a dead state. If no
+// capture list can be obtained, the state is marked dead instead.
+static void ts_query_cursor__capture(TSQueryCursor *self, QueryState *state,
+	QueryStep *step, TSNode node)
+{
+	if (state->dead)
+		return;
+	CaptureList *capture_list =
+		ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX);
+	if (!capture_list)
+	{
+		state->dead = true;
+		return;
+	}
+
+	for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++)
+	{
+		uint16_t capture_id = step->capture_ids[j];
+		if (step->capture_ids[j] == NONE)
+			break;
+		array_push(capture_list, ((TSQueryCapture){node, capture_id}));
+		LOG(" capture node. type:%s, pattern:%u, capture_id:%u, "
+			"capture_count:%u\n",
+			ts_node_type(node), state->pattern_index, capture_id,
+			capture_list->size);
+	}
+}
+
+// Duplicate the given state and insert the newly-created state immediately
+// after the given state in the `states` array. Ensures that the given state
+// reference is still valid, even if the states array is reallocated.
+//
+// Returns a pointer to the inserted copy. Returns NULL, without inserting
+// anything, when the original has captures and no capture list could be
+// obtained for the copy.
+static QueryState *ts_query_cursor__copy_state(TSQueryCursor *self,
+	QueryState **state_ref)
+{
+	const QueryState *state = *state_ref;
+	uint32_t state_index = (uint32_t)(state - self->states.contents);
+	QueryState copy = *state;
+	copy.capture_list_id = NONE;
+
+	// If the state has captures, copy its capture list.
+	if (state->capture_list_id != NONE)
+	{
+		// Pass the original's index so that its own capture list is never
+		// the one stolen for the copy.
+		CaptureList *new_captures =
+			ts_query_cursor__prepare_to_capture(self, &copy, state_index);
+		if (!new_captures)
+			return NULL;
+		const CaptureList *old_captures = capture_list_pool_get(
+			&self->capture_list_pool, state->capture_list_id);
+		array_push_all(new_captures, old_captures);
+	}
+
+	// Re-derive both pointers from the index after the insertion, because
+	// the insertion may have moved the array.
+	array_insert(&self->states, state_index + 1, copy);
+	*state_ref = &self->states.contents[state_index];
+	return &self->states.contents[state_index + 1];
+}
+
+static inline bool ts_query_cursor__should_descend(TSQueryCursor *self,
+	bool node_intersects_range)
+{
+
+	if (node_intersects_range && self->depth < self->max_start_depth)
+	{
+		return true;
+	}
+
+	// If there are in-progress matches whose remaining steps occur
+	// deeper in the tree, then descend.
+	for (unsigned i = 0; i < self->states.size; i++)
+	{
+		QueryState *state = &self->states.contents[i];
+		;
+		QueryStep *next_step = &self->query->steps.contents[state->step_index];
+		if (next_step->depth != PATTERN_DONE_MARKER &&
+			state->start_depth + next_step->depth > self->depth)
+		{
+			return true;
+		}
+	}
+
+	if (self->depth >= self->max_start_depth)
+	{
+		return false;
+	}
+
+	// If the current node is hidden, then a non-rooted pattern might match
+	// one of its roots inside of this node, and match another of its roots
+	// as part of a sibling node, so we may need to descend.
+	if (!self->on_visible_node)
+	{
+		// Descending into a repetition node outside of the range can be
+		// expensive, because these nodes can have many visible children.
+		// Avoid descending into repetition nodes unless we have already
+		// determined that this query can match rootless patterns inside
+		// of this type of repetition node.
+		Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor);
+		if (ts_subtree_is_repetition(subtree))
+		{
+			bool exists;
+			uint32_t index;
+			// The empty second macro argument is intentional.
+			array_search_sorted_by(
+				&self->query->repeat_symbols_with_rootless_patterns, ,
+				ts_subtree_symbol(subtree), &index, &exists);
+			return exists;
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
+// Walk the tree, processing patterns until at least one pattern finishes.
+// If one or more patterns finish, return `true` and store their states in the
+// `finished_states` array. Multiple patterns can finish on the same node. If
+// there are no more matches, return `false`.
+//
+// When `stop_on_definite_step` is true, the walk also reports a match as soon
+// as a state advances to a step flagged `root_pattern_guaranteed`.
+static inline bool ts_query_cursor__advance(TSQueryCursor *self,
+	bool stop_on_definite_step)
+{
+	bool did_match = false;
+	for (;;)
+	{
+		// Once halted, release every remaining in-progress state.
+		if (self->halted)
+		{
+			while (self->states.size > 0)
+			{
+				QueryState state = array_pop(&self->states);
+				capture_list_pool_release(&self->capture_list_pool,
+					state.capture_list_id);
+			}
+		}
+
+		if (did_match || self->halted)
+			return did_match;
+
+		// Exit the current node.
+		if (self->ascending)
+		{
+			if (self->on_visible_node)
+			{
+				LOG("leave node. depth:%u, type:%s\n", self->depth,
+					ts_node_type(ts_tree_cursor_current_node(&self->cursor)));
+
+				// After leaving a node, remove any states that cannot make
+				// further progress. Surviving states are compacted in place.
+				uint32_t deleted_count = 0;
+				for (unsigned i = 0, n = self->states.size; i < n; i++)
+				{
+					QueryState *state = &self->states.contents[i];
+					QueryStep *step =
+						&self->query->steps.contents[state->step_index];
+
+					// If a state completed its pattern inside of this node, but
+					// was deferred from finishing in order to search for longer
+					// matches, mark it as finished.
+					if (step->depth == PATTERN_DONE_MARKER &&
+						(state->start_depth > self->depth || self->depth == 0))
+					{
+						LOG(" finish pattern %u\n", state->pattern_index);
+						array_push(&self->finished_states, *state);
+						did_match = true;
+						deleted_count++;
+					}
+
+					// If a state needed to match something within this node,
+					// then remove that state as it has failed to match.
+					else if (step->depth != PATTERN_DONE_MARKER &&
+							 (uint32_t)state->start_depth +
+									 (uint32_t)step->depth >
+								 self->depth)
+					{
+						LOG(" failed to match. pattern:%u, step:%u\n",
+							state->pattern_index, state->step_index);
+						capture_list_pool_release(&self->capture_list_pool,
+							state->capture_list_id);
+						deleted_count++;
+					}
+
+					else if (deleted_count > 0)
+					{
+						self->states.contents[i - deleted_count] = *state;
+					}
+				}
+				self->states.size -= deleted_count;
+			}
+
+			// Leave this node by stepping to its next sibling or to its parent.
+			switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor))
+			{
+			case TreeCursorStepVisible:
+				if (!self->on_visible_node)
+				{
+					self->depth++;
+					self->on_visible_node = true;
+				}
+				self->ascending = false;
+				break;
+			case TreeCursorStepHidden:
+				if (self->on_visible_node)
+				{
+					self->depth--;
+					self->on_visible_node = false;
+				}
+				self->ascending = false;
+				break;
+			default:
+				if (ts_tree_cursor_goto_parent(&self->cursor))
+				{
+					self->depth--;
+				}
+				else
+				{
+					LOG("halt at root\n");
+					self->halted = true;
+				}
+			}
+		}
+
+		// Enter a new node.
+		else
+		{
+			// Get the properties of the current node.
+			TSNode node = ts_tree_cursor_current_node(&self->cursor);
+			TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor);
+			bool parent_precedes_range =
+				!ts_node_is_null(parent_node) &&
+				(ts_node_end_byte(parent_node) <= self->start_byte ||
+				 point_lte(ts_node_end_point(parent_node), self->start_point));
+			bool parent_follows_range =
+				!ts_node_is_null(parent_node) &&
+				(ts_node_start_byte(parent_node) >= self->end_byte ||
+				 point_gte(ts_node_start_point(parent_node), self->end_point));
+			bool node_precedes_range =
+				parent_precedes_range ||
+				(ts_node_end_byte(node) <= self->start_byte ||
+				 point_lte(ts_node_end_point(node), self->start_point));
+			bool node_follows_range =
+				parent_follows_range ||
+				(ts_node_start_byte(node) >= self->end_byte ||
+				 point_gte(ts_node_start_point(node), self->end_point));
+			bool parent_intersects_range =
+				!parent_precedes_range && !parent_follows_range;
+			bool node_intersects_range =
+				!node_precedes_range && !node_follows_range;
+
+			if (self->on_visible_node)
+			{
+				TSSymbol symbol = ts_node_symbol(node);
+				bool is_named = ts_node_is_named(node);
+				bool has_later_siblings;
+				bool has_later_named_siblings;
+				bool can_have_later_siblings_with_this_field;
+				TSFieldId field_id = 0;
+				TSSymbol supertypes[8] = {0};
+				unsigned supertype_count = 8;
+				ts_tree_cursor_current_status(
+					&self->cursor, &field_id, &has_later_siblings,
+					&has_later_named_siblings,
+					&can_have_later_siblings_with_this_field, supertypes,
+					&supertype_count);
+				LOG("enter node. depth:%u, type:%s, field:%s, row:%u "
+					"state_count:%u, finished_state_count:%u\n",
+					self->depth, ts_node_type(node),
+					ts_language_field_name_for_id(self->query->language,
+						field_id),
+					ts_node_start_point(node).row, self->states.size,
+					self->finished_states.size);
+
+				bool node_is_error = symbol == ts_builtin_sym_error;
+				bool parent_is_error =
+					!ts_node_is_null(parent_node) &&
+					ts_node_symbol(parent_node) == ts_builtin_sym_error;
+
+				// Add new states for any patterns whose root node is a
+				// wildcard.
+				if (!node_is_error)
+				{
+					for (unsigned i = 0;
+						 i < self->query->wildcard_root_pattern_count; i++)
+					{
+						PatternEntry *pattern =
+							&self->query->pattern_map.contents[i];
+
+						// If this node matches the first step of the pattern,
+						// then add a new state at the start of this pattern.
+						QueryStep *step =
+							&self->query->steps.contents[pattern->step_index];
+						uint32_t start_depth = self->depth - step->depth;
+						if ((pattern->is_rooted ? node_intersects_range
+												: (parent_intersects_range &&
+												   !parent_is_error)) &&
+							(!step->field || field_id == step->field) &&
+							(!step->supertype_symbol || supertype_count > 0) &&
+							(start_depth <= self->max_start_depth))
+						{
+							ts_query_cursor__add_state(self, pattern);
+						}
+					}
+				}
+
+				// Add new states for any patterns whose root node matches this
+				// node.
+				unsigned i;
+				if (ts_query__pattern_map_search(self->query, symbol, &i))
+				{
+					PatternEntry *pattern =
+						&self->query->pattern_map.contents[i];
+
+					QueryStep *step =
+						&self->query->steps.contents[pattern->step_index];
+					uint32_t start_depth = self->depth - step->depth;
+					do
+					{
+						// If this node matches the first step of the pattern,
+						// then add a new state at the start of this pattern.
+						if ((pattern->is_rooted ? node_intersects_range
+												: (parent_intersects_range &&
+												   !parent_is_error)) &&
+							(!step->field || field_id == step->field) &&
+							(start_depth <= self->max_start_depth))
+						{
+							ts_query_cursor__add_state(self, pattern);
+						}
+
+						// Advance to the next pattern whose root node matches
+						// this node.
+						i++;
+						if (i == self->query->pattern_map.size)
+							break;
+						pattern = &self->query->pattern_map.contents[i];
+						step =
+							&self->query->steps.contents[pattern->step_index];
+					} while (step->symbol == symbol);
+				}
+
+				// Update all of the in-progress states with current node.
+				// `copy_count` is the number of states inserted right after
+				// state `j` during this iteration; they are skipped by the
+				// loop increment.
+				for (unsigned j = 0, copy_count = 0; j < self->states.size;
+					 j += 1 + copy_count)
+				{
+					QueryState *state = &self->states.contents[j];
+					QueryStep *step =
+						&self->query->steps.contents[state->step_index];
+					state->has_in_progress_alternatives = false;
+					copy_count = 0;
+
+					// Check that the node matches all of the criteria for the
+					// next step of the pattern.
+					if ((uint32_t)state->start_depth + (uint32_t)step->depth !=
+						self->depth)
+						continue;
+
+					// Determine if this node matches this step of the pattern,
+					// and also if this node can have later siblings that match
+					// this step of the pattern.
+					bool node_does_match = false;
+					if (step->symbol == WILDCARD_SYMBOL)
+					{
+						node_does_match =
+							!node_is_error && (is_named || !step->is_named);
+					}
+					else
+					{
+						node_does_match = symbol == step->symbol;
+					}
+					bool later_sibling_can_match = has_later_siblings;
+					if ((step->is_immediate && is_named) ||
+						state->seeking_immediate_match)
+					{
+						later_sibling_can_match = false;
+					}
+					if (step->is_last_child && has_later_named_siblings)
+					{
+						node_does_match = false;
+					}
+					if (step->supertype_symbol)
+					{
+						bool has_supertype = false;
+						for (unsigned k = 0; k < supertype_count; k++)
+						{
+							if (supertypes[k] == step->supertype_symbol)
+							{
+								has_supertype = true;
+								break;
+							}
+						}
+						if (!has_supertype)
+							node_does_match = false;
+					}
+					if (step->field)
+					{
+						if (step->field == field_id)
+						{
+							if (!can_have_later_siblings_with_this_field)
+							{
+								later_sibling_can_match = false;
+							}
+						}
+						else
+						{
+							node_does_match = false;
+						}
+					}
+
+					// The negated field list is terminated by a zero id.
+					if (step->negated_field_list_id)
+					{
+						TSFieldId *negated_field_ids =
+							&self->query->negated_fields
+								 .contents[step->negated_field_list_id];
+						for (;;)
+						{
+							TSFieldId negated_field_id = *negated_field_ids;
+							if (negated_field_id)
+							{
+								negated_field_ids++;
+								if (ts_node_child_by_field_id(node,
+										negated_field_id)
+										.id)
+								{
+									node_does_match = false;
+									break;
+								}
+							}
+							else
+							{
+								break;
+							}
+						}
+					}
+
+					// Remove states immediately if it is ever clear that they
+					// cannot match.
+					if (!node_does_match)
+					{
+						if (!later_sibling_can_match)
+						{
+							LOG(" discard state. pattern:%u, step:%u\n",
+								state->pattern_index, state->step_index);
+							capture_list_pool_release(&self->capture_list_pool,
+								state->capture_list_id);
+							// `j--` cancels the loop increment so that the
+							// state shifted into slot `j` is visited next.
+							array_erase(&self->states, j);
+							j--;
+						}
+						continue;
+					}
+
+					// Some patterns can match their root node in multiple ways,
+					// capturing different children. If this pattern step could
+					// match later children within the same parent, then this
+					// query state cannot simply be updated in place. It must be
+					// split into two states: one that matches this node, and
+					// one which skips over this node, to preserve the
+					// possibility of matching later siblings.
+					if (later_sibling_can_match &&
+						(step->contains_captures ||
+						 ts_query__step_is_fallible(self->query,
+							 state->step_index)))
+					{
+						if (ts_query_cursor__copy_state(self, &state))
+						{
+							LOG(" split state for capture. pattern:%u, "
+								"step:%u\n",
+								state->pattern_index, state->step_index);
+							copy_count++;
+						}
+					}
+
+					// If this pattern started with a wildcard, such that the
+					// pattern map actually points to the *second* step of the
+					// pattern, then check that the node has a parent, and
+					// capture the parent node if necessary.
+					if (state->needs_parent)
+					{
+						TSNode parent =
+							ts_tree_cursor_parent_node(&self->cursor);
+						if (ts_node_is_null(parent))
+						{
+							LOG(" missing parent node\n");
+							state->dead = true;
+						}
+						else
+						{
+							state->needs_parent = false;
+							// Walk backwards to the wildcard step that was
+							// skipped over.
+							QueryStep *skipped_wildcard_step = step;
+							do
+							{
+								skipped_wildcard_step--;
+							} while (skipped_wildcard_step->is_dead_end ||
+									 skipped_wildcard_step->is_pass_through ||
+									 skipped_wildcard_step->depth > 0);
+							if (skipped_wildcard_step->capture_ids[0] != NONE)
+							{
+								LOG(" capture wildcard parent\n");
+								ts_query_cursor__capture(
+									self, state, skipped_wildcard_step, parent);
+							}
+						}
+					}
+
+					// If the current node is captured in this pattern, add it
+					// to the capture list.
+					if (step->capture_ids[0] != NONE)
+					{
+						ts_query_cursor__capture(self, state, step, node);
+					}
+
+					if (state->dead)
+					{
+						array_erase(&self->states, j);
+						j--;
+						continue;
+					}
+
+					// Advance this state to the next step of its pattern.
+					state->step_index++;
+					state->seeking_immediate_match = false;
+					LOG(" advance state. pattern:%u, step:%u\n",
+						state->pattern_index, state->step_index);
+
+					QueryStep *next_step =
+						&self->query->steps.contents[state->step_index];
+					if (stop_on_definite_step &&
+						next_step->root_pattern_guaranteed)
+						did_match = true;
+
+					// If this state's next step has an alternative step, then
+					// copy the state in order to pursue both alternatives. The
+					// alternative step itself may have an alternative, so this
+					// is an iterative process.
+					unsigned end_index = j + 1;
+					for (unsigned k = j; k < end_index; k++)
+					{
+						QueryState *child_state = &self->states.contents[k];
+						QueryStep *child_step =
+							&self->query->steps
+								 .contents[child_state->step_index];
+						if (child_step->alternative_index != NONE)
+						{
+							// A "dead-end" step exists only to add a
+							// non-sequential jump into the step sequence, via
+							// its alternative index. When a state reaches a
+							// dead-end step, it jumps straight to the step's
+							// alternative.
+							if (child_step->is_dead_end)
+							{
+								child_state->step_index =
+									child_step->alternative_index;
+								k--;
+								continue;
+							}
+
+							// A "pass-through" step exists only to add a branch
+							// into the step sequence, via its
+							// alternative_index. When a state reaches a
+							// pass-through step, it splits in order to process
+							// the alternative step, and then it advances to the
+							// next step.
+							if (child_step->is_pass_through)
+							{
+								child_state->step_index++;
+								k--;
+							}
+
+							QueryState *copy =
+								ts_query_cursor__copy_state(self, &child_state);
+							if (copy)
+							{
+								LOG(" split state for branch. pattern:%u, "
+									"from_step:%u, to_step:%u, immediate:%d, "
+									"capture_count: %u\n",
+									copy->pattern_index, copy->step_index,
+									next_step->alternative_index,
+									next_step->alternative_is_immediate,
+									capture_list_pool_get(
+										&self->capture_list_pool,
+										copy->capture_list_id)
+										->size);
+								end_index++;
+								copy_count++;
+								copy->step_index =
+									child_step->alternative_index;
+								if (child_step->alternative_is_immediate)
+								{
+									copy->seeking_immediate_match = true;
+								}
+							}
+						}
+					}
+				}
+
+				for (unsigned j = 0; j < self->states.size; j++)
+				{
+					QueryState *state = &self->states.contents[j];
+					if (state->dead)
+					{
+						array_erase(&self->states, j);
+						j--;
+						continue;
+					}
+
+					// Enforce the longest-match criteria. When a query pattern
+					// contains optional or repeated nodes, this is necessary to
+					// avoid multiple redundant states, where one state has a
+					// strict subset of another state's captures.
+					bool did_remove = false;
+					for (unsigned k = j + 1; k < self->states.size; k++)
+					{
+						QueryState *other_state = &self->states.contents[k];
+
+						// Query states are kept in ascending order of
+						// start_depth and pattern_index. Since the
+						// longest-match criteria is only used for deduping
+						// matches of the same pattern and root node, we only
+						// need to perform pairwise comparisons within a small
+						// slice of the states array.
+						if (other_state->start_depth != state->start_depth ||
+							other_state->pattern_index != state->pattern_index)
+							break;
+
+						bool left_contains_right, right_contains_left;
+						ts_query_cursor__compare_captures(
+							self, state, other_state, &left_contains_right,
+							&right_contains_left);
+						if (left_contains_right)
+						{
+							if (state->step_index == other_state->step_index)
+							{
+								LOG(" drop shorter state. pattern: %u, "
+									"step_index: %u\n",
+									state->pattern_index, state->step_index);
+								capture_list_pool_release(
+									&self->capture_list_pool,
+									other_state->capture_list_id);
+								array_erase(&self->states, k);
+								k--;
+								continue;
+							}
+							other_state->has_in_progress_alternatives = true;
+						}
+						if (right_contains_left)
+						{
+							if (state->step_index == other_state->step_index)
+							{
+								LOG(" drop shorter state. pattern: %u, "
+									"step_index: %u\n",
+									state->pattern_index, state->step_index);
+								capture_list_pool_release(
+									&self->capture_list_pool,
+									state->capture_list_id);
+								array_erase(&self->states, j);
+								j--;
+								did_remove = true;
+								break;
+							}
+							state->has_in_progress_alternatives = true;
+						}
+					}
+
+					// If the state is at the end of its pattern, remove it from
+					// the list of in-progress states and add it to the list of
+					// finished states.
+					if (!did_remove)
+					{
+						LOG(" keep state. pattern: %u, start_depth: %u, "
+							"step_index: %u, capture_count: %u\n",
+							state->pattern_index, state->start_depth,
+							state->step_index,
+							capture_list_pool_get(&self->capture_list_pool,
+								state->capture_list_id)
+								->size);
+						QueryStep *next_step =
+							&self->query->steps.contents[state->step_index];
+						if (next_step->depth == PATTERN_DONE_MARKER)
+						{
+							if (state->has_in_progress_alternatives)
+							{
+								LOG(" defer finishing pattern %u\n",
+									state->pattern_index);
+							}
+							else
+							{
+								LOG(" finish pattern %u\n",
+									state->pattern_index);
+								array_push(&self->finished_states, *state);
+								array_erase(
+									&self->states,
+									(uint32_t)(state - self->states.contents));
+								did_match = true;
+								j--;
+							}
+						}
+					}
+				}
+			}
+
+			if (ts_query_cursor__should_descend(self, node_intersects_range))
+			{
+				switch (ts_tree_cursor_goto_first_child_internal(&self->cursor))
+				{
+				case TreeCursorStepVisible:
+					self->depth++;
+					self->on_visible_node = true;
+					continue;
+				case TreeCursorStepHidden:
+					self->on_visible_node = false;
+					continue;
+				default:
+					break;
+				}
+			}
+
+			self->ascending = true;
+		}
+	}
+}
+
+bool
ts_query_cursor_next_match(TSQueryCursor *self, TSQueryMatch *match) +{ + if (self->finished_states.size == 0) + { + if (!ts_query_cursor__advance(self, false)) + { + return false; + } + } + + QueryState *state = &self->finished_states.contents[0]; + if (state->id == UINT32_MAX) + state->id = self->next_state_id++; + match->id = state->id; + match->pattern_index = state->pattern_index; + const CaptureList *captures = + capture_list_pool_get(&self->capture_list_pool, state->capture_list_id); + match->captures = captures->contents; + match->capture_count = captures->size; + capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); + array_erase(&self->finished_states, 0); + return true; +} + +void ts_query_cursor_remove_match(TSQueryCursor *self, uint32_t match_id) +{ + for (unsigned i = 0; i < self->finished_states.size; i++) + { + const QueryState *state = &self->finished_states.contents[i]; + if (state->id == match_id) + { + capture_list_pool_release(&self->capture_list_pool, + state->capture_list_id); + array_erase(&self->finished_states, i); + return; + } + } + + // Remove unfinished query states as well to prevent future + // captures for a match being removed. + for (unsigned i = 0; i < self->states.size; i++) + { + const QueryState *state = &self->states.contents[i]; + if (state->id == match_id) + { + capture_list_pool_release(&self->capture_list_pool, + state->capture_list_id); + array_erase(&self->states, i); + return; + } + } +} + +bool ts_query_cursor_next_capture(TSQueryCursor *self, TSQueryMatch *match, + uint32_t *capture_index) +{ + // The goal here is to return captures in order, even though they may not + // be discovered in order, because patterns can overlap. Search for matches + // until there is a finished capture that is before any unfinished capture. + for (;;) + { + // First, find the earliest capture in an unfinished match. 
+ uint32_t first_unfinished_capture_byte; + uint32_t first_unfinished_pattern_index; + uint32_t first_unfinished_state_index; + bool first_unfinished_state_is_definite = false; + ts_query_cursor__first_in_progress_capture( + self, &first_unfinished_state_index, &first_unfinished_capture_byte, + &first_unfinished_pattern_index, + &first_unfinished_state_is_definite); + + // Then find the earliest capture in a finished match. It must occur + // before the first capture in an *unfinished* match. + QueryState *first_finished_state = NULL; + uint32_t first_finished_capture_byte = first_unfinished_capture_byte; + uint32_t first_finished_pattern_index = first_unfinished_pattern_index; + for (unsigned i = 0; i < self->finished_states.size;) + { + QueryState *state = &self->finished_states.contents[i]; + const CaptureList *captures = capture_list_pool_get( + &self->capture_list_pool, state->capture_list_id); + + // Remove states whose captures are all consumed. + if (state->consumed_capture_count >= captures->size) + { + capture_list_pool_release(&self->capture_list_pool, + state->capture_list_id); + array_erase(&self->finished_states, i); + continue; + } + + TSNode node = + captures->contents[state->consumed_capture_count].node; + + bool node_precedes_range = + (ts_node_end_byte(node) <= self->start_byte || + point_lte(ts_node_end_point(node), self->start_point)); + bool node_follows_range = + (ts_node_start_byte(node) >= self->end_byte || + point_gte(ts_node_start_point(node), self->end_point)); + bool node_outside_of_range = + node_precedes_range || node_follows_range; + + // Skip captures that are outside of the cursor's range. 
+ if (node_outside_of_range) + { + state->consumed_capture_count++; + continue; + } + + uint32_t node_start_byte = ts_node_start_byte(node); + if (node_start_byte < first_finished_capture_byte || + (node_start_byte == first_finished_capture_byte && + state->pattern_index < first_finished_pattern_index)) + { + first_finished_state = state; + first_finished_capture_byte = node_start_byte; + first_finished_pattern_index = state->pattern_index; + } + i++; + } + + // If there is finished capture that is clearly before any unfinished + // capture, then return its match, and its capture index. Internally + // record the fact that the capture has been 'consumed'. + QueryState *state; + if (first_finished_state) + { + state = first_finished_state; + } + else if (first_unfinished_state_is_definite) + { + state = &self->states.contents[first_unfinished_state_index]; + } + else + { + state = NULL; + } + + if (state) + { + if (state->id == UINT32_MAX) + state->id = self->next_state_id++; + match->id = state->id; + match->pattern_index = state->pattern_index; + const CaptureList *captures = capture_list_pool_get( + &self->capture_list_pool, state->capture_list_id); + match->captures = captures->contents; + match->capture_count = captures->size; + *capture_index = state->consumed_capture_count; + state->consumed_capture_count++; + return true; + } + + if (capture_list_pool_is_empty(&self->capture_list_pool)) + { + LOG(" abandon state. index:%u, pattern:%u, offset:%u.\n", + first_unfinished_state_index, first_unfinished_pattern_index, + first_unfinished_capture_byte); + capture_list_pool_release( + &self->capture_list_pool, + self->states.contents[first_unfinished_state_index] + .capture_list_id); + array_erase(&self->states, first_unfinished_state_index); + } + + // If there are no finished matches that are ready to be returned, then + // continue finding more matches. 
+ if (!ts_query_cursor__advance(self, true) && + self->finished_states.size == 0) + return false; + } +} + +void ts_query_cursor_set_max_start_depth(TSQueryCursor *self, + uint32_t max_start_depth) +{ + self->max_start_depth = max_start_depth; +} + +#undef LOG + +typedef struct StackNode StackNode; + +typedef struct +{ + StackNode *node; + Subtree subtree; + bool is_pending; +} StackLink; + +struct StackNode +{ + TSStateId state; + Length position; + StackLink links[MAX_LINK_COUNT]; + short unsigned int link_count; + uint32_t ref_count; + unsigned error_cost; + unsigned node_count; + int dynamic_precedence; +}; + +typedef struct +{ + StackNode *node; + SubtreeArray subtrees; + uint32_t subtree_count; + bool is_pending; +} StackIterator; + +typedef Array(StackNode *) StackNodeArray; + +typedef enum +{ + StackStatusActive, + StackStatusPaused, + StackStatusHalted, +} StackStatus; + +typedef struct +{ + StackNode *node; + StackSummary *summary; + unsigned node_count_at_last_error; + Subtree last_external_token; + Subtree lookahead_when_paused; + StackStatus status; +} StackHead; + +struct Stack +{ + Array(StackHead) heads; + StackSliceArray slices; + Array(StackIterator) iterators; + StackNodeArray node_pool; + StackNode *base_node; + SubtreePool *subtree_pool; +}; + +typedef unsigned StackAction; +enum +{ + StackActionNone, + StackActionStop = 1, + StackActionPop = 2, +}; + +typedef StackAction (*StackCallback)(void *, const StackIterator *); + +static void stack_node_retain(StackNode *self) +{ + if (!self) + return; + assert(self->ref_count > 0); + self->ref_count++; + assert(self->ref_count != 0); +} + +static void stack_node_release(StackNode *self, StackNodeArray *pool, + SubtreePool *subtree_pool) +{ +recur: + assert(self->ref_count != 0); + self->ref_count--; + if (self->ref_count > 0) + return; + + StackNode *first_predecessor = NULL; + if (self->link_count > 0) + { + for (unsigned i = self->link_count - 1; i > 0; i--) + { + StackLink link = self->links[i]; + 
if (link.subtree.ptr) + ts_subtree_release(subtree_pool, link.subtree); + stack_node_release(link.node, pool, subtree_pool); + } + StackLink link = self->links[0]; + if (link.subtree.ptr) + ts_subtree_release(subtree_pool, link.subtree); + first_predecessor = self->links[0].node; + } + + if (pool->size < MAX_NODE_POOL_SIZE) + { + array_push(pool, self); + } + else + { + free(self); + } + + if (first_predecessor) + { + self = first_predecessor; + goto recur; + } +} + +/// Get the number of nodes in the subtree, for the purpose of measuring +/// how much progress has been made by a given version of the stack. +static uint32_t stack__subtree_node_count(Subtree subtree) +{ + uint32_t count = ts_subtree_visible_descendant_count(subtree); + if (ts_subtree_visible(subtree)) + count++; + + // Count intermediate error nodes even though they are not visible, + // because a stack version's node count is used to check whether it + // has made any progress since the last time it encountered an error. + if (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) + count++; + + return count; +} + +static StackNode *stack_node_new(StackNode *previous_node, Subtree subtree, + bool is_pending, TSStateId state, + StackNodeArray *pool) +{ + StackNode *node = + pool->size > 0 ? 
array_pop(pool) : malloc(sizeof(StackNode)); + *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state}; + + if (previous_node) + { + node->link_count = 1; + node->links[0] = (StackLink){ + .node = previous_node, + .subtree = subtree, + .is_pending = is_pending, + }; + + node->position = previous_node->position; + node->error_cost = previous_node->error_cost; + node->dynamic_precedence = previous_node->dynamic_precedence; + node->node_count = previous_node->node_count; + + if (subtree.ptr) + { + node->error_cost += ts_subtree_error_cost(subtree); + node->position = + length_add(node->position, ts_subtree_total_size(subtree)); + node->node_count += stack__subtree_node_count(subtree); + node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree); + } + } + else + { + node->position = length_zero(); + node->error_cost = 0; + } + + return node; +} + +static bool stack__subtree_is_equivalent(Subtree left, Subtree right) +{ + if (left.ptr == right.ptr) + return true; + if (!left.ptr || !right.ptr) + return false; + + // Symbols must match + if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) + return false; + + // If both have errors, don't bother keeping both. 
// Continuation of stack__subtree_is_equivalent.
    if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0)
        return true;

    // Otherwise compare byte extents, child count, the `extra` flag and the
    // external scanner state.
    return (ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes &&
            ts_subtree_size(left).bytes == ts_subtree_size(right).bytes &&
            ts_subtree_child_count(left) == ts_subtree_child_count(right) &&
            ts_subtree_extra(left) == ts_subtree_extra(right) &&
            ts_subtree_external_scanner_state_eq(left, right));
}

// Add `link` as a predecessor link of `self`, merging it into an existing
// link when an equivalent one is already present.
//
// When the link is actually appended, this function retains both `link.node`
// and `link.subtree`; a link pointing back at `self` is ignored.
static void stack_node_add_link(StackNode *self, StackLink link,
                                SubtreePool *subtree_pool)
{
    if (link.node == self)
        return;

    for (int i = 0; i < self->link_count; i++)
    {
        StackLink *existing_link = &self->links[i];
        if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree))
        {
            // In general, we preserve ambiguities until they are removed from
            // the stack during a pop operation where multiple paths lead to the
            // same node. But in the special case where two links directly
            // connect the same pair of nodes, we can safely remove the
            // ambiguity ahead of time without changing behavior.
            if (existing_link->node == link.node)
            {
                if (ts_subtree_dynamic_precedence(link.subtree) >
                    ts_subtree_dynamic_precedence(existing_link->subtree))
                {
                    // Retain the incoming subtree before releasing the one it
                    // replaces.
                    ts_subtree_retain(link.subtree);
                    ts_subtree_release(subtree_pool, existing_link->subtree);
                    existing_link->subtree = link.subtree;
                    self->dynamic_precedence =
                        link.node->dynamic_precedence +
                        ts_subtree_dynamic_precedence(link.subtree);
                }
                return;
            }

            // If the previous nodes are mergeable, merge them recursively.
// Continuation of stack_node_add_link, inside the equivalent-subtree branch.
            // Mergeable here means: same parse state, same byte position and
            // same error cost.
            if (existing_link->node->state == link.node->state &&
                existing_link->node->position.bytes ==
                    link.node->position.bytes &&
                existing_link->node->error_cost == link.node->error_cost)
            {
                for (int j = 0; j < link.node->link_count; j++)
                {
                    stack_node_add_link(existing_link->node,
                                        link.node->links[j], subtree_pool);
                }
                int32_t dynamic_precedence = link.node->dynamic_precedence;
                if (link.subtree.ptr)
                {
                    dynamic_precedence +=
                        ts_subtree_dynamic_precedence(link.subtree);
                }
                if (dynamic_precedence > self->dynamic_precedence)
                {
                    self->dynamic_precedence = dynamic_precedence;
                }
                return;
            }
        }
    }

    // No existing link absorbed the new one. A node that already holds
    // MAX_LINK_COUNT links silently drops it.
    if (self->link_count == MAX_LINK_COUNT)
        return;

    stack_node_retain(link.node);
    unsigned node_count = link.node->node_count;
    int dynamic_precedence = link.node->dynamic_precedence;
    self->links[self->link_count++] = link;

    if (link.subtree.ptr)
    {
        ts_subtree_retain(link.subtree);
        node_count += stack__subtree_node_count(link.subtree);
        dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
    }

    // The node keeps the maximum node count and dynamic precedence seen over
    // all of its links.
    if (node_count > self->node_count)
        self->node_count = node_count;
    if (dynamic_precedence > self->dynamic_precedence)
        self->dynamic_precedence = dynamic_precedence;
}

// Release everything owned by a stack head: its last external token, its
// paused lookahead, its summary array and its reference to the top node.
// A head whose `node` is NULL is left untouched.
static void stack_head_delete(StackHead *self, StackNodeArray *pool,
                              SubtreePool *subtree_pool)
{
    if (self->node)
    {
        if (self->last_external_token.ptr)
        {
            ts_subtree_release(subtree_pool, self->last_external_token);
        }
        if (self->lookahead_when_paused.ptr)
        {
            ts_subtree_release(subtree_pool, self->lookahead_when_paused);
        }
        if (self->summary)
        {
            array_delete(self->summary);
            free(self->summary);
        }
        stack_node_release(self->node, pool, subtree_pool);
    }
}

// Append a new active head that points at `node`, inheriting
// `node_count_at_last_error` and `last_external_token` from
// `original_version`. Returns the index of the new head.
static StackVersion ts_stack__add_version(Stack *self,
                                          StackVersion original_version,
                                          StackNode *node)
{
    StackHead head = {
        .node = node,
        .node_count_at_last_error =
            self->heads.contents[original_version].node_count_at_last_error,
        .last_external_token =
// Continuation of ts_stack__add_version's head initialiser.
            self->heads.contents[original_version].last_external_token,
        .status = StackStatusActive,
        .lookahead_when_paused = NULL_SUBTREE,
    };
    array_push(&self->heads, head);
    // The new head holds its own references to the node and to the token.
    stack_node_retain(node);
    if (head.last_external_token.ptr)
        ts_subtree_retain(head.last_external_token);
    return (StackVersion)(self->heads.size - 1);
}

// Record a popped slice (`subtrees`) that ends at `node`.
//
// If an earlier slice already belongs to a version whose head is `node`, the
// new slice reuses that version and is inserted right after it; otherwise a
// new version is created for it. The SubtreeArray is copied by value into
// the slice.
static void ts_stack__add_slice(Stack *self, StackVersion original_version,
                                StackNode *node, SubtreeArray *subtrees)
{
    // Unsigned countdown: `i + 1 > 0` becomes false once `i` wraps past zero.
    for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--)
    {
        StackVersion version = self->slices.contents[i].version;
        if (self->heads.contents[version].node == node)
        {
            StackSlice slice = {*subtrees, version};
            array_insert(&self->slices, i + 1, slice);
            return;
        }
    }

    StackVersion version = ts_stack__add_version(self, original_version, node);
    StackSlice slice = {*subtrees, version};
    array_push(&self->slices, slice);
}

// Walk backwards from the head of `version` along every link path, asking
// `callback` at each step whether to pop and/or stop.
//
// Subtrees are collected along the way only when `goal_subtree_count` is
// non-negative; that value is also used to pre-size the first iterator's
// subtree array. The returned array is `self->slices`, which is cleared at
// the start of every call.
static StackSliceArray stack__iter(Stack *self, StackVersion version,
                                   StackCallback callback, void *payload,
                                   int goal_subtree_count)
{
    array_clear(&self->slices);
    array_clear(&self->iterators);

    StackHead *head = array_get(&self->heads, version);
    StackIterator new_iterator = {
        .node = head->node,
        .subtrees = array_new(),
        .subtree_count = 0,
        .is_pending = true,
    };

    bool include_subtrees = false;
    if (goal_subtree_count >= 0)
    {
        include_subtrees = true;
        array_reserve(&new_iterator.subtrees,
                      (uint32_t)ts_subtree_alloc_size(goal_subtree_count) /
                          sizeof(Subtree));
    }

    array_push(&self->iterators, new_iterator);

    while (self->iterators.size > 0)
    {
        // `size` is sampled once, so iterators pushed during this pass are
        // first visited on the next pass of the outer loop.
        for (uint32_t i = 0, size = self->iterators.size; i < size; i++)
        {
            StackIterator *iterator = &self->iterators.contents[i];
            StackNode *node = iterator->node;

            StackAction action = callback(payload, iterator);
            bool should_pop = action & StackActionPop;
            // A node without links always ends its path.
            bool should_stop =
                action & StackActionStop || node->link_count == 0;

            if (should_pop)
            {
                SubtreeArray subtrees =
// Continuation of stack__iter, inside the `should_pop` branch.
                                        iterator->subtrees;
                if (!should_stop)
                {
                    // The iterator keeps walking, so the slice needs its own
                    // buffer. The by-value first argument snapshots the old
                    // array before `subtrees` is overwritten with the copy.
                    ts_subtree_array_copy(subtrees, &subtrees);
                }
                ts_subtree_array_reverse(&subtrees);
                ts_stack__add_slice(self, version, node, &subtrees);
            }

            if (should_stop)
            {
                // When the array was popped it now belongs to the slice;
                // otherwise it is released here.
                if (!should_pop)
                {
                    ts_subtree_array_delete(self->subtree_pool,
                                            &iterator->subtrees);
                }
                array_erase(&self->iterators, i);
                // Re-visit index `i`, which now holds the next iterator.
                i--, size--;
                continue;
            }

            // Links 1..link_count-1 each fork a copy of this iterator; the
            // final pass (j == link_count) advances the iterator itself
            // along links[0].
            for (uint32_t j = 1; j <= node->link_count; j++)
            {
                StackIterator *next_iterator;
                StackLink link;
                if (j == node->link_count)
                {
                    link = node->links[0];
                    next_iterator = &self->iterators.contents[i];
                }
                else
                {
                    // Forks beyond MAX_ITERATOR_COUNT are skipped.
                    if (self->iterators.size >= MAX_ITERATOR_COUNT)
                        continue;
                    link = node->links[j];
                    StackIterator current_iterator =
                        self->iterators.contents[i];
                    array_push(&self->iterators, current_iterator);
                    next_iterator = array_back(&self->iterators);
                    ts_subtree_array_copy(next_iterator->subtrees,
                                          &next_iterator->subtrees);
                }

                next_iterator->node = link.node;
                if (link.subtree.ptr)
                {
                    if (include_subtrees)
                    {
                        array_push(&next_iterator->subtrees, link.subtree);
                        ts_subtree_retain(link.subtree);
                    }

                    // Extra subtrees do not count toward the goal.
                    if (!ts_subtree_extra(link.subtree))
                    {
                        next_iterator->subtree_count++;
                        if (!link.is_pending)
                        {
                            next_iterator->is_pending = false;
                        }
                    }
                }
                else
                {
                    // A link without a subtree still counts as one step.
                    next_iterator->subtree_count++;
                    next_iterator->is_pending = false;
                }
            }
        }
    }

    return self->slices;
}

// Allocate a stack bound to `subtree_pool`.
//
// The heads, slices and iterators arrays are pre-sized to 4 entries and the
// node pool to MAX_NODE_POOL_SIZE. A base node in state 1 is created and
// ts_stack_clear() installs it as the only head.
// NOTE(review): the calloc result is used without a NULL check.
Stack *ts_stack_new(SubtreePool *subtree_pool)
{
    Stack *self = calloc(1, sizeof(Stack));

    array_init(&self->heads);
    array_init(&self->slices);
    array_init(&self->iterators);
    array_init(&self->node_pool);
    array_reserve(&self->heads, 4);
    array_reserve(&self->slices, 4);
    array_reserve(&self->iterators, 4);
    array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE);

    self->subtree_pool = subtree_pool;
    self->base_node =
        stack_node_new(NULL, NULL_SUBTREE, false, 1, &self->node_pool);
    ts_stack_clear(self);

    return self;
}

// Destroy a stack: its working arrays, its base node, all heads, every node
// parked in the node pool, and finally the Stack itself.
void ts_stack_delete(Stack *self)
{
+ if (self->slices.contents) + array_delete(&self->slices); + if (self->iterators.contents) + array_delete(&self->iterators); + stack_node_release(self->base_node, &self->node_pool, self->subtree_pool); + for (uint32_t i = 0; i < self->heads.size; i++) + { + stack_head_delete(&self->heads.contents[i], &self->node_pool, + self->subtree_pool); + } + array_clear(&self->heads); + if (self->node_pool.contents) + { + for (uint32_t i = 0; i < self->node_pool.size; i++) + free(self->node_pool.contents[i]); + array_delete(&self->node_pool); + } + array_delete(&self->heads); + free(self); +} + +uint32_t ts_stack_version_count(const Stack *self) +{ + return self->heads.size; +} + +TSStateId ts_stack_state(const Stack *self, StackVersion version) +{ + return array_get(&self->heads, version)->node->state; +} + +Length ts_stack_position(const Stack *self, StackVersion version) +{ + return array_get(&self->heads, version)->node->position; +} + +Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) +{ + return array_get(&self->heads, version)->last_external_token; +} + +void ts_stack_set_last_external_token(Stack *self, StackVersion version, + Subtree token) +{ + StackHead *head = array_get(&self->heads, version); + if (token.ptr) + ts_subtree_retain(token); + if (head->last_external_token.ptr) + ts_subtree_release(self->subtree_pool, head->last_external_token); + head->last_external_token = token; +} + +unsigned ts_stack_error_cost(const Stack *self, StackVersion version) +{ + StackHead *head = array_get(&self->heads, version); + unsigned result = head->node->error_cost; + if (head->status == StackStatusPaused || + (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr)) + { + result += ERROR_COST_PER_RECOVERY; + } + return result; +} + +unsigned ts_stack_node_count_since_error(const Stack *self, + StackVersion version) +{ + StackHead *head = array_get(&self->heads, version); + if (head->node->node_count < head->node_count_at_last_error) + 
{ + head->node_count_at_last_error = head->node->node_count; + } + return head->node->node_count - head->node_count_at_last_error; +} + +void ts_stack_push(Stack *self, StackVersion version, Subtree subtree, + bool pending, TSStateId state) +{ + StackHead *head = array_get(&self->heads, version); + StackNode *new_node = + stack_node_new(head->node, subtree, pending, state, &self->node_pool); + if (!subtree.ptr) + head->node_count_at_last_error = new_node->node_count; + head->node = new_node; +} + +static inline StackAction pop_count_callback(void *payload, + const StackIterator *iterator) +{ + unsigned *goal_subtree_count = payload; + if (iterator->subtree_count == *goal_subtree_count) + { + return StackActionPop | StackActionStop; + } + else + { + return StackActionNone; + } +} + +StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, + uint32_t count) +{ + return stack__iter(self, version, pop_count_callback, &count, (int)count); +} + +static inline StackAction pop_pending_callback(void *payload, + const StackIterator *iterator) +{ + (void)payload; + if (iterator->subtree_count >= 1) + { + if (iterator->is_pending) + { + return StackActionPop | StackActionStop; + } + else + { + return StackActionStop; + } + } + else + { + return StackActionNone; + } +} + +StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) +{ + StackSliceArray pop = + stack__iter(self, version, pop_pending_callback, NULL, 0); + if (pop.size > 0) + { + ts_stack_renumber_version(self, pop.contents[0].version, version); + pop.contents[0].version = version; + } + return pop; +} + +static inline StackAction pop_error_callback(void *payload, + const StackIterator *iterator) +{ + if (iterator->subtrees.size > 0) + { + bool *found_error = payload; + if (!*found_error && + ts_subtree_is_error(iterator->subtrees.contents[0])) + { + *found_error = true; + return StackActionPop | StackActionStop; + } + else + { + return StackActionStop; + } + } + else + { + return 
// Tail of pop_error_callback (begins before this span); unchanged.
               StackActionNone;
    }
}

// Pop a single error subtree off the top of `version`.
//
// Only attempted when one of the top node's links carries an error subtree.
// On success the popped version is renumbered onto `version` and the popped
// subtrees are returned; otherwise an array with size 0 is returned.
SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version)
{
    StackNode *node = array_get(&self->heads, version)->node;
    for (unsigned i = 0; i < node->link_count; i++)
    {
        if (node->links[i].subtree.ptr &&
            ts_subtree_is_error(node->links[i].subtree))
        {
            bool found_error = false;
            StackSliceArray pop =
                stack__iter(self, version, pop_error_callback, &found_error, 1);
            if (pop.size > 0)
            {
                assert(pop.size == 1);
                ts_stack_renumber_version(self, pop.contents[0].version,
                                          version);
                return pop.contents[0].subtrees;
            }
            // One attempt only, even if several links hold error subtrees.
            break;
        }
    }
    return (SubtreeArray){.size = 0};
}

// Iteration callback: pop only once a node without links is reached.
static inline StackAction pop_all_callback(void *payload,
                                           const StackIterator *iterator)
{
    (void)payload;
    return iterator->node->link_count == 0 ? StackActionPop : StackActionNone;
}

// Pop every path of `version` all the way down to a node without links.
StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version)
{
    return stack__iter(self, version, pop_all_callback, NULL, 0);
}

// State threaded through summarize_stack_callback: the summary being built
// and the depth beyond which iteration stops.
typedef struct
{
    StackSummary *summary;
    unsigned max_depth;
} SummarizeStackSession;

// Iteration callback that appends one (position, depth, state) entry per
// distinct (depth, state) pair, and stops a path once its depth exceeds
// `max_depth`.
static inline StackAction summarize_stack_callback(
    void *payload, const StackIterator *iterator)
{
    SummarizeStackSession *session = payload;
    TSStateId state = iterator->node->state;
    unsigned depth = iterator->subtree_count;
    if (depth > session->max_depth)
        return StackActionStop;
    // Scan the entries newest-first; `i + 1 > 0` ends the unsigned countdown.
    for (unsigned i = session->summary->size - 1; i + 1 > 0; i--)
    {
        StackSummaryEntry entry = session->summary->contents[i];
        // The scan stops at the first entry that is shallower than `depth`.
        if (entry.depth < depth)
            break;
        if (entry.depth == depth && entry.state == state)
            return StackActionNone;
    }
    array_push(session->summary, ((StackSummaryEntry){
                                     .position = iterator->node->position,
                                     .depth = depth,
                                     .state = state,
                                 }));
    return StackActionNone;
}

// Build a fresh summary of `version` down to `max_depth` and store it on the
// head, replacing any previous summary.
// NOTE(review): the malloc result is passed to array_init without a NULL check.
void ts_stack_record_summary(Stack *self, StackVersion version,
                             unsigned max_depth)
{
    SummarizeStackSession session = {.summary = malloc(sizeof(StackSummary)),
                                     .max_depth = max_depth};
    array_init(session.summary);
// Tail of ts_stack_record_summary (begins before this span); unchanged.
    // A goal of -1 makes stack__iter walk without collecting subtrees.
    stack__iter(self, version, summarize_stack_callback, &session, -1);
    StackHead *head = &self->heads.contents[version];
    if (head->summary)
    {
        array_delete(head->summary);
        free(head->summary);
    }
    head->summary = session.summary;
}

// Summary last recorded for `version`; the head keeps ownership.
StackSummary *ts_stack_get_summary(Stack *self, StackVersion version)
{
    return array_get(&self->heads, version)->summary;
}

// Dynamic precedence stored in the top node of `version`.
int ts_stack_dynamic_precedence(Stack *self, StackVersion version)
{
    return array_get(&self->heads, version)->node->dynamic_precedence;
}

// Report whether `version` has consumed any bytes since its last error.
//
// A version whose top node has no error cost counts as advanced. Otherwise
// the walk follows links[0] downwards, continuing past zero-byte, error-free
// subtrees only while the node count exceeds the head's baseline, and
// returns true at the first subtree with a non-zero total byte size.
bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version)
{
    const StackHead *head = array_get(&self->heads, version);
    const StackNode *node = head->node;
    if (node->error_cost == 0)
        return true;
    while (node)
    {
        if (node->link_count > 0)
        {
            Subtree subtree = node->links[0].subtree;
            if (subtree.ptr)
            {
                if (ts_subtree_total_bytes(subtree) > 0)
                {
                    return true;
                }
                else if (node->node_count > head->node_count_at_last_error &&
                         ts_subtree_error_cost(subtree) == 0)
                {
                    node = node->links[0].node;
                    continue;
                }
            }
        }
        break;
    }
    return false;
}

// Release the head of `version` and remove it from the heads array.
void ts_stack_remove_version(Stack *self, StackVersion version)
{
    stack_head_delete(array_get(&self->heads, version), &self->node_pool,
                      self->subtree_pool);
    array_erase(&self->heads, version);
}

// Move head `v1` into slot `v2` (which must be the lower index), releasing
// what `v2` held and erasing slot `v1`.
//
// If only the target has a summary, it is handed to the source first so it
// survives the move.
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2)
{
    if (v1 == v2)
        return;
    assert(v2 < v1);
    assert((uint32_t)v1 < self->heads.size);
    StackHead *source_head = &self->heads.contents[v1];
    StackHead *target_head = &self->heads.contents[v2];
    if (target_head->summary && !source_head->summary)
    {
        source_head->summary = target_head->summary;
        target_head->summary = NULL;
    }
    stack_head_delete(target_head, &self->node_pool, self->subtree_pool);
    // Plain struct copy: ownership moves with it, so nothing is retained.
    *target_head = *source_head;
    array_erase(&self->heads, v1);
}

// Exchange the heads stored at `v1` and `v2`.
void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2)
{
    StackHead temporary_head =
// Tail of ts_stack_swap_versions (begins before this span); unchanged.
                               self->heads.contents[v1];
    self->heads.contents[v1] = self->heads.contents[v2];
    self->heads.contents[v2] = temporary_head;
}

// Duplicate the head of `version` as a new version and return its index.
// The copy retains the node and the last external token, and starts without
// a summary.
StackVersion ts_stack_copy_version(Stack *self, StackVersion version)
{
    assert(version < self->heads.size);
    array_push(&self->heads, self->heads.contents[version]);
    StackHead *head = array_back(&self->heads);
    stack_node_retain(head->node);
    if (head->last_external_token.ptr)
        ts_subtree_retain(head->last_external_token);
    // The summary pointer was copied bitwise; drop it so it has one owner.
    head->summary = NULL;
    return self->heads.size - 1;
}

// Merge `version2` into `version1` when ts_stack_can_merge() allows it.
//
// Every link of version2's top node is added to version1's top node, then
// version2 is removed. Returns false, changing nothing, if the versions
// cannot be merged.
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2)
{
    if (!ts_stack_can_merge(self, version1, version2))
        return false;
    StackHead *head1 = &self->heads.contents[version1];
    StackHead *head2 = &self->heads.contents[version2];
    for (uint32_t i = 0; i < head2->node->link_count; i++)
    {
        stack_node_add_link(head1->node, head2->node->links[i],
                            self->subtree_pool);
    }
    if (head1->node->state == ERROR_STATE)
    {
        head1->node_count_at_last_error = head1->node->node_count;
    }
    ts_stack_remove_version(self, version2);
    return true;
}

// Two versions can merge when both are active and their top nodes agree on
// parse state, byte position and error cost, and their last external tokens
// have equal scanner state.
bool ts_stack_can_merge(Stack *self, StackVersion version1,
                        StackVersion version2)
{
    StackHead *head1 = &self->heads.contents[version1];
    StackHead *head2 = &self->heads.contents[version2];
    return head1->status == StackStatusActive &&
           head2->status == StackStatusActive &&
           head1->node->state == head2->node->state &&
           head1->node->position.bytes == head2->node->position.bytes &&
           head1->node->error_cost == head2->node->error_cost &&
           ts_subtree_external_scanner_state_eq(head1->last_external_token,
                                                head2->last_external_token);
}

// Mark `version` as halted.
void ts_stack_halt(Stack *self, StackVersion version)
{
    array_get(&self->heads, version)->status = StackStatusHalted;
}

// Mark `version` as paused and store `lookahead` on the head. The subtree is
// stored as given, without retaining it here.
void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead)
{
    StackHead *head = array_get(&self->heads, version);
    head->status = StackStatusPaused;
    head->lookahead_when_paused = lookahead;
head->node_count_at_last_error = head->node->node_count; +} + +bool ts_stack_is_active(const Stack *self, StackVersion version) +{ + return array_get(&self->heads, version)->status == StackStatusActive; +} + +bool ts_stack_is_halted(const Stack *self, StackVersion version) +{ + return array_get(&self->heads, version)->status == StackStatusHalted; +} + +bool ts_stack_is_paused(const Stack *self, StackVersion version) +{ + return array_get(&self->heads, version)->status == StackStatusPaused; +} + +Subtree ts_stack_resume(Stack *self, StackVersion version) +{ + StackHead *head = array_get(&self->heads, version); + assert(head->status == StackStatusPaused); + Subtree result = head->lookahead_when_paused; + head->status = StackStatusActive; + head->lookahead_when_paused = NULL_SUBTREE; + return result; +} + +void ts_stack_clear(Stack *self) +{ + stack_node_retain(self->base_node); + for (uint32_t i = 0; i < self->heads.size; i++) + { + stack_head_delete(&self->heads.contents[i], &self->node_pool, + self->subtree_pool); + } + array_clear(&self->heads); + array_push(&self->heads, ((StackHead){ + .node = self->base_node, + .status = StackStatusActive, + .last_external_token = NULL_SUBTREE, + .lookahead_when_paused = NULL_SUBTREE, + })); +} + +bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, void *f) +{ + (void)(self); + (void)(language); + (void)(f); + return (false); +} + +typedef struct +{ + Length start; + Length old_end; + Length new_end; +} Edit; + +// ExternalScannerState + +void ts_external_scanner_state_init(ExternalScannerState *self, + const char *data, unsigned length) +{ + self->length = length; + if (length > sizeof(self->short_data)) + { + self->long_data = malloc(length); + memcpy(self->long_data, data, length); + } + else + { + memcpy(self->short_data, data, length); + } +} + +ExternalScannerState ts_external_scanner_state_copy( + const ExternalScannerState *self) +{ + ExternalScannerState result = *self; + if (self->length > 
sizeof(self->short_data)) + { + result.long_data = malloc(self->length); + memcpy(result.long_data, self->long_data, self->length); + } + return result; +} + +void ts_external_scanner_state_delete(ExternalScannerState *self) +{ + if (self->length > sizeof(self->short_data)) + { + free(self->long_data); + } +} + +const char *ts_external_scanner_state_data(const ExternalScannerState *self) +{ + if (self->length > sizeof(self->short_data)) + { + return self->long_data; + } + else + { + return self->short_data; + } +} + +bool ts_external_scanner_state_eq(const ExternalScannerState *self, + const char *buffer, unsigned length) +{ + return self->length == length && + memcmp(ts_external_scanner_state_data(self), buffer, length) == 0; +} + +// SubtreeArray + +void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) +{ + dest->size = self.size; + dest->capacity = self.capacity; + dest->contents = self.contents; + if (self.capacity > 0) + { + dest->contents = calloc(self.capacity, sizeof(Subtree)); + memcpy(dest->contents, self.contents, self.size * sizeof(Subtree)); + for (uint32_t i = 0; i < self.size; i++) + { + ts_subtree_retain(dest->contents[i]); + } + } +} + +void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) +{ + for (uint32_t i = 0; i < self->size; i++) + { + ts_subtree_release(pool, self->contents[i]); + } + array_clear(self); +} + +void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) +{ + ts_subtree_array_clear(pool, self); + array_delete(self); +} + +void ts_subtree_array_remove_trailing_extras(SubtreeArray *self, + SubtreeArray *destination) +{ + array_clear(destination); + while (self->size > 0) + { + Subtree last = self->contents[self->size - 1]; + if (ts_subtree_extra(last)) + { + self->size--; + array_push(destination, last); + } + else + { + break; + } + } + ts_subtree_array_reverse(destination); +} + +void ts_subtree_array_reverse(SubtreeArray *self) +{ + for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) + 
// Tail of ts_subtree_array_reverse (begins before this span); unchanged.
// Swaps element i with its mirror counted from the end of the array.
    {
        size_t reverse_index = self->size - 1 - i;
        Subtree swap = self->contents[i];
        self->contents[i] = self->contents[reverse_index];
        self->contents[reverse_index] = swap;
    }
}

// SubtreePool

// Create a pool whose free list is pre-sized to `capacity` entries.
SubtreePool ts_subtree_pool_new(uint32_t capacity)
{
    SubtreePool self = {array_new(), array_new()};
    array_reserve(&self.free_trees, capacity);
    return self;
}

// Free every cached heap node and the pool's two arrays.
void ts_subtree_pool_delete(SubtreePool *self)
{
    if (self->free_trees.contents)
    {
        for (unsigned i = 0; i < self->free_trees.size; i++)
        {
            free(self->free_trees.contents[i].ptr);
        }
        array_delete(&self->free_trees);
    }
    if (self->tree_stack.contents)
        array_delete(&self->tree_stack);
}

// Hand out storage for one SubtreeHeapData, reusing a cached node when one
// is available. The storage is not initialised here.
// NOTE(review): the malloc result is returned unchecked.
static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self)
{
    if (self->free_trees.size > 0)
    {
        return array_pop(&self->free_trees).ptr;
    }
    else
    {
        return malloc(sizeof(SubtreeHeapData));
    }
}

// Return a heap node to the pool. It is cached only if the free list has
// capacity and stays within TS_MAX_TREE_POOL_SIZE entries; otherwise it is
// freed.
static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree)
{
    if (self->free_trees.capacity > 0 &&
        self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE)
    {
        array_push(&self->free_trees, (MutableSubtree){.ptr = tree});
    }
    else
    {
        free(tree);
    }
}

// Subtree

// Check whether a leaf's padding, size and lookahead are small enough for
// the inline representation: padding and size columns and padding bytes
// below TS_MAX_INLINE_TREE_LENGTH, fewer than 16 padding rows, a size that
// spans no rows, and fewer than 16 lookahead bytes.
static inline bool ts_subtree_can_inline(Length padding, Length size,
                                         uint32_t lookahead_bytes)
{
    return padding.bytes < TS_MAX_INLINE_TREE_LENGTH &&
           padding.extent.row < 16 &&
           padding.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
           size.extent.row == 0 &&
           size.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
           lookahead_bytes < 16;
}

// Create a leaf subtree for `symbol`.
//
// The leaf is stored inline when the symbol fits in 8 bits, it has no
// external tokens and ts_subtree_can_inline() accepts its dimensions;
// otherwise a heap node is taken from `pool`. Only the end-of-input symbol
// is marked `extra` here.
Subtree ts_subtree_new_leaf(SubtreePool *pool, TSSymbol symbol, Length padding,
                            Length size, uint32_t lookahead_bytes,
                            TSStateId parse_state, bool has_external_tokens,
                            bool depends_on_column, bool is_keyword,
                            const TSLanguage *language)
{
    TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
    bool extra = symbol == ts_builtin_sym_end;

    bool is_inline = (symbol <= UINT8_MAX && !has_external_tokens &&
ts_subtree_can_inline(padding, size, lookahead_bytes)); + + if (is_inline) + { + return (Subtree){{ + .parse_state = parse_state, + .symbol = symbol, + .padding_bytes = padding.bytes, + .padding_rows = padding.extent.row, + .padding_columns = padding.extent.column, + .size_bytes = size.bytes, + .lookahead_bytes = lookahead_bytes, + .visible = metadata.visible, + .named = metadata.named, + .extra = extra, + .has_changes = false, + .is_missing = false, + .is_keyword = is_keyword, + .is_inline = true, + }}; + } + else + { + SubtreeHeapData *data = ts_subtree_pool_allocate(pool); + *data = (SubtreeHeapData){ + .ref_count = 1, + .padding = padding, + .size = size, + .lookahead_bytes = lookahead_bytes, + .error_cost = 0, + .child_count = 0, + .symbol = symbol, + .parse_state = parse_state, + .visible = metadata.visible, + .named = metadata.named, + .extra = extra, + .fragile_left = false, + .fragile_right = false, + .has_changes = false, + .has_external_tokens = has_external_tokens, + .has_external_scanner_state_change = false, + .depends_on_column = depends_on_column, + .is_missing = false, + .is_keyword = is_keyword, + {{.first_leaf = {.symbol = 0, .parse_state = 0}}}}; + return (Subtree){.ptr = data}; + } +} + +void ts_subtree_set_symbol(MutableSubtree *self, TSSymbol symbol, + const TSLanguage *language) +{ + TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); + if (self->data.is_inline) + { + assert(symbol < UINT8_MAX); + self->data.symbol = symbol; + self->data.named = metadata.named; + self->data.visible = metadata.visible; + } + else + { + self->ptr->symbol = symbol; + self->ptr->named = metadata.named; + self->ptr->visible = metadata.visible; + } +} + +Subtree ts_subtree_new_error(SubtreePool *pool, int32_t lookahead_char, + Length padding, Length size, + uint32_t bytes_scanned, TSStateId parse_state, + const TSLanguage *language) +{ + Subtree result = ts_subtree_new_leaf(pool, ts_builtin_sym_error, padding, + size, bytes_scanned, 
// Tail of ts_subtree_new_error (begins before this span); unchanged.
                                         parse_state,
                                         false, false, false, language);
    // NOTE(review): `result.ptr` is used unconditionally, so this relies on
    // the error symbol never producing an inline leaf — confirm that
    // ts_builtin_sym_error is above UINT8_MAX.
    SubtreeHeapData *data = (SubtreeHeapData *)result.ptr;
    data->fragile_left = true;
    data->fragile_right = true;
    data->lookahead_char = lookahead_char;
    return result;
}

// Clone a subtree.
//
// One allocation of ts_subtree_alloc_size(child_count) bytes is copied from
// the original's children array; the clone's heap data is the slot right
// after the last child. Children are retained, or, for a childless node with
// external tokens, the scanner state is deep-copied. The clone starts with a
// reference count of one.
// NOTE(review): reads `self.ptr` unconditionally, so callers presumably never
// pass an inline subtree — confirm. The malloc result is not checked.
MutableSubtree ts_subtree_clone(Subtree self)
{
    size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count);
    Subtree *new_children = malloc(alloc_size);
    Subtree *old_children = ts_subtree_children(self);
    memcpy(new_children, old_children, alloc_size);
    SubtreeHeapData *result =
        (SubtreeHeapData *)&new_children[self.ptr->child_count];
    if (self.ptr->child_count > 0)
    {
        for (uint32_t i = 0; i < self.ptr->child_count; i++)
        {
            ts_subtree_retain(new_children[i]);
        }
    }
    else if (self.ptr->has_external_tokens)
    {
        result->external_scanner_state =
            ts_external_scanner_state_copy(&self.ptr->external_scanner_state);
    }
    result->ref_count = 1;
    return (MutableSubtree){.ptr = result};
}

// Get mutable version of a subtree.
//
// This takes ownership of the subtree. If the subtree has only one owner,
// this will directly convert it into a mutable version. Otherwise, it will
// perform a copy.
// Inline subtrees are returned as-is. A heap subtree with a reference count
// of one is converted in place; otherwise it is cloned and the caller's
// reference to the original is released.
MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self)
{
    if (self.data.is_inline)
        return (MutableSubtree){self.data};
    if (self.ptr->ref_count == 1)
        return ts_subtree_to_mut_unsafe(self);
    MutableSubtree result = ts_subtree_clone(self);
    ts_subtree_release(pool, self);
    return result;
}

// Perform up to `count` rotations down the leftmost spine of `self`.
//
// Each step requires the tree, its first child and that child's first child
// to be uniquely owned heap nodes with at least two children, the latter two
// carrying the same symbol as `self`. The step re-links the three nodes so
// the grandchild takes the child's place and the child becomes the
// grandchild's last child. Rotated trees are pushed on `stack` and their
// summaries recomputed afterwards; `stack` is returned to its initial size.
static void ts_subtree__compress(MutableSubtree self, unsigned count,
                                 const TSLanguage *language,
                                 MutableSubtreeArray *stack)
{
    unsigned initial_stack_size = stack->size;

    MutableSubtree tree = self;
    TSSymbol symbol = tree.ptr->symbol;
    for (unsigned i = 0; i < count; i++)
    {
        if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2)
            break;

        MutableSubtree child =
            ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
        if (child.data.is_inline || child.ptr->child_count < 2 ||
            child.ptr->ref_count > 1 || child.ptr->symbol != symbol)
            break;

        MutableSubtree grandchild =
            ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]);
        if (grandchild.data.is_inline || grandchild.ptr->child_count < 2 ||
            grandchild.ptr->ref_count > 1 || grandchild.ptr->symbol != symbol)
            break;

        // Rotation: the order of these three assignments matters, because
        // each one reads a slot that the next one overwrites.
        ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild);
        ts_subtree_children(child)[0] =
            ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1];
        ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] =
            ts_subtree_from_mut(child);
        array_push(stack, tree);
        tree = grandchild;
    }

    // Unwind: recompute summaries innermost-first for every rotated tree.
    while (stack->size > initial_stack_size)
    {
        tree = array_pop(stack);
        MutableSubtree child =
            ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
        MutableSubtree grandchild = ts_subtree_to_mut_unsafe(
            ts_subtree_children(child)[child.ptr->child_count - 1]);
        ts_subtree_summarize_children(grandchild, language);
        ts_subtree_summarize_children(child, language);
        ts_subtree_summarize_children(tree, language);
    }
}

// Rebalance the repetition chains inside `self`, using the pool's
// `tree_stack` as the work list.
//
// Only uniquely owned nodes with children are visited. For a node with a
// non-zero repeat depth whose first child is deeper than its last child,
// compression runs with step counts that halve each round.
void ts_subtree_balance(Subtree self, SubtreePool *pool,
                        const TSLanguage *language)
{
    array_clear(&pool->tree_stack);

    if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1)
    {
        array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
    }

    while (pool->tree_stack.size > 0)
    {
        MutableSubtree tree = array_pop(&pool->tree_stack);

        if (tree.ptr->repeat_depth > 0)
        {
            Subtree child1 = ts_subtree_children(tree)[0];
            Subtree child2 =
                ts_subtree_children(tree)[tree.ptr->child_count - 1];
            // Signed arithmetic so a deeper right side yields a negative delta.
            long repeat_delta = (long)ts_subtree_repeat_depth(child1) -
                                (long)ts_subtree_repeat_depth(child2);
            if (repeat_delta > 0)
            {
                unsigned n = (unsigned)repeat_delta;
                for (unsigned i = n / 2; i > 0; i /= 2)
                {
                    ts_subtree__compress(tree, i, language, &pool->tree_stack);
                    n -= i;
                }
            }
        }

        // Queue every uniquely owned child that itself has children.
        for (uint32_t i = 0; i < tree.ptr->child_count; i++)
        {
            Subtree child = ts_subtree_children(tree)[i];
            if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1)
            {
                array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
            }
        }
    }
}

// Assign all of the node's properties that depend on its children.
// Recomputes, from scratch, the child counts, error cost, repeat depth,
// visible descendant count, external-token flags, dynamic precedence,
// padding, size, lookahead bytes, first-leaf info and fragility of a heap
// node. `self` must not be an inline subtree.
void ts_subtree_summarize_children(MutableSubtree self,
                                   const TSLanguage *language)
{
    assert(!self.data.is_inline);

    self.ptr->named_child_count = 0;
    self.ptr->visible_child_count = 0;
    self.ptr->error_cost = 0;
    self.ptr->repeat_depth = 0;
    self.ptr->visible_descendant_count = 0;
    self.ptr->has_external_tokens = false;
    self.ptr->depends_on_column = false;
    self.ptr->has_external_scanner_state_change = false;
    self.ptr->dynamic_precedence = 0;

    // Index among the non-extra children; used to look up aliases.
    uint32_t structural_index = 0;
    const TSSymbol *alias_sequence =
        ts_language_alias_sequence(language, self.ptr->production_id);
    uint32_t lookahead_end_byte = 0;

    const Subtree *children = ts_subtree_children(self);
    for (uint32_t i = 0; i < self.ptr->child_count; i++)
    {
        Subtree child = children[i];

        // Checked before this child's size is added, so only children that
        // start on the node's first row can propagate the flag.
        if (self.ptr->size.extent.row == 0 &&
            ts_subtree_depends_on_column(child))
        {
            self.ptr->depends_on_column = true;
        }

        if (ts_subtree_has_external_scanner_state_change(child))
        {
            self.ptr->has_external_scanner_state_change = true;
        }

        // The first child supplies the padding; every later child extends
        // the size by its padding plus its own size.
        if (i == 0)
        {
            self.ptr->padding = ts_subtree_padding(child);
            self.ptr->size = ts_subtree_size(child);
        }
        else
        {
            self.ptr->size =
                length_add(self.ptr->size, ts_subtree_total_size(child));
        }

        uint32_t child_lookahead_end_byte = self.ptr->padding.bytes +
                                            self.ptr->size.bytes +
                                            ts_subtree_lookahead_bytes(child);
        if (child_lookahead_end_byte > lookahead_end_byte)
        {
            lookahead_end_byte = child_lookahead_end_byte;
        }

        if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat)
        {
            self.ptr->error_cost += ts_subtree_error_cost(child);
        }

        // Inside an error node, each skipped non-extra child adds a
        // per-tree penalty (childless error children are exempt).
        uint32_t grandchild_count = ts_subtree_child_count(child);
        if (self.ptr->symbol == ts_builtin_sym_error ||
            self.ptr->symbol == ts_builtin_sym_error_repeat)
        {
            if (!ts_subtree_extra(child) &&
                !(ts_subtree_is_error(child) && grandchild_count == 0))
            {
                if (ts_subtree_visible(child))
                {
                    self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
                }
                else if (grandchild_count > 0)
                {
                    self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE *
                                            child.ptr->visible_child_count;
                }
            }
        }

        self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child);
        self.ptr->visible_descendant_count +=
            ts_subtree_visible_descendant_count(child);

        // A non-extra child with an alias counts as visible, and as named if
        // the alias symbol is named. Otherwise a visible child counts
        // itself, and an invisible child with children contributes its own
        // visible and named child counts.
        if (alias_sequence && alias_sequence[structural_index] != 0 &&
            !ts_subtree_extra(child))
        {
            self.ptr->visible_descendant_count++;
            self.ptr->visible_child_count++;
            if (ts_language_symbol_metadata(language,
                                            alias_sequence[structural_index])
                    .named)
            {
                self.ptr->named_child_count++;
            }
        }
        else if (ts_subtree_visible(child))
        {
            self.ptr->visible_descendant_count++;
            self.ptr->visible_child_count++;
            if (ts_subtree_named(child))
                self.ptr->named_child_count++;
        }
        else if (grandchild_count > 0)
        {
            self.ptr->visible_child_count += child.ptr->visible_child_count;
            self.ptr->named_child_count += child.ptr->named_child_count;
        }

        if (ts_subtree_has_external_tokens(child))
            self.ptr->has_external_tokens = true;

        // An error child makes the node fragile on both sides and clears
        // its parse state.
        if (ts_subtree_is_error(child))
        {
            self.ptr->fragile_left = self.ptr->fragile_right = true;
            self.ptr->parse_state = TS_TREE_STATE_NONE;
        }

        if (!ts_subtree_extra(child))
            structural_index++;
    }

    // Convert the furthest lookahead end into a count relative to this
    // node's own end.
    self.ptr->lookahead_bytes =
        lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes;

    // Error nodes additionally pay a fixed recovery cost plus penalties per
    // skipped byte and per skipped row.
    if (self.ptr->symbol == ts_builtin_sym_error ||
        self.ptr->symbol == ts_builtin_sym_error_repeat)
    {
        self.ptr->error_cost +=
            ERROR_COST_PER_RECOVERY +
            ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
            ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row;
    }

    if (self.ptr->child_count > 0)
    {
        Subtree first_child = children[0];
        Subtree last_child = children[self.ptr->child_count - 1];

        self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
        self.ptr->first_leaf.parse_state =
            ts_subtree_leaf_parse_state(first_child);

        if (ts_subtree_fragile_left(first_child))
            self.ptr->fragile_left = true;
        if (ts_subtree_fragile_right(last_child))
            self.ptr->fragile_right = true;

        // An invisible, unnamed node whose first child shares its symbol is
        // treated as part of a repetition chain: its depth is one more than
        // the deeper of its first and last child.
        if (self.ptr->child_count >= 2 && !self.ptr->visible &&
            !self.ptr->named &&
            ts_subtree_symbol(first_child) == self.ptr->symbol)
        {
            if (ts_subtree_repeat_depth(first_child) >
                ts_subtree_repeat_depth(last_child))
            {
                self.ptr->repeat_depth =
                    ts_subtree_repeat_depth(first_child) + 1;
            }
            else
            {
                self.ptr->repeat_depth =
                    ts_subtree_repeat_depth(last_child) + 1;
            }
        }
    }
}

// Create a new parent node with the given children.
//
// This takes ownership of the children array. The node's heap data is placed
// in the same allocation, directly after the last child, and the node starts
// with a reference count of one. Error and error-repeat nodes start fragile
// on both sides.
MutableSubtree ts_subtree_new_node(TSSymbol symbol, SubtreeArray *children,
                                   unsigned production_id,
                                   const TSLanguage *language)
{
    TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
    bool fragile =
        symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;

    // Allocate the node's data at the end of the array of children.
    size_t new_byte_size = ts_subtree_alloc_size(children->size);
    if (children->capacity * sizeof(Subtree) < new_byte_size)
    {
        // NOTE(review): the realloc result overwrites the only pointer to
        // the buffer and is not checked; on failure the old buffer leaks and
        // the code below dereferences NULL.
        children->contents = realloc(children->contents, new_byte_size);
        children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree));
    }
    SubtreeHeapData *data =
        (SubtreeHeapData *)&children->contents[children->size];

    *data = (SubtreeHeapData){.ref_count = 1,
                              .symbol = symbol,
                              .child_count = children->size,
                              .visible = metadata.visible,
                              .named = metadata.named,
                              .has_changes = false,
                              .has_external_scanner_state_change = false,
                              .fragile_left = fragile,
                              .fragile_right = fragile,
                              .is_keyword = false,
                              {{
                                  .visible_descendant_count = 0,
                                  .production_id = production_id,
                                  .first_leaf = {.symbol = 0, .parse_state = 0},
                              }}};
    MutableSubtree result = {.ptr = data};
    ts_subtree_summarize_children(result, language);
    return result;
}

// Create a new error node containing the given children.
//
// This node is treated as 'extra'.
Its children are prevented from having +// having any effect on the parse state. +Subtree ts_subtree_new_error_node(SubtreeArray *children, bool extra, + const TSLanguage *language) +{ + MutableSubtree result = + ts_subtree_new_node(ts_builtin_sym_error, children, 0, language); + result.ptr->extra = extra; + return ts_subtree_from_mut(result); +} + +// Create a new 'missing leaf' node. +// +// This node is treated as 'extra'. Its children are prevented from having +// having any effect on the parse state. +Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, + Length padding, uint32_t lookahead_bytes, + const TSLanguage *language) +{ + Subtree result = + ts_subtree_new_leaf(pool, symbol, padding, length_zero(), + lookahead_bytes, 0, false, false, false, language); + if (result.data.is_inline) + { + result.data.is_missing = true; + } + else + { + ((SubtreeHeapData *)result.ptr)->is_missing = true; + } + return result; +} + +void ts_subtree_retain(Subtree self) +{ + if (self.data.is_inline) + return; + assert(self.ptr->ref_count > 0); + atomic_inc((volatile uint32_t *)&self.ptr->ref_count); + assert(self.ptr->ref_count != 0); +} + +void ts_subtree_release(SubtreePool *pool, Subtree self) +{ + if (self.data.is_inline) + return; + array_clear(&pool->tree_stack); + + assert(self.ptr->ref_count > 0); + if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) + { + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); + } + + while (pool->tree_stack.size > 0) + { + MutableSubtree tree = array_pop(&pool->tree_stack); + if (tree.ptr->child_count > 0) + { + Subtree *children = ts_subtree_children(tree); + for (uint32_t i = 0; i < tree.ptr->child_count; i++) + { + Subtree child = children[i]; + if (child.data.is_inline) + continue; + assert(child.ptr->ref_count > 0); + if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) + { + array_push(&pool->tree_stack, + ts_subtree_to_mut_unsafe(child)); + } + } + free(children); + } + else + 
{ + if (tree.ptr->has_external_tokens) + { + ts_external_scanner_state_delete( + &tree.ptr->external_scanner_state); + } + ts_subtree_pool_free(pool, tree.ptr); + } + } +} + +int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool) +{ + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left)); + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right)); + + while (pool->tree_stack.size > 0) + { + right = ts_subtree_from_mut(array_pop(&pool->tree_stack)); + left = ts_subtree_from_mut(array_pop(&pool->tree_stack)); + + int result = 0; + if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) + result = -1; + else if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) + result = 1; + else if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) + result = -1; + else if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) + result = 1; + if (result != 0) + { + array_clear(&pool->tree_stack); + return result; + } + + for (uint32_t i = ts_subtree_child_count(left); i > 0; i--) + { + Subtree left_child = ts_subtree_children(left)[i - 1]; + Subtree right_child = ts_subtree_children(right)[i - 1]; + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left_child)); + array_push(&pool->tree_stack, + ts_subtree_to_mut_unsafe(right_child)); + } + } + + return 0; +} + +static inline void ts_subtree_set_has_changes(MutableSubtree *self) +{ + if (self->data.is_inline) + { + self->data.has_changes = true; + } + else + { + self->ptr->has_changes = true; + } +} + +Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, + SubtreePool *pool) +{ + typedef struct + { + Subtree *tree; + Edit edit; + } EditEntry; + + Array(EditEntry) stack = array_new(); + array_push( + &stack, + ((EditEntry){ + .tree = &self, + .edit = + (Edit){ + .start = {input_edit->start_byte, input_edit->start_point}, + .old_end = {input_edit->old_end_byte, + input_edit->old_end_point}, + .new_end = {input_edit->new_end_byte, + input_edit->new_end_point}, + 
}, + })); + + while (stack.size) + { + EditEntry entry = array_pop(&stack); + Edit edit = entry.edit; + bool is_noop = edit.old_end.bytes == edit.start.bytes && + edit.new_end.bytes == edit.start.bytes; + bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; + bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree); + + Length size = ts_subtree_size(*entry.tree); + Length padding = ts_subtree_padding(*entry.tree); + Length total_size = length_add(padding, size); + uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); + uint32_t end_byte = total_size.bytes + lookahead_bytes; + if (edit.start.bytes > end_byte || + (is_noop && edit.start.bytes == end_byte)) + continue; + + // If the edit is entirely within the space before this subtree, then + // shift this subtree over according to the edit without changing its + // size. + if (edit.old_end.bytes <= padding.bytes) + { + padding = + length_add(edit.new_end, length_sub(padding, edit.old_end)); + } + + // If the edit starts in the space before this subtree and extends into + // this subtree, shrink the subtree's content to compensate for the + // change in the space before it. + else if (edit.start.bytes < padding.bytes) + { + size = + length_saturating_sub(size, length_sub(edit.old_end, padding)); + padding = edit.new_end; + } + + // If the edit is a pure insertion right at the start of the subtree, + // shift the subtree over according to the insertion. + else if (edit.start.bytes == padding.bytes && is_pure_insertion) + { + padding = edit.new_end; + } + + // If the edit is within this subtree, resize the subtree to reflect the + // edit. 
+ else if (edit.start.bytes < total_size.bytes || + (edit.start.bytes == total_size.bytes && is_pure_insertion)) + { + size = length_add(length_sub(edit.new_end, padding), + length_saturating_sub(total_size, edit.old_end)); + } + + MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree); + + if (result.data.is_inline) + { + if (ts_subtree_can_inline(padding, size, lookahead_bytes)) + { + result.data.padding_bytes = padding.bytes; + result.data.padding_rows = padding.extent.row; + result.data.padding_columns = padding.extent.column; + result.data.size_bytes = size.bytes; + } + else + { + SubtreeHeapData *data = ts_subtree_pool_allocate(pool); + data->ref_count = 1; + data->padding = padding; + data->size = size; + data->lookahead_bytes = lookahead_bytes; + data->error_cost = 0; + data->child_count = 0; + data->symbol = result.data.symbol; + data->parse_state = result.data.parse_state; + data->visible = result.data.visible; + data->named = result.data.named; + data->extra = result.data.extra; + data->fragile_left = false; + data->fragile_right = false; + data->has_changes = false; + data->has_external_tokens = false; + data->depends_on_column = false; + data->is_missing = result.data.is_missing; + data->is_keyword = result.data.is_keyword; + result.ptr = data; + } + } + else + { + result.ptr->padding = padding; + result.ptr->size = size; + } + + ts_subtree_set_has_changes(&result); + *entry.tree = ts_subtree_from_mut(result); + + Length child_left, child_right = length_zero(); + for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; + i++) + { + Subtree *child = &ts_subtree_children(*entry.tree)[i]; + Length child_size = ts_subtree_total_size(*child); + child_left = child_right; + child_right = length_add(child_left, child_size); + + // If this child ends before the edit, it is not affected. 
+ if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < + edit.start.bytes) + continue; + + // Keep editing child nodes until a node is reached that starts + // after the edit. Also, if this node's validity depends on its + // column position, then continue invaliditing child nodes until + // reaching a line break. + if (((child_left.bytes > edit.old_end.bytes) || + (child_left.bytes == edit.old_end.bytes && + child_size.bytes > 0 && i > 0)) && + (!invalidate_first_row || + child_left.extent.row > entry.tree->ptr->padding.extent.row)) + { + break; + } + + // Transform edit into the child's coordinate space. + Edit child_edit = { + .start = length_saturating_sub(edit.start, child_left), + .old_end = length_saturating_sub(edit.old_end, child_left), + .new_end = length_saturating_sub(edit.new_end, child_left), + }; + + // Interpret all inserted text as applying to the *first* child that + // touches the edit. Subsequent children are only never have any + // text inserted into them; they are only shrunk to compensate for + // the edit. + if (child_right.bytes > edit.start.bytes || + (child_right.bytes == edit.start.bytes && is_pure_insertion)) + { + edit.new_end = edit.start; + } + + // Children that occur before the edit are not reshaped by the edit. + else + { + child_edit.old_end = child_edit.start; + child_edit.new_end = child_edit.start; + } + + // Queue processing of this child's subtree. 
+ array_push(&stack, ((EditEntry){ + .tree = child, + .edit = child_edit, + })); + } + } + + array_delete(&stack); + return self; +} + +Subtree ts_subtree_last_external_token(Subtree tree) +{ + if (!ts_subtree_has_external_tokens(tree)) + return NULL_SUBTREE; + while (tree.ptr->child_count > 0) + { + for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) + { + Subtree child = ts_subtree_children(tree)[i]; + if (ts_subtree_has_external_tokens(child)) + { + tree = child; + break; + } + } + } + return tree; +} + +static const char *const ROOT_FIELD = "__ROOT__"; + +static size_t ts_subtree__write_to_string( + Subtree self, char *string, size_t limit, const TSLanguage *language, + bool include_all, TSSymbol alias_symbol, bool alias_is_named, + const char *field_name) +{ + (void)(self); + (void)(string); + (void)(limit); + (void)(language); + (void)(include_all); + (void)(alias_symbol); + (void)(alias_is_named); + (void)(field_name); + return (0); +} + +char *ts_subtree_string(Subtree self, TSSymbol alias_symbol, + bool alias_is_named, const TSLanguage *language, + bool include_all) +{ + char scratch_string[1]; + size_t size = ts_subtree__write_to_string(self, scratch_string, 1, language, + include_all, alias_symbol, + alias_is_named, ROOT_FIELD) + + 1; + char *result = malloc(size * sizeof(char)); + ts_subtree__write_to_string(self, result, size, language, include_all, + alias_symbol, alias_is_named, ROOT_FIELD); + return result; +} + +void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, + const TSLanguage *language, + TSSymbol alias_symbol, void *f) +{ + (void)(self); + (void)(start_offset); + (void)(language); + (void)(alias_symbol); + (void)(f); +} + +bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) +{ + const ExternalScannerState *state_self = + ts_subtree_external_scanner_state(self); + const ExternalScannerState *state_other = + ts_subtree_external_scanner_state(other); + return ts_external_scanner_state_eq( + 
state_self, ts_external_scanner_state_data(state_other), + state_other->length); +} + +TSTree *ts_tree_new(Subtree root, const TSLanguage *language, + const TSRange *included_ranges, + unsigned included_range_count) +{ + TSTree *result = malloc(sizeof(TSTree)); + result->root = root; + result->language = ts_language_copy(language); + result->included_ranges = calloc(included_range_count, sizeof(TSRange)); + memcpy(result->included_ranges, included_ranges, + included_range_count * sizeof(TSRange)); + result->included_range_count = included_range_count; + return result; +} + +TSTree *ts_tree_copy(const TSTree *self) +{ + ts_subtree_retain(self->root); + return ts_tree_new(self->root, self->language, self->included_ranges, + self->included_range_count); +} + +void ts_tree_delete(TSTree *self) +{ + if (!self) + return; + + SubtreePool pool = ts_subtree_pool_new(0); + ts_subtree_release(&pool, self->root); + ts_subtree_pool_delete(&pool); + ts_language_delete(self->language); + free(self->included_ranges); + free(self); +} + +TSNode ts_tree_root_node(const TSTree *self) +{ + return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0); +} + +TSNode ts_tree_root_node_with_offset(const TSTree *self, uint32_t offset_bytes, + TSPoint offset_extent) +{ + Length offset = {offset_bytes, offset_extent}; + return ts_node_new(self, &self->root, + length_add(offset, ts_subtree_padding(self->root)), 0); +} + +const TSLanguage *ts_tree_language(const TSTree *self) +{ + return self->language; +} + +void ts_tree_edit(TSTree *self, const TSInputEdit *edit) +{ + for (unsigned i = 0; i < self->included_range_count; i++) + { + TSRange *range = &self->included_ranges[i]; + if (range->end_byte >= edit->old_end_byte) + { + if (range->end_byte != UINT32_MAX) + { + range->end_byte = + edit->new_end_byte + (range->end_byte - edit->old_end_byte); + range->end_point = + point_add(edit->new_end_point, + point_sub(range->end_point, edit->old_end_point)); + if (range->end_byte < 
edit->new_end_byte) + { + range->end_byte = UINT32_MAX; + range->end_point = POINT_MAX; + } + } + } + else if (range->end_byte > edit->start_byte) + { + range->end_byte = edit->start_byte; + range->end_point = edit->start_point; + } + if (range->start_byte >= edit->old_end_byte) + { + range->start_byte = + edit->new_end_byte + (range->start_byte - edit->old_end_byte); + range->start_point = + point_add(edit->new_end_point, + point_sub(range->start_point, edit->old_end_point)); + if (range->start_byte < edit->new_end_byte) + { + range->start_byte = UINT32_MAX; + range->start_point = POINT_MAX; + } + } + else if (range->start_byte > edit->start_byte) + { + range->start_byte = edit->start_byte; + range->start_point = edit->start_point; + } + } + + SubtreePool pool = ts_subtree_pool_new(0); + self->root = ts_subtree_edit(self->root, edit, &pool); + ts_subtree_pool_delete(&pool); +} + +TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) +{ + *length = self->included_range_count; + TSRange *ranges = calloc(self->included_range_count, sizeof(TSRange)); + memcpy(ranges, self->included_ranges, + self->included_range_count * sizeof(TSRange)); + return ranges; +} + +TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, + const TSTree *new_tree, uint32_t *length) +{ + TreeCursor cursor1 = {NULL, array_new(), 0}; + TreeCursor cursor2 = {NULL, array_new(), 0}; + ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree)); + ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree)); + + TSRangeArray included_range_differences = array_new(); + ts_range_array_get_changed_ranges( + old_tree->included_ranges, old_tree->included_range_count, + new_tree->included_ranges, new_tree->included_range_count, + &included_range_differences); + + TSRange *result; + *length = ts_subtree_get_changed_ranges( + &old_tree->root, &new_tree->root, &cursor1, &cursor2, + old_tree->language, &included_range_differences, &result); + + array_delete(&included_range_differences); + 
array_delete(&cursor1.stack); + array_delete(&cursor2.stack); + return result; +} + +#ifdef _WIN32 + +# include +# include + +int _ts_dup(HANDLE handle) +{ + HANDLE dup_handle; + if (!DuplicateHandle(GetCurrentProcess(), handle, GetCurrentProcess(), + &dup_handle, 0, FALSE, DUPLICATE_SAME_ACCESS)) + return -1; + + return _open_osfhandle((intptr_t)dup_handle, 0); +} + +void ts_tree_print_dot_graph(const TSTree *self, int fd) +{ + FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a"); + ts_subtree_print_dot_graph(self->root, self->language, file); + fclose(file); +} + +#else + +# include + +int _ts_dup(int file_descriptor) +{ + return dup(file_descriptor); +} + +void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) +{ + (void)(self); + (void)(file_descriptor); +} + +#endif + +typedef struct +{ + Subtree parent; + const TSTree *tree; + Length position; + uint32_t child_index; + uint32_t structural_child_index; + uint32_t descendant_index; + const TSSymbol *alias_sequence; +} CursorChildIterator; + +// CursorChildIterator + +static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, + uint32_t index) +{ + TreeCursorEntry *entry = &self->stack.contents[index]; + if (index == 0 || ts_subtree_visible(*entry->subtree)) + { + return true; + } + else if (!ts_subtree_extra(*entry->subtree)) + { + TreeCursorEntry *parent_entry = &self->stack.contents[index - 1]; + return ts_language_alias_at(self->tree->language, + parent_entry->subtree->ptr->production_id, + entry->structural_child_index); + } + else + { + return false; + } +} + +static inline CursorChildIterator ts_tree_cursor_iterate_children( + const TreeCursor *self) +{ + TreeCursorEntry *last_entry = array_back(&self->stack); + if (ts_subtree_child_count(*last_entry->subtree) == 0) + { + return (CursorChildIterator){ + NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL}; + } + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, 
last_entry->subtree->ptr->production_id); + + uint32_t descendant_index = last_entry->descendant_index; + if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) + { + descendant_index += 1; + } + + return (CursorChildIterator){ + .tree = self->tree, + .parent = *last_entry->subtree, + .position = last_entry->position, + .child_index = 0, + .structural_child_index = 0, + .descendant_index = descendant_index, + .alias_sequence = alias_sequence, + }; +} + +static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self, + TreeCursorEntry *result, + bool *visible) +{ + if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) + return false; + const Subtree *child = + &ts_subtree_children(self->parent)[self->child_index]; + *result = (TreeCursorEntry){ + .subtree = child, + .position = self->position, + .child_index = self->child_index, + .structural_child_index = self->structural_child_index, + .descendant_index = self->descendant_index, + }; + *visible = ts_subtree_visible(*child); + bool extra = ts_subtree_extra(*child); + if (!extra) + { + if (self->alias_sequence) + { + *visible |= self->alias_sequence[self->structural_child_index]; + } + self->structural_child_index++; + } + + self->descendant_index += ts_subtree_visible_descendant_count(*child); + if (*visible) + { + self->descendant_index += 1; + } + + self->position = length_add(self->position, ts_subtree_size(*child)); + self->child_index++; + + if (self->child_index < self->parent.ptr->child_count) + { + Subtree next_child = + ts_subtree_children(self->parent)[self->child_index]; + self->position = + length_add(self->position, ts_subtree_padding(next_child)); + } + + return true; +} + +// Return a position that, when `b` is added to it, yields `a`. This +// can only be computed if `b` has zero rows. Otherwise, this function +// returns `LENGTH_UNDEFINED`, and the caller needs to recompute +// the position some other way. 
+static inline Length length_backtrack(Length a, Length b) +{ + if (length_is_undefined(a) || b.extent.row != 0) + { + return LENGTH_UNDEFINED; + } + + Length result; + result.bytes = a.bytes - b.bytes; + result.extent.row = a.extent.row; + result.extent.column = a.extent.column - b.extent.column; + return result; +} + +static inline bool ts_tree_cursor_child_iterator_previous( + CursorChildIterator *self, TreeCursorEntry *result, bool *visible) +{ + // this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into + // account unsigned underflow + if (!self->parent.ptr || (int8_t)self->child_index == -1) + return false; + const Subtree *child = + &ts_subtree_children(self->parent)[self->child_index]; + *result = (TreeCursorEntry){ + .subtree = child, + .position = self->position, + .child_index = self->child_index, + .structural_child_index = self->structural_child_index, + }; + *visible = ts_subtree_visible(*child); + bool extra = ts_subtree_extra(*child); + if (!extra && self->alias_sequence) + { + *visible |= self->alias_sequence[self->structural_child_index]; + self->structural_child_index--; + } + + self->position = + length_backtrack(self->position, ts_subtree_padding(*child)); + self->child_index--; + + // unsigned can underflow so compare it to child_count + if (self->child_index < self->parent.ptr->child_count) + { + Subtree previous_child = + ts_subtree_children(self->parent)[self->child_index]; + Length size = ts_subtree_size(previous_child); + self->position = length_backtrack(self->position, size); + } + + return true; +} + +// TSTreeCursor - lifecycle + +TSTreeCursor ts_tree_cursor_new(TSNode node) +{ + TSTreeCursor self = {NULL, NULL, {0, 0, 0}}; + ts_tree_cursor_init((TreeCursor *)&self, node); + return self; +} + +void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) +{ + ts_tree_cursor_init((TreeCursor *)_self, node); +} + +void ts_tree_cursor_init(TreeCursor *self, TSNode node) +{ + self->tree = node.tree; + 
self->root_alias_symbol = node.context[3]; + array_clear(&self->stack); + array_push(&self->stack, ((TreeCursorEntry){ + .subtree = (const Subtree *)node.id, + .position = {ts_node_start_byte(node), + ts_node_start_point(node)}, + .child_index = 0, + .structural_child_index = 0, + .descendant_index = 0, + })); +} + +void ts_tree_cursor_delete(TSTreeCursor *_self) +{ + TreeCursor *self = (TreeCursor *)_self; + array_delete(&self->stack); +} + +// TSTreeCursor - walking the tree + +TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) +{ + TreeCursor *self = (TreeCursor *)_self; + bool visible; + TreeCursorEntry entry; + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); + while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) + { + if (visible) + { + array_push(&self->stack, entry); + return TreeCursorStepVisible; + } + if (ts_subtree_visible_child_count(*entry.subtree) > 0) + { + array_push(&self->stack, entry); + return TreeCursorStepHidden; + } + } + return TreeCursorStepNone; +} + +bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) +{ + for (;;) + { + switch (ts_tree_cursor_goto_first_child_internal(self)) + { + case TreeCursorStepHidden: + continue; + case TreeCursorStepVisible: + return true; + default: + return false; + } + } + return false; +} + +TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) +{ + TreeCursor *self = (TreeCursor *)_self; + bool visible; + TreeCursorEntry entry; + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); + if (!iterator.parent.ptr || iterator.parent.ptr->child_count == 0) + return TreeCursorStepNone; + + TreeCursorEntry last_entry = {0}; + TreeCursorStep last_step = TreeCursorStepNone; + while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) + { + if (visible) + { + last_entry = entry; + last_step = TreeCursorStepVisible; + } + else if (ts_subtree_visible_child_count(*entry.subtree) > 0) + { + last_entry = 
entry; + last_step = TreeCursorStepHidden; + } + } + if (last_entry.subtree) + { + array_push(&self->stack, last_entry); + return last_step; + } + + return TreeCursorStepNone; +} + +bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) +{ + for (;;) + { + switch (ts_tree_cursor_goto_last_child_internal(self)) + { + case TreeCursorStepHidden: + continue; + case TreeCursorStepVisible: + return true; + default: + return false; + } + } + return false; +} + +static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point( + TSTreeCursor *_self, uint32_t goal_byte, TSPoint goal_point) +{ + TreeCursor *self = (TreeCursor *)_self; + uint32_t initial_size = self->stack.size; + uint32_t visible_child_index = 0; + + bool did_descend; + do + { + did_descend = false; + + bool visible; + TreeCursorEntry entry; + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); + while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) + { + Length entry_end = + length_add(entry.position, ts_subtree_size(*entry.subtree)); + bool at_goal = entry_end.bytes >= goal_byte && + point_gte(entry_end.extent, goal_point); + uint32_t visible_child_count = + ts_subtree_visible_child_count(*entry.subtree); + if (at_goal) + { + if (visible) + { + array_push(&self->stack, entry); + return visible_child_index; + } + if (visible_child_count > 0) + { + array_push(&self->stack, entry); + did_descend = true; + break; + } + } + else if (visible) + { + visible_child_index++; + } + else + { + visible_child_index += visible_child_count; + } + } + } while (did_descend); + + self->stack.size = initial_size; + return -1; +} + +int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, + uint32_t goal_byte) +{ + return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, + POINT_ZERO); +} + +int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, + TSPoint goal_point) +{ + return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, 
+ goal_point); +} + +TreeCursorStep ts_tree_cursor_goto_sibling_internal( + TSTreeCursor *_self, + bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) +{ + TreeCursor *self = (TreeCursor *)_self; + uint32_t initial_size = self->stack.size; + + while (self->stack.size > 1) + { + TreeCursorEntry entry = array_pop(&self->stack); + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); + iterator.child_index = entry.child_index; + iterator.structural_child_index = entry.structural_child_index; + iterator.position = entry.position; + iterator.descendant_index = entry.descendant_index; + + bool visible = false; + advance(&iterator, &entry, &visible); + if (visible && self->stack.size + 1 < initial_size) + break; + + while (advance(&iterator, &entry, &visible)) + { + if (visible) + { + array_push(&self->stack, entry); + return TreeCursorStepVisible; + } + + if (ts_subtree_visible_child_count(*entry.subtree)) + { + array_push(&self->stack, entry); + return TreeCursorStepHidden; + } + } + } + + self->stack.size = initial_size; + return TreeCursorStepNone; +} + +TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) +{ + return ts_tree_cursor_goto_sibling_internal( + _self, ts_tree_cursor_child_iterator_next); +} + +bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) +{ + switch (ts_tree_cursor_goto_next_sibling_internal(self)) + { + case TreeCursorStepHidden: + ts_tree_cursor_goto_first_child(self); + return true; + case TreeCursorStepVisible: + return true; + default: + return false; + } +} + +TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal( + TSTreeCursor *_self) +{ + // since subtracting across row loses column information, we may have to + // restore it + TreeCursor *self = (TreeCursor *)_self; + + // for that, save current position before traversing + TreeCursorStep step = ts_tree_cursor_goto_sibling_internal( + _self, ts_tree_cursor_child_iterator_previous); + if (step == TreeCursorStepNone) + 
return step; + + // if length is already valid, there's no need to recompute it + if (!length_is_undefined(array_back(&self->stack)->position)) + return step; + + // restore position from the parent node + const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2]; + Length position = parent->position; + uint32_t child_index = array_back(&self->stack)->child_index; + const Subtree *children = ts_subtree_children((*(parent->subtree))); + + if (child_index > 0) + { + // skip first child padding since its position should match the position + // of the parent + position = length_add(position, ts_subtree_size(children[0])); + for (uint32_t i = 1; i < child_index; ++i) + { + position = length_add(position, ts_subtree_total_size(children[i])); + } + position = + length_add(position, ts_subtree_padding(children[child_index])); + } + + array_back(&self->stack)->position = position; + + return step; +} + +bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) +{ + switch (ts_tree_cursor_goto_previous_sibling_internal(self)) + { + case TreeCursorStepHidden: + ts_tree_cursor_goto_last_child(self); + return true; + case TreeCursorStepVisible: + return true; + default: + return false; + } +} + +bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) +{ + TreeCursor *self = (TreeCursor *)_self; + for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) + { + if (ts_tree_cursor_is_entry_visible(self, i)) + { + self->stack.size = i + 1; + return true; + } + } + return false; +} + +void ts_tree_cursor_goto_descendant(TSTreeCursor *_self, + uint32_t goal_descendant_index) +{ + TreeCursor *self = (TreeCursor *)_self; + + // Ascend to the lowest ancestor that contains the goal node. + for (;;) + { + uint32_t i = self->stack.size - 1; + TreeCursorEntry *entry = &self->stack.contents[i]; + uint32_t next_descendant_index = + entry->descendant_index + + (ts_tree_cursor_is_entry_visible(self, i) ? 
1 : 0) + + ts_subtree_visible_descendant_count(*entry->subtree); + if ((entry->descendant_index <= goal_descendant_index) && + (next_descendant_index > goal_descendant_index)) + { + break; + } + else if (self->stack.size <= 1) + { + return; + } + else + { + self->stack.size--; + } + } + + // Descend to the goal node. + bool did_descend = true; + do + { + did_descend = false; + bool visible; + TreeCursorEntry entry; + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); + if (iterator.descendant_index > goal_descendant_index) + { + return; + } + + while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) + { + if (iterator.descendant_index > goal_descendant_index) + { + array_push(&self->stack, entry); + if (visible && entry.descendant_index == goal_descendant_index) + { + return; + } + else + { + did_descend = true; + break; + } + } + } + } while (did_descend); +} + +uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) +{ + const TreeCursor *self = (const TreeCursor *)_self; + TreeCursorEntry *last_entry = array_back(&self->stack); + return last_entry->descendant_index; +} + +TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) +{ + const TreeCursor *self = (const TreeCursor *)_self; + TreeCursorEntry *last_entry = array_back(&self->stack); + TSSymbol alias_symbol = self->root_alias_symbol; + if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) + { + TreeCursorEntry *parent_entry = + &self->stack.contents[self->stack.size - 2]; + alias_symbol = ts_language_alias_at( + self->tree->language, parent_entry->subtree->ptr->production_id, + last_entry->structural_child_index); + } + return ts_node_new(self->tree, last_entry->subtree, last_entry->position, + alias_symbol); +} + +// Private - Get various facts about the current node that are needed +// when executing tree queries. 
+void ts_tree_cursor_current_status( + const TSTreeCursor *_self, TSFieldId *field_id, bool *has_later_siblings, + bool *has_later_named_siblings, + bool *can_have_later_siblings_with_this_field, TSSymbol *supertypes, + unsigned *supertype_count) +{ + const TreeCursor *self = (const TreeCursor *)_self; + unsigned max_supertypes = *supertype_count; + *field_id = 0; + *supertype_count = 0; + *has_later_siblings = false; + *has_later_named_siblings = false; + *can_have_later_siblings_with_this_field = false; + + // Walk up the tree, visiting the current node and its invisible ancestors, + // because fields can refer to nodes through invisible *wrapper* nodes, + for (unsigned i = self->stack.size - 1; i > 0; i--) + { + TreeCursorEntry *entry = &self->stack.contents[i]; + TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; + + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, parent_entry->subtree->ptr->production_id); + +#define subtree_symbol(subtree, structural_child_index) \ + ((!ts_subtree_extra(subtree) && alias_sequence && \ + alias_sequence[structural_child_index]) \ + ? alias_sequence[structural_child_index] \ + : ts_subtree_symbol(subtree)) + + // Stop walking up when a visible ancestor is found. + TSSymbol entry_symbol = + subtree_symbol(*entry->subtree, entry->structural_child_index); + TSSymbolMetadata entry_metadata = + ts_language_symbol_metadata(self->tree->language, entry_symbol); + if (i != self->stack.size - 1 && entry_metadata.visible) + break; + + // Record any supertypes + if (entry_metadata.supertype && *supertype_count < max_supertypes) + { + supertypes[*supertype_count] = entry_symbol; + (*supertype_count)++; + } + + // Determine if the current node has later siblings. 
+ if (!*has_later_siblings) + { + unsigned sibling_count = parent_entry->subtree->ptr->child_count; + unsigned structural_child_index = entry->structural_child_index; + if (!ts_subtree_extra(*entry->subtree)) + structural_child_index++; + for (unsigned j = entry->child_index + 1; j < sibling_count; j++) + { + Subtree sibling = + ts_subtree_children(*parent_entry->subtree)[j]; + TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata( + self->tree->language, + subtree_symbol(sibling, structural_child_index)); + if (sibling_metadata.visible) + { + *has_later_siblings = true; + if (*has_later_named_siblings) + break; + if (sibling_metadata.named) + { + *has_later_named_siblings = true; + break; + } + } + else if (ts_subtree_visible_child_count(sibling) > 0) + { + *has_later_siblings = true; + if (*has_later_named_siblings) + break; + if (sibling.ptr->named_child_count > 0) + { + *has_later_named_siblings = true; + break; + } + } + if (!ts_subtree_extra(sibling)) + structural_child_index++; + } + } + +#undef subtree_symbol + + if (!ts_subtree_extra(*entry->subtree)) + { + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map(self->tree->language, + parent_entry->subtree->ptr->production_id, + &field_map, &field_map_end); + + // Look for a field name associated with the current node. + if (!*field_id) + { + for (const TSFieldMapEntry *map = field_map; + map < field_map_end; map++) + { + if (!map->inherited && + map->child_index == entry->structural_child_index) + { + *field_id = map->field_id; + break; + } + } + } + + // Determine if the current node can have later siblings with the + // same field name. 
+ if (*field_id) + { + for (const TSFieldMapEntry *map = field_map; + map < field_map_end; map++) + { + if (map->field_id == *field_id && + map->child_index > entry->structural_child_index) + { + *can_have_later_siblings_with_this_field = true; + break; + } + } + } + } + } +} + +uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) +{ + const TreeCursor *self = (const TreeCursor *)_self; + uint32_t depth = 0; + for (unsigned i = 1; i < self->stack.size; i++) + { + if (ts_tree_cursor_is_entry_visible(self, i)) + { + depth++; + } + } + return depth; +} + +TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) +{ + const TreeCursor *self = (const TreeCursor *)_self; + for (int i = (int)self->stack.size - 2; i >= 0; i--) + { + TreeCursorEntry *entry = &self->stack.contents[i]; + bool is_visible = true; + TSSymbol alias_symbol = 0; + if (i > 0) + { + TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; + alias_symbol = ts_language_alias_at( + self->tree->language, parent_entry->subtree->ptr->production_id, + entry->structural_child_index); + is_visible = + (alias_symbol != 0) || ts_subtree_visible(*entry->subtree); + } + if (is_visible) + { + return ts_node_new(self->tree, entry->subtree, entry->position, + alias_symbol); + } + } + return ts_node_new(NULL, NULL, length_zero(), 0); +} + +TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) +{ + const TreeCursor *self = (const TreeCursor *)_self; + + // Walk up the tree, visiting the current node and its invisible ancestors. + for (unsigned i = self->stack.size - 1; i > 0; i--) + { + TreeCursorEntry *entry = &self->stack.contents[i]; + TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; + + // Stop walking up when another visible node is found. 
+ if (i != self->stack.size - 1 && + ts_tree_cursor_is_entry_visible(self, i)) + break; + + if (ts_subtree_extra(*entry->subtree)) + break; + + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map(self->tree->language, + parent_entry->subtree->ptr->production_id, + &field_map, &field_map_end); + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) + { + if (!map->inherited && + map->child_index == entry->structural_child_index) + { + return map->field_id; + } + } + } + return 0; +} + +const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) +{ + TSFieldId id = ts_tree_cursor_current_field_id(_self); + if (id) + { + const TreeCursor *self = (const TreeCursor *)_self; + return self->tree->language->field_names[id]; + } + else + { + return NULL; + } +} + +TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) +{ + const TreeCursor *cursor = (const TreeCursor *)_cursor; + TSTreeCursor res = {NULL, NULL, {0, 0}}; + TreeCursor *copy = (TreeCursor *)&res; + copy->tree = cursor->tree; + copy->root_alias_symbol = cursor->root_alias_symbol; + array_init(©->stack); + array_push_all(©->stack, &cursor->stack); + return res; +} + +void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) +{ + const TreeCursor *cursor = (const TreeCursor *)_src; + TreeCursor *copy = (TreeCursor *)_dst; + copy->tree = cursor->tree; + copy->root_alias_symbol = cursor->root_alias_symbol; + array_clear(©->stack); + array_push_all(©->stack, &cursor->stack); +} diff --git a/parser/create_language.c b/parser/src/create_language.c similarity index 96% rename from parser/create_language.c rename to parser/src/create_language.c index 75aa116d..e82e32c2 100644 --- a/parser/create_language.c +++ b/parser/src/create_language.c @@ -6,13 +6,13 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/04/25 16:13:52 by maiboyer #+# #+# */ -/* Updated: 2024/04/30 16:37:30 by maiboyer ### ########.fr */ +/* Updated: 
2024/05/01 15:52:38 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ -#include "./static/headers/constants.h" -#include "./static/headers/symbols.h" -#include "./parse_types.h" +#include "../static/headers/constants.h" +#include "../static/headers/symbols.h" +#include "../parse_types.h" const uint16_t *create_parse_table(void); const uint16_t *create_small_parse_table(void); diff --git a/parser/src/error_costs.h b/parser/src/error_costs.h deleted file mode 100644 index 32d3666a..00000000 --- a/parser/src/error_costs.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef TREE_SITTER_ERROR_COSTS_H_ -#define TREE_SITTER_ERROR_COSTS_H_ - -#define ERROR_STATE 0 -#define ERROR_COST_PER_RECOVERY 500 -#define ERROR_COST_PER_MISSING_TREE 110 -#define ERROR_COST_PER_SKIPPED_TREE 100 -#define ERROR_COST_PER_SKIPPED_LINE 30 -#define ERROR_COST_PER_SKIPPED_CHAR 1 - -#endif diff --git a/parser/src/get_changed_ranges.c b/parser/src/get_changed_ranges.c deleted file mode 100644 index 902e1a4f..00000000 --- a/parser/src/get_changed_ranges.c +++ /dev/null @@ -1,501 +0,0 @@ -#include "./get_changed_ranges.h" -#include "./subtree.h" -#include "./language.h" -#include "./error_costs.h" -#include "./tree_cursor.h" -#include - -// #define DEBUG_GET_CHANGED_RANGES - -static void ts_range_array_add( - TSRangeArray *self, - Length start, - Length end -) { - if (self->size > 0) { - t_range *last_range = array_back(self); - if (start.bytes <= last_range->end_byte) { - last_range->end_byte = end.bytes; - last_range->end_point = end.extent; - return; - } - } - - if (start.bytes < end.bytes) { - t_range range = { start.extent, end.extent, start.bytes, end.bytes }; - array_push(self, range); - } -} - -bool ts_range_array_intersects( - const TSRangeArray *self, - unsigned start_index, - uint32_t start_byte, - uint32_t end_byte -) { - for (unsigned i = start_index; i < self->size; i++) { - t_range *range = &self->contents[i]; - if 
(range->end_byte > start_byte) { - if (range->start_byte >= end_byte) break; - return true; - } - } - return false; -} - -void ts_range_array_get_changed_ranges( - const t_range *old_ranges, unsigned old_range_count, - const t_range *new_ranges, unsigned new_range_count, - TSRangeArray *differences -) { - unsigned new_index = 0; - unsigned old_index = 0; - Length current_position = length_zero(); - bool in_old_range = false; - bool in_new_range = false; - - while (old_index < old_range_count || new_index < new_range_count) { - const t_range *old_range = &old_ranges[old_index]; - const t_range *new_range = &new_ranges[new_index]; - - Length next_old_position; - if (in_old_range) { - next_old_position = (Length) {old_range->end_byte, old_range->end_point}; - } else if (old_index < old_range_count) { - next_old_position = (Length) {old_range->start_byte, old_range->start_point}; - } else { - next_old_position = LENGTH_MAX; - } - - Length next_new_position; - if (in_new_range) { - next_new_position = (Length) {new_range->end_byte, new_range->end_point}; - } else if (new_index < new_range_count) { - next_new_position = (Length) {new_range->start_byte, new_range->start_point}; - } else { - next_new_position = LENGTH_MAX; - } - - if (next_old_position.bytes < next_new_position.bytes) { - if (in_old_range != in_new_range) { - ts_range_array_add(differences, current_position, next_old_position); - } - if (in_old_range) old_index++; - current_position = next_old_position; - in_old_range = !in_old_range; - } else if (next_new_position.bytes < next_old_position.bytes) { - if (in_old_range != in_new_range) { - ts_range_array_add(differences, current_position, next_new_position); - } - if (in_new_range) new_index++; - current_position = next_new_position; - in_new_range = !in_new_range; - } else { - if (in_old_range != in_new_range) { - ts_range_array_add(differences, current_position, next_new_position); - } - if (in_old_range) old_index++; - if (in_new_range) new_index++; - 
in_old_range = !in_old_range; - in_new_range = !in_new_range; - current_position = next_new_position; - } - } -} - -typedef struct { - TreeCursor cursor; - const t_language *language; - unsigned visible_depth; - bool in_padding; -} Iterator; - -static Iterator iterator_new( - TreeCursor *cursor, - const Subtree *tree, - const t_language *language -) { - array_clear(&cursor->stack); - array_push(&cursor->stack, ((TreeCursorEntry) { - .subtree = tree, - .position = length_zero(), - .child_index = 0, - .structural_child_index = 0, - })); - return (Iterator) { - .cursor = *cursor, - .language = language, - .visible_depth = 1, - .in_padding = false, - }; -} - -static bool iterator_done(Iterator *self) { - return self->cursor.stack.size == 0; -} - -static Length iterator_start_position(Iterator *self) { - TreeCursorEntry entry = *array_back(&self->cursor.stack); - if (self->in_padding) { - return entry.position; - } else { - return length_add(entry.position, ts_subtree_padding(*entry.subtree)); - } -} - -static Length iterator_end_position(Iterator *self) { - TreeCursorEntry entry = *array_back(&self->cursor.stack); - Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree)); - if (self->in_padding) { - return result; - } else { - return length_add(result, ts_subtree_size(*entry.subtree)); - } -} - -static bool iterator_tree_is_visible(const Iterator *self) { - TreeCursorEntry entry = *array_back(&self->cursor.stack); - if (ts_subtree_visible(*entry.subtree)) return true; - if (self->cursor.stack.size > 1) { - Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; - return ts_language_alias_at( - self->language, - parent.ptr->production_id, - entry.structural_child_index - ) != 0; - } - return false; -} - -static void iterator_get_visible_state( - const Iterator *self, - Subtree *tree, - t_symbol *alias_symbol, - uint32_t *start_byte -) { - uint32_t i = self->cursor.stack.size - 1; - - if (self->in_padding) { - if (i == 
0) return; - i--; - } - - for (; i + 1 > 0; i--) { - TreeCursorEntry entry = self->cursor.stack.contents[i]; - - if (i > 0) { - const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; - *alias_symbol = ts_language_alias_at( - self->language, - parent->ptr->production_id, - entry.structural_child_index - ); - } - - if (ts_subtree_visible(*entry.subtree) || *alias_symbol) { - *tree = *entry.subtree; - *start_byte = entry.position.bytes; - break; - } - } -} - -static void iterator_ascend(Iterator *self) { - if (iterator_done(self)) return; - if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--; - if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false; - self->cursor.stack.size--; -} - -static bool iterator_descend(Iterator *self, uint32_t goal_position) { - if (self->in_padding) return false; - - bool did_descend = false; - do { - did_descend = false; - TreeCursorEntry entry = *array_back(&self->cursor.stack); - Length position = entry.position; - uint32_t structural_child_index = 0; - for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { - const Subtree *child = &ts_subtree_children(*entry.subtree)[i]; - Length child_left = length_add(position, ts_subtree_padding(*child)); - Length child_right = length_add(child_left, ts_subtree_size(*child)); - - if (child_right.bytes > goal_position) { - array_push(&self->cursor.stack, ((TreeCursorEntry) { - .subtree = child, - .position = position, - .child_index = i, - .structural_child_index = structural_child_index, - })); - - if (iterator_tree_is_visible(self)) { - if (child_left.bytes > goal_position) { - self->in_padding = true; - } else { - self->visible_depth++; - } - return true; - } - - did_descend = true; - break; - } - - position = child_right; - if (!ts_subtree_extra(*child)) structural_child_index++; - } - } while (did_descend); - - return false; -} - -static void iterator_advance(Iterator *self) { - if (self->in_padding) { - 
self->in_padding = false; - if (iterator_tree_is_visible(self)) { - self->visible_depth++; - } else { - iterator_descend(self, 0); - } - return; - } - - for (;;) { - if (iterator_tree_is_visible(self)) self->visible_depth--; - TreeCursorEntry entry = array_pop(&self->cursor.stack); - if (iterator_done(self)) return; - - const Subtree *parent = array_back(&self->cursor.stack)->subtree; - uint32_t child_index = entry.child_index + 1; - if (ts_subtree_child_count(*parent) > child_index) { - Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); - uint32_t structural_child_index = entry.structural_child_index; - if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; - const Subtree *next_child = &ts_subtree_children(*parent)[child_index]; - - array_push(&self->cursor.stack, ((TreeCursorEntry) { - .subtree = next_child, - .position = position, - .child_index = child_index, - .structural_child_index = structural_child_index, - })); - - if (iterator_tree_is_visible(self)) { - if (ts_subtree_padding(*next_child).bytes > 0) { - self->in_padding = true; - } else { - self->visible_depth++; - } - } else { - iterator_descend(self, 0); - } - break; - } - } -} - -typedef enum { - IteratorDiffers, - IteratorMayDiffer, - IteratorMatches, -} IteratorComparison; - -static IteratorComparison iterator_compare( - const Iterator *old_iter, - const Iterator *new_iter -) { - Subtree old_tree = NULL_SUBTREE; - Subtree new_tree = NULL_SUBTREE; - uint32_t old_start = 0; - uint32_t new_start = 0; - t_symbol old_alias_symbol = 0; - t_symbol new_alias_symbol = 0; - iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start); - iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start); - - if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches; - if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers; - - if ( - old_alias_symbol == new_alias_symbol && - ts_subtree_symbol(old_tree) == 
ts_subtree_symbol(new_tree) - ) { - if (old_start == new_start && - !ts_subtree_has_changes(old_tree) && - ts_subtree_symbol(old_tree) != ts_builtin_sym_error && - ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes && - ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE && - ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE && - (ts_subtree_parse_state(old_tree) == ERROR_STATE) == - (ts_subtree_parse_state(new_tree) == ERROR_STATE)) { - return IteratorMatches; - } else { - return IteratorMayDiffer; - } - } - - return IteratorDiffers; -} - -#ifdef DEBUG_GET_CHANGED_RANGES -static inline void iterator_print_state(Iterator *self) { - TreeCursorEntry entry = *array_back(&self->cursor.stack); - TSPoint start = iterator_start_position(self).extent; - TSPoint end = iterator_end_position(self).extent; - const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree)); - printf( - "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", - name, self->in_padding ? 
"(p)" : " ", - self->visible_depth, - start.row + 1, start.column, - end.row + 1, end.column - ); -} -#endif - -unsigned ts_subtree_get_changed_ranges( - const Subtree *old_tree, const Subtree *new_tree, - TreeCursor *cursor1, TreeCursor *cursor2, - const t_language *language, - const TSRangeArray *included_range_differences, - t_range **ranges -) { - TSRangeArray results = array_new(); - - Iterator old_iter = iterator_new(cursor1, old_tree, language); - Iterator new_iter = iterator_new(cursor2, new_tree, language); - - unsigned included_range_difference_index = 0; - - Length position = iterator_start_position(&old_iter); - Length next_position = iterator_start_position(&new_iter); - if (position.bytes < next_position.bytes) { - ts_range_array_add(&results, position, next_position); - position = next_position; - } else if (position.bytes > next_position.bytes) { - ts_range_array_add(&results, next_position, position); - next_position = position; - } - - do { - #ifdef DEBUG_GET_CHANGED_RANGES - printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column); - iterator_print_state(&old_iter); - printf("\tvs\t"); - iterator_print_state(&new_iter); - puts(""); - #endif - - // Compare the old and new subtrees. - IteratorComparison comparison = iterator_compare(&old_iter, &new_iter); - - // Even if the two subtrees appear to be identical, they could differ - // internally if they contain a range of text that was previously - // excluded from the parse, and is now included, or vice-versa. - if (comparison == IteratorMatches && ts_range_array_intersects( - included_range_differences, - included_range_difference_index, - position.bytes, - iterator_end_position(&old_iter).bytes - )) { - comparison = IteratorMayDiffer; - } - - bool is_changed = false; - switch (comparison) { - // If the subtrees are definitely identical, move to the end - // of both subtrees. 
- case IteratorMatches: - next_position = iterator_end_position(&old_iter); - break; - - // If the subtrees might differ internally, descend into both - // subtrees, finding the first child that spans the current position. - case IteratorMayDiffer: - if (iterator_descend(&old_iter, position.bytes)) { - if (!iterator_descend(&new_iter, position.bytes)) { - is_changed = true; - next_position = iterator_end_position(&old_iter); - } - } else if (iterator_descend(&new_iter, position.bytes)) { - is_changed = true; - next_position = iterator_end_position(&new_iter); - } else { - next_position = length_min( - iterator_end_position(&old_iter), - iterator_end_position(&new_iter) - ); - } - break; - - // If the subtrees are different, record a change and then move - // to the end of both subtrees. - case IteratorDiffers: - is_changed = true; - next_position = length_min( - iterator_end_position(&old_iter), - iterator_end_position(&new_iter) - ); - break; - } - - // Ensure that both iterators are caught up to the current position. - while ( - !iterator_done(&old_iter) && - iterator_end_position(&old_iter).bytes <= next_position.bytes - ) iterator_advance(&old_iter); - while ( - !iterator_done(&new_iter) && - iterator_end_position(&new_iter).bytes <= next_position.bytes - ) iterator_advance(&new_iter); - - // Ensure that both iterators are at the same depth in the tree. 
- while (old_iter.visible_depth > new_iter.visible_depth) { - iterator_ascend(&old_iter); - } - while (new_iter.visible_depth > old_iter.visible_depth) { - iterator_ascend(&new_iter); - } - - if (is_changed) { - #ifdef DEBUG_GET_CHANGED_RANGES - printf( - " change: [[%u, %u] - [%u, %u]]\n", - position.extent.row + 1, position.extent.column, - next_position.extent.row + 1, next_position.extent.column - ); - #endif - - ts_range_array_add(&results, position, next_position); - } - - position = next_position; - - // Keep track of the current position in the included range differences - // array in order to avoid scanning the entire array on each iteration. - while (included_range_difference_index < included_range_differences->size) { - const t_range *range = &included_range_differences->contents[ - included_range_difference_index - ]; - if (range->end_byte <= position.bytes) { - included_range_difference_index++; - } else { - break; - } - } - } while (!iterator_done(&old_iter) && !iterator_done(&new_iter)); - - Length old_size = ts_subtree_total_size(*old_tree); - Length new_size = ts_subtree_total_size(*new_tree); - if (old_size.bytes < new_size.bytes) { - ts_range_array_add(&results, old_size, new_size); - } else if (new_size.bytes < old_size.bytes) { - ts_range_array_add(&results, new_size, old_size); - } - - *cursor1 = old_iter.cursor; - *cursor2 = new_iter.cursor; - *ranges = results.contents; - return results.size; -} diff --git a/parser/src/get_changed_ranges.h b/parser/src/get_changed_ranges.h deleted file mode 100644 index 982a7047..00000000 --- a/parser/src/get_changed_ranges.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_ -#define TREE_SITTER_GET_CHANGED_RANGES_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./tree_cursor.h" -#include "./subtree.h" - -typedef Array(t_range) TSRangeArray; - -void ts_range_array_get_changed_ranges( - const t_range *old_ranges, unsigned old_range_count, - const t_range *new_ranges, 
unsigned new_range_count, - TSRangeArray *differences -); - -bool ts_range_array_intersects( - const TSRangeArray *self, unsigned start_index, - uint32_t start_byte, uint32_t end_byte -); - -unsigned ts_subtree_get_changed_ranges( - const Subtree *old_tree, const Subtree *new_tree, - TreeCursor *cursor1, TreeCursor *cursor2, - const t_language *language, - const TSRangeArray *included_range_differences, - t_range **ranges -); - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_GET_CHANGED_RANGES_H_ diff --git a/parser/src/host.h b/parser/src/host.h deleted file mode 100644 index a07e9f89..00000000 --- a/parser/src/host.h +++ /dev/null @@ -1,21 +0,0 @@ - -// Determine endian and pointer size based on known defines. -// TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments -// to override this. - -#if !defined(TS_BIG_ENDIAN) -#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \ - || (defined( __APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__))) -#define TS_BIG_ENDIAN 1 -#else -#define TS_BIG_ENDIAN 0 -#endif -#endif - -#if !defined(TS_PTR_SIZE) -#if UINTPTR_MAX == 0xFFFFFFFF -#define TS_PTR_SIZE 32 -#else -#define TS_PTR_SIZE 64 -#endif -#endif diff --git a/parser/src/language.c b/parser/src/language.c deleted file mode 100644 index 5cf86906..00000000 --- a/parser/src/language.c +++ /dev/null @@ -1,216 +0,0 @@ -#include "./language.h" - -#include "./api.h" -#include - -const t_language *ts_language_copy(const t_language *self) { - return self; -} - -void ts_language_delete(const t_language *self) { - (void)(self); -} - -uint32_t ts_language_symbol_count(const t_language *self) { - return self->symbol_count + self->alias_count; -} - -uint32_t ts_language_state_count(const t_language *self) { - return self->state_count; -} - -uint32_t ts_language_version(const t_language *self) { - return self->version; -} - -uint32_t ts_language_field_count(const t_language *self) { - return self->field_count; -} - -void 
ts_language_table_entry( - const t_language *self, - t_state_id state, - t_symbol symbol, - TableEntry *result -) { - if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { - result->action_count = 0; - result->is_reusable = false; - result->actions = NULL; - } else { - assert(symbol < self->token_count); - uint32_t action_index = ts_language_lookup(self, state, symbol); - const TSParseActionEntry *entry = &self->parse_actions[action_index]; - result->action_count = entry->entry.count; - result->is_reusable = entry->entry.reusable; - result->actions = (const TSParseAction *)(entry + 1); - } -} - -TSSymbolMetadata ts_language_symbol_metadata( - const t_language *self, - t_symbol symbol -) { - if (symbol == ts_builtin_sym_error) { - return (TSSymbolMetadata) {.visible = true, .named = true}; - } else if (symbol == ts_builtin_sym_error_repeat) { - return (TSSymbolMetadata) {.visible = false, .named = false}; - } else { - return self->symbol_metadata[symbol]; - } -} - -t_symbol ts_language_public_symbol( - const t_language *self, - t_symbol symbol -) { - if (symbol == ts_builtin_sym_error) return symbol; - return self->public_symbol_map[symbol]; -} - -t_state_id ts_language_next_state( - const t_language *self, - t_state_id state, - t_symbol symbol -) { - if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { - return 0; - } else if (symbol < self->token_count) { - uint32_t count; - const TSParseAction *actions = ts_language_actions(self, state, symbol, &count); - if (count > 0) { - TSParseAction action = actions[count - 1]; - if (action.type == TSParseActionTypeShift) { - return action.shift.extra ? 
state : action.shift.state; - } - } - return 0; - } else { - return ts_language_lookup(self, state, symbol); - } -} - -const char *ts_language_symbol_name( - const t_language *self, - t_symbol symbol -) { - if (symbol == ts_builtin_sym_error) { - return "ERROR"; - } else if (symbol == ts_builtin_sym_error_repeat) { - return "_ERROR"; - } else if (symbol < ts_language_symbol_count(self)) { - return self->symbol_names[symbol]; - } else { - return NULL; - } -} - -t_symbol ts_language_symbol_for_name( - const t_language *self, - const char *string, - uint32_t length, - bool is_named -) { - if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error; - uint16_t count = (uint16_t)ts_language_symbol_count(self); - for (t_symbol i = 0; i < count; i++) { - TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i); - if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; - const char *symbol_name = self->symbol_names[i]; - if (!strncmp(symbol_name, string, length) && !symbol_name[length]) { - return self->public_symbol_map[i]; - } - } - return 0; -} - -t_symbol_type ts_language_symbol_type( - const t_language *self, - t_symbol symbol -) { - TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol); - if (metadata.named && metadata.visible) { - return TSSymbolTypeRegular; - } else if (metadata.visible) { - return TSSymbolTypeAnonymous; - } else { - return TSSymbolTypeAuxiliary; - } -} - -const char *ts_language_field_name_for_id( - const t_language *self, - t_field_id id -) { - uint32_t count = ts_language_field_count(self); - if (count && id <= count) { - return self->field_names[id]; - } else { - return NULL; - } -} - -t_field_id ts_language_field_id_for_name( - const t_language *self, - const char *name, - uint32_t name_length -) { - uint16_t count = (uint16_t)ts_language_field_count(self); - for (t_symbol i = 1; i < count + 1; i++) { - switch (strncmp(name, self->field_names[i], name_length)) { - case 0: - if 
(self->field_names[i][name_length] == 0) return i; - break; - case -1: - return 0; - default: - break; - } - } - return 0; -} - -t_lookahead_iterator *ts_lookahead_iterator_new(const t_language *self, t_state_id state) { - if (state >= self->state_count) return NULL; - LookaheadIterator *iterator = ts_malloc(sizeof(LookaheadIterator)); - *iterator = ts_language_lookaheads(self, state); - return (t_lookahead_iterator *)iterator; -} - -void ts_lookahead_iterator_delete(t_lookahead_iterator *self) { - ts_free(self); -} - -bool ts_lookahead_iterator_reset_state(t_lookahead_iterator * self, t_state_id state) { - LookaheadIterator *iterator = (LookaheadIterator *)self; - if (state >= iterator->language->state_count) return false; - *iterator = ts_language_lookaheads(iterator->language, state); - return true; -} - -const t_language *ts_lookahead_iterator_language(const t_lookahead_iterator *self) { - const LookaheadIterator *iterator = (const LookaheadIterator *)self; - return iterator->language; -} - -bool ts_lookahead_iterator_reset(t_lookahead_iterator *self, const t_language *language, t_state_id state) { - if (state >= language->state_count) return false; - LookaheadIterator *iterator = (LookaheadIterator *)self; - *iterator = ts_language_lookaheads(language, state); - return true; -} - -bool ts_lookahead_iterator_next(t_lookahead_iterator *self) { - LookaheadIterator *iterator = (LookaheadIterator *)self; - return ts_lookahead_iterator__next(iterator); -} - -t_symbol ts_lookahead_iterator_current_symbol(const t_lookahead_iterator *self) { - const LookaheadIterator *iterator = (const LookaheadIterator *)self; - return iterator->symbol; -} - -const char *ts_lookahead_iterator_current_symbol_name(const t_lookahead_iterator *self) { - const LookaheadIterator *iterator = (const LookaheadIterator *)self; - return ts_language_symbol_name(iterator->language, iterator->symbol); -} diff --git a/parser/src/language.h b/parser/src/language.h deleted file mode 100644 index 
94167b45..00000000 --- a/parser/src/language.h +++ /dev/null @@ -1,299 +0,0 @@ -#ifndef TREE_SITTER_LANGUAGE_H_ -#define TREE_SITTER_LANGUAGE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./subtree.h" -#include "./parser.h" - -#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) - -#define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14 -#define LANGUAGE_VERSION_USABLE_VIA_WASM 13 - -typedef struct { - const TSParseAction *actions; - uint32_t action_count; - bool is_reusable; -} TableEntry; - -typedef struct { - const t_language *language; - const uint16_t *data; - const uint16_t *group_end; - t_state_id state; - uint16_t table_value; - uint16_t section_index; - uint16_t group_count; - bool is_small_state; - - const TSParseAction *actions; - t_symbol symbol; - t_state_id next_state; - uint16_t action_count; -} LookaheadIterator; - -void ts_language_table_entry(const t_language *, t_state_id, t_symbol, TableEntry *); - -TSSymbolMetadata ts_language_symbol_metadata(const t_language *, t_symbol); - -t_symbol ts_language_public_symbol(const t_language *, t_symbol); - -t_state_id ts_language_next_state(const t_language *self, t_state_id state, t_symbol symbol); - -static inline bool ts_language_is_symbol_external(const t_language *self, t_symbol symbol) { - return 0 < symbol && symbol < self->external_token_count + 1; -} - -static inline const TSParseAction *ts_language_actions( - const t_language *self, - t_state_id state, - t_symbol symbol, - uint32_t *count -) { - TableEntry entry; - ts_language_table_entry(self, state, symbol, &entry); - *count = entry.action_count; - return entry.actions; -} - -static inline bool ts_language_has_reduce_action( - const t_language *self, - t_state_id state, - t_symbol symbol -) { - TableEntry entry; - ts_language_table_entry(self, state, symbol, &entry); - return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce; -} - -// Lookup the table value for a given symbol and state. 
-// -// For non-terminal symbols, the table value represents a successor state. -// For terminal symbols, it represents an index in the actions table. -// For 'large' parse states, this is a direct lookup. For 'small' parse -// states, this requires searching through the symbol groups to find -// the given symbol. -static inline uint16_t ts_language_lookup( - const t_language *self, - t_state_id state, - t_symbol symbol -) { - if (state >= self->large_state_count) { - uint32_t index = self->small_parse_table_map[state - self->large_state_count]; - const uint16_t *data = &self->small_parse_table[index]; - uint16_t group_count = *(data++); - for (unsigned i = 0; i < group_count; i++) { - uint16_t section_value = *(data++); - uint16_t symbol_count = *(data++); - for (unsigned j = 0; j < symbol_count; j++) { - if (*(data++) == symbol) return section_value; - } - } - return 0; - } else { - return self->parse_table[state * self->symbol_count + symbol]; - } -} - -static inline bool ts_language_has_actions( - const t_language *self, - t_state_id state, - t_symbol symbol -) { - return ts_language_lookup(self, state, symbol) != 0; -} - -// Iterate over all of the symbols that are valid in the given state. -// -// For 'large' parse states, this just requires iterating through -// all possible symbols and checking the parse table for each one. -// For 'small' parse states, this exploits the structure of the -// table to only visit the valid symbols. 
-static inline LookaheadIterator ts_language_lookaheads( - const t_language *self, - t_state_id state -) { - bool is_small_state = state >= self->large_state_count; - const uint16_t *data; - const uint16_t *group_end = NULL; - uint16_t group_count = 0; - if (is_small_state) { - uint32_t index = self->small_parse_table_map[state - self->large_state_count]; - data = &self->small_parse_table[index]; - group_end = data + 1; - group_count = *data; - } else { - data = &self->parse_table[state * self->symbol_count] - 1; - } - return (LookaheadIterator) { - .language = self, - .data = data, - .group_end = group_end, - .group_count = group_count, - .is_small_state = is_small_state, - .symbol = UINT16_MAX, - .next_state = 0, - }; -} - -static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) { - // For small parse states, valid symbols are listed explicitly, - // grouped by their value. There's no need to look up the actions - // again until moving to the next group. - if (self->is_small_state) { - self->data++; - if (self->data == self->group_end) { - if (self->group_count == 0) return false; - self->group_count--; - self->table_value = *(self->data++); - unsigned symbol_count = *(self->data++); - self->group_end = self->data + symbol_count; - self->symbol = *self->data; - } else { - self->symbol = *self->data; - return true; - } - } - - // For large parse states, iterate through every symbol until one - // is found that has valid actions. - else { - do { - self->data++; - self->symbol++; - if (self->symbol >= self->language->symbol_count) return false; - self->table_value = *self->data; - } while (!self->table_value); - } - - // Depending on if the symbols is terminal or non-terminal, the table value either - // represents a list of actions or a successor state. 
- if (self->symbol < self->language->token_count) { - const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value]; - self->action_count = entry->entry.count; - self->actions = (const TSParseAction *)(entry + 1); - self->next_state = 0; - } else { - self->action_count = 0; - self->next_state = self->table_value; - } - return true; -} - -// Whether the state is a "primary state". If this returns false, it indicates that there exists -// another state that behaves identically to this one with respect to query analysis. -static inline bool ts_language_state_is_primary( - const t_language *self, - t_state_id state -) { - if (self->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) { - return state == self->primary_state_ids[state]; - } else { - return true; - } -} - -static inline const bool *ts_language_enabled_external_tokens( - const t_language *self, - unsigned external_scanner_state -) { - if (external_scanner_state == 0) { - return NULL; - } else { - return self->external_scanner.states + self->external_token_count * external_scanner_state; - } -} - -static inline const t_symbol *ts_language_alias_sequence( - const t_language *self, - uint32_t production_id -) { - return production_id ? - &self->alias_sequences[production_id * self->max_alias_sequence_length] : - NULL; -} - -static inline t_symbol ts_language_alias_at( - const t_language *self, - uint32_t production_id, - uint32_t child_index -) { - return production_id ? 
- self->alias_sequences[production_id * self->max_alias_sequence_length + child_index] : - 0; -} - -static inline void ts_language_field_map( - const t_language *self, - uint32_t production_id, - const TSFieldMapEntry **start, - const TSFieldMapEntry **end -) { - if (self->field_count == 0) { - *start = NULL; - *end = NULL; - return; - } - - TSFieldMapSlice slice = self->field_map_slices[production_id]; - *start = &self->field_map_entries[slice.index]; - *end = &self->field_map_entries[slice.index] + slice.length; -} - -static inline void ts_language_aliases_for_symbol( - const t_language *self, - t_symbol original_symbol, - const t_symbol **start, - const t_symbol **end -) { - *start = &self->public_symbol_map[original_symbol]; - *end = *start + 1; - - unsigned idx = 0; - for (;;) { - t_symbol symbol = self->alias_map[idx++]; - if (symbol == 0 || symbol > original_symbol) break; - uint16_t count = self->alias_map[idx++]; - if (symbol == original_symbol) { - *start = &self->alias_map[idx]; - *end = &self->alias_map[idx + count]; - break; - } - idx += count; - } -} - -static inline void ts_language_write_symbol_as_dot_string( - const t_language *self, - FILE *f, - t_symbol symbol -) { - const char *name = ts_language_symbol_name(self, symbol); - for (const char *chr = name; *chr; chr++) { - switch (*chr) { - case '"': - case '\\': - fputc('\\', f); - fputc(*chr, f); - break; - case '\n': - fputs("\\n", f); - break; - case '\t': - fputs("\\t", f); - break; - default: - fputc(*chr, f); - break; - } - } -} - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_LANGUAGE_H_ diff --git a/parser/src/length.h b/parser/src/length.h deleted file mode 100644 index 83a947d1..00000000 --- a/parser/src/length.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef TREE_SITTER_LENGTH_H_ -#define TREE_SITTER_LENGTH_H_ - -#include -#include -#include "./point.h" -#include "./api.h" - -typedef struct { - uint32_t bytes; - t_point extent; -} Length; - -static const Length LENGTH_UNDEFINED = {0, 
{0, 1}}; -static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}}; - -static inline bool length_is_undefined(Length length) { - return length.bytes == 0 && length.extent.column != 0; -} - -static inline Length length_min(Length len1, Length len2) { - return (len1.bytes < len2.bytes) ? len1 : len2; -} - -static inline Length length_add(Length len1, Length len2) { - Length result; - result.bytes = len1.bytes + len2.bytes; - result.extent = point_add(len1.extent, len2.extent); - return result; -} - -static inline Length length_sub(Length len1, Length len2) { - Length result; - result.bytes = len1.bytes - len2.bytes; - result.extent = point_sub(len1.extent, len2.extent); - return result; -} - -static inline Length length_zero(void) { - Length result = {0, {0, 0}}; - return result; -} - -static inline Length length_saturating_sub(Length len1, Length len2) { - if (len1.bytes > len2.bytes) { - return length_sub(len1, len2); - } else { - return length_zero(); - } -} - -#endif diff --git a/parser/src/lexer.c b/parser/src/lexer.c deleted file mode 100644 index daf62f3d..00000000 --- a/parser/src/lexer.c +++ /dev/null @@ -1,438 +0,0 @@ -#include -#include "./lexer.h" -#include "./subtree.h" -#include "./length.h" -//#include "./unicode.h" - -#define LOG(message, character) \ - if (self->logger.log) { \ - snprintf( \ - self->debug_buffer, \ - TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \ - 32 <= character && character < 127 ? \ - message " character:'%c'" : \ - message " character:%d", \ - character \ - ); \ - self->logger.log( \ - self->logger.payload, \ - TSLogTypeLex, \ - self->debug_buffer \ - ); \ - } - -static const int32_t BYTE_ORDER_MARK = 0xFEFF; - -static const t_range DEFAULT_RANGE = { - .start_point = { - .row = 0, - .column = 0, - }, - .end_point = { - .row = UINT32_MAX, - .column = UINT32_MAX, - }, - .start_byte = 0, - .end_byte = UINT32_MAX -}; - -// Check if the lexer has reached EOF. 
This state is stored -// by setting the lexer's `current_included_range_index` such that -// it has consumed all of its available ranges. -static bool ts_lexer__eof(const TSLexer *_self) { - Lexer *self = (Lexer *)_self; - return self->current_included_range_index == self->included_range_count; -} - -// Clear the currently stored chunk of source code, because the lexer's -// position has changed. -static void ts_lexer__clear_chunk(Lexer *self) { - self->chunk = NULL; - self->chunk_size = 0; - self->chunk_start = 0; -} - -// Call the lexer's input callback to obtain a new chunk of source code -// for the current position. -static void ts_lexer__get_chunk(Lexer *self) { - self->chunk_start = self->current_position.bytes; - self->chunk = self->input.read( - self->input.payload, - self->current_position.bytes, - self->current_position.extent, - &self->chunk_size - ); - if (!self->chunk_size) { - self->current_included_range_index = self->included_range_count; - self->chunk = NULL; - } -} -typedef uint32_t (*DecodeFunc)( - const uint8_t *string, - uint32_t length, - int32_t *code_point -); - -static uint32_t ts_decode_ascii( - const uint8_t *string, - uint32_t length, - int32_t *code_point -) { - uint32_t i = 1; - (void)(length); - *code_point = *string; - return i; -} - -// Decode the next unicode character in the current chunk of source code. -// This assumes that the lexer has already retrieved a chunk of source -// code that spans the current position. -static void ts_lexer__get_lookahead(Lexer *self) { - uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start; - uint32_t size = self->chunk_size - position_in_chunk; - - if (size == 0) { - self->lookahead_size = 1; - self->data.lookahead = '\0'; - return; - } - - #define TS_DECODE_ERROR -1 - - const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk; - // UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8 - // ? 
ts_decode_utf8 - // : ts_decode_utf16; - - - self->lookahead_size = ts_decode_ascii(chunk, size, &self->data.lookahead); - - // If this chunk ended in the middle of a multi-byte character, - // try again with a fresh chunk. - if (self->data.lookahead == TS_DECODE_ERROR && size < 4) { - ts_lexer__get_chunk(self); - chunk = (const uint8_t *)self->chunk; - size = self->chunk_size; - self->lookahead_size = ts_decode_ascii(chunk, size, &self->data.lookahead); - } - - if (self->data.lookahead == TS_DECODE_ERROR) { - self->lookahead_size = 1; - } -} - -static void ts_lexer_goto(Lexer *self, Length position) { - self->current_position = position; - - // Move to the first valid position at or after the given position. - bool found_included_range = false; - for (unsigned i = 0; i < self->included_range_count; i++) { - t_range *included_range = &self->included_ranges[i]; - if ( - included_range->end_byte > self->current_position.bytes && - included_range->end_byte > included_range->start_byte - ) { - if (included_range->start_byte >= self->current_position.bytes) { - self->current_position = (Length) { - .bytes = included_range->start_byte, - .extent = included_range->start_point, - }; - } - - self->current_included_range_index = i; - found_included_range = true; - break; - } - } - - if (found_included_range) { - // If the current position is outside of the current chunk of text, - // then clear out the current chunk of text. - if (self->chunk && ( - self->current_position.bytes < self->chunk_start || - self->current_position.bytes >= self->chunk_start + self->chunk_size - )) { - ts_lexer__clear_chunk(self); - } - - self->lookahead_size = 0; - self->data.lookahead = '\0'; - } - - // If the given position is beyond any of included ranges, move to the EOF - // state - past the end of the included ranges. 
- else { - self->current_included_range_index = self->included_range_count; - t_range *last_included_range = &self->included_ranges[self->included_range_count - 1]; - self->current_position = (Length) { - .bytes = last_included_range->end_byte, - .extent = last_included_range->end_point, - }; - ts_lexer__clear_chunk(self); - self->lookahead_size = 1; - self->data.lookahead = '\0'; - } -} - -// Intended to be called only from functions that control logging. -static void ts_lexer__do_advance(Lexer *self, bool skip) { - if (self->lookahead_size) { - self->current_position.bytes += self->lookahead_size; - if (self->data.lookahead == '\n') { - self->current_position.extent.row++; - self->current_position.extent.column = 0; - } else { - self->current_position.extent.column += self->lookahead_size; - } - } - - const t_range *current_range = &self->included_ranges[self->current_included_range_index]; - while ( - self->current_position.bytes >= current_range->end_byte || - current_range->end_byte == current_range->start_byte - ) { - if (self->current_included_range_index < self->included_range_count) { - self->current_included_range_index++; - } - if (self->current_included_range_index < self->included_range_count) { - current_range++; - self->current_position = (Length) { - current_range->start_byte, - current_range->start_point, - }; - } else { - current_range = NULL; - break; - } - } - - if (skip) self->token_start_position = self->current_position; - - if (current_range) { - if ( - self->current_position.bytes < self->chunk_start || - self->current_position.bytes >= self->chunk_start + self->chunk_size - ) { - ts_lexer__get_chunk(self); - } - ts_lexer__get_lookahead(self); - } else { - ts_lexer__clear_chunk(self); - self->data.lookahead = '\0'; - self->lookahead_size = 1; - } -} - -// Advance to the next character in the source code, retrieving a new -// chunk of source code if needed. 
-static void ts_lexer__advance(TSLexer *_self, bool skip) { - Lexer *self = (Lexer *)_self; - if (!self->chunk) return; - - if (skip) { - LOG("skip", self->data.lookahead) - } else { - LOG("consume", self->data.lookahead) - } - - ts_lexer__do_advance(self, skip); -} - -// Mark that a token match has completed. This can be called multiple -// times if a longer match is found later. -static void ts_lexer__mark_end(TSLexer *_self) { - Lexer *self = (Lexer *)_self; - if (!ts_lexer__eof(&self->data)) { - // If the lexer is right at the beginning of included range, - // then the token should be considered to end at the *end* of the - // previous included range, rather than here. - t_range *current_included_range = &self->included_ranges[ - self->current_included_range_index - ]; - if ( - self->current_included_range_index > 0 && - self->current_position.bytes == current_included_range->start_byte - ) { - t_range *previous_included_range = current_included_range - 1; - self->token_end_position = (Length) { - previous_included_range->end_byte, - previous_included_range->end_point, - }; - return; - } - } - self->token_end_position = self->current_position; -} - -static uint32_t ts_lexer__get_column(TSLexer *_self) { - Lexer *self = (Lexer *)_self; - - uint32_t goal_byte = self->current_position.bytes; - - self->did_get_column = true; - self->current_position.bytes -= self->current_position.extent.column; - self->current_position.extent.column = 0; - - if (self->current_position.bytes < self->chunk_start) { - ts_lexer__get_chunk(self); - } - - uint32_t result = 0; - if (!ts_lexer__eof(_self)) { - ts_lexer__get_lookahead(self); - while (self->current_position.bytes < goal_byte && self->chunk) { - result++; - ts_lexer__do_advance(self, false); - if (ts_lexer__eof(_self)) break; - } - } - - return result; -} - -// Is the lexer at a boundary between two disjoint included ranges of -// source code? 
This is exposed as an API because some languages' external -// scanners need to perform custom actions at these boundaries. -static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) { - const Lexer *self = (const Lexer *)_self; - if (self->current_included_range_index < self->included_range_count) { - t_range *current_range = &self->included_ranges[self->current_included_range_index]; - return self->current_position.bytes == current_range->start_byte; - } else { - return false; - } -} - -void ts_lexer_init(Lexer *self) { - *self = (Lexer) { - .data = { - // The lexer's methods are stored as struct fields so that generated - // parsers can call them without needing to be linked against this - // library. - .advance = ts_lexer__advance, - .mark_end = ts_lexer__mark_end, - .get_column = ts_lexer__get_column, - .is_at_included_range_start = ts_lexer__is_at_included_range_start, - .eof = ts_lexer__eof, - .lookahead = 0, - .result_symbol = 0, - }, - .chunk = NULL, - .chunk_size = 0, - .chunk_start = 0, - .current_position = {0, {0, 0}}, - .logger = { - .payload = NULL, - .log = NULL - }, - .included_ranges = NULL, - .included_range_count = 0, - .current_included_range_index = 0, - }; - ts_lexer_set_included_ranges(self, NULL, 0); -} - -void ts_lexer_delete(Lexer *self) { - ts_free(self->included_ranges); -} - -void ts_lexer_set_input(Lexer *self, t_input input) { - self->input = input; - ts_lexer__clear_chunk(self); - ts_lexer_goto(self, self->current_position); -} - -// Move the lexer to the given position. This doesn't do any work -// if the parser is already at the given position. 
-void ts_lexer_reset(Lexer *self, Length position) { - if (position.bytes != self->current_position.bytes) { - ts_lexer_goto(self, position); - } -} - -void ts_lexer_start(Lexer *self) { - self->token_start_position = self->current_position; - self->token_end_position = LENGTH_UNDEFINED; - self->data.result_symbol = 0; - self->did_get_column = false; - if (!ts_lexer__eof(&self->data)) { - if (!self->chunk_size) ts_lexer__get_chunk(self); - if (!self->lookahead_size) ts_lexer__get_lookahead(self); - if ( - self->current_position.bytes == 0 && - self->data.lookahead == BYTE_ORDER_MARK - ) ts_lexer__advance(&self->data, true); - } -} - -void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) { - if (length_is_undefined(self->token_end_position)) { - ts_lexer__mark_end(&self->data); - } - - // If the token ended at an included range boundary, then its end position - // will have been reset to the end of the preceding range. Reset the start - // position to match. - if (self->token_end_position.bytes < self->token_start_position.bytes) { - self->token_start_position = self->token_end_position; - } - - uint32_t current_lookahead_end_byte = self->current_position.bytes + 1; - - // In order to determine that a byte sequence is invalid UTF8 or UTF16, - // the character decoding algorithm may have looked at the following byte. - // Therefore, the next byte *after* the current (invalid) character - // affects the interpretation of the current character. 
- if (self->data.lookahead == TS_DECODE_ERROR) { - current_lookahead_end_byte++; - } - - if (current_lookahead_end_byte > *lookahead_end_byte) { - *lookahead_end_byte = current_lookahead_end_byte; - } -} - -void ts_lexer_advance_to_end(Lexer *self) { - while (self->chunk) { - ts_lexer__advance(&self->data, false); - } -} - -void ts_lexer_mark_end(Lexer *self) { - ts_lexer__mark_end(&self->data); -} - -bool ts_lexer_set_included_ranges( - Lexer *self, - const t_range *ranges, - uint32_t count -) { - if (count == 0 || !ranges) { - ranges = &DEFAULT_RANGE; - count = 1; - } else { - uint32_t previous_byte = 0; - for (unsigned i = 0; i < count; i++) { - const t_range *range = &ranges[i]; - if ( - range->start_byte < previous_byte || - range->end_byte < range->start_byte - ) return false; - previous_byte = range->end_byte; - } - } - - size_t size = count * sizeof(t_range); - self->included_ranges = ts_realloc(self->included_ranges, size); - memcpy(self->included_ranges, ranges, size); - self->included_range_count = count; - ts_lexer_goto(self, self->current_position); - return true; -} - -t_range *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) { - *count = self->included_range_count; - return self->included_ranges; -} - -#undef LOG diff --git a/parser/src/lexer.h b/parser/src/lexer.h deleted file mode 100644 index 4b63629b..00000000 --- a/parser/src/lexer.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef TREE_SITTER_LEXER_H_ -#define TREE_SITTER_LEXER_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./length.h" -#include "./subtree.h" -#include "./api.h" -#include "./parser.h" - -typedef struct { - TSLexer data; - Length current_position; - Length token_start_position; - Length token_end_position; - - t_range *included_ranges; - const char *chunk; - t_input input; - t_logger logger; - - uint32_t included_range_count; - uint32_t current_included_range_index; - uint32_t chunk_start; - uint32_t chunk_size; - uint32_t lookahead_size; - bool did_get_column; - 
- char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; -} Lexer; - -void ts_lexer_init(Lexer *); -void ts_lexer_delete(Lexer *); -void ts_lexer_set_input(Lexer *, t_input); -void ts_lexer_reset(Lexer *, Length); -void ts_lexer_start(Lexer *); -void ts_lexer_finish(Lexer *, uint32_t *); -void ts_lexer_advance_to_end(Lexer *); -void ts_lexer_mark_end(Lexer *); -bool ts_lexer_set_included_ranges(Lexer *self, const t_range *ranges, uint32_t count); -t_range *ts_lexer_included_ranges(const Lexer *self, uint32_t *count); - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_LEXER_H_ diff --git a/parser/src/lib.c b/parser/src/lib.c deleted file mode 100644 index 4054eb3c..00000000 --- a/parser/src/lib.c +++ /dev/null @@ -1,13 +0,0 @@ -#define _POSIX_C_SOURCE 200112L - -#include "./alloc.c" -#include "./get_changed_ranges.c" -#include "./language.c" -#include "./lexer.c" -#include "./node.c" -#include "./parser.c" -#include "./query.c" -#include "./stack.c" -#include "./subtree.c" -#include "./tree_cursor.c" -#include "./tree.c" diff --git a/parser/src/node.c b/parser/src/node.c deleted file mode 100644 index b1582632..00000000 --- a/parser/src/node.c +++ /dev/null @@ -1,776 +0,0 @@ -#include -#include "./subtree.h" -#include "./tree.h" -#include "./language.h" - -typedef struct { - Subtree parent; - const t_tree *tree; - Length position; - uint32_t child_index; - uint32_t structural_child_index; - const t_symbol *alias_sequence; -} NodeChildIterator; - -// TSNode - constructors - -t_parse_node ts_node_new( - const t_tree *tree, - const Subtree *subtree, - Length position, - t_symbol alias -) { - return (t_parse_node) { - {position.bytes, position.extent.row, position.extent.column, alias}, - subtree, - tree, - }; -} - -static inline t_parse_node ts_node__null(void) { - return ts_node_new(NULL, NULL, length_zero(), 0); -} - -// TSNode - accessors - -uint32_t ts_node_start_byte(t_parse_node self) { - return self.context[0]; -} - -t_point 
ts_node_start_point(t_parse_node self) { - return (t_point) {self.context[1], self.context[2]}; -} - -static inline uint32_t ts_node__alias(const t_parse_node *self) { - return self->context[3]; -} - -static inline Subtree ts_node__subtree(t_parse_node self) { - return *(const Subtree *)self.id; -} - -// NodeChildIterator - -static inline NodeChildIterator ts_node_iterate_children(const t_parse_node *node) { - Subtree subtree = ts_node__subtree(*node); - if (ts_subtree_child_count(subtree) == 0) { - return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; - } - const t_symbol *alias_sequence = ts_language_alias_sequence( - node->tree->language, - subtree.ptr->production_id - ); - return (NodeChildIterator) { - .tree = node->tree, - .parent = subtree, - .position = {ts_node_start_byte(*node), ts_node_start_point(*node)}, - .child_index = 0, - .structural_child_index = 0, - .alias_sequence = alias_sequence, - }; -} - -static inline bool ts_node_child_iterator_done(NodeChildIterator *self) { - return self->child_index == self->parent.ptr->child_count; -} - -static inline bool ts_node_child_iterator_next( - NodeChildIterator *self, - t_parse_node *result -) { - if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false; - const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; - t_symbol alias_symbol = 0; - if (!ts_subtree_extra(*child)) { - if (self->alias_sequence) { - alias_symbol = self->alias_sequence[self->structural_child_index]; - } - self->structural_child_index++; - } - if (self->child_index > 0) { - self->position = length_add(self->position, ts_subtree_padding(*child)); - } - *result = ts_node_new( - self->tree, - child, - self->position, - alias_symbol - ); - self->position = length_add(self->position, ts_subtree_size(*child)); - self->child_index++; - return true; -} - -// TSNode - private - -static inline bool ts_node__is_relevant(t_parse_node self, bool include_anonymous) { - Subtree tree = 
ts_node__subtree(self); - if (include_anonymous) { - return ts_subtree_visible(tree) || ts_node__alias(&self); - } else { - t_symbol alias = ts_node__alias(&self); - if (alias) { - return ts_language_symbol_metadata(self.tree->language, alias).named; - } else { - return ts_subtree_visible(tree) && ts_subtree_named(tree); - } - } -} - -static inline uint32_t ts_node__relevant_child_count( - t_parse_node self, - bool include_anonymous -) { - Subtree tree = ts_node__subtree(self); - if (ts_subtree_child_count(tree) > 0) { - if (include_anonymous) { - return tree.ptr->visible_child_count; - } else { - return tree.ptr->named_child_count; - } - } else { - return 0; - } -} - -static inline t_parse_node ts_node__child( - t_parse_node self, - uint32_t child_index, - bool include_anonymous -) { - t_parse_node result = self; - bool did_descend = true; - - while (did_descend) { - did_descend = false; - - t_parse_node child; - uint32_t index = 0; - NodeChildIterator iterator = ts_node_iterate_children(&result); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (ts_node__is_relevant(child, include_anonymous)) { - if (index == child_index) { - return child; - } - index++; - } else { - uint32_t grandchild_index = child_index - index; - uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous); - if (grandchild_index < grandchild_count) { - did_descend = true; - result = child; - child_index = grandchild_index; - break; - } - index += grandchild_count; - } - } - } - - return ts_node__null(); -} - -static bool ts_subtree_has_trailing_empty_descendant( - Subtree self, - Subtree other -) { - for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) { - Subtree child = ts_subtree_children(self)[i]; - if (ts_subtree_total_bytes(child) > 0) break; - if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) { - return true; - } - } - return false; -} - -static inline t_parse_node 
ts_node__prev_sibling(t_parse_node self, bool include_anonymous) { - Subtree self_subtree = ts_node__subtree(self); - bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0; - uint32_t target_end_byte = ts_node_end_byte(self); - - t_parse_node node = ts_node_parent(self); - t_parse_node earlier_node = ts_node__null(); - bool earlier_node_is_relevant = false; - - while (!ts_node_is_null(node)) { - t_parse_node earlier_child = ts_node__null(); - bool earlier_child_is_relevant = false; - bool found_child_containing_target = false; - - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (child.id == self.id) break; - if (iterator.position.bytes > target_end_byte) { - found_child_containing_target = true; - break; - } - - if (iterator.position.bytes == target_end_byte && - (!self_is_empty || - ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) { - found_child_containing_target = true; - break; - } - - if (ts_node__is_relevant(child, include_anonymous)) { - earlier_child = child; - earlier_child_is_relevant = true; - } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { - earlier_child = child; - earlier_child_is_relevant = false; - } - } - - if (found_child_containing_target) { - if (!ts_node_is_null(earlier_child)) { - earlier_node = earlier_child; - earlier_node_is_relevant = earlier_child_is_relevant; - } - node = child; - } else if (earlier_child_is_relevant) { - return earlier_child; - } else if (!ts_node_is_null(earlier_child)) { - node = earlier_child; - } else if (earlier_node_is_relevant) { - return earlier_node; - } else { - node = earlier_node; - earlier_node = ts_node__null(); - earlier_node_is_relevant = false; - } - } - - return ts_node__null(); -} - -static inline t_parse_node ts_node__next_sibling(t_parse_node self, bool include_anonymous) { - uint32_t target_end_byte = ts_node_end_byte(self); - - 
t_parse_node node = ts_node_parent(self); - t_parse_node later_node = ts_node__null(); - bool later_node_is_relevant = false; - - while (!ts_node_is_null(node)) { - t_parse_node later_child = ts_node__null(); - bool later_child_is_relevant = false; - t_parse_node child_containing_target = ts_node__null(); - - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (iterator.position.bytes < target_end_byte) continue; - if (ts_node_start_byte(child) <= ts_node_start_byte(self)) { - if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) { - child_containing_target = child; - } - } else if (ts_node__is_relevant(child, include_anonymous)) { - later_child = child; - later_child_is_relevant = true; - break; - } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { - later_child = child; - later_child_is_relevant = false; - break; - } - } - - if (!ts_node_is_null(child_containing_target)) { - if (!ts_node_is_null(later_child)) { - later_node = later_child; - later_node_is_relevant = later_child_is_relevant; - } - node = child_containing_target; - } else if (later_child_is_relevant) { - return later_child; - } else if (!ts_node_is_null(later_child)) { - node = later_child; - } else if (later_node_is_relevant) { - return later_node; - } else { - node = later_node; - } - } - - return ts_node__null(); -} - -static inline t_parse_node ts_node__first_child_for_byte( - t_parse_node self, - uint32_t goal, - bool include_anonymous -) { - t_parse_node node = self; - bool did_descend = true; - - while (did_descend) { - did_descend = false; - - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (ts_node_end_byte(child) > goal) { - if (ts_node__is_relevant(child, include_anonymous)) { - return child; - } else if (ts_node_child_count(child) > 0) { - did_descend = true; - 
node = child; - break; - } - } - } - } - - return ts_node__null(); -} - -static inline t_parse_node ts_node__descendant_for_byte_range( - t_parse_node self, - uint32_t range_start, - uint32_t range_end, - bool include_anonymous -) { - t_parse_node node = self; - t_parse_node last_visible_node = self; - - bool did_descend = true; - while (did_descend) { - did_descend = false; - - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) { - uint32_t node_end = iterator.position.bytes; - - // The end of this node must extend far enough forward to touch - // the end of the range and exceed the start of the range. - if (node_end < range_end) continue; - if (node_end <= range_start) continue; - - // The start of this node must extend far enough backward to - // touch the start of the range. - if (range_start < ts_node_start_byte(child)) break; - - node = child; - if (ts_node__is_relevant(node, include_anonymous)) { - last_visible_node = node; - } - did_descend = true; - break; - } - } - - return last_visible_node; -} - -static inline t_parse_node ts_node__descendant_for_point_range( - t_parse_node self, - t_point range_start, - t_point range_end, - bool include_anonymous -) { - t_parse_node node = self; - t_parse_node last_visible_node = self; - - bool did_descend = true; - while (did_descend) { - did_descend = false; - - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) { - t_point node_end = iterator.position.extent; - - // The end of this node must extend far enough forward to touch - // the end of the range and exceed the start of the range. - if (point_lt(node_end, range_end)) continue; - if (point_lte(node_end, range_start)) continue; - - // The start of this node must extend far enough backward to - // touch the start of the range. 
- if (point_lt(range_start, ts_node_start_point(child))) break; - - node = child; - if (ts_node__is_relevant(node, include_anonymous)) { - last_visible_node = node; - } - did_descend = true; - break; - } - } - - return last_visible_node; -} - -// TSNode - public - -uint32_t ts_node_end_byte(t_parse_node self) { - return ts_node_start_byte(self) + ts_subtree_size(ts_node__subtree(self)).bytes; -} - -t_point ts_node_end_point(t_parse_node self) { - return point_add(ts_node_start_point(self), ts_subtree_size(ts_node__subtree(self)).extent); -} - -t_symbol ts_node_symbol(t_parse_node self) { - t_symbol symbol = ts_node__alias(&self); - if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); - return ts_language_public_symbol(self.tree->language, symbol); -} - -const char *ts_node_type(t_parse_node self) { - t_symbol symbol = ts_node__alias(&self); - if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); - return ts_language_symbol_name(self.tree->language, symbol); -} - -const t_language *ts_node_language(t_parse_node self) { - return self.tree->language; -} - -t_symbol ts_node_grammar_symbol(t_parse_node self) { - return ts_subtree_symbol(ts_node__subtree(self)); -} - -const char *ts_node_grammar_type(t_parse_node self) { - t_symbol symbol = ts_subtree_symbol(ts_node__subtree(self)); - return ts_language_symbol_name(self.tree->language, symbol); -} - -char *ts_node_string(t_parse_node self) { - t_symbol alias_symbol = ts_node__alias(&self); - return ts_subtree_string( - ts_node__subtree(self), - alias_symbol, - ts_language_symbol_metadata(self.tree->language, alias_symbol).visible, - self.tree->language, - false - ); -} - -bool ts_node_eq(t_parse_node self, t_parse_node other) { - return self.tree == other.tree && self.id == other.id; -} - -bool ts_node_is_null(t_parse_node self) { - return self.id == 0; -} - -bool ts_node_is_extra(t_parse_node self) { - return ts_subtree_extra(ts_node__subtree(self)); -} - -bool ts_node_is_named(t_parse_node self) 
{ - t_symbol alias = ts_node__alias(&self); - return alias - ? ts_language_symbol_metadata(self.tree->language, alias).named - : ts_subtree_named(ts_node__subtree(self)); -} - -bool ts_node_is_missing(t_parse_node self) { - return ts_subtree_missing(ts_node__subtree(self)); -} - -bool ts_node_has_changes(t_parse_node self) { - return ts_subtree_has_changes(ts_node__subtree(self)); -} - -bool ts_node_has_error(t_parse_node self) { - return ts_subtree_error_cost(ts_node__subtree(self)) > 0; -} - -bool ts_node_is_error(t_parse_node self) { - t_symbol symbol = ts_node_symbol(self); - return symbol == ts_builtin_sym_error; -} - -uint32_t ts_node_descendant_count(t_parse_node self) { - return ts_subtree_visible_descendant_count(ts_node__subtree(self)) + 1; -} - -t_state_id ts_node_parse_state(t_parse_node self) { - return ts_subtree_parse_state(ts_node__subtree(self)); -} - -t_state_id ts_node_next_parse_state(t_parse_node self) { - const t_language *language = self.tree->language; - uint16_t state = ts_node_parse_state(self); - if (state == TS_TREE_STATE_NONE) { - return TS_TREE_STATE_NONE; - } - uint16_t symbol = ts_node_grammar_symbol(self); - return ts_language_next_state(language, state, symbol); -} - -t_parse_node ts_node_parent(t_parse_node self) { - t_parse_node node = ts_tree_root_node(self.tree); - if (node.id == self.id) return ts_node__null(); - - while (true) { - t_parse_node next_node = ts_node_child_containing_descendant(node, self); - if (ts_node_is_null(next_node)) break; - node = next_node; - } - - return node; -} - -t_parse_node ts_node_child_containing_descendant(t_parse_node self, t_parse_node subnode) { - uint32_t start_byte = ts_node_start_byte(subnode); - uint32_t end_byte = ts_node_end_byte(subnode); - - do { - NodeChildIterator iter = ts_node_iterate_children(&self); - do { - if ( - !ts_node_child_iterator_next(&iter, &self) - || ts_node_start_byte(self) > start_byte - || self.id == subnode.id - ) { - return ts_node__null(); - } - } while 
(iter.position.bytes < end_byte || ts_node_child_count(self) == 0); - } while (!ts_node__is_relevant(self, true)); - - return self; -} - -t_parse_node ts_node_child(t_parse_node self, uint32_t child_index) { - return ts_node__child(self, child_index, true); -} - -t_parse_node ts_node_named_child(t_parse_node self, uint32_t child_index) { - return ts_node__child(self, child_index, false); -} - -t_parse_node ts_node_child_by_field_id(t_parse_node self, t_field_id field_id) { -recur: - if (!field_id || ts_node_child_count(self) == 0) return ts_node__null(); - - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map( - self.tree->language, - ts_node__subtree(self).ptr->production_id, - &field_map, - &field_map_end - ); - if (field_map == field_map_end) return ts_node__null(); - - // The field mappings are sorted by their field id. Scan all - // the mappings to find the ones for the given field id. - while (field_map->field_id < field_id) { - field_map++; - if (field_map == field_map_end) return ts_node__null(); - } - while (field_map_end[-1].field_id > field_id) { - field_map_end--; - if (field_map == field_map_end) return ts_node__null(); - } - - t_parse_node child; - NodeChildIterator iterator = ts_node_iterate_children(&self); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (!ts_subtree_extra(ts_node__subtree(child))) { - uint32_t index = iterator.structural_child_index - 1; - if (index < field_map->child_index) continue; - - // Hidden nodes' fields are "inherited" by their visible parent. - if (field_map->inherited) { - - // If this is the *last* possible child node for this field, - // then perform a tail call to avoid recursion. - if (field_map + 1 == field_map_end) { - self = child; - goto recur; - } - - // Otherwise, descend into this child, but if it doesn't contain - // the field, continue searching subsequent children. 
- else { - t_parse_node result = ts_node_child_by_field_id(child, field_id); - if (result.id) return result; - field_map++; - if (field_map == field_map_end) return ts_node__null(); - } - } - - else if (ts_node__is_relevant(child, true)) { - return child; - } - - // If the field refers to a hidden node with visible children, - // return the first visible child. - else if (ts_node_child_count(child) > 0 ) { - return ts_node_child(child, 0); - } - - // Otherwise, continue searching subsequent children. - else { - field_map++; - if (field_map == field_map_end) return ts_node__null(); - } - } - } - - return ts_node__null(); -} - -static inline const char *ts_node__field_name_from_language(t_parse_node self, uint32_t structural_child_index) { - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map( - self.tree->language, - ts_node__subtree(self).ptr->production_id, - &field_map, - &field_map_end - ); - for (; field_map != field_map_end; field_map++) { - if (!field_map->inherited && field_map->child_index == structural_child_index) { - return self.tree->language->field_names[field_map->field_id]; - } - } - return NULL; -} - -const char *ts_node_field_name_for_child(t_parse_node self, uint32_t child_index) { - t_parse_node result = self; - bool did_descend = true; - const char *inherited_field_name = NULL; - - while (did_descend) { - did_descend = false; - - t_parse_node child; - uint32_t index = 0; - NodeChildIterator iterator = ts_node_iterate_children(&result); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (ts_node__is_relevant(child, true)) { - if (index == child_index) { - const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); - if (field_name) return field_name; - return inherited_field_name; - } - index++; - } else { - uint32_t grandchild_index = child_index - index; - uint32_t grandchild_count = ts_node__relevant_child_count(child, true); - if (grandchild_index < 
grandchild_count) { - const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); - if (field_name) inherited_field_name = field_name; - - did_descend = true; - result = child; - child_index = grandchild_index; - break; - } - index += grandchild_count; - } - } - } - - return NULL; -} - -t_parse_node ts_node_child_by_field_name( - t_parse_node self, - const char *name, - uint32_t name_length -) { - t_field_id field_id = ts_language_field_id_for_name( - self.tree->language, - name, - name_length - ); - return ts_node_child_by_field_id(self, field_id); -} - -uint32_t ts_node_child_count(t_parse_node self) { - Subtree tree = ts_node__subtree(self); - if (ts_subtree_child_count(tree) > 0) { - return tree.ptr->visible_child_count; - } else { - return 0; - } -} - -uint32_t ts_node_named_child_count(t_parse_node self) { - Subtree tree = ts_node__subtree(self); - if (ts_subtree_child_count(tree) > 0) { - return tree.ptr->named_child_count; - } else { - return 0; - } -} - -t_parse_node ts_node_next_sibling(t_parse_node self) { - return ts_node__next_sibling(self, true); -} - -t_parse_node ts_node_next_named_sibling(t_parse_node self) { - return ts_node__next_sibling(self, false); -} - -t_parse_node ts_node_prev_sibling(t_parse_node self) { - return ts_node__prev_sibling(self, true); -} - -t_parse_node ts_node_prev_named_sibling(t_parse_node self) { - return ts_node__prev_sibling(self, false); -} - -t_parse_node ts_node_first_child_for_byte(t_parse_node self, uint32_t byte) { - return ts_node__first_child_for_byte(self, byte, true); -} - -t_parse_node ts_node_first_named_child_for_byte(t_parse_node self, uint32_t byte) { - return ts_node__first_child_for_byte(self, byte, false); -} - -t_parse_node ts_node_descendant_for_byte_range( - t_parse_node self, - uint32_t start, - uint32_t end -) { - return ts_node__descendant_for_byte_range(self, start, end, true); -} - -t_parse_node ts_node_named_descendant_for_byte_range( - t_parse_node 
self, - uint32_t start, - uint32_t end -) { - return ts_node__descendant_for_byte_range(self, start, end, false); -} - -t_parse_node ts_node_descendant_for_point_range( - t_parse_node self, - t_point start, - t_point end -) { - return ts_node__descendant_for_point_range(self, start, end, true); -} - -t_parse_node ts_node_named_descendant_for_point_range( - t_parse_node self, - t_point start, - t_point end -) { - return ts_node__descendant_for_point_range(self, start, end, false); -} - -void ts_node_edit(t_parse_node *self, const t_input_edit *edit) { - uint32_t start_byte = ts_node_start_byte(*self); - t_point start_point = ts_node_start_point(*self); - - if (start_byte >= edit->old_end_byte) { - start_byte = edit->new_end_byte + (start_byte - edit->old_end_byte); - start_point = point_add(edit->new_end_point, point_sub(start_point, edit->old_end_point)); - } else if (start_byte > edit->start_byte) { - start_byte = edit->new_end_byte; - start_point = edit->new_end_point; - } - - self->context[0] = start_byte; - self->context[1] = start_point.row; - self->context[2] = start_point.column; -} diff --git a/parser/src/parser.c b/parser/src/parser.c deleted file mode 100644 index 7ad2d788..00000000 --- a/parser/src/parser.c +++ /dev/null @@ -1,2091 +0,0 @@ -#define _POSIX_C_SOURCE 200112L - -#include -#include -#include -#include -#include -#include -#include "./api.h" -#include "./alloc.h" -#include "./array.h" -#include "./atomic.h" -#include "./clock.h" -#include "./error_costs.h" -#include "./get_changed_ranges.h" -#include "./language.h" -#include "./length.h" -#include "./lexer.h" -#include "./reduce_action.h" -#include "./reusable_node.h" -#include "./stack.h" -#include "./subtree.h" -#include "./tree.h" - - -#define LOG(...) 
\ - if (self->lexer.logger.log || self->dot_graph_file) { \ - snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \ - ts_parser__log(self); \ - } - -#define LOG_LOOKAHEAD(symbol_name, size) \ - if (self->lexer.logger.log || self->dot_graph_file) { \ - char *buf = self->lexer.debug_buffer; \ - const char *symbol = symbol_name; \ - int off = sprintf(buf, "lexed_lookahead sym:"); \ - for ( \ - int i = 0; \ - symbol[i] != '\0' \ - && off < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; \ - i++ \ - ) { \ - switch (symbol[i]) { \ - case '\t': buf[off++] = '\\'; buf[off++] = 't'; break; \ - case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break; \ - case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break; \ - case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break; \ - case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break; \ - case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \ - default: buf[off++] = symbol[i]; break; \ - } \ - } \ - snprintf( \ - buf + off, \ - TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off, \ - ", size:%u", \ - size \ - ); \ - ts_parser__log(self); \ - } - -#define LOG_STACK() \ - if (self->dot_graph_file) { \ - ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \ - fputs("\n\n", self->dot_graph_file); \ - } - -#define LOG_TREE(tree) \ - if (self->dot_graph_file) { \ - ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \ - fputs("\n", self->dot_graph_file); \ - } - -#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol) - -#define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree)) - -static const unsigned MAX_VERSION_COUNT = 6; -static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; -static const unsigned MAX_SUMMARY_DEPTH = 16; -static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; -static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100; - -typedef struct { - Subtree token; - Subtree last_external_token; - uint32_t byte_index; -} 
TokenCache; - -struct t_parser { - Lexer lexer; - Stack *stack; - SubtreePool tree_pool; - const t_language *language; - ReduceActionSet reduce_actions; - Subtree finished_tree; - SubtreeArray trailing_extras; - SubtreeArray trailing_extras2; - SubtreeArray scratch_trees; - TokenCache token_cache; - ReusableNode reusable_node; - void *external_scanner_payload; - FILE *dot_graph_file; - TSClock end_clock; - TSDuration timeout_duration; - unsigned accept_count; - unsigned operation_count; - const volatile size_t *cancellation_flag; - Subtree old_tree; - TSRangeArray included_range_differences; - unsigned included_range_difference_index; - bool has_scanner_error; -}; - -typedef struct { - unsigned cost; - unsigned node_count; - int dynamic_precedence; - bool is_in_error; -} ErrorStatus; - -typedef enum { - ErrorComparisonTakeLeft, - ErrorComparisonPreferLeft, - ErrorComparisonNone, - ErrorComparisonPreferRight, - ErrorComparisonTakeRight, -} ErrorComparison; - -typedef struct { - const char *string; - uint32_t length; -} TSStringInput; - -// StringInput - -static const char *ts_string_input_read( - void *_self, - uint32_t byte, - t_point point, - uint32_t *length -) { - (void)point; - TSStringInput *self = (TSStringInput *)_self; - if (byte >= self->length) { - *length = 0; - return ""; - } else { - *length = self->length - byte; - return self->string + byte; - } -} - -// Parser - Private - -static void ts_parser__log(t_parser *self) { - if (self->lexer.logger.log) { - self->lexer.logger.log( - self->lexer.logger.payload, - TSLogTypeParse, - self->lexer.debug_buffer - ); - } - - if (self->dot_graph_file) { - fprintf(self->dot_graph_file, "graph {\nlabel=\""); - for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) { - if (*chr == '"' || *chr == '\\') fputc('\\', self->dot_graph_file); - fputc(*chr, self->dot_graph_file); - } - fprintf(self->dot_graph_file, "\"\n}\n\n"); - } -} - -static bool ts_parser__breakdown_top_of_stack( - t_parser *self, - 
StackVersion version -) { - bool did_break_down = false; - bool pending = false; - - do { - StackSliceArray pop = ts_stack_pop_pending(self->stack, version); - if (!pop.size) break; - - did_break_down = true; - pending = false; - for (uint32_t i = 0; i < pop.size; i++) { - StackSlice slice = pop.contents[i]; - t_state_id state = ts_stack_state(self->stack, slice.version); - Subtree parent = *array_front(&slice.subtrees); - - for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) { - Subtree child = ts_subtree_children(parent)[j]; - pending = ts_subtree_child_count(child) > 0; - - if (ts_subtree_is_error(child)) { - state = ERROR_STATE; - } else if (!ts_subtree_extra(child)) { - state = ts_language_next_state(self->language, state, ts_subtree_symbol(child)); - } - - ts_subtree_retain(child); - ts_stack_push(self->stack, slice.version, child, pending, state); - } - - for (uint32_t j = 1; j < slice.subtrees.size; j++) { - Subtree tree = slice.subtrees.contents[j]; - ts_stack_push(self->stack, slice.version, tree, false, state); - } - - ts_subtree_release(&self->tree_pool, parent); - array_delete(&slice.subtrees); - - LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent)); - LOG_STACK(); - } - } while (pending); - - return did_break_down; -} - -static void ts_parser__breakdown_lookahead( - t_parser *self, - Subtree *lookahead, - t_state_id state, - ReusableNode *reusable_node -) { - bool did_descend = false; - Subtree tree = reusable_node_tree(reusable_node); - while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) { - LOG("state_mismatch sym:%s", TREE_NAME(tree)); - reusable_node_descend(reusable_node); - tree = reusable_node_tree(reusable_node); - did_descend = true; - } - - if (did_descend) { - ts_subtree_release(&self->tree_pool, *lookahead); - *lookahead = tree; - ts_subtree_retain(*lookahead); - } -} - -static ErrorComparison ts_parser__compare_versions( - t_parser *self, - ErrorStatus a, - ErrorStatus b -) { - 
(void)self; - if (!a.is_in_error && b.is_in_error) { - if (a.cost < b.cost) { - return ErrorComparisonTakeLeft; - } else { - return ErrorComparisonPreferLeft; - } - } - - if (a.is_in_error && !b.is_in_error) { - if (b.cost < a.cost) { - return ErrorComparisonTakeRight; - } else { - return ErrorComparisonPreferRight; - } - } - - if (a.cost < b.cost) { - if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) { - return ErrorComparisonTakeLeft; - } else { - return ErrorComparisonPreferLeft; - } - } - - if (b.cost < a.cost) { - if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) { - return ErrorComparisonTakeRight; - } else { - return ErrorComparisonPreferRight; - } - } - - if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft; - if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight; - return ErrorComparisonNone; -} - -static ErrorStatus ts_parser__version_status( - t_parser *self, - StackVersion version -) { - unsigned cost = ts_stack_error_cost(self->stack, version); - bool is_paused = ts_stack_is_paused(self->stack, version); - if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE; - return (ErrorStatus) { - .cost = cost, - .node_count = ts_stack_node_count_since_error(self->stack, version), - .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), - .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE - }; -} - -static bool ts_parser__better_version_exists( - t_parser *self, - StackVersion version, - bool is_in_error, - unsigned cost -) { - if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) { - return true; - } - - Length position = ts_stack_position(self->stack, version); - ErrorStatus status = { - .cost = cost, - .is_in_error = is_in_error, - .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), - .node_count = ts_stack_node_count_since_error(self->stack, version), - }; - - for (StackVersion 
i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { - if (i == version || - !ts_stack_is_active(self->stack, i) || - ts_stack_position(self->stack, i).bytes < position.bytes) continue; - ErrorStatus status_i = ts_parser__version_status(self, i); - switch (ts_parser__compare_versions(self, status, status_i)) { - case ErrorComparisonTakeRight: - return true; - case ErrorComparisonPreferRight: - if (ts_stack_can_merge(self->stack, i, version)) return true; - break; - default: - break; - } - } - - return false; -} - -static bool ts_parser__call_main_lex_fn(t_parser *self, TSLexMode lex_mode) { - - return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); - -} - -static bool ts_parser__call_keyword_lex_fn(t_parser *self, TSLexMode lex_mode) { - (void)(lex_mode); - - return self->language->keyword_lex_fn(&self->lexer.data, 0); - -} - -static void ts_parser__external_scanner_create( - t_parser *self -) { - if (self->language && self->language->external_scanner.states) { -if (self->language->external_scanner.create) { - self->external_scanner_payload = self->language->external_scanner.create(); - - } -}} - -static void ts_parser__external_scanner_destroy( - t_parser *self -) { - if ( - self->language && - self->external_scanner_payload && - self->language->external_scanner.destroy - ) { - self->language->external_scanner.destroy( - self->external_scanner_payload - ); - } - self->external_scanner_payload = NULL; -} - -static unsigned ts_parser__external_scanner_serialize( - t_parser *self -) { - uint32_t length = self->language->external_scanner.serialize( - self->external_scanner_payload, - self->lexer.debug_buffer - ); - assert(length <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE); - return length; - -} - -static void ts_parser__external_scanner_deserialize( - t_parser *self, - Subtree external_token -) { - const char *data = NULL; - uint32_t length = 0; - if (external_token.ptr) { - data = 
ts_external_scanner_state_data(&external_token.ptr->external_scanner_state); - length = external_token.ptr->external_scanner_state.length; - } - - - self->language->external_scanner.deserialize( - self->external_scanner_payload, - data, - length - ); - -} - -static bool ts_parser__external_scanner_scan( - t_parser *self, - t_state_id external_lex_state -) { - - const bool *valid_external_tokens = ts_language_enabled_external_tokens( - self->language, - external_lex_state - ); - return self->language->external_scanner.scan( - self->external_scanner_payload, - &self->lexer.data, - valid_external_tokens - ); - -} - -static bool ts_parser__can_reuse_first_leaf( - t_parser *self, - t_state_id state, - Subtree tree, - TableEntry *table_entry -) { - TSLexMode current_lex_mode = self->language->lex_modes[state]; - t_symbol leaf_symbol = ts_subtree_leaf_symbol(tree); - t_state_id leaf_state = ts_subtree_leaf_parse_state(tree); - TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state]; - - // At the end of a non-terminal extra node, the lexer normally returns - // NULL, which indicates that the parser should look for a reduce action - // at symbol `0`. Avoid reusing tokens in this situation to ensure that - // the same thing happens when incrementally reparsing. - if (current_lex_mode.lex_state == (uint16_t)(-1)) return false; - - // If the token was created in a state with the same set of lookaheads, it is reusable. - if ( - table_entry->action_count > 0 && - memcmp(&leaf_lex_mode, ¤t_lex_mode, sizeof(TSLexMode)) == 0 && - ( - leaf_symbol != self->language->keyword_capture_token || - (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state) - ) - ) return true; - - // Empty tokens are not reusable in states with different lookaheads. - if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false; - - // If the current state allows external tokens or other tokens that conflict with this - // token, this token is not reusable. 
- return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable; -} - -static Subtree ts_parser__lex( - t_parser *self, - StackVersion version, - t_state_id parse_state -) { - TSLexMode lex_mode = self->language->lex_modes[parse_state]; - if (lex_mode.lex_state == (uint16_t)-1) { - LOG("no_lookahead_after_non_terminal_extra"); - return NULL_SUBTREE; - } - - const Length start_position = ts_stack_position(self->stack, version); - const Subtree external_token = ts_stack_last_external_token(self->stack, version); - - bool found_external_token = false; - bool error_mode = parse_state == ERROR_STATE; - bool skipped_error = false; - bool called_get_column = false; - int32_t first_error_character = 0; - Length error_start_position = length_zero(); - Length error_end_position = length_zero(); - uint32_t lookahead_end_byte = 0; - uint32_t external_scanner_state_len = 0; - bool external_scanner_state_changed = false; - ts_lexer_reset(&self->lexer, start_position); - - for (;;) { - bool found_token = false; - Length current_position = self->lexer.current_position; - - if (lex_mode.external_lex_state != 0) { - LOG( - "lex_external state:%d, row:%u, column:%u", - lex_mode.external_lex_state, - current_position.extent.row, - current_position.extent.column - ); - ts_lexer_start(&self->lexer); - ts_parser__external_scanner_deserialize(self, external_token); - found_token = ts_parser__external_scanner_scan(self, lex_mode.external_lex_state); - if (self->has_scanner_error) return NULL_SUBTREE; - ts_lexer_finish(&self->lexer, &lookahead_end_byte); - - if (found_token) { - external_scanner_state_len = ts_parser__external_scanner_serialize(self); - external_scanner_state_changed = !ts_external_scanner_state_eq( - ts_subtree_external_scanner_state(external_token), - self->lexer.debug_buffer, - external_scanner_state_len - ); - - // When recovering from an error, ignore any zero-length external tokens - // unless they have changed the external scanner's state. 
This helps to - // avoid infinite loops which could otherwise occur, because the lexer is - // looking for any possible token, instead of looking for the specific set of - // tokens that are valid in some parse state. - // - // Note that it's possible that the token end position may be *before* the - // original position of the lexer because of the way that tokens are positioned - // at included range boundaries: when a token is terminated at the start of - // an included range, it is marked as ending at the *end* of the preceding - // included range. - if ( - self->lexer.token_end_position.bytes <= current_position.bytes && - (error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) && - !external_scanner_state_changed - ) { - LOG( - "ignore_empty_external_token symbol:%s", - SYM_NAME(self->language->external_scanner.symbol_map[self->lexer.data.result_symbol]) - ) - found_token = false; - } - } - - if (found_token) { - found_external_token = true; - called_get_column = self->lexer.did_get_column; - break; - } - - ts_lexer_reset(&self->lexer, current_position); - } - - LOG( - "lex_internal state:%d, row:%u, column:%u", - lex_mode.lex_state, - current_position.extent.row, - current_position.extent.column - ); - ts_lexer_start(&self->lexer); - found_token = ts_parser__call_main_lex_fn(self, lex_mode); - ts_lexer_finish(&self->lexer, &lookahead_end_byte); - if (found_token) break; - - if (!error_mode) { - error_mode = true; - lex_mode = self->language->lex_modes[ERROR_STATE]; - ts_lexer_reset(&self->lexer, start_position); - continue; - } - - if (!skipped_error) { - LOG("skip_unrecognized_character"); - skipped_error = true; - error_start_position = self->lexer.token_start_position; - error_end_position = self->lexer.token_start_position; - first_error_character = self->lexer.data.lookahead; - } - - if (self->lexer.current_position.bytes == error_end_position.bytes) { - if (self->lexer.data.eof(&self->lexer.data)) { - self->lexer.data.result_symbol = 
ts_builtin_sym_error; - break; - } - self->lexer.data.advance(&self->lexer.data, false); - } - - error_end_position = self->lexer.current_position; - } - - Subtree result; - if (skipped_error) { - Length padding = length_sub(error_start_position, start_position); - Length size = length_sub(error_end_position, error_start_position); - uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes; - result = ts_subtree_new_error( - &self->tree_pool, - first_error_character, - padding, - size, - lookahead_bytes, - parse_state, - self->language - ); - } else { - bool is_keyword = false; - t_symbol symbol = self->lexer.data.result_symbol; - Length padding = length_sub(self->lexer.token_start_position, start_position); - Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position); - uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes; - - if (found_external_token) { - symbol = self->language->external_scanner.symbol_map[symbol]; - } else if (symbol == self->language->keyword_capture_token && symbol != 0) { - uint32_t end_byte = self->lexer.token_end_position.bytes; - ts_lexer_reset(&self->lexer, self->lexer.token_start_position); - ts_lexer_start(&self->lexer); - - is_keyword = ts_parser__call_keyword_lex_fn(self, lex_mode); - - if ( - is_keyword && - self->lexer.token_end_position.bytes == end_byte && - ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol) - ) { - symbol = self->lexer.data.result_symbol; - } - } - - result = ts_subtree_new_leaf( - &self->tree_pool, - symbol, - padding, - size, - lookahead_bytes, - parse_state, - found_external_token, - called_get_column, - is_keyword, - self->language - ); - - if (found_external_token) { - MutableSubtree mut_result = ts_subtree_to_mut_unsafe(result); - ts_external_scanner_state_init( - &mut_result.ptr->external_scanner_state, - self->lexer.debug_buffer, - external_scanner_state_len - ); - 
mut_result.ptr->has_external_scanner_state_change = external_scanner_state_changed; - } - } - - LOG_LOOKAHEAD( - SYM_NAME(ts_subtree_symbol(result)), - ts_subtree_total_size(result).bytes - ); - return result; -} - -static Subtree ts_parser__get_cached_token( - t_parser *self, - t_state_id state, - size_t position, - Subtree last_external_token, - TableEntry *table_entry -) { - TokenCache *cache = &self->token_cache; - if ( - cache->token.ptr && cache->byte_index == position && - ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token) - ) { - ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry); - if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) { - ts_subtree_retain(cache->token); - return cache->token; - } - } - return NULL_SUBTREE; -} - -static void ts_parser__set_cached_token( - t_parser *self, - uint32_t byte_index, - Subtree last_external_token, - Subtree token -) { - TokenCache *cache = &self->token_cache; - if (token.ptr) ts_subtree_retain(token); - if (last_external_token.ptr) ts_subtree_retain(last_external_token); - if (cache->token.ptr) ts_subtree_release(&self->tree_pool, cache->token); - if (cache->last_external_token.ptr) ts_subtree_release(&self->tree_pool, cache->last_external_token); - cache->token = token; - cache->byte_index = byte_index; - cache->last_external_token = last_external_token; -} - -static bool ts_parser__has_included_range_difference( - const t_parser *self, - uint32_t start_position, - uint32_t end_position -) { - return ts_range_array_intersects( - &self->included_range_differences, - self->included_range_difference_index, - start_position, - end_position - ); -} - -static Subtree ts_parser__reuse_node( - t_parser *self, - StackVersion version, - t_state_id *state, - uint32_t position, - Subtree last_external_token, - TableEntry *table_entry -) { - Subtree result; - while ((result = reusable_node_tree(&self->reusable_node)).ptr) { - 
uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node); - uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result); - - // Do not reuse an EOF node if the included ranges array has changes - // later on in the file. - if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX; - - if (byte_offset > position) { - LOG("before_reusable_node symbol:%s", TREE_NAME(result)); - break; - } - - if (byte_offset < position) { - LOG("past_reusable_node symbol:%s", TREE_NAME(result)); - if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) { - reusable_node_advance(&self->reusable_node); - } - continue; - } - - if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) { - LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result)); - reusable_node_advance(&self->reusable_node); - continue; - } - - const char *reason = NULL; - if (ts_subtree_has_changes(result)) { - reason = "has_changes"; - } else if (ts_subtree_is_error(result)) { - reason = "is_error"; - } else if (ts_subtree_missing(result)) { - reason = "is_missing"; - } else if (ts_subtree_is_fragile(result)) { - reason = "is_fragile"; - } else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) { - reason = "contains_different_included_range"; - } - - if (reason) { - LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result)); - if (!reusable_node_descend(&self->reusable_node)) { - reusable_node_advance(&self->reusable_node); - ts_parser__breakdown_top_of_stack(self, version); - *state = ts_stack_state(self->stack, version); - } - continue; - } - - t_symbol leaf_symbol = ts_subtree_leaf_symbol(result); - ts_language_table_entry(self->language, *state, leaf_symbol, table_entry); - if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) { - LOG( - "cant_reuse_node symbol:%s, first_leaf_symbol:%s", - TREE_NAME(result), - SYM_NAME(leaf_symbol) - ); 
- reusable_node_advance_past_leaf(&self->reusable_node); - break; - } - - LOG("reuse_node symbol:%s", TREE_NAME(result)); - ts_subtree_retain(result); - return result; - } - - return NULL_SUBTREE; -} - -// Determine if a given tree should be replaced by an alternative tree. -// -// The decision is based on the trees' error costs (if any), their dynamic precedence, -// and finally, as a default, by a recursive comparison of the trees' symbols. -static bool ts_parser__select_tree(t_parser *self, Subtree left, Subtree right) { - if (!left.ptr) return true; - if (!right.ptr) return false; - - if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) { - LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left)); - return true; - } - - if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) { - LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); - return false; - } - - if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) { - LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32, - TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left), - ts_subtree_dynamic_precedence(left)); - return true; - } - - if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) { - LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32, - TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right), - ts_subtree_dynamic_precedence(right)); - return false; - } - - if (ts_subtree_error_cost(left) > 0) return true; - - int comparison = ts_subtree_compare(left, right, &self->tree_pool); - switch (comparison) { - case -1: - LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); - return false; - break; - case 1: - LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left)); - return true; - default: - 
LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); - return false; - } -} - -// Determine if a given tree's children should be replaced by an alternative -// array of children. -static bool ts_parser__select_children( - t_parser *self, - Subtree left, - const SubtreeArray *children -) { - array_assign(&self->scratch_trees, children); - - // Create a temporary subtree using the scratch trees array. This node does - // not perform any allocation except for possibly growing the array to make - // room for its own heap data. The scratch tree is never explicitly released, - // so the same 'scratch trees' array can be reused again later. - MutableSubtree scratch_tree = ts_subtree_new_node( - ts_subtree_symbol(left), - &self->scratch_trees, - 0, - self->language - ); - - return ts_parser__select_tree( - self, - left, - ts_subtree_from_mut(scratch_tree) - ); -} - -static void ts_parser__shift( - t_parser *self, - StackVersion version, - t_state_id state, - Subtree lookahead, - bool extra -) { - bool is_leaf = ts_subtree_child_count(lookahead) == 0; - Subtree subtree_to_push = lookahead; - if (extra != ts_subtree_extra(lookahead) && is_leaf) { - MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead); - ts_subtree_set_extra(&result, extra); - subtree_to_push = ts_subtree_from_mut(result); - } - - ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state); - if (ts_subtree_has_external_tokens(subtree_to_push)) { - ts_stack_set_last_external_token( - self->stack, version, ts_subtree_last_external_token(subtree_to_push) - ); - } -} - -static StackVersion ts_parser__reduce( - t_parser *self, - StackVersion version, - t_symbol symbol, - uint32_t count, - int dynamic_precedence, - uint16_t production_id, - bool is_fragile, - bool end_of_non_terminal_extra -) { - uint32_t initial_version_count = ts_stack_version_count(self->stack); - - // Pop the given number of nodes from the given version of the parse stack. 
- // If stack versions have previously merged, then there may be more than one - // path back through the stack. For each path, create a new parent node to - // contain the popped children, and push it onto the stack in place of the - // children. - StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); - uint32_t removed_version_count = 0; - for (uint32_t i = 0; i < pop.size; i++) { - StackSlice slice = pop.contents[i]; - StackVersion slice_version = slice.version - removed_version_count; - - // This is where new versions are added to the parse stack. The versions - // will all be sorted and truncated at the end of the outer parsing loop. - // Allow the maximum version count to be temporarily exceeded, but only - // by a limited threshold. - if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) { - ts_stack_remove_version(self->stack, slice_version); - ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); - removed_version_count++; - while (i + 1 < pop.size) { - StackSlice next_slice = pop.contents[i + 1]; - if (next_slice.version != slice.version) break; - ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); - i++; - } - continue; - } - - // Extra tokens on top of the stack should not be included in this new parent - // node. They will be re-pushed onto the stack after the parent node is - // created and pushed. - SubtreeArray children = slice.subtrees; - ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras); - - MutableSubtree parent = ts_subtree_new_node( - symbol, &children, production_id, self->language - ); - - // This pop operation may have caused multiple stack versions to collapse - // into one, because they all diverged from a common state. In that case, - // choose one of the arrays of trees to be the parent node's children, and - // delete the rest of the tree arrays. 
- while (i + 1 < pop.size) { - StackSlice next_slice = pop.contents[i + 1]; - if (next_slice.version != slice.version) break; - i++; - - SubtreeArray next_slice_children = next_slice.subtrees; - ts_subtree_array_remove_trailing_extras(&next_slice_children, &self->trailing_extras2); - - if (ts_parser__select_children( - self, - ts_subtree_from_mut(parent), - &next_slice_children - )) { - ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras); - ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent)); - array_swap(&self->trailing_extras, &self->trailing_extras2); - parent = ts_subtree_new_node( - symbol, &next_slice_children, production_id, self->language - ); - } else { - array_clear(&self->trailing_extras2); - ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); - } - } - - t_state_id state = ts_stack_state(self->stack, slice_version); - t_state_id next_state = ts_language_next_state(self->language, state, symbol); - if (end_of_non_terminal_extra && next_state == state) { - parent.ptr->extra = true; - } - if (is_fragile || pop.size > 1 || initial_version_count > 1) { - parent.ptr->fragile_left = true; - parent.ptr->fragile_right = true; - parent.ptr->parse_state = TS_TREE_STATE_NONE; - } else { - parent.ptr->parse_state = state; - } - parent.ptr->dynamic_precedence += dynamic_precedence; - - // Push the parent node onto the stack, along with any extra tokens that - // were previously on top of the stack. - ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state); - for (uint32_t j = 0; j < self->trailing_extras.size; j++) { - ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state); - } - - for (StackVersion j = 0; j < slice_version; j++) { - if (j == version) continue; - if (ts_stack_merge(self->stack, j, slice_version)) { - removed_version_count++; - break; - } - } - } - - // Return the first new stack version that was created. 
- return ts_stack_version_count(self->stack) > initial_version_count - ? initial_version_count - : STACK_VERSION_NONE; -} - -static void ts_parser__accept( - t_parser *self, - StackVersion version, - Subtree lookahead -) { - assert(ts_subtree_is_eof(lookahead)); - ts_stack_push(self->stack, version, lookahead, false, 1); - - StackSliceArray pop = ts_stack_pop_all(self->stack, version); - for (uint32_t i = 0; i < pop.size; i++) { - SubtreeArray trees = pop.contents[i].subtrees; - - Subtree root = NULL_SUBTREE; - for (uint32_t j = trees.size - 1; j + 1 > 0; j--) { - Subtree tree = trees.contents[j]; - if (!ts_subtree_extra(tree)) { - assert(!tree.data.is_inline); - uint32_t child_count = ts_subtree_child_count(tree); - const Subtree *children = ts_subtree_children(tree); - for (uint32_t k = 0; k < child_count; k++) { - ts_subtree_retain(children[k]); - } - array_splice(&trees, j, 1, child_count, children); - root = ts_subtree_from_mut(ts_subtree_new_node( - ts_subtree_symbol(tree), - &trees, - tree.ptr->production_id, - self->language - )); - ts_subtree_release(&self->tree_pool, tree); - break; - } - } - - assert(root.ptr); - self->accept_count++; - - if (self->finished_tree.ptr) { - if (ts_parser__select_tree(self, self->finished_tree, root)) { - ts_subtree_release(&self->tree_pool, self->finished_tree); - self->finished_tree = root; - } else { - ts_subtree_release(&self->tree_pool, root); - } - } else { - self->finished_tree = root; - } - } - - ts_stack_remove_version(self->stack, pop.contents[0].version); - ts_stack_halt(self->stack, version); -} - -static bool ts_parser__do_all_potential_reductions( - t_parser *self, - StackVersion starting_version, - t_symbol lookahead_symbol -) { - uint32_t initial_version_count = ts_stack_version_count(self->stack); - - bool can_shift_lookahead_symbol = false; - StackVersion version = starting_version; - for (unsigned i = 0; true; i++) { - uint32_t version_count = ts_stack_version_count(self->stack); - if (version >= 
version_count) break; - - bool merged = false; - for (StackVersion j = initial_version_count; j < version; j++) { - if (ts_stack_merge(self->stack, j, version)) { - merged = true; - break; - } - } - if (merged) continue; - - t_state_id state = ts_stack_state(self->stack, version); - bool has_shift_action = false; - array_clear(&self->reduce_actions); - - t_symbol first_symbol, end_symbol; - if (lookahead_symbol != 0) { - first_symbol = lookahead_symbol; - end_symbol = lookahead_symbol + 1; - } else { - first_symbol = 1; - end_symbol = self->language->token_count; - } - - for (t_symbol symbol = first_symbol; symbol < end_symbol; symbol++) { - TableEntry entry; - ts_language_table_entry(self->language, state, symbol, &entry); - for (uint32_t j = 0; j < entry.action_count; j++) { - TSParseAction action = entry.actions[j]; - switch (action.type) { - case TSParseActionTypeShift: - case TSParseActionTypeRecover: - if (!action.shift.extra && !action.shift.repetition) has_shift_action = true; - break; - case TSParseActionTypeReduce: - if (action.reduce.child_count > 0) - ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction) { - .symbol = action.reduce.symbol, - .count = action.reduce.child_count, - .dynamic_precedence = action.reduce.dynamic_precedence, - .production_id = action.reduce.production_id, - }); - break; - default: - break; - } - } - } - - StackVersion reduction_version = STACK_VERSION_NONE; - for (uint32_t j = 0; j < self->reduce_actions.size; j++) { - ReduceAction action = self->reduce_actions.contents[j]; - - reduction_version = ts_parser__reduce( - self, version, action.symbol, action.count, - action.dynamic_precedence, action.production_id, - true, false - ); - } - - if (has_shift_action) { - can_shift_lookahead_symbol = true; - } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) { - ts_stack_renumber_version(self->stack, reduction_version, version); - continue; - } else if (lookahead_symbol != 0) { - 
ts_stack_remove_version(self->stack, version); - } - - if (version == starting_version) { - version = version_count; - } else { - version++; - } - } - - return can_shift_lookahead_symbol; -} - -static bool ts_parser__recover_to_state( - t_parser *self, - StackVersion version, - unsigned depth, - t_state_id goal_state -) { - StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth); - StackVersion previous_version = STACK_VERSION_NONE; - - for (unsigned i = 0; i < pop.size; i++) { - StackSlice slice = pop.contents[i]; - - if (slice.version == previous_version) { - ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); - array_erase(&pop, i--); - continue; - } - - if (ts_stack_state(self->stack, slice.version) != goal_state) { - ts_stack_halt(self->stack, slice.version); - ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); - array_erase(&pop, i--); - continue; - } - - SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version); - if (error_trees.size > 0) { - assert(error_trees.size == 1); - Subtree error_tree = error_trees.contents[0]; - uint32_t error_child_count = ts_subtree_child_count(error_tree); - if (error_child_count > 0) { - array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree)); - for (unsigned j = 0; j < error_child_count; j++) { - ts_subtree_retain(slice.subtrees.contents[j]); - } - } - ts_subtree_array_delete(&self->tree_pool, &error_trees); - } - - ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras); - - if (slice.subtrees.size > 0) { - Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language); - ts_stack_push(self->stack, slice.version, error, false, goal_state); - } else { - array_delete(&slice.subtrees); - } - - for (unsigned j = 0; j < self->trailing_extras.size; j++) { - Subtree tree = self->trailing_extras.contents[j]; - ts_stack_push(self->stack, slice.version, tree, false, goal_state); - } - - previous_version = 
slice.version; - } - - return previous_version != STACK_VERSION_NONE; -} - -static void ts_parser__recover( - t_parser *self, - StackVersion version, - Subtree lookahead -) { - bool did_recover = false; - unsigned previous_version_count = ts_stack_version_count(self->stack); - Length position = ts_stack_position(self->stack, version); - StackSummary *summary = ts_stack_get_summary(self->stack, version); - unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version); - unsigned current_error_cost = ts_stack_error_cost(self->stack, version); - - // When the parser is in the error state, there are two strategies for recovering with a - // given lookahead token: - // 1. Find a previous state on the stack in which that lookahead token would be valid. Then, - // create a new stack version that is in that state again. This entails popping all of the - // subtrees that have been pushed onto the stack since that previous state, and wrapping - // them in an ERROR node. - // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and - // move on to the next lookahead token, remaining in the error state. - // - // First, try the strategy 1. Upon entering the error state, the parser recorded a summary - // of the previous parse states and their depths. Look at each state in the summary, to see - // if the current lookahead token would be valid in that state. - if (summary && !ts_subtree_is_error(lookahead)) { - for (unsigned i = 0; i < summary->size; i++) { - StackSummaryEntry entry = summary->contents[i]; - - if (entry.state == ERROR_STATE) continue; - if (entry.position.bytes == position.bytes) continue; - unsigned depth = entry.depth; - if (node_count_since_error > 0) depth++; - - // Do not recover in ways that create redundant stack versions. 
- bool would_merge = false; - for (unsigned j = 0; j < previous_version_count; j++) { - if ( - ts_stack_state(self->stack, j) == entry.state && - ts_stack_position(self->stack, j).bytes == position.bytes - ) { - would_merge = true; - break; - } - } - if (would_merge) continue; - - // Do not recover if the result would clearly be worse than some existing stack version. - unsigned new_cost = - current_error_cost + - entry.depth * ERROR_COST_PER_SKIPPED_TREE + - (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR + - (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE; - if (ts_parser__better_version_exists(self, version, false, new_cost)) break; - - // If the current lookahead token is valid in some previous state, recover to that state. - // Then stop looking for further recoveries. - if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) { - if (ts_parser__recover_to_state(self, version, depth, entry.state)) { - did_recover = true; - LOG("recover_to_previous state:%u, depth:%u", entry.state, depth); - LOG_STACK(); - break; - } - } - } - } - - // In the process of attempting to recover, some stack versions may have been created - // and subsequently halted. Remove those versions. - for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) { - if (!ts_stack_is_active(self->stack, i)) { - ts_stack_remove_version(self->stack, i--); - } - } - - // If strategy 1 succeeded, a new stack version will have been created which is able to handle - // the current lookahead token. Now, in addition, try strategy 2 described above: skip the - // current lookahead token by wrapping it in an ERROR node. - - // Don't pursue this additional strategy if there are already too many stack versions. 
- if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { - ts_stack_halt(self->stack, version); - ts_subtree_release(&self->tree_pool, lookahead); - return; - } - - if ( - did_recover && - ts_subtree_has_external_scanner_state_change(lookahead) - ) { - ts_stack_halt(self->stack, version); - ts_subtree_release(&self->tree_pool, lookahead); - return; - } - - // If the parser is still in the error state at the end of the file, just wrap everything - // in an ERROR node and terminate. - if (ts_subtree_is_eof(lookahead)) { - LOG("recover_eof"); - SubtreeArray children = array_new(); - Subtree parent = ts_subtree_new_error_node(&children, false, self->language); - ts_stack_push(self->stack, version, parent, false, 1); - ts_parser__accept(self, version, lookahead); - return; - } - - // Do not recover if the result would clearly be worse than some existing stack version. - unsigned new_cost = - current_error_cost + ERROR_COST_PER_SKIPPED_TREE + - ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR + - ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE; - if (ts_parser__better_version_exists(self, version, false, new_cost)) { - ts_stack_halt(self->stack, version); - ts_subtree_release(&self->tree_pool, lookahead); - return; - } - - // If the current lookahead token is an extra token, mark it as extra. This means it won't - // be counted in error cost calculations. - unsigned n; - const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n); - if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) { - MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); - ts_subtree_set_extra(&mutable_lookahead, true); - lookahead = ts_subtree_from_mut(mutable_lookahead); - } - - // Wrap the lookahead token in an ERROR. 
- LOG("skip_token symbol:%s", TREE_NAME(lookahead)); - SubtreeArray children = array_new(); - array_reserve(&children, 1); - array_push(&children, lookahead); - MutableSubtree error_repeat = ts_subtree_new_node( - ts_builtin_sym_error_repeat, - &children, - 0, - self->language - ); - - // If other tokens have already been skipped, so there is already an ERROR at the top of the - // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger - // ERROR. - if (node_count_since_error > 0) { - StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1); - - // TODO: Figure out how to make this condition occur. - // See https://github.com/atom/atom/issues/18450#issuecomment-439579778 - // If multiple stack versions have merged at this point, just pick one of the errors - // arbitrarily and discard the rest. - if (pop.size > 1) { - for (unsigned i = 1; i < pop.size; i++) { - ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees); - } - while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) { - ts_stack_remove_version(self->stack, pop.contents[0].version + 1); - } - } - - ts_stack_renumber_version(self->stack, pop.contents[0].version, version); - array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat)); - error_repeat = ts_subtree_new_node( - ts_builtin_sym_error_repeat, - &pop.contents[0].subtrees, - 0, - self->language - ); - } - - // Push the new ERROR onto the stack. 
- ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE); - if (ts_subtree_has_external_tokens(lookahead)) { - ts_stack_set_last_external_token( - self->stack, version, ts_subtree_last_external_token(lookahead) - ); - } -} - -static void ts_parser__handle_error( - t_parser *self, - StackVersion version, - Subtree lookahead -) { - uint32_t previous_version_count = ts_stack_version_count(self->stack); - - // Perform any reductions that can happen in this state, regardless of the lookahead. After - // skipping one or more invalid tokens, the parser might find a token that would have allowed - // a reduction to take place. - ts_parser__do_all_potential_reductions(self, version, 0); - uint32_t version_count = ts_stack_version_count(self->stack); - Length position = ts_stack_position(self->stack, version); - - // Push a discontinuity onto the stack. Merge all of the stack versions that - // were created in the previous step. - bool did_insert_missing_token = false; - for (StackVersion v = version; v < version_count;) { - if (!did_insert_missing_token) { - t_state_id state = ts_stack_state(self->stack, v); - for ( - t_symbol missing_symbol = 1; - missing_symbol < (uint16_t)self->language->token_count; - missing_symbol++ - ) { - t_state_id state_after_missing_symbol = ts_language_next_state( - self->language, state, missing_symbol - ); - if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) { - continue; - } - - if (ts_language_has_reduce_action( - self->language, - state_after_missing_symbol, - ts_subtree_leaf_symbol(lookahead) - )) { - // In case the parser is currently outside of any included range, the lexer will - // snap to the beginning of the next included range. The missing token's padding - // must be assigned to position it within the next included range. 
- ts_lexer_reset(&self->lexer, position); - ts_lexer_mark_end(&self->lexer); - Length padding = length_sub(self->lexer.token_end_position, position); - uint32_t lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead); - - StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v); - Subtree missing_tree = ts_subtree_new_missing_leaf( - &self->tree_pool, missing_symbol, - padding, lookahead_bytes, - self->language - ); - ts_stack_push( - self->stack, version_with_missing_tree, - missing_tree, false, - state_after_missing_symbol - ); - - if (ts_parser__do_all_potential_reductions( - self, version_with_missing_tree, - ts_subtree_leaf_symbol(lookahead) - )) { - LOG( - "recover_with_missing symbol:%s, state:%u", - SYM_NAME(missing_symbol), - ts_stack_state(self->stack, version_with_missing_tree) - ); - did_insert_missing_token = true; - break; - } - } - } - } - - ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE); - v = (v == version) ? previous_version_count : v + 1; - } - - for (unsigned i = previous_version_count; i < version_count; i++) { - bool did_merge = ts_stack_merge(self->stack, version, previous_version_count); - assert(did_merge); - (void)did_merge; // fix warning/error with clang -Os - } - - ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH); - - // Begin recovery with the current lookahead node, rather than waiting for the - // next turn of the parse loop. This ensures that the tree accounts for the - // current lookahead token's "lookahead bytes" value, which describes how far - // the lexer needed to look ahead beyond the content of the token in order to - // recognize it. 
- if (ts_subtree_child_count(lookahead) > 0) { - ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node); - } - ts_parser__recover(self, version, lookahead); - - LOG_STACK(); -} - -static bool ts_parser__advance( - t_parser *self, - StackVersion version, - bool allow_node_reuse -) { - t_state_id state = ts_stack_state(self->stack, version); - uint32_t position = ts_stack_position(self->stack, version).bytes; - Subtree last_external_token = ts_stack_last_external_token(self->stack, version); - - bool did_reuse = true; - Subtree lookahead = NULL_SUBTREE; - TableEntry table_entry = {.action_count = 0}; - - // If possible, reuse a node from the previous syntax tree. - if (allow_node_reuse) { - lookahead = ts_parser__reuse_node( - self, version, &state, position, last_external_token, &table_entry - ); - } - - // If no node from the previous syntax tree could be reused, then try to - // reuse the token previously returned by the lexer. - if (!lookahead.ptr) { - did_reuse = false; - lookahead = ts_parser__get_cached_token( - self, state, position, last_external_token, &table_entry - ); - } - - bool needs_lex = !lookahead.ptr; - for (;;) { - // Otherwise, re-run the lexer. - if (needs_lex) { - needs_lex = false; - lookahead = ts_parser__lex(self, version, state); - if (self->has_scanner_error) return false; - - if (lookahead.ptr) { - ts_parser__set_cached_token(self, position, last_external_token, lookahead); - ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry); - } - - // When parsing a non-terminal extra, a null lookahead indicates the - // end of the rule. The reduction is stored in the EOF table entry. - // After the reduction, the lexer needs to be run again. - else { - ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry); - } - } - - // If a cancellation flag or a timeout was provided, then check every - // time a fixed number of parse actions has been processed. 
- if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { - self->operation_count = 0; - } - if ( - self->operation_count == 0 && - ((self->cancellation_flag && atomic_load(self->cancellation_flag)) || - (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock))) - ) { - if (lookahead.ptr) { - ts_subtree_release(&self->tree_pool, lookahead); - } - return false; - } - - // Process each parse action for the current lookahead token in - // the current state. If there are multiple actions, then this is - // an ambiguous state. REDUCE actions always create a new stack - // version, whereas SHIFT actions update the existing stack version - // and terminate this loop. - StackVersion last_reduction_version = STACK_VERSION_NONE; - for (uint32_t i = 0; i < table_entry.action_count; i++) { - TSParseAction action = table_entry.actions[i]; - - switch (action.type) { - case TSParseActionTypeShift: { - if (action.shift.repetition) break; - t_state_id next_state; - if (action.shift.extra) { - next_state = state; - LOG("shift_extra"); - } else { - next_state = action.shift.state; - LOG("shift state:%u", next_state); - } - - if (ts_subtree_child_count(lookahead) > 0) { - ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node); - next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead)); - } - - ts_parser__shift(self, version, next_state, lookahead, action.shift.extra); - if (did_reuse) reusable_node_advance(&self->reusable_node); - return true; - } - - case TSParseActionTypeReduce: { - bool is_fragile = table_entry.action_count > 1; - bool end_of_non_terminal_extra = lookahead.ptr == NULL; - LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count); - StackVersion reduction_version = ts_parser__reduce( - self, version, action.reduce.symbol, action.reduce.child_count, - action.reduce.dynamic_precedence, action.reduce.production_id, - is_fragile, 
end_of_non_terminal_extra - ); - if (reduction_version != STACK_VERSION_NONE) { - last_reduction_version = reduction_version; - } - break; - } - - case TSParseActionTypeAccept: { - LOG("accept"); - ts_parser__accept(self, version, lookahead); - return true; - } - - case TSParseActionTypeRecover: { - if (ts_subtree_child_count(lookahead) > 0) { - ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node); - } - - ts_parser__recover(self, version, lookahead); - if (did_reuse) reusable_node_advance(&self->reusable_node); - return true; - } - } - } - - // If a reduction was performed, then replace the current stack version - // with one of the stack versions created by a reduction, and continue - // processing this version of the stack with the same lookahead symbol. - if (last_reduction_version != STACK_VERSION_NONE) { - ts_stack_renumber_version(self->stack, last_reduction_version, version); - LOG_STACK(); - state = ts_stack_state(self->stack, version); - - // At the end of a non-terminal extra rule, the lexer will return a - // null subtree, because the parser needs to perform a fixed reduction - // regardless of the lookahead node. After performing that reduction, - // (and completing the non-terminal extra rule) run the lexer again based - // on the current parse state. - if (!lookahead.ptr) { - needs_lex = true; - } else { - ts_language_table_entry( - self->language, - state, - ts_subtree_leaf_symbol(lookahead), - &table_entry - ); - } - - continue; - } - - // A non-terminal extra rule was reduced and merged into an existing - // stack version. This version can be discarded. - if (!lookahead.ptr) { - ts_stack_halt(self->stack, version); - return true; - } - - // If there were no parse actions for the current lookahead token, then - // it is not valid in this state. If the current lookahead token is a - // keyword, then switch to treating it as the normal word token if that - // token is valid in this state. 
- if ( - ts_subtree_is_keyword(lookahead) && - ts_subtree_symbol(lookahead) != self->language->keyword_capture_token - ) { - ts_language_table_entry(self->language, state, self->language->keyword_capture_token, &table_entry); - if (table_entry.action_count > 0) { - LOG( - "switch from_keyword:%s, to_word_token:%s", - TREE_NAME(lookahead), - SYM_NAME(self->language->keyword_capture_token) - ); - - MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); - ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language); - lookahead = ts_subtree_from_mut(mutable_lookahead); - continue; - } - } - - // If the current lookahead token is not valid and the parser is - // already in the error state, restart the error recovery process. - // TODO - can this be unified with the other `RECOVER` case above? - if (state == ERROR_STATE) { - ts_parser__recover(self, version, lookahead); - return true; - } - - // If the current lookahead token is not valid and the previous - // subtree on the stack was reused from an old tree, it isn't actually - // valid to reuse it. Remove it from the stack, and in its place, - // push each of its children. Then try again to process the current - // lookahead. - if (ts_parser__breakdown_top_of_stack(self, version)) { - state = ts_stack_state(self->stack, version); - ts_subtree_release(&self->tree_pool, lookahead); - needs_lex = true; - continue; - } - - // At this point, the current lookahead token is definitely not valid - // for this parse stack version. Mark this version as paused and continue - // processing any other stack versions that might exist. If some other - // version advances successfully, then this version can simply be removed. - // But if all versions end up paused, then error recovery is needed. 
- LOG("detect_error"); - ts_stack_pause(self->stack, version, lookahead); - return true; - } -} - -static unsigned ts_parser__condense_stack(t_parser *self) { - bool made_changes = false; - unsigned min_error_cost = UINT_MAX; - for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { - // Prune any versions that have been marked for removal. - if (ts_stack_is_halted(self->stack, i)) { - ts_stack_remove_version(self->stack, i); - i--; - continue; - } - - // Keep track of the minimum error cost of any stack version so - // that it can be returned. - ErrorStatus status_i = ts_parser__version_status(self, i); - if (!status_i.is_in_error && status_i.cost < min_error_cost) { - min_error_cost = status_i.cost; - } - - // Examine each pair of stack versions, removing any versions that - // are clearly worse than another version. Ensure that the versions - // are ordered from most promising to least promising. - for (StackVersion j = 0; j < i; j++) { - ErrorStatus status_j = ts_parser__version_status(self, j); - - switch (ts_parser__compare_versions(self, status_j, status_i)) { - case ErrorComparisonTakeLeft: - made_changes = true; - ts_stack_remove_version(self->stack, i); - i--; - j = i; - break; - - case ErrorComparisonPreferLeft: - case ErrorComparisonNone: - if (ts_stack_merge(self->stack, j, i)) { - made_changes = true; - i--; - j = i; - } - break; - - case ErrorComparisonPreferRight: - made_changes = true; - if (ts_stack_merge(self->stack, j, i)) { - i--; - j = i; - } else { - ts_stack_swap_versions(self->stack, i, j); - } - break; - - case ErrorComparisonTakeRight: - made_changes = true; - ts_stack_remove_version(self->stack, j); - i--; - j--; - break; - } - } - } - - // Enforce a hard upper bound on the number of stack versions by - // discarding the least promising versions. 
- while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { - ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); - made_changes = true; - } - - // If the best-performing stack version is currently paused, or all - // versions are paused, then resume the best paused version and begin - // the error recovery process. Otherwise, remove the paused versions. - if (ts_stack_version_count(self->stack) > 0) { - bool has_unpaused_version = false; - for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { - if (ts_stack_is_paused(self->stack, i)) { - if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) { - LOG("resume version:%u", i); - min_error_cost = ts_stack_error_cost(self->stack, i); - Subtree lookahead = ts_stack_resume(self->stack, i); - ts_parser__handle_error(self, i, lookahead); - has_unpaused_version = true; - } else { - ts_stack_remove_version(self->stack, i); - i--; - n--; - } - } else { - has_unpaused_version = true; - } - } - } - - if (made_changes) { - LOG("condense"); - LOG_STACK(); - } - - return min_error_cost; -} - -static bool ts_parser_has_outstanding_parse(t_parser *self) { - return ( - self->external_scanner_payload || - ts_stack_state(self->stack, 0) != 1 || - ts_stack_node_count_since_error(self->stack, 0) != 0 - ); -} - -// Parser - Public - -t_parser *ts_parser_new(void) { - t_parser *self = ts_calloc(1, sizeof(t_parser)); - ts_lexer_init(&self->lexer); - array_init(&self->reduce_actions); - array_reserve(&self->reduce_actions, 4); - self->tree_pool = ts_subtree_pool_new(32); - self->stack = ts_stack_new(&self->tree_pool); - self->finished_tree = NULL_SUBTREE; - self->reusable_node = reusable_node_new(); - self->dot_graph_file = NULL; - self->cancellation_flag = NULL; - self->timeout_duration = 0; - self->language = NULL; - self->has_scanner_error = false; - self->external_scanner_payload = NULL; - self->end_clock = clock_null(); - self->operation_count = 0; - self->old_tree = NULL_SUBTREE; - 
self->included_range_differences = (TSRangeArray) array_new(); - self->included_range_difference_index = 0; - ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); - return self; -} - -void ts_parser_delete(t_parser *self) { - if (!self) return; - - ts_parser_set_language(self, NULL); - ts_stack_delete(self->stack); - if (self->reduce_actions.contents) { - array_delete(&self->reduce_actions); - } - if (self->included_range_differences.contents) { - array_delete(&self->included_range_differences); - } - if (self->old_tree.ptr) { - ts_subtree_release(&self->tree_pool, self->old_tree); - self->old_tree = NULL_SUBTREE; - } - ts_lexer_delete(&self->lexer); - ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); - ts_subtree_pool_delete(&self->tree_pool); - reusable_node_delete(&self->reusable_node); - array_delete(&self->trailing_extras); - array_delete(&self->trailing_extras2); - array_delete(&self->scratch_trees); - ts_free(self); -} - -const t_language *ts_parser_language(const t_parser *self) { - return self->language; -} - -bool ts_parser_set_language(t_parser *self, const t_language *language) { - ts_parser_reset(self); - ts_language_delete(self->language); - self->language = NULL; - - if (language) { - if ( - language->version > TREE_SITTER_LANGUAGE_VERSION || - language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION - ) return false; - - - } - - self->language = ts_language_copy(language); - return true; -} - -t_logger ts_parser_logger(const t_parser *self) { - return self->lexer.logger; -} - -void ts_parser_set_logger(t_parser *self, t_logger logger) { - self->lexer.logger = logger; -} - -void ts_parser_print_dot_graphs(t_parser *self, int fd) { - if (self->dot_graph_file) { - fclose(self->dot_graph_file); - } - - if (fd >= 0) { - #ifdef _WIN32 - self->dot_graph_file = _fdopen(fd, "a"); - #else - self->dot_graph_file = fdopen(fd, "a"); - #endif - } else { - self->dot_graph_file = NULL; - } -} - -const size_t 
*ts_parser_cancellation_flag(const t_parser *self) { - return (const size_t *)self->cancellation_flag; -} - -void ts_parser_set_cancellation_flag(t_parser *self, const size_t *flag) { - self->cancellation_flag = (const volatile size_t *)flag; -} - -uint64_t ts_parser_timeout_micros(const t_parser *self) { - return duration_to_micros(self->timeout_duration); -} - -void ts_parser_set_timeout_micros(t_parser *self, uint64_t timeout_micros) { - self->timeout_duration = duration_from_micros(timeout_micros); -} - -bool ts_parser_set_included_ranges( - t_parser *self, - const t_range *ranges, - uint32_t count -) { - return ts_lexer_set_included_ranges(&self->lexer, ranges, count); -} - -const t_range *ts_parser_included_ranges(const t_parser *self, uint32_t *count) { - return ts_lexer_included_ranges(&self->lexer, count); -} - -void ts_parser_reset(t_parser *self) { - ts_parser__external_scanner_destroy(self); - - if (self->old_tree.ptr) { - ts_subtree_release(&self->tree_pool, self->old_tree); - self->old_tree = NULL_SUBTREE; - } - - reusable_node_clear(&self->reusable_node); - ts_lexer_reset(&self->lexer, length_zero()); - ts_stack_clear(self->stack); - ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); - if (self->finished_tree.ptr) { - ts_subtree_release(&self->tree_pool, self->finished_tree); - self->finished_tree = NULL_SUBTREE; - } - self->accept_count = 0; - self->has_scanner_error = false; -} - -t_tree *ts_parser_parse( - t_parser *self, - const t_tree *old_tree, - t_input input -) { - t_tree *result = NULL; - if (!self->language || !input.read) return NULL; - - - - ts_lexer_set_input(&self->lexer, input); - array_clear(&self->included_range_differences); - self->included_range_difference_index = 0; - - if (ts_parser_has_outstanding_parse(self)) { - LOG("resume_parsing"); - } else { - ts_parser__external_scanner_create(self); - if (self->has_scanner_error) goto exit; - - if (old_tree) { - ts_subtree_retain(old_tree->root); - self->old_tree = 
old_tree->root; - ts_range_array_get_changed_ranges( - old_tree->included_ranges, old_tree->included_range_count, - self->lexer.included_ranges, self->lexer.included_range_count, - &self->included_range_differences - ); - reusable_node_reset(&self->reusable_node, old_tree->root); - LOG("parse_after_edit"); - LOG_TREE(self->old_tree); - for (unsigned i = 0; i < self->included_range_differences.size; i++) { - t_range *range = &self->included_range_differences.contents[i]; - LOG("different_included_range %u - %u", range->start_byte, range->end_byte); - } - } else { - reusable_node_clear(&self->reusable_node); - LOG("new_parse"); - } - } - - self->operation_count = 0; - if (self->timeout_duration) { - self->end_clock = clock_after(clock_now(), self->timeout_duration); - } else { - self->end_clock = clock_null(); - } - - uint32_t position = 0, last_position = 0, version_count = 0; - do { - for ( - StackVersion version = 0; - version_count = ts_stack_version_count(self->stack), - version < version_count; - version++ - ) { - bool allow_node_reuse = version_count == 1; - while (ts_stack_is_active(self->stack, version)) { - LOG( - "process version:%u, version_count:%u, state:%d, row:%u, col:%u", - version, - ts_stack_version_count(self->stack), - ts_stack_state(self->stack, version), - ts_stack_position(self->stack, version).extent.row, - ts_stack_position(self->stack, version).extent.column - ); - - if (!ts_parser__advance(self, version, allow_node_reuse)) { - if (self->has_scanner_error) goto exit; - return NULL; - } - - LOG_STACK(); - - position = ts_stack_position(self->stack, version).bytes; - if (position > last_position || (version > 0 && position == last_position)) { - last_position = position; - break; - } - } - } - - // After advancing each version of the stack, re-sort the versions by their cost, - // removing any versions that are no longer worth pursuing. 
- unsigned min_error_cost = ts_parser__condense_stack(self); - - // If there's already a finished parse tree that's better than any in-progress version, - // then terminate parsing. Clear the parse stack to remove any extra references to subtrees - // within the finished tree, ensuring that these subtrees can be safely mutated in-place - // for rebalancing. - if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) { - ts_stack_clear(self->stack); - break; - } - - while (self->included_range_difference_index < self->included_range_differences.size) { - t_range *range = &self->included_range_differences.contents[self->included_range_difference_index]; - if (range->end_byte <= position) { - self->included_range_difference_index++; - } else { - break; - } - } - } while (version_count != 0); - - assert(self->finished_tree.ptr); - ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language); - LOG("done"); - LOG_TREE(self->finished_tree); - - result = ts_tree_new( - self->finished_tree, - self->language, - self->lexer.included_ranges, - self->lexer.included_range_count - ); - self->finished_tree = NULL_SUBTREE; - -exit: - ts_parser_reset(self); - return result; -} - -t_tree *ts_parser_parse_string( - t_parser *self, - const t_tree *old_tree, - const char *string, - uint32_t length -) { - return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8); -} - -t_tree *ts_parser_parse_string_encoding( - t_parser *self, - const t_tree *old_tree, - const char *string, - uint32_t length, - t_input_encoding encoding -) { - TSStringInput input = {string, length}; - return ts_parser_parse(self, old_tree, (t_input) { - &input, - ts_string_input_read, - encoding, - }); -} - -#undef LOG diff --git a/parser/src/parser.h b/parser/src/parser.h deleted file mode 100644 index fa49485b..00000000 --- a/parser/src/parser.h +++ /dev/null @@ -1,265 +0,0 @@ -#ifndef TREE_SITTER_PARSER_H_ -#define TREE_SITTER_PARSER_H_ 
- -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -#define ts_builtin_sym_error ((t_symbol)-1) -#define ts_builtin_sym_end 0 -#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 - -#ifndef TREE_SITTER_API_H_ -typedef uint16_t TSStateId; -typedef uint16_t TSSymbol; -typedef uint16_t TSFieldId; -typedef struct TSLanguage TSLanguage; -#endif - -typedef struct { - t_field_id field_id; - uint8_t child_index; - bool inherited; -} TSFieldMapEntry; - -typedef struct { - uint16_t index; - uint16_t length; -} TSFieldMapSlice; - -typedef struct { - bool visible; - bool named; - bool supertype; -} TSSymbolMetadata; - -typedef struct TSLexer TSLexer; - -struct TSLexer { - int32_t lookahead; - t_symbol result_symbol; - void (*advance)(TSLexer *, bool); - void (*mark_end)(TSLexer *); - uint32_t (*get_column)(TSLexer *); - bool (*is_at_included_range_start)(const TSLexer *); - bool (*eof)(const TSLexer *); -}; - -typedef enum { - TSParseActionTypeShift, - TSParseActionTypeReduce, - TSParseActionTypeAccept, - TSParseActionTypeRecover, -} TSParseActionType; - -typedef union { - struct { - uint8_t type; - t_state_id state; - bool extra; - bool repetition; - } shift; - struct { - uint8_t type; - uint8_t child_count; - t_symbol symbol; - int16_t dynamic_precedence; - uint16_t production_id; - } reduce; - uint8_t type; -} TSParseAction; - -typedef struct { - uint16_t lex_state; - uint16_t external_lex_state; -} TSLexMode; - -typedef union { - TSParseAction action; - struct { - uint8_t count; - bool reusable; - } entry; -} TSParseActionEntry; - -typedef struct { - int32_t start; - int32_t end; -} TSCharacterRange; - -struct t_language { - uint32_t version; - uint32_t symbol_count; - uint32_t alias_count; - uint32_t token_count; - uint32_t external_token_count; - uint32_t state_count; - uint32_t large_state_count; - uint32_t production_id_count; - uint32_t field_count; - uint16_t max_alias_sequence_length; - const uint16_t *parse_table; - const uint16_t 
*small_parse_table; - const uint32_t *small_parse_table_map; - const TSParseActionEntry *parse_actions; - const char * const *symbol_names; - const char * const *field_names; - const TSFieldMapSlice *field_map_slices; - const TSFieldMapEntry *field_map_entries; - const TSSymbolMetadata *symbol_metadata; - const t_symbol *public_symbol_map; - const uint16_t *alias_map; - const t_symbol *alias_sequences; - const TSLexMode *lex_modes; - bool (*lex_fn)(TSLexer *, t_state_id); - bool (*keyword_lex_fn)(TSLexer *, t_state_id); - t_symbol keyword_capture_token; - struct { - const bool *states; - const t_symbol *symbol_map; - void *(*create)(void); - void (*destroy)(void *); - bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); - unsigned (*serialize)(void *, char *); - void (*deserialize)(void *, const char *, unsigned); - } external_scanner; - const t_state_id *primary_state_ids; -}; - -static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { - uint32_t index = 0; - uint32_t size = len - index; - while (size > 1) { - uint32_t half_size = size / 2; - uint32_t mid_index = index + half_size; - TSCharacterRange *range = &ranges[mid_index]; - if (lookahead >= range->start && lookahead <= range->end) { - return true; - } else if (lookahead > range->end) { - index = mid_index; - } - size -= half_size; - } - TSCharacterRange *range = &ranges[index]; - return (lookahead >= range->start && lookahead <= range->end); -} - -/* - * Lexer Macros - */ - -#ifdef _MSC_VER -#define UNUSED __pragma(warning(suppress : 4101)) -#else -#define UNUSED __attribute__((unused)) -#endif - -#define START_LEXER() \ - bool result = false; \ - bool skip = false; \ - UNUSED \ - bool eof = false; \ - int32_t lookahead; \ - goto start; \ - next_state: \ - lexer->advance(lexer, skip); \ - start: \ - skip = false; \ - lookahead = lexer->lookahead; - -#define ADVANCE(state_value) \ - { \ - state = state_value; \ - goto next_state; \ - } - -#define ADVANCE_MAP(...) 
\ - { \ - static const uint16_t map[] = { __VA_ARGS__ }; \ - for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ - if (map[i] == lookahead) { \ - state = map[i + 1]; \ - goto next_state; \ - } \ - } \ - } - -#define SKIP(state_value) \ - { \ - skip = true; \ - state = state_value; \ - goto next_state; \ - } - -#define ACCEPT_TOKEN(symbol_value) \ - result = true; \ - lexer->result_symbol = symbol_value; \ - lexer->mark_end(lexer); - -#define END_STATE() return result; - -/* - * Parse Table Macros - */ - -#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) - -#define STATE(id) id - -#define ACTIONS(id) id - -#define SHIFT(state_value) \ - {{ \ - .shift = { \ - .type = TSParseActionTypeShift, \ - .state = (state_value) \ - } \ - }} - -#define SHIFT_REPEAT(state_value) \ - {{ \ - .shift = { \ - .type = TSParseActionTypeShift, \ - .state = (state_value), \ - .repetition = true \ - } \ - }} - -#define SHIFT_EXTRA() \ - {{ \ - .shift = { \ - .type = TSParseActionTypeShift, \ - .extra = true \ - } \ - }} - -#define REDUCE(symbol_name, children, precedence, prod_id) \ - {{ \ - .reduce = { \ - .type = TSParseActionTypeReduce, \ - .symbol = symbol_name, \ - .child_count = children, \ - .dynamic_precedence = precedence, \ - .production_id = prod_id \ - }, \ - }} - -#define RECOVER() \ - {{ \ - .type = TSParseActionTypeRecover \ - }} - -#define ACCEPT_INPUT() \ - {{ \ - .type = TSParseActionTypeAccept \ - }} - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_PARSER_H_ diff --git a/parser/src/point.h b/parser/src/point.h deleted file mode 100644 index 3723299e..00000000 --- a/parser/src/point.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef TREE_SITTER_POINT_H_ -#define TREE_SITTER_POINT_H_ - -#include "./api.h" - -#define POINT_ZERO ((t_point) {0, 0}) -#define POINT_MAX ((t_point) {UINT32_MAX, UINT32_MAX}) - -static inline t_point point__new(unsigned row, unsigned column) { - t_point result = {row, column}; - return result; -} - -static inline t_point 
point_add(t_point a, t_point b) { - if (b.row > 0) - return point__new(a.row + b.row, b.column); - else - return point__new(a.row, a.column + b.column); -} - -static inline t_point point_sub(t_point a, t_point b) { - if (a.row > b.row) - return point__new(a.row - b.row, a.column); - else - return point__new(0, a.column - b.column); -} - -static inline bool point_lte(t_point a, t_point b) { - return (a.row < b.row) || (a.row == b.row && a.column <= b.column); -} - -static inline bool point_lt(t_point a, t_point b) { - return (a.row < b.row) || (a.row == b.row && a.column < b.column); -} - -static inline bool point_gt(t_point a, t_point b) { - return (a.row > b.row) || (a.row == b.row && a.column > b.column); -} - -static inline bool point_gte(t_point a, t_point b) { - return (a.row > b.row) || (a.row == b.row && a.column >= b.column); -} - -static inline bool point_eq(t_point a, t_point b) { - return a.row == b.row && a.column == b.column; -} - -static inline t_point point_min(t_point a, t_point b) { - if (a.row < b.row || (a.row == b.row && a.column < b.column)) - return a; - else - return b; -} - -static inline t_point point_max(t_point a, t_point b) { - if (a.row > b.row || (a.row == b.row && a.column > b.column)) - return a; - else - return b; -} - -#endif diff --git a/parser/src/query.c b/parser/src/query.c deleted file mode 100644 index ffded551..00000000 --- a/parser/src/query.c +++ /dev/null @@ -1,4134 +0,0 @@ -#include "./api.h" -#include "./alloc.h" -#include "./array.h" -#include "./language.h" -#include "./point.h" -#include "./tree_cursor.h" -// #include "./unicode.h" -#include - -// #define DEBUG_ANALYZE_QUERY -// #define DEBUG_EXECUTE_QUERY - -#define MAX_STEP_CAPTURE_COUNT 3 -#define MAX_NEGATED_FIELD_COUNT 8 -#define MAX_STATE_PREDECESSOR_COUNT 256 -#define MAX_ANALYSIS_STATE_DEPTH 8 -#define MAX_ANALYSIS_ITERATION_COUNT 256 - -/* - * Stream - A sequence of unicode characters derived from a UTF8 string. 
- * This struct is used in parsing queries from S-expressions. - */ -typedef struct { - const char *input; - const char *start; - const char *end; - int32_t next; - uint8_t next_size; -} Stream; - -/* - * QueryStep - A step in the process of matching a query. Each node within - * a query S-expression corresponds to one of these steps. An entire pattern - * is represented as a sequence of these steps. The basic properties of a - * node are represented by these fields: - * - `symbol` - The grammar symbol to match. A zero value represents the - * wildcard symbol, '_'. - * - `field` - The field name to match. A zero value means that a field name - * was not specified. - * - `capture_ids` - An array of integers representing the names of captures - * associated with this node in the pattern, terminated by a `NONE` value. - * - `depth` - The depth where this node occurs in the pattern. The root node - * of the pattern has depth zero. - * - `negated_field_list_id` - An id representing a set of fields that must - * not be present on a node matching this step. - * - * Steps have some additional fields in order to handle the `.` (or "anchor") operator, - * which forbids additional child nodes: - * - `is_immediate` - Indicates that the node matching this step cannot be preceded - * by other sibling nodes that weren't specified in the pattern. - * - `is_last_child` - Indicates that the node matching this step cannot have any - * subsequent named siblings. - * - * For simple patterns, steps are matched in sequential order. But in order to - * handle alternative/repeated/optional sub-patterns, query steps are not always - * structured as a linear sequence; they sometimes need to split and merge. This - * is done using the following fields: - * - `alternative_index` - The index of a different query step that serves as - * an alternative to this step. A `NONE` value represents no alternative. 
- * When a query state reaches a step with an alternative index, the state - * is duplicated, with one copy remaining at the original step, and one copy - * moving to the alternative step. The alternative may have its own alternative - * step, so this splitting is an iterative process. - * - `is_dead_end` - Indicates that this state cannot be passed directly, and - * exists only in order to redirect to an alternative index, with no splitting. - * - `is_pass_through` - Indicates that state has no matching logic of its own, - * and exists only to split a state. One copy of the state advances immediately - * to the next step, and one moves to the alternative step. - * - `alternative_is_immediate` - Indicates that this step's alternative step - * should be treated as if `is_immediate` is true. - * - * Steps also store some derived state that summarizes how they relate to other - * steps within the same pattern. This is used to optimize the matching process: - * - `contains_captures` - Indicates that this step or one of its child steps - * has a non-empty `capture_ids` list. - * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then - * it and all of its subsequent sibling steps within the same parent pattern - * are guaranteed to match. - * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but - * for the entire top-level pattern. When iterating through a query's - * captures using `ts_query_cursor_next_capture`, this field is used to - * detect that a capture can safely be returned from a match that has not - * even completed yet. 
- */ -typedef struct { - t_symbol symbol; - t_symbol supertype_symbol; - t_field_id field; - uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; - uint16_t depth; - uint16_t alternative_index; - uint16_t negated_field_list_id; - bool is_named: 1; - bool is_immediate: 1; - bool is_last_child: 1; - bool is_pass_through: 1; - bool is_dead_end: 1; - bool alternative_is_immediate: 1; - bool contains_captures: 1; - bool root_pattern_guaranteed: 1; - bool parent_pattern_guaranteed: 1; -} QueryStep; - -/* - * Slice - A slice of an external array. Within a query, capture names, - * literal string values, and predicate step information are stored in three - * contiguous arrays. Individual captures, string values, and predicates are - * represented as slices of these three arrays. - */ -typedef struct { - uint32_t offset; - uint32_t length; -} Slice; - -/* - * SymbolTable - a two-way mapping of strings to ids. - */ -typedef struct { - Array(char) characters; - Array(Slice) slices; -} SymbolTable; - -/** - * CaptureQuantififers - a data structure holding the quantifiers of pattern captures. - */ -typedef Array(uint8_t) CaptureQuantifiers; - -/* - * PatternEntry - Information about the starting point for matching a particular - * pattern. These entries are stored in a 'pattern map' - a sorted array that - * makes it possible to efficiently lookup patterns based on the symbol for their - * first step. The entry consists of the following fields: - * - `pattern_index` - the index of the pattern within the query - * - `step_index` - the index of the pattern's first step in the shared `steps` array - * - `is_rooted` - whether or not the pattern has a single root node. This property - * affects decisions about whether or not to start the pattern for nodes outside - * of a QueryCursor's range restriction. 
- */ -typedef struct { - uint16_t step_index; - uint16_t pattern_index; - bool is_rooted; -} PatternEntry; - -typedef struct { - Slice steps; - Slice predicate_steps; - uint32_t start_byte; - bool is_non_local; -} QueryPattern; - -typedef struct { - uint32_t byte_offset; - uint16_t step_index; -} StepOffset; - -/* - * QueryState - The state of an in-progress match of a particular pattern - * in a query. While executing, a `TSQueryCursor` must keep track of a number - * of possible in-progress matches. Each of those possible matches is - * represented as one of these states. Fields: - * - `id` - A numeric id that is exposed to the public API. This allows the - * caller to remove a given match, preventing any more of its captures - * from being returned. - * - `start_depth` - The depth in the tree where the first step of the state's - * pattern was matched. - * - `pattern_index` - The pattern that the state is matching. - * - `consumed_capture_count` - The number of captures from this match that - * have already been returned. - * - `capture_list_id` - A numeric id that can be used to retrieve the state's - * list of captures from the `CaptureListPool`. - * - `seeking_immediate_match` - A flag that indicates that the state's next - * step must be matched by the very next sibling. This is used when - * processing repetitions. - * - `has_in_progress_alternatives` - A flag that indicates that there is are - * other states that have the same captures as this state, but are at - * different steps in their pattern. This means that in order to obey the - * 'longest-match' rule, this state should not be returned as a match until - * it is clear that there can be no other alternative match with more captures. 
- */ -typedef struct { - uint32_t id; - uint32_t capture_list_id; - uint16_t start_depth; - uint16_t step_index; - uint16_t pattern_index; - uint16_t consumed_capture_count: 12; - bool seeking_immediate_match: 1; - bool has_in_progress_alternatives: 1; - bool dead: 1; - bool needs_parent: 1; -} QueryState; - -typedef Array(t_query_capture) CaptureList; - -/* - * CaptureListPool - A collection of *lists* of captures. Each query state needs - * to maintain its own list of captures. To avoid repeated allocations, this struct - * maintains a fixed set of capture lists, and keeps track of which ones are - * currently in use by a query state. - */ -typedef struct { - Array(CaptureList) list; - CaptureList empty_list; - // The maximum number of capture lists that we are allowed to allocate. We - // never allow `list` to allocate more entries than this, dropping pending - // matches if needed to stay under the limit. - uint32_t max_capture_list_count; - // The number of capture lists allocated in `list` that are not currently in - // use. We reuse those existing-but-unused capture lists before trying to - // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture - // list's length to indicate that it's not in use. - uint32_t free_capture_list_count; -} CaptureListPool; - -/* - * AnalysisState - The state needed for walking the parse table when analyzing - * a query pattern, to determine at which steps the pattern might fail to match. 
- */ -typedef struct { - t_state_id parse_state; - t_symbol parent_symbol; - uint16_t child_index; - t_field_id field_id: 15; - bool done: 1; -} AnalysisStateEntry; - -typedef struct { - AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH]; - uint16_t depth; - uint16_t step_index; - t_symbol root_symbol; -} AnalysisState; - -typedef Array(AnalysisState *) AnalysisStateSet; - -typedef struct { - AnalysisStateSet states; - AnalysisStateSet next_states; - AnalysisStateSet deeper_states; - AnalysisStateSet state_pool; - Array(uint16_t) final_step_indices; - Array(t_symbol) finished_parent_symbols; - bool did_abort; -} QueryAnalysis; - -/* - * AnalysisSubgraph - A subset of the states in the parse table that are used - * in constructing nodes with a certain symbol. Each state is accompanied by - * some information about the possible node that could be produced in - * downstream states. - */ -typedef struct { - t_state_id state; - uint16_t production_id; - uint8_t child_index: 7; - bool done: 1; -} AnalysisSubgraphNode; - -typedef struct { - t_symbol symbol; - Array(t_state_id) start_states; - Array(AnalysisSubgraphNode) nodes; -} AnalysisSubgraph; - -typedef Array(AnalysisSubgraph) AnalysisSubgraphArray; - -/* - * StatePredecessorMap - A map that stores the predecessors of each parse state. - * This is used during query analysis to determine which parse states can lead - * to which reduce actions. - */ -typedef struct { - t_state_id *contents; -} StatePredecessorMap; - -/* - * TSQuery - A tree query, compiled from a string of S-expressions. The query - * itself is immutable. The mutable state used in the process of executing the - * query is stored in a `TSQueryCursor`. 
- */ -struct t_query { - SymbolTable captures; - SymbolTable predicate_values; - Array(CaptureQuantifiers) capture_quantifiers; - Array(QueryStep) steps; - Array(PatternEntry) pattern_map; - Array(t_query_predicate_step) predicate_steps; - Array(QueryPattern) patterns; - Array(StepOffset) step_offsets; - Array(t_field_id) negated_fields; - Array(char) string_buffer; - Array(t_symbol) repeat_symbols_with_rootless_patterns; - const t_language *language; - uint16_t wildcard_root_pattern_count; -}; - -/* - * TSQueryCursor - A stateful struct used to execute a query on a tree. - */ -struct t_query_cursor { - const t_query *query; - t_tree_cursor cursor; - Array(QueryState) states; - Array(QueryState) finished_states; - CaptureListPool capture_list_pool; - uint32_t depth; - uint32_t max_start_depth; - uint32_t start_byte; - uint32_t end_byte; - t_point start_point; - t_point end_point; - uint32_t next_state_id; - bool on_visible_node; - bool ascending; - bool halted; - bool did_exceed_match_limit; -}; - -static const t_query_error PARENT_DONE = -1; -static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; -static const uint16_t NONE = UINT16_MAX; -static const t_symbol WILDCARD_SYMBOL = 0; - -/********** - * Stream - **********/ - -// Advance to the next unicode code point in the stream. -static bool stream_advance(Stream *self) { - self->input += self->next_size; - if (self->input < self->end) { - uint32_t size = ts_decode_ascii( - (const uint8_t *)self->input, - (uint32_t)(self->end - self->input), - &self->next - ); - if (size > 0) { - self->next_size = size; - return true; - } - } else { - self->next_size = 0; - self->next = '\0'; - } - return false; -} - -// Reset the stream to the given input position, represented as a pointer -// into the input string. 
-static void stream_reset(Stream *self, const char *input) { - self->input = input; - self->next_size = 0; - stream_advance(self); -} - -static Stream stream_new(const char *string, uint32_t length) { - Stream self = { - .next = 0, - .input = string, - .start = string, - .end = string + length, - }; - stream_advance(&self); - return self; -} - -static void stream_skip_whitespace(Stream *self) { - for (;;) { - if (iswspace(self->next)) { - stream_advance(self); - } else if (self->next == ';') { - // skip over comments - stream_advance(self); - while (self->next && self->next != '\n') { - if (!stream_advance(self)) break; - } - } else { - break; - } - } -} - -static bool stream_is_ident_start(Stream *self) { - return iswalnum(self->next) || self->next == '_' || self->next == '-'; -} - -static void stream_scan_identifier(Stream *stream) { - do { - stream_advance(stream); - } while ( - iswalnum(stream->next) || - stream->next == '_' || - stream->next == '-' || - stream->next == '.' || - stream->next == '?' || - stream->next == '!' - ); -} - -static uint32_t stream_offset(Stream *self) { - return (uint32_t)(self->input - self->start); -} - -/****************** - * CaptureListPool - ******************/ - -static CaptureListPool capture_list_pool_new(void) { - return (CaptureListPool) { - .list = array_new(), - .empty_list = array_new(), - .max_capture_list_count = UINT32_MAX, - .free_capture_list_count = 0, - }; -} - -static void capture_list_pool_reset(CaptureListPool *self) { - for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { - // This invalid size means that the list is not in use. 
- self->list.contents[i].size = UINT32_MAX; - } - self->free_capture_list_count = self->list.size; -} - -static void capture_list_pool_delete(CaptureListPool *self) { - for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { - array_delete(&self->list.contents[i]); - } - array_delete(&self->list); -} - -static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) { - if (id >= self->list.size) return &self->empty_list; - return &self->list.contents[id]; -} - -static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) { - assert(id < self->list.size); - return &self->list.contents[id]; -} - -static bool capture_list_pool_is_empty(const CaptureListPool *self) { - // The capture list pool is empty if all allocated lists are in use, and we - // have reached the maximum allowed number of allocated lists. - return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count; -} - -static uint16_t capture_list_pool_acquire(CaptureListPool *self) { - // First see if any already allocated capture list is currently unused. - if (self->free_capture_list_count > 0) { - for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { - if (self->list.contents[i].size == UINT32_MAX) { - array_clear(&self->list.contents[i]); - self->free_capture_list_count--; - return i; - } - } - } - - // Otherwise allocate and initialize a new capture list, as long as that - // doesn't put us over the requested maximum. 
- uint32_t i = self->list.size; - if (i >= self->max_capture_list_count) { - return NONE; - } - CaptureList list; - array_init(&list); - array_push(&self->list, list); - return i; -} - -static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { - if (id >= self->list.size) return; - self->list.contents[id].size = UINT32_MAX; - self->free_capture_list_count++; -} - -/************** - * Quantifiers - **************/ - -static t_quantifier quantifier_mul( - t_quantifier left, - t_quantifier right -) { - switch (left) - { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - case TSQuantifierOne: - return TSQuantifierZeroOrOne; - case TSQuantifierZeroOrMore: - case TSQuantifierOneOrMore: - return TSQuantifierZeroOrMore; - }; - break; - case TSQuantifierZeroOrMore: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierZeroOrMore; - }; - break; - case TSQuantifierOne: - return right; - case TSQuantifierOneOrMore: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - } - return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
-} - -static t_quantifier quantifier_join( - t_quantifier left, - t_quantifier right -) { - switch (left) - { - case TSQuantifierZero: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZero; - case TSQuantifierZeroOrOne: - case TSQuantifierOne: - return TSQuantifierZeroOrOne; - case TSQuantifierZeroOrMore: - case TSQuantifierOneOrMore: - return TSQuantifierZeroOrMore; - }; - break; - case TSQuantifierZeroOrOne: - switch (right) { - case TSQuantifierZero: - case TSQuantifierZeroOrOne: - case TSQuantifierOne: - return TSQuantifierZeroOrOne; - break; - case TSQuantifierZeroOrMore: - case TSQuantifierOneOrMore: - return TSQuantifierZeroOrMore; - break; - }; - break; - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - switch (right) { - case TSQuantifierZero: - case TSQuantifierZeroOrOne: - return TSQuantifierZeroOrOne; - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - return TSQuantifierOne; - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - case TSQuantifierOneOrMore: - switch (right) { - case TSQuantifierZero: - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - } - return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
-} - -static t_quantifier quantifier_add( - t_quantifier left, - t_quantifier right -) { - switch (left) - { - case TSQuantifierZero: - return right; - case TSQuantifierZeroOrOne: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZeroOrOne; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - case TSQuantifierZeroOrMore: - switch (right) { - case TSQuantifierZero: - return TSQuantifierZeroOrMore; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - return TSQuantifierZeroOrMore; - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - case TSQuantifierOne: - switch (right) { - case TSQuantifierZero: - return TSQuantifierOne; - case TSQuantifierZeroOrOne: - case TSQuantifierZeroOrMore: - case TSQuantifierOne: - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - }; - break; - case TSQuantifierOneOrMore: - return TSQuantifierOneOrMore; - } - return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
-} - -// Create new capture quantifiers structure -static CaptureQuantifiers capture_quantifiers_new(void) { - return (CaptureQuantifiers) array_new(); -} - -// Delete capture quantifiers structure -static void capture_quantifiers_delete( - CaptureQuantifiers *self -) { - array_delete(self); -} - -// Clear capture quantifiers structure -static void capture_quantifiers_clear( - CaptureQuantifiers *self -) { - array_clear(self); -} - -// Replace capture quantifiers with the given quantifiers -static void capture_quantifiers_replace( - CaptureQuantifiers *self, - CaptureQuantifiers *quantifiers -) { - array_clear(self); - array_push_all(self, quantifiers); -} - -// Return capture quantifier for the given capture id -static t_quantifier capture_quantifier_for_id( - const CaptureQuantifiers *self, - uint16_t id -) { - return (self->size <= id) ? TSQuantifierZero : (t_quantifier) *array_get(self, id); -} - -// Add the given quantifier to the current value for id -static void capture_quantifiers_add_for_id( - CaptureQuantifiers *self, - uint16_t id, - t_quantifier quantifier -) { - if (self->size <= id) { - array_grow_by(self, id + 1 - self->size); - } - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_add((t_quantifier) *own_quantifier, quantifier); -} - -// Point-wise add the given quantifiers to the current values -static void capture_quantifiers_add_all( - CaptureQuantifiers *self, - CaptureQuantifiers *quantifiers -) { - if (self->size < quantifiers->size) { - array_grow_by(self, quantifiers->size - self->size); - } - for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) { - uint8_t *quantifier = array_get(quantifiers, id); - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_add((t_quantifier) *own_quantifier, (t_quantifier) *quantifier); - } -} - -// Join the given quantifier with the current values -static void capture_quantifiers_mul( - CaptureQuantifiers *self, - t_quantifier 
quantifier -) { - for (uint16_t id = 0; id < (uint16_t)self->size; id++) { - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_mul((t_quantifier) *own_quantifier, quantifier); - } -} - -// Point-wise join the quantifiers from a list of alternatives with the current values -static void capture_quantifiers_join_all( - CaptureQuantifiers *self, - CaptureQuantifiers *quantifiers -) { - if (self->size < quantifiers->size) { - array_grow_by(self, quantifiers->size - self->size); - } - for (uint32_t id = 0; id < quantifiers->size; id++) { - uint8_t *quantifier = array_get(quantifiers, id); - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_join((t_quantifier) *own_quantifier, (t_quantifier) *quantifier); - } - for (uint32_t id = quantifiers->size; id < self->size; id++) { - uint8_t *own_quantifier = array_get(self, id); - *own_quantifier = (uint8_t) quantifier_join((t_quantifier) *own_quantifier, TSQuantifierZero); - } -} - -/************** - * SymbolTable - **************/ - -static SymbolTable symbol_table_new(void) { - return (SymbolTable) { - .characters = array_new(), - .slices = array_new(), - }; -} - -static void symbol_table_delete(SymbolTable *self) { - array_delete(&self->characters); - array_delete(&self->slices); -} - -static int symbol_table_id_for_name( - const SymbolTable *self, - const char *name, - uint32_t length -) { - for (unsigned i = 0; i < self->slices.size; i++) { - Slice slice = self->slices.contents[i]; - if ( - slice.length == length && - !strncmp(&self->characters.contents[slice.offset], name, length) - ) return i; - } - return -1; -} - -static const char *symbol_table_name_for_id( - const SymbolTable *self, - uint16_t id, - uint32_t *length -) { - Slice slice = self->slices.contents[id]; - *length = slice.length; - return &self->characters.contents[slice.offset]; -} - -static uint16_t symbol_table_insert_name( - SymbolTable *self, - const char *name, - uint32_t length 
-) { - int id = symbol_table_id_for_name(self, name, length); - if (id >= 0) return (uint16_t)id; - Slice slice = { - .offset = self->characters.size, - .length = length, - }; - array_grow_by(&self->characters, length + 1); - memcpy(&self->characters.contents[slice.offset], name, length); - self->characters.contents[self->characters.size - 1] = 0; - array_push(&self->slices, slice); - return self->slices.size - 1; -} - -/************ - * QueryStep - ************/ - -static QueryStep query_step__new( - t_symbol symbol, - uint16_t depth, - bool is_immediate -) { - QueryStep step = { - .symbol = symbol, - .depth = depth, - .field = 0, - .alternative_index = NONE, - .negated_field_list_id = 0, - .contains_captures = false, - .is_last_child = false, - .is_named = false, - .is_pass_through = false, - .is_dead_end = false, - .root_pattern_guaranteed = false, - .is_immediate = is_immediate, - .alternative_is_immediate = false, - }; - for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { - step.capture_ids[i] = NONE; - } - return step; -} - -static void query_step__add_capture(QueryStep *self, uint16_t capture_id) { - for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { - if (self->capture_ids[i] == NONE) { - self->capture_ids[i] = capture_id; - break; - } - } -} - -static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { - for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { - if (self->capture_ids[i] == capture_id) { - self->capture_ids[i] = NONE; - while (i + 1 < MAX_STEP_CAPTURE_COUNT) { - if (self->capture_ids[i + 1] == NONE) break; - self->capture_ids[i] = self->capture_ids[i + 1]; - self->capture_ids[i + 1] = NONE; - i++; - } - break; - } - } -} - -/********************** - * StatePredecessorMap - **********************/ - -static inline StatePredecessorMap state_predecessor_map_new( - const t_language *language -) { - return (StatePredecessorMap) { - .contents = ts_calloc( - (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 
1), - sizeof(t_state_id) - ), - }; -} - -static inline void state_predecessor_map_delete(StatePredecessorMap *self) { - ts_free(self->contents); -} - -static inline void state_predecessor_map_add( - StatePredecessorMap *self, - t_state_id state, - t_state_id predecessor -) { - size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); - t_state_id *count = &self->contents[index]; - if ( - *count == 0 || - (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor) - ) { - (*count)++; - self->contents[index + *count] = predecessor; - } -} - -static inline const t_state_id *state_predecessor_map_get( - const StatePredecessorMap *self, - t_state_id state, - unsigned *count -) { - size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); - *count = self->contents[index]; - return &self->contents[index + 1]; -} - -/**************** - * AnalysisState - ****************/ - -static unsigned analysis_state__recursion_depth(const AnalysisState *self) { - unsigned result = 0; - for (unsigned i = 0; i < self->depth; i++) { - t_symbol symbol = self->stack[i].parent_symbol; - for (unsigned j = 0; j < i; j++) { - if (self->stack[j].parent_symbol == symbol) { - result++; - break; - } - } - } - return result; -} - -static inline int analysis_state__compare_position( - AnalysisState *const *self, - AnalysisState *const *other -) { - for (unsigned i = 0; i < (*self)->depth; i++) { - if (i >= (*other)->depth) return -1; - if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1; - if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1; - } - if ((*self)->depth < (*other)->depth) return 1; - if ((*self)->step_index < (*other)->step_index) return -1; - if ((*self)->step_index > (*other)->step_index) return 1; - return 0; -} - -static inline int analysis_state__compare( - AnalysisState *const *self, - AnalysisState *const *other -) { - int result = analysis_state__compare_position(self, other); - if 
(result != 0) return result; - for (unsigned i = 0; i < (*self)->depth; i++) { - if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1; - if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1; - if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1; - if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1; - if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1; - if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1; - } - return 0; -} - -static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { - if (self->depth == 0) { - return &self->stack[0]; - } - return &self->stack[self->depth - 1]; -} - -static inline bool analysis_state__has_supertype(AnalysisState *self, t_symbol symbol) { - for (unsigned i = 0; i < self->depth; i++) { - if (self->stack[i].parent_symbol == symbol) return true; - } - return false; -} - -/****************** - * AnalysisStateSet - ******************/ - -// Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by -// cloning one from scratch. -static inline AnalysisState *analysis_state_pool__clone_or_reuse( - AnalysisStateSet *self, - AnalysisState *borrowed_item -) { - AnalysisState *new_item; - if (self->size) { - new_item = array_pop(self); - } else { - new_item = ts_malloc(sizeof(AnalysisState)); - } - *new_item = *borrowed_item; - return new_item; -} - -// Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this -// set. The set does not contain duplicates, so if the item is already present, it will not be -// inserted, and no clone will be made. -// -// The caller retains ownership of the passed-in memory. However, the clone that is created by this -// function will be managed by the state set. 
-static inline void analysis_state_set__insert_sorted( - AnalysisStateSet *self, - AnalysisStateSet *pool, - AnalysisState *borrowed_item -) { - unsigned index, exists; - array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists); - if (!exists) { - AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); - array_insert(self, index, new_item); - } -} - -// Inserts a clone of the passed-in item at the end position of this list. -// -// IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function -// `analysis_state__compare`) than largest item already in this set. If items are inserted in the -// wrong order, the set will not function properly for future use. -// -// The caller retains ownership of the passed-in memory. However, the clone that is created by this -// function will be managed by the state set. -static inline void analysis_state_set__push( - AnalysisStateSet *self, - AnalysisStateSet *pool, - AnalysisState *borrowed_item -) { - AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); - array_push(self, new_item); -} - -// Removes all items from this set, returning it to an empty state. -static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) { - array_push_all(pool, self); - array_clear(self); -} - -// Releases all memory that is managed with this state set, including any items currently present. -// After calling this function, the set is no longer suitable for use. 
-static inline void analysis_state_set__delete(AnalysisStateSet *self) { - for (unsigned i = 0; i < self->size; i++) { - ts_free(self->contents[i]); - } - array_delete(self); -} - -/**************** - * QueryAnalyzer - ****************/ - -static inline QueryAnalysis query_analysis__new(void) { - return (QueryAnalysis) { - .states = array_new(), - .next_states = array_new(), - .deeper_states = array_new(), - .state_pool = array_new(), - .final_step_indices = array_new(), - .finished_parent_symbols = array_new(), - .did_abort = false, - }; -} - -static inline void query_analysis__delete(QueryAnalysis *self) { - analysis_state_set__delete(&self->states); - analysis_state_set__delete(&self->next_states); - analysis_state_set__delete(&self->deeper_states); - analysis_state_set__delete(&self->state_pool); - array_delete(&self->final_step_indices); - array_delete(&self->finished_parent_symbols); -} - -/*********************** - * AnalysisSubgraphNode - ***********************/ - -static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) { - if (self->state < other->state) return -1; - if (self->state > other->state) return 1; - if (self->child_index < other->child_index) return -1; - if (self->child_index > other->child_index) return 1; - if (self->done < other->done) return -1; - if (self->done > other->done) return 1; - if (self->production_id < other->production_id) return -1; - if (self->production_id > other->production_id) return 1; - return 0; -} - -/********* - * Query - *********/ - -// The `pattern_map` contains a mapping from TSSymbol values to indices in the -// `steps` array. For a given syntax node, the `pattern_map` makes it possible -// to quickly find the starting steps of all of the patterns whose root matches -// that node. 
Each entry has two fields: a `pattern_index`, which identifies one -// of the patterns in the query, and a `step_index`, which indicates the start -// offset of that pattern's steps within the `steps` array. -// -// The entries are sorted by the patterns' root symbols, and lookups use a -// binary search. This ensures that the cost of this initial lookup step -// scales logarithmically with the number of patterns in the query. -// -// This returns `true` if the symbol is present and `false` otherwise. -// If the symbol is not present `*result` is set to the index where the -// symbol should be inserted. -static inline bool ts_query__pattern_map_search( - const t_query *self, - t_symbol needle, - uint32_t *result -) { - uint32_t base_index = self->wildcard_root_pattern_count; - uint32_t size = self->pattern_map.size - base_index; - if (size == 0) { - *result = base_index; - return false; - } - while (size > 1) { - uint32_t half_size = size / 2; - uint32_t mid_index = base_index + half_size; - t_symbol mid_symbol = self->steps.contents[ - self->pattern_map.contents[mid_index].step_index - ].symbol; - if (needle > mid_symbol) base_index = mid_index; - size -= half_size; - } - - t_symbol symbol = self->steps.contents[ - self->pattern_map.contents[base_index].step_index - ].symbol; - - if (needle > symbol) { - base_index++; - if (base_index < self->pattern_map.size) { - symbol = self->steps.contents[ - self->pattern_map.contents[base_index].step_index - ].symbol; - } - } - - *result = base_index; - return needle == symbol; -} - -// Insert a new pattern's start index into the pattern map, maintaining -// the pattern map's ordering invariant. -static inline void ts_query__pattern_map_insert( - t_query *self, - t_symbol symbol, - PatternEntry new_entry -) { - uint32_t index; - ts_query__pattern_map_search(self, symbol, &index); - - // Ensure that the entries are sorted not only by symbol, but also - // by pattern_index. 
This way, states for earlier patterns will be - // initiated first, which allows the ordering of the states array - // to be maintained more efficiently. - while (index < self->pattern_map.size) { - PatternEntry *entry = &self->pattern_map.contents[index]; - if ( - self->steps.contents[entry->step_index].symbol == symbol && - entry->pattern_index < new_entry.pattern_index - ) { - index++; - } else { - break; - } - } - - array_insert(&self->pattern_map, index, new_entry); -} - -// Walk the subgraph for this non-terminal, tracking all of the possible -// sequences of progress within the pattern. -static void ts_query__perform_analysis( - t_query *self, - const AnalysisSubgraphArray *subgraphs, - QueryAnalysis *analysis -) { - unsigned recursion_depth_limit = 0; - unsigned prev_final_step_count = 0; - array_clear(&analysis->final_step_indices); - array_clear(&analysis->finished_parent_symbols); - - for (unsigned iteration = 0;; iteration++) { - if (iteration == MAX_ANALYSIS_ITERATION_COUNT) { - analysis->did_abort = true; - break; - } - - #ifdef DEBUG_ANALYZE_QUERY - printf("Iteration: %u. 
Final step indices:", iteration); - for (unsigned j = 0; j < analysis->final_step_indices.size; j++) { - printf(" %4u", analysis->final_step_indices.contents[j]); - } - printf("\n"); - for (unsigned j = 0; j < analysis->states.size; j++) { - AnalysisState *state = analysis->states.contents[j]; - printf(" %3u: step: %u, stack: [", j, state->step_index); - for (unsigned k = 0; k < state->depth; k++) { - printf( - " {%s, child: %u, state: %4u", - self->language->symbol_names[state->stack[k].parent_symbol], - state->stack[k].child_index, - state->stack[k].parse_state - ); - if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]); - if (state->stack[k].done) printf(", DONE"); - printf("}"); - } - printf(" ]\n"); - } - #endif - - // If no further progress can be made within the current recursion depth limit, then - // bump the depth limit by one, and continue to process the states the exceeded the - // limit. But only allow this if progress has been made since the last time the depth - // limit was increased. - if (analysis->states.size == 0) { - if ( - analysis->deeper_states.size > 0 && - analysis->final_step_indices.size > prev_final_step_count - ) { - #ifdef DEBUG_ANALYZE_QUERY - printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1); - #endif - - prev_final_step_count = analysis->final_step_indices.size; - recursion_depth_limit++; - AnalysisStateSet _states = analysis->states; - analysis->states = analysis->deeper_states; - analysis->deeper_states = _states; - continue; - } - - break; - } - - analysis_state_set__clear(&analysis->next_states, &analysis->state_pool); - for (unsigned j = 0; j < analysis->states.size; j++) { - AnalysisState * const state = analysis->states.contents[j]; - - // For efficiency, it's important to avoid processing the same analysis state more - // than once. To achieve this, keep the states in order of ascending position within - // their hypothetical syntax trees. 
In each iteration of this loop, start by advancing - // the states that have made the least progress. Avoid advancing states that have already - // made more progress. - if (analysis->next_states.size > 0) { - int comparison = analysis_state__compare_position( - &state, - array_back(&analysis->next_states) - ); - if (comparison == 0) { - analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state); - continue; - } else if (comparison > 0) { - #ifdef DEBUG_ANALYZE_QUERY - printf("Terminate iteration at state %u\n", j); - #endif - while (j < analysis->states.size) { - analysis_state_set__push( - &analysis->next_states, - &analysis->state_pool, - analysis->states.contents[j] - ); - j++; - } - break; - } - } - - const t_state_id parse_state = analysis_state__top(state)->parse_state; - const t_symbol parent_symbol = analysis_state__top(state)->parent_symbol; - const t_field_id parent_field_id = analysis_state__top(state)->field_id; - const unsigned child_index = analysis_state__top(state)->child_index; - const QueryStep * const step = &self->steps.contents[state->step_index]; - - unsigned subgraph_index, exists; - array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); - if (!exists) continue; - const AnalysisSubgraph *subgraph = &subgraphs->contents[subgraph_index]; - - // Follow every possible path in the parse table, but only visit states that - // are part of the subgraph for the current symbol. 
- LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state); - while (ts_lookahead_iterator__next(&lookahead_iterator)) { - t_symbol sym = lookahead_iterator.symbol; - - AnalysisSubgraphNode successor = { - .state = parse_state, - .child_index = child_index, - }; - if (lookahead_iterator.action_count) { - const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1]; - if (action->type == TSParseActionTypeShift) { - if (!action->shift.extra) { - successor.state = action->shift.state; - successor.child_index++; - } - } else { - continue; - } - } else if (lookahead_iterator.next_state != 0) { - successor.state = lookahead_iterator.next_state; - successor.child_index++; - } else { - continue; - } - - unsigned node_index; - array_search_sorted_with( - &subgraph->nodes, - analysis_subgraph_node__compare, &successor, - &node_index, &exists - ); - while (node_index < subgraph->nodes.size) { - AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++]; - if (node->state != successor.state || node->child_index != successor.child_index) break; - - // Use the subgraph to determine what alias and field will eventually be applied - // to this child node. - t_symbol alias = ts_language_alias_at(self->language, node->production_id, child_index); - t_symbol visible_symbol = alias - ? alias - : self->language->symbol_metadata[sym].visible - ? self->language->public_symbol_map[sym] - : 0; - t_field_id field_id = parent_field_id; - if (!field_id) { - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); - for (; field_map != field_map_end; field_map++) { - if (!field_map->inherited && field_map->child_index == child_index) { - field_id = field_map->field_id; - break; - } - } - } - - // Create a new state that has advanced past this hypothetical subtree. 
- AnalysisState next_state = *state; - AnalysisStateEntry *next_state_top = analysis_state__top(&next_state); - next_state_top->child_index = successor.child_index; - next_state_top->parse_state = successor.state; - if (node->done) next_state_top->done = true; - - // Determine if this hypothetical child node would match the current step - // of the query pattern. - bool does_match = false; - if (visible_symbol) { - does_match = true; - if (step->symbol == WILDCARD_SYMBOL) { - if ( - step->is_named && - !self->language->symbol_metadata[visible_symbol].named - ) does_match = false; - } else if (step->symbol != visible_symbol) { - does_match = false; - } - if (step->field && step->field != field_id) { - does_match = false; - } - if ( - step->supertype_symbol && - !analysis_state__has_supertype(state, step->supertype_symbol) - ) does_match = false; - } - - // If this child is hidden, then descend into it and walk through its children. - // If the top entry of the stack is at the end of its rule, then that entry can - // be replaced. Otherwise, push a new entry onto the stack. - else if (sym >= self->language->token_count) { - if (!next_state_top->done) { - if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) { - #ifdef DEBUG_ANALYZE_QUERY - printf("Exceeded depth limit for state %u\n", j); - #endif - - analysis->did_abort = true; - continue; - } - - next_state.depth++; - next_state_top = analysis_state__top(&next_state); - } - - *next_state_top = (AnalysisStateEntry) { - .parse_state = parse_state, - .parent_symbol = sym, - .child_index = 0, - .field_id = field_id, - .done = false, - }; - - if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) { - analysis_state_set__insert_sorted( - &analysis->deeper_states, - &analysis->state_pool, - &next_state - ); - continue; - } - } - - // Pop from the stack when this state reached the end of its current syntax node. 
- while (next_state.depth > 0 && next_state_top->done) { - next_state.depth--; - next_state_top = analysis_state__top(&next_state); - } - - // If this hypothetical child did match the current step of the query pattern, - // then advance to the next step at the current depth. This involves skipping - // over any descendant steps of the current child. - const QueryStep *next_step = step; - if (does_match) { - for (;;) { - next_state.step_index++; - next_step = &self->steps.contents[next_state.step_index]; - if ( - next_step->depth == PATTERN_DONE_MARKER || - next_step->depth <= step->depth - ) break; - } - } else if (successor.state == parse_state) { - continue; - } - - for (;;) { - // Skip pass-through states. Although these states have alternatives, they are only - // used to implement repetitions, and query analysis does not need to process - // repetitions in order to determine whether steps are possible and definite. - if (next_step->is_pass_through) { - next_state.step_index++; - next_step++; - continue; - } - - // If the pattern is finished or hypothetical parent node is complete, then - // record that matching can terminate at this step of the pattern. Otherwise, - // add this state to the list of states to process on the next iteration. - if (!next_step->is_dead_end) { - bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth; - if (did_finish_pattern) { - array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol); - } else if (next_state.depth == 0) { - array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index); - } else { - analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state); - } - } - - // If the state has advanced to a step with an alternative step, then add another state - // at that alternative step. 
This process is simpler than the process of actually matching a - // pattern during query execution, because for the purposes of query analysis, there is no - // need to process repetitions. - if ( - does_match && - next_step->alternative_index != NONE && - next_step->alternative_index > next_state.step_index - ) { - next_state.step_index = next_step->alternative_index; - next_step = &self->steps.contents[next_state.step_index]; - } else { - break; - } - } - } - } - } - - AnalysisStateSet _states = analysis->states; - analysis->states = analysis->next_states; - analysis->next_states = _states; - } -} - -static bool ts_query__analyze_patterns(t_query *self, unsigned *error_offset) { - Array(uint16_t) non_rooted_pattern_start_steps = array_new(); - for (unsigned i = 0; i < self->pattern_map.size; i++) { - PatternEntry *pattern = &self->pattern_map.contents[i]; - if (!pattern->is_rooted) { - QueryStep *step = &self->steps.contents[pattern->step_index]; - if (step->symbol != WILDCARD_SYMBOL) { - array_push(&non_rooted_pattern_start_steps, i); - } - } - } - - // Walk forward through all of the steps in the query, computing some - // basic information about each step. Mark all of the steps that contain - // captures, and record the indices of all of the steps that have child steps. 
- Array(uint32_t) parent_step_indices = array_new(); - for (unsigned i = 0; i < self->steps.size; i++) { - QueryStep *step = &self->steps.contents[i]; - if (step->depth == PATTERN_DONE_MARKER) { - step->parent_pattern_guaranteed = true; - step->root_pattern_guaranteed = true; - continue; - } - - bool has_children = false; - bool is_wildcard = step->symbol == WILDCARD_SYMBOL; - step->contains_captures = step->capture_ids[0] != NONE; - for (unsigned j = i + 1; j < self->steps.size; j++) { - QueryStep *next_step = &self->steps.contents[j]; - if ( - next_step->depth == PATTERN_DONE_MARKER || - next_step->depth <= step->depth - ) break; - if (next_step->capture_ids[0] != NONE) { - step->contains_captures = true; - } - if (!is_wildcard) { - next_step->root_pattern_guaranteed = true; - next_step->parent_pattern_guaranteed = true; - } - has_children = true; - } - - if (has_children && !is_wildcard) { - array_push(&parent_step_indices, i); - } - } - - // For every parent symbol in the query, initialize an 'analysis subgraph'. - // This subgraph lists all of the states in the parse table that are directly - // involved in building subtrees for this symbol. - // - // In addition to the parent symbols in the query, construct subgraphs for all - // of the hidden symbols in the grammar, because these might occur within - // one of the parent nodes, such that their children appear to belong to the - // parent. 
- AnalysisSubgraphArray subgraphs = array_new(); - for (unsigned i = 0; i < parent_step_indices.size; i++) { - uint32_t parent_step_index = parent_step_indices.contents[i]; - t_symbol parent_symbol = self->steps.contents[parent_step_index].symbol; - AnalysisSubgraph subgraph = { .symbol = parent_symbol }; - array_insert_sorted_by(&subgraphs, .symbol, subgraph); - } - for (t_symbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) { - if (!ts_language_symbol_metadata(self->language, sym).visible) { - AnalysisSubgraph subgraph = { .symbol = sym }; - array_insert_sorted_by(&subgraphs, .symbol, subgraph); - } - } - - // Scan the parse table to find the data needed to populate these subgraphs. - // Collect three things during this scan: - // 1) All of the parse states where one of these symbols can start. - // 2) All of the parse states where one of these symbols can end, along - // with information about the node that would be created. - // 3) A list of predecessor states for each state. 
- StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language); - for (t_state_id state = 1; state < (uint16_t)self->language->state_count; state++) { - unsigned subgraph_index, exists; - LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state); - while (ts_lookahead_iterator__next(&lookahead_iterator)) { - if (lookahead_iterator.action_count) { - for (unsigned i = 0; i < lookahead_iterator.action_count; i++) { - const TSParseAction *action = &lookahead_iterator.actions[i]; - if (action->type == TSParseActionTypeReduce) { - const t_symbol *aliases, *aliases_end; - ts_language_aliases_for_symbol( - self->language, - action->reduce.symbol, - &aliases, - &aliases_end - ); - for (const t_symbol *symbol = aliases; symbol < aliases_end; symbol++) { - array_search_sorted_by( - &subgraphs, - .symbol, - *symbol, - &subgraph_index, - &exists - ); - if (exists) { - AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { - array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { - .state = state, - .production_id = action->reduce.production_id, - .child_index = action->reduce.child_count, - .done = true, - })); - } - } - } - } else if (action->type == TSParseActionTypeShift && !action->shift.extra) { - t_state_id next_state = action->shift.state; - state_predecessor_map_add(&predecessor_map, next_state, state); - } - } - } else if (lookahead_iterator.next_state != 0) { - if (lookahead_iterator.next_state != state) { - state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); - } - if (ts_language_state_is_primary(self->language, state)) { - const t_symbol *aliases, *aliases_end; - ts_language_aliases_for_symbol( - self->language, - lookahead_iterator.symbol, - &aliases, - &aliases_end - ); - for (const t_symbol *symbol = aliases; symbol < aliases_end; symbol++) { - array_search_sorted_by( - &subgraphs, - .symbol, - 
*symbol, - &subgraph_index, - &exists - ); - if (exists) { - AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - if ( - subgraph->start_states.size == 0 || - *array_back(&subgraph->start_states) != state - ) - array_push(&subgraph->start_states, state); - } - } - } - } - } - } - - // For each subgraph, compute the preceding states by walking backward - // from the end states using the predecessor map. - Array(AnalysisSubgraphNode) next_nodes = array_new(); - for (unsigned i = 0; i < subgraphs.size; i++) { - AnalysisSubgraph *subgraph = &subgraphs.contents[i]; - if (subgraph->nodes.size == 0) { - array_delete(&subgraph->start_states); - array_erase(&subgraphs, i); - i--; - continue; - } - array_assign(&next_nodes, &subgraph->nodes); - while (next_nodes.size > 0) { - AnalysisSubgraphNode node = array_pop(&next_nodes); - if (node.child_index > 1) { - unsigned predecessor_count; - const t_state_id *predecessors = state_predecessor_map_get( - &predecessor_map, - node.state, - &predecessor_count - ); - for (unsigned j = 0; j < predecessor_count; j++) { - AnalysisSubgraphNode predecessor_node = { - .state = predecessors[j], - .child_index = node.child_index - 1, - .production_id = node.production_id, - .done = false, - }; - unsigned index, exists; - array_search_sorted_with( - &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node, - &index, &exists - ); - if (!exists) { - array_insert(&subgraph->nodes, index, predecessor_node); - array_push(&next_nodes, predecessor_node); - } - } - } - } - } - - #ifdef DEBUG_ANALYZE_QUERY - printf("\nSubgraphs:\n"); - for (unsigned i = 0; i < subgraphs.size; i++) { - AnalysisSubgraph *subgraph = &subgraphs.contents[i]; - printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol)); - for (unsigned j = 0; j < subgraph->start_states.size; j++) { - printf( - " {state: %u}\n", - subgraph->start_states.contents[j] - ); - } - for (unsigned j = 0; j < subgraph->nodes.size; j++) 
{ - AnalysisSubgraphNode *node = &subgraph->nodes.contents[j]; - printf( - " {state: %u, child_index: %u, production_id: %u, done: %d}\n", - node->state, node->child_index, node->production_id, node->done - ); - } - printf("\n"); - } - #endif - - // For each non-terminal pattern, determine if the pattern can successfully match, - // and identify all of the possible children within the pattern where matching could fail. - bool all_patterns_are_valid = true; - QueryAnalysis analysis = query_analysis__new(); - for (unsigned i = 0; i < parent_step_indices.size; i++) { - uint16_t parent_step_index = parent_step_indices.contents[i]; - uint16_t parent_depth = self->steps.contents[parent_step_index].depth; - t_symbol parent_symbol = self->steps.contents[parent_step_index].symbol; - if (parent_symbol == ts_builtin_sym_error) continue; - - // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's - // root symbol is a terminal, then return an error. - unsigned subgraph_index, exists; - array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); - if (!exists) { - unsigned first_child_step_index = parent_step_index + 1; - uint32_t j, child_exists; - array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists); - assert(child_exists); - *error_offset = self->step_offsets.contents[j].byte_offset; - all_patterns_are_valid = false; - break; - } - - // Initialize an analysis state at every parse state in the table where - // this parent symbol can occur. 
- AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - analysis_state_set__clear(&analysis.states, &analysis.state_pool); - analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); - for (unsigned j = 0; j < subgraph->start_states.size; j++) { - t_state_id parse_state = subgraph->start_states.contents[j]; - analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { - .step_index = parent_step_index + 1, - .stack = { - [0] = { - .parse_state = parse_state, - .parent_symbol = parent_symbol, - .child_index = 0, - .field_id = 0, - .done = false, - }, - }, - .depth = 1, - .root_symbol = parent_symbol, - })); - } - - #ifdef DEBUG_ANALYZE_QUERY - printf( - "\nWalk states for %s:\n", - ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol) - ); - #endif - - analysis.did_abort = false; - ts_query__perform_analysis(self, &subgraphs, &analysis); - - // If this pattern could not be fully analyzed, then every step should - // be considered fallible. - if (analysis.did_abort) { - for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) { - QueryStep *step = &self->steps.contents[j]; - if ( - step->depth <= parent_depth || - step->depth == PATTERN_DONE_MARKER - ) break; - if (!step->is_dead_end) { - step->parent_pattern_guaranteed = false; - step->root_pattern_guaranteed = false; - } - } - continue; - } - - // If this pattern cannot match, store the pattern index so that it can be - // returned to the caller. 
- if (analysis.finished_parent_symbols.size == 0) { - assert(analysis.final_step_indices.size > 0); - uint16_t impossible_step_index = *array_back(&analysis.final_step_indices); - uint32_t j, impossible_exists; - array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists); - if (j >= self->step_offsets.size) j = self->step_offsets.size - 1; - *error_offset = self->step_offsets.contents[j].byte_offset; - all_patterns_are_valid = false; - break; - } - - // Mark as fallible any step where a match terminated. - // Later, this property will be propagated to all of the step's predecessors. - for (unsigned j = 0; j < analysis.final_step_indices.size; j++) { - uint32_t final_step_index = analysis.final_step_indices.contents[j]; - QueryStep *step = &self->steps.contents[final_step_index]; - if ( - step->depth != PATTERN_DONE_MARKER && - step->depth > parent_depth && - !step->is_dead_end - ) { - step->parent_pattern_guaranteed = false; - step->root_pattern_guaranteed = false; - } - } - } - - // Mark as indefinite any step with captures that are used in predicates. - Array(uint16_t) predicate_capture_ids = array_new(); - for (unsigned i = 0; i < self->patterns.size; i++) { - QueryPattern *pattern = &self->patterns.contents[i]; - - // Gather all of the captures that are used in predicates for this pattern. - array_clear(&predicate_capture_ids); - for ( - unsigned start = pattern->predicate_steps.offset, - end = start + pattern->predicate_steps.length, - j = start; j < end; j++ - ) { - t_query_predicate_step *step = &self->predicate_steps.contents[j]; - if (step->type == TSQueryPredicateStepTypeCapture) { - uint16_t value_id = step->value_id; - array_insert_sorted_by(&predicate_capture_ids, , value_id); - } - } - - // Find all of the steps that have these captures. 
- for ( - unsigned start = pattern->steps.offset, - end = start + pattern->steps.length, - j = start; j < end; j++ - ) { - QueryStep *step = &self->steps.contents[j]; - for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) { - uint16_t capture_id = step->capture_ids[k]; - if (capture_id == NONE) break; - unsigned index, exists; - array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists); - if (exists) { - step->root_pattern_guaranteed = false; - break; - } - } - } - } - - // Propagate fallibility. If a pattern is fallible at a given step, then it is - // fallible at all of its preceding steps. - bool done = self->steps.size == 0; - while (!done) { - done = true; - for (unsigned i = self->steps.size - 1; i > 0; i--) { - QueryStep *step = &self->steps.contents[i]; - if (step->depth == PATTERN_DONE_MARKER) continue; - - // Determine if this step is definite or has definite alternatives. - bool parent_pattern_guaranteed = false; - for (;;) { - if (step->root_pattern_guaranteed) { - parent_pattern_guaranteed = true; - break; - } - if (step->alternative_index == NONE || step->alternative_index < i) { - break; - } - step = &self->steps.contents[step->alternative_index]; - } - - // If not, mark its predecessor as indefinite. - if (!parent_pattern_guaranteed) { - QueryStep *prev_step = &self->steps.contents[i - 1]; - if ( - !prev_step->is_dead_end && - prev_step->depth != PATTERN_DONE_MARKER && - prev_step->root_pattern_guaranteed - ) { - prev_step->root_pattern_guaranteed = false; - done = false; - } - } - } - } - - #ifdef DEBUG_ANALYZE_QUERY - printf("Steps:\n"); - for (unsigned i = 0; i < self->steps.size; i++) { - QueryStep *step = &self->steps.contents[i]; - if (step->depth == PATTERN_DONE_MARKER) { - printf(" %u: DONE\n", i); - } else { - printf( - " %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n", - i, - (step->symbol == WILDCARD_SYMBOL) - ? 
"ANY" - : ts_language_symbol_name(self->language, step->symbol), - (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"), - step->depth, - step->parent_pattern_guaranteed, - step->root_pattern_guaranteed - ); - } - } - #endif - - // Determine which repetition symbols in this language have the possibility - // of matching non-rooted patterns in this query. These repetition symbols - // prevent certain optimizations with range restrictions. - analysis.did_abort = false; - for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) { - uint16_t pattern_entry_index = non_rooted_pattern_start_steps.contents[i]; - PatternEntry *pattern_entry = &self->pattern_map.contents[pattern_entry_index]; - - analysis_state_set__clear(&analysis.states, &analysis.state_pool); - analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); - for (unsigned j = 0; j < subgraphs.size; j++) { - AnalysisSubgraph *subgraph = &subgraphs.contents[j]; - TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol); - if (metadata.visible || metadata.named) continue; - - for (uint32_t k = 0; k < subgraph->start_states.size; k++) { - t_state_id parse_state = subgraph->start_states.contents[k]; - analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { - .step_index = pattern_entry->step_index, - .stack = { - [0] = { - .parse_state = parse_state, - .parent_symbol = subgraph->symbol, - .child_index = 0, - .field_id = 0, - .done = false, - }, - }, - .root_symbol = subgraph->symbol, - .depth = 1, - })); - } - } - - #ifdef DEBUG_ANALYZE_QUERY - printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index); - #endif - - ts_query__perform_analysis( - self, - &subgraphs, - &analysis - ); - - if (analysis.finished_parent_symbols.size > 0) { - self->patterns.contents[pattern_entry->pattern_index].is_non_local = true; - } - - for (unsigned k = 0; k < 
analysis.finished_parent_symbols.size; k++) { - t_symbol symbol = analysis.finished_parent_symbols.contents[k]; - array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol); - } - } - - #ifdef DEBUG_ANALYZE_QUERY - if (self->repeat_symbols_with_rootless_patterns.size > 0) { - printf("\nRepetition symbols with rootless patterns:\n"); - printf("aborted analysis: %d\n", analysis.did_abort); - for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) { - TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i]; - printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol)); - } - printf("\n"); - } - #endif - - // Cleanup - for (unsigned i = 0; i < subgraphs.size; i++) { - array_delete(&subgraphs.contents[i].start_states); - array_delete(&subgraphs.contents[i].nodes); - } - array_delete(&subgraphs); - query_analysis__delete(&analysis); - array_delete(&next_nodes); - array_delete(&non_rooted_pattern_start_steps); - array_delete(&parent_step_indices); - array_delete(&predicate_capture_ids); - state_predecessor_map_delete(&predecessor_map); - - return all_patterns_are_valid; -} - -static void ts_query__add_negated_fields( - t_query *self, - uint16_t step_index, - t_field_id *field_ids, - uint16_t field_count -) { - QueryStep *step = &self->steps.contents[step_index]; - - // The negated field array stores a list of field lists, separated by zeros. - // Try to find the start index of an existing list that matches this new list. - bool failed_match = false; - unsigned match_count = 0; - unsigned start_i = 0; - for (unsigned i = 0; i < self->negated_fields.size; i++) { - t_field_id existing_field_id = self->negated_fields.contents[i]; - - // At each zero value, terminate the match attempt. If we've exactly - // matched the new field list, then reuse this index. Otherwise, - // start over the matching process. 
- if (existing_field_id == 0) { - if (match_count == field_count) { - step->negated_field_list_id = start_i; - return; - } else { - start_i = i + 1; - match_count = 0; - failed_match = false; - } - } - - // If the existing list matches our new list so far, then advance - // to the next element of the new list. - else if ( - match_count < field_count && - existing_field_id == field_ids[match_count] && - !failed_match - ) { - match_count++; - } - - // Otherwise, this existing list has failed to match. - else { - match_count = 0; - failed_match = true; - } - } - - step->negated_field_list_id = self->negated_fields.size; - array_extend(&self->negated_fields, field_count, field_ids); - array_push(&self->negated_fields, 0); -} - -static t_query_error ts_query__parse_string_literal( - t_query *self, - Stream *stream -) { - const char *string_start = stream->input; - if (stream->next != '"') return TSQueryErrorSyntax; - stream_advance(stream); - const char *prev_position = stream->input; - - bool is_escaped = false; - array_clear(&self->string_buffer); - for (;;) { - if (is_escaped) { - is_escaped = false; - switch (stream->next) { - case 'n': - array_push(&self->string_buffer, '\n'); - break; - case 'r': - array_push(&self->string_buffer, '\r'); - break; - case 't': - array_push(&self->string_buffer, '\t'); - break; - case '0': - array_push(&self->string_buffer, '\0'); - break; - default: - array_extend(&self->string_buffer, stream->next_size, stream->input); - break; - } - prev_position = stream->input + stream->next_size; - } else { - if (stream->next == '\\') { - array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); - prev_position = stream->input + 1; - is_escaped = true; - } else if (stream->next == '"') { - array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); - stream_advance(stream); - return TSQueryErrorNone; - } else if (stream->next == '\n') { - stream_reset(stream, string_start); - 
return TSQueryErrorSyntax; - } - } - if (!stream_advance(stream)) { - stream_reset(stream, string_start); - return TSQueryErrorSyntax; - } - } -} - -// Parse a single predicate associated with a pattern, adding it to the -// query's internal `predicate_steps` array. Predicates are arbitrary -// S-expressions associated with a pattern which are meant to be handled at -// a higher level of abstraction, such as the Rust/JavaScript bindings. They -// can contain '@'-prefixed capture names, double-quoted strings, and bare -// symbols, which also represent strings. -static t_query_error ts_query__parse_predicate( - t_query *self, - Stream *stream -) { - if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; - const char *predicate_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - predicate_name); - uint16_t id = symbol_table_insert_name( - &self->predicate_values, - predicate_name, - length - ); - array_push(&self->predicate_steps, ((t_query_predicate_step) { - .type = TSQueryPredicateStepTypeString, - .value_id = id, - })); - stream_skip_whitespace(stream); - - for (;;) { - if (stream->next == ')') { - stream_advance(stream); - stream_skip_whitespace(stream); - array_push(&self->predicate_steps, ((t_query_predicate_step) { - .type = TSQueryPredicateStepTypeDone, - .value_id = 0, - })); - break; - } - - // Parse an '@'-prefixed capture name - else if (stream->next == '@') { - stream_advance(stream); - - // Parse the capture name - if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; - const char *capture_name = stream->input; - stream_scan_identifier(stream); - uint32_t capture_length = (uint32_t)(stream->input - capture_name); - - // Add the capture id to the first step of the pattern - int capture_id = symbol_table_id_for_name( - &self->captures, - capture_name, - capture_length - ); - if (capture_id == -1) { - stream_reset(stream, capture_name); - return TSQueryErrorCapture; - } - - 
array_push(&self->predicate_steps, ((t_query_predicate_step) { - .type = TSQueryPredicateStepTypeCapture, - .value_id = capture_id, - })); - } - - // Parse a string literal - else if (stream->next == '"') { - t_query_error e = ts_query__parse_string_literal(self, stream); - if (e) return e; - uint16_t query_id = symbol_table_insert_name( - &self->predicate_values, - self->string_buffer.contents, - self->string_buffer.size - ); - array_push(&self->predicate_steps, ((t_query_predicate_step) { - .type = TSQueryPredicateStepTypeString, - .value_id = query_id, - })); - } - - // Parse a bare symbol - else if (stream_is_ident_start(stream)) { - const char *symbol_start = stream->input; - stream_scan_identifier(stream); - uint32_t symbol_length = (uint32_t)(stream->input - symbol_start); - uint16_t query_id = symbol_table_insert_name( - &self->predicate_values, - symbol_start, - symbol_length - ); - array_push(&self->predicate_steps, ((t_query_predicate_step) { - .type = TSQueryPredicateStepTypeString, - .value_id = query_id, - })); - } - - else { - return TSQueryErrorSyntax; - } - - stream_skip_whitespace(stream); - } - - return 0; -} - -// Read one S-expression pattern from the stream, and incorporate it into -// the query's internal state machine representation. For nested patterns, -// this function calls itself recursively. -// -// The caller is responsible for passing in a dedicated CaptureQuantifiers. -// These should not be shared between different calls to ts_query__parse_pattern! -static t_query_error ts_query__parse_pattern( - t_query *self, - Stream *stream, - uint32_t depth, - bool is_immediate, - CaptureQuantifiers *capture_quantifiers -) { - if (stream->next == 0) return TSQueryErrorSyntax; - if (stream->next == ')' || stream->next == ']') return PARENT_DONE; - - const uint32_t starting_step_index = self->steps.size; - - // Store the byte offset of each step in the query. 
- if ( - self->step_offsets.size == 0 || - array_back(&self->step_offsets)->step_index != starting_step_index - ) { - array_push(&self->step_offsets, ((StepOffset) { - .step_index = starting_step_index, - .byte_offset = stream_offset(stream), - })); - } - - // An open bracket is the start of an alternation. - if (stream->next == '[') { - stream_advance(stream); - stream_skip_whitespace(stream); - - // Parse each branch, and add a placeholder step in between the branches. - Array(uint32_t) branch_step_indices = array_new(); - CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new(); - for (;;) { - uint32_t start_index = self->steps.size; - t_query_error e = ts_query__parse_pattern( - self, - stream, - depth, - is_immediate, - &branch_capture_quantifiers - ); - - if (e == PARENT_DONE) { - if (stream->next == ']' && branch_step_indices.size > 0) { - stream_advance(stream); - break; - } - e = TSQueryErrorSyntax; - } - if (e) { - capture_quantifiers_delete(&branch_capture_quantifiers); - array_delete(&branch_step_indices); - return e; - } - - if (start_index == starting_step_index) { - capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers); - } else { - capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers); - } - - array_push(&branch_step_indices, start_index); - array_push(&self->steps, query_step__new(0, depth, false)); - capture_quantifiers_clear(&branch_capture_quantifiers); - } - (void)array_pop(&self->steps); - - // For all of the branches except for the last one, add the subsequent branch as an - // alternative, and link the end of the branch to the current end of the steps. 
- for (unsigned i = 0; i < branch_step_indices.size - 1; i++) { - uint32_t step_index = branch_step_indices.contents[i]; - uint32_t next_step_index = branch_step_indices.contents[i + 1]; - QueryStep *start_step = &self->steps.contents[step_index]; - QueryStep *end_step = &self->steps.contents[next_step_index - 1]; - start_step->alternative_index = next_step_index; - end_step->alternative_index = self->steps.size; - end_step->is_dead_end = true; - } - - capture_quantifiers_delete(&branch_capture_quantifiers); - array_delete(&branch_step_indices); - } - - // An open parenthesis can be the start of three possible constructs: - // * A grouped sequence - // * A predicate - // * A named node - else if (stream->next == '(') { - stream_advance(stream); - stream_skip_whitespace(stream); - - // If this parenthesis is followed by a node, then it represents a grouped sequence. - if (stream->next == '(' || stream->next == '"' || stream->next == '[') { - bool child_is_immediate = is_immediate; - CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); - for (;;) { - if (stream->next == '.') { - child_is_immediate = true; - stream_advance(stream); - stream_skip_whitespace(stream); - } - t_query_error e = ts_query__parse_pattern( - self, - stream, - depth, - child_is_immediate, - &child_capture_quantifiers - ); - if (e == PARENT_DONE) { - if (stream->next == ')') { - stream_advance(stream); - break; - } - e = TSQueryErrorSyntax; - } - if (e) { - capture_quantifiers_delete(&child_capture_quantifiers); - return e; - } - - capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); - capture_quantifiers_clear(&child_capture_quantifiers); - child_is_immediate = false; - } - - capture_quantifiers_delete(&child_capture_quantifiers); - } - - // A dot/pound character indicates the start of a predicate. - else if (stream->next == '.' 
|| stream->next == '#') { - stream_advance(stream); - return ts_query__parse_predicate(self, stream); - } - - // Otherwise, this parenthesis is the start of a named node. - else { - t_symbol symbol; - - // Parse a normal node name - if (stream_is_ident_start(stream)) { - const char *node_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - node_name); - - // Parse the wildcard symbol - if (length == 1 && node_name[0] == '_') { - symbol = WILDCARD_SYMBOL; - } - - else { - symbol = ts_language_symbol_for_name( - self->language, - node_name, - length, - true - ); - if (!symbol) { - stream_reset(stream, node_name); - return TSQueryErrorNodeType; - } - } - } else { - return TSQueryErrorSyntax; - } - - // Add a step for the node. - array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); - QueryStep *step = array_back(&self->steps); - if (ts_language_symbol_metadata(self->language, symbol).supertype) { - step->supertype_symbol = step->symbol; - step->symbol = WILDCARD_SYMBOL; - } - if (symbol == WILDCARD_SYMBOL) { - step->is_named = true; - } - - stream_skip_whitespace(stream); - - if (stream->next == '/') { - stream_advance(stream); - if (!stream_is_ident_start(stream)) { - return TSQueryErrorSyntax; - } - - const char *node_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - node_name); - - step->symbol = ts_language_symbol_for_name( - self->language, - node_name, - length, - true - ); - if (!step->symbol) { - stream_reset(stream, node_name); - return TSQueryErrorNodeType; - } - - stream_skip_whitespace(stream); - } - - // Parse the child patterns - bool child_is_immediate = false; - uint16_t last_child_step_index = 0; - uint16_t negated_field_count = 0; - t_field_id negated_field_ids[MAX_NEGATED_FIELD_COUNT]; - CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); - for (;;) { - // Parse a negated field assertion - if (stream->next == 
'!') { - stream_advance(stream); - stream_skip_whitespace(stream); - if (!stream_is_ident_start(stream)) { - capture_quantifiers_delete(&child_capture_quantifiers); - return TSQueryErrorSyntax; - } - const char *field_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - field_name); - stream_skip_whitespace(stream); - - t_field_id field_id = ts_language_field_id_for_name( - self->language, - field_name, - length - ); - if (!field_id) { - stream->input = field_name; - capture_quantifiers_delete(&child_capture_quantifiers); - return TSQueryErrorField; - } - - // Keep the field ids sorted. - if (negated_field_count < MAX_NEGATED_FIELD_COUNT) { - negated_field_ids[negated_field_count] = field_id; - negated_field_count++; - } - - continue; - } - - // Parse a sibling anchor - if (stream->next == '.') { - child_is_immediate = true; - stream_advance(stream); - stream_skip_whitespace(stream); - } - - uint16_t step_index = self->steps.size; - t_query_error e = ts_query__parse_pattern( - self, - stream, - depth + 1, - child_is_immediate, - &child_capture_quantifiers - ); - if (e == PARENT_DONE) { - if (stream->next == ')') { - if (child_is_immediate) { - if (last_child_step_index == 0) { - capture_quantifiers_delete(&child_capture_quantifiers); - return TSQueryErrorSyntax; - } - self->steps.contents[last_child_step_index].is_last_child = true; - } - - if (negated_field_count) { - ts_query__add_negated_fields( - self, - starting_step_index, - negated_field_ids, - negated_field_count - ); - } - - stream_advance(stream); - break; - } - e = TSQueryErrorSyntax; - } - if (e) { - capture_quantifiers_delete(&child_capture_quantifiers); - return e; - } - - capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); - - last_child_step_index = step_index; - child_is_immediate = false; - capture_quantifiers_clear(&child_capture_quantifiers); - } - capture_quantifiers_delete(&child_capture_quantifiers); - } - } - - // 
Parse a wildcard pattern - else if (stream->next == '_') { - stream_advance(stream); - stream_skip_whitespace(stream); - - // Add a step that matches any kind of node - array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); - } - - // Parse a double-quoted anonymous leaf node expression - else if (stream->next == '"') { - const char *string_start = stream->input; - t_query_error e = ts_query__parse_string_literal(self, stream); - if (e) return e; - - // Add a step for the node - t_symbol symbol = ts_language_symbol_for_name( - self->language, - self->string_buffer.contents, - self->string_buffer.size, - false - ); - if (!symbol) { - stream_reset(stream, string_start + 1); - return TSQueryErrorNodeType; - } - array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); - } - - // Parse a field-prefixed pattern - else if (stream_is_ident_start(stream)) { - // Parse the field name - const char *field_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - field_name); - stream_skip_whitespace(stream); - - if (stream->next != ':') { - stream_reset(stream, field_name); - return TSQueryErrorSyntax; - } - stream_advance(stream); - stream_skip_whitespace(stream); - - // Parse the pattern - CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new(); - t_query_error e = ts_query__parse_pattern( - self, - stream, - depth, - is_immediate, - &field_capture_quantifiers - ); - if (e) { - capture_quantifiers_delete(&field_capture_quantifiers); - if (e == PARENT_DONE) e = TSQueryErrorSyntax; - return e; - } - - // Add the field name to the first step of the pattern - t_field_id field_id = ts_language_field_id_for_name( - self->language, - field_name, - length - ); - if (!field_id) { - stream->input = field_name; - return TSQueryErrorField; - } - - uint32_t step_index = starting_step_index; - QueryStep *step = &self->steps.contents[step_index]; - for (;;) { - step->field = field_id; - 
if ( - step->alternative_index != NONE && - step->alternative_index > step_index && - step->alternative_index < self->steps.size - ) { - step_index = step->alternative_index; - step = &self->steps.contents[step_index]; - } else { - break; - } - } - - capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers); - capture_quantifiers_delete(&field_capture_quantifiers); - } - - else { - return TSQueryErrorSyntax; - } - - stream_skip_whitespace(stream); - - // Parse suffixes modifiers for this pattern - t_quantifier quantifier = TSQuantifierOne; - for (;;) { - // Parse the one-or-more operator. - if (stream->next == '+') { - quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier); - - stream_advance(stream); - stream_skip_whitespace(stream); - - QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); - repeat_step.alternative_index = starting_step_index; - repeat_step.is_pass_through = true; - repeat_step.alternative_is_immediate = true; - array_push(&self->steps, repeat_step); - } - - // Parse the zero-or-more repetition operator. - else if (stream->next == '*') { - quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier); - - stream_advance(stream); - stream_skip_whitespace(stream); - - QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); - repeat_step.alternative_index = starting_step_index; - repeat_step.is_pass_through = true; - repeat_step.alternative_is_immediate = true; - array_push(&self->steps, repeat_step); - - // Stop when `step->alternative_index` is `NONE` or it points to - // `repeat_step` or beyond. Note that having just been pushed, - // `repeat_step` occupies slot `self->steps.size - 1`. 
- QueryStep *step = &self->steps.contents[starting_step_index]; - while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) { - step = &self->steps.contents[step->alternative_index]; - } - step->alternative_index = self->steps.size; - } - - // Parse the optional operator. - else if (stream->next == '?') { - quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier); - - stream_advance(stream); - stream_skip_whitespace(stream); - - QueryStep *step = &self->steps.contents[starting_step_index]; - while (step->alternative_index != NONE && step->alternative_index < self->steps.size) { - step = &self->steps.contents[step->alternative_index]; - } - step->alternative_index = self->steps.size; - } - - // Parse an '@'-prefixed capture pattern - else if (stream->next == '@') { - stream_advance(stream); - if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; - const char *capture_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = (uint32_t)(stream->input - capture_name); - stream_skip_whitespace(stream); - - // Add the capture id to the first step of the pattern - uint16_t capture_id = symbol_table_insert_name( - &self->captures, - capture_name, - length - ); - - // Add the capture quantifier - capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne); - - uint32_t step_index = starting_step_index; - for (;;) { - QueryStep *step = &self->steps.contents[step_index]; - query_step__add_capture(step, capture_id); - if ( - step->alternative_index != NONE && - step->alternative_index > step_index && - step->alternative_index < self->steps.size - ) { - step_index = step->alternative_index; - } else { - break; - } - } - } - - // No more suffix modifiers - else { - break; - } - } - - capture_quantifiers_mul(capture_quantifiers, quantifier); - - return 0; -} - -t_query *ts_query_new( - const t_language *language, - const char *source, - uint32_t source_len, - uint32_t *error_offset, - 
t_query_error *error_type -) { - if ( - !language || - language->version > TREE_SITTER_LANGUAGE_VERSION || - language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION - ) { - *error_type = TSQueryErrorLanguage; - return NULL; - } - - t_query *self = ts_malloc(sizeof(t_query)); - *self = (t_query) { - .steps = array_new(), - .pattern_map = array_new(), - .captures = symbol_table_new(), - .capture_quantifiers = array_new(), - .predicate_values = symbol_table_new(), - .predicate_steps = array_new(), - .patterns = array_new(), - .step_offsets = array_new(), - .string_buffer = array_new(), - .negated_fields = array_new(), - .repeat_symbols_with_rootless_patterns = array_new(), - .wildcard_root_pattern_count = 0, - .language = ts_language_copy(language), - }; - - array_push(&self->negated_fields, 0); - - // Parse all of the S-expressions in the given string. - Stream stream = stream_new(source, source_len); - stream_skip_whitespace(&stream); - while (stream.input < stream.end) { - uint32_t pattern_index = self->patterns.size; - uint32_t start_step_index = self->steps.size; - uint32_t start_predicate_step_index = self->predicate_steps.size; - array_push(&self->patterns, ((QueryPattern) { - .steps = (Slice) {.offset = start_step_index}, - .predicate_steps = (Slice) {.offset = start_predicate_step_index}, - .start_byte = stream_offset(&stream), - .is_non_local = false, - })); - CaptureQuantifiers capture_quantifiers = capture_quantifiers_new(); - *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers); - array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); - - QueryPattern *pattern = array_back(&self->patterns); - pattern->steps.length = self->steps.size - start_step_index; - pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index; - - // If any pattern could not be parsed, then report the error information - // and terminate. 
- if (*error_type) { - if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax; - *error_offset = stream_offset(&stream); - capture_quantifiers_delete(&capture_quantifiers); - ts_query_delete(self); - return NULL; - } - - // Maintain a list of capture quantifiers for each pattern - array_push(&self->capture_quantifiers, capture_quantifiers); - - // Maintain a map that can look up patterns for a given root symbol. - uint16_t wildcard_root_alternative_index = NONE; - for (;;) { - QueryStep *step = &self->steps.contents[start_step_index]; - - // If a pattern has a wildcard at its root, but it has a non-wildcard child, - // then optimize the matching process by skipping matching the wildcard. - // Later, during the matching process, the query cursor will check that - // there is a parent node, and capture it if necessary. - if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) { - QueryStep *second_step = &self->steps.contents[start_step_index + 1]; - if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) { - wildcard_root_alternative_index = step->alternative_index; - start_step_index += 1; - step = second_step; - } - } - - // Determine whether the pattern has a single root node. This affects - // decisions about whether or not to start matching the pattern when - // a query cursor has a range restriction or when immediately within an - // error node. 
- uint32_t start_depth = step->depth; - bool is_rooted = start_depth == 0; - for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) { - QueryStep *child_step = &self->steps.contents[step_index]; - if (child_step->is_dead_end) break; - if (child_step->depth == start_depth) { - is_rooted = false; - break; - } - } - - ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) { - .step_index = start_step_index, - .pattern_index = pattern_index, - .is_rooted = is_rooted - }); - if (step->symbol == WILDCARD_SYMBOL) { - self->wildcard_root_pattern_count++; - } - - // If there are alternatives or options at the root of the pattern, - // then add multiple entries to the pattern map. - if (step->alternative_index != NONE) { - start_step_index = step->alternative_index; - } else if (wildcard_root_alternative_index != NONE) { - start_step_index = wildcard_root_alternative_index; - wildcard_root_alternative_index = NONE; - } else { - break; - } - } - } - - if (!ts_query__analyze_patterns(self, error_offset)) { - *error_type = TSQueryErrorStructure; - ts_query_delete(self); - return NULL; - } - - array_delete(&self->string_buffer); - return self; -} - -void ts_query_delete(t_query *self) { - if (self) { - array_delete(&self->steps); - array_delete(&self->pattern_map); - array_delete(&self->predicate_steps); - array_delete(&self->patterns); - array_delete(&self->step_offsets); - array_delete(&self->string_buffer); - array_delete(&self->negated_fields); - array_delete(&self->repeat_symbols_with_rootless_patterns); - ts_language_delete(self->language); - symbol_table_delete(&self->captures); - symbol_table_delete(&self->predicate_values); - for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) { - CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index); - capture_quantifiers_delete(capture_quantifiers); - } - array_delete(&self->capture_quantifiers); - ts_free(self); - } -} - -uint32_t 
ts_query_pattern_count(const t_query *self) { - return self->patterns.size; -} - -uint32_t ts_query_capture_count(const t_query *self) { - return self->captures.slices.size; -} - -uint32_t ts_query_string_count(const t_query *self) { - return self->predicate_values.slices.size; -} - -const char *ts_query_capture_name_for_id( - const t_query *self, - uint32_t index, - uint32_t *length -) { - return symbol_table_name_for_id(&self->captures, index, length); -} - -t_quantifier ts_query_capture_quantifier_for_id( - const t_query *self, - uint32_t pattern_index, - uint32_t capture_index -) { - CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index); - return capture_quantifier_for_id(capture_quantifiers, capture_index); -} - -const char *ts_query_string_value_for_id( - const t_query *self, - uint32_t index, - uint32_t *length -) { - return symbol_table_name_for_id(&self->predicate_values, index, length); -} - -const t_query_predicate_step *ts_query_predicates_for_pattern( - const t_query *self, - uint32_t pattern_index, - uint32_t *step_count -) { - Slice slice = self->patterns.contents[pattern_index].predicate_steps; - *step_count = slice.length; - if (self->predicate_steps.contents == NULL) { - return NULL; - } - return &self->predicate_steps.contents[slice.offset]; -} - -uint32_t ts_query_start_byte_for_pattern( - const t_query *self, - uint32_t pattern_index -) { - return self->patterns.contents[pattern_index].start_byte; -} - -bool ts_query_is_pattern_rooted( - const t_query *self, - uint32_t pattern_index -) { - for (unsigned i = 0; i < self->pattern_map.size; i++) { - PatternEntry *entry = &self->pattern_map.contents[i]; - if (entry->pattern_index == pattern_index) { - if (!entry->is_rooted) return false; - } - } - return true; -} - -bool ts_query_is_pattern_non_local( - const t_query *self, - uint32_t pattern_index -) { - if (pattern_index < self->patterns.size) { - return self->patterns.contents[pattern_index].is_non_local; 
- } else { - return false; - } -} - -bool ts_query_is_pattern_guaranteed_at_step( - const t_query *self, - uint32_t byte_offset -) { - uint32_t step_index = UINT32_MAX; - for (unsigned i = 0; i < self->step_offsets.size; i++) { - StepOffset *step_offset = &self->step_offsets.contents[i]; - if (step_offset->byte_offset > byte_offset) break; - step_index = step_offset->step_index; - } - if (step_index < self->steps.size) { - return self->steps.contents[step_index].root_pattern_guaranteed; - } else { - return false; - } -} - -bool ts_query__step_is_fallible( - const t_query *self, - uint16_t step_index -) { - assert((uint32_t)step_index + 1 < self->steps.size); - QueryStep *step = &self->steps.contents[step_index]; - QueryStep *next_step = &self->steps.contents[step_index + 1]; - return ( - next_step->depth != PATTERN_DONE_MARKER && - next_step->depth > step->depth && - !next_step->parent_pattern_guaranteed - ); -} - -void ts_query_disable_capture( - t_query *self, - const char *name, - uint32_t length -) { - // Remove capture information for any pattern step that previously - // captured with the given name. - int id = symbol_table_id_for_name(&self->captures, name, length); - if (id != -1) { - for (unsigned i = 0; i < self->steps.size; i++) { - QueryStep *step = &self->steps.contents[i]; - query_step__remove_capture(step, id); - } - } -} - -void ts_query_disable_pattern( - t_query *self, - uint32_t pattern_index -) { - // Remove the given pattern from the pattern map. Its steps will still - // be in the `steps` array, but they will never be read. 
- for (unsigned i = 0; i < self->pattern_map.size; i++) { - PatternEntry *pattern = &self->pattern_map.contents[i]; - if (pattern->pattern_index == pattern_index) { - array_erase(&self->pattern_map, i); - i--; - } - } -} - -/*************** - * QueryCursor - ***************/ - -t_query_cursor *ts_query_cursor_new(void) { - t_query_cursor *self = ts_malloc(sizeof(t_query_cursor)); - *self = (t_query_cursor) { - .did_exceed_match_limit = false, - .ascending = false, - .halted = false, - .states = array_new(), - .finished_states = array_new(), - .capture_list_pool = capture_list_pool_new(), - .start_byte = 0, - .end_byte = UINT32_MAX, - .start_point = {0, 0}, - .end_point = POINT_MAX, - .max_start_depth = UINT32_MAX, - }; - array_reserve(&self->states, 8); - array_reserve(&self->finished_states, 8); - return self; -} - -void ts_query_cursor_delete(t_query_cursor *self) { - array_delete(&self->states); - array_delete(&self->finished_states); - ts_tree_cursor_delete(&self->cursor); - capture_list_pool_delete(&self->capture_list_pool); - ts_free(self); -} - -bool ts_query_cursor_did_exceed_match_limit(const t_query_cursor *self) { - return self->did_exceed_match_limit; -} - -uint32_t ts_query_cursor_match_limit(const t_query_cursor *self) { - return self->capture_list_pool.max_capture_list_count; -} - -void ts_query_cursor_set_match_limit(t_query_cursor *self, uint32_t limit) { - self->capture_list_pool.max_capture_list_count = limit; -} - -#ifdef DEBUG_EXECUTE_QUERY -#define LOG(...) fprintf(stderr, __VA_ARGS__) -#else -#define LOG(...) 
-#endif - -void ts_query_cursor_exec( - t_query_cursor *self, - const t_query *query, - t_parse_node node -) { - if (query) { - LOG("query steps:\n"); - for (unsigned i = 0; i < query->steps.size; i++) { - QueryStep *step = &query->steps.contents[i]; - LOG(" %u: {", i); - if (step->depth == PATTERN_DONE_MARKER) { - LOG("DONE"); - } else if (step->is_dead_end) { - LOG("dead_end"); - } else if (step->is_pass_through) { - LOG("pass_through"); - } else if (step->symbol != WILDCARD_SYMBOL) { - LOG("symbol: %s", query->language->symbol_names[step->symbol]); - } else { - LOG("symbol: *"); - } - if (step->field) { - LOG(", field: %s", query->language->field_names[step->field]); - } - if (step->alternative_index != NONE) { - LOG(", alternative: %u", step->alternative_index); - } - LOG("},\n"); - } - } - - array_clear(&self->states); - array_clear(&self->finished_states); - ts_tree_cursor_reset(&self->cursor, node); - capture_list_pool_reset(&self->capture_list_pool); - self->on_visible_node = true; - self->next_state_id = 0; - self->depth = 0; - self->ascending = false; - self->halted = false; - self->query = query; - self->did_exceed_match_limit = false; -} - -void ts_query_cursor_set_byte_range( - t_query_cursor *self, - uint32_t start_byte, - uint32_t end_byte -) { - if (end_byte == 0) { - end_byte = UINT32_MAX; - } - self->start_byte = start_byte; - self->end_byte = end_byte; -} - -void ts_query_cursor_set_point_range( - t_query_cursor *self, - t_point start_point, - t_point end_point -) { - if (end_point.row == 0 && end_point.column == 0) { - end_point = POINT_MAX; - } - self->start_point = start_point; - self->end_point = end_point; -} - -// Search through all of the in-progress states, and find the captured -// node that occurs earliest in the document. 
-static bool ts_query_cursor__first_in_progress_capture( - t_query_cursor *self, - uint32_t *state_index, - uint32_t *byte_offset, - uint32_t *pattern_index, - bool *root_pattern_guaranteed -) { - bool result = false; - *state_index = UINT32_MAX; - *byte_offset = UINT32_MAX; - *pattern_index = UINT32_MAX; - for (unsigned i = 0; i < self->states.size; i++) { - QueryState *state = &self->states.contents[i]; - if (state->dead) continue; - - const CaptureList *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - if (state->consumed_capture_count >= captures->size) { - continue; - } - - t_parse_node node = captures->contents[state->consumed_capture_count].node; - if ( - ts_node_end_byte(node) <= self->start_byte || - point_lte(ts_node_end_point(node), self->start_point) - ) { - state->consumed_capture_count++; - i--; - continue; - } - - uint32_t node_start_byte = ts_node_start_byte(node); - if ( - !result || - node_start_byte < *byte_offset || - (node_start_byte == *byte_offset && state->pattern_index < *pattern_index) - ) { - QueryStep *step = &self->query->steps.contents[state->step_index]; - if (root_pattern_guaranteed) { - *root_pattern_guaranteed = step->root_pattern_guaranteed; - } else if (step->root_pattern_guaranteed) { - continue; - } - - result = true; - *state_index = i; - *byte_offset = node_start_byte; - *pattern_index = state->pattern_index; - } - } - return result; -} - -// Determine which node is first in a depth-first traversal -int ts_query_cursor__compare_nodes(t_parse_node left, t_parse_node right) { - if (left.id != right.id) { - uint32_t left_start = ts_node_start_byte(left); - uint32_t right_start = ts_node_start_byte(right); - if (left_start < right_start) return -1; - if (left_start > right_start) return 1; - uint32_t left_node_count = ts_node_end_byte(left); - uint32_t right_node_count = ts_node_end_byte(right); - if (left_node_count > right_node_count) return -1; - if (left_node_count < 
right_node_count) return 1; - } - return 0; -} - -// Determine if either state contains a superset of the other state's captures. -void ts_query_cursor__compare_captures( - t_query_cursor *self, - QueryState *left_state, - QueryState *right_state, - bool *left_contains_right, - bool *right_contains_left -) { - const CaptureList *left_captures = capture_list_pool_get( - &self->capture_list_pool, - left_state->capture_list_id - ); - const CaptureList *right_captures = capture_list_pool_get( - &self->capture_list_pool, - right_state->capture_list_id - ); - *left_contains_right = true; - *right_contains_left = true; - unsigned i = 0, j = 0; - for (;;) { - if (i < left_captures->size) { - if (j < right_captures->size) { - t_query_capture *left = &left_captures->contents[i]; - t_query_capture *right = &right_captures->contents[j]; - if (left->node.id == right->node.id && left->index == right->index) { - i++; - j++; - } else { - switch (ts_query_cursor__compare_nodes(left->node, right->node)) { - case -1: - *right_contains_left = false; - i++; - break; - case 1: - *left_contains_right = false; - j++; - break; - default: - *right_contains_left = false; - *left_contains_right = false; - i++; - j++; - break; - } - } - } else { - *right_contains_left = false; - break; - } - } else { - if (j < right_captures->size) { - *left_contains_right = false; - } - break; - } - } -} - -static void ts_query_cursor__add_state( - t_query_cursor *self, - const PatternEntry *pattern -) { - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - uint32_t start_depth = self->depth - step->depth; - - // Keep the states array in ascending order of start_depth and pattern_index, - // so that it can be processed more efficiently elsewhere. Usually, there is - // no work to do here because of two facts: - // * States with lower start_depth are naturally added first due to the - // order in which nodes are visited. 
- // * Earlier patterns are naturally added first because of the ordering of the - // pattern_map data structure that's used to initiate matches. - // - // This loop is only needed in cases where two conditions hold: - // * A pattern consists of more than one sibling node, so that its states - // remain in progress after exiting the node that started the match. - // * The first node in the pattern matches against multiple nodes at the - // same depth. - // - // An example of this is the pattern '((comment)* (function))'. If multiple - // `comment` nodes appear in a row, then we may initiate a new state for this - // pattern while another state for the same pattern is already in progress. - // If there are multiple patterns like this in a query, then this loop will - // need to execute in order to keep the states ordered by pattern_index. - uint32_t index = self->states.size; - while (index > 0) { - QueryState *prev_state = &self->states.contents[index - 1]; - if (prev_state->start_depth < start_depth) break; - if (prev_state->start_depth == start_depth) { - // Avoid inserting an unnecessary duplicate state, which would be - // immediately pruned by the longest-match criteria. - if ( - prev_state->pattern_index == pattern->pattern_index && - prev_state->step_index == pattern->step_index - ) return; - if (prev_state->pattern_index <= pattern->pattern_index) break; - } - index--; - } - - LOG( - " start state. pattern:%u, step:%u\n", - pattern->pattern_index, - pattern->step_index - ); - array_insert(&self->states, index, ((QueryState) { - .id = UINT32_MAX, - .capture_list_id = NONE, - .step_index = pattern->step_index, - .pattern_index = pattern->pattern_index, - .start_depth = start_depth, - .consumed_capture_count = 0, - .seeking_immediate_match = true, - .has_in_progress_alternatives = false, - .needs_parent = step->depth == 1, - .dead = false, - })); -} - -// Acquire a capture list for this state. 
If there are no capture lists left in the -// pool, this will steal the capture list from another existing state, and mark that -// other state as 'dead'. -static CaptureList *ts_query_cursor__prepare_to_capture( - t_query_cursor *self, - QueryState *state, - unsigned state_index_to_preserve -) { - if (state->capture_list_id == NONE) { - state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); - - // If there are no capture lists left in the pool, then terminate whichever - // state has captured the earliest node in the document, and steal its - // capture list. - if (state->capture_list_id == NONE) { - self->did_exceed_match_limit = true; - uint32_t state_index, byte_offset, pattern_index; - if ( - ts_query_cursor__first_in_progress_capture( - self, - &state_index, - &byte_offset, - &pattern_index, - NULL - ) && - state_index != state_index_to_preserve - ) { - LOG( - " abandon state. index:%u, pattern:%u, offset:%u.\n", - state_index, pattern_index, byte_offset - ); - QueryState *other_state = &self->states.contents[state_index]; - state->capture_list_id = other_state->capture_list_id; - other_state->capture_list_id = NONE; - other_state->dead = true; - CaptureList *list = capture_list_pool_get_mut( - &self->capture_list_pool, - state->capture_list_id - ); - array_clear(list); - return list; - } else { - LOG(" ran out of capture lists"); - return NULL; - } - } - } - return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id); -} - -static void ts_query_cursor__capture( - t_query_cursor *self, - QueryState *state, - QueryStep *step, - t_parse_node node -) { - if (state->dead) return; - CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX); - if (!capture_list) { - state->dead = true; - return; - } - - for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { - uint16_t capture_id = step->capture_ids[j]; - if (step->capture_ids[j] == NONE) break; - array_push(capture_list, 
((t_query_capture) { node, capture_id })); - LOG( - " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n", - ts_node_type(node), - state->pattern_index, - capture_id, - capture_list->size - ); - } -} - -// Duplicate the given state and insert the newly-created state immediately after -// the given state in the `states` array. Ensures that the given state reference is -// still valid, even if the states array is reallocated. -static QueryState *ts_query_cursor__copy_state( - t_query_cursor *self, - QueryState **state_ref -) { - const QueryState *state = *state_ref; - uint32_t state_index = (uint32_t)(state - self->states.contents); - QueryState copy = *state; - copy.capture_list_id = NONE; - - // If the state has captures, copy its capture list. - if (state->capture_list_id != NONE) { - CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, &copy, state_index); - if (!new_captures) return NULL; - const CaptureList *old_captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - array_push_all(new_captures, old_captures); - } - - array_insert(&self->states, state_index + 1, copy); - *state_ref = &self->states.contents[state_index]; - return &self->states.contents[state_index + 1]; -} - -static inline bool ts_query_cursor__should_descend( - t_query_cursor *self, - bool node_intersects_range -) { - - if (node_intersects_range && self->depth < self->max_start_depth) { - return true; - } - - // If there are in-progress matches whose remaining steps occur - // deeper in the tree, then descend.
- for (unsigned i = 0; i < self->states.size; i++) { - QueryState *state = &self->states.contents[i];; - QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if ( - next_step->depth != PATTERN_DONE_MARKER && - state->start_depth + next_step->depth > self->depth - ) { - return true; - } - } - - if (self->depth >= self->max_start_depth) { - return false; - } - - // If the current node is hidden, then a non-rooted pattern might match - // one if its roots inside of this node, and match another of its roots - // as part of a sibling node, so we may need to descend. - if (!self->on_visible_node) { - // Descending into a repetition node outside of the range can be - // expensive, because these nodes can have many visible children. - // Avoid descending into repetition nodes unless we have already - // determined that this query can match rootless patterns inside - // of this type of repetition node. - Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor); - if (ts_subtree_is_repetition(subtree)) { - bool exists; - uint32_t index; - array_search_sorted_by( - &self->query->repeat_symbols_with_rootless_patterns,, - ts_subtree_symbol(subtree), - &index, - &exists - ); - return exists; - } - - return true; - } - - return false; -} - -// Walk the tree, processing patterns until at least one pattern finishes, -// If one or more patterns finish, return `true` and store their states in the -// `finished_states` array. Multiple patterns can finish on the same node. If -// there are no more matches, return `false`. -static inline bool ts_query_cursor__advance( - t_query_cursor *self, - bool stop_on_definite_step -) { - bool did_match = false; - for (;;) { - if (self->halted) { - while (self->states.size > 0) { - QueryState state = array_pop(&self->states); - capture_list_pool_release( - &self->capture_list_pool, - state.capture_list_id - ); - } - } - - if (did_match || self->halted) return did_match; - - // Exit the current node. 
- if (self->ascending) { - if (self->on_visible_node) { - LOG( - "leave node. depth:%u, type:%s\n", - self->depth, - ts_node_type(ts_tree_cursor_current_node(&self->cursor)) - ); - - // After leaving a node, remove any states that cannot make further progress. - uint32_t deleted_count = 0; - for (unsigned i = 0, n = self->states.size; i < n; i++) { - QueryState *state = &self->states.contents[i]; - QueryStep *step = &self->query->steps.contents[state->step_index]; - - // If a state completed its pattern inside of this node, but was deferred from finishing - // in order to search for longer matches, mark it as finished. - if ( - step->depth == PATTERN_DONE_MARKER && - (state->start_depth > self->depth || self->depth == 0) - ) { - LOG(" finish pattern %u\n", state->pattern_index); - array_push(&self->finished_states, *state); - did_match = true; - deleted_count++; - } - - // If a state needed to match something within this node, then remove that state - // as it has failed to match. - else if ( - step->depth != PATTERN_DONE_MARKER && - (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth - ) { - LOG( - " failed to match. pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - deleted_count++; - } - - else if (deleted_count > 0) { - self->states.contents[i - deleted_count] = *state; - } - } - self->states.size -= deleted_count; - } - - // Leave this node by stepping to its next sibling or to its parent. 
- switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) { - case TreeCursorStepVisible: - if (!self->on_visible_node) { - self->depth++; - self->on_visible_node = true; - } - self->ascending = false; - break; - case TreeCursorStepHidden: - if (self->on_visible_node) { - self->depth--; - self->on_visible_node = false; - } - self->ascending = false; - break; - default: - if (ts_tree_cursor_goto_parent(&self->cursor)) { - self->depth--; - } else { - LOG("halt at root\n"); - self->halted = true; - } - } - } - - // Enter a new node. - else { - // Get the properties of the current node. - t_parse_node node = ts_tree_cursor_current_node(&self->cursor); - t_parse_node parent_node = ts_tree_cursor_parent_node(&self->cursor); - bool parent_precedes_range = !ts_node_is_null(parent_node) && ( - ts_node_end_byte(parent_node) <= self->start_byte || - point_lte(ts_node_end_point(parent_node), self->start_point) - ); - bool parent_follows_range = !ts_node_is_null(parent_node) && ( - ts_node_start_byte(parent_node) >= self->end_byte || - point_gte(ts_node_start_point(parent_node), self->end_point) - ); - bool node_precedes_range = parent_precedes_range || ( - ts_node_end_byte(node) <= self->start_byte || - point_lte(ts_node_end_point(node), self->start_point) - ); - bool node_follows_range = parent_follows_range || ( - ts_node_start_byte(node) >= self->end_byte || - point_gte(ts_node_start_point(node), self->end_point) - ); - bool parent_intersects_range = !parent_precedes_range && !parent_follows_range; - bool node_intersects_range = !node_precedes_range && !node_follows_range; - - if (self->on_visible_node) { - t_symbol symbol = ts_node_symbol(node); - bool is_named = ts_node_is_named(node); - bool has_later_siblings; - bool has_later_named_siblings; - bool can_have_later_siblings_with_this_field; - t_field_id field_id = 0; - t_symbol supertypes[8] = {0}; - unsigned supertype_count = 8; - ts_tree_cursor_current_status( - &self->cursor, - &field_id, - 
&has_later_siblings, - &has_later_named_siblings, - &can_have_later_siblings_with_this_field, - supertypes, - &supertype_count - ); - LOG( - "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", - self->depth, - ts_node_type(node), - ts_language_field_name_for_id(self->query->language, field_id), - ts_node_start_point(node).row, - self->states.size, - self->finished_states.size - ); - - bool node_is_error = symbol == ts_builtin_sym_error; - bool parent_is_error = - !ts_node_is_null(parent_node) && - ts_node_symbol(parent_node) == ts_builtin_sym_error; - - // Add new states for any patterns whose root node is a wildcard. - if (!node_is_error) { - for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { - PatternEntry *pattern = &self->query->pattern_map.contents[i]; - - // If this node matches the first step of the pattern, then add a new - // state at the start of this pattern. - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - uint32_t start_depth = self->depth - step->depth; - if ( - (pattern->is_rooted ? - node_intersects_range : - (parent_intersects_range && !parent_is_error)) && - (!step->field || field_id == step->field) && - (!step->supertype_symbol || supertype_count > 0) && - (start_depth <= self->max_start_depth) - ) { - ts_query_cursor__add_state(self, pattern); - } - } - } - - // Add new states for any patterns whose root node matches this node. - unsigned i; - if (ts_query__pattern_map_search(self->query, symbol, &i)) { - PatternEntry *pattern = &self->query->pattern_map.contents[i]; - - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - uint32_t start_depth = self->depth - step->depth; - do { - // If this node matches the first step of the pattern, then add a new - // state at the start of this pattern. - if ( - (pattern->is_rooted ? 
- node_intersects_range : - (parent_intersects_range && !parent_is_error)) && - (!step->field || field_id == step->field) && - (start_depth <= self->max_start_depth) - ) { - ts_query_cursor__add_state(self, pattern); - } - - // Advance to the next pattern whose root node matches this node. - i++; - if (i == self->query->pattern_map.size) break; - pattern = &self->query->pattern_map.contents[i]; - step = &self->query->steps.contents[pattern->step_index]; - } while (step->symbol == symbol); - } - - // Update all of the in-progress states with current node. - for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) { - QueryState *state = &self->states.contents[j]; - QueryStep *step = &self->query->steps.contents[state->step_index]; - state->has_in_progress_alternatives = false; - copy_count = 0; - - // Check that the node matches all of the criteria for the next - // step of the pattern. - if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue; - - // Determine if this node matches this step of the pattern, and also - // if this node can have later siblings that match this step of the - // pattern. 
- bool node_does_match = false; - if (step->symbol == WILDCARD_SYMBOL) { - node_does_match = !node_is_error && (is_named || !step->is_named); - } else { - node_does_match = symbol == step->symbol; - } - bool later_sibling_can_match = has_later_siblings; - if ((step->is_immediate && is_named) || state->seeking_immediate_match) { - later_sibling_can_match = false; - } - if (step->is_last_child && has_later_named_siblings) { - node_does_match = false; - } - if (step->supertype_symbol) { - bool has_supertype = false; - for (unsigned k = 0; k < supertype_count; k++) { - if (supertypes[k] == step->supertype_symbol) { - has_supertype = true; - break; - } - } - if (!has_supertype) node_does_match = false; - } - if (step->field) { - if (step->field == field_id) { - if (!can_have_later_siblings_with_this_field) { - later_sibling_can_match = false; - } - } else { - node_does_match = false; - } - } - - if (step->negated_field_list_id) { - t_field_id *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id]; - for (;;) { - t_field_id negated_field_id = *negated_field_ids; - if (negated_field_id) { - negated_field_ids++; - if (ts_node_child_by_field_id(node, negated_field_id).id) { - node_does_match = false; - break; - } - } else { - break; - } - } - } - - // Remove states immediately if it is ever clear that they cannot match. - if (!node_does_match) { - if (!later_sibling_can_match) { - LOG( - " discard state. pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - array_erase(&self->states, j); - j--; - } - continue; - } - - // Some patterns can match their root node in multiple ways, capturing different - // children. If this pattern step could match later children within the same - // parent, then this query state cannot simply be updated in place. 
It must be - // split into two states: one that matches this node, and one which skips over - // this node, to preserve the possibility of matching later siblings. - if (later_sibling_can_match && ( - step->contains_captures || - ts_query__step_is_fallible(self->query, state->step_index) - )) { - if (ts_query_cursor__copy_state(self, &state)) { - LOG( - " split state for capture. pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - copy_count++; - } - } - - // If this pattern started with a wildcard, such that the pattern map - // actually points to the *second* step of the pattern, then check - // that the node has a parent, and capture the parent node if necessary. - if (state->needs_parent) { - t_parse_node parent = ts_tree_cursor_parent_node(&self->cursor); - if (ts_node_is_null(parent)) { - LOG(" missing parent node\n"); - state->dead = true; - } else { - state->needs_parent = false; - QueryStep *skipped_wildcard_step = step; - do { - skipped_wildcard_step--; - } while ( - skipped_wildcard_step->is_dead_end || - skipped_wildcard_step->is_pass_through || - skipped_wildcard_step->depth > 0 - ); - if (skipped_wildcard_step->capture_ids[0] != NONE) { - LOG(" capture wildcard parent\n"); - ts_query_cursor__capture( - self, - state, - skipped_wildcard_step, - parent - ); - } - } - } - - // If the current node is captured in this pattern, add it to the capture list. - if (step->capture_ids[0] != NONE) { - ts_query_cursor__capture(self, state, step, node); - } - - if (state->dead) { - array_erase(&self->states, j); - j--; - continue; - } - - // Advance this state to the next step of its pattern. - state->step_index++; - state->seeking_immediate_match = false; - LOG( - " advance state. 
pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - - QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true; - - // If this state's next step has an alternative step, then copy the state in order - // to pursue both alternatives. The alternative step itself may have an alternative, - // so this is an interactive process. - unsigned end_index = j + 1; - for (unsigned k = j; k < end_index; k++) { - QueryState *child_state = &self->states.contents[k]; - QueryStep *child_step = &self->query->steps.contents[child_state->step_index]; - if (child_step->alternative_index != NONE) { - // A "dead-end" step exists only to add a non-sequential jump into the step sequence, - // via its alternative index. When a state reaches a dead-end step, it jumps straight - // to the step's alternative. - if (child_step->is_dead_end) { - child_state->step_index = child_step->alternative_index; - k--; - continue; - } - - // A "pass-through" step exists only to add a branch into the step sequence, - // via its alternative_index. When a state reaches a pass-through step, it splits - // in order to process the alternative step, and then it advances to the next step. - if (child_step->is_pass_through) { - child_state->step_index++; - k--; - } - - QueryState *copy = ts_query_cursor__copy_state(self, &child_state); - if (copy) { - LOG( - " split state for branch. 
pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", - copy->pattern_index, - copy->step_index, - next_step->alternative_index, - next_step->alternative_is_immediate, - capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size - ); - end_index++; - copy_count++; - copy->step_index = child_step->alternative_index; - if (child_step->alternative_is_immediate) { - copy->seeking_immediate_match = true; - } - } - } - } - } - - for (unsigned j = 0; j < self->states.size; j++) { - QueryState *state = &self->states.contents[j]; - if (state->dead) { - array_erase(&self->states, j); - j--; - continue; - } - - // Enforce the longest-match criteria. When a query pattern contains optional or - // repeated nodes, this is necessary to avoid multiple redundant states, where - // one state has a strict subset of another state's captures. - bool did_remove = false; - for (unsigned k = j + 1; k < self->states.size; k++) { - QueryState *other_state = &self->states.contents[k]; - - // Query states are kept in ascending order of start_depth and pattern_index. - // Since the longest-match criteria is only used for deduping matches of the same - // pattern and root node, we only need to perform pairwise comparisons within a - // small slice of the states array. - if ( - other_state->start_depth != state->start_depth || - other_state->pattern_index != state->pattern_index - ) break; - - bool left_contains_right, right_contains_left; - ts_query_cursor__compare_captures( - self, - state, - other_state, - &left_contains_right, - &right_contains_left - ); - if (left_contains_right) { - if (state->step_index == other_state->step_index) { - LOG( - " drop shorter state. 
pattern: %u, step_index: %u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); - array_erase(&self->states, k); - k--; - continue; - } - other_state->has_in_progress_alternatives = true; - } - if (right_contains_left) { - if (state->step_index == other_state->step_index) { - LOG( - " drop shorter state. pattern: %u, step_index: %u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); - array_erase(&self->states, j); - j--; - did_remove = true; - break; - } - state->has_in_progress_alternatives = true; - } - } - - // If the state is at the end of its pattern, remove it from the list - // of in-progress states and add it to the list of finished states. - if (!did_remove) { - LOG( - " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n", - state->pattern_index, - state->start_depth, - state->step_index, - capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size - ); - QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if (next_step->depth == PATTERN_DONE_MARKER) { - if (state->has_in_progress_alternatives) { - LOG(" defer finishing pattern %u\n", state->pattern_index); - } else { - LOG(" finish pattern %u\n", state->pattern_index); - array_push(&self->finished_states, *state); - array_erase(&self->states, (uint32_t)(state - self->states.contents)); - did_match = true; - j--; - } - } - } - } - } - - if (ts_query_cursor__should_descend(self, node_intersects_range)) { - switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) { - case TreeCursorStepVisible: - self->depth++; - self->on_visible_node = true; - continue; - case TreeCursorStepHidden: - self->on_visible_node = false; - continue; - default: - break; - } - } - - self->ascending = true; - } - } -} - -bool ts_query_cursor_next_match( - t_query_cursor *self, - t_query_match 
*match -) { - if (self->finished_states.size == 0) { - if (!ts_query_cursor__advance(self, false)) { - return false; - } - } - - QueryState *state = &self->finished_states.contents[0]; - if (state->id == UINT32_MAX) state->id = self->next_state_id++; - match->id = state->id; - match->pattern_index = state->pattern_index; - const CaptureList *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - match->captures = captures->contents; - match->capture_count = captures->size; - capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); - array_erase(&self->finished_states, 0); - return true; -} - -void ts_query_cursor_remove_match( - t_query_cursor *self, - uint32_t match_id -) { - for (unsigned i = 0; i < self->finished_states.size; i++) { - const QueryState *state = &self->finished_states.contents[i]; - if (state->id == match_id) { - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - array_erase(&self->finished_states, i); - return; - } - } - - // Remove unfinished query states as well to prevent future - // captures for a match being removed. - for (unsigned i = 0; i < self->states.size; i++) { - const QueryState *state = &self->states.contents[i]; - if (state->id == match_id) { - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - array_erase(&self->states, i); - return; - } - } -} - -bool ts_query_cursor_next_capture( - t_query_cursor *self, - t_query_match *match, - uint32_t *capture_index -) { - // The goal here is to return captures in order, even though they may not - // be discovered in order, because patterns can overlap. Search for matches - // until there is a finished capture that is before any unfinished capture. - for (;;) { - // First, find the earliest capture in an unfinished match. 
- uint32_t first_unfinished_capture_byte; - uint32_t first_unfinished_pattern_index; - uint32_t first_unfinished_state_index; - bool first_unfinished_state_is_definite = false; - ts_query_cursor__first_in_progress_capture( - self, - &first_unfinished_state_index, - &first_unfinished_capture_byte, - &first_unfinished_pattern_index, - &first_unfinished_state_is_definite - ); - - // Then find the earliest capture in a finished match. It must occur - // before the first capture in an *unfinished* match. - QueryState *first_finished_state = NULL; - uint32_t first_finished_capture_byte = first_unfinished_capture_byte; - uint32_t first_finished_pattern_index = first_unfinished_pattern_index; - for (unsigned i = 0; i < self->finished_states.size;) { - QueryState *state = &self->finished_states.contents[i]; - const CaptureList *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - - // Remove states whose captures are all consumed. - if (state->consumed_capture_count >= captures->size) { - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - array_erase(&self->finished_states, i); - continue; - } - - t_parse_node node = captures->contents[state->consumed_capture_count].node; - - bool node_precedes_range = ( - ts_node_end_byte(node) <= self->start_byte || - point_lte(ts_node_end_point(node), self->start_point) - ); - bool node_follows_range = ( - ts_node_start_byte(node) >= self->end_byte || - point_gte(ts_node_start_point(node), self->end_point) - ); - bool node_outside_of_range = node_precedes_range || node_follows_range; - - // Skip captures that are outside of the cursor's range. 
- if (node_outside_of_range) { - state->consumed_capture_count++; - continue; - } - - uint32_t node_start_byte = ts_node_start_byte(node); - if ( - node_start_byte < first_finished_capture_byte || - ( - node_start_byte == first_finished_capture_byte && - state->pattern_index < first_finished_pattern_index - ) - ) { - first_finished_state = state; - first_finished_capture_byte = node_start_byte; - first_finished_pattern_index = state->pattern_index; - } - i++; - } - - // If there is finished capture that is clearly before any unfinished - // capture, then return its match, and its capture index. Internally - // record the fact that the capture has been 'consumed'. - QueryState *state; - if (first_finished_state) { - state = first_finished_state; - } else if (first_unfinished_state_is_definite) { - state = &self->states.contents[first_unfinished_state_index]; - } else { - state = NULL; - } - - if (state) { - if (state->id == UINT32_MAX) state->id = self->next_state_id++; - match->id = state->id; - match->pattern_index = state->pattern_index; - const CaptureList *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - match->captures = captures->contents; - match->capture_count = captures->size; - *capture_index = state->consumed_capture_count; - state->consumed_capture_count++; - return true; - } - - if (capture_list_pool_is_empty(&self->capture_list_pool)) { - LOG( - " abandon state. index:%u, pattern:%u, offset:%u.\n", - first_unfinished_state_index, - first_unfinished_pattern_index, - first_unfinished_capture_byte - ); - capture_list_pool_release( - &self->capture_list_pool, - self->states.contents[first_unfinished_state_index].capture_list_id - ); - array_erase(&self->states, first_unfinished_state_index); - } - - // If there are no finished matches that are ready to be returned, then - // continue finding more matches. 
- if ( - !ts_query_cursor__advance(self, true) && - self->finished_states.size == 0 - ) return false; - } -} - -void ts_query_cursor_set_max_start_depth( - t_query_cursor *self, - uint32_t max_start_depth -) { - self->max_start_depth = max_start_depth; -} - -#undef LOG diff --git a/parser/src/reduce_action.h b/parser/src/reduce_action.h deleted file mode 100644 index fbc6c1d3..00000000 --- a/parser/src/reduce_action.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef TREE_SITTER_REDUCE_ACTION_H_ -#define TREE_SITTER_REDUCE_ACTION_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./array.h" -#include "./api.h" - -typedef struct { - uint32_t count; - t_symbol symbol; - int dynamic_precedence; - unsigned short production_id; -} ReduceAction; - -typedef Array(ReduceAction) ReduceActionSet; - -static inline void ts_reduce_action_set_add(ReduceActionSet *self, - ReduceAction new_action) { - for (uint32_t i = 0; i < self->size; i++) { - ReduceAction action = self->contents[i]; - if (action.symbol == new_action.symbol && action.count == new_action.count) - return; - } - array_push(self, new_action); -} - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_REDUCE_ACTION_H_ diff --git a/parser/src/reusable_node.h b/parser/src/reusable_node.h deleted file mode 100644 index 63fe3c1a..00000000 --- a/parser/src/reusable_node.h +++ /dev/null @@ -1,95 +0,0 @@ -#include "./subtree.h" - -typedef struct { - Subtree tree; - uint32_t child_index; - uint32_t byte_offset; -} StackEntry; - -typedef struct { - Array(StackEntry) stack; - Subtree last_external_token; -} ReusableNode; - -static inline ReusableNode reusable_node_new(void) { - return (ReusableNode) {array_new(), NULL_SUBTREE}; -} - -static inline void reusable_node_clear(ReusableNode *self) { - array_clear(&self->stack); - self->last_external_token = NULL_SUBTREE; -} - -static inline Subtree reusable_node_tree(ReusableNode *self) { - return self->stack.size > 0 - ? 
self->stack.contents[self->stack.size - 1].tree - : NULL_SUBTREE; -} - -static inline uint32_t reusable_node_byte_offset(ReusableNode *self) { - return self->stack.size > 0 - ? self->stack.contents[self->stack.size - 1].byte_offset - : UINT32_MAX; -} - -static inline void reusable_node_delete(ReusableNode *self) { - array_delete(&self->stack); -} - -static inline void reusable_node_advance(ReusableNode *self) { - StackEntry last_entry = *array_back(&self->stack); - uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree); - if (ts_subtree_has_external_tokens(last_entry.tree)) { - self->last_external_token = ts_subtree_last_external_token(last_entry.tree); - } - - Subtree tree; - uint32_t next_index; - do { - StackEntry popped_entry = array_pop(&self->stack); - next_index = popped_entry.child_index + 1; - if (self->stack.size == 0) return; - tree = array_back(&self->stack)->tree; - } while (ts_subtree_child_count(tree) <= next_index); - - array_push(&self->stack, ((StackEntry) { - .tree = ts_subtree_children(tree)[next_index], - .child_index = next_index, - .byte_offset = byte_offset, - })); -} - -static inline bool reusable_node_descend(ReusableNode *self) { - StackEntry last_entry = *array_back(&self->stack); - if (ts_subtree_child_count(last_entry.tree) > 0) { - array_push(&self->stack, ((StackEntry) { - .tree = ts_subtree_children(last_entry.tree)[0], - .child_index = 0, - .byte_offset = last_entry.byte_offset, - })); - return true; - } else { - return false; - } -} - -static inline void reusable_node_advance_past_leaf(ReusableNode *self) { - while (reusable_node_descend(self)) {} - reusable_node_advance(self); -} - -static inline void reusable_node_reset(ReusableNode *self, Subtree tree) { - reusable_node_clear(self); - array_push(&self->stack, ((StackEntry) { - .tree = tree, - .child_index = 0, - .byte_offset = 0, - })); - - // Never reuse the root node, because it has a non-standard internal structure - // due to transformations 
that are applied when it is accepted: adding the EOF - // child and any extra children. - if (!reusable_node_descend(self)) { - reusable_node_clear(self); - } -} diff --git a/parser/src/scanner.c b/parser/src/scanner.c index a63963cf..f5e3eb81 100644 --- a/parser/src/scanner.c +++ b/parser/src/scanner.c @@ -1,5 +1,4 @@ -#include "./array.h" -#include "./parser.h" +#include "./api.h" #include #include diff --git a/parser/src/stack.c b/parser/src/stack.c deleted file mode 100644 index 8f7816ca..00000000 --- a/parser/src/stack.c +++ /dev/null @@ -1,899 +0,0 @@ -#include "./alloc.h" -#include "./language.h" -#include "./subtree.h" -#include "./array.h" -#include "./stack.h" -#include "./length.h" -#include -#include -#include - -#define MAX_LINK_COUNT 8 -#define MAX_NODE_POOL_SIZE 50 -#define MAX_ITERATOR_COUNT 64 - -#if defined _WIN32 && !defined __GNUC__ -#define forceinline __forceinline -#else -#define forceinline static inline __attribute__((always_inline)) -#endif - -typedef struct StackNode StackNode; - -typedef struct { - StackNode *node; - Subtree subtree; - bool is_pending; -} StackLink; - -struct StackNode { - t_state_id state; - Length position; - StackLink links[MAX_LINK_COUNT]; - short unsigned int link_count; - uint32_t ref_count; - unsigned error_cost; - unsigned node_count; - int dynamic_precedence; -}; - -typedef struct { - StackNode *node; - SubtreeArray subtrees; - uint32_t subtree_count; - bool is_pending; -} StackIterator; - -typedef Array(StackNode *) StackNodeArray; - -typedef enum { - StackStatusActive, - StackStatusPaused, - StackStatusHalted, -} StackStatus; - -typedef struct { - StackNode *node; - StackSummary *summary; - unsigned node_count_at_last_error; - Subtree last_external_token; - Subtree lookahead_when_paused; - StackStatus status; -} StackHead; - -struct Stack { - Array(StackHead) heads; - StackSliceArray slices; - Array(StackIterator) iterators; - StackNodeArray node_pool; - StackNode *base_node; - SubtreePool *subtree_pool; -}; - 
-typedef unsigned StackAction; -enum { - StackActionNone, - StackActionStop = 1, - StackActionPop = 2, -}; - -typedef StackAction (*StackCallback)(void *, const StackIterator *); - -static void stack_node_retain(StackNode *self) { - if (!self) - return; - assert(self->ref_count > 0); - self->ref_count++; - assert(self->ref_count != 0); -} - -static void stack_node_release( - StackNode *self, - StackNodeArray *pool, - SubtreePool *subtree_pool -) { -recur: - assert(self->ref_count != 0); - self->ref_count--; - if (self->ref_count > 0) return; - - StackNode *first_predecessor = NULL; - if (self->link_count > 0) { - for (unsigned i = self->link_count - 1; i > 0; i--) { - StackLink link = self->links[i]; - if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree); - stack_node_release(link.node, pool, subtree_pool); - } - StackLink link = self->links[0]; - if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree); - first_predecessor = self->links[0].node; - } - - if (pool->size < MAX_NODE_POOL_SIZE) { - array_push(pool, self); - } else { - ts_free(self); - } - - if (first_predecessor) { - self = first_predecessor; - goto recur; - } -} - -/// Get the number of nodes in the subtree, for the purpose of measuring -/// how much progress has been made by a given version of the stack. -static uint32_t stack__subtree_node_count(Subtree subtree) { - uint32_t count = ts_subtree_visible_descendant_count(subtree); - if (ts_subtree_visible(subtree)) count++; - - // Count intermediate error nodes even though they are not visible, - // because a stack version's node count is used to check whether it - // has made any progress since the last time it encountered an error. - if (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) count++; - - return count; -} - -static StackNode *stack_node_new( - StackNode *previous_node, - Subtree subtree, - bool is_pending, - t_state_id state, - StackNodeArray *pool -) { - StackNode *node = pool->size > 0 - ? 
array_pop(pool) - : ts_malloc(sizeof(StackNode)); - *node = (StackNode) { - .ref_count = 1, - .link_count = 0, - .state = state - }; - - if (previous_node) { - node->link_count = 1; - node->links[0] = (StackLink) { - .node = previous_node, - .subtree = subtree, - .is_pending = is_pending, - }; - - node->position = previous_node->position; - node->error_cost = previous_node->error_cost; - node->dynamic_precedence = previous_node->dynamic_precedence; - node->node_count = previous_node->node_count; - - if (subtree.ptr) { - node->error_cost += ts_subtree_error_cost(subtree); - node->position = length_add(node->position, ts_subtree_total_size(subtree)); - node->node_count += stack__subtree_node_count(subtree); - node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree); - } - } else { - node->position = length_zero(); - node->error_cost = 0; - } - - return node; -} - -static bool stack__subtree_is_equivalent(Subtree left, Subtree right) { - if (left.ptr == right.ptr) return true; - if (!left.ptr || !right.ptr) return false; - - // Symbols must match - if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false; - - // If both have errors, don't bother keeping both. 
- if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) return true; - - return ( - ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes && - ts_subtree_size(left).bytes == ts_subtree_size(right).bytes && - ts_subtree_child_count(left) == ts_subtree_child_count(right) && - ts_subtree_extra(left) == ts_subtree_extra(right) && - ts_subtree_external_scanner_state_eq(left, right) - ); -} - -static void stack_node_add_link( - StackNode *self, - StackLink link, - SubtreePool *subtree_pool -) { - if (link.node == self) return; - - for (int i = 0; i < self->link_count; i++) { - StackLink *existing_link = &self->links[i]; - if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) { - // In general, we preserve ambiguities until they are removed from the stack - // during a pop operation where multiple paths lead to the same node. But in - // the special case where two links directly connect the same pair of nodes, - // we can safely remove the ambiguity ahead of time without changing behavior. - if (existing_link->node == link.node) { - if ( - ts_subtree_dynamic_precedence(link.subtree) > - ts_subtree_dynamic_precedence(existing_link->subtree) - ) { - ts_subtree_retain(link.subtree); - ts_subtree_release(subtree_pool, existing_link->subtree); - existing_link->subtree = link.subtree; - self->dynamic_precedence = - link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree); - } - return; - } - - // If the previous nodes are mergeable, merge them recursively. 
- if ( - existing_link->node->state == link.node->state && - existing_link->node->position.bytes == link.node->position.bytes && - existing_link->node->error_cost == link.node->error_cost - ) { - for (int j = 0; j < link.node->link_count; j++) { - stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool); - } - int32_t dynamic_precedence = link.node->dynamic_precedence; - if (link.subtree.ptr) { - dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); - } - if (dynamic_precedence > self->dynamic_precedence) { - self->dynamic_precedence = dynamic_precedence; - } - return; - } - } - } - - if (self->link_count == MAX_LINK_COUNT) return; - - stack_node_retain(link.node); - unsigned node_count = link.node->node_count; - int dynamic_precedence = link.node->dynamic_precedence; - self->links[self->link_count++] = link; - - if (link.subtree.ptr) { - ts_subtree_retain(link.subtree); - node_count += stack__subtree_node_count(link.subtree); - dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); - } - - if (node_count > self->node_count) self->node_count = node_count; - if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence; -} - -static void stack_head_delete( - StackHead *self, - StackNodeArray *pool, - SubtreePool *subtree_pool -) { - if (self->node) { - if (self->last_external_token.ptr) { - ts_subtree_release(subtree_pool, self->last_external_token); - } - if (self->lookahead_when_paused.ptr) { - ts_subtree_release(subtree_pool, self->lookahead_when_paused); - } - if (self->summary) { - array_delete(self->summary); - ts_free(self->summary); - } - stack_node_release(self->node, pool, subtree_pool); - } -} - -static StackVersion ts_stack__add_version( - Stack *self, - StackVersion original_version, - StackNode *node -) { - StackHead head = { - .node = node, - .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error, - .last_external_token = 
self->heads.contents[original_version].last_external_token, - .status = StackStatusActive, - .lookahead_when_paused = NULL_SUBTREE, - }; - array_push(&self->heads, head); - stack_node_retain(node); - if (head.last_external_token.ptr) ts_subtree_retain(head.last_external_token); - return (StackVersion)(self->heads.size - 1); -} - -static void ts_stack__add_slice( - Stack *self, - StackVersion original_version, - StackNode *node, - SubtreeArray *subtrees -) { - for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { - StackVersion version = self->slices.contents[i].version; - if (self->heads.contents[version].node == node) { - StackSlice slice = {*subtrees, version}; - array_insert(&self->slices, i + 1, slice); - return; - } - } - - StackVersion version = ts_stack__add_version(self, original_version, node); - StackSlice slice = { *subtrees, version }; - array_push(&self->slices, slice); -} - -static StackSliceArray stack__iter( - Stack *self, - StackVersion version, - StackCallback callback, - void *payload, - int goal_subtree_count -) { - array_clear(&self->slices); - array_clear(&self->iterators); - - StackHead *head = array_get(&self->heads, version); - StackIterator new_iterator = { - .node = head->node, - .subtrees = array_new(), - .subtree_count = 0, - .is_pending = true, - }; - - bool include_subtrees = false; - if (goal_subtree_count >= 0) { - include_subtrees = true; - array_reserve(&new_iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree)); - } - - array_push(&self->iterators, new_iterator); - - while (self->iterators.size > 0) { - for (uint32_t i = 0, size = self->iterators.size; i < size; i++) { - StackIterator *iterator = &self->iterators.contents[i]; - StackNode *node = iterator->node; - - StackAction action = callback(payload, iterator); - bool should_pop = action & StackActionPop; - bool should_stop = action & StackActionStop || node->link_count == 0; - - if (should_pop) { - SubtreeArray subtrees = 
iterator->subtrees; - if (!should_stop) { - ts_subtree_array_copy(subtrees, &subtrees); - } - ts_subtree_array_reverse(&subtrees); - ts_stack__add_slice( - self, - version, - node, - &subtrees - ); - } - - if (should_stop) { - if (!should_pop) { - ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees); - } - array_erase(&self->iterators, i); - i--, size--; - continue; - } - - for (uint32_t j = 1; j <= node->link_count; j++) { - StackIterator *next_iterator; - StackLink link; - if (j == node->link_count) { - link = node->links[0]; - next_iterator = &self->iterators.contents[i]; - } else { - if (self->iterators.size >= MAX_ITERATOR_COUNT) continue; - link = node->links[j]; - StackIterator current_iterator = self->iterators.contents[i]; - array_push(&self->iterators, current_iterator); - next_iterator = array_back(&self->iterators); - ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees); - } - - next_iterator->node = link.node; - if (link.subtree.ptr) { - if (include_subtrees) { - array_push(&next_iterator->subtrees, link.subtree); - ts_subtree_retain(link.subtree); - } - - if (!ts_subtree_extra(link.subtree)) { - next_iterator->subtree_count++; - if (!link.is_pending) { - next_iterator->is_pending = false; - } - } - } else { - next_iterator->subtree_count++; - next_iterator->is_pending = false; - } - } - } - } - - return self->slices; -} - -Stack *ts_stack_new(SubtreePool *subtree_pool) { - Stack *self = ts_calloc(1, sizeof(Stack)); - - array_init(&self->heads); - array_init(&self->slices); - array_init(&self->iterators); - array_init(&self->node_pool); - array_reserve(&self->heads, 4); - array_reserve(&self->slices, 4); - array_reserve(&self->iterators, 4); - array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE); - - self->subtree_pool = subtree_pool; - self->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &self->node_pool); - ts_stack_clear(self); - - return self; -} - -void ts_stack_delete(Stack *self) { - if 
(self->slices.contents) - array_delete(&self->slices); - if (self->iterators.contents) - array_delete(&self->iterators); - stack_node_release(self->base_node, &self->node_pool, self->subtree_pool); - for (uint32_t i = 0; i < self->heads.size; i++) { - stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); - } - array_clear(&self->heads); - if (self->node_pool.contents) { - for (uint32_t i = 0; i < self->node_pool.size; i++) - ts_free(self->node_pool.contents[i]); - array_delete(&self->node_pool); - } - array_delete(&self->heads); - ts_free(self); -} - -uint32_t ts_stack_version_count(const Stack *self) { - return self->heads.size; -} - -t_state_id ts_stack_state(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->node->state; -} - -Length ts_stack_position(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->node->position; -} - -Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->last_external_token; -} - -void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) { - StackHead *head = array_get(&self->heads, version); - if (token.ptr) ts_subtree_retain(token); - if (head->last_external_token.ptr) ts_subtree_release(self->subtree_pool, head->last_external_token); - head->last_external_token = token; -} - -unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { - StackHead *head = array_get(&self->heads, version); - unsigned result = head->node->error_cost; - if ( - head->status == StackStatusPaused || - (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr)) { - result += ERROR_COST_PER_RECOVERY; - } - return result; -} - -unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) { - StackHead *head = array_get(&self->heads, version); - if (head->node->node_count < head->node_count_at_last_error) { - 
head->node_count_at_last_error = head->node->node_count; - } - return head->node->node_count - head->node_count_at_last_error; -} - -void ts_stack_push( - Stack *self, - StackVersion version, - Subtree subtree, - bool pending, - t_state_id state -) { - StackHead *head = array_get(&self->heads, version); - StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool); - if (!subtree.ptr) head->node_count_at_last_error = new_node->node_count; - head->node = new_node; -} - -forceinline StackAction pop_count_callback(void *payload, const StackIterator *iterator) { - unsigned *goal_subtree_count = payload; - if (iterator->subtree_count == *goal_subtree_count) { - return StackActionPop | StackActionStop; - } else { - return StackActionNone; - } -} - -StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) { - return stack__iter(self, version, pop_count_callback, &count, (int)count); -} - -forceinline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) { - (void)payload; - if (iterator->subtree_count >= 1) { - if (iterator->is_pending) { - return StackActionPop | StackActionStop; - } else { - return StackActionStop; - } - } else { - return StackActionNone; - } -} - -StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) { - StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, 0); - if (pop.size > 0) { - ts_stack_renumber_version(self, pop.contents[0].version, version); - pop.contents[0].version = version; - } - return pop; -} - -forceinline StackAction pop_error_callback(void *payload, const StackIterator *iterator) { - if (iterator->subtrees.size > 0) { - bool *found_error = payload; - if (!*found_error && ts_subtree_is_error(iterator->subtrees.contents[0])) { - *found_error = true; - return StackActionPop | StackActionStop; - } else { - return StackActionStop; - } - } else { - return StackActionNone; - } -} - -SubtreeArray 
ts_stack_pop_error(Stack *self, StackVersion version) { - StackNode *node = array_get(&self->heads, version)->node; - for (unsigned i = 0; i < node->link_count; i++) { - if (node->links[i].subtree.ptr && ts_subtree_is_error(node->links[i].subtree)) { - bool found_error = false; - StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1); - if (pop.size > 0) { - assert(pop.size == 1); - ts_stack_renumber_version(self, pop.contents[0].version, version); - return pop.contents[0].subtrees; - } - break; - } - } - return (SubtreeArray) {.size = 0}; -} - -forceinline StackAction pop_all_callback(void *payload, const StackIterator *iterator) { - (void)payload; - return iterator->node->link_count == 0 ? StackActionPop : StackActionNone; -} - -StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) { - return stack__iter(self, version, pop_all_callback, NULL, 0); -} - -typedef struct { - StackSummary *summary; - unsigned max_depth; -} SummarizeStackSession; - -forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) { - SummarizeStackSession *session = payload; - t_state_id state = iterator->node->state; - unsigned depth = iterator->subtree_count; - if (depth > session->max_depth) return StackActionStop; - for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) { - StackSummaryEntry entry = session->summary->contents[i]; - if (entry.depth < depth) break; - if (entry.depth == depth && entry.state == state) return StackActionNone; - } - array_push(session->summary, ((StackSummaryEntry) { - .position = iterator->node->position, - .depth = depth, - .state = state, - })); - return StackActionNone; -} - -void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) { - SummarizeStackSession session = { - .summary = ts_malloc(sizeof(StackSummary)), - .max_depth = max_depth - }; - array_init(session.summary); - stack__iter(self, version, summarize_stack_callback, &session, -1); 
- StackHead *head = &self->heads.contents[version]; - if (head->summary) { - array_delete(head->summary); - ts_free(head->summary); - } - head->summary = session.summary; -} - -StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) { - return array_get(&self->heads, version)->summary; -} - -int ts_stack_dynamic_precedence(Stack *self, StackVersion version) { - return array_get(&self->heads, version)->node->dynamic_precedence; -} - -bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) { - const StackHead *head = array_get(&self->heads, version); - const StackNode *node = head->node; - if (node->error_cost == 0) return true; - while (node) { - if (node->link_count > 0) { - Subtree subtree = node->links[0].subtree; - if (subtree.ptr) { - if (ts_subtree_total_bytes(subtree) > 0) { - return true; - } else if ( - node->node_count > head->node_count_at_last_error && - ts_subtree_error_cost(subtree) == 0 - ) { - node = node->links[0].node; - continue; - } - } - } - break; - } - return false; -} - -void ts_stack_remove_version(Stack *self, StackVersion version) { - stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool); - array_erase(&self->heads, version); -} - -void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) { - if (v1 == v2) return; - assert(v2 < v1); - assert((uint32_t)v1 < self->heads.size); - StackHead *source_head = &self->heads.contents[v1]; - StackHead *target_head = &self->heads.contents[v2]; - if (target_head->summary && !source_head->summary) { - source_head->summary = target_head->summary; - target_head->summary = NULL; - } - stack_head_delete(target_head, &self->node_pool, self->subtree_pool); - *target_head = *source_head; - array_erase(&self->heads, v1); -} - -void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) { - StackHead temporary_head = self->heads.contents[v1]; - self->heads.contents[v1] = self->heads.contents[v2]; - 
self->heads.contents[v2] = temporary_head; -} - -StackVersion ts_stack_copy_version(Stack *self, StackVersion version) { - assert(version < self->heads.size); - array_push(&self->heads, self->heads.contents[version]); - StackHead *head = array_back(&self->heads); - stack_node_retain(head->node); - if (head->last_external_token.ptr) ts_subtree_retain(head->last_external_token); - head->summary = NULL; - return self->heads.size - 1; -} - -bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) { - if (!ts_stack_can_merge(self, version1, version2)) return false; - StackHead *head1 = &self->heads.contents[version1]; - StackHead *head2 = &self->heads.contents[version2]; - for (uint32_t i = 0; i < head2->node->link_count; i++) { - stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool); - } - if (head1->node->state == ERROR_STATE) { - head1->node_count_at_last_error = head1->node->node_count; - } - ts_stack_remove_version(self, version2); - return true; -} - -bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) { - StackHead *head1 = &self->heads.contents[version1]; - StackHead *head2 = &self->heads.contents[version2]; - return - head1->status == StackStatusActive && - head2->status == StackStatusActive && - head1->node->state == head2->node->state && - head1->node->position.bytes == head2->node->position.bytes && - head1->node->error_cost == head2->node->error_cost && - ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token); -} - -void ts_stack_halt(Stack *self, StackVersion version) { - array_get(&self->heads, version)->status = StackStatusHalted; -} - -void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead) { - StackHead *head = array_get(&self->heads, version); - head->status = StackStatusPaused; - head->lookahead_when_paused = lookahead; - head->node_count_at_last_error = head->node->node_count; -} - -bool ts_stack_is_active(const Stack 
*self, StackVersion version) { - return array_get(&self->heads, version)->status == StackStatusActive; -} - -bool ts_stack_is_halted(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->status == StackStatusHalted; -} - -bool ts_stack_is_paused(const Stack *self, StackVersion version) { - return array_get(&self->heads, version)->status == StackStatusPaused; -} - -Subtree ts_stack_resume(Stack *self, StackVersion version) { - StackHead *head = array_get(&self->heads, version); - assert(head->status == StackStatusPaused); - Subtree result = head->lookahead_when_paused; - head->status = StackStatusActive; - head->lookahead_when_paused = NULL_SUBTREE; - return result; -} - -void ts_stack_clear(Stack *self) { - stack_node_retain(self->base_node); - for (uint32_t i = 0; i < self->heads.size; i++) { - stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); - } - array_clear(&self->heads); - array_push(&self->heads, ((StackHead) { - .node = self->base_node, - .status = StackStatusActive, - .last_external_token = NULL_SUBTREE, - .lookahead_when_paused = NULL_SUBTREE, - })); -} - -bool ts_stack_print_dot_graph(Stack *self, const t_language *language, FILE *f) { - array_reserve(&self->iterators, 32); - if (!f) f = stderr; - - fprintf(f, "digraph stack {\n"); - fprintf(f, "rankdir=\"RL\";\n"); - fprintf(f, "edge [arrowhead=none]\n"); - - Array(StackNode *) visited_nodes = array_new(); - - array_clear(&self->iterators); - for (uint32_t i = 0; i < self->heads.size; i++) { - StackHead *head = &self->heads.contents[i]; - if (head->status == StackStatusHalted) continue; - - fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i); - fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node); - - if (head->status == StackStatusPaused) { - fprintf(f, "color=red "); - } - fprintf(f, - "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u", - i, - ts_stack_node_count_since_error(self, i), - 
ts_stack_error_cost(self, i) - ); - - if (head->summary) { - fprintf(f, "\nsummary:"); - for (uint32_t j = 0; j < head->summary->size; j++) fprintf(f, " %u", head->summary->contents[j].state); - } - - if (head->last_external_token.ptr) { - const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state; - const char *data = ts_external_scanner_state_data(state); - fprintf(f, "\nexternal_scanner_state:"); - for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]); - } - - fprintf(f, "\"]\n"); - array_push(&self->iterators, ((StackIterator) { - .node = head->node - })); - } - - bool all_iterators_done = false; - while (!all_iterators_done) { - all_iterators_done = true; - - for (uint32_t i = 0; i < self->iterators.size; i++) { - StackIterator iterator = self->iterators.contents[i]; - StackNode *node = iterator.node; - - for (uint32_t j = 0; j < visited_nodes.size; j++) { - if (visited_nodes.contents[j] == node) { - node = NULL; - break; - } - } - - if (!node) continue; - all_iterators_done = false; - - fprintf(f, "node_%p [", (void *)node); - if (node->state == ERROR_STATE) { - fprintf(f, "label=\"?\""); - } else if ( - node->link_count == 1 && - node->links[0].subtree.ptr && - ts_subtree_extra(node->links[0].subtree) - ) { - fprintf(f, "shape=point margin=0 label=\"\""); - } else { - fprintf(f, "label=\"%d\"", node->state); - } - - fprintf( - f, - " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n", - node->position.extent.row + 1, - node->position.extent.column, - node->node_count, - node->error_cost, - node->dynamic_precedence - ); - - for (int j = 0; j < node->link_count; j++) { - StackLink link = node->links[j]; - fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node); - if (link.is_pending) fprintf(f, "style=dashed "); - if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray "); - - if (!link.subtree.ptr) { - fprintf(f, "color=red"); - } 
else { - fprintf(f, "label=\""); - bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree); - if (quoted) fprintf(f, "'"); - ts_language_write_symbol_as_dot_string(language, f, ts_subtree_symbol(link.subtree)); - if (quoted) fprintf(f, "'"); - fprintf(f, "\""); - fprintf( - f, - "labeltooltip=\"error_cost: %u\ndynamic_precedence: %" PRId32 "\"", - ts_subtree_error_cost(link.subtree), - ts_subtree_dynamic_precedence(link.subtree) - ); - } - - fprintf(f, "];\n"); - - StackIterator *next_iterator; - if (j == 0) { - next_iterator = &self->iterators.contents[i]; - } else { - array_push(&self->iterators, iterator); - next_iterator = array_back(&self->iterators); - } - next_iterator->node = link.node; - } - - array_push(&visited_nodes, node); - } - } - - fprintf(f, "}\n"); - - array_delete(&visited_nodes); - return true; -} - -#undef forceinline diff --git a/parser/src/stack.h b/parser/src/stack.h deleted file mode 100644 index 1f40c25a..00000000 --- a/parser/src/stack.h +++ /dev/null @@ -1,133 +0,0 @@ -#ifndef TREE_SITTER_PARSE_STACK_H_ -#define TREE_SITTER_PARSE_STACK_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./array.h" -#include "./subtree.h" -#include "./error_costs.h" -#include - -typedef struct Stack Stack; - -typedef unsigned StackVersion; -#define STACK_VERSION_NONE ((StackVersion)-1) - -typedef struct { - SubtreeArray subtrees; - StackVersion version; -} StackSlice; -typedef Array(StackSlice) StackSliceArray; - -typedef struct { - Length position; - unsigned depth; - t_state_id state; -} StackSummaryEntry; -typedef Array(StackSummaryEntry) StackSummary; - -// Create a stack. -Stack *ts_stack_new(SubtreePool *); - -// Release the memory reserved for a given stack. -void ts_stack_delete(Stack *); - -// Get the stack's current number of versions. -uint32_t ts_stack_version_count(const Stack *); - -// Get the state at the top of the given version of the stack. If the stack is -// empty, this returns the initial state, 0. 
-t_state_id ts_stack_state(const Stack *, StackVersion); - -// Get the last external token associated with a given version of the stack. -Subtree ts_stack_last_external_token(const Stack *, StackVersion); - -// Set the last external token associated with a given version of the stack. -void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree ); - -// Get the position of the given version of the stack within the document. -Length ts_stack_position(const Stack *, StackVersion); - -// Push a tree and state onto the given version of the stack. -// -// This transfers ownership of the tree to the Stack. Callers that -// need to retain ownership of the tree for their own purposes should -// first retain the tree. -void ts_stack_push(Stack *, StackVersion, Subtree , bool, t_state_id); - -// Pop the given number of entries from the given version of the stack. This -// operation can increase the number of stack versions by revealing multiple -// versions which had previously been merged. It returns an array that -// specifies the index of each revealed version and the trees that were -// removed from that version. -StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count); - -// Remove an error at the top of the given version of the stack. -SubtreeArray ts_stack_pop_error(Stack *, StackVersion); - -// Remove any pending trees from the top of the given version of the stack. -StackSliceArray ts_stack_pop_pending(Stack *, StackVersion); - -// Remove any all trees from the given version of the stack. -StackSliceArray ts_stack_pop_all(Stack *, StackVersion); - -// Get the maximum number of tree nodes reachable from this version of the stack -// since the last error was detected. 
-unsigned ts_stack_node_count_since_error(const Stack *, StackVersion); - -int ts_stack_dynamic_precedence(Stack *, StackVersion); - -bool ts_stack_has_advanced_since_error(const Stack *, StackVersion); - -// Compute a summary of all the parse states near the top of the given -// version of the stack and store the summary for later retrieval. -void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth); - -// Retrieve a summary of all the parse states near the top of the -// given version of the stack. -StackSummary *ts_stack_get_summary(Stack *, StackVersion); - -// Get the total cost of all errors on the given version of the stack. -unsigned ts_stack_error_cost(const Stack *, StackVersion version); - -// Merge the given two stack versions if possible, returning true -// if they were successfully merged and false otherwise. -bool ts_stack_merge(Stack *, StackVersion, StackVersion); - -// Determine whether the given two stack versions can be merged. -bool ts_stack_can_merge(Stack *, StackVersion, StackVersion); - -Subtree ts_stack_resume(Stack *, StackVersion); - -void ts_stack_pause(Stack *, StackVersion, Subtree); - -void ts_stack_halt(Stack *, StackVersion); - -bool ts_stack_is_active(const Stack *, StackVersion); - -bool ts_stack_is_paused(const Stack *, StackVersion); - -bool ts_stack_is_halted(const Stack *, StackVersion); - -void ts_stack_renumber_version(Stack *, StackVersion, StackVersion); - -void ts_stack_swap_versions(Stack *, StackVersion, StackVersion); - -StackVersion ts_stack_copy_version(Stack *, StackVersion); - -// Remove the given version from the stack. 
-void ts_stack_remove_version(Stack *, StackVersion); - -void ts_stack_clear(Stack *); - -bool ts_stack_print_dot_graph(Stack *, const t_language *, FILE *); - -typedef void (*StackIterateCallback)(void *, t_state_id, uint32_t); - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_PARSE_STACK_H_ diff --git a/parser/src/subtree.c b/parser/src/subtree.c deleted file mode 100644 index e3a33ce7..00000000 --- a/parser/src/subtree.c +++ /dev/null @@ -1,1060 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "./alloc.h" -#include "./array.h" -#include "./atomic.h" -#include "./subtree.h" -#include "./length.h" -#include "./language.h" -#include "./error_costs.h" -#include - -typedef struct { - Length start; - Length old_end; - Length new_end; -} Edit; - -#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX -#define TS_MAX_TREE_POOL_SIZE 32 - -// ExternalScannerState - -void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) { - self->length = length; - if (length > sizeof(self->short_data)) { - self->long_data = ts_malloc(length); - memcpy(self->long_data, data, length); - } else { - memcpy(self->short_data, data, length); - } -} - -ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) { - ExternalScannerState result = *self; - if (self->length > sizeof(self->short_data)) { - result.long_data = ts_malloc(self->length); - memcpy(result.long_data, self->long_data, self->length); - } - return result; -} - -void ts_external_scanner_state_delete(ExternalScannerState *self) { - if (self->length > sizeof(self->short_data)) { - ts_free(self->long_data); - } -} - -const char *ts_external_scanner_state_data(const ExternalScannerState *self) { - if (self->length > sizeof(self->short_data)) { - return self->long_data; - } else { - return self->short_data; - } -} - -bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length) { - return - 
self->length == length && - memcmp(ts_external_scanner_state_data(self), buffer, length) == 0; -} - -// SubtreeArray - -void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) { - dest->size = self.size; - dest->capacity = self.capacity; - dest->contents = self.contents; - if (self.capacity > 0) { - dest->contents = ts_calloc(self.capacity, sizeof(Subtree)); - memcpy(dest->contents, self.contents, self.size * sizeof(Subtree)); - for (uint32_t i = 0; i < self.size; i++) { - ts_subtree_retain(dest->contents[i]); - } - } -} - -void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) { - for (uint32_t i = 0; i < self->size; i++) { - ts_subtree_release(pool, self->contents[i]); - } - array_clear(self); -} - -void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) { - ts_subtree_array_clear(pool, self); - array_delete(self); -} - -void ts_subtree_array_remove_trailing_extras( - SubtreeArray *self, - SubtreeArray *destination -) { - array_clear(destination); - while (self->size > 0) { - Subtree last = self->contents[self->size - 1]; - if (ts_subtree_extra(last)) { - self->size--; - array_push(destination, last); - } else { - break; - } - } - ts_subtree_array_reverse(destination); -} - -void ts_subtree_array_reverse(SubtreeArray *self) { - for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) { - size_t reverse_index = self->size - 1 - i; - Subtree swap = self->contents[i]; - self->contents[i] = self->contents[reverse_index]; - self->contents[reverse_index] = swap; - } -} - -// SubtreePool - -SubtreePool ts_subtree_pool_new(uint32_t capacity) { - SubtreePool self = {array_new(), array_new()}; - array_reserve(&self.free_trees, capacity); - return self; -} - -void ts_subtree_pool_delete(SubtreePool *self) { - if (self->free_trees.contents) { - for (unsigned i = 0; i < self->free_trees.size; i++) { - ts_free(self->free_trees.contents[i].ptr); - } - array_delete(&self->free_trees); - } - if (self->tree_stack.contents) 
array_delete(&self->tree_stack); -} - -static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) { - if (self->free_trees.size > 0) { - return array_pop(&self->free_trees).ptr; - } else { - return ts_malloc(sizeof(SubtreeHeapData)); - } -} - -static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) { - if (self->free_trees.capacity > 0 && self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE) { - array_push(&self->free_trees, (MutableSubtree) {.ptr = tree}); - } else { - ts_free(tree); - } -} - -// Subtree - -static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) { - return - padding.bytes < TS_MAX_INLINE_TREE_LENGTH && - padding.extent.row < 16 && - padding.extent.column < TS_MAX_INLINE_TREE_LENGTH && - size.extent.row == 0 && - size.extent.column < TS_MAX_INLINE_TREE_LENGTH && - lookahead_bytes < 16; -} - -Subtree ts_subtree_new_leaf( - SubtreePool *pool, t_symbol symbol, Length padding, Length size, - uint32_t lookahead_bytes, t_state_id parse_state, - bool has_external_tokens, bool depends_on_column, - bool is_keyword, const t_language *language -) { - TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); - bool extra = symbol == ts_builtin_sym_end; - - bool is_inline = ( - symbol <= UINT8_MAX && - !has_external_tokens && - ts_subtree_can_inline(padding, size, lookahead_bytes) - ); - - if (is_inline) { - return (Subtree) {{ - .parse_state = parse_state, - .symbol = symbol, - .padding_bytes = padding.bytes, - .padding_rows = padding.extent.row, - .padding_columns = padding.extent.column, - .size_bytes = size.bytes, - .lookahead_bytes = lookahead_bytes, - .visible = metadata.visible, - .named = metadata.named, - .extra = extra, - .has_changes = false, - .is_missing = false, - .is_keyword = is_keyword, - .is_inline = true, - }}; - } else { - SubtreeHeapData *data = ts_subtree_pool_allocate(pool); - *data = (SubtreeHeapData) { - .ref_count = 1, - .padding = padding, - .size = 
size, - .lookahead_bytes = lookahead_bytes, - .error_cost = 0, - .child_count = 0, - .symbol = symbol, - .parse_state = parse_state, - .visible = metadata.visible, - .named = metadata.named, - .extra = extra, - .fragile_left = false, - .fragile_right = false, - .has_changes = false, - .has_external_tokens = has_external_tokens, - .has_external_scanner_state_change = false, - .depends_on_column = depends_on_column, - .is_missing = false, - .is_keyword = is_keyword, - {{.first_leaf = {.symbol = 0, .parse_state = 0}}} - }; - return (Subtree) {.ptr = data}; - } -} - -void ts_subtree_set_symbol( - MutableSubtree *self, - t_symbol symbol, - const t_language *language -) { - TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); - if (self->data.is_inline) { - assert(symbol < UINT8_MAX); - self->data.symbol = symbol; - self->data.named = metadata.named; - self->data.visible = metadata.visible; - } else { - self->ptr->symbol = symbol; - self->ptr->named = metadata.named; - self->ptr->visible = metadata.visible; - } -} - -Subtree ts_subtree_new_error( - SubtreePool *pool, int32_t lookahead_char, Length padding, Length size, - uint32_t bytes_scanned, t_state_id parse_state, const t_language *language -) { - Subtree result = ts_subtree_new_leaf( - pool, ts_builtin_sym_error, padding, size, bytes_scanned, - parse_state, false, false, false, language - ); - SubtreeHeapData *data = (SubtreeHeapData *)result.ptr; - data->fragile_left = true; - data->fragile_right = true; - data->lookahead_char = lookahead_char; - return result; -} - -// Clone a subtree. 
-MutableSubtree ts_subtree_clone(Subtree self) { - size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); - Subtree *new_children = ts_malloc(alloc_size); - Subtree *old_children = ts_subtree_children(self); - memcpy(new_children, old_children, alloc_size); - SubtreeHeapData *result = (SubtreeHeapData *)&new_children[self.ptr->child_count]; - if (self.ptr->child_count > 0) { - for (uint32_t i = 0; i < self.ptr->child_count; i++) { - ts_subtree_retain(new_children[i]); - } - } else if (self.ptr->has_external_tokens) { - result->external_scanner_state = ts_external_scanner_state_copy( - &self.ptr->external_scanner_state - ); - } - result->ref_count = 1; - return (MutableSubtree) {.ptr = result}; -} - -// Get mutable version of a subtree. -// -// This takes ownership of the subtree. If the subtree has only one owner, -// this will directly convert it into a mutable version. Otherwise, it will -// perform a copy. -MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { - if (self.data.is_inline) return (MutableSubtree) {self.data}; - if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); - MutableSubtree result = ts_subtree_clone(self); - ts_subtree_release(pool, self); - return result; -} - -static void ts_subtree__compress( - MutableSubtree self, - unsigned count, - const t_language *language, - MutableSubtreeArray *stack -) { - unsigned initial_stack_size = stack->size; - - MutableSubtree tree = self; - t_symbol symbol = tree.ptr->symbol; - for (unsigned i = 0; i < count; i++) { - if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break; - - MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); - if ( - child.data.is_inline || - child.ptr->child_count < 2 || - child.ptr->ref_count > 1 || - child.ptr->symbol != symbol - ) break; - - MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]); - if ( - grandchild.data.is_inline || - grandchild.ptr->child_count < 2 || - 
grandchild.ptr->ref_count > 1 || - grandchild.ptr->symbol != symbol - ) break; - - ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild); - ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1]; - ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child); - array_push(stack, tree); - tree = grandchild; - } - - while (stack->size > initial_stack_size) { - tree = array_pop(stack); - MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); - MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]); - ts_subtree_summarize_children(grandchild, language); - ts_subtree_summarize_children(child, language); - ts_subtree_summarize_children(tree, language); - } -} - -void ts_subtree_balance(Subtree self, SubtreePool *pool, const t_language *language) { - array_clear(&pool->tree_stack); - - if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) { - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); - } - - while (pool->tree_stack.size > 0) { - MutableSubtree tree = array_pop(&pool->tree_stack); - - if (tree.ptr->repeat_depth > 0) { - Subtree child1 = ts_subtree_children(tree)[0]; - Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1]; - long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2); - if (repeat_delta > 0) { - unsigned n = (unsigned)repeat_delta; - for (unsigned i = n / 2; i > 0; i /= 2) { - ts_subtree__compress(tree, i, language, &pool->tree_stack); - n -= i; - } - } - } - - for (uint32_t i = 0; i < tree.ptr->child_count; i++) { - Subtree child = ts_subtree_children(tree)[i]; - if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) { - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); - } - } - } -} - -// Assign all of the node's properties that depend on its children. 
-void ts_subtree_summarize_children( - MutableSubtree self, - const t_language *language -) { - assert(!self.data.is_inline); - - self.ptr->named_child_count = 0; - self.ptr->visible_child_count = 0; - self.ptr->error_cost = 0; - self.ptr->repeat_depth = 0; - self.ptr->visible_descendant_count = 0; - self.ptr->has_external_tokens = false; - self.ptr->depends_on_column = false; - self.ptr->has_external_scanner_state_change = false; - self.ptr->dynamic_precedence = 0; - - uint32_t structural_index = 0; - const t_symbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); - uint32_t lookahead_end_byte = 0; - - const Subtree *children = ts_subtree_children(self); - for (uint32_t i = 0; i < self.ptr->child_count; i++) { - Subtree child = children[i]; - - if ( - self.ptr->size.extent.row == 0 && - ts_subtree_depends_on_column(child) - ) { - self.ptr->depends_on_column = true; - } - - if (ts_subtree_has_external_scanner_state_change(child)) { - self.ptr->has_external_scanner_state_change = true; - } - - if (i == 0) { - self.ptr->padding = ts_subtree_padding(child); - self.ptr->size = ts_subtree_size(child); - } else { - self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child)); - } - - uint32_t child_lookahead_end_byte = - self.ptr->padding.bytes + - self.ptr->size.bytes + - ts_subtree_lookahead_bytes(child); - if (child_lookahead_end_byte > lookahead_end_byte) { - lookahead_end_byte = child_lookahead_end_byte; - } - - if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) { - self.ptr->error_cost += ts_subtree_error_cost(child); - } - - uint32_t grandchild_count = ts_subtree_child_count(child); - if ( - self.ptr->symbol == ts_builtin_sym_error || - self.ptr->symbol == ts_builtin_sym_error_repeat - ) { - if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) { - if (ts_subtree_visible(child)) { - self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; - } else if (grandchild_count > 0) { - 
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count; - } - } - } - - self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child); - self.ptr->visible_descendant_count += ts_subtree_visible_descendant_count(child); - - if (alias_sequence && alias_sequence[structural_index] != 0 && !ts_subtree_extra(child)) { - self.ptr->visible_descendant_count++; - self.ptr->visible_child_count++; - if (ts_language_symbol_metadata(language, alias_sequence[structural_index]).named) { - self.ptr->named_child_count++; - } - } else if (ts_subtree_visible(child)) { - self.ptr->visible_descendant_count++; - self.ptr->visible_child_count++; - if (ts_subtree_named(child)) self.ptr->named_child_count++; - } else if (grandchild_count > 0) { - self.ptr->visible_child_count += child.ptr->visible_child_count; - self.ptr->named_child_count += child.ptr->named_child_count; - } - - if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true; - - if (ts_subtree_is_error(child)) { - self.ptr->fragile_left = self.ptr->fragile_right = true; - self.ptr->parse_state = TS_TREE_STATE_NONE; - } - - if (!ts_subtree_extra(child)) structural_index++; - } - - self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes; - - if ( - self.ptr->symbol == ts_builtin_sym_error || - self.ptr->symbol == ts_builtin_sym_error_repeat - ) { - self.ptr->error_cost += - ERROR_COST_PER_RECOVERY + - ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes + - ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row; - } - - if (self.ptr->child_count > 0) { - Subtree first_child = children[0]; - Subtree last_child = children[self.ptr->child_count - 1]; - - self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child); - self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child); - - if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true; - if (ts_subtree_fragile_right(last_child)) 
self.ptr->fragile_right = true; - - if ( - self.ptr->child_count >= 2 && - !self.ptr->visible && - !self.ptr->named && - ts_subtree_symbol(first_child) == self.ptr->symbol - ) { - if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) { - self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1; - } else { - self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1; - } - } - } -} - -// Create a new parent node with the given children. -// -// This takes ownership of the children array. -MutableSubtree ts_subtree_new_node( - t_symbol symbol, - SubtreeArray *children, - unsigned production_id, - const t_language *language -) { - TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); - bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; - - // Allocate the node's data at the end of the array of children. - size_t new_byte_size = ts_subtree_alloc_size(children->size); - if (children->capacity * sizeof(Subtree) < new_byte_size) { - children->contents = ts_realloc(children->contents, new_byte_size); - children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree)); - } - SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size]; - - *data = (SubtreeHeapData) { - .ref_count = 1, - .symbol = symbol, - .child_count = children->size, - .visible = metadata.visible, - .named = metadata.named, - .has_changes = false, - .has_external_scanner_state_change = false, - .fragile_left = fragile, - .fragile_right = fragile, - .is_keyword = false, - {{ - .visible_descendant_count = 0, - .production_id = production_id, - .first_leaf = {.symbol = 0, .parse_state = 0}, - }} - }; - MutableSubtree result = {.ptr = data}; - ts_subtree_summarize_children(result, language); - return result; -} - -// Create a new error node containing the given children. -// -// This node is treated as 'extra'. Its children are prevented from having -// having any effect on the parse state. 
-Subtree ts_subtree_new_error_node( - SubtreeArray *children, - bool extra, - const t_language *language -) { - MutableSubtree result = ts_subtree_new_node( - ts_builtin_sym_error, children, 0, language - ); - result.ptr->extra = extra; - return ts_subtree_from_mut(result); -} - -// Create a new 'missing leaf' node. -// -// This node is treated as 'extra'. Its children are prevented from having -// having any effect on the parse state. -Subtree ts_subtree_new_missing_leaf( - SubtreePool *pool, - t_symbol symbol, - Length padding, - uint32_t lookahead_bytes, - const t_language *language -) { - Subtree result = ts_subtree_new_leaf( - pool, symbol, padding, length_zero(), lookahead_bytes, - 0, false, false, false, language - ); - if (result.data.is_inline) { - result.data.is_missing = true; - } else { - ((SubtreeHeapData *)result.ptr)->is_missing = true; - } - return result; -} - -void ts_subtree_retain(Subtree self) { - if (self.data.is_inline) return; - assert(self.ptr->ref_count > 0); - atomic_inc((volatile uint32_t *)&self.ptr->ref_count); - assert(self.ptr->ref_count != 0); -} - -void ts_subtree_release(SubtreePool *pool, Subtree self) { - if (self.data.is_inline) return; - array_clear(&pool->tree_stack); - - assert(self.ptr->ref_count > 0); - if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) { - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); - } - - while (pool->tree_stack.size > 0) { - MutableSubtree tree = array_pop(&pool->tree_stack); - if (tree.ptr->child_count > 0) { - Subtree *children = ts_subtree_children(tree); - for (uint32_t i = 0; i < tree.ptr->child_count; i++) { - Subtree child = children[i]; - if (child.data.is_inline) continue; - assert(child.ptr->ref_count > 0); - if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) { - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); - } - } - ts_free(children); - } else { - if (tree.ptr->has_external_tokens) { - 
ts_external_scanner_state_delete(&tree.ptr->external_scanner_state); - } - ts_subtree_pool_free(pool, tree.ptr); - } - } -} - -int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool) { - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left)); - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right)); - - while (pool->tree_stack.size > 0) { - right = ts_subtree_from_mut(array_pop(&pool->tree_stack)); - left = ts_subtree_from_mut(array_pop(&pool->tree_stack)); - - int result = 0; - if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) result = -1; - else if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) result = 1; - else if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) result = -1; - else if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) result = 1; - if (result != 0) { - array_clear(&pool->tree_stack); - return result; - } - - for (uint32_t i = ts_subtree_child_count(left); i > 0; i--) { - Subtree left_child = ts_subtree_children(left)[i - 1]; - Subtree right_child = ts_subtree_children(right)[i - 1]; - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left_child)); - array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right_child)); - } - } - - return 0; -} - -static inline void ts_subtree_set_has_changes(MutableSubtree *self) { - if (self->data.is_inline) { - self->data.has_changes = true; - } else { - self->ptr->has_changes = true; - } -} - -Subtree ts_subtree_edit(Subtree self, const t_input_edit *input_edit, SubtreePool *pool) { - typedef struct { - Subtree *tree; - Edit edit; - } EditEntry; - - Array(EditEntry) stack = array_new(); - array_push(&stack, ((EditEntry) { - .tree = &self, - .edit = (Edit) { - .start = {input_edit->start_byte, input_edit->start_point}, - .old_end = {input_edit->old_end_byte, input_edit->old_end_point}, - .new_end = {input_edit->new_end_byte, input_edit->new_end_point}, - }, - })); - - while (stack.size) { - EditEntry entry = array_pop(&stack); - Edit edit 
= entry.edit; - bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes; - bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; - bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree); - - Length size = ts_subtree_size(*entry.tree); - Length padding = ts_subtree_padding(*entry.tree); - Length total_size = length_add(padding, size); - uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); - uint32_t end_byte = total_size.bytes + lookahead_bytes; - if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue; - - // If the edit is entirely within the space before this subtree, then shift this - // subtree over according to the edit without changing its size. - if (edit.old_end.bytes <= padding.bytes) { - padding = length_add(edit.new_end, length_sub(padding, edit.old_end)); - } - - // If the edit starts in the space before this subtree and extends into this subtree, - // shrink the subtree's content to compensate for the change in the space before it. - else if (edit.start.bytes < padding.bytes) { - size = length_saturating_sub(size, length_sub(edit.old_end, padding)); - padding = edit.new_end; - } - - // If the edit is a pure insertion right at the start of the subtree, - // shift the subtree over according to the insertion. - else if (edit.start.bytes == padding.bytes && is_pure_insertion) { - padding = edit.new_end; - } - - // If the edit is within this subtree, resize the subtree to reflect the edit. 
- else if ( - edit.start.bytes < total_size.bytes || - (edit.start.bytes == total_size.bytes && is_pure_insertion) - ) { - size = length_add( - length_sub(edit.new_end, padding), - length_saturating_sub(total_size, edit.old_end) - ); - } - - MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree); - - if (result.data.is_inline) { - if (ts_subtree_can_inline(padding, size, lookahead_bytes)) { - result.data.padding_bytes = padding.bytes; - result.data.padding_rows = padding.extent.row; - result.data.padding_columns = padding.extent.column; - result.data.size_bytes = size.bytes; - } else { - SubtreeHeapData *data = ts_subtree_pool_allocate(pool); - data->ref_count = 1; - data->padding = padding; - data->size = size; - data->lookahead_bytes = lookahead_bytes; - data->error_cost = 0; - data->child_count = 0; - data->symbol = result.data.symbol; - data->parse_state = result.data.parse_state; - data->visible = result.data.visible; - data->named = result.data.named; - data->extra = result.data.extra; - data->fragile_left = false; - data->fragile_right = false; - data->has_changes = false; - data->has_external_tokens = false; - data->depends_on_column = false; - data->is_missing = result.data.is_missing; - data->is_keyword = result.data.is_keyword; - result.ptr = data; - } - } else { - result.ptr->padding = padding; - result.ptr->size = size; - } - - ts_subtree_set_has_changes(&result); - *entry.tree = ts_subtree_from_mut(result); - - Length child_left, child_right = length_zero(); - for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) { - Subtree *child = &ts_subtree_children(*entry.tree)[i]; - Length child_size = ts_subtree_total_size(*child); - child_left = child_right; - child_right = length_add(child_left, child_size); - - // If this child ends before the edit, it is not affected. 
- if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue; - - // Keep editing child nodes until a node is reached that starts after the edit. - // Also, if this node's validity depends on its column position, then continue - // invaliditing child nodes until reaching a line break. - if (( - (child_left.bytes > edit.old_end.bytes) || - (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0) - ) && ( - !invalidate_first_row || - child_left.extent.row > entry.tree->ptr->padding.extent.row - )) { - break; - } - - // Transform edit into the child's coordinate space. - Edit child_edit = { - .start = length_saturating_sub(edit.start, child_left), - .old_end = length_saturating_sub(edit.old_end, child_left), - .new_end = length_saturating_sub(edit.new_end, child_left), - }; - - // Interpret all inserted text as applying to the *first* child that touches the edit. - // Subsequent children are only never have any text inserted into them; they are only - // shrunk to compensate for the edit. - if ( - child_right.bytes > edit.start.bytes || - (child_right.bytes == edit.start.bytes && is_pure_insertion) - ) { - edit.new_end = edit.start; - } - - // Children that occur before the edit are not reshaped by the edit. - else { - child_edit.old_end = child_edit.start; - child_edit.new_end = child_edit.start; - } - - // Queue processing of this child's subtree. 
- array_push(&stack, ((EditEntry) { - .tree = child, - .edit = child_edit, - })); - } - } - - array_delete(&stack); - return self; -} - -Subtree ts_subtree_last_external_token(Subtree tree) { - if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE; - while (tree.ptr->child_count > 0) { - for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) { - Subtree child = ts_subtree_children(tree)[i]; - if (ts_subtree_has_external_tokens(child)) { - tree = child; - break; - } - } - } - return tree; -} - -static size_t ts_subtree__write_char_to_string(char *str, size_t n, int32_t chr) { - if (chr == -1) - return snprintf(str, n, "INVALID"); - else if (chr == '\0') - return snprintf(str, n, "'\\0'"); - else if (chr == '\n') - return snprintf(str, n, "'\\n'"); - else if (chr == '\t') - return snprintf(str, n, "'\\t'"); - else if (chr == '\r') - return snprintf(str, n, "'\\r'"); - else if (0 < chr && chr < 128 && isprint(chr)) - return snprintf(str, n, "'%c'", chr); - else - return snprintf(str, n, "%d", chr); -} - -static const char *const ROOT_FIELD = "__ROOT__"; - -static size_t ts_subtree__write_to_string( - Subtree self, char *string, size_t limit, - const t_language *language, bool include_all, - t_symbol alias_symbol, bool alias_is_named, const char *field_name -) { - if (!self.ptr) return snprintf(string, limit, "(NULL)"); - - char *cursor = string; - char **writer = (limit > 1) ? &cursor : &string; - bool is_root = field_name == ROOT_FIELD; - bool is_visible = - include_all || - ts_subtree_missing(self) || - ( - alias_symbol - ? 
alias_is_named - : ts_subtree_visible(self) && ts_subtree_named(self) - ); - - if (is_visible) { - if (!is_root) { - cursor += snprintf(*writer, limit, " "); - if (field_name) { - cursor += snprintf(*writer, limit, "%s: ", field_name); - } - } - - if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) { - cursor += snprintf(*writer, limit, "(UNEXPECTED "); - cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char); - } else { - t_symbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); - const char *symbol_name = ts_language_symbol_name(language, symbol); - if (ts_subtree_missing(self)) { - cursor += snprintf(*writer, limit, "(MISSING "); - if (alias_is_named || ts_subtree_named(self)) { - cursor += snprintf(*writer, limit, "%s", symbol_name); - } else { - cursor += snprintf(*writer, limit, "\"%s\"", symbol_name); - } - } else { - cursor += snprintf(*writer, limit, "(%s", symbol_name); - } - } - } else if (is_root) { - t_symbol symbol = alias_symbol ? 
alias_symbol : ts_subtree_symbol(self); - const char *symbol_name = ts_language_symbol_name(language, symbol); - if (ts_subtree_child_count(self) > 0) { - cursor += snprintf(*writer, limit, "(%s", symbol_name); - } else if (ts_subtree_named(self)) { - cursor += snprintf(*writer, limit, "(%s)", symbol_name); - } else { - cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name); - } - } - - if (ts_subtree_child_count(self)) { - const t_symbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map( - language, - self.ptr->production_id, - &field_map, - &field_map_end - ); - - uint32_t structural_child_index = 0; - for (uint32_t i = 0; i < self.ptr->child_count; i++) { - Subtree child = ts_subtree_children(self)[i]; - if (ts_subtree_extra(child)) { - cursor += ts_subtree__write_to_string( - child, *writer, limit, - language, include_all, - 0, false, NULL - ); - } else { - t_symbol subtree_alias_symbol = alias_sequence - ? alias_sequence[structural_child_index] - : 0; - bool subtree_alias_is_named = subtree_alias_symbol - ? ts_language_symbol_metadata(language, subtree_alias_symbol).named - : false; - - const char *child_field_name = is_visible ? 
NULL : field_name; - for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - if (!map->inherited && map->child_index == structural_child_index) { - child_field_name = language->field_names[map->field_id]; - break; - } - } - - cursor += ts_subtree__write_to_string( - child, *writer, limit, - language, include_all, - subtree_alias_symbol, subtree_alias_is_named, child_field_name - ); - structural_child_index++; - } - } - } - - if (is_visible) cursor += snprintf(*writer, limit, ")"); - - return cursor - string; -} - -char *ts_subtree_string( - Subtree self, - t_symbol alias_symbol, - bool alias_is_named, - const t_language *language, - bool include_all -) { - char scratch_string[1]; - size_t size = ts_subtree__write_to_string( - self, scratch_string, 1, - language, include_all, - alias_symbol, alias_is_named, ROOT_FIELD - ) + 1; - char *result = ts_malloc(size * sizeof(char)); - ts_subtree__write_to_string( - self, result, size, - language, include_all, - alias_symbol, alias_is_named, ROOT_FIELD - ); - return result; -} - -void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, - const t_language *language, t_symbol alias_symbol, - FILE *f) { - t_symbol subtree_symbol = ts_subtree_symbol(*self); - t_symbol symbol = alias_symbol ? 
alias_symbol : subtree_symbol; - uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self); - fprintf(f, "tree_%p [label=\"", (void *)self); - ts_language_write_symbol_as_dot_string(language, f, symbol); - fprintf(f, "\""); - - if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext"); - if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray"); - - fprintf(f, ", tooltip=\"" - "range: %u - %u\n" - "state: %d\n" - "error-cost: %u\n" - "has-changes: %u\n" - "depends-on-column: %u\n" - "descendant-count: %u\n" - "repeat-depth: %u\n" - "lookahead-bytes: %u", - start_offset, end_offset, - ts_subtree_parse_state(*self), - ts_subtree_error_cost(*self), - ts_subtree_has_changes(*self), - ts_subtree_depends_on_column(*self), - ts_subtree_visible_descendant_count(*self), - ts_subtree_repeat_depth(*self), - ts_subtree_lookahead_bytes(*self) - ); - - if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) { - fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char); - } - - fprintf(f, "\"]\n"); - - uint32_t child_start_offset = start_offset; - uint32_t child_info_offset = - language->max_alias_sequence_length * - ts_subtree_production_id(*self); - for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { - const Subtree *child = &ts_subtree_children(*self)[i]; - t_symbol subtree_alias_symbol = 0; - if (!ts_subtree_extra(*child) && child_info_offset) { - subtree_alias_symbol = language->alias_sequences[child_info_offset]; - child_info_offset++; - } - ts_subtree__print_dot_graph(child, child_start_offset, language, subtree_alias_symbol, f); - fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i); - child_start_offset += ts_subtree_total_bytes(*child); - } -} - -void ts_subtree_print_dot_graph(Subtree self, const t_language *language, FILE *f) { - fprintf(f, "digraph tree {\n"); - fprintf(f, "edge [arrowhead=none]\n"); - ts_subtree__print_dot_graph(&self, 0, language, 
0, f); - fprintf(f, "}\n"); -} - -const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) { - static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0}; - if ( - self.ptr && - !self.data.is_inline && - self.ptr->has_external_tokens && - self.ptr->child_count == 0 - ) { - return &self.ptr->external_scanner_state; - } else { - return &empty_state; - } -} - -bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) { - const ExternalScannerState *state_self = ts_subtree_external_scanner_state(self); - const ExternalScannerState *state_other = ts_subtree_external_scanner_state(other); - return ts_external_scanner_state_eq( - state_self, - ts_external_scanner_state_data(state_other), - state_other->length - ); -} diff --git a/parser/src/subtree.h b/parser/src/subtree.h deleted file mode 100644 index 4b218fa6..00000000 --- a/parser/src/subtree.h +++ /dev/null @@ -1,382 +0,0 @@ -#ifndef TREE_SITTER_SUBTREE_H_ -#define TREE_SITTER_SUBTREE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include "./length.h" -#include "./array.h" -#include "./error_costs.h" -#include "./host.h" -#include "./api.h" -#include "./parser.h" - -#define TS_TREE_STATE_NONE USHRT_MAX -#define NULL_SUBTREE ((Subtree) {.ptr = NULL}) - -// The serialized state of an external scanner. -// -// Every time an external token subtree is created after a call to an -// external scanner, the scanner's `serialize` function is called to -// retrieve a serialized copy of its state. The bytes are then copied -// onto the subtree itself so that the scanner's state can later be -// restored using its `deserialize` function. -// -// Small byte arrays are stored inline, and long ones are allocated -// separately on the heap. -typedef struct { - union { - char *long_data; - char short_data[24]; - }; - uint32_t length; -} ExternalScannerState; - -// A compact representation of a subtree. 
-// -// This representation is used for small leaf nodes that are not -// errors, and were not created by an external scanner. -// -// The idea behind the layout of this struct is that the `is_inline` -// bit will fall exactly into the same location as the least significant -// bit of the pointer in `Subtree` or `MutableSubtree`, respectively. -// Because of alignment, for any valid pointer this will be 0, giving -// us the opportunity to make use of this bit to signify whether to use -// the pointer or the inline struct. -typedef struct SubtreeInlineData SubtreeInlineData; - -#define SUBTREE_BITS \ - bool visible : 1; \ - bool named : 1; \ - bool extra : 1; \ - bool has_changes : 1; \ - bool is_missing : 1; \ - bool is_keyword : 1; - -#define SUBTREE_SIZE \ - uint8_t padding_columns; \ - uint8_t padding_rows : 4; \ - uint8_t lookahead_bytes : 4; \ - uint8_t padding_bytes; \ - uint8_t size_bytes; - -#if TS_BIG_ENDIAN -#if TS_PTR_SIZE == 32 - -struct SubtreeInlineData { - uint16_t parse_state; - uint8_t symbol; - SUBTREE_BITS - bool unused : 1; - bool is_inline : 1; - SUBTREE_SIZE -}; - -#else - -struct SubtreeInlineData { - SUBTREE_SIZE - uint16_t parse_state; - uint8_t symbol; - SUBTREE_BITS - bool unused : 1; - bool is_inline : 1; -}; - -#endif -#else - -struct SubtreeInlineData { - bool is_inline : 1; - SUBTREE_BITS - uint8_t symbol; - uint16_t parse_state; - SUBTREE_SIZE -}; - -#endif - -#undef SUBTREE_BITS -#undef SUBTREE_SIZE - -// A heap-allocated representation of a subtree. -// -// This representation is used for parent nodes, external tokens, -// errors, and other leaf nodes whose data is too large to fit into -// the inline representation. 
-typedef struct { - volatile uint32_t ref_count; - Length padding; - Length size; - uint32_t lookahead_bytes; - uint32_t error_cost; - uint32_t child_count; - t_symbol symbol; - t_state_id parse_state; - - bool visible : 1; - bool named : 1; - bool extra : 1; - bool fragile_left : 1; - bool fragile_right : 1; - bool has_changes : 1; - bool has_external_tokens : 1; - bool has_external_scanner_state_change : 1; - bool depends_on_column: 1; - bool is_missing : 1; - bool is_keyword : 1; - - union { - // Non-terminal subtrees (`child_count > 0`) - struct { - uint32_t visible_child_count; - uint32_t named_child_count; - uint32_t visible_descendant_count; - int32_t dynamic_precedence; - uint16_t repeat_depth; - uint16_t production_id; - struct { - t_symbol symbol; - t_state_id parse_state; - } first_leaf; - }; - - // External terminal subtrees (`child_count == 0 && has_external_tokens`) - ExternalScannerState external_scanner_state; - - // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`) - int32_t lookahead_char; - }; -} SubtreeHeapData; - -// The fundamental building block of a syntax tree. -typedef union { - SubtreeInlineData data; - const SubtreeHeapData *ptr; -} Subtree; - -// Like Subtree, but mutable. 
-typedef union { - SubtreeInlineData data; - SubtreeHeapData *ptr; -} MutableSubtree; - -typedef Array(Subtree) SubtreeArray; -typedef Array(MutableSubtree) MutableSubtreeArray; - -typedef struct { - MutableSubtreeArray free_trees; - MutableSubtreeArray tree_stack; -} SubtreePool; - -void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned); -const char *ts_external_scanner_state_data(const ExternalScannerState *); -bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *, unsigned); -void ts_external_scanner_state_delete(ExternalScannerState *self); - -void ts_subtree_array_copy(SubtreeArray, SubtreeArray *); -void ts_subtree_array_clear(SubtreePool *, SubtreeArray *); -void ts_subtree_array_delete(SubtreePool *, SubtreeArray *); -void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *); -void ts_subtree_array_reverse(SubtreeArray *); - -SubtreePool ts_subtree_pool_new(uint32_t capacity); -void ts_subtree_pool_delete(SubtreePool *); - -Subtree ts_subtree_new_leaf( - SubtreePool *, t_symbol, Length, Length, uint32_t, - t_state_id, bool, bool, bool, const t_language * -); -Subtree ts_subtree_new_error( - SubtreePool *, int32_t, Length, Length, uint32_t, t_state_id, const t_language * -); -MutableSubtree ts_subtree_new_node(t_symbol, SubtreeArray *, unsigned, const t_language *); -Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const t_language *); -Subtree ts_subtree_new_missing_leaf(SubtreePool *, t_symbol, Length, uint32_t, const t_language *); -MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree); -void ts_subtree_retain(Subtree); -void ts_subtree_release(SubtreePool *, Subtree); -int ts_subtree_compare(Subtree, Subtree, SubtreePool *); -void ts_subtree_set_symbol(MutableSubtree *, t_symbol, const t_language *); -void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const t_language *); -void ts_subtree_summarize_children(MutableSubtree, const t_language *); -void 
ts_subtree_balance(Subtree, SubtreePool *, const t_language *); -Subtree ts_subtree_edit(Subtree, const t_input_edit *edit, SubtreePool *); -char *ts_subtree_string(Subtree, t_symbol, bool, const t_language *, bool include_all); -void ts_subtree_print_dot_graph(Subtree, const t_language *, FILE *); -Subtree ts_subtree_last_external_token(Subtree); -const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self); -bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); - -#define SUBTREE_GET(self, name) ((self).data.is_inline ? (self).data.name : (self).ptr->name) - -static inline t_symbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); } -static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); } -static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); } -static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); } -static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); } -static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); } -static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); } -static inline t_state_id ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); } -static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); } - -#undef SUBTREE_GET - -// Get the size needed to store a heap-allocated subtree with the given -// number of children. -static inline size_t ts_subtree_alloc_size(uint32_t child_count) { - return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); -} - -// Get a subtree's children, which are allocated immediately before the -// tree's own heap data. -#define ts_subtree_children(self) \ - ((self).data.is_inline ? 
NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) - -static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) { - if (self->data.is_inline) { - self->data.extra = is_extra; - } else { - self->ptr->extra = is_extra; - } -} - -static inline t_symbol ts_subtree_leaf_symbol(Subtree self) { - if (self.data.is_inline) return self.data.symbol; - if (self.ptr->child_count == 0) return self.ptr->symbol; - return self.ptr->first_leaf.symbol; -} - -static inline t_state_id ts_subtree_leaf_parse_state(Subtree self) { - if (self.data.is_inline) return self.data.parse_state; - if (self.ptr->child_count == 0) return self.ptr->parse_state; - return self.ptr->first_leaf.parse_state; -} - -static inline Length ts_subtree_padding(Subtree self) { - if (self.data.is_inline) { - Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}}; - return result; - } else { - return self.ptr->padding; - } -} - -static inline Length ts_subtree_size(Subtree self) { - if (self.data.is_inline) { - Length result = {self.data.size_bytes, {0, self.data.size_bytes}}; - return result; - } else { - return self.ptr->size; - } -} - -static inline Length ts_subtree_total_size(Subtree self) { - return length_add(ts_subtree_padding(self), ts_subtree_size(self)); -} - -static inline uint32_t ts_subtree_total_bytes(Subtree self) { - return ts_subtree_total_size(self).bytes; -} - -static inline uint32_t ts_subtree_child_count(Subtree self) { - return self.data.is_inline ? 0 : self.ptr->child_count; -} - -static inline uint32_t ts_subtree_repeat_depth(Subtree self) { - return self.data.is_inline ? 0 : self.ptr->repeat_depth; -} - -static inline uint32_t ts_subtree_is_repetition(Subtree self) { - return self.data.is_inline - ? 0 - : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0; -} - -static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) { - return (self.data.is_inline || self.ptr->child_count == 0) - ? 
0 - : self.ptr->visible_descendant_count; -} - -static inline uint32_t ts_subtree_visible_child_count(Subtree self) { - if (ts_subtree_child_count(self) > 0) { - return self.ptr->visible_child_count; - } else { - return 0; - } -} - -static inline uint32_t ts_subtree_error_cost(Subtree self) { - if (ts_subtree_missing(self)) { - return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; - } else { - return self.data.is_inline ? 0 : self.ptr->error_cost; - } -} - -static inline int32_t ts_subtree_dynamic_precedence(Subtree self) { - return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence; -} - -static inline uint16_t ts_subtree_production_id(Subtree self) { - if (ts_subtree_child_count(self) > 0) { - return self.ptr->production_id; - } else { - return 0; - } -} - -static inline bool ts_subtree_fragile_left(Subtree self) { - return self.data.is_inline ? false : self.ptr->fragile_left; -} - -static inline bool ts_subtree_fragile_right(Subtree self) { - return self.data.is_inline ? false : self.ptr->fragile_right; -} - -static inline bool ts_subtree_has_external_tokens(Subtree self) { - return self.data.is_inline ? false : self.ptr->has_external_tokens; -} - -static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) { - return self.data.is_inline ? false : self.ptr->has_external_scanner_state_change; -} - -static inline bool ts_subtree_depends_on_column(Subtree self) { - return self.data.is_inline ? false : self.ptr->depends_on_column; -} - -static inline bool ts_subtree_is_fragile(Subtree self) { - return self.data.is_inline ? 
false : (self.ptr->fragile_left || self.ptr->fragile_right); -} - -static inline bool ts_subtree_is_error(Subtree self) { - return ts_subtree_symbol(self) == ts_builtin_sym_error; -} - -static inline bool ts_subtree_is_eof(Subtree self) { - return ts_subtree_symbol(self) == ts_builtin_sym_end; -} - -static inline Subtree ts_subtree_from_mut(MutableSubtree self) { - Subtree result; - result.data = self.data; - return result; -} - -static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) { - MutableSubtree result; - result.data = self.data; - return result; -} - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_SUBTREE_H_ diff --git a/parser/src/tree.c b/parser/src/tree.c deleted file mode 100644 index fd25e53f..00000000 --- a/parser/src/tree.c +++ /dev/null @@ -1,165 +0,0 @@ -#define _POSIX_C_SOURCE 200112L - -#include "./api.h" -#include "./array.h" -#include "./get_changed_ranges.h" -#include "./length.h" -#include "./subtree.h" -#include "./tree_cursor.h" -#include "./tree.h" - -t_tree *ts_tree_new( - Subtree root, const t_language *language, - const t_range *included_ranges, unsigned included_range_count -) { - t_tree *result = ts_malloc(sizeof(t_tree)); - result->root = root; - result->language = ts_language_copy(language); - result->included_ranges = ts_calloc(included_range_count, sizeof(t_range)); - memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(t_range)); - result->included_range_count = included_range_count; - return result; -} - -t_tree *ts_tree_copy(const t_tree *self) { - ts_subtree_retain(self->root); - return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count); -} - -void ts_tree_delete(t_tree *self) { - if (!self) return; - - SubtreePool pool = ts_subtree_pool_new(0); - ts_subtree_release(&pool, self->root); - ts_subtree_pool_delete(&pool); - ts_language_delete(self->language); - ts_free(self->included_ranges); - ts_free(self); -} - -t_parse_node 
ts_tree_root_node(const t_tree *self) { - return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0); -} - -t_parse_node ts_tree_root_node_with_offset( - const t_tree *self, - uint32_t offset_bytes, - t_point offset_extent -) { - Length offset = {offset_bytes, offset_extent}; - return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0); -} - -const t_language *ts_tree_language(const t_tree *self) { - return self->language; -} - -void ts_tree_edit(t_tree *self, const t_input_edit *edit) { - for (unsigned i = 0; i < self->included_range_count; i++) { - t_range *range = &self->included_ranges[i]; - if (range->end_byte >= edit->old_end_byte) { - if (range->end_byte != UINT32_MAX) { - range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte); - range->end_point = point_add( - edit->new_end_point, - point_sub(range->end_point, edit->old_end_point) - ); - if (range->end_byte < edit->new_end_byte) { - range->end_byte = UINT32_MAX; - range->end_point = POINT_MAX; - } - } - } else if (range->end_byte > edit->start_byte) { - range->end_byte = edit->start_byte; - range->end_point = edit->start_point; - } - if (range->start_byte >= edit->old_end_byte) { - range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte); - range->start_point = point_add( - edit->new_end_point, - point_sub(range->start_point, edit->old_end_point) - ); - if (range->start_byte < edit->new_end_byte) { - range->start_byte = UINT32_MAX; - range->start_point = POINT_MAX; - } - } else if (range->start_byte > edit->start_byte) { - range->start_byte = edit->start_byte; - range->start_point = edit->start_point; - } - } - - SubtreePool pool = ts_subtree_pool_new(0); - self->root = ts_subtree_edit(self->root, edit, &pool); - ts_subtree_pool_delete(&pool); -} - -t_range *ts_tree_included_ranges(const t_tree *self, uint32_t *length) { - *length = self->included_range_count; - t_range *ranges = 
ts_calloc(self->included_range_count, sizeof(t_range)); - memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(t_range)); - return ranges; -} - -t_range *ts_tree_get_changed_ranges(const t_tree *old_tree, const t_tree *new_tree, uint32_t *length) { - TreeCursor cursor1 = {NULL, array_new(), 0}; - TreeCursor cursor2 = {NULL, array_new(), 0}; - ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree)); - ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree)); - - TSRangeArray included_range_differences = array_new(); - ts_range_array_get_changed_ranges( - old_tree->included_ranges, old_tree->included_range_count, - new_tree->included_ranges, new_tree->included_range_count, - &included_range_differences - ); - - t_range *result; - *length = ts_subtree_get_changed_ranges( - &old_tree->root, &new_tree->root, &cursor1, &cursor2, - old_tree->language, &included_range_differences, &result - ); - - array_delete(&included_range_differences); - array_delete(&cursor1.stack); - array_delete(&cursor2.stack); - return result; -} - -#ifdef _WIN32 - -#include -#include - -int _ts_dup(HANDLE handle) { - HANDLE dup_handle; - if (!DuplicateHandle( - GetCurrentProcess(), handle, - GetCurrentProcess(), &dup_handle, - 0, FALSE, DUPLICATE_SAME_ACCESS - )) return -1; - - return _open_osfhandle((intptr_t)dup_handle, 0); -} - -void ts_tree_print_dot_graph(const TSTree *self, int fd) { - FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a"); - ts_subtree_print_dot_graph(self->root, self->language, file); - fclose(file); -} - -#else - -#include - -int _ts_dup(int file_descriptor) { - return dup(file_descriptor); -} - -void ts_tree_print_dot_graph(const t_tree *self, int file_descriptor) { - FILE *file = fdopen(_ts_dup(file_descriptor), "a"); - ts_subtree_print_dot_graph(self->root, self->language, file); - fclose(file); -} - -#endif diff --git a/parser/src/tree.h b/parser/src/tree.h deleted file mode 100644 index 9488a40b..00000000 --- a/parser/src/tree.h +++ 
/dev/null @@ -1,31 +0,0 @@ -#ifndef TREE_SITTER_TREE_H_ -#define TREE_SITTER_TREE_H_ - -#include "./subtree.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - const Subtree *child; - const Subtree *parent; - Length position; - t_symbol alias_symbol; -} ParentCacheEntry; - -struct t_tree { - Subtree root; - const t_language *language; - t_range *included_ranges; - unsigned included_range_count; -}; - -t_tree *ts_tree_new(Subtree root, const t_language *language, const t_range *, unsigned); -t_parse_node ts_node_new(const t_tree *, const Subtree *, Length, t_symbol); - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_TREE_H_ diff --git a/parser/src/tree_cursor.c b/parser/src/tree_cursor.c deleted file mode 100644 index 7c36317b..00000000 --- a/parser/src/tree_cursor.c +++ /dev/null @@ -1,714 +0,0 @@ -#include "./api.h" -#include "./alloc.h" -#include "./tree_cursor.h" -#include "./language.h" -#include "./tree.h" - -typedef struct { - Subtree parent; - const t_tree *tree; - Length position; - uint32_t child_index; - uint32_t structural_child_index; - uint32_t descendant_index; - const t_symbol *alias_sequence; -} CursorChildIterator; - -// CursorChildIterator - -static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) { - TreeCursorEntry *entry = &self->stack.contents[index]; - if (index == 0 || ts_subtree_visible(*entry->subtree)) { - return true; - } else if (!ts_subtree_extra(*entry->subtree)) { - TreeCursorEntry *parent_entry = &self->stack.contents[index - 1]; - return ts_language_alias_at( - self->tree->language, - parent_entry->subtree->ptr->production_id, - entry->structural_child_index - ); - } else { - return false; - } -} - -static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) { - TreeCursorEntry *last_entry = array_back(&self->stack); - if (ts_subtree_child_count(*last_entry->subtree) == 0) { - return (CursorChildIterator) {NULL_SUBTREE, self->tree, 
length_zero(), 0, 0, 0, NULL}; - } - const t_symbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - last_entry->subtree->ptr->production_id - ); - - uint32_t descendant_index = last_entry->descendant_index; - if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) { - descendant_index += 1; - } - - return (CursorChildIterator) { - .tree = self->tree, - .parent = *last_entry->subtree, - .position = last_entry->position, - .child_index = 0, - .structural_child_index = 0, - .descendant_index = descendant_index, - .alias_sequence = alias_sequence, - }; -} - -static inline bool ts_tree_cursor_child_iterator_next( - CursorChildIterator *self, - TreeCursorEntry *result, - bool *visible -) { - if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; - const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; - *result = (TreeCursorEntry) { - .subtree = child, - .position = self->position, - .child_index = self->child_index, - .structural_child_index = self->structural_child_index, - .descendant_index = self->descendant_index, - }; - *visible = ts_subtree_visible(*child); - bool extra = ts_subtree_extra(*child); - if (!extra) { - if (self->alias_sequence) { - *visible |= self->alias_sequence[self->structural_child_index]; - } - self->structural_child_index++; - } - - self->descendant_index += ts_subtree_visible_descendant_count(*child); - if (*visible) { - self->descendant_index += 1; - } - - self->position = length_add(self->position, ts_subtree_size(*child)); - self->child_index++; - - if (self->child_index < self->parent.ptr->child_count) { - Subtree next_child = ts_subtree_children(self->parent)[self->child_index]; - self->position = length_add(self->position, ts_subtree_padding(next_child)); - } - - return true; -} - -// Return a position that, when `b` is added to it, yields `a`. This -// can only be computed if `b` has zero rows. 
Otherwise, this function -// returns `LENGTH_UNDEFINED`, and the caller needs to recompute -// the position some other way. -static inline Length length_backtrack(Length a, Length b) { - if (length_is_undefined(a) || b.extent.row != 0) { - return LENGTH_UNDEFINED; - } - - Length result; - result.bytes = a.bytes - b.bytes; - result.extent.row = a.extent.row; - result.extent.column = a.extent.column - b.extent.column; - return result; -} - -static inline bool ts_tree_cursor_child_iterator_previous( - CursorChildIterator *self, - TreeCursorEntry *result, - bool *visible -) { - // this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into - // account unsigned underflow - if (!self->parent.ptr || (int8_t)self->child_index == -1) return false; - const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; - *result = (TreeCursorEntry) { - .subtree = child, - .position = self->position, - .child_index = self->child_index, - .structural_child_index = self->structural_child_index, - }; - *visible = ts_subtree_visible(*child); - bool extra = ts_subtree_extra(*child); - if (!extra && self->alias_sequence) { - *visible |= self->alias_sequence[self->structural_child_index]; - self->structural_child_index--; - } - - self->position = length_backtrack(self->position, ts_subtree_padding(*child)); - self->child_index--; - - // unsigned can underflow so compare it to child_count - if (self->child_index < self->parent.ptr->child_count) { - Subtree previous_child = ts_subtree_children(self->parent)[self->child_index]; - Length size = ts_subtree_size(previous_child); - self->position = length_backtrack(self->position, size); - } - - return true; -} - -// TSTreeCursor - lifecycle - -t_tree_cursor ts_tree_cursor_new(t_parse_node node) { - t_tree_cursor self = {NULL, NULL, {0, 0, 0}}; - ts_tree_cursor_init((TreeCursor *)&self, node); - return self; -} - -void ts_tree_cursor_reset(t_tree_cursor *_self, t_parse_node node) { - ts_tree_cursor_init((TreeCursor 
*)_self, node); -} - -void ts_tree_cursor_init(TreeCursor *self, t_parse_node node) { - self->tree = node.tree; - self->root_alias_symbol = node.context[3]; - array_clear(&self->stack); - array_push(&self->stack, ((TreeCursorEntry) { - .subtree = (const Subtree *)node.id, - .position = { - ts_node_start_byte(node), - ts_node_start_point(node) - }, - .child_index = 0, - .structural_child_index = 0, - .descendant_index = 0, - })); -} - -void ts_tree_cursor_delete(t_tree_cursor *_self) { - TreeCursor *self = (TreeCursor *)_self; - array_delete(&self->stack); -} - -// TSTreeCursor - walking the tree - -TreeCursorStep ts_tree_cursor_goto_first_child_internal(t_tree_cursor *_self) { - TreeCursor *self = (TreeCursor *)_self; - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - if (visible) { - array_push(&self->stack, entry); - return TreeCursorStepVisible; - } - if (ts_subtree_visible_child_count(*entry.subtree) > 0) { - array_push(&self->stack, entry); - return TreeCursorStepHidden; - } - } - return TreeCursorStepNone; -} - -bool ts_tree_cursor_goto_first_child(t_tree_cursor *self) { - for (;;) { - switch (ts_tree_cursor_goto_first_child_internal(self)) { - case TreeCursorStepHidden: - continue; - case TreeCursorStepVisible: - return true; - default: - return false; - } - } - return false; -} - -TreeCursorStep ts_tree_cursor_goto_last_child_internal(t_tree_cursor *_self) { - TreeCursor *self = (TreeCursor *)_self; - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - if (!iterator.parent.ptr || iterator.parent.ptr->child_count == 0) return TreeCursorStepNone; - - TreeCursorEntry last_entry = {0}; - TreeCursorStep last_step = TreeCursorStepNone; - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - if (visible) { - last_entry = entry; - last_step = 
TreeCursorStepVisible; - } - else if (ts_subtree_visible_child_count(*entry.subtree) > 0) { - last_entry = entry; - last_step = TreeCursorStepHidden; - } - } - if (last_entry.subtree) { - array_push(&self->stack, last_entry); - return last_step; - } - - return TreeCursorStepNone; -} - -bool ts_tree_cursor_goto_last_child(t_tree_cursor *self) { - for (;;) { - switch (ts_tree_cursor_goto_last_child_internal(self)) { - case TreeCursorStepHidden: - continue; - case TreeCursorStepVisible: - return true; - default: - return false; - } - } - return false; -} - -static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point( - t_tree_cursor *_self, - uint32_t goal_byte, - t_point goal_point -) { - TreeCursor *self = (TreeCursor *)_self; - uint32_t initial_size = self->stack.size; - uint32_t visible_child_index = 0; - - bool did_descend; - do { - did_descend = false; - - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - Length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree)); - bool at_goal = entry_end.bytes >= goal_byte && point_gte(entry_end.extent, goal_point); - uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree); - if (at_goal) { - if (visible) { - array_push(&self->stack, entry); - return visible_child_index; - } - if (visible_child_count > 0) { - array_push(&self->stack, entry); - did_descend = true; - break; - } - } else if (visible) { - visible_child_index++; - } else { - visible_child_index += visible_child_count; - } - } - } while (did_descend); - - self->stack.size = initial_size; - return -1; -} - -int64_t ts_tree_cursor_goto_first_child_for_byte(t_tree_cursor *self, uint32_t goal_byte) { - return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, POINT_ZERO); -} - -int64_t ts_tree_cursor_goto_first_child_for_point(t_tree_cursor *self, t_point 
goal_point) { - return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point); -} - -TreeCursorStep ts_tree_cursor_goto_sibling_internal( - t_tree_cursor *_self, - bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) { - TreeCursor *self = (TreeCursor *)_self; - uint32_t initial_size = self->stack.size; - - while (self->stack.size > 1) { - TreeCursorEntry entry = array_pop(&self->stack); - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - iterator.child_index = entry.child_index; - iterator.structural_child_index = entry.structural_child_index; - iterator.position = entry.position; - iterator.descendant_index = entry.descendant_index; - - bool visible = false; - advance(&iterator, &entry, &visible); - if (visible && self->stack.size + 1 < initial_size) break; - - while (advance(&iterator, &entry, &visible)) { - if (visible) { - array_push(&self->stack, entry); - return TreeCursorStepVisible; - } - - if (ts_subtree_visible_child_count(*entry.subtree)) { - array_push(&self->stack, entry); - return TreeCursorStepHidden; - } - } - } - - self->stack.size = initial_size; - return TreeCursorStepNone; -} - -TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(t_tree_cursor *_self) { - return ts_tree_cursor_goto_sibling_internal(_self, ts_tree_cursor_child_iterator_next); -} - -bool ts_tree_cursor_goto_next_sibling(t_tree_cursor *self) { - switch (ts_tree_cursor_goto_next_sibling_internal(self)) { - case TreeCursorStepHidden: - ts_tree_cursor_goto_first_child(self); - return true; - case TreeCursorStepVisible: - return true; - default: - return false; - } -} - -TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(t_tree_cursor *_self) { - // since subtracting across row loses column information, we may have to - // restore it - TreeCursor *self = (TreeCursor *)_self; - - // for that, save current position before traversing - TreeCursorStep step = ts_tree_cursor_goto_sibling_internal( - _self, 
ts_tree_cursor_child_iterator_previous); - if (step == TreeCursorStepNone) - return step; - - // if length is already valid, there's no need to recompute it - if (!length_is_undefined(array_back(&self->stack)->position)) - return step; - - // restore position from the parent node - const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2]; - Length position = parent->position; - uint32_t child_index = array_back(&self->stack)->child_index; - const Subtree *children = ts_subtree_children((*(parent->subtree))); - - if (child_index > 0) { - // skip first child padding since its position should match the position of the parent - position = length_add(position, ts_subtree_size(children[0])); - for (uint32_t i = 1; i < child_index; ++i) { - position = length_add(position, ts_subtree_total_size(children[i])); - } - position = length_add(position, ts_subtree_padding(children[child_index])); - } - - array_back(&self->stack)->position = position; - - return step; -} - -bool ts_tree_cursor_goto_previous_sibling(t_tree_cursor *self) { - switch (ts_tree_cursor_goto_previous_sibling_internal(self)) { - case TreeCursorStepHidden: - ts_tree_cursor_goto_last_child(self); - return true; - case TreeCursorStepVisible: - return true; - default: - return false; - } -} - -bool ts_tree_cursor_goto_parent(t_tree_cursor *_self) { - TreeCursor *self = (TreeCursor *)_self; - for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { - if (ts_tree_cursor_is_entry_visible(self, i)) { - self->stack.size = i + 1; - return true; - } - } - return false; -} - -void ts_tree_cursor_goto_descendant( - t_tree_cursor *_self, - uint32_t goal_descendant_index -) { - TreeCursor *self = (TreeCursor *)_self; - - // Ascend to the lowest ancestor that contains the goal node. - for (;;) { - uint32_t i = self->stack.size - 1; - TreeCursorEntry *entry = &self->stack.contents[i]; - uint32_t next_descendant_index = - entry->descendant_index + - (ts_tree_cursor_is_entry_visible(self, i) ? 
1 : 0) + - ts_subtree_visible_descendant_count(*entry->subtree); - if ( - (entry->descendant_index <= goal_descendant_index) && - (next_descendant_index > goal_descendant_index) - ) { - break; - } else if (self->stack.size <= 1) { - return; - } else { - self->stack.size--; - } - } - - // Descend to the goal node. - bool did_descend = true; - do { - did_descend = false; - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - if (iterator.descendant_index > goal_descendant_index) { - return; - } - - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - if (iterator.descendant_index > goal_descendant_index) { - array_push(&self->stack, entry); - if (visible && entry.descendant_index == goal_descendant_index) { - return; - } else { - did_descend = true; - break; - } - } - } - } while (did_descend); -} - -uint32_t ts_tree_cursor_current_descendant_index(const t_tree_cursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - TreeCursorEntry *last_entry = array_back(&self->stack); - return last_entry->descendant_index; -} - -t_parse_node ts_tree_cursor_current_node(const t_tree_cursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - TreeCursorEntry *last_entry = array_back(&self->stack); - t_symbol alias_symbol = self->root_alias_symbol; - if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) { - TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; - alias_symbol = ts_language_alias_at( - self->tree->language, - parent_entry->subtree->ptr->production_id, - last_entry->structural_child_index - ); - } - return ts_node_new( - self->tree, - last_entry->subtree, - last_entry->position, - alias_symbol - ); -} - -// Private - Get various facts about the current node that are needed -// when executing tree queries. 
-void ts_tree_cursor_current_status( - const t_tree_cursor *_self, - t_field_id *field_id, - bool *has_later_siblings, - bool *has_later_named_siblings, - bool *can_have_later_siblings_with_this_field, - t_symbol *supertypes, - unsigned *supertype_count -) { - const TreeCursor *self = (const TreeCursor *)_self; - unsigned max_supertypes = *supertype_count; - *field_id = 0; - *supertype_count = 0; - *has_later_siblings = false; - *has_later_named_siblings = false; - *can_have_later_siblings_with_this_field = false; - - // Walk up the tree, visiting the current node and its invisible ancestors, - // because fields can refer to nodes through invisible *wrapper* nodes, - for (unsigned i = self->stack.size - 1; i > 0; i--) { - TreeCursorEntry *entry = &self->stack.contents[i]; - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - - const t_symbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - parent_entry->subtree->ptr->production_id - ); - - #define subtree_symbol(subtree, structural_child_index) \ - (( \ - !ts_subtree_extra(subtree) && \ - alias_sequence && \ - alias_sequence[structural_child_index] \ - ) ? \ - alias_sequence[structural_child_index] : \ - ts_subtree_symbol(subtree)) - - // Stop walking up when a visible ancestor is found. - t_symbol entry_symbol = subtree_symbol( - *entry->subtree, - entry->structural_child_index - ); - TSSymbolMetadata entry_metadata = ts_language_symbol_metadata( - self->tree->language, - entry_symbol - ); - if (i != self->stack.size - 1 && entry_metadata.visible) break; - - // Record any supertypes - if (entry_metadata.supertype && *supertype_count < max_supertypes) { - supertypes[*supertype_count] = entry_symbol; - (*supertype_count)++; - } - - // Determine if the current node has later siblings. 
- if (!*has_later_siblings) { - unsigned sibling_count = parent_entry->subtree->ptr->child_count; - unsigned structural_child_index = entry->structural_child_index; - if (!ts_subtree_extra(*entry->subtree)) structural_child_index++; - for (unsigned j = entry->child_index + 1; j < sibling_count; j++) { - Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j]; - TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata( - self->tree->language, - subtree_symbol(sibling, structural_child_index) - ); - if (sibling_metadata.visible) { - *has_later_siblings = true; - if (*has_later_named_siblings) break; - if (sibling_metadata.named) { - *has_later_named_siblings = true; - break; - } - } else if (ts_subtree_visible_child_count(sibling) > 0) { - *has_later_siblings = true; - if (*has_later_named_siblings) break; - if (sibling.ptr->named_child_count > 0) { - *has_later_named_siblings = true; - break; - } - } - if (!ts_subtree_extra(sibling)) structural_child_index++; - } - } - - #undef subtree_symbol - - if (!ts_subtree_extra(*entry->subtree)) { - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map( - self->tree->language, - parent_entry->subtree->ptr->production_id, - &field_map, &field_map_end - ); - - // Look for a field name associated with the current node. - if (!*field_id) { - for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - if (!map->inherited && map->child_index == entry->structural_child_index) { - *field_id = map->field_id; - break; - } - } - } - - // Determine if the current node can have later siblings with the same field name. 
- if (*field_id) { - for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - if ( - map->field_id == *field_id && - map->child_index > entry->structural_child_index - ) { - *can_have_later_siblings_with_this_field = true; - break; - } - } - } - } - } -} - -uint32_t ts_tree_cursor_current_depth(const t_tree_cursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - uint32_t depth = 0; - for (unsigned i = 1; i < self->stack.size; i++) { - if (ts_tree_cursor_is_entry_visible(self, i)) { - depth++; - } - } - return depth; -} - -t_parse_node ts_tree_cursor_parent_node(const t_tree_cursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - for (int i = (int)self->stack.size - 2; i >= 0; i--) { - TreeCursorEntry *entry = &self->stack.contents[i]; - bool is_visible = true; - t_symbol alias_symbol = 0; - if (i > 0) { - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - alias_symbol = ts_language_alias_at( - self->tree->language, - parent_entry->subtree->ptr->production_id, - entry->structural_child_index - ); - is_visible = (alias_symbol != 0) || ts_subtree_visible(*entry->subtree); - } - if (is_visible) { - return ts_node_new( - self->tree, - entry->subtree, - entry->position, - alias_symbol - ); - } - } - return ts_node_new(NULL, NULL, length_zero(), 0); -} - -t_field_id ts_tree_cursor_current_field_id(const t_tree_cursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - - // Walk up the tree, visiting the current node and its invisible ancestors. - for (unsigned i = self->stack.size - 1; i > 0; i--) { - TreeCursorEntry *entry = &self->stack.contents[i]; - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - - // Stop walking up when another visible node is found. 
- if ( - i != self->stack.size - 1 && - ts_tree_cursor_is_entry_visible(self, i) - ) break; - - if (ts_subtree_extra(*entry->subtree)) break; - - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map( - self->tree->language, - parent_entry->subtree->ptr->production_id, - &field_map, &field_map_end - ); - for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - if (!map->inherited && map->child_index == entry->structural_child_index) { - return map->field_id; - } - } - } - return 0; -} - -const char *ts_tree_cursor_current_field_name(const t_tree_cursor *_self) { - t_field_id id = ts_tree_cursor_current_field_id(_self); - if (id) { - const TreeCursor *self = (const TreeCursor *)_self; - return self->tree->language->field_names[id]; - } else { - return NULL; - } -} - -t_tree_cursor ts_tree_cursor_copy(const t_tree_cursor *_cursor) { - const TreeCursor *cursor = (const TreeCursor *)_cursor; - t_tree_cursor res = {NULL, NULL, {0, 0}}; - TreeCursor *copy = (TreeCursor *)&res; - copy->tree = cursor->tree; - copy->root_alias_symbol = cursor->root_alias_symbol; - array_init(&copy->stack); - array_push_all(&copy->stack, &cursor->stack); - return res; -} - -void ts_tree_cursor_reset_to(t_tree_cursor *_dst, const t_tree_cursor *_src) { - const TreeCursor *cursor = (const TreeCursor *)_src; - TreeCursor *copy = (TreeCursor *)_dst; - copy->tree = cursor->tree; - copy->root_alias_symbol = cursor->root_alias_symbol; - array_clear(&copy->stack); - array_push_all(&copy->stack, &cursor->stack); -} diff --git a/parser/src/tree_cursor.h b/parser/src/tree_cursor.h deleted file mode 100644 index 39edd0e0..00000000 --- a/parser/src/tree_cursor.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef TREE_SITTER_TREE_CURSOR_H_ -#define TREE_SITTER_TREE_CURSOR_H_ - -#include "./subtree.h" - -typedef struct { - const Subtree *subtree; - Length position; - uint32_t child_index; - uint32_t structural_child_index; - uint32_t descendant_index; -} TreeCursorEntry; - -typedef struct { - 
const t_tree *tree; - Array(TreeCursorEntry) stack; - t_symbol root_alias_symbol; -} TreeCursor; - -typedef enum { - TreeCursorStepNone, - TreeCursorStepHidden, - TreeCursorStepVisible, -} TreeCursorStep; - -void ts_tree_cursor_init(TreeCursor *, t_parse_node); -void ts_tree_cursor_current_status( - const t_tree_cursor *, - t_field_id *, - bool *, - bool *, - bool *, - t_symbol *, - unsigned * -); - -TreeCursorStep ts_tree_cursor_goto_first_child_internal(t_tree_cursor *); -TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(t_tree_cursor *); - -static inline Subtree ts_tree_cursor_current_subtree(const t_tree_cursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - TreeCursorEntry *last_entry = array_back(&self->stack); - return *last_entry->subtree; -} - -t_parse_node ts_tree_cursor_parent_node(const t_tree_cursor *); - -#endif // TREE_SITTER_TREE_CURSOR_H_ diff --git a/parser/src/unicode.h b/parser/src/unicode.h deleted file mode 100644 index 0fba56a6..00000000 --- a/parser/src/unicode.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef TREE_SITTER_UNICODE_H_ -#define TREE_SITTER_UNICODE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -#define U_EXPORT -#define U_EXPORT2 -#include "unicode/utf8.h" -#include "unicode/utf16.h" - -static const int32_t TS_DECODE_ERROR = U_SENTINEL; - -// These functions read one unicode code point from the given string, -// returning the number of bytes consumed. 
-typedef uint32_t (*UnicodeDecodeFunction)( - const uint8_t *string, - uint32_t length, - int32_t *code_point -); - -static inline uint32_t ts_decode_utf8( - const uint8_t *string, - uint32_t length, - int32_t *code_point -) { - uint32_t i = 0; - U8_NEXT(string, i, length, *code_point); - return i; -} - -static inline uint32_t ts_decode_utf16( - const uint8_t *string, - uint32_t length, - int32_t *code_point -) { - uint32_t i = 0; - U16_NEXT(((uint16_t *)string), i, length, *code_point); - return i * 2; -} - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_UNICODE_H_ diff --git a/sources/main.c b/sources/main.c index 227ce0f2..d87f3d6a 100644 --- a/sources/main.c +++ b/sources/main.c @@ -6,7 +6,7 @@ /* By: rparodi +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/03/28 14:40:38 by rparodi #+# #+# */ -/* Updated: 2024/04/30 22:05:49 by maiboyer ### ########.fr */ +/* Updated: 2024/05/01 10:36:58 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -129,6 +129,7 @@ t_i32 main(t_i32 argc, t_str argv[], t_str envp[]) (void)argc; (void)argv; (void)envp; + utils = (t_utils){}; utils.parser = create_myparser(); //ft_find_path(arge, &utils); utils.name_shell = "42sh > "; From 509b551ce436f99665c3703d2a3f4f8185238285 Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Wed, 1 May 2024 17:26:08 +0200 Subject: [PATCH 13/14] Cleanup --- includes/app/node.h | 2 +- includes/minishell.h | 8 +- parser/includes/alloc.h | 54 -- parser/includes/api.h | 1190 +------------------------------ parser/includes/array.h | 287 -------- parser/includes/error_costs.h | 23 - parser/includes/lexer.h | 57 -- parser/includes/parser.h | 12 - parser/includes/parser_length.h | 64 -- parser/includes/point.h | 21 - parser/includes/point/inline1.h | 50 -- parser/includes/point/inline2.h | 49 -- parser/includes/point/inline3.h | 29 - parser/includes/reduce_action.h | 36 - sources/ft_exit.c | 2 + sources/main.c | 30 +- 
sources/node/node.c | 14 +- 17 files changed, 37 insertions(+), 1891 deletions(-) delete mode 100644 parser/includes/alloc.h delete mode 100644 parser/includes/array.h delete mode 100644 parser/includes/error_costs.h delete mode 100644 parser/includes/lexer.h delete mode 100644 parser/includes/parser.h delete mode 100644 parser/includes/parser_length.h delete mode 100644 parser/includes/point.h delete mode 100644 parser/includes/point/inline1.h delete mode 100644 parser/includes/point/inline2.h delete mode 100644 parser/includes/point/inline3.h delete mode 100644 parser/includes/reduce_action.h diff --git a/includes/app/node.h b/includes/app/node.h index 7d884d6a..2cf7efd3 100644 --- a/includes/app/node.h +++ b/includes/app/node.h @@ -28,7 +28,7 @@ typedef struct s_node struct s_node *childs; } t_node; -t_node build_node(t_parse_node curr, t_const_str input); +t_node build_node(TSNode curr, t_const_str input); t_str node_getstr(t_node *node); void free_node(t_node t); diff --git a/includes/minishell.h b/includes/minishell.h index 80e87435..c35111dd 100644 --- a/includes/minishell.h +++ b/includes/minishell.h @@ -30,10 +30,10 @@ # define PATH_FILES "/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games" -typedef struct s_myparser +typedef struct s_parser { - t_parser *parser; -} t_myparser; + TSParser *parser; +} t_parser; typedef struct s_utils { @@ -42,7 +42,7 @@ typedef struct s_utils t_str *strs_input; t_str *path; t_str *envp; - t_myparser parser; + t_parser parser; t_node current_node; } t_utils; diff --git a/parser/includes/alloc.h b/parser/includes/alloc.h deleted file mode 100644 index 07d93a77..00000000 --- a/parser/includes/alloc.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef TREE_SITTER_ALLOC_H_ -#define TREE_SITTER_ALLOC_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -// Allow clients to override allocation functions -#ifdef TREE_SITTER_REUSE_ALLOCATOR - -extern void *(*ts_current_malloc)(size_t); -extern void 
*(*ts_current_calloc)(size_t, size_t); -extern void *(*ts_current_realloc)(void *, size_t); -extern void (*ts_current_free)(void *); - -#ifndef malloc -#define malloc ts_current_malloc -#endif -#ifndef calloc -#define calloc ts_current_calloc -#endif -#ifndef realloc -#define realloc ts_current_realloc -#endif -#ifndef free -#define free ts_current_free -#endif - -#else - -#ifndef malloc -#define malloc malloc -#endif -#ifndef calloc -#define calloc calloc -#endif -#ifndef realloc -#define realloc realloc -#endif -#ifndef free -#define free free -#endif - -#endif - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_ALLOC_H_ diff --git a/parser/includes/api.h b/parser/includes/api.h index 712c57fb..516cf65a 100644 --- a/parser/includes/api.h +++ b/parser/includes/api.h @@ -1,1189 +1 @@ -#ifndef TREE_SITTER_API_H_ -#define TREE_SITTER_API_H_ - -#include -#include -#include - -#include "me/vec/vec_parser_range.h" -#include "parser/types/types_language.h" -#include "parser/types/types_state_id.h" - -/****************************/ -/* Section - ABI Versioning */ -/****************************/ - -/** - * The latest ABI version that is supported by the current version of the - * library. When Languages are generated by the Tree-sitter CLI, they are - * assigned an ABI version number that corresponds to the current CLI version. - * The Tree-sitter library is generally backwards-compatible with languages - * generated using older CLI versions, but is not forwards-compatible. - */ -#define TREE_SITTER_LANGUAGE_VERSION 14 - -/** - * The earliest ABI version that is supported by the current version of the - * library. 
- */ -#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 13 - -/*******************/ -/* Section - Types */ -/*******************/ - -typedef struct s_parser t_parser; -typedef struct s_parse_tree t_parse_tree; -typedef struct s_query t_query; -typedef struct s_query_cursor t_query_cursor; -typedef struct s_lookahead_iterator t_lookahead_iterator; - -typedef enum t_input_encoding -{ - InputEncoding8, - InputEncoding16, -} t_input_encoding; - -typedef enum t_symbol_type -{ - SymbolTypeRegular, - SymbolTypeAnonymous, - SymbolTypeAuxiliary, -} t_symbol_type; - -typedef struct t_parse_input -{ - void *payload; - const char *(*read)(void *payload, t_u32 byte_index, t_point position, - t_u32 *bytes_read); - t_input_encoding encoding; -} t_parse_input; - -typedef enum t_parse_log_type -{ - LogTypeParse, - LogTypeLex, -} t_parse_log_type; - -typedef struct t_parse_logger -{ - void *payload; - void (*log)(void *payload, t_parse_log_type log_type, const char *buffer); -} t_parse_logger; - -typedef struct t_input_edit -{ - t_u32 start_byte; - t_u32 old_end_byte; - t_u32 new_end_byte; - t_point start_point; - t_point old_end_point; - t_point new_end_point; -} t_input_edit; - -typedef struct t_parse_node -{ - t_u32 context[4]; - const void *id; - const t_parse_tree *tree; -} t_parse_node; - -typedef struct t_parse_tree_cursor -{ - const void *tree; - const void *id; - t_u32 context[3]; -} t_parse_tree_cursor; - -typedef struct t_queryCapture -{ - t_parse_node node; - t_u32 index; -} t_queryCapture; - -typedef enum t_parse_quantifier -{ - ParseQuantifierZero = 0, // must match the array initialization value - ParseQuantifierZeroOrOne, - ParseQuantifierZeroOrMore, - ParseQuantifierOne, - ParseQuantifierOneOrMore, -} t_parse_quantifier; - -typedef struct t_query_match -{ - t_u32 id; - t_u16 pattern_index; - t_u16 capture_count; - const t_queryCapture *captures; -} t_query_match; - -typedef enum t_queryPredicateStepType -{ - t_queryPredicateStepTypeDone, - 
t_queryPredicateStepTypeCapture, - t_queryPredicateStepTypeString, -} t_queryPredicateStepType; - -typedef struct t_queryPredicateStep -{ - t_queryPredicateStepType type; - t_u32 value_id; -} t_queryPredicateStep; - -typedef enum t_queryError -{ - t_queryErrorNone = 0, - t_queryErrorSyntax, - t_queryErrorNodeType, - t_queryErrorField, - t_queryErrorCapture, - t_queryErrorStructure, - t_queryErrorLanguage, -} t_queryError; - -/********************/ -/* Section - Parser */ -/********************/ - -/** - * Create a new parser. - */ -t_parser *ts_parser_new(void); - -/** - * Delete the parser, freeing all of the memory that it used. - */ -void ts_parser_delete(t_parser *self); - -/** - * Get the parser's current language. - */ -const t_language *ts_parser_language(const t_parser *self); - -/** - * Set the language that the parser should use for parsing. - * - * Returns a boolean indicating whether or not the language was successfully - * assigned. True means assignment succeeded. False means there was a - * version mismatch: the language was generated with an incompatible version - * of the Tree-sitter CLI. Check the language's version using - * [`ts_language_version`] and compare it to this library's - * [`TREE_SITTER_LANGUAGE_VERSION`] and - * [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants. - */ -bool ts_parser_set_language(t_parser *self, const t_language *language); - -/** - * Set the ranges of text that the parser should include when parsing. - * - * By default, the parser will always include entire documents. This - * function allows you to parse only a *portion* of a document but still - * return a syntax tree whose ranges match up with the document as a whole. - * You can also pass multiple disjoint ranges. - * - * The second and third parameters specify the location and length of an - * array of ranges. The parser does *not* take ownership of these ranges; it - * copies the data, so it doesn't matter how these ranges are allocated. 
- * - * If `count` is zero, then the entire document will be parsed. Otherwise, - * the given ranges must be ordered from earliest to latest in the document, - * and they must not overlap. That is, the following must hold for all: - * - * `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte` - * - * If this requirement is not satisfied, the operation will fail, the ranges - * will not be assigned, and this function will return `false`. On success, - * this function returns `true` - */ -bool ts_parser_set_included_ranges(t_parser *self, const t_parser_range *ranges, - t_u32 count); - -/** - * Get the ranges of text that the parser will include when parsing. - * - * The returned pointer is owned by the parser. The caller should not free - * it or write to it. The length of the array will be written to the given - * `count` pointer. - */ -const t_parser_range *ts_parser_included_ranges(const t_parser *self, - t_u32 *count); - -/** - * Use the parser to parse some source code and create a syntax tree. - * - * If you are parsing this document for the first time, pass `NULL` for the - * `old_tree` parameter. Otherwise, if you have already parsed an earlier - * version of this document and the document has since been edited, pass the - * previous syntax tree so that the unchanged parts of it can be reused. - * This will save time and memory. For this to work correctly, you must have - * already edited the old syntax tree using the [`ts_tree_edit`] function in - a - * way that exactly matches the source code changes. - * - * The [`t_parse_input`] parameter lets you specify how to read the text. It has - the - * following three fields: - * 1. [`read`]: A function to retrieve a chunk of text at a given byte - offset - * and (row, column) position. The function should return a pointer to - the - * text and write its length to the [`bytes_read`] pointer. 
The parser - does - * not take ownership of this buffer; it just borrows it until it has - * finished reading it. The function should write a zero value to the - * [`bytes_read`] pointer to indicate the end of the document. - * 2. [`payload`]: An arbitrary pointer that will be passed to each - invocation - * of the [`read`] function. - * 3. [`encoding`]: An indication of how the text is encoded. Either - * `InputEncoding8` or `InputEncoding16`. - * - * This function returns a syntax tree on success, and `NULL` on failure. - There - * are three possible reasons for failure: - * 1. The parser does not have a language assigned. Check for this using the - [`ts_parser_language`] function. - * 2. Parsing was cancelled due to a timeout that was set by an earlier call - to - * the [`ts_parser_set_timeot_micros`] function. You can resume parsing - from - * where the parser left out by calling [`ts_parser_parse`] again with - the - * same arguments. Or you can start parsing from scratch by first calling - * [`ts_parser_reset`]. - * 3. Parsing was cancelled using a cancellation flag that was set by an - * earlier call to [`ts_parser_set_cancellation_flag`]. You can resume - parsing - * from where the parser left out by calling [`ts_parser_parse`] again - with - * the same arguments. - * - * [`read`]: t_parse_input::read - * [`payload`]: t_parse_input::payload - * [`encoding`]: t_parse_input::encoding - * [`bytes_read`]: t_parse_input::read - */ -t_parse_tree *ts_parser_parse(t_parser *self, const t_parse_tree *old_tree, - t_parse_input input); - -/** - * Use the parser to parse some source code stored in one contiguous buffer. - * The first two parameters are the same as in the [`ts_parser_parse`] - * function above. The second two parameters indicate the location of the - * buffer and its length in bytes. 
- */ -t_parse_tree *ts_parser_parse_string(t_parser *self, - const t_parse_tree *old_tree, - const char *string, t_u32 length); - -/** - * Use the parser to parse some source code stored in one contiguous buffer - * with a given encoding. The first four parameters work the same as in the - * [`ts_parser_parse_string`] method above. The final parameter indicates - * whether the text is encoded as UTF8 or UTF16. - */ -t_parse_tree *ts_parser_parse_string_encoding(t_parser *self, - const t_parse_tree *old_tree, - const char *string, t_u32 length, - t_input_encoding encoding); - -/** - * Instruct the parser to start the next parse from the beginning. - * - * If the parser previously failed because of a timeout or a cancellation, - * then by default, it will resume where it left off on the next call to - * [`ts_parser_parse`] or other parsing functions. If you don't want to - * resume, and instead intend to use this parser to parse some other - * document, you must call [`ts_parser_reset`] first. - */ -void ts_parser_reset(t_parser *self); - -/** - * Set the maximum duration in microseconds that parsing should be allowed - * to take before halting. - * - * If parsing takes longer than this, it will halt early, returning NULL. - * See [`ts_parser_parse`] for more information. - */ -void ts_parser_set_timeot_micros(t_parser *self, t_u64 timeot_micros); - -/** - * Get the duration in microseconds that parsing is allowed to take. - */ -t_u64 ts_parser_timeot_micros(const t_parser *self); - -/** - * Set the parser's current cancellation flag pointer. - * - * If a non-null pointer is assigned, then the parser will periodically read - * from this pointer during parsing. If it reads a non-zero value, it will - * halt early, returning NULL. See [`ts_parser_parse`] for more information. - */ -void ts_parser_set_cancellation_flag(t_parser *self, const size_t *flag); - -/** - * Get the parser's current cancellation flag pointer. 
- */ -const size_t *ts_parser_cancellation_flag(const t_parser *self); - -/** - * Set the logger that a parser should use during parsing. - * - * The parser does not take ownership over the logger payload. If a logger - * was previously assigned, the caller is responsible for releasing any - * memory owned by the previous logger. - */ -void ts_parser_set_logger(t_parser *self, t_parse_logger logger); - -/** - * Get the parser's current logger. - */ -t_parse_logger ts_parser_logger(const t_parser *self); - -/** - * Set the file descriptor to which the parser should write debugging graphs - * during parsing. The graphs are formatted in the DOT language. You may - * want to pipe these graphs directly to a `dot(1)` process in order to - * generate SVG output. You can turn off this logging by passing a negative - * number. - */ -void ts_parser_print_dot_graphs(t_parser *self, int fd); - -/******************/ -/* Section - Tree */ -/******************/ - -/** - * Create a shallow copy of the syntax tree. This is very fast. - * - * You need to copy a syntax tree in order to use it on more than one thread - * at a time, as syntax trees are not thread safe. - */ -t_parse_tree *ts_tree_copy(const t_parse_tree *self); - -/** - * Delete the syntax tree, freeing all of the memory that it used. - */ -void ts_tree_delete(t_parse_tree *self); - -/** - * Get the root node of the syntax tree. - */ -t_parse_node ts_tree_root_node(const t_parse_tree *self); - -/** - * Get the root node of the syntax tree, but with its position - * shifted forward by the given offset. - */ -t_parse_node ts_tree_root_node_with_offset(const t_parse_tree *self, - t_u32 offset_bytes, - t_point offset_extent); - -/** - * Get the language that was used to parse the syntax tree. - */ -const t_language *ts_tree_language(const t_parse_tree *self); - -/** - * Get the array of included ranges that was used to parse the syntax tree. - * - * The returned pointer must be freed by the caller. 
- */ -t_parser_range *ts_tree_included_ranges(const t_parse_tree *self, - t_u32 *length); - -/** - * Edit the syntax tree to keep it in sync with source code that has been - * edited. - * - * You must describe the edit both in terms of byte offsets and in terms of - * (row, column) coordinates. - */ -void ts_tree_edit(t_parse_tree *self, const t_input_edit *edit); - -/** - * Compare an old edited syntax tree to a new syntax tree representing the - * same document, returning an array of ranges whose syntactic structure has - * changed. - * - * For this to work correctly, the old syntax tree must have been edited - * such that its ranges match up to the new tree. Generally, you'll want to - * call this function right after calling one of the [`ts_parser_parse`] - * functions. You need to pass the old tree that was passed to parse, as - * well as the new tree that was returned from that function. - * - * The returned array is allocated using `malloc` and the caller is - * responsible for freeing it using `free`. The length of the array will be - * written to the given `length` pointer. - */ -t_parser_range *ts_tree_get_changed_ranges(const t_parse_tree *old_tree, - const t_parse_tree *new_tree, - t_u32 *length); - -/** - * Write a DOT graph describing the syntax tree to the given file. - */ -void ts_tree_print_dot_graph(const t_parse_tree *self, int file_descriptor); - -/******************/ -/* Section - Node */ -/******************/ - -/** - * Get the node's type as a null-terminated string. - */ -const char *ts_node_type(t_parse_node self); - -/** - * Get the node's type as a numerical id. - */ -t_symbol ts_node_symbol(t_parse_node self); - -/** - * Get the node's language. - */ -const t_language *ts_node_language(t_parse_node self); - -/** - * Get the node's type as it appears in the grammar ignoring aliases as a - * null-terminated string. 
- */ -const char *ts_node_grammar_type(t_parse_node self); - -/** - * Get the node's type as a numerical id as it appears in the grammar - * ignoring aliases. This should be used in [`ts_language_next_state`] - * instead of - * [`ts_node_symbol`]. - */ -t_symbol ts_node_grammar_symbol(t_parse_node self); - -/** - * Get the node's start byte. - */ -t_u32 ts_node_start_byte(t_parse_node self); - -/** - * Get the node's start position in terms of rows and columns. - */ -t_point ts_node_start_point(t_parse_node self); - -/** - * Get the node's end byte. - */ -t_u32 ts_node_end_byte(t_parse_node self); - -/** - * Get the node's end position in terms of rows and columns. - */ -t_point ts_node_end_point(t_parse_node self); - -/** - * Get an S-expression representing the node as a string. - * - * This string is allocated with `malloc` and the caller is responsible for - * freeing it using `free`. - */ -char *ts_node_string(t_parse_node self); - -/** - * Check if the node is null. Functions like [`ts_node_child`] and - * [`ts_node_next_sibling`] will return a null node to indicate that no such - * node was found. - */ -bool ts_node_is_null(t_parse_node self); - -/** - * Check if the node is *named*. Named nodes correspond to named rules in - * the grammar, whereas *anonymous* nodes correspond to string literals in - * the grammar. - */ -bool ts_node_is_named(t_parse_node self); - -/** - * Check if the node is *missing*. Missing nodes are inserted by the parser - * in order to recover from certain kinds of syntax errors. - */ -bool ts_node_is_missing(t_parse_node self); - -/** - * Check if the node is *extra*. Extra nodes represent things like comments, - * which are not required the grammar, but can appear anywhere. - */ -bool ts_node_is_extra(t_parse_node self); - -/** - * Check if a syntax node has been edited. - */ -bool ts_node_has_changes(t_parse_node self); - -/** - * Check if the node is a syntax error or contains any syntax errors. 
- */ -bool ts_node_has_error(t_parse_node self); - -/** - * Check if the node is a syntax error. - */ -bool ts_node_is_error(t_parse_node self); - -/** - * Get this node's parse state. - */ -t_state_id ts_node_parse_state(t_parse_node self); - -/** - * Get the parse state after this node. - */ -t_state_id ts_node_next_parse_state(t_parse_node self); - -/** - * Get the node's immediate parent. - */ -t_parse_node ts_node_parent(t_parse_node self); - -/** - * Get the node's child at the given index, where zero represents the first - * child. - */ -t_parse_node ts_node_child(t_parse_node self, t_u32 child_index); - -/** - * Get the field name for node's child at the given index, where zero - * represents the first child. Returns NULL, if no field is found. - */ -const char *ts_node_field_name_for_child(t_parse_node self, t_u32 child_index); - -/** - * Get the node's number of children. - */ -t_u32 ts_node_child_count(t_parse_node self); - -/** - * Get the node's *named* child at the given index. - * - * See also [`ts_node_is_named`]. - */ -t_parse_node ts_node_named_child(t_parse_node self, t_u32 child_index); - -/** - * Get the node's number of *named* children. - * - * See also [`ts_node_is_named`]. - */ -t_u32 ts_node_named_child_count(t_parse_node self); - -/** - * Get the node's child with the given field name. - */ -t_parse_node ts_node_child_by_field_name(t_parse_node self, const char *name, - t_u32 name_length); - -/** - * Get the node's child with the given numerical field id. - * - * You can convert a field name to an id using the - * [`ts_language_field_id_for_name`] function. - */ -t_parse_node ts_node_child_by_field_id(t_parse_node self, t_field_id field_id); - -/** - * Get the node's next / previous sibling. - */ -t_parse_node ts_node_next_sibling(t_parse_node self); -t_parse_node ts_node_prev_sibling(t_parse_node self); - -/** - * Get the node's next / previous *named* sibling. 
- */ -t_parse_node ts_node_next_named_sibling(t_parse_node self); -t_parse_node ts_node_prev_named_sibling(t_parse_node self); - -/** - * Get the node's first child that extends beyond the given byte offset. - */ -t_parse_node ts_node_first_child_for_byte(t_parse_node self, t_u32 byte); - -/** - * Get the node's first named child that extends beyond the given byte - * offset. - */ -t_parse_node ts_node_first_named_child_for_byte(t_parse_node self, t_u32 byte); - -/** - * Get the node's number of descendants, including one for the node itself. - */ -t_u32 ts_node_descendant_count(t_parse_node self); - -/** - * Get the smallest node within this node that spans the given range of - * bytes or (row, column) positions. - */ -t_parse_node ts_node_descendant_for_byte_range(t_parse_node self, t_u32 start, - t_u32 end); -t_parse_node ts_node_descendant_for_point_range(t_parse_node self, - t_point start, t_point end); - -/** - * Get the smallest named node within this node that spans the given range - * of bytes or (row, column) positions. - */ -t_parse_node ts_node_named_descendant_for_byte_range(t_parse_node self, - t_u32 start, t_u32 end); -t_parse_node ts_node_named_descendant_for_point_range(t_parse_node self, - t_point start, - t_point end); - -/** - * Edit the node to keep it in-sync with source code that has been edited. - * - * This function is only rarely needed. When you edit a syntax tree with the - * [`ts_tree_edit`] function, all of the nodes that you retrieve from the - * tree afterward will already reflect the edit. You only need to use - * [`ts_node_edit`] when you have a [`t_parse_node`] instance that you want to - * keep and continue to use after an edit. - */ -void ts_node_edit(t_parse_node *self, const t_input_edit *edit); - -/** - * Check if two nodes are identical. 
- */ -bool ts_node_eq(t_parse_node self, t_parse_node other); - -/************************/ -/* Section - TreeCursor */ -/************************/ - -/** - * Create a new tree cursor starting from the given node. - * - * A tree cursor allows you to walk a syntax tree more efficiently than is - * possible using the [`t_parse_node`] functions. It is a mutable object that is - * always on a certain syntax node, and can be moved imperatively to - * different nodes. - */ -t_parse_tree_cursor ts_tree_cursor_new(t_parse_node node); - -/** - * Delete a tree cursor, freeing all of the memory that it used. - */ -void ts_tree_cursor_delete(t_parse_tree_cursor *self); - -/** - * Re-initialize a tree cursor to start at a different node. - */ -void ts_tree_cursor_reset(t_parse_tree_cursor *self, t_parse_node node); - -/** - * Re-initialize a tree cursor to the same position as another cursor. - * - * Unlike [`ts_tree_cursor_reset`], this will not lose parent information - * and allows reusing already created cursors. - */ -void ts_tree_cursor_reset_to(t_parse_tree_cursor *dst, - const t_parse_tree_cursor *src); - -/** - * Get the tree cursor's current node. - */ -t_parse_node ts_tree_cursor_current_node(const t_parse_tree_cursor *self); - -/** - * Get the field name of the tree cursor's current node. - * - * This returns `NULL` if the current node doesn't have a field. - * See also [`ts_node_child_by_field_name`]. - */ -const char *ts_tree_cursor_current_field_name(const t_parse_tree_cursor *self); - -/** - * Get the field id of the tree cursor's current node. - * - * This returns zero if the current node doesn't have a field. - * See also [`ts_node_child_by_field_id`], - * [`ts_language_field_id_for_name`]. - */ -t_field_id ts_tree_cursor_current_field_id(const t_parse_tree_cursor *self); - -/** - * Move the cursor to the parent of its current node. 
- * - * This returns `true` if the cursor successfully moved, and returns `false` - * if there was no parent node (the cursor was already on the root node). - */ -bool ts_tree_cursor_goto_parent(t_parse_tree_cursor *self); - -/** - * Move the cursor to the next sibling of its current node. - * - * This returns `true` if the cursor successfully moved, and returns `false` - * if there was no next sibling node. - */ -bool ts_tree_cursor_goto_next_sibling(t_parse_tree_cursor *self); - -/** - * Move the cursor to the previous sibling of its current node. - * - * This returns `true` if the cursor successfully moved, and returns `false` - * if there was no previous sibling node. - * - * Note, that this function may be slower than - * [`ts_tree_cursor_goto_next_sibling`] due to how node positions are - * stored. In the worst case, this will need to iterate through all the - * children upto the previous sibling node to recalculate its position. - */ -bool ts_tree_cursor_goto_previous_sibling(t_parse_tree_cursor *self); - -/** - * Move the cursor to the first child of its current node. - * - * This returns `true` if the cursor successfully moved, and returns `false` - * if there were no children. - */ -bool ts_tree_cursor_goto_first_child(t_parse_tree_cursor *self); - -/** - * Move the cursor to the last child of its current node. - * - * This returns `true` if the cursor successfully moved, and returns `false` - * if there were no children. - * - * Note that this function may be slower than - * [`ts_tree_cursor_goto_first_child`] because it needs to iterate through - * all the children to compute the child's position. - */ -bool ts_tree_cursor_goto_last_child(t_parse_tree_cursor *self); - -/** - * Move the cursor to the node that is the nth descendant of - * the original node that the cursor was constructed with, where - * zero represents the original node itself. 
- */ -void ts_tree_cursor_goto_descendant(t_parse_tree_cursor *self, - t_u32 goal_descendant_index); - -/** - * Get the index of the cursor's current node out of all of the - * descendants of the original node that the cursor was constructed with. - */ -t_u32 ts_tree_cursor_current_descendant_index(const t_parse_tree_cursor *self); - -/** - * Get the depth of the cursor's current node relative to the original - * node that the cursor was constructed with. - */ -t_u32 ts_tree_cursor_current_depth(const t_parse_tree_cursor *self); - -/** - * Move the cursor to the first child of its current node that extends - * beyond the given byte offset or point. - * - * This returns the index of the child node if one was found, and returns -1 - * if no such child was found. - */ -t_i64 ts_tree_cursor_goto_first_child_for_byte(t_parse_tree_cursor *self, - t_u32 goal_byte); -t_i64 ts_tree_cursor_goto_first_child_for_point(t_parse_tree_cursor *self, - t_point goal_point); - -t_parse_tree_cursor ts_tree_cursor_copy(const t_parse_tree_cursor *cursor); - -/*******************/ -/* Section - Query */ -/*******************/ - -/** - * Create a new query from a string containing one or more S-expression - * patterns. The query is associated with a particular language, and can - * only be run on syntax nodes parsed with that language. - * - * If all of the given patterns are valid, this returns a [`t_query`]. - * If a pattern is invalid, this returns `NULL`, and provides two pieces - * of information about the problem: - * 1. The byte offset of the error is written to the `error_offset` - * parameter. - * 2. The type of error is written to the `error_type` parameter. - */ -t_query *ts_query_new(const t_language *language, const char *source, - t_u32 source_len, t_u32 *error_offset, - t_queryError *error_type); - -/** - * Delete a query, freeing all of the memory that it used. 
- */ -void ts_query_delete(t_query *self); - -/** - * Get the number of patterns, captures, or string literals in the query. - */ -t_u32 ts_query_pattern_count(const t_query *self); -t_u32 ts_query_capture_count(const t_query *self); -t_u32 ts_query_string_count(const t_query *self); - -/** - * Get the byte offset where the given pattern starts in the query's source. - * - * This can be useful when combining queries by concatenating their source - * code strings. - */ -t_u32 ts_query_start_byte_for_pattern(const t_query *self, t_u32 pattern_index); - -/** - * Get all of the predicates for the given pattern in the query. - * - * The predicates are represented as a single array of steps. There are - * three types of steps in this array, which correspond to the three legal - * values for the `type` field: - * - `t_queryPredicateStepTypeCapture` - Steps with this type represent - * names of captures. Their `value_id` can be used with the - * [`ts_query_capture_name_for_id`] function to obtain the name of the - * capture. - * - `t_queryPredicateStepTypeString` - Steps with this type represent - * literal strings. Their `value_id` can be used with the - * [`ts_query_string_value_for_id`] function to obtain their string - * value. - * - `t_queryPredicateStepTypeDone` - Steps with this type are *sentinels* - * that represent the end of an individual predicate. If a pattern has - * two predicates, then there will be two steps with this `type` in the - * array. - */ -const t_queryPredicateStep *ts_query_predicates_for_pattern(const t_query *self, - t_u32 pattern_index, - t_u32 *step_count); - -/* - * Check if the given pattern in the query has a single root node. - */ -bool ts_query_is_pattern_rooted(const t_query *self, t_u32 pattern_index); - -/* - * Check if the given pattern in the query is 'non local'. - * - * A non-local pattern has multiple root nodes and can match within a - * repeating sequence of nodes, as specified by the grammar. 
Non-local - * patterns disable certain optimizations that would otherwise be possible - * when executing a query on a specific range of a syntax tree. - */ -bool ts_query_is_pattern_non_local(const t_query *self, t_u32 pattern_index); - -/* - * Check if a given pattern is guaranteed to match once a given step is - * reached. The step is specified by its byte offset in the query's source - * code. - */ -bool ts_query_is_pattern_guaranteed_at_step(const t_query *self, - t_u32 byte_offset); - -/** - * Get the name and length of one of the query's captures, or one of the - * query's string literals. Each capture and string is associated with a - * numeric id based on the order that it appeared in the query's source. - */ -const char *ts_query_capture_name_for_id(const t_query *self, t_u32 index, - t_u32 *length); - -/** - * Get the quantifier of the query's captures. Each capture is * associated - * with a numeric id based on the order that it appeared in the query's - * source. - */ -t_parse_quantifier ts_query_capture_quantifier_for_id(const t_query *self, - t_u32 pattern_index, - t_u32 capture_index); - -const char *ts_query_string_value_for_id(const t_query *self, t_u32 index, - t_u32 *length); - -/** - * Disable a certain capture within a query. - * - * This prevents the capture from being returned in matches, and also avoids - * any resource usage associated with recording the capture. Currently, - * there is no way to undo this. - */ -void ts_query_disable_capture(t_query *self, const char *name, t_u32 length); - -/** - * Disable a certain pattern within a query. - * - * This prevents the pattern from matching and removes most of the overhead - * associated with the pattern. Currently, there is no way to undo this. - */ -void ts_query_disable_pattern(t_query *self, t_u32 pattern_index); - -/** - * Create a new cursor for executing a given query. - * - * The cursor stores the state that is needed to iteratively search - * for matches. 
To use the query cursor, first call [`ts_query_cursor_exec`] - * to start running a given query on a given syntax node. Then, there are - * two options for consuming the results of the query: - * 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of - * the *matches* in the order that they were found. Each match contains the - * index of the pattern that matched, and an array of captures. Because - * multiple patterns can match the same set of nodes, one match may - * contain captures that appear *before* some of the captures from a - * previous match. - * 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all - * of the individual *captures* in the order that they appear. This is - * useful if don't care about which pattern matched, and just want a single - * ordered sequence of captures. - * - * If you don't care about consuming all of the results, you can stop - * calling - * [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any - * point. You can then start executing another query on another node by - * calling - * [`ts_query_cursor_exec`] again. - */ -t_query_cursor *ts_query_cursor_new(void); - -/** - * Delete a query cursor, freeing all of the memory that it used. - */ -void ts_query_cursor_delete(t_query_cursor *self); - -/** - * Start running a given query on a given node. - */ -void ts_query_cursor_exec(t_query_cursor *self, const t_query *query, - t_parse_node node); - -/** - * Manage the maximum number of in-progress matches allowed by this query - * cursor. - * - * Query cursors have an optional maximum capacity for storing lists of - * in-progress captures. If this capacity is exceeded, then the - * earliest-starting match will silently be dropped to make room for further - * matches. This maximum capacity is optional — by default, query cursors - * allow any number of pending matches, dynamically allocating new space for - * them as needed as the query is executed. 
- */ -bool ts_query_cursor_did_exceed_match_limit(const t_query_cursor *self); -t_u32 ts_query_cursor_match_limit(const t_query_cursor *self); -void ts_query_cursor_set_match_limit(t_query_cursor *self, t_u32 limit); - -/** - * Set the range of bytes or (row, column) positions in which the query - * will be executed. - */ -void ts_query_cursor_set_byte_range(t_query_cursor *self, t_u32 start_byte, - t_u32 end_byte); -void ts_query_cursor_set_point_range(t_query_cursor *self, t_point start_point, - t_point end_point); - -/** - * Advance to the next match of the currently running query. - * - * If there is a match, write it to `*match` and return `true`. - * Otherwise, return `false`. - */ -bool ts_query_cursor_next_match(t_query_cursor *self, t_query_match *match); -void ts_query_cursor_remove_match(t_query_cursor *self, t_u32 match_id); - -/** - * Advance to the next capture of the currently running query. - * - * If there is a capture, write its match to `*match` and its index within - * the matche's capture list to `*capture_index`. Otherwise, return `false`. - */ -bool ts_query_cursor_next_capture(t_query_cursor *self, t_query_match *match, - t_u32 *capture_index); - -/** - * Set the maximum start depth for a query cursor. - * - * This prevents cursors from exploring children nodes at a certain depth. - * Note if a pattern includes many children, then they will still be - * checked. - * - * The zero max start depth value can be used as a special behavior and - * it helps to destructure a subtree by staying on a node and using captures - * for interested parts. Note that the zero max start depth only limit a - * search depth for a pattern's root node but other nodes that are parts of - * the pattern may be searched at any depth what defined by the pattern - * structure. - * - * Set to `UINT32_MAX` to remove the maximum start depth. 
- */ -void ts_query_cursor_set_max_start_depth(t_query_cursor *self, - t_u32 max_start_depth); - -/**********************/ -/* Section - Language */ -/**********************/ - -/** - * Get another reference to the given language. - */ -const t_language *ts_language_copy(const t_language *self); - -/** - * Free any dynamically-allocated resources for this language, if - * this is the last reference. - */ -void ts_language_delete(const t_language *self); - -/** - * Get the number of distinct node types in the language. - */ -t_u32 ts_language_symbol_count(const t_language *self); - -/** - * Get the number of valid states in this language. - */ -t_u32 ts_language_state_count(const t_language *self); - -/** - * Get a node type string for the given numerical id. - */ -const char *ts_language_symbol_name(const t_language *self, t_symbol symbol); - -/** - * Get the numerical id for the given node type string. - */ -t_symbol ts_language_symbol_for_name(const t_language *self, const char *string, - t_u32 length, bool is_named); - -/** - * Get the number of distinct field names in the language. - */ -t_u32 ts_language_field_count(const t_language *self); - -/** - * Get the field name string for the given numerical id. - */ -const char *ts_language_field_name_for_id(const t_language *self, - t_field_id id); - -/** - * Get the numerical id for the given field name string. - */ -t_field_id ts_language_field_id_for_name(const t_language *self, - const char *name, t_u32 name_length); - -/** - * Check whether the given node type id belongs to named nodes, anonymous - * nodes, or a hidden nodes. - * - * See also [`ts_node_is_named`]. Hidden nodes are never returned from the - * API. - */ -t_symbol_type ts_language_symbol_type(const t_language *self, t_symbol symbol); - -/** - * Get the ABI version number for this language. This version number is used - * to ensure that languages were generated by a compatible version of - * Tree-sitter. - * - * See also [`ts_parser_set_language`]. 
- */ -t_u32 ts_language_version(const t_language *self); - -/** - * Get the next parse state. Combine this with lookahead iterators to - * generate completion suggestions or valid symbols in error nodes. Use - * [`ts_node_grammar_symbol`] for valid symbols. - */ -t_state_id ts_language_next_state(const t_language *self, t_state_id state, - t_symbol symbol); - -/********************************/ -/* Section - Lookahead Iterator */ -/********************************/ - -/** - * Create a new lookahead iterator for the given language and parse state. - * - * This returns `NULL` if state is invalid for the language. - * - * Repeatedly using [`ts_lookahead_iterator_next`] and - * [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in - * the given parse state. Newly created lookahead iterators will contain the - * `ERROR` symbol. - * - * Lookahead iterators can be useful to generate suggestions and improve - * syntax error diagnostics. To get symbols valid in an ERROR node, use the - * lookahead iterator on its first leaf node state. For `MISSING` nodes, a - * lookahead iterator created on the previous non-extra leaf node may be - * appropriate. - */ -t_lookahead_iterator *ts_lookahead_iterator_new(const t_language *self, - t_state_id state); - -/** - * Delete a lookahead iterator freeing all the memory used. - */ -void ts_lookahead_iterator_delete(t_lookahead_iterator *self); - -/** - * Reset the lookahead iterator to another state. - * - * This returns `true` if the iterator was reset to the given state and - * `false` otherwise. - */ -bool ts_lookahead_iterator_reset_state(t_lookahead_iterator *self, - t_state_id state); - -/** - * Reset the lookahead iterator. - * - * This returns `true` if the language was set successfully and `false` - * otherwise. - */ -bool ts_lookahead_iterator_reset(t_lookahead_iterator *self, - const t_language *language, t_state_id state); - -/** - * Get the current language of the lookahead iterator. 
- */ -const t_language *ts_lookahead_iterator_language( - const t_lookahead_iterator *self); - -/** - * Advance the lookahead iterator to the next symbol. - * - * This returns `true` if there is a new symbol and `false` otherwise. - */ -bool ts_lookahead_iterator_next(t_lookahead_iterator *self); - -/** - * Get the current symbol of the lookahead iterator; - */ -t_symbol ts_lookahead_iterator_current_symbol(const t_lookahead_iterator *self); - -/** - * Get the current symbol type of the lookahead iterator as a null - * terminated string. - */ -const char *ts_lookahead_iterator_current_symbol_name( - const t_lookahead_iterator *self); - -/**********************************/ -/* Section - Global Configuration */ -/**********************************/ - -/** - * Set the allocation functions used by the library. - * - * By default, Tree-sitter uses the standard libc allocation functions, - * but aborts the process when an allocation fails. This function lets - * you supply alternative allocation functions at runtime. - * - * If you pass `NULL` for any parameter, Tree-sitter will switch back to - * its default implementation of that function. - * - * If you call this function after the library has already been used, then - * you must ensure that either: - * 1. All the existing objects have been freed. - * 2. The new allocator shares its state with the old one, so it is capable - * of freeing memory that was allocated by the old allocator. 
- */ -void ts_set_allocator(void *(*new_malloc)(size_t), - void *(*new_calloc)(size_t, size_t), - void *(*new_realloc)(void *, size_t), - void (*new_free)(void *)); - -#endif // TREE_SITTER_API_H_ +#include "../src/api.h" \ No newline at end of file diff --git a/parser/includes/array.h b/parser/includes/array.h deleted file mode 100644 index 8ecfb702..00000000 --- a/parser/includes/array.h +++ /dev/null @@ -1,287 +0,0 @@ -#ifndef TREE_SITTER_ARRAY_H_ -#define TREE_SITTER_ARRAY_H_ - -#ifdef __cplusplus -extern "C" { -#endif - - - -#include -#include -#include -#include -#include - -#ifdef _MSC_VER -#pragma warning(disable : 4101) -#elif defined(__GNUC__) || defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-variable" -#endif - -#define Array(T) \ - struct { \ - T *contents; \ - uint32_t size; \ - uint32_t capacity; \ - } - -/// Initialize an array. -#define array_init(self) \ - ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) - -/// Create an empty array. -#define array_new() \ - { NULL, 0, 0 } - -/// Get a pointer to the element at a given `index` in the array. -#define array_get(self, _index) \ - (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) - -/// Get a pointer to the first element in the array. -#define array_front(self) array_get(self, 0) - -/// Get a pointer to the last element in the array. -#define array_back(self) array_get(self, (self)->size - 1) - -/// Clear the array, setting its size to zero. Note that this does not free any -/// memory allocated for the array's contents. -#define array_clear(self) ((self)->size = 0) - -/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is -/// less than the array's current capacity, this function has no effect. -#define array_reserve(self, new_capacity) \ - _array__reserve((Array *)(self), array_elem_size(self), new_capacity) - -/// Free any memory allocated for this array. 
Note that this does not free any -/// memory allocated for the array's contents. -#define array_delete(self) _array__delete((Array *)(self)) - -/// Push a new `element` onto the end of the array. -#define array_push(self, element) \ - (_array__grow((Array *)(self), 1, array_elem_size(self)), \ - (self)->contents[(self)->size++] = (element)) - -/// Increase the array's size by `count` elements. -/// New elements are zero-initialized. -#define array_grow_by(self, count) \ - (_array__grow((Array *)(self), count, array_elem_size(self)), \ - memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)), \ - (self)->size += (count)) - -/// Append all elements from one array to the end of another. -#define array_push_all(self, other) \ - array_extend((self), (other)->size, (other)->contents) - -/// Append `count` elements to the end of the array, reading their values from the -/// `contents` pointer. -#define array_extend(self, count, contents) \ - _array__splice( \ - (Array *)(self), array_elem_size(self), (self)->size, \ - 0, count, contents \ - ) - -/// Remove `old_count` elements from the array starting at the given `index`. At -/// the same index, insert `new_count` new elements, reading their values from the -/// `new_contents` pointer. -#define array_splice(self, _index, old_count, new_count, new_contents) \ - _array__splice( \ - (Array *)(self), array_elem_size(self), _index, \ - old_count, new_count, new_contents \ - ) - -/// Insert one `element` into the array at the given `index`. -#define array_insert(self, _index, element) \ - _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) - -/// Remove one element from the array at the given `index`. -#define array_erase(self, _index) \ - _array__erase((Array *)(self), array_elem_size(self), _index) - -/// Pop the last element off the array, returning the element by value. 
-#define array_pop(self) ((self)->contents[--(self)->size]) - -/// Assign the contents of one array to another, reallocating if necessary. -#define array_assign(self, other) \ - _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) - -/// Swap one array with another -#define array_swap(self, other) \ - _array__swap((Array *)(self), (Array *)(other)) - -/// Get the size of the array contents -#define array_elem_size(self) (sizeof *(self)->contents) - -/// Search a sorted array for a given `needle` value, using the given `compare` -/// callback to determine the order. -/// -/// If an existing element is found to be equal to `needle`, then the `index` -/// out-parameter is set to the existing value's index, and the `exists` -/// out-parameter is set to true. Otherwise, `index` is set to an index where -/// `needle` should be inserted in order to preserve the sorting, and `exists` -/// is set to false. -#define array_search_sorted_with(self, compare, needle, _index, _exists) \ - _array__search_sorted(self, 0, compare, , needle, _index, _exists) - -/// Search a sorted array for a given `needle` value, using integer comparisons -/// of a given struct field (specified with a leading dot) to determine the order. -/// -/// See also `array_search_sorted_with`. -#define array_search_sorted_by(self, field, needle, _index, _exists) \ - _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) - -/// Insert a given `value` into a sorted array, using the given `compare` -/// callback to determine the order. -#define array_insert_sorted_with(self, compare, value) \ - do { \ - unsigned _index, _exists; \ - array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ - if (!_exists) array_insert(self, _index, value); \ - } while (0) - -/// Insert a given `value` into a sorted array, using integer comparisons of -/// a given struct field (specified with a leading dot) to determine the order. 
-/// -/// See also `array_search_sorted_by`. -#define array_insert_sorted_by(self, field, value) \ - do { \ - unsigned _index, _exists; \ - array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ - if (!_exists) array_insert(self, _index, value); \ - } while (0) - -// Private - -typedef Array(void) Array; - -/// This is not what you're looking for, see `array_delete`. -static inline void _array__delete(Array *self) { - if (self->contents) { - free(self->contents); - self->contents = NULL; - self->size = 0; - self->capacity = 0; - } -} - -/// This is not what you're looking for, see `array_erase`. -static inline void _array__erase(Array *self, size_t element_size, - uint32_t index) { - assert(index < self->size); - char *contents = (char *)self->contents; - memmove(contents + index * element_size, contents + (index + 1) * element_size, - (self->size - index - 1) * element_size); - self->size--; -} - -/// This is not what you're looking for, see `array_reserve`. -static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { - if (new_capacity > self->capacity) { - if (self->contents) { - self->contents = realloc(self->contents, new_capacity * element_size); - } else { - self->contents = malloc(new_capacity * element_size); - } - self->capacity = new_capacity; - } -} - -/// This is not what you're looking for, see `array_assign`. -static inline void _array__assign(Array *self, const Array *other, size_t element_size) { - _array__reserve(self, element_size, other->size); - self->size = other->size; - memcpy(self->contents, other->contents, self->size * element_size); -} - -/// This is not what you're looking for, see `array_swap`. -static inline void _array__swap(Array *self, Array *other) { - Array swap = *other; - *other = *self; - *self = swap; -} - -/// This is not what you're looking for, see `array_push` or `array_grow_by`. 
-static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { - uint32_t new_size = self->size + count; - if (new_size > self->capacity) { - uint32_t new_capacity = self->capacity * 2; - if (new_capacity < 8) new_capacity = 8; - if (new_capacity < new_size) new_capacity = new_size; - _array__reserve(self, element_size, new_capacity); - } -} - -/// This is not what you're looking for, see `array_splice`. -static inline void _array__splice(Array *self, size_t element_size, - uint32_t index, uint32_t old_count, - uint32_t new_count, const void *elements) { - uint32_t new_size = self->size + new_count - old_count; - uint32_t old_end = index + old_count; - uint32_t new_end = index + new_count; - assert(old_end <= self->size); - - _array__reserve(self, element_size, new_size); - - char *contents = (char *)self->contents; - if (self->size > old_end) { - memmove( - contents + new_end * element_size, - contents + old_end * element_size, - (self->size - old_end) * element_size - ); - } - if (new_count > 0) { - if (elements) { - memcpy( - (contents + index * element_size), - elements, - new_count * element_size - ); - } else { - memset( - (contents + index * element_size), - 0, - new_count * element_size - ); - } - } - self->size += new_count - old_count; -} - -/// A binary search routine, based on Rust's `std::slice::binary_search_by`. -/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. 
-#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ - do { \ - *(_index) = start; \ - *(_exists) = false; \ - uint32_t size = (self)->size - *(_index); \ - if (size == 0) break; \ - int comparison; \ - while (size > 1) { \ - uint32_t half_size = size / 2; \ - uint32_t mid_index = *(_index) + half_size; \ - comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ - if (comparison <= 0) *(_index) = mid_index; \ - size -= half_size; \ - } \ - comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ - if (comparison == 0) *(_exists) = true; \ - else if (comparison < 0) *(_index) += 1; \ - } while (0) - -/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) -/// parameter by reference in order to work with the generic sorting function above. -#define _compare_int(a, b) ((int)*(a) - (int)(b)) - -#ifdef _MSC_VER -#pragma warning(default : 4101) -#elif defined(__GNUC__) || defined(__clang__) -#pragma GCC diagnostic pop -#endif - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_ARRAY_H_ diff --git a/parser/includes/error_costs.h b/parser/includes/error_costs.h deleted file mode 100644 index e73e4a2e..00000000 --- a/parser/includes/error_costs.h +++ /dev/null @@ -1,23 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* error_costs.h :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: maiboyer +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2024/04/30 14:26:02 by maiboyer #+# #+# */ -/* Updated: 2024/04/30 14:26:04 by maiboyer ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#ifndef ERROR_COSTS_H -#define ERROR_COSTS_H - -#define ERROR_STATE 0 -#define ERROR_COST_PER_RECOVERY 500 -#define ERROR_COST_PER_MISSING_TREE 110 -#define ERROR_COST_PER_SKIPPED_TREE 100 -#define ERROR_COST_PER_SKIPPED_LINE 30 -#define ERROR_COST_PER_SKIPPED_CHAR 1 - -#endif /* 
ERROR_COSTS_H */ diff --git a/parser/includes/lexer.h b/parser/includes/lexer.h deleted file mode 100644 index 79651d7a..00000000 --- a/parser/includes/lexer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* lexer.h :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: maiboyer +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2024/04/23 19:51:24 by maiboyer #+# #+# */ -/* Updated: 2024/04/30 14:28:34 by maiboyer ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#ifndef LEXER_H -#define LEXER_H - -#include "me/types.h" -#include "./api.h" -#include "parser/parser_length.h" -#include "parser/types/types_lexer.h" - -#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 - -typedef struct s_liblexer -{ - t_lexer data; - t_parse_length current_position; - t_parse_length token_start_position; - t_parse_length token_end_position; - - t_parser_range *included_ranges; - const char *chunk; - t_parse_input input; - t_parse_logger logger; - - t_u32 included_range_count; - t_u32 current_included_range_index; - t_u32 chunk_start; - t_u32 chunk_size; - t_u32 lookahead_size; - bool did_get_column; - - char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; -} t_liblexer; - -void ts_lexer_init(t_liblexer *self); -void ts_lexer_delete(t_liblexer *self); -void ts_lexer_set_input(t_liblexer *self, t_parse_input input); -void ts_lexer_reset(t_liblexer *self, t_parse_length range); -void ts_lexer_start(t_liblexer *self); -void ts_lexer_finish(t_liblexer *self, t_i32 *data); -void ts_lexer_advance_to_end(t_liblexer *self); -void ts_lexer_mark_end(t_liblexer *self); -bool ts_lexer_set_included_ranges(t_liblexer *self, - const t_parser_range *ranges, t_u32 count); -t_parser_range *ts_lexer_included_ranges(const t_liblexer *self, t_u32 *count); - -#endif /* LEXER_H */ diff --git a/parser/includes/parser.h b/parser/includes/parser.h deleted file 
mode 100644 index 298bf954..00000000 --- a/parser/includes/parser.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef TREE_SITTER_PARSER_H_ -#define TREE_SITTER_PARSER_H_ - -#include "../parse_types.h" -#include "parser/lexer.h" -#include -#include -#include - -#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 - -#endif // TREE_SITTER_PARSER_H_ diff --git a/parser/includes/parser_length.h b/parser/includes/parser_length.h deleted file mode 100644 index 78b37591..00000000 --- a/parser/includes/parser_length.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef TREE_SITTER_LENGTH_H_ -#define TREE_SITTER_LENGTH_H_ - -#include "parser/point.h" -#include "./api.h" -#include -#include - -typedef struct s_parse_length -{ - t_u32 bytes; - t_point extent; -} t_parse_length; - -static const t_parse_length LENGTH_UNDEFINED = {0, {0, 1}}; -static const t_parse_length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}}; - -static inline bool length_is_undefined(t_parse_length length) -{ - return (length.bytes == 0 && length.extent.column != 0); -} - -static inline t_parse_length length_min(t_parse_length len1, - t_parse_length len2) -{ - if (len1.bytes < len2.bytes) - return (len1); - else - return (len2); -} - -static inline t_parse_length length_add(t_parse_length len1, - t_parse_length len2) -{ - t_parse_length result; - result.bytes = len1.bytes + len2.bytes; - result.extent = point_add(len1.extent, len2.extent); - return (result); -} - -static inline t_parse_length length_sub(t_parse_length len1, - t_parse_length len2) -{ - t_parse_length result; - result.bytes = len1.bytes - len2.bytes; - result.extent = point_sub(len1.extent, len2.extent); - return (result); -} - -static inline t_parse_length length_zero(void) -{ - return ((t_parse_length){0, {0, 0}}); -} - -static inline t_parse_length length_saturating_sub(t_parse_length len1, - t_parse_length len2) -{ - if (len1.bytes > len2.bytes) - return (length_sub(len1, len2)); - else - return (length_zero()); -} - -#endif diff --git 
a/parser/includes/point.h b/parser/includes/point.h deleted file mode 100644 index f315dd73..00000000 --- a/parser/includes/point.h +++ /dev/null @@ -1,21 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* point.h :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: maiboyer +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2024/04/30 14:35:22 by maiboyer #+# #+# */ -/* Updated: 2024/04/30 14:46:18 by maiboyer ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#ifndef POINT_H -#define POINT_H - -#include "parser/point/inline1.h" -#include "parser/point/inline2.h" -#include "parser/point/inline3.h" - - -#endif /* POINT_H */ diff --git a/parser/includes/point/inline1.h b/parser/includes/point/inline1.h deleted file mode 100644 index 746241a8..00000000 --- a/parser/includes/point/inline1.h +++ /dev/null @@ -1,50 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* inline1.h :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: maiboyer +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2024/04/30 14:35:50 by maiboyer #+# #+# */ -/* Updated: 2024/04/30 14:43:49 by maiboyer ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#ifndef INLINE1_H -#define INLINE1_H - -#include "parser/types/types_point.h" - -static inline t_point point__new(unsigned row, unsigned column) -{ - t_point result = {row, column}; - return result; -} - -static inline t_point point_add(t_point a, t_point b) -{ - if (b.row > 0) - return point__new(a.row + b.row, b.column); - else - return point__new(a.row, a.column + b.column); -} - -static inline t_point point_sub(t_point a, t_point b) -{ - if (a.row > b.row) - return point__new(a.row - b.row, a.column); - else - return point__new(0, a.column - b.column); -} - -static inline bool point_lte(t_point a, t_point b) -{ - 
return (a.row < b.row) || (a.row == b.row && a.column <= b.column); -} - -static inline bool point_lt(t_point a, t_point b) -{ - return (a.row < b.row) || (a.row == b.row && a.column < b.column); -} - -#endif /* INLINE1_H */ diff --git a/parser/includes/point/inline2.h b/parser/includes/point/inline2.h deleted file mode 100644 index 8d0e455e..00000000 --- a/parser/includes/point/inline2.h +++ /dev/null @@ -1,49 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* inline2.h :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: maiboyer +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2024/04/30 14:43:58 by maiboyer #+# #+# */ -/* Updated: 2024/04/30 14:44:12 by maiboyer ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#ifndef INLINE2_H -#define INLINE2_H - -#include "parser/types/types_point.h" - -static inline bool point_gt(t_point a, t_point b) -{ - return (a.row > b.row) || (a.row == b.row && a.column > b.column); -} - -static inline bool point_gte(t_point a, t_point b) -{ - return (a.row > b.row) || (a.row == b.row && a.column >= b.column); -} - -static inline bool point_eq(t_point a, t_point b) -{ - return a.row == b.row && a.column == b.column; -} - -static inline t_point point_min(t_point a, t_point b) -{ - if (a.row < b.row || (a.row == b.row && a.column < b.column)) - return a; - else - return b; -} - -static inline t_point point_max(t_point a, t_point b) -{ - if (a.row > b.row || (a.row == b.row && a.column > b.column)) - return a; - else - return b; -} - -#endif /* INLINE2_H */ diff --git a/parser/includes/point/inline3.h b/parser/includes/point/inline3.h deleted file mode 100644 index 5d68736b..00000000 --- a/parser/includes/point/inline3.h +++ /dev/null @@ -1,29 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* inline3.h :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: 
maiboyer +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2024/04/30 14:44:49 by maiboyer #+# #+# */ -/* Updated: 2024/04/30 15:04:39 by maiboyer ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#ifndef INLINE3_H -#define INLINE3_H - -#include "parser/types/types_point.h" -#include - -static inline t_point point_val_zero(void) -{ - return ((t_point){0, 0}); -} - -static inline t_point point_val_max(void) -{ - return ((t_point){UINT32_MAX, UINT32_MAX}); -} - -#endif /* INLINE3_H */ diff --git a/parser/includes/reduce_action.h b/parser/includes/reduce_action.h deleted file mode 100644 index 228ae3e8..00000000 --- a/parser/includes/reduce_action.h +++ /dev/null @@ -1,36 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* reduce_action.h :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: maiboyer +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2024/04/30 15:25:12 by maiboyer #+# #+# */ -/* Updated: 2024/04/30 15:25:38 by maiboyer ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#ifndef REDUCE_ACTION_H -#define REDUCE_ACTION_H - -#include "me/types.h" -#include "me/vec/vec_reduce_action.h" -#include "./api.h" -#include "parser/types/types_reduce_action.h" - -static inline void ts_reduce_action_set_add(t_vec_reduce_action *self, - t_reduce_action new_action) -{ - t_reduce_action action; - - for (t_u32 i = 0; i < self->len; i++) - { - action = self->buffer[i]; - if (action.symbol == new_action.symbol && - action.count == new_action.count) - return; - } - vec_reduce_action_push(self, new_action); -} - -#endif /* REDUCE_ACTION_H */ diff --git a/sources/ft_exit.c b/sources/ft_exit.c index ace74c4b..32468b64 100644 --- a/sources/ft_exit.c +++ b/sources/ft_exit.c @@ -12,6 +12,8 @@ #include "../includes/minishell.h" +void ts_parser_delete(TSParser *self); + void ft_free(void *ptr) { if (!ptr) 
diff --git a/sources/main.c b/sources/main.c index d87f3d6a..143a1f80 100644 --- a/sources/main.c +++ b/sources/main.c @@ -15,6 +15,14 @@ #include "me/string/str_len.h" #include "parser/api.h" +TSParser *ts_parser_new(); +void ts_tree_delete(TSTree *); +TSNode ts_tree_root_node(TSTree *); +TSTree *ts_parser_parse_string(TSParser *, TSTree *oldtree, t_const_str input, + t_usize len); +void ts_parser_delete(TSParser *self); +void ts_parser_set_language(TSParser *self, TSLanguage *lang); + void print_node_data(t_node *t, t_usize depth) { t_usize idx; @@ -28,10 +36,10 @@ void print_node_data(t_node *t, t_usize depth) print_node_data(&t->childs[idx++], depth + 1); } -t_node parse_to_nodes(t_parser *parser, t_const_str input) +t_node parse_to_nodes(TSParser *parser, t_const_str input) { - t_parse_tree *tree; - t_parse_node node; + TSTree *tree; + TSNode node; t_node ret; tree = ts_parser_parse_string(parser, NULL, input, str_len(input)); @@ -40,7 +48,7 @@ t_node parse_to_nodes(t_parser *parser, t_const_str input) ts_tree_delete(tree); return (ret); } -t_node parse_str(t_myparser *parser, t_const_str input) +t_node parse_str(t_parser *parser, t_const_str input) { return (parse_to_nodes(parser->parser, input)); } @@ -104,20 +112,20 @@ void ft_find_path(t_str arge[], t_utils *utils) utils->path = ft_split(PATH_FILES, ':'); } -t_language *tree_sitter_bash(void); +TSLanguage *tree_sitter_bash(void); -t_myparser create_myparser(void) +t_parser create_myparser(void) { - t_language *lang; - t_parser *parser; + TSLanguage *lang; + TSParser *parser; lang = tree_sitter_bash(); parser = ts_parser_new(); ts_parser_set_language(parser, lang); - return ((t_myparser){.parser = parser}); + return ((t_parser){.parser = parser}); } -void free_myparser(t_myparser self) +void free_myparser(t_parser self) { ts_parser_delete(self.parser); } @@ -131,7 +139,7 @@ t_i32 main(t_i32 argc, t_str argv[], t_str envp[]) (void)envp; utils = (t_utils){}; utils.parser = create_myparser(); - 
//ft_find_path(arge, &utils); + // ft_find_path(arge, &utils); utils.name_shell = "42sh > "; ft_take_args(&utils); } diff --git a/sources/node/node.c b/sources/node/node.c index d36edc53..71b2a745 100644 --- a/sources/node/node.c +++ b/sources/node/node.c @@ -16,13 +16,19 @@ #include "me/string/str_l_copy.h" #include "parser/api.h" -t_node build_node(t_parse_node curr, t_const_str input); +t_node build_node(TSNode current, t_const_str input); +TSNode ts_node_child(TSNode parent, t_usize idx); +TSSymbol ts_node_symbol(TSNode self); +t_const_str ts_node_type(TSNode self); +t_u32 ts_node_start_byte(TSNode self); +t_u32 ts_node_end_byte(TSNode self); +t_u32 ts_node_child_count(TSNode self); -t_node *build_childs(t_parse_node parent, t_const_str input, t_usize count) +t_node *build_childs(TSNode parent, t_const_str input, t_usize count) { t_node *ret; t_usize idx; - t_parse_node child; + TSNode child; ret = mem_alloc_array(sizeof(*ret), count); if (ret == NULL) @@ -37,7 +43,7 @@ t_node *build_childs(t_parse_node parent, t_const_str input, t_usize count) return (ret); } -t_node build_node(t_parse_node curr, t_const_str input) +t_node build_node(TSNode curr, t_const_str input) { t_node out; From 1032b41df25fc1537eab1c7dd02daeff83bb093c Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Wed, 1 May 2024 17:37:10 +0200 Subject: [PATCH 14/14] oups --- parser/src/create_language.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/parser/src/create_language.c b/parser/src/create_language.c index d7c96baa..c99df381 100644 --- a/parser/src/create_language.c +++ b/parser/src/create_language.c @@ -6,23 +6,13 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/04/25 16:13:52 by maiboyer #+# #+# */ -<<<<<<< HEAD:parser/src/create_language.c -/* Updated: 2024/05/01 15:52:38 by maiboyer ### ########.fr */ +/* Updated: 2024/05/01 17:36:58 by maiboyer ### ########.fr */ /* */ /* 
************************************************************************** */ #include "../static/headers/constants.h" #include "../static/headers/symbols.h" #include "../parse_types.h" -======= -/* Updated: 2024/04/30 16:37:30 by maiboyer ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#include "./static/headers/constants.h" -#include "./static/headers/symbols.h" -#include "./parse_types.h" ->>>>>>> master:parser/create_language.c const uint16_t *create_parse_table(void); const uint16_t *create_small_parse_table(void);