From 7329c95ba0e07112f7fc59c6f5a7158e74ab0a98 Mon Sep 17 00:00:00 2001 From: Maix0 Date: Sun, 18 Aug 2024 21:46:29 +0200 Subject: [PATCH] Restored to old state that doesn't leak --- parser/include/parser/stack.h | 6 +- parser/include/parser/subtree.h | 167 +++++--- parser/src/create_language.c | 4 +- parser/src/language.c | 4 +- parser/src/lexer.c | 38 +- parser/src/node.c | 115 ++++-- parser/src/parser.c | 156 ++++---- parser/src/scanner.c | 26 +- parser/src/stack.c | 120 +++--- parser/src/subtree.c | 663 +++++++++++++++++++++++++++----- parser/src/tree.c | 57 +++ 11 files changed, 1028 insertions(+), 328 deletions(-) diff --git a/parser/include/parser/stack.h b/parser/include/parser/stack.h index 63cb62d2..8b7dbd81 100644 --- a/parser/include/parser/stack.h +++ b/parser/include/parser/stack.h @@ -1,14 +1,14 @@ #ifndef PARSE_STACK_H #define PARSE_STACK_H -#include "me/types.h" #include "parser/array.h" #include "parser/subtree.h" +#include "me/types.h" typedef struct Stack Stack; typedef t_u32 StackVersion; -#define STACK_VERSION_NONE ((StackVersion) - 1) +#define STACK_VERSION_NONE ((StackVersion)-1) typedef struct StackSlice { @@ -29,7 +29,7 @@ typedef Array(StackSummaryEntry) StackSummary; typedef void (*StackIterateCallback)(void *, TSStateId, t_u32); // Create a stack. -Stack *ts_stack_new(void); +Stack *ts_stack_new(SubtreePool *); // Release the memory reserved for a given stack. void ts_stack_delete(Stack *); diff --git a/parser/include/parser/subtree.h b/parser/include/parser/subtree.h index 0ab3f253..1082518e 100644 --- a/parser/include/parser/subtree.h +++ b/parser/include/parser/subtree.h @@ -1,11 +1,11 @@ #ifndef SUBTREE_H #define SUBTREE_H -#include "me/types.h" #include "parser/api.h" #include "parser/array.h" #include "parser/length.h" #include "parser/parser.h" +#include "me/types.h" #include #include #include @@ -36,6 +36,37 @@ struct ExternalScannerState // separately on the heap. typedef struct ExternalScannerState ExternalScannerState; +// A compact representation of a subtree. +// +// This representation is used for small leaf nodes that are not +// errors, and were not created by an external scanner. +// +// The idea behind the layout of this struct is that the `is_inline` +// bit will fall exactly into the same location as the least significant +// bit of the pointer in `Subtree` or `MutableSubtree`, respectively. +// Because of alignment, for any valid pointer this will be 0, giving +// us the opportunity to make use of this bit to signify whether to use +// the pointer or the inline struct. +typedef struct SubtreeInlineData SubtreeInlineData; + +struct SubtreeInlineData +{ + bool is_inline : 1; + bool visible : 1; + bool named : 1; + bool extra : 1; + bool has_changes : 1; + bool is_missing : 1; + bool is_keyword : 1; + t_u8 symbol; + t_u16 parse_state; + t_u8 padding_columns; + t_u8 padding_rows : 4; + t_u8 lookahead_bytes : 4; + t_u8 padding_bytes; + t_u8 size_bytes; +}; + // A heap-allocated representation of a subtree. // // This representation is used for parent nodes, external tokens, @@ -43,14 +74,14 @@ typedef struct ExternalScannerState ExternalScannerState; // the inline representation. typedef struct SubtreeHeapData { - t_u32 ref_count; - Length padding; - Length size; - t_u32 lookahead_bytes; - t_u32 error_cost; - t_u32 child_count; - TSSymbol symbol; - TSStateId parse_state; + volatile t_u32 ref_count; + Length padding; + Length size; + t_u32 lookahead_bytes; + t_u32 error_cost; + t_u32 child_count; + TSSymbol symbol; + TSStateId parse_state; bool visible : 1; bool named : 1; @@ -91,83 +122,94 @@ typedef struct SubtreeHeapData // The fundamental building block of a syntax tree. typedef union Subtree { + SubtreeInlineData data; const SubtreeHeapData *ptr; } Subtree; // Like Subtree, but mutable. typedef union MutableSubtree { - SubtreeHeapData *ptr; + SubtreeInlineData data; + SubtreeHeapData *ptr; } MutableSubtree; typedef Array(Subtree) SubtreeArray; typedef Array(MutableSubtree) MutableSubtreeArray; +typedef struct SubtreePool +{ + MutableSubtreeArray free_trees; + MutableSubtreeArray tree_stack; +} SubtreePool; + void ts_external_scanner_state_init(ExternalScannerState *, const t_u8 *, t_u32); const t_u8 *ts_external_scanner_state_data(const ExternalScannerState *); bool ts_external_scanner_state_eq(const ExternalScannerState *self, const t_u8 *, t_u32); void ts_external_scanner_state_delete(ExternalScannerState *self); void ts_subtree_array_copy(SubtreeArray, SubtreeArray *); -void ts_subtree_array_clear(SubtreeArray *); -void ts_subtree_array_delete(SubtreeArray *); +void ts_subtree_array_clear(SubtreePool *, SubtreeArray *); +void ts_subtree_array_delete(SubtreePool *, SubtreeArray *); void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *); void ts_subtree_array_reverse(SubtreeArray *); -Subtree ts_subtree_new_leaf(TSSymbol, Length, Length, t_u32, TSStateId, bool, bool, bool, const TSLanguage *); -Subtree ts_subtree_new_error(t_i32, Length, Length, t_u32, TSStateId, const TSLanguage *); -MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, t_u32, const TSLanguage *); -Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *); -Subtree ts_subtree_new_missing_leaf(TSSymbol, Length, t_u32, const TSLanguage *); -MutableSubtree ts_subtree_make_mut(Subtree); -void ts_subtree_retain(Subtree); -void ts_subtree_release(Subtree); -int ts_subtree_compare(Subtree, Subtree); -void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *); -void ts_subtree_summarize(MutableSubtree, const Subtree *, t_u32, const TSLanguage *); -void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *); -void ts_subtree_balance(Subtree, const TSLanguage *); -Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit); -char *ts_subtree_string(Subtree, TSSymbol, bool, const TSLanguage *, bool include_all); -void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *); -Subtree ts_subtree_last_external_token(Subtree); +SubtreePool ts_subtree_pool_new(t_u32 capacity); +void ts_subtree_pool_delete(SubtreePool *); + +Subtree ts_subtree_new_leaf(SubtreePool *, TSSymbol, Length, Length, t_u32, TSStateId, bool, bool, bool, const TSLanguage *); +Subtree ts_subtree_new_error(SubtreePool *, t_i32, Length, Length, t_u32, TSStateId, const TSLanguage *); +MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, t_u32, const TSLanguage *); +Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *); +Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, t_u32, const TSLanguage *); +MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree); +void ts_subtree_retain(Subtree); +void ts_subtree_release(SubtreePool *, Subtree); +int ts_subtree_compare(Subtree, Subtree, SubtreePool *); +void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *); +void ts_subtree_summarize(MutableSubtree, const Subtree *, t_u32, const TSLanguage *); +void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *); +void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *); +Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *); +char *ts_subtree_string(Subtree, TSSymbol, bool, const TSLanguage *, bool include_all); +void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *); +Subtree ts_subtree_last_external_token(Subtree); const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self); bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); static inline TSSymbol ts_subtree_symbol(Subtree self) { - return ((self).ptr->symbol); + return ((self).data.is_inline ? (self).data.symbol : (self).ptr->symbol); } static inline bool ts_subtree_visible(Subtree self) { - return ((self).ptr->visible); + return ((self).data.is_inline ? (self).data.visible : (self).ptr->visible); } static inline bool ts_subtree_named(Subtree self) { - return ((self).ptr->named); + return ((self).data.is_inline ? (self).data.named : (self).ptr->named); } static inline bool ts_subtree_extra(Subtree self) { - return ((self).ptr->extra); + return ((self).data.is_inline ? (self).data.extra : (self).ptr->extra); } static inline bool ts_subtree_has_changes(Subtree self) { - return ((self).ptr->has_changes); + return ((self).data.is_inline ? (self).data.has_changes : (self).ptr->has_changes); } static inline bool ts_subtree_missing(Subtree self) { - return ((self).ptr->is_missing); + return ((self).data.is_inline ? (self).data.is_missing : (self).ptr->is_missing); } static inline bool ts_subtree_is_keyword(Subtree self) { - return ((self).ptr->is_keyword); + return ((self).data.is_inline ? (self).data.is_keyword : (self).ptr->is_keyword); } static inline TSStateId ts_subtree_parse_state(Subtree self) { - return ((self).ptr->parse_state); + return ((self).data.is_inline ? (self).data.parse_state : (self).ptr->parse_state); } static inline t_u32 ts_subtree_lookahead_bytes(Subtree self) { - return ((self).ptr->lookahead_bytes); + return ((self).data.is_inline ? (self).data.lookahead_bytes : (self).ptr->lookahead_bytes); } // Get the size needed to store a heap-allocated subtree with the given @@ -179,15 +221,20 @@ static inline size_t ts_subtree_alloc_size(t_u32 child_count) // Get a subtree's children, which are allocated immediately before the // tree's own heap data. -#define ts_subtree_children(self) ((Subtree *)((self).ptr) - (self).ptr->child_count) +#define ts_subtree_children(self) ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) { - self->ptr->extra = is_extra; + if (self->data.is_inline) + self->data.extra = is_extra; + else + self->ptr->extra = is_extra; } static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) { + if (self.data.is_inline) + return self.data.symbol; if (self.ptr->child_count == 0) return self.ptr->symbol; return self.ptr->first_leaf.symbol; @@ -195,6 +242,8 @@ static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) { + if (self.data.is_inline) + return self.data.parse_state; if (self.ptr->child_count == 0) return self.ptr->parse_state; return self.ptr->first_leaf.parse_state; @@ -202,12 +251,18 @@ static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) static inline Length ts_subtree_padding(Subtree self) { - return self.ptr->padding; + if (self.data.is_inline) + return ((Length){self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}}); + else + return self.ptr->padding; } static inline Length ts_subtree_size(Subtree self) { - return self.ptr->size; + if (self.data.is_inline) + return ((Length){self.data.size_bytes, {0, self.data.size_bytes}}); + else + return self.ptr->size; } static inline Length ts_subtree_total_size(Subtree self) @@ -222,22 +277,22 @@ static inline t_u32 ts_subtree_total_bytes(Subtree self) static inline t_u32 ts_subtree_child_count(Subtree self) { - return (self.ptr->child_count); + return (self.data.is_inline ? 0 : self.ptr->child_count); } static inline t_u32 ts_subtree_repeat_depth(Subtree self) { - return (self.ptr->repeat_depth); + return (self.data.is_inline ? 0 : self.ptr->repeat_depth); } static inline t_u32 ts_subtree_is_repetition(Subtree self) { - return (!self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0); + return (self.data.is_inline ? 0 : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0); } static inline t_u32 ts_subtree_visible_descendant_count(Subtree self) { - return ((self.ptr->child_count == 0) ? 0 : self.ptr->visible_descendant_count); + return ((self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->visible_descendant_count); } static inline t_u32 ts_subtree_visible_child_count(Subtree self) @@ -253,12 +308,12 @@ static inline t_u32 ts_subtree_error_cost(Subtree self) if (ts_subtree_missing(self)) return (ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY); else - return (self.ptr->error_cost); + return (self.data.is_inline ? 0 : self.ptr->error_cost); } static inline t_i32 ts_subtree_dynamic_precedence(Subtree self) { - return ((self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence); + return ((self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence); } static inline t_u16 ts_subtree_production_id(Subtree self) @@ -271,32 +326,32 @@ static inline t_u16 ts_subtree_production_id(Subtree self) static inline bool ts_subtree_fragile_left(Subtree self) { - return (self.ptr->fragile_left); + return (self.data.is_inline ? false : self.ptr->fragile_left); } static inline bool ts_subtree_fragile_right(Subtree self) { - return (self.ptr->fragile_right); + return (self.data.is_inline ? false : self.ptr->fragile_right); } static inline bool ts_subtree_has_external_tokens(Subtree self) { - return (self.ptr->has_external_tokens); + return (self.data.is_inline ? false : self.ptr->has_external_tokens); } static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) { - return (self.ptr->has_external_scanner_state_change); + return (self.data.is_inline ? false : self.ptr->has_external_scanner_state_change); } static inline bool ts_subtree_depends_on_column(Subtree self) { - return (self.ptr->depends_on_column); + return (self.data.is_inline ? false : self.ptr->depends_on_column); } static inline bool ts_subtree_is_fragile(Subtree self) { - return ((self.ptr->fragile_left || self.ptr->fragile_right)); + return (self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right)); } static inline bool ts_subtree_is_error(Subtree self) @@ -313,7 +368,7 @@ static inline Subtree ts_subtree_from_mut(MutableSubtree self) { Subtree result; - result.ptr = self.ptr; + result.data = self.data; return (result); } @@ -321,7 +376,7 @@ static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) { MutableSubtree result; - result.ptr = (void *)self.ptr; + result.data = self.data; return (result); } diff --git a/parser/src/create_language.c b/parser/src/create_language.c index 5b0a4abb..58e510b3 100644 --- a/parser/src/create_language.c +++ b/parser/src/create_language.c @@ -54,7 +54,7 @@ static struct ExternalScannerDefinition init_scanner(void) }); } -/*R static R*/ void init_language(TSLanguage *language) +static void init_language(TSLanguage *language) { static uint32_t empty_map[] = {0, 0 ,0}; @@ -80,7 +80,7 @@ static struct ExternalScannerDefinition init_scanner(void) const TSLanguage *tree_sitter_sh(void) { - /*R static R*/ bool init = false; + static bool init = false; static TSLanguage language = { .version = LANGUAGE_VERSION, .symbol_count = SYMBOL_COUNT, diff --git a/parser/src/language.c b/parser/src/language.c index f243e0bd..2d6c65d3 100644 --- a/parser/src/language.c +++ b/parser/src/language.c @@ -1,7 +1,7 @@ #include "parser/language.h" -#include "me/types.h" #include "parser/api.h" #include "parser/parser.h" +#include "me/types.h" #include #include @@ -308,4 +308,4 @@ void ts_language_aliases_for_symbol(const TSLanguage *self, TSSymbol original_sy } idx += count; } -} +} \ No newline at end of file diff --git a/parser/src/lexer.c b/parser/src/lexer.c index 47b405ae..9f83f733 100644 --- a/parser/src/lexer.c +++ b/parser/src/lexer.c @@ -1,10 +1,12 @@ #include "parser/lexer.h" +#include "parser/length.h" +#include "parser/input.h" #include "me/mem/mem.h" #include "me/types.h" -#include "parser/input.h" -#include "parser/length.h" #include +#define LOG(...) + static const t_i32 BYTE_ORDER_MARK = 0xFEFF; static const TSRange DEFAULT_RANGE = {.start_point = @@ -23,7 +25,7 @@ static const TSRange DEFAULT_RANGE = {.start_point = // Check if the lexer has reached EOF. This state is stored // by setting the lexer's `current_included_range_index` such that // it has consumed all of its available ranges. -/*R static R*/ bool ts_lexer__eof(const TSLexer *_self) +static bool ts_lexer__eof(const TSLexer *_self) { Lexer *self = (Lexer *)_self; return self->current_included_range_index == self->included_range_count; @@ -31,7 +33,7 @@ static const TSRange DEFAULT_RANGE = {.start_point = // Clear the currently stored chunk of source code, because the lexer's // position has changed. -/*R static R*/ void ts_lexer__clear_chunk(Lexer *self) +static void ts_lexer__clear_chunk(Lexer *self) { self->chunk = NULL; self->chunk_size = 0; @@ -40,7 +42,7 @@ static const TSRange DEFAULT_RANGE = {.start_point = // Call the lexer's input callback to obtain a new chunk of source code // for the current position. -/*R static R*/ void ts_lexer__get_chunk(Lexer *self) +static void ts_lexer__get_chunk(Lexer *self) { self->chunk_start = self->current_position.bytes; self->chunk = self->input.read(self->input.payload, self->current_position.bytes, self->current_position.extent, &self->chunk_size); @@ -54,7 +56,7 @@ static const TSRange DEFAULT_RANGE = {.start_point = // Decode the next unicode character in the current chunk of source code. // This assumes that the lexer has already retrieved a chunk of source // code that spans the current position. -/*R static R*/ void ts_lexer__get_lookahead(Lexer *self) +static void ts_lexer__get_lookahead(Lexer *self) { t_u32 position_in_chunk = self->current_position.bytes - self->chunk_start; t_u32 size = self->chunk_size - position_in_chunk; @@ -87,7 +89,7 @@ static const TSRange DEFAULT_RANGE = {.start_point = } } -/*R static R*/ void ts_lexer_goto(Lexer *self, Length position) +static void ts_lexer_goto(Lexer *self, Length position) { self->current_position = position; @@ -143,7 +145,7 @@ static const TSRange DEFAULT_RANGE = {.start_point = } // Intended to be called only from functions that control logging. -/*R static R*/ void ts_lexer__do_advance(Lexer *self, bool skip) +static void ts_lexer__do_advance(Lexer *self, bool skip) { if (self->lookahead_size) { @@ -202,17 +204,27 @@ static const TSRange DEFAULT_RANGE = {.start_point = // Advance to the next character in the source code, retrieving a new // chunk of source code if needed. -/*R static R*/ void ts_lexer__advance(TSLexer *_self, bool skip) +static void ts_lexer__advance(TSLexer *_self, bool skip) { Lexer *self = (Lexer *)_self; if (!self->chunk) return; + + if (skip) + { + LOG("skip", self->data.lookahead) + } + else + { + LOG("consume", self->data.lookahead) + } + ts_lexer__do_advance(self, skip); } // Mark that a token match has completed. This can be called multiple // times if a longer match is found later. -/*R static R*/ void ts_lexer__mark_end(TSLexer *_self) +static void ts_lexer__mark_end(TSLexer *_self) { Lexer *self = (Lexer *)_self; if (!ts_lexer__eof(&self->data)) @@ -234,7 +246,7 @@ static const TSRange DEFAULT_RANGE = {.start_point = self->token_end_position = self->current_position; } -/*R static R*/ t_u32 ts_lexer__get_column(TSLexer *_self) +static t_u32 ts_lexer__get_column(TSLexer *_self) { Lexer *self = (Lexer *)_self; @@ -268,7 +280,7 @@ static const TSRange DEFAULT_RANGE = {.start_point = // Is the lexer at a boundary between two disjoint included ranges of // source code? This is exposed as an API because some languages' external // scanners need to perform custom actions at these boundaries. -/*R static R*/ bool ts_lexer__is_at_included_range_start(const TSLexer *_self) +static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) { const Lexer *self = (const Lexer *)_self; if (self->current_included_range_index < self->included_range_count) @@ -426,3 +438,5 @@ TSRange *ts_lexer_included_ranges(const Lexer *self, t_u32 *count) *count = self->included_range_count; return self->included_ranges; } + +#undef LOG diff --git a/parser/src/node.c b/parser/src/node.c index 6b6d4208..f85bc3e1 100644 --- a/parser/src/node.c +++ b/parser/src/node.c @@ -1,9 +1,9 @@ -#include "me/types.h" #include "parser/api.h" #include "parser/language.h" -#include "parser/point.h" #include "parser/subtree.h" #include "parser/tree.h" +#include "parser/point.h" +#include "me/types.h" typedef struct NodeChildIterator { @@ -26,7 +26,7 @@ TSNode ts_node_new(const TSTree *tree, const Subtree *subtree, Length position, }; } -/*R static inline R*/ TSNode ts_node__null(void) +static inline TSNode ts_node__null(void) { return ts_node_new(NULL, NULL, length_zero(), 0); } @@ -43,19 +43,19 @@ TSPoint ts_node_start_point(TSNode self) return (TSPoint){self.context[1], self.context[2]}; } -/*R static inline R*/ t_u32 ts_node__alias(const TSNode *self) +static inline t_u32 ts_node__alias(const TSNode *self) { return self->context[3]; } -/*R static inline R*/ Subtree ts_node__subtree(TSNode self) +static inline Subtree ts_node__subtree(TSNode self) { return *(const Subtree *)self.id; } // NodeChildIterator -/*R static inline R*/ NodeChildIterator ts_node_iterate_children(const TSNode *node) +static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) { Subtree subtree = ts_node__subtree(*node); if (ts_subtree_child_count(subtree) == 0) @@ -73,19 +73,17 @@ TSPoint ts_node_start_point(TSNode self) }; } -/*R static inline R*/ bool ts_node_child_iterator_done(NodeChildIterator *self) +static inline bool ts_node_child_iterator_done(NodeChildIterator *self) { return self->child_index == self->parent.ptr->child_count; } -/*R static inline R*/ bool ts_node_child_iterator_next(NodeChildIterator *self, TSNode *result) +static inline bool ts_node_child_iterator_next(NodeChildIterator *self, TSNode *result) { if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false; const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; TSSymbol alias_symbol = 0; - if (child == NULL) - return (false); if (!ts_subtree_extra(*child)) { if (self->alias_sequence) @@ -106,7 +104,7 @@ TSPoint ts_node_start_point(TSNode self) // TSNode - private -/*R static inline R*/ bool ts_node__is_relevant(TSNode self, bool include_anonymous) +static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { Subtree tree = ts_node__subtree(self); if (include_anonymous) @@ -127,7 +125,7 @@ TSPoint ts_node_start_point(TSNode self) } } -/*R static inline R*/ t_u32 ts_node__relevant_child_count(TSNode self, bool include_anonymous) +static inline t_u32 ts_node__relevant_child_count(TSNode self, bool include_anonymous) { Subtree tree = ts_node__subtree(self); if (ts_subtree_child_count(tree) > 0) @@ -147,7 +145,7 @@ TSPoint ts_node_start_point(TSNode self) } } -/*R static inline R*/ TSNode ts_node__child(TSNode self, t_u32 child_index, bool include_anonymous) +static inline TSNode ts_node__child(TSNode self, t_u32 child_index, bool include_anonymous) { TSNode result = self; bool did_descend = true; @@ -188,7 +186,7 @@ TSPoint ts_node_start_point(TSNode self) return ts_node__null(); } -/*R static R*/ bool ts_subtree_has_trailing_empty_descendant(Subtree self, Subtree other) +static bool ts_subtree_has_trailing_empty_descendant(Subtree self, Subtree other) { for (t_u32 i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) { @@ -203,7 +201,7 @@ TSPoint ts_node_start_point(TSNode self) return false; } -/*R static inline R*/ TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) +static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) { Subtree self_subtree = ts_node__subtree(self); bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0; @@ -282,7 +280,7 @@ TSPoint ts_node_start_point(TSNode self) return ts_node__null(); } -/*R static inline R*/ TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) +static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) { t_u32 target_end_byte = ts_node_end_byte(self); @@ -353,7 +351,7 @@ TSPoint ts_node_start_point(TSNode self) return ts_node__null(); } -/*R static inline R*/ TSNode ts_node__first_child_for_byte(TSNode self, t_u32 goal, bool include_anonymous) +static inline TSNode ts_node__first_child_for_byte(TSNode self, t_u32 goal, bool include_anonymous) { TSNode node = self; bool did_descend = true; @@ -385,7 +383,7 @@ TSPoint ts_node_start_point(TSNode self) return ts_node__null(); } -/*R static inline R*/ TSNode ts_node__descendant_for_byte_range(TSNode self, t_u32 range_start, t_u32 range_end, bool include_anonymous) +static inline TSNode ts_node__descendant_for_byte_range(TSNode self, t_u32 range_start, t_u32 range_end, bool include_anonymous) { TSNode node = self; TSNode last_visible_node = self; @@ -426,7 +424,7 @@ TSPoint ts_node_start_point(TSNode self) return last_visible_node; } -/*R static inline R*/ TSNode ts_node__descendant_for_point_range(TSNode self, TSPoint range_start, TSPoint range_end, bool include_anonymous) +static inline TSNode ts_node__descendant_for_point_range(TSNode self, TSPoint range_start, TSPoint range_end, bool include_anonymous) { TSNode node = self; TSNode last_visible_node = self; @@ -511,14 +509,12 @@ t_const_str ts_node_grammar_type(TSNode self) return ts_language_symbol_name(self.tree->language, symbol); } -/* char *ts_node_string(TSNode self) { TSSymbol alias_symbol = ts_node__alias(&self); return ts_subtree_string(ts_node__subtree(self), alias_symbol, ts_language_symbol_metadata(self.tree->language, alias_symbol).visible, self.tree->language, false); } -*/ bool ts_node_eq(TSNode self, TSNode other) { @@ -717,7 +713,7 @@ recur: return ts_node__null(); } -/*R static inline R*/ t_const_str ts_node__field_name_from_language(TSNode self, t_u32 structural_child_index) +static inline t_const_str ts_node__field_name_from_language(TSNode self, t_u32 structural_child_index) { const TSFieldMapEntry *field_map, *field_map_end; ts_language_field_map(self.tree->language, ts_node__subtree(self).ptr->production_id, &field_map, &field_map_end); @@ -807,9 +803,84 @@ t_u32 ts_node_named_child_count(TSNode self) { Subtree tree = ts_node__subtree(self); if (ts_subtree_child_count(tree) > 0) + { return tree.ptr->named_child_count; + } else + { return 0; + } +} + +TSNode ts_node_next_sibling(TSNode self) +{ + return ts_node__next_sibling(self, true); +} + +TSNode ts_node_next_named_sibling(TSNode self) +{ + return ts_node__next_sibling(self, false); +} + +TSNode ts_node_prev_sibling(TSNode self) +{ + return ts_node__prev_sibling(self, true); +} + +TSNode ts_node_prev_named_sibling(TSNode self) +{ + return ts_node__prev_sibling(self, false); +} + +TSNode ts_node_first_child_for_byte(TSNode self, t_u32 byte) +{ + return ts_node__first_child_for_byte(self, byte, true); +} + +TSNode ts_node_first_named_child_for_byte(TSNode self, t_u32 byte) +{ + return ts_node__first_child_for_byte(self, byte, false); +} + +TSNode ts_node_descendant_for_byte_range(TSNode self, t_u32 start, t_u32 end) +{ + return ts_node__descendant_for_byte_range(self, start, end, true); +} + +TSNode ts_node_named_descendant_for_byte_range(TSNode self, t_u32 start, t_u32 end) +{ + return ts_node__descendant_for_byte_range(self, start, end, false); +} + +TSNode ts_node_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end) +{ + return ts_node__descendant_for_point_range(self, start, end, true); +} + +TSNode ts_node_named_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end) +{ + return ts_node__descendant_for_point_range(self, start, end, false); +} + +void ts_node_edit(TSNode *self, const TSInputEdit *edit) +{ + t_u32 start_byte = ts_node_start_byte(*self); + TSPoint start_point = ts_node_start_point(*self); + + if (start_byte >= edit->old_end_byte) + { + start_byte = edit->new_end_byte + (start_byte - edit->old_end_byte); + start_point = point_add(edit->new_end_point, point_sub(start_point, edit->old_end_point)); + } + else if (start_byte > edit->start_byte) + { + start_byte = edit->new_end_byte; + start_point = edit->new_end_point; + } + + self->context[0] = start_byte; + self->context[1] = start_point.row; + self->context[2] = start_point.column; } TSSymbol ts_node_field_id_for_child(TSNode self, t_u32 child_index) diff --git a/parser/src/parser.c b/parser/src/parser.c index 7f35a520..1d3ecb1a 100644 --- a/parser/src/parser.c +++ b/parser/src/parser.c @@ -1,7 +1,5 @@ #define _POSIX_C_SOURCE 200112L -#include "me/mem/mem.h" -#include "me/types.h" #include "parser/api.h" #include "parser/array.h" #include "parser/language.h" @@ -11,6 +9,8 @@ #include "parser/stack.h" #include "parser/subtree.h" #include "parser/tree.h" +#include "me/mem/mem.h" +#include "me/types.h" #include #include @@ -40,6 +40,7 @@ struct TSParser { Lexer lexer; Stack *stack; + SubtreePool tree_pool; const TSLanguage *language; ReduceActionSet reduce_actions; Subtree finished_tree; @@ -81,7 +82,7 @@ typedef struct TSStringInput // StringInput -/*R static R*/ const t_u8 *ts_string_input_read(void *_self, t_u32 byte, TSPoint point, t_u32 *length) +static const t_u8 *ts_string_input_read(void *_self, t_u32 byte, TSPoint point, t_u32 *length) { (void)point; TSStringInput *self = (TSStringInput *)_self; @@ -98,8 +99,28 @@ typedef struct TSStringInput } // Parser - Private +/* +static void ts_parser__log(TSParser *self) +{ + if (self->lexer.logger.log) + { + self->lexer.logger.log(self->lexer.logger.payload, TSLogTypeParse, self->lexer.debug_buffer); + } -/*R static R*/ bool ts_parser__breakdown_top_of_stack(TSParser *self, StackVersion version) + if (self->dot_graph_file) + { + fprintf(self->dot_graph_file, "graph {\nlabel=\""); + for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) + { + if (*chr == '"' || *chr == '\\') + fputc('\\', self->dot_graph_file); + fputc(*chr, self->dot_graph_file); + } + fprintf(self->dot_graph_file, "\"\n}\n\n"); + } +} +*/ +static bool ts_parser__breakdown_top_of_stack(TSParser *self, StackVersion version) { bool did_break_down = false; bool pending = false; @@ -142,7 +163,7 @@ typedef struct TSStringInput ts_stack_push(self->stack, slice.version, tree, false, state); } - ts_subtree_release(parent); + ts_subtree_release(&self->tree_pool, parent); array_delete(&slice.subtrees); LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent)); @@ -153,7 +174,7 @@ typedef struct TSStringInput return did_break_down; } -/*R static R*/ ErrorComparison ts_parser__compare_versions(TSParser *self, ErrorStatus a, ErrorStatus b) +static ErrorComparison ts_parser__compare_versions(TSParser *self, ErrorStatus a, ErrorStatus b) { (void)self; if (!a.is_in_error && b.is_in_error) @@ -211,7 +232,7 @@ typedef struct TSStringInput return ErrorComparisonNone; } -/*R static R*/ ErrorStatus ts_parser__version_status(TSParser *self, StackVersion version) +static ErrorStatus ts_parser__version_status(TSParser *self, StackVersion version) { t_u32 cost = ts_stack_error_cost(self->stack, version); bool is_paused = ts_stack_is_paused(self->stack, version); @@ -223,7 +244,7 @@ typedef struct TSStringInput .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE}; } -/*R static R*/ bool ts_parser__better_version_exists(TSParser *self, StackVersion version, bool is_in_error, t_u32 cost) +static bool ts_parser__better_version_exists(TSParser *self, StackVersion version, bool is_in_error, t_u32 cost) { if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) { @@ -259,19 +280,19 @@ typedef struct TSStringInput return false; } -/*R static R*/ bool ts_parser__call_main_lex_fn(TSParser *self, TSLexMode lex_mode) +static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexMode lex_mode) { return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); } -/*R static R*/ bool ts_parser__call_keyword_lex_fn(TSParser *self, TSLexMode lex_mode) +static bool ts_parser__call_keyword_lex_fn(TSParser *self, TSLexMode lex_mode) { (void)(lex_mode); return self->language->keyword_lex_fn(&self->lexer.data, 0); } -/*R static R*/ void ts_parser__external_scanner_create(TSParser *self) +static void ts_parser__external_scanner_create(TSParser *self) { if (self->language && self->language->external_scanner.states) { @@ -283,7 +304,7 @@ typedef struct TSStringInput } } -/*R static R*/ void ts_parser__external_scanner_destroy(TSParser *self) +static void ts_parser__external_scanner_destroy(TSParser *self) { if (self->language && self->external_scanner_payload && self->language->external_scanner.destroy) { @@ -292,7 +313,7 @@ typedef struct TSStringInput self->external_scanner_payload = NULL; } -/*R static R*/ t_u32 ts_parser__external_scanner_serialize(TSParser *self) +static t_u32 ts_parser__external_scanner_serialize(TSParser *self) { t_u32 length = self->language->external_scanner.serialize(self->external_scanner_payload, self->lexer.debug_buffer); @@ -300,7 +321,7 @@ typedef struct TSStringInput return length; } -/*R static R*/ void ts_parser__external_scanner_deserialize(TSParser *self, Subtree external_token) +static void ts_parser__external_scanner_deserialize(TSParser *self, Subtree external_token) { const t_u8 *data = NULL; t_u32 length = 0; @@ -313,13 +334,13 @@ typedef struct TSStringInput self->language->external_scanner.deserialize(self->external_scanner_payload, data, length); } -/*R static R*/ bool ts_parser__external_scanner_scan(TSParser *self, TSStateId external_lex_state) +static bool ts_parser__external_scanner_scan(TSParser *self, TSStateId external_lex_state) { const bool *valid_external_tokens = ts_language_enabled_external_tokens(self->language, external_lex_state); return self->language->external_scanner.scan(self->external_scanner_payload, &self->lexer.data, valid_external_tokens); } -/*R static R*/ bool ts_parser__can_reuse_first_leaf(TSParser *self, TSStateId state, Subtree tree, TableEntry *table_entry) +static bool ts_parser__can_reuse_first_leaf(TSParser *self, TSStateId state, Subtree tree, TableEntry *table_entry) { TSLexMode current_lex_mode = self->language->lex_modes[state]; TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree); @@ -347,7 +368,7 @@ typedef struct TSStringInput return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable; } -/*R static R*/ Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId parse_state) +static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId parse_state) { TSLexMode lex_mode = self->language->lex_modes[parse_state]; if (lex_mode.lex_state == (t_u16)-1) @@ -466,7 +487,7 @@ typedef struct TSStringInput Length padding = length_sub(error_start_position, start_position); Length size = length_sub(error_end_position, error_start_position); t_u32 lookahead_bytes = lookahead_end_byte - error_end_position.bytes; - result = ts_subtree_new_error(first_error_character, padding, size, lookahead_bytes, parse_state, self->language); + result = ts_subtree_new_error(&self->tree_pool, first_error_character, padding, size, lookahead_bytes, parse_state, self->language); } else { @@ -495,8 +516,8 @@ typedef struct TSStringInput } } - result = ts_subtree_new_leaf(symbol, padding, size, lookahead_bytes, parse_state, found_external_token, called_get_column, - is_keyword, self->language); + result = ts_subtree_new_leaf(&self->tree_pool, symbol, padding, size, lookahead_bytes, parse_state, found_external_token, + called_get_column, is_keyword, self->language); if (found_external_token) { @@ -510,8 +531,8 @@ typedef struct TSStringInput return result; } -/*R static R*/ Subtree ts_parser__get_cached_token(TSParser *self, TSStateId state, size_t position, Subtree last_external_token, - TableEntry *table_entry) +static Subtree ts_parser__get_cached_token(TSParser *self, TSStateId state, size_t position, Subtree last_external_token, + TableEntry *table_entry) { TokenCache *cache = &self->token_cache; if (cache->token.ptr && cache->byte_index == position && @@ -527,7 +548,7 @@ typedef struct TSStringInput return NULL_SUBTREE; } -/*R static R*/ void ts_parser__set_cached_token(TSParser *self, t_u32 byte_index, Subtree last_external_token, Subtree token) +static void ts_parser__set_cached_token(TSParser *self, t_u32 byte_index, Subtree last_external_token, Subtree token) { TokenCache *cache = &self->token_cache; if (token.ptr) @@ -535,9 +556,9 @@ typedef struct TSStringInput if (last_external_token.ptr) ts_subtree_retain(last_external_token); if (cache->token.ptr) - ts_subtree_release(cache->token); + ts_subtree_release(&self->tree_pool, cache->token); if (cache->last_external_token.ptr) - ts_subtree_release(cache->last_external_token); + ts_subtree_release(&self->tree_pool, cache->last_external_token); cache->token = token; cache->byte_index = byte_index; cache->last_external_token = last_external_token; @@ -547,9 +568,8 @@ typedef struct TSStringInput // // The decision is based on the trees' error costs (if any), their dynamic precedence, // and finally, as a default, by a recursive comparison of the trees' symbols. -/*R static R*/ bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) +static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) { - (void)(self); if (!left.ptr) return true; if (!right.ptr) @@ -584,7 +604,7 @@ typedef struct TSStringInput if (ts_subtree_error_cost(left) > 0) return true; - int comparison = ts_subtree_compare(left, right); + int comparison = ts_subtree_compare(left, right, &self->tree_pool); switch (comparison) { case -1: @@ -602,7 +622,7 @@ typedef struct TSStringInput // Determine if a given tree's children should be replaced by an alternative // array of children. -/*R static R*/ bool ts_parser__select_children(TSParser *self, Subtree left, const SubtreeArray *children) +static bool ts_parser__select_children(TSParser *self, Subtree left, const SubtreeArray *children) { array_assign(&self->scratch_trees, children); @@ -615,13 +635,13 @@ typedef struct TSStringInput return ts_parser__select_tree(self, left, ts_subtree_from_mut(scratch_tree)); } -/*R static R*/ void ts_parser__shift(TSParser *self, StackVersion version, TSStateId state, Subtree lookahead, bool extra) +static void ts_parser__shift(TSParser *self, StackVersion version, TSStateId state, Subtree lookahead, bool extra) { bool is_leaf = ts_subtree_child_count(lookahead) == 0; Subtree subtree_to_push = lookahead; if (extra != ts_subtree_extra(lookahead) && is_leaf) { - MutableSubtree result = ts_subtree_make_mut(lookahead); + MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead); ts_subtree_set_extra(&result, extra); subtree_to_push = ts_subtree_from_mut(result); } @@ -633,8 +653,8 @@ typedef struct TSStringInput } } -/*R static R*/ StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol, t_u32 count, int dynamic_precedence, - t_u16 production_id, bool is_fragile, bool end_of_non_terminal_extra) +static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol, t_u32 count, int dynamic_precedence, + t_u16 production_id, bool is_fragile, bool end_of_non_terminal_extra) { t_u32 initial_version_count = ts_stack_version_count(self->stack); @@ -657,14 +677,14 @@ typedef struct TSStringInput if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) { ts_stack_remove_version(self->stack, slice_version); - ts_subtree_array_delete(&slice.subtrees); + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); removed_version_count++; while (i + 1 < pop.size) { StackSlice next_slice = pop.contents[i + 1]; if (next_slice.version != slice.version) break; - ts_subtree_array_delete(&next_slice.subtrees); + ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); i++; } continue; @@ -694,15 +714,15 @@ typedef struct TSStringInput if (ts_parser__select_children(self, ts_subtree_from_mut(parent), &next_slice_children)) { - ts_subtree_array_clear(&self->trailing_extras); - ts_subtree_release(ts_subtree_from_mut(parent)); + ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras); + ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent)); array_swap(&self->trailing_extras, &self->trailing_extras2); parent = ts_subtree_new_node(symbol, &next_slice_children, production_id, self->language); } else { array_clear(&self->trailing_extras2); - ts_subtree_array_delete(&next_slice.subtrees); + ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); } } @@ -748,7 +768,7 @@ typedef struct TSStringInput return ts_stack_version_count(self->stack) > initial_version_count ? initial_version_count : STACK_VERSION_NONE; } -/*R static R*/ void ts_parser__accept(TSParser *self, StackVersion version, Subtree lookahead) +static void ts_parser__accept(TSParser *self, StackVersion version, Subtree lookahead) { assert(ts_subtree_is_eof(lookahead)); ts_stack_push(self->stack, version, lookahead, false, 1); @@ -764,6 +784,7 @@ typedef struct TSStringInput Subtree tree = trees.contents[j]; if (!ts_subtree_extra(tree)) { + assert(!tree.data.is_inline); t_u32 child_count = ts_subtree_child_count(tree); const Subtree *children = ts_subtree_children(tree); for (t_u32 k = 0; k < child_count; k++) @@ -772,7 +793,7 @@ typedef struct TSStringInput } array_splice(&trees, j, 1, child_count, children); root = ts_subtree_from_mut(ts_subtree_new_node(ts_subtree_symbol(tree), &trees, tree.ptr->production_id, self->language)); - ts_subtree_release(tree); + ts_subtree_release(&self->tree_pool, tree); break; } } @@ -784,12 +805,12 @@ typedef struct TSStringInput { if (ts_parser__select_tree(self, self->finished_tree, root)) { - ts_subtree_release(self->finished_tree); + ts_subtree_release(&self->tree_pool, self->finished_tree); self->finished_tree = root; } else { - ts_subtree_release(root); + ts_subtree_release(&self->tree_pool, root); } } else @@ -802,7 +823,7 @@ typedef struct TSStringInput ts_stack_halt(self->stack, version); } -/*R static R*/ bool ts_parser__do_all_potential_reductions(TSParser *self, StackVersion starting_version, TSSymbol lookahead_symbol) +static bool ts_parser__do_all_potential_reductions(TSParser *self, StackVersion starting_version, TSSymbol lookahead_symbol) { t_u32 initial_version_count = ts_stack_version_count(self->stack); @@ -907,7 +928,7 @@ typedef struct TSStringInput return can_shift_lookahead_symbol; } -/*R static R*/ bool ts_parser__recover_to_state(TSParser *self, StackVersion version, t_u32 depth, TSStateId goal_state) +static bool ts_parser__recover_to_state(TSParser *self, StackVersion version, t_u32 depth, TSStateId goal_state) { StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth); StackVersion previous_version = STACK_VERSION_NONE; @@ -918,7 +939,7 @@ typedef struct TSStringInput if (slice.version == previous_version) { - ts_subtree_array_delete(&slice.subtrees); + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); array_erase(&pop, i--); continue; } @@ -926,7 +947,7 @@ typedef struct TSStringInput if (ts_stack_state(self->stack, slice.version) != goal_state) { ts_stack_halt(self->stack, slice.version); - ts_subtree_array_delete(&slice.subtrees); + ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); array_erase(&pop, i--); continue; } @@ -945,7 +966,7 @@ typedef struct TSStringInput ts_subtree_retain(slice.subtrees.contents[j]); } } - ts_subtree_array_delete(&error_trees); + ts_subtree_array_delete(&self->tree_pool, &error_trees); } ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras); @@ -972,7 +993,7 @@ typedef struct TSStringInput return previous_version != STACK_VERSION_NONE; } -/*R static R*/ void ts_parser__recover(TSParser *self, StackVersion version, Subtree lookahead) +static void ts_parser__recover(TSParser *self, StackVersion version, Subtree lookahead) { bool did_recover = false; t_u32 previous_version_count = ts_stack_version_count(self->stack); @@ -1060,14 +1081,14 @@ typedef struct TSStringInput if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { ts_stack_halt(self->stack, version); - ts_subtree_release(lookahead); + ts_subtree_release(&self->tree_pool, lookahead); return; } if (did_recover && ts_subtree_has_external_scanner_state_change(lookahead)) { ts_stack_halt(self->stack, version); - ts_subtree_release(lookahead); + ts_subtree_release(&self->tree_pool, lookahead); return; } @@ -1089,7 +1110,7 @@ typedef struct TSStringInput if (ts_parser__better_version_exists(self, version, false, new_cost)) { ts_stack_halt(self->stack, version); - ts_subtree_release(lookahead); + ts_subtree_release(&self->tree_pool, lookahead); return; } @@ -1099,7 +1120,7 @@ typedef struct TSStringInput const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n); if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) { - MutableSubtree mutable_lookahead = ts_subtree_make_mut(lookahead); + MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); ts_subtree_set_extra(&mutable_lookahead, true); lookahead = ts_subtree_from_mut(mutable_lookahead); } @@ -1126,7 +1147,7 @@ typedef struct TSStringInput { for (t_u32 i = 1; i < pop.size; i++) { - ts_subtree_array_delete(&pop.contents[i].subtrees); + ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees); } while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) { @@ -1147,7 +1168,7 @@ typedef struct TSStringInput } } -/*R static R*/ void ts_parser__handle_error(TSParser *self, StackVersion version, Subtree lookahead) +static void ts_parser__handle_error(TSParser *self, StackVersion version, Subtree lookahead) { t_u32 previous_version_count = ts_stack_version_count(self->stack); @@ -1185,7 +1206,8 @@ typedef struct TSStringInput t_u32 lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead); StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v); - Subtree missing_tree = ts_subtree_new_missing_leaf(missing_symbol, padding, lookahead_bytes, self->language); + Subtree missing_tree = + ts_subtree_new_missing_leaf(&self->tree_pool, missing_symbol, padding, lookahead_bytes, self->language); ts_stack_push(self->stack, version_with_missing_tree, missing_tree, false, state_after_missing_symbol); if (ts_parser__do_all_potential_reductions(self, version_with_missing_tree, ts_subtree_leaf_symbol(lookahead))) @@ -1222,7 +1244,7 @@ typedef struct TSStringInput LOG_STACK(); } -/*R static R*/ bool ts_parser__advance(TSParser *self, StackVersion version, bool allow_node_reuse) +static bool ts_parser__advance(TSParser *self, StackVersion version, bool allow_node_reuse) { (void)(allow_node_reuse); TSStateId state = ts_stack_state(self->stack, version); @@ -1275,7 +1297,7 @@ typedef struct TSStringInput { if (lookahead.ptr) { - ts_subtree_release(lookahead); + ts_subtree_release(&self->tree_pool, lookahead); } return false; } @@ -1391,7 +1413,7 @@ typedef struct TSStringInput { LOG("switch from_keyword:%s, to_word_token:%s", TREE_NAME(lookahead), SYM_NAME(self->language->keyword_capture_token)); - MutableSubtree mutable_lookahead = ts_subtree_make_mut(lookahead); + MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language); lookahead = ts_subtree_from_mut(mutable_lookahead); continue; @@ -1415,7 +1437,7 @@ typedef struct TSStringInput if (ts_parser__breakdown_top_of_stack(self, version)) { state = ts_stack_state(self->stack, version); - ts_subtree_release(lookahead); + ts_subtree_release(&self->tree_pool, lookahead); needs_lex = true; continue; } @@ -1431,7 +1453,7 @@ typedef struct TSStringInput } } -/*R static R*/ t_u32 ts_parser__condense_stack(TSParser *self) +static t_u32 ts_parser__condense_stack(TSParser *self) { bool made_changes = false; t_u32 min_error_cost = UINT_MAX; @@ -1551,7 +1573,7 @@ typedef struct TSStringInput return min_error_cost; } -/*R static R*/ bool ts_parser_has_outstanding_parse(TSParser *self) +static bool ts_parser_has_outstanding_parse(TSParser *self) { return (self->external_scanner_payload || ts_stack_state(self->stack, 0) != 1 || ts_stack_node_count_since_error(self->stack, 0) != 0); } @@ -1564,7 +1586,8 @@ TSParser *ts_parser_new(void) ts_lexer_init(&self->lexer); array_init(&self->reduce_actions); array_reserve(&self->reduce_actions, 4); - self->stack = ts_stack_new(); + self->tree_pool = ts_subtree_pool_new(32); + self->stack = ts_stack_new(&self->tree_pool); self->finished_tree = NULL_SUBTREE; self->cancellation_flag = NULL; self->language = NULL; @@ -1590,11 +1613,12 @@ void ts_parser_delete(TSParser *self) } if (self->old_tree.ptr) { - ts_subtree_release(self->old_tree); + ts_subtree_release(&self->tree_pool, self->old_tree); self->old_tree = NULL_SUBTREE; } ts_lexer_delete(&self->lexer); ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); + ts_subtree_pool_delete(&self->tree_pool); array_delete(&self->trailing_extras); array_delete(&self->trailing_extras2); array_delete(&self->scratch_trees); @@ -1627,7 +1651,7 @@ void ts_parser_reset(TSParser *self) ts_parser__external_scanner_destroy(self); if (self->old_tree.ptr) { - ts_subtree_release(self->old_tree); + ts_subtree_release(&self->tree_pool, self->old_tree); self->old_tree = NULL_SUBTREE; } @@ -1636,7 +1660,7 @@ void ts_parser_reset(TSParser *self) ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); if (self->finished_tree.ptr) { - ts_subtree_release(self->finished_tree); + ts_subtree_release(&self->tree_pool, self->finished_tree); self->finished_tree = NULL_SUBTREE; } self->accept_count = 0; @@ -1715,7 +1739,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) } while (version_count != 0); assert(self->finished_tree.ptr); - ts_subtree_balance(self->finished_tree, self->language); + ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language); LOG("done"); LOG_TREE(self->finished_tree); diff --git a/parser/src/scanner.c b/parser/src/scanner.c index bdd0b6bd..024a0418 100644 --- a/parser/src/scanner.c +++ b/parser/src/scanner.c @@ -57,22 +57,22 @@ typedef struct Scanner Array(Heredoc) heredocs; } Scanner; -/*R static inline R*/ void advance(TSLexer *lexer) +static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } -/*R static inline R*/ void skip(TSLexer *lexer) +static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } -/*R static inline R*/ bool in_error_recovery(const bool *valid_symbols) +static inline bool in_error_recovery(const bool *valid_symbols) { return valid_symbols[ERROR_RECOVERY]; } -/*R static inline R*/ void reset_string(String *string) +static inline void reset_string(String *string) { if (string->size > 0) { @@ -81,7 +81,7 @@ typedef struct Scanner } } -/*R static inline R*/ void reset_heredoc(Heredoc *heredoc) +static inline void reset_heredoc(Heredoc *heredoc) { heredoc->is_raw = false; heredoc->started = false; @@ -89,7 +89,7 @@ typedef struct Scanner reset_string(&heredoc->delimiter); } -/*R static inline R*/ void reset(Scanner *scanner) +static inline void reset(Scanner *scanner) { for (t_u32 i = 0; i < scanner->heredocs.size; i++) { @@ -129,7 +129,7 @@ static t_u32 serialize(Scanner *scanner, t_u8 *buffer) return size; } -/*R static R*/ void deserialize(Scanner *scanner, const t_u8 *buffer, t_u32 length) +static void deserialize(Scanner *scanner, const t_u8 *buffer, t_u32 length) { if (length == 0) { @@ -181,7 +181,7 @@ static t_u32 serialize(Scanner *scanner, t_u8 *buffer) * POSIX-mandated substitution, and assumes the default value for * IFS. */ -/*R static R*/ bool advance_word(TSLexer *lexer, String *unquoted_word) +static bool advance_word(TSLexer *lexer, String *unquoted_word) { bool empty = true; t_i32 quote = 0; @@ -213,7 +213,7 @@ static t_u32 serialize(Scanner *scanner, t_u8 *buffer) return !empty; } -/*R static inline R*/ bool scan_bare_dollar(TSLexer *lexer) +static inline bool scan_bare_dollar(TSLexer *lexer) { while (iswspace(lexer->lookahead) && lexer->lookahead != '\n' && !lexer->eof(lexer)) skip(lexer); @@ -229,7 +229,7 @@ static t_u32 serialize(Scanner *scanner, t_u8 *buffer) return false; } -/*R static R*/ bool scan_heredoc_start(Heredoc *heredoc, TSLexer *lexer) +static bool scan_heredoc_start(Heredoc *heredoc, TSLexer *lexer) { while (iswspace(lexer->lookahead)) { @@ -248,7 +248,7 @@ static t_u32 serialize(Scanner *scanner, t_u8 *buffer) return found_delimiter; } -/*R static R*/ bool scan_heredoc_end_identifier(Heredoc *heredoc, TSLexer *lexer) +static bool scan_heredoc_end_identifier(Heredoc *heredoc, TSLexer *lexer) { reset_string(&heredoc->current_leading_word); // Scan the first 'n' characters on this line, to see if they match the @@ -268,7 +268,7 @@ static t_u32 serialize(Scanner *scanner, t_u8 *buffer) return heredoc->delimiter.size == 0 ? false : strcmp(heredoc->current_leading_word.contents, heredoc->delimiter.contents) == 0; } -/*R static R*/ bool scan_heredoc_content(Scanner *scanner, TSLexer *lexer, enum TokenType middle_type, enum TokenType end_type) +static bool scan_heredoc_content(Scanner *scanner, TSLexer *lexer, enum TokenType middle_type, enum TokenType end_type) { bool did_advance = false; Heredoc *heredoc = array_back(&scanner->heredocs); @@ -394,7 +394,7 @@ static t_u32 serialize(Scanner *scanner, t_u8 *buffer) } } -/*R static R*/ bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) +static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) { if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols)) { diff --git a/parser/src/stack.c b/parser/src/stack.c index 6046f449..b64fd055 100644 --- a/parser/src/stack.c +++ b/parser/src/stack.c @@ -1,11 +1,11 @@ #include "parser/stack.h" -#include "me/mem/mem.h" -#include "me/types.h" #include "parser/array.h" #include "parser/language.h" #include "parser/length.h" #include "parser/subtree.h" +#include "me/mem/mem.h" +#include "me/types.h" #include #include @@ -24,14 +24,14 @@ typedef struct StackLink struct StackNode { - TSStateId state; - Length position; - StackLink links[MAX_LINK_COUNT]; - t_u16 link_count; - t_u32 ref_count; - t_u32 error_cost; - t_u32 node_count; - int dynamic_precedence; + TSStateId state; + Length position; + StackLink links[MAX_LINK_COUNT]; + t_u16 link_count; + t_u32 ref_count; + t_u32 error_cost; + t_u32 node_count; + int dynamic_precedence; }; typedef struct StackIterator @@ -55,7 +55,7 @@ typedef struct StackHead { StackNode *node; StackSummary *summary; - t_u32 node_count_at_last_error; + t_u32 node_count_at_last_error; Subtree last_external_token; Subtree lookahead_when_paused; StackStatus status; @@ -66,7 +66,9 @@ struct Stack Array(StackHead) heads; StackSliceArray slices; Array(StackIterator) iterators; - StackNode *base_node; + StackNodeArray node_pool; + StackNode *base_node; + SubtreePool *subtree_pool; }; typedef t_u32 StackAction; @@ -79,7 +81,7 @@ enum StackAction typedef StackAction (*StackCallback)(void *, const StackIterator *); -/*R static R*/ void stack_node_retain(StackNode *self) +static void stack_node_retain(StackNode *self) { if (!self) return; @@ -88,7 +90,7 @@ typedef StackAction (*StackCallback)(void *, const StackIterator *); assert(self->ref_count != 0); } -/*R static R*/ void stack_node_release(StackNode *self) +static void stack_node_release(StackNode *self, StackNodeArray *pool, SubtreePool *subtree_pool) { recur: assert(self->ref_count != 0); @@ -103,15 +105,24 @@ recur: { StackLink link = self->links[i]; if (link.subtree.ptr) - ts_subtree_release(link.subtree); - stack_node_release(link.node); + ts_subtree_release(subtree_pool, link.subtree); + stack_node_release(link.node, pool, subtree_pool); } StackLink link = self->links[0]; if (link.subtree.ptr) - ts_subtree_release(link.subtree); + ts_subtree_release(subtree_pool, link.subtree); first_predecessor = self->links[0].node; } - mem_free(self); + + if (pool->size < MAX_NODE_POOL_SIZE) + { + array_push(pool, self); + } + else + { + mem_free(self); + } + if (first_predecessor) { self = first_predecessor; @@ -121,7 +132,7 @@ recur: /// Get the number of nodes in the subtree, for the purpose of measuring /// how much progress has been made by a given version of the stack. -/*R static R*/ t_u32 stack__subtree_node_count(Subtree subtree) +static t_u32 stack__subtree_node_count(Subtree subtree) { t_u32 count = ts_subtree_visible_descendant_count(subtree); if (ts_subtree_visible(subtree)) @@ -136,9 +147,9 @@ recur: return count; } -/*R static R*/ StackNode *stack_node_new(StackNode *previous_node, Subtree subtree, bool is_pending, TSStateId state) +static StackNode *stack_node_new(StackNode *previous_node, Subtree subtree, bool is_pending, TSStateId state, StackNodeArray *pool) { - StackNode *node = mem_alloc(sizeof(*node)); + StackNode *node = pool->size > 0 ? array_pop(pool) : mem_alloc(sizeof(StackNode)); *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state}; if (previous_node) @@ -172,7 +183,7 @@ recur: return node; } -/*R static R*/ bool stack__subtree_is_equivalent(Subtree left, Subtree right) +static bool stack__subtree_is_equivalent(Subtree left, Subtree right) { if (left.ptr == right.ptr) return true; @@ -192,7 +203,7 @@ recur: ts_subtree_extra(left) == ts_subtree_extra(right) && ts_subtree_external_scanner_state_eq(left, right)); } -/*R static R*/ void stack_node_add_link(StackNode *self, StackLink link) +static void stack_node_add_link(StackNode *self, StackLink link, SubtreePool *subtree_pool) { if (link.node == self) return; @@ -211,7 +222,7 @@ recur: if (ts_subtree_dynamic_precedence(link.subtree) > ts_subtree_dynamic_precedence(existing_link->subtree)) { ts_subtree_retain(link.subtree); - ts_subtree_release(existing_link->subtree); + ts_subtree_release(subtree_pool, existing_link->subtree); existing_link->subtree = link.subtree; self->dynamic_precedence = link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree); } @@ -224,7 +235,7 @@ recur: { for (int j = 0; j < link.node->link_count; j++) { - stack_node_add_link(existing_link->node, link.node->links[j]); + stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool); } t_i32 dynamic_precedence = link.node->dynamic_precedence; if (link.subtree.ptr) @@ -245,7 +256,7 @@ recur: stack_node_retain(link.node); t_u32 node_count = link.node->node_count; - int dynamic_precedence = link.node->dynamic_precedence; + int dynamic_precedence = link.node->dynamic_precedence; self->links[self->link_count++] = link; if (link.subtree.ptr) @@ -261,28 +272,28 @@ recur: self->dynamic_precedence = dynamic_precedence; } -/*R static R*/ void stack_head_delete(StackHead *self) +static void stack_head_delete(StackHead *self, StackNodeArray *pool, SubtreePool *subtree_pool) { if (self->node) { if (self->last_external_token.ptr) { - ts_subtree_release(self->last_external_token); + ts_subtree_release(subtree_pool, self->last_external_token); } if (self->lookahead_when_paused.ptr) { - ts_subtree_release(self->lookahead_when_paused); + ts_subtree_release(subtree_pool, self->lookahead_when_paused); } if (self->summary) { array_delete(self->summary); mem_free(self->summary); } - stack_node_release(self->node); + stack_node_release(self->node, pool, subtree_pool); } } -/*R static R*/ StackVersion ts_stack__add_version(Stack *self, StackVersion original_version, StackNode *node) +static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version, StackNode *node) { StackHead head = { .node = node, @@ -298,7 +309,7 @@ recur: return (StackVersion)(self->heads.size - 1); } -/*R static R*/ void ts_stack__add_slice(Stack *self, StackVersion original_version, StackNode *node, SubtreeArray *subtrees) +static void ts_stack__add_slice(Stack *self, StackVersion original_version, StackNode *node, SubtreeArray *subtrees) { for (t_u32 i = self->slices.size - 1; i + 1 > 0; i--) { @@ -316,7 +327,7 @@ recur: array_push(&self->slices, slice); } -/*R static R*/ StackSliceArray stack__iter(Stack *self, StackVersion version, StackCallback callback, void *payload, int goal_subtree_count) +static StackSliceArray stack__iter(Stack *self, StackVersion version, StackCallback callback, void *payload, int goal_subtree_count) { array_clear(&self->slices); array_clear(&self->iterators); @@ -364,7 +375,7 @@ recur: { if (!should_pop) { - ts_subtree_array_delete(&iterator->subtrees); + ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees); } array_erase(&self->iterators, i); i--, size--; @@ -421,18 +432,21 @@ recur: return self->slices; } -Stack *ts_stack_new(void) +Stack *ts_stack_new(SubtreePool *subtree_pool) { - Stack *self = mem_alloc(sizeof(*self)); + Stack *self = mem_alloc_array(1, sizeof(Stack)); array_init(&self->heads); array_init(&self->slices); array_init(&self->iterators); + array_init(&self->node_pool); array_reserve(&self->heads, 4); array_reserve(&self->slices, 4); array_reserve(&self->iterators, 4); + array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE); - self->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1); + self->subtree_pool = subtree_pool; + self->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &self->node_pool); ts_stack_clear(self); return self; @@ -444,12 +458,18 @@ void ts_stack_delete(Stack *self) array_delete(&self->slices); if (self->iterators.contents) array_delete(&self->iterators); - stack_node_release(self->base_node); + stack_node_release(self->base_node, &self->node_pool, self->subtree_pool); for (t_u32 i = 0; i < self->heads.size; i++) { - stack_head_delete(&self->heads.contents[i]); + stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); } array_clear(&self->heads); + if (self->node_pool.contents) + { + for (t_u32 i = 0; i < self->node_pool.size; i++) + mem_free(self->node_pool.contents[i]); + array_delete(&self->node_pool); + } array_delete(&self->heads); mem_free(self); } @@ -480,14 +500,14 @@ void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree if (token.ptr) ts_subtree_retain(token); if (head->last_external_token.ptr) - ts_subtree_release(head->last_external_token); + ts_subtree_release(self->subtree_pool, head->last_external_token); head->last_external_token = token; } t_u32 ts_stack_error_cost(const Stack *self, StackVersion version) { StackHead *head = array_get(&self->heads, version); - t_u32 result = head->node->error_cost; + t_u32 result = head->node->error_cost; if (head->status == StackStatusPaused || (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr)) { result += ERROR_COST_PER_RECOVERY; @@ -508,7 +528,7 @@ t_u32 ts_stack_node_count_since_error(const Stack *self, StackVersion version) void ts_stack_push(Stack *self, StackVersion version, Subtree subtree, bool pending, TSStateId state) { StackHead *head = array_get(&self->heads, version); - StackNode *new_node = stack_node_new(head->node, subtree, pending, state); + StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool); if (!subtree.ptr) head->node_count_at_last_error = new_node->node_count; head->node = new_node; @@ -619,14 +639,14 @@ StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) typedef struct SummarizeStackSession { StackSummary *summary; - t_u32 max_depth; + t_u32 max_depth; } SummarizeStackSession; StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) { SummarizeStackSession *session = payload; TSStateId state = iterator->node->state; - t_u32 depth = iterator->subtree_count; + t_u32 depth = iterator->subtree_count; if (depth > session->max_depth) return StackActionStop; for (t_u32 i = session->summary->size - 1; i + 1 > 0; i--) @@ -700,7 +720,7 @@ bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) void ts_stack_remove_version(Stack *self, StackVersion version) { - stack_head_delete(array_get(&self->heads, version)); + stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool); array_erase(&self->heads, version); } @@ -717,7 +737,7 @@ void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) source_head->summary = target_head->summary; target_head->summary = NULL; } - stack_head_delete(target_head); + stack_head_delete(target_head, &self->node_pool, self->subtree_pool); *target_head = *source_head; array_erase(&self->heads, v1); } @@ -749,7 +769,7 @@ bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) StackHead *head2 = &self->heads.contents[version2]; for (t_u32 i = 0; i < head2->node->link_count; i++) { - stack_node_add_link(head1->node, head2->node->links[i]); + stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool); } if (head1->node->state == ERROR_STATE) { @@ -810,7 +830,9 @@ void ts_stack_clear(Stack *self) { stack_node_retain(self->base_node); for (t_u32 i = 0; i < self->heads.size; i++) - stack_head_delete(&self->heads.contents[i]); + { + stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); + } array_clear(&self->heads); array_push(&self->heads, ((StackHead){ .node = self->base_node, @@ -819,3 +841,5 @@ void ts_stack_clear(Stack *self) .lookahead_when_paused = NULL_SUBTREE, })); } + +#undef forceinline diff --git a/parser/src/subtree.c b/parser/src/subtree.c index 09d793d6..afa2a10d 100644 --- a/parser/src/subtree.c +++ b/parser/src/subtree.c @@ -1,51 +1,59 @@ #include #include -#include #include #include +#include -#include "me/mem/mem.h" #include "me/types.h" #include "parser/array.h" #include "parser/language.h" #include "parser/length.h" #include "parser/subtree.h" +#include "me/mem/mem.h" -#pragma GCC diagnostic ignored "-Wunused-parameter" -#pragma GCC diagnostic ignored "-Wunknown-pragmas" -#pragma clang diagnostic ignored "-Wunused-parameter" - -typedef struct s_edit Edit; -struct s_edit +typedef struct { Length start; Length old_end; Length new_end; -}; +} Edit; #define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX -#define TS_MAX_TREE_POOL_SIZE 0 +#define TS_MAX_TREE_POOL_SIZE 32 // ExternalScannerState void ts_external_scanner_state_init(ExternalScannerState *self, const t_u8 *data, t_u32 length) { self->length = length; - self->long_data = mem_alloc(length); - memcpy(self->long_data, data, length); + if (length > sizeof(self->short_data)) + { + self->long_data = mem_alloc(length); + memcpy(self->long_data, data, length); + } + else + { + memcpy(self->short_data, data, length); + } } ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) { ExternalScannerState result = *self; - result.long_data = mem_alloc(self->length); - memcpy(result.long_data, self->long_data, self->length); + if (self->length > sizeof(self->short_data)) + { + result.long_data = mem_alloc(self->length); + memcpy(result.long_data, self->long_data, self->length); + } return result; } void ts_external_scanner_state_delete(ExternalScannerState *self) { - mem_free(self->long_data); + if (self->length > sizeof(self->short_data)) + { + mem_free(self->long_data); + } } const t_u8 *ts_external_scanner_state_data(const ExternalScannerState *self) @@ -83,18 +91,18 @@ void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) } } -void ts_subtree_array_clear(SubtreeArray *self) +void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) { for (t_u32 i = 0; i < self->size; i++) { - ts_subtree_release(self->contents[i]); + ts_subtree_release(pool, self->contents[i]); } array_clear(self); } -void ts_subtree_array_delete(SubtreeArray *self) +void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) { - ts_subtree_array_clear(self); + ts_subtree_array_clear(pool, self); array_delete(self); } @@ -128,60 +136,139 @@ void ts_subtree_array_reverse(SubtreeArray *self) } } -/*R static R*/ SubtreeHeapData *ts_subtree_pool_allocate() +// SubtreePool + +SubtreePool ts_subtree_pool_new(t_u32 capacity) { - return mem_alloc(sizeof(SubtreeHeapData)); + SubtreePool self = {array_new(), array_new()}; + array_reserve(&self.free_trees, capacity); + return self; } -// /*R static R*/ void ts_subtree_pool_free(SubtreeHeapData *tree) -//{ -// mem_free(tree); -// } +void ts_subtree_pool_delete(SubtreePool *self) +{ + if (self->free_trees.contents) + { + for (t_u32 i = 0; i < self->free_trees.size; i++) + { + mem_free(self->free_trees.contents[i].ptr); + } + array_delete(&self->free_trees); + } + if (self->tree_stack.contents) + array_delete(&self->tree_stack); +} + +static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) +{ + if (self->free_trees.size > 0) + { + return array_pop(&self->free_trees).ptr; + } + else + { + return mem_alloc(sizeof(SubtreeHeapData)); + } +} + +static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) +{ + if (self->free_trees.capacity > 0 && self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE) + { + array_push(&self->free_trees, (MutableSubtree){.ptr = tree}); + } + else + { + mem_free(tree); + } +} // Subtree -Subtree ts_subtree_new_leaf(TSSymbol symbol, Length padding, Length size, t_u32 lookahead_bytes, TSStateId parse_state, - bool has_external_tokens, bool depends_on_column, bool is_keyword, const TSLanguage *language) +static inline bool ts_subtree_can_inline(Length padding, Length size, t_u32 lookahead_bytes) +{ + return padding.bytes < TS_MAX_INLINE_TREE_LENGTH && padding.extent.row < 16 && padding.extent.column < TS_MAX_INLINE_TREE_LENGTH && + size.extent.row == 0 && size.extent.column < TS_MAX_INLINE_TREE_LENGTH && lookahead_bytes < 16; +} + +Subtree ts_subtree_new_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, Length size, t_u32 lookahead_bytes, + TSStateId parse_state, bool has_external_tokens, bool depends_on_column, bool is_keyword, + const TSLanguage *language) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); bool extra = symbol == ts_builtin_sym_end; - SubtreeHeapData *data = ts_subtree_pool_allocate(); - *data = (SubtreeHeapData){.ref_count = 1, - .padding = padding, - .size = size, - .lookahead_bytes = lookahead_bytes, - .error_cost = 0, - .child_count = 0, - .symbol = symbol, - .parse_state = parse_state, - .visible = metadata.visible, - .named = metadata.named, - .extra = extra, - .fragile_left = false, - .fragile_right = false, - .has_changes = false, - .has_external_tokens = has_external_tokens, - .has_external_scanner_state_change = false, - .depends_on_column = depends_on_column, - .is_missing = false, - .is_keyword = is_keyword, - {{.first_leaf = {.symbol = 0, .parse_state = 0}}}}; - return (Subtree){.ptr = data}; + bool is_inline = (symbol <= UINT8_MAX && !has_external_tokens && ts_subtree_can_inline(padding, size, lookahead_bytes)); + + if (is_inline) + { + return (Subtree){{ + .parse_state = parse_state, + .symbol = symbol, + .padding_bytes = padding.bytes, + .padding_rows = padding.extent.row, + .padding_columns = padding.extent.column, + .size_bytes = size.bytes, + .lookahead_bytes = lookahead_bytes, + .visible = metadata.visible, + .named = metadata.named, + .extra = extra, + .has_changes = false, + .is_missing = false, + .is_keyword = is_keyword, + .is_inline = true, + }}; + } + else + { + SubtreeHeapData *data = ts_subtree_pool_allocate(pool); + *data = (SubtreeHeapData){.ref_count = 1, + .padding = padding, + .size = size, + .lookahead_bytes = lookahead_bytes, + .error_cost = 0, + .child_count = 0, + .symbol = symbol, + .parse_state = parse_state, + .visible = metadata.visible, + .named = metadata.named, + .extra = extra, + .fragile_left = false, + .fragile_right = false, + .has_changes = false, + .has_external_tokens = has_external_tokens, + .has_external_scanner_state_change = false, + .depends_on_column = depends_on_column, + .is_missing = false, + .is_keyword = is_keyword, + {{.first_leaf = {.symbol = 0, .parse_state = 0}}}}; + return (Subtree){.ptr = data}; + } } void ts_subtree_set_symbol(MutableSubtree *self, TSSymbol symbol, const TSLanguage *language) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); - self->ptr->symbol = symbol; - self->ptr->named = metadata.named; - self->ptr->visible = metadata.visible; + if (self->data.is_inline) + { + assert(symbol < UINT8_MAX); + self->data.symbol = symbol; + self->data.named = metadata.named; + self->data.visible = metadata.visible; + } + else + { + self->ptr->symbol = symbol; + self->ptr->named = metadata.named; + self->ptr->visible = metadata.visible; + } } -Subtree ts_subtree_new_error(t_i32 lookahead_char, Length padding, Length size, t_u32 bytes_scanned, TSStateId parse_state, - const TSLanguage *language) +Subtree ts_subtree_new_error(SubtreePool *pool, t_i32 lookahead_char, Length padding, Length size, t_u32 bytes_scanned, + TSStateId parse_state, const TSLanguage *language) { - Subtree result = ts_subtree_new_leaf(ts_builtin_sym_error, padding, size, bytes_scanned, parse_state, false, false, false, language); + Subtree result = + ts_subtree_new_leaf(pool, ts_builtin_sym_error, padding, size, bytes_scanned, parse_state, false, false, false, language); SubtreeHeapData *data = (SubtreeHeapData *)result.ptr; data->fragile_left = true; data->fragile_right = true; @@ -217,17 +304,18 @@ MutableSubtree ts_subtree_clone(Subtree self) // This takes ownership of the subtree. If the subtree has only one owner, // this will directly convert it into a mutable version. Otherwise, it will // perform a copy. -MutableSubtree ts_subtree_make_mut(Subtree self) +MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { + if (self.data.is_inline) + return (MutableSubtree){self.data}; if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); MutableSubtree result = ts_subtree_clone(self); - ts_subtree_release(self); + ts_subtree_release(pool, self); return result; } -/* -static void ts_subtree__compress(MutableSubtree self, t_u32 count, const TSLanguage *language, MutableSubtreeArray *stack) +static void ts_subtree__compress(MutableSubtree self, t_u32 count, const TSLanguage *language, MutableSubtreeArray *stack) { t_u32 initial_stack_size = stack->size; @@ -239,11 +327,12 @@ static void ts_subtree__compress(MutableSubtree self, t_u32 count, const TSLang break; MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); - if (child.ptr->child_count < 2 || child.ptr->ref_count > 1 || child.ptr->symbol != symbol) + if (child.data.is_inline || child.ptr->child_count < 2 || child.ptr->ref_count > 1 || child.ptr->symbol != symbol) break; MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]); - if (grandchild.ptr->child_count < 2 || grandchild.ptr->ref_count > 1 || grandchild.ptr->symbol != symbol) + if (grandchild.data.is_inline || grandchild.ptr->child_count < 2 || grandchild.ptr->ref_count > 1 || + grandchild.ptr->symbol != symbol) break; ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild); @@ -263,12 +352,11 @@ static void ts_subtree__compress(MutableSubtree self, t_u32 count, const TSLang ts_subtree_summarize_children(tree, language); } } -*/ -void ts_subtree_balance(Subtree self, const TSLanguage *language) +void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language) { - printf("BALANCING TREES\n"); - /* + array_clear(&pool->tree_stack); + if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) { array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); @@ -303,12 +391,13 @@ void ts_subtree_balance(Subtree self, const TSLanguage *language) } } } - */ } // Assign all of the node's properties that depend on its children. void ts_subtree_summarize_children(MutableSubtree self, const TSLanguage *language) { + assert(!self.data.is_inline); + self.ptr->named_child_count = 0; self.ptr->visible_child_count = 0; self.ptr->error_cost = 0; @@ -319,9 +408,9 @@ void ts_subtree_summarize_children(MutableSubtree self, const TSLanguage *langua self.ptr->has_external_scanner_state_change = false; self.ptr->dynamic_precedence = 0; - t_u32 structural_index = 0; + t_u32 structural_index = 0; const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); - t_u32 lookahead_end_byte = 0; + t_u32 lookahead_end_byte = 0; const Subtree *children = ts_subtree_children(self); for (t_u32 i = 0; i < self.ptr->child_count; i++) @@ -500,49 +589,38 @@ Subtree ts_subtree_new_error_node(SubtreeArray *children, bool extra, const TSLa // // This node is treated as 'extra'. Its children are prevented from having // having any effect on the parse state. -Subtree ts_subtree_new_missing_leaf(TSSymbol symbol, Length padding, t_u32 lookahead_bytes, const TSLanguage *language) +Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, t_u32 lookahead_bytes, + const TSLanguage *language) { - Subtree result = ts_subtree_new_leaf(symbol, padding, length_zero(), lookahead_bytes, 0, false, false, false, language); - ((SubtreeHeapData *)result.ptr)->is_missing = true; + Subtree result = ts_subtree_new_leaf(pool, symbol, padding, length_zero(), lookahead_bytes, 0, false, false, false, language); + if (result.data.is_inline) + { + result.data.is_missing = true; + } + else + { + ((SubtreeHeapData *)result.ptr)->is_missing = true; + } return result; } void ts_subtree_retain(Subtree self) { + if (self.data.is_inline) + return; assert(self.ptr->ref_count > 0); (*(t_u32 *)(&self.ptr->ref_count))++; assert(self.ptr->ref_count != 0); } -void ts_subtree_release(Subtree self) +void ts_subtree_release(SubtreePool *pool, Subtree self) { - t_usize i; - Subtree *children; - if (self.ptr->ref_count > 0 && --(*(t_u32 *)(&self.ptr->ref_count)) == 0) - { - - if (self.ptr->child_count > 0) - { - children = ts_subtree_children(self); - i = 0; - while (i < self.ptr->child_count) - ts_subtree_release(children[i++]); - mem_free(children); - } - else - { - - if (self.ptr->has_external_tokens) - ts_external_scanner_state_delete((void *)&self.ptr->external_scanner_state); - mem_free((void *)self.ptr); - } - } - - /* + if (self.data.is_inline) + return; array_clear(&pool->tree_stack); assert(self.ptr->ref_count > 0); - if () + if (--(*(t_u32 *)(&self.ptr->ref_count)) == 0) { array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); } @@ -556,6 +634,8 @@ void ts_subtree_release(Subtree self) for (t_u32 i = 0; i < tree.ptr->child_count; i++) { Subtree child = children[i]; + if (child.data.is_inline) + continue; assert(child.ptr->ref_count > 0); if (--(*(t_u32 *)(&child.ptr->ref_count)) == 0) { @@ -573,13 +653,10 @@ void ts_subtree_release(Subtree self) ts_subtree_pool_free(pool, tree.ptr); } } - */ } -int ts_subtree_compare(Subtree left, Subtree right) +int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool) { - printf("SUBTREE COMPARE\n"); - /* array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left)); array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right)); @@ -610,14 +687,183 @@ int ts_subtree_compare(Subtree left, Subtree right) array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left_child)); array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right_child)); } - }*/ + } return 0; } -/*R static inline R*/ void ts_subtree_set_has_changes(MutableSubtree *self) +static inline void ts_subtree_set_has_changes(MutableSubtree *self) { - self->ptr->has_changes = true; + if (self->data.is_inline) + { + self->data.has_changes = true; + } + else + { + self->ptr->has_changes = true; + } +} + +Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, SubtreePool *pool) +{ + typedef struct + { + Subtree *tree; + Edit edit; + } EditEntry; + + Array(EditEntry) stack = array_new(); + array_push(&stack, ((EditEntry){ + .tree = &self, + .edit = + (Edit){ + .start = {input_edit->start_byte, input_edit->start_point}, + .old_end = {input_edit->old_end_byte, input_edit->old_end_point}, + .new_end = {input_edit->new_end_byte, input_edit->new_end_point}, + }, + })); + + while (stack.size) + { + EditEntry entry = array_pop(&stack); + Edit edit = entry.edit; + bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes; + bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; + bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree); + + Length size = ts_subtree_size(*entry.tree); + Length padding = ts_subtree_padding(*entry.tree); + Length total_size = length_add(padding, size); + t_u32 lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); + t_u32 end_byte = total_size.bytes + lookahead_bytes; + if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) + continue; + + // If the edit is entirely within the space before this subtree, then shift this + // subtree over according to the edit without changing its size. + if (edit.old_end.bytes <= padding.bytes) + { + padding = length_add(edit.new_end, length_sub(padding, edit.old_end)); + } + + // If the edit starts in the space before this subtree and extends into this subtree, + // shrink the subtree's content to compensate for the change in the space before it. + else if (edit.start.bytes < padding.bytes) + { + size = length_saturating_sub(size, length_sub(edit.old_end, padding)); + padding = edit.new_end; + } + + // If the edit is a pure insertion right at the start of the subtree, + // shift the subtree over according to the insertion. + else if (edit.start.bytes == padding.bytes && is_pure_insertion) + { + padding = edit.new_end; + } + + // If the edit is within this subtree, resize the subtree to reflect the edit. + else if (edit.start.bytes < total_size.bytes || (edit.start.bytes == total_size.bytes && is_pure_insertion)) + { + size = length_add(length_sub(edit.new_end, padding), length_saturating_sub(total_size, edit.old_end)); + } + + MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree); + + if (result.data.is_inline) + { + if (ts_subtree_can_inline(padding, size, lookahead_bytes)) + { + result.data.padding_bytes = padding.bytes; + result.data.padding_rows = padding.extent.row; + result.data.padding_columns = padding.extent.column; + result.data.size_bytes = size.bytes; + } + else + { + SubtreeHeapData *data = ts_subtree_pool_allocate(pool); + data->ref_count = 1; + data->padding = padding; + data->size = size; + data->lookahead_bytes = lookahead_bytes; + data->error_cost = 0; + data->child_count = 0; + data->symbol = result.data.symbol; + data->parse_state = result.data.parse_state; + data->visible = result.data.visible; + data->named = result.data.named; + data->extra = result.data.extra; + data->fragile_left = false; + data->fragile_right = false; + data->has_changes = false; + data->has_external_tokens = false; + data->depends_on_column = false; + data->is_missing = result.data.is_missing; + data->is_keyword = result.data.is_keyword; + result.ptr = data; + } + } + else + { + result.ptr->padding = padding; + result.ptr->size = size; + } + + ts_subtree_set_has_changes(&result); + *entry.tree = ts_subtree_from_mut(result); + + Length child_left, child_right = length_zero(); + for (t_u32 i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) + { + Subtree *child = &ts_subtree_children(*entry.tree)[i]; + Length child_size = ts_subtree_total_size(*child); + child_left = child_right; + child_right = length_add(child_left, child_size); + + // If this child ends before the edit, it is not affected. + if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) + continue; + + // Keep editing child nodes until a node is reached that starts after the edit. + // Also, if this node's validity depends on its column position, then continue + // invaliditing child nodes until reaching a line break. + if (((child_left.bytes > edit.old_end.bytes) || (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) && + (!invalidate_first_row || child_left.extent.row > entry.tree->ptr->padding.extent.row)) + { + break; + } + + // Transform edit into the child's coordinate space. + Edit child_edit = { + .start = length_saturating_sub(edit.start, child_left), + .old_end = length_saturating_sub(edit.old_end, child_left), + .new_end = length_saturating_sub(edit.new_end, child_left), + }; + + // Interpret all inserted text as applying to the *first* child that touches the edit. + // Subsequent children are only never have any text inserted into them; they are only + // shrunk to compensate for the edit. + if (child_right.bytes > edit.start.bytes || (child_right.bytes == edit.start.bytes && is_pure_insertion)) + { + edit.new_end = edit.start; + } + + // Children that occur before the edit are not reshaped by the edit. + else + { + child_edit.old_end = child_edit.start; + child_edit.new_end = child_edit.start; + } + + // Queue processing of this child's subtree. + array_push(&stack, ((EditEntry){ + .tree = child, + .edit = child_edit, + })); + } + } + + array_delete(&stack); + return self; } Subtree ts_subtree_last_external_token(Subtree tree) @@ -639,13 +885,222 @@ Subtree ts_subtree_last_external_token(Subtree tree) return tree; } +static size_t ts_subtree__write_char_to_string(char *str, size_t n, t_i32 chr) +{ + if (chr == -1) + return snprintf(str, n, "INVALID"); + else if (chr == '\0') + return snprintf(str, n, "'\\0'"); + else if (chr == '\n') + return snprintf(str, n, "'\\n'"); + else if (chr == '\t') + return snprintf(str, n, "'\\t'"); + else if (chr == '\r') + return snprintf(str, n, "'\\r'"); + else if (0 < chr && chr < 128 && isprint(chr)) + return snprintf(str, n, "'%c'", chr); + else + return snprintf(str, n, "%d", chr); +} + +static t_const_str const ROOT_FIELD = "__ROOT__"; + +static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t limit, const TSLanguage *language, bool include_all, + TSSymbol alias_symbol, bool alias_is_named, t_const_str field_name) +{ + if (!self.ptr) + return snprintf(string, limit, "(NULL)"); + + char *cursor = string; + char **writer = (limit > 1) ? &cursor : &string; + bool is_root = field_name == ROOT_FIELD; + bool is_visible = + include_all || ts_subtree_missing(self) || (alias_symbol ? alias_is_named : ts_subtree_visible(self) && ts_subtree_named(self)); + + if (is_visible) + { + if (!is_root) + { + cursor += snprintf(*writer, limit, " "); + if (field_name) + { + cursor += snprintf(*writer, limit, "%s: ", field_name); + } + } + + if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) + { + cursor += snprintf(*writer, limit, "(UNEXPECTED "); + cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char); + } + else + { + TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); + t_const_str symbol_name = ts_language_symbol_name(language, symbol); + if (ts_subtree_missing(self)) + { + cursor += snprintf(*writer, limit, "(MISSING "); + if (alias_is_named || ts_subtree_named(self)) + { + cursor += snprintf(*writer, limit, "%s", symbol_name); + } + else + { + cursor += snprintf(*writer, limit, "\"%s\"", symbol_name); + } + } + else + { + cursor += snprintf(*writer, limit, "(%s", symbol_name); + } + } + } + else if (is_root) + { + TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); + t_const_str symbol_name = ts_language_symbol_name(language, symbol); + if (ts_subtree_child_count(self) > 0) + { + cursor += snprintf(*writer, limit, "(%s", symbol_name); + } + else if (ts_subtree_named(self)) + { + cursor += snprintf(*writer, limit, "(%s)", symbol_name); + } + else + { + cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name); + } + } + + if (ts_subtree_child_count(self)) + { + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map(language, self.ptr->production_id, &field_map, &field_map_end); + + t_u32 structural_child_index = 0; + for (t_u32 i = 0; i < self.ptr->child_count; i++) + { + Subtree child = ts_subtree_children(self)[i]; + if (ts_subtree_extra(child)) + { + cursor += ts_subtree__write_to_string(child, *writer, limit, language, include_all, 0, false, NULL); + } + else + { + TSSymbol subtree_alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0; + bool subtree_alias_is_named = + subtree_alias_symbol ? ts_language_symbol_metadata(language, subtree_alias_symbol).named : false; + + t_const_str child_field_name = is_visible ? NULL : field_name; + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) + { + if (!map->inherited && map->child_index == structural_child_index) + { + child_field_name = language->field_names[map->field_id]; + break; + } + } + + cursor += ts_subtree__write_to_string(child, *writer, limit, language, include_all, subtree_alias_symbol, + subtree_alias_is_named, child_field_name); + structural_child_index++; + } + } + } + + if (is_visible) + cursor += snprintf(*writer, limit, ")"); + + return cursor - string; +} + +char *ts_subtree_string(Subtree self, TSSymbol alias_symbol, bool alias_is_named, const TSLanguage *language, bool include_all) +{ + char scratch_string[1]; + size_t size = ts_subtree__write_to_string(self, scratch_string, 1, language, include_all, alias_symbol, alias_is_named, ROOT_FIELD) + 1; + char *result = mem_alloc(size * sizeof(char)); + ts_subtree__write_to_string(self, result, size, language, include_all, alias_symbol, alias_is_named, ROOT_FIELD); + return result; +} + + +/* +void ts_subtree__print_dot_graph(const Subtree *self, t_u32 start_offset, const TSLanguage *language, TSSymbol alias_symbol, FILE *f) +{ + TSSymbol subtree_symbol = ts_subtree_symbol(*self); + TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol; + t_u32 end_offset = start_offset + ts_subtree_total_bytes(*self); + fprintf(f, "tree_%p [label=\"", (void *)self); + ts_language_write_symbol_as_dot_string(language, f, symbol); + fprintf(f, "\""); + + if (ts_subtree_child_count(*self) == 0) + fprintf(f, ", shape=plaintext"); + if (ts_subtree_extra(*self)) + fprintf(f, ", fontcolor=gray"); + + fprintf(f, + ", tooltip=\"" + "range: %u - %u\n" + "state: %d\n" + "error-cost: %u\n" + "has-changes: %u\n" + "depends-on-column: %u\n" + "descendant-count: %u\n" + "repeat-depth: %u\n" + "lookahead-bytes: %u", + start_offset, end_offset, ts_subtree_parse_state(*self), ts_subtree_error_cost(*self), ts_subtree_has_changes(*self), + ts_subtree_depends_on_column(*self), ts_subtree_visible_descendant_count(*self), ts_subtree_repeat_depth(*self), + ts_subtree_lookahead_bytes(*self)); + + if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) + { + fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char); + } + + fprintf(f, "\"]\n"); + + t_u32 child_start_offset = start_offset; + t_u32 child_info_offset = language->max_alias_sequence_length * ts_subtree_production_id(*self); + for (t_u32 i = 0, n = ts_subtree_child_count(*self); i < n; i++) + { + const Subtree *child = &ts_subtree_children(*self)[i]; + TSSymbol subtree_alias_symbol = 0; + if (!ts_subtree_extra(*child) && child_info_offset) + { + subtree_alias_symbol = language->alias_sequences[child_info_offset]; + child_info_offset++; + } + ts_subtree__print_dot_graph(child, child_start_offset, language, subtree_alias_symbol, f); + fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i); + child_start_offset += ts_subtree_total_bytes(*child); + } +} +*/ + +/* +void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) +{ + fprintf(f, "digraph tree {\n"); + fprintf(f, "edge [arrowhead=none]\n"); + ts_subtree__print_dot_graph(&self, 0, language, 0, f); + fprintf(f, "}\n"); +} +*/ + const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) { static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0}; - if (self.ptr && self.ptr->has_external_tokens && self.ptr->child_count == 0) + if (self.ptr && !self.data.is_inline && self.ptr->has_external_tokens && self.ptr->child_count == 0) + { return &self.ptr->external_scanner_state; + } else + { return &empty_state; + } } bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) diff --git a/parser/src/tree.c b/parser/src/tree.c index 3145a83f..da90fa43 100644 --- a/parser/src/tree.c +++ b/parser/src/tree.c @@ -30,6 +30,10 @@ void ts_tree_delete(TSTree *self) { if (!self) return; + + SubtreePool pool = ts_subtree_pool_new(0); + ts_subtree_release(&pool, self->root); + ts_subtree_pool_delete(&pool); ts_language_delete(self->language); mem_free(self->included_ranges); mem_free(self); @@ -50,3 +54,56 @@ const TSLanguage *ts_tree_language(const TSTree *self) { return self->language; } + +void ts_tree_edit(TSTree *self, const TSInputEdit *edit) +{ + for (t_u32 i = 0; i < self->included_range_count; i++) + { + TSRange *range = &self->included_ranges[i]; + if (range->end_byte >= edit->old_end_byte) + { + if (range->end_byte != UINT32_MAX) + { + range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte); + range->end_point = point_add(edit->new_end_point, point_sub(range->end_point, edit->old_end_point)); + if (range->end_byte < edit->new_end_byte) + { + range->end_byte = UINT32_MAX; + range->end_point = POINT_MAX; + } + } + } + else if (range->end_byte > edit->start_byte) + { + range->end_byte = edit->start_byte; + range->end_point = edit->start_point; + } + if (range->start_byte >= edit->old_end_byte) + { + range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte); + range->start_point = point_add(edit->new_end_point, point_sub(range->start_point, edit->old_end_point)); + if (range->start_byte < edit->new_end_byte) + { + range->start_byte = UINT32_MAX; + range->start_point = POINT_MAX; + } + } + else if (range->start_byte > edit->start_byte) + { + range->start_byte = edit->start_byte; + range->start_point = edit->start_point; + } + } + + SubtreePool pool = ts_subtree_pool_new(0); + self->root = ts_subtree_edit(self->root, edit, &pool); + ts_subtree_pool_delete(&pool); +} + +TSRange *ts_tree_included_ranges(const TSTree *self, t_u32 *length) +{ + *length = self->included_range_count; + TSRange *ranges = mem_alloc_array(self->included_range_count, sizeof(TSRange)); + memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange)); + return ranges; +}