From 881a43bd47a6eaed7467a45af14d56feb83d1425 Mon Sep 17 00:00:00 2001 From: Maix0 Date: Sun, 18 Aug 2024 22:00:21 +0200 Subject: [PATCH] Removed the inline representation of subtrees --- parser/include/parser/subtree.h | 75 ++++++++----------- parser/src/parser.c | 1 - parser/src/subtree.c | 126 +++++--------------------------- 3 files changed, 47 insertions(+), 155 deletions(-) diff --git a/parser/include/parser/subtree.h b/parser/include/parser/subtree.h index 1082518e..231af778 100644 --- a/parser/include/parser/subtree.h +++ b/parser/include/parser/subtree.h @@ -1,11 +1,11 @@ #ifndef SUBTREE_H #define SUBTREE_H +#include "me/types.h" #include "parser/api.h" #include "parser/array.h" #include "parser/length.h" #include "parser/parser.h" -#include "me/types.h" #include #include #include @@ -47,7 +47,7 @@ typedef struct ExternalScannerState ExternalScannerState; // Because of alignment, for any valid pointer this will be 0, giving // us the opportunity to make use of this bit to signify whether to use // the pointer or the inline struct. -typedef struct SubtreeInlineData SubtreeInlineData; +typedef struct SubtreeInlineData SubtreeInlineData_; struct SubtreeInlineData { @@ -122,14 +122,12 @@ typedef struct SubtreeHeapData // The fundamental building block of a syntax tree. typedef union Subtree { - SubtreeInlineData data; const SubtreeHeapData *ptr; } Subtree; // Like Subtree, but mutable. typedef union MutableSubtree { - SubtreeInlineData data; - SubtreeHeapData *ptr; + SubtreeHeapData *ptr; } MutableSubtree; typedef Array(Subtree) SubtreeArray; @@ -177,39 +175,39 @@ bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); static inline TSSymbol ts_subtree_symbol(Subtree self) { - return ((self).data.is_inline ? (self).data.symbol : (self).ptr->symbol); + return ((self).ptr->symbol); } static inline bool ts_subtree_visible(Subtree self) { - return ((self).data.is_inline ? (self).data.visible : (self).ptr->visible); + return ((self).ptr->visible); } static inline bool ts_subtree_named(Subtree self) { - return ((self).data.is_inline ? (self).data.named : (self).ptr->named); + return ((self).ptr->named); } static inline bool ts_subtree_extra(Subtree self) { - return ((self).data.is_inline ? (self).data.extra : (self).ptr->extra); + return ((self).ptr->extra); } static inline bool ts_subtree_has_changes(Subtree self) { - return ((self).data.is_inline ? (self).data.has_changes : (self).ptr->has_changes); + return ((self).ptr->has_changes); } static inline bool ts_subtree_missing(Subtree self) { - return ((self).data.is_inline ? (self).data.is_missing : (self).ptr->is_missing); + return ((self).ptr->is_missing); } static inline bool ts_subtree_is_keyword(Subtree self) { - return ((self).data.is_inline ? (self).data.is_keyword : (self).ptr->is_keyword); + return ((self).ptr->is_keyword); } static inline TSStateId ts_subtree_parse_state(Subtree self) { - return ((self).data.is_inline ? (self).data.parse_state : (self).ptr->parse_state); + return ((self).ptr->parse_state); } static inline t_u32 ts_subtree_lookahead_bytes(Subtree self) { - return ((self).data.is_inline ? (self).data.lookahead_bytes : (self).ptr->lookahead_bytes); + return ((self).ptr->lookahead_bytes); } // Get the size needed to store a heap-allocated subtree with the given @@ -221,20 +219,15 @@ static inline size_t ts_subtree_alloc_size(t_u32 child_count) // Get a subtree's children, which are allocated immediately before the // tree's own heap data. -#define ts_subtree_children(self) ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) +#define ts_subtree_children(self) ((Subtree *)((self).ptr) - (self).ptr->child_count) static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) { - if (self->data.is_inline) - self->data.extra = is_extra; - else - self->ptr->extra = is_extra; + self->ptr->extra = is_extra; } static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) { - if (self.data.is_inline) - return self.data.symbol; if (self.ptr->child_count == 0) return self.ptr->symbol; return self.ptr->first_leaf.symbol; @@ -242,8 +235,6 @@ static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) { - if (self.data.is_inline) - return self.data.parse_state; if (self.ptr->child_count == 0) return self.ptr->parse_state; return self.ptr->first_leaf.parse_state; @@ -251,18 +242,12 @@ static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) static inline Length ts_subtree_padding(Subtree self) { - if (self.data.is_inline) - return ((Length){self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}}); - else - return self.ptr->padding; + return self.ptr->padding; } static inline Length ts_subtree_size(Subtree self) { - if (self.data.is_inline) - return ((Length){self.data.size_bytes, {0, self.data.size_bytes}}); - else - return self.ptr->size; + return self.ptr->size; } static inline Length ts_subtree_total_size(Subtree self) @@ -277,22 +262,22 @@ static inline t_u32 ts_subtree_total_bytes(Subtree self) static inline t_u32 ts_subtree_child_count(Subtree self) { - return (self.data.is_inline ? 0 : self.ptr->child_count); + return (self.ptr->child_count); } static inline t_u32 ts_subtree_repeat_depth(Subtree self) { - return (self.data.is_inline ? 0 : self.ptr->repeat_depth); + return (self.ptr->repeat_depth); } static inline t_u32 ts_subtree_is_repetition(Subtree self) { - return (self.data.is_inline ? 0 : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0); + return (!self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0); } static inline t_u32 ts_subtree_visible_descendant_count(Subtree self) { - return ((self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->visible_descendant_count); + return ((self.ptr->child_count == 0) ? 0 : self.ptr->visible_descendant_count); } static inline t_u32 ts_subtree_visible_child_count(Subtree self) @@ -308,12 +293,12 @@ static inline t_u32 ts_subtree_error_cost(Subtree self) if (ts_subtree_missing(self)) return (ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY); else - return (self.data.is_inline ? 0 : self.ptr->error_cost); + return (self.ptr->error_cost); } static inline t_i32 ts_subtree_dynamic_precedence(Subtree self) { - return ((self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence); + return ((self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence); } static inline t_u16 ts_subtree_production_id(Subtree self) @@ -326,32 +311,32 @@ static inline t_u16 ts_subtree_production_id(Subtree self) static inline bool ts_subtree_fragile_left(Subtree self) { - return (self.data.is_inline ? false : self.ptr->fragile_left); + return (self.ptr->fragile_left); } static inline bool ts_subtree_fragile_right(Subtree self) { - return (self.data.is_inline ? false : self.ptr->fragile_right); + return (self.ptr->fragile_right); } static inline bool ts_subtree_has_external_tokens(Subtree self) { - return (self.data.is_inline ? false : self.ptr->has_external_tokens); + return (self.ptr->has_external_tokens); } static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) { - return (self.data.is_inline ? false : self.ptr->has_external_scanner_state_change); + return (self.ptr->has_external_scanner_state_change); } static inline bool ts_subtree_depends_on_column(Subtree self) { - return (self.data.is_inline ? false : self.ptr->depends_on_column); + return (self.ptr->depends_on_column); } static inline bool ts_subtree_is_fragile(Subtree self) { - return (self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right)); + return ((self.ptr->fragile_left || self.ptr->fragile_right)); } static inline bool ts_subtree_is_error(Subtree self) @@ -368,7 +353,7 @@ static inline Subtree ts_subtree_from_mut(MutableSubtree self) { Subtree result; - result.data = self.data; + result.ptr = self.ptr; return (result); } @@ -376,7 +361,7 @@ static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) { MutableSubtree result; - result.data = self.data; + result.ptr = (void *)self.ptr; return (result); } diff --git a/parser/src/parser.c b/parser/src/parser.c index 1d3ecb1a..3745e5e9 100644 --- a/parser/src/parser.c +++ b/parser/src/parser.c @@ -784,7 +784,6 @@ static void ts_parser__accept(TSParser *self, StackVersion version, Subtree look Subtree tree = trees.contents[j]; if (!ts_subtree_extra(tree)) { - assert(!tree.data.is_inline); t_u32 child_count = ts_subtree_child_count(tree); const Subtree *children = ts_subtree_children(tree); for (t_u32 k = 0; k < child_count; k++) diff --git a/parser/src/subtree.c b/parser/src/subtree.c index afa2a10d..fe17ce84 100644 --- a/parser/src/subtree.c +++ b/parser/src/subtree.c @@ -1,15 +1,15 @@ #include #include +#include #include #include -#include +#include "me/mem/mem.h" #include "me/types.h" #include "parser/array.h" #include "parser/language.h" #include "parser/length.h" #include "parser/subtree.h" -#include "me/mem/mem.h" typedef struct { @@ -191,35 +191,12 @@ static inline bool ts_subtree_can_inline(Length padding, Length size, t_u32 look size.extent.row == 0 && size.extent.column < TS_MAX_INLINE_TREE_LENGTH && lookahead_bytes < 16; } -Subtree ts_subtree_new_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, Length size, t_u32 lookahead_bytes, - TSStateId parse_state, bool has_external_tokens, bool depends_on_column, bool is_keyword, - const TSLanguage *language) +Subtree ts_subtree_new_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, Length size, t_u32 lookahead_bytes, TSStateId parse_state, + bool has_external_tokens, bool depends_on_column, bool is_keyword, const TSLanguage *language) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); bool extra = symbol == ts_builtin_sym_end; - bool is_inline = (symbol <= UINT8_MAX && !has_external_tokens && ts_subtree_can_inline(padding, size, lookahead_bytes)); - - if (is_inline) - { - return (Subtree){{ - .parse_state = parse_state, - .symbol = symbol, - .padding_bytes = padding.bytes, - .padding_rows = padding.extent.row, - .padding_columns = padding.extent.column, - .size_bytes = size.bytes, - .lookahead_bytes = lookahead_bytes, - .visible = metadata.visible, - .named = metadata.named, - .extra = extra, - .has_changes = false, - .is_missing = false, - .is_keyword = is_keyword, - .is_inline = true, - }}; - } - else { SubtreeHeapData *data = ts_subtree_pool_allocate(pool); *data = (SubtreeHeapData){.ref_count = 1, @@ -249,14 +226,6 @@ Subtree ts_subtree_new_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, void ts_subtree_set_symbol(MutableSubtree *self, TSSymbol symbol, const TSLanguage *language) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); - if (self->data.is_inline) - { - assert(symbol < UINT8_MAX); - self->data.symbol = symbol; - self->data.named = metadata.named; - self->data.visible = metadata.visible; - } - else { self->ptr->symbol = symbol; self->ptr->named = metadata.named; @@ -306,8 +275,6 @@ MutableSubtree ts_subtree_clone(Subtree self) // perform a copy. MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { - if (self.data.is_inline) - return (MutableSubtree){self.data}; if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); MutableSubtree result = ts_subtree_clone(self); @@ -327,12 +294,11 @@ static void ts_subtree__compress(MutableSubtree self, t_u32 count, const TSLangu break; MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); - if (child.data.is_inline || child.ptr->child_count < 2 || child.ptr->ref_count > 1 || child.ptr->symbol != symbol) + if (child.ptr->child_count < 2 || child.ptr->ref_count > 1 || child.ptr->symbol != symbol) break; MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]); - if (grandchild.data.is_inline || grandchild.ptr->child_count < 2 || grandchild.ptr->ref_count > 1 || - grandchild.ptr->symbol != symbol) + if (grandchild.ptr->child_count < 2 || grandchild.ptr->ref_count > 1 || grandchild.ptr->symbol != symbol) break; ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild); @@ -396,8 +362,6 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu // Assign all of the node's properties that depend on its children. void ts_subtree_summarize_children(MutableSubtree self, const TSLanguage *language) { - assert(!self.data.is_inline); - self.ptr->named_child_count = 0; self.ptr->visible_child_count = 0; self.ptr->error_cost = 0; @@ -408,9 +372,9 @@ void ts_subtree_summarize_children(MutableSubtree self, const TSLanguage *langua self.ptr->has_external_scanner_state_change = false; self.ptr->dynamic_precedence = 0; - t_u32 structural_index = 0; + t_u32 structural_index = 0; const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); - t_u32 lookahead_end_byte = 0; + t_u32 lookahead_end_byte = 0; const Subtree *children = ts_subtree_children(self); for (t_u32 i = 0; i < self.ptr->child_count; i++) @@ -589,25 +553,15 @@ Subtree ts_subtree_new_error_node(SubtreeArray *children, bool extra, const TSLa // // This node is treated as 'extra'. Its children are prevented from having // having any effect on the parse state. -Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, t_u32 lookahead_bytes, - const TSLanguage *language) +Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, t_u32 lookahead_bytes, const TSLanguage *language) { Subtree result = ts_subtree_new_leaf(pool, symbol, padding, length_zero(), lookahead_bytes, 0, false, false, false, language); - if (result.data.is_inline) - { - result.data.is_missing = true; - } - else - { - ((SubtreeHeapData *)result.ptr)->is_missing = true; - } + ((SubtreeHeapData *)result.ptr)->is_missing = true; return result; } void ts_subtree_retain(Subtree self) { - if (self.data.is_inline) - return; assert(self.ptr->ref_count > 0); (*(t_u32 *)(&self.ptr->ref_count))++; assert(self.ptr->ref_count != 0); @@ -615,8 +569,6 @@ void ts_subtree_retain(Subtree self) void ts_subtree_release(SubtreePool *pool, Subtree self) { - if (self.data.is_inline) - return; array_clear(&pool->tree_stack); assert(self.ptr->ref_count > 0); @@ -634,8 +586,6 @@ void ts_subtree_release(SubtreePool *pool, Subtree self) for (t_u32 i = 0; i < tree.ptr->child_count; i++) { Subtree child = children[i]; - if (child.data.is_inline) - continue; assert(child.ptr->ref_count > 0); if (--(*(t_u32 *)(&child.ptr->ref_count)) == 0) { @@ -694,14 +644,7 @@ int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool) static inline void ts_subtree_set_has_changes(MutableSubtree *self) { - if (self->data.is_inline) - { - self->data.has_changes = true; - } - else - { - self->ptr->has_changes = true; - } + self->ptr->has_changes = true; } Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, SubtreePool *pool) @@ -731,11 +674,11 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, SubtreePool bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree); - Length size = ts_subtree_size(*entry.tree); - Length padding = ts_subtree_padding(*entry.tree); - Length total_size = length_add(padding, size); - t_u32 lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); - t_u32 end_byte = total_size.bytes + lookahead_bytes; + Length size = ts_subtree_size(*entry.tree); + Length padding = ts_subtree_padding(*entry.tree); + Length total_size = length_add(padding, size); + t_u32 lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); + t_u32 end_byte = total_size.bytes + lookahead_bytes; if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue; @@ -769,40 +712,6 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, SubtreePool MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree); - if (result.data.is_inline) - { - if (ts_subtree_can_inline(padding, size, lookahead_bytes)) - { - result.data.padding_bytes = padding.bytes; - result.data.padding_rows = padding.extent.row; - result.data.padding_columns = padding.extent.column; - result.data.size_bytes = size.bytes; - } - else - { - SubtreeHeapData *data = ts_subtree_pool_allocate(pool); - data->ref_count = 1; - data->padding = padding; - data->size = size; - data->lookahead_bytes = lookahead_bytes; - data->error_cost = 0; - data->child_count = 0; - data->symbol = result.data.symbol; - data->parse_state = result.data.parse_state; - data->visible = result.data.visible; - data->named = result.data.named; - data->extra = result.data.extra; - data->fragile_left = false; - data->fragile_right = false; - data->has_changes = false; - data->has_external_tokens = false; - data->depends_on_column = false; - data->is_missing = result.data.is_missing; - data->is_keyword = result.data.is_keyword; - result.ptr = data; - } - } - else { result.ptr->padding = padding; result.ptr->size = size; @@ -1025,7 +934,6 @@ char *ts_subtree_string(Subtree self, TSSymbol alias_symbol, bool alias_is_named return result; } - /* void ts_subtree__print_dot_graph(const Subtree *self, t_u32 start_offset, const TSLanguage *language, TSSymbol alias_symbol, FILE *f) { @@ -1093,7 +1001,7 @@ void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE * const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) { static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0}; - if (self.ptr && !self.data.is_inline && self.ptr->has_external_tokens && self.ptr->child_count == 0) + if (self.ptr && self.ptr->has_external_tokens && self.ptr->child_count == 0) { return &self.ptr->external_scanner_state; }