From 5898689697c30dde0316ba35535ecc7f308de86f Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Sun, 30 Jun 2024 19:37:09 +0200 Subject: [PATCH] trimmed more fat --- parser/src/language.c | 8 +- parser/src/lexer.h | 8 +- parser/src/lib.c | 3 - parser/src/parser.c | 119 +-- parser/src/reduce_action.h | 8 +- parser/src/reusable_node.h | 154 +-- parser/src/stack.h | 37 +- parser/src/subtree.h | 523 +++++----- parser/src/tree.c | 49 +- parser/src/tree.h | 34 +- parser/src/tree_cursor.c | 714 -------------- parser/src/tree_cursor.h | 48 - parser/src/wasm_store.c | 1847 ------------------------------------ parser/src/wasm_store.h | 31 - 14 files changed, 432 insertions(+), 3151 deletions(-) delete mode 100644 parser/src/tree_cursor.c delete mode 100644 parser/src/tree_cursor.h delete mode 100644 parser/src/wasm_store.c delete mode 100644 parser/src/wasm_store.h diff --git a/parser/src/language.c b/parser/src/language.c index 84b15c01..0d379007 100644 --- a/parser/src/language.c +++ b/parser/src/language.c @@ -1,19 +1,13 @@ #include "./language.h" -#include "./wasm_store.h" #include "api.h" #include const TSLanguage *ts_language_copy(const TSLanguage *self) { - if (self && ts_language_is_wasm(self)) { - ts_wasm_language_retain(self); - } return self; } void ts_language_delete(const TSLanguage *self) { - if (self && ts_language_is_wasm(self)) { - ts_wasm_language_release(self); - } + (void)(self); } uint32_t ts_language_symbol_count(const TSLanguage *self) { diff --git a/parser/src/lexer.h b/parser/src/lexer.h index a8cc38f1..4c5fcc9a 100644 --- a/parser/src/lexer.h +++ b/parser/src/lexer.h @@ -1,16 +1,13 @@ #ifndef TREE_SITTER_LEXER_H_ #define TREE_SITTER_LEXER_H_ -#ifdef __cplusplus -extern "C" { -#endif #include "./length.h" #include "./subtree.h" #include "api.h" #include "./parser.h" -typedef struct { +typedef struct Lexer{ TSLexer data; Length current_position; Length token_start_position; @@ -42,8 +39,5 @@ void ts_lexer_mark_end(Lexer *); bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count); TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count); -#ifdef __cplusplus -} -#endif #endif // TREE_SITTER_LEXER_H_ diff --git a/parser/src/lib.c b/parser/src/lib.c index 5644226b..314201ca 100644 --- a/parser/src/lib.c +++ b/parser/src/lib.c @@ -1,7 +1,6 @@ #define _POSIX_C_SOURCE 200112L #include "./alloc.c" -// #include "./get_changed_ranges.c" #include "./language.c" #include "./lexer.c" #include "./node.c" @@ -9,8 +8,6 @@ #include "./stack.c" #include "./subtree.c" #include "./tree.c" -#include "./tree_cursor.c" -#include "./wasm_store.c" #include "./lex.c" #include "./create_language.c" #include "./scanner.c" diff --git a/parser/src/parser.c b/parser/src/parser.c index 2a1a841b..1335f76b 100644 --- a/parser/src/parser.c +++ b/parser/src/parser.c @@ -13,7 +13,6 @@ #include "./stack.h" #include "./subtree.h" #include "./tree.h" -#include "./wasm_store.h" #include "api.h" #include #include @@ -96,7 +95,7 @@ static const unsigned MAX_SUMMARY_DEPTH = 16; static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100; -typedef struct +typedef struct TokenCache { Subtree token; Subtree last_external_token; @@ -109,7 +108,6 @@ struct TSParser Stack *stack; SubtreePool tree_pool; const TSLanguage *language; - TSWasmStore *wasm_store; ReduceActionSet reduce_actions; Subtree finished_tree; SubtreeArray trailing_extras; @@ -129,7 +127,7 @@ struct TSParser bool has_scanner_error; }; -typedef struct +typedef struct ErrorStatus { unsigned cost; unsigned node_count; @@ -137,7 +135,7 @@ typedef struct bool is_in_error; } ErrorStatus; -typedef enum +typedef enum ErrorComparison { ErrorComparisonTakeLeft, ErrorComparisonPreferLeft, @@ -146,7 +144,7 @@ typedef enum ErrorComparisonTakeRight, } ErrorComparison; -typedef struct +typedef struct TSStringInput { const char *string; uint32_t length; @@ -374,42 +372,22 @@ static bool ts_parser__better_version_exists(TSParser *self, StackVersion versio static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexMode lex_mode) { - if (ts_language_is_wasm(self->language)) - { - return ts_wasm_store_call_lex_main(self->wasm_store, lex_mode.lex_state); - } - else - { - return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); - } + return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); } static bool ts_parser__call_keyword_lex_fn(TSParser *self, TSLexMode lex_mode) { (void)(lex_mode); - if (ts_language_is_wasm(self->language)) - { - return ts_wasm_store_call_lex_keyword(self->wasm_store, 0); - } - else - { - return self->language->keyword_lex_fn(&self->lexer.data, 0); - } + + return self->language->keyword_lex_fn(&self->lexer.data, 0); } static void ts_parser__external_scanner_create(TSParser *self) { if (self->language && self->language->external_scanner.states) { - if (ts_language_is_wasm(self->language)) - { - self->external_scanner_payload = (void *)(uintptr_t)ts_wasm_store_call_scanner_create(self->wasm_store); - if (ts_wasm_store_has_error(self->wasm_store)) - { - self->has_scanner_error = true; - } - } - else if (self->language->external_scanner.create) + + if (self->language->external_scanner.create) { self->external_scanner_payload = self->language->external_scanner.create(); } @@ -418,8 +396,7 @@ static void ts_parser__external_scanner_create(TSParser *self) static void ts_parser__external_scanner_destroy(TSParser *self) { - if (self->language && self->external_scanner_payload && self->language->external_scanner.destroy && - !ts_language_is_wasm(self->language)) + if (self->language && self->external_scanner_payload && self->language->external_scanner.destroy) { self->language->external_scanner.destroy(self->external_scanner_payload); } @@ -428,16 +405,10 @@ static void ts_parser__external_scanner_destroy(TSParser *self) static unsigned ts_parser__external_scanner_serialize(TSParser *self) { - if (ts_language_is_wasm(self->language)) - { - return ts_wasm_store_call_scanner_serialize(self->wasm_store, (uintptr_t)self->external_scanner_payload, self->lexer.debug_buffer); - } - else - { - uint32_t length = self->language->external_scanner.serialize(self->external_scanner_payload, self->lexer.debug_buffer); - assert(length <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE); - return length; - } + + uint32_t length = self->language->external_scanner.serialize(self->external_scanner_payload, self->lexer.debug_buffer); + assert(length <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE); + return length; } static void ts_parser__external_scanner_deserialize(TSParser *self, Subtree external_token) @@ -450,37 +421,13 @@ static void ts_parser__external_scanner_deserialize(TSParser *self, Subtree exte length = external_token.ptr->external_scanner_state.length; } - if (ts_language_is_wasm(self->language)) - { - ts_wasm_store_call_scanner_deserialize(self->wasm_store, (uintptr_t)self->external_scanner_payload, data, length); - if (ts_wasm_store_has_error(self->wasm_store)) - { - self->has_scanner_error = true; - } - } - else - { - self->language->external_scanner.deserialize(self->external_scanner_payload, data, length); - } + self->language->external_scanner.deserialize(self->external_scanner_payload, data, length); } static bool ts_parser__external_scanner_scan(TSParser *self, TSStateId external_lex_state) { - if (ts_language_is_wasm(self->language)) - { - bool result = ts_wasm_store_call_scanner_scan(self->wasm_store, (uintptr_t)self->external_scanner_payload, - external_lex_state * self->language->external_token_count); - if (ts_wasm_store_has_error(self->wasm_store)) - { - self->has_scanner_error = true; - } - return result; - } - else - { - const bool *valid_external_tokens = ts_language_enabled_external_tokens(self->language, external_lex_state); - return self->language->external_scanner.scan(self->external_scanner_payload, &self->lexer.data, valid_external_tokens); - } + const bool *valid_external_tokens = ts_language_enabled_external_tokens(self->language, external_lex_state); + return self->language->external_scanner.scan(self->external_scanner_payload, &self->lexer.data, valid_external_tokens); } static bool ts_parser__can_reuse_first_leaf(TSParser *self, TSStateId state, Subtree tree, TableEntry *table_entry) @@ -1876,7 +1823,6 @@ void ts_parser_delete(TSParser *self) ts_subtree_release(&self->tree_pool, self->old_tree); self->old_tree = NULL_SUBTREE; } - ts_wasm_store_delete(self->wasm_store); ts_lexer_delete(&self->lexer); ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); ts_subtree_pool_delete(&self->tree_pool); @@ -1902,12 +1848,6 @@ bool ts_parser_set_language(TSParser *self, const TSLanguage *language) { if (language->version > TREE_SITTER_LANGUAGE_VERSION || language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION) return false; - - if (ts_language_is_wasm(language)) - { - if (!self->wasm_store || !ts_wasm_store_start(self->wasm_store, &self->lexer.data, language)) - return false; - } } self->language = ts_language_copy(language); @@ -1978,11 +1918,6 @@ const TSRange *ts_parser_included_ranges(const TSParser *self, uint32_t *count) void ts_parser_reset(TSParser *self) { ts_parser__external_scanner_destroy(self); - if (self->wasm_store) - { - ts_wasm_store_reset(self->wasm_store); - } - if (self->old_tree.ptr) { ts_subtree_release(&self->tree_pool, self->old_tree); @@ -2009,13 +1944,6 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) if (!self->language || !input.read) return NULL; - if (ts_language_is_wasm(self->language)) - { - if (!self->wasm_store) - return NULL; - ts_wasm_store_start(self->wasm_store, &self->lexer.data, self->language); - } - ts_lexer_set_input(&self->lexer, input); self->included_range_difference_index = 0; @@ -2140,17 +2068,4 @@ TSTree *ts_parser_parse_string_encoding(TSParser *self, const TSTree *old_tree, }); } -void ts_parser_set_wasm_store(TSParser *self, TSWasmStore *store) -{ - ts_wasm_store_delete(self->wasm_store); - self->wasm_store = store; -} - -TSWasmStore *ts_parser_take_wasm_store(TSParser *self) -{ - TSWasmStore *result = self->wasm_store; - self->wasm_store = NULL; - return result; -} - #undef LOG diff --git a/parser/src/reduce_action.h b/parser/src/reduce_action.h index 0637c24c..02ae2cc9 100644 --- a/parser/src/reduce_action.h +++ b/parser/src/reduce_action.h @@ -1,14 +1,11 @@ #ifndef TREE_SITTER_REDUCE_ACTION_H_ #define TREE_SITTER_REDUCE_ACTION_H_ -#ifdef __cplusplus -extern "C" { -#endif #include "./array.h" #include "api.h" -typedef struct { +typedef struct ReduceAction { uint32_t count; TSSymbol symbol; int dynamic_precedence; @@ -27,8 +24,5 @@ static inline void ts_reduce_action_set_add(ReduceActionSet *self, array_push(self, new_action); } -#ifdef __cplusplus -} -#endif #endif // TREE_SITTER_REDUCE_ACTION_H_ diff --git a/parser/src/reusable_node.h b/parser/src/reusable_node.h index 63fe3c1a..339edc59 100644 --- a/parser/src/reusable_node.h +++ b/parser/src/reusable_node.h @@ -1,95 +1,111 @@ #include "./subtree.h" -typedef struct { - Subtree tree; - uint32_t child_index; - uint32_t byte_offset; +typedef struct StackEntry +{ + Subtree tree; + uint32_t child_index; + uint32_t byte_offset; } StackEntry; -typedef struct { - Array(StackEntry) stack; - Subtree last_external_token; +typedef struct ReusableNode +{ + Array(StackEntry) stack; + Subtree last_external_token; } ReusableNode; -static inline ReusableNode reusable_node_new(void) { - return (ReusableNode) {array_new(), NULL_SUBTREE}; +static inline ReusableNode reusable_node_new(void) +{ + return (ReusableNode){array_new(), NULL_SUBTREE}; } -static inline void reusable_node_clear(ReusableNode *self) { - array_clear(&self->stack); - self->last_external_token = NULL_SUBTREE; +static inline void reusable_node_clear(ReusableNode *self) +{ + array_clear(&self->stack); + self->last_external_token = NULL_SUBTREE; } -static inline Subtree reusable_node_tree(ReusableNode *self) { - return self->stack.size > 0 - ? self->stack.contents[self->stack.size - 1].tree - : NULL_SUBTREE; +static inline Subtree reusable_node_tree(ReusableNode *self) +{ + return self->stack.size > 0 ? self->stack.contents[self->stack.size - 1].tree : NULL_SUBTREE; } -static inline uint32_t reusable_node_byte_offset(ReusableNode *self) { - return self->stack.size > 0 - ? self->stack.contents[self->stack.size - 1].byte_offset - : UINT32_MAX; +static inline uint32_t reusable_node_byte_offset(ReusableNode *self) +{ + return self->stack.size > 0 ? self->stack.contents[self->stack.size - 1].byte_offset : UINT32_MAX; } -static inline void reusable_node_delete(ReusableNode *self) { - array_delete(&self->stack); +static inline void reusable_node_delete(ReusableNode *self) +{ + array_delete(&self->stack); } -static inline void reusable_node_advance(ReusableNode *self) { - StackEntry last_entry = *array_back(&self->stack); - uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree); - if (ts_subtree_has_external_tokens(last_entry.tree)) { - self->last_external_token = ts_subtree_last_external_token(last_entry.tree); - } +static inline void reusable_node_advance(ReusableNode *self) +{ + StackEntry last_entry = *array_back(&self->stack); + uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree); + if (ts_subtree_has_external_tokens(last_entry.tree)) + { + self->last_external_token = ts_subtree_last_external_token(last_entry.tree); + } - Subtree tree; - uint32_t next_index; - do { - StackEntry popped_entry = array_pop(&self->stack); - next_index = popped_entry.child_index + 1; - if (self->stack.size == 0) return; - tree = array_back(&self->stack)->tree; - } while (ts_subtree_child_count(tree) <= next_index); + Subtree tree; + uint32_t next_index; + do + { + StackEntry popped_entry = array_pop(&self->stack); + next_index = popped_entry.child_index + 1; + if (self->stack.size == 0) + return; + tree = array_back(&self->stack)->tree; + } while (ts_subtree_child_count(tree) <= next_index); - array_push(&self->stack, ((StackEntry) { - .tree = ts_subtree_children(tree)[next_index], - .child_index = next_index, - .byte_offset = byte_offset, - })); + array_push(&self->stack, ((StackEntry){ + .tree = ts_subtree_children(tree)[next_index], + .child_index = next_index, + .byte_offset = byte_offset, + })); } -static inline bool reusable_node_descend(ReusableNode *self) { - StackEntry last_entry = *array_back(&self->stack); - if (ts_subtree_child_count(last_entry.tree) > 0) { - array_push(&self->stack, ((StackEntry) { - .tree = ts_subtree_children(last_entry.tree)[0], - .child_index = 0, - .byte_offset = last_entry.byte_offset, - })); - return true; - } else { - return false; - } +static inline bool reusable_node_descend(ReusableNode *self) +{ + StackEntry last_entry = *array_back(&self->stack); + if (ts_subtree_child_count(last_entry.tree) > 0) + { + array_push(&self->stack, ((StackEntry){ + .tree = ts_subtree_children(last_entry.tree)[0], + .child_index = 0, + .byte_offset = last_entry.byte_offset, + })); + return true; + } + else + { + return false; + } } -static inline void reusable_node_advance_past_leaf(ReusableNode *self) { - while (reusable_node_descend(self)) {} - reusable_node_advance(self); +static inline void reusable_node_advance_past_leaf(ReusableNode *self) +{ + while (reusable_node_descend(self)) + { + } + reusable_node_advance(self); } -static inline void reusable_node_reset(ReusableNode *self, Subtree tree) { - reusable_node_clear(self); - array_push(&self->stack, ((StackEntry) { - .tree = tree, - .child_index = 0, - .byte_offset = 0, - })); +static inline void reusable_node_reset(ReusableNode *self, Subtree tree) +{ + reusable_node_clear(self); + array_push(&self->stack, ((StackEntry){ + .tree = tree, + .child_index = 0, + .byte_offset = 0, + })); - // Never reuse the root node, because it has a non-standard internal structure - // due to transformations that are applied when it is accepted: adding the EOF - // child and any extra children. - if (!reusable_node_descend(self)) { - reusable_node_clear(self); - } + // Never reuse the root node, because it has a non-standard internal structure + // due to transformations that are applied when it is accepted: adding the EOF + // child and any extra children. + if (!reusable_node_descend(self)) + { + reusable_node_clear(self); + } } diff --git a/parser/src/stack.h b/parser/src/stack.h index 86abbc9d..11c005e5 100644 --- a/parser/src/stack.h +++ b/parser/src/stack.h @@ -1,13 +1,9 @@ #ifndef TREE_SITTER_PARSE_STACK_H_ #define TREE_SITTER_PARSE_STACK_H_ -#ifdef __cplusplus -extern "C" { -#endif - #include "./array.h" -#include "./subtree.h" #include "./error_costs.h" +#include "./subtree.h" #include typedef struct Stack Stack; @@ -15,19 +11,24 @@ typedef struct Stack Stack; typedef unsigned StackVersion; #define STACK_VERSION_NONE ((StackVersion)-1) -typedef struct { - SubtreeArray subtrees; - StackVersion version; +typedef struct StackSlice +{ + SubtreeArray subtrees; + StackVersion version; } StackSlice; typedef Array(StackSlice) StackSliceArray; -typedef struct { - Length position; - unsigned depth; - TSStateId state; +typedef struct StackSummaryEntry +{ + Length position; + unsigned depth; + TSStateId state; } StackSummaryEntry; + typedef Array(StackSummaryEntry) StackSummary; +typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t); + // Create a stack. Stack *ts_stack_new(SubtreePool *); @@ -45,7 +46,7 @@ TSStateId ts_stack_state(const Stack *, StackVersion); Subtree ts_stack_last_external_token(const Stack *, StackVersion); // Set the last external token associated with a given version of the stack. -void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree ); +void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree); // Get the position of the given version of the stack within the document. Length ts_stack_position(const Stack *, StackVersion); @@ -55,7 +56,7 @@ Length ts_stack_position(const Stack *, StackVersion); // This transfers ownership of the tree to the Stack. Callers that // need to retain ownership of the tree for their own purposes should // first retain the tree. -void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId); +void ts_stack_push(Stack *, StackVersion, Subtree, bool, TSStateId); // Pop the given number of entries from the given version of the stack. This // operation can increase the number of stack versions by revealing multiple @@ -124,10 +125,4 @@ void ts_stack_clear(Stack *); bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *); -typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t); - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_PARSE_STACK_H_ +#endif // TREE_SITTER_PARSE_STACK_H_ diff --git a/parser/src/subtree.h b/parser/src/subtree.h index 0b3062e9..58452dd6 100644 --- a/parser/src/subtree.h +++ b/parser/src/subtree.h @@ -1,22 +1,18 @@ #ifndef TREE_SITTER_SUBTREE_H_ #define TREE_SITTER_SUBTREE_H_ -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include "./length.h" #include "./array.h" #include "./error_costs.h" #include "./host.h" -#include "api.h" +#include "./length.h" #include "./parser.h" +#include "api.h" +#include +#include +#include #define TS_TREE_STATE_NONE USHRT_MAX -#define NULL_SUBTREE ((Subtree) {.ptr = NULL}) +#define NULL_SUBTREE ((Subtree){.ptr = NULL}) // The serialized state of an external scanner. // @@ -28,12 +24,13 @@ extern "C" { // // Small byte arrays are stored inline, and long ones are allocated // separately on the heap. -typedef struct { - union { - char *long_data; - char short_data[24]; - }; - uint32_t length; +typedef struct ExternalScannerState +{ + union { + char *long_data; + char short_data[24]; + }; + uint32_t length; } ExternalScannerState; // A compact representation of a subtree. @@ -49,53 +46,56 @@ typedef struct { // the pointer or the inline struct. typedef struct SubtreeInlineData SubtreeInlineData; -#define SUBTREE_BITS \ - bool visible : 1; \ - bool named : 1; \ - bool extra : 1; \ - bool has_changes : 1; \ - bool is_missing : 1; \ - bool is_keyword : 1; +#define SUBTREE_BITS \ + bool visible : 1; \ + bool named : 1; \ + bool extra : 1; \ + bool has_changes : 1; \ + bool is_missing : 1; \ + bool is_keyword : 1; -#define SUBTREE_SIZE \ - uint8_t padding_columns; \ - uint8_t padding_rows : 4; \ - uint8_t lookahead_bytes : 4; \ - uint8_t padding_bytes; \ - uint8_t size_bytes; +#define SUBTREE_SIZE \ + uint8_t padding_columns; \ + uint8_t padding_rows : 4; \ + uint8_t lookahead_bytes : 4; \ + uint8_t padding_bytes; \ + uint8_t size_bytes; #if TS_BIG_ENDIAN -#if TS_PTR_SIZE == 32 +# if TS_PTR_SIZE == 32 -struct SubtreeInlineData { - uint16_t parse_state; - uint8_t symbol; - SUBTREE_BITS - bool unused : 1; - bool is_inline : 1; - SUBTREE_SIZE +struct SubtreeInlineData +{ + uint16_t parse_state; + uint8_t symbol; + SUBTREE_BITS + bool unused : 1; + bool is_inline : 1; + SUBTREE_SIZE }; -#else +# else -struct SubtreeInlineData { - SUBTREE_SIZE - uint16_t parse_state; - uint8_t symbol; - SUBTREE_BITS - bool unused : 1; - bool is_inline : 1; +struct SubtreeInlineData +{ + SUBTREE_SIZE + uint16_t parse_state; + uint8_t symbol; + SUBTREE_BITS + bool unused : 1; + bool is_inline : 1; }; -#endif +# endif #else -struct SubtreeInlineData { - bool is_inline : 1; - SUBTREE_BITS - uint8_t symbol; - uint16_t parse_state; - SUBTREE_SIZE +struct SubtreeInlineData +{ + bool is_inline : 1; + SUBTREE_BITS + uint8_t symbol; + uint16_t parse_state; + SUBTREE_SIZE }; #endif @@ -108,75 +108,79 @@ struct SubtreeInlineData { // This representation is used for parent nodes, external tokens, // errors, and other leaf nodes whose data is too large to fit into // the inline representation. -typedef struct { - volatile uint32_t ref_count; - Length padding; - Length size; - uint32_t lookahead_bytes; - uint32_t error_cost; - uint32_t child_count; - TSSymbol symbol; - TSStateId parse_state; +typedef struct SubtreeHeapData +{ + volatile uint32_t ref_count; + Length padding; + Length size; + uint32_t lookahead_bytes; + uint32_t error_cost; + uint32_t child_count; + TSSymbol symbol; + TSStateId parse_state; - bool visible : 1; - bool named : 1; - bool extra : 1; - bool fragile_left : 1; - bool fragile_right : 1; - bool has_changes : 1; - bool has_external_tokens : 1; - bool has_external_scanner_state_change : 1; - bool depends_on_column: 1; - bool is_missing : 1; - bool is_keyword : 1; + bool visible : 1; + bool named : 1; + bool extra : 1; + bool fragile_left : 1; + bool fragile_right : 1; + bool has_changes : 1; + bool has_external_tokens : 1; + bool has_external_scanner_state_change : 1; + bool depends_on_column : 1; + bool is_missing : 1; + bool is_keyword : 1; - union { - // Non-terminal subtrees (`child_count > 0`) - struct { - uint32_t visible_child_count; - uint32_t named_child_count; - uint32_t visible_descendant_count; - int32_t dynamic_precedence; - uint16_t repeat_depth; - uint16_t production_id; - struct { - TSSymbol symbol; - TSStateId parse_state; - } first_leaf; - }; + union { + // Non-terminal subtrees (`child_count > 0`) + struct + { + uint32_t visible_child_count; + uint32_t named_child_count; + uint32_t visible_descendant_count; + int32_t dynamic_precedence; + uint16_t repeat_depth; + uint16_t production_id; + struct + { + TSSymbol symbol; + TSStateId parse_state; + } first_leaf; + }; - // External terminal subtrees (`child_count == 0 && has_external_tokens`) - ExternalScannerState external_scanner_state; + // External terminal subtrees (`child_count == 0 && has_external_tokens`) + ExternalScannerState external_scanner_state; - // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`) - int32_t lookahead_char; - }; + // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`) + int32_t lookahead_char; + }; } SubtreeHeapData; // The fundamental building block of a syntax tree. -typedef union { - SubtreeInlineData data; - const SubtreeHeapData *ptr; +typedef union Subtree { + SubtreeInlineData data; + const SubtreeHeapData *ptr; } Subtree; // Like Subtree, but mutable. -typedef union { - SubtreeInlineData data; - SubtreeHeapData *ptr; +typedef union MutableSubtree { + SubtreeInlineData data; + SubtreeHeapData *ptr; } MutableSubtree; typedef Array(Subtree) SubtreeArray; typedef Array(MutableSubtree) MutableSubtreeArray; -typedef struct { - MutableSubtreeArray free_trees; - MutableSubtreeArray tree_stack; +typedef struct SubtreePool +{ + MutableSubtreeArray free_trees; + MutableSubtreeArray tree_stack; } SubtreePool; -void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned); +void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned); const char *ts_external_scanner_state_data(const ExternalScannerState *); -bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *, unsigned); -void ts_external_scanner_state_delete(ExternalScannerState *self); +bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *, unsigned); +void ts_external_scanner_state_delete(ExternalScannerState *self); void ts_subtree_array_copy(SubtreeArray, SubtreeArray *); void ts_subtree_array_clear(SubtreePool *, SubtreeArray *); @@ -185,198 +189,263 @@ void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *); void ts_subtree_array_reverse(SubtreeArray *); SubtreePool ts_subtree_pool_new(uint32_t capacity); -void ts_subtree_pool_delete(SubtreePool *); +void ts_subtree_pool_delete(SubtreePool *); -Subtree ts_subtree_new_leaf( - SubtreePool *, TSSymbol, Length, Length, uint32_t, - TSStateId, bool, bool, bool, const TSLanguage * -); -Subtree ts_subtree_new_error( - SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage * -); +Subtree ts_subtree_new_leaf(SubtreePool *, TSSymbol, Length, Length, uint32_t, TSStateId, bool, bool, bool, const TSLanguage *); +Subtree ts_subtree_new_error(SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *); MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *); -Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *); -Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, uint32_t, const TSLanguage *); +Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *); +Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, uint32_t, const TSLanguage *); MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree); -void ts_subtree_retain(Subtree); -void ts_subtree_release(SubtreePool *, Subtree); -int ts_subtree_compare(Subtree, Subtree, SubtreePool *); -void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *); -void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *); -void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *); -void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *); -Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *); -char *ts_subtree_string(Subtree, TSSymbol, bool, const TSLanguage *, bool include_all); -void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *); -Subtree ts_subtree_last_external_token(Subtree); +void ts_subtree_retain(Subtree); +void ts_subtree_release(SubtreePool *, Subtree); +int ts_subtree_compare(Subtree, Subtree, SubtreePool *); +void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *); +void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *); +void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *); +void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *); +Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *); +char *ts_subtree_string(Subtree, TSSymbol, bool, const TSLanguage *, bool include_all); +void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *); +Subtree ts_subtree_last_external_token(Subtree); const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self); -bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); +bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); #define SUBTREE_GET(self, name) ((self).data.is_inline ? (self).data.name : (self).ptr->name) -static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); } -static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); } -static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); } -static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); } -static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); } -static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); } -static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); } -static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); } -static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); } +static inline TSSymbol ts_subtree_symbol(Subtree self) +{ + return SUBTREE_GET(self, symbol); +} +static inline bool ts_subtree_visible(Subtree self) +{ + return SUBTREE_GET(self, visible); +} +static inline bool ts_subtree_named(Subtree self) +{ + return SUBTREE_GET(self, named); +} +static inline bool ts_subtree_extra(Subtree self) +{ + return SUBTREE_GET(self, extra); +} +static inline bool ts_subtree_has_changes(Subtree self) +{ + return SUBTREE_GET(self, has_changes); +} +static inline bool ts_subtree_missing(Subtree self) +{ + return SUBTREE_GET(self, is_missing); +} +static inline bool ts_subtree_is_keyword(Subtree self) +{ + return SUBTREE_GET(self, is_keyword); +} +static inline TSStateId ts_subtree_parse_state(Subtree self) +{ + return SUBTREE_GET(self, parse_state); +} +static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) +{ + return SUBTREE_GET(self, lookahead_bytes); +} #undef SUBTREE_GET // Get the size needed to store a heap-allocated subtree with the given // number of children. -static inline size_t ts_subtree_alloc_size(uint32_t child_count) { - return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); +static inline size_t ts_subtree_alloc_size(uint32_t child_count) +{ + return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); } // Get a subtree's children, which are allocated immediately before the // tree's own heap data. -#define ts_subtree_children(self) \ - ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) +#define ts_subtree_children(self) ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) -static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) { - if (self->data.is_inline) { - self->data.extra = is_extra; - } else { - self->ptr->extra = is_extra; - } +static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) +{ + if (self->data.is_inline) + { + self->data.extra = is_extra; + } + else + { + self->ptr->extra = is_extra; + } } -static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) { - if (self.data.is_inline) return self.data.symbol; - if (self.ptr->child_count == 0) return self.ptr->symbol; - return self.ptr->first_leaf.symbol; +static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) +{ + if (self.data.is_inline) + return self.data.symbol; + if (self.ptr->child_count == 0) + return self.ptr->symbol; + return self.ptr->first_leaf.symbol; } -static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) { - if (self.data.is_inline) return self.data.parse_state; - if (self.ptr->child_count == 0) return self.ptr->parse_state; - return self.ptr->first_leaf.parse_state; +static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) +{ + if (self.data.is_inline) + return self.data.parse_state; + if (self.ptr->child_count == 0) + return self.ptr->parse_state; + return self.ptr->first_leaf.parse_state; } -static inline Length ts_subtree_padding(Subtree self) { - if (self.data.is_inline) { - Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}}; - return result; - } else { - return self.ptr->padding; - } +static inline Length ts_subtree_padding(Subtree self) +{ + if (self.data.is_inline) + { + Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}}; + return result; + } + else + { + return self.ptr->padding; + } } -static inline Length ts_subtree_size(Subtree self) { - if (self.data.is_inline) { - Length result = {self.data.size_bytes, {0, self.data.size_bytes}}; - return result; - } else { - return self.ptr->size; - } +static inline Length ts_subtree_size(Subtree self) +{ + if (self.data.is_inline) + { + Length result = {self.data.size_bytes, {0, self.data.size_bytes}}; + return result; + } + else + { + return self.ptr->size; + } } -static inline Length ts_subtree_total_size(Subtree self) { - return length_add(ts_subtree_padding(self), ts_subtree_size(self)); +static inline Length ts_subtree_total_size(Subtree self) +{ + return length_add(ts_subtree_padding(self), ts_subtree_size(self)); } -static inline uint32_t ts_subtree_total_bytes(Subtree self) { - return ts_subtree_total_size(self).bytes; +static inline uint32_t ts_subtree_total_bytes(Subtree self) +{ + return ts_subtree_total_size(self).bytes; } -static inline uint32_t ts_subtree_child_count(Subtree self) { - return self.data.is_inline ? 0 : self.ptr->child_count; +static inline uint32_t ts_subtree_child_count(Subtree self) +{ + return self.data.is_inline ? 0 : self.ptr->child_count; } -static inline uint32_t ts_subtree_repeat_depth(Subtree self) { - return self.data.is_inline ? 0 : self.ptr->repeat_depth; +static inline uint32_t ts_subtree_repeat_depth(Subtree self) +{ + return self.data.is_inline ? 0 : self.ptr->repeat_depth; } -static inline uint32_t ts_subtree_is_repetition(Subtree self) { - return self.data.is_inline - ? 0 - : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0; +static inline uint32_t ts_subtree_is_repetition(Subtree self) +{ + return self.data.is_inline ? 0 : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0; } -static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) { - return (self.data.is_inline || self.ptr->child_count == 0) - ? 0 - : self.ptr->visible_descendant_count; +static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) +{ + return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->visible_descendant_count; } -static inline uint32_t ts_subtree_visible_child_count(Subtree self) { - if (ts_subtree_child_count(self) > 0) { - return self.ptr->visible_child_count; - } else { - return 0; - } +static inline uint32_t ts_subtree_visible_child_count(Subtree self) +{ + if (ts_subtree_child_count(self) > 0) + { + return self.ptr->visible_child_count; + } + else + { + return 0; + } } -static inline uint32_t ts_subtree_error_cost(Subtree self) { - if (ts_subtree_missing(self)) { - return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; - } else { - return self.data.is_inline ? 0 : self.ptr->error_cost; - } +static inline uint32_t ts_subtree_error_cost(Subtree self) +{ + if (ts_subtree_missing(self)) + { + return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; + } + else + { + return self.data.is_inline ? 0 : self.ptr->error_cost; + } } -static inline int32_t ts_subtree_dynamic_precedence(Subtree self) { - return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence; +static inline int32_t ts_subtree_dynamic_precedence(Subtree self) +{ + return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence; } -static inline uint16_t ts_subtree_production_id(Subtree self) { - if (ts_subtree_child_count(self) > 0) { - return self.ptr->production_id; - } else { - return 0; - } +static inline uint16_t ts_subtree_production_id(Subtree self) +{ + if (ts_subtree_child_count(self) > 0) + { + return self.ptr->production_id; + } + else + { + return 0; + } } -static inline bool ts_subtree_fragile_left(Subtree self) { - return self.data.is_inline ? false : self.ptr->fragile_left; +static inline bool ts_subtree_fragile_left(Subtree self) +{ + return self.data.is_inline ? false : self.ptr->fragile_left; } -static inline bool ts_subtree_fragile_right(Subtree self) { - return self.data.is_inline ? false : self.ptr->fragile_right; +static inline bool ts_subtree_fragile_right(Subtree self) +{ + return self.data.is_inline ? false : self.ptr->fragile_right; } -static inline bool ts_subtree_has_external_tokens(Subtree self) { - return self.data.is_inline ? false : self.ptr->has_external_tokens; +static inline bool ts_subtree_has_external_tokens(Subtree self) +{ + return self.data.is_inline ? false : self.ptr->has_external_tokens; } -static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) { - return self.data.is_inline ? false : self.ptr->has_external_scanner_state_change; +static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) +{ + return self.data.is_inline ? false : self.ptr->has_external_scanner_state_change; } -static inline bool ts_subtree_depends_on_column(Subtree self) { - return self.data.is_inline ? false : self.ptr->depends_on_column; +static inline bool ts_subtree_depends_on_column(Subtree self) +{ + return self.data.is_inline ? false : self.ptr->depends_on_column; } -static inline bool ts_subtree_is_fragile(Subtree self) { - return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right); +static inline bool ts_subtree_is_fragile(Subtree self) +{ + return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right); } -static inline bool ts_subtree_is_error(Subtree self) { - return ts_subtree_symbol(self) == ts_builtin_sym_error; +static inline bool ts_subtree_is_error(Subtree self) +{ + return ts_subtree_symbol(self) == ts_builtin_sym_error; } -static inline bool ts_subtree_is_eof(Subtree self) { - return ts_subtree_symbol(self) == ts_builtin_sym_end; +static inline bool ts_subtree_is_eof(Subtree self) +{ + return ts_subtree_symbol(self) == ts_builtin_sym_end; } -static inline Subtree ts_subtree_from_mut(MutableSubtree self) { - Subtree result; - result.data = self.data; - return result; +static inline Subtree ts_subtree_from_mut(MutableSubtree self) +{ + Subtree result; + result.data = self.data; + return result; } -static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) { - MutableSubtree result; - result.data = self.data; - return result; +static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) +{ + MutableSubtree result; + result.data = self.data; + return result; } #ifdef __cplusplus } #endif -#endif // TREE_SITTER_SUBTREE_H_ +#endif // TREE_SITTER_SUBTREE_H_ diff --git a/parser/src/tree.c b/parser/src/tree.c index 2aa9b9fa..6f88f399 100644 --- a/parser/src/tree.c +++ b/parser/src/tree.c @@ -4,7 +4,6 @@ #include "./array.h" #include "./length.h" #include "./subtree.h" -#include "./tree_cursor.h" #include "api.h" TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *included_ranges, unsigned included_range_count) @@ -106,51 +105,7 @@ TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) return ranges; } -// TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) -// { -// TreeCursor cursor1 = {NULL, array_new(), 0}; -// TreeCursor cursor2 = {NULL, array_new(), 0}; -// ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree)); -// ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree)); - -// TSRangeArray included_range_differences = array_new(); -// ts_range_array_get_changed_ranges(old_tree->included_ranges, old_tree->included_range_count, new_tree->included_ranges, -// new_tree->included_range_count, &included_range_differences); - -// TSRange *result; -// *length = ts_subtree_get_changed_ranges(&old_tree->root, &new_tree->root, &cursor1, &cursor2, old_tree->language, -// &included_range_differences, &result); - -// array_delete(&included_range_differences); -// array_delete(&cursor1.stack); -// array_delete(&cursor2.stack); -// return result; -// } - -#ifdef _WIN32 - -# include -# include - -int _ts_dup(HANDLE handle) -{ - HANDLE dup_handle; - if (!DuplicateHandle(GetCurrentProcess(), handle, GetCurrentProcess(), &dup_handle, 0, FALSE, DUPLICATE_SAME_ACCESS)) - return -1; - - return _open_osfhandle((intptr_t)dup_handle, 0); -} - -void ts_tree_print_dot_graph(const TSTree *self, int fd) -{ - FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a"); - ts_subtree_print_dot_graph(self->root, self->language, file); - fclose(file); -} - -#else - -# include +#include int _ts_dup(int file_descriptor) { @@ -163,5 +118,3 @@ void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) ts_subtree_print_dot_graph(self->root, self->language, file); fclose(file); } - -#endif diff --git a/parser/src/tree.h b/parser/src/tree.h index f012f888..3692adc9 100644 --- a/parser/src/tree.h +++ b/parser/src/tree.h @@ -3,29 +3,23 @@ #include "./subtree.h" -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - const Subtree *child; - const Subtree *parent; - Length position; - TSSymbol alias_symbol; +typedef struct ParentCacheEntry +{ + const Subtree *child; + const Subtree *parent; + Length position; + TSSymbol alias_symbol; } ParentCacheEntry; -struct TSTree { - Subtree root; - const TSLanguage *language; - TSRange *included_ranges; - unsigned included_range_count; +struct TSTree +{ + Subtree root; + const TSLanguage *language; + TSRange *included_ranges; + unsigned included_range_count; }; TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned); -TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol); +TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol); -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_TREE_H_ +#endif // TREE_SITTER_TREE_H_ diff --git a/parser/src/tree_cursor.c b/parser/src/tree_cursor.c deleted file mode 100644 index c1a3d8a4..00000000 --- a/parser/src/tree_cursor.c +++ /dev/null @@ -1,714 +0,0 @@ -#include "api.h" -#include "./alloc.h" -#include "./tree_cursor.h" -#include "./language.h" -#include "./tree.h" - -typedef struct { - Subtree parent; - const TSTree *tree; - Length position; - uint32_t child_index; - uint32_t structural_child_index; - uint32_t descendant_index; - const TSSymbol *alias_sequence; -} CursorChildIterator; - -// CursorChildIterator - -static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) { - TreeCursorEntry *entry = &self->stack.contents[index]; - if (index == 0 || ts_subtree_visible(*entry->subtree)) { - return true; - } else if (!ts_subtree_extra(*entry->subtree)) { - TreeCursorEntry *parent_entry = &self->stack.contents[index - 1]; - return ts_language_alias_at( - self->tree->language, - parent_entry->subtree->ptr->production_id, - entry->structural_child_index - ); - } else { - return false; - } -} - -static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) { - TreeCursorEntry *last_entry = array_back(&self->stack); - if (ts_subtree_child_count(*last_entry->subtree) == 0) { - return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL}; - } - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - last_entry->subtree->ptr->production_id - ); - - uint32_t descendant_index = last_entry->descendant_index; - if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) { - descendant_index += 1; - } - - return (CursorChildIterator) { - .tree = self->tree, - .parent = *last_entry->subtree, - .position = last_entry->position, - .child_index = 0, - .structural_child_index = 0, - .descendant_index = descendant_index, - .alias_sequence = alias_sequence, - }; -} - -static inline bool ts_tree_cursor_child_iterator_next( - CursorChildIterator *self, - TreeCursorEntry *result, - bool *visible -) { - if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; - const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; - *result = (TreeCursorEntry) { - .subtree = child, - .position = self->position, - .child_index = self->child_index, - .structural_child_index = self->structural_child_index, - .descendant_index = self->descendant_index, - }; - *visible = ts_subtree_visible(*child); - bool extra = ts_subtree_extra(*child); - if (!extra) { - if (self->alias_sequence) { - *visible |= self->alias_sequence[self->structural_child_index]; - } - self->structural_child_index++; - } - - self->descendant_index += ts_subtree_visible_descendant_count(*child); - if (*visible) { - self->descendant_index += 1; - } - - self->position = length_add(self->position, ts_subtree_size(*child)); - self->child_index++; - - if (self->child_index < self->parent.ptr->child_count) { - Subtree next_child = ts_subtree_children(self->parent)[self->child_index]; - self->position = length_add(self->position, ts_subtree_padding(next_child)); - } - - return true; -} - -// Return a position that, when `b` is added to it, yields `a`. This -// can only be computed if `b` has zero rows. Otherwise, this function -// returns `LENGTH_UNDEFINED`, and the caller needs to recompute -// the position some other way. -static inline Length length_backtrack(Length a, Length b) { - if (length_is_undefined(a) || b.extent.row != 0) { - return LENGTH_UNDEFINED; - } - - Length result; - result.bytes = a.bytes - b.bytes; - result.extent.row = a.extent.row; - result.extent.column = a.extent.column - b.extent.column; - return result; -} - -static inline bool ts_tree_cursor_child_iterator_previous( - CursorChildIterator *self, - TreeCursorEntry *result, - bool *visible -) { - // this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into - // account unsigned underflow - if (!self->parent.ptr || (int8_t)self->child_index == -1) return false; - const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; - *result = (TreeCursorEntry) { - .subtree = child, - .position = self->position, - .child_index = self->child_index, - .structural_child_index = self->structural_child_index, - }; - *visible = ts_subtree_visible(*child); - bool extra = ts_subtree_extra(*child); - if (!extra && self->alias_sequence) { - *visible |= self->alias_sequence[self->structural_child_index]; - self->structural_child_index--; - } - - self->position = length_backtrack(self->position, ts_subtree_padding(*child)); - self->child_index--; - - // unsigned can underflow so compare it to child_count - if (self->child_index < self->parent.ptr->child_count) { - Subtree previous_child = ts_subtree_children(self->parent)[self->child_index]; - Length size = ts_subtree_size(previous_child); - self->position = length_backtrack(self->position, size); - } - - return true; -} - -// TSTreeCursor - lifecycle - -TSTreeCursor ts_tree_cursor_new(TSNode node) { - TSTreeCursor self = {NULL, NULL, {0, 0, 0}}; - ts_tree_cursor_init((TreeCursor *)&self, node); - return self; -} - -void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) { - ts_tree_cursor_init((TreeCursor *)_self, node); -} - -void ts_tree_cursor_init(TreeCursor *self, TSNode node) { - self->tree = node.tree; - self->root_alias_symbol = node.context[3]; - array_clear(&self->stack); - array_push(&self->stack, ((TreeCursorEntry) { - .subtree = (const Subtree *)node.id, - .position = { - ts_node_start_byte(node), - ts_node_start_point(node) - }, - .child_index = 0, - .structural_child_index = 0, - .descendant_index = 0, - })); -} - -void ts_tree_cursor_delete(TSTreeCursor *_self) { - TreeCursor *self = (TreeCursor *)_self; - array_delete(&self->stack); -} - -// TSTreeCursor - walking the tree - -TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) { - TreeCursor *self = (TreeCursor *)_self; - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - if (visible) { - array_push(&self->stack, entry); - return TreeCursorStepVisible; - } - if (ts_subtree_visible_child_count(*entry.subtree) > 0) { - array_push(&self->stack, entry); - return TreeCursorStepHidden; - } - } - return TreeCursorStepNone; -} - -bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) { - for (;;) { - switch (ts_tree_cursor_goto_first_child_internal(self)) { - case TreeCursorStepHidden: - continue; - case TreeCursorStepVisible: - return true; - default: - return false; - } - } - return false; -} - -TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) { - TreeCursor *self = (TreeCursor *)_self; - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - if (!iterator.parent.ptr || iterator.parent.ptr->child_count == 0) return TreeCursorStepNone; - - TreeCursorEntry last_entry = {0}; - TreeCursorStep last_step = TreeCursorStepNone; - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - if (visible) { - last_entry = entry; - last_step = TreeCursorStepVisible; - } - else if (ts_subtree_visible_child_count(*entry.subtree) > 0) { - last_entry = entry; - last_step = TreeCursorStepHidden; - } - } - if (last_entry.subtree) { - array_push(&self->stack, last_entry); - return last_step; - } - - return TreeCursorStepNone; -} - -bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) { - for (;;) { - switch (ts_tree_cursor_goto_last_child_internal(self)) { - case TreeCursorStepHidden: - continue; - case TreeCursorStepVisible: - return true; - default: - return false; - } - } - return false; -} - -static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point( - TSTreeCursor *_self, - uint32_t goal_byte, - TSPoint goal_point -) { - TreeCursor *self = (TreeCursor *)_self; - uint32_t initial_size = self->stack.size; - uint32_t visible_child_index = 0; - - bool did_descend; - do { - did_descend = false; - - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - Length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree)); - bool at_goal = entry_end.bytes >= goal_byte && point_gte(entry_end.extent, goal_point); - uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree); - if (at_goal) { - if (visible) { - array_push(&self->stack, entry); - return visible_child_index; - } - if (visible_child_count > 0) { - array_push(&self->stack, entry); - did_descend = true; - break; - } - } else if (visible) { - visible_child_index++; - } else { - visible_child_index += visible_child_count; - } - } - } while (did_descend); - - self->stack.size = initial_size; - return -1; -} - -int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte) { - return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, POINT_ZERO); -} - -int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point) { - return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point); -} - -TreeCursorStep ts_tree_cursor_goto_sibling_internal( - TSTreeCursor *_self, - bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) { - TreeCursor *self = (TreeCursor *)_self; - uint32_t initial_size = self->stack.size; - - while (self->stack.size > 1) { - TreeCursorEntry entry = array_pop(&self->stack); - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - iterator.child_index = entry.child_index; - iterator.structural_child_index = entry.structural_child_index; - iterator.position = entry.position; - iterator.descendant_index = entry.descendant_index; - - bool visible = false; - advance(&iterator, &entry, &visible); - if (visible && self->stack.size + 1 < initial_size) break; - - while (advance(&iterator, &entry, &visible)) { - if (visible) { - array_push(&self->stack, entry); - return TreeCursorStepVisible; - } - - if (ts_subtree_visible_child_count(*entry.subtree)) { - array_push(&self->stack, entry); - return TreeCursorStepHidden; - } - } - } - - self->stack.size = initial_size; - return TreeCursorStepNone; -} - -TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) { - return ts_tree_cursor_goto_sibling_internal(_self, ts_tree_cursor_child_iterator_next); -} - -bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) { - switch (ts_tree_cursor_goto_next_sibling_internal(self)) { - case TreeCursorStepHidden: - ts_tree_cursor_goto_first_child(self); - return true; - case TreeCursorStepVisible: - return true; - default: - return false; - } -} - -TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self) { - // since subtracting across row loses column information, we may have to - // restore it - TreeCursor *self = (TreeCursor *)_self; - - // for that, save current position before traversing - TreeCursorStep step = ts_tree_cursor_goto_sibling_internal( - _self, ts_tree_cursor_child_iterator_previous); - if (step == TreeCursorStepNone) - return step; - - // if length is already valid, there's no need to recompute it - if (!length_is_undefined(array_back(&self->stack)->position)) - return step; - - // restore position from the parent node - const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2]; - Length position = parent->position; - uint32_t child_index = array_back(&self->stack)->child_index; - const Subtree *children = ts_subtree_children((*(parent->subtree))); - - if (child_index > 0) { - // skip first child padding since its position should match the position of the parent - position = length_add(position, ts_subtree_size(children[0])); - for (uint32_t i = 1; i < child_index; ++i) { - position = length_add(position, ts_subtree_total_size(children[i])); - } - position = length_add(position, ts_subtree_padding(children[child_index])); - } - - array_back(&self->stack)->position = position; - - return step; -} - -bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) { - switch (ts_tree_cursor_goto_previous_sibling_internal(self)) { - case TreeCursorStepHidden: - ts_tree_cursor_goto_last_child(self); - return true; - case TreeCursorStepVisible: - return true; - default: - return false; - } -} - -bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { - TreeCursor *self = (TreeCursor *)_self; - for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { - if (ts_tree_cursor_is_entry_visible(self, i)) { - self->stack.size = i + 1; - return true; - } - } - return false; -} - -void ts_tree_cursor_goto_descendant( - TSTreeCursor *_self, - uint32_t goal_descendant_index -) { - TreeCursor *self = (TreeCursor *)_self; - - // Ascend to the lowest ancestor that contains the goal node. - for (;;) { - uint32_t i = self->stack.size - 1; - TreeCursorEntry *entry = &self->stack.contents[i]; - uint32_t next_descendant_index = - entry->descendant_index + - (ts_tree_cursor_is_entry_visible(self, i) ? 1 : 0) + - ts_subtree_visible_descendant_count(*entry->subtree); - if ( - (entry->descendant_index <= goal_descendant_index) && - (next_descendant_index > goal_descendant_index) - ) { - break; - } else if (self->stack.size <= 1) { - return; - } else { - self->stack.size--; - } - } - - // Descend to the goal node. - bool did_descend = true; - do { - did_descend = false; - bool visible; - TreeCursorEntry entry; - CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - if (iterator.descendant_index > goal_descendant_index) { - return; - } - - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - if (iterator.descendant_index > goal_descendant_index) { - array_push(&self->stack, entry); - if (visible && entry.descendant_index == goal_descendant_index) { - return; - } else { - did_descend = true; - break; - } - } - } - } while (did_descend); -} - -uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - TreeCursorEntry *last_entry = array_back(&self->stack); - return last_entry->descendant_index; -} - -TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - TreeCursorEntry *last_entry = array_back(&self->stack); - TSSymbol alias_symbol = self->root_alias_symbol; - if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) { - TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; - alias_symbol = ts_language_alias_at( - self->tree->language, - parent_entry->subtree->ptr->production_id, - last_entry->structural_child_index - ); - } - return ts_node_new( - self->tree, - last_entry->subtree, - last_entry->position, - alias_symbol - ); -} - -// Private - Get various facts about the current node that are needed -// when executing tree queries. -void ts_tree_cursor_current_status( - const TSTreeCursor *_self, - TSFieldId *field_id, - bool *has_later_siblings, - bool *has_later_named_siblings, - bool *can_have_later_siblings_with_this_field, - TSSymbol *supertypes, - unsigned *supertype_count -) { - const TreeCursor *self = (const TreeCursor *)_self; - unsigned max_supertypes = *supertype_count; - *field_id = 0; - *supertype_count = 0; - *has_later_siblings = false; - *has_later_named_siblings = false; - *can_have_later_siblings_with_this_field = false; - - // Walk up the tree, visiting the current node and its invisible ancestors, - // because fields can refer to nodes through invisible *wrapper* nodes, - for (unsigned i = self->stack.size - 1; i > 0; i--) { - TreeCursorEntry *entry = &self->stack.contents[i]; - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - parent_entry->subtree->ptr->production_id - ); - - #define subtree_symbol(subtree, structural_child_index) \ - (( \ - !ts_subtree_extra(subtree) && \ - alias_sequence && \ - alias_sequence[structural_child_index] \ - ) ? \ - alias_sequence[structural_child_index] : \ - ts_subtree_symbol(subtree)) - - // Stop walking up when a visible ancestor is found. - TSSymbol entry_symbol = subtree_symbol( - *entry->subtree, - entry->structural_child_index - ); - TSSymbolMetadata entry_metadata = ts_language_symbol_metadata( - self->tree->language, - entry_symbol - ); - if (i != self->stack.size - 1 && entry_metadata.visible) break; - - // Record any supertypes - if (entry_metadata.supertype && *supertype_count < max_supertypes) { - supertypes[*supertype_count] = entry_symbol; - (*supertype_count)++; - } - - // Determine if the current node has later siblings. - if (!*has_later_siblings) { - unsigned sibling_count = parent_entry->subtree->ptr->child_count; - unsigned structural_child_index = entry->structural_child_index; - if (!ts_subtree_extra(*entry->subtree)) structural_child_index++; - for (unsigned j = entry->child_index + 1; j < sibling_count; j++) { - Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j]; - TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata( - self->tree->language, - subtree_symbol(sibling, structural_child_index) - ); - if (sibling_metadata.visible) { - *has_later_siblings = true; - if (*has_later_named_siblings) break; - if (sibling_metadata.named) { - *has_later_named_siblings = true; - break; - } - } else if (ts_subtree_visible_child_count(sibling) > 0) { - *has_later_siblings = true; - if (*has_later_named_siblings) break; - if (sibling.ptr->named_child_count > 0) { - *has_later_named_siblings = true; - break; - } - } - if (!ts_subtree_extra(sibling)) structural_child_index++; - } - } - - #undef subtree_symbol - - if (!ts_subtree_extra(*entry->subtree)) { - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map( - self->tree->language, - parent_entry->subtree->ptr->production_id, - &field_map, &field_map_end - ); - - // Look for a field name associated with the current node. - if (!*field_id) { - for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - if (!map->inherited && map->child_index == entry->structural_child_index) { - *field_id = map->field_id; - break; - } - } - } - - // Determine if the current node can have later siblings with the same field name. - if (*field_id) { - for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - if ( - map->field_id == *field_id && - map->child_index > entry->structural_child_index - ) { - *can_have_later_siblings_with_this_field = true; - break; - } - } - } - } - } -} - -uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - uint32_t depth = 0; - for (unsigned i = 1; i < self->stack.size; i++) { - if (ts_tree_cursor_is_entry_visible(self, i)) { - depth++; - } - } - return depth; -} - -TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - for (int i = (int)self->stack.size - 2; i >= 0; i--) { - TreeCursorEntry *entry = &self->stack.contents[i]; - bool is_visible = true; - TSSymbol alias_symbol = 0; - if (i > 0) { - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - alias_symbol = ts_language_alias_at( - self->tree->language, - parent_entry->subtree->ptr->production_id, - entry->structural_child_index - ); - is_visible = (alias_symbol != 0) || ts_subtree_visible(*entry->subtree); - } - if (is_visible) { - return ts_node_new( - self->tree, - entry->subtree, - entry->position, - alias_symbol - ); - } - } - return ts_node_new(NULL, NULL, length_zero(), 0); -} - -TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - - // Walk up the tree, visiting the current node and its invisible ancestors. - for (unsigned i = self->stack.size - 1; i > 0; i--) { - TreeCursorEntry *entry = &self->stack.contents[i]; - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - - // Stop walking up when another visible node is found. - if ( - i != self->stack.size - 1 && - ts_tree_cursor_is_entry_visible(self, i) - ) break; - - if (ts_subtree_extra(*entry->subtree)) break; - - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map( - self->tree->language, - parent_entry->subtree->ptr->production_id, - &field_map, &field_map_end - ); - for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - if (!map->inherited && map->child_index == entry->structural_child_index) { - return map->field_id; - } - } - } - return 0; -} - -const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) { - TSFieldId id = ts_tree_cursor_current_field_id(_self); - if (id) { - const TreeCursor *self = (const TreeCursor *)_self; - return self->tree->language->field_names[id]; - } else { - return NULL; - } -} - -TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) { - const TreeCursor *cursor = (const TreeCursor *)_cursor; - TSTreeCursor res = {NULL, NULL, {0, 0}}; - TreeCursor *copy = (TreeCursor *)&res; - copy->tree = cursor->tree; - copy->root_alias_symbol = cursor->root_alias_symbol; - array_init(©->stack); - array_push_all(©->stack, &cursor->stack); - return res; -} - -void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) { - const TreeCursor *cursor = (const TreeCursor *)_src; - TreeCursor *copy = (TreeCursor *)_dst; - copy->tree = cursor->tree; - copy->root_alias_symbol = cursor->root_alias_symbol; - array_clear(©->stack); - array_push_all(©->stack, &cursor->stack); -} diff --git a/parser/src/tree_cursor.h b/parser/src/tree_cursor.h deleted file mode 100644 index 96a386df..00000000 --- a/parser/src/tree_cursor.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef TREE_SITTER_TREE_CURSOR_H_ -#define TREE_SITTER_TREE_CURSOR_H_ - -#include "./subtree.h" - -typedef struct { - const Subtree *subtree; - Length position; - uint32_t child_index; - uint32_t structural_child_index; - uint32_t descendant_index; -} TreeCursorEntry; - -typedef struct { - const TSTree *tree; - Array(TreeCursorEntry) stack; - TSSymbol root_alias_symbol; -} TreeCursor; - -typedef enum { - TreeCursorStepNone, - TreeCursorStepHidden, - TreeCursorStepVisible, -} TreeCursorStep; - -void ts_tree_cursor_init(TreeCursor *, TSNode); -void ts_tree_cursor_current_status( - const TSTreeCursor *, - TSFieldId *, - bool *, - bool *, - bool *, - TSSymbol *, - unsigned * -); - -TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *); -TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *); - -static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) { - const TreeCursor *self = (const TreeCursor *)_self; - TreeCursorEntry *last_entry = array_back(&self->stack); - return *last_entry->subtree; -} - -TSNode ts_tree_cursor_parent_node(const TSTreeCursor *); - -#endif // TREE_SITTER_TREE_CURSOR_H_ diff --git a/parser/src/wasm_store.c b/parser/src/wasm_store.c deleted file mode 100644 index 37523818..00000000 --- a/parser/src/wasm_store.c +++ /dev/null @@ -1,1847 +0,0 @@ -#include "api.h" -#include "./parser.h" -#include - -#ifdef TREE_SITTER_FEATURE_WASM - -#include "./alloc.h" -#include "./array.h" -#include "./atomic.h" -#include "./language.h" -#include "./lexer.h" -#include "./wasm/wasm-stdlib.h" -#include "./wasm_store.h" - -#include -#include -#include - -#define array_len(a) (sizeof(a) / sizeof(a[0])) - -// The following symbols from the C and C++ standard libraries are available -// for external scanners to use. -const char *STDLIB_SYMBOLS[] = { - #include "./stdlib-symbols.txt" -}; - -// The contents of the `dylink.0` custom section of a wasm module, -// as specified by the current WebAssembly dynamic linking ABI proposal. -typedef struct { - uint32_t memory_size; - uint32_t memory_align; - uint32_t table_size; - uint32_t table_align; -} WasmDylinkInfo; - -// WasmLanguageId - A pointer used to identify a language. This language id is -// reference-counted, so that its ownership can be shared between the language -// itself and the instances of the language that are held in wasm stores. -typedef struct { - volatile uint32_t ref_count; - volatile uint32_t is_language_deleted; -} WasmLanguageId; - -// LanguageWasmModule - Additional data associated with a wasm-backed -// `TSLanguage`. This data is read-only and does not reference a particular -// wasm store, so it can be shared by all users of a `TSLanguage`. A pointer to -// this is stored on the language itself. -typedef struct { - volatile uint32_t ref_count; - WasmLanguageId *language_id; - wasmtime_module_t *module; - const char *name; - char *symbol_name_buffer; - char *field_name_buffer; - WasmDylinkInfo dylink_info; -} LanguageWasmModule; - -// LanguageWasmInstance - Additional data associated with an instantiation of -// a `TSLanguage` in a particular wasm store. The wasm store holds one of -// these structs for each language that it has instantiated. -typedef struct { - WasmLanguageId *language_id; - wasmtime_instance_t instance; - int32_t external_states_address; - int32_t lex_main_fn_index; - int32_t lex_keyword_fn_index; - int32_t scanner_create_fn_index; - int32_t scanner_destroy_fn_index; - int32_t scanner_serialize_fn_index; - int32_t scanner_deserialize_fn_index; - int32_t scanner_scan_fn_index; -} LanguageWasmInstance; - -typedef struct { - uint32_t reset_heap; - uint32_t proc_exit; - uint32_t abort; - uint32_t assert_fail; - uint32_t notify_memory_growth; - uint32_t debug_message; - uint32_t at_exit; - uint32_t args_get; - uint32_t args_sizes_get; -} BuiltinFunctionIndices; - -// TSWasmStore - A struct that allows a given `Parser` to use wasm-backed -// languages. This struct is mutable, and can only be used by one parser at a -// time. -struct TSWasmStore { - wasm_engine_t *engine; - wasmtime_store_t *store; - wasmtime_table_t function_table; - wasmtime_memory_t memory; - TSLexer *current_lexer; - LanguageWasmInstance *current_instance; - Array(LanguageWasmInstance) language_instances; - uint32_t current_memory_offset; - uint32_t current_function_table_offset; - uint32_t *stdlib_fn_indices; - BuiltinFunctionIndices builtin_fn_indices; - wasmtime_global_t stack_pointer_global; - wasm_globaltype_t *const_i32_type; - bool has_error; - uint32_t lexer_address; -}; - -typedef Array(char) StringData; - -// LanguageInWasmMemory - The memory layout of a `TSLanguage` when compiled to -// wasm32. This is used to copy static language data out of the wasm memory. -typedef struct { - uint32_t version; - uint32_t symbol_count; - uint32_t alias_count; - uint32_t token_count; - uint32_t external_token_count; - uint32_t state_count; - uint32_t large_state_count; - uint32_t production_id_count; - uint32_t field_count; - uint16_t max_alias_sequence_length; - int32_t parse_table; - int32_t small_parse_table; - int32_t small_parse_table_map; - int32_t parse_actions; - int32_t symbol_names; - int32_t field_names; - int32_t field_map_slices; - int32_t field_map_entries; - int32_t symbol_metadata; - int32_t public_symbol_map; - int32_t alias_map; - int32_t alias_sequences; - int32_t lex_modes; - int32_t lex_fn; - int32_t keyword_lex_fn; - TSSymbol keyword_capture_token; - struct { - int32_t states; - int32_t symbol_map; - int32_t create; - int32_t destroy; - int32_t scan; - int32_t serialize; - int32_t deserialize; - } external_scanner; - int32_t primary_state_ids; -} LanguageInWasmMemory; - -// LexerInWasmMemory - The memory layout of a `TSLexer` when compiled to wasm32. -// This is used to copy mutable lexing state in and out of the wasm memory. -typedef struct { - int32_t lookahead; - TSSymbol result_symbol; - int32_t advance; - int32_t mark_end; - int32_t get_column; - int32_t is_at_included_range_start; - int32_t eof; -} LexerInWasmMemory; - -static volatile uint32_t NEXT_LANGUAGE_ID; - -// Linear memory layout: -// [ <-- stack | stdlib statics | lexer | language statics --> | serialization_buffer | heap --> ] -#define MAX_MEMORY_SIZE (128 * 1024 * 1024 / MEMORY_PAGE_SIZE) - -/************************ - * WasmDylinkMemoryInfo - ***********************/ - -static uint8_t read_u8(const uint8_t **p, const uint8_t *end) { - return *(*p)++; -} - -static inline uint64_t read_uleb128(const uint8_t **p, const uint8_t *end) { - uint64_t value = 0; - unsigned shift = 0; - do { - if (*p == end) return UINT64_MAX; - value += (uint64_t)(**p & 0x7f) << shift; - shift += 7; - } while (*((*p)++) >= 128); - return value; -} - -static bool wasm_dylink_info__parse( - const uint8_t *bytes, - size_t length, - WasmDylinkInfo *info -) { - const uint8_t WASM_MAGIC_NUMBER[4] = {0, 'a', 's', 'm'}; - const uint8_t WASM_VERSION[4] = {1, 0, 0, 0}; - const uint8_t WASM_CUSTOM_SECTION = 0x0; - const uint8_t WASM_DYLINK_MEM_INFO = 0x1; - - const uint8_t *p = bytes; - const uint8_t *end = bytes + length; - - if (length < 8) return false; - if (memcmp(p, WASM_MAGIC_NUMBER, 4) != 0) return false; - p += 4; - if (memcmp(p, WASM_VERSION, 4) != 0) return false; - p += 4; - - while (p < end) { - uint8_t section_id = read_u8(&p, end); - uint32_t section_length = read_uleb128(&p, end); - const uint8_t *section_end = p + section_length; - if (section_end > end) return false; - - if (section_id == WASM_CUSTOM_SECTION) { - uint32_t name_length = read_uleb128(&p, section_end); - const uint8_t *name_end = p + name_length; - if (name_end > section_end) return false; - - if (name_length == 8 && memcmp(p, "dylink.0", 8) == 0) { - p = name_end; - while (p < section_end) { - uint8_t subsection_type = read_u8(&p, section_end); - uint32_t subsection_size = read_uleb128(&p, section_end); - const uint8_t *subsection_end = p + subsection_size; - if (subsection_end > section_end) return false; - if (subsection_type == WASM_DYLINK_MEM_INFO) { - info->memory_size = read_uleb128(&p, subsection_end); - info->memory_align = read_uleb128(&p, subsection_end); - info->table_size = read_uleb128(&p, subsection_end); - info->table_align = read_uleb128(&p, subsection_end); - return true; - } - p = subsection_end; - } - } - } - p = section_end; - } - return false; -} - -/******************************************* - * Native callbacks exposed to wasm modules - *******************************************/ - - static wasm_trap_t *callback__abort( - void *env, - wasmtime_caller_t* caller, - wasmtime_val_raw_t *args_and_results, - size_t args_and_results_len -) { - return wasmtime_trap_new("wasm module called abort", 24); -} - -static wasm_trap_t *callback__debug_message( - void *env, - wasmtime_caller_t* caller, - wasmtime_val_raw_t *args_and_results, - size_t args_and_results_len -) { - wasmtime_context_t *context = wasmtime_caller_context(caller); - TSWasmStore *store = env; - assert(args_and_results_len == 2); - uint32_t string_address = args_and_results[0].i32; - uint32_t value = args_and_results[1].i32; - uint8_t *memory = wasmtime_memory_data(context, &store->memory); - printf("DEBUG: %s %u\n", &memory[string_address], value); - return NULL; -} - -static wasm_trap_t *callback__noop( - void *env, - wasmtime_caller_t* caller, - wasmtime_val_raw_t *args_and_results, - size_t args_and_results_len -) { - return NULL; -} - -static wasm_trap_t *callback__lexer_advance( - void *env, - wasmtime_caller_t* caller, - wasmtime_val_raw_t *args_and_results, - size_t args_and_results_len -) { - wasmtime_context_t *context = wasmtime_caller_context(caller); - assert(args_and_results_len == 2); - - TSWasmStore *store = env; - TSLexer *lexer = store->current_lexer; - bool skip = args_and_results[1].i32; - lexer->advance(lexer, skip); - - uint8_t *memory = wasmtime_memory_data(context, &store->memory); - memcpy(&memory[store->lexer_address], &lexer->lookahead, sizeof(lexer->lookahead)); - return NULL; -} - -static wasm_trap_t *callback__lexer_mark_end( - void *env, - wasmtime_caller_t* caller, - wasmtime_val_raw_t *args_and_results, - size_t args_and_results_len -) { - TSWasmStore *store = env; - TSLexer *lexer = store->current_lexer; - lexer->mark_end(lexer); - return NULL; -} - -static wasm_trap_t *callback__lexer_get_column( - void *env, - wasmtime_caller_t* caller, - wasmtime_val_raw_t *args_and_results, - size_t args_and_results_len -) { - TSWasmStore *store = env; - TSLexer *lexer = store->current_lexer; - uint32_t result = lexer->get_column(lexer); - args_and_results[0].i32 = result; - return NULL; -} - -static wasm_trap_t *callback__lexer_is_at_included_range_start( - void *env, - wasmtime_caller_t* caller, - wasmtime_val_raw_t *args_and_results, - size_t args_and_results_len -) { - TSWasmStore *store = env; - TSLexer *lexer = store->current_lexer; - bool result = lexer->is_at_included_range_start(lexer); - args_and_results[0].i32 = result; - return NULL; -} - -static wasm_trap_t *callback__lexer_eof( - void *env, - wasmtime_caller_t* caller, - wasmtime_val_raw_t *args_and_results, - size_t args_and_results_len -) { - TSWasmStore *store = env; - TSLexer *lexer = store->current_lexer; - bool result = lexer->eof(lexer); - args_and_results[0].i32 = result; - return NULL; -} - -typedef struct { - uint32_t *storage_location; - wasmtime_func_unchecked_callback_t callback; - wasm_functype_t *type; -} FunctionDefinition; - -static void *copy(const void *data, size_t size) { - void *result = ts_malloc(size); - memcpy(result, data, size); - return result; -} - -static void *copy_unsized_static_array( - const uint8_t *data, - int32_t start_address, - const int32_t all_addresses[], - size_t address_count -) { - int32_t end_address = 0; - for (unsigned i = 0; i < address_count; i++) { - if (all_addresses[i] > start_address) { - if (!end_address || all_addresses[i] < end_address) { - end_address = all_addresses[i]; - } - } - } - - if (!end_address) return NULL; - size_t size = end_address - start_address; - void *result = ts_malloc(size); - memcpy(result, &data[start_address], size); - return result; -} - -static void *copy_strings( - const uint8_t *data, - int32_t array_address, - size_t count, - StringData *string_data -) { - const char **result = ts_malloc(count * sizeof(char *)); - for (unsigned i = 0; i < count; i++) { - int32_t address; - memcpy(&address, &data[array_address + i * sizeof(address)], sizeof(address)); - if (address == 0) { - result[i] = (const char *)-1; - } else { - const uint8_t *string = &data[address]; - uint32_t len = strlen((const char *)string); - result[i] = (const char *)(uintptr_t)string_data->size; - array_extend(string_data, len + 1, string); - } - } - for (unsigned i = 0; i < count; i++) { - if (result[i] == (const char *)-1) { - result[i] = NULL; - } else { - result[i] = string_data->contents + (uintptr_t)result[i]; - } - } - return result; -} - -static bool name_eq(const wasm_name_t *name, const char *string) { - return strncmp(string, name->data, name->size) == 0; -} - -static inline wasm_functype_t* wasm_functype_new_4_0( - wasm_valtype_t* p1, - wasm_valtype_t* p2, - wasm_valtype_t* p3, - wasm_valtype_t* p4 -) { - wasm_valtype_t* ps[4] = {p1, p2, p3, p4}; - wasm_valtype_vec_t params, results; - wasm_valtype_vec_new(¶ms, 4, ps); - wasm_valtype_vec_new_empty(&results); - return wasm_functype_new(¶ms, &results); -} - -#define format(output, ...) \ - do { \ - size_t message_length = snprintf((char *)NULL, 0, __VA_ARGS__); \ - *output = ts_malloc(message_length + 1); \ - snprintf(*output, message_length + 1, __VA_ARGS__); \ - } while (0) - -WasmLanguageId *language_id_new() { - WasmLanguageId *self = ts_malloc(sizeof(WasmLanguageId)); - self->is_language_deleted = false; - self->ref_count = 1; - return self; -} - -WasmLanguageId *language_id_clone(WasmLanguageId *self) { - atomic_inc(&self->ref_count); - return self; -} - -void language_id_delete(WasmLanguageId *self) { - if (atomic_dec(&self->ref_count) == 0) { - ts_free(self); - } -} - -static wasmtime_extern_t get_builtin_extern( - wasmtime_table_t *table, - unsigned index -) { - return (wasmtime_extern_t) { - .kind = WASMTIME_EXTERN_FUNC, - .of.func = (wasmtime_func_t) { - .store_id = table->store_id, - .__private = index - } - }; -} - -static bool ts_wasm_store__provide_builtin_import( - TSWasmStore *self, - const wasm_name_t *import_name, - wasmtime_extern_t *import -) { - wasmtime_error_t *error = NULL; - wasmtime_context_t *context = wasmtime_store_context(self->store); - - // Dynamic linking parameters - if (name_eq(import_name, "__memory_base")) { - wasmtime_val_t value = WASM_I32_VAL(self->current_memory_offset); - wasmtime_global_t global; - error = wasmtime_global_new(context, self->const_i32_type, &value, &global); - assert(!error); - *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_GLOBAL, .of.global = global}; - } else if (name_eq(import_name, "__table_base")) { - wasmtime_val_t value = WASM_I32_VAL(self->current_function_table_offset); - wasmtime_global_t global; - error = wasmtime_global_new(context, self->const_i32_type, &value, &global); - assert(!error); - *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_GLOBAL, .of.global = global}; - } else if (name_eq(import_name, "__stack_pointer")) { - *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_GLOBAL, .of.global = self->stack_pointer_global}; - } else if (name_eq(import_name, "__indirect_function_table")) { - *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_TABLE, .of.table = self->function_table}; - } else if (name_eq(import_name, "memory")) { - *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_MEMORY, .of.memory = self->memory}; - } - - // Builtin functions - else if (name_eq(import_name, "__assert_fail")) { - *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.assert_fail); - } else if (name_eq(import_name, "__cxa_atexit")) { - *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.at_exit); - } else if (name_eq(import_name, "args_get")) { - *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.args_get); - } else if (name_eq(import_name, "args_sizes_get")) { - *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.args_sizes_get); - } else if (name_eq(import_name, "abort")) { - *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.abort); - } else if (name_eq(import_name, "proc_exit")) { - *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.proc_exit); - } else if (name_eq(import_name, "emscripten_notify_memory_growth")) { - *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.notify_memory_growth); - } else if (name_eq(import_name, "tree_sitter_debug_message")) { - *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.debug_message); - } else { - return false; - } - - return true; -} - -static bool ts_wasm_store__call_module_initializer( - TSWasmStore *self, - const wasm_name_t *export_name, - wasmtime_extern_t *export, - wasm_trap_t **trap -) { - if ( - name_eq(export_name, "_initialize") || - name_eq(export_name, "__wasm_apply_data_relocs") || - name_eq(export_name, "__wasm_call_ctors") - ) { - wasmtime_context_t *context = wasmtime_store_context(self->store); - wasmtime_func_t initialization_func = export->of.func; - wasmtime_error_t *error = wasmtime_func_call(context, &initialization_func, NULL, 0, NULL, 0, trap); - assert(!error); - return true; - } else { - return false; - } -} - -TSWasmStore *ts_wasm_store_new(TSWasmEngine *engine, TSWasmError *wasm_error) { - TSWasmStore *self = ts_calloc(1, sizeof(TSWasmStore)); - wasmtime_store_t *store = wasmtime_store_new(engine, self, NULL); - wasmtime_context_t *context = wasmtime_store_context(store); - wasmtime_error_t *error = NULL; - wasm_trap_t *trap = NULL; - wasm_message_t message = WASM_EMPTY_VEC; - wasm_exporttype_vec_t export_types = WASM_EMPTY_VEC; - wasmtime_extern_t *imports = NULL; - wasmtime_module_t *stdlib_module = NULL; - wasm_memorytype_t *memory_type = NULL; - wasm_tabletype_t *table_type = NULL; - - // Define functions called by scanners via function pointers on the lexer. - LexerInWasmMemory lexer = { - .lookahead = 0, - .result_symbol = 0, - }; - FunctionDefinition lexer_definitions[] = { - { - (uint32_t *)&lexer.advance, - callback__lexer_advance, - wasm_functype_new_2_0(wasm_valtype_new_i32(), wasm_valtype_new_i32()) - }, - { - (uint32_t *)&lexer.mark_end, - callback__lexer_mark_end, - wasm_functype_new_1_0(wasm_valtype_new_i32()) - }, - { - (uint32_t *)&lexer.get_column, - callback__lexer_get_column, - wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32()) - }, - { - (uint32_t *)&lexer.is_at_included_range_start, - callback__lexer_is_at_included_range_start, - wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32()) - }, - { - (uint32_t *)&lexer.eof, - callback__lexer_eof, - wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32()) - }, - }; - - // Define builtin functions that can be imported by scanners. - BuiltinFunctionIndices builtin_fn_indices; - FunctionDefinition builtin_definitions[] = { - { - &builtin_fn_indices.proc_exit, - callback__abort, - wasm_functype_new_1_0(wasm_valtype_new_i32()) - }, - { - &builtin_fn_indices.abort, - callback__abort, - wasm_functype_new_0_0() - }, - { - &builtin_fn_indices.assert_fail, - callback__abort, - wasm_functype_new_4_0(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32()) - }, - { - &builtin_fn_indices.notify_memory_growth, - callback__noop, - wasm_functype_new_1_0(wasm_valtype_new_i32()) - }, - { - &builtin_fn_indices.debug_message, - callback__debug_message, - wasm_functype_new_2_0(wasm_valtype_new_i32(), wasm_valtype_new_i32()) - }, - { - &builtin_fn_indices.at_exit, - callback__noop, - wasm_functype_new_3_1(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32()) - }, - { - &builtin_fn_indices.args_get, - callback__noop, - wasm_functype_new_2_1(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32()) - }, - { - &builtin_fn_indices.args_sizes_get, - callback__noop, - wasm_functype_new_2_1(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32()) - }, - }; - - // Create all of the wasm functions. - unsigned builtin_definitions_len = array_len(builtin_definitions); - unsigned lexer_definitions_len = array_len(lexer_definitions); - for (unsigned i = 0; i < builtin_definitions_len; i++) { - FunctionDefinition *definition = &builtin_definitions[i]; - wasmtime_func_t func; - wasmtime_func_new_unchecked(context, definition->type, definition->callback, self, NULL, &func); - *definition->storage_location = func.__private; - wasm_functype_delete(definition->type); - } - for (unsigned i = 0; i < lexer_definitions_len; i++) { - FunctionDefinition *definition = &lexer_definitions[i]; - wasmtime_func_t func; - wasmtime_func_new_unchecked(context, definition->type, definition->callback, self, NULL, &func); - *definition->storage_location = func.__private; - wasm_functype_delete(definition->type); - } - - // Compile the stdlib module. - error = wasmtime_module_new(engine, STDLIB_WASM, STDLIB_WASM_LEN, &stdlib_module); - if (error) { - wasmtime_error_message(error, &message); - wasm_error->kind = TSWasmErrorKindCompile; - format( - &wasm_error->message, - "failed to compile wasm stdlib: %.*s", - (int)message.size, message.data - ); - goto error; - } - - // Retrieve the stdlib module's imports. - wasm_importtype_vec_t import_types = WASM_EMPTY_VEC; - wasmtime_module_imports(stdlib_module, &import_types); - - // Find the initial number of memory pages needed by the stdlib. - const wasm_memorytype_t *stdlib_memory_type; - for (unsigned i = 0; i < import_types.size; i++) { - wasm_importtype_t *import_type = import_types.data[i]; - const wasm_name_t *import_name = wasm_importtype_name(import_type); - if (name_eq(import_name, "memory")) { - const wasm_externtype_t *type = wasm_importtype_type(import_type); - stdlib_memory_type = wasm_externtype_as_memorytype_const(type); - } - } - if (!stdlib_memory_type) { - wasm_error->kind = TSWasmErrorKindCompile; - format( - &wasm_error->message, - "wasm stdlib is missing the 'memory' import" - ); - goto error; - } - - // Initialize store's memory - uint64_t initial_memory_pages = wasmtime_memorytype_minimum(stdlib_memory_type); - wasm_limits_t memory_limits = {.min = initial_memory_pages, .max = MAX_MEMORY_SIZE}; - memory_type = wasm_memorytype_new(&memory_limits); - wasmtime_memory_t memory; - error = wasmtime_memory_new(context, memory_type, &memory); - if (error) { - wasmtime_error_message(error, &message); - wasm_error->kind = TSWasmErrorKindAllocate; - format( - &wasm_error->message, - "failed to allocate wasm memory: %.*s", - (int)message.size, message.data - ); - goto error; - } - wasm_memorytype_delete(memory_type); - memory_type = NULL; - - // Initialize store's function table - wasm_limits_t table_limits = {.min = 1, .max = wasm_limits_max_default}; - table_type = wasm_tabletype_new(wasm_valtype_new(WASM_FUNCREF), &table_limits); - wasmtime_val_t initializer = {.kind = WASMTIME_FUNCREF}; - wasmtime_table_t function_table; - error = wasmtime_table_new(context, table_type, &initializer, &function_table); - if (error) { - wasmtime_error_message(error, &message); - wasm_error->kind = TSWasmErrorKindAllocate; - format( - &wasm_error->message, - "failed to allocate wasm table: %.*s", - (int)message.size, message.data - ); - goto error; - } - wasm_tabletype_delete(table_type); - table_type = NULL; - - unsigned stdlib_symbols_len = array_len(STDLIB_SYMBOLS); - - // Define globals for the stack and heap start addresses. - wasm_globaltype_t *const_i32_type = wasm_globaltype_new(wasm_valtype_new_i32(), WASM_CONST); - wasm_globaltype_t *var_i32_type = wasm_globaltype_new(wasm_valtype_new_i32(), WASM_VAR); - - wasmtime_val_t stack_pointer_value = WASM_I32_VAL(0); - wasmtime_global_t stack_pointer_global; - error = wasmtime_global_new(context, var_i32_type, &stack_pointer_value, &stack_pointer_global); - assert(!error); - - *self = (TSWasmStore) { - .engine = engine, - .store = store, - .memory = memory, - .function_table = function_table, - .language_instances = array_new(), - .stdlib_fn_indices = ts_calloc(stdlib_symbols_len, sizeof(uint32_t)), - .builtin_fn_indices = builtin_fn_indices, - .stack_pointer_global = stack_pointer_global, - .current_memory_offset = 0, - .current_function_table_offset = 0, - .const_i32_type = const_i32_type, - }; - - // Set up the imports for the stdlib module. - imports = ts_calloc(import_types.size, sizeof(wasmtime_extern_t)); - for (unsigned i = 0; i < import_types.size; i++) { - wasm_importtype_t *type = import_types.data[i]; - const wasm_name_t *import_name = wasm_importtype_name(type); - if (!ts_wasm_store__provide_builtin_import(self, import_name, &imports[i])) { - wasm_error->kind = TSWasmErrorKindInstantiate; - format( - &wasm_error->message, - "unexpected import in wasm stdlib: %.*s\n", - (int)import_name->size, import_name->data - ); - goto error; - } - } - - // Instantiate the stdlib module. - wasmtime_instance_t instance; - error = wasmtime_instance_new(context, stdlib_module, imports, import_types.size, &instance, &trap); - ts_free(imports); - imports = NULL; - if (error) { - wasmtime_error_message(error, &message); - wasm_error->kind = TSWasmErrorKindInstantiate; - format( - &wasm_error->message, - "failed to instantiate wasm stdlib module: %.*s", - (int)message.size, message.data - ); - goto error; - } - if (trap) { - wasm_trap_message(trap, &message); - wasm_error->kind = TSWasmErrorKindInstantiate; - format( - &wasm_error->message, - "trapped when instantiating wasm stdlib module: %.*s", - (int)message.size, message.data - ); - goto error; - } - wasm_importtype_vec_delete(&import_types); - - // Process the stdlib module's exports. - for (unsigned i = 0; i < stdlib_symbols_len; i++) { - self->stdlib_fn_indices[i] = UINT32_MAX; - } - wasmtime_module_exports(stdlib_module, &export_types); - for (unsigned i = 0; i < export_types.size; i++) { - wasm_exporttype_t *export_type = export_types.data[i]; - const wasm_name_t *name = wasm_exporttype_name(export_type); - - char *export_name; - size_t name_len; - wasmtime_extern_t export = {.kind = WASM_EXTERN_GLOBAL}; - bool exists = wasmtime_instance_export_nth(context, &instance, i, &export_name, &name_len, &export); - assert(exists); - - if (export.kind == WASMTIME_EXTERN_GLOBAL) { - if (name_eq(name, "__stack_pointer")) { - self->stack_pointer_global = export.of.global; - } - } - - if (export.kind == WASMTIME_EXTERN_FUNC) { - if (ts_wasm_store__call_module_initializer(self, name, &export, &trap)) { - if (trap) { - wasm_trap_message(trap, &message); - wasm_error->kind = TSWasmErrorKindInstantiate; - format( - &wasm_error->message, - "trap when calling stdlib relocation function: %.*s\n", - (int)message.size, message.data - ); - goto error; - } - continue; - } - - if (name_eq(name, "reset_heap")) { - self->builtin_fn_indices.reset_heap = export.of.func.__private; - continue; - } - - for (unsigned j = 0; j < stdlib_symbols_len; j++) { - if (name_eq(name, STDLIB_SYMBOLS[j])) { - self->stdlib_fn_indices[j] = export.of.func.__private; - break; - } - } - } - } - - if (self->builtin_fn_indices.reset_heap == UINT32_MAX) { - wasm_error->kind = TSWasmErrorKindInstantiate; - format( - &wasm_error->message, - "missing malloc reset function in wasm stdlib" - ); - goto error; - } - - for (unsigned i = 0; i < stdlib_symbols_len; i++) { - if (self->stdlib_fn_indices[i] == UINT32_MAX) { - wasm_error->kind = TSWasmErrorKindInstantiate; - format( - &wasm_error->message, - "missing exported symbol in wasm stdlib: %s", - STDLIB_SYMBOLS[i] - ); - goto error; - } - } - - wasm_exporttype_vec_delete(&export_types); - wasmtime_module_delete(stdlib_module); - - // Add all of the lexer callback functions to the function table. Store their function table - // indices on the in-memory lexer. - uint32_t table_index; - error = wasmtime_table_grow(context, &function_table, lexer_definitions_len, &initializer, &table_index); - if (error) { - wasmtime_error_message(error, &message); - wasm_error->kind = TSWasmErrorKindAllocate; - format( - &wasm_error->message, - "failed to grow wasm table to initial size: %.*s", - (int)message.size, message.data - ); - goto error; - } - for (unsigned i = 0; i < lexer_definitions_len; i++) { - FunctionDefinition *definition = &lexer_definitions[i]; - wasmtime_func_t func = {function_table.store_id, *definition->storage_location}; - wasmtime_val_t func_val = {.kind = WASMTIME_FUNCREF, .of.funcref = func}; - error = wasmtime_table_set(context, &function_table, table_index, &func_val); - assert(!error); - *(int32_t *)(definition->storage_location) = table_index; - table_index++; - } - - self->current_function_table_offset = table_index; - self->lexer_address = initial_memory_pages * MEMORY_PAGE_SIZE; - self->current_memory_offset = self->lexer_address + sizeof(LexerInWasmMemory); - - // Grow the memory enough to hold the builtin lexer and serialization buffer. - uint32_t new_pages_needed = (self->current_memory_offset - self->lexer_address - 1) / MEMORY_PAGE_SIZE + 1; - uint64_t prev_memory_size; - wasmtime_memory_grow(context, &memory, new_pages_needed, &prev_memory_size); - - uint8_t *memory_data = wasmtime_memory_data(context, &memory); - memcpy(&memory_data[self->lexer_address], &lexer, sizeof(lexer)); - return self; - -error: - ts_free(self); - if (stdlib_module) wasmtime_module_delete(stdlib_module); - if (store) wasmtime_store_delete(store); - if (import_types.size) wasm_importtype_vec_delete(&import_types); - if (memory_type) wasm_memorytype_delete(memory_type); - if (table_type) wasm_tabletype_delete(table_type); - if (trap) wasm_trap_delete(trap); - if (error) wasmtime_error_delete(error); - if (message.size) wasm_byte_vec_delete(&message); - if (export_types.size) wasm_exporttype_vec_delete(&export_types); - if (imports) ts_free(imports); - return NULL; -} - -void ts_wasm_store_delete(TSWasmStore *self) { - if (!self) return; - ts_free(self->stdlib_fn_indices); - wasm_globaltype_delete(self->const_i32_type); - wasmtime_store_delete(self->store); - wasm_engine_delete(self->engine); - for (unsigned i = 0; i < self->language_instances.size; i++) { - LanguageWasmInstance *instance = &self->language_instances.contents[i]; - language_id_delete(instance->language_id); - } - array_delete(&self->language_instances); - ts_free(self); -} - -size_t ts_wasm_store_language_count(const TSWasmStore *self) { - size_t result = 0; - for (unsigned i = 0; i < self->language_instances.size; i++) { - const WasmLanguageId *id = self->language_instances.contents[i].language_id; - if (!id->is_language_deleted) { - result++; - } - } - return result; -} - -static uint32_t ts_wasm_store__heap_address(TSWasmStore *self) { - return self->current_memory_offset + TREE_SITTER_SERIALIZATION_BUFFER_SIZE; -} - -static uint32_t ts_wasm_store__serialization_buffer_address(TSWasmStore *self) { - return self->current_memory_offset; -} - -static bool ts_wasm_store__instantiate( - TSWasmStore *self, - wasmtime_module_t *module, - const char *language_name, - const WasmDylinkInfo *dylink_info, - wasmtime_instance_t *result, - int32_t *language_address, - char **error_message -) { - wasmtime_error_t *error = NULL; - wasm_trap_t *trap = NULL; - wasm_message_t message = WASM_EMPTY_VEC; - char *language_function_name = NULL; - wasmtime_extern_t *imports = NULL; - wasmtime_context_t *context = wasmtime_store_context(self->store); - - // Grow the function table to make room for the new functions. - wasmtime_val_t initializer = {.kind = WASMTIME_FUNCREF}; - uint32_t prev_table_size; - error = wasmtime_table_grow(context, &self->function_table, dylink_info->table_size, &initializer, &prev_table_size); - if (error) { - format(error_message, "invalid function table size %u", dylink_info->table_size); - goto error; - } - - // Grow the memory to make room for the new data. - uint32_t needed_memory_size = ts_wasm_store__heap_address(self) + dylink_info->memory_size; - uint32_t current_memory_size = wasmtime_memory_data_size(context, &self->memory); - if (needed_memory_size > current_memory_size) { - uint32_t pages_to_grow = ( - needed_memory_size - current_memory_size + MEMORY_PAGE_SIZE - 1) / - MEMORY_PAGE_SIZE; - uint64_t prev_memory_size; - error = wasmtime_memory_grow(context, &self->memory, pages_to_grow, &prev_memory_size); - if (error) { - format(error_message, "invalid memory size %u", dylink_info->memory_size); - goto error; - } - } - - // Construct the language function name as string. - format(&language_function_name, "tree_sitter_%s", language_name); - - const uint64_t store_id = self->function_table.store_id; - - // Build the imports list for the module. - wasm_importtype_vec_t import_types = WASM_EMPTY_VEC; - wasmtime_module_imports(module, &import_types); - imports = ts_calloc(import_types.size, sizeof(wasmtime_extern_t)); - - for (unsigned i = 0; i < import_types.size; i++) { - const wasm_importtype_t *import_type = import_types.data[i]; - const wasm_name_t *import_name = wasm_importtype_name(import_type); - if (import_name->size == 0) { - format(error_message, "empty import name"); - goto error; - } - - if (ts_wasm_store__provide_builtin_import(self, import_name, &imports[i])) { - continue; - } - - bool defined_in_stdlib = false; - for (unsigned j = 0; j < array_len(STDLIB_SYMBOLS); j++) { - if (name_eq(import_name, STDLIB_SYMBOLS[j])) { - uint16_t address = self->stdlib_fn_indices[j]; - imports[i] = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_FUNC, .of.func = {store_id, address}}; - defined_in_stdlib = true; - break; - } - } - - if (!defined_in_stdlib) { - format( - error_message, - "invalid import '%.*s'\n", - (int)import_name->size, import_name->data - ); - goto error; - } - } - - wasmtime_instance_t instance; - error = wasmtime_instance_new(context, module, imports, import_types.size, &instance, &trap); - wasm_importtype_vec_delete(&import_types); - ts_free(imports); - imports = NULL; - if (error) { - wasmtime_error_message(error, &message); - format( - error_message, - "error instantiating wasm module: %.*s\n", - (int)message.size, message.data - ); - goto error; - } - if (trap) { - wasm_trap_message(trap, &message); - format( - error_message, - "trap when instantiating wasm module: %.*s\n", - (int)message.size, message.data - ); - goto error; - } - - self->current_memory_offset += dylink_info->memory_size; - self->current_function_table_offset += dylink_info->table_size; - - // Process the module's exports. - bool found_language = false; - wasmtime_extern_t language_extern; - wasm_exporttype_vec_t export_types = WASM_EMPTY_VEC; - wasmtime_module_exports(module, &export_types); - for (unsigned i = 0; i < export_types.size; i++) { - wasm_exporttype_t *export_type = export_types.data[i]; - const wasm_name_t *name = wasm_exporttype_name(export_type); - - size_t name_len; - char *export_name; - wasmtime_extern_t export = {.kind = WASM_EXTERN_GLOBAL}; - bool exists = wasmtime_instance_export_nth(context, &instance, i, &export_name, &name_len, &export); - assert(exists); - - // If the module exports an initialization or data-relocation function, call it. - if (ts_wasm_store__call_module_initializer(self, name, &export, &trap)) { - if (trap) { - wasm_trap_message(trap, &message); - format( - error_message, - "trap when calling data relocation function: %.*s\n", - (int)message.size, message.data - ); - goto error; - } - } - - // Find the main language function for the module. - else if (name_eq(name, language_function_name)) { - language_extern = export; - found_language = true; - } - } - wasm_exporttype_vec_delete(&export_types); - - if (!found_language) { - format( - error_message, - "module did not contain language function: %s", - language_function_name - ); - goto error; - } - - // Invoke the language function to get the static address of the language object. - wasmtime_func_t language_func = language_extern.of.func; - wasmtime_val_t language_address_val; - error = wasmtime_func_call(context, &language_func, NULL, 0, &language_address_val, 1, &trap); - assert(!error); - if (trap) { - wasm_trap_message(trap, &message); - format( - error_message, - "trapped when calling language function: %s: %.*s\n", - language_function_name, (int)message.size, message.data - ); - goto error; - } - - if (language_address_val.kind != WASMTIME_I32) { - format( - error_message, - "language function did not return an integer: %s\n", - language_function_name - ); - goto error; - } - - ts_free(language_function_name); - *result = instance; - *language_address = language_address_val.of.i32; - return true; - -error: - if (language_function_name) ts_free(language_function_name); - if (message.size) wasm_byte_vec_delete(&message); - if (error) wasmtime_error_delete(error); - if (trap) wasm_trap_delete(trap); - if (imports) ts_free(imports); - return false; -} - -static bool ts_wasm_store__sentinel_lex_fn(TSLexer *_lexer, TSStateId state) { - return false; -} - -const TSLanguage *ts_wasm_store_load_language( - TSWasmStore *self, - const char *language_name, - const char *wasm, - uint32_t wasm_len, - TSWasmError *wasm_error -) { - WasmDylinkInfo dylink_info; - wasmtime_module_t *module = NULL; - wasmtime_error_t *error = NULL; - wasm_error->kind = TSWasmErrorKindNone; - - if (!wasm_dylink_info__parse((const unsigned char *)wasm, wasm_len, &dylink_info)) { - wasm_error->kind = TSWasmErrorKindParse; - format(&wasm_error->message, "failed to parse dylink section of wasm module"); - goto error; - } - - // Compile the wasm code. - error = wasmtime_module_new(self->engine, (const uint8_t *)wasm, wasm_len, &module); - if (error) { - wasm_message_t message; - wasmtime_error_message(error, &message); - wasm_error->kind = TSWasmErrorKindCompile; - format(&wasm_error->message, "error compiling wasm module: %.*s", (int)message.size, message.data); - wasm_byte_vec_delete(&message); - goto error; - } - - // Instantiate the module in this store. - wasmtime_instance_t instance; - int32_t language_address; - if (!ts_wasm_store__instantiate( - self, - module, - language_name, - &dylink_info, - &instance, - &language_address, - &wasm_error->message - )) { - wasm_error->kind = TSWasmErrorKindInstantiate; - goto error; - } - - // Copy all of the static data out of the language object in wasm memory, - // constructing a native language object. - LanguageInWasmMemory wasm_language; - wasmtime_context_t *context = wasmtime_store_context(self->store); - const uint8_t *memory = wasmtime_memory_data(context, &self->memory); - memcpy(&wasm_language, &memory[language_address], sizeof(LanguageInWasmMemory)); - - if (wasm_language.version < LANGUAGE_VERSION_USABLE_VIA_WASM) { - wasm_error->kind = TSWasmErrorKindInstantiate; - format(&wasm_error->message, "language version %u is too old for wasm", wasm_language.version); - goto error; - } - - int32_t addresses[] = { - wasm_language.alias_map, - wasm_language.alias_sequences, - wasm_language.field_map_entries, - wasm_language.field_map_slices, - wasm_language.field_names, - wasm_language.keyword_lex_fn, - wasm_language.lex_fn, - wasm_language.lex_modes, - wasm_language.parse_actions, - wasm_language.parse_table, - wasm_language.primary_state_ids, - wasm_language.primary_state_ids, - wasm_language.public_symbol_map, - wasm_language.small_parse_table, - wasm_language.small_parse_table_map, - wasm_language.symbol_metadata, - wasm_language.symbol_metadata, - wasm_language.symbol_names, - wasm_language.external_token_count > 0 ? wasm_language.external_scanner.states : 0, - wasm_language.external_token_count > 0 ? wasm_language.external_scanner.symbol_map : 0, - wasm_language.external_token_count > 0 ? wasm_language.external_scanner.create : 0, - wasm_language.external_token_count > 0 ? wasm_language.external_scanner.destroy : 0, - wasm_language.external_token_count > 0 ? wasm_language.external_scanner.scan : 0, - wasm_language.external_token_count > 0 ? wasm_language.external_scanner.serialize : 0, - wasm_language.external_token_count > 0 ? wasm_language.external_scanner.deserialize : 0, - language_address, - self->current_memory_offset, - }; - uint32_t address_count = array_len(addresses); - - TSLanguage *language = ts_calloc(1, sizeof(TSLanguage)); - StringData symbol_name_buffer = array_new(); - StringData field_name_buffer = array_new(); - - *language = (TSLanguage) { - .version = wasm_language.version, - .symbol_count = wasm_language.symbol_count, - .alias_count = wasm_language.alias_count, - .token_count = wasm_language.token_count, - .external_token_count = wasm_language.external_token_count, - .state_count = wasm_language.state_count, - .large_state_count = wasm_language.large_state_count, - .production_id_count = wasm_language.production_id_count, - .field_count = wasm_language.field_count, - .max_alias_sequence_length = wasm_language.max_alias_sequence_length, - .keyword_capture_token = wasm_language.keyword_capture_token, - .parse_table = copy( - &memory[wasm_language.parse_table], - wasm_language.large_state_count * wasm_language.symbol_count * sizeof(uint16_t) - ), - .parse_actions = copy_unsized_static_array( - memory, - wasm_language.parse_actions, - addresses, - address_count - ), - .symbol_names = copy_strings( - memory, - wasm_language.symbol_names, - wasm_language.symbol_count + wasm_language.alias_count, - &symbol_name_buffer - ), - .symbol_metadata = copy( - &memory[wasm_language.symbol_metadata], - (wasm_language.symbol_count + wasm_language.alias_count) * sizeof(TSSymbolMetadata) - ), - .public_symbol_map = copy( - &memory[wasm_language.public_symbol_map], - (wasm_language.symbol_count + wasm_language.alias_count) * sizeof(TSSymbol) - ), - .lex_modes = copy( - &memory[wasm_language.lex_modes], - wasm_language.state_count * sizeof(TSLexMode) - ), - }; - - if (language->field_count > 0 && language->production_id_count > 0) { - language->field_map_slices = copy( - &memory[wasm_language.field_map_slices], - wasm_language.production_id_count * sizeof(TSFieldMapSlice) - ); - - // Determine the number of field map entries by finding the greatest index - // in any of the slices. - uint32_t field_map_entry_count = 0; - for (uint32_t i = 0; i < wasm_language.production_id_count; i++) { - TSFieldMapSlice slice = language->field_map_slices[i]; - uint32_t slice_end = slice.index + slice.length; - if (slice_end > field_map_entry_count) { - field_map_entry_count = slice_end; - } - } - - language->field_map_entries = copy( - &memory[wasm_language.field_map_entries], - field_map_entry_count * sizeof(TSFieldMapEntry) - ); - language->field_names = copy_strings( - memory, - wasm_language.field_names, - wasm_language.field_count + 1, - &field_name_buffer - ); - } - - if (language->max_alias_sequence_length > 0 && language->production_id_count > 0) { - // The alias map contains symbols, alias counts, and aliases, terminated by a null symbol. - int32_t alias_map_size = 0; - for (;;) { - TSSymbol symbol; - memcpy(&symbol, &memory[wasm_language.alias_map + alias_map_size], sizeof(symbol)); - alias_map_size += sizeof(TSSymbol); - if (symbol == 0) break; - uint16_t value_count; - memcpy(&value_count, &memory[wasm_language.alias_map + alias_map_size], sizeof(value_count)); - alias_map_size += value_count * sizeof(TSSymbol); - } - language->alias_map = copy( - &memory[wasm_language.alias_map], - alias_map_size * sizeof(TSSymbol) - ); - language->alias_sequences = copy( - &memory[wasm_language.alias_sequences], - wasm_language.production_id_count * wasm_language.max_alias_sequence_length * sizeof(TSSymbol) - ); - } - - if (language->state_count > language->large_state_count) { - uint32_t small_state_count = wasm_language.state_count - wasm_language.large_state_count; - language->small_parse_table_map = copy( - &memory[wasm_language.small_parse_table_map], - small_state_count * sizeof(uint32_t) - ); - language->small_parse_table = copy_unsized_static_array( - memory, - wasm_language.small_parse_table, - addresses, - address_count - ); - } - - if (language->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) { - language->primary_state_ids = copy( - &memory[wasm_language.primary_state_ids], - wasm_language.state_count * sizeof(TSStateId) - ); - } - - if (language->external_token_count > 0) { - language->external_scanner.symbol_map = copy( - &memory[wasm_language.external_scanner.symbol_map], - wasm_language.external_token_count * sizeof(TSSymbol) - ); - language->external_scanner.states = (void *)(uintptr_t)wasm_language.external_scanner.states; - } - - unsigned name_len = strlen(language_name); - char *name = ts_malloc(name_len + 1); - memcpy(name, language_name, name_len); - name[name_len] = '\0'; - - LanguageWasmModule *language_module = ts_malloc(sizeof(LanguageWasmModule)); - *language_module = (LanguageWasmModule) { - .language_id = language_id_new(), - .module = module, - .name = name, - .symbol_name_buffer = symbol_name_buffer.contents, - .field_name_buffer = field_name_buffer.contents, - .dylink_info = dylink_info, - .ref_count = 1, - }; - - // The lex functions are not used for wasm languages. Use those two fields - // to mark this language as WASM-based and to store the language's - // WASM-specific data. - language->lex_fn = ts_wasm_store__sentinel_lex_fn; - language->keyword_lex_fn = (void *)language_module; - - // Clear out any instances of languages that have been deleted. - for (unsigned i = 0; i < self->language_instances.size; i++) { - WasmLanguageId *id = self->language_instances.contents[i].language_id; - if (id->is_language_deleted) { - language_id_delete(id); - array_erase(&self->language_instances, i); - i--; - } - } - - // Store this store's instance of this language module. - array_push(&self->language_instances, ((LanguageWasmInstance) { - .language_id = language_id_clone(language_module->language_id), - .instance = instance, - .external_states_address = wasm_language.external_scanner.states, - .lex_main_fn_index = wasm_language.lex_fn, - .lex_keyword_fn_index = wasm_language.keyword_lex_fn, - .scanner_create_fn_index = wasm_language.external_scanner.create, - .scanner_destroy_fn_index = wasm_language.external_scanner.destroy, - .scanner_serialize_fn_index = wasm_language.external_scanner.serialize, - .scanner_deserialize_fn_index = wasm_language.external_scanner.deserialize, - .scanner_scan_fn_index = wasm_language.external_scanner.scan, - })); - - return language; - -error: - if (module) wasmtime_module_delete(module); - return NULL; -} - -bool ts_wasm_store_add_language( - TSWasmStore *self, - const TSLanguage *language, - uint32_t *index -) { - wasmtime_context_t *context = wasmtime_store_context(self->store); - const LanguageWasmModule *language_module = (void *)language->keyword_lex_fn; - - // Search for this store's instance of the language module. Also clear out any - // instances of languages that have been deleted. - bool exists = false; - for (unsigned i = 0; i < self->language_instances.size; i++) { - WasmLanguageId *id = self->language_instances.contents[i].language_id; - if (id->is_language_deleted) { - language_id_delete(id); - array_erase(&self->language_instances, i); - i--; - } else if (id == language_module->language_id) { - exists = true; - *index = i; - } - } - - // If the language module has not been instantiated in this store, then add - // it to this store. - if (!exists) { - *index = self->language_instances.size; - char *message; - wasmtime_instance_t instance; - int32_t language_address; - if (!ts_wasm_store__instantiate( - self, - language_module->module, - language_module->name, - &language_module->dylink_info, - &instance, - &language_address, - &message - )) { - ts_free(message); - return false; - } - - LanguageInWasmMemory wasm_language; - const uint8_t *memory = wasmtime_memory_data(context, &self->memory); - memcpy(&wasm_language, &memory[language_address], sizeof(LanguageInWasmMemory)); - array_push(&self->language_instances, ((LanguageWasmInstance) { - .language_id = language_id_clone(language_module->language_id), - .instance = instance, - .external_states_address = wasm_language.external_scanner.states, - .lex_main_fn_index = wasm_language.lex_fn, - .lex_keyword_fn_index = wasm_language.keyword_lex_fn, - .scanner_create_fn_index = wasm_language.external_scanner.create, - .scanner_destroy_fn_index = wasm_language.external_scanner.destroy, - .scanner_serialize_fn_index = wasm_language.external_scanner.serialize, - .scanner_deserialize_fn_index = wasm_language.external_scanner.deserialize, - .scanner_scan_fn_index = wasm_language.external_scanner.scan, - })); - } - - return true; -} - -void ts_wasm_store_reset_heap(TSWasmStore *self) { - wasmtime_context_t *context = wasmtime_store_context(self->store); - wasmtime_func_t func = { - self->function_table.store_id, - self->builtin_fn_indices.reset_heap - }; - wasm_trap_t *trap = NULL; - wasmtime_val_t args[1] = { - {.of.i32 = ts_wasm_store__heap_address(self), .kind = WASMTIME_I32}, - }; - - wasmtime_error_t *error = wasmtime_func_call(context, &func, args, 1, NULL, 0, &trap); - assert(!error); - assert(!trap); -} - -bool ts_wasm_store_start(TSWasmStore *self, TSLexer *lexer, const TSLanguage *language) { - uint32_t instance_index; - if (!ts_wasm_store_add_language(self, language, &instance_index)) return false; - self->current_lexer = lexer; - self->current_instance = &self->language_instances.contents[instance_index]; - self->has_error = false; - ts_wasm_store_reset_heap(self); - return true; -} - -void ts_wasm_store_reset(TSWasmStore *self) { - self->current_lexer = NULL; - self->current_instance = NULL; - self->has_error = false; - ts_wasm_store_reset_heap(self); -} - -static void ts_wasm_store__call( - TSWasmStore *self, - int32_t function_index, - wasmtime_val_raw_t *args_and_results, - size_t args_and_results_len -) { - wasmtime_context_t *context = wasmtime_store_context(self->store); - wasmtime_val_t value; - bool succeeded = wasmtime_table_get(context, &self->function_table, function_index, &value); - assert(succeeded); - assert(value.kind == WASMTIME_FUNCREF); - wasmtime_func_t func = value.of.funcref; - - wasm_trap_t *trap = NULL; - wasmtime_error_t *error = wasmtime_func_call_unchecked(context, &func, args_and_results, args_and_results_len, &trap); - if (error) { - // wasm_message_t message; - // wasmtime_error_message(error, &message); - // fprintf( - // stderr, - // "error in wasm module: %.*s\n", - // (int)message.size, message.data - // ); - wasmtime_error_delete(error); - self->has_error = true; - } else if (trap) { - // wasm_message_t message; - // wasm_trap_message(trap, &message); - // fprintf( - // stderr, - // "trap in wasm module: %.*s\n", - // (int)message.size, message.data - // ); - wasm_trap_delete(trap); - self->has_error = true; - } -} - -static bool ts_wasm_store__call_lex_function(TSWasmStore *self, unsigned function_index, TSStateId state) { - wasmtime_context_t *context = wasmtime_store_context(self->store); - uint8_t *memory_data = wasmtime_memory_data(context, &self->memory); - memcpy( - &memory_data[self->lexer_address], - &self->current_lexer->lookahead, - sizeof(self->current_lexer->lookahead) - ); - - wasmtime_val_raw_t args[2] = { - {.i32 = self->lexer_address}, - {.i32 = state}, - }; - ts_wasm_store__call(self, function_index, args, 2); - if (self->has_error) return false; - bool result = args[0].i32; - - memcpy( - &self->current_lexer->lookahead, - &memory_data[self->lexer_address], - sizeof(self->current_lexer->lookahead) + sizeof(self->current_lexer->result_symbol) - ); - return result; -} - -bool ts_wasm_store_call_lex_main(TSWasmStore *self, TSStateId state) { - return ts_wasm_store__call_lex_function( - self, - self->current_instance->lex_main_fn_index, - state - ); -} - -bool ts_wasm_store_call_lex_keyword(TSWasmStore *self, TSStateId state) { - return ts_wasm_store__call_lex_function( - self, - self->current_instance->lex_keyword_fn_index, - state - ); -} - -uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *self) { - wasmtime_val_raw_t args[1] = {{.i32 = 0}}; - ts_wasm_store__call(self, self->current_instance->scanner_create_fn_index, args, 1); - if (self->has_error) return 0; - return args[0].i32; -} - -void ts_wasm_store_call_scanner_destroy(TSWasmStore *self, uint32_t scanner_address) { - if (self->current_instance) { - wasmtime_val_raw_t args[1] = {{.i32 = scanner_address}}; - ts_wasm_store__call(self, self->current_instance->scanner_destroy_fn_index, args, 1); - } -} - -bool ts_wasm_store_call_scanner_scan( - TSWasmStore *self, - uint32_t scanner_address, - uint32_t valid_tokens_ix -) { - wasmtime_context_t *context = wasmtime_store_context(self->store); - uint8_t *memory_data = wasmtime_memory_data(context, &self->memory); - - memcpy( - &memory_data[self->lexer_address], - &self->current_lexer->lookahead, - sizeof(self->current_lexer->lookahead) - ); - - uint32_t valid_tokens_address = - self->current_instance->external_states_address + - (valid_tokens_ix * sizeof(bool)); - wasmtime_val_raw_t args[3] = { - {.i32 = scanner_address}, - {.i32 = self->lexer_address}, - {.i32 = valid_tokens_address} - }; - ts_wasm_store__call(self, self->current_instance->scanner_scan_fn_index, args, 3); - if (self->has_error) return false; - - memcpy( - &self->current_lexer->lookahead, - &memory_data[self->lexer_address], - sizeof(self->current_lexer->lookahead) + sizeof(self->current_lexer->result_symbol) - ); - return args[0].i32; -} - -uint32_t ts_wasm_store_call_scanner_serialize( - TSWasmStore *self, - uint32_t scanner_address, - char *buffer -) { - wasmtime_context_t *context = wasmtime_store_context(self->store); - uint8_t *memory_data = wasmtime_memory_data(context, &self->memory); - uint32_t serialization_buffer_address = ts_wasm_store__serialization_buffer_address(self); - - wasmtime_val_raw_t args[2] = { - {.i32 = scanner_address}, - {.i32 = serialization_buffer_address}, - }; - ts_wasm_store__call(self, self->current_instance->scanner_serialize_fn_index, args, 2); - if (self->has_error) return 0; - - uint32_t length = args[0].i32; - if (length > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { - self->has_error = true; - return 0; - } - - if (length > 0) { - memcpy( - ((Lexer *)self->current_lexer)->debug_buffer, - &memory_data[serialization_buffer_address], - length - ); - } - return length; -} - -void ts_wasm_store_call_scanner_deserialize( - TSWasmStore *self, - uint32_t scanner_address, - const char *buffer, - unsigned length -) { - wasmtime_context_t *context = wasmtime_store_context(self->store); - uint8_t *memory_data = wasmtime_memory_data(context, &self->memory); - uint32_t serialization_buffer_address = ts_wasm_store__serialization_buffer_address(self); - - if (length > 0) { - memcpy( - &memory_data[serialization_buffer_address], - buffer, - length - ); - } - - wasmtime_val_raw_t args[3] = { - {.i32 = scanner_address}, - {.i32 = serialization_buffer_address}, - {.i32 = length}, - }; - ts_wasm_store__call(self, self->current_instance->scanner_deserialize_fn_index, args, 3); -} - -bool ts_wasm_store_has_error(const TSWasmStore *self) { - return self->has_error; -} - -bool ts_language_is_wasm(const TSLanguage *self) { - return self->lex_fn == ts_wasm_store__sentinel_lex_fn; -} - -static inline LanguageWasmModule *ts_language__wasm_module(const TSLanguage *self) { - return (LanguageWasmModule *)self->keyword_lex_fn; -} - -void ts_wasm_language_retain(const TSLanguage *self) { - LanguageWasmModule *module = ts_language__wasm_module(self); - assert(module->ref_count > 0); - atomic_inc(&module->ref_count); -} - -void ts_wasm_language_release(const TSLanguage *self) { - LanguageWasmModule *module = ts_language__wasm_module(self); - assert(module->ref_count > 0); - if (atomic_dec(&module->ref_count) == 0) { - // Update the language id to reflect that the language is deleted. This allows any wasm stores - // that hold wasm instances for this language to delete those instances. - atomic_inc(&module->language_id->is_language_deleted); - language_id_delete(module->language_id); - - ts_free((void *)module->field_name_buffer); - ts_free((void *)module->symbol_name_buffer); - ts_free((void *)module->name); - wasmtime_module_delete(module->module); - ts_free(module); - - ts_free((void *)self->alias_map); - ts_free((void *)self->alias_sequences); - ts_free((void *)self->external_scanner.symbol_map); - ts_free((void *)self->field_map_entries); - ts_free((void *)self->field_map_slices); - ts_free((void *)self->field_names); - ts_free((void *)self->lex_modes); - ts_free((void *)self->parse_actions); - ts_free((void *)self->parse_table); - ts_free((void *)self->primary_state_ids); - ts_free((void *)self->public_symbol_map); - ts_free((void *)self->small_parse_table); - ts_free((void *)self->small_parse_table_map); - ts_free((void *)self->symbol_metadata); - ts_free((void *)self->symbol_names); - ts_free((void *)self); - } -} - -#else - -// If the WASM feature is not enabled, define dummy versions of all of the -// wasm-related functions. - -void ts_wasm_store_delete(TSWasmStore *self) { - (void)self; -} - -bool ts_wasm_store_start( - TSWasmStore *self, - TSLexer *lexer, - const TSLanguage *language -) { - (void)self; - (void)lexer; - (void)language; - return false; -} - -void ts_wasm_store_reset(TSWasmStore *self) { - (void)self; -} - -bool ts_wasm_store_call_lex_main(TSWasmStore *self, TSStateId state) { - (void)self; - (void)state; - return false; -} - -bool ts_wasm_store_call_lex_keyword(TSWasmStore *self, TSStateId state) { - (void)self; - (void)state; - return false; -} - -uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *self) { - (void)self; - return 0; -} - -void ts_wasm_store_call_scanner_destroy( - TSWasmStore *self, - uint32_t scanner_address -) { - (void)self; - (void)scanner_address; -} - -bool ts_wasm_store_call_scanner_scan( - TSWasmStore *self, - uint32_t scanner_address, - uint32_t valid_tokens_ix -) { - (void)self; - (void)scanner_address; - (void)valid_tokens_ix; - return false; -} - -uint32_t ts_wasm_store_call_scanner_serialize( - TSWasmStore *self, - uint32_t scanner_address, - char *buffer -) { - (void)self; - (void)scanner_address; - (void)buffer; - return 0; -} - -void ts_wasm_store_call_scanner_deserialize( - TSWasmStore *self, - uint32_t scanner_address, - const char *buffer, - unsigned length -) { - (void)self; - (void)scanner_address; - (void)buffer; - (void)length; -} - -bool ts_wasm_store_has_error(const TSWasmStore *self) { - (void)self; - return false; -} - -bool ts_language_is_wasm(const TSLanguage *self) { - (void)self; - return false; -} - -void ts_wasm_language_retain(const TSLanguage *self) { - (void)self; -} - -void ts_wasm_language_release(const TSLanguage *self) { - (void)self; -} - -#endif diff --git a/parser/src/wasm_store.h b/parser/src/wasm_store.h deleted file mode 100644 index 1ad2ae57..00000000 --- a/parser/src/wasm_store.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef TREE_SITTER_WASM_H_ -#define TREE_SITTER_WASM_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "api.h" -#include "./parser.h" - -bool ts_wasm_store_start(TSWasmStore *, TSLexer *, const TSLanguage *); -void ts_wasm_store_reset(TSWasmStore *); -bool ts_wasm_store_has_error(const TSWasmStore *); - -bool ts_wasm_store_call_lex_main(TSWasmStore *, TSStateId); -bool ts_wasm_store_call_lex_keyword(TSWasmStore *, TSStateId); - -uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *); -void ts_wasm_store_call_scanner_destroy(TSWasmStore *, uint32_t); -bool ts_wasm_store_call_scanner_scan(TSWasmStore *, uint32_t, uint32_t); -uint32_t ts_wasm_store_call_scanner_serialize(TSWasmStore *, uint32_t, char *); -void ts_wasm_store_call_scanner_deserialize(TSWasmStore *, uint32_t, const char *, unsigned); - -void ts_wasm_language_retain(const TSLanguage *); -void ts_wasm_language_release(const TSLanguage *); - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_WASM_H_