Removing even more shit in the parser lib
This commit is contained in:
parent
7e88e17d67
commit
a7bfe526b0
20 changed files with 215 additions and 461 deletions
|
|
@ -6,7 +6,7 @@
|
||||||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||||
/* +#+#+#+#+#+ +#+ */
|
/* +#+#+#+#+#+ +#+ */
|
||||||
/* Created: 2024/06/17 12:41:56 by maiboyer #+# #+# */
|
/* Created: 2024/06/17 12:41:56 by maiboyer #+# #+# */
|
||||||
/* Updated: 2024/07/02 21:55:19 by maiboyer ### ########.fr */
|
/* Updated: 2024/07/03 18:47:44 by maiboyer ### ########.fr */
|
||||||
/* */
|
/* */
|
||||||
/* ************************************************************************** */
|
/* ************************************************************************** */
|
||||||
|
|
||||||
|
|
@ -666,7 +666,7 @@ t_error build_sym_expansion(t_parse_node self, t_const_str input, t_ast_node *ou
|
||||||
(void)(self);
|
(void)(self);
|
||||||
if (out == NULL)
|
if (out == NULL)
|
||||||
return (ERROR);
|
return (ERROR);
|
||||||
if (ts_node_symbol(self) != sym_simple_expansion)
|
if (ts_node_symbol(self) != sym_expansion)
|
||||||
return (ERROR);
|
return (ERROR);
|
||||||
ret = ast_alloc(AST_EXPANSION);
|
ret = ast_alloc(AST_EXPANSION);
|
||||||
ret->data.expansion.kind = E_OP_NONE;
|
ret->data.expansion.kind = E_OP_NONE;
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
#include "api.h"
|
#include "api.h"
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
|
||||||
typedef struct
|
typedef struct NodeChildIterator
|
||||||
{
|
{
|
||||||
Subtree parent;
|
Subtree parent;
|
||||||
const TSTree *tree;
|
const TSTree *tree;
|
||||||
|
|
|
||||||
|
|
@ -1,21 +1,16 @@
|
||||||
#define _POSIX_C_SOURCE 200112L
|
#define _POSIX_C_SOURCE 200112L
|
||||||
|
|
||||||
|
#include "./api.h"
|
||||||
#include "./array.h"
|
#include "./array.h"
|
||||||
#include "me/mem/mem.h"
|
|
||||||
// #include "./atomic.h"
|
|
||||||
// #include "./clock.h"
|
|
||||||
#include "./error_costs.h"
|
|
||||||
#include "./language.h"
|
#include "./language.h"
|
||||||
#include "./length.h"
|
#include "./length.h"
|
||||||
#include "./lexer.h"
|
#include "./lexer.h"
|
||||||
#include "./reduce_action.h"
|
#include "./reduce_action.h"
|
||||||
#include "./reusable_node.h"
|
|
||||||
#include "./stack.h"
|
#include "./stack.h"
|
||||||
#include "./subtree.h"
|
#include "./subtree.h"
|
||||||
#include "./tree.h"
|
#include "./tree.h"
|
||||||
#include "api.h"
|
#include "me/mem/mem.h"
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <inttypes.h>
|
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
@ -55,7 +50,6 @@ struct TSParser
|
||||||
SubtreeArray trailing_extras2;
|
SubtreeArray trailing_extras2;
|
||||||
SubtreeArray scratch_trees;
|
SubtreeArray scratch_trees;
|
||||||
TokenCache token_cache;
|
TokenCache token_cache;
|
||||||
ReusableNode reusable_node;
|
|
||||||
void *external_scanner_payload;
|
void *external_scanner_payload;
|
||||||
unsigned accept_count;
|
unsigned accept_count;
|
||||||
unsigned operation_count;
|
unsigned operation_count;
|
||||||
|
|
@ -182,26 +176,6 @@ static bool ts_parser__breakdown_top_of_stack(TSParser *self, StackVersion versi
|
||||||
return did_break_down;
|
return did_break_down;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ts_parser__breakdown_lookahead(TSParser *self, Subtree *lookahead, TSStateId state, ReusableNode *reusable_node)
|
|
||||||
{
|
|
||||||
bool did_descend = false;
|
|
||||||
Subtree tree = reusable_node_tree(reusable_node);
|
|
||||||
while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state)
|
|
||||||
{
|
|
||||||
LOG("state_mismatch sym:%s", TREE_NAME(tree));
|
|
||||||
reusable_node_descend(reusable_node);
|
|
||||||
tree = reusable_node_tree(reusable_node);
|
|
||||||
did_descend = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (did_descend)
|
|
||||||
{
|
|
||||||
ts_subtree_release(&self->tree_pool, *lookahead);
|
|
||||||
*lookahead = tree;
|
|
||||||
ts_subtree_retain(*lookahead);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static ErrorComparison ts_parser__compare_versions(TSParser *self, ErrorStatus a, ErrorStatus b)
|
static ErrorComparison ts_parser__compare_versions(TSParser *self, ErrorStatus a, ErrorStatus b)
|
||||||
{
|
{
|
||||||
(void)self;
|
(void)self;
|
||||||
|
|
@ -592,100 +566,6 @@ static void ts_parser__set_cached_token(TSParser *self, uint32_t byte_index, Sub
|
||||||
cache->last_external_token = last_external_token;
|
cache->last_external_token = last_external_token;
|
||||||
}
|
}
|
||||||
|
|
||||||
// static bool ts_parser__has_included_range_difference(const TSParser *self, uint32_t start_position, uint32_t end_position)
|
|
||||||
// {
|
|
||||||
// return ts_range_array_intersects(&self->included_range_differences, self->included_range_difference_index, start_position,
|
|
||||||
// end_position);
|
|
||||||
// }
|
|
||||||
|
|
||||||
static Subtree ts_parser__reuse_node(TSParser *self, StackVersion version, TSStateId *state, uint32_t position, Subtree last_external_token,
|
|
||||||
TableEntry *table_entry)
|
|
||||||
{
|
|
||||||
Subtree result;
|
|
||||||
while ((result = reusable_node_tree(&self->reusable_node)).ptr)
|
|
||||||
{
|
|
||||||
uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node);
|
|
||||||
uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result);
|
|
||||||
|
|
||||||
// Do not reuse an EOF node if the included ranges array has changes
|
|
||||||
// later on in the file.
|
|
||||||
if (ts_subtree_is_eof(result))
|
|
||||||
end_byte_offset = UINT32_MAX;
|
|
||||||
|
|
||||||
if (byte_offset > position)
|
|
||||||
{
|
|
||||||
LOG("before_reusable_node symbol:%s", TREE_NAME(result));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (byte_offset < position)
|
|
||||||
{
|
|
||||||
LOG("past_reusable_node symbol:%s", TREE_NAME(result));
|
|
||||||
if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node))
|
|
||||||
{
|
|
||||||
reusable_node_advance(&self->reusable_node);
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token))
|
|
||||||
{
|
|
||||||
LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result));
|
|
||||||
reusable_node_advance(&self->reusable_node);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *reason = NULL;
|
|
||||||
if (ts_subtree_has_changes(result))
|
|
||||||
{
|
|
||||||
reason = "has_changes";
|
|
||||||
}
|
|
||||||
else if (ts_subtree_is_error(result))
|
|
||||||
{
|
|
||||||
reason = "is_error";
|
|
||||||
}
|
|
||||||
else if (ts_subtree_missing(result))
|
|
||||||
{
|
|
||||||
reason = "is_missing";
|
|
||||||
}
|
|
||||||
else if (ts_subtree_is_fragile(result))
|
|
||||||
{
|
|
||||||
reason = "is_fragile";
|
|
||||||
}
|
|
||||||
// else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset))
|
|
||||||
// {
|
|
||||||
// reason = "contains_different_included_range";
|
|
||||||
// }
|
|
||||||
|
|
||||||
if (reason)
|
|
||||||
{
|
|
||||||
LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result));
|
|
||||||
if (!reusable_node_descend(&self->reusable_node))
|
|
||||||
{
|
|
||||||
reusable_node_advance(&self->reusable_node);
|
|
||||||
ts_parser__breakdown_top_of_stack(self, version);
|
|
||||||
*state = ts_stack_state(self->stack, version);
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result);
|
|
||||||
ts_language_table_entry(self->language, *state, leaf_symbol, table_entry);
|
|
||||||
if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry))
|
|
||||||
{
|
|
||||||
LOG("cant_reuse_node symbol:%s, first_leaf_symbol:%s", TREE_NAME(result), SYM_NAME(leaf_symbol));
|
|
||||||
reusable_node_advance_past_leaf(&self->reusable_node);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG("reuse_node symbol:%s", TREE_NAME(result));
|
|
||||||
ts_subtree_retain(result);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL_SUBTREE;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine if a given tree should be replaced by an alternative tree.
|
// Determine if a given tree should be replaced by an alternative tree.
|
||||||
//
|
//
|
||||||
// The decision is based on the trees' error costs (if any), their dynamic precedence,
|
// The decision is based on the trees' error costs (if any), their dynamic precedence,
|
||||||
|
|
@ -1361,10 +1241,6 @@ static void ts_parser__handle_error(TSParser *self, StackVersion version, Subtre
|
||||||
// current lookahead token's "lookahead bytes" value, which describes how far
|
// current lookahead token's "lookahead bytes" value, which describes how far
|
||||||
// the lexer needed to look ahead beyond the content of the token in order to
|
// the lexer needed to look ahead beyond the content of the token in order to
|
||||||
// recognize it.
|
// recognize it.
|
||||||
if (ts_subtree_child_count(lookahead) > 0)
|
|
||||||
{
|
|
||||||
ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
|
|
||||||
}
|
|
||||||
ts_parser__recover(self, version, lookahead);
|
ts_parser__recover(self, version, lookahead);
|
||||||
|
|
||||||
LOG_STACK();
|
LOG_STACK();
|
||||||
|
|
@ -1372,25 +1248,18 @@ static void ts_parser__handle_error(TSParser *self, StackVersion version, Subtre
|
||||||
|
|
||||||
static bool ts_parser__advance(TSParser *self, StackVersion version, bool allow_node_reuse)
|
static bool ts_parser__advance(TSParser *self, StackVersion version, bool allow_node_reuse)
|
||||||
{
|
{
|
||||||
|
(void)(allow_node_reuse);
|
||||||
TSStateId state = ts_stack_state(self->stack, version);
|
TSStateId state = ts_stack_state(self->stack, version);
|
||||||
uint32_t position = ts_stack_position(self->stack, version).bytes;
|
uint32_t position = ts_stack_position(self->stack, version).bytes;
|
||||||
Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
|
Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
|
||||||
|
|
||||||
bool did_reuse = true;
|
|
||||||
Subtree lookahead = NULL_SUBTREE;
|
Subtree lookahead = NULL_SUBTREE;
|
||||||
TableEntry table_entry = {.action_count = 0};
|
TableEntry table_entry = {.action_count = 0};
|
||||||
|
|
||||||
// If possible, reuse a node from the previous syntax tree.
|
|
||||||
if (allow_node_reuse)
|
|
||||||
{
|
|
||||||
lookahead = ts_parser__reuse_node(self, version, &state, position, last_external_token, &table_entry);
|
|
||||||
}
|
|
||||||
|
|
||||||
// If no node from the previous syntax tree could be reused, then try to
|
// If no node from the previous syntax tree could be reused, then try to
|
||||||
// reuse the token previously returned by the lexer.
|
// reuse the token previously returned by the lexer.
|
||||||
if (!lookahead.ptr)
|
if (!lookahead.ptr)
|
||||||
{
|
{
|
||||||
did_reuse = false;
|
|
||||||
lookahead = ts_parser__get_cached_token(self, state, position, last_external_token, &table_entry);
|
lookahead = ts_parser__get_cached_token(self, state, position, last_external_token, &table_entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1464,13 +1333,12 @@ static bool ts_parser__advance(TSParser *self, StackVersion version, bool allow_
|
||||||
|
|
||||||
if (ts_subtree_child_count(lookahead) > 0)
|
if (ts_subtree_child_count(lookahead) > 0)
|
||||||
{
|
{
|
||||||
ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node);
|
|
||||||
next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead));
|
next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead));
|
||||||
}
|
}
|
||||||
|
|
||||||
ts_parser__shift(self, version, next_state, lookahead, action.shift.extra);
|
ts_parser__shift(self, version, next_state, lookahead, action.shift.extra);
|
||||||
if (did_reuse)
|
// if (did_reuse)
|
||||||
reusable_node_advance(&self->reusable_node);
|
// reusable_node_advance(&self->reusable_node);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1495,14 +1363,8 @@ static bool ts_parser__advance(TSParser *self, StackVersion version, bool allow_
|
||||||
}
|
}
|
||||||
|
|
||||||
case TSParseActionTypeRecover: {
|
case TSParseActionTypeRecover: {
|
||||||
if (ts_subtree_child_count(lookahead) > 0)
|
|
||||||
{
|
|
||||||
ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
|
|
||||||
}
|
|
||||||
|
|
||||||
ts_parser__recover(self, version, lookahead);
|
ts_parser__recover(self, version, lookahead);
|
||||||
if (did_reuse)
|
|
||||||
reusable_node_advance(&self->reusable_node);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1729,7 +1591,6 @@ TSParser *ts_parser_new(void)
|
||||||
self->tree_pool = ts_subtree_pool_new(32);
|
self->tree_pool = ts_subtree_pool_new(32);
|
||||||
self->stack = ts_stack_new(&self->tree_pool);
|
self->stack = ts_stack_new(&self->tree_pool);
|
||||||
self->finished_tree = NULL_SUBTREE;
|
self->finished_tree = NULL_SUBTREE;
|
||||||
self->reusable_node = reusable_node_new();
|
|
||||||
self->cancellation_flag = NULL;
|
self->cancellation_flag = NULL;
|
||||||
self->language = NULL;
|
self->language = NULL;
|
||||||
self->has_scanner_error = false;
|
self->has_scanner_error = false;
|
||||||
|
|
@ -1760,7 +1621,6 @@ void ts_parser_delete(TSParser *self)
|
||||||
ts_lexer_delete(&self->lexer);
|
ts_lexer_delete(&self->lexer);
|
||||||
ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
|
ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
|
||||||
ts_subtree_pool_delete(&self->tree_pool);
|
ts_subtree_pool_delete(&self->tree_pool);
|
||||||
reusable_node_delete(&self->reusable_node);
|
|
||||||
array_delete(&self->trailing_extras);
|
array_delete(&self->trailing_extras);
|
||||||
array_delete(&self->trailing_extras2);
|
array_delete(&self->trailing_extras2);
|
||||||
array_delete(&self->scratch_trees);
|
array_delete(&self->scratch_trees);
|
||||||
|
|
@ -1797,7 +1657,6 @@ void ts_parser_reset(TSParser *self)
|
||||||
self->old_tree = NULL_SUBTREE;
|
self->old_tree = NULL_SUBTREE;
|
||||||
}
|
}
|
||||||
|
|
||||||
reusable_node_clear(&self->reusable_node);
|
|
||||||
ts_lexer_reset(&self->lexer, length_zero());
|
ts_lexer_reset(&self->lexer, length_zero());
|
||||||
ts_stack_clear(self->stack);
|
ts_stack_clear(self->stack);
|
||||||
ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
|
ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
|
||||||
|
|
@ -1830,7 +1689,6 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input)
|
||||||
if (self->has_scanner_error)
|
if (self->has_scanner_error)
|
||||||
goto exit;
|
goto exit;
|
||||||
|
|
||||||
reusable_node_clear(&self->reusable_node);
|
|
||||||
LOG("new_parse");
|
LOG("new_parse");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,6 @@
|
||||||
#ifndef TREE_SITTER_PARSER_H_
|
#ifndef TREE_SITTER_PARSER_H_
|
||||||
#define TREE_SITTER_PARSER_H_
|
#define TREE_SITTER_PARSER_H_
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
@ -14,191 +10,206 @@ extern "C" {
|
||||||
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
|
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
|
||||||
|
|
||||||
#ifndef TREE_SITTER_API_H_
|
#ifndef TREE_SITTER_API_H_
|
||||||
typedef uint16_t TSStateId;
|
typedef uint16_t TSStateId;
|
||||||
typedef uint16_t TSSymbol;
|
typedef uint16_t TSSymbol;
|
||||||
typedef uint16_t TSFieldId;
|
typedef uint16_t TSFieldId;
|
||||||
typedef struct TSLanguage TSLanguage;
|
typedef struct TSLanguage TSLanguage;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct TSFieldMapEntry
|
||||||
TSFieldId field_id;
|
{
|
||||||
uint8_t child_index;
|
TSFieldId field_id;
|
||||||
bool inherited;
|
uint8_t child_index;
|
||||||
|
bool inherited;
|
||||||
} TSFieldMapEntry;
|
} TSFieldMapEntry;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct TSFieldMapSlice
|
||||||
uint16_t index;
|
{
|
||||||
uint16_t length;
|
uint16_t index;
|
||||||
|
uint16_t length;
|
||||||
} TSFieldMapSlice;
|
} TSFieldMapSlice;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct TSSymbolMetadata
|
||||||
bool visible;
|
{
|
||||||
bool named;
|
bool visible;
|
||||||
bool supertype;
|
bool named;
|
||||||
|
bool supertype;
|
||||||
} TSSymbolMetadata;
|
} TSSymbolMetadata;
|
||||||
|
|
||||||
typedef struct TSLexer TSLexer;
|
typedef struct TSLexer TSLexer;
|
||||||
|
|
||||||
struct TSLexer {
|
struct TSLexer
|
||||||
int32_t lookahead;
|
{
|
||||||
TSSymbol result_symbol;
|
int32_t lookahead;
|
||||||
void (*advance)(TSLexer *, bool);
|
TSSymbol result_symbol;
|
||||||
void (*mark_end)(TSLexer *);
|
void (*advance)(TSLexer *, bool);
|
||||||
uint32_t (*get_column)(TSLexer *);
|
void (*mark_end)(TSLexer *);
|
||||||
bool (*is_at_included_range_start)(const TSLexer *);
|
uint32_t (*get_column)(TSLexer *);
|
||||||
bool (*eof)(const TSLexer *);
|
bool (*is_at_included_range_start)(const TSLexer *);
|
||||||
|
bool (*eof)(const TSLexer *);
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef enum {
|
typedef enum TSParseActionType
|
||||||
TSParseActionTypeShift,
|
{
|
||||||
TSParseActionTypeReduce,
|
TSParseActionTypeShift,
|
||||||
TSParseActionTypeAccept,
|
TSParseActionTypeReduce,
|
||||||
TSParseActionTypeRecover,
|
TSParseActionTypeAccept,
|
||||||
|
TSParseActionTypeRecover,
|
||||||
} TSParseActionType;
|
} TSParseActionType;
|
||||||
|
|
||||||
typedef union {
|
typedef union TSParseAction {
|
||||||
struct {
|
struct TSParseActionShift
|
||||||
uint8_t type;
|
{
|
||||||
TSStateId state;
|
uint8_t type;
|
||||||
bool extra;
|
TSStateId state;
|
||||||
bool repetition;
|
bool extra;
|
||||||
} shift;
|
bool repetition;
|
||||||
struct {
|
} shift;
|
||||||
uint8_t type;
|
struct TSParseActionReduce
|
||||||
uint8_t child_count;
|
{
|
||||||
TSSymbol symbol;
|
uint8_t type;
|
||||||
int16_t dynamic_precedence;
|
uint8_t child_count;
|
||||||
uint16_t production_id;
|
TSSymbol symbol;
|
||||||
} reduce;
|
int16_t dynamic_precedence;
|
||||||
uint8_t type;
|
uint16_t production_id;
|
||||||
|
} reduce;
|
||||||
|
uint8_t type;
|
||||||
} TSParseAction;
|
} TSParseAction;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct TSLexMode
|
||||||
uint16_t lex_state;
|
{
|
||||||
uint16_t external_lex_state;
|
uint16_t lex_state;
|
||||||
|
uint16_t external_lex_state;
|
||||||
} TSLexMode;
|
} TSLexMode;
|
||||||
|
|
||||||
typedef union {
|
typedef union TSParseActionEntry {
|
||||||
TSParseAction action;
|
TSParseAction action;
|
||||||
struct {
|
struct TSParseActionEntryInner
|
||||||
uint8_t count;
|
{
|
||||||
bool reusable;
|
uint8_t count;
|
||||||
} entry;
|
bool reusable;
|
||||||
|
} entry;
|
||||||
} TSParseActionEntry;
|
} TSParseActionEntry;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct TSCharacterRange
|
||||||
int32_t start;
|
{
|
||||||
int32_t end;
|
int32_t start;
|
||||||
|
int32_t end;
|
||||||
} TSCharacterRange;
|
} TSCharacterRange;
|
||||||
|
|
||||||
struct TSLanguage {
|
struct TSLanguage
|
||||||
uint32_t version;
|
{
|
||||||
uint32_t symbol_count;
|
uint32_t version;
|
||||||
uint32_t alias_count;
|
uint32_t symbol_count;
|
||||||
uint32_t token_count;
|
uint32_t alias_count;
|
||||||
uint32_t external_token_count;
|
uint32_t token_count;
|
||||||
uint32_t state_count;
|
uint32_t external_token_count;
|
||||||
uint32_t large_state_count;
|
uint32_t state_count;
|
||||||
uint32_t production_id_count;
|
uint32_t large_state_count;
|
||||||
uint32_t field_count;
|
uint32_t production_id_count;
|
||||||
uint16_t max_alias_sequence_length;
|
uint32_t field_count;
|
||||||
const uint16_t *parse_table;
|
uint16_t max_alias_sequence_length;
|
||||||
const uint16_t *small_parse_table;
|
const uint16_t *parse_table;
|
||||||
const uint32_t *small_parse_table_map;
|
const uint16_t *small_parse_table;
|
||||||
const TSParseActionEntry *parse_actions;
|
const uint32_t *small_parse_table_map;
|
||||||
const char * const *symbol_names;
|
const TSParseActionEntry *parse_actions;
|
||||||
const char * const *field_names;
|
const char *const *symbol_names;
|
||||||
const TSFieldMapSlice *field_map_slices;
|
const char *const *field_names;
|
||||||
const TSFieldMapEntry *field_map_entries;
|
const TSFieldMapSlice *field_map_slices;
|
||||||
const TSSymbolMetadata *symbol_metadata;
|
const TSFieldMapEntry *field_map_entries;
|
||||||
const TSSymbol *public_symbol_map;
|
const TSSymbolMetadata *symbol_metadata;
|
||||||
const uint16_t *alias_map;
|
const TSSymbol *public_symbol_map;
|
||||||
const TSSymbol *alias_sequences;
|
const uint16_t *alias_map;
|
||||||
const TSLexMode *lex_modes;
|
const TSSymbol *alias_sequences;
|
||||||
bool (*lex_fn)(TSLexer *, TSStateId);
|
const TSLexMode *lex_modes;
|
||||||
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
|
bool (*lex_fn)(TSLexer *, TSStateId);
|
||||||
TSSymbol keyword_capture_token;
|
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
|
||||||
struct ExternalScannerDefinition {
|
TSSymbol keyword_capture_token;
|
||||||
const bool *states;
|
struct ExternalScannerDefinition
|
||||||
const TSSymbol *symbol_map;
|
{
|
||||||
void *(*create)(void);
|
const bool *states;
|
||||||
void (*destroy)(void *);
|
const TSSymbol *symbol_map;
|
||||||
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
|
void *(*create)(void);
|
||||||
unsigned (*serialize)(void *, char *);
|
void (*destroy)(void *);
|
||||||
void (*deserialize)(void *, const char *, unsigned);
|
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
|
||||||
} external_scanner;
|
unsigned (*serialize)(void *, char *);
|
||||||
const TSStateId *primary_state_ids;
|
void (*deserialize)(void *, const char *, unsigned);
|
||||||
|
} external_scanner;
|
||||||
|
const TSStateId *primary_state_ids;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
|
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead)
|
||||||
uint32_t index = 0;
|
{
|
||||||
uint32_t size = len - index;
|
uint32_t index = 0;
|
||||||
while (size > 1) {
|
uint32_t size = len - index;
|
||||||
uint32_t half_size = size / 2;
|
while (size > 1)
|
||||||
uint32_t mid_index = index + half_size;
|
{
|
||||||
TSCharacterRange *range = &ranges[mid_index];
|
uint32_t half_size = size / 2;
|
||||||
if (lookahead >= range->start && lookahead <= range->end) {
|
uint32_t mid_index = index + half_size;
|
||||||
return true;
|
TSCharacterRange *range = &ranges[mid_index];
|
||||||
} else if (lookahead > range->end) {
|
if (lookahead >= range->start && lookahead <= range->end)
|
||||||
index = mid_index;
|
{
|
||||||
}
|
return true;
|
||||||
size -= half_size;
|
}
|
||||||
}
|
else if (lookahead > range->end)
|
||||||
TSCharacterRange *range = &ranges[index];
|
{
|
||||||
return (lookahead >= range->start && lookahead <= range->end);
|
index = mid_index;
|
||||||
|
}
|
||||||
|
size -= half_size;
|
||||||
|
}
|
||||||
|
TSCharacterRange *range = &ranges[index];
|
||||||
|
return (lookahead >= range->start && lookahead <= range->end);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Lexer Macros
|
* Lexer Macros
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
#define UNUSED __pragma(warning(suppress : 4101))
|
|
||||||
#else
|
|
||||||
#define UNUSED __attribute__((unused))
|
#define UNUSED __attribute__((unused))
|
||||||
#endif
|
|
||||||
|
|
||||||
#define START_LEXER() \
|
#define START_LEXER() \
|
||||||
bool result = false; \
|
bool result = false; \
|
||||||
bool skip = false; \
|
bool skip = false; \
|
||||||
UNUSED \
|
UNUSED \
|
||||||
bool eof = false; \
|
bool eof = false; \
|
||||||
int32_t lookahead; \
|
int32_t lookahead; \
|
||||||
goto start; \
|
goto start; \
|
||||||
next_state: \
|
next_state: \
|
||||||
lexer->advance(lexer, skip); \
|
lexer->advance(lexer, skip); \
|
||||||
start: \
|
start: \
|
||||||
skip = false; \
|
skip = false; \
|
||||||
lookahead = lexer->lookahead;
|
lookahead = lexer->lookahead;
|
||||||
|
|
||||||
#define ADVANCE(state_value) \
|
#define ADVANCE(state_value) \
|
||||||
{ \
|
{ \
|
||||||
state = state_value; \
|
state = state_value; \
|
||||||
goto next_state; \
|
goto next_state; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ADVANCE_MAP(...) \
|
#define ADVANCE_MAP(...) \
|
||||||
{ \
|
{ \
|
||||||
static const uint16_t map[] = { __VA_ARGS__ }; \
|
static const uint16_t map[] = {__VA_ARGS__}; \
|
||||||
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
|
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) \
|
||||||
if (map[i] == lookahead) { \
|
{ \
|
||||||
state = map[i + 1]; \
|
if (map[i] == lookahead) \
|
||||||
goto next_state; \
|
{ \
|
||||||
} \
|
state = map[i + 1]; \
|
||||||
} \
|
goto next_state; \
|
||||||
}
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
#define SKIP(state_value) \
|
#define SKIP(state_value) \
|
||||||
{ \
|
{ \
|
||||||
skip = true; \
|
skip = true; \
|
||||||
state = state_value; \
|
state = state_value; \
|
||||||
goto next_state; \
|
goto next_state; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ACCEPT_TOKEN(symbol_value) \
|
#define ACCEPT_TOKEN(symbol_value) \
|
||||||
result = true; \
|
result = true; \
|
||||||
lexer->result_symbol = symbol_value; \
|
lexer->result_symbol = symbol_value; \
|
||||||
lexer->mark_end(lexer);
|
lexer->mark_end(lexer);
|
||||||
|
|
||||||
#define END_STATE() return result;
|
#define END_STATE() return result;
|
||||||
|
|
||||||
|
|
@ -206,60 +217,56 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
|
||||||
* Parse Table Macros
|
* Parse Table Macros
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
|
#define SMALL_STATE(id) ((id)-LARGE_STATE_COUNT)
|
||||||
|
|
||||||
#define STATE(id) id
|
#define STATE(id) id
|
||||||
|
|
||||||
#define ACTIONS(id) id
|
#define ACTIONS(id) id
|
||||||
|
|
||||||
#define SHIFT(state_value) \
|
#define SHIFT(state_value) \
|
||||||
{{ \
|
{ \
|
||||||
.shift = { \
|
{ \
|
||||||
.type = TSParseActionTypeShift, \
|
.shift = {.type = TSParseActionTypeShift, .state = (state_value) } \
|
||||||
.state = (state_value) \
|
} \
|
||||||
} \
|
}
|
||||||
}}
|
|
||||||
|
|
||||||
#define SHIFT_REPEAT(state_value) \
|
#define SHIFT_REPEAT(state_value) \
|
||||||
{{ \
|
{ \
|
||||||
.shift = { \
|
{ \
|
||||||
.type = TSParseActionTypeShift, \
|
.shift = {.type = TSParseActionTypeShift, .state = (state_value), .repetition = true } \
|
||||||
.state = (state_value), \
|
} \
|
||||||
.repetition = true \
|
}
|
||||||
} \
|
|
||||||
}}
|
|
||||||
|
|
||||||
#define SHIFT_EXTRA() \
|
#define SHIFT_EXTRA() \
|
||||||
{{ \
|
{ \
|
||||||
.shift = { \
|
{ \
|
||||||
.type = TSParseActionTypeShift, \
|
.shift = {.type = TSParseActionTypeShift, .extra = true } \
|
||||||
.extra = true \
|
} \
|
||||||
} \
|
}
|
||||||
}}
|
|
||||||
|
|
||||||
#define REDUCE(symbol_name, children, precedence, prod_id) \
|
#define REDUCE(symbol_name, children, precedence, prod_id) \
|
||||||
{{ \
|
{ \
|
||||||
.reduce = { \
|
{ \
|
||||||
.type = TSParseActionTypeReduce, \
|
.reduce = {.type = TSParseActionTypeReduce, \
|
||||||
.symbol = symbol_name, \
|
.symbol = symbol_name, \
|
||||||
.child_count = children, \
|
.child_count = children, \
|
||||||
.dynamic_precedence = precedence, \
|
.dynamic_precedence = precedence, \
|
||||||
.production_id = prod_id \
|
.production_id = prod_id}, \
|
||||||
}, \
|
} \
|
||||||
}}
|
}
|
||||||
|
|
||||||
#define RECOVER() \
|
#define RECOVER() \
|
||||||
{{ \
|
{ \
|
||||||
.type = TSParseActionTypeRecover \
|
{ \
|
||||||
}}
|
.type = TSParseActionTypeRecover \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
#define ACCEPT_INPUT() \
|
#define ACCEPT_INPUT() \
|
||||||
{{ \
|
{ \
|
||||||
.type = TSParseActionTypeAccept \
|
{ \
|
||||||
}}
|
.type = TSParseActionTypeAccept \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#endif // TREE_SITTER_PARSER_H_
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // TREE_SITTER_PARSER_H_
|
|
||||||
|
|
|
||||||
|
|
@ -1,111 +0,0 @@
|
||||||
#include "./subtree.h"
|
|
||||||
|
|
||||||
typedef struct StackEntry
|
|
||||||
{
|
|
||||||
Subtree tree;
|
|
||||||
uint32_t child_index;
|
|
||||||
uint32_t byte_offset;
|
|
||||||
} StackEntry;
|
|
||||||
|
|
||||||
typedef struct ReusableNode
|
|
||||||
{
|
|
||||||
Array(StackEntry) stack;
|
|
||||||
Subtree last_external_token;
|
|
||||||
} ReusableNode;
|
|
||||||
|
|
||||||
static inline ReusableNode reusable_node_new(void)
|
|
||||||
{
|
|
||||||
return (ReusableNode){array_new(), NULL_SUBTREE};
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void reusable_node_clear(ReusableNode *self)
|
|
||||||
{
|
|
||||||
array_clear(&self->stack);
|
|
||||||
self->last_external_token = NULL_SUBTREE;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline Subtree reusable_node_tree(ReusableNode *self)
|
|
||||||
{
|
|
||||||
return self->stack.size > 0 ? self->stack.contents[self->stack.size - 1].tree : NULL_SUBTREE;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline uint32_t reusable_node_byte_offset(ReusableNode *self)
|
|
||||||
{
|
|
||||||
return self->stack.size > 0 ? self->stack.contents[self->stack.size - 1].byte_offset : UINT32_MAX;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void reusable_node_delete(ReusableNode *self)
|
|
||||||
{
|
|
||||||
array_delete(&self->stack);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void reusable_node_advance(ReusableNode *self)
|
|
||||||
{
|
|
||||||
StackEntry last_entry = *array_back(&self->stack);
|
|
||||||
uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree);
|
|
||||||
if (ts_subtree_has_external_tokens(last_entry.tree))
|
|
||||||
{
|
|
||||||
self->last_external_token = ts_subtree_last_external_token(last_entry.tree);
|
|
||||||
}
|
|
||||||
|
|
||||||
Subtree tree;
|
|
||||||
uint32_t next_index;
|
|
||||||
do
|
|
||||||
{
|
|
||||||
StackEntry popped_entry = array_pop(&self->stack);
|
|
||||||
next_index = popped_entry.child_index + 1;
|
|
||||||
if (self->stack.size == 0)
|
|
||||||
return;
|
|
||||||
tree = array_back(&self->stack)->tree;
|
|
||||||
} while (ts_subtree_child_count(tree) <= next_index);
|
|
||||||
|
|
||||||
array_push(&self->stack, ((StackEntry){
|
|
||||||
.tree = ts_subtree_children(tree)[next_index],
|
|
||||||
.child_index = next_index,
|
|
||||||
.byte_offset = byte_offset,
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool reusable_node_descend(ReusableNode *self)
|
|
||||||
{
|
|
||||||
StackEntry last_entry = *array_back(&self->stack);
|
|
||||||
if (ts_subtree_child_count(last_entry.tree) > 0)
|
|
||||||
{
|
|
||||||
array_push(&self->stack, ((StackEntry){
|
|
||||||
.tree = ts_subtree_children(last_entry.tree)[0],
|
|
||||||
.child_index = 0,
|
|
||||||
.byte_offset = last_entry.byte_offset,
|
|
||||||
}));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void reusable_node_advance_past_leaf(ReusableNode *self)
|
|
||||||
{
|
|
||||||
while (reusable_node_descend(self))
|
|
||||||
{
|
|
||||||
}
|
|
||||||
reusable_node_advance(self);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void reusable_node_reset(ReusableNode *self, Subtree tree)
|
|
||||||
{
|
|
||||||
reusable_node_clear(self);
|
|
||||||
array_push(&self->stack, ((StackEntry){
|
|
||||||
.tree = tree,
|
|
||||||
.child_index = 0,
|
|
||||||
.byte_offset = 0,
|
|
||||||
}));
|
|
||||||
|
|
||||||
// Never reuse the root node, because it has a non-standard internal structure
|
|
||||||
// due to transformations that are applied when it is accepted: adding the EOF
|
|
||||||
// child and any extra children.
|
|
||||||
if (!reusable_node_descend(self))
|
|
||||||
{
|
|
||||||
reusable_node_clear(self);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
/* By: rparodi <rparodi@student.42.fr> +#+ +:+ +#+ */
|
/* By: rparodi <rparodi@student.42.fr> +#+ +:+ +#+ */
|
||||||
/* +#+#+#+#+#+ +#+ */
|
/* +#+#+#+#+#+ +#+ */
|
||||||
/* Created: 2024/03/28 14:40:38 by rparodi #+# #+# */
|
/* Created: 2024/03/28 14:40:38 by rparodi #+# #+# */
|
||||||
/* Updated: 2024/06/30 16:44:34 by maiboyer ### ########.fr */
|
/* Updated: 2024/07/03 18:46:59 by maiboyer ### ########.fr */
|
||||||
/* */
|
/* */
|
||||||
/* ************************************************************************** */
|
/* ************************************************************************** */
|
||||||
|
|
||||||
|
|
@ -80,7 +80,7 @@ void print_node_data(t_node *t, t_usize depth)
|
||||||
idx = 0;
|
idx = 0;
|
||||||
if (t->kind == 7)
|
if (t->kind == 7)
|
||||||
return;
|
return;
|
||||||
printf("\x1b[%im[%s](%lu)\x1b[0m", t->field_str == NULL ? 90 : 32, t->field_str == NULL ? "nil" : t->field_str, t->field);
|
printf("\x1b[%im[%-6s](%lu)\x1b[0m", t->field_str == NULL ? 90 : 32, t->field_str == NULL ? "nil" : t->field_str, t->field);
|
||||||
while (idx++ < depth + 1)
|
while (idx++ < depth + 1)
|
||||||
printf("\t");
|
printf("\t");
|
||||||
idx = 0;
|
idx = 0;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue