From 0862cd35c4e91bca0be6e54c23d8ad4b16a0552d Mon Sep 17 00:00:00 2001 From: maix0 Date: Thu, 19 Sep 2024 22:19:22 +0200 Subject: [PATCH] update: parser/src/parser a bit more normed --- Filelist.sh.mk | 4 +- allocator/Filelist.aq.mk | 4 +- ast/Filelist.ast.mk | 8 +- exec/Filelist.exec.mk | 20 +- line/Filelist.line.mk | 4 +- parser/Filelist.parser.mk | 1 + parser/include/parser/inner/parser_inner.h | 29 ++- parser/src/parser/parser_accept.c | 102 ++++---- parser/src/parser/parser_advance.c | 234 +++++++++--------- parser/src/parser/parser_advance_bis.c | 56 +++++ .../parser/parser_breakdown_top_of_stack.c | 9 +- parser/src/parser/parser_external_scanner.c | 20 +- parser/src/parser/parser_handle_error.c | 10 +- parser/src/parser/parser_lex.c | 25 +- parser/src/parser/parser_parse.c | 116 +++++---- parser/src/parser/parser_parse_str.c | 3 +- parser/src/parser/parser_recover.c | 12 +- parser/src/parser/parser_recover_to_tree.c | 6 +- parser/src/parser/parser_reduce.c | 3 +- parser/src/parser/parser_select.c | 10 +- parser/src/parser/parser_shift.c | 3 +- parser/src/parser/parser_versions.c | 70 ++++-- stdme/Filelist.me.mk | 10 +- 23 files changed, 448 insertions(+), 311 deletions(-) create mode 100644 parser/src/parser/parser_advance_bis.c diff --git a/Filelist.sh.mk b/Filelist.sh.mk index 778d0dd8..141e9970 100644 --- a/Filelist.sh.mk +++ b/Filelist.sh.mk @@ -1,8 +1,8 @@ SRC_FILES = \ -env \ _env_norm_helper \ -ft_exit \ _helper_main \ +env \ +ft_exit \ main \ signal_handler \ diff --git a/allocator/Filelist.aq.mk b/allocator/Filelist.aq.mk index 63698d77..ead60e1c 100644 --- a/allocator/Filelist.aq.mk +++ b/allocator/Filelist.aq.mk @@ -9,11 +9,11 @@ me_alloc/merge_blocks \ me_alloc/pages \ me_alloc/realloc \ vg/dummy_block \ +vg/dummy_mem_status \ vg/dummy_mempool \ vg/dummy_mempool_bis \ -vg/dummy_mem_status \ vg/valgrind_block \ +vg/valgrind_mem_status \ vg/valgrind_mempool \ vg/valgrind_mempool_bis \ -vg/valgrind_mem_status \ diff --git a/ast/Filelist.ast.mk b/ast/Filelist.ast.mk index 327e50c7..00c506a1 100644 --- a/ast/Filelist.ast.mk +++ b/ast/Filelist.ast.mk @@ -1,4 +1,8 @@ SRC_FILES = \ +_here_doc \ +_not_done_boucle_print \ +_not_done_function \ +_not_done_scripting_print \ ast_alloc/ast_alloc \ ast_alloc/ast_alloc_scripting \ ast_free/ast_free \ @@ -19,10 +23,6 @@ from_node/other_node \ from_node/redirect_node \ from_node/scripting_node \ from_node/string_node \ -_here_doc \ -_not_done_boucle_print \ -_not_done_function \ -_not_done_scripting_print \ print_ast/ast_print \ print_ast/ast_print_arithmetic \ print_ast/ast_print_command \ diff --git a/exec/Filelist.exec.mk b/exec/Filelist.exec.mk index 20e9d35a..1dce545c 100644 --- a/exec/Filelist.exec.mk +++ b/exec/Filelist.exec.mk @@ -1,40 +1,40 @@ SRC_FILES = \ -builtins/cd \ +_read_dir \ builtins/_debug \ +builtins/cd \ builtins/echo \ builtins/env \ builtins/exit \ builtins/export \ builtins/pwd \ builtins/unset \ -_read_dir \ -run_arithmetic/arithmetic \ -run_arithmetic/arithmetic_operation \ run_arithmetic/_get_op \ -run_arithmetic/operator_bis \ run_arithmetic/_run_arith \ run_arithmetic/_to_ast_node \ +run_arithmetic/arithmetic \ +run_arithmetic/arithmetic_operation \ +run_arithmetic/operator_bis \ run_ast/_ast_into_str \ run_ast/_ast_into_str2 \ run_ast/_ast_into_str3 \ run_ast/_ast_into_str4 \ run_ast/_ast_into_str5 \ run_ast/_ast_into_str6 \ +run_ast/_run_exit_code \ +run_ast/_run_exp_operators \ +run_ast/_spawn_cmd \ +run_ast/_spawn_cmd_redir_fd \ +run_ast/_spawn_cmd_redir_heredoc \ run_ast/run_builtins \ run_ast/run_builtins2 \ run_ast/run_cmd_sub \ run_ast/run_command \ -run_ast/_run_exit_code \ run_ast/run_expansion \ run_ast/run_expansion_builtin \ -run_ast/_run_exp_operators \ run_ast/run_list \ run_ast/run_pipeline \ run_ast/run_pipeline_helper \ run_ast/run_program \ run_ast/run_subshell \ run_ast/run_words \ -run_ast/_spawn_cmd \ -run_ast/_spawn_cmd_redir_fd \ -run_ast/_spawn_cmd_redir_heredoc \ diff --git a/line/Filelist.line.mk b/line/Filelist.line.mk index 2c93bfb5..4d84cc2e 100644 --- a/line/Filelist.line.mk +++ b/line/Filelist.line.mk @@ -2,11 +2,11 @@ SRC_FILES = \ line \ line_edit_actions \ line_edit_actions2 \ -line_editing \ -line_editing2 \ line_edit_mode \ line_edit_mode_interal \ line_edit_mode_specific_key \ +line_editing \ +line_editing2 \ line_globals \ line_history \ line_internals \ diff --git a/parser/Filelist.parser.mk b/parser/Filelist.parser.mk index 408fd0ab..3095453c 100644 --- a/parser/Filelist.parser.mk +++ b/parser/Filelist.parser.mk @@ -31,6 +31,7 @@ node/node_iterator \ node/node_relevent \ parser/parser_accept \ parser/parser_advance \ +parser/parser_advance_bis \ parser/parser_breakdown_top_of_stack \ parser/parser_condense_stack \ parser/parser_do_reduction \ diff --git a/parser/include/parser/inner/parser_inner.h b/parser/include/parser/inner/parser_inner.h index f92afe44..d2e09199 100644 --- a/parser/include/parser/inner/parser_inner.h +++ b/parser/include/parser/inner/parser_inner.h @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/10 13:56:47 by maiboyer #+# #+# */ -/* Updated: 2024/09/19 16:44:59 by maiboyer ### ########.fr */ +/* Updated: 2024/09/19 22:10:08 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -79,6 +79,33 @@ struct s_string_input t_u32 length; }; +struct s_parser_advance_state +{ + TSParseAction action; + TableEntry table_entry; + bool end_of_non_terminal_extra; + bool is_fragile; + bool needs_lex; + t_stack_version last_reduction_version; + t_stack_version reduction_version; + t_state_id next_state; + t_state_id state; + t_subtree lookahead; + t_subtree mutable_lookahead; + t_u32 i; +}; + +struct s_parser_parse_state +{ + t_tree *result; + t_u32 position; + t_u32 last_position; + t_u32 version_count; + t_stack_version version; + t_u32 min_error_cost; + bool first; +}; + t_parser *ts_parser_new(t_language *language); t_tree *ts_parser_parse(t_parser *self, t_input input); t_tree *ts_parser_parse_string(t_parser *self, t_const_str string, t_u32 length); diff --git a/parser/src/parser/parser_accept.c b/parser/src/parser/parser_accept.c index c448c53a..af7f23d1 100644 --- a/parser/src/parser/parser_accept.c +++ b/parser/src/parser/parser_accept.c @@ -6,28 +6,72 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/13 14:02:35 by maiboyer #+# #+# */ -/* Updated: 2024/09/19 17:23:29 by maiboyer ### ########.fr */ +/* Updated: 2024/09/19 21:37:04 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ +#include "me/types.h" #include "parser/inner/parser_inner.h" -void ts_parser__accept(t_parser *self, t_stack_version version, - t_subtree lookahead) +void _parser_accept_endloop(t_parser *self, t_subtree root, t_u32 *i) +{ + self->accept_count++; + if (self->finished_tree) + { + if (ts_parser__select_tree(self, self->finished_tree, root)) + { + ts_subtree_release(self->finished_tree); + self->finished_tree = root; + } + else + ts_subtree_release(root); + } + else + self->finished_tree = root; + (*i)++; +} + +bool parser_select_root(\ + t_parser *self, t_vec_subtree *trees, t_subtree *root, t_u32 *j) +{ + t_u32 k; + t_u32 n; + const t_subtree *childs; + t_subtree tree; + + tree = trees->buffer[*j]; + if (!ts_subtree_extra(tree)) + { + n = ts_subtree_child_count(tree); + childs = ts_subtree_children(tree); + k = 0; + while (k < n) + { + childs[k]->ref_count++; + k++; + } + vec_subtree_splice(trees, vec_subtree_splice_args(*j, 1, n, childs)); + *root = ts_subtree_new_node(ts_subtree_symbol(tree), trees, \ + tree->production_id, self->language); + ts_subtree_release(tree); + return (true); + } + return ((*j)--, false); +} + +void ts_parser__accept(\ + t_parser *self, t_stack_version v, t_subtree lookahead) { - t_u32 child_count; - const t_subtree *children; t_stack_slice_array pop; t_vec_subtree trees; t_subtree root; t_u32 i; t_u32 j; - t_u32 k; - t_subtree tree; - assert(ts_subtree_is_eof(lookahead)); - ts_stack_push(self->stack, (struct s_stack_push_arg){version, lookahead, false, 1}); - pop = ts_stack_pop_all(self->stack, version); + if (!ts_subtree_is_eof(lookahead)) + me_abort("failed assertion: lookahead isn't eof tree"); + ts_stack_push(self->stack, (struct s_stack_push_arg){v, lookahead, 0, 1}); + pop = ts_stack_pop_all(self->stack, v); i = 0; while (i < pop.size) { @@ -35,42 +79,10 @@ void ts_parser__accept(t_parser *self, t_stack_version version, root = NULL; j = trees.len - 1; while (j + 1 > 0) - { - tree = trees.buffer[j]; - if (!ts_subtree_extra(tree)) - { - child_count = ts_subtree_child_count(tree); - children = ts_subtree_children(tree); - k = 0; - while (k < child_count) - { - children[k]->ref_count++; - k++; - } - vec_subtree_splice(&trees, vec_subtree_splice_args(j, 1, - child_count, children)); - root = (ts_subtree_new_node(ts_subtree_symbol(tree), &trees, - tree->production_id, self->language)); - ts_subtree_release(tree); + if (parser_select_root(self, &trees, &root, &j)) break ; - } - j--; - } - self->accept_count++; - if (self->finished_tree) - { - if (ts_parser__select_tree(self, self->finished_tree, root)) - { - ts_subtree_release(self->finished_tree); - self->finished_tree = root; - } - else - ts_subtree_release(root); - } - else - self->finished_tree = root; - i++; + _parser_accept_endloop(self, root, &i); } ts_stack_remove_version(self->stack, pop.contents[0].version); - ts_stack_halt(self->stack, version); + ts_stack_halt(self->stack, v); } diff --git a/parser/src/parser/parser_advance.c b/parser/src/parser/parser_advance.c index a3ba320f..d56f2475 100644 --- a/parser/src/parser/parser_advance.c +++ b/parser/src/parser/parser_advance.c @@ -6,131 +6,131 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/13 14:01:20 by maiboyer #+# #+# */ -/* Updated: 2024/09/19 17:24:14 by maiboyer ### ########.fr */ +/* Updated: 2024/09/19 22:17:53 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ #include "parser/inner/parser_inner.h" -bool ts_parser__advance(t_parser *self, t_stack_version version) -{ - t_state_id state; - t_subtree mutable_lookahead; - t_subtree lookahead; - TableEntry table_entry; - bool needs_lex; - t_u32 i; - t_stack_version last_reduction_version; - TSParseAction action; - t_state_id next_state; - bool is_fragile; - bool end_of_non_terminal_extra; - t_stack_version reduction_version; +bool _parser_advance_do_lex(\ + t_parser *self, t_stack_version version, \ + struct s_parser_advance_state *state); +bool _parser_advance_shift(\ + t_parser *self, t_stack_version version, \ + struct s_parser_advance_state *state); - lookahead = NULL; - table_entry = (TableEntry){.action_count = 0}; - state = ts_stack_state(self->stack, version); - needs_lex = true; - while (true) +bool _process_single_action(\ + t_parser *self, t_stack_version version, \ + struct s_parser_advance_state *state) +{ + state->action = state->table_entry.actions[state->i]; + if (state->action.type == TSParseActionTypeShift) { - if (needs_lex) - { - needs_lex = false; - lookahead = ts_parser__lex(self, version, state); - if (self->has_scanner_error) - return (false); - if (lookahead) - ts_language_table_entry(self->language, state, - ts_subtree_symbol(lookahead), &table_entry); - else - ts_language_table_entry(self->language, state, - ts_builtin_sym_end, &table_entry); - } - last_reduction_version = STACK_VERSION_NONE; - i = 0; - while (i < table_entry.action_count) - { - action = table_entry.actions[i]; - if (action.type == TSParseActionTypeShift) - { - if (action.shift.repetition) - { - i++; - continue ; - } - if (action.shift.extra) - next_state = state; - else - next_state = action.shift.state; - if (ts_subtree_child_count(lookahead) > 0) - next_state = ts_language_next_state(self->language, state, - ts_subtree_symbol(lookahead)); - ts_parser__shift(self, version, next_state, lookahead, - action.shift.extra); - return (true); - } - if (action.type == TSParseActionTypeReduce) - { - is_fragile = table_entry.action_count > 1; - end_of_non_terminal_extra = lookahead == NULL; - reduction_version = ts_parser__reduce(self, version, - action.reduce.symbol, action.reduce.child_count, - action.reduce.dynamic_precedence, - action.reduce.production_id, is_fragile, - end_of_non_terminal_extra); - if (reduction_version != (t_stack_version)STACK_VERSION_NONE) - last_reduction_version = reduction_version; - i++; - } - if (action.type == TSParseActionTypeAccept) - return (ts_parser__accept(self, version, lookahead), true); - if (action.type == TSParseActionTypeRecover) - return (ts_parser__recover(self, version, lookahead), true); - } - if (last_reduction_version != (t_stack_version)STACK_VERSION_NONE) - { - ts_stack_renumber_version(self->stack, last_reduction_version, - version); - state = ts_stack_state(self->stack, version); - if (!lookahead) - needs_lex = true; - else - ts_language_table_entry(self->language, state, - ts_subtree_leaf_symbol(lookahead), &table_entry); - continue ; - } - if (!lookahead) - { - ts_stack_halt(self->stack, version); + if (_parser_advance_shift(self, version, state)) return (true); - } - if (ts_subtree_is_keyword(lookahead) - && ts_subtree_symbol(lookahead) != self->language->keyword_capture_token) - { - ts_language_table_entry(self->language, state, - self->language->keyword_capture_token, &table_entry); - if (table_entry.action_count > 0) - { - mutable_lookahead = ts_subtree_ensure_owner(lookahead); - ts_subtree_set_symbol(&mutable_lookahead, - self->language->keyword_capture_token, self->language); - lookahead = mutable_lookahead; - continue ; - } - } - if (state == ERROR_STATE) - { - ts_parser__recover(self, version, lookahead); - return (true); - } - if (ts_parser__breakdown_top_of_stack(self, version)) - { - state = ts_stack_state(self->stack, version); - ts_subtree_release(lookahead); - needs_lex = true; - continue ; - } - ts_stack_pause(self->stack, version, lookahead); + return (false); + } + if (state->action.type == TSParseActionTypeReduce) + { + state->is_fragile = state->table_entry.action_count > 1; + state->end_of_non_terminal_extra = state->lookahead == NULL; + state->reduction_version = ts_parser__reduce(self, version, + state->action.reduce.symbol, state->action.reduce.child_count, + state->action.reduce.dynamic_precedence, + state->action.reduce.production_id, state->is_fragile, + state->end_of_non_terminal_extra); + if (state->reduction_version != (t_stack_version)STACK_VERSION_NONE) + state->last_reduction_version = state->reduction_version; + state->i++; + } + if (state->action.type == TSParseActionTypeAccept) + return (ts_parser__accept(self, version, state->lookahead), true); + if (state->action.type == TSParseActionTypeRecover) + return (ts_parser__recover(self, version, state->lookahead), true); + return (false); +} + +bool _parser_handle_first_reduction(\ + t_parser *self, t_stack_version version, \ + struct s_parser_advance_state *state) +{ + if (state->last_reduction_version != (t_stack_version)STACK_VERSION_NONE) + { + ts_stack_renumber_version(self->stack, state->last_reduction_version, + version); + state->state = ts_stack_state(self->stack, version); + if (!state->lookahead) + state->needs_lex = true; + else + ts_language_table_entry(self->language, state->state, + ts_subtree_leaf_symbol(state->lookahead), &state->table_entry); return (true); } + return (false); +} + +bool _parser_handle_keyword(\ + t_parser *self, t_stack_version version, \ + struct s_parser_advance_state *state) +{ + (void)(version); + if (ts_subtree_is_keyword(state->lookahead) + && ts_subtree_symbol(state->lookahead) \ + != self->language->keyword_capture_token) + { + ts_language_table_entry(self->language, state->state, + self->language->keyword_capture_token, &state->table_entry); + if (state->table_entry.action_count > 0) + { + state->mutable_lookahead = ts_subtree_ensure_owner(\ + state->lookahead); + ts_subtree_set_symbol(&state->mutable_lookahead, + self->language->keyword_capture_token, self->language); + state->lookahead = state->mutable_lookahead; + return (true); + } + } + return (false); +} + +bool _parser_loop_inner(\ + t_parser *self, t_stack_version version, \ + struct s_parser_advance_state *state) +{ + while (state->i < state->table_entry.action_count) + if (_process_single_action(self, version, state)) + return (true); + if (_parser_handle_first_reduction(self, version, state)) + return (false); + if (!state->lookahead) + return (ts_stack_halt(self->stack, version), true); + if (_parser_handle_keyword(self, version, state)) + return (false); + if (state->state == ERROR_STATE) + return (ts_parser__recover(self, version, state->lookahead), true); + if (ts_parser__breakdown_top_of_stack(self, version)) + { + state->state = ts_stack_state(self->stack, version); + ts_subtree_release(state->lookahead); + state->needs_lex = true; + return (false); + } + return (ts_stack_pause(self->stack, version, state->lookahead), true); +} + +bool ts_parser__advance(t_parser *self, t_stack_version version) +{ + struct s_parser_advance_state state; + + state.lookahead = NULL; + state.table_entry = (TableEntry){.action_count = 0}; + state.state = ts_stack_state(self->stack, version); + state.needs_lex = true; + while (true) + { + if (_parser_advance_do_lex(self, version, &state)) + return (false); + if (_parser_loop_inner(self, version, &state)) + return (true); + } } diff --git a/parser/src/parser/parser_advance_bis.c b/parser/src/parser/parser_advance_bis.c new file mode 100644 index 00000000..e1357b2c --- /dev/null +++ b/parser/src/parser/parser_advance_bis.c @@ -0,0 +1,56 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* parser_advance_bis.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/09/13 14:01:20 by maiboyer #+# #+# */ +/* Updated: 2024/09/19 22:00:08 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "parser/inner/parser_inner.h" + +bool _parser_advance_do_lex(\ + t_parser *self, t_stack_version version, \ + struct s_parser_advance_state *state) +{ + if (state->needs_lex) + { + state->needs_lex = false; + state->lookahead = ts_parser__lex(self, version, state->state); + if (self->has_scanner_error) + return (true); + if (state->lookahead) + ts_language_table_entry(self->language, state->state, + ts_subtree_symbol(state->lookahead), &state->table_entry); + else + ts_language_table_entry(self->language, state->state, + ts_builtin_sym_end, &state->table_entry); + } + state->last_reduction_version = (t_stack_version)STACK_VERSION_NONE; + state->i = 0; + return (false); +} + +bool _parser_advance_shift(\ + t_parser *self, t_stack_version version, \ + struct s_parser_advance_state *state) +{ + if (state->action.shift.repetition) + { + state->i++; + return (false); + } + if (state->action.shift.extra) + state->next_state = state->state; + else + state->next_state = state->action.shift.state; + if (ts_subtree_child_count(state->lookahead) > 0) + state->next_state = ts_language_next_state(self->language, state->state, + ts_subtree_symbol(state->lookahead)); + ts_parser__shift(self, version, state->next_state, state->lookahead, + state->action.shift.extra); + return (true); +} diff --git a/parser/src/parser/parser_breakdown_top_of_stack.c b/parser/src/parser/parser_breakdown_top_of_stack.c index 6dffcb7f..c4520c02 100644 --- a/parser/src/parser/parser_breakdown_top_of_stack.c +++ b/parser/src/parser/parser_breakdown_top_of_stack.c @@ -57,15 +57,18 @@ bool ts_parser__breakdown_top_of_stack(t_parser *self, state = ts_language_next_state(self->language, state, ts_subtree_symbol(child)); child->ref_count++; - ts_stack_push(self->stack, \ -(struct s_stack_push_arg){slice.version, child, pending, state}); + ts_stack_push(self->stack, + (struct s_stack_push_arg){slice.version, child, pending, + state}); j++; } j = 1; while (j < slice.subtrees.len) { tree = slice.subtrees.buffer[j]; - ts_stack_push(self->stack, (struct s_stack_push_arg){slice.version, tree, false, state}); + ts_stack_push(self->stack, + (struct s_stack_push_arg){slice.version, tree, false, + state}); j++; } ts_subtree_release(parent); diff --git a/parser/src/parser/parser_external_scanner.c b/parser/src/parser/parser_external_scanner.c index 8c61d3c5..1f0932ca 100644 --- a/parser/src/parser/parser_external_scanner.c +++ b/parser/src/parser/parser_external_scanner.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/11 16:44:11 by maiboyer #+# #+# */ -/* Updated: 2024/09/19 17:23:41 by maiboyer ### ########.fr */ +/* Updated: 2024/09/19 21:38:35 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -22,7 +22,7 @@ void ts_parser__external_scanner_destroy(t_parser *self) if (self->external_scanner_payload != NULL) { self->language->external_scanner.destroy(\ - self->external_scanner_payload); + self->external_scanner_payload); self->external_scanner_payload = NULL; } } @@ -32,14 +32,14 @@ t_u32 ts_parser__external_scanner_serialize(t_parser *self) t_u32 length; length = self->language->external_scanner.serialize(\ - self->external_scanner_payload, self->lexer.debug_buffer); + self->external_scanner_payload, self->lexer.debug_buffer); if (length > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) me_abort("assertion failed in " __FILE__ " `length > " \ "TREE_SITTER_SERIALIZATION_BUFFER_SIZE`"); return (length); } -void ts_parser__external_scanner_deserialize(t_parser *self, +void ts_parser__external_scanner_deserialize(t_parser *self, \ t_subtree external_token) { const t_u8 *data; @@ -50,12 +50,12 @@ void ts_parser__external_scanner_deserialize(t_parser *self, if (external_token) { data = ts_external_scanner_state_data(\ - &external_token->external_scanner_state); + &external_token->external_scanner_state); length = external_token->external_scanner_state.length; printf("HERE\n"); } - self->language->external_scanner.deserialize(self->external_scanner_payload, - data, length); + self->language->external_scanner.deserialize(\ + self->external_scanner_payload, data, length); } bool ts_parser__external_scanner_scan(t_parser *self, @@ -63,8 +63,8 @@ bool ts_parser__external_scanner_scan(t_parser *self, { const bool *valid_external_tokens; - valid_external_tokens = ts_language_enabled_external_tokens(self->language, - external_lex_state); + valid_external_tokens = ts_language_enabled_external_tokens(\ + self->language, external_lex_state); return (self->language->external_scanner.scan(\ - self->external_scanner_payload, &self->lexer, valid_external_tokens)); + self->external_scanner_payload, &self->lexer, valid_external_tokens)); } diff --git a/parser/src/parser/parser_handle_error.c b/parser/src/parser/parser_handle_error.c index d42dcd6b..4b126f1d 100644 --- a/parser/src/parser/parser_handle_error.c +++ b/parser/src/parser/parser_handle_error.c @@ -17,8 +17,8 @@ void ts_lexer__mark_end(t_lexer *_self); void ts_parser__handle_error(t_parser *self, t_stack_version version, t_subtree lookahead) { - t_length padding; - t_length position; + t_length padding; + t_length position; t_state_id state; t_state_id state_after_missing_symbol; t_symbol missing_symbol; @@ -67,7 +67,8 @@ void ts_parser__handle_error(t_parser *self, t_stack_version version, v); missing_tree = ts_subtree_new_missing_leaf(missing_symbol, padding, lookahead_bytes, self->language); - ts_stack_push(self->stack, (struct s_stack_push_arg){version_with_missing_tree, + ts_stack_push(self->stack, + (struct s_stack_push_arg){version_with_missing_tree, missing_tree, false, state_after_missing_symbol}); if (ts_parser__do_all_potential_reductions(self, version_with_missing_tree, @@ -80,7 +81,8 @@ void ts_parser__handle_error(t_parser *self, t_stack_version version, missing_symbol++; } } - ts_stack_push(self->stack, (struct s_stack_push_arg){v, NULL, false, ERROR_STATE}); + ts_stack_push(self->stack, (struct s_stack_push_arg){v, NULL, false, + ERROR_STATE}); if (v == version) v = previous_version_count; else diff --git a/parser/src/parser/parser_lex.c b/parser/src/parser/parser_lex.c index dd8440c4..5811593f 100644 --- a/parser/src/parser/parser_lex.c +++ b/parser/src/parser/parser_lex.c @@ -15,12 +15,12 @@ t_subtree ts_parser__lex(t_parser *self, t_stack_version version, t_state_id parse_state) { - t_length current_position; - t_length error_end_position; - t_length error_start_position; - t_length padding; - t_length size; - t_length start_position; + t_length current_position; + t_length error_end_position; + t_length error_start_position; + t_length padding; + t_length size; + t_length start_position; TSLexMode lex_mode; t_symbol symbol; bool called_get_column; @@ -87,8 +87,7 @@ t_subtree ts_parser__lex(t_parser *self, t_stack_version version, ts_lexer_reset(&self->lexer, current_position); } ts_lexer_start(&self->lexer); - found_token = self->language->lex_fn(&self->lexer, - lex_mode.lex_state); + found_token = self->language->lex_fn(&self->lexer, lex_mode.lex_state); ts_lexer_finish(&self->lexer, &lookahead_end_byte); if (found_token) break ; @@ -122,8 +121,8 @@ t_subtree ts_parser__lex(t_parser *self, t_stack_version version, padding = length_sub(error_start_position, start_position); size = length_sub(error_end_position, error_start_position); lookahead_bytes = lookahead_end_byte - error_end_position.bytes; - result = ts_subtree_new_error((t_st_newerr_args){first_error_character, padding, size, - lookahead_bytes, parse_state, self->language}); + result = ts_subtree_new_error((t_st_newerr_args){first_error_character, + padding, size, lookahead_bytes, parse_state, self->language}); } else { @@ -151,9 +150,9 @@ t_subtree ts_parser__lex(t_parser *self, t_stack_version version, symbol = self->lexer.funcs.result_symbol; } } - result = ts_subtree_new_leaf((t_st_newleaf_args){symbol, padding, size, lookahead_bytes, - parse_state, found_external_token, called_get_column, - is_keyword, self->language}); + result = ts_subtree_new_leaf((t_st_newleaf_args){symbol, padding, size, + lookahead_bytes, parse_state, found_external_token, + called_get_column, is_keyword, self->language}); if (found_external_token) { ts_external_scanner_state_init(&result->external_scanner_state, diff --git a/parser/src/parser/parser_parse.c b/parser/src/parser/parser_parse.c index cf4208e0..f7a4df9a 100644 --- a/parser/src/parser/parser_parse.c +++ b/parser/src/parser/parser_parse.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/13 13:56:28 by maiboyer #+# #+# */ -/* Updated: 2024/09/13 13:56:36 by maiboyer ### ########.fr */ +/* Updated: 2024/09/19 22:18:23 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -19,70 +19,88 @@ bool _parse_condition(t_parser *self, t_u32 *version_count, return (*version < *version_count); } -t_tree *ts_parser_parse(t_parser *self, t_input input) +t_error _parser_parse_init_state(\ + t_parser *self, t_input input, struct s_parser_parse_state *state) { - t_tree *result; - t_u32 position; - t_u32 last_position; - t_u32 version_count; - t_stack_version version; - t_u32 min_error_cost; - bool first; - - result = NULL; + self->operation_count = 0; + state->first = true; + state->last_position = 0; + state->position = 0; + state->result = NULL; + state->version = 0; + state->version_count = 0; if (!self->language || !input.read) - return (NULL); + return (ERROR); ts_lexer_set_input(&self->lexer, input); if (!ts_parser_has_outstanding_parse(self)) { ts_parser__external_scanner_create(self); if (self->has_scanner_error) - return (ts_parser_reset(self), result); + return (ts_parser_reset(self), ERROR); } - self->operation_count = 0; - position = 0; - last_position = 0; - version_count = 0; - version = 0; - first = true; - while (first || version_count != 0) + return (NO_ERROR); +} + +t_tree *_parser_parse_end(\ + t_parser *self, t_input input, struct s_parser_parse_state state) +{ + (void)(input); + if (self->finished_tree == NULL) + me_abort("self->finished_tree == NULL"); + ts_subtree_balance(self->finished_tree, self->language); + state.result = ts_tree_new(self->finished_tree, self->language); + self->finished_tree = NULL; + ts_parser_reset(self); + return (state.result); +} + +t_error _parser_parse_mainloop(\ + t_parser *self, t_input input, struct s_parser_parse_state *state) +{ + (void)(input); + state->first = false; + state->version = 0; + while (_parse_condition(self, &state->version_count, &state->version)) { - first = false; - version = 0; - while (_parse_condition(self, &version_count, &version)) + while (ts_stack_is_active(self->stack, state->version)) { - while (ts_stack_is_active(self->stack, version)) + if (!ts_parser__advance(self, state->version)) { - if (!ts_parser__advance(self, version)) - { - if (self->has_scanner_error) - return (ts_parser_reset(self), result); - return (NULL); - } - position = ts_stack_position(self->stack, version).bytes; - if (position > last_position || (version > 0 - && position == last_position)) - { - last_position = position; - break ; - } + if (self->has_scanner_error) + return (ts_parser_reset(self), ERROR); + return (ERROR); + } + state->position = \ + ts_stack_position(self->stack, state->version).bytes; + if (state->position > state->last_position \ + || (state->version > 0 && state->position == state->last_position)) + { + state->last_position = state->position; + break ; } - version++; } - min_error_cost = ts_parser__condense_stack(self); - if (self->finished_tree - && ts_subtree_error_cost(self->finished_tree) < min_error_cost) + state->version++; + } + return (NO_ERROR); +} + +t_tree *ts_parser_parse(t_parser *self, t_input input) +{ + struct s_parser_parse_state state; + + if (_parser_parse_init_state(self, input, &state)) + return (NULL); + while (state.first || state.version_count != 0) + { + if (_parser_parse_mainloop(self, input, &state)) + return (NULL); + state.min_error_cost = ts_parser__condense_stack(self); + if (self->finished_tree \ + && ts_subtree_error_cost(self->finished_tree) < state.min_error_cost) { ts_stack_clear(self->stack); break ; } } - if (self->finished_tree == NULL) - me_abort("self->finished_tree == NULL"); - ts_subtree_balance(self->finished_tree, self->language); - result = ts_tree_new(self->finished_tree, self->language); - self->finished_tree = NULL; - ts_parser_reset(self); - return (result); + return (_parser_parse_end(self, input, state)); } - diff --git a/parser/src/parser/parser_parse_str.c b/parser/src/parser/parser_parse_str.c index 03830cdf..62a44d5b 100644 --- a/parser/src/parser/parser_parse_str.c +++ b/parser/src/parser/parser_parse_str.c @@ -37,6 +37,5 @@ t_tree *ts_parser_parse_string(t_parser *self, t_const_str string, t_string_input input; input = (t_string_input){(const t_u8 *)string, length}; - return (ts_parser_parse(self, \ - (t_input){&input, ts_string_input_read})); + return (ts_parser_parse(self, (t_input){&input, ts_string_input_read})); } diff --git a/parser/src/parser/parser_recover.c b/parser/src/parser/parser_recover.c index 0bbab81a..19cdc2e5 100644 --- a/parser/src/parser/parser_recover.c +++ b/parser/src/parser/parser_recover.c @@ -15,7 +15,7 @@ void ts_parser__recover(t_parser *self, t_stack_version version, t_subtree lookahead) { - t_length position; + t_length position; bool did_recover; bool would_merge; t_stack_slice_array pop; @@ -123,8 +123,8 @@ void ts_parser__recover(t_parser *self, t_stack_version version, { children = vec_subtree_new(16, NULL); parent = ts_subtree_new_error_node(&children, false, self->language); - ts_stack_push(self->stack, \ - (struct s_stack_push_arg){version, parent, false, 1}); + ts_stack_push(self->stack, (struct s_stack_push_arg){version, parent, + false, 1}); ts_parser__accept(self, version, lookahead); return ; } @@ -140,7 +140,7 @@ void ts_parser__recover(t_parser *self, t_stack_version version, } actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n); - if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n \ + if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) { mutable_lookahead = ts_subtree_ensure_owner(lookahead); @@ -170,8 +170,8 @@ void ts_parser__recover(t_parser *self, t_stack_version version, error_repeat = ts_subtree_new_node(ts_builtin_sym_error_repeat, &pop.contents[0].subtrees, 0, self->language); } - ts_stack_push(self->stack, \ - (struct s_stack_push_arg){version, (error_repeat), false, ERROR_STATE}); + ts_stack_push(self->stack, (struct s_stack_push_arg){version, + (error_repeat), false, ERROR_STATE}); if (ts_subtree_has_external_tokens(lookahead)) ts_stack_set_last_external_token(self->stack, version, ts_subtree_last_external_token(lookahead)); diff --git a/parser/src/parser/parser_recover_to_tree.c b/parser/src/parser/parser_recover_to_tree.c index 46bb1aa0..20b312f3 100644 --- a/parser/src/parser/parser_recover_to_tree.c +++ b/parser/src/parser/parser_recover_to_tree.c @@ -71,7 +71,8 @@ bool ts_parser__recover_to_state(t_parser *self, t_stack_version version, { error = ts_subtree_new_error_node(&slice.subtrees, true, self->language); - ts_stack_push(self->stack, (struct s_stack_push_arg){slice.version, error, false, goal_state}); + ts_stack_push(self->stack, (struct s_stack_push_arg){slice.version, + error, false, goal_state}); } else { @@ -81,7 +82,8 @@ bool ts_parser__recover_to_state(t_parser *self, t_stack_version version, while (j < self->trailing_extras.len) { tree = self->trailing_extras.buffer[j]; - ts_stack_push(self->stack, (struct s_stack_push_arg){slice.version, tree, false, goal_state}); + ts_stack_push(self->stack, (struct s_stack_push_arg){slice.version, + tree, false, goal_state}); j++; } previous_version = slice.version; diff --git a/parser/src/parser/parser_reduce.c b/parser/src/parser/parser_reduce.c index fb8c844a..065fe801 100644 --- a/parser/src/parser/parser_reduce.c +++ b/parser/src/parser/parser_reduce.c @@ -97,7 +97,8 @@ t_stack_version ts_parser__reduce(t_parser *self, t_stack_version version, else parent->parse_state = state; parent->dynamic_precedence += dynamic_precedence; - ts_stack_push(self->stack, (struct s_stack_push_arg){slice_version, (parent), false, next_state}); + ts_stack_push(self->stack, (struct s_stack_push_arg){slice_version, + (parent), false, next_state}); j = 0; while (j < self->trailing_extras.len) { diff --git a/parser/src/parser/parser_select.c b/parser/src/parser/parser_select.c index 4d8c009a..1d27bf91 100644 --- a/parser/src/parser/parser_select.c +++ b/parser/src/parser/parser_select.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/13 13:55:07 by maiboyer #+# #+# */ -/* Updated: 2024/09/13 14:09:10 by maiboyer ### ########.fr */ +/* Updated: 2024/09/19 21:38:54 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -31,11 +31,11 @@ bool ts_parser__select_tree(t_parser *self, t_subtree left, t_subtree right) return (true); if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) return (false); - if (ts_subtree_dynamic_precedence(right) > \ - ts_subtree_dynamic_precedence(left)) + if (ts_subtree_dynamic_precedence(right) \ + > ts_subtree_dynamic_precedence(left)) return (true); - if (ts_subtree_dynamic_precedence(left) > \ - ts_subtree_dynamic_precedence(right)) + if (ts_subtree_dynamic_precedence(left) \ + > ts_subtree_dynamic_precedence(right)) return (false); if (ts_subtree_error_cost(left) > 0) return (true); diff --git a/parser/src/parser/parser_shift.c b/parser/src/parser/parser_shift.c index 5a624c1f..495a6339 100644 --- a/parser/src/parser/parser_shift.c +++ b/parser/src/parser/parser_shift.c @@ -27,7 +27,8 @@ void ts_parser__shift(t_parser *self, t_stack_version version, ts_subtree_set_extra(&result, extra); subtree_to_push = (result); } - ts_stack_push(self->stack,(struct s_stack_push_arg){ version, subtree_to_push, !is_leaf, state}); + ts_stack_push(self->stack, (struct s_stack_push_arg){version, + subtree_to_push, !is_leaf, state}); if (ts_subtree_has_external_tokens(subtree_to_push)) ts_stack_set_last_external_token(self->stack, version, ts_subtree_last_external_token(subtree_to_push)); diff --git a/parser/src/parser/parser_versions.c b/parser/src/parser/parser_versions.c index d7e6e47f..c3ff97c5 100644 --- a/parser/src/parser/parser_versions.c +++ b/parser/src/parser/parser_versions.c @@ -6,37 +6,47 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/13 13:42:50 by maiboyer #+# #+# */ -/* Updated: 2024/09/13 13:42:58 by maiboyer ### ########.fr */ +/* Updated: 2024/09/19 22:17:04 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ #include "parser/inner/parser_inner.h" -t_error_comparison ts_parser__compare_versions(t_parser *self, - t_error_status a, t_error_status b) +bool _parser_compare_inner(t_parser *self, \ + t_error_status a, t_error_status b, t_error_comparison *ret) { - (void)self; + (void)(self); if (!a.is_in_error && b.is_in_error) { if (a.cost < b.cost) - return (ECTakeLeft); + return (*ret = ECTakeLeft, true); else - return (ECPreferLeft); + return (*ret = ECPreferLeft, true); } if (a.is_in_error && !b.is_in_error) { if (b.cost < a.cost) - return (ECTakeRight); + return (*ret = ECTakeRight, true); else - return (ECPreferRight); + return (*ret = ECPreferRight, true); } if (a.cost < b.cost) { if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) - return (ECTakeLeft); + return (*ret = ECTakeLeft, true); else - return (ECPreferLeft); + return (*ret = ECPreferLeft, true); } + return (false); +} + +t_error_comparison ts_parser__compare_versions(t_parser *self, + t_error_status a, t_error_status b) +{ + t_error_comparison ret; + + if (_parser_compare_inner(self, a, b, &ret)) + return (ret); if (b.cost < a.cost) { if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) @@ -68,26 +78,15 @@ t_error_status ts_parser__version_status(t_parser *self, version) == ERROR_STATE}); } -bool ts_parser__better_version_exists(t_parser *self, - t_stack_version version, bool is_in_error, t_u32 cost) +bool _better_version_end(\ + t_parser *self, t_stack_version version, \ + t_length position, t_error_status status) { - t_error_status status_i; - t_length position; - t_error_status status; t_stack_version i; t_stack_version n; t_error_comparison cmp; + t_error_status status_i; - if (self->finished_tree - && ts_subtree_error_cost(self->finished_tree) <= cost) - return (true); - position = ts_stack_position(self->stack, version); - status = (t_error_status){ - .cost = cost, - .is_in_error = is_in_error, - .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), - .node_count = ts_stack_node_count_since_error(self->stack, version), - }; i = 0; n = ts_stack_version_count(self->stack); while (i < n) @@ -101,12 +100,29 @@ bool ts_parser__better_version_exists(t_parser *self, status_i = ts_parser__version_status(self, i); cmp = ts_parser__compare_versions(self, status, status_i); if (cmp == ECTakeRight) - { return (true); - } if (cmp == ECPreferRight && ts_stack_can_merge(self->stack, i, version)) return (true); i++; } return (false); } + +bool ts_parser__better_version_exists(t_parser *self, + t_stack_version version, bool is_in_error, t_u32 cost) +{ + t_length position; + t_error_status status; + + if (self->finished_tree + && ts_subtree_error_cost(self->finished_tree) <= cost) + return (true); + position = ts_stack_position(self->stack, version); + status = (t_error_status){ + .cost = cost, + .is_in_error = is_in_error, + .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), + .node_count = ts_stack_node_count_since_error(self->stack, version), + }; + return (_better_version_end(self, version, position, status)); +} diff --git a/stdme/Filelist.me.mk b/stdme/Filelist.me.mk index 7f234e90..03b7c42f 100644 --- a/stdme/Filelist.me.mk +++ b/stdme/Filelist.me.mk @@ -35,10 +35,10 @@ fs/fs_internal \ fs/getters \ fs/putfd \ gnl/get_next_line \ -hash/hasher \ hash/hash_signed \ hash/hash_str \ hash/hash_unsigned \ +hash/hasher \ hash/sip/sip13 \ hash/sip/sip_utils \ hash/sip/sip_utils2 \ @@ -86,10 +86,6 @@ printf/printf \ printf/printf_fd \ printf/printf_str \ printf/vprintf \ -string/mod \ -string/string_insert \ -string/string_remove \ -string/string_reserve \ str/str_clone \ str/str_compare \ str/str_find_chr \ @@ -106,6 +102,10 @@ str/str_n_find_str \ str/str_split \ str/str_substring \ str/str_trim \ +string/mod \ +string/string_insert \ +string/string_remove \ +string/string_reserve \ GEN_FILES = \ convert/i16_to_str \