From 74937f457f26218e3a48da571ebcbc291366520a Mon Sep 17 00:00:00 2001 From: Raphael Date: Sat, 14 Sep 2024 16:21:39 +0200 Subject: [PATCH] style: norming the scanner.c (missing some stuff like too many lines) --- Filelist.sh.mk | 4 +- allocator/Filelist.aq.mk | 4 +- ast/Filelist.ast.mk | 8 +- exec/Filelist.exec.mk | 16 +- line/Filelist.line.mk | 4 +- parser/Filelist.parser.mk | 4 + parser/src/scanner/helper.c | 56 +++++ parser/src/scanner/scan.c | 241 ++++++++++++++++++ parser/src/scanner/scanner.c | 406 +++---------------------------- parser/src/scanner/serialize.c | 162 ++++++++++++ parser/src/scanner/tree_sitter.c | 86 +++++++ stdme/Filelist.me.mk | 10 +- 12 files changed, 602 insertions(+), 399 deletions(-) create mode 100644 parser/src/scanner/helper.c create mode 100644 parser/src/scanner/scan.c create mode 100644 parser/src/scanner/serialize.c create mode 100644 parser/src/scanner/tree_sitter.c diff --git a/Filelist.sh.mk b/Filelist.sh.mk index 2a7c700f..0717d0c1 100644 --- a/Filelist.sh.mk +++ b/Filelist.sh.mk @@ -1,8 +1,8 @@ SRC_FILES = \ -_env_norm_helper \ -_helper_main \ env \ +_env_norm_helper \ ft_exit \ +_helper_main \ main \ node/node \ signal_handler \ diff --git a/allocator/Filelist.aq.mk b/allocator/Filelist.aq.mk index ead60e1c..63698d77 100644 --- a/allocator/Filelist.aq.mk +++ b/allocator/Filelist.aq.mk @@ -9,11 +9,11 @@ me_alloc/merge_blocks \ me_alloc/pages \ me_alloc/realloc \ vg/dummy_block \ -vg/dummy_mem_status \ vg/dummy_mempool \ vg/dummy_mempool_bis \ +vg/dummy_mem_status \ vg/valgrind_block \ -vg/valgrind_mem_status \ vg/valgrind_mempool \ vg/valgrind_mempool_bis \ +vg/valgrind_mem_status \ diff --git a/ast/Filelist.ast.mk b/ast/Filelist.ast.mk index 00c506a1..327e50c7 100644 --- a/ast/Filelist.ast.mk +++ b/ast/Filelist.ast.mk @@ -1,8 +1,4 @@ SRC_FILES = \ -_here_doc \ -_not_done_boucle_print \ -_not_done_function \ -_not_done_scripting_print \ ast_alloc/ast_alloc \ ast_alloc/ast_alloc_scripting \ ast_free/ast_free \ @@ -23,6 +19,10 @@ from_node/other_node \ from_node/redirect_node \ from_node/scripting_node \ from_node/string_node \ +_here_doc \ +_not_done_boucle_print \ +_not_done_function \ +_not_done_scripting_print \ print_ast/ast_print \ print_ast/ast_print_arithmetic \ print_ast/ast_print_command \ diff --git a/exec/Filelist.exec.mk b/exec/Filelist.exec.mk index d4fd6874..c9f88f8f 100644 --- a/exec/Filelist.exec.mk +++ b/exec/Filelist.exec.mk @@ -1,31 +1,33 @@ SRC_FILES = \ -builtins/_debug \ builtins/cd \ +builtins/_debug \ builtins/echo \ builtins/env \ builtins/exit \ builtins/export \ builtins/pwd \ builtins/unset \ -run_arithmetic/_get_op \ -run_arithmetic/_run_arith \ -run_arithmetic/_to_ast_node \ run_arithmetic/arithmetic \ run_arithmetic/arithmetic_operation \ +run_arithmetic/_get_op \ run_arithmetic/operator_bis \ +run_arithmetic/_run_arith \ +run_arithmetic/_to_ast_node \ run_ast/_ast_into_str \ run_ast/_ast_into_str2 \ -run_ast/_run_exit_code \ -run_ast/_run_exp_operators \ -run_ast/_spawn_cmd \ +run_ast/_ast_into_str3 \ +run_ast/_ast_into_str4 \ run_ast/run_builtins \ run_ast/run_cmd_sub \ run_ast/run_command \ +run_ast/_run_exit_code \ run_ast/run_expansion \ run_ast/run_expansion_builtin \ +run_ast/_run_exp_operators \ run_ast/run_list \ run_ast/run_pipeline \ run_ast/run_program \ run_ast/run_subshell \ run_ast/run_words \ +run_ast/_spawn_cmd \ diff --git a/line/Filelist.line.mk b/line/Filelist.line.mk index 4d84cc2e..2c93bfb5 100644 --- a/line/Filelist.line.mk +++ b/line/Filelist.line.mk @@ -2,11 +2,11 @@ SRC_FILES = \ line \ line_edit_actions \ line_edit_actions2 \ +line_editing \ +line_editing2 \ line_edit_mode \ line_edit_mode_interal \ line_edit_mode_specific_key \ -line_editing \ -line_editing2 \ line_globals \ line_history \ line_internals \ diff --git a/parser/Filelist.parser.mk b/parser/Filelist.parser.mk index e1669c37..c1d3e551 100644 --- a/parser/Filelist.parser.mk +++ b/parser/Filelist.parser.mk @@ -49,7 +49,11 @@ parser/parser_shift \ parser/parser_versions \ point/point_funcs1 \ point/point_funcs2 \ +scanner/helper \ +scanner/scan \ scanner/scanner \ +scanner/serialize \ +scanner/tree_sitter \ stack/stack_add_link \ stack/stack_funcs1 \ stack/stack_funcs2 \ diff --git a/parser/src/scanner/helper.c b/parser/src/scanner/helper.c new file mode 100644 index 00000000..a9776458 --- /dev/null +++ b/parser/src/scanner/helper.c @@ -0,0 +1,56 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* helper.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: rparodi +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/09/14 16:12:41 by rparodi #+# #+# */ +/* Updated: 2024/09/14 16:19:31 by rparodi ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "parser/inner/scanner_inner.h" + +bool in_error_recovery(const bool *valid_symbols); +void reset(t_scanner *scanner); +bool advance_word(t_lexer *lexer, t_string *unquoted_word); +t_u32 serialize(t_scanner *scanner, t_u8 *buffer); +void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length); +bool scan_bare_dollar(t_lexer *lexer); +bool scan_heredoc_start(t_heredoc *heredoc, t_lexer *lexer); +bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer); +bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, + enum e_token_type middle_type, enum e_token_type end_type); +bool scan_double_hash(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_concat(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_advance_words(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_literals(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols); +void *tree_sitter_sh_external_scanner_create(void); +bool tree_sitter_sh_external_scanner_scan(void *payload, t_lexer *lexer, + const bool *valid_symbols); +t_u32 tree_sitter_sh_external_scanner_serialize(void *payload, t_u8 *state); +void tree_sitter_sh_external_scanner_deserialize(void *payload, + const t_u8 *state, t_u32 length); +void tree_sitter_sh_external_scanner_destroy(void *payload); + +bool in_error_recovery(const bool *valid_symbols) +{ + return (valid_symbols[ERROR_RECOVERY]); +} + +void reset(t_scanner *scanner) +{ + t_u32 i; + + i = 0; + while (i < scanner->heredocs.len) + reset_heredoc(vec_heredoc_get(&scanner->heredocs, i++)); +} diff --git a/parser/src/scanner/scan.c b/parser/src/scanner/scan.c new file mode 100644 index 00000000..e0b59309 --- /dev/null +++ b/parser/src/scanner/scan.c @@ -0,0 +1,241 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* scan.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: rparodi +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/09/14 16:09:30 by rparodi #+# #+# */ +/* Updated: 2024/09/14 16:19:47 by rparodi ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "parser/inner/scanner_inner.h" + +bool in_error_recovery(const bool *valid_symbols); +void reset(t_scanner *scanner); +bool advance_word(t_lexer *lexer, t_string *unquoted_word); +t_u32 serialize(t_scanner *scanner, t_u8 *buffer); +void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length); +bool scan_bare_dollar(t_lexer *lexer); +bool scan_heredoc_start(t_heredoc *heredoc, t_lexer *lexer); +bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer); +bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, + enum e_token_type middle_type, enum e_token_type end_type); +bool scan_double_hash(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_concat(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_advance_words(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_literals(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols); +void *tree_sitter_sh_external_scanner_create(void); +bool tree_sitter_sh_external_scanner_scan(void *payload, t_lexer *lexer, + const bool *valid_symbols); +t_u32 tree_sitter_sh_external_scanner_serialize(void *payload, t_u8 *state); +void tree_sitter_sh_external_scanner_deserialize(void *payload, + const t_u8 *state, t_u32 length); +void tree_sitter_sh_external_scanner_destroy(void *payload); + +bool scan_bare_dollar(t_lexer *lexer) +{ + while (me_isspace(lexer->data.lookahead) && lexer->data.lookahead != '\n' + && !lexer->data.eof((void *)lexer)) + lexer->data.advance((void *)lexer, true); + if (lexer->data.lookahead == '$') + { + lexer->data.advance((void *)lexer, false); + lexer->data.result_symbol = BARE_DOLLAR; + lexer->data.mark_end((void *)lexer); + return (me_isspace(lexer->data.lookahead) + || lexer->data.eof((void *)lexer) || lexer->data.lookahead == '\"'); + } + return (false); +} + +bool scan_heredoc_start(t_heredoc *heredoc, t_lexer *lexer) +{ + bool found_delimiter; + + found_delimiter = advance_word(lexer, &heredoc->delimiter); + while (me_isspace(lexer->data.lookahead)) + { + lexer->data.advance((void *)lexer, true); + } + lexer->data.result_symbol = HEREDOC_START; + heredoc->is_raw = lexer->data.lookahead == '\'' + || lexer->data.lookahead == '"' || lexer->data.lookahead == '\\'; + if (!found_delimiter) + { + string_clear(&heredoc->delimiter); + return (false); + } + return (found_delimiter); +} + +// Scan the first 'n' characters on this line, to see if they match the +// heredoc delimiter +bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer) +{ + t_i32 size; + + size = 0; + string_clear(&heredoc->current_leading_word); + if (heredoc->delimiter.len > 0) + { + while (lexer->data.lookahead != '\0' && lexer->data.lookahead != '\n' + && (t_i32) + * (&heredoc->delimiter.buf[size]) == lexer->data.lookahead + && heredoc->current_leading_word.len < heredoc->delimiter.len) + { + string_push_char(&heredoc->current_leading_word, + lexer->data.lookahead); + lexer->data.advance((void *)lexer, false); + size++; + } + } + string_push_char(&heredoc->current_leading_word, '\0'); + if (heredoc->delimiter.len == 0) + return (false); + return (str_compare(heredoc->current_leading_word.buf, + heredoc->delimiter.buf)); +} + +bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, + enum e_token_type middle_type, enum e_token_type end_type) +{ + bool did_advance; + t_heredoc *heredoc; + + did_advance = false; + heredoc = vec_heredoc_last(&scanner->heredocs); + while (true) + { + if (lexer->data.lookahead == '\0') + { + if (lexer->data.eof((void *)lexer) && did_advance) + { + reset_heredoc(heredoc); + lexer->data.result_symbol = end_type; + return (true); + } + return (false); + } + else if (lexer->data.lookahead == '\\') + { + did_advance = true; + lexer->data.advance((void *)lexer, false); + lexer->data.advance((void *)lexer, false); + } + else if (lexer->data.lookahead == '$') + { + if (heredoc->is_raw) + { + did_advance = true; + lexer->data.advance((void *)lexer, false); + } + if (did_advance) + { + lexer->data.mark_end((void *)lexer); + lexer->data.result_symbol = middle_type; + heredoc->started = true; + lexer->data.advance((void *)lexer, false); + if (me_isalpha(lexer->data.lookahead) + || lexer->data.lookahead == '{' + || lexer->data.lookahead == '(') + return (true); + } + if (middle_type == HEREDOC_BODY_BEGINNING + && lexer->data.get_column((void *)lexer) == 0) + { + lexer->data.result_symbol = middle_type; + heredoc->started = true; + return (true); + } + return (false); + } + else if (lexer->data.lookahead == '\n') + { + if (!did_advance) + lexer->data.advance((void *)lexer, true); + else + lexer->data.advance((void *)lexer, false); + did_advance = true; + if (heredoc->allows_indent) + { + while (me_isspace(lexer->data.lookahead)) + lexer->data.advance((void *)lexer, false); + } + lexer->data.result_symbol = end_type; + if (heredoc->started) + lexer->data.result_symbol = middle_type; + lexer->data.mark_end((void *)lexer); + if (scan_heredoc_end_identifier(heredoc, lexer)) + { + if (lexer->data.result_symbol == HEREDOC_END) + vec_heredoc_pop(&scanner->heredocs, NULL); + return (true); + } + } + else + { + if (lexer->data.get_column((void *)lexer) == 0) + { + while (me_isspace(lexer->data.lookahead)) + { + if (did_advance) + lexer->data.advance((void *)lexer, false); + else + lexer->data.advance((void *)lexer, true); + } + if (end_type != SIMPLE_HEREDOC_BODY) + { + lexer->data.result_symbol = middle_type; + if (scan_heredoc_end_identifier(heredoc, lexer)) + return (true); + } + if (end_type == SIMPLE_HEREDOC_BODY) + { + lexer->data.result_symbol = end_type; + lexer->data.mark_end((void *)lexer); + if (scan_heredoc_end_identifier(heredoc, lexer)) + return (true); + } + } + did_advance = true; + lexer->data.advance((void *)lexer, false); + } + } +} + +bool scan_double_hash(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols) +{ + (void)(scanner); + (void)(lexer); + (void)(valid_symbols); + if (valid_symbols[IMMEDIATE_DOUBLE_HASH] + && !(valid_symbols[ERROR_RECOVERY])) + { + if (lexer->data.lookahead == '#') + { + lexer->data.mark_end((void *)lexer); + lexer->data.advance((void *)lexer, false); + if (lexer->data.lookahead == '#') + { + lexer->data.advance((void *)lexer, false); + if (lexer->data.lookahead != '}') + { + lexer->data.result_symbol = IMMEDIATE_DOUBLE_HASH; + lexer->data.mark_end((void *)lexer); + return (true); + } + } + } + } + return (false); +} diff --git a/parser/src/scanner/scanner.c b/parser/src/scanner/scanner.c index 754e301b..db6dcaee 100644 --- a/parser/src/scanner/scanner.c +++ b/parser/src/scanner/scanner.c @@ -6,112 +6,40 @@ /* By: rparodi +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/10 15:41:11 by rparodi #+# #+# */ -/* Updated: 2024/09/14 11:38:03 by maiboyer ### ########.fr */ +/* Updated: 2024/09/14 16:21:00 by rparodi ### ########.fr */ /* */ /* ************************************************************************** */ #include "parser/inner/scanner_inner.h" -bool in_error_recovery(const bool *valid_symbols) -{ - return (valid_symbols[ERROR_RECOVERY]); -} - -void reset(t_scanner *scanner) -{ - t_u32 i; - - i = 0; - while (i < scanner->heredocs.len) - reset_heredoc(vec_heredoc_get(&scanner->heredocs, i++)); -} - -t_u32 serialize(t_scanner *scanner, t_u8 *buffer) -{ - t_u32 size; - t_usize delimiter_size; - t_usize i; - t_heredoc *heredoc; - - i = 0; - size = 0; - buffer[size++] = (char)scanner->last_glob_paren_depth; - buffer[size++] = (char)scanner->ext_was_in_double_quote; - buffer[size++] = (char)scanner->ext_saw_outside_quote; - buffer[size++] = (char)scanner->heredocs.len; - while (i < scanner->heredocs.len) - { - heredoc = vec_heredoc_get(&scanner->heredocs, i); - if (heredoc->delimiter.len + 3 - + size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) - return (0); - buffer[size++] = (char)heredoc->is_raw; - buffer[size++] = (char)heredoc->started; - buffer[size++] = (char)heredoc->allows_indent; - delimiter_size = heredoc->delimiter.len; - mem_copy(&buffer[size], &delimiter_size, sizeof(t_usize)); - size += sizeof(t_usize); - if (heredoc->delimiter.len > 0) - { - mem_copy(&buffer[size], heredoc->delimiter.buf, - heredoc->delimiter.len); - size += heredoc->delimiter.len; - } - i++; - } - return (size); -} - -void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length) -{ - t_u32 size; - t_u32 heredoc_count; - t_usize i; - t_usize delimiter_size; - t_heredoc *heredoc; - t_heredoc new_heredoc; - - if (length == 0) - reset(scanner); - else - { - i = 0; - heredoc_count = 0; - size = 0; - scanner->last_glob_paren_depth = buffer[size++]; - scanner->ext_was_in_double_quote = buffer[size++]; - scanner->ext_saw_outside_quote = buffer[size++]; - heredoc_count = (t_u8)buffer[size++]; - while (i < heredoc_count) - { - heredoc = NULL; - if (i < scanner->heredocs.len) - heredoc = vec_heredoc_get(&scanner->heredocs, i); - else - { - new_heredoc = heredoc_new(); - vec_heredoc_push(&scanner->heredocs, new_heredoc); - heredoc = vec_heredoc_last(&scanner->heredocs); - } - heredoc->is_raw = buffer[size++]; - heredoc->started = buffer[size++]; - heredoc->allows_indent = buffer[size++]; - mem_copy(&delimiter_size, &buffer[size], sizeof(t_usize)); - size += sizeof(t_usize); - heredoc->delimiter.len = delimiter_size; - string_reserve(&heredoc->delimiter, heredoc->delimiter.len); - if (heredoc->delimiter.len > 0) - { - mem_copy(heredoc->delimiter.buf, &buffer[size], - heredoc->delimiter.len); - size += heredoc->delimiter.len; - } - i++; - } - if (!(size == length)) - me_abort("assertion failed: size == length"); - } -} +bool in_error_recovery(const bool *valid_symbols); +void reset(t_scanner *scanner); +bool advance_word(t_lexer *lexer, t_string *unquoted_word); +t_u32 serialize(t_scanner *scanner, t_u8 *buffer); +void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length); +bool scan_bare_dollar(t_lexer *lexer); +bool scan_heredoc_start(t_heredoc *heredoc, t_lexer *lexer); +bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer); +bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, + enum e_token_type middle_type, enum e_token_type end_type); +bool scan_double_hash(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_concat(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_advance_words(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_literals(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols); +void *tree_sitter_sh_external_scanner_create(void); +bool tree_sitter_sh_external_scanner_scan(void *payload, t_lexer *lexer, + const bool *valid_symbols); +t_u32 tree_sitter_sh_external_scanner_serialize(void *payload, t_u8 *state); +void tree_sitter_sh_external_scanner_deserialize(void *payload, + const t_u8 *state, t_u32 length); +void tree_sitter_sh_external_scanner_destroy(void *payload); /** * Consume a "word" in POSIX parlance, and returns it unquoted. @@ -120,210 +48,6 @@ void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length) * POSIX-mandated substitution, and assumes the default value for * IFS. */ -bool advance_word(t_lexer *lexer, t_string *unquoted_word) -{ - bool empty; - t_i32 quote; - - empty = true; - quote = 0; - if (lexer->data.lookahead == '\'' || lexer->data.lookahead == '"') - { - quote = lexer->data.lookahead; - lexer->data.advance((void *)lexer, false); - } - while (lexer->data.lookahead && !((quote && (lexer->data.lookahead == quote - || lexer->data.lookahead == '\r' - || lexer->data.lookahead == '\n')) || (!quote - && me_isspace(lexer->data.lookahead)))) - { - if (lexer->data.lookahead == '\\') - { - lexer->data.advance((void *)lexer, false); - if (!lexer->data.lookahead) - return (false); - } - empty = false; - string_push_char(unquoted_word, lexer->data.lookahead); - lexer->data.advance((void *)lexer, false); - } - string_push_char(unquoted_word, '\0'); - if (quote && lexer->data.lookahead == quote) - lexer->data.advance((void *)lexer, false); - return (!empty); -} - -bool scan_bare_dollar(t_lexer *lexer) -{ - while (me_isspace(lexer->data.lookahead) && lexer->data.lookahead != '\n' - && !lexer->data.eof((void *)lexer)) - lexer->data.advance((void *)lexer, true); - if (lexer->data.lookahead == '$') - { - lexer->data.advance((void *)lexer, false); - lexer->data.result_symbol = BARE_DOLLAR; - lexer->data.mark_end((void *)lexer); - return (me_isspace(lexer->data.lookahead) - || lexer->data.eof((void *)lexer) || lexer->data.lookahead == '\"'); - } - return (false); -} - -bool scan_heredoc_start(t_heredoc *heredoc, t_lexer *lexer) -{ - bool found_delimiter; - - found_delimiter = advance_word(lexer, &heredoc->delimiter); - while (me_isspace(lexer->data.lookahead)) - { - lexer->data.advance((void *)lexer, true); - } - lexer->data.result_symbol = HEREDOC_START; - heredoc->is_raw = lexer->data.lookahead == '\'' - || lexer->data.lookahead == '"' || lexer->data.lookahead == '\\'; - if (!found_delimiter) - { - string_clear(&heredoc->delimiter); - return (false); - } - return (found_delimiter); -} - -// Scan the first 'n' characters on this line, to see if they match the -// heredoc delimiter -bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer) -{ - t_i32 size; - - size = 0; - string_clear(&heredoc->current_leading_word); - if (heredoc->delimiter.len > 0) - { - while (lexer->data.lookahead != '\0' && lexer->data.lookahead != '\n' - && (t_i32) - * (&heredoc->delimiter.buf[size]) == lexer->data.lookahead - && heredoc->current_leading_word.len < heredoc->delimiter.len) - { - string_push_char(&heredoc->current_leading_word, - lexer->data.lookahead); - lexer->data.advance((void *)lexer, false); - size++; - } - } - string_push_char(&heredoc->current_leading_word, '\0'); - if (heredoc->delimiter.len == 0) - return (false); - return (str_compare(heredoc->current_leading_word.buf, - heredoc->delimiter.buf)); -} - -bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, - enum e_token_type middle_type, enum e_token_type end_type) -{ - bool did_advance; - t_heredoc *heredoc; - - did_advance = false; - heredoc = vec_heredoc_last(&scanner->heredocs); - while (true) - { - if (lexer->data.lookahead == '\0') - { - if (lexer->data.eof((void *)lexer) && did_advance) - { - reset_heredoc(heredoc); - lexer->data.result_symbol = end_type; - return (true); - } - return (false); - } - else if (lexer->data.lookahead == '\\') - { - did_advance = true; - lexer->data.advance((void *)lexer, false); - lexer->data.advance((void *)lexer, false); - } - else if (lexer->data.lookahead == '$') - { - if (heredoc->is_raw) - { - did_advance = true; - lexer->data.advance((void *)lexer, false); - } - if (did_advance) - { - lexer->data.mark_end((void *)lexer); - lexer->data.result_symbol = middle_type; - heredoc->started = true; - lexer->data.advance((void *)lexer, false); - if (me_isalpha(lexer->data.lookahead) - || lexer->data.lookahead == '{' - || lexer->data.lookahead == '(') - return (true); - } - if (middle_type == HEREDOC_BODY_BEGINNING - && lexer->data.get_column((void *)lexer) == 0) - { - lexer->data.result_symbol = middle_type; - heredoc->started = true; - return (true); - } - return (false); - } - else if (lexer->data.lookahead == '\n') - { - if (!did_advance) - lexer->data.advance((void *)lexer, true); - else - lexer->data.advance((void *)lexer, false); - did_advance = true; - if (heredoc->allows_indent) - { - while (me_isspace(lexer->data.lookahead)) - lexer->data.advance((void *)lexer, false); - } - lexer->data.result_symbol = end_type; - if (heredoc->started) - lexer->data.result_symbol = middle_type; - lexer->data.mark_end((void *)lexer); - if (scan_heredoc_end_identifier(heredoc, lexer)) - { - if (lexer->data.result_symbol == HEREDOC_END) - vec_heredoc_pop(&scanner->heredocs, NULL); - return (true); - } - } - else - { - if (lexer->data.get_column((void *)lexer) == 0) - { - while (me_isspace(lexer->data.lookahead)) - { - if (did_advance) - lexer->data.advance((void *)lexer, false); - else - lexer->data.advance((void *)lexer, true); - } - if (end_type != SIMPLE_HEREDOC_BODY) - { - lexer->data.result_symbol = middle_type; - if (scan_heredoc_end_identifier(heredoc, lexer)) - return (true); - } - if (end_type == SIMPLE_HEREDOC_BODY) - { - lexer->data.result_symbol = end_type; - lexer->data.mark_end((void *)lexer); - if (scan_heredoc_end_identifier(heredoc, lexer)) - return (true); - } - } - did_advance = true; - lexer->data.advance((void *)lexer, false); - } - } -} - bool scan_concat(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) { @@ -344,34 +68,6 @@ bool scan_concat(t_scanner *scanner, t_lexer *lexer, return (true); } -bool scan_double_hash(t_scanner *scanner, t_lexer *lexer, - const bool *valid_symbols) -{ - (void)(scanner); - (void)(lexer); - (void)(valid_symbols); - if (valid_symbols[IMMEDIATE_DOUBLE_HASH] - && !(valid_symbols[ERROR_RECOVERY])) - { - if (lexer->data.lookahead == '#') - { - lexer->data.mark_end((void *)lexer); - lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '#') - { - lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead != '}') - { - lexer->data.result_symbol = IMMEDIATE_DOUBLE_HASH; - lexer->data.mark_end((void *)lexer); - return (true); - } - } - } - } - return (false); -} - bool scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) { @@ -655,47 +351,3 @@ bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) return (scan_advance_words(scanner, lexer, valid_symbols)); return (false); } - -void *tree_sitter_sh_external_scanner_create(void) -{ - t_scanner *scanner; - - scanner = mem_alloc(sizeof(*scanner)); - scanner->heredocs = vec_heredoc_new(0, heredoc_free); - return (scanner); -} - -bool tree_sitter_sh_external_scanner_scan(void *payload, t_lexer *lexer, - const bool *valid_symbols) -{ - t_scanner *scanner; - - scanner = (t_scanner *)payload; - return (scan(scanner, lexer, valid_symbols)); -} - -t_u32 tree_sitter_sh_external_scanner_serialize(void *payload, t_u8 *state) -{ - t_scanner *scanner; - - scanner = (t_scanner *)payload; - return (serialize(scanner, state)); -} - -void tree_sitter_sh_external_scanner_deserialize(void *payload, - const t_u8 *state, t_u32 length) -{ - t_scanner *scanner; - - scanner = (t_scanner *)payload; - deserialize(scanner, state, length); -} - -void tree_sitter_sh_external_scanner_destroy(void *payload) -{ - t_scanner *scanner; - - scanner = (t_scanner *)payload; - vec_heredoc_free(scanner->heredocs); - mem_free(scanner); -} diff --git a/parser/src/scanner/serialize.c b/parser/src/scanner/serialize.c new file mode 100644 index 00000000..82c5c4b4 --- /dev/null +++ b/parser/src/scanner/serialize.c @@ -0,0 +1,162 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* serialize.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: rparodi +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/09/14 16:08:04 by rparodi #+# #+# */ +/* Updated: 2024/09/14 16:21:00 by rparodi ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "parser/inner/scanner_inner.h" + +bool in_error_recovery(const bool *valid_symbols); +void reset(t_scanner *scanner); +bool advance_word(t_lexer *lexer, t_string *unquoted_word); +t_u32 serialize(t_scanner *scanner, t_u8 *buffer); +void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length); +bool scan_bare_dollar(t_lexer *lexer); +bool scan_heredoc_start(t_heredoc *heredoc, t_lexer *lexer); +bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer); +bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, + enum e_token_type middle_type, enum e_token_type end_type); +bool scan_double_hash(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_concat(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_advance_words(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_literals(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols); +void *tree_sitter_sh_external_scanner_create(void); +bool tree_sitter_sh_external_scanner_scan(void *payload, t_lexer *lexer, + const bool *valid_symbols); +t_u32 tree_sitter_sh_external_scanner_serialize(void *payload, t_u8 *state); +void tree_sitter_sh_external_scanner_deserialize(void *payload, + const t_u8 *state, t_u32 length); +void tree_sitter_sh_external_scanner_destroy(void *payload); + +bool advance_word(t_lexer *lexer, t_string *unquoted_word) +{ + bool empty; + t_i32 quote; + + empty = true; + quote = 0; + if (lexer->data.lookahead == '\'' || lexer->data.lookahead == '"') + { + quote = lexer->data.lookahead; + lexer->data.advance((void *)lexer, false); + } + while (lexer->data.lookahead && !((quote && (lexer->data.lookahead == quote + || lexer->data.lookahead == '\r' + || lexer->data.lookahead == '\n')) || (!quote + && me_isspace(lexer->data.lookahead)))) + { + if (lexer->data.lookahead == '\\') + { + lexer->data.advance((void *)lexer, false); + if (!lexer->data.lookahead) + return (false); + } + empty = false; + string_push_char(unquoted_word, lexer->data.lookahead); + lexer->data.advance((void *)lexer, false); + } + string_push_char(unquoted_word, '\0'); + if (quote && lexer->data.lookahead == quote) + lexer->data.advance((void *)lexer, false); + return (!empty); +} + +t_u32 serialize(t_scanner *scanner, t_u8 *buffer) +{ + t_u32 size; + t_usize delimiter_size; + t_usize i; + t_heredoc *heredoc; + + i = 0; + size = 0; + buffer[size++] = (char)scanner->last_glob_paren_depth; + buffer[size++] = (char)scanner->ext_was_in_double_quote; + buffer[size++] = (char)scanner->ext_saw_outside_quote; + buffer[size++] = (char)scanner->heredocs.len; + while (i < scanner->heredocs.len) + { + heredoc = vec_heredoc_get(&scanner->heredocs, i); + if (heredoc->delimiter.len + 3 + + size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) + return (0); + buffer[size++] = (char)heredoc->is_raw; + buffer[size++] = (char)heredoc->started; + buffer[size++] = (char)heredoc->allows_indent; + delimiter_size = heredoc->delimiter.len; + mem_copy(&buffer[size], &delimiter_size, sizeof(t_usize)); + size += sizeof(t_usize); + if (heredoc->delimiter.len > 0) + { + mem_copy(&buffer[size], heredoc->delimiter.buf, + heredoc->delimiter.len); + size += heredoc->delimiter.len; + } + i++; + } + return (size); +} + +void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length) +{ + t_u32 size; + t_u32 heredoc_count; + t_usize i; + t_usize delimiter_size; + t_heredoc *heredoc; + t_heredoc new_heredoc; + + if (length == 0) + reset(scanner); + else + { + i = 0; + heredoc_count = 0; + size = 0; + scanner->last_glob_paren_depth = buffer[size++]; + scanner->ext_was_in_double_quote = buffer[size++]; + scanner->ext_saw_outside_quote = buffer[size++]; + heredoc_count = (t_u8)buffer[size++]; + while (i < heredoc_count) + { + heredoc = NULL; + if (i < scanner->heredocs.len) + heredoc = vec_heredoc_get(&scanner->heredocs, i); + else + { + new_heredoc = heredoc_new(); + vec_heredoc_push(&scanner->heredocs, new_heredoc); + heredoc = vec_heredoc_last(&scanner->heredocs); + } + heredoc->is_raw = buffer[size++]; + heredoc->started = buffer[size++]; + heredoc->allows_indent = buffer[size++]; + mem_copy(&delimiter_size, &buffer[size], sizeof(t_usize)); + size += sizeof(t_usize); + heredoc->delimiter.len = delimiter_size; + string_reserve(&heredoc->delimiter, heredoc->delimiter.len); + if (heredoc->delimiter.len > 0) + { + mem_copy(heredoc->delimiter.buf, &buffer[size], + heredoc->delimiter.len); + size += heredoc->delimiter.len; + } + i++; + } + if (!(size == length)) + me_abort("assertion failed: size == length"); + } +} diff --git a/parser/src/scanner/tree_sitter.c b/parser/src/scanner/tree_sitter.c new file mode 100644 index 00000000..7f60860d --- /dev/null +++ b/parser/src/scanner/tree_sitter.c @@ -0,0 +1,86 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* tree_sitter.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: rparodi +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/09/14 16:10:31 by rparodi #+# #+# */ +/* Updated: 2024/09/14 16:20:59 by rparodi ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "parser/inner/scanner_inner.h" + +bool in_error_recovery(const bool *valid_symbols); +void reset(t_scanner *scanner); +bool advance_word(t_lexer *lexer, t_string *unquoted_word); +t_u32 serialize(t_scanner *scanner, t_u8 *buffer); +void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length); +bool scan_bare_dollar(t_lexer *lexer); +bool scan_heredoc_start(t_heredoc *heredoc, t_lexer *lexer); +bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer); +bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, + enum e_token_type middle_type, enum e_token_type end_type); +bool scan_double_hash(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_concat(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_advance_words(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan_literals(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols); +bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols); +void *tree_sitter_sh_external_scanner_create(void); +bool tree_sitter_sh_external_scanner_scan(void *payload, t_lexer *lexer, + const bool *valid_symbols); +t_u32 tree_sitter_sh_external_scanner_serialize(void *payload, t_u8 *state); +void tree_sitter_sh_external_scanner_deserialize(void *payload, + const t_u8 *state, t_u32 length); +void tree_sitter_sh_external_scanner_destroy(void *payload); + +void *tree_sitter_sh_external_scanner_create(void) +{ + t_scanner *scanner; + + scanner = mem_alloc(sizeof(*scanner)); + scanner->heredocs = vec_heredoc_new(0, heredoc_free); + return (scanner); +} + +bool tree_sitter_sh_external_scanner_scan(void *payload, t_lexer *lexer, + const bool *valid_symbols) +{ + t_scanner *scanner; + + scanner = (t_scanner *)payload; + return (scan(scanner, lexer, valid_symbols)); +} + +t_u32 tree_sitter_sh_external_scanner_serialize(void *payload, t_u8 *state) +{ + t_scanner *scanner; + + scanner = (t_scanner *)payload; + return (serialize(scanner, state)); +} + +void tree_sitter_sh_external_scanner_deserialize(void *payload, + const t_u8 *state, t_u32 length) +{ + t_scanner *scanner; + + scanner = (t_scanner *)payload; + deserialize(scanner, state, length); +} + +void tree_sitter_sh_external_scanner_destroy(void *payload) +{ + t_scanner *scanner; + + scanner = (t_scanner *)payload; + vec_heredoc_free(scanner->heredocs); + mem_free(scanner); +} diff --git a/stdme/Filelist.me.mk b/stdme/Filelist.me.mk index 03b7c42f..7f234e90 100644 --- a/stdme/Filelist.me.mk +++ b/stdme/Filelist.me.mk @@ -35,10 +35,10 @@ fs/fs_internal \ fs/getters \ fs/putfd \ gnl/get_next_line \ +hash/hasher \ hash/hash_signed \ hash/hash_str \ hash/hash_unsigned \ -hash/hasher \ hash/sip/sip13 \ hash/sip/sip_utils \ hash/sip/sip_utils2 \ @@ -86,6 +86,10 @@ printf/printf \ printf/printf_fd \ printf/printf_str \ printf/vprintf \ +string/mod \ +string/string_insert \ +string/string_remove \ +string/string_reserve \ str/str_clone \ str/str_compare \ str/str_find_chr \ @@ -102,10 +106,6 @@ str/str_n_find_str \ str/str_split \ str/str_substring \ str/str_trim \ -string/mod \ -string/string_insert \ -string/string_remove \ -string/string_reserve \ GEN_FILES = \ convert/i16_to_str \