From 475038e2b74bb9286f4d38207c1558272b2f22ad Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Fri, 6 Sep 2024 16:46:01 +0200 Subject: [PATCH] update: Updated stuff so the scanner works great now --- allocator/Filelist.aq.mk | 4 +- ast/Filelist.ast.mk | 8 +- exec/Filelist.exec.mk | 8 +- line/Filelist.line.mk | 4 +- output/src/vec/ast/ast.c | 4 +- output/src/vec/ast/ast_functions4.c | 6 +- output/src/vec/estr/estr.c | 4 +- output/src/vec/estr/estr_functions4.c | 6 +- output/src/vec/heredoc/heredoc.c | 4 +- output/src/vec/heredoc/heredoc_functions4.c | 6 +- output/src/vec/pid/pid.c | 4 +- output/src/vec/pid/pid_functions4.c | 6 +- output/src/vec/str/str.c | 4 +- output/src/vec/str/str_functions4.c | 6 +- output/src/vec/subtree/subtree.c | 4 +- output/src/vec/subtree/subtree_functions4.c | 6 +- parser/src/scanner.c | 1019 ++++++++++++----- stdme/Filelist.me.mk | 10 +- .../src/vec/C__PREFIX__.c__TEMPLATE__ | 4 +- .../vec/C__PREFIX___functions4.c__TEMPLATE__ | 6 +- test_heredoc.sh | 4 +- 21 files changed, 787 insertions(+), 340 deletions(-) diff --git a/allocator/Filelist.aq.mk b/allocator/Filelist.aq.mk index 63698d77..ead60e1c 100644 --- a/allocator/Filelist.aq.mk +++ b/allocator/Filelist.aq.mk @@ -9,11 +9,11 @@ me_alloc/merge_blocks \ me_alloc/pages \ me_alloc/realloc \ vg/dummy_block \ +vg/dummy_mem_status \ vg/dummy_mempool \ vg/dummy_mempool_bis \ -vg/dummy_mem_status \ vg/valgrind_block \ +vg/valgrind_mem_status \ vg/valgrind_mempool \ vg/valgrind_mempool_bis \ -vg/valgrind_mem_status \ diff --git a/ast/Filelist.ast.mk b/ast/Filelist.ast.mk index 61fff42a..2e3144a3 100644 --- a/ast/Filelist.ast.mk +++ b/ast/Filelist.ast.mk @@ -1,4 +1,8 @@ SRC_FILES = \ +_here_doc \ +_not_done_boucle_print \ +_not_done_function \ +_not_done_scripting_print \ ast_alloc/ast_alloc \ ast_alloc/ast_alloc_scripting \ ast_free/ast_free \ @@ -17,10 +21,6 @@ from_node/node_utils2 \ from_node/redirect_node \ from_node/scripting_node \ from_node/string_node \ -_here_doc \ -_not_done_boucle_print \ -_not_done_function \ -_not_done_scripting_print \ print_ast/ast_print \ print_ast/ast_print_arithmetic \ print_ast/ast_print_command \ diff --git a/exec/Filelist.exec.mk b/exec/Filelist.exec.mk index 34392b10..5fd1c36b 100644 --- a/exec/Filelist.exec.mk +++ b/exec/Filelist.exec.mk @@ -1,17 +1,17 @@ SRC_FILES = \ -builtins/cd \ builtins/_debug \ +builtins/cd \ builtins/echo \ builtins/env \ builtins/exit \ builtins/export \ builtins/pwd \ builtins/unset \ -run_arithmetic/arithmetic \ -run_arithmetic/arithmetic_operation \ run_arithmetic/_get_op \ -run_arithmetic/operator_bis \ run_arithmetic/_run_arith \ run_arithmetic/_to_ast_node \ +run_arithmetic/arithmetic \ +run_arithmetic/arithmetic_operation \ +run_arithmetic/operator_bis \ run_ast \ diff --git a/line/Filelist.line.mk b/line/Filelist.line.mk index 2c93bfb5..4d84cc2e 100644 --- a/line/Filelist.line.mk +++ b/line/Filelist.line.mk @@ -2,11 +2,11 @@ SRC_FILES = \ line \ line_edit_actions \ line_edit_actions2 \ -line_editing \ -line_editing2 \ line_edit_mode \ line_edit_mode_interal \ line_edit_mode_specific_key \ +line_editing \ +line_editing2 \ line_globals \ line_history \ line_internals \ diff --git a/output/src/vec/ast/ast.c b/output/src/vec/ast/ast.c index a2fe5576..60fac8cc 100644 --- a/output/src/vec/ast/ast.c +++ b/output/src/vec/ast/ast.c @@ -65,11 +65,9 @@ t_error vec_ast_pop(t_vec_ast *vec, t_ast_node *value) t_ast_node temp_value; t_ast_node *ptr; - if (vec == NULL) + if (vec == NULL || vec->len == 0) return (ERROR); ptr = value; - if (vec->len == 0) - return (ERROR); if (value == NULL) ptr = &temp_value; vec->len--; diff --git a/output/src/vec/ast/ast_functions4.c b/output/src/vec/ast/ast_functions4.c index 05e95141..b548a698 100644 --- a/output/src/vec/ast/ast_functions4.c +++ b/output/src/vec/ast/ast_functions4.c @@ -17,9 +17,11 @@ t_ast_node *vec_ast_get(t_vec_ast *vec, t_usize i) { - if (vec == NULL || vec->len >= i) + if (vec == NULL || vec->buffer == NULL) return (NULL); - return (&vec->buffer[i]); + if (i < vec->len) + return (&vec->buffer[i]); + return (NULL); } t_ast_node *vec_ast_last(t_vec_ast *vec) diff --git a/output/src/vec/estr/estr.c b/output/src/vec/estr/estr.c index 5981c2c6..e8a2d5cd 100644 --- a/output/src/vec/estr/estr.c +++ b/output/src/vec/estr/estr.c @@ -65,11 +65,9 @@ t_error vec_estr_pop(t_vec_estr *vec, t_expandable_str *value) t_expandable_str temp_value; t_expandable_str *ptr; - if (vec == NULL) + if (vec == NULL || vec->len == 0) return (ERROR); ptr = value; - if (vec->len == 0) - return (ERROR); if (value == NULL) ptr = &temp_value; vec->len--; diff --git a/output/src/vec/estr/estr_functions4.c b/output/src/vec/estr/estr_functions4.c index 20d07509..25e10dbf 100644 --- a/output/src/vec/estr/estr_functions4.c +++ b/output/src/vec/estr/estr_functions4.c @@ -17,9 +17,11 @@ t_expandable_str *vec_estr_get(t_vec_estr *vec, t_usize i) { - if (vec == NULL || vec->len >= i) + if (vec == NULL || vec->buffer == NULL) return (NULL); - return (&vec->buffer[i]); + if (i < vec->len) + return (&vec->buffer[i]); + return (NULL); } t_expandable_str *vec_estr_last(t_vec_estr *vec) diff --git a/output/src/vec/heredoc/heredoc.c b/output/src/vec/heredoc/heredoc.c index ce3a0cc3..5b3e0445 100644 --- a/output/src/vec/heredoc/heredoc.c +++ b/output/src/vec/heredoc/heredoc.c @@ -65,11 +65,9 @@ t_error vec_heredoc_pop(t_vec_heredoc *vec, t_heredoc *value) t_heredoc temp_value; t_heredoc *ptr; - if (vec == NULL) + if (vec == NULL || vec->len == 0) return (ERROR); ptr = value; - if (vec->len == 0) - return (ERROR); if (value == NULL) ptr = &temp_value; vec->len--; diff --git a/output/src/vec/heredoc/heredoc_functions4.c b/output/src/vec/heredoc/heredoc_functions4.c index 4b4dc2b2..669381a1 100644 --- a/output/src/vec/heredoc/heredoc_functions4.c +++ b/output/src/vec/heredoc/heredoc_functions4.c @@ -17,9 +17,11 @@ t_heredoc *vec_heredoc_get(t_vec_heredoc *vec, t_usize i) { - if (vec == NULL || vec->len >= i) + if (vec == NULL || vec->buffer == NULL) return (NULL); - return (&vec->buffer[i]); + if (i < vec->len) + return (&vec->buffer[i]); + return (NULL); } t_heredoc *vec_heredoc_last(t_vec_heredoc *vec) diff --git a/output/src/vec/pid/pid.c b/output/src/vec/pid/pid.c index da4c4bbb..05303095 100644 --- a/output/src/vec/pid/pid.c +++ b/output/src/vec/pid/pid.c @@ -65,11 +65,9 @@ t_error vec_pid_pop(t_vec_pid *vec, t_pid *value) t_pid temp_value; t_pid *ptr; - if (vec == NULL) + if (vec == NULL || vec->len == 0) return (ERROR); ptr = value; - if (vec->len == 0) - return (ERROR); if (value == NULL) ptr = &temp_value; vec->len--; diff --git a/output/src/vec/pid/pid_functions4.c b/output/src/vec/pid/pid_functions4.c index 2022678b..8fbf49bd 100644 --- a/output/src/vec/pid/pid_functions4.c +++ b/output/src/vec/pid/pid_functions4.c @@ -17,9 +17,11 @@ t_pid *vec_pid_get(t_vec_pid *vec, t_usize i) { - if (vec == NULL || vec->len >= i) + if (vec == NULL || vec->buffer == NULL) return (NULL); - return (&vec->buffer[i]); + if (i < vec->len) + return (&vec->buffer[i]); + return (NULL); } t_pid *vec_pid_last(t_vec_pid *vec) diff --git a/output/src/vec/str/str.c b/output/src/vec/str/str.c index 0c2daa9f..fca3426b 100644 --- a/output/src/vec/str/str.c +++ b/output/src/vec/str/str.c @@ -65,11 +65,9 @@ t_error vec_str_pop(t_vec_str *vec, t_str *value) t_str temp_value; t_str *ptr; - if (vec == NULL) + if (vec == NULL || vec->len == 0) return (ERROR); ptr = value; - if (vec->len == 0) - return (ERROR); if (value == NULL) ptr = &temp_value; vec->len--; diff --git a/output/src/vec/str/str_functions4.c b/output/src/vec/str/str_functions4.c index cd74bb14..4b4d16f2 100644 --- a/output/src/vec/str/str_functions4.c +++ b/output/src/vec/str/str_functions4.c @@ -17,9 +17,11 @@ t_str *vec_str_get(t_vec_str *vec, t_usize i) { - if (vec == NULL || vec->len >= i) + if (vec == NULL || vec->buffer == NULL) return (NULL); - return (&vec->buffer[i]); + if (i < vec->len) + return (&vec->buffer[i]); + return (NULL); } t_str *vec_str_last(t_vec_str *vec) diff --git a/output/src/vec/subtree/subtree.c b/output/src/vec/subtree/subtree.c index ff7fc56e..aa2e1e92 100644 --- a/output/src/vec/subtree/subtree.c +++ b/output/src/vec/subtree/subtree.c @@ -65,11 +65,9 @@ t_error vec_subtree_pop(t_vec_subtree *vec, t_subtree *value) t_subtree temp_value; t_subtree *ptr; - if (vec == NULL) + if (vec == NULL || vec->len == 0) return (ERROR); ptr = value; - if (vec->len == 0) - return (ERROR); if (value == NULL) ptr = &temp_value; vec->len--; diff --git a/output/src/vec/subtree/subtree_functions4.c b/output/src/vec/subtree/subtree_functions4.c index 007d1c5c..98bbddfd 100644 --- a/output/src/vec/subtree/subtree_functions4.c +++ b/output/src/vec/subtree/subtree_functions4.c @@ -17,9 +17,11 @@ t_subtree *vec_subtree_get(t_vec_subtree *vec, t_usize i) { - if (vec == NULL || vec->len >= i) + if (vec == NULL || vec->buffer == NULL) return (NULL); - return (&vec->buffer[i]); + if (i < vec->len) + return (&vec->buffer[i]); + return (NULL); } t_subtree *vec_subtree_last(t_vec_subtree *vec) diff --git a/parser/src/scanner.c b/parser/src/scanner.c index 3114e1fa..60c465ec 100644 --- a/parser/src/scanner.c +++ b/parser/src/scanner.c @@ -1,12 +1,12 @@ #include "me/char/char.h" #include "me/string/string.h" #include "me/types.h" -#include "parser/array.h" +#include "me/mem/mem.h" +#include "me/vec/vec_heredoc.h" #include "parser/inner/heredoc.h" #include "parser/lexer.h" #include "parser/parser.h" -#include -#include +#include "me/str/str.h" typedef struct s_heredoc t_heredoc; typedef struct s_scanner t_scanner; @@ -40,7 +40,7 @@ struct s_scanner t_u8 last_glob_paren_depth; bool ext_was_in_double_quote; bool ext_saw_outside_quote; - Array(t_heredoc) heredocs; + t_vec_heredoc heredocs; }; bool in_error_recovery(const bool *valid_symbols) @@ -52,8 +52,8 @@ void reset(t_scanner *scanner) { t_u32 i; i = 0; - while (i < scanner->heredocs.size) - reset_heredoc(array_get(&scanner->heredocs, i++)); + while (i < scanner->heredocs.len) + reset_heredoc(vec_heredoc_get(&scanner->heredocs, i++)); } t_u32 serialize(t_scanner *scanner, t_u8 *buffer) @@ -68,11 +68,11 @@ t_u32 serialize(t_scanner *scanner, t_u8 *buffer) buffer[size++] = (char)scanner->last_glob_paren_depth; buffer[size++] = (char)scanner->ext_was_in_double_quote; buffer[size++] = (char)scanner->ext_saw_outside_quote; - buffer[size++] = (char)scanner->heredocs.size; + buffer[size++] = (char)scanner->heredocs.len; - while (i < scanner->heredocs.size) + while (i < scanner->heredocs.len) { - heredoc = array_get(&scanner->heredocs, i); + heredoc = vec_heredoc_get(&scanner->heredocs, i); if (heredoc->delimiter.len + 3 + size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return (0); @@ -81,11 +81,11 @@ t_u32 serialize(t_scanner *scanner, t_u8 *buffer) buffer[size++] = (char)heredoc->allows_indent; delimiter_size = heredoc->delimiter.len; - memcpy(&buffer[size], &delimiter_size, sizeof(t_usize)); + mem_copy(&buffer[size], &delimiter_size, sizeof(t_usize)); size += sizeof(t_usize); if (heredoc->delimiter.len > 0) { - memcpy(&buffer[size], heredoc->delimiter.buf, heredoc->delimiter.len); + mem_copy(&buffer[size], heredoc->delimiter.buf, heredoc->delimiter.len); size += heredoc->delimiter.len; } i++; @@ -116,32 +116,33 @@ void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length) while (i < heredoc_count) { heredoc = NULL; - if (i < scanner->heredocs.size) - heredoc = array_get(&scanner->heredocs, i); + if (i < scanner->heredocs.len) + heredoc = vec_heredoc_get(&scanner->heredocs, i); else { new_heredoc = heredoc_new(); - array_push(&scanner->heredocs, new_heredoc); - heredoc = array_back(&scanner->heredocs); + vec_heredoc_push(&scanner->heredocs, new_heredoc); + heredoc = vec_heredoc_last(&scanner->heredocs); } heredoc->is_raw = buffer[size++]; heredoc->started = buffer[size++]; heredoc->allows_indent = buffer[size++]; - memcpy(&delimiter_size, &buffer[size], sizeof(t_usize)); + mem_copy(&delimiter_size, &buffer[size], sizeof(t_usize)); size += sizeof(t_usize); heredoc->delimiter.len = delimiter_size; string_reserve(&heredoc->delimiter, heredoc->delimiter.len); if (heredoc->delimiter.len > 0) { - memcpy(heredoc->delimiter.buf, &buffer[size], heredoc->delimiter.len); + mem_copy(heredoc->delimiter.buf, &buffer[size], heredoc->delimiter.len); size += heredoc->delimiter.len; } i++; } - assert(size == length); + if (!(size == length)) + me_abort("assertion failed: size == length"); } } @@ -238,13 +239,13 @@ bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer) } } string_push_char(&heredoc->current_leading_word, '\0'); - return (heredoc->delimiter.len == 0 ? false : strcmp(heredoc->current_leading_word.buf, heredoc->delimiter.buf) == 0); + return (heredoc->delimiter.len == 0 ? false : str_compare(heredoc->current_leading_word.buf, heredoc->delimiter.buf)); } bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, enum e_token_type middle_type, enum e_token_type end_type) { bool did_advance = false; - t_heredoc *heredoc = array_back(&scanner->heredocs); + t_heredoc *heredoc = vec_heredoc_last(&scanner->heredocs); for (;;) { @@ -318,7 +319,7 @@ bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, enum e_token_type { if (lexer->data.result_symbol == HEREDOC_END) { - (void)array_pop(&scanner->heredocs); + vec_heredoc_pop(&scanner->heredocs, NULL); } return true; } @@ -359,329 +360,775 @@ bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, enum e_token_type } } -bool scan_expansion_word(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) +bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) { - (void)(scanner); - (void)(lexer); - (void)(valid_symbols); - bool advanced_once = false; - bool advance_once_space = false; - for (;;) + if (valid_symbols[CONCAT] && !(valid_symbols[ERROR_RECOVERY])) { - if (lexer->data.lookahead == '\"') - return false; - if (lexer->data.lookahead == '$') + if (!(lexer->data.lookahead == 0 || me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '>' || + lexer->data.lookahead == '<' || lexer->data.lookahead == ')' || lexer->data.lookahead == '(' || + lexer->data.lookahead == ';' || lexer->data.lookahead == '&' || lexer->data.lookahead == '|' || + lexer->data.lookahead == '{' || lexer->data.lookahead == '}')) { - lexer->data.mark_end((void *)lexer); - lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' || - me_isalnum(lexer->data.lookahead)) + lexer->data.result_symbol = CONCAT; + // So for a`b`, we want to return a concat. We check if the + // 2nd backtick has whitespace after it, and if it does we + // return concat. + if (lexer->data.lookahead == '`') { - lexer->data.result_symbol = EXPANSION_WORD; - return advanced_once; + lexer->data.mark_end((void *)lexer); + lexer->data.advance((void *)lexer, false); + while (lexer->data.lookahead != '`' && !lexer->data.eof((void *)lexer)) + { + lexer->data.advance((void *)lexer, false); + } + if (lexer->data.eof((void *)lexer)) + { + return false; + } + if (lexer->data.lookahead == '`') + { + lexer->data.advance((void *)lexer, false); + } + return me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer); + } + // strings w/ expansions that contains escaped quotes or + // backslashes need this to return a concat + if (lexer->data.lookahead == '\\') + { + lexer->data.mark_end((void *)lexer); + lexer->data.advance((void *)lexer, false); + if (lexer->data.lookahead == '"' || lexer->data.lookahead == '\'' || lexer->data.lookahead == '\\') + { + return true; + } + if (lexer->data.eof((void *)lexer)) + { + return false; + } + } + else + { + return true; } - advanced_once = true; } + } - if (lexer->data.lookahead == '}') - { - lexer->data.mark_end((void *)lexer); - lexer->data.result_symbol = EXPANSION_WORD; - return advanced_once || advance_once_space; - } - - if (lexer->data.lookahead == '(' && !(advanced_once || advance_once_space)) + if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !(valid_symbols[ERROR_RECOVERY])) + { + // advance two # and ensure not } after + if (lexer->data.lookahead == '#') { lexer->data.mark_end((void *)lexer); lexer->data.advance((void *)lexer, false); - while (lexer->data.lookahead != ')' && !lexer->data.eof((void *)lexer)) + if (lexer->data.lookahead == '#') { - // if we find a $( or ${ assume this is valid and is - // a garbage concatenation of some weird word + an - // expansion - // I wonder where this can fail - if (lexer->data.lookahead == '$') + lexer->data.advance((void *)lexer, false); + if (lexer->data.lookahead != '}') + { + lexer->data.result_symbol = IMMEDIATE_DOUBLE_HASH; + lexer->data.mark_end((void *)lexer); + return true; + } + } + } + } + + if (valid_symbols[EMPTY_VALUE]) + { + if (me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer) || lexer->data.lookahead == ';' || + lexer->data.lookahead == '&') + { + lexer->data.result_symbol = EMPTY_VALUE; + return true; + } + } + + if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.len > 0 && + !vec_heredoc_last(&scanner->heredocs)->started && !(valid_symbols[ERROR_RECOVERY])) + return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY)); + + if (valid_symbols[HEREDOC_END] && scanner->heredocs.len > 0) + { + t_heredoc *heredoc = vec_heredoc_last(&scanner->heredocs); + if (scan_heredoc_end_identifier(heredoc, lexer)) + { + string_free(heredoc->current_leading_word); + string_free(heredoc->delimiter); + vec_heredoc_pop(&scanner->heredocs, NULL); + lexer->data.result_symbol = HEREDOC_END; + return true; + } + } + + if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0 && vec_heredoc_last(&scanner->heredocs)->started && + !(valid_symbols[ERROR_RECOVERY])) + return (scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END)); + + if (valid_symbols[HEREDOC_START] && !(valid_symbols[ERROR_RECOVERY]) && scanner->heredocs.len > 0) + return (scan_heredoc_start(vec_heredoc_last(&scanner->heredocs), lexer)); + + if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) && + !(valid_symbols[ERROR_RECOVERY])) + { + for (;;) + { + if ((lexer->data.lookahead == ' ' || lexer->data.lookahead == '\t' || lexer->data.lookahead == '\r' || + (lexer->data.lookahead == '\n' && !valid_symbols[NEWLINE])) && + !valid_symbols[EXPANSION_WORD]) + { + lexer->data.advance((void *)lexer, true); + } + else if (lexer->data.lookahead == '\\') + { + lexer->data.advance((void *)lexer, true); + + if (lexer->data.eof((void *)lexer)) { lexer->data.mark_end((void *)lexer); - lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' || - me_isalnum(lexer->data.lookahead)) - { - lexer->data.result_symbol = EXPANSION_WORD; - return advanced_once; - } - advanced_once = true; + lexer->data.result_symbol = VARIABLE_NAME; + return true; + } + + if (lexer->data.lookahead == '\r') + { + lexer->data.advance((void *)lexer, true); + } + if (lexer->data.lookahead == '\n') + { + lexer->data.advance((void *)lexer, true); } else { - advanced_once = advanced_once || !me_isspace(lexer->data.lookahead); - advance_once_space = advance_once_space || me_isspace(lexer->data.lookahead); - lexer->data.advance((void *)lexer, false); + if (lexer->data.lookahead == '\\' && valid_symbols[EXPANSION_WORD]) + { + goto expansion_word; + } + return false; } } - lexer->data.mark_end((void *)lexer); - if (lexer->data.lookahead == ')') - { - advanced_once = true; - lexer->data.advance((void *)lexer, false); - lexer->data.mark_end((void *)lexer); - if (lexer->data.lookahead == '}') - return false; - } else - return false; - } - - if (lexer->data.lookahead == '\'') - return false; - if (lexer->data.eof((void *)lexer)) - return false; - advanced_once = advanced_once || !me_isspace(lexer->data.lookahead); - advance_once_space = advance_once_space || me_isspace(lexer->data.lookahead); - lexer->data.advance((void *)lexer, false); - } - return (false); -} - -bool scan_concat(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) -{ - (void)(scanner); - (void)(lexer); - (void)(valid_symbols); - lexer->data.result_symbol = CONCAT; - if (lexer->data.lookahead == '\\') - { - lexer->data.mark_end((void *)lexer); - lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '"' || lexer->data.lookahead == '\'' || lexer->data.lookahead == '\\') - return true; - if (lexer->data.eof((void *)lexer)) - return false; - } - return true; -} - -bool check_scan_immediate_double_hash(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) -{ - (void)(scanner); - (void)(lexer); - (void)(valid_symbols); - if (lexer->data.lookahead == '#') - { - lexer->data.mark_end((void *)lexer); - lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '#') - { - lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead != '}') { - lexer->data.result_symbol = IMMEDIATE_DOUBLE_HASH; - lexer->data.mark_end((void *)lexer); - return (true); + break; } } - } - return (false); -} -bool scan_variable_name(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) -{ - for (;;) - { - if ((lexer->data.lookahead == ' ' || lexer->data.lookahead == '\t' || lexer->data.lookahead == '\r' || - (lexer->data.lookahead == '\n' && !valid_symbols[NEWLINE])) && - !valid_symbols[EXPANSION_WORD]) - lexer->data.advance((void *)lexer, true); - else if (lexer->data.lookahead == '\\') + // no '*', '@', '?', '-', '$', '0', '_' + if (!valid_symbols[EXPANSION_WORD] && + (lexer->data.lookahead == '*' || lexer->data.lookahead == '@' || lexer->data.lookahead == '?' || lexer->data.lookahead == '-' || + lexer->data.lookahead == '0' || lexer->data.lookahead == '_')) { - lexer->data.advance((void *)lexer, true); - - if (lexer->data.eof((void *)lexer)) + lexer->data.mark_end((void *)lexer); + lexer->data.advance((void *)lexer, false); + if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' || lexer->data.lookahead == ':' || + lexer->data.lookahead == '-' || lexer->data.lookahead == '%' || lexer->data.lookahead == '#' || + lexer->data.lookahead == '/') + { + return false; + } + if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->data.lookahead)) { lexer->data.mark_end((void *)lexer); - lexer->data.result_symbol = VARIABLE_NAME; + lexer->data.result_symbol = EXTGLOB_PATTERN; return true; } - if (lexer->data.lookahead == '\r') - lexer->data.advance((void *)lexer, true); - if (lexer->data.lookahead == '\n') - lexer->data.advance((void *)lexer, true); - else - { - if (lexer->data.lookahead == '\\' && valid_symbols[EXPANSION_WORD]) - return (scan_expansion_word(scanner, lexer, valid_symbols)); - return false; - } } - else - break; - } - if (!valid_symbols[EXPANSION_WORD] && (lexer->data.lookahead == '*' || lexer->data.lookahead == '@' || lexer->data.lookahead == '?' || - lexer->data.lookahead == '-' || lexer->data.lookahead == '0' || lexer->data.lookahead == '_')) - { - lexer->data.mark_end((void *)lexer); - lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' || lexer->data.lookahead == ':' || lexer->data.lookahead == '-' || - lexer->data.lookahead == '%' || lexer->data.lookahead == '#' || lexer->data.lookahead == '/') - return (false); - if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->data.lookahead)) - { - lexer->data.mark_end((void *)lexer); - lexer->data.result_symbol = EXTGLOB_PATTERN; - return (true); - } - } - if (valid_symbols[HEREDOC_ARROW] && lexer->data.lookahead == '<') - { - lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '<') + if (valid_symbols[HEREDOC_ARROW] && lexer->data.lookahead == '<') { lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '-') + if (lexer->data.lookahead == '<') { lexer->data.advance((void *)lexer, false); - t_heredoc heredoc = heredoc_new(); - heredoc.allows_indent = true; - array_push(&scanner->heredocs, heredoc); - lexer->data.result_symbol = HEREDOC_ARROW_DASH; + if (lexer->data.lookahead == '-') + { + lexer->data.advance((void *)lexer, false); + t_heredoc heredoc = heredoc_new(); + heredoc.allows_indent = true; + vec_heredoc_push(&scanner->heredocs, heredoc); + lexer->data.result_symbol = HEREDOC_ARROW_DASH; + } + // else if (lexer->data.lookahead == '<' || lexer->data.lookahead == '=') + // { + // return false; + // } + else + { + t_heredoc heredoc = heredoc_new(); + vec_heredoc_push(&scanner->heredocs, heredoc); + lexer->data.result_symbol = HEREDOC_ARROW; + } + return true; } - else - { - t_heredoc heredoc = heredoc_new(); - array_push(&scanner->heredocs, heredoc); - lexer->data.result_symbol = HEREDOC_ARROW; - } - return (true); + return false; } - return (false); - } - bool is_number = true; - if (me_isdigit(lexer->data.lookahead)) - lexer->data.advance((void *)lexer, false); - else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_') - { - is_number = false; - lexer->data.advance((void *)lexer, false); - } - else - { - if (lexer->data.lookahead == '{') - return (false); - if (valid_symbols[EXPANSION_WORD]) - return (scan_expansion_word(scanner, lexer, valid_symbols)); - if (valid_symbols[EXTGLOB_PATTERN]) - return (false); - return false; - } - - for (;;) - { + bool is_number = true; if (me_isdigit(lexer->data.lookahead)) + { lexer->data.advance((void *)lexer, false); + } else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_') { is_number = false; lexer->data.advance((void *)lexer, false); } else - break; - } - - if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->data.lookahead == '>' || lexer->data.lookahead == '<')) - { - lexer->data.result_symbol = FILE_DESCRIPTOR; - return (true); - } - - if (valid_symbols[VARIABLE_NAME]) - { - if (lexer->data.lookahead == '+') { - lexer->data.mark_end((void *)lexer); - lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '=' || lexer->data.lookahead == ':') + if (lexer->data.lookahead == '{') { - lexer->data.result_symbol = VARIABLE_NAME; - return (true); + goto brace_start; } - return (false); + if (valid_symbols[EXPANSION_WORD]) + { + goto expansion_word; + } + if (valid_symbols[EXTGLOB_PATTERN]) + { + goto extglob_pattern; + } + return false; } - if (lexer->data.lookahead == '/') - return (false); - if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' || - (lexer->data.lookahead == ':' && !valid_symbols[OPENING_PAREN]) || lexer->data.lookahead == '%' || - (lexer->data.lookahead == '#' && !is_number) || lexer->data.lookahead == '@' || (lexer->data.lookahead == '-')) + + for (;;) { - lexer->data.mark_end((void *)lexer); - lexer->data.result_symbol = VARIABLE_NAME; - return (true); + if (me_isdigit(lexer->data.lookahead)) + { + lexer->data.advance((void *)lexer, false); + } + else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_') + { + is_number = false; + lexer->data.advance((void *)lexer, false); + } + else + { + break; + } } - if (lexer->data.lookahead == '?') + + if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->data.lookahead == '>' || lexer->data.lookahead == '<')) { - lexer->data.mark_end((void *)lexer); - lexer->data.advance((void *)lexer, false); - lexer->data.result_symbol = VARIABLE_NAME; - return (me_isalpha(lexer->data.lookahead)); + lexer->data.result_symbol = FILE_DESCRIPTOR; + return true; } + + if (valid_symbols[VARIABLE_NAME]) + { + if (lexer->data.lookahead == '+') + { + lexer->data.mark_end((void *)lexer); + lexer->data.advance((void *)lexer, false); + if (lexer->data.lookahead == '=' || lexer->data.lookahead == ':') + { + lexer->data.result_symbol = VARIABLE_NAME; + return true; + } + return false; + } + if (lexer->data.lookahead == '/') + { + return false; + } + if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' || + (lexer->data.lookahead == ':' && + !valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable + // names for function words, only handling : for now? #235 + lexer->data.lookahead == '%' || + (lexer->data.lookahead == '#' && !is_number) || lexer->data.lookahead == '@' || (lexer->data.lookahead == '-')) + { + lexer->data.mark_end((void *)lexer); + lexer->data.result_symbol = VARIABLE_NAME; + return true; + } + + if (lexer->data.lookahead == '?') + { + lexer->data.mark_end((void *)lexer); + lexer->data.advance((void *)lexer, false); + lexer->data.result_symbol = VARIABLE_NAME; + return me_isalpha(lexer->data.lookahead); + } + } + + return false; } - return (false); -} -bool check_scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) -{ - (void)(valid_symbols); - t_heredoc *heredoc = array_back(&scanner->heredocs); - if (scan_heredoc_end_identifier(heredoc, lexer)) - { - array_delete(&heredoc->current_leading_word); - array_delete(&heredoc->delimiter); - (void)array_pop(&scanner->heredocs); - lexer->data.result_symbol = HEREDOC_END; - return (true); - } - return (false); -} - -bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) -{ - if (valid_symbols[CONCAT] && !(valid_symbols[ERROR_RECOVERY]) && - !(lexer->data.lookahead == 0 || me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '>' || lexer->data.lookahead == '<' || - lexer->data.lookahead == ')' || lexer->data.lookahead == '(' || lexer->data.lookahead == ';' || lexer->data.lookahead == '&' || - lexer->data.lookahead == '|' || lexer->data.lookahead == '{' || lexer->data.lookahead == '}')) - return (scan_concat(scanner, lexer, valid_symbols)); - if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !(valid_symbols[ERROR_RECOVERY]) && - check_scan_immediate_double_hash(scanner, lexer, valid_symbols)) - return (true); - if (valid_symbols[EMPTY_VALUE] && (me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer) || - lexer->data.lookahead == ';' || lexer->data.lookahead == '&')) - return (lexer->data.result_symbol = EMPTY_VALUE, true); - if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.size > 0 && - !array_back(&scanner->heredocs)->started && !(valid_symbols[ERROR_RECOVERY])) - return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY)); - if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0 && check_scan_heredoc_end(scanner, lexer, valid_symbols)) - return (false); - if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 && array_back(&scanner->heredocs)->started && - !(valid_symbols[ERROR_RECOVERY])) - return (scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END)); - if (valid_symbols[HEREDOC_START] && !(valid_symbols[ERROR_RECOVERY]) && scanner->heredocs.size > 0) - return (scan_heredoc_start(array_back(&scanner->heredocs), lexer)); - if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) && - !(valid_symbols[ERROR_RECOVERY])) - return (scan_variable_name(scanner, lexer, valid_symbols)); if (valid_symbols[BARE_DOLLAR] && !(valid_symbols[ERROR_RECOVERY]) && scan_bare_dollar(lexer)) return (true); + + // if ((valid_symbols[REGEX]) && !(valid_symbols[ERROR_RECOVERY])) + // { + // if (valid_symbols[REGEX]) + // { + // while (me_isspace(lexer->data.lookahead)) + // { + // lexer->data.advance((void *)lexer, true); + // } + // } + // + // if ((lexer->data.lookahead != '"' && lexer->data.lookahead != '\'') || ((lexer->data.lookahead == '$' || lexer->data.lookahead == + // '\'')) || (lexer->data.lookahead == '\'')) + // { + // typedef struct + // { + // bool done; + // bool advanced_once; + // bool found_non_alnumdollarunderdash; + // bool last_was_escape; + // bool in_single_quote; + // t_u32 paren_depth; + // t_u32 bracket_depth; + // t_u32 brace_depth; + // } State; + // + // if (lexer->data.lookahead == '$') + // { + // lexer->data.mark_end((void *)lexer); + // lexer->data.advance((void *)lexer, false); + // if (lexer->data.lookahead == '(') + // { + // return false; + // } + // } + // + // lexer->data.mark_end((void *)lexer); + // + // State state = {false, false, false, false, false, 0, 0, 0}; + // while (!state.done) + // { + // if (state.in_single_quote) + // { + // if (lexer->data.lookahead == '\'') + // { + // state.in_single_quote = false; + // lexer->data.advance((void *)lexer, false); + // lexer->data.mark_end((void *)lexer); + // } + // } + // switch (lexer->data.lookahead) + // { + // case '\\': + // state.last_was_escape = true; + // break; + // case '\0': + // return false; + // case '(': + // state.paren_depth++; + // state.last_was_escape = false; + // break; + // case '[': + // state.bracket_depth++; + // state.last_was_escape = false; + // break; + // case '{': + // if (!state.last_was_escape) + // state.brace_depth++; + // state.last_was_escape = false; + // break; + // case ')': + // if (state.paren_depth == 0) + // state.done = true; + // state.paren_depth--; + // state.last_was_escape = false; + // break; + // case ']': + // if (state.bracket_depth == 0) + // state.done = true; + // state.bracket_depth--; + // state.last_was_escape = false; + // break; + // case '}': + // if (state.brace_depth == 0) + // state.done = true; + // state.brace_depth--; + // state.last_was_escape = false; + // break; + // case '\'': + // // Enter or exit a single-quoted string. + // state.in_single_quote = !state.in_single_quote; + // lexer->data.advance((void *)lexer, false); + // state.advanced_once = true; + // state.last_was_escape = false; + // continue; + // default: + // state.last_was_escape = false; + // break; + // } + // + // if (!state.done) + // { + // if (valid_symbols[REGEX]) + // { + // bool was_space = !state.in_single_quote && me_isspace(lexer->data.lookahead); + // lexer->data.advance((void *)lexer, false); + // state.advanced_once = true; + // if (!was_space || state.paren_depth > 0) + // { + // lexer->data.mark_end((void *)lexer); + // } + // } + // } + // } + // + // lexer->data.result_symbol = REGEX; + // if (valid_symbols[REGEX] && !state.advanced_once) + // { + // return false; + // } + // return true; + // } + // } + +extglob_pattern: + // if (valid_symbols[EXTGLOB_PATTERN] && !(valid_symbols[ERROR_RECOVERY])) + // { + // // first skip ws, then check for ? * + @ ! + // while (me_isspace(lexer->data.lookahead)) + // { + // lexer->data.advance((void *)lexer, true); + // } + // + // if (lexer->data.lookahead == '?' || lexer->data.lookahead == '*' || lexer->data.lookahead == '+' || lexer->data.lookahead == '@' || + // lexer->data.lookahead == '!' || lexer->data.lookahead == '-' || lexer->data.lookahead == ')' || lexer->data.lookahead == '\\' || + // lexer->data.lookahead == '.' || lexer->data.lookahead == '[' || (me_isalpha(lexer->data.lookahead))) + // { + // if (lexer->data.lookahead == '\\') + // { + // lexer->data.advance((void *)lexer, false); + // if ((me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '"') && lexer->data.lookahead != '\r' && + // lexer->data.lookahead != '\n') + // { + // lexer->data.advance((void *)lexer, false); + // } + // else + // { + // return false; + // } + // } + // + // if (lexer->data.lookahead == ')' && scanner->last_glob_paren_depth == 0) + // { + // lexer->data.mark_end((void *)lexer); + // lexer->data.advance((void *)lexer, false); + // + // if (me_isspace(lexer->data.lookahead)) + // { + // return false; + // } + // } + // + // lexer->data.mark_end((void *)lexer); + // bool was_non_alpha = !me_isalpha(lexer->data.lookahead); + // if (lexer->data.lookahead != '[') + // { + // // no esac + // if (lexer->data.lookahead == 'e') + // { + // lexer->data.mark_end((void *)lexer); + // lexer->data.advance((void *)lexer, false); + // if (lexer->data.lookahead == 's') + // { + // lexer->data.advance((void *)lexer, false); + // if (lexer->data.lookahead == 'a') + // { + // lexer->data.advance((void *)lexer, false); + // if (lexer->data.lookahead == 'c') + // { + // lexer->data.advance((void *)lexer, false); + // if (me_isspace(lexer->data.lookahead)) + // { + // return false; + // } + // } + // } + // } + // } + // else + // { + // lexer->data.advance((void *)lexer, false); + // } + // } + // + // // -\w is just a word, find something else special + // if (lexer->data.lookahead == '-') + // { + // lexer->data.mark_end((void *)lexer); + // lexer->data.advance((void *)lexer, false); + // while (me_isalnum(lexer->data.lookahead)) + // { + // lexer->data.advance((void *)lexer, false); + // } + // + // if (lexer->data.lookahead == ')' || lexer->data.lookahead == '\\' || lexer->data.lookahead == '.') + // { + // return false; + // } + // lexer->data.mark_end((void *)lexer); + // } + // + // // case item -) or *) + // if (lexer->data.lookahead == ')' && scanner->last_glob_paren_depth == 0) + // { + // lexer->data.mark_end((void *)lexer); + // lexer->data.advance((void *)lexer, false); + // if (me_isspace(lexer->data.lookahead)) + // { + // lexer->data.result_symbol = EXTGLOB_PATTERN; + // return was_non_alpha; + // } + // } + // + // if (me_isspace(lexer->data.lookahead)) + // { + // lexer->data.mark_end((void *)lexer); + // lexer->data.result_symbol = EXTGLOB_PATTERN; + // scanner->last_glob_paren_depth = 0; + // return true; + // } + // + // if (lexer->data.lookahead == '$') + // { + // lexer->data.mark_end((void *)lexer); + // lexer->data.advance((void *)lexer, false); + // if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(') + // { + // lexer->data.result_symbol = EXTGLOB_PATTERN; + // return true; + // } + // } + // + // if (lexer->data.lookahead == '|') + // { + // lexer->data.mark_end((void *)lexer); + // lexer->data.advance((void *)lexer, false); + // lexer->data.result_symbol = EXTGLOB_PATTERN; + // return true; + // } + // + // if (!me_isalnum(lexer->data.lookahead) && lexer->data.lookahead != '(' && lexer->data.lookahead != '"' && lexer->data.lookahead + // != '[' && lexer->data.lookahead != '?' && lexer->data.lookahead != '/' && lexer->data.lookahead != '\\' && lexer->data.lookahead != + // '_' && lexer->data.lookahead != '*') + // { + // return false; + // } + // + // typedef struct + // { + // bool done; + // bool saw_non_alphadot; + // t_u32 paren_depth; + // t_u32 bracket_depth; + // t_u32 brace_depth; + // } State; + // + // State state = {false, was_non_alpha, scanner->last_glob_paren_depth, 0, 0}; + // while (!state.done) + // { + // switch (lexer->data.lookahead) + // { + // case '\0': + // return false; + // case '(': + // state.paren_depth++; + // break; + // case '[': + // state.bracket_depth++; + // break; + // case '{': + // state.brace_depth++; + // break; + // case ')': + // if (state.paren_depth == 0) + // { + // state.done = true; + // } + // state.paren_depth--; + // break; + // case ']': + // if (state.bracket_depth == 0) + // { + // state.done = true; + // } + // state.bracket_depth--; + // break; + // case '}': + // if (state.brace_depth == 0) + // { + // state.done = true; + // } + // state.brace_depth--; + // break; + // } + // + // if (lexer->data.lookahead == '|') + // { + // lexer->data.mark_end((void *)lexer); + // lexer->data.advance((void *)lexer, false); + // if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0) + // { + // lexer->data.result_symbol = EXTGLOB_PATTERN; + // return true; + // } + // } + // + // if (!state.done) + // { + // bool was_space = me_isspace(lexer->data.lookahead); + // if (lexer->data.lookahead == '$') + // { + // lexer->data.mark_end((void *)lexer); + // if (!me_isalpha(lexer->data.lookahead) && lexer->data.lookahead != '.' && lexer->data.lookahead != '\\') + // { + // state.saw_non_alphadot = true; + // } + // lexer->data.advance((void *)lexer, false); + // if (lexer->data.lookahead == '(' || lexer->data.lookahead == '{') + // { + // lexer->data.result_symbol = EXTGLOB_PATTERN; + // scanner->last_glob_paren_depth = state.paren_depth; + // return state.saw_non_alphadot; + // } + // } + // if (was_space) + // { + // lexer->data.mark_end((void *)lexer); + // lexer->data.result_symbol = EXTGLOB_PATTERN; + // scanner->last_glob_paren_depth = 0; + // return state.saw_non_alphadot; + // } + // if (lexer->data.lookahead == '"') + // { + // lexer->data.mark_end((void *)lexer); + // lexer->data.result_symbol = EXTGLOB_PATTERN; + // scanner->last_glob_paren_depth = 0; + // return state.saw_non_alphadot; + // } + // if (lexer->data.lookahead == '\\') + // { + // if (!me_isalpha(lexer->data.lookahead) && lexer->data.lookahead != '.' && lexer->data.lookahead != '\\') + // { + // state.saw_non_alphadot = true; + // } + // lexer->data.advance((void *)lexer, false); + // if (me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '"') + // { + // lexer->data.advance((void *)lexer, false); + // } + // } + // else + // { + // if (!me_isalpha(lexer->data.lookahead) && lexer->data.lookahead != '.' && lexer->data.lookahead != '\\') + // { + // state.saw_non_alphadot = true; + // } + // lexer->data.advance((void *)lexer, false); + // } + // if (!was_space) + // { + // lexer->data.mark_end((void *)lexer); + // } + // } + // } + // + // lexer->data.result_symbol = EXTGLOB_PATTERN; + // scanner->last_glob_paren_depth = 0; + // return state.saw_non_alphadot; + // } + // scanner->last_glob_paren_depth = 0; + // + // return false; + // } + +expansion_word: if (valid_symbols[EXPANSION_WORD]) - return (scan_expansion_word(scanner, lexer, valid_symbols)); - return (false); + { + bool advanced_once = false; + bool advance_once_space = false; + for (;;) + { + if (lexer->data.lookahead == '\"') + return false; + if (lexer->data.lookahead == '$') + { + lexer->data.mark_end((void *)lexer); + lexer->data.advance((void *)lexer, false); + if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' || + me_isalnum(lexer->data.lookahead)) + { + lexer->data.result_symbol = EXPANSION_WORD; + return advanced_once; + } + advanced_once = true; + } + + if (lexer->data.lookahead == '}') + { + lexer->data.mark_end((void *)lexer); + lexer->data.result_symbol = EXPANSION_WORD; + return advanced_once || advance_once_space; + } + + if (lexer->data.lookahead == '(' && !(advanced_once || advance_once_space)) + { + lexer->data.mark_end((void *)lexer); + lexer->data.advance((void *)lexer, false); + while (lexer->data.lookahead != ')' && !lexer->data.eof((void *)lexer)) + { + // if we find a $( or ${ assume this is valid and is + // a garbage concatenation of some weird word + an + // expansion + // I wonder where this can fail + if (lexer->data.lookahead == '$') + { + lexer->data.mark_end((void *)lexer); + lexer->data.advance((void *)lexer, false); + if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' || + me_isalnum(lexer->data.lookahead)) + { + lexer->data.result_symbol = EXPANSION_WORD; + return advanced_once; + } + advanced_once = true; + } + else + { + advanced_once = advanced_once || !me_isspace(lexer->data.lookahead); + advance_once_space = advance_once_space || me_isspace(lexer->data.lookahead); + lexer->data.advance((void *)lexer, false); + } + } + lexer->data.mark_end((void *)lexer); + if (lexer->data.lookahead == ')') + { + advanced_once = true; + lexer->data.advance((void *)lexer, false); + lexer->data.mark_end((void *)lexer); + if (lexer->data.lookahead == '}') + return false; + } + else + return false; + } + + if (lexer->data.lookahead == '\'') + return false; + if (lexer->data.eof((void *)lexer)) + return false; + advanced_once = advanced_once || !me_isspace(lexer->data.lookahead); + advance_once_space = advance_once_space || me_isspace(lexer->data.lookahead); + lexer->data.advance((void *)lexer, false); + } + } + +brace_start: + return false; } void *tree_sitter_sh_external_scanner_create() { t_scanner *scanner = mem_alloc(sizeof(t_scanner)); - array_init(&scanner->heredocs); - return scanner; + scanner->heredocs = vec_heredoc_new(0, NULL); + return (scanner); } bool tree_sitter_sh_external_scanner_scan(void *payload, t_lexer *lexer, const bool *valid_symbols) @@ -705,12 +1152,12 @@ void tree_sitter_sh_external_scanner_deserialize(void *payload, const t_u8 *stat void tree_sitter_sh_external_scanner_destroy(void *payload) { t_scanner *scanner = (t_scanner *)payload; - for (size_t i = 0; i < scanner->heredocs.size; i++) + for (size_t i = 0; i < scanner->heredocs.len; i++) { - t_heredoc *heredoc = array_get(&scanner->heredocs, i); - array_delete(&heredoc->current_leading_word); - array_delete(&heredoc->delimiter); + t_heredoc *heredoc = vec_heredoc_get(&scanner->heredocs, i); + string_free(heredoc->current_leading_word); + string_free(heredoc->delimiter); } - array_delete(&scanner->heredocs); + vec_heredoc_free(scanner->heredocs); mem_free(scanner); } diff --git a/stdme/Filelist.me.mk b/stdme/Filelist.me.mk index 7f234e90..03b7c42f 100644 --- a/stdme/Filelist.me.mk +++ b/stdme/Filelist.me.mk @@ -35,10 +35,10 @@ fs/fs_internal \ fs/getters \ fs/putfd \ gnl/get_next_line \ -hash/hasher \ hash/hash_signed \ hash/hash_str \ hash/hash_unsigned \ +hash/hasher \ hash/sip/sip13 \ hash/sip/sip_utils \ hash/sip/sip_utils2 \ @@ -86,10 +86,6 @@ printf/printf \ printf/printf_fd \ printf/printf_str \ printf/vprintf \ -string/mod \ -string/string_insert \ -string/string_remove \ -string/string_reserve \ str/str_clone \ str/str_compare \ str/str_find_chr \ @@ -106,6 +102,10 @@ str/str_n_find_str \ str/str_split \ str/str_substring \ str/str_trim \ +string/mod \ +string/string_insert \ +string/string_remove \ +string/string_reserve \ GEN_FILES = \ convert/i16_to_str \ diff --git a/stdme/generic_sources/src/vec/C__PREFIX__.c__TEMPLATE__ b/stdme/generic_sources/src/vec/C__PREFIX__.c__TEMPLATE__ index c0da1466..38ea5520 100644 --- a/stdme/generic_sources/src/vec/C__PREFIX__.c__TEMPLATE__ +++ b/stdme/generic_sources/src/vec/C__PREFIX__.c__TEMPLATE__ @@ -65,11 +65,9 @@ t_error vec_C__PREFIX___pop(t_vec_C__PREFIX__ *vec, C__TYPENAME__ *value) C__TYPENAME__ temp_value; C__TYPENAME__ *ptr; - if (vec == NULL) + if (vec == NULL || vec->len == 0) return (ERROR); ptr = value; - if (vec->len == 0) - return (ERROR); if (value == NULL) ptr = &temp_value; vec->len--; diff --git a/stdme/generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__ b/stdme/generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__ index 3d2abd7b..6350d4ed 100644 --- a/stdme/generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__ +++ b/stdme/generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__ @@ -17,9 +17,11 @@ C__TYPENAME__ *vec_C__PREFIX___get(t_vec_C__PREFIX__ *vec, t_usize i) { - if (vec == NULL || vec->len >= i) + if (vec == NULL || vec->buffer == NULL) return (NULL); - return (&vec->buffer[i]); + if (i < vec->len) + return (&vec->buffer[i]); + return (NULL); } C__TYPENAME__ *vec_C__PREFIX___last(t_vec_C__PREFIX__ *vec) diff --git a/test_heredoc.sh b/test_heredoc.sh index a403919d..180fa0f5 100755 --- a/test_heredoc.sh +++ b/test_heredoc.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash -make && valgrind --leak-check=full --show-leak-kinds=none --track-origins=yes --track-fds=yes --trace-children=yes --read-var-info=yes --read-inline-info=yes ./minishell <<<'cat <