diff --git a/parser/src/scanner/scanner.c b/parser/src/scanner/scanner.c index a865c910..754e301b 100644 --- a/parser/src/scanner/scanner.c +++ b/parser/src/scanner/scanner.c @@ -6,7 +6,7 @@ /* By: rparodi +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/10 15:41:11 by rparodi #+# #+# */ -/* Updated: 2024/09/14 13:09:27 by rparodi ### ########.fr */ +/* Updated: 2024/09/14 11:38:03 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -42,8 +42,8 @@ t_u32 serialize(t_scanner *scanner, t_u8 *buffer) while (i < scanner->heredocs.len) { heredoc = vec_heredoc_get(&scanner->heredocs, i); - if (heredoc->delimiter.len + 3 + size >= \ - TREE_SITTER_SERIALIZATION_BUFFER_SIZE) + if (heredoc->delimiter.len + 3 + + size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return (0); buffer[size++] = (char)heredoc->is_raw; buffer[size++] = (char)heredoc->started; @@ -53,7 +53,7 @@ t_u32 serialize(t_scanner *scanner, t_u8 *buffer) size += sizeof(t_usize); if (heredoc->delimiter.len > 0) { - mem_copy(&buffer[size], heredoc->delimiter.buf, \ + mem_copy(&buffer[size], heredoc->delimiter.buf, heredoc->delimiter.len); size += heredoc->delimiter.len; } @@ -102,8 +102,8 @@ void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length) string_reserve(&heredoc->delimiter, heredoc->delimiter.len); if (heredoc->delimiter.len > 0) { - mem_copy(heredoc->delimiter.buf, &buffer[size], \ - heredoc->delimiter.len); + mem_copy(heredoc->delimiter.buf, &buffer[size], + heredoc->delimiter.len); size += heredoc->delimiter.len; } i++; @@ -132,9 +132,10 @@ bool advance_word(t_lexer *lexer, t_string *unquoted_word) quote = lexer->data.lookahead; lexer->data.advance((void *)lexer, false); } - while (lexer->data.lookahead && \ - !(quote ? lexer->data.lookahead == quote || lexer->data.lookahead == '\r' \ - || lexer->data.lookahead == '\n' : me_isspace(lexer->data.lookahead))) + while (lexer->data.lookahead && !((quote && (lexer->data.lookahead == quote + || lexer->data.lookahead == '\r' + || lexer->data.lookahead == '\n')) || (!quote + && me_isspace(lexer->data.lookahead)))) { if (lexer->data.lookahead == '\\') { @@ -154,16 +155,16 @@ bool advance_word(t_lexer *lexer, t_string *unquoted_word) bool scan_bare_dollar(t_lexer *lexer) { - while (me_isspace(lexer->data.lookahead) && \ - lexer->data.lookahead != '\n' && !lexer->data.eof((void *)lexer)) + while (me_isspace(lexer->data.lookahead) && lexer->data.lookahead != '\n' + && !lexer->data.eof((void *)lexer)) lexer->data.advance((void *)lexer, true); if (lexer->data.lookahead == '$') { lexer->data.advance((void *)lexer, false); lexer->data.result_symbol = BARE_DOLLAR; lexer->data.mark_end((void *)lexer); - return (me_isspace(lexer->data.lookahead) || \ - lexer->data.eof((void *)lexer) || lexer->data.lookahead == '\"'); + return (me_isspace(lexer->data.lookahead) + || lexer->data.eof((void *)lexer) || lexer->data.lookahead == '\"'); } return (false); } @@ -178,8 +179,8 @@ bool scan_heredoc_start(t_heredoc *heredoc, t_lexer *lexer) lexer->data.advance((void *)lexer, true); } lexer->data.result_symbol = HEREDOC_START; - heredoc->is_raw = lexer->data.lookahead == '\'' || \ - lexer->data.lookahead == '"' || lexer->data.lookahead == '\\'; + heredoc->is_raw = lexer->data.lookahead == '\'' + || lexer->data.lookahead == '"' || lexer->data.lookahead == '\\'; if (!found_delimiter) { string_clear(&heredoc->delimiter); @@ -198,136 +199,133 @@ bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer) string_clear(&heredoc->current_leading_word); if (heredoc->delimiter.len > 0) { - while (lexer->data.lookahead != '\0' \ - && lexer->data.lookahead != '\n' && \ - (t_i32) * (&heredoc->delimiter.buf[size]) == lexer->data.lookahead && \ - heredoc->current_leading_word.len < heredoc->delimiter.len) + while (lexer->data.lookahead != '\0' && lexer->data.lookahead != '\n' + && (t_i32) + * (&heredoc->delimiter.buf[size]) == lexer->data.lookahead + && heredoc->current_leading_word.len < heredoc->delimiter.len) { - string_push_char(&heredoc->current_leading_word, \ - lexer->data.lookahead); + string_push_char(&heredoc->current_leading_word, + lexer->data.lookahead); lexer->data.advance((void *)lexer, false); size++; } } string_push_char(&heredoc->current_leading_word, '\0'); - return (heredoc->delimiter.len == 0 ? false : str_compare(\ - heredoc->current_leading_word.buf, heredoc->delimiter.buf)); + if (heredoc->delimiter.len == 0) + return (false); + return (str_compare(heredoc->current_leading_word.buf, + heredoc->delimiter.buf)); } -bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, \ - enum e_token_type middle_type, enum e_token_type end_type) +bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, + enum e_token_type middle_type, enum e_token_type end_type) { - bool did_advance; + bool did_advance; t_heredoc *heredoc; did_advance = false; heredoc = vec_heredoc_last(&scanner->heredocs); - for (;;) + while (true) { - switch (lexer->data.lookahead) + if (lexer->data.lookahead == '\0') { - case '\0': { - if (lexer->data.eof((void *)lexer) && did_advance) - { - reset_heredoc(heredoc); - lexer->data.result_symbol = end_type; - return true; - } - return (false); + if (lexer->data.eof((void *)lexer) && did_advance) + { + reset_heredoc(heredoc); + lexer->data.result_symbol = end_type; + return (true); } - - case '\\': { + return (false); + } + else if (lexer->data.lookahead == '\\') + { + did_advance = true; + lexer->data.advance((void *)lexer, false); + lexer->data.advance((void *)lexer, false); + } + else if (lexer->data.lookahead == '$') + { + if (heredoc->is_raw) + { did_advance = true; lexer->data.advance((void *)lexer, false); - lexer->data.advance((void *)lexer, false); - break ; } - - case '$': { - if (heredoc->is_raw) - { - did_advance = true; - lexer->data.advance((void *)lexer, false); - break ; - } - if (did_advance) - { - lexer->data.mark_end((void *)lexer); - lexer->data.result_symbol = middle_type; - heredoc->started = true; - lexer->data.advance((void *)lexer, false); - if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '{' || lexer->data.lookahead == '(') - { - return (true); - } - break ; - } - if (middle_type == HEREDOC_BODY_BEGINNING && lexer->data.get_column((void *)lexer) == 0) - { - lexer->data.result_symbol = middle_type; - heredoc->started = true; - return (true); - } - return (false); - } - - case '\n': { - if (!did_advance) - lexer->data.advance((void *)lexer, true); - else - lexer->data.advance((void *)lexer, false); - did_advance = true; - if (heredoc->allows_indent) - { - while (me_isspace(lexer->data.lookahead)) - lexer->data.advance((void *)lexer, false); - } - lexer->data.result_symbol = heredoc->started ? middle_type : end_type; + if (did_advance) + { lexer->data.mark_end((void *)lexer); - if (scan_heredoc_end_identifier(heredoc, lexer)) - { - if (lexer->data.result_symbol == HEREDOC_END) - vec_heredoc_pop(&scanner->heredocs, NULL); - return (true); - } - break ; - } - - default: { - if (lexer->data.get_column((void *)lexer) == 0) - { - // an alternative is to check the starting column of the - // heredoc body and track that statefully - while (me_isspace(lexer->data.lookahead)) - { - if (did_advance) - lexer->data.advance((void *)lexer, false); - else - lexer->data.advance((void *)lexer, true); - } - if (end_type != SIMPLE_HEREDOC_BODY) - { - lexer->data.result_symbol = middle_type; - if (scan_heredoc_end_identifier(heredoc, lexer)) - return (true); - } - if (end_type == SIMPLE_HEREDOC_BODY) - { - lexer->data.result_symbol = end_type; - lexer->data.mark_end((void *)lexer); - if (scan_heredoc_end_identifier(heredoc, lexer)) - return (true); - } - } - did_advance = true; + lexer->data.result_symbol = middle_type; + heredoc->started = true; lexer->data.advance((void *)lexer, false); - break ; + if (me_isalpha(lexer->data.lookahead) + || lexer->data.lookahead == '{' + || lexer->data.lookahead == '(') + return (true); } + if (middle_type == HEREDOC_BODY_BEGINNING + && lexer->data.get_column((void *)lexer) == 0) + { + lexer->data.result_symbol = middle_type; + heredoc->started = true; + return (true); + } + return (false); + } + else if (lexer->data.lookahead == '\n') + { + if (!did_advance) + lexer->data.advance((void *)lexer, true); + else + lexer->data.advance((void *)lexer, false); + did_advance = true; + if (heredoc->allows_indent) + { + while (me_isspace(lexer->data.lookahead)) + lexer->data.advance((void *)lexer, false); + } + lexer->data.result_symbol = end_type; + if (heredoc->started) + lexer->data.result_symbol = middle_type; + lexer->data.mark_end((void *)lexer); + if (scan_heredoc_end_identifier(heredoc, lexer)) + { + if (lexer->data.result_symbol == HEREDOC_END) + vec_heredoc_pop(&scanner->heredocs, NULL); + return (true); + } + } + else + { + if (lexer->data.get_column((void *)lexer) == 0) + { + while (me_isspace(lexer->data.lookahead)) + { + if (did_advance) + lexer->data.advance((void *)lexer, false); + else + lexer->data.advance((void *)lexer, true); + } + if (end_type != SIMPLE_HEREDOC_BODY) + { + lexer->data.result_symbol = middle_type; + if (scan_heredoc_end_identifier(heredoc, lexer)) + return (true); + } + if (end_type == SIMPLE_HEREDOC_BODY) + { + lexer->data.result_symbol = end_type; + lexer->data.mark_end((void *)lexer); + if (scan_heredoc_end_identifier(heredoc, lexer)) + return (true); + } + } + did_advance = true; + lexer->data.advance((void *)lexer, false); } } } -bool scan_concat(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) +bool scan_concat(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols) { (void)(scanner); (void)(lexer); @@ -337,7 +335,8 @@ bool scan_concat(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) { lexer->data.mark_end((void *)lexer); lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '"' || lexer->data.lookahead == '\'' || lexer->data.lookahead == '\\') + if (lexer->data.lookahead == '"' || lexer->data.lookahead == '\'' + || lexer->data.lookahead == '\\') return (true); if (lexer->data.eof((void *)lexer)) return (false); @@ -345,12 +344,14 @@ bool scan_concat(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) return (true); } -bool scan_double_hash(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) +bool scan_double_hash(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols) { (void)(scanner); (void)(lexer); (void)(valid_symbols); - if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !(valid_symbols[ERROR_RECOVERY])) + if (valid_symbols[IMMEDIATE_DOUBLE_HASH] + && !(valid_symbols[ERROR_RECOVERY])) { if (lexer->data.lookahead == '#') { @@ -371,7 +372,8 @@ bool scan_double_hash(t_scanner *scanner, t_lexer *lexer, const bool *valid_symb return (false); } -bool scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) +bool scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols) { t_heredoc *heredoc; @@ -390,242 +392,271 @@ bool scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, const bool *valid_symb return (false); } -bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) +bool scan_advance_words(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols) { - if (valid_symbols[CONCAT] && !(valid_symbols[ERROR_RECOVERY]) && - (!(lexer->data.lookahead == 0 || me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '>' || - lexer->data.lookahead == '<' || lexer->data.lookahead == ')' || lexer->data.lookahead == '(' || lexer->data.lookahead == ';' || - lexer->data.lookahead == '&' || lexer->data.lookahead == '|' || lexer->data.lookahead == '{' || lexer->data.lookahead == '}'))) - return (scan_concat(scanner, lexer, valid_symbols)); - if (scan_double_hash(scanner, lexer, valid_symbols)) - return (true); - if (valid_symbols[EMPTY_VALUE] && (me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer) || - lexer->data.lookahead == ';' || lexer->data.lookahead == '&')) - return (lexer->data.result_symbol = EMPTY_VALUE, true); - if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.len > 0 && - !vec_heredoc_last(&scanner->heredocs)->started && !(valid_symbols[ERROR_RECOVERY])) - return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY)); - if (scan_heredoc_end(scanner, lexer, valid_symbols)) - return (true); - if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0 && vec_heredoc_last(&scanner->heredocs)->started && - !(valid_symbols[ERROR_RECOVERY])) - return (scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END)); - if (valid_symbols[HEREDOC_START] && !(valid_symbols[ERROR_RECOVERY]) && scanner->heredocs.len > 0) - return (scan_heredoc_start(vec_heredoc_last(&scanner->heredocs), lexer)); - if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) && - !(valid_symbols[ERROR_RECOVERY])) + bool advanced_once; + bool advance_once_space; + + advanced_once = false; + advance_once_space = false; + (void)(scanner); + (void)(lexer); + (void)(valid_symbols); + while (true) { - for (;;) - { - if ((lexer->data.lookahead == ' ' || lexer->data.lookahead == '\t' || lexer->data.lookahead == '\r' || - (lexer->data.lookahead == '\n' && !valid_symbols[NEWLINE])) && - !valid_symbols[EXPANSION_WORD]) - lexer->data.advance((void *)lexer, true); - else if (lexer->data.lookahead == '\\') - { - lexer->data.advance((void *)lexer, true); - if (lexer->data.eof((void *)lexer)) - { - lexer->data.mark_end((void *)lexer); - lexer->data.result_symbol = VARIABLE_NAME; - return (true); - } - if (lexer->data.lookahead == '\r') - lexer->data.advance((void *)lexer, true); - if (lexer->data.lookahead == '\n') - lexer->data.advance((void *)lexer, true); - else - { - if (lexer->data.lookahead == '\\' && valid_symbols[EXPANSION_WORD]) - goto expansion_word; - return (false); - } - } - else - break ; - } - if (!valid_symbols[EXPANSION_WORD] && - (lexer->data.lookahead == '*' || lexer->data.lookahead == '@' || lexer->data.lookahead == '?' || lexer->data.lookahead == '-' || - lexer->data.lookahead == '0' || lexer->data.lookahead == '_')) + if (lexer->data.lookahead == '\"') + return (false); + if (lexer->data.lookahead == '$') { lexer->data.mark_end((void *)lexer); lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' || lexer->data.lookahead == ':' || - lexer->data.lookahead == '-' || lexer->data.lookahead == '%' || lexer->data.lookahead == '#' || - lexer->data.lookahead == '/') + if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' + || lexer->data.lookahead == '\'' + || me_isalnum(lexer->data.lookahead)) + { + lexer->data.result_symbol = EXPANSION_WORD; + return (advanced_once); + } + advanced_once = true; + } + if (lexer->data.lookahead == '}') + { + lexer->data.mark_end((void *)lexer); + lexer->data.result_symbol = EXPANSION_WORD; + return (advanced_once || advance_once_space); + } + if (lexer->data.lookahead == '(' && !(advanced_once + || advance_once_space)) + { + lexer->data.mark_end((void *)lexer); + lexer->data.advance((void *)lexer, false); + while (lexer->data.lookahead != ')' + && !lexer->data.eof((void *)lexer)) + { + if (lexer->data.lookahead == '$') + { + lexer->data.mark_end((void *)lexer); + lexer->data.advance((void *)lexer, false); + if (lexer->data.lookahead == '{' + || lexer->data.lookahead == '(' + || lexer->data.lookahead == '\'' + || me_isalnum(lexer->data.lookahead)) + return (lexer->data.result_symbol = EXPANSION_WORD, + advanced_once); + advanced_once = true; + } + else + { + advanced_once = advanced_once + || !me_isspace(lexer->data.lookahead); + advance_once_space = advance_once_space + || me_isspace(lexer->data.lookahead); + lexer->data.advance((void *)lexer, false); + } + } + lexer->data.mark_end((void *)lexer); + if (lexer->data.lookahead == ')') + { + advanced_once = true; + lexer->data.advance((void *)lexer, false); + lexer->data.mark_end((void *)lexer); + if (lexer->data.lookahead == '}') + return (false); + } + else return (false); - if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->data.lookahead)) + } + if (lexer->data.lookahead == '\'') + return (false); + if (lexer->data.eof((void *)lexer)) + return (false); + advanced_once = advanced_once || !me_isspace(lexer->data.lookahead); + advance_once_space = advance_once_space + || me_isspace(lexer->data.lookahead); + lexer->data.advance((void *)lexer, false); + } + return (false); +} + +bool scan_literals(t_scanner *scanner, t_lexer *lexer, + const bool *valid_symbols) +{ + bool is_number; + + while (true) + { + if ((lexer->data.lookahead == ' ' || lexer->data.lookahead == '\t' + || lexer->data.lookahead == '\r' + || (lexer->data.lookahead == '\n' && !valid_symbols[NEWLINE])) + && !valid_symbols[EXPANSION_WORD]) + lexer->data.advance((void *)lexer, true); + else if (lexer->data.lookahead == '\\') + { + lexer->data.advance((void *)lexer, true); + if (lexer->data.eof((void *)lexer)) { lexer->data.mark_end((void *)lexer); - lexer->data.result_symbol = EXTGLOB_PATTERN; + lexer->data.result_symbol = VARIABLE_NAME; return (true); } + if (lexer->data.lookahead == '\r') + lexer->data.advance((void *)lexer, true); + if (lexer->data.lookahead == '\n') + lexer->data.advance((void *)lexer, true); + else + { + if (lexer->data.lookahead == '\\' + && valid_symbols[EXPANSION_WORD]) + return (scan_advance_words(scanner, lexer, valid_symbols)); + return (false); + } } - - if (valid_symbols[HEREDOC_ARROW] && lexer->data.lookahead == '<') + else + break ; + } + if (!valid_symbols[EXPANSION_WORD] && (lexer->data.lookahead == '*' + || lexer->data.lookahead == '@' || lexer->data.lookahead == '?' + || lexer->data.lookahead == '-' || lexer->data.lookahead == '0' + || lexer->data.lookahead == '_')) + { + lexer->data.mark_end((void *)lexer); + lexer->data.advance((void *)lexer, false); + if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' + || lexer->data.lookahead == ':' || lexer->data.lookahead == '-' + || lexer->data.lookahead == '%' || lexer->data.lookahead == '#' + || lexer->data.lookahead == '/') + return (false); + if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->data.lookahead)) + { + lexer->data.mark_end((void *)lexer); + lexer->data.result_symbol = EXTGLOB_PATTERN; + return (true); + } + } + if (valid_symbols[HEREDOC_ARROW] && lexer->data.lookahead == '<') + { + lexer->data.advance((void *)lexer, false); + if (lexer->data.lookahead == '<') { lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '<') - { - lexer->data.advance((void *)lexer, false); - t_heredoc heredoc = heredoc_new(); - vec_heredoc_push(&scanner->heredocs, heredoc); - lexer->data.result_symbol = HEREDOC_ARROW; - return (true); - } - return (false); + vec_heredoc_push(&scanner->heredocs, heredoc_new()); + lexer->data.result_symbol = HEREDOC_ARROW; + return (true); } - - bool is_number = true; + return (false); + } + is_number = true; + if (me_isdigit(lexer->data.lookahead)) + lexer->data.advance((void *)lexer, false); + else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_') + { + is_number = false; + lexer->data.advance((void *)lexer, false); + } + else + { + if (lexer->data.lookahead == '{') + return (false); + if (valid_symbols[EXPANSION_WORD]) + return (scan_advance_words(scanner, lexer, valid_symbols)); + return (false); + } + while (true) + { if (me_isdigit(lexer->data.lookahead)) lexer->data.advance((void *)lexer, false); - else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_') + else if (me_isalpha(lexer->data.lookahead) + || lexer->data.lookahead == '_') { is_number = false; lexer->data.advance((void *)lexer, false); } else + break ; + } + if (is_number && valid_symbols[FILE_DESCRIPTOR] + && (lexer->data.lookahead == '>' || lexer->data.lookahead == '<')) + return (lexer->data.result_symbol = FILE_DESCRIPTOR, true); + if (valid_symbols[VARIABLE_NAME]) + { + if (lexer->data.lookahead == '+') { - if (lexer->data.lookahead == '{') - return (false); - if (valid_symbols[EXPANSION_WORD]) - goto expansion_word; + lexer->data.mark_end((void *)lexer); + lexer->data.advance((void *)lexer, false); + if (lexer->data.lookahead == '=' || lexer->data.lookahead == ':') + return (lexer->data.result_symbol = VARIABLE_NAME, true); return (false); } - - for (;;) + if (lexer->data.lookahead == '/') + return (false); + if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' + || (lexer->data.lookahead == ':' && !valid_symbols[OPENING_PAREN]) + || lexer->data.lookahead == '%' || (lexer->data.lookahead == '#' + && !is_number) || lexer->data.lookahead == '@' + || (lexer->data.lookahead == '-')) { - if (me_isdigit(lexer->data.lookahead)) - lexer->data.advance((void *)lexer, false); - else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_') - { - is_number = false; - lexer->data.advance((void *)lexer, false); - } - else - break ; + lexer->data.mark_end((void *)lexer); + lexer->data.result_symbol = VARIABLE_NAME; + return (true); } - - if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->data.lookahead == '>' || lexer->data.lookahead == '<')) - return (lexer->data.result_symbol = FILE_DESCRIPTOR, true); - if (valid_symbols[VARIABLE_NAME]) + if (lexer->data.lookahead == '?') { - if (lexer->data.lookahead == '+') - { - lexer->data.mark_end((void *)lexer); - lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '=' || lexer->data.lookahead == ':') - return (lexer->data.result_symbol = VARIABLE_NAME, true); - return (false); - } - if (lexer->data.lookahead == '/') - return (false); - if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' || - (lexer->data.lookahead == ':' && !valid_symbols[OPENING_PAREN]) || lexer->data.lookahead == '%' || - (lexer->data.lookahead == '#' && !is_number) || lexer->data.lookahead == '@' || (lexer->data.lookahead == '-')) - { - lexer->data.mark_end((void *)lexer); - lexer->data.result_symbol = VARIABLE_NAME; - return (true); - } - if (lexer->data.lookahead == '?') - { - lexer->data.mark_end((void *)lexer); - lexer->data.advance((void *)lexer, false); - lexer->data.result_symbol = VARIABLE_NAME; - return me_isalpha(lexer->data.lookahead); - } - } - return (false); - } - - if (valid_symbols[BARE_DOLLAR] && !(valid_symbols[ERROR_RECOVERY]) && scan_bare_dollar(lexer)) - return (true); - -expansion_word: - if (valid_symbols[EXPANSION_WORD]) - { - bool advanced_once = false; - bool advance_once_space = false; - for (;;) - { - if (lexer->data.lookahead == '\"') - return (false); - if (lexer->data.lookahead == '$') - { - lexer->data.mark_end((void *)lexer); - lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' || - me_isalnum(lexer->data.lookahead)) - { - lexer->data.result_symbol = EXPANSION_WORD; - return (advanced_once); - } - advanced_once = true; - } - - if (lexer->data.lookahead == '}') - { - lexer->data.mark_end((void *)lexer); - lexer->data.result_symbol = EXPANSION_WORD; - return (advanced_once || advance_once_space); - } - - if (lexer->data.lookahead == '(' && !(advanced_once || advance_once_space)) - { - lexer->data.mark_end((void *)lexer); - lexer->data.advance((void *)lexer, false); - while (lexer->data.lookahead != ')' && !lexer->data.eof((void *)lexer)) - { - // if we find a $( or ${ assume this is valid and is - // a garbage concatenation of some weird word + an - // expansion - // I wonder where this can fail - if (lexer->data.lookahead == '$') - { - lexer->data.mark_end((void *)lexer); - lexer->data.advance((void *)lexer, false); - if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' || - me_isalnum(lexer->data.lookahead)) - { - lexer->data.result_symbol = EXPANSION_WORD; - return (advanced_once); - } - advanced_once = true; - } - else - { - advanced_once = advanced_once || !me_isspace(lexer->data.lookahead); - advance_once_space = advance_once_space || me_isspace(lexer->data.lookahead); - lexer->data.advance((void *)lexer, false); - } - } - lexer->data.mark_end((void *)lexer); - if (lexer->data.lookahead == ')') - { - advanced_once = true; - lexer->data.advance((void *)lexer, false); - lexer->data.mark_end((void *)lexer); - if (lexer->data.lookahead == '}') - return (false); - } - else - return (false); - } - - if (lexer->data.lookahead == '\'') - return (false); - if (lexer->data.eof((void *)lexer)) - return (false); - advanced_once = advanced_once || !me_isspace(lexer->data.lookahead); - advance_once_space = advance_once_space || me_isspace(lexer->data.lookahead); + lexer->data.mark_end((void *)lexer); lexer->data.advance((void *)lexer, false); + lexer->data.result_symbol = VARIABLE_NAME; + return (me_isalpha(lexer->data.lookahead)); } } return (false); } -void *tree_sitter_sh_external_scanner_create() +bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) +{ + if (valid_symbols[CONCAT] && !(valid_symbols[ERROR_RECOVERY]) + && (!(lexer->data.lookahead == 0 || me_isspace(lexer->data.lookahead) + || lexer->data.lookahead == '>' || lexer->data.lookahead == '<' + || lexer->data.lookahead == ')' || lexer->data.lookahead == '(' + || lexer->data.lookahead == ';' || lexer->data.lookahead == '&' + || lexer->data.lookahead == '|' || lexer->data.lookahead == '{' + || lexer->data.lookahead == '}'))) + return (scan_concat(scanner, lexer, valid_symbols)); + if (scan_double_hash(scanner, lexer, valid_symbols)) + return (true); + if (valid_symbols[EMPTY_VALUE] && (me_isspace(lexer->data.lookahead) + || lexer->data.eof((void *)lexer) || lexer->data.lookahead == ';' + || lexer->data.lookahead == '&')) + return (lexer->data.result_symbol = EMPTY_VALUE, true); + if ((valid_symbols[HEREDOC_BODY_BEGINNING] + || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.len > 0 + && !vec_heredoc_last(&scanner->heredocs)->started + && !(valid_symbols[ERROR_RECOVERY])) + return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, + SIMPLE_HEREDOC_BODY)); + if (scan_heredoc_end(scanner, lexer, valid_symbols)) + return (true); + if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0 + && vec_heredoc_last(&scanner->heredocs)->started + && !(valid_symbols[ERROR_RECOVERY])) + return (scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, + HEREDOC_END)); + if (valid_symbols[HEREDOC_START] && !(valid_symbols[ERROR_RECOVERY]) + && scanner->heredocs.len > 0) + return (scan_heredoc_start(vec_heredoc_last(&scanner->heredocs), + lexer)); + if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] + || valid_symbols[HEREDOC_ARROW]) + && !(valid_symbols[ERROR_RECOVERY])) + return (scan_literals(scanner, lexer, valid_symbols)); + if (valid_symbols[BARE_DOLLAR] && !(valid_symbols[ERROR_RECOVERY]) + && scan_bare_dollar(lexer)) + return (true); + if (valid_symbols[EXPANSION_WORD]) + return (scan_advance_words(scanner, lexer, valid_symbols)); + return (false); +} + +void *tree_sitter_sh_external_scanner_create(void) { t_scanner *scanner; @@ -634,7 +665,8 @@ void *tree_sitter_sh_external_scanner_create() return (scanner); } -bool tree_sitter_sh_external_scanner_scan(void *payload, t_lexer *lexer, const bool *valid_symbols) +bool tree_sitter_sh_external_scanner_scan(void *payload, t_lexer *lexer, + const bool *valid_symbols) { t_scanner *scanner; @@ -650,7 +682,8 @@ t_u32 tree_sitter_sh_external_scanner_serialize(void *payload, t_u8 *state) return (serialize(scanner, state)); } -void tree_sitter_sh_external_scanner_deserialize(void *payload, const t_u8 *state, t_u32 length) +void tree_sitter_sh_external_scanner_deserialize(void *payload, + const t_u8 *state, t_u32 length) { t_scanner *scanner; diff --git a/test_word.sh b/test_word.sh new file mode 100755 index 00000000..5cebb6e8 --- /dev/null +++ b/test_word.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +make && valgrind --leak-check=full --show-leak-kinds=none --track-origins=yes --track-fds=no --trace-children=yes --read-var-info=yes --read-inline-info=yes ./minishell <<<'/usr/bin/env echo this\ is\ a\ concat" $PATH"'"'jklfsjhklgfd'"