diff --git a/dumper.c b/.dumper.c similarity index 100% rename from dumper.c rename to .dumper.c diff --git a/parser.c b/.parser.c similarity index 100% rename from parser.c rename to .parser.c diff --git a/tree-sitter-sh/src/scanner.c b/tree-sitter-sh/src/scanner.c deleted file mode 100644 index cf4998eb..00000000 --- a/tree-sitter-sh/src/scanner.c +++ /dev/null @@ -1,1245 +0,0 @@ -#include "tree_sitter/array.h" -#include "tree_sitter/parser.h" -/* -#include "array.h" -#include "parser.h" -*/ -#include -#include -#include -#include - -enum TokenType -{ - HEREDOC_START, - SIMPLE_HEREDOC_BODY, - HEREDOC_BODY_BEGINNING, - HEREDOC_CONTENT, - HEREDOC_END, - FILE_DESCRIPTOR, - EMPTY_VALUE, - CONCAT, - VARIABLE_NAME, - REGEX, - EXPANSION_WORD, - EXTGLOB_PATTERN, - BARE_DOLLAR, - IMMEDIATE_DOUBLE_HASH, - HEREDOC_ARROW, - HEREDOC_ARROW_DASH, - NEWLINE, - OPENING_PAREN, - ESAC, - ERROR_RECOVERY, -}; -// enum TokenType { -// HEREDOC_START, -// SIMPLE_HEREDOC_BODY, -// HEREDOC_BODY_BEGINNING, -// HEREDOC_CONTENT, -// HEREDOC_END, -// FILE_DESCRIPTOR, -// EMPTY_VALUE, -// CONCAT, -// VARIABLE_NAME, -// TEST_OPERATOR, -// REGEX, -// REGEX_NO_SLASH, -// REGEX_NO_SPACE, -// EXPANSION_WORD, -// EXTGLOB_PATTERN, -// BARE_DOLLAR, -// BRACE_START, -// IMMEDIATE_DOUBLE_HASH, -// EXTERNAL_EXPANSION_SYM_HASH, -// EXTERNAL_EXPANSION_SYM_BANG, -// EXTERNAL_EXPANSION_SYM_EQUAL, -// CLOSING_BRACE, -// CLOSING_BRACKET, -// HEREDOC_ARROW, -// HEREDOC_ARROW_DASH, -// NEWLINE, -// OPENING_PAREN, -// ESAC, -// ERROR_RECOVERY, -// }; - -typedef Array(char) String; - -typedef struct -{ - bool is_raw; - bool started; - bool allows_indent; - String delimiter; - String current_leading_word; -} Heredoc; - -#define heredoc_new() \ - { \ - .is_raw = false, \ - .started = false, \ - .allows_indent = false, \ - .delimiter = array_new(), \ - .current_leading_word = array_new(), \ - }; - -typedef struct -{ - uint8_t last_glob_paren_depth; - bool ext_was_in_double_quote; - bool ext_saw_outside_quote; - Array(Heredoc) heredocs; -} Scanner; - -static inline void advance(TSLexer *lexer) -{ - lexer->advance(lexer, false); -} - -static inline void skip(TSLexer *lexer) -{ - lexer->advance(lexer, true); -} - -static inline bool in_error_recovery(const bool *valid_symbols) -{ - return valid_symbols[ERROR_RECOVERY]; -} - -static inline void reset_string(String *string) -{ - if (string->size > 0) - { - memset(string->contents, 0, string->size); - array_clear(string); - } -} - -static inline void reset_heredoc(Heredoc *heredoc) -{ - heredoc->is_raw = false; - heredoc->started = false; - heredoc->allows_indent = false; - reset_string(&heredoc->delimiter); -} - -static inline void reset(Scanner *scanner) -{ - for (uint32_t i = 0; i < scanner->heredocs.size; i++) - { - reset_heredoc(array_get(&scanner->heredocs, i)); - } -} - -static unsigned serialize(Scanner *scanner, char *buffer) -{ - uint32_t size = 0; - - buffer[size++] = (char)scanner->last_glob_paren_depth; - buffer[size++] = (char)scanner->ext_was_in_double_quote; - buffer[size++] = (char)scanner->ext_saw_outside_quote; - buffer[size++] = (char)scanner->heredocs.size; - - for (uint32_t i = 0; i < scanner->heredocs.size; i++) - { - Heredoc *heredoc = array_get(&scanner->heredocs, i); - if (heredoc->delimiter.size + 3 + size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) - { - return 0; - } - - buffer[size++] = (char)heredoc->is_raw; - buffer[size++] = (char)heredoc->started; - buffer[size++] = (char)heredoc->allows_indent; - - memcpy(&buffer[size], &heredoc->delimiter.size, sizeof(uint32_t)); - size += sizeof(uint32_t); - if (heredoc->delimiter.size > 0) - { - memcpy(&buffer[size], heredoc->delimiter.contents, heredoc->delimiter.size); - size += heredoc->delimiter.size; - } - } - return size; -} - -static void deserialize(Scanner *scanner, const char *buffer, unsigned length) -{ - if (length == 0) - { - reset(scanner); - } - else - { - uint32_t size = 0; - scanner->last_glob_paren_depth = buffer[size++]; - scanner->ext_was_in_double_quote = buffer[size++]; - scanner->ext_saw_outside_quote = buffer[size++]; - uint32_t heredoc_count = (unsigned char)buffer[size++]; - for (uint32_t i = 0; i < heredoc_count; i++) - { - Heredoc *heredoc = NULL; - if (i < scanner->heredocs.size) - { - heredoc = array_get(&scanner->heredocs, i); - } - else - { - Heredoc new_heredoc = heredoc_new(); - array_push(&scanner->heredocs, new_heredoc); - heredoc = array_back(&scanner->heredocs); - } - - heredoc->is_raw = buffer[size++]; - heredoc->started = buffer[size++]; - heredoc->allows_indent = buffer[size++]; - - memcpy(&heredoc->delimiter.size, &buffer[size], sizeof(uint32_t)); - size += sizeof(uint32_t); - array_reserve(&heredoc->delimiter, heredoc->delimiter.size); - - if (heredoc->delimiter.size > 0) - { - memcpy(heredoc->delimiter.contents, &buffer[size], heredoc->delimiter.size); - size += heredoc->delimiter.size; - } - } - assert(size == length); - } -} - -/** - * Consume a "word" in POSIX parlance, and returns it unquoted. - * - * This is an approximate implementation that doesn't deal with any - * POSIX-mandated substitution, and assumes the default value for - * IFS. - */ -static bool advance_word(TSLexer *lexer, String *unquoted_word) -{ - bool empty = true; - int32_t quote = 0; - - if (lexer->lookahead == '\'' || lexer->lookahead == '"') - { - quote = lexer->lookahead; - advance(lexer); - } - - while (lexer->lookahead && - !(quote ? lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n' : iswspace(lexer->lookahead))) - { - if (lexer->lookahead == '\\') - { - advance(lexer); - if (!lexer->lookahead) - return false; - } - empty = false; - array_push(unquoted_word, lexer->lookahead); - advance(lexer); - } - array_push(unquoted_word, '\0'); - - if (quote && lexer->lookahead == quote) - advance(lexer); - - return !empty; -} - -static inline bool scan_bare_dollar(TSLexer *lexer) -{ - while (iswspace(lexer->lookahead) && lexer->lookahead != '\n' && !lexer->eof(lexer)) - skip(lexer); - - - if (lexer->lookahead == '$') - { - advance(lexer); - lexer->result_symbol = BARE_DOLLAR; - lexer->mark_end(lexer); - return (iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"'); - } - - return false; -} - -static bool scan_heredoc_start(Heredoc *heredoc, TSLexer *lexer) -{ - while (iswspace(lexer->lookahead)) - { - skip(lexer); - } - - lexer->result_symbol = HEREDOC_START; - heredoc->is_raw = lexer->lookahead == '\'' || lexer->lookahead == '"' || lexer->lookahead == '\\'; - - bool found_delimiter = advance_word(lexer, &heredoc->delimiter); - if (!found_delimiter) - { - reset_string(&heredoc->delimiter); - return false; - } - return found_delimiter; -} - -static bool scan_heredoc_end_identifier(Heredoc *heredoc, TSLexer *lexer) -{ - reset_string(&heredoc->current_leading_word); - // Scan the first 'n' characters on this line, to see if they match the - // heredoc delimiter - int32_t size = 0; - if (heredoc->delimiter.size > 0) - { - while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && (int32_t)*array_get(&heredoc->delimiter, size) == lexer->lookahead && - heredoc->current_leading_word.size < heredoc->delimiter.size) - { - array_push(&heredoc->current_leading_word, lexer->lookahead); - advance(lexer); - size++; - } - } - array_push(&heredoc->current_leading_word, '\0'); - return heredoc->delimiter.size == 0 ? false : strcmp(heredoc->current_leading_word.contents, heredoc->delimiter.contents) == 0; -} - -static bool scan_heredoc_content(Scanner *scanner, TSLexer *lexer, enum TokenType middle_type, enum TokenType end_type) -{ - bool did_advance = false; - Heredoc *heredoc = array_back(&scanner->heredocs); - - for (;;) - { - switch (lexer->lookahead) - { - case '\0': { - if (lexer->eof(lexer) && did_advance) - { - reset_heredoc(heredoc); - lexer->result_symbol = end_type; - return true; - } - return false; - } - - case '\\': { - did_advance = true; - advance(lexer); - advance(lexer); - break; - } - - case '$': { - if (heredoc->is_raw) - { - did_advance = true; - advance(lexer); - break; - } - if (did_advance) - { - lexer->mark_end(lexer); - lexer->result_symbol = middle_type; - heredoc->started = true; - advance(lexer); - if (iswalpha(lexer->lookahead) || lexer->lookahead == '{' || lexer->lookahead == '(') - { - return true; - } - break; - } - if (middle_type == HEREDOC_BODY_BEGINNING && lexer->get_column(lexer) == 0) - { - lexer->result_symbol = middle_type; - heredoc->started = true; - return true; - } - return false; - } - - case '\n': { - if (!did_advance) - { - skip(lexer); - } - else - { - advance(lexer); - } - did_advance = true; - if (heredoc->allows_indent) - { - while (iswspace(lexer->lookahead)) - { - advance(lexer); - } - } - lexer->result_symbol = heredoc->started ? middle_type : end_type; - lexer->mark_end(lexer); - if (scan_heredoc_end_identifier(heredoc, lexer)) - { - if (lexer->result_symbol == HEREDOC_END) - { - (void)array_pop(&scanner->heredocs); - } - return true; - } - break; - } - - default: { - if (lexer->get_column(lexer) == 0) - { - // an alternative is to check the starting column of the - // heredoc body and track that statefully - while (iswspace(lexer->lookahead)) - { - if (did_advance) - { - advance(lexer); - } - else - { - skip(lexer); - } - } - if (end_type != SIMPLE_HEREDOC_BODY) - { - lexer->result_symbol = middle_type; - if (scan_heredoc_end_identifier(heredoc, lexer)) - { - return true; - } - } - if (end_type == SIMPLE_HEREDOC_BODY) - { - lexer->result_symbol = end_type; - lexer->mark_end(lexer); - if (scan_heredoc_end_identifier(heredoc, lexer)) - { - return true; - } - } - } - did_advance = true; - advance(lexer); - break; - } - } - } -} - -static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) -{ - if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols)) - { - if (!(lexer->lookahead == 0 || iswspace(lexer->lookahead) || lexer->lookahead == '>' || lexer->lookahead == '<' || - lexer->lookahead == ')' || lexer->lookahead == '(' || lexer->lookahead == ';' || lexer->lookahead == '&' || - lexer->lookahead == '|')) - { - lexer->result_symbol = CONCAT; - // So for a`b`, we want to return a concat. We check if the - // 2nd backtick has whitespace after it, and if it does we - // return concat. - if (lexer->lookahead == '`') - { - lexer->mark_end(lexer); - advance(lexer); - while (lexer->lookahead != '`' && !lexer->eof(lexer)) - { - advance(lexer); - } - if (lexer->eof(lexer)) - { - return false; - } - if (lexer->lookahead == '`') - { - advance(lexer); - } - return iswspace(lexer->lookahead) || lexer->eof(lexer); - } - // strings w/ expansions that contains escaped quotes or - // backslashes need this to return a concat - if (lexer->lookahead == '\\') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '"' || lexer->lookahead == '\'' || lexer->lookahead == '\\') - { - return true; - } - if (lexer->eof(lexer)) - { - return false; - } - } - else - { - return true; - } - } - } - - if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !in_error_recovery(valid_symbols)) - { - // advance two # and ensure not } after - if (lexer->lookahead == '#') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '#') - { - advance(lexer); - if (lexer->lookahead != '}') - { - lexer->result_symbol = IMMEDIATE_DOUBLE_HASH; - lexer->mark_end(lexer); - return true; - } - } - } - } - - if (valid_symbols[EMPTY_VALUE]) - { - if (iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == ';' || lexer->lookahead == '&') - { - lexer->result_symbol = EMPTY_VALUE; - return true; - } - } - - if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.size > 0 && - !array_back(&scanner->heredocs)->started && !in_error_recovery(valid_symbols)) - { - return scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY); - } - - if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0) - { - Heredoc *heredoc = array_back(&scanner->heredocs); - if (scan_heredoc_end_identifier(heredoc, lexer)) - { - array_delete(&heredoc->current_leading_word); - array_delete(&heredoc->delimiter); - (void)array_pop(&scanner->heredocs); - lexer->result_symbol = HEREDOC_END; - return true; - } - } - - if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 && array_back(&scanner->heredocs)->started && - !in_error_recovery(valid_symbols)) - { - return scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END); - } - - if (valid_symbols[HEREDOC_START] && !in_error_recovery(valid_symbols) && scanner->heredocs.size > 0) - { - return scan_heredoc_start(array_back(&scanner->heredocs), lexer); - } - - if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) && - !in_error_recovery(valid_symbols)) - { - for (;;) - { - if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == '\r' || - (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) && - !valid_symbols[EXPANSION_WORD]) - { - skip(lexer); - } - else if (lexer->lookahead == '\\') - { - skip(lexer); - - if (lexer->eof(lexer)) - { - lexer->mark_end(lexer); - lexer->result_symbol = VARIABLE_NAME; - return true; - } - - if (lexer->lookahead == '\r') - { - skip(lexer); - } - if (lexer->lookahead == '\n') - { - skip(lexer); - } - else - { - if (lexer->lookahead == '\\' && valid_symbols[EXPANSION_WORD]) - { - goto expansion_word; - } - return false; - } - } - else - { - break; - } - } - - // no '*', '@', '?', '-', '$', '0', '_' - if (!valid_symbols[EXPANSION_WORD] && (lexer->lookahead == '*' || lexer->lookahead == '@' || lexer->lookahead == '?' || - lexer->lookahead == '-' || lexer->lookahead == '0' || lexer->lookahead == '_')) - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '=' || lexer->lookahead == '[' || lexer->lookahead == ':' || lexer->lookahead == '-' || - lexer->lookahead == '%' || lexer->lookahead == '#' || lexer->lookahead == '/') - { - return false; - } - if (valid_symbols[EXTGLOB_PATTERN] && iswspace(lexer->lookahead)) - { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - } - - if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<') - { - advance(lexer); - if (lexer->lookahead == '<') - { - advance(lexer); - if (lexer->lookahead == '-') - { - advance(lexer); - Heredoc heredoc = heredoc_new(); - heredoc.allows_indent = true; - array_push(&scanner->heredocs, heredoc); - lexer->result_symbol = HEREDOC_ARROW_DASH; - } - else if (lexer->lookahead == '<' || lexer->lookahead == '=') - { - return false; - } - else - { - Heredoc heredoc = heredoc_new(); - array_push(&scanner->heredocs, heredoc); - lexer->result_symbol = HEREDOC_ARROW; - } - return true; - } - return false; - } - - bool is_number = true; - if (iswdigit(lexer->lookahead)) - { - advance(lexer); - } - else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') - { - is_number = false; - advance(lexer); - } - else - { - if (lexer->lookahead == '{') - { - goto brace_start; - } - if (valid_symbols[EXPANSION_WORD]) - { - goto expansion_word; - } - if (valid_symbols[EXTGLOB_PATTERN]) - { - goto extglob_pattern; - } - return false; - } - - for (;;) - { - if (iswdigit(lexer->lookahead)) - { - advance(lexer); - } - else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') - { - is_number = false; - advance(lexer); - } - else - { - break; - } - } - - if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->lookahead == '>' || lexer->lookahead == '<')) - { - lexer->result_symbol = FILE_DESCRIPTOR; - return true; - } - - if (valid_symbols[VARIABLE_NAME]) - { - if (lexer->lookahead == '+') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '=' || lexer->lookahead == ':') - { - lexer->result_symbol = VARIABLE_NAME; - return true; - } - return false; - } - if (lexer->lookahead == '/') - { - return false; - } - if (lexer->lookahead == '=' || lexer->lookahead == '[' || - (lexer->lookahead == ':' && - !valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable - // names for function words, only handling : for now? #235 - lexer->lookahead == '%' || - (lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || (lexer->lookahead == '-')) - { - lexer->mark_end(lexer); - lexer->result_symbol = VARIABLE_NAME; - return true; - } - - if (lexer->lookahead == '?') - { - lexer->mark_end(lexer); - advance(lexer); - lexer->result_symbol = VARIABLE_NAME; - return iswalpha(lexer->lookahead); - } - } - - return false; - } - - if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && scan_bare_dollar(lexer)) - { - return true; - } - - if ((valid_symbols[REGEX]) && !in_error_recovery(valid_symbols)) - { - if (valid_symbols[REGEX]) - { - while (iswspace(lexer->lookahead)) - { - skip(lexer); - } - } - - if ((lexer->lookahead != '"' && lexer->lookahead != '\'') || ((lexer->lookahead == '$' || lexer->lookahead == '\'')) || - (lexer->lookahead == '\'')) - { - typedef struct - { - bool done; - bool advanced_once; - bool found_non_alnumdollarunderdash; - bool last_was_escape; - bool in_single_quote; - uint32_t paren_depth; - uint32_t bracket_depth; - uint32_t brace_depth; - } State; - - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '(') - { - return false; - } - } - - lexer->mark_end(lexer); - - State state = {false, false, false, false, false, 0, 0, 0}; - while (!state.done) - { - if (state.in_single_quote) - { - if (lexer->lookahead == '\'') - { - state.in_single_quote = false; - advance(lexer); - lexer->mark_end(lexer); - } - } - switch (lexer->lookahead) - { - case '\\': - state.last_was_escape = true; - break; - case '\0': - return false; - case '(': - state.paren_depth++; - state.last_was_escape = false; - break; - case '[': - state.bracket_depth++; - state.last_was_escape = false; - break; - case '{': - if (!state.last_was_escape) - state.brace_depth++; - state.last_was_escape = false; - break; - case ')': - if (state.paren_depth == 0) - state.done = true; - state.paren_depth--; - state.last_was_escape = false; - break; - case ']': - if (state.bracket_depth == 0) - state.done = true; - state.bracket_depth--; - state.last_was_escape = false; - break; - case '}': - if (state.brace_depth == 0) - state.done = true; - state.brace_depth--; - state.last_was_escape = false; - break; - case '\'': - // Enter or exit a single-quoted string. - state.in_single_quote = !state.in_single_quote; - advance(lexer); - state.advanced_once = true; - state.last_was_escape = false; - continue; - default: - state.last_was_escape = false; - break; - } - - if (!state.done) - { - if (valid_symbols[REGEX]) - { - bool was_space = !state.in_single_quote && iswspace(lexer->lookahead); - advance(lexer); - state.advanced_once = true; - if (!was_space || state.paren_depth > 0) - { - lexer->mark_end(lexer); - } - } - } - } - - lexer->result_symbol = REGEX; - if (valid_symbols[REGEX] && !state.advanced_once) - { - return false; - } - return true; - } - } - -extglob_pattern: - if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols)) - { - // first skip ws, then check for ? * + @ ! - while (iswspace(lexer->lookahead)) - { - skip(lexer); - } - - if (lexer->lookahead == '?' || lexer->lookahead == '*' || lexer->lookahead == '+' || lexer->lookahead == '@' || - lexer->lookahead == '!' || lexer->lookahead == '-' || lexer->lookahead == ')' || lexer->lookahead == '\\' || - lexer->lookahead == '.' || lexer->lookahead == '[' || (iswalpha(lexer->lookahead))) - { - if (lexer->lookahead == '\\') - { - advance(lexer); - if ((iswspace(lexer->lookahead) || lexer->lookahead == '"') && lexer->lookahead != '\r' && lexer->lookahead != '\n') - { - advance(lexer); - } - else - { - return false; - } - } - - if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) - { - lexer->mark_end(lexer); - advance(lexer); - - if (iswspace(lexer->lookahead)) - { - return false; - } - } - - lexer->mark_end(lexer); - bool was_non_alpha = !iswalpha(lexer->lookahead); - if (lexer->lookahead != '[') - { - // no esac - if (lexer->lookahead == 'e') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == 's') - { - advance(lexer); - if (lexer->lookahead == 'a') - { - advance(lexer); - if (lexer->lookahead == 'c') - { - advance(lexer); - if (iswspace(lexer->lookahead)) - { - return false; - } - } - } - } - } - else - { - advance(lexer); - } - } - - // -\w is just a word, find something else special - if (lexer->lookahead == '-') - { - lexer->mark_end(lexer); - advance(lexer); - while (iswalnum(lexer->lookahead)) - { - advance(lexer); - } - - if (lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.') - { - return false; - } - lexer->mark_end(lexer); - } - - // case item -) or *) - if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) - { - lexer->mark_end(lexer); - advance(lexer); - if (iswspace(lexer->lookahead)) - { - lexer->result_symbol = EXTGLOB_PATTERN; - return was_non_alpha; - } - } - - if (iswspace(lexer->lookahead)) - { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return true; - } - - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '{' || lexer->lookahead == '(') - { - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - } - - if (lexer->lookahead == '|') - { - lexer->mark_end(lexer); - advance(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - - if (!iswalnum(lexer->lookahead) && lexer->lookahead != '(' && lexer->lookahead != '"' && lexer->lookahead != '[' && - lexer->lookahead != '?' && lexer->lookahead != '/' && lexer->lookahead != '\\' && lexer->lookahead != '_' && - lexer->lookahead != '*') - { - return false; - } - - typedef struct - { - bool done; - bool saw_non_alphadot; - uint32_t paren_depth; - uint32_t bracket_depth; - uint32_t brace_depth; - } State; - - State state = {false, was_non_alpha, scanner->last_glob_paren_depth, 0, 0}; - while (!state.done) - { - switch (lexer->lookahead) - { - case '\0': - return false; - case '(': - state.paren_depth++; - break; - case '[': - state.bracket_depth++; - break; - case '{': - state.brace_depth++; - break; - case ')': - if (state.paren_depth == 0) - { - state.done = true; - } - state.paren_depth--; - break; - case ']': - if (state.bracket_depth == 0) - { - state.done = true; - } - state.bracket_depth--; - break; - case '}': - if (state.brace_depth == 0) - { - state.done = true; - } - state.brace_depth--; - break; - } - - if (lexer->lookahead == '|') - { - lexer->mark_end(lexer); - advance(lexer); - if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0) - { - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - } - - if (!state.done) - { - bool was_space = iswspace(lexer->lookahead); - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') - { - state.saw_non_alphadot = true; - } - advance(lexer); - if (lexer->lookahead == '(' || lexer->lookahead == '{') - { - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = state.paren_depth; - return state.saw_non_alphadot; - } - } - if (was_space) - { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return state.saw_non_alphadot; - } - if (lexer->lookahead == '"') - { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return state.saw_non_alphadot; - } - if (lexer->lookahead == '\\') - { - if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') - { - state.saw_non_alphadot = true; - } - advance(lexer); - if (iswspace(lexer->lookahead) || lexer->lookahead == '"') - { - advance(lexer); - } - } - else - { - if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') - { - state.saw_non_alphadot = true; - } - advance(lexer); - } - if (!was_space) - { - lexer->mark_end(lexer); - } - } - } - - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return state.saw_non_alphadot; - } - scanner->last_glob_paren_depth = 0; - - return false; - } - -expansion_word: - if (valid_symbols[EXPANSION_WORD]) - { - bool advanced_once = false; - bool advance_once_space = false; - for (;;) - { - if (lexer->lookahead == '\"') - { - return false; - } - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || iswalnum(lexer->lookahead)) - { - lexer->result_symbol = EXPANSION_WORD; - return advanced_once; - } - advanced_once = true; - } - - if (lexer->lookahead == '}') - { - lexer->mark_end(lexer); - lexer->result_symbol = EXPANSION_WORD; - return advanced_once || advance_once_space; - } - - if (lexer->lookahead == '(' && !(advanced_once || advance_once_space)) - { - lexer->mark_end(lexer); - advance(lexer); - while (lexer->lookahead != ')' && !lexer->eof(lexer)) - { - // if we find a $( or ${ assume this is valid and is - // a garbage concatenation of some weird word + an - // expansion - // I wonder where this can fail - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || iswalnum(lexer->lookahead)) - { - lexer->result_symbol = EXPANSION_WORD; - return advanced_once; - } - advanced_once = true; - } - else - { - advanced_once = advanced_once || !iswspace(lexer->lookahead); - advance_once_space = advance_once_space || iswspace(lexer->lookahead); - advance(lexer); - } - } - lexer->mark_end(lexer); - if (lexer->lookahead == ')') - { - advanced_once = true; - advance(lexer); - lexer->mark_end(lexer); - if (lexer->lookahead == '}') - { - return false; - } - } - else - { - return false; - } - } - - if (lexer->lookahead == '\'') - { - return false; - } - - if (lexer->eof(lexer)) - { - return false; - } - advanced_once = advanced_once || !iswspace(lexer->lookahead); - advance_once_space = advance_once_space || iswspace(lexer->lookahead); - advance(lexer); - } - } - -brace_start: - return false; -} - -void *tree_sitter_sh_external_scanner_create() -{ - Scanner *scanner = calloc(1, sizeof(Scanner)); - array_init(&scanner->heredocs); - return scanner; -} - -bool tree_sitter_sh_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) -{ - Scanner *scanner = (Scanner *)payload; - return scan(scanner, lexer, valid_symbols); -} - -unsigned tree_sitter_sh_external_scanner_serialize(void *payload, char *state) -{ - Scanner *scanner = (Scanner *)payload; - return serialize(scanner, state); -} - -void tree_sitter_sh_external_scanner_deserialize(void *payload, const char *state, unsigned length) -{ - Scanner *scanner = (Scanner *)payload; - deserialize(scanner, state, length); -} - -void tree_sitter_sh_external_scanner_destroy(void *payload) -{ - Scanner *scanner = (Scanner *)payload; - for (size_t i = 0; i < scanner->heredocs.size; i++) - { - Heredoc *heredoc = array_get(&scanner->heredocs, i); - array_delete(&heredoc->current_leading_word); - array_delete(&heredoc->delimiter); - } - array_delete(&scanner->heredocs); - free(scanner); -}