diff --git a/.tree-sitter-sh/grammar.js b/.tree-sitter-sh/grammar.js deleted file mode 100644 index 53a96b92..00000000 --- a/.tree-sitter-sh/grammar.js +++ /dev/null @@ -1,606 +0,0 @@ -/** - * @file Bash grammar for tree-sitter - * @author Max Brunsfeld - * @author Amaan Qureshi - * @license MIT - */ - -/// -// @ts-check - -const SPECIAL_CHARACTERS = [ - '|', '&', ';', '<', '>', '(', ')', '$', '`', '\\', '\"', '\'', ' ', '\t', '\n', -] - -const PREC = { - UPDATE: 0, - ASSIGN: 1, - TERNARY: 2, - LOGICAL_OR: 3, - LOGICAL_AND: 4, - BITWISE_OR: 5, - BITWISE_XOR: 6, - BITWISE_AND: 7, - EQUALITY: 8, - COMPARE: 9, - TEST: 10, - UNARY: 11, - SHIFT: 12, - ADD: 13, - MULTIPLY: 14, - EXPONENT: 15, - NEGATE: 16, - PREFIX: 17, - POSTFIX: 18, -}; - -module.exports = grammar({ - name: 'sh', - - conflicts: $ => [ - [$.command, $._variable_assignments], - [$.redirected_statement, $.command], - [$.redirected_statement, $.command_substitution], - [$._expansion_body, $._expansion_regex], - [$.pipeline], - ], - - inline: $ => [ - $._statement, - $._literal, - $._terminated_statement, - $._primary_expression, - $._simple_variable_name, - $._multiline_variable_name, - $._special_variable_name, - $._statement_not_subshell, - ], - - externals: $ => [ - $.file_descriptor, - $._empty_value, - $._concat, - $.variable_name, - $.regex, - $._expansion_word, - $.extglob_pattern, - $._bare_dollar, - $._immediate_double_hash, - //'<<', - /\n/, - '(', - $.__error_recovery, - ], - - extras: $ => [ - $.comment, - /\s/, - /\\\r?\n/, - /\\( |\t|\v|\f)/, - ], - - word: $ => $.word, - - rules: { - program: $ => optional($._statements), - - _statements: $ => prec(1, seq( - repeat(seq( - field('stmt', $._statement), - field('term', $.terminator), - )), - field('stmt', $._statement), - field('term', optional($.terminator)), - )), - - _terminated_statement: $ => repeat1(seq( - field('stmt', $._statement), - field('term', $.terminator) - )), - - // Statements - - _statement: $ => choice( - $._statement_not_subshell, - $.subshell, - ), - - _statement_not_subshell: $ => choice( - // $.case_statement, - $.command, - $.compound_statement, - // $.for_statement, - // $.function_definition, - // $.if_statement, - $.list, - $.negated_command, - $.pipeline, - $.redirected_statement, - $.variable_assignment, - $._variable_assignments, - // $.while_statement, - ), - - _statement_not_pipeline: $ => prec(1, choice( - // $.case_statement, - $.command, - $.compound_statement, - // $.for_statement, - // $.function_definition, - // $.if_statement, - $.list, - $.negated_command, - $.redirected_statement, - $.subshell, - $.variable_assignment, - $._variable_assignments, - // $.while_statement, - )), - - redirected_statement: $ => prec.dynamic(-1, prec.right(-1, choice( - seq( - field('body', $._statement), - field('redr', repeat1(choice($.file_redirect, $.heredoc_redirect))), - ), - field('redr', repeat1($.file_redirect)), - ))), - - /* - for_statement: $ => seq( - 'for', - field('var', $._simple_variable_name), - optional(seq( - 'in', - field('value', repeat1($._literal)), - )), - $.terminator, - field('body', $.do_group), - ), - - while_statement: $ => seq( - choice('while', 'until'), - field('cond', $._terminated_statement), - field('body', $.do_group), - ), - - do_group: $ => seq( - 'do', - optional($._terminated_statement), - 'done', - ), - - if_statement: $ => seq( - 'if', - field('cond', $._terminated_statement), - 'then', - field('body', optional($._terminated_statement)), - field('elif', repeat($.elif_clause)), - field('else', optional($.else_clause)), - 'fi', - ), - - elif_clause: $ => seq( - 'elif', - field('cond', $._terminated_statement), - 'then', - field('body', optional($._terminated_statement)), - ), - - else_clause: $ => seq( - 'else', - field('body', optional($._terminated_statement)), - ), - - case_statement: $ => seq( - 'case', - field('value', $._literal), - optional($.terminator), - 'in', - optional($.terminator), - optional(seq( - repeat(field('cases', $.case_item)), - field('cases', alias($._case_item_last, $.case_item)) - )), - 'esac', - ), - - _case_item_last: $ => seq( - optional('('), - field('value', choice($._literal, $._extglob_blob)), - repeat(seq('|', field('value', choice($._literal, $._extglob_blob)))), - ')', - repeat('\n'), - choice(field('body', $._statements),), - optional(';;') - ), - - case_item: $ => seq( - optional('('), - field('value', choice($._literal, $._extglob_blob)), - repeat(seq('|', field('value', choice($._literal, $._extglob_blob)))), - ')', - repeat('\n'), - choice(field('body', $._statements)), - ';;' - ), - - function_definition: $ => prec.right(seq( - field('name', $.word), - '(', ')', - field('body', choice($.compound_statement, $.subshell, $.command, $.while_statement, $.if_statement, $.for_statement, $._variable_assignments, repeat1($.file_redirect))), - )), - */ - - compound_statement: $ => seq('{', $._terminated_statement, '}'), - subshell: $ => seq('(', $._statements, ')'), - - pipeline: $ => prec.right(seq( - $._statement_not_pipeline, - repeat1(seq('|', $._statement_not_pipeline)), - )), - - list: $ => prec.left(-1, seq( - field('cmd', $._statement), - field('op', alias(choice('&&', '||'), $.operator)), - field('cmd', $._statement), - )), - - // Commands - - negated_command: $ => seq( - '!', - choice( - prec(2, $.command), - prec(1, $.variable_assignment), - $.subshell, - ), - ), - - command: $ => prec.left(seq( - repeat(choice( - $.variable_assignment, - field('redr', $.file_redirect), - )), - field('name', $.command_name), - repeat(choice( - field('arg', $._literal), - field('arg', alias($._bare_dollar, $.word)), - )), - )), - - command_name: $ => $._literal, - - variable_assignment: $ => seq( - field('name', choice( - $.variable_name, - )), - '=', - field('value', choice( - $._literal, - $._empty_value, - alias($._comment_word, $.word), - )), - ), - - _variable_assignments: $ => seq($.variable_assignment, repeat1($.variable_assignment)), - - file_redirect: $ => prec.left(seq( - field('op', alias(choice('<', '>', '>>'), $.operator)), - field('dest', repeat1($._literal)), - )), - - heredoc_redirect: $ => seq( - field('op', alias('<<', $.operator)), - field('del', alias(/[\w\d\-\._]+/, $.heredoc_delimiter)), - ), - - // Literals - - _literal: $ => choice($.concatenation, $._primary_expression), - - _primary_expression: $ => choice( - $.word, - $.string, - $.raw_string, - $.number, - $.expansion, - $.simple_expansion, - $.command_substitution, - $.arithmetic_expansion, - ), - - arithmetic_expansion: $ => seq('$((', $._arithmetic_expression, '))'), - - _arithmetic_expression: $ => prec(1, choice( - $.arithmetic_literal, - $.arithmetic_unary_expression, - $.arithmetic_ternary_expression, - $.arithmetic_binary_expression, - $.arithmetic_postfix_expression, - $.arithmetic_parenthesized_expression, - )), - - arithmetic_literal: $ => prec(1, choice( - $.number, - $.simple_expansion, - $.expansion, - $._simple_variable_name, - $.variable_name, - $.string, - )), - - arithmetic_binary_expression: $ => { - - /** @type {[RuleOrLiteral, number][]} */ - const table = [ - [choice('+', '-'), PREC.ADD], - [choice('*', '/', '%'), PREC.MULTIPLY], - ]; - - return choice(...table.map(([operator, precedence]) => - prec.left(precedence, seq( - field('lhs', $._arithmetic_expression), - field('op', alias(operator, $.operator)), - field('rhs', $._arithmetic_expression), - )) - )); - }, - - arithmetic_ternary_expression: $ => prec.left(PREC.TERNARY, seq( - field('cond', $._arithmetic_expression), - '?', - field('then', $._arithmetic_expression), - ':', - field('else', $._arithmetic_expression), - )), - - arithmetic_unary_expression: $ => prec(PREC.UNARY, seq( - field('op', alias(tokenLiterals(1, '-', '+'), $.operator)), - $._arithmetic_expression, - )), - - arithmetic_postfix_expression: $ => prec(PREC.POSTFIX, seq( - $._arithmetic_expression, - field('op', alias(choice('++', '--'), $.operator)), - )), - - arithmetic_parenthesized_expression: $ => seq('(', $._arithmetic_expression, ')'), - - concatenation: $ => prec(-1, seq( - $._primary_expression, - repeat1(seq( - choice($._concat, alias(/`\s*`/, '``')), - choice( - $._primary_expression, - alias($._comment_word, $.word), - alias($._bare_dollar, $.word), - alias(/`\s*`/, '``') - ), - )), - optional(seq($._concat, alias('$', $.word))), - )), - - string: $ => seq( - '"', - repeat(seq( - choice( - seq(optional('$'), $.string_content), - $.expansion, - $.simple_expansion, - $.command_substitution, - $.arithmetic_expansion, - ), - optional($._concat), - )), - optional(alias('$', $.string_content)), - '"', - ), - - string_content: _ => token(prec(-1, /([^"`$\\\r\n]|\\(.|\r?\n))+/)), - - raw_string: _ => /'[^']*'/, - - number: _ => /[0-9]+/, - - simple_expansion: $ => seq( - '$', - choice( - $._simple_variable_name, - $._multiline_variable_name, - $._special_variable_name, - $.variable_name, - alias('!', $.special_variable_name), - alias('#', $.special_variable_name), - ), - ), - - expansion: $ => seq( - '${', - optional($._expansion_body), - '}', - ), - - _expansion_body: $ => seq( - field('len', optional(alias('#', $.operator))), - field('name', choice($.variable_name, $._simple_variable_name, $._special_variable_name)), - optional(choice($._expansion_expression, $._expansion_regex)), - ), - - - _expansion_expression: $ => prec(1, seq( - field('op', alias(immediateLiterals(':-', '-', ':=', '=', ':?', '?', ':+', '+'), $.operator)), - field('args', optional(choice( - alias($._concatenation_in_expansion, $.concatenation), - alias(prec(1, $._word_no_brace), $.word2), - $.expansion, - $.raw_string, - $.string, - ))), - )), - - _expansion_regex: $ => seq( - field('op', alias(choice('#', $._immediate_double_hash, '%', '%%'), $.operator)), - field('args', repeat(choice( - $.raw_string, - $.regex, - $.string, - alias(')', $.regex), - alias(/\s+/, $.regex), - ))), - ), - - - _concatenation_in_expansion: $ => prec(-2, seq( - choice( - alias($._word_no_brace, $.word), - alias($._expansion_word, $.word), - $.variable_name, - $.simple_expansion, - $.expansion, - $.string, - $.raw_string, - $.command_substitution, - ), - repeat1(seq( - choice($._concat, alias(/`\s*`/, '``')), - choice( - alias($._word_no_brace, $.word), - alias($._expansion_word, $.word), - $.variable_name, - $.simple_expansion, - $.expansion, - $.string, - $.raw_string, - $.command_substitution, - ), - )), - )), - - command_substitution: $ => choice( - seq('$(', $._statements, ')'), - seq('$(', field('redr', $.file_redirect), ')'), - prec(1, seq('`', $._statements, '`')), - ), - - _extglob_blob: $ => choice( - $.extglob_pattern, - seq( - $.extglob_pattern, - choice($.string, $.expansion, $.command_substitution), - optional($.extglob_pattern), - ), - ), - - comment: _ => token(prec(-10, /#.*/)), - - _comment_word: _ => token(prec(-8, seq( - choice( - noneOf(...SPECIAL_CHARACTERS), - seq('\\', noneOf('\\s')), - ), - repeat(choice( - noneOf(...SPECIAL_CHARACTERS), - seq('\\', noneOf('\\s')), - '\\ ', - )), - ))), - - _simple_variable_name: $ => alias(/\w+/, $.variable_name), - _multiline_variable_name: $ => alias( - token(prec(-1, /(\w|\\\r?\n)+/)), - $.variable_name, - ), - - _special_variable_name: $ => alias(choice('*', '@', '?', '!', '#', '-', '$', '0'), $.special_variable_name), - - word: _ => token(seq( - choice( - noneOf('#', ...SPECIAL_CHARACTERS), - seq('\\', noneOf('\\s')), - ), - repeat(choice( - noneOf(...SPECIAL_CHARACTERS), - seq('\\', noneOf('\\s')), - '\\ ', - )), - )), - - - _word_no_brace: _ => prec(2, token(seq( - choice( - noneOf('#', '{', '}', ...SPECIAL_CHARACTERS), - seq('\\', noneOf('\\s')), - ), - repeat(choice( - noneOf('{', '}', ...SPECIAL_CHARACTERS), - seq('\\', noneOf('\\s')), - '\\ ', - )), - ))), - terminator: _ => choice(';', ';;', /\n/), - }, -}); - -/** - * Returns a regular expression that matches any character except the ones - * provided. - * - * @param {...string} characters - * - * @return {RegExp} - * - */ -function noneOf(...characters) { - const negatedString = characters.map(c => c == '\\' ? '\\\\' : c).join(''); - return new RegExp('[^' + negatedString + ']'); -} - -/** - * Creates a rule to optionally match one or more of the rules separated by a comma - * - * @param {RuleOrLiteral} rule - * - * @return {ChoiceRule} - * - */ -function commaSep(rule) { - return optional(commaSep1(rule)); -} - -/** - * Creates a rule to match one or more of the rules separated by a comma - * - * @param {RuleOrLiteral} rule - * - * @return {SeqRule} - * - */ -function commaSep1(rule) { - return seq(rule, repeat(seq(',', rule))); -} - -/** - * - * Turns a list of rules into a choice of immediate rule - * - * @param {(RegExp|String)[]} literals - * - * @return {ChoiceRule} - */ -function immediateLiterals(...literals) { - return choice(...literals.map(l => token.immediate(l))); -} - -/** - * - * Turns a list of rules into a choice of aliased token rules - * - * @param {number} precedence - * - * @param {(RegExp|String)[]} literals - * - * @return {ChoiceRule} - */ -function tokenLiterals(precedence, ...literals) { - return choice(...literals.map(l => token(prec(precedence, l)))); -} diff --git a/.tree-sitter-sh/package.json b/.tree-sitter-sh/package.json deleted file mode 100644 index 802435df..00000000 --- a/.tree-sitter-sh/package.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "name": "tree-sitter-bash", - "version": "0.0.1", - "description": "Bash grammar for tree-sitter", - "repository": "github:tree-sitter/tree-sitter-bash", - "license": "MIT", - "main": "bindings/node", - "types": "bindings/node", - "keywords": [ - "incremental", - "parsing", - "tree-sitter", - "bash" - ], - "files": [ - "grammar.js", - "binding.gyp", - "prebuilds/**", - "bindings/node/*", - "queries/*", - "src/**" - ], - "dependencies": { - "node-addon-api": "^7.1.0", - "node-gyp-build": "^4.8.0" - }, - "devDependencies": { - "prebuildify": "^6.0.0", - "tree-sitter-cli": "^0.22.6" - }, - "peerDependencies": { - "tree-sitter": "^0.21.0" - }, - "peerDependenciesMeta": { - "tree-sitter": { - "optional": true - } - }, - "scripts": { - "install": "node-gyp-build", - "prebuildify": "prebuildify --napi --strip", - "build": "tree-sitter generate --no-bindings", - "build-wasm": "tree-sitter build --wasm", - "test": "tree-sitter test", - "parse": "tree-sitter parse" - }, - "tree-sitter": [ - { - "scope": "source.bash", - "injection-regex": "^bash$" - } - ] -} diff --git a/.tree-sitter-sh/src/scanner.c b/.tree-sitter-sh/src/scanner.c deleted file mode 100644 index c9825f51..00000000 --- a/.tree-sitter-sh/src/scanner.c +++ /dev/null @@ -1,954 +0,0 @@ -#include "tree_sitter/alloc.h" -#include "tree_sitter/array.h" -#include "tree_sitter/parser.h" - -#include -#include -#include -#include - -enum TokenType -{ - FILE_DESCRIPTOR, - EMPTY_VALUE, - CONCAT, - VARIABLE_NAME, - REGEX, - EXPANSION_WORD, - EXTGLOB_PATTERN, - BARE_DOLLAR, - IMMEDIATE_DOUBLE_HASH, - // HEREDOC_ARROW, - // HEREDOC_ARROW_DASH, - NEWLINE, - OPENING_PAREN, - ERROR_RECOVERY, -}; - -typedef Array(char) String; - -typedef struct Heredoc -{ - bool is_raw; - bool started; - bool allows_indent; - String delimiter; - String current_leading_word; -} Heredoc; - -#define heredoc_new() \ - { \ - .is_raw = false, \ - .started = false, \ - .allows_indent = false, \ - .delimiter = array_new(), \ - .current_leading_word = array_new(), \ - }; - -typedef struct Scanner -{ - uint8_t last_glob_paren_depth; - bool ext_was_in_double_quote; - bool ext_saw_outside_quote; - Array(Heredoc) heredocs; -} Scanner; - -static inline void advance(TSLexer *lexer) -{ - lexer->advance(lexer, false); -} - -static inline void skip(TSLexer *lexer) -{ - lexer->advance(lexer, true); -} - -static inline bool in_error_recovery(const bool *valid_symbols) -{ - return valid_symbols[ERROR_RECOVERY]; -} - -static inline void reset_string(String *string) -{ - if (string->size > 0) - { - memset(string->contents, 0, string->size); - array_clear(string); - } -} - -static inline void reset_heredoc(Heredoc *heredoc) -{ - heredoc->is_raw = false; - heredoc->started = false; - heredoc->allows_indent = false; - reset_string(&heredoc->delimiter); -} - -static inline void reset(Scanner *scanner) -{ - for (uint32_t i = 0; i < scanner->heredocs.size; i++) - { - reset_heredoc(array_get(&scanner->heredocs, i)); - } -} - -static unsigned serialize(Scanner *scanner, char *buffer) -{ - uint32_t size = 0; - - buffer[size++] = (char)scanner->last_glob_paren_depth; - buffer[size++] = (char)scanner->ext_was_in_double_quote; - buffer[size++] = (char)scanner->ext_saw_outside_quote; - buffer[size++] = (char)scanner->heredocs.size; - - for (uint32_t i = 0; i < scanner->heredocs.size; i++) - { - Heredoc *heredoc = array_get(&scanner->heredocs, i); - if (heredoc->delimiter.size + 3 + size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) - { - return 0; - } - - buffer[size++] = (char)heredoc->is_raw; - buffer[size++] = (char)heredoc->started; - buffer[size++] = (char)heredoc->allows_indent; - - memcpy(&buffer[size], &heredoc->delimiter.size, sizeof(uint32_t)); - size += sizeof(uint32_t); - if (heredoc->delimiter.size > 0) - { - memcpy(&buffer[size], heredoc->delimiter.contents, heredoc->delimiter.size); - size += heredoc->delimiter.size; - } - } - return size; -} - -static void deserialize(Scanner *scanner, const char *buffer, unsigned length) -{ - if (length == 0) - { - reset(scanner); - } - else - { - uint32_t size = 0; - scanner->last_glob_paren_depth = buffer[size++]; - scanner->ext_was_in_double_quote = buffer[size++]; - scanner->ext_saw_outside_quote = buffer[size++]; - uint32_t heredoc_count = (unsigned char)buffer[size++]; - for (uint32_t i = 0; i < heredoc_count; i++) - { - Heredoc *heredoc = NULL; - if (i < scanner->heredocs.size) - { - heredoc = array_get(&scanner->heredocs, i); - } - else - { - Heredoc new_heredoc = heredoc_new(); - array_push(&scanner->heredocs, new_heredoc); - heredoc = array_back(&scanner->heredocs); - } - - heredoc->is_raw = buffer[size++]; - heredoc->started = buffer[size++]; - heredoc->allows_indent = buffer[size++]; - - memcpy(&heredoc->delimiter.size, &buffer[size], sizeof(uint32_t)); - size += sizeof(uint32_t); - array_reserve(&heredoc->delimiter, heredoc->delimiter.size); - - if (heredoc->delimiter.size > 0) - { - memcpy(heredoc->delimiter.contents, &buffer[size], heredoc->delimiter.size); - size += heredoc->delimiter.size; - } - } - assert(size == length); - } -} - -/** - * Consume a "word" in POSIX parlance, and returns it unquoted. - * - * This is an approximate implementation that doesn't deal with any - * POSIX-mandated substitution, and assumes the default value for - * IFS. - */ -static bool advance_word(TSLexer *lexer, String *unquoted_word) -{ - bool empty = true; - int32_t quote = 0; - - if (lexer->lookahead == '\'' || lexer->lookahead == '"') - { - quote = lexer->lookahead; - advance(lexer); - } - - while (lexer->lookahead && !(quote ? lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n' : iswspace(lexer->lookahead))) - { - if (lexer->lookahead == '\\') - { - advance(lexer); - if (!lexer->lookahead) - return false; - } - empty = false; - array_push(unquoted_word, lexer->lookahead); - advance(lexer); - } - array_push(unquoted_word, '\0'); - - if (quote && lexer->lookahead == quote) - advance(lexer); - - return !empty; -} - -static inline bool scan_bare_dollar(TSLexer *lexer) -{ - while (iswspace(lexer->lookahead) && lexer->lookahead != '\n' && !lexer->eof(lexer)) - skip(lexer); - - if (lexer->lookahead == '$') - { - advance(lexer); - lexer->result_symbol = BARE_DOLLAR; - lexer->mark_end(lexer); - return (iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"'); - } - - return false; -} - -static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) -{ - if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols)) - { - if (!(lexer->lookahead == 0 || iswspace(lexer->lookahead) || lexer->lookahead == '>' || lexer->lookahead == '<' || lexer->lookahead == ')' || lexer->lookahead == '(' || lexer->lookahead == ';' || lexer->lookahead == '&' || lexer->lookahead == '|' || lexer->lookahead == '{' || lexer->lookahead == '}')) - { - lexer->result_symbol = CONCAT; - // So for a`b`, we want to return a concat. We check if the - // 2nd backtick has whitespace after it, and if it does we - // return concat. - if (lexer->lookahead == '`') - { - lexer->mark_end(lexer); - advance(lexer); - while (lexer->lookahead != '`' && !lexer->eof(lexer)) - { - advance(lexer); - } - if (lexer->eof(lexer)) - { - return false; - } - if (lexer->lookahead == '`') - { - advance(lexer); - } - return iswspace(lexer->lookahead) || lexer->eof(lexer); - } - // strings w/ expansions that contains escaped quotes or - // backslashes need this to return a concat - if (lexer->lookahead == '\\') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '"' || lexer->lookahead == '\'' || lexer->lookahead == '\\') - { - return true; - } - if (lexer->eof(lexer)) - { - return false; - } - } - else - { - return true; - } - } - } - - if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !in_error_recovery(valid_symbols)) - { - // advance two # and ensure not } after - if (lexer->lookahead == '#') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '#') - { - advance(lexer); - if (lexer->lookahead != '}') - { - lexer->result_symbol = IMMEDIATE_DOUBLE_HASH; - lexer->mark_end(lexer); - return true; - } - } - } - } - - if (valid_symbols[EMPTY_VALUE]) - { - if (iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == ';' || lexer->lookahead == '&') - { - lexer->result_symbol = EMPTY_VALUE; - return true; - } - } - - if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR]) && !in_error_recovery(valid_symbols)) - { - for (;;) - { - if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == '\r' || (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) && !valid_symbols[EXPANSION_WORD]) - { - skip(lexer); - } - else if (lexer->lookahead == '\\') - { - skip(lexer); - - if (lexer->eof(lexer)) - { - lexer->mark_end(lexer); - lexer->result_symbol = VARIABLE_NAME; - return true; - } - - if (lexer->lookahead == '\r') - { - skip(lexer); - } - if (lexer->lookahead == '\n') - { - skip(lexer); - } - else - { - if (lexer->lookahead == '\\' && valid_symbols[EXPANSION_WORD]) - { - goto expansion_word; - } - return false; - } - } - else - { - break; - } - } - - // no '*', '@', '?', '-', '$', '0', '_' - if (!valid_symbols[EXPANSION_WORD] && (lexer->lookahead == '*' || lexer->lookahead == '@' || lexer->lookahead == '?' || lexer->lookahead == '-' || lexer->lookahead == '0' || lexer->lookahead == '_')) - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '=' || lexer->lookahead == '[' || lexer->lookahead == ':' || lexer->lookahead == '-' || lexer->lookahead == '%' || lexer->lookahead == '#' || lexer->lookahead == '/') - { - return false; - } - if (valid_symbols[EXTGLOB_PATTERN] && iswspace(lexer->lookahead)) - { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - } - - bool is_number = true; - if (iswdigit(lexer->lookahead)) - { - advance(lexer); - } - else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') - { - is_number = false; - advance(lexer); - } - else - { - if (lexer->lookahead == '{') - { - goto brace_start; - } - if (valid_symbols[EXPANSION_WORD]) - { - goto expansion_word; - } - if (valid_symbols[EXTGLOB_PATTERN]) - { - goto extglob_pattern; - } - return false; - } - - for (;;) - { - if (iswdigit(lexer->lookahead)) - { - advance(lexer); - } - else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') - { - is_number = false; - advance(lexer); - } - else - { - break; - } - } - - if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->lookahead == '>' || lexer->lookahead == '<')) - { - lexer->result_symbol = FILE_DESCRIPTOR; - return true; - } - - if (valid_symbols[VARIABLE_NAME]) - { - if (lexer->lookahead == '+') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '=' || lexer->lookahead == ':') - { - lexer->result_symbol = VARIABLE_NAME; - return true; - } - return false; - } - if (lexer->lookahead == '/') - { - return false; - } - if (lexer->lookahead == '=' || lexer->lookahead == '[' || (lexer->lookahead == ':' && !valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable - // names for function words, only handling : for now? #235 - lexer->lookahead == '%' || (lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || (lexer->lookahead == '-')) - { - lexer->mark_end(lexer); - lexer->result_symbol = VARIABLE_NAME; - return true; - } - - if (lexer->lookahead == '?') - { - lexer->mark_end(lexer); - advance(lexer); - lexer->result_symbol = VARIABLE_NAME; - return iswalpha(lexer->lookahead); - } - } - - return false; - } - - if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && scan_bare_dollar(lexer)) - { - return true; - } - - if ((valid_symbols[REGEX]) && !in_error_recovery(valid_symbols)) - { - if (valid_symbols[REGEX]) - { - while (iswspace(lexer->lookahead)) - { - skip(lexer); - } - } - - if ((lexer->lookahead != '"' && lexer->lookahead != '\'') || ((lexer->lookahead == '$' || lexer->lookahead == '\'')) || (lexer->lookahead == '\'')) - { - typedef struct - { - bool done; - bool advanced_once; - bool found_non_alnumdollarunderdash; - bool last_was_escape; - bool in_single_quote; - uint32_t paren_depth; - uint32_t bracket_depth; - uint32_t brace_depth; - } State; - - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '(') - { - return false; - } - } - - lexer->mark_end(lexer); - - State state = {false, false, false, false, false, 0, 0, 0}; - while (!state.done) - { - if (state.in_single_quote) - { - if (lexer->lookahead == '\'') - { - state.in_single_quote = false; - advance(lexer); - lexer->mark_end(lexer); - } - } - switch (lexer->lookahead) - { - case '\\': - state.last_was_escape = true; - break; - case '\0': - return false; - case '(': - state.paren_depth++; - state.last_was_escape = false; - break; - case '[': - state.bracket_depth++; - state.last_was_escape = false; - break; - case '{': - if (!state.last_was_escape) - state.brace_depth++; - state.last_was_escape = false; - break; - case ')': - if (state.paren_depth == 0) - state.done = true; - state.paren_depth--; - state.last_was_escape = false; - break; - case ']': - if (state.bracket_depth == 0) - state.done = true; - state.bracket_depth--; - state.last_was_escape = false; - break; - case '}': - if (state.brace_depth == 0) - state.done = true; - state.brace_depth--; - state.last_was_escape = false; - break; - case '\'': - // Enter or exit a single-quoted string. - state.in_single_quote = !state.in_single_quote; - advance(lexer); - state.advanced_once = true; - state.last_was_escape = false; - continue; - default: - state.last_was_escape = false; - break; - } - - if (!state.done) - { - if (valid_symbols[REGEX]) - { - bool was_space = !state.in_single_quote && iswspace(lexer->lookahead); - advance(lexer); - state.advanced_once = true; - if (!was_space || state.paren_depth > 0) - { - lexer->mark_end(lexer); - } - } - } - } - - lexer->result_symbol = REGEX; - if (valid_symbols[REGEX] && !state.advanced_once) - { - return false; - } - return true; - } - } - -extglob_pattern: - if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols)) - { - // first skip ws, then check for ? * + @ ! - while (iswspace(lexer->lookahead)) - { - skip(lexer); - } - - if (lexer->lookahead == '?' || lexer->lookahead == '*' || lexer->lookahead == '+' || lexer->lookahead == '@' || lexer->lookahead == '!' || lexer->lookahead == '-' || lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.' || lexer->lookahead == '[' || (iswalpha(lexer->lookahead))) - { - if (lexer->lookahead == '\\') - { - advance(lexer); - if ((iswspace(lexer->lookahead) || lexer->lookahead == '"') && lexer->lookahead != '\r' && lexer->lookahead != '\n') - { - advance(lexer); - } - else - { - return false; - } - } - - if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) - { - lexer->mark_end(lexer); - advance(lexer); - - if (iswspace(lexer->lookahead)) - { - return false; - } - } - - lexer->mark_end(lexer); - bool was_non_alpha = !iswalpha(lexer->lookahead); - if (lexer->lookahead != '[') - { - // no esac - if (lexer->lookahead == 'e') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == 's') - { - advance(lexer); - if (lexer->lookahead == 'a') - { - advance(lexer); - if (lexer->lookahead == 'c') - { - advance(lexer); - if (iswspace(lexer->lookahead)) - { - return false; - } - } - } - } - } - else - { - advance(lexer); - } - } - - // -\w is just a word, find something else special - if (lexer->lookahead == '-') - { - lexer->mark_end(lexer); - advance(lexer); - while (iswalnum(lexer->lookahead)) - { - advance(lexer); - } - - if (lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.') - { - return false; - } - lexer->mark_end(lexer); - } - - // case item -) or *) - if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) - { - lexer->mark_end(lexer); - advance(lexer); - if (iswspace(lexer->lookahead)) - { - lexer->result_symbol = EXTGLOB_PATTERN; - return was_non_alpha; - } - } - - if (iswspace(lexer->lookahead)) - { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return true; - } - - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '{' || lexer->lookahead == '(') - { - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - } - - if (lexer->lookahead == '|') - { - lexer->mark_end(lexer); - advance(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - - if (!iswalnum(lexer->lookahead) && lexer->lookahead != '(' && lexer->lookahead != '"' && lexer->lookahead != '[' && lexer->lookahead != '?' && lexer->lookahead != '/' && lexer->lookahead != '\\' && lexer->lookahead != '_' && lexer->lookahead != '*') - { - return false; - } - - typedef struct - { - bool done; - bool saw_non_alphadot; - uint32_t paren_depth; - uint32_t bracket_depth; - uint32_t brace_depth; - } State; - - State state = {false, was_non_alpha, scanner->last_glob_paren_depth, 0, 0}; - while (!state.done) - { - switch (lexer->lookahead) - { - case '\0': - return false; - case '(': - state.paren_depth++; - break; - case '[': - state.bracket_depth++; - break; - case '{': - state.brace_depth++; - break; - case ')': - if (state.paren_depth == 0) - { - state.done = true; - } - state.paren_depth--; - break; - case ']': - if (state.bracket_depth == 0) - { - state.done = true; - } - state.bracket_depth--; - break; - case '}': - if (state.brace_depth == 0) - { - state.done = true; - } - state.brace_depth--; - break; - } - - if (lexer->lookahead == '|') - { - lexer->mark_end(lexer); - advance(lexer); - if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0) - { - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - } - - if (!state.done) - { - bool was_space = iswspace(lexer->lookahead); - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') - { - state.saw_non_alphadot = true; - } - advance(lexer); - if (lexer->lookahead == '(' || lexer->lookahead == '{') - { - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = state.paren_depth; - return state.saw_non_alphadot; - } - } - if (was_space) - { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return state.saw_non_alphadot; - } - if (lexer->lookahead == '"') - { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return state.saw_non_alphadot; - } - if (lexer->lookahead == '\\') - { - if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') - { - state.saw_non_alphadot = true; - } - advance(lexer); - if (iswspace(lexer->lookahead) || lexer->lookahead == '"') - { - advance(lexer); - } - } - else - { - if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') - { - state.saw_non_alphadot = true; - } - advance(lexer); - } - if (!was_space) - { - lexer->mark_end(lexer); - } - } - } - - lexer->result_symbol = EXTGLOB_PATTERN; - scanner->last_glob_paren_depth = 0; - return state.saw_non_alphadot; - } - scanner->last_glob_paren_depth = 0; - - return false; - } - -expansion_word: - if (valid_symbols[EXPANSION_WORD]) - { - bool advanced_once = false; - bool advance_once_space = false; - for (;;) - { - if (lexer->lookahead == '\"') - return false; - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || iswalnum(lexer->lookahead)) - { - lexer->result_symbol = EXPANSION_WORD; - return advanced_once; - } - advanced_once = true; - } - - if (lexer->lookahead == '}') - { - lexer->mark_end(lexer); - lexer->result_symbol = EXPANSION_WORD; - return advanced_once || advance_once_space; - } - - if (lexer->lookahead == '(' && !(advanced_once || advance_once_space)) - { - lexer->mark_end(lexer); - advance(lexer); - while (lexer->lookahead != ')' && !lexer->eof(lexer)) - { - // if we find a $( or ${ assume this is valid and is - // a garbage concatenation of some weird word + an - // expansion - // I wonder where this can fail - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - advance(lexer); - if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || iswalnum(lexer->lookahead)) - { - lexer->result_symbol = EXPANSION_WORD; - return advanced_once; - } - advanced_once = true; - } - else - { - advanced_once = advanced_once || !iswspace(lexer->lookahead); - advance_once_space = advance_once_space || iswspace(lexer->lookahead); - advance(lexer); - } - } - lexer->mark_end(lexer); - if (lexer->lookahead == ')') - { - advanced_once = true; - advance(lexer); - lexer->mark_end(lexer); - if (lexer->lookahead == '}') - return false; - } - else - return false; - } - - if (lexer->lookahead == '\'') - return false; - if (lexer->eof(lexer)) - return false; - advanced_once = advanced_once || !iswspace(lexer->lookahead); - advance_once_space = advance_once_space || iswspace(lexer->lookahead); - advance(lexer); - } - } - -brace_start: - return false; -} - -void *tree_sitter_sh_external_scanner_create() -{ - Scanner *scanner = calloc(1, sizeof(Scanner)); - array_init(&scanner->heredocs); - return scanner; -} - -bool tree_sitter_sh_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) -{ - Scanner *scanner = (Scanner *)payload; - return scan(scanner, lexer, valid_symbols); -} - -unsigned tree_sitter_sh_external_scanner_serialize(void *payload, char *state) -{ - Scanner *scanner = (Scanner *)payload; - return serialize(scanner, state); -} - -void tree_sitter_sh_external_scanner_deserialize(void *payload, const char *state, unsigned length) -{ - Scanner *scanner = (Scanner *)payload; - deserialize(scanner, state, length); -} - -void tree_sitter_sh_external_scanner_destroy(void *payload) -{ - Scanner *scanner = (Scanner *)payload; - for (size_t i = 0; i < scanner->heredocs.size; i++) - { - Heredoc *heredoc = array_get(&scanner->heredocs, i); - array_delete(&heredoc->current_leading_word); - array_delete(&heredoc->delimiter); - } - array_delete(&scanner->heredocs); - free(scanner); -}