update: remove ts

This commit is contained in:
maix0 2024-10-04 16:31:20 +02:00
parent 142ac9c9e1
commit a35eafd84c
3 changed files with 0 additions and 1613 deletions

View file

@ -1,606 +0,0 @@
/**
* @file Bash grammar for tree-sitter
* @author Max Brunsfeld <maxbrunsfeld@gmail.com>
* @author Amaan Qureshi <amaanq12@gmail.com>
* @license MIT
*/
/// <reference types="tree-sitter-cli/dsl" />
// @ts-check
// Characters excluded from the unescaped part of word-like tokens. The list is
// spread into noneOf() by the `word`, `_comment_word` and `_word_no_brace`
// rules below; it holds shell operators, quotes, the backslash and whitespace.
const SPECIAL_CHARACTERS = [
'|', '&', ';', '<', '>', '(', ')', '$', '`', '\\', '\"', '\'', ' ', '\t', '\n',
]
// Numeric precedence levels for operators (a larger number binds tighter).
// Within this file only TERNARY, UNARY, ADD, MULTIPLY and POSTFIX are
// referenced, all by the arithmetic_* rules; the other entries are unused here.
const PREC = {
UPDATE: 0,
ASSIGN: 1,
TERNARY: 2,
LOGICAL_OR: 3,
LOGICAL_AND: 4,
BITWISE_OR: 5,
BITWISE_XOR: 6,
BITWISE_AND: 7,
EQUALITY: 8,
COMPARE: 9,
TEST: 10,
UNARY: 11,
SHIFT: 12,
ADD: 13,
MULTIPLY: 14,
EXPONENT: 15,
NEGATE: 16,
PREFIX: 17,
POSTFIX: 18,
};
// Grammar for the 'sh' language: a reduced variant of the Bash grammar in which
// the control-flow constructs (for/while/if/case/function) are commented out.
module.exports = grammar({
name: 'sh',
// Rule groups for which the parser generator is told to accept an LR conflict
// instead of reporting it.
conflicts: $ => [
[$.command, $._variable_assignments],
[$.redirected_statement, $.command],
[$.redirected_statement, $.command_substitution],
[$._expansion_body, $._expansion_regex],
[$.pipeline],
],
// Rules that are substituted into their use sites rather than producing their
// own nodes.
inline: $ => [
$._statement,
$._literal,
$._terminated_statement,
$._primary_expression,
$._simple_variable_name,
$._multiline_variable_name,
$._special_variable_name,
$._statement_not_subshell,
],
// Tokens produced by the external scanner. The entries line up, in order, with
// `enum TokenType` in the scanner source (FILE_DESCRIPTOR ... ERROR_RECOVERY).
externals: $ => [
$.file_descriptor,
$._empty_value,
$._concat,
$.variable_name,
$.regex,
$._expansion_word,
$.extglob_pattern,
$._bare_dollar,
$._immediate_double_hash,
//'<<',
/\n/,
'(',
$.__error_recovery,
],
// Tokens that may appear anywhere between other tokens: comments, whitespace,
// backslash-newline continuations and backslash-escaped blanks.
extras: $ => [
$.comment,
/\s/,
/\\\r?\n/,
/\\( |\t|\v|\f)/,
],
word: $ => $.word,
rules: {
// A program is an optional statement list.
program: $ => optional($._statements),
// Zero or more terminated statements followed by one final statement whose
// terminator is optional.
_statements: $ => prec(1, seq(
repeat(seq(
field('stmt', $._statement),
field('term', $.terminator),
)),
field('stmt', $._statement),
field('term', optional($.terminator)),
)),
// One or more statements, each of which must be followed by a terminator.
_terminated_statement: $ => repeat1(seq(
field('stmt', $._statement),
field('term', $.terminator)
)),
// Statements
_statement: $ => choice(
$._statement_not_subshell,
$.subshell,
),
_statement_not_subshell: $ => choice(
// $.case_statement,
$.command,
$.compound_statement,
// $.for_statement,
// $.function_definition,
// $.if_statement,
$.list,
$.negated_command,
$.pipeline,
$.redirected_statement,
$.variable_assignment,
$._variable_assignments,
// $.while_statement,
),
// Every statement kind that may appear as one stage of a pipeline (everything
// except `pipeline` itself).
_statement_not_pipeline: $ => prec(1, choice(
// $.case_statement,
$.command,
$.compound_statement,
// $.for_statement,
// $.function_definition,
// $.if_statement,
$.list,
$.negated_command,
$.redirected_statement,
$.subshell,
$.variable_assignment,
$._variable_assignments,
// $.while_statement,
)),
// Either a statement followed by one or more redirections, or a bare run of
// file redirections with no statement body.
redirected_statement: $ => prec.dynamic(-1, prec.right(-1, choice(
seq(
field('body', $._statement),
field('redr', repeat1(choice($.file_redirect, $.heredoc_redirect))),
),
field('redr', repeat1($.file_redirect)),
))),
// The control-flow rules below (for/while/do/if/elif/else/case/function) are
// disabled; they are kept here only as commented-out reference.
/*
for_statement: $ => seq(
'for',
field('var', $._simple_variable_name),
optional(seq(
'in',
field('value', repeat1($._literal)),
)),
$.terminator,
field('body', $.do_group),
),
while_statement: $ => seq(
choice('while', 'until'),
field('cond', $._terminated_statement),
field('body', $.do_group),
),
do_group: $ => seq(
'do',
optional($._terminated_statement),
'done',
),
if_statement: $ => seq(
'if',
field('cond', $._terminated_statement),
'then',
field('body', optional($._terminated_statement)),
field('elif', repeat($.elif_clause)),
field('else', optional($.else_clause)),
'fi',
),
elif_clause: $ => seq(
'elif',
field('cond', $._terminated_statement),
'then',
field('body', optional($._terminated_statement)),
),
else_clause: $ => seq(
'else',
field('body', optional($._terminated_statement)),
),
case_statement: $ => seq(
'case',
field('value', $._literal),
optional($.terminator),
'in',
optional($.terminator),
optional(seq(
repeat(field('cases', $.case_item)),
field('cases', alias($._case_item_last, $.case_item))
)),
'esac',
),
_case_item_last: $ => seq(
optional('('),
field('value', choice($._literal, $._extglob_blob)),
repeat(seq('|', field('value', choice($._literal, $._extglob_blob)))),
')',
repeat('\n'),
choice(field('body', $._statements),),
optional(';;')
),
case_item: $ => seq(
optional('('),
field('value', choice($._literal, $._extglob_blob)),
repeat(seq('|', field('value', choice($._literal, $._extglob_blob)))),
')',
repeat('\n'),
choice(field('body', $._statements)),
';;'
),
function_definition: $ => prec.right(seq(
field('name', $.word),
'(', ')',
field('body', choice($.compound_statement, $.subshell, $.command, $.while_statement, $.if_statement, $.for_statement, $._variable_assignments, repeat1($.file_redirect))),
)),
*/
// `{ ...; }` group: the body must be terminated statements.
compound_statement: $ => seq('{', $._terminated_statement, '}'),
// `( ... )` group.
subshell: $ => seq('(', $._statements, ')'),
// Two or more non-pipeline statements joined by `|`.
pipeline: $ => prec.right(seq(
$._statement_not_pipeline,
repeat1(seq('|', $._statement_not_pipeline)),
)),
// Two statements joined by `&&` or `||`; left-associative.
list: $ => prec.left(-1, seq(
field('cmd', $._statement),
field('op', alias(choice('&&', '||'), $.operator)),
field('cmd', $._statement),
)),
// Commands
negated_command: $ => seq(
'!',
choice(
prec(2, $.command),
prec(1, $.variable_assignment),
$.subshell,
),
),
// Optional leading assignments/redirections, then the command name, then any
// number of arguments (a lone `$` from the scanner is accepted as a word).
command: $ => prec.left(seq(
repeat(choice(
$.variable_assignment,
field('redr', $.file_redirect),
)),
field('name', $.command_name),
repeat(choice(
field('arg', $._literal),
field('arg', alias($._bare_dollar, $.word)),
)),
)),
command_name: $ => $._literal,
// `name=value`; the value may be a literal, the scanner's empty-value token or
// a word that starts with `#`.
variable_assignment: $ => seq(
field('name', choice(
$.variable_name,
)),
'=',
field('value', choice(
$._literal,
$._empty_value,
alias($._comment_word, $.word),
)),
),
// Two or more consecutive assignments.
_variable_assignments: $ => seq($.variable_assignment, repeat1($.variable_assignment)),
file_redirect: $ => prec.left(seq(
field('op', alias(choice('<', '>', '>>'), $.operator)),
field('dest', repeat1($._literal)),
)),
// Only the `<<` operator and its delimiter are matched here; this rule does
// not describe a heredoc body.
heredoc_redirect: $ => seq(
field('op', alias('<<', $.operator)),
field('del', alias(/[\w\d\-\._]+/, $.heredoc_delimiter)),
),
// Literals
_literal: $ => choice($.concatenation, $._primary_expression),
_primary_expression: $ => choice(
$.word,
$.string,
$.raw_string,
$.number,
$.expansion,
$.simple_expansion,
$.command_substitution,
$.arithmetic_expansion,
),
// `$(( expr ))`
arithmetic_expansion: $ => seq('$((', $._arithmetic_expression, '))'),
_arithmetic_expression: $ => prec(1, choice(
$.arithmetic_literal,
$.arithmetic_unary_expression,
$.arithmetic_ternary_expression,
$.arithmetic_binary_expression,
$.arithmetic_postfix_expression,
$.arithmetic_parenthesized_expression,
)),
arithmetic_literal: $ => prec(1, choice(
$.number,
$.simple_expansion,
$.expansion,
$._simple_variable_name,
$.variable_name,
$.string,
)),
// Built from a table of [operator, precedence] pairs; every entry becomes a
// left-associative `lhs op rhs` alternative.
arithmetic_binary_expression: $ => {
/** @type {[RuleOrLiteral, number][]} */
const table = [
[choice('+', '-'), PREC.ADD],
[choice('*', '/', '%'), PREC.MULTIPLY],
];
return choice(...table.map(([operator, precedence]) =>
prec.left(precedence, seq(
field('lhs', $._arithmetic_expression),
field('op', alias(operator, $.operator)),
field('rhs', $._arithmetic_expression),
))
));
},
arithmetic_ternary_expression: $ => prec.left(PREC.TERNARY, seq(
field('cond', $._arithmetic_expression),
'?',
field('then', $._arithmetic_expression),
':',
field('else', $._arithmetic_expression),
)),
// Unary `-` / `+`; the operator tokens carry lexical precedence 1.
arithmetic_unary_expression: $ => prec(PREC.UNARY, seq(
field('op', alias(tokenLiterals(1, '-', '+'), $.operator)),
$._arithmetic_expression,
)),
arithmetic_postfix_expression: $ => prec(PREC.POSTFIX, seq(
$._arithmetic_expression,
field('op', alias(choice('++', '--'), $.operator)),
)),
arithmetic_parenthesized_expression: $ => seq('(', $._arithmetic_expression, ')'),
// Adjacent word parts with no whitespace between them. The `_concat` separator
// is an external token; an empty pair of backticks is also accepted as one.
concatenation: $ => prec(-1, seq(
$._primary_expression,
repeat1(seq(
choice($._concat, alias(/`\s*`/, '``')),
choice(
$._primary_expression,
alias($._comment_word, $.word),
alias($._bare_dollar, $.word),
alias(/`\s*`/, '``')
),
)),
optional(seq($._concat, alias('$', $.word))),
)),
// Double-quoted string: literal content interleaved with expansions and
// substitutions; a trailing lone `$` is treated as string content.
string: $ => seq(
'"',
repeat(seq(
choice(
seq(optional('$'), $.string_content),
$.expansion,
$.simple_expansion,
$.command_substitution,
$.arithmetic_expansion,
),
optional($._concat),
)),
optional(alias('$', $.string_content)),
'"',
),
string_content: _ => token(prec(-1, /([^"`$\\\r\n]|\\(.|\r?\n))+/)),
// Single-quoted string; no escapes are recognized inside it.
raw_string: _ => /'[^']*'/,
number: _ => /[0-9]+/,
// `$name` and the special forms such as `$!` and `$#`.
simple_expansion: $ => seq(
'$',
choice(
$._simple_variable_name,
$._multiline_variable_name,
$._special_variable_name,
$.variable_name,
alias('!', $.special_variable_name),
alias('#', $.special_variable_name),
),
),
// `${ ... }`; the body is optional, so `${}` is accepted.
expansion: $ => seq(
'${',
optional($._expansion_body),
'}',
),
_expansion_body: $ => seq(
field('len', optional(alias('#', $.operator))),
field('name', choice($.variable_name, $._simple_variable_name, $._special_variable_name)),
optional(choice($._expansion_expression, $._expansion_regex)),
),
// Operators such as `:-` and `:=`, which must follow the name immediately
// (token.immediate), plus an optional argument.
_expansion_expression: $ => prec(1, seq(
field('op', alias(immediateLiterals(':-', '-', ':=', '=', ':?', '?', ':+', '+'), $.operator)),
field('args', optional(choice(
alias($._concatenation_in_expansion, $.concatenation),
alias(prec(1, $._word_no_brace), $.word2),
$.expansion,
$.raw_string,
$.string,
))),
)),
// Pattern-removal operators `#`, `##`, `%`, `%%` followed by pattern pieces.
_expansion_regex: $ => seq(
field('op', alias(choice('#', $._immediate_double_hash, '%', '%%'), $.operator)),
field('args', repeat(choice(
$.raw_string,
$.regex,
$.string,
alias(')', $.regex),
alias(/\s+/, $.regex),
))),
),
// Concatenation variant used inside `${...}`: its word pieces may not contain
// braces.
_concatenation_in_expansion: $ => prec(-2, seq(
choice(
alias($._word_no_brace, $.word),
alias($._expansion_word, $.word),
$.variable_name,
$.simple_expansion,
$.expansion,
$.string,
$.raw_string,
$.command_substitution,
),
repeat1(seq(
choice($._concat, alias(/`\s*`/, '``')),
choice(
alias($._word_no_brace, $.word),
alias($._expansion_word, $.word),
$.variable_name,
$.simple_expansion,
$.expansion,
$.string,
$.raw_string,
$.command_substitution,
),
)),
)),
// `$( statements )`, `$( <redirect )` or a backtick-quoted statement list.
command_substitution: $ => choice(
seq('$(', $._statements, ')'),
seq('$(', field('redr', $.file_redirect), ')'),
prec(1, seq('`', $._statements, '`')),
),
// Only referenced from the commented-out case_item rules above.
_extglob_blob: $ => choice(
$.extglob_pattern,
seq(
$.extglob_pattern,
choice($.string, $.expansion, $.command_substitution),
optional($.extglob_pattern),
),
),
comment: _ => token(prec(-10, /#.*/)),
// Same shape as `word`, except that the first character may be `#`.
_comment_word: _ => token(prec(-8, seq(
choice(
noneOf(...SPECIAL_CHARACTERS),
seq('\\', noneOf('\\s')),
),
repeat(choice(
noneOf(...SPECIAL_CHARACTERS),
seq('\\', noneOf('\\s')),
'\\ ',
)),
))),
_simple_variable_name: $ => alias(/\w+/, $.variable_name),
// Variable name that may contain backslash-newline continuations.
_multiline_variable_name: $ => alias(
token(prec(-1, /(\w|\\\r?\n)+/)),
$.variable_name,
),
_special_variable_name: $ => alias(choice('*', '@', '?', '!', '#', '-', '$', '0'), $.special_variable_name),
// A word: the first character is neither `#` nor a special character (or it is
// a backslash escape); later characters may include `#` and escaped spaces.
word: _ => token(seq(
choice(
noneOf('#', ...SPECIAL_CHARACTERS),
seq('\\', noneOf('\\s')),
),
repeat(choice(
noneOf(...SPECIAL_CHARACTERS),
seq('\\', noneOf('\\s')),
'\\ ',
)),
)),
// Same as `word`, with `{` and `}` excluded as well.
_word_no_brace: _ => prec(2, token(seq(
choice(
noneOf('#', '{', '}', ...SPECIAL_CHARACTERS),
seq('\\', noneOf('\\s')),
),
repeat(choice(
noneOf('{', '}', ...SPECIAL_CHARACTERS),
seq('\\', noneOf('\\s')),
'\\ ',
)),
))),
// Statement separators: `;`, `;;` or a newline.
terminator: _ => choice(';', ';;', /\n/),
},
});
/**
 * Returns a regular expression that matches any single character except the
 * ones provided.
 *
 * Each argument is normally a single character. An argument that is exactly
 * one of the bracket-expression metacharacters (`\`, `]`, `^`, `-`) is
 * backslash-escaped so that it is always taken literally and can neither close
 * the class nor form a range with its neighbours. Any other argument is
 * inserted verbatim, which lets callers pass a class escape such as `'\\s'`.
 *
 * @param {...string} characters characters (or class escapes) to exclude
 *
 * @return {RegExp} a negated character class, e.g. `/[^ab]/`
 *
 */
function noneOf(...characters) {
  // Metacharacters that change the meaning of a bracket expression.
  const classMeta = ['\\', ']', '^', '-'];
  const negatedString = characters
    .map(c => (classMeta.includes(c) ? '\\' + c : c))
    .join('');
  return new RegExp('[^' + negatedString + ']');
}
/**
 * Builds a rule matching an optional comma-separated list of `rule`:
 * either nothing at all, or one or more occurrences separated by `,`.
 *
 * @param {RuleOrLiteral} rule the list element
 *
 * @return {ChoiceRule}
 *
 */
function commaSep(rule) {
  const nonEmptyList = commaSep1(rule);
  return optional(nonEmptyList);
}
/**
 * Builds a rule matching a non-empty comma-separated list of `rule`:
 * one occurrence followed by any number of `, rule` pairs.
 *
 * @param {RuleOrLiteral} rule the list element
 *
 * @return {SeqRule}
 *
 */
function commaSep1(rule) {
  const moreItems = repeat(seq(',', rule));
  return seq(rule, moreItems);
}
/**
 *
 * Wraps each given literal in `token.immediate` and returns a choice between
 * the resulting tokens.
 *
 * @param {...(RegExp|String)} literals literals to turn into immediate tokens
 *
 * @return {ChoiceRule}
 */
function immediateLiterals(...literals) {
  const immediateTokens = literals.map(literal => token.immediate(literal));
  return choice(...immediateTokens);
}
/**
 *
 * Wraps each given literal in a token carrying the given lexical precedence
 * and returns a choice between the resulting tokens.
 *
 * @param {number} precedence precedence applied to every token
 *
 * @param {...(RegExp|String)} literals literals to turn into tokens
 *
 * @return {ChoiceRule}
 */
function tokenLiterals(precedence, ...literals) {
  const rankedTokens = literals.map(literal => token(prec(precedence, literal)));
  return choice(...rankedTokens);
}

View file

@ -1,53 +0,0 @@
{
"name": "tree-sitter-bash",
"version": "0.0.1",
"description": "Bash grammar for tree-sitter",
"repository": "github:tree-sitter/tree-sitter-bash",
"license": "MIT",
"main": "bindings/node",
"types": "bindings/node",
"keywords": [
"incremental",
"parsing",
"tree-sitter",
"bash"
],
"files": [
"grammar.js",
"binding.gyp",
"prebuilds/**",
"bindings/node/*",
"queries/*",
"src/**"
],
"dependencies": {
"node-addon-api": "^7.1.0",
"node-gyp-build": "^4.8.0"
},
"devDependencies": {
"prebuildify": "^6.0.0",
"tree-sitter-cli": "^0.22.6"
},
"peerDependencies": {
"tree-sitter": "^0.21.0"
},
"peerDependenciesMeta": {
"tree-sitter": {
"optional": true
}
},
"scripts": {
"install": "node-gyp-build",
"prebuildify": "prebuildify --napi --strip",
"build": "tree-sitter generate --no-bindings",
"build-wasm": "tree-sitter build --wasm",
"test": "tree-sitter test",
"parse": "tree-sitter parse"
},
"tree-sitter": [
{
"scope": "source.bash",
"injection-regex": "^bash$"
}
]
}

View file

@ -1,954 +0,0 @@
#include "tree_sitter/alloc.h"
#include "tree_sitter/array.h"
#include "tree_sitter/parser.h"
#include <assert.h>
#include <ctype.h>
#include <string.h>
#include <wctype.h>
/*
 * External token kinds handled by this scanner. The enumerators are used as
 * indices into `valid_symbols` and as values for `lexer->result_symbol`; their
 * order lines up with the `externals` array of the grammar
 * (file_descriptor, _empty_value, _concat, ...).
 */
enum TokenType
{
FILE_DESCRIPTOR,
EMPTY_VALUE,
CONCAT,
VARIABLE_NAME,
REGEX,
EXPANSION_WORD,
EXTGLOB_PATTERN,
BARE_DOLLAR,
IMMEDIATE_DOUBLE_HASH,
// HEREDOC_ARROW,
// HEREDOC_ARROW_DASH,
NEWLINE,
OPENING_PAREN,
ERROR_RECOVERY,
};
/* Growable character buffer built on the tree-sitter Array macro. */
typedef Array(char) String;
/*
 * Per-heredoc scanner state. In this file the three flags and `delimiter` are
 * written by serialize() and restored by deserialize(); `current_leading_word`
 * is not serialized and is only released in the destroy function.
 * NOTE(review): no scanning code in this file reads these fields — presumably
 * left over from the heredoc support that was removed; confirm.
 */
typedef struct Heredoc
{
bool is_raw;
bool started;
bool allows_indent;
String delimiter;
String current_leading_word;
} Heredoc;
/*
 * Brace-enclosed initializer for an empty Heredoc: all flags false and both
 * strings empty.
 *
 * The expansion deliberately carries no trailing semicolon; the use site
 * supplies it (`Heredoc h = heredoc_new();`). This avoids a stray empty
 * statement and lets the macro be used inside a compound literal.
 */
#define heredoc_new()                        \
    {                                        \
        .is_raw = false,                     \
        .started = false,                    \
        .allows_indent = false,              \
        .delimiter = array_new(),            \
        .current_leading_word = array_new(), \
    }
/*
 * Scanner state that persists between scan calls and is saved and restored
 * through serialize()/deserialize().
 */
typedef struct Scanner
{
// Parenthesis depth carried between extglob pattern tokens (read and written
// in the EXTGLOB_PATTERN section of scan()).
uint8_t last_glob_paren_depth;
// The next two flags are only serialized/deserialized in this file; scan()
// does not read them.
bool ext_was_in_double_quote;
bool ext_saw_outside_quote;
Array(Heredoc) heredocs;
} Scanner;
/* Move past the lookahead character, keeping it as part of the current token. */
static inline void advance(TSLexer *lexer)
{
    const bool skip_char = false;
    lexer->advance(lexer, skip_char);
}
/* Move past the lookahead character with the lexer's `skip` flag set. */
static inline void skip(TSLexer *lexer)
{
    const bool skip_char = true;
    lexer->advance(lexer, skip_char);
}
/* Report whether the ERROR_RECOVERY entry of valid_symbols is set. */
static inline bool in_error_recovery(const bool *valid_symbols)
{
    const bool recovering = valid_symbols[ERROR_RECOVERY];
    return recovering;
}
/* Zero the used bytes of a String and clear it; an empty String is left untouched. */
static inline void reset_string(String *string)
{
    if (string->size == 0)
    {
        return;
    }
    memset(string->contents, 0, string->size);
    array_clear(string);
}
/*
 * Return a Heredoc to its initial state: empty delimiter, all flags false.
 * `current_leading_word` is left as it is.
 */
static inline void reset_heredoc(Heredoc *heredoc)
{
    reset_string(&heredoc->delimiter);
    heredoc->allows_indent = false;
    heredoc->started = false;
    heredoc->is_raw = false;
}
/* Reset every heredoc currently stored in the scanner; the array keeps its size. */
static inline void reset(Scanner *scanner)
{
    uint32_t index = 0;
    while (index < scanner->heredocs.size)
    {
        Heredoc *entry = array_get(&scanner->heredocs, index);
        reset_heredoc(entry);
        index++;
    }
}
/*
 * Write the scanner state into `buffer` and return the number of bytes used.
 *
 * Layout:
 *   byte 0      last_glob_paren_depth
 *   byte 1      ext_was_in_double_quote
 *   byte 2      ext_saw_outside_quote
 *   byte 3      heredoc count (low 8 bits of heredocs.size)
 *   then, per heredoc:
 *     3 bytes   is_raw, started, allows_indent
 *     4 bytes   delimiter size (uint32_t, copied in host byte order)
 *     N bytes   delimiter contents
 *
 * `current_leading_word` is not serialized. Returns 0 when a heredoc record
 * would not fit into TREE_SITTER_SERIALIZATION_BUFFER_SIZE.
 */
static unsigned serialize(Scanner *scanner, char *buffer)
{
    /* Fixed part of a heredoc record: three flag bytes plus the length prefix. */
    const uint32_t record_header = 3 + (uint32_t)sizeof(uint32_t);
    uint32_t size = 0;

    buffer[size++] = (char)scanner->last_glob_paren_depth;
    buffer[size++] = (char)scanner->ext_was_in_double_quote;
    buffer[size++] = (char)scanner->ext_saw_outside_quote;
    buffer[size++] = (char)scanner->heredocs.size;

    for (uint32_t i = 0; i < scanner->heredocs.size; i++)
    {
        Heredoc *heredoc = array_get(&scanner->heredocs, i);

        /*
         * The whole record (flags + length prefix + delimiter) must fit. The
         * previous check left out the 4-byte length prefix, so a record close
         * to the end of the buffer could overrun it. The comparison is done
         * with subtraction so that it cannot wrap around.
         */
        const uint32_t remaining = TREE_SITTER_SERIALIZATION_BUFFER_SIZE - size;
        if (remaining <= record_header || heredoc->delimiter.size >= remaining - record_header)
        {
            return 0;
        }

        buffer[size++] = (char)heredoc->is_raw;
        buffer[size++] = (char)heredoc->started;
        buffer[size++] = (char)heredoc->allows_indent;

        memcpy(&buffer[size], &heredoc->delimiter.size, sizeof(uint32_t));
        size += sizeof(uint32_t);

        if (heredoc->delimiter.size > 0)
        {
            memcpy(&buffer[size], heredoc->delimiter.contents, heredoc->delimiter.size);
            size += heredoc->delimiter.size;
        }
    }
    return size;
}
/*
 * Restore the scanner state from a buffer written by serialize().
 *
 * A zero-length buffer resets every stored heredoc. Otherwise the three state
 * bytes and the heredoc count are read, then one record per heredoc; heredoc
 * slots already present in the scanner are reused and missing ones are
 * appended. Asserts that exactly `length` bytes were consumed.
 */
static void deserialize(Scanner *scanner, const char *buffer, unsigned length)
{
    if (length == 0)
    {
        reset(scanner);
        return;
    }

    uint32_t cursor = 0;
    scanner->last_glob_paren_depth = buffer[cursor++];
    scanner->ext_was_in_double_quote = buffer[cursor++];
    scanner->ext_saw_outside_quote = buffer[cursor++];

    const uint32_t stored_heredocs = (unsigned char)buffer[cursor++];
    for (uint32_t index = 0; index < stored_heredocs; index++)
    {
        Heredoc *entry = NULL;
        if (index >= scanner->heredocs.size)
        {
            /* No slot for this record yet: append a blank one. */
            Heredoc blank = heredoc_new();
            array_push(&scanner->heredocs, blank);
            entry = array_back(&scanner->heredocs);
        }
        else
        {
            entry = array_get(&scanner->heredocs, index);
        }

        entry->is_raw = buffer[cursor++];
        entry->started = buffer[cursor++];
        entry->allows_indent = buffer[cursor++];

        /* The length prefix is copied straight into the String's size field. */
        memcpy(&entry->delimiter.size, &buffer[cursor], sizeof(uint32_t));
        cursor += sizeof(uint32_t);
        array_reserve(&entry->delimiter, entry->delimiter.size);

        if (entry->delimiter.size != 0)
        {
            memcpy(entry->delimiter.contents, &buffer[cursor], entry->delimiter.size);
            cursor += entry->delimiter.size;
        }
    }
    assert(cursor == length);
}
/**
 * Consume one shell "word" and append its unquoted text, followed by a NUL
 * byte, to `unquoted_word`.
 *
 * The word may start with a single or double quote; it then runs up to the
 * matching quote (which is consumed) or to a CR/LF. Without a quote it runs
 * up to the next whitespace character. A backslash makes the following
 * character part of the word.
 *
 * This is an approximation: no POSIX substitutions are performed and the
 * default IFS is assumed.
 *
 * Returns true when at least one character was collected. Returns false for
 * an empty word, and also (without appending the NUL) when a backslash is the
 * last character of the input.
 */
static bool advance_word(TSLexer *lexer, String *unquoted_word)
{
    int32_t quote = 0;
    if (lexer->lookahead == '\'' || lexer->lookahead == '"')
    {
        quote = lexer->lookahead;
        advance(lexer);
    }

    bool collected_any = false;
    for (;;)
    {
        if (!lexer->lookahead)
        {
            break;
        }
        if (quote)
        {
            if (lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n')
            {
                break;
            }
        }
        else if (iswspace(lexer->lookahead))
        {
            break;
        }

        if (lexer->lookahead == '\\')
        {
            advance(lexer);
            if (!lexer->lookahead)
            {
                return false;
            }
        }
        collected_any = true;
        array_push(unquoted_word, lexer->lookahead);
        advance(lexer);
    }

    array_push(unquoted_word, '\0');
    if (quote && lexer->lookahead == quote)
    {
        advance(lexer);
    }
    return collected_any;
}
/*
 * Try to scan a lone `$`.
 *
 * Skips whitespace other than newlines. If the next character is `$`, it is
 * consumed and marked as a BARE_DOLLAR token; the scan succeeds only when the
 * `$` is followed by whitespace, end of input or a double quote.
 */
static inline bool scan_bare_dollar(TSLexer *lexer)
{
    for (;;)
    {
        if (!iswspace(lexer->lookahead) || lexer->lookahead == '\n' || lexer->eof(lexer))
        {
            break;
        }
        skip(lexer);
    }

    if (lexer->lookahead != '$')
    {
        return false;
    }

    advance(lexer);
    lexer->result_symbol = BARE_DOLLAR;
    lexer->mark_end(lexer);

    if (iswspace(lexer->lookahead))
    {
        return true;
    }
    if (lexer->eof(lexer))
    {
        return true;
    }
    return lexer->lookahead == '\"';
}
/**
 * Core of the external scanner. Tries, in a fixed order, each external token
 * that the parser currently accepts (`valid_symbols`): CONCAT,
 * IMMEDIATE_DOUBLE_HASH, EMPTY_VALUE, VARIABLE_NAME / FILE_DESCRIPTOR,
 * BARE_DOLLAR, REGEX, EXTGLOB_PATTERN and EXPANSION_WORD.
 *
 * On success the token kind is stored in lexer->result_symbol and true is
 * returned; false means no external token was recognized.
 *
 * NOTE(review): several sections advance the lexer and then fall through to
 * the following sections instead of returning, so the statement order here is
 * significant.
 */
static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
{
// --- CONCAT: the next character directly continues the current word, i.e. it
// is not NUL, whitespace or one of > < ) ( ; & | { }.
if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols))
{
if (!(lexer->lookahead == 0 || iswspace(lexer->lookahead) || lexer->lookahead == '>' || lexer->lookahead == '<' || lexer->lookahead == ')' || lexer->lookahead == '(' || lexer->lookahead == ';' || lexer->lookahead == '&' || lexer->lookahead == '|' || lexer->lookahead == '{' || lexer->lookahead == '}'))
{
lexer->result_symbol = CONCAT;
// So for a`b`, we want to return a concat. We check if the
// 2nd backtick has whitespace after it, and if it does we
// return concat.
if (lexer->lookahead == '`')
{
// The token end is fixed before the backtick; everything below is lookahead.
lexer->mark_end(lexer);
advance(lexer);
while (lexer->lookahead != '`' && !lexer->eof(lexer))
{
advance(lexer);
}
if (lexer->eof(lexer))
{
return false;
}
if (lexer->lookahead == '`')
{
advance(lexer);
}
return iswspace(lexer->lookahead) || lexer->eof(lexer);
}
// strings w/ expansions that contains escaped quotes or
// backslashes need this to return a concat
if (lexer->lookahead == '\\')
{
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == '"' || lexer->lookahead == '\'' || lexer->lookahead == '\\')
{
return true;
}
if (lexer->eof(lexer))
{
return false;
}
// Any other escaped character: no return here, control falls through to the
// sections below.
}
else
{
return true;
}
}
}
// --- IMMEDIATE_DOUBLE_HASH: `##` that is not directly followed by `}`.
if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !in_error_recovery(valid_symbols))
{
// advance two # and ensure not } after
if (lexer->lookahead == '#')
{
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == '#')
{
advance(lexer);
if (lexer->lookahead != '}')
{
lexer->result_symbol = IMMEDIATE_DOUBLE_HASH;
lexer->mark_end(lexer);
return true;
}
}
}
}
// --- EMPTY_VALUE: accepted when the lookahead is whitespace, end of input,
// `;` or `&`; this branch does not advance the lexer.
if (valid_symbols[EMPTY_VALUE])
{
if (iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == ';' || lexer->lookahead == '&')
{
lexer->result_symbol = EMPTY_VALUE;
return true;
}
}
// --- VARIABLE_NAME / FILE_DESCRIPTOR. Every path through this section ends
// in a return or in a goto to one of the labels near the end of the function.
if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR]) && !in_error_recovery(valid_symbols))
{
// Skip leading blanks (and newlines when NEWLINE is not a valid symbol) as
// well as backslash-newline continuations. Blanks are only skipped when
// EXPANSION_WORD is not valid.
for (;;)
{
if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == '\r' || (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) && !valid_symbols[EXPANSION_WORD])
{
skip(lexer);
}
else if (lexer->lookahead == '\\')
{
skip(lexer);
if (lexer->eof(lexer))
{
lexer->mark_end(lexer);
lexer->result_symbol = VARIABLE_NAME;
return true;
}
if (lexer->lookahead == '\r')
{
skip(lexer);
}
if (lexer->lookahead == '\n')
{
skip(lexer);
}
else
{
if (lexer->lookahead == '\\' && valid_symbols[EXPANSION_WORD])
{
goto expansion_word;
}
return false;
}
}
else
{
break;
}
}
// no '*', '@', '?', '-', '$', '0', '_'
if (!valid_symbols[EXPANSION_WORD] && (lexer->lookahead == '*' || lexer->lookahead == '@' || lexer->lookahead == '?' || lexer->lookahead == '-' || lexer->lookahead == '0' || lexer->lookahead == '_'))
{
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == '=' || lexer->lookahead == '[' || lexer->lookahead == ':' || lexer->lookahead == '-' || lexer->lookahead == '%' || lexer->lookahead == '#' || lexer->lookahead == '/')
{
return false;
}
if (valid_symbols[EXTGLOB_PATTERN] && iswspace(lexer->lookahead))
{
lexer->mark_end(lexer);
lexer->result_symbol = EXTGLOB_PATTERN;
return true;
}
}
// Consume a run of digits, letters and underscores; is_number stays true only
// if every consumed character was a digit.
bool is_number = true;
if (iswdigit(lexer->lookahead))
{
advance(lexer);
}
else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_')
{
is_number = false;
advance(lexer);
}
else
{
// Not a name character: hand over to one of the later sections, or fail.
if (lexer->lookahead == '{')
{
goto brace_start;
}
if (valid_symbols[EXPANSION_WORD])
{
goto expansion_word;
}
if (valid_symbols[EXTGLOB_PATTERN])
{
goto extglob_pattern;
}
return false;
}
for (;;)
{
if (iswdigit(lexer->lookahead))
{
advance(lexer);
}
else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_')
{
is_number = false;
advance(lexer);
}
else
{
break;
}
}
// An all-digit run directly followed by `>` or `<` is a file descriptor.
if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->lookahead == '>' || lexer->lookahead == '<'))
{
lexer->result_symbol = FILE_DESCRIPTOR;
return true;
}
// Otherwise the run is a variable name only if the character after it is one
// of the accepted followers checked below.
if (valid_symbols[VARIABLE_NAME])
{
if (lexer->lookahead == '+')
{
// `name+=` / `name+:` — the `+` itself is left outside the token.
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == '=' || lexer->lookahead == ':')
{
lexer->result_symbol = VARIABLE_NAME;
return true;
}
return false;
}
if (lexer->lookahead == '/')
{
return false;
}
if (lexer->lookahead == '=' || lexer->lookahead == '[' || (lexer->lookahead == ':' && !valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable
// names for function words, only handling : for now? #235
lexer->lookahead == '%' || (lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || (lexer->lookahead == '-'))
{
lexer->mark_end(lexer);
lexer->result_symbol = VARIABLE_NAME;
return true;
}
if (lexer->lookahead == '?')
{
// `name?` counts as a variable name only when a letter follows the `?`.
lexer->mark_end(lexer);
advance(lexer);
lexer->result_symbol = VARIABLE_NAME;
return iswalpha(lexer->lookahead);
}
}
return false;
}
// --- BARE_DOLLAR: a lone `$` (see scan_bare_dollar).
if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && scan_bare_dollar(lexer))
{
return true;
}
// --- REGEX: pattern text, tracked with bracket-depth counters.
if ((valid_symbols[REGEX]) && !in_error_recovery(valid_symbols))
{
if (valid_symbols[REGEX])
{
while (iswspace(lexer->lookahead))
{
skip(lexer);
}
}
// NOTE(review): as written this condition is true for every lookahead except
// a double quote.
if ((lexer->lookahead != '"' && lexer->lookahead != '\'') || ((lexer->lookahead == '$' || lexer->lookahead == '\'')) || (lexer->lookahead == '\''))
{
typedef struct
{
bool done;
bool advanced_once;
bool found_non_alnumdollarunderdash;
bool last_was_escape;
bool in_single_quote;
uint32_t paren_depth;
uint32_t bracket_depth;
uint32_t brace_depth;
} State;
if (lexer->lookahead == '$')
{
lexer->mark_end(lexer);
advance(lexer);
// `$(` is not pattern text.
if (lexer->lookahead == '(')
{
return false;
}
}
lexer->mark_end(lexer);
State state = {false, false, false, false, false, 0, 0, 0};
while (!state.done)
{
if (state.in_single_quote)
{
if (lexer->lookahead == '\'')
{
state.in_single_quote = false;
advance(lexer);
lexer->mark_end(lexer);
}
}
// Update the depth counters. A closing bracket seen at depth 0 ends the scan.
// The unsigned decrement that follows wraps around in that case, but the loop
// exits before the counter is read again.
switch (lexer->lookahead)
{
case '\\':
state.last_was_escape = true;
break;
case '\0':
return false;
case '(':
state.paren_depth++;
state.last_was_escape = false;
break;
case '[':
state.bracket_depth++;
state.last_was_escape = false;
break;
case '{':
if (!state.last_was_escape)
state.brace_depth++;
state.last_was_escape = false;
break;
case ')':
if (state.paren_depth == 0)
state.done = true;
state.paren_depth--;
state.last_was_escape = false;
break;
case ']':
if (state.bracket_depth == 0)
state.done = true;
state.bracket_depth--;
state.last_was_escape = false;
break;
case '}':
if (state.brace_depth == 0)
state.done = true;
state.brace_depth--;
state.last_was_escape = false;
break;
case '\'':
// Enter or exit a single-quoted string.
state.in_single_quote = !state.in_single_quote;
advance(lexer);
state.advanced_once = true;
state.last_was_escape = false;
continue;
default:
state.last_was_escape = false;
break;
}
if (!state.done)
{
if (valid_symbols[REGEX])
{
// Whitespace outside parentheses is consumed without extending the token
// end, so trailing blanks are not included in the token.
bool was_space = !state.in_single_quote && iswspace(lexer->lookahead);
advance(lexer);
state.advanced_once = true;
if (!was_space || state.paren_depth > 0)
{
lexer->mark_end(lexer);
}
}
}
}
lexer->result_symbol = REGEX;
// Nothing was consumed: do not produce an empty token.
if (valid_symbols[REGEX] && !state.advanced_once)
{
return false;
}
return true;
}
}
// --- EXTGLOB_PATTERN. Also entered via goto from the VARIABLE_NAME section.
extglob_pattern:
if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols))
{
// first skip ws, then check for ? * + @ !
while (iswspace(lexer->lookahead))
{
skip(lexer);
}
if (lexer->lookahead == '?' || lexer->lookahead == '*' || lexer->lookahead == '+' || lexer->lookahead == '@' || lexer->lookahead == '!' || lexer->lookahead == '-' || lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.' || lexer->lookahead == '[' || (iswalpha(lexer->lookahead)))
{
if (lexer->lookahead == '\\')
{
// Only an escaped blank or an escaped double quote may start a pattern.
advance(lexer);
if ((iswspace(lexer->lookahead) || lexer->lookahead == '"') && lexer->lookahead != '\r' && lexer->lookahead != '\n')
{
advance(lexer);
}
else
{
return false;
}
}
if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0)
{
lexer->mark_end(lexer);
advance(lexer);
if (iswspace(lexer->lookahead))
{
return false;
}
}
lexer->mark_end(lexer);
bool was_non_alpha = !iswalpha(lexer->lookahead);
if (lexer->lookahead != '[')
{
// no esac
if (lexer->lookahead == 'e')
{
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == 's')
{
advance(lexer);
if (lexer->lookahead == 'a')
{
advance(lexer);
if (lexer->lookahead == 'c')
{
advance(lexer);
if (iswspace(lexer->lookahead))
{
return false;
}
}
}
}
}
else
{
advance(lexer);
}
}
// -\w is just a word, find something else special
if (lexer->lookahead == '-')
{
lexer->mark_end(lexer);
advance(lexer);
while (iswalnum(lexer->lookahead))
{
advance(lexer);
}
if (lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.')
{
return false;
}
lexer->mark_end(lexer);
}
// case item -) or *)
if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0)
{
lexer->mark_end(lexer);
advance(lexer);
if (iswspace(lexer->lookahead))
{
lexer->result_symbol = EXTGLOB_PATTERN;
return was_non_alpha;
}
}
if (iswspace(lexer->lookahead))
{
lexer->mark_end(lexer);
lexer->result_symbol = EXTGLOB_PATTERN;
scanner->last_glob_paren_depth = 0;
return true;
}
// The pattern ends in front of a `${` or `$(`.
if (lexer->lookahead == '$')
{
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == '{' || lexer->lookahead == '(')
{
lexer->result_symbol = EXTGLOB_PATTERN;
return true;
}
}
// The pattern ends in front of a `|`.
if (lexer->lookahead == '|')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->result_symbol = EXTGLOB_PATTERN;
return true;
}
if (!iswalnum(lexer->lookahead) && lexer->lookahead != '(' && lexer->lookahead != '"' && lexer->lookahead != '[' && lexer->lookahead != '?' && lexer->lookahead != '/' && lexer->lookahead != '\\' && lexer->lookahead != '_' && lexer->lookahead != '*')
{
return false;
}
typedef struct
{
bool done;
bool saw_non_alphadot;
uint32_t paren_depth;
uint32_t bracket_depth;
uint32_t brace_depth;
} State;
// The parenthesis depth starts from the value saved by a previous pattern token.
State state = {false, was_non_alpha, scanner->last_glob_paren_depth, 0, 0};
while (!state.done)
{
switch (lexer->lookahead)
{
case '\0':
return false;
case '(':
state.paren_depth++;
break;
case '[':
state.bracket_depth++;
break;
case '{':
state.brace_depth++;
break;
case ')':
if (state.paren_depth == 0)
{
state.done = true;
}
state.paren_depth--;
break;
case ']':
if (state.bracket_depth == 0)
{
state.done = true;
}
state.bracket_depth--;
break;
case '}':
if (state.brace_depth == 0)
{
state.done = true;
}
state.brace_depth--;
break;
}
// A `|` outside all brackets ends the pattern.
if (lexer->lookahead == '|')
{
lexer->mark_end(lexer);
advance(lexer);
if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0)
{
lexer->result_symbol = EXTGLOB_PATTERN;
return true;
}
}
if (!state.done)
{
bool was_space = iswspace(lexer->lookahead);
if (lexer->lookahead == '$')
{
lexer->mark_end(lexer);
if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
{
state.saw_non_alphadot = true;
}
advance(lexer);
if (lexer->lookahead == '(' || lexer->lookahead == '{')
{
// Stop in front of the expansion and remember the depth for the next token.
lexer->result_symbol = EXTGLOB_PATTERN;
scanner->last_glob_paren_depth = state.paren_depth;
return state.saw_non_alphadot;
}
}
if (was_space)
{
lexer->mark_end(lexer);
lexer->result_symbol = EXTGLOB_PATTERN;
scanner->last_glob_paren_depth = 0;
return state.saw_non_alphadot;
}
if (lexer->lookahead == '"')
{
lexer->mark_end(lexer);
lexer->result_symbol = EXTGLOB_PATTERN;
scanner->last_glob_paren_depth = 0;
return state.saw_non_alphadot;
}
if (lexer->lookahead == '\\')
{
// NOTE(review): the lookahead is a backslash here, so the condition below is
// never true and saw_non_alphadot is not changed by this branch.
if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
{
state.saw_non_alphadot = true;
}
advance(lexer);
if (iswspace(lexer->lookahead) || lexer->lookahead == '"')
{
advance(lexer);
}
}
else
{
if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
{
state.saw_non_alphadot = true;
}
advance(lexer);
}
if (!was_space)
{
lexer->mark_end(lexer);
}
}
}
// The token is accepted only if it contained something other than letters,
// dots and backslashes.
lexer->result_symbol = EXTGLOB_PATTERN;
scanner->last_glob_paren_depth = 0;
return state.saw_non_alphadot;
}
scanner->last_glob_paren_depth = 0;
return false;
}
// --- EXPANSION_WORD. Also entered via goto from the VARIABLE_NAME section.
expansion_word:
if (valid_symbols[EXPANSION_WORD])
{
// advanced_once: a non-whitespace character was consumed;
// advance_once_space: a whitespace character was consumed.
bool advanced_once = false;
bool advance_once_space = false;
for (;;)
{
if (lexer->lookahead == '\"')
return false;
if (lexer->lookahead == '$')
{
// Stop in front of a nested expansion (`${`, `$(`, `$'`, `$name`).
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || iswalnum(lexer->lookahead))
{
lexer->result_symbol = EXPANSION_WORD;
return advanced_once;
}
advanced_once = true;
}
// The closing brace ends the word; it is not part of the token.
if (lexer->lookahead == '}')
{
lexer->mark_end(lexer);
lexer->result_symbol = EXPANSION_WORD;
return advanced_once || advance_once_space;
}
if (lexer->lookahead == '(' && !(advanced_once || advance_once_space))
{
lexer->mark_end(lexer);
advance(lexer);
while (lexer->lookahead != ')' && !lexer->eof(lexer))
{
// if we find a $( or ${ assume this is valid and is
// a garbage concatenation of some weird word + an
// expansion
// I wonder where this can fail
if (lexer->lookahead == '$')
{
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || iswalnum(lexer->lookahead))
{
lexer->result_symbol = EXPANSION_WORD;
return advanced_once;
}
advanced_once = true;
}
else
{
advanced_once = advanced_once || !iswspace(lexer->lookahead);
advance_once_space = advance_once_space || iswspace(lexer->lookahead);
advance(lexer);
}
}
lexer->mark_end(lexer);
if (lexer->lookahead == ')')
{
advanced_once = true;
advance(lexer);
lexer->mark_end(lexer);
if (lexer->lookahead == '}')
return false;
}
else
return false;
}
if (lexer->lookahead == '\'')
return false;
if (lexer->eof(lexer))
return false;
advanced_once = advanced_once || !iswspace(lexer->lookahead);
advance_once_space = advance_once_space || iswspace(lexer->lookahead);
advance(lexer);
}
}
// Reached by falling through the sections above or via goto when a `{` is
// seen where a name was expected; no token is produced.
brace_start:
return false;
}
/*
 * Allocate a zero-initialized Scanner with an empty heredoc array. The caller
 * receives ownership; release it with tree_sitter_sh_external_scanner_destroy.
 */
void *tree_sitter_sh_external_scanner_create()
{
    Scanner *state = calloc(1, sizeof *state);
    array_init(&state->heredocs);
    return state;
}
/* Exported scan entry point: forwards to scan() with the payload as the Scanner. */
bool tree_sitter_sh_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols)
{
    return scan((Scanner *)payload, lexer, valid_symbols);
}
unsigned tree_sitter_sh_external_scanner_serialize(void *payload, char *state)
{
Scanner *scanner = (Scanner *)payload;
return serialize(scanner, state);
}
void tree_sitter_sh_external_scanner_deserialize(void *payload, const char *state, unsigned length)
{
Scanner *scanner = (Scanner *)payload;
deserialize(scanner, state, length);
}
void tree_sitter_sh_external_scanner_destroy(void *payload)
{
Scanner *scanner = (Scanner *)payload;
for (size_t i = 0; i < scanner->heredocs.size; i++)
{
Heredoc *heredoc = array_get(&scanner->heredocs, i);
array_delete(&heredoc->current_leading_word);
array_delete(&heredoc->delimiter);
}
array_delete(&scanner->heredocs);
free(scanner);
}