update: remove ts
This commit is contained in:
parent
142ac9c9e1
commit
a35eafd84c
3 changed files with 0 additions and 1613 deletions
|
|
@ -1,606 +0,0 @@
|
|||
/**
|
||||
* @file Bash grammar for tree-sitter
|
||||
* @author Max Brunsfeld <maxbrunsfeld@gmail.com>
|
||||
* @author Amaan Qureshi <amaanq12@gmail.com>
|
||||
* @license MIT
|
||||
*/
|
||||
|
||||
/// <reference types="tree-sitter-cli/dsl" />
|
||||
// @ts-check
|
||||
|
||||
// Characters that may not appear unescaped inside a bare word token.
// Spread into noneOf() to build the negated character classes used by the
// word-like token rules.
const SPECIAL_CHARACTERS = [
  '|',
  '&',
  ';',
  '<',
  '>',
  '(',
  ')',
  '$',
  '`',
  '\\',
  '\"',
  '\'',
  ' ',
  '\t',
  '\n',
];
|
||||
|
||||
// Named precedence levels passed to prec()/prec.left() by the arithmetic
// rules. A larger number binds more tightly.
const PREC = {
  // assignment-like and conditional
  UPDATE: 0,
  ASSIGN: 1,
  TERNARY: 2,

  // logical and bitwise
  LOGICAL_OR: 3,
  LOGICAL_AND: 4,
  BITWISE_OR: 5,
  BITWISE_XOR: 6,
  BITWISE_AND: 7,

  // comparison and tests
  EQUALITY: 8,
  COMPARE: 9,
  TEST: 10,
  UNARY: 11,

  // arithmetic
  SHIFT: 12,
  ADD: 13,
  MULTIPLY: 14,
  EXPONENT: 15,

  // prefix / postfix operators
  NEGATE: 16,
  PREFIX: 17,
  POSTFIX: 18,
};
|
||||
|
||||
module.exports = grammar({
|
||||
name: 'sh',
|
||||
|
||||
conflicts: $ => [
|
||||
[$.command, $._variable_assignments],
|
||||
[$.redirected_statement, $.command],
|
||||
[$.redirected_statement, $.command_substitution],
|
||||
[$._expansion_body, $._expansion_regex],
|
||||
[$.pipeline],
|
||||
],
|
||||
|
||||
inline: $ => [
|
||||
$._statement,
|
||||
$._literal,
|
||||
$._terminated_statement,
|
||||
$._primary_expression,
|
||||
$._simple_variable_name,
|
||||
$._multiline_variable_name,
|
||||
$._special_variable_name,
|
||||
$._statement_not_subshell,
|
||||
],
|
||||
|
||||
externals: $ => [
|
||||
$.file_descriptor,
|
||||
$._empty_value,
|
||||
$._concat,
|
||||
$.variable_name,
|
||||
$.regex,
|
||||
$._expansion_word,
|
||||
$.extglob_pattern,
|
||||
$._bare_dollar,
|
||||
$._immediate_double_hash,
|
||||
//'<<',
|
||||
/\n/,
|
||||
'(',
|
||||
$.__error_recovery,
|
||||
],
|
||||
|
||||
extras: $ => [
|
||||
$.comment,
|
||||
/\s/,
|
||||
/\\\r?\n/,
|
||||
/\\( |\t|\v|\f)/,
|
||||
],
|
||||
|
||||
word: $ => $.word,
|
||||
|
||||
rules: {
|
||||
program: $ => optional($._statements),
|
||||
|
||||
_statements: $ => prec(1, seq(
|
||||
repeat(seq(
|
||||
field('stmt', $._statement),
|
||||
field('term', $.terminator),
|
||||
)),
|
||||
field('stmt', $._statement),
|
||||
field('term', optional($.terminator)),
|
||||
)),
|
||||
|
||||
_terminated_statement: $ => repeat1(seq(
|
||||
field('stmt', $._statement),
|
||||
field('term', $.terminator)
|
||||
)),
|
||||
|
||||
// Statements
|
||||
|
||||
_statement: $ => choice(
|
||||
$._statement_not_subshell,
|
||||
$.subshell,
|
||||
),
|
||||
|
||||
_statement_not_subshell: $ => choice(
|
||||
// $.case_statement,
|
||||
$.command,
|
||||
$.compound_statement,
|
||||
// $.for_statement,
|
||||
// $.function_definition,
|
||||
// $.if_statement,
|
||||
$.list,
|
||||
$.negated_command,
|
||||
$.pipeline,
|
||||
$.redirected_statement,
|
||||
$.variable_assignment,
|
||||
$._variable_assignments,
|
||||
// $.while_statement,
|
||||
),
|
||||
|
||||
_statement_not_pipeline: $ => prec(1, choice(
|
||||
// $.case_statement,
|
||||
$.command,
|
||||
$.compound_statement,
|
||||
// $.for_statement,
|
||||
// $.function_definition,
|
||||
// $.if_statement,
|
||||
$.list,
|
||||
$.negated_command,
|
||||
$.redirected_statement,
|
||||
$.subshell,
|
||||
$.variable_assignment,
|
||||
$._variable_assignments,
|
||||
// $.while_statement,
|
||||
)),
|
||||
|
||||
redirected_statement: $ => prec.dynamic(-1, prec.right(-1, choice(
|
||||
seq(
|
||||
field('body', $._statement),
|
||||
field('redr', repeat1(choice($.file_redirect, $.heredoc_redirect))),
|
||||
),
|
||||
field('redr', repeat1($.file_redirect)),
|
||||
))),
|
||||
|
||||
/*
|
||||
for_statement: $ => seq(
|
||||
'for',
|
||||
field('var', $._simple_variable_name),
|
||||
optional(seq(
|
||||
'in',
|
||||
field('value', repeat1($._literal)),
|
||||
)),
|
||||
$.terminator,
|
||||
field('body', $.do_group),
|
||||
),
|
||||
|
||||
while_statement: $ => seq(
|
||||
choice('while', 'until'),
|
||||
field('cond', $._terminated_statement),
|
||||
field('body', $.do_group),
|
||||
),
|
||||
|
||||
do_group: $ => seq(
|
||||
'do',
|
||||
optional($._terminated_statement),
|
||||
'done',
|
||||
),
|
||||
|
||||
if_statement: $ => seq(
|
||||
'if',
|
||||
field('cond', $._terminated_statement),
|
||||
'then',
|
||||
field('body', optional($._terminated_statement)),
|
||||
field('elif', repeat($.elif_clause)),
|
||||
field('else', optional($.else_clause)),
|
||||
'fi',
|
||||
),
|
||||
|
||||
elif_clause: $ => seq(
|
||||
'elif',
|
||||
field('cond', $._terminated_statement),
|
||||
'then',
|
||||
field('body', optional($._terminated_statement)),
|
||||
),
|
||||
|
||||
else_clause: $ => seq(
|
||||
'else',
|
||||
field('body', optional($._terminated_statement)),
|
||||
),
|
||||
|
||||
case_statement: $ => seq(
|
||||
'case',
|
||||
field('value', $._literal),
|
||||
optional($.terminator),
|
||||
'in',
|
||||
optional($.terminator),
|
||||
optional(seq(
|
||||
repeat(field('cases', $.case_item)),
|
||||
field('cases', alias($._case_item_last, $.case_item))
|
||||
)),
|
||||
'esac',
|
||||
),
|
||||
|
||||
_case_item_last: $ => seq(
|
||||
optional('('),
|
||||
field('value', choice($._literal, $._extglob_blob)),
|
||||
repeat(seq('|', field('value', choice($._literal, $._extglob_blob)))),
|
||||
')',
|
||||
repeat('\n'),
|
||||
choice(field('body', $._statements),),
|
||||
optional(';;')
|
||||
),
|
||||
|
||||
case_item: $ => seq(
|
||||
optional('('),
|
||||
field('value', choice($._literal, $._extglob_blob)),
|
||||
repeat(seq('|', field('value', choice($._literal, $._extglob_blob)))),
|
||||
')',
|
||||
repeat('\n'),
|
||||
choice(field('body', $._statements)),
|
||||
';;'
|
||||
),
|
||||
|
||||
function_definition: $ => prec.right(seq(
|
||||
field('name', $.word),
|
||||
'(', ')',
|
||||
field('body', choice($.compound_statement, $.subshell, $.command, $.while_statement, $.if_statement, $.for_statement, $._variable_assignments, repeat1($.file_redirect))),
|
||||
)),
|
||||
*/
|
||||
|
||||
compound_statement: $ => seq('{', $._terminated_statement, '}'),
|
||||
subshell: $ => seq('(', $._statements, ')'),
|
||||
|
||||
pipeline: $ => prec.right(seq(
|
||||
$._statement_not_pipeline,
|
||||
repeat1(seq('|', $._statement_not_pipeline)),
|
||||
)),
|
||||
|
||||
list: $ => prec.left(-1, seq(
|
||||
field('cmd', $._statement),
|
||||
field('op', alias(choice('&&', '||'), $.operator)),
|
||||
field('cmd', $._statement),
|
||||
)),
|
||||
|
||||
// Commands
|
||||
|
||||
negated_command: $ => seq(
|
||||
'!',
|
||||
choice(
|
||||
prec(2, $.command),
|
||||
prec(1, $.variable_assignment),
|
||||
$.subshell,
|
||||
),
|
||||
),
|
||||
|
||||
command: $ => prec.left(seq(
|
||||
repeat(choice(
|
||||
$.variable_assignment,
|
||||
field('redr', $.file_redirect),
|
||||
)),
|
||||
field('name', $.command_name),
|
||||
repeat(choice(
|
||||
field('arg', $._literal),
|
||||
field('arg', alias($._bare_dollar, $.word)),
|
||||
)),
|
||||
)),
|
||||
|
||||
command_name: $ => $._literal,
|
||||
|
||||
variable_assignment: $ => seq(
|
||||
field('name', choice(
|
||||
$.variable_name,
|
||||
)),
|
||||
'=',
|
||||
field('value', choice(
|
||||
$._literal,
|
||||
$._empty_value,
|
||||
alias($._comment_word, $.word),
|
||||
)),
|
||||
),
|
||||
|
||||
_variable_assignments: $ => seq($.variable_assignment, repeat1($.variable_assignment)),
|
||||
|
||||
file_redirect: $ => prec.left(seq(
|
||||
field('op', alias(choice('<', '>', '>>'), $.operator)),
|
||||
field('dest', repeat1($._literal)),
|
||||
)),
|
||||
|
||||
heredoc_redirect: $ => seq(
|
||||
field('op', alias('<<', $.operator)),
|
||||
field('del', alias(/[\w\d\-\._]+/, $.heredoc_delimiter)),
|
||||
),
|
||||
|
||||
// Literals
|
||||
|
||||
_literal: $ => choice($.concatenation, $._primary_expression),
|
||||
|
||||
_primary_expression: $ => choice(
|
||||
$.word,
|
||||
$.string,
|
||||
$.raw_string,
|
||||
$.number,
|
||||
$.expansion,
|
||||
$.simple_expansion,
|
||||
$.command_substitution,
|
||||
$.arithmetic_expansion,
|
||||
),
|
||||
|
||||
arithmetic_expansion: $ => seq('$((', $._arithmetic_expression, '))'),
|
||||
|
||||
_arithmetic_expression: $ => prec(1, choice(
|
||||
$.arithmetic_literal,
|
||||
$.arithmetic_unary_expression,
|
||||
$.arithmetic_ternary_expression,
|
||||
$.arithmetic_binary_expression,
|
||||
$.arithmetic_postfix_expression,
|
||||
$.arithmetic_parenthesized_expression,
|
||||
)),
|
||||
|
||||
arithmetic_literal: $ => prec(1, choice(
|
||||
$.number,
|
||||
$.simple_expansion,
|
||||
$.expansion,
|
||||
$._simple_variable_name,
|
||||
$.variable_name,
|
||||
$.string,
|
||||
)),
|
||||
|
||||
arithmetic_binary_expression: $ => {
|
||||
|
||||
/** @type {[RuleOrLiteral, number][]} */
|
||||
const table = [
|
||||
[choice('+', '-'), PREC.ADD],
|
||||
[choice('*', '/', '%'), PREC.MULTIPLY],
|
||||
];
|
||||
|
||||
return choice(...table.map(([operator, precedence]) =>
|
||||
prec.left(precedence, seq(
|
||||
field('lhs', $._arithmetic_expression),
|
||||
field('op', alias(operator, $.operator)),
|
||||
field('rhs', $._arithmetic_expression),
|
||||
))
|
||||
));
|
||||
},
|
||||
|
||||
arithmetic_ternary_expression: $ => prec.left(PREC.TERNARY, seq(
|
||||
field('cond', $._arithmetic_expression),
|
||||
'?',
|
||||
field('then', $._arithmetic_expression),
|
||||
':',
|
||||
field('else', $._arithmetic_expression),
|
||||
)),
|
||||
|
||||
arithmetic_unary_expression: $ => prec(PREC.UNARY, seq(
|
||||
field('op', alias(tokenLiterals(1, '-', '+'), $.operator)),
|
||||
$._arithmetic_expression,
|
||||
)),
|
||||
|
||||
arithmetic_postfix_expression: $ => prec(PREC.POSTFIX, seq(
|
||||
$._arithmetic_expression,
|
||||
field('op', alias(choice('++', '--'), $.operator)),
|
||||
)),
|
||||
|
||||
arithmetic_parenthesized_expression: $ => seq('(', $._arithmetic_expression, ')'),
|
||||
|
||||
concatenation: $ => prec(-1, seq(
|
||||
$._primary_expression,
|
||||
repeat1(seq(
|
||||
choice($._concat, alias(/`\s*`/, '``')),
|
||||
choice(
|
||||
$._primary_expression,
|
||||
alias($._comment_word, $.word),
|
||||
alias($._bare_dollar, $.word),
|
||||
alias(/`\s*`/, '``')
|
||||
),
|
||||
)),
|
||||
optional(seq($._concat, alias('$', $.word))),
|
||||
)),
|
||||
|
||||
string: $ => seq(
|
||||
'"',
|
||||
repeat(seq(
|
||||
choice(
|
||||
seq(optional('$'), $.string_content),
|
||||
$.expansion,
|
||||
$.simple_expansion,
|
||||
$.command_substitution,
|
||||
$.arithmetic_expansion,
|
||||
),
|
||||
optional($._concat),
|
||||
)),
|
||||
optional(alias('$', $.string_content)),
|
||||
'"',
|
||||
),
|
||||
|
||||
string_content: _ => token(prec(-1, /([^"`$\\\r\n]|\\(.|\r?\n))+/)),
|
||||
|
||||
raw_string: _ => /'[^']*'/,
|
||||
|
||||
number: _ => /[0-9]+/,
|
||||
|
||||
simple_expansion: $ => seq(
|
||||
'$',
|
||||
choice(
|
||||
$._simple_variable_name,
|
||||
$._multiline_variable_name,
|
||||
$._special_variable_name,
|
||||
$.variable_name,
|
||||
alias('!', $.special_variable_name),
|
||||
alias('#', $.special_variable_name),
|
||||
),
|
||||
),
|
||||
|
||||
expansion: $ => seq(
|
||||
'${',
|
||||
optional($._expansion_body),
|
||||
'}',
|
||||
),
|
||||
|
||||
_expansion_body: $ => seq(
|
||||
field('len', optional(alias('#', $.operator))),
|
||||
field('name', choice($.variable_name, $._simple_variable_name, $._special_variable_name)),
|
||||
optional(choice($._expansion_expression, $._expansion_regex)),
|
||||
),
|
||||
|
||||
|
||||
_expansion_expression: $ => prec(1, seq(
|
||||
field('op', alias(immediateLiterals(':-', '-', ':=', '=', ':?', '?', ':+', '+'), $.operator)),
|
||||
field('args', optional(choice(
|
||||
alias($._concatenation_in_expansion, $.concatenation),
|
||||
alias(prec(1, $._word_no_brace), $.word2),
|
||||
$.expansion,
|
||||
$.raw_string,
|
||||
$.string,
|
||||
))),
|
||||
)),
|
||||
|
||||
_expansion_regex: $ => seq(
|
||||
field('op', alias(choice('#', $._immediate_double_hash, '%', '%%'), $.operator)),
|
||||
field('args', repeat(choice(
|
||||
$.raw_string,
|
||||
$.regex,
|
||||
$.string,
|
||||
alias(')', $.regex),
|
||||
alias(/\s+/, $.regex),
|
||||
))),
|
||||
),
|
||||
|
||||
|
||||
_concatenation_in_expansion: $ => prec(-2, seq(
|
||||
choice(
|
||||
alias($._word_no_brace, $.word),
|
||||
alias($._expansion_word, $.word),
|
||||
$.variable_name,
|
||||
$.simple_expansion,
|
||||
$.expansion,
|
||||
$.string,
|
||||
$.raw_string,
|
||||
$.command_substitution,
|
||||
),
|
||||
repeat1(seq(
|
||||
choice($._concat, alias(/`\s*`/, '``')),
|
||||
choice(
|
||||
alias($._word_no_brace, $.word),
|
||||
alias($._expansion_word, $.word),
|
||||
$.variable_name,
|
||||
$.simple_expansion,
|
||||
$.expansion,
|
||||
$.string,
|
||||
$.raw_string,
|
||||
$.command_substitution,
|
||||
),
|
||||
)),
|
||||
)),
|
||||
|
||||
command_substitution: $ => choice(
|
||||
seq('$(', $._statements, ')'),
|
||||
seq('$(', field('redr', $.file_redirect), ')'),
|
||||
prec(1, seq('`', $._statements, '`')),
|
||||
),
|
||||
|
||||
_extglob_blob: $ => choice(
|
||||
$.extglob_pattern,
|
||||
seq(
|
||||
$.extglob_pattern,
|
||||
choice($.string, $.expansion, $.command_substitution),
|
||||
optional($.extglob_pattern),
|
||||
),
|
||||
),
|
||||
|
||||
comment: _ => token(prec(-10, /#.*/)),
|
||||
|
||||
_comment_word: _ => token(prec(-8, seq(
|
||||
choice(
|
||||
noneOf(...SPECIAL_CHARACTERS),
|
||||
seq('\\', noneOf('\\s')),
|
||||
),
|
||||
repeat(choice(
|
||||
noneOf(...SPECIAL_CHARACTERS),
|
||||
seq('\\', noneOf('\\s')),
|
||||
'\\ ',
|
||||
)),
|
||||
))),
|
||||
|
||||
_simple_variable_name: $ => alias(/\w+/, $.variable_name),
|
||||
_multiline_variable_name: $ => alias(
|
||||
token(prec(-1, /(\w|\\\r?\n)+/)),
|
||||
$.variable_name,
|
||||
),
|
||||
|
||||
_special_variable_name: $ => alias(choice('*', '@', '?', '!', '#', '-', '$', '0'), $.special_variable_name),
|
||||
|
||||
word: _ => token(seq(
|
||||
choice(
|
||||
noneOf('#', ...SPECIAL_CHARACTERS),
|
||||
seq('\\', noneOf('\\s')),
|
||||
),
|
||||
repeat(choice(
|
||||
noneOf(...SPECIAL_CHARACTERS),
|
||||
seq('\\', noneOf('\\s')),
|
||||
'\\ ',
|
||||
)),
|
||||
)),
|
||||
|
||||
|
||||
_word_no_brace: _ => prec(2, token(seq(
|
||||
choice(
|
||||
noneOf('#', '{', '}', ...SPECIAL_CHARACTERS),
|
||||
seq('\\', noneOf('\\s')),
|
||||
),
|
||||
repeat(choice(
|
||||
noneOf('{', '}', ...SPECIAL_CHARACTERS),
|
||||
seq('\\', noneOf('\\s')),
|
||||
'\\ ',
|
||||
)),
|
||||
))),
|
||||
terminator: _ => choice(';', ';;', /\n/),
|
||||
},
|
||||
});
|
||||
|
||||
/**
 * Returns a regular expression that matches any single character except the
 * ones provided.
 *
 * Every argument is placed inside one negated character class. A
 * single-character argument that is special inside a class (`\`, `]`, `^`,
 * `-`) is escaped so that it is matched literally instead of closing the
 * class or forming a range. Longer arguments such as `'\\s'` are inserted
 * verbatim, which lets callers pass class escapes.
 *
 * @param {...string} characters
 *
 * @return {RegExp}
 *
 */
function noneOf(...characters) {
  const classBody = characters
    .map(c => (c.length === 1 && '\\]^-'.includes(c) ? '\\' + c : c))
    .join('');
  return new RegExp('[^' + classBody + ']');
}
|
||||
|
||||
/**
 * Builds a rule matching zero or more occurrences of `rule` separated by
 * commas: the comma-separated list from commaSep1(), made optional.
 *
 * @param {RuleOrLiteral} rule
 *
 * @return {ChoiceRule}
 *
 */
function commaSep(rule) {
  const nonEmptyList = commaSep1(rule);
  return optional(nonEmptyList);
}
|
||||
|
||||
/**
 * Builds a rule matching one or more occurrences of `rule` separated by
 * commas: `rule` followed by any number of `, rule` pairs.
 *
 * @param {RuleOrLiteral} rule
 *
 * @return {SeqRule}
 *
 */
function commaSep1(rule) {
  const commaThenRule = seq(',', rule);
  return seq(rule, repeat(commaThenRule));
}
|
||||
|
||||
/**
 *
 * Wraps each literal in token.immediate() and returns a choice between the
 * resulting tokens.
 *
 * @param {...(RegExp|string)} literals
 *
 * @return {ChoiceRule}
 */
function immediateLiterals(...literals) {
  const immediateTokens = [];
  for (const literal of literals) {
    immediateTokens.push(token.immediate(literal));
  }
  return choice(...immediateTokens);
}
|
||||
|
||||
/**
 *
 * Wraps each literal in token(prec(precedence, literal)) and returns a choice
 * between the resulting tokens.
 *
 * @param {number} precedence lexical precedence given to every token
 *
 * @param {...(RegExp|string)} literals
 *
 * @return {ChoiceRule}
 */
function tokenLiterals(precedence, ...literals) {
  const rankedTokens = [];
  for (const literal of literals) {
    rankedTokens.push(token(prec(precedence, literal)));
  }
  return choice(...rankedTokens);
}
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
{
|
||||
"name": "tree-sitter-bash",
|
||||
"version": "0.0.1",
|
||||
"description": "Bash grammar for tree-sitter",
|
||||
"repository": "github:tree-sitter/tree-sitter-bash",
|
||||
"license": "MIT",
|
||||
"main": "bindings/node",
|
||||
"types": "bindings/node",
|
||||
"keywords": [
|
||||
"incremental",
|
||||
"parsing",
|
||||
"tree-sitter",
|
||||
"bash"
|
||||
],
|
||||
"files": [
|
||||
"grammar.js",
|
||||
"binding.gyp",
|
||||
"prebuilds/**",
|
||||
"bindings/node/*",
|
||||
"queries/*",
|
||||
"src/**"
|
||||
],
|
||||
"dependencies": {
|
||||
"node-addon-api": "^7.1.0",
|
||||
"node-gyp-build": "^4.8.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"prebuildify": "^6.0.0",
|
||||
"tree-sitter-cli": "^0.22.6"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"tree-sitter": "^0.21.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"tree-sitter": {
|
||||
"optional": true
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
"install": "node-gyp-build",
|
||||
"prebuildify": "prebuildify --napi --strip",
|
||||
"build": "tree-sitter generate --no-bindings",
|
||||
"build-wasm": "tree-sitter build --wasm",
|
||||
"test": "tree-sitter test",
|
||||
"parse": "tree-sitter parse"
|
||||
},
|
||||
"tree-sitter": [
|
||||
{
|
||||
"scope": "source.bash",
|
||||
"injection-regex": "^bash$"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,954 +0,0 @@
|
|||
#include "tree_sitter/alloc.h"
|
||||
#include "tree_sitter/array.h"
|
||||
#include "tree_sitter/parser.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <wctype.h>
|
||||
|
||||
// External token kinds. The enumerators index `valid_symbols`, so their order
// has to match the `externals` list of the grammar.
enum TokenType {
    FILE_DESCRIPTOR,
    EMPTY_VALUE,
    CONCAT,
    VARIABLE_NAME,
    REGEX,
    EXPANSION_WORD,
    EXTGLOB_PATTERN,
    BARE_DOLLAR,
    IMMEDIATE_DOUBLE_HASH,
    // HEREDOC_ARROW,
    // HEREDOC_ARROW_DASH,
    NEWLINE,
    OPENING_PAREN,
    ERROR_RECOVERY,
};
|
||||
|
||||
// Byte string built on the Array(T) macro from tree_sitter/array.h; it is
// manipulated below with array_push / array_clear / array_reserve.
typedef Array(char) String;
|
||||
|
||||
typedef struct Heredoc
|
||||
{
|
||||
bool is_raw;
|
||||
bool started;
|
||||
bool allows_indent;
|
||||
String delimiter;
|
||||
String current_leading_word;
|
||||
} Heredoc;
|
||||
|
||||
/*
 * Initializer for a Heredoc with every flag cleared and both strings empty.
 *
 * Expands to a brace-enclosed initializer list only, with no trailing
 * semicolon. It can therefore be written as `Heredoc h = heredoc_new();`
 * without leaving a stray empty statement, and it can also be nested inside a
 * larger initializer.
 */
#define heredoc_new()                        \
    {                                        \
        .is_raw = false,                     \
        .started = false,                    \
        .allows_indent = false,              \
        .delimiter = array_new(),            \
        .current_leading_word = array_new(), \
    }
|
||||
|
||||
typedef struct Scanner
|
||||
{
|
||||
uint8_t last_glob_paren_depth;
|
||||
bool ext_was_in_double_quote;
|
||||
bool ext_saw_outside_quote;
|
||||
Array(Heredoc) heredocs;
|
||||
} Scanner;
|
||||
|
||||
// Consume the lookahead character and keep it as part of the current token
// (the lexer's `skip` argument is false).
static inline void advance(TSLexer *lexer) {
    const bool treat_as_whitespace = false;
    lexer->advance(lexer, treat_as_whitespace);
}
|
||||
|
||||
// Consume the lookahead character as whitespace, so that it is left out of the
// current token (the lexer's `skip` argument is true).
static inline void skip(TSLexer *lexer) {
    const bool treat_as_whitespace = true;
    lexer->advance(lexer, treat_as_whitespace);
}
|
||||
|
||||
static inline bool in_error_recovery(const bool *valid_symbols)
|
||||
{
|
||||
return valid_symbols[ERROR_RECOVERY];
|
||||
}
|
||||
|
||||
// Zero the used bytes of `string` and clear it. An already-empty string is
// left untouched.
static inline void reset_string(String *string) {
    if (string->size == 0) {
        return;
    }
    memset(string->contents, 0, string->size);
    array_clear(string);
}
|
||||
|
||||
// Clear the delimiter and all three flags of `heredoc`.
// `current_leading_word` is not touched here.
static inline void reset_heredoc(Heredoc *heredoc) {
    reset_string(&heredoc->delimiter);
    heredoc->allows_indent = false;
    heredoc->started = false;
    heredoc->is_raw = false;
}
|
||||
|
||||
// Reset every heredoc currently stored in the scanner. The heredoc array
// keeps its length.
static inline void reset(Scanner *scanner) {
    uint32_t index = 0;
    while (index < scanner->heredocs.size) {
        reset_heredoc(array_get(&scanner->heredocs, index));
        index++;
    }
}
|
||||
|
||||
/**
 * Write the scanner state into `buffer` and return the number of bytes used.
 *
 * Layout: last_glob_paren_depth, ext_was_in_double_quote,
 * ext_saw_outside_quote and the heredoc count (one byte each), followed for
 * every heredoc by is_raw, started, allows_indent (one byte each), the
 * delimiter length as a host-order uint32_t, and the delimiter bytes.
 *
 * Returns 0 when the next heredoc record would not fit into
 * TREE_SITTER_SERIALIZATION_BUFFER_SIZE bytes. The size check covers the whole
 * record, including the 4-byte length prefix, so nothing is written past the
 * end of the buffer.
 */
static unsigned serialize(Scanner *scanner, char *buffer) {
    uint32_t size = 0;

    buffer[size++] = (char)scanner->last_glob_paren_depth;
    buffer[size++] = (char)scanner->ext_was_in_double_quote;
    buffer[size++] = (char)scanner->ext_saw_outside_quote;
    buffer[size++] = (char)scanner->heredocs.size;

    for (uint32_t i = 0; i < scanner->heredocs.size; i++) {
        Heredoc *heredoc = array_get(&scanner->heredocs, i);

        // Three flag bytes + the length prefix + the delimiter itself.
        const size_t record_size = 3 + sizeof(uint32_t) + (size_t)heredoc->delimiter.size;
        // `size` never exceeds the buffer size here, so the subtraction is safe.
        if (record_size > (size_t)TREE_SITTER_SERIALIZATION_BUFFER_SIZE - size) {
            return 0;
        }

        buffer[size++] = (char)heredoc->is_raw;
        buffer[size++] = (char)heredoc->started;
        buffer[size++] = (char)heredoc->allows_indent;

        memcpy(&buffer[size], &heredoc->delimiter.size, sizeof(uint32_t));
        size += sizeof(uint32_t);
        if (heredoc->delimiter.size > 0) {
            memcpy(&buffer[size], heredoc->delimiter.contents, heredoc->delimiter.size);
            size += heredoc->delimiter.size;
        }
    }
    return size;
}
|
||||
|
||||
/**
 * Restore the scanner state from the bytes produced by serialize().
 *
 * A zero `length` resets the existing heredocs. Otherwise the three scalar
 * fields and `heredoc_count` heredoc records are read back, appending new
 * Heredoc entries when the array is shorter than the stored count. Entries
 * beyond `heredoc_count` that already exist in the array are left as they
 * are. The number of bytes consumed is asserted to equal `length`.
 */
static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
    if (length == 0) {
        reset(scanner);
        return;
    }

    uint32_t offset = 0;
    scanner->last_glob_paren_depth = buffer[offset++];
    scanner->ext_was_in_double_quote = buffer[offset++];
    scanner->ext_saw_outside_quote = buffer[offset++];
    const uint32_t heredoc_count = (unsigned char)buffer[offset++];

    for (uint32_t i = 0; i < heredoc_count; i++) {
        // Slots are filled in index order, so a missing slot is always the
        // next one to append; after the push, index `i` is the last element.
        if (i >= scanner->heredocs.size) {
            Heredoc fresh = heredoc_new();
            array_push(&scanner->heredocs, fresh);
        }
        Heredoc *heredoc = array_get(&scanner->heredocs, i);

        heredoc->is_raw = buffer[offset++];
        heredoc->started = buffer[offset++];
        heredoc->allows_indent = buffer[offset++];

        // The stored length is written straight into the array's size field,
        // then capacity is grown to hold that many bytes.
        memcpy(&heredoc->delimiter.size, &buffer[offset], sizeof(uint32_t));
        offset += sizeof(uint32_t);
        array_reserve(&heredoc->delimiter, heredoc->delimiter.size);

        if (heredoc->delimiter.size > 0) {
            memcpy(heredoc->delimiter.contents, &buffer[offset], heredoc->delimiter.size);
            offset += heredoc->delimiter.size;
        }
    }
    assert(offset == length);
}
|
||||
|
||||
/**
|
||||
* Consume a "word" in POSIX parlance, and returns it unquoted.
|
||||
*
|
||||
* This is an approximate implementation that doesn't deal with any
|
||||
* POSIX-mandated substitution, and assumes the default value for
|
||||
* IFS.
|
||||
*/
|
||||
static bool advance_word(TSLexer *lexer, String *unquoted_word)
|
||||
{
|
||||
bool empty = true;
|
||||
int32_t quote = 0;
|
||||
|
||||
if (lexer->lookahead == '\'' || lexer->lookahead == '"')
|
||||
{
|
||||
quote = lexer->lookahead;
|
||||
advance(lexer);
|
||||
}
|
||||
|
||||
while (lexer->lookahead && !(quote ? lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n' : iswspace(lexer->lookahead)))
|
||||
{
|
||||
if (lexer->lookahead == '\\')
|
||||
{
|
||||
advance(lexer);
|
||||
if (!lexer->lookahead)
|
||||
return false;
|
||||
}
|
||||
empty = false;
|
||||
array_push(unquoted_word, lexer->lookahead);
|
||||
advance(lexer);
|
||||
}
|
||||
array_push(unquoted_word, '\0');
|
||||
|
||||
if (quote && lexer->lookahead == quote)
|
||||
advance(lexer);
|
||||
|
||||
return !empty;
|
||||
}
|
||||
|
||||
// Try to lex a lone `$`. Leading whitespace other than a newline is skipped
// first. When the next character is `$`, it is consumed, the result symbol is
// set to BARE_DOLLAR and the token end is marked. The function returns true
// only if the `$` is followed by whitespace, end of input or a double quote.
static inline bool scan_bare_dollar(TSLexer *lexer) {
    for (;;) {
        if (!iswspace(lexer->lookahead) || lexer->lookahead == '\n' || lexer->eof(lexer)) {
            break;
        }
        skip(lexer);
    }

    if (lexer->lookahead != '$') {
        return false;
    }

    advance(lexer);
    lexer->result_symbol = BARE_DOLLAR;
    lexer->mark_end(lexer);
    return (iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"');
}
|
||||
|
||||
static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
|
||||
{
|
||||
if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols))
|
||||
{
|
||||
if (!(lexer->lookahead == 0 || iswspace(lexer->lookahead) || lexer->lookahead == '>' || lexer->lookahead == '<' || lexer->lookahead == ')' || lexer->lookahead == '(' || lexer->lookahead == ';' || lexer->lookahead == '&' || lexer->lookahead == '|' || lexer->lookahead == '{' || lexer->lookahead == '}'))
|
||||
{
|
||||
lexer->result_symbol = CONCAT;
|
||||
// So for a`b`, we want to return a concat. We check if the
|
||||
// 2nd backtick has whitespace after it, and if it does we
|
||||
// return concat.
|
||||
if (lexer->lookahead == '`')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
while (lexer->lookahead != '`' && !lexer->eof(lexer))
|
||||
{
|
||||
advance(lexer);
|
||||
}
|
||||
if (lexer->eof(lexer))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (lexer->lookahead == '`')
|
||||
{
|
||||
advance(lexer);
|
||||
}
|
||||
return iswspace(lexer->lookahead) || lexer->eof(lexer);
|
||||
}
|
||||
// strings w/ expansions that contains escaped quotes or
|
||||
// backslashes need this to return a concat
|
||||
if (lexer->lookahead == '\\')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '"' || lexer->lookahead == '\'' || lexer->lookahead == '\\')
|
||||
{
|
||||
return true;
|
||||
}
|
||||
if (lexer->eof(lexer))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !in_error_recovery(valid_symbols))
|
||||
{
|
||||
// advance two # and ensure not } after
|
||||
if (lexer->lookahead == '#')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '#')
|
||||
{
|
||||
advance(lexer);
|
||||
if (lexer->lookahead != '}')
|
||||
{
|
||||
lexer->result_symbol = IMMEDIATE_DOUBLE_HASH;
|
||||
lexer->mark_end(lexer);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (valid_symbols[EMPTY_VALUE])
|
||||
{
|
||||
if (iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == ';' || lexer->lookahead == '&')
|
||||
{
|
||||
lexer->result_symbol = EMPTY_VALUE;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR]) && !in_error_recovery(valid_symbols))
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == '\r' || (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) && !valid_symbols[EXPANSION_WORD])
|
||||
{
|
||||
skip(lexer);
|
||||
}
|
||||
else if (lexer->lookahead == '\\')
|
||||
{
|
||||
skip(lexer);
|
||||
|
||||
if (lexer->eof(lexer))
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = VARIABLE_NAME;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '\r')
|
||||
{
|
||||
skip(lexer);
|
||||
}
|
||||
if (lexer->lookahead == '\n')
|
||||
{
|
||||
skip(lexer);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (lexer->lookahead == '\\' && valid_symbols[EXPANSION_WORD])
|
||||
{
|
||||
goto expansion_word;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// no '*', '@', '?', '-', '$', '0', '_'
|
||||
if (!valid_symbols[EXPANSION_WORD] && (lexer->lookahead == '*' || lexer->lookahead == '@' || lexer->lookahead == '?' || lexer->lookahead == '-' || lexer->lookahead == '0' || lexer->lookahead == '_'))
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '=' || lexer->lookahead == '[' || lexer->lookahead == ':' || lexer->lookahead == '-' || lexer->lookahead == '%' || lexer->lookahead == '#' || lexer->lookahead == '/')
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (valid_symbols[EXTGLOB_PATTERN] && iswspace(lexer->lookahead))
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = EXTGLOB_PATTERN;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool is_number = true;
|
||||
if (iswdigit(lexer->lookahead))
|
||||
{
|
||||
advance(lexer);
|
||||
}
|
||||
else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_')
|
||||
{
|
||||
is_number = false;
|
||||
advance(lexer);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (lexer->lookahead == '{')
|
||||
{
|
||||
goto brace_start;
|
||||
}
|
||||
if (valid_symbols[EXPANSION_WORD])
|
||||
{
|
||||
goto expansion_word;
|
||||
}
|
||||
if (valid_symbols[EXTGLOB_PATTERN])
|
||||
{
|
||||
goto extglob_pattern;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (iswdigit(lexer->lookahead))
|
||||
{
|
||||
advance(lexer);
|
||||
}
|
||||
else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_')
|
||||
{
|
||||
is_number = false;
|
||||
advance(lexer);
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->lookahead == '>' || lexer->lookahead == '<'))
|
||||
{
|
||||
lexer->result_symbol = FILE_DESCRIPTOR;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (valid_symbols[VARIABLE_NAME])
|
||||
{
|
||||
if (lexer->lookahead == '+')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '=' || lexer->lookahead == ':')
|
||||
{
|
||||
lexer->result_symbol = VARIABLE_NAME;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (lexer->lookahead == '/')
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (lexer->lookahead == '=' || lexer->lookahead == '[' || (lexer->lookahead == ':' && !valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable
|
||||
// names for function words, only handling : for now? #235
|
||||
lexer->lookahead == '%' || (lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || (lexer->lookahead == '-'))
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = VARIABLE_NAME;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '?')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
lexer->result_symbol = VARIABLE_NAME;
|
||||
return iswalpha(lexer->lookahead);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && scan_bare_dollar(lexer))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if ((valid_symbols[REGEX]) && !in_error_recovery(valid_symbols))
|
||||
{
|
||||
if (valid_symbols[REGEX])
|
||||
{
|
||||
while (iswspace(lexer->lookahead))
|
||||
{
|
||||
skip(lexer);
|
||||
}
|
||||
}
|
||||
|
||||
if ((lexer->lookahead != '"' && lexer->lookahead != '\'') || ((lexer->lookahead == '$' || lexer->lookahead == '\'')) || (lexer->lookahead == '\''))
|
||||
{
|
||||
typedef struct
|
||||
{
|
||||
bool done;
|
||||
bool advanced_once;
|
||||
bool found_non_alnumdollarunderdash;
|
||||
bool last_was_escape;
|
||||
bool in_single_quote;
|
||||
uint32_t paren_depth;
|
||||
uint32_t bracket_depth;
|
||||
uint32_t brace_depth;
|
||||
} State;
|
||||
|
||||
if (lexer->lookahead == '$')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '(')
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
lexer->mark_end(lexer);
|
||||
|
||||
State state = {false, false, false, false, false, 0, 0, 0};
|
||||
while (!state.done)
|
||||
{
|
||||
if (state.in_single_quote)
|
||||
{
|
||||
if (lexer->lookahead == '\'')
|
||||
{
|
||||
state.in_single_quote = false;
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
}
|
||||
}
|
||||
switch (lexer->lookahead)
|
||||
{
|
||||
case '\\':
|
||||
state.last_was_escape = true;
|
||||
break;
|
||||
case '\0':
|
||||
return false;
|
||||
case '(':
|
||||
state.paren_depth++;
|
||||
state.last_was_escape = false;
|
||||
break;
|
||||
case '[':
|
||||
state.bracket_depth++;
|
||||
state.last_was_escape = false;
|
||||
break;
|
||||
case '{':
|
||||
if (!state.last_was_escape)
|
||||
state.brace_depth++;
|
||||
state.last_was_escape = false;
|
||||
break;
|
||||
case ')':
|
||||
if (state.paren_depth == 0)
|
||||
state.done = true;
|
||||
state.paren_depth--;
|
||||
state.last_was_escape = false;
|
||||
break;
|
||||
case ']':
|
||||
if (state.bracket_depth == 0)
|
||||
state.done = true;
|
||||
state.bracket_depth--;
|
||||
state.last_was_escape = false;
|
||||
break;
|
||||
case '}':
|
||||
if (state.brace_depth == 0)
|
||||
state.done = true;
|
||||
state.brace_depth--;
|
||||
state.last_was_escape = false;
|
||||
break;
|
||||
case '\'':
|
||||
// Enter or exit a single-quoted string.
|
||||
state.in_single_quote = !state.in_single_quote;
|
||||
advance(lexer);
|
||||
state.advanced_once = true;
|
||||
state.last_was_escape = false;
|
||||
continue;
|
||||
default:
|
||||
state.last_was_escape = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!state.done)
|
||||
{
|
||||
if (valid_symbols[REGEX])
|
||||
{
|
||||
bool was_space = !state.in_single_quote && iswspace(lexer->lookahead);
|
||||
advance(lexer);
|
||||
state.advanced_once = true;
|
||||
if (!was_space || state.paren_depth > 0)
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lexer->result_symbol = REGEX;
|
||||
if (valid_symbols[REGEX] && !state.advanced_once)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
extglob_pattern:
|
||||
if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols))
|
||||
{
|
||||
// first skip ws, then check for ? * + @ !
|
||||
while (iswspace(lexer->lookahead))
|
||||
{
|
||||
skip(lexer);
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '?' || lexer->lookahead == '*' || lexer->lookahead == '+' || lexer->lookahead == '@' || lexer->lookahead == '!' || lexer->lookahead == '-' || lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.' || lexer->lookahead == '[' || (iswalpha(lexer->lookahead)))
|
||||
{
|
||||
if (lexer->lookahead == '\\')
|
||||
{
|
||||
advance(lexer);
|
||||
if ((iswspace(lexer->lookahead) || lexer->lookahead == '"') && lexer->lookahead != '\r' && lexer->lookahead != '\n')
|
||||
{
|
||||
advance(lexer);
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0)
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
|
||||
if (iswspace(lexer->lookahead))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
lexer->mark_end(lexer);
|
||||
bool was_non_alpha = !iswalpha(lexer->lookahead);
|
||||
if (lexer->lookahead != '[')
|
||||
{
|
||||
// no esac
|
||||
if (lexer->lookahead == 'e')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == 's')
|
||||
{
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == 'a')
|
||||
{
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == 'c')
|
||||
{
|
||||
advance(lexer);
|
||||
if (iswspace(lexer->lookahead))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
advance(lexer);
|
||||
}
|
||||
}
|
||||
|
||||
// -\w is just a word, find something else special
|
||||
if (lexer->lookahead == '-')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
while (iswalnum(lexer->lookahead))
|
||||
{
|
||||
advance(lexer);
|
||||
}
|
||||
|
||||
if (lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.')
|
||||
{
|
||||
return false;
|
||||
}
|
||||
lexer->mark_end(lexer);
|
||||
}
|
||||
|
||||
// case item -) or *)
|
||||
if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0)
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
if (iswspace(lexer->lookahead))
|
||||
{
|
||||
lexer->result_symbol = EXTGLOB_PATTERN;
|
||||
return was_non_alpha;
|
||||
}
|
||||
}
|
||||
|
||||
if (iswspace(lexer->lookahead))
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = EXTGLOB_PATTERN;
|
||||
scanner->last_glob_paren_depth = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '$')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '{' || lexer->lookahead == '(')
|
||||
{
|
||||
lexer->result_symbol = EXTGLOB_PATTERN;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '|')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
lexer->result_symbol = EXTGLOB_PATTERN;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!iswalnum(lexer->lookahead) && lexer->lookahead != '(' && lexer->lookahead != '"' && lexer->lookahead != '[' && lexer->lookahead != '?' && lexer->lookahead != '/' && lexer->lookahead != '\\' && lexer->lookahead != '_' && lexer->lookahead != '*')
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
bool done;
|
||||
bool saw_non_alphadot;
|
||||
uint32_t paren_depth;
|
||||
uint32_t bracket_depth;
|
||||
uint32_t brace_depth;
|
||||
} State;
|
||||
|
||||
State state = {false, was_non_alpha, scanner->last_glob_paren_depth, 0, 0};
|
||||
while (!state.done)
|
||||
{
|
||||
switch (lexer->lookahead)
|
||||
{
|
||||
case '\0':
|
||||
return false;
|
||||
case '(':
|
||||
state.paren_depth++;
|
||||
break;
|
||||
case '[':
|
||||
state.bracket_depth++;
|
||||
break;
|
||||
case '{':
|
||||
state.brace_depth++;
|
||||
break;
|
||||
case ')':
|
||||
if (state.paren_depth == 0)
|
||||
{
|
||||
state.done = true;
|
||||
}
|
||||
state.paren_depth--;
|
||||
break;
|
||||
case ']':
|
||||
if (state.bracket_depth == 0)
|
||||
{
|
||||
state.done = true;
|
||||
}
|
||||
state.bracket_depth--;
|
||||
break;
|
||||
case '}':
|
||||
if (state.brace_depth == 0)
|
||||
{
|
||||
state.done = true;
|
||||
}
|
||||
state.brace_depth--;
|
||||
break;
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '|')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0)
|
||||
{
|
||||
lexer->result_symbol = EXTGLOB_PATTERN;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!state.done)
|
||||
{
|
||||
bool was_space = iswspace(lexer->lookahead);
|
||||
if (lexer->lookahead == '$')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
|
||||
{
|
||||
state.saw_non_alphadot = true;
|
||||
}
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '(' || lexer->lookahead == '{')
|
||||
{
|
||||
lexer->result_symbol = EXTGLOB_PATTERN;
|
||||
scanner->last_glob_paren_depth = state.paren_depth;
|
||||
return state.saw_non_alphadot;
|
||||
}
|
||||
}
|
||||
if (was_space)
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = EXTGLOB_PATTERN;
|
||||
scanner->last_glob_paren_depth = 0;
|
||||
return state.saw_non_alphadot;
|
||||
}
|
||||
if (lexer->lookahead == '"')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = EXTGLOB_PATTERN;
|
||||
scanner->last_glob_paren_depth = 0;
|
||||
return state.saw_non_alphadot;
|
||||
}
|
||||
if (lexer->lookahead == '\\')
|
||||
{
|
||||
if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
|
||||
{
|
||||
state.saw_non_alphadot = true;
|
||||
}
|
||||
advance(lexer);
|
||||
if (iswspace(lexer->lookahead) || lexer->lookahead == '"')
|
||||
{
|
||||
advance(lexer);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!iswalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
|
||||
{
|
||||
state.saw_non_alphadot = true;
|
||||
}
|
||||
advance(lexer);
|
||||
}
|
||||
if (!was_space)
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lexer->result_symbol = EXTGLOB_PATTERN;
|
||||
scanner->last_glob_paren_depth = 0;
|
||||
return state.saw_non_alphadot;
|
||||
}
|
||||
scanner->last_glob_paren_depth = 0;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
expansion_word:
|
||||
if (valid_symbols[EXPANSION_WORD])
|
||||
{
|
||||
bool advanced_once = false;
|
||||
bool advance_once_space = false;
|
||||
for (;;)
|
||||
{
|
||||
if (lexer->lookahead == '\"')
|
||||
return false;
|
||||
if (lexer->lookahead == '$')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || iswalnum(lexer->lookahead))
|
||||
{
|
||||
lexer->result_symbol = EXPANSION_WORD;
|
||||
return advanced_once;
|
||||
}
|
||||
advanced_once = true;
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '}')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = EXPANSION_WORD;
|
||||
return advanced_once || advance_once_space;
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '(' && !(advanced_once || advance_once_space))
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
while (lexer->lookahead != ')' && !lexer->eof(lexer))
|
||||
{
|
||||
// if we find a $( or ${ assume this is valid and is
|
||||
// a garbage concatenation of some weird word + an
|
||||
// expansion
|
||||
// I wonder where this can fail
|
||||
if (lexer->lookahead == '$')
|
||||
{
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || iswalnum(lexer->lookahead))
|
||||
{
|
||||
lexer->result_symbol = EXPANSION_WORD;
|
||||
return advanced_once;
|
||||
}
|
||||
advanced_once = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
advanced_once = advanced_once || !iswspace(lexer->lookahead);
|
||||
advance_once_space = advance_once_space || iswspace(lexer->lookahead);
|
||||
advance(lexer);
|
||||
}
|
||||
}
|
||||
lexer->mark_end(lexer);
|
||||
if (lexer->lookahead == ')')
|
||||
{
|
||||
advanced_once = true;
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
if (lexer->lookahead == '}')
|
||||
return false;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '\'')
|
||||
return false;
|
||||
if (lexer->eof(lexer))
|
||||
return false;
|
||||
advanced_once = advanced_once || !iswspace(lexer->lookahead);
|
||||
advance_once_space = advance_once_space || iswspace(lexer->lookahead);
|
||||
advance(lexer);
|
||||
}
|
||||
}
|
||||
|
||||
brace_start:
|
||||
return false;
|
||||
}
|
||||
|
||||
void *tree_sitter_sh_external_scanner_create()
|
||||
{
|
||||
Scanner *scanner = calloc(1, sizeof(Scanner));
|
||||
array_init(&scanner->heredocs);
|
||||
return scanner;
|
||||
}
|
||||
|
||||
/**
 * Public scan entry point: forwards to scan() with the payload viewed as a
 * Scanner.
 *
 * @param payload        Opaque scanner state; cast to Scanner * before use.
 * @param lexer          Lexer handle, passed through unchanged.
 * @param valid_symbols  Table of currently acceptable symbols, passed through
 *                       unchanged.
 * @return Whatever scan() returns for these arguments.
 */
bool tree_sitter_sh_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols)
{
    return scan((Scanner *)payload, lexer, valid_symbols);
}
|
||||
|
||||
unsigned tree_sitter_sh_external_scanner_serialize(void *payload, char *state)
|
||||
{
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
return serialize(scanner, state);
|
||||
}
|
||||
|
||||
void tree_sitter_sh_external_scanner_deserialize(void *payload, const char *state, unsigned length)
|
||||
{
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
deserialize(scanner, state, length);
|
||||
}
|
||||
|
||||
void tree_sitter_sh_external_scanner_destroy(void *payload)
|
||||
{
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
for (size_t i = 0; i < scanner->heredocs.size; i++)
|
||||
{
|
||||
Heredoc *heredoc = array_get(&scanner->heredocs, i);
|
||||
array_delete(&heredoc->current_leading_word);
|
||||
array_delete(&heredoc->delimiter);
|
||||
}
|
||||
array_delete(&scanner->heredocs);
|
||||
free(scanner);
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue