update: scanner is way more normed now :D

This commit is contained in:
maix0 2024-09-14 11:39:53 +00:00
parent 51c532991d
commit 5fb84a3a82
2 changed files with 364 additions and 328 deletions

View file

@ -6,7 +6,7 @@
/* By: rparodi <rparodi@student.42.fr> +#+ +:+ +#+ */ /* By: rparodi <rparodi@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */ /* +#+#+#+#+#+ +#+ */
/* Created: 2024/09/10 15:41:11 by rparodi #+# #+# */ /* Created: 2024/09/10 15:41:11 by rparodi #+# #+# */
/* Updated: 2024/09/14 13:09:27 by rparodi ### ########.fr */ /* Updated: 2024/09/14 11:38:03 by maiboyer ### ########.fr */
/* */ /* */
/* ************************************************************************** */ /* ************************************************************************** */
@ -42,8 +42,8 @@ t_u32 serialize(t_scanner *scanner, t_u8 *buffer)
while (i < scanner->heredocs.len) while (i < scanner->heredocs.len)
{ {
heredoc = vec_heredoc_get(&scanner->heredocs, i); heredoc = vec_heredoc_get(&scanner->heredocs, i);
if (heredoc->delimiter.len + 3 + size >= \ if (heredoc->delimiter.len + 3
TREE_SITTER_SERIALIZATION_BUFFER_SIZE) + size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
return (0); return (0);
buffer[size++] = (char)heredoc->is_raw; buffer[size++] = (char)heredoc->is_raw;
buffer[size++] = (char)heredoc->started; buffer[size++] = (char)heredoc->started;
@ -53,7 +53,7 @@ t_u32 serialize(t_scanner *scanner, t_u8 *buffer)
size += sizeof(t_usize); size += sizeof(t_usize);
if (heredoc->delimiter.len > 0) if (heredoc->delimiter.len > 0)
{ {
mem_copy(&buffer[size], heredoc->delimiter.buf, \ mem_copy(&buffer[size], heredoc->delimiter.buf,
heredoc->delimiter.len); heredoc->delimiter.len);
size += heredoc->delimiter.len; size += heredoc->delimiter.len;
} }
@ -102,8 +102,8 @@ void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length)
string_reserve(&heredoc->delimiter, heredoc->delimiter.len); string_reserve(&heredoc->delimiter, heredoc->delimiter.len);
if (heredoc->delimiter.len > 0) if (heredoc->delimiter.len > 0)
{ {
mem_copy(heredoc->delimiter.buf, &buffer[size], \ mem_copy(heredoc->delimiter.buf, &buffer[size],
heredoc->delimiter.len); heredoc->delimiter.len);
size += heredoc->delimiter.len; size += heredoc->delimiter.len;
} }
i++; i++;
@ -132,9 +132,10 @@ bool advance_word(t_lexer *lexer, t_string *unquoted_word)
quote = lexer->data.lookahead; quote = lexer->data.lookahead;
lexer->data.advance((void *)lexer, false); lexer->data.advance((void *)lexer, false);
} }
while (lexer->data.lookahead && \ while (lexer->data.lookahead && !((quote && (lexer->data.lookahead == quote
!(quote ? lexer->data.lookahead == quote || lexer->data.lookahead == '\r' \ || lexer->data.lookahead == '\r'
|| lexer->data.lookahead == '\n' : me_isspace(lexer->data.lookahead))) || lexer->data.lookahead == '\n')) || (!quote
&& me_isspace(lexer->data.lookahead))))
{ {
if (lexer->data.lookahead == '\\') if (lexer->data.lookahead == '\\')
{ {
@ -154,16 +155,16 @@ bool advance_word(t_lexer *lexer, t_string *unquoted_word)
bool scan_bare_dollar(t_lexer *lexer) bool scan_bare_dollar(t_lexer *lexer)
{ {
while (me_isspace(lexer->data.lookahead) && \ while (me_isspace(lexer->data.lookahead) && lexer->data.lookahead != '\n'
lexer->data.lookahead != '\n' && !lexer->data.eof((void *)lexer)) && !lexer->data.eof((void *)lexer))
lexer->data.advance((void *)lexer, true); lexer->data.advance((void *)lexer, true);
if (lexer->data.lookahead == '$') if (lexer->data.lookahead == '$')
{ {
lexer->data.advance((void *)lexer, false); lexer->data.advance((void *)lexer, false);
lexer->data.result_symbol = BARE_DOLLAR; lexer->data.result_symbol = BARE_DOLLAR;
lexer->data.mark_end((void *)lexer); lexer->data.mark_end((void *)lexer);
return (me_isspace(lexer->data.lookahead) || \ return (me_isspace(lexer->data.lookahead)
lexer->data.eof((void *)lexer) || lexer->data.lookahead == '\"'); || lexer->data.eof((void *)lexer) || lexer->data.lookahead == '\"');
} }
return (false); return (false);
} }
@ -178,8 +179,8 @@ bool scan_heredoc_start(t_heredoc *heredoc, t_lexer *lexer)
lexer->data.advance((void *)lexer, true); lexer->data.advance((void *)lexer, true);
} }
lexer->data.result_symbol = HEREDOC_START; lexer->data.result_symbol = HEREDOC_START;
heredoc->is_raw = lexer->data.lookahead == '\'' || \ heredoc->is_raw = lexer->data.lookahead == '\''
lexer->data.lookahead == '"' || lexer->data.lookahead == '\\'; || lexer->data.lookahead == '"' || lexer->data.lookahead == '\\';
if (!found_delimiter) if (!found_delimiter)
{ {
string_clear(&heredoc->delimiter); string_clear(&heredoc->delimiter);
@ -198,136 +199,133 @@ bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer)
string_clear(&heredoc->current_leading_word); string_clear(&heredoc->current_leading_word);
if (heredoc->delimiter.len > 0) if (heredoc->delimiter.len > 0)
{ {
while (lexer->data.lookahead != '\0' \ while (lexer->data.lookahead != '\0' && lexer->data.lookahead != '\n'
&& lexer->data.lookahead != '\n' && \ && (t_i32)
(t_i32) * (&heredoc->delimiter.buf[size]) == lexer->data.lookahead && \ * (&heredoc->delimiter.buf[size]) == lexer->data.lookahead
heredoc->current_leading_word.len < heredoc->delimiter.len) && heredoc->current_leading_word.len < heredoc->delimiter.len)
{ {
string_push_char(&heredoc->current_leading_word, \ string_push_char(&heredoc->current_leading_word,
lexer->data.lookahead); lexer->data.lookahead);
lexer->data.advance((void *)lexer, false); lexer->data.advance((void *)lexer, false);
size++; size++;
} }
} }
string_push_char(&heredoc->current_leading_word, '\0'); string_push_char(&heredoc->current_leading_word, '\0');
return (heredoc->delimiter.len == 0 ? false : str_compare(\ if (heredoc->delimiter.len == 0)
heredoc->current_leading_word.buf, heredoc->delimiter.buf)); return (false);
return (str_compare(heredoc->current_leading_word.buf,
heredoc->delimiter.buf));
} }
bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, \ bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer,
enum e_token_type middle_type, enum e_token_type end_type) enum e_token_type middle_type, enum e_token_type end_type)
{ {
bool did_advance; bool did_advance;
t_heredoc *heredoc; t_heredoc *heredoc;
did_advance = false; did_advance = false;
heredoc = vec_heredoc_last(&scanner->heredocs); heredoc = vec_heredoc_last(&scanner->heredocs);
for (;;) while (true)
{ {
switch (lexer->data.lookahead) if (lexer->data.lookahead == '\0')
{ {
case '\0': { if (lexer->data.eof((void *)lexer) && did_advance)
if (lexer->data.eof((void *)lexer) && did_advance) {
{ reset_heredoc(heredoc);
reset_heredoc(heredoc); lexer->data.result_symbol = end_type;
lexer->data.result_symbol = end_type; return (true);
return true;
}
return (false);
} }
return (false);
case '\\': { }
else if (lexer->data.lookahead == '\\')
{
did_advance = true;
lexer->data.advance((void *)lexer, false);
lexer->data.advance((void *)lexer, false);
}
else if (lexer->data.lookahead == '$')
{
if (heredoc->is_raw)
{
did_advance = true; did_advance = true;
lexer->data.advance((void *)lexer, false); lexer->data.advance((void *)lexer, false);
lexer->data.advance((void *)lexer, false);
break ;
} }
if (did_advance)
case '$': { {
if (heredoc->is_raw)
{
did_advance = true;
lexer->data.advance((void *)lexer, false);
break ;
}
if (did_advance)
{
lexer->data.mark_end((void *)lexer);
lexer->data.result_symbol = middle_type;
heredoc->started = true;
lexer->data.advance((void *)lexer, false);
if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '{' || lexer->data.lookahead == '(')
{
return (true);
}
break ;
}
if (middle_type == HEREDOC_BODY_BEGINNING && lexer->data.get_column((void *)lexer) == 0)
{
lexer->data.result_symbol = middle_type;
heredoc->started = true;
return (true);
}
return (false);
}
case '\n': {
if (!did_advance)
lexer->data.advance((void *)lexer, true);
else
lexer->data.advance((void *)lexer, false);
did_advance = true;
if (heredoc->allows_indent)
{
while (me_isspace(lexer->data.lookahead))
lexer->data.advance((void *)lexer, false);
}
lexer->data.result_symbol = heredoc->started ? middle_type : end_type;
lexer->data.mark_end((void *)lexer); lexer->data.mark_end((void *)lexer);
if (scan_heredoc_end_identifier(heredoc, lexer)) lexer->data.result_symbol = middle_type;
{ heredoc->started = true;
if (lexer->data.result_symbol == HEREDOC_END)
vec_heredoc_pop(&scanner->heredocs, NULL);
return (true);
}
break ;
}
default: {
if (lexer->data.get_column((void *)lexer) == 0)
{
// an alternative is to check the starting column of the
// heredoc body and track that statefully
while (me_isspace(lexer->data.lookahead))
{
if (did_advance)
lexer->data.advance((void *)lexer, false);
else
lexer->data.advance((void *)lexer, true);
}
if (end_type != SIMPLE_HEREDOC_BODY)
{
lexer->data.result_symbol = middle_type;
if (scan_heredoc_end_identifier(heredoc, lexer))
return (true);
}
if (end_type == SIMPLE_HEREDOC_BODY)
{
lexer->data.result_symbol = end_type;
lexer->data.mark_end((void *)lexer);
if (scan_heredoc_end_identifier(heredoc, lexer))
return (true);
}
}
did_advance = true;
lexer->data.advance((void *)lexer, false); lexer->data.advance((void *)lexer, false);
break ; if (me_isalpha(lexer->data.lookahead)
|| lexer->data.lookahead == '{'
|| lexer->data.lookahead == '(')
return (true);
} }
if (middle_type == HEREDOC_BODY_BEGINNING
&& lexer->data.get_column((void *)lexer) == 0)
{
lexer->data.result_symbol = middle_type;
heredoc->started = true;
return (true);
}
return (false);
}
else if (lexer->data.lookahead == '\n')
{
if (!did_advance)
lexer->data.advance((void *)lexer, true);
else
lexer->data.advance((void *)lexer, false);
did_advance = true;
if (heredoc->allows_indent)
{
while (me_isspace(lexer->data.lookahead))
lexer->data.advance((void *)lexer, false);
}
lexer->data.result_symbol = end_type;
if (heredoc->started)
lexer->data.result_symbol = middle_type;
lexer->data.mark_end((void *)lexer);
if (scan_heredoc_end_identifier(heredoc, lexer))
{
if (lexer->data.result_symbol == HEREDOC_END)
vec_heredoc_pop(&scanner->heredocs, NULL);
return (true);
}
}
else
{
if (lexer->data.get_column((void *)lexer) == 0)
{
while (me_isspace(lexer->data.lookahead))
{
if (did_advance)
lexer->data.advance((void *)lexer, false);
else
lexer->data.advance((void *)lexer, true);
}
if (end_type != SIMPLE_HEREDOC_BODY)
{
lexer->data.result_symbol = middle_type;
if (scan_heredoc_end_identifier(heredoc, lexer))
return (true);
}
if (end_type == SIMPLE_HEREDOC_BODY)
{
lexer->data.result_symbol = end_type;
lexer->data.mark_end((void *)lexer);
if (scan_heredoc_end_identifier(heredoc, lexer))
return (true);
}
}
did_advance = true;
lexer->data.advance((void *)lexer, false);
} }
} }
} }
bool scan_concat(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) bool scan_concat(t_scanner *scanner, t_lexer *lexer,
const bool *valid_symbols)
{ {
(void)(scanner); (void)(scanner);
(void)(lexer); (void)(lexer);
@ -337,7 +335,8 @@ bool scan_concat(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{ {
lexer->data.mark_end((void *)lexer); lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false); lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '"' || lexer->data.lookahead == '\'' || lexer->data.lookahead == '\\') if (lexer->data.lookahead == '"' || lexer->data.lookahead == '\''
|| lexer->data.lookahead == '\\')
return (true); return (true);
if (lexer->data.eof((void *)lexer)) if (lexer->data.eof((void *)lexer))
return (false); return (false);
@ -345,12 +344,14 @@ bool scan_concat(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
return (true); return (true);
} }
bool scan_double_hash(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) bool scan_double_hash(t_scanner *scanner, t_lexer *lexer,
const bool *valid_symbols)
{ {
(void)(scanner); (void)(scanner);
(void)(lexer); (void)(lexer);
(void)(valid_symbols); (void)(valid_symbols);
if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !(valid_symbols[ERROR_RECOVERY])) if (valid_symbols[IMMEDIATE_DOUBLE_HASH]
&& !(valid_symbols[ERROR_RECOVERY]))
{ {
if (lexer->data.lookahead == '#') if (lexer->data.lookahead == '#')
{ {
@ -371,7 +372,8 @@ bool scan_double_hash(t_scanner *scanner, t_lexer *lexer, const bool *valid_symb
return (false); return (false);
} }
bool scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) bool scan_heredoc_end(t_scanner *scanner, t_lexer *lexer,
const bool *valid_symbols)
{ {
t_heredoc *heredoc; t_heredoc *heredoc;
@ -390,242 +392,271 @@ bool scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, const bool *valid_symb
return (false); return (false);
} }
bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols) bool scan_advance_words(t_scanner *scanner, t_lexer *lexer,
const bool *valid_symbols)
{ {
if (valid_symbols[CONCAT] && !(valid_symbols[ERROR_RECOVERY]) && bool advanced_once;
(!(lexer->data.lookahead == 0 || me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '>' || bool advance_once_space;
lexer->data.lookahead == '<' || lexer->data.lookahead == ')' || lexer->data.lookahead == '(' || lexer->data.lookahead == ';' ||
lexer->data.lookahead == '&' || lexer->data.lookahead == '|' || lexer->data.lookahead == '{' || lexer->data.lookahead == '}'))) advanced_once = false;
return (scan_concat(scanner, lexer, valid_symbols)); advance_once_space = false;
if (scan_double_hash(scanner, lexer, valid_symbols)) (void)(scanner);
return (true); (void)(lexer);
if (valid_symbols[EMPTY_VALUE] && (me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer) || (void)(valid_symbols);
lexer->data.lookahead == ';' || lexer->data.lookahead == '&')) while (true)
return (lexer->data.result_symbol = EMPTY_VALUE, true);
if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.len > 0 &&
!vec_heredoc_last(&scanner->heredocs)->started && !(valid_symbols[ERROR_RECOVERY]))
return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY));
if (scan_heredoc_end(scanner, lexer, valid_symbols))
return (true);
if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0 && vec_heredoc_last(&scanner->heredocs)->started &&
!(valid_symbols[ERROR_RECOVERY]))
return (scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END));
if (valid_symbols[HEREDOC_START] && !(valid_symbols[ERROR_RECOVERY]) && scanner->heredocs.len > 0)
return (scan_heredoc_start(vec_heredoc_last(&scanner->heredocs), lexer));
if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) &&
!(valid_symbols[ERROR_RECOVERY]))
{ {
for (;;) if (lexer->data.lookahead == '\"')
{ return (false);
if ((lexer->data.lookahead == ' ' || lexer->data.lookahead == '\t' || lexer->data.lookahead == '\r' || if (lexer->data.lookahead == '$')
(lexer->data.lookahead == '\n' && !valid_symbols[NEWLINE])) &&
!valid_symbols[EXPANSION_WORD])
lexer->data.advance((void *)lexer, true);
else if (lexer->data.lookahead == '\\')
{
lexer->data.advance((void *)lexer, true);
if (lexer->data.eof((void *)lexer))
{
lexer->data.mark_end((void *)lexer);
lexer->data.result_symbol = VARIABLE_NAME;
return (true);
}
if (lexer->data.lookahead == '\r')
lexer->data.advance((void *)lexer, true);
if (lexer->data.lookahead == '\n')
lexer->data.advance((void *)lexer, true);
else
{
if (lexer->data.lookahead == '\\' && valid_symbols[EXPANSION_WORD])
goto expansion_word;
return (false);
}
}
else
break ;
}
if (!valid_symbols[EXPANSION_WORD] &&
(lexer->data.lookahead == '*' || lexer->data.lookahead == '@' || lexer->data.lookahead == '?' || lexer->data.lookahead == '-' ||
lexer->data.lookahead == '0' || lexer->data.lookahead == '_'))
{ {
lexer->data.mark_end((void *)lexer); lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false); lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' || lexer->data.lookahead == ':' || if (lexer->data.lookahead == '{' || lexer->data.lookahead == '('
lexer->data.lookahead == '-' || lexer->data.lookahead == '%' || lexer->data.lookahead == '#' || || lexer->data.lookahead == '\''
lexer->data.lookahead == '/') || me_isalnum(lexer->data.lookahead))
{
lexer->data.result_symbol = EXPANSION_WORD;
return (advanced_once);
}
advanced_once = true;
}
if (lexer->data.lookahead == '}')
{
lexer->data.mark_end((void *)lexer);
lexer->data.result_symbol = EXPANSION_WORD;
return (advanced_once || advance_once_space);
}
if (lexer->data.lookahead == '(' && !(advanced_once
|| advance_once_space))
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
while (lexer->data.lookahead != ')'
&& !lexer->data.eof((void *)lexer))
{
if (lexer->data.lookahead == '$')
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '{'
|| lexer->data.lookahead == '('
|| lexer->data.lookahead == '\''
|| me_isalnum(lexer->data.lookahead))
return (lexer->data.result_symbol = EXPANSION_WORD,
advanced_once);
advanced_once = true;
}
else
{
advanced_once = advanced_once
|| !me_isspace(lexer->data.lookahead);
advance_once_space = advance_once_space
|| me_isspace(lexer->data.lookahead);
lexer->data.advance((void *)lexer, false);
}
}
lexer->data.mark_end((void *)lexer);
if (lexer->data.lookahead == ')')
{
advanced_once = true;
lexer->data.advance((void *)lexer, false);
lexer->data.mark_end((void *)lexer);
if (lexer->data.lookahead == '}')
return (false);
}
else
return (false); return (false);
if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->data.lookahead)) }
if (lexer->data.lookahead == '\'')
return (false);
if (lexer->data.eof((void *)lexer))
return (false);
advanced_once = advanced_once || !me_isspace(lexer->data.lookahead);
advance_once_space = advance_once_space
|| me_isspace(lexer->data.lookahead);
lexer->data.advance((void *)lexer, false);
}
return (false);
}
bool scan_literals(t_scanner *scanner, t_lexer *lexer,
const bool *valid_symbols)
{
bool is_number;
while (true)
{
if ((lexer->data.lookahead == ' ' || lexer->data.lookahead == '\t'
|| lexer->data.lookahead == '\r'
|| (lexer->data.lookahead == '\n' && !valid_symbols[NEWLINE]))
&& !valid_symbols[EXPANSION_WORD])
lexer->data.advance((void *)lexer, true);
else if (lexer->data.lookahead == '\\')
{
lexer->data.advance((void *)lexer, true);
if (lexer->data.eof((void *)lexer))
{ {
lexer->data.mark_end((void *)lexer); lexer->data.mark_end((void *)lexer);
lexer->data.result_symbol = EXTGLOB_PATTERN; lexer->data.result_symbol = VARIABLE_NAME;
return (true); return (true);
} }
if (lexer->data.lookahead == '\r')
lexer->data.advance((void *)lexer, true);
if (lexer->data.lookahead == '\n')
lexer->data.advance((void *)lexer, true);
else
{
if (lexer->data.lookahead == '\\'
&& valid_symbols[EXPANSION_WORD])
return (scan_advance_words(scanner, lexer, valid_symbols));
return (false);
}
} }
else
if (valid_symbols[HEREDOC_ARROW] && lexer->data.lookahead == '<') break ;
}
if (!valid_symbols[EXPANSION_WORD] && (lexer->data.lookahead == '*'
|| lexer->data.lookahead == '@' || lexer->data.lookahead == '?'
|| lexer->data.lookahead == '-' || lexer->data.lookahead == '0'
|| lexer->data.lookahead == '_'))
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '=' || lexer->data.lookahead == '['
|| lexer->data.lookahead == ':' || lexer->data.lookahead == '-'
|| lexer->data.lookahead == '%' || lexer->data.lookahead == '#'
|| lexer->data.lookahead == '/')
return (false);
if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->data.lookahead))
{
lexer->data.mark_end((void *)lexer);
lexer->data.result_symbol = EXTGLOB_PATTERN;
return (true);
}
}
if (valid_symbols[HEREDOC_ARROW] && lexer->data.lookahead == '<')
{
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '<')
{ {
lexer->data.advance((void *)lexer, false); lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '<') vec_heredoc_push(&scanner->heredocs, heredoc_new());
{ lexer->data.result_symbol = HEREDOC_ARROW;
lexer->data.advance((void *)lexer, false); return (true);
t_heredoc heredoc = heredoc_new();
vec_heredoc_push(&scanner->heredocs, heredoc);
lexer->data.result_symbol = HEREDOC_ARROW;
return (true);
}
return (false);
} }
return (false);
bool is_number = true; }
is_number = true;
if (me_isdigit(lexer->data.lookahead))
lexer->data.advance((void *)lexer, false);
else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_')
{
is_number = false;
lexer->data.advance((void *)lexer, false);
}
else
{
if (lexer->data.lookahead == '{')
return (false);
if (valid_symbols[EXPANSION_WORD])
return (scan_advance_words(scanner, lexer, valid_symbols));
return (false);
}
while (true)
{
if (me_isdigit(lexer->data.lookahead)) if (me_isdigit(lexer->data.lookahead))
lexer->data.advance((void *)lexer, false); lexer->data.advance((void *)lexer, false);
else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_') else if (me_isalpha(lexer->data.lookahead)
|| lexer->data.lookahead == '_')
{ {
is_number = false; is_number = false;
lexer->data.advance((void *)lexer, false); lexer->data.advance((void *)lexer, false);
} }
else else
break ;
}
if (is_number && valid_symbols[FILE_DESCRIPTOR]
&& (lexer->data.lookahead == '>' || lexer->data.lookahead == '<'))
return (lexer->data.result_symbol = FILE_DESCRIPTOR, true);
if (valid_symbols[VARIABLE_NAME])
{
if (lexer->data.lookahead == '+')
{ {
if (lexer->data.lookahead == '{') lexer->data.mark_end((void *)lexer);
return (false); lexer->data.advance((void *)lexer, false);
if (valid_symbols[EXPANSION_WORD]) if (lexer->data.lookahead == '=' || lexer->data.lookahead == ':')
goto expansion_word; return (lexer->data.result_symbol = VARIABLE_NAME, true);
return (false); return (false);
} }
if (lexer->data.lookahead == '/')
for (;;) return (false);
if (lexer->data.lookahead == '=' || lexer->data.lookahead == '['
|| (lexer->data.lookahead == ':' && !valid_symbols[OPENING_PAREN])
|| lexer->data.lookahead == '%' || (lexer->data.lookahead == '#'
&& !is_number) || lexer->data.lookahead == '@'
|| (lexer->data.lookahead == '-'))
{ {
if (me_isdigit(lexer->data.lookahead)) lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false); lexer->data.result_symbol = VARIABLE_NAME;
else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_') return (true);
{
is_number = false;
lexer->data.advance((void *)lexer, false);
}
else
break ;
} }
if (lexer->data.lookahead == '?')
if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->data.lookahead == '>' || lexer->data.lookahead == '<'))
return (lexer->data.result_symbol = FILE_DESCRIPTOR, true);
if (valid_symbols[VARIABLE_NAME])
{ {
if (lexer->data.lookahead == '+') lexer->data.mark_end((void *)lexer);
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '=' || lexer->data.lookahead == ':')
return (lexer->data.result_symbol = VARIABLE_NAME, true);
return (false);
}
if (lexer->data.lookahead == '/')
return (false);
if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' ||
(lexer->data.lookahead == ':' && !valid_symbols[OPENING_PAREN]) || lexer->data.lookahead == '%' ||
(lexer->data.lookahead == '#' && !is_number) || lexer->data.lookahead == '@' || (lexer->data.lookahead == '-'))
{
lexer->data.mark_end((void *)lexer);
lexer->data.result_symbol = VARIABLE_NAME;
return (true);
}
if (lexer->data.lookahead == '?')
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
lexer->data.result_symbol = VARIABLE_NAME;
return me_isalpha(lexer->data.lookahead);
}
}
return (false);
}
if (valid_symbols[BARE_DOLLAR] && !(valid_symbols[ERROR_RECOVERY]) && scan_bare_dollar(lexer))
return (true);
expansion_word:
if (valid_symbols[EXPANSION_WORD])
{
bool advanced_once = false;
bool advance_once_space = false;
for (;;)
{
if (lexer->data.lookahead == '\"')
return (false);
if (lexer->data.lookahead == '$')
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' ||
me_isalnum(lexer->data.lookahead))
{
lexer->data.result_symbol = EXPANSION_WORD;
return (advanced_once);
}
advanced_once = true;
}
if (lexer->data.lookahead == '}')
{
lexer->data.mark_end((void *)lexer);
lexer->data.result_symbol = EXPANSION_WORD;
return (advanced_once || advance_once_space);
}
if (lexer->data.lookahead == '(' && !(advanced_once || advance_once_space))
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
while (lexer->data.lookahead != ')' && !lexer->data.eof((void *)lexer))
{
// if we find a $( or ${ assume this is valid and is
// a garbage concatenation of some weird word + an
// expansion
// I wonder where this can fail
if (lexer->data.lookahead == '$')
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' ||
me_isalnum(lexer->data.lookahead))
{
lexer->data.result_symbol = EXPANSION_WORD;
return (advanced_once);
}
advanced_once = true;
}
else
{
advanced_once = advanced_once || !me_isspace(lexer->data.lookahead);
advance_once_space = advance_once_space || me_isspace(lexer->data.lookahead);
lexer->data.advance((void *)lexer, false);
}
}
lexer->data.mark_end((void *)lexer);
if (lexer->data.lookahead == ')')
{
advanced_once = true;
lexer->data.advance((void *)lexer, false);
lexer->data.mark_end((void *)lexer);
if (lexer->data.lookahead == '}')
return (false);
}
else
return (false);
}
if (lexer->data.lookahead == '\'')
return (false);
if (lexer->data.eof((void *)lexer))
return (false);
advanced_once = advanced_once || !me_isspace(lexer->data.lookahead);
advance_once_space = advance_once_space || me_isspace(lexer->data.lookahead);
lexer->data.advance((void *)lexer, false); lexer->data.advance((void *)lexer, false);
lexer->data.result_symbol = VARIABLE_NAME;
return (me_isalpha(lexer->data.lookahead));
} }
} }
return (false); return (false);
} }
void *tree_sitter_sh_external_scanner_create() bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{
if (valid_symbols[CONCAT] && !(valid_symbols[ERROR_RECOVERY])
&& (!(lexer->data.lookahead == 0 || me_isspace(lexer->data.lookahead)
|| lexer->data.lookahead == '>' || lexer->data.lookahead == '<'
|| lexer->data.lookahead == ')' || lexer->data.lookahead == '('
|| lexer->data.lookahead == ';' || lexer->data.lookahead == '&'
|| lexer->data.lookahead == '|' || lexer->data.lookahead == '{'
|| lexer->data.lookahead == '}')))
return (scan_concat(scanner, lexer, valid_symbols));
if (scan_double_hash(scanner, lexer, valid_symbols))
return (true);
if (valid_symbols[EMPTY_VALUE] && (me_isspace(lexer->data.lookahead)
|| lexer->data.eof((void *)lexer) || lexer->data.lookahead == ';'
|| lexer->data.lookahead == '&'))
return (lexer->data.result_symbol = EMPTY_VALUE, true);
if ((valid_symbols[HEREDOC_BODY_BEGINNING]
|| valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.len > 0
&& !vec_heredoc_last(&scanner->heredocs)->started
&& !(valid_symbols[ERROR_RECOVERY]))
return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING,
SIMPLE_HEREDOC_BODY));
if (scan_heredoc_end(scanner, lexer, valid_symbols))
return (true);
if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0
&& vec_heredoc_last(&scanner->heredocs)->started
&& !(valid_symbols[ERROR_RECOVERY]))
return (scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT,
HEREDOC_END));
if (valid_symbols[HEREDOC_START] && !(valid_symbols[ERROR_RECOVERY])
&& scanner->heredocs.len > 0)
return (scan_heredoc_start(vec_heredoc_last(&scanner->heredocs),
lexer));
if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR]
|| valid_symbols[HEREDOC_ARROW])
&& !(valid_symbols[ERROR_RECOVERY]))
return (scan_literals(scanner, lexer, valid_symbols));
if (valid_symbols[BARE_DOLLAR] && !(valid_symbols[ERROR_RECOVERY])
&& scan_bare_dollar(lexer))
return (true);
if (valid_symbols[EXPANSION_WORD])
return (scan_advance_words(scanner, lexer, valid_symbols));
return (false);
}
void *tree_sitter_sh_external_scanner_create(void)
{ {
t_scanner *scanner; t_scanner *scanner;
@ -634,7 +665,8 @@ void *tree_sitter_sh_external_scanner_create()
return (scanner); return (scanner);
} }
bool tree_sitter_sh_external_scanner_scan(void *payload, t_lexer *lexer, const bool *valid_symbols) bool tree_sitter_sh_external_scanner_scan(void *payload, t_lexer *lexer,
const bool *valid_symbols)
{ {
t_scanner *scanner; t_scanner *scanner;
@ -650,7 +682,8 @@ t_u32 tree_sitter_sh_external_scanner_serialize(void *payload, t_u8 *state)
return (serialize(scanner, state)); return (serialize(scanner, state));
} }
void tree_sitter_sh_external_scanner_deserialize(void *payload, const t_u8 *state, t_u32 length) void tree_sitter_sh_external_scanner_deserialize(void *payload,
const t_u8 *state, t_u32 length)
{ {
t_scanner *scanner; t_scanner *scanner;

3
test_word.sh Executable file
View file

@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Manual check for word/concatenation scanning in minishell.
#
# Rebuilds the project with `make`; only if the build succeeds (`&&`) does it
# run ./minishell under valgrind, feeding it one command line on stdin through
# a here-string.
#
# The here-string is assembled from a single-quoted part followed by a
# double-quoted part, so bash passes it through without expanding anything.
# minishell receives:
#   /usr/bin/env echo this\ is\ a\ concat" $PATH"'jklfsjhklgfd'
# i.e. backslash-escaped spaces, then a double-quoted segment containing a
# literal $PATH, immediately followed by a single-quoted segment with no
# separating whitespace.
#
# valgrind options: leak checking is requested in full but every leak kind is
# hidden (--show-leak-kinds=none) -- presumably so the output concentrates on
# memory errors rather than leak reports; confirm this is intended.
# --track-origins=yes asks for the origin of uninitialised values, and
# --trace-children=yes makes valgrind follow child processes as well.
make && valgrind --leak-check=full --show-leak-kinds=none --track-origins=yes --track-fds=no --trace-children=yes --read-var-info=yes --read-inline-info=yes ./minishell <<<'/usr/bin/env echo this\ is\ a\ concat" $PATH"'"'jklfsjhklgfd'"