working on scanner.c a bit more

This commit is contained in:
Maieul BOYER 2024-09-05 20:07:56 +00:00
parent 34798aeb3a
commit 38fc882915

View file

@ -2,11 +2,11 @@
#include "me/string/string.h"
#include "me/types.h"
#include "parser/array.h"
#include "parser/inner/heredoc.h"
#include "parser/lexer.h"
#include "parser/parser.h"
#include <assert.h>
#include <string.h>
#include "parser/inner/heredoc.h"
typedef struct s_heredoc t_heredoc;
typedef struct s_scanner t_scanner;
@ -164,7 +164,8 @@ bool advance_word(t_lexer *lexer, t_string *unquoted_word)
}
while (lexer->data.lookahead &&
!(quote ? lexer->data.lookahead == quote || lexer->data.lookahead == '\r' || lexer->data.lookahead == '\n' : me_isspace(lexer->data.lookahead)))
!(quote ? lexer->data.lookahead == quote || lexer->data.lookahead == '\r' || lexer->data.lookahead == '\n'
: me_isspace(lexer->data.lookahead)))
{
if (lexer->data.lookahead == '\\')
{
@ -227,7 +228,8 @@ bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer)
t_i32 size = 0;
if (heredoc->delimiter.len > 0)
{
while (lexer->data.lookahead != '\0' && lexer->data.lookahead != '\n' && (t_i32) * (&heredoc->delimiter.buf[size]) == lexer->data.lookahead &&
while (lexer->data.lookahead != '\0' && lexer->data.lookahead != '\n' &&
(t_i32) * (&heredoc->delimiter.buf[size]) == lexer->data.lookahead &&
heredoc->current_leading_word.len < heredoc->delimiter.len)
{
string_push_char(&heredoc->current_leading_word, lexer->data.lookahead);
@ -357,683 +359,11 @@ bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, enum e_token_type
}
}
bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{
if (valid_symbols[CONCAT] && !(valid_symbols[ERROR_RECOVERY]))
{
if (!(lexer->data.lookahead == 0 || me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '>' || lexer->data.lookahead == '<' ||
lexer->data.lookahead == ')' || lexer->data.lookahead == '(' || lexer->data.lookahead == ';' || lexer->data.lookahead == '&' ||
lexer->data.lookahead == '|' || lexer->data.lookahead == '{' || lexer->data.lookahead == '}'))
{
lexer->data.result_symbol = CONCAT;
// So for a`b`, we want to return a concat. We check if the
// 2nd backtick has whitespace after it, and if it does we
// return concat.
if (lexer->data.lookahead == '`')
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
while (lexer->data.lookahead != '`' && !lexer->data.eof((void *)lexer))
{
lexer->data.advance((void *)lexer, false);
}
if (lexer->data.eof((void *)lexer))
{
return false;
}
if (lexer->data.lookahead == '`')
{
lexer->data.advance((void *)lexer, false);
}
return me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer);
}
// strings w/ expansions that contains escaped quotes or
// backslashes need this to return a concat
if (lexer->data.lookahead == '\\')
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '"' || lexer->data.lookahead == '\'' || lexer->data.lookahead == '\\')
{
return true;
}
if (lexer->data.eof((void *)lexer))
{
return false;
}
}
else
{
return true;
}
}
}
if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !(valid_symbols[ERROR_RECOVERY]))
{
// advance two # and ensure not } after
if (lexer->data.lookahead == '#')
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '#')
{
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead != '}')
{
lexer->data.result_symbol = IMMEDIATE_DOUBLE_HASH;
lexer->data.mark_end((void *)lexer);
return true;
}
}
}
}
if (valid_symbols[EMPTY_VALUE])
{
if (me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer) || lexer->data.lookahead == ';' || lexer->data.lookahead == '&')
{
lexer->data.result_symbol = EMPTY_VALUE;
return true;
}
}
if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.size > 0 &&
!array_back(&scanner->heredocs)->started && !(valid_symbols[ERROR_RECOVERY]))
return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY));
if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0)
{
t_heredoc *heredoc = array_back(&scanner->heredocs);
if (scan_heredoc_end_identifier(heredoc, lexer))
{
array_delete(&heredoc->current_leading_word);
array_delete(&heredoc->delimiter);
(void)array_pop(&scanner->heredocs);
lexer->data.result_symbol = HEREDOC_END;
return true;
}
}
if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 && array_back(&scanner->heredocs)->started &&
!(valid_symbols[ERROR_RECOVERY]))
return (scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END));
if (valid_symbols[HEREDOC_START] && !(valid_symbols[ERROR_RECOVERY]) && scanner->heredocs.size > 0)
return (scan_heredoc_start(array_back(&scanner->heredocs), lexer));
if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) &&
!(valid_symbols[ERROR_RECOVERY]))
{
for (;;)
{
if ((lexer->data.lookahead == ' ' || lexer->data.lookahead == '\t' || lexer->data.lookahead == '\r' ||
(lexer->data.lookahead == '\n' && !valid_symbols[NEWLINE])) &&
!valid_symbols[EXPANSION_WORD])
{
lexer->data.advance((void *)lexer, true);
}
else if (lexer->data.lookahead == '\\')
{
lexer->data.advance((void *)lexer, true);
if (lexer->data.eof((void *)lexer))
{
lexer->data.mark_end((void *)lexer);
lexer->data.result_symbol = VARIABLE_NAME;
return true;
}
if (lexer->data.lookahead == '\r')
{
lexer->data.advance((void *)lexer, true);
}
if (lexer->data.lookahead == '\n')
{
lexer->data.advance((void *)lexer, true);
}
else
{
if (lexer->data.lookahead == '\\' && valid_symbols[EXPANSION_WORD])
{
goto expansion_word;
}
return false;
}
}
else
{
break;
}
}
// no '*', '@', '?', '-', '$', '0', '_'
if (!valid_symbols[EXPANSION_WORD] && (lexer->data.lookahead == '*' || lexer->data.lookahead == '@' || lexer->data.lookahead == '?' ||
lexer->data.lookahead == '-' || lexer->data.lookahead == '0' || lexer->data.lookahead == '_'))
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' || lexer->data.lookahead == ':' || lexer->data.lookahead == '-' ||
lexer->data.lookahead == '%' || lexer->data.lookahead == '#' || lexer->data.lookahead == '/')
{
return false;
}
if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->data.lookahead))
{
lexer->data.mark_end((void *)lexer);
lexer->data.result_symbol = EXTGLOB_PATTERN;
return true;
}
}
if (valid_symbols[HEREDOC_ARROW] && lexer->data.lookahead == '<')
{
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '<')
{
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '-')
{
lexer->data.advance((void *)lexer, false);
t_heredoc heredoc = heredoc_new();
heredoc.allows_indent = true;
array_push(&scanner->heredocs, heredoc);
lexer->data.result_symbol = HEREDOC_ARROW_DASH;
}
// else if (lexer->data.lookahead == '<' || lexer->data.lookahead == '=')
// {
// return false;
// }
else
{
t_heredoc heredoc = heredoc_new();
array_push(&scanner->heredocs, heredoc);
lexer->data.result_symbol = HEREDOC_ARROW;
}
return true;
}
return false;
}
bool is_number = true;
if (me_isdigit(lexer->data.lookahead))
{
lexer->data.advance((void *)lexer, false);
}
else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_')
{
is_number = false;
lexer->data.advance((void *)lexer, false);
}
else
{
if (lexer->data.lookahead == '{')
{
goto brace_start;
}
if (valid_symbols[EXPANSION_WORD])
{
goto expansion_word;
}
if (valid_symbols[EXTGLOB_PATTERN])
{
goto extglob_pattern;
}
return false;
}
for (;;)
{
if (me_isdigit(lexer->data.lookahead))
{
lexer->data.advance((void *)lexer, false);
}
else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_')
{
is_number = false;
lexer->data.advance((void *)lexer, false);
}
else
{
break;
}
}
if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->data.lookahead == '>' || lexer->data.lookahead == '<'))
{
lexer->data.result_symbol = FILE_DESCRIPTOR;
return true;
}
if (valid_symbols[VARIABLE_NAME])
{
if (lexer->data.lookahead == '+')
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '=' || lexer->data.lookahead == ':')
{
lexer->data.result_symbol = VARIABLE_NAME;
return true;
}
return false;
}
if (lexer->data.lookahead == '/')
{
return false;
}
if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' ||
(lexer->data.lookahead == ':' &&
!valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable
// names for function words, only handling : for now? #235
lexer->data.lookahead == '%' ||
(lexer->data.lookahead == '#' && !is_number) || lexer->data.lookahead == '@' || (lexer->data.lookahead == '-'))
{
lexer->data.mark_end((void *)lexer);
lexer->data.result_symbol = VARIABLE_NAME;
return true;
}
if (lexer->data.lookahead == '?')
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
lexer->data.result_symbol = VARIABLE_NAME;
return me_isalpha(lexer->data.lookahead);
}
}
return false;
}
if (valid_symbols[BARE_DOLLAR] && !(valid_symbols[ERROR_RECOVERY]) && scan_bare_dollar(lexer))
return (true);
// if ((valid_symbols[REGEX]) && !(valid_symbols[ERROR_RECOVERY]))
// {
// if (valid_symbols[REGEX])
// {
// while (me_isspace(lexer->data.lookahead))
// {
// lexer->data.advance((void *)lexer, true);
// }
// }
//
// if ((lexer->data.lookahead != '"' && lexer->data.lookahead != '\'') || ((lexer->data.lookahead == '$' || lexer->data.lookahead == '\'')) ||
// (lexer->data.lookahead == '\''))
// {
// typedef struct
// {
// bool done;
// bool advanced_once;
// bool found_non_alnumdollarunderdash;
// bool last_was_escape;
// bool in_single_quote;
// t_u32 paren_depth;
// t_u32 bracket_depth;
// t_u32 brace_depth;
// } State;
//
// if (lexer->data.lookahead == '$')
// {
// lexer->data.mark_end((void *)lexer);
// lexer->data.advance((void *)lexer, false);
// if (lexer->data.lookahead == '(')
// {
// return false;
// }
// }
//
// lexer->data.mark_end((void *)lexer);
//
// State state = {false, false, false, false, false, 0, 0, 0};
// while (!state.done)
// {
// if (state.in_single_quote)
// {
// if (lexer->data.lookahead == '\'')
// {
// state.in_single_quote = false;
// lexer->data.advance((void *)lexer, false);
// lexer->data.mark_end((void *)lexer);
// }
// }
// switch (lexer->data.lookahead)
// {
// case '\\':
// state.last_was_escape = true;
// break;
// case '\0':
// return false;
// case '(':
// state.paren_depth++;
// state.last_was_escape = false;
// break;
// case '[':
// state.bracket_depth++;
// state.last_was_escape = false;
// break;
// case '{':
// if (!state.last_was_escape)
// state.brace_depth++;
// state.last_was_escape = false;
// break;
// case ')':
// if (state.paren_depth == 0)
// state.done = true;
// state.paren_depth--;
// state.last_was_escape = false;
// break;
// case ']':
// if (state.bracket_depth == 0)
// state.done = true;
// state.bracket_depth--;
// state.last_was_escape = false;
// break;
// case '}':
// if (state.brace_depth == 0)
// state.done = true;
// state.brace_depth--;
// state.last_was_escape = false;
// break;
// case '\'':
// // Enter or exit a single-quoted string.
// state.in_single_quote = !state.in_single_quote;
// lexer->data.advance((void *)lexer, false);
// state.advanced_once = true;
// state.last_was_escape = false;
// continue;
// default:
// state.last_was_escape = false;
// break;
// }
//
// if (!state.done)
// {
// if (valid_symbols[REGEX])
// {
// bool was_space = !state.in_single_quote && me_isspace(lexer->data.lookahead);
// lexer->data.advance((void *)lexer, false);
// state.advanced_once = true;
// if (!was_space || state.paren_depth > 0)
// {
// lexer->data.mark_end((void *)lexer);
// }
// }
// }
// }
//
// lexer->data.result_symbol = REGEX;
// if (valid_symbols[REGEX] && !state.advanced_once)
// {
// return false;
// }
// return true;
// }
// }
extglob_pattern:
// if (valid_symbols[EXTGLOB_PATTERN] && !(valid_symbols[ERROR_RECOVERY]))
// {
// // first skip ws, then check for ? * + @ !
// while (me_isspace(lexer->data.lookahead))
// {
// lexer->data.advance((void *)lexer, true);
// }
//
// if (lexer->data.lookahead == '?' || lexer->data.lookahead == '*' || lexer->data.lookahead == '+' || lexer->data.lookahead == '@' ||
// lexer->data.lookahead == '!' || lexer->data.lookahead == '-' || lexer->data.lookahead == ')' || lexer->data.lookahead == '\\' ||
// lexer->data.lookahead == '.' || lexer->data.lookahead == '[' || (me_isalpha(lexer->data.lookahead)))
// {
// if (lexer->data.lookahead == '\\')
// {
// lexer->data.advance((void *)lexer, false);
// if ((me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '"') && lexer->data.lookahead != '\r' && lexer->data.lookahead != '\n')
// {
// lexer->data.advance((void *)lexer, false);
// }
// else
// {
// return false;
// }
// }
//
// if (lexer->data.lookahead == ')' && scanner->last_glob_paren_depth == 0)
// {
// lexer->data.mark_end((void *)lexer);
// lexer->data.advance((void *)lexer, false);
//
// if (me_isspace(lexer->data.lookahead))
// {
// return false;
// }
// }
//
// lexer->data.mark_end((void *)lexer);
// bool was_non_alpha = !me_isalpha(lexer->data.lookahead);
// if (lexer->data.lookahead != '[')
// {
// // no esac
// if (lexer->data.lookahead == 'e')
// {
// lexer->data.mark_end((void *)lexer);
// lexer->data.advance((void *)lexer, false);
// if (lexer->data.lookahead == 's')
// {
// lexer->data.advance((void *)lexer, false);
// if (lexer->data.lookahead == 'a')
// {
// lexer->data.advance((void *)lexer, false);
// if (lexer->data.lookahead == 'c')
// {
// lexer->data.advance((void *)lexer, false);
// if (me_isspace(lexer->data.lookahead))
// {
// return false;
// }
// }
// }
// }
// }
// else
// {
// lexer->data.advance((void *)lexer, false);
// }
// }
//
// // -\w is just a word, find something else special
// if (lexer->data.lookahead == '-')
// {
// lexer->data.mark_end((void *)lexer);
// lexer->data.advance((void *)lexer, false);
// while (me_isalnum(lexer->data.lookahead))
// {
// lexer->data.advance((void *)lexer, false);
// }
//
// if (lexer->data.lookahead == ')' || lexer->data.lookahead == '\\' || lexer->data.lookahead == '.')
// {
// return false;
// }
// lexer->data.mark_end((void *)lexer);
// }
//
// // case item -) or *)
// if (lexer->data.lookahead == ')' && scanner->last_glob_paren_depth == 0)
// {
// lexer->data.mark_end((void *)lexer);
// lexer->data.advance((void *)lexer, false);
// if (me_isspace(lexer->data.lookahead))
// {
// lexer->data.result_symbol = EXTGLOB_PATTERN;
// return was_non_alpha;
// }
// }
//
// if (me_isspace(lexer->data.lookahead))
// {
// lexer->data.mark_end((void *)lexer);
// lexer->data.result_symbol = EXTGLOB_PATTERN;
// scanner->last_glob_paren_depth = 0;
// return true;
// }
//
// if (lexer->data.lookahead == '$')
// {
// lexer->data.mark_end((void *)lexer);
// lexer->data.advance((void *)lexer, false);
// if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(')
// {
// lexer->data.result_symbol = EXTGLOB_PATTERN;
// return true;
// }
// }
//
// if (lexer->data.lookahead == '|')
// {
// lexer->data.mark_end((void *)lexer);
// lexer->data.advance((void *)lexer, false);
// lexer->data.result_symbol = EXTGLOB_PATTERN;
// return true;
// }
//
// if (!me_isalnum(lexer->data.lookahead) && lexer->data.lookahead != '(' && lexer->data.lookahead != '"' && lexer->data.lookahead != '[' &&
// lexer->data.lookahead != '?' && lexer->data.lookahead != '/' && lexer->data.lookahead != '\\' && lexer->data.lookahead != '_' &&
// lexer->data.lookahead != '*')
// {
// return false;
// }
//
// typedef struct
// {
// bool done;
// bool saw_non_alphadot;
// t_u32 paren_depth;
// t_u32 bracket_depth;
// t_u32 brace_depth;
// } State;
//
// State state = {false, was_non_alpha, scanner->last_glob_paren_depth, 0, 0};
// while (!state.done)
// {
// switch (lexer->data.lookahead)
// {
// case '\0':
// return false;
// case '(':
// state.paren_depth++;
// break;
// case '[':
// state.bracket_depth++;
// break;
// case '{':
// state.brace_depth++;
// break;
// case ')':
// if (state.paren_depth == 0)
// {
// state.done = true;
// }
// state.paren_depth--;
// break;
// case ']':
// if (state.bracket_depth == 0)
// {
// state.done = true;
// }
// state.bracket_depth--;
// break;
// case '}':
// if (state.brace_depth == 0)
// {
// state.done = true;
// }
// state.brace_depth--;
// break;
// }
//
// if (lexer->data.lookahead == '|')
// {
// lexer->data.mark_end((void *)lexer);
// lexer->data.advance((void *)lexer, false);
// if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0)
// {
// lexer->data.result_symbol = EXTGLOB_PATTERN;
// return true;
// }
// }
//
// if (!state.done)
// {
// bool was_space = me_isspace(lexer->data.lookahead);
// if (lexer->data.lookahead == '$')
// {
// lexer->data.mark_end((void *)lexer);
// if (!me_isalpha(lexer->data.lookahead) && lexer->data.lookahead != '.' && lexer->data.lookahead != '\\')
// {
// state.saw_non_alphadot = true;
// }
// lexer->data.advance((void *)lexer, false);
// if (lexer->data.lookahead == '(' || lexer->data.lookahead == '{')
// {
// lexer->data.result_symbol = EXTGLOB_PATTERN;
// scanner->last_glob_paren_depth = state.paren_depth;
// return state.saw_non_alphadot;
// }
// }
// if (was_space)
// {
// lexer->data.mark_end((void *)lexer);
// lexer->data.result_symbol = EXTGLOB_PATTERN;
// scanner->last_glob_paren_depth = 0;
// return state.saw_non_alphadot;
// }
// if (lexer->data.lookahead == '"')
// {
// lexer->data.mark_end((void *)lexer);
// lexer->data.result_symbol = EXTGLOB_PATTERN;
// scanner->last_glob_paren_depth = 0;
// return state.saw_non_alphadot;
// }
// if (lexer->data.lookahead == '\\')
// {
// if (!me_isalpha(lexer->data.lookahead) && lexer->data.lookahead != '.' && lexer->data.lookahead != '\\')
// {
// state.saw_non_alphadot = true;
// }
// lexer->data.advance((void *)lexer, false);
// if (me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '"')
// {
// lexer->data.advance((void *)lexer, false);
// }
// }
// else
// {
// if (!me_isalpha(lexer->data.lookahead) && lexer->data.lookahead != '.' && lexer->data.lookahead != '\\')
// {
// state.saw_non_alphadot = true;
// }
// lexer->data.advance((void *)lexer, false);
// }
// if (!was_space)
// {
// lexer->data.mark_end((void *)lexer);
// }
// }
// }
//
// lexer->data.result_symbol = EXTGLOB_PATTERN;
// scanner->last_glob_paren_depth = 0;
// return state.saw_non_alphadot;
// }
// scanner->last_glob_paren_depth = 0;
//
// return false;
// }
expansion_word:
if (valid_symbols[EXPANSION_WORD])
bool scan_expansion_word(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{
(void)(scanner);
(void)(lexer);
(void)(valid_symbols);
bool advanced_once = false;
bool advance_once_space = false;
for (;;)
@ -1044,7 +374,8 @@ expansion_word:
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' || me_isalnum(lexer->data.lookahead))
if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' ||
me_isalnum(lexer->data.lookahead))
{
lexer->data.result_symbol = EXPANSION_WORD;
return advanced_once;
@ -1073,7 +404,8 @@ expansion_word:
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' || me_isalnum(lexer->data.lookahead))
if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' ||
me_isalnum(lexer->data.lookahead))
{
lexer->data.result_symbol = EXPANSION_WORD;
return advanced_once;
@ -1108,11 +440,242 @@ expansion_word:
advance_once_space = advance_once_space || me_isspace(lexer->data.lookahead);
lexer->data.advance((void *)lexer, false);
}
return (false);
}
brace_start:
/*
 * Emits a CONCAT token at the current position.
 *
 * result_symbol is set to CONCAT unconditionally. When the lookahead is a
 * backslash, the token end is marked before it and the backslash is consumed
 * to inspect the following character:
 *   - '"', '\'' or '\\'  -> true
 *   - end of input       -> false
 *   - anything else      -> true
 * Any other lookahead yields true without touching the lexer position.
 *
 * scanner and valid_symbols are unused; they are kept so that every scan_*
 * helper shares the same signature.
 */
bool scan_concat(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{
	(void)scanner;
	(void)valid_symbols;
	lexer->data.result_symbol = CONCAT;
	if (lexer->data.lookahead != '\\')
		return (true);
	/* Mark first so the backslash itself stays outside the CONCAT token. */
	lexer->data.mark_end((void *)lexer);
	lexer->data.advance((void *)lexer, false);
	if (lexer->data.lookahead == '"' || lexer->data.lookahead == '\'' || lexer->data.lookahead == '\\')
		return (true);
	return (!lexer->data.eof((void *)lexer));
}
/*
 * Recognises "##" that is not directly followed by '}' and reports it as
 * IMMEDIATE_DOUBLE_HASH.
 *
 * Returns true only when both '#' characters were consumed and the next
 * character is not '}'. In every other case it returns false; note that the
 * lexer may already have been advanced past one or two '#' by then.
 *
 * scanner and valid_symbols are unused.
 */
bool check_scan_immediate_double_hash(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{
	(void)scanner;
	(void)valid_symbols;
	if (lexer->data.lookahead != '#')
		return (false);
	lexer->data.mark_end((void *)lexer);
	lexer->data.advance((void *)lexer, false);
	if (lexer->data.lookahead != '#')
		return (false);
	lexer->data.advance((void *)lexer, false);
	if (lexer->data.lookahead == '}')
		return (false);
	lexer->data.result_symbol = IMMEDIATE_DOUBLE_HASH;
	/* Extend the token so that it covers both '#' characters. */
	lexer->data.mark_end((void *)lexer);
	return (true);
}
/*
 * Tries to recognise one of VARIABLE_NAME, FILE_DESCRIPTOR, HEREDOC_ARROW,
 * HEREDOC_ARROW_DASH or EXTGLOB_PATTERN starting at the current lookahead.
 * In some situations, when EXPANSION_WORD is a valid symbol, the work is
 * handed over to scan_expansion_word().
 *
 * On success lexer->data.result_symbol is set and true is returned.
 * On failure false is returned; the lexer may already have been advanced.
 * Recognising "<<" or "<<-" pushes a new heredoc onto scanner->heredocs.
 */
bool scan_variable_name(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{
/* Leading blanks and backslash-newline continuations. */
for (;;)
{
/* Space, tab, CR (and LF unless NEWLINE is valid) are consumed, but only */
/* when EXPANSION_WORD is not a valid symbol. */
/* NOTE(review): advance(..., true) presumably marks the character as */
/* skipped, as in tree-sitter's lexer API -- confirm against t_lexer. */
if ((lexer->data.lookahead == ' ' || lexer->data.lookahead == '\t' || lexer->data.lookahead == '\r' ||
(lexer->data.lookahead == '\n' && !valid_symbols[NEWLINE])) &&
!valid_symbols[EXPANSION_WORD])
lexer->data.advance((void *)lexer, true);
else if (lexer->data.lookahead == '\\')
{
lexer->data.advance((void *)lexer, true);
/* A backslash that is the last character of the input is reported as */
/* a VARIABLE_NAME. */
if (lexer->data.eof((void *)lexer))
{
lexer->data.mark_end((void *)lexer);
lexer->data.result_symbol = VARIABLE_NAME;
return true;
}
/* Accept "\\\r\n" as well as "\\\n" as a line continuation. */
if (lexer->data.lookahead == '\r')
lexer->data.advance((void *)lexer, true);
if (lexer->data.lookahead == '\n')
lexer->data.advance((void *)lexer, true);
else
{
/* Backslash not followed by a newline: only a second backslash with */
/* EXPANSION_WORD valid is handled, by delegation. */
if (lexer->data.lookahead == '\\' && valid_symbols[EXPANSION_WORD])
return (scan_expansion_word(scanner, lexer, valid_symbols));
return false;
}
}
else
break;
}
/* One of the single characters * @ ? - 0 _ (only when EXPANSION_WORD is */
/* not valid). The token end is marked before the character is consumed. */
if (!valid_symbols[EXPANSION_WORD] && (lexer->data.lookahead == '*' || lexer->data.lookahead == '@' || lexer->data.lookahead == '?' ||
lexer->data.lookahead == '-' || lexer->data.lookahead == '0' || lexer->data.lookahead == '_'))
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
/* Followed by one of = [ : - % # / : not handled here. */
if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' || lexer->data.lookahead == ':' || lexer->data.lookahead == '-' ||
lexer->data.lookahead == '%' || lexer->data.lookahead == '#' || lexer->data.lookahead == '/')
return (false);
/* Followed by whitespace while EXTGLOB_PATTERN is valid: the character */
/* itself becomes the token (mark_end is moved past it). */
if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->data.lookahead))
{
lexer->data.mark_end((void *)lexer);
lexer->data.result_symbol = EXTGLOB_PATTERN;
return (true);
}
/* Otherwise scanning continues below with that character consumed. */
}
/* "<<" and "<<-": register a new heredoc and report the arrow token. */
/* A single '<' is rejected. */
if (valid_symbols[HEREDOC_ARROW] && lexer->data.lookahead == '<')
{
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '<')
{
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '-')
{
/* "<<-": the heredoc is flagged with allows_indent. */
lexer->data.advance((void *)lexer, false);
t_heredoc heredoc = heredoc_new();
heredoc.allows_indent = true;
array_push(&scanner->heredocs, heredoc);
lexer->data.result_symbol = HEREDOC_ARROW_DASH;
}
else
{
t_heredoc heredoc = heredoc_new();
array_push(&scanner->heredocs, heredoc);
lexer->data.result_symbol = HEREDOC_ARROW;
}
return (true);
}
return (false);
}
/* Word made of digits, letters and '_'. is_number stays true only while */
/* every consumed character is a digit. */
bool is_number = true;
if (me_isdigit(lexer->data.lookahead))
lexer->data.advance((void *)lexer, false);
else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_')
{
is_number = false;
lexer->data.advance((void *)lexer, false);
}
else
{
/* NOTE(review): the '{' and EXTGLOB_PATTERN checks return false exactly */
/* like the final fallthrough; only the EXPANSION_WORD branch changes the */
/* outcome. They look like placeholders for dedicated scanners. */
if (lexer->data.lookahead == '{')
return (false);
if (valid_symbols[EXPANSION_WORD])
return (scan_expansion_word(scanner, lexer, valid_symbols));
if (valid_symbols[EXTGLOB_PATTERN])
return (false);
return false;
}
/* Consume the remaining characters of the word. */
for (;;)
{
if (me_isdigit(lexer->data.lookahead))
lexer->data.advance((void *)lexer, false);
else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_')
{
is_number = false;
lexer->data.advance((void *)lexer, false);
}
else
break;
}
/* An all-digit word directly followed by '>' or '<' is a file descriptor. */
if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->data.lookahead == '>' || lexer->data.lookahead == '<'))
{
lexer->data.result_symbol = FILE_DESCRIPTOR;
return (true);
}
if (valid_symbols[VARIABLE_NAME])
{
/* "name+=" / "name+:" : the token end is marked before the '+'. */
if (lexer->data.lookahead == '+')
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
if (lexer->data.lookahead == '=' || lexer->data.lookahead == ':')
{
lexer->data.result_symbol = VARIABLE_NAME;
return (true);
}
return (false);
}
/* A word followed by '/' is never reported as a variable name. */
if (lexer->data.lookahead == '/')
return (false);
/* Characters that may directly follow a variable name: = [ % @ - , */
/* ':' unless OPENING_PAREN is valid, and '#' unless the word is all digits. */
if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' ||
(lexer->data.lookahead == ':' && !valid_symbols[OPENING_PAREN]) || lexer->data.lookahead == '%' ||
(lexer->data.lookahead == '#' && !is_number) || lexer->data.lookahead == '@' || (lexer->data.lookahead == '-'))
{
lexer->data.mark_end((void *)lexer);
lexer->data.result_symbol = VARIABLE_NAME;
return (true);
}
/* "name?" counts only when a letter follows the '?'; the token end is */
/* marked before the '?'. */
if (lexer->data.lookahead == '?')
{
lexer->data.mark_end((void *)lexer);
lexer->data.advance((void *)lexer, false);
lexer->data.result_symbol = VARIABLE_NAME;
return (me_isalpha(lexer->data.lookahead));
}
}
return (false);
}
/*
 * Checks whether the input at the current position is the end identifier of
 * the heredoc at the back of scanner->heredocs.
 *
 * On a match the heredoc's current_leading_word and delimiter are deleted,
 * the heredoc is popped from scanner->heredocs, result_symbol is set to
 * HEREDOC_END and true is returned. Otherwise false is returned and the
 * heredoc stays on the stack.
 *
 * The caller must make sure scanner->heredocs is not empty.
 * valid_symbols is unused.
 */
bool check_scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{
	t_heredoc *current;

	(void)valid_symbols;
	current = array_back(&scanner->heredocs);
	if (!scan_heredoc_end_identifier(current, lexer))
		return (false);
	array_delete(&current->current_leading_word);
	array_delete(&current->delimiter);
	(void)array_pop(&scanner->heredocs);
	lexer->data.result_symbol = HEREDOC_END;
	return (true);
}
/*
 * Entry point of the external scanner: tries each external token in a fixed
 * priority order and dispatches to the matching scan_* helper.
 *
 * scanner        scanner state (stack of pending heredocs).
 * lexer          lexer handle; result_symbol is set by the helper that matches.
 * valid_symbols  array indexed by token type, true for tokens accepted here.
 *
 * Returns true when a token was recognised, false otherwise.
 *
 * Most branches are disabled while ERROR_RECOVERY is a valid symbol; the
 * EMPTY_VALUE, HEREDOC_END and EXPANSION_WORD branches are not.
 */
bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{
	/* CONCAT: only when the lookahead cannot terminate a word. */
	if (valid_symbols[CONCAT] && !(valid_symbols[ERROR_RECOVERY]) &&
		!(lexer->data.lookahead == 0 || me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '>' || lexer->data.lookahead == '<' ||
		  lexer->data.lookahead == ')' || lexer->data.lookahead == '(' || lexer->data.lookahead == ';' || lexer->data.lookahead == '&' ||
		  lexer->data.lookahead == '|' || lexer->data.lookahead == '{' || lexer->data.lookahead == '}'))
		return (scan_concat(scanner, lexer, valid_symbols));
	if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !(valid_symbols[ERROR_RECOVERY]) &&
		check_scan_immediate_double_hash(scanner, lexer, valid_symbols))
		return (true);
	/* EMPTY_VALUE: zero-width token in front of whitespace, EOF, ';' or '&'. */
	if (valid_symbols[EMPTY_VALUE] && (me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer) ||
									   lexer->data.lookahead == ';' || lexer->data.lookahead == '&'))
	{
		lexer->data.result_symbol = EMPTY_VALUE;
		return (true);
	}
	/* Body of a heredoc that has not been started yet. */
	if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.size > 0 &&
		!array_back(&scanner->heredocs)->started && !(valid_symbols[ERROR_RECOVERY]))
		return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY));
	/*
	 * check_scan_heredoc_end() has already popped the heredoc and set
	 * result_symbol to HEREDOC_END when it returns true, so the token must be
	 * reported as recognised. Returning false here would drop the token while
	 * leaving the scanner state modified.
	 */
	if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0 && check_scan_heredoc_end(scanner, lexer, valid_symbols))
		return (true);
	/* Further content of a heredoc that is already started. */
	if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 && array_back(&scanner->heredocs)->started &&
		!(valid_symbols[ERROR_RECOVERY]))
		return (scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END));
	if (valid_symbols[HEREDOC_START] && !(valid_symbols[ERROR_RECOVERY]) && scanner->heredocs.size > 0)
		return (scan_heredoc_start(array_back(&scanner->heredocs), lexer));
	if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) &&
		!(valid_symbols[ERROR_RECOVERY]))
		return (scan_variable_name(scanner, lexer, valid_symbols));
	if (valid_symbols[BARE_DOLLAR] && !(valid_symbols[ERROR_RECOVERY]) && scan_bare_dollar(lexer))
		return (true);
	if (valid_symbols[EXPANSION_WORD])
		return (scan_expansion_word(scanner, lexer, valid_symbols));
	return (false);
}
void *tree_sitter_sh_external_scanner_create()
{