working on scanner.c a bit more
This commit is contained in:
parent
34798aeb3a
commit
38fc882915
1 changed files with 283 additions and 720 deletions
|
|
@ -2,11 +2,11 @@
|
|||
#include "me/string/string.h"
|
||||
#include "me/types.h"
|
||||
#include "parser/array.h"
|
||||
#include "parser/inner/heredoc.h"
|
||||
#include "parser/lexer.h"
|
||||
#include "parser/parser.h"
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "parser/inner/heredoc.h"
|
||||
|
||||
typedef struct s_heredoc t_heredoc;
|
||||
typedef struct s_scanner t_scanner;
|
||||
|
|
@ -164,7 +164,8 @@ bool advance_word(t_lexer *lexer, t_string *unquoted_word)
|
|||
}
|
||||
|
||||
while (lexer->data.lookahead &&
|
||||
!(quote ? lexer->data.lookahead == quote || lexer->data.lookahead == '\r' || lexer->data.lookahead == '\n' : me_isspace(lexer->data.lookahead)))
|
||||
!(quote ? lexer->data.lookahead == quote || lexer->data.lookahead == '\r' || lexer->data.lookahead == '\n'
|
||||
: me_isspace(lexer->data.lookahead)))
|
||||
{
|
||||
if (lexer->data.lookahead == '\\')
|
||||
{
|
||||
|
|
@ -227,7 +228,8 @@ bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer)
|
|||
t_i32 size = 0;
|
||||
if (heredoc->delimiter.len > 0)
|
||||
{
|
||||
while (lexer->data.lookahead != '\0' && lexer->data.lookahead != '\n' && (t_i32) * (&heredoc->delimiter.buf[size]) == lexer->data.lookahead &&
|
||||
while (lexer->data.lookahead != '\0' && lexer->data.lookahead != '\n' &&
|
||||
(t_i32) * (&heredoc->delimiter.buf[size]) == lexer->data.lookahead &&
|
||||
heredoc->current_leading_word.len < heredoc->delimiter.len)
|
||||
{
|
||||
string_push_char(&heredoc->current_leading_word, lexer->data.lookahead);
|
||||
|
|
@ -357,683 +359,11 @@ bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, enum e_token_type
|
|||
}
|
||||
}
|
||||
|
||||
bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
||||
{
|
||||
if (valid_symbols[CONCAT] && !(valid_symbols[ERROR_RECOVERY]))
|
||||
{
|
||||
if (!(lexer->data.lookahead == 0 || me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '>' || lexer->data.lookahead == '<' ||
|
||||
lexer->data.lookahead == ')' || lexer->data.lookahead == '(' || lexer->data.lookahead == ';' || lexer->data.lookahead == '&' ||
|
||||
lexer->data.lookahead == '|' || lexer->data.lookahead == '{' || lexer->data.lookahead == '}'))
|
||||
{
|
||||
lexer->data.result_symbol = CONCAT;
|
||||
// So for a`b`, we want to return a concat. We check if the
|
||||
// 2nd backtick has whitespace after it, and if it does we
|
||||
// return concat.
|
||||
if (lexer->data.lookahead == '`')
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
while (lexer->data.lookahead != '`' && !lexer->data.eof((void *)lexer))
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
}
|
||||
if (lexer->data.eof((void *)lexer))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (lexer->data.lookahead == '`')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
}
|
||||
return me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer);
|
||||
}
|
||||
// strings w/ expansions that contains escaped quotes or
|
||||
// backslashes need this to return a concat
|
||||
if (lexer->data.lookahead == '\\')
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
if (lexer->data.lookahead == '"' || lexer->data.lookahead == '\'' || lexer->data.lookahead == '\\')
|
||||
{
|
||||
return true;
|
||||
}
|
||||
if (lexer->data.eof((void *)lexer))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !(valid_symbols[ERROR_RECOVERY]))
|
||||
{
|
||||
// advance two # and ensure not } after
|
||||
if (lexer->data.lookahead == '#')
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
if (lexer->data.lookahead == '#')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
if (lexer->data.lookahead != '}')
|
||||
{
|
||||
lexer->data.result_symbol = IMMEDIATE_DOUBLE_HASH;
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (valid_symbols[EMPTY_VALUE])
|
||||
{
|
||||
if (me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer) || lexer->data.lookahead == ';' || lexer->data.lookahead == '&')
|
||||
{
|
||||
lexer->data.result_symbol = EMPTY_VALUE;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.size > 0 &&
|
||||
!array_back(&scanner->heredocs)->started && !(valid_symbols[ERROR_RECOVERY]))
|
||||
return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY));
|
||||
|
||||
if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0)
|
||||
{
|
||||
t_heredoc *heredoc = array_back(&scanner->heredocs);
|
||||
if (scan_heredoc_end_identifier(heredoc, lexer))
|
||||
{
|
||||
array_delete(&heredoc->current_leading_word);
|
||||
array_delete(&heredoc->delimiter);
|
||||
(void)array_pop(&scanner->heredocs);
|
||||
lexer->data.result_symbol = HEREDOC_END;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 && array_back(&scanner->heredocs)->started &&
|
||||
!(valid_symbols[ERROR_RECOVERY]))
|
||||
return (scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END));
|
||||
|
||||
if (valid_symbols[HEREDOC_START] && !(valid_symbols[ERROR_RECOVERY]) && scanner->heredocs.size > 0)
|
||||
return (scan_heredoc_start(array_back(&scanner->heredocs), lexer));
|
||||
|
||||
if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) &&
|
||||
!(valid_symbols[ERROR_RECOVERY]))
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
if ((lexer->data.lookahead == ' ' || lexer->data.lookahead == '\t' || lexer->data.lookahead == '\r' ||
|
||||
(lexer->data.lookahead == '\n' && !valid_symbols[NEWLINE])) &&
|
||||
!valid_symbols[EXPANSION_WORD])
|
||||
{
|
||||
lexer->data.advance((void *)lexer, true);
|
||||
}
|
||||
else if (lexer->data.lookahead == '\\')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, true);
|
||||
|
||||
if (lexer->data.eof((void *)lexer))
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.result_symbol = VARIABLE_NAME;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (lexer->data.lookahead == '\r')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, true);
|
||||
}
|
||||
if (lexer->data.lookahead == '\n')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (lexer->data.lookahead == '\\' && valid_symbols[EXPANSION_WORD])
|
||||
{
|
||||
goto expansion_word;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// no '*', '@', '?', '-', '$', '0', '_'
|
||||
if (!valid_symbols[EXPANSION_WORD] && (lexer->data.lookahead == '*' || lexer->data.lookahead == '@' || lexer->data.lookahead == '?' ||
|
||||
lexer->data.lookahead == '-' || lexer->data.lookahead == '0' || lexer->data.lookahead == '_'))
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' || lexer->data.lookahead == ':' || lexer->data.lookahead == '-' ||
|
||||
lexer->data.lookahead == '%' || lexer->data.lookahead == '#' || lexer->data.lookahead == '/')
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->data.lookahead))
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (valid_symbols[HEREDOC_ARROW] && lexer->data.lookahead == '<')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
if (lexer->data.lookahead == '<')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
if (lexer->data.lookahead == '-')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
t_heredoc heredoc = heredoc_new();
|
||||
heredoc.allows_indent = true;
|
||||
array_push(&scanner->heredocs, heredoc);
|
||||
lexer->data.result_symbol = HEREDOC_ARROW_DASH;
|
||||
}
|
||||
// else if (lexer->data.lookahead == '<' || lexer->data.lookahead == '=')
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
else
|
||||
{
|
||||
t_heredoc heredoc = heredoc_new();
|
||||
array_push(&scanner->heredocs, heredoc);
|
||||
lexer->data.result_symbol = HEREDOC_ARROW;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool is_number = true;
|
||||
if (me_isdigit(lexer->data.lookahead))
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
}
|
||||
else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_')
|
||||
{
|
||||
is_number = false;
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (lexer->data.lookahead == '{')
|
||||
{
|
||||
goto brace_start;
|
||||
}
|
||||
if (valid_symbols[EXPANSION_WORD])
|
||||
{
|
||||
goto expansion_word;
|
||||
}
|
||||
if (valid_symbols[EXTGLOB_PATTERN])
|
||||
{
|
||||
goto extglob_pattern;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (me_isdigit(lexer->data.lookahead))
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
}
|
||||
else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_')
|
||||
{
|
||||
is_number = false;
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->data.lookahead == '>' || lexer->data.lookahead == '<'))
|
||||
{
|
||||
lexer->data.result_symbol = FILE_DESCRIPTOR;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (valid_symbols[VARIABLE_NAME])
|
||||
{
|
||||
if (lexer->data.lookahead == '+')
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
if (lexer->data.lookahead == '=' || lexer->data.lookahead == ':')
|
||||
{
|
||||
lexer->data.result_symbol = VARIABLE_NAME;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (lexer->data.lookahead == '/')
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' ||
|
||||
(lexer->data.lookahead == ':' &&
|
||||
!valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable
|
||||
// names for function words, only handling : for now? #235
|
||||
lexer->data.lookahead == '%' ||
|
||||
(lexer->data.lookahead == '#' && !is_number) || lexer->data.lookahead == '@' || (lexer->data.lookahead == '-'))
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.result_symbol = VARIABLE_NAME;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (lexer->data.lookahead == '?')
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
lexer->data.result_symbol = VARIABLE_NAME;
|
||||
return me_isalpha(lexer->data.lookahead);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (valid_symbols[BARE_DOLLAR] && !(valid_symbols[ERROR_RECOVERY]) && scan_bare_dollar(lexer))
|
||||
return (true);
|
||||
|
||||
// if ((valid_symbols[REGEX]) && !(valid_symbols[ERROR_RECOVERY]))
|
||||
// {
|
||||
// if (valid_symbols[REGEX])
|
||||
// {
|
||||
// while (me_isspace(lexer->data.lookahead))
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, true);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if ((lexer->data.lookahead != '"' && lexer->data.lookahead != '\'') || ((lexer->data.lookahead == '$' || lexer->data.lookahead == '\'')) ||
|
||||
// (lexer->data.lookahead == '\''))
|
||||
// {
|
||||
// typedef struct
|
||||
// {
|
||||
// bool done;
|
||||
// bool advanced_once;
|
||||
// bool found_non_alnumdollarunderdash;
|
||||
// bool last_was_escape;
|
||||
// bool in_single_quote;
|
||||
// t_u32 paren_depth;
|
||||
// t_u32 bracket_depth;
|
||||
// t_u32 brace_depth;
|
||||
// } State;
|
||||
//
|
||||
// if (lexer->data.lookahead == '$')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (lexer->data.lookahead == '(')
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
//
|
||||
// State state = {false, false, false, false, false, 0, 0, 0};
|
||||
// while (!state.done)
|
||||
// {
|
||||
// if (state.in_single_quote)
|
||||
// {
|
||||
// if (lexer->data.lookahead == '\'')
|
||||
// {
|
||||
// state.in_single_quote = false;
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// }
|
||||
// }
|
||||
// switch (lexer->data.lookahead)
|
||||
// {
|
||||
// case '\\':
|
||||
// state.last_was_escape = true;
|
||||
// break;
|
||||
// case '\0':
|
||||
// return false;
|
||||
// case '(':
|
||||
// state.paren_depth++;
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// case '[':
|
||||
// state.bracket_depth++;
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// case '{':
|
||||
// if (!state.last_was_escape)
|
||||
// state.brace_depth++;
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// case ')':
|
||||
// if (state.paren_depth == 0)
|
||||
// state.done = true;
|
||||
// state.paren_depth--;
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// case ']':
|
||||
// if (state.bracket_depth == 0)
|
||||
// state.done = true;
|
||||
// state.bracket_depth--;
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// case '}':
|
||||
// if (state.brace_depth == 0)
|
||||
// state.done = true;
|
||||
// state.brace_depth--;
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// case '\'':
|
||||
// // Enter or exit a single-quoted string.
|
||||
// state.in_single_quote = !state.in_single_quote;
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// state.advanced_once = true;
|
||||
// state.last_was_escape = false;
|
||||
// continue;
|
||||
// default:
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// }
|
||||
//
|
||||
// if (!state.done)
|
||||
// {
|
||||
// if (valid_symbols[REGEX])
|
||||
// {
|
||||
// bool was_space = !state.in_single_quote && me_isspace(lexer->data.lookahead);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// state.advanced_once = true;
|
||||
// if (!was_space || state.paren_depth > 0)
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// lexer->data.result_symbol = REGEX;
|
||||
// if (valid_symbols[REGEX] && !state.advanced_once)
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
|
||||
extglob_pattern:
|
||||
// if (valid_symbols[EXTGLOB_PATTERN] && !(valid_symbols[ERROR_RECOVERY]))
|
||||
// {
|
||||
// // first skip ws, then check for ? * + @ !
|
||||
// while (me_isspace(lexer->data.lookahead))
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, true);
|
||||
// }
|
||||
//
|
||||
// if (lexer->data.lookahead == '?' || lexer->data.lookahead == '*' || lexer->data.lookahead == '+' || lexer->data.lookahead == '@' ||
|
||||
// lexer->data.lookahead == '!' || lexer->data.lookahead == '-' || lexer->data.lookahead == ')' || lexer->data.lookahead == '\\' ||
|
||||
// lexer->data.lookahead == '.' || lexer->data.lookahead == '[' || (me_isalpha(lexer->data.lookahead)))
|
||||
// {
|
||||
// if (lexer->data.lookahead == '\\')
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if ((me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '"') && lexer->data.lookahead != '\r' && lexer->data.lookahead != '\n')
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (lexer->data.lookahead == ')' && scanner->last_glob_paren_depth == 0)
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
//
|
||||
// if (me_isspace(lexer->data.lookahead))
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// bool was_non_alpha = !me_isalpha(lexer->data.lookahead);
|
||||
// if (lexer->data.lookahead != '[')
|
||||
// {
|
||||
// // no esac
|
||||
// if (lexer->data.lookahead == 'e')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (lexer->data.lookahead == 's')
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (lexer->data.lookahead == 'a')
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (lexer->data.lookahead == 'c')
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (me_isspace(lexer->data.lookahead))
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // -\w is just a word, find something else special
|
||||
// if (lexer->data.lookahead == '-')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// while (me_isalnum(lexer->data.lookahead))
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// }
|
||||
//
|
||||
// if (lexer->data.lookahead == ')' || lexer->data.lookahead == '\\' || lexer->data.lookahead == '.')
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// }
|
||||
//
|
||||
// // case item -) or *)
|
||||
// if (lexer->data.lookahead == ')' && scanner->last_glob_paren_depth == 0)
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (me_isspace(lexer->data.lookahead))
|
||||
// {
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// return was_non_alpha;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (me_isspace(lexer->data.lookahead))
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// scanner->last_glob_paren_depth = 0;
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// if (lexer->data.lookahead == '$')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(')
|
||||
// {
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (lexer->data.lookahead == '|')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// if (!me_isalnum(lexer->data.lookahead) && lexer->data.lookahead != '(' && lexer->data.lookahead != '"' && lexer->data.lookahead != '[' &&
|
||||
// lexer->data.lookahead != '?' && lexer->data.lookahead != '/' && lexer->data.lookahead != '\\' && lexer->data.lookahead != '_' &&
|
||||
// lexer->data.lookahead != '*')
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// typedef struct
|
||||
// {
|
||||
// bool done;
|
||||
// bool saw_non_alphadot;
|
||||
// t_u32 paren_depth;
|
||||
// t_u32 bracket_depth;
|
||||
// t_u32 brace_depth;
|
||||
// } State;
|
||||
//
|
||||
// State state = {false, was_non_alpha, scanner->last_glob_paren_depth, 0, 0};
|
||||
// while (!state.done)
|
||||
// {
|
||||
// switch (lexer->data.lookahead)
|
||||
// {
|
||||
// case '\0':
|
||||
// return false;
|
||||
// case '(':
|
||||
// state.paren_depth++;
|
||||
// break;
|
||||
// case '[':
|
||||
// state.bracket_depth++;
|
||||
// break;
|
||||
// case '{':
|
||||
// state.brace_depth++;
|
||||
// break;
|
||||
// case ')':
|
||||
// if (state.paren_depth == 0)
|
||||
// {
|
||||
// state.done = true;
|
||||
// }
|
||||
// state.paren_depth--;
|
||||
// break;
|
||||
// case ']':
|
||||
// if (state.bracket_depth == 0)
|
||||
// {
|
||||
// state.done = true;
|
||||
// }
|
||||
// state.bracket_depth--;
|
||||
// break;
|
||||
// case '}':
|
||||
// if (state.brace_depth == 0)
|
||||
// {
|
||||
// state.done = true;
|
||||
// }
|
||||
// state.brace_depth--;
|
||||
// break;
|
||||
// }
|
||||
//
|
||||
// if (lexer->data.lookahead == '|')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0)
|
||||
// {
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (!state.done)
|
||||
// {
|
||||
// bool was_space = me_isspace(lexer->data.lookahead);
|
||||
// if (lexer->data.lookahead == '$')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// if (!me_isalpha(lexer->data.lookahead) && lexer->data.lookahead != '.' && lexer->data.lookahead != '\\')
|
||||
// {
|
||||
// state.saw_non_alphadot = true;
|
||||
// }
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (lexer->data.lookahead == '(' || lexer->data.lookahead == '{')
|
||||
// {
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// scanner->last_glob_paren_depth = state.paren_depth;
|
||||
// return state.saw_non_alphadot;
|
||||
// }
|
||||
// }
|
||||
// if (was_space)
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// scanner->last_glob_paren_depth = 0;
|
||||
// return state.saw_non_alphadot;
|
||||
// }
|
||||
// if (lexer->data.lookahead == '"')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// scanner->last_glob_paren_depth = 0;
|
||||
// return state.saw_non_alphadot;
|
||||
// }
|
||||
// if (lexer->data.lookahead == '\\')
|
||||
// {
|
||||
// if (!me_isalpha(lexer->data.lookahead) && lexer->data.lookahead != '.' && lexer->data.lookahead != '\\')
|
||||
// {
|
||||
// state.saw_non_alphadot = true;
|
||||
// }
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '"')
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// }
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// if (!me_isalpha(lexer->data.lookahead) && lexer->data.lookahead != '.' && lexer->data.lookahead != '\\')
|
||||
// {
|
||||
// state.saw_non_alphadot = true;
|
||||
// }
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// }
|
||||
// if (!was_space)
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// scanner->last_glob_paren_depth = 0;
|
||||
// return state.saw_non_alphadot;
|
||||
// }
|
||||
// scanner->last_glob_paren_depth = 0;
|
||||
//
|
||||
// return false;
|
||||
// }
|
||||
|
||||
expansion_word:
|
||||
if (valid_symbols[EXPANSION_WORD])
|
||||
bool scan_expansion_word(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
||||
{
|
||||
(void)(scanner);
|
||||
(void)(lexer);
|
||||
(void)(valid_symbols);
|
||||
bool advanced_once = false;
|
||||
bool advance_once_space = false;
|
||||
for (;;)
|
||||
|
|
@ -1044,7 +374,8 @@ expansion_word:
|
|||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' || me_isalnum(lexer->data.lookahead))
|
||||
if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' ||
|
||||
me_isalnum(lexer->data.lookahead))
|
||||
{
|
||||
lexer->data.result_symbol = EXPANSION_WORD;
|
||||
return advanced_once;
|
||||
|
|
@ -1073,7 +404,8 @@ expansion_word:
|
|||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' || me_isalnum(lexer->data.lookahead))
|
||||
if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(' || lexer->data.lookahead == '\'' ||
|
||||
me_isalnum(lexer->data.lookahead))
|
||||
{
|
||||
lexer->data.result_symbol = EXPANSION_WORD;
|
||||
return advanced_once;
|
||||
|
|
@ -1108,11 +440,242 @@ expansion_word:
|
|||
advance_once_space = advance_once_space || me_isspace(lexer->data.lookahead);
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
}
|
||||
return (false);
|
||||
}
|
||||
|
||||
brace_start:
|
||||
// Reports a CONCAT token at the current position.
//
// result_symbol is set to CONCAT unconditionally. When the lookahead is a
// backslash, the token end is marked before it and the backslash is consumed
// so the following character can be inspected: an escaped quote or backslash
// yields true straight away, otherwise the result is true unless the lexer
// reports end of input. `scanner` and `valid_symbols` are not used.
bool scan_concat(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{
	(void)scanner;
	(void)valid_symbols;

	lexer->data.result_symbol = CONCAT;
	if (lexer->data.lookahead != '\\')
		return (true);

	// Mark the end before consuming the backslash; nothing is marked after it.
	lexer->data.mark_end((void *)lexer);
	lexer->data.advance((void *)lexer, false);
	switch (lexer->data.lookahead)
	{
	case '"':
	case '\'':
	case '\\':
		return (true);
	default:
		break;
	}
	return (!lexer->data.eof((void *)lexer));
}
|
||||
|
||||
// Tries to recognise `##` that is not immediately followed by `}`.
//
// On success result_symbol is set to IMMEDIATE_DOUBLE_HASH, the token end is
// marked after the second `#`, and true is returned. On failure false is
// returned; note that the lexer may already have been advanced past one or
// two `#` characters by then. `scanner` and `valid_symbols` are not used.
bool check_scan_immediate_double_hash(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{
	(void)scanner;
	(void)valid_symbols;

	if (lexer->data.lookahead != '#')
		return (false);

	// End is marked before the first '#' in case the match fails later on.
	lexer->data.mark_end((void *)lexer);
	lexer->data.advance((void *)lexer, false);
	if (lexer->data.lookahead != '#')
		return (false);

	lexer->data.advance((void *)lexer, false);
	if (lexer->data.lookahead == '}')
		return (false);

	lexer->data.result_symbol = IMMEDIATE_DOUBLE_HASH;
	lexer->data.mark_end((void *)lexer);
	return (true);
}
|
||||
|
||||
// Scans for a VARIABLE_NAME, FILE_DESCRIPTOR, HEREDOC_ARROW or
// HEREDOC_ARROW_DASH token (and, in two places, EXTGLOB_PATTERN or a
// delegation to scan_expansion_word).
//
// Steps, in order:
//   1. skip blanks and backslash-newline continuations;
//   2. reject or special-case a leading `* @ ? - 0 _`;
//   3. recognise `<<` / `<<-`, pushing a new heredoc onto scanner->heredocs;
//   4. consume a run of digits / letters / underscores;
//   5. classify the run by the character that follows it.
// Returns true when result_symbol has been set to the recognised token,
// false otherwise (the lexer may have been advanced even on failure).
bool scan_variable_name(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{
	bool is_number;

	// 1. Leading whitespace and line continuations.
	while (true)
	{
		const bool blank = lexer->data.lookahead == ' ' || lexer->data.lookahead == '\t' ||
						   lexer->data.lookahead == '\r' ||
						   (lexer->data.lookahead == '\n' && !valid_symbols[NEWLINE]);

		if (blank && !valid_symbols[EXPANSION_WORD])
		{
			lexer->data.advance((void *)lexer, true);
			continue;
		}
		if (lexer->data.lookahead != '\\')
			break;

		lexer->data.advance((void *)lexer, true);
		if (lexer->data.eof((void *)lexer))
		{
			lexer->data.mark_end((void *)lexer);
			lexer->data.result_symbol = VARIABLE_NAME;
			return (true);
		}
		// Accept "\\\r\n" as well as "\\\n".
		if (lexer->data.lookahead == '\r')
			lexer->data.advance((void *)lexer, true);
		if (lexer->data.lookahead != '\n')
		{
			if (lexer->data.lookahead == '\\' && valid_symbols[EXPANSION_WORD])
				return (scan_expansion_word(scanner, lexer, valid_symbols));
			return (false);
		}
		lexer->data.advance((void *)lexer, true);
	}

	// 2. A leading `* @ ? - 0 _` (only looked at when EXPANSION_WORD is not valid).
	if (!valid_symbols[EXPANSION_WORD])
	{
		switch (lexer->data.lookahead)
		{
		case '*':
		case '@':
		case '?':
		case '-':
		case '0':
		case '_':
			lexer->data.mark_end((void *)lexer);
			lexer->data.advance((void *)lexer, false);
			switch (lexer->data.lookahead)
			{
			case '=':
			case '[':
			case ':':
			case '-':
			case '%':
			case '#':
			case '/':
				return (false);
			default:
				break;
			}
			if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->data.lookahead))
			{
				lexer->data.mark_end((void *)lexer);
				lexer->data.result_symbol = EXTGLOB_PATTERN;
				return (true);
			}
			break;
		default:
			break;
		}
	}

	// 3. Heredoc arrows: `<<` and `<<-`.
	if (valid_symbols[HEREDOC_ARROW] && lexer->data.lookahead == '<')
	{
		lexer->data.advance((void *)lexer, false);
		if (lexer->data.lookahead != '<')
			return (false);
		lexer->data.advance((void *)lexer, false);

		const bool dash = (lexer->data.lookahead == '-');
		if (dash)
			lexer->data.advance((void *)lexer, false);

		t_heredoc fresh = heredoc_new();
		if (dash)
			fresh.allows_indent = true;
		array_push(&scanner->heredocs, fresh);
		if (dash)
			lexer->data.result_symbol = HEREDOC_ARROW_DASH;
		else
			lexer->data.result_symbol = HEREDOC_ARROW;
		return (true);
	}

	// 4. First character of the run: digit, letter or underscore.
	is_number = true;
	if (!me_isdigit(lexer->data.lookahead))
	{
		if (!(me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_'))
		{
			// Not a name start: hand over to the expansion-word scanner when
			// that token is valid and we are not looking at '{'.
			if (lexer->data.lookahead != '{' && valid_symbols[EXPANSION_WORD])
				return (scan_expansion_word(scanner, lexer, valid_symbols));
			return (false);
		}
		is_number = false;
	}
	lexer->data.advance((void *)lexer, false);

	// Remaining characters of the run; any letter or '_' makes it non-numeric.
	for (;;)
	{
		if (me_isdigit(lexer->data.lookahead))
		{
			lexer->data.advance((void *)lexer, false);
			continue;
		}
		if (!me_isalpha(lexer->data.lookahead) && lexer->data.lookahead != '_')
			break;
		is_number = false;
		lexer->data.advance((void *)lexer, false);
	}

	// 5. Classify by the character following the run.
	if (is_number && valid_symbols[FILE_DESCRIPTOR] &&
		(lexer->data.lookahead == '>' || lexer->data.lookahead == '<'))
	{
		lexer->data.result_symbol = FILE_DESCRIPTOR;
		return (true);
	}
	if (!valid_symbols[VARIABLE_NAME])
		return (false);

	switch (lexer->data.lookahead)
	{
	case '+':
		// `+=` or `+:`; the token end is marked before the '+'.
		lexer->data.mark_end((void *)lexer);
		lexer->data.advance((void *)lexer, false);
		if (lexer->data.lookahead == '=' || lexer->data.lookahead == ':')
		{
			lexer->data.result_symbol = VARIABLE_NAME;
			return (true);
		}
		return (false);
	case '/':
		return (false);
	case ':':
		if (valid_symbols[OPENING_PAREN])
			return (false);
		break;
	case '#':
		if (is_number)
			return (false);
		break;
	case '=':
	case '[':
	case '%':
	case '@':
	case '-':
		break;
	case '?':
		lexer->data.mark_end((void *)lexer);
		lexer->data.advance((void *)lexer, false);
		lexer->data.result_symbol = VARIABLE_NAME;
		return (me_isalpha(lexer->data.lookahead));
	default:
		return (false);
	}

	lexer->data.mark_end((void *)lexer);
	lexer->data.result_symbol = VARIABLE_NAME;
	return (true);
}
|
||||
// Tries to match the end identifier of the heredoc at the back of
// scanner->heredocs.
//
// When scan_heredoc_end_identifier succeeds, the heredoc's
// current_leading_word and delimiter are deleted, the heredoc is popped from
// the stack, result_symbol is set to HEREDOC_END and true is returned.
// Otherwise nothing is released and false is returned.
// The caller must ensure the heredoc stack is non-empty: array_back is called
// without a size check here. `valid_symbols` is not used.
bool check_scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{
	t_heredoc *top;

	(void)valid_symbols;
	top = array_back(&scanner->heredocs);
	if (!scan_heredoc_end_identifier(top, lexer))
		return (false);

	// Release the heredoc's buffers before dropping it from the stack.
	array_delete(&top->current_leading_word);
	array_delete(&top->delimiter);
	(void)array_pop(&scanner->heredocs);
	lexer->data.result_symbol = HEREDOC_END;
	return (true);
}
|
||||
|
||||
// Entry point of the external scanner: tries each external token in a fixed
// priority order and dispatches to the matching helper.
//
// scanner       - scanner state; only its heredoc stack is read here.
// lexer         - lexer handle (lookahead, eof, result_symbol).
// valid_symbols - indexed by token type; true for tokens the parser accepts
//                 at this position.
//
// Returns true when a token was recognised (result_symbol is set by this
// function or by the helper it delegated to), false otherwise.
// Most branches are disabled while valid_symbols[ERROR_RECOVERY] is set;
// EMPTY_VALUE, HEREDOC_END and the final EXPANSION_WORD fallback are not.
bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
{
	// CONCAT: only when the next character cannot end or separate a word.
	if (valid_symbols[CONCAT] && !(valid_symbols[ERROR_RECOVERY]) &&
		!(lexer->data.lookahead == 0 || me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '>' ||
		  lexer->data.lookahead == '<' || lexer->data.lookahead == ')' || lexer->data.lookahead == '(' ||
		  lexer->data.lookahead == ';' || lexer->data.lookahead == '&' || lexer->data.lookahead == '|' ||
		  lexer->data.lookahead == '{' || lexer->data.lookahead == '}'))
		return (scan_concat(scanner, lexer, valid_symbols));

	if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !(valid_symbols[ERROR_RECOVERY]) &&
		check_scan_immediate_double_hash(scanner, lexer, valid_symbols))
		return (true);

	// EMPTY_VALUE: nothing is consumed; whitespace, end of input, ';' or '&' follows.
	if (valid_symbols[EMPTY_VALUE] && (me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer) ||
									   lexer->data.lookahead == ';' || lexer->data.lookahead == '&'))
	{
		lexer->data.result_symbol = EMPTY_VALUE;
		return (true);
	}

	// Body of a heredoc that has not been started yet.
	if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.size > 0 &&
		!array_back(&scanner->heredocs)->started && !(valid_symbols[ERROR_RECOVERY]))
		return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY));

	// check_scan_heredoc_end has already popped the heredoc and set
	// result_symbol to HEREDOC_END when it returns true, so the token must be
	// reported as recognised. Returning false here would discard it.
	if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0 &&
		check_scan_heredoc_end(scanner, lexer, valid_symbols))
		return (true);

	// Further content of a heredoc that is already started.
	if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 && array_back(&scanner->heredocs)->started &&
		!(valid_symbols[ERROR_RECOVERY]))
		return (scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END));

	if (valid_symbols[HEREDOC_START] && !(valid_symbols[ERROR_RECOVERY]) && scanner->heredocs.size > 0)
		return (scan_heredoc_start(array_back(&scanner->heredocs), lexer));

	if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) &&
		!(valid_symbols[ERROR_RECOVERY]))
		return (scan_variable_name(scanner, lexer, valid_symbols));

	if (valid_symbols[BARE_DOLLAR] && !(valid_symbols[ERROR_RECOVERY]) && scan_bare_dollar(lexer))
		return (true);

	if (valid_symbols[EXPANSION_WORD])
		return (scan_expansion_word(scanner, lexer, valid_symbols));

	return (false);
}
|
||||
|
||||
void *tree_sitter_sh_external_scanner_create()
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue