update: Updated stuff so the scanner works great now
This commit is contained in:
parent
475038e2b7
commit
163db2241f
15 changed files with 738 additions and 648 deletions
|
|
@ -6,25 +6,26 @@
|
|||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/08/31 17:33:11 by maiboyer #+# #+# */
|
||||
/* Updated: 2024/08/31 17:33:32 by maiboyer ### ########.fr */
|
||||
/* Updated: 2024/09/06 17:11:17 by maiboyer ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "parser/reduce_action.h"
|
||||
#include "me/vec/vec_reduce_action.h"
|
||||
|
||||
void ts_reduce_action_set_add(ReduceActionSet *self, ReduceAction new_action)
|
||||
void ts_reduce_action_set_add(ReduceActionSet *self, t_reduce_action new_action)
|
||||
{
|
||||
ReduceAction action;
|
||||
t_reduce_action action;
|
||||
t_u32 i;
|
||||
|
||||
i = 0;
|
||||
while (i < self->size)
|
||||
while (i < self->len)
|
||||
{
|
||||
action = self->contents[i];
|
||||
action = self->buffer[i];
|
||||
if (action.symbol == new_action.symbol \
|
||||
&& action.count == new_action.count)
|
||||
return ;
|
||||
i++;
|
||||
}
|
||||
array_push(self, new_action);
|
||||
vec_reduce_action_push(self, new_action);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,12 +6,13 @@
|
|||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/09/03 14:08:00 by maiboyer #+# #+# */
|
||||
/* Updated: 2024/09/03 14:08:01 by maiboyer ### ########.fr */
|
||||
/* Updated: 2024/09/06 17:10:43 by maiboyer ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "me/mem/mem.h"
|
||||
#include "me/types.h"
|
||||
#include "me/vec/vec_reduce_action.h"
|
||||
#include "me/vec/vec_subtree.h"
|
||||
#include "parser/api.h"
|
||||
#include "parser/array.h"
|
||||
|
|
@ -627,18 +628,15 @@ static t_stack_version ts_parser__reduce(TSParser *self, t_stack_version version
|
|||
ts_subtree_array_remove_trailing_extras(&next_slice_children, &self->trailing_extras2);
|
||||
if (ts_parser__select_children(self, (parent), &next_slice_children))
|
||||
{
|
||||
ts_subtree_array_clear(
|
||||
/*&self->tree_pool,*/ &self->trailing_extras);
|
||||
ts_subtree_release(
|
||||
/*&self->tree_pool,*/ (parent));
|
||||
ts_subtree_array_clear(&self->trailing_extras);
|
||||
ts_subtree_release(parent);
|
||||
array_swap(&self->trailing_extras, &self->trailing_extras2);
|
||||
parent = ts_subtree_new_node(symbol, &next_slice_children, production_id, self->language);
|
||||
}
|
||||
else
|
||||
{
|
||||
self->trailing_extras2.len = 0;
|
||||
ts_subtree_array_delete(
|
||||
/*&self->tree_pool,*/ &next_slice.subtrees);
|
||||
ts_subtree_array_delete(&next_slice.subtrees);
|
||||
}
|
||||
}
|
||||
state = ts_stack_state(self->stack, slice_version);
|
||||
|
|
@ -747,7 +745,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, t_stack_versi
|
|||
TSSymbol first_symbol;
|
||||
TSSymbol end_symbol;
|
||||
t_stack_version reduction_version;
|
||||
ReduceAction reduce_action;
|
||||
t_reduce_action reduce_action;
|
||||
t_u32 k;
|
||||
TSSymbol symbol;
|
||||
TableEntry entry;
|
||||
|
|
@ -774,7 +772,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, t_stack_versi
|
|||
continue;
|
||||
state = ts_stack_state(self->stack, version);
|
||||
has_shift_action = false;
|
||||
array_clear(&self->reduce_actions);
|
||||
self->reduce_actions.len = 0;
|
||||
if (lookahead_symbol != 0)
|
||||
{
|
||||
first_symbol = lookahead_symbol;
|
||||
|
|
@ -800,7 +798,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, t_stack_versi
|
|||
break;
|
||||
case TSParseActionTypeReduce:
|
||||
if (action.reduce.child_count > 0)
|
||||
ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){
|
||||
ts_reduce_action_set_add(&self->reduce_actions, (t_reduce_action){
|
||||
.symbol = action.reduce.symbol,
|
||||
.count = action.reduce.child_count,
|
||||
.dynamic_precedence = action.reduce.dynamic_precedence,
|
||||
|
|
@ -813,9 +811,9 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, t_stack_versi
|
|||
}
|
||||
}
|
||||
reduction_version = STACK_VERSION_NONE;
|
||||
for (k = 0; k < self->reduce_actions.size; k++)
|
||||
for (k = 0; k < self->reduce_actions.len; k++)
|
||||
{
|
||||
reduce_action = self->reduce_actions.contents[k];
|
||||
reduce_action = self->reduce_actions.buffer[k];
|
||||
reduction_version = ts_parser__reduce(self, version, reduce_action.symbol, reduce_action.count,
|
||||
reduce_action.dynamic_precedence, reduce_action.production_id, true, false);
|
||||
}
|
||||
|
|
@ -1461,8 +1459,7 @@ TSParser *ts_parser_new(void)
|
|||
|
||||
self = mem_alloc(sizeof(*self));
|
||||
ts_lexer_init(&self->lexer);
|
||||
array_init(&self->reduce_actions);
|
||||
array_reserve(&self->reduce_actions, 4);
|
||||
self->reduce_actions = vec_reduce_action_new(4, NULL);
|
||||
self->stack = ts_stack_new();
|
||||
self->finished_tree = NULL;
|
||||
self->language = NULL;
|
||||
|
|
@ -1478,10 +1475,7 @@ void ts_parser_delete(TSParser *self)
|
|||
return;
|
||||
ts_parser_set_language(self, NULL);
|
||||
ts_stack_delete(self->stack);
|
||||
if (self->reduce_actions.contents)
|
||||
{
|
||||
array_delete(&self->reduce_actions);
|
||||
}
|
||||
vec_reduce_action_free(self->reduce_actions);
|
||||
array_delete(&self->trailing_extras);
|
||||
array_delete(&self->trailing_extras2);
|
||||
array_delete(&self->scratch_trees);
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
#include "me/char/char.h"
|
||||
#include "me/mem/mem.h"
|
||||
#include "me/str/str.h"
|
||||
#include "me/string/string.h"
|
||||
#include "me/types.h"
|
||||
#include "me/mem/mem.h"
|
||||
#include "me/vec/vec_heredoc.h"
|
||||
#include "parser/inner/heredoc.h"
|
||||
#include "parser/lexer.h"
|
||||
#include "parser/parser.h"
|
||||
#include "me/str/str.h"
|
||||
|
||||
typedef struct s_heredoc t_heredoc;
|
||||
typedef struct s_scanner t_scanner;
|
||||
|
|
@ -37,9 +37,9 @@ enum e_token_type
|
|||
|
||||
struct s_scanner
|
||||
{
|
||||
t_u8 last_glob_paren_depth;
|
||||
bool ext_was_in_double_quote;
|
||||
bool ext_saw_outside_quote;
|
||||
t_u8 last_glob_paren_depth;
|
||||
bool ext_was_in_double_quote;
|
||||
bool ext_saw_outside_quote;
|
||||
t_vec_heredoc heredocs;
|
||||
};
|
||||
|
||||
|
|
@ -360,62 +360,31 @@ bool scan_heredoc_content(t_scanner *scanner, t_lexer *lexer, enum e_token_type
|
|||
}
|
||||
}
|
||||
|
||||
bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
||||
bool scan_concat(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
||||
{
|
||||
if (valid_symbols[CONCAT] && !(valid_symbols[ERROR_RECOVERY]))
|
||||
(void)(scanner);
|
||||
(void)(lexer);
|
||||
(void)(valid_symbols);
|
||||
lexer->data.result_symbol = CONCAT;
|
||||
if (lexer->data.lookahead == '\\')
|
||||
{
|
||||
if (!(lexer->data.lookahead == 0 || me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '>' ||
|
||||
lexer->data.lookahead == '<' || lexer->data.lookahead == ')' || lexer->data.lookahead == '(' ||
|
||||
lexer->data.lookahead == ';' || lexer->data.lookahead == '&' || lexer->data.lookahead == '|' ||
|
||||
lexer->data.lookahead == '{' || lexer->data.lookahead == '}'))
|
||||
{
|
||||
lexer->data.result_symbol = CONCAT;
|
||||
// So for a`b`, we want to return a concat. We check if the
|
||||
// 2nd backtick has whitespace after it, and if it does we
|
||||
// return concat.
|
||||
if (lexer->data.lookahead == '`')
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
while (lexer->data.lookahead != '`' && !lexer->data.eof((void *)lexer))
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
}
|
||||
if (lexer->data.eof((void *)lexer))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (lexer->data.lookahead == '`')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
}
|
||||
return me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer);
|
||||
}
|
||||
// strings w/ expansions that contains escaped quotes or
|
||||
// backslashes need this to return a concat
|
||||
if (lexer->data.lookahead == '\\')
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
if (lexer->data.lookahead == '"' || lexer->data.lookahead == '\'' || lexer->data.lookahead == '\\')
|
||||
{
|
||||
return true;
|
||||
}
|
||||
if (lexer->data.eof((void *)lexer))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
if (lexer->data.lookahead == '"' || lexer->data.lookahead == '\'' || lexer->data.lookahead == '\\')
|
||||
return true;
|
||||
if (lexer->data.eof((void *)lexer))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool scan_double_hash(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
||||
{
|
||||
(void)(scanner);
|
||||
(void)(lexer);
|
||||
(void)(valid_symbols);
|
||||
if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !(valid_symbols[ERROR_RECOVERY]))
|
||||
{
|
||||
// advance two # and ensure not } after
|
||||
if (lexer->data.lookahead == '#')
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
|
|
@ -432,41 +401,50 @@ bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
|||
}
|
||||
}
|
||||
}
|
||||
return (false);
|
||||
}
|
||||
|
||||
if (valid_symbols[EMPTY_VALUE])
|
||||
{
|
||||
if (me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer) || lexer->data.lookahead == ';' ||
|
||||
lexer->data.lookahead == '&')
|
||||
{
|
||||
lexer->data.result_symbol = EMPTY_VALUE;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.len > 0 &&
|
||||
!vec_heredoc_last(&scanner->heredocs)->started && !(valid_symbols[ERROR_RECOVERY]))
|
||||
return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY));
|
||||
bool scan_heredoc_end(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
||||
{
|
||||
t_heredoc *heredoc;
|
||||
|
||||
if (valid_symbols[HEREDOC_END] && scanner->heredocs.len > 0)
|
||||
{
|
||||
t_heredoc *heredoc = vec_heredoc_last(&scanner->heredocs);
|
||||
heredoc = vec_heredoc_last(&scanner->heredocs);
|
||||
if (scan_heredoc_end_identifier(heredoc, lexer))
|
||||
{
|
||||
string_free(heredoc->current_leading_word);
|
||||
string_free(heredoc->delimiter);
|
||||
vec_heredoc_pop(&scanner->heredocs, NULL);
|
||||
lexer->data.result_symbol = HEREDOC_END;
|
||||
return true;
|
||||
return (true);
|
||||
}
|
||||
}
|
||||
return (false);
|
||||
}
|
||||
|
||||
bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
||||
{
|
||||
if (valid_symbols[CONCAT] && !(valid_symbols[ERROR_RECOVERY]) &&
|
||||
(!(lexer->data.lookahead == 0 || me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '>' ||
|
||||
lexer->data.lookahead == '<' || lexer->data.lookahead == ')' || lexer->data.lookahead == '(' || lexer->data.lookahead == ';' ||
|
||||
lexer->data.lookahead == '&' || lexer->data.lookahead == '|' || lexer->data.lookahead == '{' || lexer->data.lookahead == '}')))
|
||||
return (scan_concat(scanner, lexer, valid_symbols));
|
||||
if (scan_double_hash(scanner, lexer, valid_symbols))
|
||||
return (true);
|
||||
if (valid_symbols[EMPTY_VALUE] && (me_isspace(lexer->data.lookahead) || lexer->data.eof((void *)lexer) ||
|
||||
lexer->data.lookahead == ';' || lexer->data.lookahead == '&'))
|
||||
return (lexer->data.result_symbol = EMPTY_VALUE, true);
|
||||
if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.len > 0 &&
|
||||
!vec_heredoc_last(&scanner->heredocs)->started && !(valid_symbols[ERROR_RECOVERY]))
|
||||
return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY));
|
||||
if (scan_heredoc_end(scanner, lexer, valid_symbols))
|
||||
return (true);
|
||||
if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0 && vec_heredoc_last(&scanner->heredocs)->started &&
|
||||
!(valid_symbols[ERROR_RECOVERY]))
|
||||
return (scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END));
|
||||
|
||||
if (valid_symbols[HEREDOC_START] && !(valid_symbols[ERROR_RECOVERY]) && scanner->heredocs.len > 0)
|
||||
return (scan_heredoc_start(vec_heredoc_last(&scanner->heredocs), lexer));
|
||||
|
||||
if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) &&
|
||||
!(valid_symbols[ERROR_RECOVERY]))
|
||||
{
|
||||
|
|
@ -475,44 +453,30 @@ bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
|||
if ((lexer->data.lookahead == ' ' || lexer->data.lookahead == '\t' || lexer->data.lookahead == '\r' ||
|
||||
(lexer->data.lookahead == '\n' && !valid_symbols[NEWLINE])) &&
|
||||
!valid_symbols[EXPANSION_WORD])
|
||||
{
|
||||
lexer->data.advance((void *)lexer, true);
|
||||
}
|
||||
else if (lexer->data.lookahead == '\\')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, true);
|
||||
|
||||
if (lexer->data.eof((void *)lexer))
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.result_symbol = VARIABLE_NAME;
|
||||
return true;
|
||||
return (true);
|
||||
}
|
||||
|
||||
if (lexer->data.lookahead == '\r')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, true);
|
||||
}
|
||||
if (lexer->data.lookahead == '\n')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (lexer->data.lookahead == '\\' && valid_symbols[EXPANSION_WORD])
|
||||
{
|
||||
goto expansion_word;
|
||||
}
|
||||
return false;
|
||||
return (false);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// no '*', '@', '?', '-', '$', '0', '_'
|
||||
if (!valid_symbols[EXPANSION_WORD] &&
|
||||
(lexer->data.lookahead == '*' || lexer->data.lookahead == '@' || lexer->data.lookahead == '?' || lexer->data.lookahead == '-' ||
|
||||
lexer->data.lookahead == '0' || lexer->data.lookahead == '_'))
|
||||
|
|
@ -522,9 +486,7 @@ bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
|||
if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' || lexer->data.lookahead == ':' ||
|
||||
lexer->data.lookahead == '-' || lexer->data.lookahead == '%' || lexer->data.lookahead == '#' ||
|
||||
lexer->data.lookahead == '/')
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->data.lookahead))
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
|
|
@ -539,34 +501,17 @@ bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
|||
if (lexer->data.lookahead == '<')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
if (lexer->data.lookahead == '-')
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
t_heredoc heredoc = heredoc_new();
|
||||
heredoc.allows_indent = true;
|
||||
vec_heredoc_push(&scanner->heredocs, heredoc);
|
||||
lexer->data.result_symbol = HEREDOC_ARROW_DASH;
|
||||
}
|
||||
// else if (lexer->data.lookahead == '<' || lexer->data.lookahead == '=')
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
else
|
||||
{
|
||||
t_heredoc heredoc = heredoc_new();
|
||||
vec_heredoc_push(&scanner->heredocs, heredoc);
|
||||
lexer->data.result_symbol = HEREDOC_ARROW;
|
||||
}
|
||||
return true;
|
||||
t_heredoc heredoc = heredoc_new();
|
||||
vec_heredoc_push(&scanner->heredocs, heredoc);
|
||||
lexer->data.result_symbol = HEREDOC_ARROW;
|
||||
return (true);
|
||||
}
|
||||
return false;
|
||||
return (false);
|
||||
}
|
||||
|
||||
bool is_number = true;
|
||||
if (me_isdigit(lexer->data.lookahead))
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
}
|
||||
else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_')
|
||||
{
|
||||
is_number = false;
|
||||
|
|
@ -576,42 +521,30 @@ bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
|||
{
|
||||
if (lexer->data.lookahead == '{')
|
||||
{
|
||||
goto brace_start;
|
||||
return (false);
|
||||
}
|
||||
if (valid_symbols[EXPANSION_WORD])
|
||||
{
|
||||
goto expansion_word;
|
||||
}
|
||||
if (valid_symbols[EXTGLOB_PATTERN])
|
||||
{
|
||||
goto extglob_pattern;
|
||||
}
|
||||
return false;
|
||||
return (false);
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (me_isdigit(lexer->data.lookahead))
|
||||
{
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
}
|
||||
else if (me_isalpha(lexer->data.lookahead) || lexer->data.lookahead == '_')
|
||||
{
|
||||
is_number = false;
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->data.lookahead == '>' || lexer->data.lookahead == '<'))
|
||||
{
|
||||
lexer->data.result_symbol = FILE_DESCRIPTOR;
|
||||
return true;
|
||||
}
|
||||
|
||||
return (lexer->data.result_symbol = FILE_DESCRIPTOR, true);
|
||||
if (valid_symbols[VARIABLE_NAME])
|
||||
{
|
||||
if (lexer->data.lookahead == '+')
|
||||
|
|
@ -619,28 +552,19 @@ bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
|||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.advance((void *)lexer, false);
|
||||
if (lexer->data.lookahead == '=' || lexer->data.lookahead == ':')
|
||||
{
|
||||
lexer->data.result_symbol = VARIABLE_NAME;
|
||||
return true;
|
||||
}
|
||||
return (lexer->data.result_symbol = VARIABLE_NAME, true);
|
||||
return false;
|
||||
}
|
||||
if (lexer->data.lookahead == '/')
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (lexer->data.lookahead == '=' || lexer->data.lookahead == '[' ||
|
||||
(lexer->data.lookahead == ':' &&
|
||||
!valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable
|
||||
// names for function words, only handling : for now? #235
|
||||
lexer->data.lookahead == '%' ||
|
||||
(lexer->data.lookahead == ':' && !valid_symbols[OPENING_PAREN]) || lexer->data.lookahead == '%' ||
|
||||
(lexer->data.lookahead == '#' && !is_number) || lexer->data.lookahead == '@' || (lexer->data.lookahead == '-'))
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
lexer->data.result_symbol = VARIABLE_NAME;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (lexer->data.lookahead == '?')
|
||||
{
|
||||
lexer->data.mark_end((void *)lexer);
|
||||
|
|
@ -649,396 +573,12 @@ bool scan(t_scanner *scanner, t_lexer *lexer, const bool *valid_symbols)
|
|||
return me_isalpha(lexer->data.lookahead);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (valid_symbols[BARE_DOLLAR] && !(valid_symbols[ERROR_RECOVERY]) && scan_bare_dollar(lexer))
|
||||
return (true);
|
||||
|
||||
// if ((valid_symbols[REGEX]) && !(valid_symbols[ERROR_RECOVERY]))
|
||||
// {
|
||||
// if (valid_symbols[REGEX])
|
||||
// {
|
||||
// while (me_isspace(lexer->data.lookahead))
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, true);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if ((lexer->data.lookahead != '"' && lexer->data.lookahead != '\'') || ((lexer->data.lookahead == '$' || lexer->data.lookahead ==
|
||||
// '\'')) || (lexer->data.lookahead == '\''))
|
||||
// {
|
||||
// typedef struct
|
||||
// {
|
||||
// bool done;
|
||||
// bool advanced_once;
|
||||
// bool found_non_alnumdollarunderdash;
|
||||
// bool last_was_escape;
|
||||
// bool in_single_quote;
|
||||
// t_u32 paren_depth;
|
||||
// t_u32 bracket_depth;
|
||||
// t_u32 brace_depth;
|
||||
// } State;
|
||||
//
|
||||
// if (lexer->data.lookahead == '$')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (lexer->data.lookahead == '(')
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
//
|
||||
// State state = {false, false, false, false, false, 0, 0, 0};
|
||||
// while (!state.done)
|
||||
// {
|
||||
// if (state.in_single_quote)
|
||||
// {
|
||||
// if (lexer->data.lookahead == '\'')
|
||||
// {
|
||||
// state.in_single_quote = false;
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// }
|
||||
// }
|
||||
// switch (lexer->data.lookahead)
|
||||
// {
|
||||
// case '\\':
|
||||
// state.last_was_escape = true;
|
||||
// break;
|
||||
// case '\0':
|
||||
// return false;
|
||||
// case '(':
|
||||
// state.paren_depth++;
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// case '[':
|
||||
// state.bracket_depth++;
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// case '{':
|
||||
// if (!state.last_was_escape)
|
||||
// state.brace_depth++;
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// case ')':
|
||||
// if (state.paren_depth == 0)
|
||||
// state.done = true;
|
||||
// state.paren_depth--;
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// case ']':
|
||||
// if (state.bracket_depth == 0)
|
||||
// state.done = true;
|
||||
// state.bracket_depth--;
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// case '}':
|
||||
// if (state.brace_depth == 0)
|
||||
// state.done = true;
|
||||
// state.brace_depth--;
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// case '\'':
|
||||
// // Enter or exit a single-quoted string.
|
||||
// state.in_single_quote = !state.in_single_quote;
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// state.advanced_once = true;
|
||||
// state.last_was_escape = false;
|
||||
// continue;
|
||||
// default:
|
||||
// state.last_was_escape = false;
|
||||
// break;
|
||||
// }
|
||||
//
|
||||
// if (!state.done)
|
||||
// {
|
||||
// if (valid_symbols[REGEX])
|
||||
// {
|
||||
// bool was_space = !state.in_single_quote && me_isspace(lexer->data.lookahead);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// state.advanced_once = true;
|
||||
// if (!was_space || state.paren_depth > 0)
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// lexer->data.result_symbol = REGEX;
|
||||
// if (valid_symbols[REGEX] && !state.advanced_once)
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
|
||||
extglob_pattern:
|
||||
// if (valid_symbols[EXTGLOB_PATTERN] && !(valid_symbols[ERROR_RECOVERY]))
|
||||
// {
|
||||
// // first skip ws, then check for ? * + @ !
|
||||
// while (me_isspace(lexer->data.lookahead))
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, true);
|
||||
// }
|
||||
//
|
||||
// if (lexer->data.lookahead == '?' || lexer->data.lookahead == '*' || lexer->data.lookahead == '+' || lexer->data.lookahead == '@' ||
|
||||
// lexer->data.lookahead == '!' || lexer->data.lookahead == '-' || lexer->data.lookahead == ')' || lexer->data.lookahead == '\\' ||
|
||||
// lexer->data.lookahead == '.' || lexer->data.lookahead == '[' || (me_isalpha(lexer->data.lookahead)))
|
||||
// {
|
||||
// if (lexer->data.lookahead == '\\')
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if ((me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '"') && lexer->data.lookahead != '\r' &&
|
||||
// lexer->data.lookahead != '\n')
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (lexer->data.lookahead == ')' && scanner->last_glob_paren_depth == 0)
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
//
|
||||
// if (me_isspace(lexer->data.lookahead))
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// bool was_non_alpha = !me_isalpha(lexer->data.lookahead);
|
||||
// if (lexer->data.lookahead != '[')
|
||||
// {
|
||||
// // no esac
|
||||
// if (lexer->data.lookahead == 'e')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (lexer->data.lookahead == 's')
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (lexer->data.lookahead == 'a')
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (lexer->data.lookahead == 'c')
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (me_isspace(lexer->data.lookahead))
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // -\w is just a word, find something else special
|
||||
// if (lexer->data.lookahead == '-')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// while (me_isalnum(lexer->data.lookahead))
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// }
|
||||
//
|
||||
// if (lexer->data.lookahead == ')' || lexer->data.lookahead == '\\' || lexer->data.lookahead == '.')
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// }
|
||||
//
|
||||
// // case item -) or *)
|
||||
// if (lexer->data.lookahead == ')' && scanner->last_glob_paren_depth == 0)
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (me_isspace(lexer->data.lookahead))
|
||||
// {
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// return was_non_alpha;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (me_isspace(lexer->data.lookahead))
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// scanner->last_glob_paren_depth = 0;
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// if (lexer->data.lookahead == '$')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (lexer->data.lookahead == '{' || lexer->data.lookahead == '(')
|
||||
// {
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (lexer->data.lookahead == '|')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// if (!me_isalnum(lexer->data.lookahead) && lexer->data.lookahead != '(' && lexer->data.lookahead != '"' && lexer->data.lookahead
|
||||
// != '[' && lexer->data.lookahead != '?' && lexer->data.lookahead != '/' && lexer->data.lookahead != '\\' && lexer->data.lookahead !=
|
||||
// '_' && lexer->data.lookahead != '*')
|
||||
// {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// typedef struct
|
||||
// {
|
||||
// bool done;
|
||||
// bool saw_non_alphadot;
|
||||
// t_u32 paren_depth;
|
||||
// t_u32 bracket_depth;
|
||||
// t_u32 brace_depth;
|
||||
// } State;
|
||||
//
|
||||
// State state = {false, was_non_alpha, scanner->last_glob_paren_depth, 0, 0};
|
||||
// while (!state.done)
|
||||
// {
|
||||
// switch (lexer->data.lookahead)
|
||||
// {
|
||||
// case '\0':
|
||||
// return false;
|
||||
// case '(':
|
||||
// state.paren_depth++;
|
||||
// break;
|
||||
// case '[':
|
||||
// state.bracket_depth++;
|
||||
// break;
|
||||
// case '{':
|
||||
// state.brace_depth++;
|
||||
// break;
|
||||
// case ')':
|
||||
// if (state.paren_depth == 0)
|
||||
// {
|
||||
// state.done = true;
|
||||
// }
|
||||
// state.paren_depth--;
|
||||
// break;
|
||||
// case ']':
|
||||
// if (state.bracket_depth == 0)
|
||||
// {
|
||||
// state.done = true;
|
||||
// }
|
||||
// state.bracket_depth--;
|
||||
// break;
|
||||
// case '}':
|
||||
// if (state.brace_depth == 0)
|
||||
// {
|
||||
// state.done = true;
|
||||
// }
|
||||
// state.brace_depth--;
|
||||
// break;
|
||||
// }
|
||||
//
|
||||
// if (lexer->data.lookahead == '|')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0)
|
||||
// {
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (!state.done)
|
||||
// {
|
||||
// bool was_space = me_isspace(lexer->data.lookahead);
|
||||
// if (lexer->data.lookahead == '$')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// if (!me_isalpha(lexer->data.lookahead) && lexer->data.lookahead != '.' && lexer->data.lookahead != '\\')
|
||||
// {
|
||||
// state.saw_non_alphadot = true;
|
||||
// }
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (lexer->data.lookahead == '(' || lexer->data.lookahead == '{')
|
||||
// {
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// scanner->last_glob_paren_depth = state.paren_depth;
|
||||
// return state.saw_non_alphadot;
|
||||
// }
|
||||
// }
|
||||
// if (was_space)
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// scanner->last_glob_paren_depth = 0;
|
||||
// return state.saw_non_alphadot;
|
||||
// }
|
||||
// if (lexer->data.lookahead == '"')
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// scanner->last_glob_paren_depth = 0;
|
||||
// return state.saw_non_alphadot;
|
||||
// }
|
||||
// if (lexer->data.lookahead == '\\')
|
||||
// {
|
||||
// if (!me_isalpha(lexer->data.lookahead) && lexer->data.lookahead != '.' && lexer->data.lookahead != '\\')
|
||||
// {
|
||||
// state.saw_non_alphadot = true;
|
||||
// }
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// if (me_isspace(lexer->data.lookahead) || lexer->data.lookahead == '"')
|
||||
// {
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// }
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// if (!me_isalpha(lexer->data.lookahead) && lexer->data.lookahead != '.' && lexer->data.lookahead != '\\')
|
||||
// {
|
||||
// state.saw_non_alphadot = true;
|
||||
// }
|
||||
// lexer->data.advance((void *)lexer, false);
|
||||
// }
|
||||
// if (!was_space)
|
||||
// {
|
||||
// lexer->data.mark_end((void *)lexer);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// lexer->data.result_symbol = EXTGLOB_PATTERN;
|
||||
// scanner->last_glob_paren_depth = 0;
|
||||
// return state.saw_non_alphadot;
|
||||
// }
|
||||
// scanner->last_glob_paren_depth = 0;
|
||||
//
|
||||
// return false;
|
||||
// }
|
||||
|
||||
expansion_word:
|
||||
if (valid_symbols[EXPANSION_WORD])
|
||||
{
|
||||
|
|
@ -1120,44 +660,47 @@ expansion_word:
|
|||
}
|
||||
}
|
||||
|
||||
brace_start:
|
||||
return false;
|
||||
}
|
||||
|
||||
void *tree_sitter_sh_external_scanner_create()
|
||||
{
|
||||
t_scanner *scanner = mem_alloc(sizeof(t_scanner));
|
||||
scanner->heredocs = vec_heredoc_new(0, NULL);
|
||||
t_scanner *scanner;
|
||||
|
||||
scanner = mem_alloc(sizeof(*scanner));
|
||||
scanner->heredocs = vec_heredoc_new(0, heredoc_free);
|
||||
return (scanner);
|
||||
}
|
||||
|
||||
bool tree_sitter_sh_external_scanner_scan(void *payload, t_lexer *lexer, const bool *valid_symbols)
|
||||
{
|
||||
t_scanner *scanner = (t_scanner *)payload;
|
||||
t_scanner *scanner;
|
||||
|
||||
scanner = (t_scanner *)payload;
|
||||
return scan(scanner, lexer, valid_symbols);
|
||||
}
|
||||
|
||||
t_u32 tree_sitter_sh_external_scanner_serialize(void *payload, t_u8 *state)
|
||||
{
|
||||
t_scanner *scanner = (t_scanner *)payload;
|
||||
t_scanner *scanner;
|
||||
|
||||
scanner = (t_scanner *)payload;
|
||||
return serialize(scanner, state);
|
||||
}
|
||||
|
||||
void tree_sitter_sh_external_scanner_deserialize(void *payload, const t_u8 *state, t_u32 length)
|
||||
{
|
||||
t_scanner *scanner = (t_scanner *)payload;
|
||||
t_scanner *scanner;
|
||||
|
||||
scanner = (t_scanner *)payload;
|
||||
deserialize(scanner, state, length);
|
||||
}
|
||||
|
||||
void tree_sitter_sh_external_scanner_destroy(void *payload)
|
||||
{
|
||||
t_scanner *scanner = (t_scanner *)payload;
|
||||
for (size_t i = 0; i < scanner->heredocs.len; i++)
|
||||
{
|
||||
t_heredoc *heredoc = vec_heredoc_get(&scanner->heredocs, i);
|
||||
string_free(heredoc->current_leading_word);
|
||||
string_free(heredoc->delimiter);
|
||||
}
|
||||
t_scanner *scanner;
|
||||
|
||||
scanner = (t_scanner *)payload;
|
||||
vec_heredoc_free(scanner->heredocs);
|
||||
mem_free(scanner);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue