From b2440780fb641afafd1ca489b177846957890441 Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Mon, 2 Sep 2024 17:43:53 +0200 Subject: [PATCH] Updated parser/src/scanner.c to be splitted --- parser/Filelist.parser.mk | 6 +- parser/include/parser/inner/scanner.h | 8 +- parser/src/scanner.c | 334 -------------------------- parser/src/scanner/heredoc.c | 61 +++-- parser/src/scanner/scan.c | 47 ++++ parser/src/scanner/scan_concat.c | 52 ++++ parser/src/scanner/scan_double_hash.c | 37 +++ parser/src/scanner/scan_varname.c | 133 ++++++++++ parser/src/scanner/scan_word.c | 92 +++++++ parser/src/scanner/serialize.c | 4 +- parser/src/stack/stack_summary.c | 4 +- 11 files changed, 415 insertions(+), 363 deletions(-) delete mode 100644 parser/src/scanner.c create mode 100644 parser/src/scanner/scan.c create mode 100644 parser/src/scanner/scan_concat.c create mode 100644 parser/src/scanner/scan_double_hash.c create mode 100644 parser/src/scanner/scan_varname.c create mode 100644 parser/src/scanner/scan_word.c diff --git a/parser/Filelist.parser.mk b/parser/Filelist.parser.mk index a9283fba..2d84a3fb 100644 --- a/parser/Filelist.parser.mk +++ b/parser/Filelist.parser.mk @@ -32,13 +32,17 @@ node/node_relevent \ parser \ point/point_funcs1 \ point/point_funcs2 \ -scanner \ scanner/advance_words \ scanner/deserialize \ scanner/heredoc \ scanner/heredoc_functions \ scanner/lifetime \ +scanner/scan \ +scanner/scan_concat \ scanner/scan_dollar \ +scanner/scan_double_hash \ +scanner/scan_varname \ +scanner/scan_word \ scanner/serialize \ stack/stack_add_link \ stack/stack_funcs1 \ diff --git a/parser/include/parser/inner/scanner.h b/parser/include/parser/inner/scanner.h index 07c4817d..7426a6ac 100755 --- a/parser/include/parser/inner/scanner.h +++ b/parser/include/parser/inner/scanner.h @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/02 13:22:04 by maiboyer #+# #+# */ -/* Updated: 2024/09/02 13:27:44 by maiboyer ### ########.fr */ +/* 
Updated: 2024/09/02 17:40:15 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -65,11 +65,17 @@ struct s_heredoc_scan_state typedef bool (*t_heredoc_content_func)(struct s_heredoc_scan_state *state); bool advance_word(TSLexer *lexer, t_string *unquoted_word); +bool check_scan_concat(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols); bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols); bool scan_bare_dollar(TSLexer *lexer); +bool scan_concat(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols); +bool scan_double_hash(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols); +bool scan_expansion_word(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols); bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum e_token_type middle_type, enum e_token_type end_type); +bool scan_heredoc_end(t_scanner *scanner, TSLexer *lexer); bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer); bool scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer); +bool scan_varname(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols); void reset(t_scanner *scanner); #endif diff --git a/parser/src/scanner.c b/parser/src/scanner.c deleted file mode 100644 index 5d515557..00000000 --- a/parser/src/scanner.c +++ /dev/null @@ -1,334 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* scanner.c :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: maiboyer +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2024/09/01 14:17:17 by maiboyer #+# #+# */ -/* Updated: 2024/09/02 16:25:19 by maiboyer ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#include "parser/inner/scanner.h" -#include "me/char/char.h" -#include "me/mem/mem.h" -#include "me/string/string.h" -#include "me/types.h" -#include "me/vec/vec_heredoc.h" -#include 
"parser/inner/heredoc.h" -#include "parser/parser.h" -#include - -bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) -{ - if (valid_symbols[CONCAT] && !valid_symbols[ERROR_RECOVERY]) - { - if (!(lexer->lookahead == 0 || me_isspace(lexer->lookahead) || lexer->lookahead == '>' || lexer->lookahead == '<' || - lexer->lookahead == ')' || lexer->lookahead == '(' || lexer->lookahead == ';' || lexer->lookahead == '&' || - lexer->lookahead == '|' || lexer->lookahead == '{' || lexer->lookahead == '}')) - { - lexer->result_symbol = CONCAT; - if (lexer->lookahead == '`') - { - lexer->mark_end(lexer); - lexer->advance(lexer, false); - while (lexer->lookahead != '`' && !lexer->eof(lexer)) - lexer->advance(lexer, false); - if (lexer->eof(lexer)) - return false; - if (lexer->lookahead == '`') - lexer->advance(lexer, false); - return me_isspace(lexer->lookahead) || lexer->eof(lexer); - } - if (lexer->lookahead == '\\') - { - lexer->mark_end(lexer); - lexer->advance(lexer, false); - if (lexer->lookahead == '"' || lexer->lookahead == '\'' || lexer->lookahead == '\\') - return true; - if (lexer->eof(lexer)) - return false; - } - else - return true; - } - } - - if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !valid_symbols[ERROR_RECOVERY]) - { - if (lexer->lookahead == '#') - { - lexer->mark_end(lexer); - lexer->advance(lexer, false); - if (lexer->lookahead == '#') - { - lexer->advance(lexer, false); - if (lexer->lookahead != '}') - { - lexer->result_symbol = IMMEDIATE_DOUBLE_HASH; - lexer->mark_end(lexer); - return true; - } - } - } - } - - if (valid_symbols[EMPTY_VALUE] && - (me_isspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == ';' || lexer->lookahead == '&')) - return (lexer->result_symbol = EMPTY_VALUE, true); - - if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.len > 0 && - !vec_heredoc_last(&scanner->heredocs)->started && !valid_symbols[ERROR_RECOVERY]) - return 
scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY); - - if (valid_symbols[HEREDOC_END] && scanner->heredocs.len > 0 && scan_heredoc_end_identifier(vec_heredoc_last(&scanner->heredocs), lexer)) - { - t_heredoc *heredoc = vec_heredoc_last(&scanner->heredocs); - string_free(heredoc->current_leading_word); - string_free(heredoc->delimiter); - (void)vec_heredoc_pop(&scanner->heredocs, NULL); - return (lexer->result_symbol = HEREDOC_END, true); - } - - if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0 && vec_heredoc_last(&scanner->heredocs)->started && - !valid_symbols[ERROR_RECOVERY]) - return scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END); - - if (valid_symbols[HEREDOC_START] && !valid_symbols[ERROR_RECOVERY] && scanner->heredocs.len > 0) - return scan_heredoc_start(vec_heredoc_last(&scanner->heredocs), lexer); - - if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) && !valid_symbols[ERROR_RECOVERY]) - { - while (true) - { - if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == '\r' || - (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) && - !valid_symbols[EXPANSION_WORD]) - { - lexer->advance(lexer, true); - } - else if (lexer->lookahead == '\\') - { - lexer->advance(lexer, true); - - if (lexer->eof(lexer)) - { - lexer->mark_end(lexer); - lexer->result_symbol = VARIABLE_NAME; - return true; - } - - if (lexer->lookahead == '\r') - lexer->advance(lexer, true); - if (lexer->lookahead == '\n') - lexer->advance(lexer, true); - else - { - if (lexer->lookahead == '\\' && valid_symbols[EXPANSION_WORD]) - goto expansion_word; - return false; - } - } - else - break; - } - - // no '*', '@', '?', '-', '$', '0', '_' - if (!valid_symbols[EXPANSION_WORD] && (lexer->lookahead == '*' || lexer->lookahead == '@' || lexer->lookahead == '?' 
|| - lexer->lookahead == '-' || lexer->lookahead == '0' || lexer->lookahead == '_')) - { - lexer->mark_end(lexer); - lexer->advance(lexer, false); - if (lexer->lookahead == '=' || lexer->lookahead == '[' || lexer->lookahead == ':' || lexer->lookahead == '-' || - lexer->lookahead == '%' || lexer->lookahead == '#' || lexer->lookahead == '/') - return false; - if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->lookahead)) - { - lexer->mark_end(lexer); - lexer->result_symbol = EXTGLOB_PATTERN; - return true; - } - } - - if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<') - { - lexer->advance(lexer, false); - if (lexer->lookahead == '<') - { - lexer->advance(lexer, false); - if (lexer->lookahead == '-') - { - lexer->advance(lexer, false); - t_heredoc heredoc = heredoc_new(); - heredoc.allows_indent = true; - vec_heredoc_push(&scanner->heredocs, heredoc); - lexer->result_symbol = HEREDOC_ARROW_DASH; - } - else - { - t_heredoc heredoc = heredoc_new(); - vec_heredoc_push(&scanner->heredocs, heredoc); - lexer->result_symbol = HEREDOC_ARROW; - } - return true; - } - return false; - } - - bool is_number = true; - if (me_isdigit(lexer->lookahead)) - lexer->advance(lexer, false); - else if (me_isalpha(lexer->lookahead) || lexer->lookahead == '_') - { - is_number = false; - lexer->advance(lexer, false); - } - else - { - if (lexer->lookahead == '{') - goto brace_start; - if (valid_symbols[EXPANSION_WORD]) - goto expansion_word; - if (valid_symbols[EXTGLOB_PATTERN]) - goto extglob_pattern; - return false; - } - - while (true) - { - if (me_isdigit(lexer->lookahead)) - lexer->advance(lexer, false); - else if (me_isalpha(lexer->lookahead) || lexer->lookahead == '_') - { - is_number = false; - lexer->advance(lexer, false); - } - else - break; - } - - if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->lookahead == '>' || lexer->lookahead == '<')) - { - lexer->result_symbol = FILE_DESCRIPTOR; - return true; - } - - if (valid_symbols[VARIABLE_NAME]) - { - if 
(lexer->lookahead == '+') - { - lexer->mark_end(lexer); - lexer->advance(lexer, false); - if (lexer->lookahead == '=' || lexer->lookahead == ':') - { - lexer->result_symbol = VARIABLE_NAME; - return true; - } - return false; - } - if (lexer->lookahead == '/') - { - return false; - } - if (lexer->lookahead == '=' || lexer->lookahead == '[' || (lexer->lookahead == ':' && !valid_symbols[OPENING_PAREN]) || - lexer->lookahead == '%' || (lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || (lexer->lookahead == '-')) - { - lexer->mark_end(lexer); - lexer->result_symbol = VARIABLE_NAME; - return true; - } - - if (lexer->lookahead == '?') - { - lexer->mark_end(lexer); - lexer->advance(lexer, false); - lexer->result_symbol = VARIABLE_NAME; - return me_isalpha(lexer->lookahead); - } - } - - return false; - } - - if (valid_symbols[BARE_DOLLAR] && !valid_symbols[ERROR_RECOVERY] && scan_bare_dollar(lexer)) - return true; -extglob_pattern: -expansion_word: - if (valid_symbols[EXPANSION_WORD]) - { - bool advanced_once = false; - bool advance_once_space = false; - while (true) - { - if (lexer->lookahead == '\"') - return false; - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - lexer->advance(lexer, false); - if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || me_isalnum(lexer->lookahead)) - { - lexer->result_symbol = EXPANSION_WORD; - return advanced_once; - } - advanced_once = true; - } - - if (lexer->lookahead == '}') - { - lexer->mark_end(lexer); - lexer->result_symbol = EXPANSION_WORD; - return advanced_once || advance_once_space; - } - - if (lexer->lookahead == '(' && !(advanced_once || advance_once_space)) - { - lexer->mark_end(lexer); - lexer->advance(lexer, false); - while (lexer->lookahead != ')' && !lexer->eof(lexer)) - { - if (lexer->lookahead == '$') - { - lexer->mark_end(lexer); - lexer->advance(lexer, false); - if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || 
me_isalnum(lexer->lookahead)) - { - lexer->result_symbol = EXPANSION_WORD; - return advanced_once; - } - advanced_once = true; - } - else - { - advanced_once = advanced_once || !me_isspace(lexer->lookahead); - advance_once_space = advance_once_space || me_isspace(lexer->lookahead); - lexer->advance(lexer, false); - } - } - lexer->mark_end(lexer); - if (lexer->lookahead == ')') - { - advanced_once = true; - lexer->advance(lexer, false); - lexer->mark_end(lexer); - if (lexer->lookahead == '}') - return false; - } - else - return false; - } - - if (lexer->lookahead == '\'') - return false; - if (lexer->eof(lexer)) - return false; - advanced_once = advanced_once || !me_isspace(lexer->lookahead); - advance_once_space = advance_once_space || me_isspace(lexer->lookahead); - lexer->advance(lexer, false); - } - } - -brace_start: - return false; -} diff --git a/parser/src/scanner/heredoc.c b/parser/src/scanner/heredoc.c index b2433d59..5aa4c0cd 100644 --- a/parser/src/scanner/heredoc.c +++ b/parser/src/scanner/heredoc.c @@ -6,44 +6,42 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/01 19:33:04 by maiboyer #+# #+# */ -/* Updated: 2024/09/01 20:02:49 by maiboyer ### ########.fr */ +/* Updated: 2024/09/02 17:31:02 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ +#include "parser/inner/heredoc.h" #include "me/char/char.h" #include "me/str/str.h" #include "me/types.h" #include "me/vec/vec_heredoc.h" -#include "parser/inner/heredoc.h" #include "parser/inner/scanner.h" #include "parser/parser.h" -bool scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer) +bool scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer) { - bool found_delimiter; + bool found_delimiter; while (me_isspace(lexer->lookahead)) lexer->advance(lexer, true); lexer->result_symbol = HEREDOC_START; - heredoc->is_raw = lexer->lookahead == '\'' || lexer->lookahead == '"' - || lexer->lookahead == '\\'; + heredoc->is_raw 
= lexer->lookahead == '\'' || lexer->lookahead == '"' || lexer->lookahead == '\\'; found_delimiter = advance_word(lexer, &heredoc->delimiter); if (!found_delimiter) return (string_clear(&heredoc->delimiter), false); return (found_delimiter); } -bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer) +bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer) { - t_i32 size; + t_i32 size; size = 0; string_clear(&heredoc->current_leading_word); if (heredoc->delimiter.len > 0) { - while (lexer->lookahead != '\0' && lexer->lookahead != '\n' - && (t_i32)heredoc->delimiter.buf[size] == lexer->lookahead - && heredoc->current_leading_word.len < heredoc->delimiter.len) + while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && (t_i32)heredoc->delimiter.buf[size] == lexer->lookahead && + heredoc->current_leading_word.len < heredoc->delimiter.len) { string_push_char(&heredoc->current_leading_word, lexer->lookahead); lexer->advance(lexer, false); @@ -52,26 +50,27 @@ bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer) } if (heredoc->delimiter.len == 0) return (false); - return (str_compare(heredoc->current_leading_word.buf, - heredoc->delimiter.buf)); + return (str_compare(heredoc->current_leading_word.buf, heredoc->delimiter.buf)); } -bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state); -bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state); -bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state); -bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state); -bool scan_heredoc_content_other(struct s_heredoc_scan_state *state); +bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state); +bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state); +bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state); +bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state); +bool scan_heredoc_content_other(struct s_heredoc_scan_state 
*state); -bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, - enum e_token_type middle_type, enum e_token_type end_type) +bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum e_token_type middle_type, enum e_token_type end_type) { - struct s_heredoc_scan_state state; + struct s_heredoc_scan_state state; t_heredoc_content_func func; - state = (struct s_heredoc_scan_state){.did_advance = false, \ - .lexer = lexer, .heredoc = vec_heredoc_last(&scanner->heredocs), \ - .scanner = scanner, .middle_type = middle_type, .end_type = end_type, \ - .return_value = false}; + state = (struct s_heredoc_scan_state){.did_advance = false, + .lexer = lexer, + .heredoc = vec_heredoc_last(&scanner->heredocs), + .scanner = scanner, + .middle_type = middle_type, + .end_type = end_type, + .return_value = false}; while (true) { if (lexer->lookahead == '\0') @@ -89,3 +88,15 @@ bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, } return (false); } + +bool scan_heredoc_end(t_scanner *scanner, TSLexer *lexer) +{ + t_heredoc *heredoc; + + heredoc = vec_heredoc_last(&scanner->heredocs); + string_free(heredoc->current_leading_word); + string_free(heredoc->delimiter); + vec_heredoc_pop(&scanner->heredocs, NULL); + lexer->result_symbol = HEREDOC_END; + return (true); +} diff --git a/parser/src/scanner/scan.c b/parser/src/scanner/scan.c new file mode 100644 index 00000000..97854949 --- /dev/null +++ b/parser/src/scanner/scan.c @@ -0,0 +1,65 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* scan.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/09/02 17:40:46 by maiboyer #+# #+# */ +/* Updated: 2024/09/02 17:41:52 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/char/char.h" +#include "parser/inner/scanner.h" + +/* +** Second half of the dispatch chain, called last by scan(): heredoc +** content/start, variable names, bare dollar, then expansion words. +*/ +static bool scan2(t_scanner *scanner, 
TSLexer *lexer, const bool *valid_symbols) +{ + if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0 && vec_heredoc_last(&scanner->heredocs)->started && + !valid_symbols[ERROR_RECOVERY]) + return (scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END)); + if (valid_symbols[HEREDOC_START] && !valid_symbols[ERROR_RECOVERY] && scanner->heredocs.len > 0) + return (scan_heredoc_start(vec_heredoc_last(&scanner->heredocs), lexer)); + if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) && !valid_symbols[ERROR_RECOVERY]) + return (scan_varname(scanner, lexer, valid_symbols)); + if (valid_symbols[BARE_DOLLAR] && !valid_symbols[ERROR_RECOVERY] && scan_bare_dollar(lexer)) + return (true); + if (valid_symbols[EXPANSION_WORD]) + return (scan_expansion_word(scanner, lexer, valid_symbols)); + return (false); +} + +/* +** Scanner entry point: runs the token scanners in a fixed priority order and +** returns the result of the first one that applies. +*/ +bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) +{ + bool was_escape; + bool is_concat; + + if (check_scan_concat(scanner, lexer, valid_symbols)) + { + /* a backslash escaping neither a quote nor a backslash is no CONCAT: */ + /* unless EOF was hit, go on with the other scanners as scan() used to */ + was_escape = (lexer->lookahead == '\\'); + is_concat = scan_concat(scanner, lexer, valid_symbols); + if (is_concat || !was_escape || lexer->eof(lexer)) + return (is_concat); + } + if (scan_double_hash(scanner, lexer, valid_symbols)) + return (true); + if (valid_symbols[EMPTY_VALUE] && + (me_isspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == ';' || lexer->lookahead == '&')) + return (lexer->result_symbol = EMPTY_VALUE, true); + if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.len > 0 && + !vec_heredoc_last(&scanner->heredocs)->started && !valid_symbols[ERROR_RECOVERY]) + return (scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY)); + if (valid_symbols[HEREDOC_END] && scanner->heredocs.len > 0 && scan_heredoc_end_identifier(vec_heredoc_last(&scanner->heredocs), lexer)) + return (scan_heredoc_end(scanner, lexer)); + return (scan2(scanner, lexer, valid_symbols)); +} diff --git a/parser/src/scanner/scan_concat.c b/parser/src/scanner/scan_concat.c new file mode 100644 index 00000000..5564c533 --- 
/dev/null +++ b/parser/src/scanner/scan_concat.c @@ -0,0 +1,62 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* scan_concat.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/09/02 17:37:05 by maiboyer #+# #+# */ +/* Updated: 2024/09/02 17:42:58 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/char/char.h" +#include "parser/inner/scanner.h" + +/* +** Tries to recognise CONCAT at the cursor and returns whether it did. A +** backtick span only counts when it is closed and followed by whitespace or +** EOF; a backslash only counts when it escapes a quote or a backslash. +*/ +bool scan_concat(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) +{ + (void)(scanner); + (void)(valid_symbols); + lexer->result_symbol = CONCAT; + if (lexer->lookahead == '`') + { + lexer->mark_end(lexer); + lexer->advance(lexer, false); + while (lexer->lookahead != '`' && !lexer->eof(lexer)) + lexer->advance(lexer, false); + if (lexer->eof(lexer)) + return (false); + if (lexer->lookahead == '`') + lexer->advance(lexer, false); + return (me_isspace(lexer->lookahead) || lexer->eof(lexer)); + } + if (lexer->lookahead == '\\') + { + lexer->mark_end(lexer); + lexer->advance(lexer, false); + if (lexer->lookahead == '"' || lexer->lookahead == '\'' || lexer->lookahead == '\\') + return (true); + /* EOF or some other escaped char: no CONCAT; scan() stops on EOF and */ + /* otherwise goes on with the remaining scanners */ + return (false); + } + return (true); +} + +/* +** True when CONCAT is a valid symbol, error recovery is off, and the +** lookahead is not NUL, whitespace or one of > < ) ( ; & | { }. +*/ +bool check_scan_concat(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) +{ + (void)(scanner); + return (valid_symbols[CONCAT] && !valid_symbols[ERROR_RECOVERY]) && + (!(lexer->lookahead == 0 || me_isspace(lexer->lookahead) || lexer->lookahead == '>' || lexer->lookahead == '<' || + lexer->lookahead == ')' || lexer->lookahead == '(' || lexer->lookahead == ';' || lexer->lookahead == '&' || + lexer->lookahead == '|' || lexer->lookahead == '{' || lexer->lookahead == '}')); +} diff --git a/parser/src/scanner/scan_double_hash.c b/parser/src/scanner/scan_double_hash.c new file mode 100644 index 00000000..b883889f --- /dev/null +++ 
b/parser/src/scanner/scan_double_hash.c @@ -0,0 +1,41 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* scan_double_hash.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/09/02 17:32:35 by maiboyer #+# #+# */ +/* Updated: 2024/09/02 17:42:43 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "parser/inner/scanner.h" + +/* +** Recognises IMMEDIATE_DOUBLE_HASH: a `##` at the cursor that is not directly +** followed by `}`. Returns false otherwise, possibly after advancing. +*/ +bool scan_double_hash(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) +{ + (void)(scanner); + if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !valid_symbols[ERROR_RECOVERY]) + { + if (lexer->lookahead == '#') + { + lexer->mark_end(lexer); + lexer->advance(lexer, false); + if (lexer->lookahead == '#') + { + lexer->advance(lexer, false); + if (lexer->lookahead != '}') + { + lexer->result_symbol = IMMEDIATE_DOUBLE_HASH; + lexer->mark_end(lexer); + return (true); + } + } + } + } + return (false); +} diff --git a/parser/src/scanner/scan_varname.c b/parser/src/scanner/scan_varname.c new file mode 100644 index 00000000..541531a4 --- /dev/null +++ b/parser/src/scanner/scan_varname.c @@ -0,0 +1,146 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* scan_varname.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/09/02 17:26:05 by maiboyer #+# #+# */ +/* Updated: 2024/09/02 17:28:10 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/char/char.h" +#include "parser/inner/scanner.h" + +/* +** Scans VARIABLE_NAME, FILE_DESCRIPTOR, EXTGLOB_PATTERN and the heredoc +** arrows (`<<` and `<<-`), after skipping leading blanks and backslash-newline +** continuations. Hands over to scan_expansion_word() where EXPANSION_WORD is +** valid and no name starts at the cursor. An arrow pushes a new heredoc. +*/ +bool scan_varname(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) +{ + while (true) + { + if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == '\r' || + (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) && + 
!valid_symbols[EXPANSION_WORD]) + lexer->advance(lexer, true); + else if (lexer->lookahead == '\\') + { + lexer->advance(lexer, true); + if (lexer->eof(lexer)) + { + lexer->mark_end(lexer); + lexer->result_symbol = VARIABLE_NAME; + return (true); + } + if (lexer->lookahead == '\r') + lexer->advance(lexer, true); + if (lexer->lookahead == '\n') + lexer->advance(lexer, true); + else + { + if (lexer->lookahead == '\\' && valid_symbols[EXPANSION_WORD]) + return (scan_expansion_word(scanner, lexer, valid_symbols)); + return (false); + } + } + else + break; + } + + // no '*', '@', '?', '-', '$', '0', '_' + if (!valid_symbols[EXPANSION_WORD] && (lexer->lookahead == '*' || lexer->lookahead == '@' || lexer->lookahead == '?' || + lexer->lookahead == '-' || lexer->lookahead == '0' || lexer->lookahead == '_')) + { + lexer->mark_end(lexer); + lexer->advance(lexer, false); + if (lexer->lookahead == '=' || lexer->lookahead == '[' || lexer->lookahead == ':' || lexer->lookahead == '-' || + lexer->lookahead == '%' || lexer->lookahead == '#' || lexer->lookahead == '/') + return false; + if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->lookahead)) + { + lexer->mark_end(lexer); + lexer->result_symbol = EXTGLOB_PATTERN; + return true; + } + } + + if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<') + { + lexer->advance(lexer, false); + if (lexer->lookahead == '<') + { + lexer->advance(lexer, false); + t_heredoc heredoc = heredoc_new(); + lexer->result_symbol = HEREDOC_ARROW; + if (lexer->lookahead == '-') + { + /* `<<-`: flag the heredoc as allows_indent, as the pre-split scan() did */ + lexer->advance(lexer, false); + heredoc.allows_indent = true; + lexer->result_symbol = HEREDOC_ARROW_DASH; + } + vec_heredoc_push(&scanner->heredocs, heredoc); + return (true); + } + return (false); + } + + bool is_number = true; + if (me_isdigit(lexer->lookahead)) + lexer->advance(lexer, false); + else if (me_isalpha(lexer->lookahead) || lexer->lookahead == '_') + { + is_number = false; + lexer->advance(lexer, false); + } + else + { + if (lexer->lookahead == '{') + return (false); + if (valid_symbols[EXPANSION_WORD]) + return (scan_expansion_word(scanner, lexer, valid_symbols)); + return (false); + } + + 
while (true) + { + if (me_isdigit(lexer->lookahead)) + lexer->advance(lexer, false); + else if (me_isalpha(lexer->lookahead) || lexer->lookahead == '_') + is_number = (lexer->advance(lexer, false), false); + else + break; + } + + if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->lookahead == '>' || lexer->lookahead == '<')) + return (lexer->result_symbol = FILE_DESCRIPTOR, true); + + if (valid_symbols[VARIABLE_NAME]) + { + if (lexer->lookahead == '+') + { + lexer->mark_end(lexer); + lexer->advance(lexer, false); + if (lexer->lookahead == '=' || lexer->lookahead == ':') + return (lexer->result_symbol = VARIABLE_NAME, true); + return false; + } + if (lexer->lookahead == '/') + return false; + if (lexer->lookahead == '=' || lexer->lookahead == '[' || (lexer->lookahead == ':' && !valid_symbols[OPENING_PAREN]) || + lexer->lookahead == '%' || (lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || (lexer->lookahead == '-')) + return (lexer->mark_end(lexer), lexer->result_symbol = VARIABLE_NAME, true); + if (lexer->lookahead == '?') + { + lexer->mark_end(lexer); + lexer->advance(lexer, false); + lexer->result_symbol = VARIABLE_NAME; + return (me_isalpha(lexer->lookahead)); + } + } + return (false); +} diff --git a/parser/src/scanner/scan_word.c b/parser/src/scanner/scan_word.c new file mode 100644 index 00000000..bccdba06 --- /dev/null +++ b/parser/src/scanner/scan_word.c @@ -0,0 +1,96 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* scan_word.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/09/02 16:59:16 by maiboyer #+# #+# */ +/* Updated: 2024/09/02 17:22:25 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/char/char.h" +#include "parser/inner/scanner.h" + +/* +** Scans an EXPANSION_WORD up to a closing `}` or the start of a `$` expansion. +** Fails on a quote or EOF, and when nothing usable was consumed before the +** stop. valid_symbols is ignored: callers check EXPANSION_WORD themselves. +*/ +bool scan_expansion_word(t_scanner *scanner, TSLexer *lexer, const bool 
*valid_symbols) +{ + bool advanced_once = false; + bool advance_once_space = false; + + (void)(scanner); + (void)(valid_symbols); + while (true) + { + if (lexer->lookahead == '\"') + return false; + if (lexer->lookahead == '$') + { + lexer->mark_end(lexer); + lexer->advance(lexer, false); + if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || me_isalnum(lexer->lookahead)) + { + lexer->result_symbol = EXPANSION_WORD; + return advanced_once; + } + advanced_once = true; + } + + if (lexer->lookahead == '}') + { + lexer->mark_end(lexer); + lexer->result_symbol = EXPANSION_WORD; + return advanced_once || advance_once_space; + } + + if (lexer->lookahead == '(' && !(advanced_once || advance_once_space)) + { + lexer->mark_end(lexer); + lexer->advance(lexer, false); + while (lexer->lookahead != ')' && !lexer->eof(lexer)) + { + if (lexer->lookahead == '$') + { + lexer->mark_end(lexer); + lexer->advance(lexer, false); + if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || me_isalnum(lexer->lookahead)) + { + lexer->result_symbol = EXPANSION_WORD; + return advanced_once; + } + advanced_once = true; + } + else + { + advanced_once = advanced_once || !me_isspace(lexer->lookahead); + advance_once_space = advance_once_space || me_isspace(lexer->lookahead); + lexer->advance(lexer, false); + } + } + lexer->mark_end(lexer); + if (lexer->lookahead == ')') + { + advanced_once = true; + lexer->advance(lexer, false); + lexer->mark_end(lexer); + if (lexer->lookahead == '}') + return false; + } + else + return false; + } + + if (lexer->lookahead == '\'') + return false; + if (lexer->eof(lexer)) + return false; + advanced_once = advanced_once || !me_isspace(lexer->lookahead); + advance_once_space = advance_once_space || me_isspace(lexer->lookahead); + lexer->advance(lexer, false); + } +} diff --git a/parser/src/scanner/serialize.c b/parser/src/scanner/serialize.c index c6bc8f17..0e55b628 100644 --- 
a/parser/src/scanner/serialize.c +++ b/parser/src/scanner/serialize.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/01 15:06:56 by maiboyer #+# #+# */ -/* Updated: 2024/09/01 19:28:24 by maiboyer ### ########.fr */ +/* Updated: 2024/09/02 17:06:26 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -22,6 +22,8 @@ t_error serialize_heredocs(t_scanner *scanner, t_u8 *buffer, t_u32 *size, t_heredoc *heredoc; heredoc = vec_heredoc_get(&scanner->heredocs, i); + if (heredoc == NULL) + return (ERROR); if (heredoc->delimiter.len + 1 + sizeof(t_usize) + (*size) >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return (ERROR); diff --git a/parser/src/stack/stack_summary.c b/parser/src/stack/stack_summary.c index 0872ebba..b0a3a5cb 100644 --- a/parser/src/stack/stack_summary.c +++ b/parser/src/stack/stack_summary.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/08/31 16:50:56 by maiboyer #+# #+# */ -/* Updated: 2024/08/31 16:51:36 by maiboyer ### ########.fr */ +/* Updated: 2024/09/02 17:20:31 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -30,6 +30,8 @@ t_stack_action summarize_stack_callback(void *payload, i = session->summary->size - 1; while (i + 1 > 0) { + if (session->summary->contents == NULL) + return (SActionNone); entry = session->summary->contents[i--]; if (entry.depth < depth) break ;