From 7e1e51e90b40a8957c421647a4ce3bc98a7267d6 Mon Sep 17 00:00:00 2001 From: Maieul BOYER Date: Sun, 1 Sep 2024 19:56:22 +0000 Subject: [PATCH] split heredoc handling in the scanner --- Filelist.sh.mk | 5 + input.toml | 1 + output/include/me/vec/vec_ast.h | 10 + output/include/me/vec/vec_estr.h | 10 + output/include/me/vec/vec_heredoc.h | 10 + output/include/me/vec/vec_pid.h | 10 + output/include/me/vec/vec_str.h | 10 + output/src/vec/ast/ast_functions3.c | 2 - output/src/vec/ast/ast_functions4.c | 30 ++ output/src/vec/estr/estr_functions3.c | 2 - output/src/vec/estr/estr_functions4.c | 30 ++ output/src/vec/heredoc/heredoc_functions3.c | 2 - output/src/vec/heredoc/heredoc_functions4.c | 30 ++ output/src/vec/pid/pid_functions3.c | 2 - output/src/vec/pid/pid_functions4.c | 30 ++ output/src/vec/str/str_functions3.c | 2 - output/src/vec/str/str_functions4.c | 30 ++ parser/Filelist.parser.mk | 3 + parser/include/parser/inner/heredoc.h | 8 +- parser/include/parser/inner/scanner.h | 48 +- parser/src/scanner.c | 470 ++++-------------- parser/src/scanner/advance_words.c | 46 ++ parser/src/scanner/deserialize.c | 14 +- parser/src/scanner/heredoc.c | 91 ++++ parser/src/scanner/heredoc_functions.c | 120 +++++ parser/src/scanner/serialize.c | 20 +- .../header/vec_C__PREFIX__.h__TEMPLATE__ | 10 + .../vec/C__PREFIX___functions3.c__TEMPLATE__ | 2 - .../vec/C__PREFIX___functions4.c__TEMPLATE__ | 30 ++ stdme/input.toml | 1 + 30 files changed, 663 insertions(+), 416 deletions(-) create mode 100644 output/src/vec/ast/ast_functions4.c create mode 100644 output/src/vec/estr/estr_functions4.c create mode 100644 output/src/vec/heredoc/heredoc_functions4.c create mode 100644 output/src/vec/pid/pid_functions4.c create mode 100644 output/src/vec/str/str_functions4.c create mode 100644 parser/src/scanner/advance_words.c create mode 100644 parser/src/scanner/heredoc.c create mode 100644 parser/src/scanner/heredoc_functions.c create mode 100644 
stdme/generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__ diff --git a/Filelist.sh.mk b/Filelist.sh.mk index e13f545c..35d86524 100644 --- a/Filelist.sh.mk +++ b/Filelist.sh.mk @@ -14,21 +14,26 @@ src/hashmap/env/env_utils \ src/vec/ast/ast \ src/vec/ast/ast_functions2 \ src/vec/ast/ast_functions3 \ +src/vec/ast/ast_functions4 \ src/vec/ast/ast_sort \ src/vec/estr/estr \ src/vec/estr/estr_functions2 \ src/vec/estr/estr_functions3 \ +src/vec/estr/estr_functions4 \ src/vec/estr/estr_sort \ src/vec/heredoc/heredoc \ src/vec/heredoc/heredoc_functions2 \ src/vec/heredoc/heredoc_functions3 \ +src/vec/heredoc/heredoc_functions4 \ src/vec/heredoc/heredoc_sort \ src/vec/pid/pid \ src/vec/pid/pid_functions2 \ src/vec/pid/pid_functions3 \ +src/vec/pid/pid_functions4 \ src/vec/pid/pid_sort \ src/vec/str/str \ src/vec/str/str_functions2 \ src/vec/str/str_functions3 \ +src/vec/str/str_functions4 \ src/vec/str/str_sort \ diff --git a/input.toml b/input.toml index 8b689e08..c33827fe 100644 --- a/input.toml +++ b/input.toml @@ -5,6 +5,7 @@ sources = [ "stdme/generic_sources/src/vec/C__PREFIX___sort.c__TEMPLATE__", "stdme/generic_sources/src/vec/C__PREFIX___functions2.c__TEMPLATE__", "stdme/generic_sources/src/vec/C__PREFIX___functions3.c__TEMPLATE__", + "stdme/generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__", ] replace.C__TYPENAME__ = "type" replace.C__TYPEHEADER__ = "header_include" diff --git a/output/include/me/vec/vec_ast.h b/output/include/me/vec/vec_ast.h index a7c5f7bd..da15f4c4 100644 --- a/output/include/me/vec/vec_ast.h +++ b/output/include/me/vec/vec_ast.h @@ -120,4 +120,14 @@ void vec_ast_sort(t_vec_ast *vec, t_vec_ast_sort_fn is_sorted); /// @return true if the operation failed, false otherwise t_error vec_ast_back(t_vec_ast *vec, t_ast_node **out); +/// @brief Get a pointer to the i'th element, or NULL otherwise +/// @param vec The vec_ast to get the element from +/// @return A pointer to the element or NULL +t_ast_node *vec_ast_get(t_vec_ast 
*vec, t_usize i); + +/// @brief Get a pointer to the last element, or NULL otherwise +/// @param vec The vec_ast to get the element from +/// @return A pointer to the last element or NULL +t_ast_node *vec_ast_last(t_vec_ast *vec); + #endif diff --git a/output/include/me/vec/vec_estr.h b/output/include/me/vec/vec_estr.h index 5433947d..904de38b 100644 --- a/output/include/me/vec/vec_estr.h +++ b/output/include/me/vec/vec_estr.h @@ -120,4 +120,14 @@ void vec_estr_sort(t_vec_estr *vec, t_vec_estr_sort_fn is_sorted); /// @return true if the operation failed, false otherwise t_error vec_estr_back(t_vec_estr *vec, t_expandable_str **out); +/// @brief Get a pointer to the i'th element, or NULL otherwise +/// @param vec The vec_estr to get the element from +/// @return A pointer to the element or NULL +t_expandable_str *vec_estr_get(t_vec_estr *vec, t_usize i); + +/// @brief Get a pointer to the last element, or NULL otherwise +/// @param vec The vec_estr to get the element from +/// @return A pointer to the last element or NULL +t_expandable_str *vec_estr_last(t_vec_estr *vec); + #endif diff --git a/output/include/me/vec/vec_heredoc.h b/output/include/me/vec/vec_heredoc.h index 10583859..ee45aea1 100644 --- a/output/include/me/vec/vec_heredoc.h +++ b/output/include/me/vec/vec_heredoc.h @@ -120,4 +120,14 @@ void vec_heredoc_sort(t_vec_heredoc *vec, t_vec_heredoc_sort_fn is_sorted); /// @return true if the operation failed, false otherwise t_error vec_heredoc_back(t_vec_heredoc *vec, t_heredoc **out); +/// @brief Get a pointer to the i'th element, or NULL otherwise +/// @param vec The vec_heredoc to get the element from +/// @return A pointer to the element or NULL +t_heredoc *vec_heredoc_get(t_vec_heredoc *vec, t_usize i); + +/// @brief Get a pointer to the last element, or NULL otherwise +/// @param vec The vec_heredoc to get the element from +/// @return A pointer to the last element or NULL +t_heredoc *vec_heredoc_last(t_vec_heredoc *vec); + #endif diff --git 
a/output/include/me/vec/vec_pid.h b/output/include/me/vec/vec_pid.h index 647f66e0..b0cd2670 100644 --- a/output/include/me/vec/vec_pid.h +++ b/output/include/me/vec/vec_pid.h @@ -120,4 +120,14 @@ void vec_pid_sort(t_vec_pid *vec, t_vec_pid_sort_fn is_sorted); /// @return true if the operation failed, false otherwise t_error vec_pid_back(t_vec_pid *vec, t_pid **out); +/// @brief Get a pointer to the i'th element, or NULL otherwise +/// @param vec The vec_pid to get the element from +/// @return A pointer to the element or NULL +t_pid *vec_pid_get(t_vec_pid *vec, t_usize i); + +/// @brief Get a pointer to the last element, or NULL otherwise +/// @param vec The vec_pid to get the element from +/// @return A pointer to the last element or NULL +t_pid *vec_pid_last(t_vec_pid *vec); + #endif diff --git a/output/include/me/vec/vec_str.h b/output/include/me/vec/vec_str.h index e45d83c0..74e5631a 100644 --- a/output/include/me/vec/vec_str.h +++ b/output/include/me/vec/vec_str.h @@ -120,4 +120,14 @@ void vec_str_sort(t_vec_str *vec, t_vec_str_sort_fn is_sorted); /// @return true if the operation failed, false otherwise t_error vec_str_back(t_vec_str *vec, t_str **out); +/// @brief Get a pointer to the i'th element, or NULL otherwise +/// @param vec The vec_str to get the element from +/// @return A pointer to the element or NULL +t_str *vec_str_get(t_vec_str *vec, t_usize i); + +/// @brief Get a pointer to the last element, or NULL otherwise +/// @param vec The vec_str to get the element from +/// @return A pointer to the last element or NULL +t_str *vec_str_last(t_vec_str *vec); + #endif diff --git a/output/src/vec/ast/ast_functions3.c b/output/src/vec/ast/ast_functions3.c index e4d8f2c3..78e586ae 100644 --- a/output/src/vec/ast/ast_functions3.c +++ b/output/src/vec/ast/ast_functions3.c @@ -10,8 +10,6 @@ /* */ /* ************************************************************************** */ -#include "me/mem/mem.h" -#include "me/mem/mem.h" #include "me/mem/mem.h" #include 
"me/types.h" #include "me/vec/vec_ast.h" diff --git a/output/src/vec/ast/ast_functions4.c b/output/src/vec/ast/ast_functions4.c new file mode 100644 index 00000000..3c9e3853 --- /dev/null +++ b/output/src/vec/ast/ast_functions4.c @@ -0,0 +1,30 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* vec_ast.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */ +/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/mem/mem.h" +#include "me/types.h" +#include "me/vec/vec_ast.h" +#include + +t_ast_node *vec_ast_get(t_vec_ast *vec, t_usize i) +{ + if (i >= vec->len) + return (NULL); + return (&vec->buffer[i]); +} + +t_ast_node *vec_ast_last(t_vec_ast *vec) +{ + if (vec->len == 0) + return (NULL); + return (&vec->buffer[vec->len - 1]); +} diff --git a/output/src/vec/estr/estr_functions3.c b/output/src/vec/estr/estr_functions3.c index 3da5679c..1c242739 100644 --- a/output/src/vec/estr/estr_functions3.c +++ b/output/src/vec/estr/estr_functions3.c @@ -10,8 +10,6 @@ /* */ /* ************************************************************************** */ -#include "me/mem/mem.h" -#include "me/mem/mem.h" #include "me/mem/mem.h" #include "me/types.h" #include "me/vec/vec_estr.h" diff --git a/output/src/vec/estr/estr_functions4.c b/output/src/vec/estr/estr_functions4.c new file mode 100644 index 00000000..02f65a86 --- /dev/null +++ b/output/src/vec/estr/estr_functions4.c @@ -0,0 +1,30 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* vec_estr.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */ +/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */ +/* 
*/ +/* ************************************************************************** */ + +#include "me/mem/mem.h" +#include "me/types.h" +#include "me/vec/vec_estr.h" +#include + +t_expandable_str *vec_estr_get(t_vec_estr *vec, t_usize i) +{ + if (i >= vec->len) + return (NULL); + return (&vec->buffer[i]); +} + +t_expandable_str *vec_estr_last(t_vec_estr *vec) +{ + if (vec->len == 0) + return (NULL); + return (&vec->buffer[vec->len - 1]); +} diff --git a/output/src/vec/heredoc/heredoc_functions3.c b/output/src/vec/heredoc/heredoc_functions3.c index 9f3e5d7e..bd2b0852 100644 --- a/output/src/vec/heredoc/heredoc_functions3.c +++ b/output/src/vec/heredoc/heredoc_functions3.c @@ -10,8 +10,6 @@ /* */ /* ************************************************************************** */ -#include "me/mem/mem.h" -#include "me/mem/mem.h" #include "me/mem/mem.h" #include "me/types.h" #include "me/vec/vec_heredoc.h" diff --git a/output/src/vec/heredoc/heredoc_functions4.c b/output/src/vec/heredoc/heredoc_functions4.c new file mode 100644 index 00000000..a2d1cf73 --- /dev/null +++ b/output/src/vec/heredoc/heredoc_functions4.c @@ -0,0 +1,30 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* vec_heredoc.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */ +/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/mem/mem.h" +#include "me/types.h" +#include "me/vec/vec_heredoc.h" +#include + +t_heredoc *vec_heredoc_get(t_vec_heredoc *vec, t_usize i) +{ + if (i >= vec->len) + return (NULL); + return (&vec->buffer[i]); +} + +t_heredoc *vec_heredoc_last(t_vec_heredoc *vec) +{ + if (vec->len == 0) + return (NULL); + return (&vec->buffer[vec->len - 1]); +} diff --git a/output/src/vec/pid/pid_functions3.c 
b/output/src/vec/pid/pid_functions3.c index a68f76c4..5e7dab79 100644 --- a/output/src/vec/pid/pid_functions3.c +++ b/output/src/vec/pid/pid_functions3.c @@ -10,8 +10,6 @@ /* */ /* ************************************************************************** */ -#include "me/mem/mem.h" -#include "me/mem/mem.h" #include "me/mem/mem.h" #include "me/types.h" #include "me/vec/vec_pid.h" diff --git a/output/src/vec/pid/pid_functions4.c b/output/src/vec/pid/pid_functions4.c new file mode 100644 index 00000000..3be22bfd --- /dev/null +++ b/output/src/vec/pid/pid_functions4.c @@ -0,0 +1,30 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* vec_pid.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */ +/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/mem/mem.h" +#include "me/types.h" +#include "me/vec/vec_pid.h" +#include + +t_pid *vec_pid_get(t_vec_pid *vec, t_usize i) +{ + if (i >= vec->len) + return (NULL); + return (&vec->buffer[i]); +} + +t_pid *vec_pid_last(t_vec_pid *vec) +{ + if (vec->len == 0) + return (NULL); + return (&vec->buffer[vec->len - 1]); +} diff --git a/output/src/vec/str/str_functions3.c b/output/src/vec/str/str_functions3.c index 6b9704a5..9c34fdf9 100644 --- a/output/src/vec/str/str_functions3.c +++ b/output/src/vec/str/str_functions3.c @@ -10,8 +10,6 @@ /* */ /* ************************************************************************** */ -#include "me/mem/mem.h" -#include "me/mem/mem.h" #include "me/mem/mem.h" #include "me/types.h" #include "me/vec/vec_str.h" diff --git a/output/src/vec/str/str_functions4.c b/output/src/vec/str/str_functions4.c new file mode 100644 index 00000000..44e73771 --- /dev/null +++ b/output/src/vec/str/str_functions4.c @@ -0,0 +1,30 @@ +/* 
************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* vec_str.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */ +/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/mem/mem.h" +#include "me/types.h" +#include "me/vec/vec_str.h" +#include + +t_str *vec_str_get(t_vec_str *vec, t_usize i) +{ + if (i >= vec->len) + return (NULL); + return (&vec->buffer[i]); +} + +t_str *vec_str_last(t_vec_str *vec) +{ + if (vec->len == 0) + return (NULL); + return (&vec->buffer[vec->len - 1]); +} diff --git a/parser/Filelist.parser.mk b/parser/Filelist.parser.mk index 27fa1efa..b0dff1d5 100644 --- a/parser/Filelist.parser.mk +++ b/parser/Filelist.parser.mk @@ -33,7 +33,10 @@ parser \ point/point_funcs1 \ point/point_funcs2 \ scanner \ +scanner/advance_words \ scanner/deserialize \ +scanner/heredoc \ +scanner/heredoc_functions \ scanner/serialize \ stack/stack_add_link \ stack/stack_funcs1 \ diff --git a/parser/include/parser/inner/heredoc.h b/parser/include/parser/inner/heredoc.h index f424bb8d..178e667f 100644 --- a/parser/include/parser/inner/heredoc.h +++ b/parser/include/parser/inner/heredoc.h @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/01 15:06:56 by maiboyer #+# #+# */ -/* Updated: 2024/09/01 15:08:47 by maiboyer ### ########.fr */ +/* Updated: 2024/09/01 19:01:16 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -46,4 +46,10 @@ static inline void reset_heredoc(t_heredoc *heredoc) string_clear(&heredoc->delimiter); } +static inline void heredoc_free(t_heredoc heredoc) +{ + string_free(heredoc.delimiter); + string_free(heredoc.current_leading_word); +} + #endif /* HEREDOC_TYPE_H */ diff --git 
a/parser/include/parser/inner/scanner.h b/parser/include/parser/inner/scanner.h index 857d11fb..e638d272 100755 --- a/parser/include/parser/inner/scanner.h +++ b/parser/include/parser/inner/scanner.h @@ -2,18 +2,54 @@ #define SCANNER_H #include "me/types.h" -#include "parser/inner/heredoc.h" #include "me/vec/vec_heredoc.h" -#include "parser/array.h" +#include "parser/parser.h" typedef struct s_scanner t_scanner; struct s_scanner { - t_u8 last_glob_paren_depth; - bool ext_was_in_double_quote; - bool ext_saw_outside_quote; + t_u8 last_glob_paren_depth; + bool ext_was_in_double_quote; + bool ext_saw_outside_quote; t_vec_heredoc heredocs; }; -#endif \ No newline at end of file +enum e_token_type +{ + HEREDOC_START, + SIMPLE_HEREDOC_BODY, + HEREDOC_BODY_BEGINNING, + HEREDOC_CONTENT, + HEREDOC_END, + FILE_DESCRIPTOR, + EMPTY_VALUE, + CONCAT, + VARIABLE_NAME, + REGEX, + EXPANSION_WORD, + EXTGLOB_PATTERN, + BARE_DOLLAR, + IMMEDIATE_DOUBLE_HASH, + HEREDOC_ARROW, + HEREDOC_ARROW_DASH, + NEWLINE, + OPENING_PAREN, + ESAC, + ERROR_RECOVERY, +}; + +struct s_heredoc_scan_state +{ + t_scanner *scanner; + TSLexer *lexer; + enum e_token_type middle_type; + enum e_token_type end_type; + bool did_advance; + t_heredoc *heredoc; + bool return_value; +}; + +bool advance_word(TSLexer *lexer, t_string *unquoted_word); + +#endif diff --git a/parser/src/scanner.c b/parser/src/scanner.c index ca0cab75..7a5586ae 100644 --- a/parser/src/scanner.c +++ b/parser/src/scanner.c @@ -6,112 +6,42 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/01 14:17:17 by maiboyer #+# #+# */ -/* Updated: 2024/09/01 18:50:23 by maiboyer ### ########.fr */ +/* Updated: 2024/09/01 19:55:43 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ +#include "parser/inner/scanner.h" #include "me/char/char.h" #include "me/str/str.h" #include "me/string/string.h" -#include "parser/inner/heredoc.h" -#include "parser/inner/scanner.h" 
#include "me/types.h" +#include "me/vec/vec_heredoc.h" #include "parser/array.h" +#include "parser/inner/heredoc.h" #include "parser/parser.h" #include -enum e_token_type -{ - HEREDOC_START, - SIMPLE_HEREDOC_BODY, - HEREDOC_BODY_BEGINNING, - HEREDOC_CONTENT, - HEREDOC_END, - FILE_DESCRIPTOR, - EMPTY_VALUE, - CONCAT, - VARIABLE_NAME, - REGEX, - EXPANSION_WORD, - EXTGLOB_PATTERN, - BARE_DOLLAR, - IMMEDIATE_DOUBLE_HASH, - HEREDOC_ARROW, - HEREDOC_ARROW_DASH, - NEWLINE, - OPENING_PAREN, - ESAC, - ERROR_RECOVERY, -}; - -void advance(TSLexer *lexer) -{ - lexer->advance(lexer, false); -} - -void skip(TSLexer *lexer) -{ - lexer->advance(lexer, true); -} - -bool in_error_recovery(const bool *valid_symbols) -{ - return (valid_symbols[ERROR_RECOVERY]); -} +bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum e_token_type middle_type, enum e_token_type end_type); +bool scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer); +bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer); +bool advance_word(TSLexer *lexer, t_string *unquoted_word); void reset(t_scanner *scanner) { t_usize i; i = 0; - while (i < scanner->heredocs.size) - reset_heredoc(array_get(&scanner->heredocs, i++)); -} - -/** - * Consume a "word" in POSIX parlance, and returns it unquoted. - * - * This is an approximate implementation that doesn't deal with any - * POSIX-mandated substitution, and assumes the default value for - * IFS. 
- */ -bool advance_word(TSLexer *lexer, t_string *unquoted_word) -{ - bool empty; - t_i32 quote; - - empty = true; - quote = 0; - if (lexer->lookahead == '\'' || lexer->lookahead == '"') - { - quote = lexer->lookahead; - advance(lexer); - } - while (lexer->lookahead && !((quote && (lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n')) || - (!quote && (me_isspace(lexer->lookahead))))) - { - if (lexer->lookahead == '\\') - { - advance(lexer); - if (!lexer->lookahead) - return (false); - } - empty = false; - string_push_char(unquoted_word, lexer->lookahead); - advance(lexer); - } - if (quote && lexer->lookahead == quote) - advance(lexer); - return (!empty); + while (i < scanner->heredocs.len) + reset_heredoc(&scanner->heredocs.buffer[i++]); } bool scan_bare_dollar(TSLexer *lexer) { while (me_isspace(lexer->lookahead) && lexer->lookahead != '\n' && !lexer->eof(lexer)) - skip(lexer); + lexer->advance(lexer, true); if (lexer->lookahead == '$') { - advance(lexer); + lexer->advance(lexer, false); lexer->result_symbol = BARE_DOLLAR; lexer->mark_end(lexer); return (me_isspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"'); @@ -119,207 +49,50 @@ bool scan_bare_dollar(TSLexer *lexer) return (false); } -bool scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer) -{ - bool found_delimiter; - - while (me_isspace(lexer->lookahead)) - skip(lexer); - lexer->result_symbol = HEREDOC_START; - heredoc->is_raw = lexer->lookahead == '\'' || lexer->lookahead == '"' || lexer->lookahead == '\\'; - found_delimiter = advance_word(lexer, &heredoc->delimiter); - if (!found_delimiter) - return (string_clear(&heredoc->delimiter), false); - return (found_delimiter); -} - -bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer) -{ - t_i32 size; - - size = 0; - string_clear(&heredoc->current_leading_word); - if (heredoc->delimiter.len > 0) - { - while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && 
(t_i32)heredoc->delimiter.buf[size] == lexer->lookahead && - heredoc->current_leading_word.len < heredoc->delimiter.len) - { - string_push_char(&heredoc->current_leading_word, lexer->lookahead); - advance(lexer); - size++; - } - } - return heredoc->delimiter.len == 0 ? false : str_compare(heredoc->current_leading_word.buf, heredoc->delimiter.buf); -} - -bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum e_token_type middle_type, enum e_token_type end_type) -{ - bool did_advance = false; - t_heredoc *heredoc = array_back(&scanner->heredocs); - - while (true) - { - if (lexer->lookahead == '\0') - { - if (lexer->eof(lexer) && did_advance) - { - reset_heredoc(heredoc); - lexer->result_symbol = end_type; - return (true); - } - return (false); - } - else if (lexer->lookahead == '\\') - { - did_advance = true; - advance(lexer); - advance(lexer); - } - else if (lexer->lookahead == '$') - { - if (heredoc->is_raw) - { - did_advance = true; - advance(lexer); - } - if (did_advance) - { - lexer->mark_end(lexer); - lexer->result_symbol = middle_type; - heredoc->started = true; - advance(lexer); - if (me_isalpha(lexer->lookahead) || lexer->lookahead == '{' || lexer->lookahead == '(') - return true; - } - if (middle_type == HEREDOC_BODY_BEGINNING && lexer->get_column(lexer) == 0) - { - lexer->result_symbol = middle_type; - heredoc->started = true; - return true; - } - return false; - } - else if (lexer->lookahead == '\n') - { - if (!did_advance) - { - skip(lexer); - } - else - { - advance(lexer); - } - did_advance = true; - if (heredoc->allows_indent) - { - while (me_isspace(lexer->lookahead)) - advance(lexer); - } - lexer->result_symbol = end_type; - if (heredoc->started) - lexer->result_symbol = middle_type; - lexer->mark_end(lexer); - if (scan_heredoc_end_identifier(heredoc, lexer)) - { - if (lexer->result_symbol == HEREDOC_END) - (void)array_pop(&scanner->heredocs); - return (true); - } - } - else - { - if (lexer->get_column(lexer) == 0) - { - while 
(me_isspace(lexer->lookahead)) - { - if (did_advance) - advance(lexer); - else - skip(lexer); - } - if (end_type != SIMPLE_HEREDOC_BODY) - { - lexer->result_symbol = middle_type; - if (scan_heredoc_end_identifier(heredoc, lexer)) - return true; - } - if (end_type == SIMPLE_HEREDOC_BODY) - { - lexer->result_symbol = end_type; - lexer->mark_end(lexer); - if (scan_heredoc_end_identifier(heredoc, lexer)) - return true; - } - } - did_advance = true; - advance(lexer); - } - } - return (false); -} - bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) { - if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols)) + if (valid_symbols[CONCAT] && !valid_symbols[ERROR_RECOVERY]) { if (!(lexer->lookahead == 0 || me_isspace(lexer->lookahead) || lexer->lookahead == '>' || lexer->lookahead == '<' || lexer->lookahead == ')' || lexer->lookahead == '(' || lexer->lookahead == ';' || lexer->lookahead == '&' || lexer->lookahead == '|' || lexer->lookahead == '{' || lexer->lookahead == '}')) { lexer->result_symbol = CONCAT; - // So for a`b`, we want to return a concat. We check if the - // 2nd backtick has whitespace after it, and if it does we - // return concat. 
if (lexer->lookahead == '`') { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); while (lexer->lookahead != '`' && !lexer->eof(lexer)) - { - advance(lexer); - } + lexer->advance(lexer, false); if (lexer->eof(lexer)) - { return false; - } if (lexer->lookahead == '`') - { - advance(lexer); - } + lexer->advance(lexer, false); return me_isspace(lexer->lookahead) || lexer->eof(lexer); } - // strings w/ expansions that contains escaped quotes or - // backslashes need this to return a concat if (lexer->lookahead == '\\') { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == '"' || lexer->lookahead == '\'' || lexer->lookahead == '\\') - { return true; - } if (lexer->eof(lexer)) - { return false; - } } else - { return true; - } } } - if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !in_error_recovery(valid_symbols)) + if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !valid_symbols[ERROR_RECOVERY]) { - // advance two # and ensure not } after if (lexer->lookahead == '#') { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == '#') { - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead != '}') { lexer->result_symbol = IMMEDIATE_DOUBLE_HASH; @@ -339,50 +112,43 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) } } - if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.size > 0 && - !array_back(&scanner->heredocs)->started && !in_error_recovery(valid_symbols)) - { + if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.len > 0 && + !vec_heredoc_last(&scanner->heredocs)->started && !valid_symbols[ERROR_RECOVERY]) return scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY); - } - if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0) + if (valid_symbols[HEREDOC_END] && scanner->heredocs.len > 0) { - 
t_heredoc *heredoc = array_back(&scanner->heredocs); + t_heredoc *heredoc = vec_heredoc_last(&scanner->heredocs); if (scan_heredoc_end_identifier(heredoc, lexer)) { - array_delete(&heredoc->current_leading_word); - array_delete(&heredoc->delimiter); - (void)array_pop(&scanner->heredocs); + string_free(heredoc->current_leading_word); + string_free(heredoc->delimiter); + (void)vec_heredoc_pop(&scanner->heredocs, NULL); lexer->result_symbol = HEREDOC_END; return true; } } - if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 && array_back(&scanner->heredocs)->started && - !in_error_recovery(valid_symbols)) - { + if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0 && vec_heredoc_last(&scanner->heredocs)->started && + !valid_symbols[ERROR_RECOVERY]) return scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END); - } - if (valid_symbols[HEREDOC_START] && !in_error_recovery(valid_symbols) && scanner->heredocs.size > 0) - { - return scan_heredoc_start(array_back(&scanner->heredocs), lexer); - } + if (valid_symbols[HEREDOC_START] && !valid_symbols[ERROR_RECOVERY] && scanner->heredocs.len > 0) + return scan_heredoc_start(vec_heredoc_last(&scanner->heredocs), lexer); - if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) && - !in_error_recovery(valid_symbols)) + if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) && !valid_symbols[ERROR_RECOVERY]) { - for (;;) + while (true) { if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == '\r' || (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) && !valid_symbols[EXPANSION_WORD]) { - skip(lexer); + lexer->advance(lexer, true); } else if (lexer->lookahead == '\\') { - skip(lexer); + lexer->advance(lexer, true); if (lexer->eof(lexer)) { @@ -392,26 +158,18 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) } if (lexer->lookahead == '\r') - { - 
skip(lexer); - } + lexer->advance(lexer, true); if (lexer->lookahead == '\n') - { - skip(lexer); - } + lexer->advance(lexer, true); else { if (lexer->lookahead == '\\' && valid_symbols[EXPANSION_WORD]) - { goto expansion_word; - } return false; } } else - { break; - } } // no '*', '@', '?', '-', '$', '0', '_' @@ -419,12 +177,10 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) lexer->lookahead == '-' || lexer->lookahead == '0' || lexer->lookahead == '_')) { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == '=' || lexer->lookahead == '[' || lexer->lookahead == ':' || lexer->lookahead == '-' || lexer->lookahead == '%' || lexer->lookahead == '#' || lexer->lookahead == '/') - { return false; - } if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->lookahead)) { lexer->mark_end(lexer); @@ -435,26 +191,22 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<') { - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == '<') { - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == '-') { - advance(lexer); + lexer->advance(lexer, false); t_heredoc heredoc = heredoc_new(); heredoc.allows_indent = true; - array_push(&scanner->heredocs, heredoc); + vec_heredoc_push(&scanner->heredocs, heredoc); lexer->result_symbol = HEREDOC_ARROW_DASH; } - // else if (lexer->lookahead == '<' || lexer->lookahead == '=') - // { - // return false; - // } else { t_heredoc heredoc = heredoc_new(); - array_push(&scanner->heredocs, heredoc); + vec_heredoc_push(&scanner->heredocs, heredoc); lexer->result_symbol = HEREDOC_ARROW; } return true; @@ -464,44 +216,34 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) bool is_number = true; if (me_isdigit(lexer->lookahead)) - advance(lexer); + lexer->advance(lexer, false); else if (me_isalpha(lexer->lookahead) || lexer->lookahead == '_') { 
is_number = false; - advance(lexer); + lexer->advance(lexer, false); } else { if (lexer->lookahead == '{') - { goto brace_start; - } if (valid_symbols[EXPANSION_WORD]) - { goto expansion_word; - } if (valid_symbols[EXTGLOB_PATTERN]) - { goto extglob_pattern; - } return false; } - for (;;) + while (true) { if (me_isdigit(lexer->lookahead)) - { - advance(lexer); - } + lexer->advance(lexer, false); else if (me_isalpha(lexer->lookahead) || lexer->lookahead == '_') { is_number = false; - advance(lexer); + lexer->advance(lexer, false); } else - { break; - } } if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->lookahead == '>' || lexer->lookahead == '<')) @@ -515,7 +257,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) if (lexer->lookahead == '+') { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == '=' || lexer->lookahead == ':') { lexer->result_symbol = VARIABLE_NAME; @@ -527,12 +269,8 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) { return false; } - if (lexer->lookahead == '=' || lexer->lookahead == '[' || - (lexer->lookahead == ':' && - !valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable - // names for function words, only handling : for now? 
#235 - lexer->lookahead == '%' || - (lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || (lexer->lookahead == '-')) + if (lexer->lookahead == '=' || lexer->lookahead == '[' || (lexer->lookahead == ':' && !valid_symbols[OPENING_PAREN]) || + lexer->lookahead == '%' || (lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || (lexer->lookahead == '-')) { lexer->mark_end(lexer); lexer->result_symbol = VARIABLE_NAME; @@ -542,7 +280,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) if (lexer->lookahead == '?') { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); lexer->result_symbol = VARIABLE_NAME; return me_isalpha(lexer->lookahead); } @@ -551,20 +289,16 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) return false; } - if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && scan_bare_dollar(lexer)) + if (valid_symbols[BARE_DOLLAR] && !valid_symbols[ERROR_RECOVERY] && scan_bare_dollar(lexer)) { return true; } - if ((valid_symbols[REGEX]) && !in_error_recovery(valid_symbols)) + if ((valid_symbols[REGEX]) && !valid_symbols[ERROR_RECOVERY]) { if (valid_symbols[REGEX]) - { while (me_isspace(lexer->lookahead)) - { - skip(lexer); - } - } + lexer->advance(lexer, true); if ((lexer->lookahead != '"' && lexer->lookahead != '\'') || ((lexer->lookahead == '$' || lexer->lookahead == '\'')) || (lexer->lookahead == '\'')) @@ -584,11 +318,9 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) if (lexer->lookahead == '$') { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == '(') - { return false; - } } lexer->mark_end(lexer); @@ -601,7 +333,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) if (lexer->lookahead == '\'') { state.in_single_quote = false; - advance(lexer); + lexer->advance(lexer, false); lexer->mark_end(lexer); } } @@ -646,7 +378,7 @@ bool scan(t_scanner 
*scanner, TSLexer *lexer, const bool *valid_symbols) case '\'': // Enter or exit a single-quoted string. state.in_single_quote = !state.in_single_quote; - advance(lexer); + lexer->advance(lexer, false); state.advanced_once = true; state.last_was_escape = false; continue; @@ -660,7 +392,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) if (valid_symbols[REGEX]) { bool was_space = !state.in_single_quote && me_isspace(lexer->lookahead); - advance(lexer); + lexer->advance(lexer, false); state.advanced_once = true; if (!was_space || state.paren_depth > 0) { @@ -671,20 +403,16 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols) } lexer->result_symbol = REGEX; - if (valid_symbols[REGEX] && !state.advanced_once) - { - return false; - } - return true; + return (!(valid_symbols[REGEX] && !state.advanced_once)); } } extglob_pattern: - if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols)) + if (valid_symbols[EXTGLOB_PATTERN] && !valid_symbols[ERROR_RECOVERY]) { // first skip ws, then check for ? * + @ ! while (me_isspace(lexer->lookahead)) - skip(lexer); + lexer->advance(lexer, true); if (lexer->lookahead == '?' || lexer->lookahead == '*' || lexer->lookahead == '+' || lexer->lookahead == '@' || lexer->lookahead == '!' 
|| lexer->lookahead == '-' || lexer->lookahead == ')' || lexer->lookahead == '\\' || @@ -692,9 +420,9 @@ extglob_pattern: { if (lexer->lookahead == '\\') { - advance(lexer); + lexer->advance(lexer, false); if ((me_isspace(lexer->lookahead) || lexer->lookahead == '"') && lexer->lookahead != '\r' && lexer->lookahead != '\n') - advance(lexer); + lexer->advance(lexer, false); else return false; } @@ -702,12 +430,10 @@ extglob_pattern: if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); if (me_isspace(lexer->lookahead)) - { return false; - } } lexer->mark_end(lexer); @@ -718,16 +444,16 @@ extglob_pattern: if (lexer->lookahead == 'e') { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == 's') { - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == 'a') { - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == 'c') { - advance(lexer); + lexer->advance(lexer, false); if (me_isspace(lexer->lookahead)) return false; } @@ -735,16 +461,16 @@ extglob_pattern: } } else - advance(lexer); + lexer->advance(lexer, false); } // -\w is just a word, find something else special if (lexer->lookahead == '-') { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); while (me_isalnum(lexer->lookahead)) - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.') return false; @@ -755,7 +481,7 @@ extglob_pattern: if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0) { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); if (me_isspace(lexer->lookahead)) { lexer->result_symbol = EXTGLOB_PATTERN; @@ -774,7 +500,7 @@ extglob_pattern: if (lexer->lookahead == '$') { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == '{' || lexer->lookahead == '(') { 
lexer->result_symbol = EXTGLOB_PATTERN; @@ -785,7 +511,7 @@ extglob_pattern: if (lexer->lookahead == '|') { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); lexer->result_symbol = EXTGLOB_PATTERN; return true; } @@ -846,7 +572,7 @@ extglob_pattern: if (lexer->lookahead == '|') { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0) { lexer->result_symbol = EXTGLOB_PATTERN; @@ -862,7 +588,7 @@ extglob_pattern: lexer->mark_end(lexer); if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') state.saw_non_alphadot = true; - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == '(' || lexer->lookahead == '{') { lexer->result_symbol = EXTGLOB_PATTERN; @@ -888,15 +614,15 @@ extglob_pattern: { if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\') state.saw_non_alphadot = true; - advance(lexer); + lexer->advance(lexer, false); if (me_isspace(lexer->lookahead) || lexer->lookahead == '"') - advance(lexer); + lexer->advance(lexer, false); } else { if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' 
&& lexer->lookahead != '\\') state.saw_non_alphadot = true; - advance(lexer); + lexer->advance(lexer, false); } if (!was_space) lexer->mark_end(lexer); @@ -917,14 +643,14 @@ expansion_word: { bool advanced_once = false; bool advance_once_space = false; - for (;;) + while (true) { if (lexer->lookahead == '\"') return false; if (lexer->lookahead == '$') { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || me_isalnum(lexer->lookahead)) { lexer->result_symbol = EXPANSION_WORD; @@ -943,17 +669,13 @@ expansion_word: if (lexer->lookahead == '(' && !(advanced_once || advance_once_space)) { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); while (lexer->lookahead != ')' && !lexer->eof(lexer)) { - // if we find a $( or ${ assume this is valid and is - // a garbage concatenation of some weird word + an - // expansion - // I wonder where this can fail if (lexer->lookahead == '$') { lexer->mark_end(lexer); - advance(lexer); + lexer->advance(lexer, false); if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || me_isalnum(lexer->lookahead)) { lexer->result_symbol = EXPANSION_WORD; @@ -965,14 +687,14 @@ expansion_word: { advanced_once = advanced_once || !me_isspace(lexer->lookahead); advance_once_space = advance_once_space || me_isspace(lexer->lookahead); - advance(lexer); + lexer->advance(lexer, false); } } lexer->mark_end(lexer); if (lexer->lookahead == ')') { advanced_once = true; - advance(lexer); + lexer->advance(lexer, false); lexer->mark_end(lexer); if (lexer->lookahead == '}') return false; @@ -987,7 +709,7 @@ expansion_word: return false; advanced_once = advanced_once || !me_isspace(lexer->lookahead); advance_once_space = advance_once_space || me_isspace(lexer->lookahead); - advance(lexer); + lexer->advance(lexer, false); } } @@ -1000,7 +722,7 @@ void *tree_sitter_sh_external_scanner_create() t_scanner 
*scanner; scanner = mem_alloc(sizeof(*scanner)); - array_init(&scanner->heredocs); + scanner->heredocs = vec_heredoc_new(16, heredoc_free); return (scanner); } @@ -1014,18 +736,6 @@ bool tree_sitter_sh_external_scanner_scan(void *payload, TSLexer *lexer, const b void tree_sitter_sh_external_scanner_destroy(void *payload) { - t_scanner *scanner; - t_heredoc *heredoc; - t_usize i; - - scanner = (t_scanner *)payload; - i = 0; - while (i < scanner->heredocs.size) - { - heredoc = array_get(&scanner->heredocs, i++); - string_free(heredoc->current_leading_word); - string_free(heredoc->delimiter); - } - array_delete(&scanner->heredocs); - mem_free(scanner); + vec_heredoc_free(((t_scanner *)payload)->heredocs); + mem_free((t_scanner *)payload); } diff --git a/parser/src/scanner/advance_words.c b/parser/src/scanner/advance_words.c new file mode 100644 index 00000000..7522767f --- /dev/null +++ b/parser/src/scanner/advance_words.c @@ -0,0 +1,46 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* advance_words.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/09/01 19:28:19 by maiboyer #+# #+# */ +/* Updated: 2024/09/01 19:30:20 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/char/char.h" +#include "me/string/string.h" +#include "me/types.h" +#include "parser/parser.h" + +bool advance_word(TSLexer *lexer, t_string *unquoted_word) +{ + bool empty; + t_i32 quote; + + empty = true; + quote = 0; + if (lexer->lookahead == '\'' || lexer->lookahead == '"') + { + quote = lexer->lookahead; + lexer->advance(lexer, false); + } + while (lexer->lookahead && !((quote && (lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n')) || + (!quote && (me_isspace(lexer->lookahead))))) + { + if (lexer->lookahead == '\\') + { + lexer->advance(lexer, false); + if 
(!lexer->lookahead) + return (false); + } + empty = false; + string_push_char(unquoted_word, lexer->lookahead); + lexer->advance(lexer, false); + } + if (quote && lexer->lookahead == quote) + lexer->advance(lexer, false); + return (!empty); +} diff --git a/parser/src/scanner/deserialize.c b/parser/src/scanner/deserialize.c index 2c4356e2..fd34ba94 100644 --- a/parser/src/scanner/deserialize.c +++ b/parser/src/scanner/deserialize.c @@ -6,15 +6,15 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/01 15:06:56 by maiboyer #+# #+# */ -/* Updated: 2024/09/01 15:08:47 by maiboyer ### ########.fr */ +/* Updated: 2024/09/01 19:40:35 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ +#include "me/mem/mem.h" #include "me/types.h" +#include "me/vec/vec_heredoc.h" #include "parser/inner/heredoc.h" #include "parser/inner/scanner.h" -#include "parser/array.h" -#include "parser/parser.h" void reset(t_scanner *); @@ -39,12 +39,12 @@ void tree_sitter_sh_external_scanner_deserialize(t_scanner *scanner, const t_u8 while (i < heredoc_count) { heredoc = NULL; - if (i < scanner->heredocs.size) - heredoc = array_get(&scanner->heredocs, i); + if (i < scanner->heredocs.len) + heredoc = vec_heredoc_get(&scanner->heredocs, i); else { - array_push(&scanner->heredocs, heredoc_new()); - heredoc = array_back(&scanner->heredocs); + vec_heredoc_push(&scanner->heredocs, heredoc_new()); + heredoc = vec_heredoc_last(&scanner->heredocs); } heredoc->is_raw = buffer[size++]; diff --git a/parser/src/scanner/heredoc.c b/parser/src/scanner/heredoc.c new file mode 100644 index 00000000..0c987073 --- /dev/null +++ b/parser/src/scanner/heredoc.c @@ -0,0 +1,91 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* heredoc.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/09/01 19:33:04 by maiboyer #+# 
#+# */ +/* Updated: 2024/09/01 19:55:50 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "parser/inner/heredoc.h" +#include "me/char/char.h" +#include "me/str/str.h" +#include "me/types.h" +#include "me/vec/vec_heredoc.h" +#include "parser/inner/scanner.h" +#include "parser/parser.h" + +bool scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer) +{ + bool found_delimiter; + + while (me_isspace(lexer->lookahead)) + lexer->advance(lexer, true); + lexer->result_symbol = HEREDOC_START; + heredoc->is_raw = lexer->lookahead == '\'' || lexer->lookahead == '"' || lexer->lookahead == '\\'; + found_delimiter = advance_word(lexer, &heredoc->delimiter); + if (!found_delimiter) + return (string_clear(&heredoc->delimiter), false); + return (found_delimiter); +} + +bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer) +{ + t_i32 size; + + size = 0; + string_clear(&heredoc->current_leading_word); + if (heredoc->delimiter.len > 0) + { + while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && (t_i32)heredoc->delimiter.buf[size] == lexer->lookahead && + heredoc->current_leading_word.len < heredoc->delimiter.len) + { + string_push_char(&heredoc->current_leading_word, lexer->lookahead); + lexer->advance(lexer, false); + size++; + } + } + if (heredoc->delimiter.len == 0) + return (false); + return (str_compare(heredoc->current_leading_word.buf, heredoc->delimiter.buf)); +} + +bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state); +bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state); +bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state); +bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state); +bool scan_heredoc_content_other(struct s_heredoc_scan_state *state); + +bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum e_token_type middle_type, enum e_token_type end_type) +{ + struct s_heredoc_scan_state 
state; + bool (*func)(struct s_heredoc_scan_state *state); + + state.did_advance = false; + state.lexer = lexer; + state.heredoc = vec_heredoc_last(&scanner->heredocs); + state.scanner = scanner; + state.middle_type = middle_type; + state.end_type = end_type; + state.return_value = false; + + while (true) + { + if (lexer->lookahead == '\0') + func = scan_heredoc_content_nullbyte; + else if (lexer->lookahead == '\\') + func = scan_heredoc_content_backslash; + else if (lexer->lookahead == '$') + func = scan_heredoc_content_dollar; + else if (lexer->lookahead == '\n') + func = scan_heredoc_content_newline; + else + func = scan_heredoc_content_other; + if (func(&state)) + return (state.return_value); + } + return (false); +} diff --git a/parser/src/scanner/heredoc_functions.c b/parser/src/scanner/heredoc_functions.c new file mode 100644 index 00000000..4013797e --- /dev/null +++ b/parser/src/scanner/heredoc_functions.c @@ -0,0 +1,120 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* heredoc_functions.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/09/01 19:36:53 by maiboyer #+# #+# */ +/* Updated: 2024/09/01 19:54:13 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/char/char.h" +#include "me/str/str.h" +#include "me/types.h" +#include "me/vec/vec_heredoc.h" +#include "parser/inner/heredoc.h" +#include "parser/inner/scanner.h" +#include "parser/parser.h" + +bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer); + +bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state) +{ + if (state->lexer->eof(state->lexer) && state->did_advance) + { + reset_heredoc(state->heredoc); + state->lexer->result_symbol = state->end_type; + return (state->return_value = true, true); + } + return (state->return_value = false, true); +} + +bool 
scan_heredoc_content_backslash(struct s_heredoc_scan_state *state) +{ + state->did_advance = true; + state->lexer->advance(state->lexer, false); + state->lexer->advance(state->lexer, false); + return (false); +} + +bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state) +{ + if (state->heredoc->is_raw) + { + state->did_advance = true; + state->lexer->advance(state->lexer, false); + } + if (state->did_advance) + { + state->lexer->mark_end(state->lexer); + state->lexer->result_symbol = state->middle_type; + state->heredoc->started = true; + state->lexer->advance(state->lexer, false); + if (me_isalpha(state->lexer->lookahead) || state->lexer->lookahead == '{' || state->lexer->lookahead == '(') + return (state->return_value = true, true); + } + if (state->middle_type == HEREDOC_BODY_BEGINNING && state->lexer->get_column(state->lexer) == 0) + { + state->lexer->result_symbol = state->middle_type; + state->heredoc->started = true; + return (state->return_value = true, true); + } + return (state->return_value = false, true); +} + +bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state) +{ + if (!state->did_advance) + state->lexer->advance(state->lexer, true); + else + state->lexer->advance(state->lexer, false); + state->did_advance = true; + if (state->heredoc->allows_indent) + { + while (me_isspace(state->lexer->lookahead)) + state->lexer->advance(state->lexer, false); + } + state->lexer->result_symbol = state->end_type; + if (state->heredoc->started) + state->lexer->result_symbol = state->middle_type; + state->lexer->mark_end(state->lexer); + if (scan_heredoc_end_identifier(state->heredoc, state->lexer)) + { + if (state->lexer->result_symbol == HEREDOC_END) + vec_heredoc_pop(&state->scanner->heredocs, NULL); + return (state->return_value = true, true); + } + return (false); +} + +bool scan_heredoc_content_other(struct s_heredoc_scan_state *state) +{ + if (state->lexer->get_column(state->lexer) == 0) + { + while 
(me_isspace(state->lexer->lookahead)) + { + if (state->did_advance) + state->lexer->advance(state->lexer, false); + else + state->lexer->advance(state->lexer, true); + } + if (state->end_type != SIMPLE_HEREDOC_BODY) + { + state->lexer->result_symbol = state->middle_type; + if (scan_heredoc_end_identifier(state->heredoc, state->lexer)) + return (state->return_value = true, true); + } + if (state->end_type == SIMPLE_HEREDOC_BODY) + { + state->lexer->result_symbol = state->end_type; + state->lexer->mark_end(state->lexer); + if (scan_heredoc_end_identifier(state->heredoc, state->lexer)) + return (state->return_value = true, true); + } + } + state->did_advance = true; + state->lexer->advance(state->lexer, false); + return (false); +} diff --git a/parser/src/scanner/serialize.c b/parser/src/scanner/serialize.c index e46b95aa..879c149b 100644 --- a/parser/src/scanner/serialize.c +++ b/parser/src/scanner/serialize.c @@ -6,21 +6,21 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/01 15:06:56 by maiboyer #+# #+# */ -/* Updated: 2024/09/01 15:08:47 by maiboyer ### ########.fr */ +/* Updated: 2024/09/01 19:28:24 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ -#include "parser/inner/scanner.h" -#include "parser/inner/heredoc.h" #include "me/types.h" #include "parser/array.h" +#include "parser/inner/heredoc.h" +#include "parser/inner/scanner.h" #include "parser/parser.h" -t_error serialize_heredocs(t_scanner *scanner, t_u8* buffer, t_u32 *size, t_usize i) +t_error serialize_heredocs(t_scanner *scanner, t_u8 *buffer, t_u32 *size, t_usize i) { t_heredoc *heredoc; - heredoc = array_get(&scanner->heredocs, i); + heredoc = vec_heredoc_get(&scanner->heredocs, i); if (heredoc->delimiter.len + 1 + sizeof(t_usize) + (*size) >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return (ERROR); buffer[(*size)++] = (char)heredoc->is_raw; @@ -40,17 +40,17 @@ t_error serialize_heredocs(t_scanner *scanner, 
t_u8* buffer, t_u32 *size, t_usiz t_u32 tree_sitter_sh_external_scanner_serialize(t_scanner *scanner, t_u8 *buffer) { - t_u32 size; - t_usize i; + t_u32 size; + t_usize i; size = 0; buffer[size++] = (char)scanner->last_glob_paren_depth; buffer[size++] = (char)scanner->ext_was_in_double_quote; buffer[size++] = (char)scanner->ext_saw_outside_quote; - buffer[size++] = (char)scanner->heredocs.size; + buffer[size++] = (char)scanner->heredocs.len; i = 0; - while (i < scanner->heredocs.size) + while (i < scanner->heredocs.len) if (serialize_heredocs(scanner, buffer, &size, i++)) return (0); return (size); -} \ No newline at end of file +} diff --git a/stdme/generic_sources/header/vec_C__PREFIX__.h__TEMPLATE__ b/stdme/generic_sources/header/vec_C__PREFIX__.h__TEMPLATE__ index f84d72ef..797272bb 100644 --- a/stdme/generic_sources/header/vec_C__PREFIX__.h__TEMPLATE__ +++ b/stdme/generic_sources/header/vec_C__PREFIX__.h__TEMPLATE__ @@ -120,4 +120,14 @@ void vec_C__PREFIX___sort(t_vec_C__PREFIX__ *vec, t_vec_C__PREFIX___sort_fn i /// @return true if the operation failed, false otherwise t_error vec_C__PREFIX___back(t_vec_C__PREFIX__ *vec, C__TYPENAME__ **out); +/// @brief Get a pointer to the i'th element, or NULL otherwise +/// @param vec The vec_C__PREFIX__ to get the element from +/// @return A pointer to the element or NULL +C__TYPENAME__ *vec_C__PREFIX___get(t_vec_C__PREFIX__ *vec, t_usize i); + +/// @brief Get a pointer to the last element, or NULL otherwise +/// @param vec The vec_C__PREFIX__ to get the element from +/// @return A pointer to the last element or NULL +C__TYPENAME__ *vec_C__PREFIX___last(t_vec_C__PREFIX__ *vec); + #endif diff --git a/stdme/generic_sources/src/vec/C__PREFIX___functions3.c__TEMPLATE__ b/stdme/generic_sources/src/vec/C__PREFIX___functions3.c__TEMPLATE__ index b56a2987..1b62753a 100644 --- a/stdme/generic_sources/src/vec/C__PREFIX___functions3.c__TEMPLATE__ +++ b/stdme/generic_sources/src/vec/C__PREFIX___functions3.c__TEMPLATE__ @@ -10,8 
+10,6 @@ /* */ /* ************************************************************************** */ -#include "me/mem/mem.h" -#include "me/mem/mem.h" #include "me/mem/mem.h" #include "me/types.h" #include "me/vec/vec_C__PREFIX__.h" diff --git a/stdme/generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__ b/stdme/generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__ new file mode 100644 index 00000000..ee9ac8f8 --- /dev/null +++ b/stdme/generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__ @@ -0,0 +1,30 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* vec_C__PREFIX__.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */ +/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/mem/mem.h" +#include "me/types.h" +#include "me/vec/vec_C__PREFIX__.h" +#include + +C__TYPENAME__ *vec_C__PREFIX___get(t_vec_C__PREFIX__ *vec, t_usize i) +{ + if (vec->len >= i) + return (NULL); + return (&vec->buffer[i]); +} + +C__TYPENAME__ *vec_C__PREFIX___last(t_vec_C__PREFIX__ *vec) +{ + if (vec->len == 0) + return (NULL); + return (&vec->buffer[vec->len - 1]); +} diff --git a/stdme/input.toml b/stdme/input.toml index a9c13b50..0b58b3cb 100644 --- a/stdme/input.toml +++ b/stdme/input.toml @@ -5,6 +5,7 @@ sources = [ "generic_sources/src/vec/C__PREFIX___sort.c__TEMPLATE__", "generic_sources/src/vec/C__PREFIX___functions2.c__TEMPLATE__", "generic_sources/src/vec/C__PREFIX___functions3.c__TEMPLATE__", + "generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__", ] replace.C__TYPENAME__ = "type" replace.C__TYPEHEADER__ = "header_include"