split heredoc handling in the scanner

This commit is contained in:
Maieul BOYER 2024-09-01 19:56:22 +00:00
parent 8be7417a61
commit 7e1e51e90b
30 changed files with 663 additions and 416 deletions

View file

@ -14,21 +14,26 @@ src/hashmap/env/env_utils \
src/vec/ast/ast \
src/vec/ast/ast_functions2 \
src/vec/ast/ast_functions3 \
src/vec/ast/ast_functions4 \
src/vec/ast/ast_sort \
src/vec/estr/estr \
src/vec/estr/estr_functions2 \
src/vec/estr/estr_functions3 \
src/vec/estr/estr_functions4 \
src/vec/estr/estr_sort \
src/vec/heredoc/heredoc \
src/vec/heredoc/heredoc_functions2 \
src/vec/heredoc/heredoc_functions3 \
src/vec/heredoc/heredoc_functions4 \
src/vec/heredoc/heredoc_sort \
src/vec/pid/pid \
src/vec/pid/pid_functions2 \
src/vec/pid/pid_functions3 \
src/vec/pid/pid_functions4 \
src/vec/pid/pid_sort \
src/vec/str/str \
src/vec/str/str_functions2 \
src/vec/str/str_functions3 \
src/vec/str/str_functions4 \
src/vec/str/str_sort \

View file

@ -5,6 +5,7 @@ sources = [
"stdme/generic_sources/src/vec/C__PREFIX___sort.c__TEMPLATE__",
"stdme/generic_sources/src/vec/C__PREFIX___functions2.c__TEMPLATE__",
"stdme/generic_sources/src/vec/C__PREFIX___functions3.c__TEMPLATE__",
"stdme/generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__",
]
replace.C__TYPENAME__ = "type"
replace.C__TYPEHEADER__ = "header_include"

View file

@ -120,4 +120,14 @@ void vec_ast_sort(t_vec_ast *vec, t_vec_ast_sort_fn is_sorted);
/// @return true if the operation failed, false otherwise
t_error vec_ast_back(t_vec_ast *vec, t_ast_node **out);
/// @brief Get a pointer to the element stored at index `i`
/// @param vec The vec_ast to get the element from
/// @param i The zero-based index of the requested element
/// @return A pointer to the element inside the vec's buffer, or NULL when
/// there is no element at index `i`
t_ast_node *vec_ast_get(t_vec_ast *vec, t_usize i);
/// @brief Get a pointer to the last element of the vec
/// @param vec The vec_ast to get the element from
/// @return A pointer to the last element inside the vec's buffer, or NULL
/// when the vec is empty
t_ast_node *vec_ast_last(t_vec_ast *vec);
#endif

View file

@ -120,4 +120,14 @@ void vec_estr_sort(t_vec_estr *vec, t_vec_estr_sort_fn is_sorted);
/// @return true if the operation failed, false otherwise
t_error vec_estr_back(t_vec_estr *vec, t_expandable_str **out);
/// @brief Get a pointer to the element stored at index `i`
/// @param vec The vec_estr to get the element from
/// @param i The zero-based index of the requested element
/// @return A pointer to the element inside the vec's buffer, or NULL when
/// there is no element at index `i`
t_expandable_str *vec_estr_get(t_vec_estr *vec, t_usize i);
/// @brief Get a pointer to the last element of the vec
/// @param vec The vec_estr to get the element from
/// @return A pointer to the last element inside the vec's buffer, or NULL
/// when the vec is empty
t_expandable_str *vec_estr_last(t_vec_estr *vec);
#endif

View file

@ -120,4 +120,14 @@ void vec_heredoc_sort(t_vec_heredoc *vec, t_vec_heredoc_sort_fn is_sorted);
/// @return true if the operation failed, false otherwise
t_error vec_heredoc_back(t_vec_heredoc *vec, t_heredoc **out);
/// @brief Get a pointer to the element stored at index `i`
/// @param vec The vec_heredoc to get the element from
/// @param i The zero-based index of the requested element
/// @return A pointer to the element inside the vec's buffer, or NULL when
/// there is no element at index `i`
t_heredoc *vec_heredoc_get(t_vec_heredoc *vec, t_usize i);
/// @brief Get a pointer to the last element of the vec
/// @param vec The vec_heredoc to get the element from
/// @return A pointer to the last element inside the vec's buffer, or NULL
/// when the vec is empty
t_heredoc *vec_heredoc_last(t_vec_heredoc *vec);
#endif

View file

@ -120,4 +120,14 @@ void vec_pid_sort(t_vec_pid *vec, t_vec_pid_sort_fn is_sorted);
/// @return true if the operation failed, false otherwise
t_error vec_pid_back(t_vec_pid *vec, t_pid **out);
/// @brief Get a pointer to the element stored at index `i`
/// @param vec The vec_pid to get the element from
/// @param i The zero-based index of the requested element
/// @return A pointer to the element inside the vec's buffer, or NULL when
/// there is no element at index `i`
t_pid *vec_pid_get(t_vec_pid *vec, t_usize i);
/// @brief Get a pointer to the last element of the vec
/// @param vec The vec_pid to get the element from
/// @return A pointer to the last element inside the vec's buffer, or NULL
/// when the vec is empty
t_pid *vec_pid_last(t_vec_pid *vec);
#endif

View file

@ -120,4 +120,14 @@ void vec_str_sort(t_vec_str *vec, t_vec_str_sort_fn is_sorted);
/// @return true if the operation failed, false otherwise
t_error vec_str_back(t_vec_str *vec, t_str **out);
/// @brief Get a pointer to the element stored at index `i`
/// @param vec The vec_str to get the element from
/// @param i The zero-based index of the requested element
/// @return A pointer to the element inside the vec's buffer, or NULL when
/// there is no element at index `i`
t_str *vec_str_get(t_vec_str *vec, t_usize i);
/// @brief Get a pointer to the last element of the vec
/// @param vec The vec_str to get the element from
/// @return A pointer to the last element inside the vec's buffer, or NULL
/// when the vec is empty
t_str *vec_str_last(t_vec_str *vec);
#endif

View file

@ -10,8 +10,6 @@
/* */
/* ************************************************************************** */
#include "me/mem/mem.h"
#include "me/mem/mem.h"
#include "me/mem/mem.h"
#include "me/types.h"
#include "me/vec/vec_ast.h"

View file

@ -0,0 +1,30 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* vec_ast.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */
/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
#include "me/mem/mem.h"
#include "me/types.h"
#include "me/vec/vec_ast.h"
#include <stdlib.h>
/// @brief Get a pointer to the element stored at index `i`
/// @param vec The vec_ast to read from
/// @param i The zero-based index of the requested element
/// @return A pointer into the vec's buffer, or NULL when `i` is out of bounds
t_ast_node *vec_ast_get(t_vec_ast *vec, t_usize i)
{
	// Valid indices are 0 .. len - 1, so anything at or past len is rejected.
	if (i >= vec->len)
		return (NULL);
	return (&vec->buffer[i]);
}
/// @brief Borrow the final element of the vec
/// @param vec The vec_ast to inspect
/// @return A pointer to the element at index `len - 1`, or NULL if `len` is 0
t_ast_node *vec_ast_last(t_vec_ast *vec)
{
	if (vec->len != 0)
		return (vec->buffer + (vec->len - 1));
	return (NULL);
}

View file

@ -10,8 +10,6 @@
/* */
/* ************************************************************************** */
#include "me/mem/mem.h"
#include "me/mem/mem.h"
#include "me/mem/mem.h"
#include "me/types.h"
#include "me/vec/vec_estr.h"

View file

@ -0,0 +1,30 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* vec_estr.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */
/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
#include "me/mem/mem.h"
#include "me/types.h"
#include "me/vec/vec_estr.h"
#include <stdlib.h>
/// @brief Get a pointer to the element stored at index `i`
/// @param vec The vec_estr to read from
/// @param i The zero-based index of the requested element
/// @return A pointer into the vec's buffer, or NULL when `i` is out of bounds
t_expandable_str *vec_estr_get(t_vec_estr *vec, t_usize i)
{
	// Valid indices are 0 .. len - 1, so anything at or past len is rejected.
	if (i >= vec->len)
		return (NULL);
	return (&vec->buffer[i]);
}
/// @brief Borrow the final element of the vec
/// @param vec The vec_estr to inspect
/// @return A pointer to the element at index `len - 1`, or NULL if `len` is 0
t_expandable_str *vec_estr_last(t_vec_estr *vec)
{
	if (vec->len != 0)
		return (vec->buffer + (vec->len - 1));
	return (NULL);
}

View file

@ -10,8 +10,6 @@
/* */
/* ************************************************************************** */
#include "me/mem/mem.h"
#include "me/mem/mem.h"
#include "me/mem/mem.h"
#include "me/types.h"
#include "me/vec/vec_heredoc.h"

View file

@ -0,0 +1,30 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* vec_heredoc.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */
/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
#include "me/mem/mem.h"
#include "me/types.h"
#include "me/vec/vec_heredoc.h"
#include <stdlib.h>
/// @brief Get a pointer to the element stored at index `i`
/// @param vec The vec_heredoc to read from
/// @param i The zero-based index of the requested element
/// @return A pointer into the vec's buffer, or NULL when `i` is out of bounds
t_heredoc *vec_heredoc_get(t_vec_heredoc *vec, t_usize i)
{
	// Valid indices are 0 .. len - 1, so anything at or past len is rejected.
	if (i >= vec->len)
		return (NULL);
	return (&vec->buffer[i]);
}
/// @brief Borrow the final element of the vec
/// @param vec The vec_heredoc to inspect
/// @return A pointer to the element at index `len - 1`, or NULL if `len` is 0
t_heredoc *vec_heredoc_last(t_vec_heredoc *vec)
{
	if (vec->len != 0)
		return (vec->buffer + (vec->len - 1));
	return (NULL);
}

View file

@ -10,8 +10,6 @@
/* */
/* ************************************************************************** */
#include "me/mem/mem.h"
#include "me/mem/mem.h"
#include "me/mem/mem.h"
#include "me/types.h"
#include "me/vec/vec_pid.h"

View file

@ -0,0 +1,30 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* vec_pid.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */
/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
#include "me/mem/mem.h"
#include "me/types.h"
#include "me/vec/vec_pid.h"
#include <stdlib.h>
/// @brief Get a pointer to the element stored at index `i`
/// @param vec The vec_pid to read from
/// @param i The zero-based index of the requested element
/// @return A pointer into the vec's buffer, or NULL when `i` is out of bounds
t_pid *vec_pid_get(t_vec_pid *vec, t_usize i)
{
	// Valid indices are 0 .. len - 1, so anything at or past len is rejected.
	if (i >= vec->len)
		return (NULL);
	return (&vec->buffer[i]);
}
/// @brief Borrow the final element of the vec
/// @param vec The vec_pid to inspect
/// @return A pointer to the element at index `len - 1`, or NULL if `len` is 0
t_pid *vec_pid_last(t_vec_pid *vec)
{
	if (vec->len != 0)
		return (vec->buffer + (vec->len - 1));
	return (NULL);
}

View file

@ -10,8 +10,6 @@
/* */
/* ************************************************************************** */
#include "me/mem/mem.h"
#include "me/mem/mem.h"
#include "me/mem/mem.h"
#include "me/types.h"
#include "me/vec/vec_str.h"

View file

@ -0,0 +1,30 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* vec_str.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */
/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
#include "me/mem/mem.h"
#include "me/types.h"
#include "me/vec/vec_str.h"
#include <stdlib.h>
/// @brief Get a pointer to the element stored at index `i`
/// @param vec The vec_str to read from
/// @param i The zero-based index of the requested element
/// @return A pointer into the vec's buffer, or NULL when `i` is out of bounds
t_str *vec_str_get(t_vec_str *vec, t_usize i)
{
	// Valid indices are 0 .. len - 1, so anything at or past len is rejected.
	if (i >= vec->len)
		return (NULL);
	return (&vec->buffer[i]);
}
/// @brief Borrow the final element of the vec
/// @param vec The vec_str to inspect
/// @return A pointer to the element at index `len - 1`, or NULL if `len` is 0
t_str *vec_str_last(t_vec_str *vec)
{
	if (vec->len != 0)
		return (vec->buffer + (vec->len - 1));
	return (NULL);
}

View file

@ -33,7 +33,10 @@ parser \
point/point_funcs1 \
point/point_funcs2 \
scanner \
scanner/advance_words \
scanner/deserialize \
scanner/heredoc \
scanner/heredoc_functions \
scanner/serialize \
stack/stack_add_link \
stack/stack_funcs1 \

View file

@ -6,7 +6,7 @@
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2024/09/01 15:06:56 by maiboyer #+# #+# */
/* Updated: 2024/09/01 15:08:47 by maiboyer ### ########.fr */
/* Updated: 2024/09/01 19:01:16 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
@ -46,4 +46,10 @@ static inline void reset_heredoc(t_heredoc *heredoc)
string_clear(&heredoc->delimiter);
}
/// @brief Release the two strings held by a heredoc
/// @param heredoc The heredoc, taken by value; its `delimiter` and
/// `current_leading_word` members are each handed to string_free
/// @note The scanner passes this function as the second argument of
/// vec_heredoc_new — presumably as the per-element free callback; confirm
/// against the vec_heredoc implementation.
static inline void heredoc_free(t_heredoc heredoc)
{
string_free(heredoc.delimiter);
string_free(heredoc.current_leading_word);
}
#endif /* HEREDOC_TYPE_H */

View file

@ -2,9 +2,8 @@
#define SCANNER_H
#include "me/types.h"
#include "parser/inner/heredoc.h"
#include "me/vec/vec_heredoc.h"
#include "parser/array.h"
#include "parser/parser.h"
typedef struct s_scanner t_scanner;
@ -16,4 +15,41 @@ struct s_scanner
t_vec_heredoc heredocs;
};
/// Token kinds handled by the external scanner.
/// The enumerators are used as indices into the `valid_symbols` array given
/// to the scan function and as the values stored in `lexer->result_symbol`,
/// so their numeric order is significant.
/// NOTE(review): presumably this order has to match the order of the
/// grammar's `externals` list — confirm before adding or moving an entry.
enum e_token_type
{
HEREDOC_START,
SIMPLE_HEREDOC_BODY,
HEREDOC_BODY_BEGINNING,
HEREDOC_CONTENT,
HEREDOC_END,
FILE_DESCRIPTOR,
EMPTY_VALUE,
CONCAT,
VARIABLE_NAME,
REGEX,
EXPANSION_WORD,
EXTGLOB_PATTERN,
BARE_DOLLAR,
IMMEDIATE_DOUBLE_HASH,
HEREDOC_ARROW,
HEREDOC_ARROW_DASH,
NEWLINE,
OPENING_PAREN,
ESAC,
// Checked as `valid_symbols[ERROR_RECOVERY]` to disable most scanning rules.
ERROR_RECOVERY,
};
/// Working state for scanning the content of a heredoc.
/// NOTE(review): the members mirror the parameters and locals that heredoc
/// content scanning works with (scanner, lexer, the two token types, the
/// `did_advance` flag and the current heredoc); the helpers that consume this
/// struct are not visible here — confirm the exact contract against them.
struct s_heredoc_scan_state
{
// Scanner that owns the list of pending heredocs.
t_scanner *scanner;
// Lexer the characters are read from.
TSLexer *lexer;
// Token type to report for a partial piece of the heredoc body.
enum e_token_type middle_type;
// Token type to report when the heredoc body ends.
enum e_token_type end_type;
// Whether at least one character has been consumed so far.
bool did_advance;
// Heredoc currently being scanned.
t_heredoc *heredoc;
// NOTE(review): presumably the value the scan should return once a helper
// decides to stop — confirm.
bool return_value;
};
bool advance_word(TSLexer *lexer, t_string *unquoted_word);
#endif

View file

@ -6,112 +6,42 @@
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2024/09/01 14:17:17 by maiboyer #+# #+# */
/* Updated: 2024/09/01 18:50:23 by maiboyer ### ########.fr */
/* Updated: 2024/09/01 19:55:43 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
#include "parser/inner/scanner.h"
#include "me/char/char.h"
#include "me/str/str.h"
#include "me/string/string.h"
#include "parser/inner/heredoc.h"
#include "parser/inner/scanner.h"
#include "me/types.h"
#include "me/vec/vec_heredoc.h"
#include "parser/array.h"
#include "parser/inner/heredoc.h"
#include "parser/parser.h"
#include <assert.h>
/// Token kinds handled by the external scanner.
/// The enumerators are used as indices into the `valid_symbols` array given
/// to scan() and as the values stored in `lexer->result_symbol`, so their
/// numeric order is significant.
/// NOTE(review): presumably this order has to match the order of the
/// grammar's `externals` list — confirm before adding or moving an entry.
enum e_token_type
{
HEREDOC_START,
SIMPLE_HEREDOC_BODY,
HEREDOC_BODY_BEGINNING,
HEREDOC_CONTENT,
HEREDOC_END,
FILE_DESCRIPTOR,
EMPTY_VALUE,
CONCAT,
VARIABLE_NAME,
REGEX,
EXPANSION_WORD,
EXTGLOB_PATTERN,
BARE_DOLLAR,
IMMEDIATE_DOUBLE_HASH,
HEREDOC_ARROW,
HEREDOC_ARROW_DASH,
NEWLINE,
OPENING_PAREN,
ESAC,
// Checked as `valid_symbols[ERROR_RECOVERY]` to disable most scanning rules.
ERROR_RECOVERY,
};
/// @brief Move the lexer forward by one character, with the skip flag off
/// @param lexer The lexer to advance
/// @note Calls `lexer->advance` with `false` as second argument.
/// NOTE(review): in the tree-sitter lexer API that argument is the `skip`
/// flag, so `false` presumably keeps the character inside the current token —
/// confirm against the tree-sitter documentation.
void advance(TSLexer *lexer)
{
lexer->advance(lexer, false);
}
/// @brief Move the lexer forward by one character, with the skip flag on
/// @param lexer The lexer to advance
/// @note Calls `lexer->advance` with `true` as second argument.
/// NOTE(review): in the tree-sitter lexer API that argument is the `skip`
/// flag, so `true` presumably excludes the character from the current token
/// (treated like whitespace) — confirm against the tree-sitter documentation.
void skip(TSLexer *lexer)
{
lexer->advance(lexer, true);
}
/// @brief Tell whether the ERROR_RECOVERY symbol is flagged as valid
/// @param valid_symbols Array of flags indexed by `enum e_token_type`
/// @return The value of `valid_symbols[ERROR_RECOVERY]`
/// NOTE(review): presumably the parser marks every external symbol as valid
/// while it is recovering from an error, which is what this detects — confirm
/// against the tree-sitter documentation.
bool in_error_recovery(const bool *valid_symbols)
{
return (valid_symbols[ERROR_RECOVERY]);
}
bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum e_token_type middle_type, enum e_token_type end_type);
bool scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer);
bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer);
bool advance_word(TSLexer *lexer, t_string *unquoted_word);
void reset(t_scanner *scanner)
{
t_usize i;
i = 0;
while (i < scanner->heredocs.size)
reset_heredoc(array_get(&scanner->heredocs, i++));
}
/**
* Consume a "word" in POSIX parlance, and returns it unquoted.
*
* This is an approximate implementation that doesn't deal with any
* POSIX-mandated substitution, and assumes the default value for
* IFS.
*/
bool advance_word(TSLexer *lexer, t_string *unquoted_word)
{
bool empty;
t_i32 quote;
empty = true;
quote = 0;
if (lexer->lookahead == '\'' || lexer->lookahead == '"')
{
quote = lexer->lookahead;
advance(lexer);
}
while (lexer->lookahead && !((quote && (lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n')) ||
(!quote && (me_isspace(lexer->lookahead)))))
{
if (lexer->lookahead == '\\')
{
advance(lexer);
if (!lexer->lookahead)
return (false);
}
empty = false;
string_push_char(unquoted_word, lexer->lookahead);
advance(lexer);
}
if (quote && lexer->lookahead == quote)
advance(lexer);
return (!empty);
while (i < scanner->heredocs.len)
reset_heredoc(&scanner->heredocs.buffer[i++]);
}
bool scan_bare_dollar(TSLexer *lexer)
{
while (me_isspace(lexer->lookahead) && lexer->lookahead != '\n' && !lexer->eof(lexer))
skip(lexer);
lexer->advance(lexer, true);
if (lexer->lookahead == '$')
{
advance(lexer);
lexer->advance(lexer, false);
lexer->result_symbol = BARE_DOLLAR;
lexer->mark_end(lexer);
return (me_isspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"');
@ -119,207 +49,50 @@ bool scan_bare_dollar(TSLexer *lexer)
return (false);
}
/// @brief Read the delimiter word that follows a heredoc operator
/// @param heredoc The heredoc whose `is_raw` flag and `delimiter` are filled
/// @param lexer The lexer to read from; `result_symbol` is set to
/// HEREDOC_START
/// @return true when advance_word found a delimiter, false otherwise (the
/// delimiter string is cleared in that case)
bool scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer)
{
	// Leading whitespace is skipped, not consumed.
	for (; me_isspace(lexer->lookahead);)
		skip(lexer);
	lexer->result_symbol = HEREDOC_START;
	// A delimiter that starts with a quote or a backslash marks the heredoc
	// as raw.
	heredoc->is_raw = (lexer->lookahead == '\'' || lexer->lookahead == '"' || lexer->lookahead == '\\');
	if (advance_word(lexer, &heredoc->delimiter))
		return (true);
	string_clear(&heredoc->delimiter);
	return (false);
}
/// @brief Try to read the heredoc delimiter at the current lexer position
/// @param heredoc The heredoc whose `delimiter` is expected;
/// `current_leading_word` is cleared and then refilled with the characters
/// that matched
/// @param lexer The lexer to read from; it is advanced over every matching
/// character
/// @return false when the delimiter is empty, otherwise the result of
/// str_compare on the collected word and the delimiter
/// (NOTE(review): presumably true when both strings are equal — confirm)
bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer)
{
	t_i32 matched;

	string_clear(&heredoc->current_leading_word);
	if (heredoc->delimiter.len == 0)
		return (false);
	matched = 0;
	while (true)
	{
		// Stop at end of input or end of line.
		if (lexer->lookahead == '\0' || lexer->lookahead == '\n')
			break ;
		// Stop at the first character that differs from the delimiter.
		if ((t_i32)heredoc->delimiter.buf[matched] != lexer->lookahead)
			break ;
		// Stop once as many characters as the delimiter holds were collected.
		if (heredoc->current_leading_word.len >= heredoc->delimiter.len)
			break ;
		string_push_char(&heredoc->current_leading_word, lexer->lookahead);
		advance(lexer);
		matched++;
	}
	return (str_compare(heredoc->current_leading_word.buf, heredoc->delimiter.buf));
}
/// @brief Scan a piece of the body of the most recently pushed heredoc
/// @param scanner The scanner; its last heredoc is the one being read and is
/// popped when a HEREDOC_END is produced on a delimiter line
/// @param lexer The lexer to read from; `result_symbol` and the token end are
/// set on success
/// @param middle_type Symbol reported when the token stops inside the body
/// (in front of a `$`, or once the heredoc is marked as started)
/// @param end_type Symbol reported at end of input and while the heredoc is
/// not yet marked as started
/// @return true when a token was produced, false otherwise
bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum e_token_type middle_type, enum e_token_type end_type)
{
	bool did_advance = false;
	t_heredoc *heredoc = array_back(&scanner->heredocs);

	while (true)
	{
		if (lexer->lookahead == '\0')
		{
			// End of input only closes the body if something was consumed.
			if (lexer->eof(lexer) && did_advance)
			{
				reset_heredoc(heredoc);
				lexer->result_symbol = end_type;
				return (true);
			}
			return (false);
		}
		else if (lexer->lookahead == '\\')
		{
			// Consume the backslash together with the character after it.
			did_advance = true;
			advance(lexer);
			advance(lexer);
		}
		else if (lexer->lookahead == '$')
		{
			if (heredoc->is_raw)
			{
				// In a raw heredoc `$` is plain content: consume it and keep
				// scanning instead of falling into the expansion handling.
				did_advance = true;
				advance(lexer);
				continue ;
			}
			if (did_advance)
			{
				// End the token in front of the `$`, then peek past it.
				lexer->mark_end(lexer);
				lexer->result_symbol = middle_type;
				heredoc->started = true;
				advance(lexer);
				if (me_isalpha(lexer->lookahead) || lexer->lookahead == '{' || lexer->lookahead == '(')
					return (true);
				// Not the start of an expansion: the `$` is literal content,
				// so keep scanning rather than aborting the whole token.
				continue ;
			}
			if (middle_type == HEREDOC_BODY_BEGINNING && lexer->get_column(lexer) == 0)
			{
				lexer->result_symbol = middle_type;
				heredoc->started = true;
				return (true);
			}
			return (false);
		}
		else if (lexer->lookahead == '\n')
		{
			// A newline before any content is skipped, later ones are content.
			if (!did_advance)
				skip(lexer);
			else
				advance(lexer);
			did_advance = true;
			if (heredoc->allows_indent)
			{
				while (me_isspace(lexer->lookahead))
					advance(lexer);
			}
			lexer->result_symbol = end_type;
			if (heredoc->started)
				lexer->result_symbol = middle_type;
			lexer->mark_end(lexer);
			if (scan_heredoc_end_identifier(heredoc, lexer))
			{
				if (lexer->result_symbol == HEREDOC_END)
					(void)array_pop(&scanner->heredocs);
				return (true);
			}
		}
		else
		{
			if (lexer->get_column(lexer) == 0)
			{
				// At the start of a line the delimiter may follow optional
				// whitespace.
				while (me_isspace(lexer->lookahead))
				{
					if (did_advance)
						advance(lexer);
					else
						skip(lexer);
				}
				if (end_type != SIMPLE_HEREDOC_BODY)
				{
					lexer->result_symbol = middle_type;
					if (scan_heredoc_end_identifier(heredoc, lexer))
						return (true);
				}
				if (end_type == SIMPLE_HEREDOC_BODY)
				{
					lexer->result_symbol = end_type;
					lexer->mark_end(lexer);
					if (scan_heredoc_end_identifier(heredoc, lexer))
						return (true);
				}
			}
			did_advance = true;
			advance(lexer);
		}
	}
	return (false);
}
bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
{
if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols))
if (valid_symbols[CONCAT] && !valid_symbols[ERROR_RECOVERY])
{
if (!(lexer->lookahead == 0 || me_isspace(lexer->lookahead) || lexer->lookahead == '>' || lexer->lookahead == '<' ||
lexer->lookahead == ')' || lexer->lookahead == '(' || lexer->lookahead == ';' || lexer->lookahead == '&' ||
lexer->lookahead == '|' || lexer->lookahead == '{' || lexer->lookahead == '}'))
{
lexer->result_symbol = CONCAT;
// So for a`b`, we want to return a concat. We check if the
// 2nd backtick has whitespace after it, and if it does we
// return concat.
if (lexer->lookahead == '`')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
while (lexer->lookahead != '`' && !lexer->eof(lexer))
{
advance(lexer);
}
lexer->advance(lexer, false);
if (lexer->eof(lexer))
{
return false;
}
if (lexer->lookahead == '`')
{
advance(lexer);
}
lexer->advance(lexer, false);
return me_isspace(lexer->lookahead) || lexer->eof(lexer);
}
// strings w/ expansions that contains escaped quotes or
// backslashes need this to return a concat
if (lexer->lookahead == '\\')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '"' || lexer->lookahead == '\'' || lexer->lookahead == '\\')
{
return true;
}
if (lexer->eof(lexer))
{
return false;
}
}
else
{
return true;
}
}
}
if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !in_error_recovery(valid_symbols))
if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !valid_symbols[ERROR_RECOVERY])
{
// advance two # and ensure not } after
if (lexer->lookahead == '#')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '#')
{
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead != '}')
{
lexer->result_symbol = IMMEDIATE_DOUBLE_HASH;
@ -339,50 +112,43 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
}
}
if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.size > 0 &&
!array_back(&scanner->heredocs)->started && !in_error_recovery(valid_symbols))
{
if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.len > 0 &&
!vec_heredoc_last(&scanner->heredocs)->started && !valid_symbols[ERROR_RECOVERY])
return scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY);
}
if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0)
if (valid_symbols[HEREDOC_END] && scanner->heredocs.len > 0)
{
t_heredoc *heredoc = array_back(&scanner->heredocs);
t_heredoc *heredoc = vec_heredoc_last(&scanner->heredocs);
if (scan_heredoc_end_identifier(heredoc, lexer))
{
array_delete(&heredoc->current_leading_word);
array_delete(&heredoc->delimiter);
(void)array_pop(&scanner->heredocs);
string_free(heredoc->current_leading_word);
string_free(heredoc->delimiter);
(void)vec_heredoc_pop(&scanner->heredocs, NULL);
lexer->result_symbol = HEREDOC_END;
return true;
}
}
if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 && array_back(&scanner->heredocs)->started &&
!in_error_recovery(valid_symbols))
{
if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0 && vec_heredoc_last(&scanner->heredocs)->started &&
!valid_symbols[ERROR_RECOVERY])
return scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END);
}
if (valid_symbols[HEREDOC_START] && !in_error_recovery(valid_symbols) && scanner->heredocs.size > 0)
{
return scan_heredoc_start(array_back(&scanner->heredocs), lexer);
}
if (valid_symbols[HEREDOC_START] && !valid_symbols[ERROR_RECOVERY] && scanner->heredocs.len > 0)
return scan_heredoc_start(vec_heredoc_last(&scanner->heredocs), lexer);
if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) &&
!in_error_recovery(valid_symbols))
if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) && !valid_symbols[ERROR_RECOVERY])
{
for (;;)
while (true)
{
if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == '\r' ||
(lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) &&
!valid_symbols[EXPANSION_WORD])
{
skip(lexer);
lexer->advance(lexer, true);
}
else if (lexer->lookahead == '\\')
{
skip(lexer);
lexer->advance(lexer, true);
if (lexer->eof(lexer))
{
@ -392,39 +158,29 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
}
if (lexer->lookahead == '\r')
{
skip(lexer);
}
lexer->advance(lexer, true);
if (lexer->lookahead == '\n')
{
skip(lexer);
}
lexer->advance(lexer, true);
else
{
if (lexer->lookahead == '\\' && valid_symbols[EXPANSION_WORD])
{
goto expansion_word;
}
return false;
}
}
else
{
break;
}
}
// no '*', '@', '?', '-', '$', '0', '_'
if (!valid_symbols[EXPANSION_WORD] && (lexer->lookahead == '*' || lexer->lookahead == '@' || lexer->lookahead == '?' ||
lexer->lookahead == '-' || lexer->lookahead == '0' || lexer->lookahead == '_'))
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '=' || lexer->lookahead == '[' || lexer->lookahead == ':' || lexer->lookahead == '-' ||
lexer->lookahead == '%' || lexer->lookahead == '#' || lexer->lookahead == '/')
{
return false;
}
if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->lookahead))
{
lexer->mark_end(lexer);
@ -435,26 +191,22 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<')
{
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '<')
{
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '-')
{
advance(lexer);
lexer->advance(lexer, false);
t_heredoc heredoc = heredoc_new();
heredoc.allows_indent = true;
array_push(&scanner->heredocs, heredoc);
vec_heredoc_push(&scanner->heredocs, heredoc);
lexer->result_symbol = HEREDOC_ARROW_DASH;
}
// else if (lexer->lookahead == '<' || lexer->lookahead == '=')
// {
// return false;
// }
else
{
t_heredoc heredoc = heredoc_new();
array_push(&scanner->heredocs, heredoc);
vec_heredoc_push(&scanner->heredocs, heredoc);
lexer->result_symbol = HEREDOC_ARROW;
}
return true;
@ -464,45 +216,35 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
bool is_number = true;
if (me_isdigit(lexer->lookahead))
advance(lexer);
lexer->advance(lexer, false);
else if (me_isalpha(lexer->lookahead) || lexer->lookahead == '_')
{
is_number = false;
advance(lexer);
lexer->advance(lexer, false);
}
else
{
if (lexer->lookahead == '{')
{
goto brace_start;
}
if (valid_symbols[EXPANSION_WORD])
{
goto expansion_word;
}
if (valid_symbols[EXTGLOB_PATTERN])
{
goto extglob_pattern;
}
return false;
}
for (;;)
while (true)
{
if (me_isdigit(lexer->lookahead))
{
advance(lexer);
}
lexer->advance(lexer, false);
else if (me_isalpha(lexer->lookahead) || lexer->lookahead == '_')
{
is_number = false;
advance(lexer);
lexer->advance(lexer, false);
}
else
{
break;
}
}
if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->lookahead == '>' || lexer->lookahead == '<'))
{
@ -515,7 +257,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
if (lexer->lookahead == '+')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '=' || lexer->lookahead == ':')
{
lexer->result_symbol = VARIABLE_NAME;
@ -527,12 +269,8 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
{
return false;
}
if (lexer->lookahead == '=' || lexer->lookahead == '[' ||
(lexer->lookahead == ':' &&
!valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable
// names for function words, only handling : for now? #235
lexer->lookahead == '%' ||
(lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || (lexer->lookahead == '-'))
if (lexer->lookahead == '=' || lexer->lookahead == '[' || (lexer->lookahead == ':' && !valid_symbols[OPENING_PAREN]) ||
lexer->lookahead == '%' || (lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || (lexer->lookahead == '-'))
{
lexer->mark_end(lexer);
lexer->result_symbol = VARIABLE_NAME;
@ -542,7 +280,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
if (lexer->lookahead == '?')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
lexer->result_symbol = VARIABLE_NAME;
return me_isalpha(lexer->lookahead);
}
@ -551,20 +289,16 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
return false;
}
if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && scan_bare_dollar(lexer))
if (valid_symbols[BARE_DOLLAR] && !valid_symbols[ERROR_RECOVERY] && scan_bare_dollar(lexer))
{
return true;
}
if ((valid_symbols[REGEX]) && !in_error_recovery(valid_symbols))
if ((valid_symbols[REGEX]) && !valid_symbols[ERROR_RECOVERY])
{
if (valid_symbols[REGEX])
{
while (me_isspace(lexer->lookahead))
{
skip(lexer);
}
}
lexer->advance(lexer, true);
if ((lexer->lookahead != '"' && lexer->lookahead != '\'') || ((lexer->lookahead == '$' || lexer->lookahead == '\'')) ||
(lexer->lookahead == '\''))
@ -584,12 +318,10 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
if (lexer->lookahead == '$')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '(')
{
return false;
}
}
lexer->mark_end(lexer);
@ -601,7 +333,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
if (lexer->lookahead == '\'')
{
state.in_single_quote = false;
advance(lexer);
lexer->advance(lexer, false);
lexer->mark_end(lexer);
}
}
@ -646,7 +378,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
case '\'':
// Enter or exit a single-quoted string.
state.in_single_quote = !state.in_single_quote;
advance(lexer);
lexer->advance(lexer, false);
state.advanced_once = true;
state.last_was_escape = false;
continue;
@ -660,7 +392,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
if (valid_symbols[REGEX])
{
bool was_space = !state.in_single_quote && me_isspace(lexer->lookahead);
advance(lexer);
lexer->advance(lexer, false);
state.advanced_once = true;
if (!was_space || state.paren_depth > 0)
{
@ -671,20 +403,16 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
}
lexer->result_symbol = REGEX;
if (valid_symbols[REGEX] && !state.advanced_once)
{
return false;
}
return true;
return (!(valid_symbols[REGEX] && !state.advanced_once));
}
}
extglob_pattern:
if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols))
if (valid_symbols[EXTGLOB_PATTERN] && !valid_symbols[ERROR_RECOVERY])
{
// first skip ws, then check for ? * + @ !
while (me_isspace(lexer->lookahead))
skip(lexer);
lexer->advance(lexer, true);
if (lexer->lookahead == '?' || lexer->lookahead == '*' || lexer->lookahead == '+' || lexer->lookahead == '@' ||
lexer->lookahead == '!' || lexer->lookahead == '-' || lexer->lookahead == ')' || lexer->lookahead == '\\' ||
@ -692,9 +420,9 @@ extglob_pattern:
{
if (lexer->lookahead == '\\')
{
advance(lexer);
lexer->advance(lexer, false);
if ((me_isspace(lexer->lookahead) || lexer->lookahead == '"') && lexer->lookahead != '\r' && lexer->lookahead != '\n')
advance(lexer);
lexer->advance(lexer, false);
else
return false;
}
@ -702,13 +430,11 @@ extglob_pattern:
if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0)
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
if (me_isspace(lexer->lookahead))
{
return false;
}
}
lexer->mark_end(lexer);
bool was_non_alpha = !me_isalpha(lexer->lookahead);
@ -718,16 +444,16 @@ extglob_pattern:
if (lexer->lookahead == 'e')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == 's')
{
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == 'a')
{
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == 'c')
{
advance(lexer);
lexer->advance(lexer, false);
if (me_isspace(lexer->lookahead))
return false;
}
@ -735,16 +461,16 @@ extglob_pattern:
}
}
else
advance(lexer);
lexer->advance(lexer, false);
}
// -\w is just a word, find something else special
if (lexer->lookahead == '-')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
while (me_isalnum(lexer->lookahead))
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.')
return false;
@ -755,7 +481,7 @@ extglob_pattern:
if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0)
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
if (me_isspace(lexer->lookahead))
{
lexer->result_symbol = EXTGLOB_PATTERN;
@ -774,7 +500,7 @@ extglob_pattern:
if (lexer->lookahead == '$')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '{' || lexer->lookahead == '(')
{
lexer->result_symbol = EXTGLOB_PATTERN;
@ -785,7 +511,7 @@ extglob_pattern:
if (lexer->lookahead == '|')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
lexer->result_symbol = EXTGLOB_PATTERN;
return true;
}
@ -846,7 +572,7 @@ extglob_pattern:
if (lexer->lookahead == '|')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0)
{
lexer->result_symbol = EXTGLOB_PATTERN;
@ -862,7 +588,7 @@ extglob_pattern:
lexer->mark_end(lexer);
if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
state.saw_non_alphadot = true;
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '(' || lexer->lookahead == '{')
{
lexer->result_symbol = EXTGLOB_PATTERN;
@ -888,15 +614,15 @@ extglob_pattern:
{
if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
state.saw_non_alphadot = true;
advance(lexer);
lexer->advance(lexer, false);
if (me_isspace(lexer->lookahead) || lexer->lookahead == '"')
advance(lexer);
lexer->advance(lexer, false);
}
else
{
if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
state.saw_non_alphadot = true;
advance(lexer);
lexer->advance(lexer, false);
}
if (!was_space)
lexer->mark_end(lexer);
@ -917,14 +643,14 @@ expansion_word:
{
bool advanced_once = false;
bool advance_once_space = false;
for (;;)
while (true)
{
if (lexer->lookahead == '\"')
return false;
if (lexer->lookahead == '$')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || me_isalnum(lexer->lookahead))
{
lexer->result_symbol = EXPANSION_WORD;
@ -943,17 +669,13 @@ expansion_word:
if (lexer->lookahead == '(' && !(advanced_once || advance_once_space))
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
while (lexer->lookahead != ')' && !lexer->eof(lexer))
{
// if we find a $( or ${ assume this is valid and is
// a garbage concatenation of some weird word + an
// expansion
// I wonder where this can fail
if (lexer->lookahead == '$')
{
lexer->mark_end(lexer);
advance(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || me_isalnum(lexer->lookahead))
{
lexer->result_symbol = EXPANSION_WORD;
@ -965,14 +687,14 @@ expansion_word:
{
advanced_once = advanced_once || !me_isspace(lexer->lookahead);
advance_once_space = advance_once_space || me_isspace(lexer->lookahead);
advance(lexer);
lexer->advance(lexer, false);
}
}
lexer->mark_end(lexer);
if (lexer->lookahead == ')')
{
advanced_once = true;
advance(lexer);
lexer->advance(lexer, false);
lexer->mark_end(lexer);
if (lexer->lookahead == '}')
return false;
@ -987,7 +709,7 @@ expansion_word:
return false;
advanced_once = advanced_once || !me_isspace(lexer->lookahead);
advance_once_space = advance_once_space || me_isspace(lexer->lookahead);
advance(lexer);
lexer->advance(lexer, false);
}
}
@ -1000,7 +722,7 @@ void *tree_sitter_sh_external_scanner_create()
t_scanner *scanner;
scanner = mem_alloc(sizeof(*scanner));
array_init(&scanner->heredocs);
scanner->heredocs = vec_heredoc_new(16, heredoc_free);
return (scanner);
}
@ -1014,18 +736,6 @@ bool tree_sitter_sh_external_scanner_scan(void *payload, TSLexer *lexer, const b
void tree_sitter_sh_external_scanner_destroy(void *payload)
{
t_scanner *scanner;
t_heredoc *heredoc;
t_usize i;
scanner = (t_scanner *)payload;
i = 0;
while (i < scanner->heredocs.size)
{
heredoc = array_get(&scanner->heredocs, i++);
string_free(heredoc->current_leading_word);
string_free(heredoc->delimiter);
}
array_delete(&scanner->heredocs);
mem_free(scanner);
vec_heredoc_free(((t_scanner *)payload)->heredocs);
mem_free((t_scanner *)payload);
}

View file

@ -0,0 +1,46 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* advance_words.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2024/09/01 19:28:19 by maiboyer #+# #+# */
/* Updated: 2024/09/01 19:30:20 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
#include "me/char/char.h"
#include "me/string/string.h"
#include "me/types.h"
#include "parser/parser.h"
/// @brief Consume one word from the lexer and append its characters,
///        with quoting removed, to `unquoted_word`.
/// @param lexer The tree-sitter lexer to read from
/// @param unquoted_word The string receiving the characters of the word
/// @return true if at least one character was appended, false if the word
///         was empty or the input ended right after a backslash
bool	advance_word(TSLexer *lexer, t_string *unquoted_word)
{
	bool	pushed_any;
	t_i32	closing;

	pushed_any = false;
	closing = 0;
	if (lexer->lookahead == '"' || lexer->lookahead == '\'')
	{
		closing = lexer->lookahead;
		lexer->advance(lexer, false);
	}
	while (lexer->lookahead != 0)
	{
		// a quoted word stops at its closing quote or at a line break
		if (closing != 0 && (lexer->lookahead == closing
				|| lexer->lookahead == '\r' || lexer->lookahead == '\n'))
			break ;
		// an unquoted word stops at the first whitespace character
		if (closing == 0 && me_isspace(lexer->lookahead))
			break ;
		if (lexer->lookahead == '\\')
		{
			// drop the backslash, the character after it is kept verbatim
			lexer->advance(lexer, false);
			if (lexer->lookahead == 0)
				return (false);
		}
		pushed_any = true;
		string_push_char(unquoted_word, lexer->lookahead);
		lexer->advance(lexer, false);
	}
	if (closing != 0 && lexer->lookahead == closing)
		lexer->advance(lexer, false);
	return (pushed_any);
}

View file

@ -6,15 +6,15 @@
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2024/09/01 15:06:56 by maiboyer #+# #+# */
/* Updated: 2024/09/01 15:08:47 by maiboyer ### ########.fr */
/* Updated: 2024/09/01 19:40:35 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
#include "me/mem/mem.h"
#include "me/types.h"
#include "me/vec/vec_heredoc.h"
#include "parser/inner/heredoc.h"
#include "parser/inner/scanner.h"
#include "parser/array.h"
#include "parser/parser.h"
void reset(t_scanner *);
@ -39,12 +39,12 @@ void tree_sitter_sh_external_scanner_deserialize(t_scanner *scanner, const t_u8
while (i < heredoc_count)
{
heredoc = NULL;
if (i < scanner->heredocs.size)
heredoc = array_get(&scanner->heredocs, i);
if (i < scanner->heredocs.len)
heredoc = vec_heredoc_get(&scanner->heredocs, i);
else
{
array_push(&scanner->heredocs, heredoc_new());
heredoc = array_back(&scanner->heredocs);
vec_heredoc_push(&scanner->heredocs, heredoc_new());
heredoc = vec_heredoc_last(&scanner->heredocs);
}
heredoc->is_raw = buffer[size++];

View file

@ -0,0 +1,91 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* heredoc.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2024/09/01 19:33:04 by maiboyer #+# #+# */
/* Updated: 2024/09/01 19:55:50 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
#include "parser/inner/heredoc.h"
#include "me/char/char.h"
#include "me/str/str.h"
#include "me/types.h"
#include "me/vec/vec_heredoc.h"
#include "parser/inner/scanner.h"
#include "parser/parser.h"
/// @brief Scan the delimiter word that follows a heredoc operator.
///
/// Leading whitespace is advanced over with the lexer's skip argument set
/// to true. The heredoc is flagged as raw when the delimiter begins with a
/// quote or a backslash. The delimiter text is appended to
/// `heredoc->delimiter`.
/// @param heredoc The heredoc whose `is_raw` and `delimiter` get filled
/// @param lexer The tree-sitter lexer to read from
/// @return true if a non-empty delimiter was read; on failure the delimiter
///         string is cleared and false is returned
bool	scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer)
{
	t_i32	first;

	while (me_isspace(lexer->lookahead))
		lexer->advance(lexer, true);
	lexer->result_symbol = HEREDOC_START;
	first = lexer->lookahead;
	heredoc->is_raw = (first == '\\' || first == '"' || first == '\'');
	if (advance_word(lexer, &heredoc->delimiter))
		return (true);
	string_clear(&heredoc->delimiter);
	return (false);
}
/// @brief Try to read the heredoc delimiter at the current lexer position.
///
/// `heredoc->current_leading_word` is cleared, then filled with the input
/// characters that match the delimiter one by one. Matching stops at the
/// first mismatch, at a newline, at a NUL lookahead, or once as many
/// characters as the delimiter holds have been consumed.
/// @param heredoc The heredoc holding the delimiter to look for
/// @param lexer The tree-sitter lexer to read from
/// @return false when the delimiter is empty, otherwise the result of
///         `str_compare` on the collected word and the delimiter
///         (NOTE(review): presumably true when both are equal - confirm)
bool	scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer)
{
	t_string	*word;
	t_string	*delim;
	t_i32		idx;

	word = &heredoc->current_leading_word;
	delim = &heredoc->delimiter;
	string_clear(word);
	if (delim->len == 0)
		return (false);
	idx = 0;
	while (lexer->lookahead != '\0' && lexer->lookahead != '\n'
		&& (t_i32)delim->buf[idx] == lexer->lookahead
		&& word->len < delim->len)
	{
		string_push_char(word, lexer->lookahead);
		lexer->advance(lexer, false);
		idx++;
	}
	return (str_compare(word->buf, delim->buf));
}
bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state);
bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state);
bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state);
bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state);
bool scan_heredoc_content_other(struct s_heredoc_scan_state *state);
/// @brief Scan the body of the most recently pushed heredoc.
///
/// Each iteration picks a handler from the current lookahead character and
/// runs it on the shared scan state. A handler returns true to stop the
/// scan (the result is then `return_value` of the state) and false to keep
/// scanning.
/// @param scanner The scanner owning the heredoc stack
/// @param lexer The tree-sitter lexer to read from
/// @param middle_type The token type stored in the state as `middle_type`
/// @param end_type The token type stored in the state as `end_type`
/// @return The `return_value` set by the handler that stopped the scan
bool	scan_heredoc_content(t_scanner *scanner, TSLexer *lexer,
			enum e_token_type middle_type, enum e_token_type end_type)
{
	struct s_heredoc_scan_state	ctx;
	bool						(*handler)(struct s_heredoc_scan_state *);
	bool						done;

	ctx.scanner = scanner;
	ctx.lexer = lexer;
	ctx.heredoc = vec_heredoc_last(&scanner->heredocs);
	ctx.middle_type = middle_type;
	ctx.end_type = end_type;
	ctx.did_advance = false;
	ctx.return_value = false;
	done = false;
	while (!done)
	{
		handler = scan_heredoc_content_other;
		if (lexer->lookahead == '\0')
			handler = scan_heredoc_content_nullbyte;
		else if (lexer->lookahead == '\\')
			handler = scan_heredoc_content_backslash;
		else if (lexer->lookahead == '$')
			handler = scan_heredoc_content_dollar;
		else if (lexer->lookahead == '\n')
			handler = scan_heredoc_content_newline;
		done = handler(&ctx);
	}
	return (ctx.return_value);
}

View file

@ -0,0 +1,120 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* heredoc_functions.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2024/09/01 19:36:53 by maiboyer #+# #+# */
/* Updated: 2024/09/01 19:54:13 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
#include "me/char/char.h"
#include "me/str/str.h"
#include "me/types.h"
#include "me/vec/vec_heredoc.h"
#include "parser/inner/heredoc.h"
#include "parser/inner/scanner.h"
#include "parser/parser.h"
bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer);
bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state)
{
if (state->lexer->eof(state->lexer) && state->did_advance)
{
reset_heredoc(state->heredoc);
state->lexer->result_symbol = state->end_type;
return (state->return_value = true, true);
}
return (state->return_value = false, true);
}
bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state)
{
state->did_advance = true;
state->lexer->advance(state->lexer, false);
state->lexer->advance(state->lexer, false);
return (false);
}
/// @brief Heredoc content handler for a `$` character.
///
/// - In a raw heredoc the `$` is plain content: it is consumed and the
///   scan continues.
/// - Otherwise, if content was already consumed, the token end is marked
///   before the `$` and the token type is set to `middle_type`. The scan
///   stops successfully only if the `$` is followed by a letter, `{` or
///   `(`; if not, the `$` counts as content and the scan continues.
/// - Otherwise (nothing consumed yet), an empty `HEREDOC_BODY_BEGINNING`
///   token is emitted when the lexer is at column 0, else the scan fails.
///
/// The raw branch and the `did_advance` branch both end with
/// `return (false)`; without it they would fall through into the branches
/// below and corrupt or abort the scan.
/// @param state The shared heredoc scan state
/// @return true to stop the scan (result in `state->return_value`), false
///         to keep scanning
bool	scan_heredoc_content_dollar(struct s_heredoc_scan_state *state)
{
	if (state->heredoc->is_raw)
	{
		state->did_advance = true;
		state->lexer->advance(state->lexer, false);
		return (false);
	}
	if (state->did_advance)
	{
		state->lexer->mark_end(state->lexer);
		state->lexer->result_symbol = state->middle_type;
		state->heredoc->started = true;
		state->lexer->advance(state->lexer, false);
		if (me_isalpha(state->lexer->lookahead)
			|| state->lexer->lookahead == '{'
			|| state->lexer->lookahead == '(')
			return (state->return_value = true, true);
		return (false);
	}
	if (state->middle_type == HEREDOC_BODY_BEGINNING
		&& state->lexer->get_column(state->lexer) == 0)
	{
		state->lexer->result_symbol = state->middle_type;
		state->heredoc->started = true;
		return (state->return_value = true, true);
	}
	return (state->return_value = false, true);
}
bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state)
{
if (!state->did_advance)
state->lexer->advance(state->lexer, true);
else
state->lexer->advance(state->lexer, false);
state->did_advance = true;
if (state->heredoc->allows_indent)
{
while (me_isspace(state->lexer->lookahead))
state->lexer->advance(state->lexer, false);
}
state->lexer->result_symbol = state->end_type;
if (state->heredoc->started)
state->lexer->result_symbol = state->middle_type;
state->lexer->mark_end(state->lexer);
if (scan_heredoc_end_identifier(state->heredoc, state->lexer))
{
if (state->lexer->result_symbol == HEREDOC_END)
vec_heredoc_pop(&state->scanner->heredocs, NULL);
return (state->return_value = true, true);
}
return (false);
}
bool scan_heredoc_content_other(struct s_heredoc_scan_state *state)
{
if (state->lexer->get_column(state->lexer) == 0)
{
while (me_isspace(state->lexer->lookahead))
{
if (state->did_advance)
state->lexer->advance(state->lexer, false);
else
state->lexer->advance(state->lexer, true);
}
if (state->end_type != SIMPLE_HEREDOC_BODY)
{
state->lexer->result_symbol = state->middle_type;
if (scan_heredoc_end_identifier(state->heredoc, state->lexer))
return (state->return_value = true, true);
}
if (state->end_type == SIMPLE_HEREDOC_BODY)
{
state->lexer->result_symbol = state->end_type;
state->lexer->mark_end(state->lexer);
if (scan_heredoc_end_identifier(state->heredoc, state->lexer))
return (state->return_value = true, true);
}
}
state->did_advance = true;
state->lexer->advance(state->lexer, false);
return (false);
}

View file

@ -6,21 +6,21 @@
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2024/09/01 15:06:56 by maiboyer #+# #+# */
/* Updated: 2024/09/01 15:08:47 by maiboyer ### ########.fr */
/* Updated: 2024/09/01 19:28:24 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
#include "parser/inner/scanner.h"
#include "parser/inner/heredoc.h"
#include "me/types.h"
#include "parser/array.h"
#include "parser/inner/heredoc.h"
#include "parser/inner/scanner.h"
#include "parser/parser.h"
t_error serialize_heredocs(t_scanner *scanner, t_u8 *buffer, t_u32 *size, t_usize i)
{
t_heredoc *heredoc;
heredoc = array_get(&scanner->heredocs, i);
heredoc = vec_heredoc_get(&scanner->heredocs, i);
if (heredoc->delimiter.len + 1 + sizeof(t_usize) + (*size) >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
return (ERROR);
buffer[(*size)++] = (char)heredoc->is_raw;
@ -47,9 +47,9 @@ t_u32 tree_sitter_sh_external_scanner_serialize(t_scanner *scanner, t_u8 *buffer
buffer[size++] = (char)scanner->last_glob_paren_depth;
buffer[size++] = (char)scanner->ext_was_in_double_quote;
buffer[size++] = (char)scanner->ext_saw_outside_quote;
buffer[size++] = (char)scanner->heredocs.size;
buffer[size++] = (char)scanner->heredocs.len;
i = 0;
while (i < scanner->heredocs.size)
while (i < scanner->heredocs.len)
if (serialize_heredocs(scanner, buffer, &size, i++))
return (0);
return (size);

View file

@ -120,4 +120,14 @@ void vec_C__PREFIX___sort(t_vec_C__PREFIX__ *vec, t_vec_C__PREFIX___sort_fn i
/// @return true if the operation failed, false otherwise
t_error vec_C__PREFIX___back(t_vec_C__PREFIX__ *vec, C__TYPENAME__ **out);
/// @brief Get a pointer to the i'th element, or NULL if i is out of bounds
/// @param vec The vec_C__PREFIX__ to get the element from
/// @param i The index of the element to get
/// @return A pointer to the element or NULL
C__TYPENAME__ *vec_C__PREFIX___get(t_vec_C__PREFIX__ *vec, t_usize i);
/// @brief Get a pointer to the last element, or NULL if the vec is empty
/// @param vec The vec_C__PREFIX__ to get the element from
/// @return A pointer to the last element or NULL
C__TYPENAME__ *vec_C__PREFIX___last(t_vec_C__PREFIX__ *vec);
#endif

View file

@ -10,8 +10,6 @@
/* */
/* ************************************************************************** */
#include "me/mem/mem.h"
#include "me/mem/mem.h"
#include "me/mem/mem.h"
#include "me/types.h"
#include "me/vec/vec_C__PREFIX__.h"

View file

@ -0,0 +1,30 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* vec_C__PREFIX__.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2023/12/30 17:59:28 by maiboyer #+# #+# */
/* Updated: 2023/12/30 17:59:28 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
#include "me/mem/mem.h"
#include "me/types.h"
#include "me/vec/vec_C__PREFIX__.h"
#include <stdlib.h>
/// @brief Get a pointer to the i'th element of the vec
/// @param vec The vec_C__PREFIX__ to get the element from
/// @param i The index of the element to get
/// @return A pointer into the vec's buffer, or NULL if `i` is not a valid
///         index (`i >= vec->len`)
C__TYPENAME__	*vec_C__PREFIX___get(t_vec_C__PREFIX__ *vec, t_usize i)
{
	// valid indexes are [0, len); the comparison must reject i >= len
	if (i >= vec->len)
		return (NULL);
	return (&vec->buffer[i]);
}
/// @brief Get a pointer to the last element of the vec
/// @param vec The vec_C__PREFIX__ to get the element from
/// @return A pointer into the vec's buffer, or NULL if the vec is empty
C__TYPENAME__	*vec_C__PREFIX___last(t_vec_C__PREFIX__ *vec)
{
	if (vec->len != 0)
		return (vec->buffer + (vec->len - 1));
	return (NULL);
}

View file

@ -5,6 +5,7 @@ sources = [
"generic_sources/src/vec/C__PREFIX___sort.c__TEMPLATE__",
"generic_sources/src/vec/C__PREFIX___functions2.c__TEMPLATE__",
"generic_sources/src/vec/C__PREFIX___functions3.c__TEMPLATE__",
"generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__",
]
replace.C__TYPENAME__ = "type"
replace.C__TYPEHEADER__ = "header_include"