split heredoc handling in the scanner
This commit is contained in:
parent
8be7417a61
commit
7e1e51e90b
30 changed files with 663 additions and 416 deletions
46
parser/src/scanner/advance_words.c
Normal file
46
parser/src/scanner/advance_words.c
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* advance_words.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/09/01 19:28:19 by maiboyer #+# #+# */
|
||||
/* Updated: 2024/09/01 19:30:20 by maiboyer ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "me/char/char.h"
|
||||
#include "me/string/string.h"
|
||||
#include "me/types.h"
|
||||
#include "parser/parser.h"
|
||||
|
||||
bool advance_word(TSLexer *lexer, t_string *unquoted_word)
|
||||
{
|
||||
bool empty;
|
||||
t_i32 quote;
|
||||
|
||||
empty = true;
|
||||
quote = 0;
|
||||
if (lexer->lookahead == '\'' || lexer->lookahead == '"')
|
||||
{
|
||||
quote = lexer->lookahead;
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
while (lexer->lookahead && !((quote && (lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n')) ||
|
||||
(!quote && (me_isspace(lexer->lookahead)))))
|
||||
{
|
||||
if (lexer->lookahead == '\\')
|
||||
{
|
||||
lexer->advance(lexer, false);
|
||||
if (!lexer->lookahead)
|
||||
return (false);
|
||||
}
|
||||
empty = false;
|
||||
string_push_char(unquoted_word, lexer->lookahead);
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
if (quote && lexer->lookahead == quote)
|
||||
lexer->advance(lexer, false);
|
||||
return (!empty);
|
||||
}
|
||||
|
|
@ -6,15 +6,15 @@
|
|||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/09/01 15:06:56 by maiboyer #+# #+# */
|
||||
/* Updated: 2024/09/01 15:08:47 by maiboyer ### ########.fr */
|
||||
/* Updated: 2024/09/01 19:40:35 by maiboyer ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "me/mem/mem.h"
|
||||
#include "me/types.h"
|
||||
#include "me/vec/vec_heredoc.h"
|
||||
#include "parser/inner/heredoc.h"
|
||||
#include "parser/inner/scanner.h"
|
||||
#include "parser/array.h"
|
||||
#include "parser/parser.h"
|
||||
|
||||
void reset(t_scanner *);
|
||||
|
||||
|
|
@ -39,12 +39,12 @@ void tree_sitter_sh_external_scanner_deserialize(t_scanner *scanner, const t_u8
|
|||
while (i < heredoc_count)
|
||||
{
|
||||
heredoc = NULL;
|
||||
if (i < scanner->heredocs.size)
|
||||
heredoc = array_get(&scanner->heredocs, i);
|
||||
if (i < scanner->heredocs.len)
|
||||
heredoc = vec_heredoc_get(&scanner->heredocs, i);
|
||||
else
|
||||
{
|
||||
array_push(&scanner->heredocs, heredoc_new());
|
||||
heredoc = array_back(&scanner->heredocs);
|
||||
vec_heredoc_push(&scanner->heredocs, heredoc_new());
|
||||
heredoc = vec_heredoc_last(&scanner->heredocs);
|
||||
}
|
||||
|
||||
heredoc->is_raw = buffer[size++];
|
||||
|
|
|
|||
91
parser/src/scanner/heredoc.c
Normal file
91
parser/src/scanner/heredoc.c
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* heredoc.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/09/01 19:33:04 by maiboyer #+# #+# */
|
||||
/* Updated: 2024/09/01 19:55:50 by maiboyer ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "parser/inner/heredoc.h"
|
||||
#include "me/char/char.h"
|
||||
#include "me/str/str.h"
|
||||
#include "me/types.h"
|
||||
#include "me/vec/vec_heredoc.h"
|
||||
#include "parser/inner/scanner.h"
|
||||
#include "parser/parser.h"
|
||||
|
||||
/*
** Scans the delimiter word that follows a heredoc operator.
**
** Leading whitespace is advanced over with the lexer's skip flag set, the
** result symbol is set to HEREDOC_START, and the heredoc is flagged as raw
** when the delimiter begins with a single quote, a double quote or a
** backslash. The delimiter text itself is read by advance_word() into
** `heredoc->delimiter`.
**
** Returns true when a non-empty delimiter was read; otherwise the delimiter
** string is cleared and false is returned.
*/
bool	scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer)
{
	t_i32	first;

	while (me_isspace(lexer->lookahead))
		lexer->advance(lexer, true);
	lexer->result_symbol = HEREDOC_START;
	first = lexer->lookahead;
	heredoc->is_raw = (first == '\'' || first == '"' || first == '\\');
	if (advance_word(lexer, &heredoc->delimiter))
		return (true);
	string_clear(&heredoc->delimiter);
	return (false);
}
|
||||
|
||||
/*
** Checks whether the input at the lexer's current position spells the
** heredoc delimiter.
**
** `heredoc->current_leading_word` is cleared, then characters are consumed
** from `lexer` and appended to it for as long as they match the delimiter
** character by character. Matching stops at the end of the delimiter, at a
** NUL lookahead, at '\n', or at the first mismatching character.
**
** Returns false when the heredoc has no delimiter; otherwise returns the
** result of str_compare() on the collected word and the delimiter.
*/
bool	scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer)
{
	t_i32	size;

	string_clear(&heredoc->current_leading_word);
	if (heredoc->delimiter.len == 0)
		return (false);
	size = 0;
	/* The length check comes first so that delimiter.buf[size] is only */
	/* read while `size` is still inside the delimiter. */
	while (heredoc->current_leading_word.len < heredoc->delimiter.len
		&& lexer->lookahead != '\0' && lexer->lookahead != '\n'
		&& (t_i32)heredoc->delimiter.buf[size] == lexer->lookahead)
	{
		string_push_char(&heredoc->current_leading_word, lexer->lookahead);
		lexer->advance(lexer, false);
		size++;
	}
	return (str_compare(heredoc->current_leading_word.buf,
			heredoc->delimiter.buf));
}
|
||||
|
||||
bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state);
|
||||
bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state);
|
||||
bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state);
|
||||
bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state);
|
||||
bool scan_heredoc_content_other(struct s_heredoc_scan_state *state);
|
||||
|
||||
bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum e_token_type middle_type, enum e_token_type end_type)
|
||||
{
|
||||
struct s_heredoc_scan_state state;
|
||||
bool (*func)(struct s_heredoc_scan_state *state);
|
||||
|
||||
state.did_advance = false;
|
||||
state.lexer = lexer;
|
||||
state.heredoc = vec_heredoc_last(&scanner->heredocs);
|
||||
state.scanner = scanner;
|
||||
state.middle_type = middle_type;
|
||||
state.end_type = end_type;
|
||||
state.return_value = false;
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (lexer->lookahead == '\0')
|
||||
func = scan_heredoc_content_nullbyte;
|
||||
else if (lexer->lookahead == '\\')
|
||||
func = scan_heredoc_content_backslash;
|
||||
else if (lexer->lookahead == '$')
|
||||
func = scan_heredoc_content_dollar;
|
||||
else if (lexer->lookahead == '\n')
|
||||
func = scan_heredoc_content_newline;
|
||||
else
|
||||
func = scan_heredoc_content_other;
|
||||
if (func(&state))
|
||||
return (state.return_value);
|
||||
}
|
||||
return (false);
|
||||
}
|
||||
120
parser/src/scanner/heredoc_functions.c
Normal file
120
parser/src/scanner/heredoc_functions.c
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* heredoc_functions.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/09/01 19:36:53 by maiboyer #+# #+# */
|
||||
/* Updated: 2024/09/01 19:54:13 by maiboyer ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "me/char/char.h"
|
||||
#include "me/str/str.h"
|
||||
#include "me/types.h"
|
||||
#include "me/vec/vec_heredoc.h"
|
||||
#include "parser/inner/heredoc.h"
|
||||
#include "parser/inner/scanner.h"
|
||||
#include "parser/parser.h"
|
||||
|
||||
bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer);
|
||||
|
||||
bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state)
|
||||
{
|
||||
if (state->lexer->eof(state->lexer) && state->did_advance)
|
||||
{
|
||||
reset_heredoc(state->heredoc);
|
||||
state->lexer->result_symbol = state->end_type;
|
||||
return (state->return_value = true, true);
|
||||
}
|
||||
return (state->return_value = false, true);
|
||||
}
|
||||
|
||||
bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state)
|
||||
{
|
||||
state->did_advance = true;
|
||||
state->lexer->advance(state->lexer, false);
|
||||
state->lexer->advance(state->lexer, false);
|
||||
return (false);
|
||||
}
|
||||
|
||||
bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state)
|
||||
{
|
||||
if (state->heredoc->is_raw)
|
||||
{
|
||||
state->did_advance = true;
|
||||
state->lexer->advance(state->lexer, false);
|
||||
}
|
||||
if (state->did_advance)
|
||||
{
|
||||
state->lexer->mark_end(state->lexer);
|
||||
state->lexer->result_symbol = state->middle_type;
|
||||
state->heredoc->started = true;
|
||||
state->lexer->advance(state->lexer, false);
|
||||
if (me_isalpha(state->lexer->lookahead) || state->lexer->lookahead == '{' || state->lexer->lookahead == '(')
|
||||
return (state->return_value = true, true);
|
||||
}
|
||||
if (state->middle_type == HEREDOC_BODY_BEGINNING && state->lexer->get_column(state->lexer) == 0)
|
||||
{
|
||||
state->lexer->result_symbol = state->middle_type;
|
||||
state->heredoc->started = true;
|
||||
return (state->return_value = true, true);
|
||||
}
|
||||
return (state->return_value = false, true);
|
||||
}
|
||||
|
||||
bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state)
|
||||
{
|
||||
if (!state->did_advance)
|
||||
state->lexer->advance(state->lexer, true);
|
||||
else
|
||||
state->lexer->advance(state->lexer, false);
|
||||
state->did_advance = true;
|
||||
if (state->heredoc->allows_indent)
|
||||
{
|
||||
while (me_isspace(state->lexer->lookahead))
|
||||
state->lexer->advance(state->lexer, false);
|
||||
}
|
||||
state->lexer->result_symbol = state->end_type;
|
||||
if (state->heredoc->started)
|
||||
state->lexer->result_symbol = state->middle_type;
|
||||
state->lexer->mark_end(state->lexer);
|
||||
if (scan_heredoc_end_identifier(state->heredoc, state->lexer))
|
||||
{
|
||||
if (state->lexer->result_symbol == HEREDOC_END)
|
||||
vec_heredoc_pop(&state->scanner->heredocs, NULL);
|
||||
return (state->return_value = true, true);
|
||||
}
|
||||
return (false);
|
||||
}
|
||||
|
||||
bool scan_heredoc_content_other(struct s_heredoc_scan_state *state)
|
||||
{
|
||||
if (state->lexer->get_column(state->lexer) == 0)
|
||||
{
|
||||
while (me_isspace(state->lexer->lookahead))
|
||||
{
|
||||
if (state->did_advance)
|
||||
state->lexer->advance(state->lexer, false);
|
||||
else
|
||||
state->lexer->advance(state->lexer, true);
|
||||
}
|
||||
if (state->end_type != SIMPLE_HEREDOC_BODY)
|
||||
{
|
||||
state->lexer->result_symbol = state->middle_type;
|
||||
if (scan_heredoc_end_identifier(state->heredoc, state->lexer))
|
||||
return (state->return_value = true, true);
|
||||
}
|
||||
if (state->end_type == SIMPLE_HEREDOC_BODY)
|
||||
{
|
||||
state->lexer->result_symbol = state->end_type;
|
||||
state->lexer->mark_end(state->lexer);
|
||||
if (scan_heredoc_end_identifier(state->heredoc, state->lexer))
|
||||
return (state->return_value = true, true);
|
||||
}
|
||||
}
|
||||
state->did_advance = true;
|
||||
state->lexer->advance(state->lexer, false);
|
||||
return (false);
|
||||
}
|
||||
|
|
@ -6,21 +6,21 @@
|
|||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/09/01 15:06:56 by maiboyer #+# #+# */
|
||||
/* Updated: 2024/09/01 15:08:47 by maiboyer ### ########.fr */
|
||||
/* Updated: 2024/09/01 19:28:24 by maiboyer ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "parser/inner/scanner.h"
|
||||
#include "parser/inner/heredoc.h"
|
||||
#include "me/types.h"
|
||||
#include "parser/array.h"
|
||||
#include "parser/inner/heredoc.h"
|
||||
#include "parser/inner/scanner.h"
|
||||
#include "parser/parser.h"
|
||||
|
||||
t_error serialize_heredocs(t_scanner *scanner, t_u8* buffer, t_u32 *size, t_usize i)
|
||||
t_error serialize_heredocs(t_scanner *scanner, t_u8 *buffer, t_u32 *size, t_usize i)
|
||||
{
|
||||
t_heredoc *heredoc;
|
||||
|
||||
heredoc = array_get(&scanner->heredocs, i);
|
||||
heredoc = vec_heredoc_get(&scanner->heredocs, i);
|
||||
if (heredoc->delimiter.len + 1 + sizeof(t_usize) + (*size) >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
|
||||
return (ERROR);
|
||||
buffer[(*size)++] = (char)heredoc->is_raw;
|
||||
|
|
@ -40,17 +40,17 @@ t_error serialize_heredocs(t_scanner *scanner, t_u8* buffer, t_u32 *size, t_usiz
|
|||
|
||||
t_u32 tree_sitter_sh_external_scanner_serialize(t_scanner *scanner, t_u8 *buffer)
|
||||
{
|
||||
t_u32 size;
|
||||
t_usize i;
|
||||
t_u32 size;
|
||||
t_usize i;
|
||||
|
||||
size = 0;
|
||||
buffer[size++] = (char)scanner->last_glob_paren_depth;
|
||||
buffer[size++] = (char)scanner->ext_was_in_double_quote;
|
||||
buffer[size++] = (char)scanner->ext_saw_outside_quote;
|
||||
buffer[size++] = (char)scanner->heredocs.size;
|
||||
buffer[size++] = (char)scanner->heredocs.len;
|
||||
i = 0;
|
||||
while (i < scanner->heredocs.size)
|
||||
while (i < scanner->heredocs.len)
|
||||
if (serialize_heredocs(scanner, buffer, &size, i++))
|
||||
return (0);
|
||||
return (size);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue