Updated scanner to be a bit more normed, didn't yet do the big ass functions
This commit is contained in:
parent
7e1e51e90b
commit
00546417ff
6 changed files with 108 additions and 94 deletions
|
|
@ -50,6 +50,8 @@ struct s_heredoc_scan_state
|
||||||
bool return_value;
|
bool return_value;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef bool (*t_heredoc_content_func)(struct s_heredoc_scan_state *state);
|
||||||
|
|
||||||
bool advance_word(TSLexer *lexer, t_string *unquoted_word);
|
bool advance_word(TSLexer *lexer, t_string *unquoted_word);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||||
/* +#+#+#+#+#+ +#+ */
|
/* +#+#+#+#+#+ +#+ */
|
||||||
/* Created: 2024/09/01 19:28:19 by maiboyer #+# #+# */
|
/* Created: 2024/09/01 19:28:19 by maiboyer #+# #+# */
|
||||||
/* Updated: 2024/09/01 19:30:20 by maiboyer ### ########.fr */
|
/* Updated: 2024/09/01 20:10:14 by maiboyer ### ########.fr */
|
||||||
/* */
|
/* */
|
||||||
/* ************************************************************************** */
|
/* ************************************************************************** */
|
||||||
|
|
||||||
|
|
@ -15,20 +15,26 @@
|
||||||
#include "me/types.h"
|
#include "me/types.h"
|
||||||
#include "parser/parser.h"
|
#include "parser/parser.h"
|
||||||
|
|
||||||
bool advance_word(TSLexer *lexer, t_string *unquoted_word)
|
void advance_word_inner(TSLexer *lexer, bool *empty, t_i32 *quote)
|
||||||
{
|
{
|
||||||
bool empty;
|
*empty = true;
|
||||||
t_i32 quote;
|
*quote = 0;
|
||||||
|
|
||||||
empty = true;
|
|
||||||
quote = 0;
|
|
||||||
if (lexer->lookahead == '\'' || lexer->lookahead == '"')
|
if (lexer->lookahead == '\'' || lexer->lookahead == '"')
|
||||||
{
|
{
|
||||||
quote = lexer->lookahead;
|
*quote = lexer->lookahead;
|
||||||
lexer->advance(lexer, false);
|
lexer->advance(lexer, false);
|
||||||
}
|
}
|
||||||
while (lexer->lookahead && !((quote && (lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n')) ||
|
}
|
||||||
(!quote && (me_isspace(lexer->lookahead)))))
|
|
||||||
|
bool advance_word(TSLexer *lexer, t_string *unquoted_word)
|
||||||
|
{
|
||||||
|
bool empty;
|
||||||
|
t_i32 quote;
|
||||||
|
|
||||||
|
advance_word_inner(lexer, &empty, &quote);
|
||||||
|
while (lexer->lookahead && !((quote && (lexer->lookahead == quote
|
||||||
|
|| lexer->lookahead == '\r' || lexer->lookahead == '\n'))
|
||||||
|
|| (!quote && (me_isspace(lexer->lookahead)))))
|
||||||
{
|
{
|
||||||
if (lexer->lookahead == '\\')
|
if (lexer->lookahead == '\\')
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||||
/* +#+#+#+#+#+ +#+ */
|
/* +#+#+#+#+#+ +#+ */
|
||||||
/* Created: 2024/09/01 15:06:56 by maiboyer #+# #+# */
|
/* Created: 2024/09/01 15:06:56 by maiboyer #+# #+# */
|
||||||
/* Updated: 2024/09/01 19:40:35 by maiboyer ### ########.fr */
|
/* Updated: 2024/09/01 20:08:37 by maiboyer ### ########.fr */
|
||||||
/* */
|
/* */
|
||||||
/* ************************************************************************** */
|
/* ************************************************************************** */
|
||||||
|
|
||||||
|
|
@ -16,53 +16,53 @@
|
||||||
#include "parser/inner/heredoc.h"
|
#include "parser/inner/heredoc.h"
|
||||||
#include "parser/inner/scanner.h"
|
#include "parser/inner/scanner.h"
|
||||||
|
|
||||||
void reset(t_scanner *);
|
void reset(t_scanner *scanner);
|
||||||
|
|
||||||
void tree_sitter_sh_external_scanner_deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length)
|
void actual_reset(\
|
||||||
|
t_scanner *scanner, const t_u8 *buffer, t_usize i, t_u32 *size)
|
||||||
{
|
{
|
||||||
t_usize delim_size;
|
t_heredoc *heredoc;
|
||||||
t_u32 size;
|
t_usize delim_size;
|
||||||
t_u32 heredoc_count;
|
|
||||||
t_heredoc *heredoc;
|
|
||||||
t_usize i;
|
|
||||||
|
|
||||||
if (length == 0)
|
heredoc = NULL;
|
||||||
reset(scanner);
|
if (i < scanner->heredocs.len)
|
||||||
|
heredoc = vec_heredoc_get(&scanner->heredocs, i);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
size = 0;
|
vec_heredoc_push(&scanner->heredocs, heredoc_new());
|
||||||
scanner->last_glob_paren_depth = buffer[size++];
|
heredoc = vec_heredoc_last(&scanner->heredocs);
|
||||||
scanner->ext_was_in_double_quote = buffer[size++];
|
}
|
||||||
scanner->ext_saw_outside_quote = buffer[size++];
|
heredoc->is_raw = buffer[(*size)++];
|
||||||
heredoc_count = (t_u8)buffer[size++];
|
heredoc->started = buffer[(*size)++];
|
||||||
i = 0;
|
heredoc->allows_indent = buffer[(*size)++];
|
||||||
while (i < heredoc_count)
|
mem_copy(&delim_size, &buffer[(*size)], sizeof(t_usize));
|
||||||
{
|
(*size) += sizeof(t_usize);
|
||||||
heredoc = NULL;
|
string_reserve(&heredoc->delimiter, delim_size + 1);
|
||||||
if (i < scanner->heredocs.len)
|
heredoc->delimiter.len = delim_size - 1;
|
||||||
heredoc = vec_heredoc_get(&scanner->heredocs, i);
|
if (delim_size > 0)
|
||||||
else
|
{
|
||||||
{
|
mem_copy(heredoc->delimiter.buf, &buffer[(*size)], delim_size);
|
||||||
vec_heredoc_push(&scanner->heredocs, heredoc_new());
|
(*size) += delim_size;
|
||||||
heredoc = vec_heredoc_last(&scanner->heredocs);
|
|
||||||
}
|
|
||||||
|
|
||||||
heredoc->is_raw = buffer[size++];
|
|
||||||
heredoc->started = buffer[size++];
|
|
||||||
heredoc->allows_indent = buffer[size++];
|
|
||||||
|
|
||||||
mem_copy(&delim_size, &buffer[size], sizeof(t_usize));
|
|
||||||
size += sizeof(t_usize);
|
|
||||||
string_reserve(&heredoc->delimiter, delim_size + 1);
|
|
||||||
heredoc->delimiter.len = delim_size - 1;
|
|
||||||
if (delim_size > 0)
|
|
||||||
{
|
|
||||||
mem_copy(heredoc->delimiter.buf, &buffer[size], delim_size);
|
|
||||||
size += delim_size;
|
|
||||||
}
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
if (size != length)
|
|
||||||
me_abort("size != length");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void tree_sitter_sh_external_scanner_deserialize(t_scanner *scanner,
|
||||||
|
const t_u8 *buffer, t_u32 length)
|
||||||
|
{
|
||||||
|
t_u32 size;
|
||||||
|
t_u32 heredoc_count;
|
||||||
|
t_usize i;
|
||||||
|
|
||||||
|
if (length == 0)
|
||||||
|
return (reset(scanner));
|
||||||
|
size = 0;
|
||||||
|
scanner->last_glob_paren_depth = buffer[size++];
|
||||||
|
scanner->ext_was_in_double_quote = buffer[size++];
|
||||||
|
scanner->ext_saw_outside_quote = buffer[size++];
|
||||||
|
heredoc_count = (t_u8)buffer[size++];
|
||||||
|
i = 0;
|
||||||
|
while (i < heredoc_count)
|
||||||
|
actual_reset(scanner, buffer, i++, &size);
|
||||||
|
if (size != length)
|
||||||
|
me_abort("size != length");
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,42 +6,44 @@
|
||||||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||||
/* +#+#+#+#+#+ +#+ */
|
/* +#+#+#+#+#+ +#+ */
|
||||||
/* Created: 2024/09/01 19:33:04 by maiboyer #+# #+# */
|
/* Created: 2024/09/01 19:33:04 by maiboyer #+# #+# */
|
||||||
/* Updated: 2024/09/01 19:55:50 by maiboyer ### ########.fr */
|
/* Updated: 2024/09/01 20:02:49 by maiboyer ### ########.fr */
|
||||||
/* */
|
/* */
|
||||||
/* ************************************************************************** */
|
/* ************************************************************************** */
|
||||||
|
|
||||||
#include "parser/inner/heredoc.h"
|
|
||||||
#include "me/char/char.h"
|
#include "me/char/char.h"
|
||||||
#include "me/str/str.h"
|
#include "me/str/str.h"
|
||||||
#include "me/types.h"
|
#include "me/types.h"
|
||||||
#include "me/vec/vec_heredoc.h"
|
#include "me/vec/vec_heredoc.h"
|
||||||
|
#include "parser/inner/heredoc.h"
|
||||||
#include "parser/inner/scanner.h"
|
#include "parser/inner/scanner.h"
|
||||||
#include "parser/parser.h"
|
#include "parser/parser.h"
|
||||||
|
|
||||||
bool scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer)
|
bool scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer)
|
||||||
{
|
{
|
||||||
bool found_delimiter;
|
bool found_delimiter;
|
||||||
|
|
||||||
while (me_isspace(lexer->lookahead))
|
while (me_isspace(lexer->lookahead))
|
||||||
lexer->advance(lexer, true);
|
lexer->advance(lexer, true);
|
||||||
lexer->result_symbol = HEREDOC_START;
|
lexer->result_symbol = HEREDOC_START;
|
||||||
heredoc->is_raw = lexer->lookahead == '\'' || lexer->lookahead == '"' || lexer->lookahead == '\\';
|
heredoc->is_raw = lexer->lookahead == '\'' || lexer->lookahead == '"'
|
||||||
|
|| lexer->lookahead == '\\';
|
||||||
found_delimiter = advance_word(lexer, &heredoc->delimiter);
|
found_delimiter = advance_word(lexer, &heredoc->delimiter);
|
||||||
if (!found_delimiter)
|
if (!found_delimiter)
|
||||||
return (string_clear(&heredoc->delimiter), false);
|
return (string_clear(&heredoc->delimiter), false);
|
||||||
return (found_delimiter);
|
return (found_delimiter);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer)
|
bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer)
|
||||||
{
|
{
|
||||||
t_i32 size;
|
t_i32 size;
|
||||||
|
|
||||||
size = 0;
|
size = 0;
|
||||||
string_clear(&heredoc->current_leading_word);
|
string_clear(&heredoc->current_leading_word);
|
||||||
if (heredoc->delimiter.len > 0)
|
if (heredoc->delimiter.len > 0)
|
||||||
{
|
{
|
||||||
while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && (t_i32)heredoc->delimiter.buf[size] == lexer->lookahead &&
|
while (lexer->lookahead != '\0' && lexer->lookahead != '\n'
|
||||||
heredoc->current_leading_word.len < heredoc->delimiter.len)
|
&& (t_i32)heredoc->delimiter.buf[size] == lexer->lookahead
|
||||||
|
&& heredoc->current_leading_word.len < heredoc->delimiter.len)
|
||||||
{
|
{
|
||||||
string_push_char(&heredoc->current_leading_word, lexer->lookahead);
|
string_push_char(&heredoc->current_leading_word, lexer->lookahead);
|
||||||
lexer->advance(lexer, false);
|
lexer->advance(lexer, false);
|
||||||
|
|
@ -50,28 +52,26 @@ bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer)
|
||||||
}
|
}
|
||||||
if (heredoc->delimiter.len == 0)
|
if (heredoc->delimiter.len == 0)
|
||||||
return (false);
|
return (false);
|
||||||
return (str_compare(heredoc->current_leading_word.buf, heredoc->delimiter.buf));
|
return (str_compare(heredoc->current_leading_word.buf,
|
||||||
|
heredoc->delimiter.buf));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state);
|
bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state);
|
||||||
bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state);
|
bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state);
|
||||||
bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state);
|
bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state);
|
||||||
bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state);
|
bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state);
|
||||||
bool scan_heredoc_content_other(struct s_heredoc_scan_state *state);
|
bool scan_heredoc_content_other(struct s_heredoc_scan_state *state);
|
||||||
|
|
||||||
bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum e_token_type middle_type, enum e_token_type end_type)
|
bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer,
|
||||||
|
enum e_token_type middle_type, enum e_token_type end_type)
|
||||||
{
|
{
|
||||||
struct s_heredoc_scan_state state;
|
struct s_heredoc_scan_state state;
|
||||||
bool (*func)(struct s_heredoc_scan_state *state);
|
t_heredoc_content_func func;
|
||||||
|
|
||||||
state.did_advance = false;
|
|
||||||
state.lexer = lexer;
|
|
||||||
state.heredoc = vec_heredoc_last(&scanner->heredocs);
|
|
||||||
state.scanner = scanner;
|
|
||||||
state.middle_type = middle_type;
|
|
||||||
state.end_type = end_type;
|
|
||||||
state.return_value = false;
|
|
||||||
|
|
||||||
|
state = (struct s_heredoc_scan_state){.did_advance = false, \
|
||||||
|
.lexer = lexer, .heredoc = vec_heredoc_last(&scanner->heredocs), \
|
||||||
|
.scanner = scanner, .middle_type = middle_type, .end_type = end_type, \
|
||||||
|
.return_value = false};
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
if (lexer->lookahead == '\0')
|
if (lexer->lookahead == '\0')
|
||||||
|
|
|
||||||
|
|
@ -18,9 +18,9 @@
|
||||||
#include "parser/inner/scanner.h"
|
#include "parser/inner/scanner.h"
|
||||||
#include "parser/parser.h"
|
#include "parser/parser.h"
|
||||||
|
|
||||||
bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer);
|
bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer);
|
||||||
|
|
||||||
bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state)
|
bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state)
|
||||||
{
|
{
|
||||||
if (state->lexer->eof(state->lexer) && state->did_advance)
|
if (state->lexer->eof(state->lexer) && state->did_advance)
|
||||||
{
|
{
|
||||||
|
|
@ -31,7 +31,7 @@ bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state)
|
||||||
return (state->return_value = false, true);
|
return (state->return_value = false, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state)
|
bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state)
|
||||||
{
|
{
|
||||||
state->did_advance = true;
|
state->did_advance = true;
|
||||||
state->lexer->advance(state->lexer, false);
|
state->lexer->advance(state->lexer, false);
|
||||||
|
|
@ -39,7 +39,7 @@ bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state)
|
||||||
return (false);
|
return (false);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state)
|
bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state)
|
||||||
{
|
{
|
||||||
if (state->heredoc->is_raw)
|
if (state->heredoc->is_raw)
|
||||||
{
|
{
|
||||||
|
|
@ -52,10 +52,12 @@ bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state)
|
||||||
state->lexer->result_symbol = state->middle_type;
|
state->lexer->result_symbol = state->middle_type;
|
||||||
state->heredoc->started = true;
|
state->heredoc->started = true;
|
||||||
state->lexer->advance(state->lexer, false);
|
state->lexer->advance(state->lexer, false);
|
||||||
if (me_isalpha(state->lexer->lookahead) || state->lexer->lookahead == '{' || state->lexer->lookahead == '(')
|
if (me_isalpha(state->lexer->lookahead)
|
||||||
|
|| state->lexer->lookahead == '{' || state->lexer->lookahead == '(')
|
||||||
return (state->return_value = true, true);
|
return (state->return_value = true, true);
|
||||||
}
|
}
|
||||||
if (state->middle_type == HEREDOC_BODY_BEGINNING && state->lexer->get_column(state->lexer) == 0)
|
if (state->middle_type == HEREDOC_BODY_BEGINNING
|
||||||
|
&& state->lexer->get_column(state->lexer) == 0)
|
||||||
{
|
{
|
||||||
state->lexer->result_symbol = state->middle_type;
|
state->lexer->result_symbol = state->middle_type;
|
||||||
state->heredoc->started = true;
|
state->heredoc->started = true;
|
||||||
|
|
@ -64,7 +66,7 @@ bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state)
|
||||||
return (state->return_value = false, true);
|
return (state->return_value = false, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state)
|
bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state)
|
||||||
{
|
{
|
||||||
if (!state->did_advance)
|
if (!state->did_advance)
|
||||||
state->lexer->advance(state->lexer, true);
|
state->lexer->advance(state->lexer, true);
|
||||||
|
|
@ -89,7 +91,7 @@ bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state)
|
||||||
return (false);
|
return (false);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool scan_heredoc_content_other(struct s_heredoc_scan_state *state)
|
bool scan_heredoc_content_other(struct s_heredoc_scan_state *state)
|
||||||
{
|
{
|
||||||
if (state->lexer->get_column(state->lexer) == 0)
|
if (state->lexer->get_column(state->lexer) == 0)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -16,12 +16,14 @@
|
||||||
#include "parser/inner/scanner.h"
|
#include "parser/inner/scanner.h"
|
||||||
#include "parser/parser.h"
|
#include "parser/parser.h"
|
||||||
|
|
||||||
t_error serialize_heredocs(t_scanner *scanner, t_u8 *buffer, t_u32 *size, t_usize i)
|
t_error serialize_heredocs(t_scanner *scanner, t_u8 *buffer, t_u32 *size,
|
||||||
|
t_usize i)
|
||||||
{
|
{
|
||||||
t_heredoc *heredoc;
|
t_heredoc *heredoc;
|
||||||
|
|
||||||
heredoc = vec_heredoc_get(&scanner->heredocs, i);
|
heredoc = vec_heredoc_get(&scanner->heredocs, i);
|
||||||
if (heredoc->delimiter.len + 1 + sizeof(t_usize) + (*size) >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
|
if (heredoc->delimiter.len + 1 + sizeof(t_usize)
|
||||||
|
+ (*size) >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
|
||||||
return (ERROR);
|
return (ERROR);
|
||||||
buffer[(*size)++] = (char)heredoc->is_raw;
|
buffer[(*size)++] = (char)heredoc->is_raw;
|
||||||
buffer[(*size)++] = (char)heredoc->started;
|
buffer[(*size)++] = (char)heredoc->started;
|
||||||
|
|
@ -31,17 +33,19 @@ t_error serialize_heredocs(t_scanner *scanner, t_u8 *buffer, t_u32 *size, t_usiz
|
||||||
size += sizeof(t_usize);
|
size += sizeof(t_usize);
|
||||||
if (heredoc->delimiter.len > 0)
|
if (heredoc->delimiter.len > 0)
|
||||||
{
|
{
|
||||||
mem_copy(&buffer[(*size)], heredoc->delimiter.buf, heredoc->delimiter.len);
|
mem_copy(&buffer[(*size)], heredoc->delimiter.buf,
|
||||||
|
heredoc->delimiter.len);
|
||||||
(*size) += heredoc->delimiter.len;
|
(*size) += heredoc->delimiter.len;
|
||||||
}
|
}
|
||||||
heredoc->delimiter.len--;
|
heredoc->delimiter.len--;
|
||||||
return (NO_ERROR);
|
return (NO_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
t_u32 tree_sitter_sh_external_scanner_serialize(t_scanner *scanner, t_u8 *buffer)
|
t_u32 tree_sitter_sh_external_scanner_serialize(t_scanner *scanner,
|
||||||
|
t_u8 *buffer)
|
||||||
{
|
{
|
||||||
t_u32 size;
|
t_u32 size;
|
||||||
t_usize i;
|
t_usize i;
|
||||||
|
|
||||||
size = 0;
|
size = 0;
|
||||||
buffer[size++] = (char)scanner->last_glob_paren_depth;
|
buffer[size++] = (char)scanner->last_glob_paren_depth;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue