norm: Start bringing the whole scanner into line with the norm (only just started, but a good start)

This commit is contained in:
Raphaël 2024-09-10 15:51:55 +02:00
parent 163db2241f
commit 800c9b0a50

View file

@ -1,3 +1,15 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* scanner.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: rparodi <rparodi@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2024/09/10 15:41:11 by rparodi #+# #+# */
/* Updated: 2024/09/10 15:51:28 by rparodi ### ########.fr */
/* */
/* ************************************************************************** */
#include "me/char/char.h" #include "me/char/char.h"
#include "me/mem/mem.h" #include "me/mem/mem.h"
#include "me/str/str.h" #include "me/str/str.h"
@ -8,8 +20,8 @@
#include "parser/lexer.h" #include "parser/lexer.h"
#include "parser/parser.h" #include "parser/parser.h"
typedef struct s_heredoc t_heredoc; typedef struct s_heredoc t_heredoc;
typedef struct s_scanner t_scanner; typedef struct s_scanner t_scanner;
enum e_token_type enum e_token_type
{ {
@ -37,31 +49,32 @@ enum e_token_type
struct s_scanner struct s_scanner
{ {
t_u8 last_glob_paren_depth; t_u8 last_glob_paren_depth;
bool ext_was_in_double_quote; bool ext_was_in_double_quote;
bool ext_saw_outside_quote; bool ext_saw_outside_quote;
t_vec_heredoc heredocs; t_vec_heredoc heredocs;
}; };
bool in_error_recovery(const bool *valid_symbols) bool in_error_recovery(const bool *valid_symbols)
{ {
return (valid_symbols[ERROR_RECOVERY]); return (valid_symbols[ERROR_RECOVERY]);
} }
/*
** Calls reset_heredoc() on every heredoc currently stored in the scanner.
** The vector itself is left in place; only its elements are reset.
*/
void	reset(t_scanner *scanner)
{
	t_u32	i;

	i = 0;
	while (i < scanner->heredocs.len)
	{
		reset_heredoc(vec_heredoc_get(&scanner->heredocs, i));
		i++;
	}
}
t_u32 serialize(t_scanner *scanner, t_u8 *buffer) t_u32 serialize(t_scanner *scanner, t_u8 *buffer)
{ {
t_u32 size; t_u32 size;
t_usize delimiter_size; t_usize delimiter_size;
t_usize i; t_usize i;
t_heredoc *heredoc; t_heredoc *heredoc;
i = 0; i = 0;
size = 0; size = 0;
@ -69,23 +82,22 @@ t_u32 serialize(t_scanner *scanner, t_u8 *buffer)
buffer[size++] = (char)scanner->ext_was_in_double_quote; buffer[size++] = (char)scanner->ext_was_in_double_quote;
buffer[size++] = (char)scanner->ext_saw_outside_quote; buffer[size++] = (char)scanner->ext_saw_outside_quote;
buffer[size++] = (char)scanner->heredocs.len; buffer[size++] = (char)scanner->heredocs.len;
while (i < scanner->heredocs.len) while (i < scanner->heredocs.len)
{ {
heredoc = vec_heredoc_get(&scanner->heredocs, i); heredoc = vec_heredoc_get(&scanner->heredocs, i);
if (heredoc->delimiter.len + 3 + size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) if (heredoc->delimiter.len + 3 + size >= \
TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
return (0); return (0);
buffer[size++] = (char)heredoc->is_raw; buffer[size++] = (char)heredoc->is_raw;
buffer[size++] = (char)heredoc->started; buffer[size++] = (char)heredoc->started;
buffer[size++] = (char)heredoc->allows_indent; buffer[size++] = (char)heredoc->allows_indent;
delimiter_size = heredoc->delimiter.len; delimiter_size = heredoc->delimiter.len;
mem_copy(&buffer[size], &delimiter_size, sizeof(t_usize)); mem_copy(&buffer[size], &delimiter_size, sizeof(t_usize));
size += sizeof(t_usize); size += sizeof(t_usize);
if (heredoc->delimiter.len > 0) if (heredoc->delimiter.len > 0)
{ {
mem_copy(&buffer[size], heredoc->delimiter.buf, heredoc->delimiter.len); mem_copy(&buffer[size], heredoc->delimiter.buf, \
heredoc->delimiter.len);
size += heredoc->delimiter.len; size += heredoc->delimiter.len;
} }
i++; i++;
@ -93,14 +105,14 @@ t_u32 serialize(t_scanner *scanner, t_u8 *buffer)
return (size); return (size);
} }
void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length) void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length)
{ {
t_u32 size; t_u32 size;
t_u32 heredoc_count; t_u32 heredoc_count;
t_usize i; t_usize i;
t_usize delimiter_size; t_usize delimiter_size;
t_heredoc *heredoc; t_heredoc *heredoc;
t_heredoc new_heredoc; t_heredoc new_heredoc;
if (length == 0) if (length == 0)
reset(scanner); reset(scanner);
@ -124,19 +136,17 @@ void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length)
vec_heredoc_push(&scanner->heredocs, new_heredoc); vec_heredoc_push(&scanner->heredocs, new_heredoc);
heredoc = vec_heredoc_last(&scanner->heredocs); heredoc = vec_heredoc_last(&scanner->heredocs);
} }
heredoc->is_raw = buffer[size++]; heredoc->is_raw = buffer[size++];
heredoc->started = buffer[size++]; heredoc->started = buffer[size++];
heredoc->allows_indent = buffer[size++]; heredoc->allows_indent = buffer[size++];
mem_copy(&delimiter_size, &buffer[size], sizeof(t_usize)); mem_copy(&delimiter_size, &buffer[size], sizeof(t_usize));
size += sizeof(t_usize); size += sizeof(t_usize);
heredoc->delimiter.len = delimiter_size; heredoc->delimiter.len = delimiter_size;
string_reserve(&heredoc->delimiter, heredoc->delimiter.len); string_reserve(&heredoc->delimiter, heredoc->delimiter.len);
if (heredoc->delimiter.len > 0) if (heredoc->delimiter.len > 0)
{ {
mem_copy(heredoc->delimiter.buf, &buffer[size], heredoc->delimiter.len); mem_copy(heredoc->delimiter.buf, &buffer[size], \
heredoc->delimiter.len);
size += heredoc->delimiter.len; size += heredoc->delimiter.len;
} }
i++; i++;
@ -153,80 +163,82 @@ void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length)
* POSIX-mandated substitution, and assumes the default value for * POSIX-mandated substitution, and assumes the default value for
* IFS. * IFS.
*/ */
/*
** Reads one word from the lexer into `unquoted_word`.
**
** If the word opens with ' or ", that quote is consumed and the word runs
** until the same quote, '\r', '\n' or end of input; the closing quote, when
** present, is consumed as well. Without a quote the word runs until the
** first character accepted by me_isspace() or end of input.
** A backslash is dropped and the character after it is taken literally.
** A '\0' is always pushed after the collected characters.
**
** Returns true when at least one character was collected, false when the
** word is empty or when the input ends right after a backslash (in that
** case no '\0' is pushed).
*/
bool	advance_word(t_lexer *lexer, t_string *unquoted_word)
{
	bool	empty;
	t_i32	quote;

	empty = true;
	quote = 0;
	if (lexer->data.lookahead == '\'' || lexer->data.lookahead == '"')
	{
		quote = lexer->data.lookahead;
		lexer->data.advance((void *)lexer, false);
	}
	while (lexer->data.lookahead
		&& !(quote ? (lexer->data.lookahead == quote
				|| lexer->data.lookahead == '\r'
				|| lexer->data.lookahead == '\n')
			: me_isspace(lexer->data.lookahead)))
	{
		if (lexer->data.lookahead == '\\')
		{
			lexer->data.advance((void *)lexer, false);
			if (!lexer->data.lookahead)
				return (false);
		}
		empty = false;
		string_push_char(unquoted_word, lexer->data.lookahead);
		lexer->data.advance((void *)lexer, false);
	}
	string_push_char(unquoted_word, '\0');
	if (quote && lexer->data.lookahead == quote)
		lexer->data.advance((void *)lexer, false);
	return (!empty);
}
/*
** Tries to recognise a '$' that stands on its own.
**
** Blanks other than '\n' are skipped first (advance() is called with `true`
** for them). If the next character is '$', it is consumed, result_symbol is
** set to BARE_DOLLAR and the token end is marked. The function then returns
** true only when the '$' is followed by whitespace, end of input or a
** double quote. Returns false when no '$' is found.
*/
bool	scan_bare_dollar(t_lexer *lexer)
{
	while (me_isspace(lexer->data.lookahead)
		&& lexer->data.lookahead != '\n'
		&& !lexer->data.eof((void *)lexer))
		lexer->data.advance((void *)lexer, true);
	if (lexer->data.lookahead != '$')
		return (false);
	lexer->data.advance((void *)lexer, false);
	lexer->data.result_symbol = BARE_DOLLAR;
	lexer->data.mark_end((void *)lexer);
	return (me_isspace(lexer->data.lookahead)
		|| lexer->data.eof((void *)lexer)
		|| lexer->data.lookahead == '\"');
}
/*
** Scans the delimiter word that opens a heredoc and stores it in
** `heredoc->delimiter`.
**
** Order matters here:
**   1. leading whitespace is skipped, so advance_word() starts on the first
**      character of the delimiter;
**   2. `is_raw` is taken from that first character (', " or \) BEFORE
**      advance_word() consumes it;
**   3. only then is the word itself read.
** Calling advance_word() first would make `is_raw` look at the character
** after the delimiter and would yield an empty word whenever the delimiter
** is preceded by blanks.
**
** result_symbol is set to HEREDOC_START in every case.
** Returns true when a non-empty delimiter was read; otherwise the delimiter
** string is cleared and false is returned.
*/
bool	scan_heredoc_start(t_heredoc *heredoc, t_lexer *lexer)
{
	bool	found_delimiter;

	while (me_isspace(lexer->data.lookahead))
		lexer->data.advance((void *)lexer, true);
	lexer->data.result_symbol = HEREDOC_START;
	heredoc->is_raw = (lexer->data.lookahead == '\''
			|| lexer->data.lookahead == '"'
			|| lexer->data.lookahead == '\\');
	found_delimiter = advance_word(lexer, &heredoc->delimiter);
	if (!found_delimiter)
		string_clear(&heredoc->delimiter);
	return (found_delimiter);
}
bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer) bool scan_heredoc_end_identifier(t_heredoc *heredoc, t_lexer *lexer)
{ {
t_i32 size;
size = 0;
string_clear(&heredoc->current_leading_word); string_clear(&heredoc->current_leading_word);
// Scan the first 'n' characters on this line, to see if they match the // Scan the first 'n' characters on this line, to see if they match the
// heredoc delimiter // heredoc delimiter
t_i32 size = 0;
if (heredoc->delimiter.len > 0) if (heredoc->delimiter.len > 0)
{ {
while (lexer->data.lookahead != '\0' && lexer->data.lookahead != '\n' && while (lexer->data.lookahead != '\0' && lexer->data.lookahead != '\n' &&