Updated scanner to use less dumb stuff
This commit is contained in:
parent
a77ced5fb7
commit
a8b09ab55a
2 changed files with 168 additions and 180 deletions
50
parser/include/parser/_inner/heredoc_type.h
Normal file
50
parser/include/parser/_inner/heredoc_type.h
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
/* ************************************************************************** */
|
||||||
|
/* */
|
||||||
|
/* ::: :::::::: */
|
||||||
|
/* heredoc_type.h :+: :+: :+: */
|
||||||
|
/* +:+ +:+ +:+ */
|
||||||
|
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||||
|
/* +#+#+#+#+#+ +#+ */
|
||||||
|
/* Created: 2024/09/01 15:06:56 by maiboyer #+# #+# */
|
||||||
|
/* Updated: 2024/09/01 15:08:47 by maiboyer ### ########.fr */
|
||||||
|
/* */
|
||||||
|
/* ************************************************************************** */
|
||||||
|
|
||||||
|
#ifndef HEREDOC_TYPE_H
|
||||||
|
#define HEREDOC_TYPE_H
|
||||||
|
|
||||||
|
#include "me/string/string.h"
|
||||||
|
#include "me/types.h"
|
||||||
|
|
||||||
|
typedef struct s_heredoc t_heredoc;
|
||||||
|
|
||||||
|
struct s_heredoc
|
||||||
|
{
|
||||||
|
bool is_raw;
|
||||||
|
bool started;
|
||||||
|
bool allows_indent;
|
||||||
|
t_string delimiter;
|
||||||
|
t_string current_leading_word;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline t_heredoc heredoc_new(void)
|
||||||
|
{
|
||||||
|
return ((t_heredoc){
|
||||||
|
.is_raw = false,
|
||||||
|
.started = false,
|
||||||
|
.allows_indent = false,
|
||||||
|
.delimiter = string_new(0),
|
||||||
|
.current_leading_word = string_new(0),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
** Returns an existing heredoc record to its idle state so it can be reused:
** the delimiter string is cleared and the three flags are set back to false.
**
** current_leading_word is intentionally left untouched here.
*/
static inline void	reset_heredoc(t_heredoc *heredoc)
{
	string_clear(&heredoc->delimiter);
	heredoc->allows_indent = false;
	heredoc->started = false;
	heredoc->is_raw = false;
}
|
||||||
|
|
||||||
|
#endif /* HEREDOC_TYPE_H */
|
||||||
|
|
@ -6,18 +6,22 @@
|
||||||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||||
/* +#+#+#+#+#+ +#+ */
|
/* +#+#+#+#+#+ +#+ */
|
||||||
/* Created: 2024/09/01 14:17:17 by maiboyer #+# #+# */
|
/* Created: 2024/09/01 14:17:17 by maiboyer #+# #+# */
|
||||||
/* Updated: 2024/09/01 14:22:35 by maiboyer ### ########.fr */
|
/* Updated: 2024/09/01 15:11:51 by maiboyer ### ########.fr */
|
||||||
/* */
|
/* */
|
||||||
/* ************************************************************************** */
|
/* ************************************************************************** */
|
||||||
|
|
||||||
#include "me/char/char.h"
|
#include "me/char/char.h"
|
||||||
|
#include "me/str/str.h"
|
||||||
|
#include "me/string/string.h"
|
||||||
#include "me/types.h"
|
#include "me/types.h"
|
||||||
|
#include "parser/_inner/heredoc_type.h"
|
||||||
#include "parser/array.h"
|
#include "parser/array.h"
|
||||||
#include "parser/parser.h"
|
#include "parser/parser.h"
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
enum TokenType
|
enum e_token_type
|
||||||
{
|
{
|
||||||
HEREDOC_START,
|
HEREDOC_START,
|
||||||
SIMPLE_HEREDOC_BODY,
|
SIMPLE_HEREDOC_BODY,
|
||||||
|
|
@ -41,18 +45,8 @@ enum TokenType
|
||||||
ERROR_RECOVERY,
|
ERROR_RECOVERY,
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef Array(char) String;
|
|
||||||
|
|
||||||
typedef struct s_heredoc t_heredoc;
|
|
||||||
struct s_heredoc
|
|
||||||
{
|
|
||||||
bool is_raw;
|
|
||||||
bool started;
|
|
||||||
bool allows_indent;
|
|
||||||
String delimiter;
|
|
||||||
String current_leading_word;
|
|
||||||
};
|
|
||||||
typedef struct s_scanner t_scanner;
|
typedef struct s_scanner t_scanner;
|
||||||
|
|
||||||
struct s_scanner
|
struct s_scanner
|
||||||
{
|
{
|
||||||
t_u8 last_glob_paren_depth;
|
t_u8 last_glob_paren_depth;
|
||||||
|
|
@ -61,51 +55,31 @@ struct s_scanner
|
||||||
Array(t_heredoc) heredocs;
|
Array(t_heredoc) heredocs;
|
||||||
};
|
};
|
||||||
|
|
||||||
t_heredoc heredoc_new(void)
|
void advance(TSLexer *lexer)
|
||||||
{
|
|
||||||
return ((t_heredoc){
|
|
||||||
.is_raw = false, .started = false, .allows_indent = false, .delimiter = array_new(), .current_leading_word = array_new()});
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void advance(TSLexer *lexer)
|
|
||||||
{
|
{
|
||||||
lexer->advance(lexer, false);
|
lexer->advance(lexer, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
** Moves the lexer to the next character by calling its advance callback
** with `true` as the second argument.
** NOTE(review): in tree-sitter's TSLexer that argument is the "skip" flag,
** so `true` should exclude the consumed character from the token - confirm.
*/
void	skip(TSLexer *lexer)
{
	bool	discard;

	discard = true;
	lexer->advance(lexer, discard);
}
|
||||||
|
|
||||||
static inline bool in_error_recovery(const bool *valid_symbols)
|
bool in_error_recovery(const bool *valid_symbols)
|
||||||
{
|
{
|
||||||
return valid_symbols[ERROR_RECOVERY];
|
return valid_symbols[ERROR_RECOVERY];
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void reset_string(String *string)
|
void reset(t_scanner *scanner)
|
||||||
{
|
{
|
||||||
if (string->size > 0)
|
t_usize i;
|
||||||
{
|
|
||||||
memset(string->contents, 0, string->size);
|
i = 0;
|
||||||
array_clear(string);
|
while (i < scanner->heredocs.size)
|
||||||
}
|
reset_heredoc(array_get(&scanner->heredocs, i++));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void reset_heredoc(t_heredoc *heredoc)
|
t_u32 serialize(t_scanner *scanner, t_u8 *buffer)
|
||||||
{
|
|
||||||
heredoc->is_raw = false;
|
|
||||||
heredoc->started = false;
|
|
||||||
heredoc->allows_indent = false;
|
|
||||||
reset_string(&heredoc->delimiter);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void reset(t_scanner *scanner)
|
|
||||||
{
|
|
||||||
for (t_u32 i = 0; i < scanner->heredocs.size; i++)
|
|
||||||
reset_heredoc(array_get(&scanner->heredocs, i));
|
|
||||||
}
|
|
||||||
|
|
||||||
static t_u32 serialize(t_scanner *scanner, t_u8 *buffer)
|
|
||||||
{
|
{
|
||||||
t_u32 size;
|
t_u32 size;
|
||||||
t_usize i;
|
t_usize i;
|
||||||
|
|
@ -116,52 +90,55 @@ static t_u32 serialize(t_scanner *scanner, t_u8 *buffer)
|
||||||
buffer[size++] = (char)scanner->ext_was_in_double_quote;
|
buffer[size++] = (char)scanner->ext_was_in_double_quote;
|
||||||
buffer[size++] = (char)scanner->ext_saw_outside_quote;
|
buffer[size++] = (char)scanner->ext_saw_outside_quote;
|
||||||
buffer[size++] = (char)scanner->heredocs.size;
|
buffer[size++] = (char)scanner->heredocs.size;
|
||||||
|
|
||||||
i = 0;
|
i = 0;
|
||||||
while (i < scanner->heredocs.size)
|
while (i < scanner->heredocs.size)
|
||||||
{
|
{
|
||||||
heredoc = array_get(&scanner->heredocs, i);
|
heredoc = array_get(&scanner->heredocs, i);
|
||||||
if (heredoc->delimiter.size + 3 + size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
|
if (heredoc->delimiter.len + 1 + sizeof(t_usize) + size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
|
||||||
return (0);
|
return (0);
|
||||||
|
|
||||||
buffer[size++] = (char)heredoc->is_raw;
|
buffer[size++] = (char)heredoc->is_raw;
|
||||||
buffer[size++] = (char)heredoc->started;
|
buffer[size++] = (char)heredoc->started;
|
||||||
buffer[size++] = (char)heredoc->allows_indent;
|
buffer[size++] = (char)heredoc->allows_indent;
|
||||||
|
heredoc->delimiter.len++;
|
||||||
mem_copy(&buffer[size], &heredoc->delimiter.size, sizeof(t_u32));
|
mem_copy(&buffer[size], &heredoc->delimiter.len, sizeof(t_usize));
|
||||||
size += sizeof(t_u32);
|
size += sizeof(t_usize);
|
||||||
if (heredoc->delimiter.size > 0)
|
if (heredoc->delimiter.len > 0)
|
||||||
{
|
{
|
||||||
mem_copy(&buffer[size], heredoc->delimiter.contents, heredoc->delimiter.size);
|
mem_copy(&buffer[size], heredoc->delimiter.buf, heredoc->delimiter.len);
|
||||||
size += heredoc->delimiter.size;
|
size += heredoc->delimiter.len;
|
||||||
}
|
}
|
||||||
|
heredoc->delimiter.len--;
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
return size;
|
return (size);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length)
|
void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length)
|
||||||
{
|
{
|
||||||
|
t_usize delim_size;
|
||||||
|
t_u32 size;
|
||||||
|
t_u32 heredoc_count;
|
||||||
|
t_heredoc *heredoc;
|
||||||
|
t_usize i;
|
||||||
|
|
||||||
if (length == 0)
|
if (length == 0)
|
||||||
reset(scanner);
|
reset(scanner);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
t_u32 size = 0;
|
size = 0;
|
||||||
scanner->last_glob_paren_depth = buffer[size++];
|
scanner->last_glob_paren_depth = buffer[size++];
|
||||||
scanner->ext_was_in_double_quote = buffer[size++];
|
scanner->ext_was_in_double_quote = buffer[size++];
|
||||||
scanner->ext_saw_outside_quote = buffer[size++];
|
scanner->ext_saw_outside_quote = buffer[size++];
|
||||||
t_u32 heredoc_count = (t_u8)buffer[size++];
|
heredoc_count = (t_u8)buffer[size++];
|
||||||
for (t_u32 i = 0; i < heredoc_count; i++)
|
i = 0;
|
||||||
|
while (i < heredoc_count)
|
||||||
{
|
{
|
||||||
t_heredoc *heredoc = NULL;
|
heredoc = NULL;
|
||||||
if (i < scanner->heredocs.size)
|
if (i < scanner->heredocs.size)
|
||||||
{
|
|
||||||
heredoc = array_get(&scanner->heredocs, i);
|
heredoc = array_get(&scanner->heredocs, i);
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
t_heredoc new_heredoc = heredoc_new();
|
array_push(&scanner->heredocs, heredoc_new());
|
||||||
array_push(&scanner->heredocs, new_heredoc);
|
|
||||||
heredoc = array_back(&scanner->heredocs);
|
heredoc = array_back(&scanner->heredocs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -169,17 +146,19 @@ static void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length)
|
||||||
heredoc->started = buffer[size++];
|
heredoc->started = buffer[size++];
|
||||||
heredoc->allows_indent = buffer[size++];
|
heredoc->allows_indent = buffer[size++];
|
||||||
|
|
||||||
mem_copy(&heredoc->delimiter.size, &buffer[size], sizeof(t_u32));
|
mem_copy(&delim_size, &buffer[size], sizeof(t_usize));
|
||||||
size += sizeof(t_u32);
|
size += sizeof(t_usize);
|
||||||
array_reserve(&heredoc->delimiter, heredoc->delimiter.size);
|
string_reserve(&heredoc->delimiter, delim_size + 1);
|
||||||
|
heredoc->delimiter.len = delim_size - 1;
|
||||||
if (heredoc->delimiter.size > 0)
|
if (delim_size > 0)
|
||||||
{
|
{
|
||||||
mem_copy(heredoc->delimiter.contents, &buffer[size], heredoc->delimiter.size);
|
mem_copy(heredoc->delimiter.buf, &buffer[size], delim_size);
|
||||||
size += heredoc->delimiter.size;
|
size += delim_size;
|
||||||
}
|
}
|
||||||
|
i++;
|
||||||
}
|
}
|
||||||
assert(size == length);
|
if (size != length)
|
||||||
|
me_abort("size != length");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -190,43 +169,40 @@ static void deserialize(t_scanner *scanner, const t_u8 *buffer, t_u32 length)
|
||||||
* POSIX-mandated substitution, and assumes the default value for
|
* POSIX-mandated substitution, and assumes the default value for
|
||||||
* IFS.
|
* IFS.
|
||||||
*/
|
*/
|
||||||
static bool advance_word(TSLexer *lexer, String *unquoted_word)
|
bool advance_word(TSLexer *lexer, t_string *unquoted_word)
|
||||||
{
|
{
|
||||||
bool empty = true;
|
bool empty;
|
||||||
t_i32 quote = 0;
|
t_i32 quote;
|
||||||
|
|
||||||
|
empty = true;
|
||||||
|
quote = 0;
|
||||||
if (lexer->lookahead == '\'' || lexer->lookahead == '"')
|
if (lexer->lookahead == '\'' || lexer->lookahead == '"')
|
||||||
{
|
{
|
||||||
quote = lexer->lookahead;
|
quote = lexer->lookahead;
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
}
|
}
|
||||||
|
while (lexer->lookahead && !((quote && (lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n')) ||
|
||||||
while (lexer->lookahead &&
|
(!quote && (me_isspace(lexer->lookahead)))))
|
||||||
!(quote ? lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n' : me_isspace(lexer->lookahead)))
|
|
||||||
{
|
{
|
||||||
if (lexer->lookahead == '\\')
|
if (lexer->lookahead == '\\')
|
||||||
{
|
{
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
if (!lexer->lookahead)
|
if (!lexer->lookahead)
|
||||||
return false;
|
return (false);
|
||||||
}
|
}
|
||||||
empty = false;
|
empty = false;
|
||||||
array_push(unquoted_word, lexer->lookahead);
|
string_push_char(unquoted_word, lexer->lookahead);
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
}
|
}
|
||||||
array_push(unquoted_word, '\0');
|
|
||||||
|
|
||||||
if (quote && lexer->lookahead == quote)
|
if (quote && lexer->lookahead == quote)
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
|
return (!empty);
|
||||||
return !empty;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool scan_bare_dollar(TSLexer *lexer)
|
bool scan_bare_dollar(TSLexer *lexer)
|
||||||
{
|
{
|
||||||
while (me_isspace(lexer->lookahead) && lexer->lookahead != '\n' && !lexer->eof(lexer))
|
while (me_isspace(lexer->lookahead) && lexer->lookahead != '\n' && !lexer->eof(lexer))
|
||||||
skip(lexer);
|
skip(lexer);
|
||||||
|
|
||||||
if (lexer->lookahead == '$')
|
if (lexer->lookahead == '$')
|
||||||
{
|
{
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
|
|
@ -234,81 +210,71 @@ static inline bool scan_bare_dollar(TSLexer *lexer)
|
||||||
lexer->mark_end(lexer);
|
lexer->mark_end(lexer);
|
||||||
return (me_isspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"');
|
return (me_isspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"');
|
||||||
}
|
}
|
||||||
|
return (false);
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
** Scans the delimiter word that follows a here-document operator.
**
** Leading whitespace is skipped, the result symbol is set to HEREDOC_START,
** and `is_raw` records whether the word starts with ', " or a backslash.
** The word itself is collected into heredoc->delimiter.
**
** Returns true when a non-empty delimiter was read. On failure the
** delimiter string is cleared and false is returned.
*/
bool	scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer)
{
	while (me_isspace(lexer->lookahead))
		skip(lexer);
	lexer->result_symbol = HEREDOC_START;
	heredoc->is_raw = (lexer->lookahead == '\''
			|| lexer->lookahead == '"'
			|| lexer->lookahead == '\\');
	if (advance_word(lexer, &heredoc->delimiter))
		return (true);
	string_clear(&heredoc->delimiter);
	return (false);
}
|
||||||
|
|
||||||
/*
** Checks whether the text at the lexer position starts with the heredoc
** delimiter.
**
** current_leading_word is cleared, then filled with the characters consumed
** while they keep matching the delimiter (stopping at end of input, at a
** newline, at the first mismatch, or once as many characters as the
** delimiter holds were read).
**
** The length bound is tested before delimiter.buf is indexed, so the loop
** never reads the buffer at an index that has not been validated first.
**
** Returns false when the delimiter is empty; otherwise the result of
** str_compare() on the collected word and the delimiter.
** NOTE(review): this relies on str_compare() returning true on equality
** (it replaced `strcmp(...) == 0`) - confirm in me/str/str.h.
*/
bool	scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer)
{
	t_usize	matched;

	string_clear(&heredoc->current_leading_word);
	if (heredoc->delimiter.len == 0)
		return (false);
	matched = 0;
	while (heredoc->current_leading_word.len < heredoc->delimiter.len
		&& lexer->lookahead != '\0' && lexer->lookahead != '\n'
		&& (t_i32)heredoc->delimiter.buf[matched] == lexer->lookahead)
	{
		string_push_char(&heredoc->current_leading_word, lexer->lookahead);
		advance(lexer);
		matched++;
	}
	return (str_compare(heredoc->current_leading_word.buf,
			heredoc->delimiter.buf));
}
|
||||||
|
|
||||||
static bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum TokenType middle_type, enum TokenType end_type)
|
bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum e_token_type middle_type, enum e_token_type end_type)
|
||||||
{
|
{
|
||||||
bool did_advance = false;
|
bool did_advance = false;
|
||||||
t_heredoc *heredoc = array_back(&scanner->heredocs);
|
t_heredoc *heredoc = array_back(&scanner->heredocs);
|
||||||
|
|
||||||
for (;;)
|
while (true)
|
||||||
{
|
{
|
||||||
switch (lexer->lookahead)
|
if (lexer->lookahead == '\0')
|
||||||
{
|
{
|
||||||
case '\0': {
|
|
||||||
if (lexer->eof(lexer) && did_advance)
|
if (lexer->eof(lexer) && did_advance)
|
||||||
{
|
{
|
||||||
reset_heredoc(heredoc);
|
reset_heredoc(heredoc);
|
||||||
lexer->result_symbol = end_type;
|
lexer->result_symbol = end_type;
|
||||||
return true;
|
return (true);
|
||||||
}
|
}
|
||||||
return false;
|
return (false);
|
||||||
}
|
}
|
||||||
|
else if (lexer->lookahead == '\\')
|
||||||
case '\\': {
|
{
|
||||||
did_advance = true;
|
did_advance = true;
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
else if (lexer->lookahead == '$')
|
||||||
case '$': {
|
{
|
||||||
if (heredoc->is_raw)
|
if (heredoc->is_raw)
|
||||||
{
|
{
|
||||||
did_advance = true;
|
did_advance = true;
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
if (did_advance)
|
if (did_advance)
|
||||||
{
|
{
|
||||||
|
|
@ -317,11 +283,8 @@ static bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum TokenT
|
||||||
heredoc->started = true;
|
heredoc->started = true;
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
if (me_isalpha(lexer->lookahead) || lexer->lookahead == '{' || lexer->lookahead == '(')
|
if (me_isalpha(lexer->lookahead) || lexer->lookahead == '{' || lexer->lookahead == '(')
|
||||||
{
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (middle_type == HEREDOC_BODY_BEGINNING && lexer->get_column(lexer) == 0)
|
if (middle_type == HEREDOC_BODY_BEGINNING && lexer->get_column(lexer) == 0)
|
||||||
{
|
{
|
||||||
lexer->result_symbol = middle_type;
|
lexer->result_symbol = middle_type;
|
||||||
|
|
@ -330,8 +293,8 @@ static bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum TokenT
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
else if (lexer->lookahead == '\n')
|
||||||
case '\n': {
|
{
|
||||||
if (!did_advance)
|
if (!did_advance)
|
||||||
{
|
{
|
||||||
skip(lexer);
|
skip(lexer);
|
||||||
|
|
@ -346,62 +309,50 @@ static bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum TokenT
|
||||||
while (me_isspace(lexer->lookahead))
|
while (me_isspace(lexer->lookahead))
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
}
|
}
|
||||||
lexer->result_symbol = heredoc->started ? middle_type : end_type;
|
lexer->result_symbol = end_type;
|
||||||
|
if (heredoc->started)
|
||||||
|
lexer->result_symbol = middle_type;
|
||||||
lexer->mark_end(lexer);
|
lexer->mark_end(lexer);
|
||||||
if (scan_heredoc_end_identifier(heredoc, lexer))
|
if (scan_heredoc_end_identifier(heredoc, lexer))
|
||||||
{
|
{
|
||||||
if (lexer->result_symbol == HEREDOC_END)
|
if (lexer->result_symbol == HEREDOC_END)
|
||||||
{
|
|
||||||
(void)array_pop(&scanner->heredocs);
|
(void)array_pop(&scanner->heredocs);
|
||||||
|
return (true);
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
default: {
|
|
||||||
if (lexer->get_column(lexer) == 0)
|
|
||||||
{
|
|
||||||
// an alternative is to check the starting column of the
|
|
||||||
// heredoc body and track that statefully
|
|
||||||
while (me_isspace(lexer->lookahead))
|
|
||||||
{
|
|
||||||
if (did_advance)
|
|
||||||
{
|
|
||||||
advance(lexer);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if (lexer->get_column(lexer) == 0)
|
||||||
|
{
|
||||||
|
while (me_isspace(lexer->lookahead))
|
||||||
|
{
|
||||||
|
if (did_advance)
|
||||||
|
advance(lexer);
|
||||||
|
else
|
||||||
skip(lexer);
|
skip(lexer);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if (end_type != SIMPLE_HEREDOC_BODY)
|
if (end_type != SIMPLE_HEREDOC_BODY)
|
||||||
{
|
{
|
||||||
lexer->result_symbol = middle_type;
|
lexer->result_symbol = middle_type;
|
||||||
if (scan_heredoc_end_identifier(heredoc, lexer))
|
if (scan_heredoc_end_identifier(heredoc, lexer))
|
||||||
{
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if (end_type == SIMPLE_HEREDOC_BODY)
|
if (end_type == SIMPLE_HEREDOC_BODY)
|
||||||
{
|
{
|
||||||
lexer->result_symbol = end_type;
|
lexer->result_symbol = end_type;
|
||||||
lexer->mark_end(lexer);
|
lexer->mark_end(lexer);
|
||||||
if (scan_heredoc_end_identifier(heredoc, lexer))
|
if (scan_heredoc_end_identifier(heredoc, lexer))
|
||||||
{
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
did_advance = true;
|
did_advance = true;
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return (false);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
|
bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
|
||||||
{
|
{
|
||||||
if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols))
|
if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols))
|
||||||
{
|
{
|
||||||
|
|
@ -827,9 +778,7 @@ extglob_pattern:
|
||||||
{
|
{
|
||||||
// first skip ws, then check for ? * + @ !
|
// first skip ws, then check for ? * + @ !
|
||||||
while (me_isspace(lexer->lookahead))
|
while (me_isspace(lexer->lookahead))
|
||||||
{
|
|
||||||
skip(lexer);
|
skip(lexer);
|
||||||
}
|
|
||||||
|
|
||||||
if (lexer->lookahead == '?' || lexer->lookahead == '*' || lexer->lookahead == '+' || lexer->lookahead == '@' ||
|
if (lexer->lookahead == '?' || lexer->lookahead == '*' || lexer->lookahead == '+' || lexer->lookahead == '@' ||
|
||||||
lexer->lookahead == '!' || lexer->lookahead == '-' || lexer->lookahead == ')' || lexer->lookahead == '\\' ||
|
lexer->lookahead == '!' || lexer->lookahead == '-' || lexer->lookahead == ')' || lexer->lookahead == '\\' ||
|
||||||
|
|
@ -839,14 +788,10 @@ extglob_pattern:
|
||||||
{
|
{
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
if ((me_isspace(lexer->lookahead) || lexer->lookahead == '"') && lexer->lookahead != '\r' && lexer->lookahead != '\n')
|
if ((me_isspace(lexer->lookahead) || lexer->lookahead == '"') && lexer->lookahead != '\r' && lexer->lookahead != '\n')
|
||||||
{
|
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0)
|
if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0)
|
||||||
{
|
{
|
||||||
|
|
@ -878,18 +823,14 @@ extglob_pattern:
|
||||||
{
|
{
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
if (me_isspace(lexer->lookahead))
|
if (me_isspace(lexer->lookahead))
|
||||||
{
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// -\w is just a word, find something else special
|
// -\w is just a word, find something else special
|
||||||
if (lexer->lookahead == '-')
|
if (lexer->lookahead == '-')
|
||||||
|
|
@ -897,14 +838,10 @@ extglob_pattern:
|
||||||
lexer->mark_end(lexer);
|
lexer->mark_end(lexer);
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
while (me_isalnum(lexer->lookahead))
|
while (me_isalnum(lexer->lookahead))
|
||||||
{
|
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
}
|
|
||||||
|
|
||||||
if (lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.')
|
if (lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.')
|
||||||
{
|
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
lexer->mark_end(lexer);
|
lexer->mark_end(lexer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -950,9 +887,7 @@ extglob_pattern:
|
||||||
if (!me_isalnum(lexer->lookahead) && lexer->lookahead != '(' && lexer->lookahead != '"' && lexer->lookahead != '[' &&
|
if (!me_isalnum(lexer->lookahead) && lexer->lookahead != '(' && lexer->lookahead != '"' && lexer->lookahead != '[' &&
|
||||||
lexer->lookahead != '?' && lexer->lookahead != '/' && lexer->lookahead != '\\' && lexer->lookahead != '_' &&
|
lexer->lookahead != '?' && lexer->lookahead != '/' && lexer->lookahead != '\\' && lexer->lookahead != '_' &&
|
||||||
lexer->lookahead != '*')
|
lexer->lookahead != '*')
|
||||||
{
|
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
|
|
@ -1020,9 +955,7 @@ extglob_pattern:
|
||||||
{
|
{
|
||||||
lexer->mark_end(lexer);
|
lexer->mark_end(lexer);
|
||||||
if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
|
if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
|
||||||
{
|
|
||||||
state.saw_non_alphadot = true;
|
state.saw_non_alphadot = true;
|
||||||
}
|
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
if (lexer->lookahead == '(' || lexer->lookahead == '{')
|
if (lexer->lookahead == '(' || lexer->lookahead == '{')
|
||||||
{
|
{
|
||||||
|
|
@ -1048,29 +981,21 @@ extglob_pattern:
|
||||||
if (lexer->lookahead == '\\')
|
if (lexer->lookahead == '\\')
|
||||||
{
|
{
|
||||||
if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
|
if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
|
||||||
{
|
|
||||||
state.saw_non_alphadot = true;
|
state.saw_non_alphadot = true;
|
||||||
}
|
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
if (me_isspace(lexer->lookahead) || lexer->lookahead == '"')
|
if (me_isspace(lexer->lookahead) || lexer->lookahead == '"')
|
||||||
{
|
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
|
if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
|
||||||
{
|
|
||||||
state.saw_non_alphadot = true;
|
state.saw_non_alphadot = true;
|
||||||
}
|
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
}
|
}
|
||||||
if (!was_space)
|
if (!was_space)
|
||||||
{
|
|
||||||
lexer->mark_end(lexer);
|
lexer->mark_end(lexer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
lexer->result_symbol = EXTGLOB_PATTERN;
|
lexer->result_symbol = EXTGLOB_PATTERN;
|
||||||
scanner->last_glob_paren_depth = 0;
|
scanner->last_glob_paren_depth = 0;
|
||||||
|
|
@ -1166,37 +1091,50 @@ brace_start:
|
||||||
|
|
||||||
void *tree_sitter_sh_external_scanner_create()
|
void *tree_sitter_sh_external_scanner_create()
|
||||||
{
|
{
|
||||||
t_scanner *scanner = mem_alloc(sizeof(t_scanner));
|
t_scanner *scanner;
|
||||||
|
|
||||||
|
scanner = mem_alloc(sizeof(*scanner));
|
||||||
array_init(&scanner->heredocs);
|
array_init(&scanner->heredocs);
|
||||||
return scanner;
|
return scanner;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
** tree-sitter entry point: forwards to scan() with the payload viewed as
** the scanner state, and returns scan()'s result unchanged.
*/
bool	tree_sitter_sh_external_scanner_scan(void *payload, TSLexer *lexer,
		const bool *valid_symbols)
{
	return (scan((t_scanner *)payload, lexer, valid_symbols));
}
|
||||||
|
|
||||||
/*
** tree-sitter entry point: forwards to serialize() with the payload viewed
** as the scanner state and `state` as the output buffer, and returns
** serialize()'s result unchanged.
*/
t_u32	tree_sitter_sh_external_scanner_serialize(void *payload, t_u8 *state)
{
	return (serialize((t_scanner *)payload, state));
}
|
||||||
|
|
||||||
/*
** tree-sitter entry point: forwards to deserialize() with the payload
** viewed as the scanner state, passing along the `length` bytes found in
** `state`.
*/
void	tree_sitter_sh_external_scanner_deserialize(void *payload,
		const t_u8 *state, t_u32 length)
{
	deserialize((t_scanner *)payload, state, length);
}
|
||||||
|
|
||||||
void tree_sitter_sh_external_scanner_destroy(void *payload)
|
void tree_sitter_sh_external_scanner_destroy(void *payload)
|
||||||
{
|
{
|
||||||
t_scanner *scanner = (t_scanner *)payload;
|
t_scanner *scanner;
|
||||||
for (size_t i = 0; i < scanner->heredocs.size; i++)
|
t_heredoc *heredoc;
|
||||||
|
t_usize i;
|
||||||
|
|
||||||
|
scanner = (t_scanner *)payload;
|
||||||
|
i = 0;
|
||||||
|
while (i < scanner->heredocs.size)
|
||||||
{
|
{
|
||||||
t_heredoc *heredoc = array_get(&scanner->heredocs, i);
|
heredoc = array_get(&scanner->heredocs, i++);
|
||||||
array_delete(&heredoc->current_leading_word);
|
string_free(heredoc->current_leading_word);
|
||||||
array_delete(&heredoc->delimiter);
|
string_free(heredoc->delimiter);
|
||||||
}
|
}
|
||||||
array_delete(&scanner->heredocs);
|
array_delete(&scanner->heredocs);
|
||||||
mem_free(scanner);
|
mem_free(scanner);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue