Update: Work on AST

This commit is contained in:
Maix0 2024-06-09 22:27:00 +02:00
parent 8f00b8fd9b
commit 5759396eb2
9 changed files with 79 additions and 1297 deletions

View file

@ -6,50 +6,48 @@
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2024/04/25 16:13:52 by maiboyer #+# #+# */
/* Updated: 2024/05/01 17:38:14 by maiboyer ### ########.fr */
/* Updated: 2024/06/09 21:46:14 by maiboyer ### ########.fr */
/* */
/* ************************************************************************** */
#include "../parse_types.h"
#include "../static/headers/constants.h"
#include "../static/headers/symbols.h"
#include "../parse_types.h"
bool lex_keywords_main(t_lexer *lexer, t_state_id state);
bool lex_normal_main(t_lexer *lexer, t_state_id state);
bool tree_sitter_sh_external_scanner_scan(void *ctx, t_lexer *lexer, const bool *ret);
const bool *create_external_scanner_states(void);
const char *const *create_field_names(void);
const char *const *create_symbols_names(void);
const t_field_map_entry *create_field_map_entries(void);
const t_field_map_slice *create_field_map_slices(void);
const t_lex_modes *create_lex_modes(void);
const t_parse_action_entry *create_parse_actions_entries(void);
const t_state_id *create_primary_state_ids(void);
const t_symbol *create_alias_sequences(void);
const t_symbol *create_external_scanner_symbol_map(void);
const t_symbol *create_non_terminal_alias_map(void);
const t_symbol *create_unique_symbols_map(void);
const t_symbol_metadata *create_symbols_metadata(void);
const uint16_t *create_parse_table(void);
const uint16_t *create_small_parse_table(void);
const uint32_t *create_small_parse_table_map(void);
const t_parse_action_entry *create_parse_actions_entries(void);
const char *const *create_symbols_names(void);
const char *const *create_field_names(void);
const t_field_map_slice *create_field_map_slices(void);
const t_field_map_entry *create_field_map_entries(void);
const t_symbol_metadata *create_symbols_metadata(void);
const t_symbol *create_unique_symbols_map(void);
const t_symbol *create_non_terminal_alias_map(void);
const t_symbol *create_alias_sequences(void);
const t_lex_modes *create_lex_modes(void);
const t_state_id *create_primary_state_ids(void);
const bool *create_external_scanner_states(void);
const t_symbol *create_external_scanner_symbol_map(void);
bool lex_normal_main(t_lexer *lexer, t_state_id state);
bool lex_keywords_main(t_lexer *lexer, t_state_id state);
void *tree_sitter_bash_external_scanner_create(void);
void tree_sitter_bash_external_scanner_destroy(void *ctx);
bool tree_sitter_bash_external_scanner_scan(void *ctx, t_lexer *lexer,
const bool *ret);
uint32_t tree_sitter_bash_external_scanner_serialize(void *ctx, char *s);
void tree_sitter_bash_external_scanner_deserialize(void *ctx, const char *s,
uint32_t val);
uint32_t tree_sitter_sh_external_scanner_serialize(void *ctx, char *s);
void tree_sitter_sh_external_scanner_deserialize(void *ctx, const char *s, uint32_t val);
void tree_sitter_sh_external_scanner_destroy(void *ctx);
void *tree_sitter_sh_external_scanner_create(void);
/*
 * Builds a t_scanner value from a positional compound literal and returns it
 * by value. The initializers are, in order: the table returned by
 * create_external_scanner_states(), the table returned by
 * create_external_scanner_symbol_map(), then the external scanner's
 * create / destroy / scan / serialize / deserialize function pointers.
 *
 * NOTE(review): this listing carries both the tree_sitter_bash_* and the
 * tree_sitter_sh_* callback sets; these look like the before/after sides of
 * the bash -> sh rename, so only one set is presumably live - confirm against
 * the actual file.
 */
static t_scanner init_scanner(void)
{
return ((t_scanner){
create_external_scanner_states(),
create_external_scanner_symbol_map(),
tree_sitter_bash_external_scanner_create,
tree_sitter_bash_external_scanner_destroy,
tree_sitter_bash_external_scanner_scan,
tree_sitter_bash_external_scanner_serialize,
tree_sitter_bash_external_scanner_deserialize,
tree_sitter_sh_external_scanner_create,
tree_sitter_sh_external_scanner_destroy,
tree_sitter_sh_external_scanner_scan,
tree_sitter_sh_external_scanner_serialize,
tree_sitter_sh_external_scanner_deserialize,
});
}

View file

@ -28,40 +28,38 @@ enum TokenType
ESAC,
ERROR_RECOVERY,
};
/*
enum TokenType {
HEREDOC_START,
SIMPLE_HEREDOC_BODY,
HEREDOC_BODY_BEGINNING,
HEREDOC_CONTENT,
HEREDOC_END,
FILE_DESCRIPTOR,
EMPTY_VALUE,
CONCAT,
VARIABLE_NAME,
TEST_OPERATOR,
REGEX,
REGEX_NO_SLASH,
REGEX_NO_SPACE,
EXPANSION_WORD,
EXTGLOB_PATTERN,
BARE_DOLLAR,
BRACE_START,
IMMEDIATE_DOUBLE_HASH,
EXTERNAL_EXPANSION_SYM_HASH,
EXTERNAL_EXPANSION_SYM_BANG,
EXTERNAL_EXPANSION_SYM_EQUAL,
CLOSING_BRACE,
CLOSING_BRACKET,
HEREDOC_ARROW,
HEREDOC_ARROW_DASH,
NEWLINE,
OPENING_PAREN,
ESAC,
ERROR_RECOVERY,
};
*/
// enum TokenType {
// HEREDOC_START,
// SIMPLE_HEREDOC_BODY,
// HEREDOC_BODY_BEGINNING,
// HEREDOC_CONTENT,
// HEREDOC_END,
// FILE_DESCRIPTOR,
// EMPTY_VALUE,
// CONCAT,
// VARIABLE_NAME,
// TEST_OPERATOR,
// REGEX,
// REGEX_NO_SLASH,
// REGEX_NO_SPACE,
// EXPANSION_WORD,
// EXTGLOB_PATTERN,
// BARE_DOLLAR,
// BRACE_START,
// IMMEDIATE_DOUBLE_HASH,
// EXTERNAL_EXPANSION_SYM_HASH,
// EXTERNAL_EXPANSION_SYM_BANG,
// EXTERNAL_EXPANSION_SYM_EQUAL,
// CLOSING_BRACE,
// CLOSING_BRACKET,
// HEREDOC_ARROW,
// HEREDOC_ARROW_DASH,
// NEWLINE,
// OPENING_PAREN,
// ESAC,
// ERROR_RECOVERY,
// };
typedef struct s_lexer_data TSLexer;
typedef Array(char) String;
@ -91,12 +89,12 @@ typedef struct
Array(Heredoc) heredocs;
} Scanner;
// Invokes the lexer's own `advance` callback with `false` as the second
// argument (the sibling helper skip() passes `true` instead).
// Presumably `false` means the consumed character stays part of the current
// token - confirm against the lexer API.
// Both the t_lexer_data and the TSLexer signature lines appear in this
// listing; TSLexer is a typedef of struct s_lexer_data.
static inline void advance(t_lexer_data *lexer)
static inline void advance(TSLexer *lexer)
{
lexer->advance(lexer, false);
}
// Invokes the lexer's own `advance` callback with `true` as the second
// argument (the sibling helper advance() passes `false` instead).
// Presumably `true` means the consumed character is excluded from the current
// token - confirm against the lexer API.
// Both the t_lexer_data and the TSLexer signature lines appear in this
// listing; TSLexer is a typedef of struct s_lexer_data.
static inline void skip(t_lexer_data *lexer)
static inline void skip(TSLexer *lexer)
{
lexer->advance(lexer, true);
}
@ -215,11 +213,11 @@ static void deserialize(Scanner *scanner, const char *buffer, unsigned length)
* POSIX-mandated substitution, and assumes the default value for
* IFS.
*/
static bool advance_word(t_lexer_data *lexer, String *unquoted_word)
static bool advance_word(TSLexer *lexer, String *unquoted_word)
{
bool empty = true;
bool empty = true;
int32_t quote = 0;
if (lexer->lookahead == '\'' || lexer->lookahead == '"')
{
quote = lexer->lookahead;
@ -233,9 +231,7 @@ static bool advance_word(t_lexer_data *lexer, String *unquoted_word)
{
advance(lexer);
if (!lexer->lookahead)
{
return false;
}
}
empty = false;
array_push(unquoted_word, lexer->lookahead);
@ -244,32 +240,29 @@ static bool advance_word(t_lexer_data *lexer, String *unquoted_word)
array_push(unquoted_word, '\0');
if (quote && lexer->lookahead == quote)
{
advance(lexer);
}
return !empty;
}
// Tries to recognise a `$` that stands on its own.
//
// Leading whitespace other than '\n' is skipped until end of input. If the
// lookahead is then '$', it is consumed, result_symbol is set to BARE_DOLLAR
// and the token end is marked. The function returns true only when the
// character following the '$' is whitespace, a double quote, or end of input.
// Returns false when the lookahead is not '$'.
//
// Note that result_symbol and the marked end are updated before the final
// check, so they are modified even when the function ends up returning false.
// Both the t_lexer_data and the TSLexer signature lines, and both spellings
// of the return statement, appear in this listing.
static inline bool scan_bare_dollar(t_lexer_data *lexer)
static inline bool scan_bare_dollar(TSLexer *lexer)
{
// A newline is deliberately not skipped here.
while (iswspace(lexer->lookahead) && lexer->lookahead != '\n' && !lexer->eof(lexer))
{
skip(lexer);
}
if (lexer->lookahead == '$')
{
advance(lexer);
lexer->result_symbol = BARE_DOLLAR;
lexer->mark_end(lexer);
return iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"';
return (iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"');
}
return false;
}
static bool scan_heredoc_start(Heredoc *heredoc, t_lexer_data *lexer)
static bool scan_heredoc_start(Heredoc *heredoc, TSLexer *lexer)
{
while (iswspace(lexer->lookahead))
{
@ -288,7 +281,7 @@ static bool scan_heredoc_start(Heredoc *heredoc, t_lexer_data *lexer)
return found_delimiter;
}
static bool scan_heredoc_end_identifier(Heredoc *heredoc, t_lexer_data *lexer)
static bool scan_heredoc_end_identifier(Heredoc *heredoc, TSLexer *lexer)
{
reset_string(&heredoc->current_leading_word);
// Scan the first 'n' characters on this line, to see if they match the
@ -308,7 +301,7 @@ static bool scan_heredoc_end_identifier(Heredoc *heredoc, t_lexer_data *lexer)
return heredoc->delimiter.size == 0 ? false : strcmp(heredoc->current_leading_word.contents, heredoc->delimiter.contents) == 0;
}
static bool scan_heredoc_content(Scanner *scanner, t_lexer_data *lexer, enum TokenType middle_type, enum TokenType end_type)
static bool scan_heredoc_content(Scanner *scanner, TSLexer *lexer, enum TokenType middle_type, enum TokenType end_type)
{
bool did_advance = false;
Heredoc *heredoc = array_back(&scanner->heredocs);
@ -434,7 +427,7 @@ static bool scan_heredoc_content(Scanner *scanner, t_lexer_data *lexer, enum Tok
}
}
static bool scan(Scanner *scanner, t_lexer_data *lexer, const bool *valid_symbols)
static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
{
if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols))
{
@ -484,11 +477,6 @@ static bool scan(Scanner *scanner, t_lexer_data *lexer, const bool *valid_symbol
return true;
}
}
if (iswspace(lexer->lookahead) && !valid_symbols[EXPANSION_WORD])
{
lexer->result_symbol = CONCAT;
return true;
}
}
if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !in_error_recovery(valid_symbols))
@ -739,7 +727,7 @@ static bool scan(Scanner *scanner, t_lexer_data *lexer, const bool *valid_symbol
return true;
}
if (valid_symbols[REGEX] && !in_error_recovery(valid_symbols))
if ((valid_symbols[REGEX]) && !in_error_recovery(valid_symbols))
{
if (valid_symbols[REGEX])
{
@ -805,32 +793,24 @@ static bool scan(Scanner *scanner, t_lexer_data *lexer, const bool *valid_symbol
break;
case '{':
if (!state.last_was_escape)
{
state.brace_depth++;
}
state.last_was_escape = false;
break;
case ')':
if (state.paren_depth == 0)
{
state.done = true;
}
state.paren_depth--;
state.last_was_escape = false;
break;
case ']':
if (state.bracket_depth == 0)
{
state.done = true;
}
state.bracket_depth--;
state.last_was_escape = false;
break;
case '}':
if (state.brace_depth == 0)
{
state.done = true;
}
state.brace_depth--;
state.last_was_escape = false;
break;
@ -1220,36 +1200,35 @@ expansion_word:
}
brace_start:
return false;
}
// Allocates one zero-initialised Scanner with calloc, initialises its
// `heredocs` array, and returns the Scanner as an opaque pointer.
//
// NOTE(review): the calloc result is not checked before array_init touches
// scanner->heredocs, so an allocation failure would dereference NULL here.
// Both the bash_ and the sh_ signature lines appear in this listing.
void *tree_sitter_bash_external_scanner_create()
void *tree_sitter_sh_external_scanner_create()
{
Scanner *scanner = calloc(1, sizeof(Scanner));
array_init(&scanner->heredocs);
return scanner;
}
// Entry point wrapper: casts the opaque `payload` to Scanner * and forwards
// it, together with `lexer` and `valid_symbols`, to the file-local scan().
// Returns whatever scan() returns.
// Both the bash_/t_lexer_data and the sh_/TSLexer signature lines appear in
// this listing.
bool tree_sitter_bash_external_scanner_scan(void *payload, t_lexer_data *lexer, const bool *valid_symbols)
bool tree_sitter_sh_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols)
{
Scanner *scanner = (Scanner *)payload;
return scan(scanner, lexer, valid_symbols);
}
// Entry point wrapper: casts the opaque `payload` to Scanner * and forwards
// it with the `state` buffer to the file-local serialize().
// Returns the value produced by serialize() (presumably the number of bytes
// written into `state` - confirm against serialize()).
// Both the bash_ and the sh_ signature lines appear in this listing.
unsigned tree_sitter_bash_external_scanner_serialize(void *payload, char *state)
unsigned tree_sitter_sh_external_scanner_serialize(void *payload, char *state)
{
Scanner *scanner = (Scanner *)payload;
return serialize(scanner, state);
}
// Entry point wrapper: casts the opaque `payload` to Scanner * and forwards
// it with the `state` buffer and its `length` to the file-local
// deserialize(). Returns nothing.
// Both the bash_ and the sh_ signature lines appear in this listing.
void tree_sitter_bash_external_scanner_deserialize(void *payload, const char *state, unsigned length)
void tree_sitter_sh_external_scanner_deserialize(void *payload, const char *state, unsigned length)
{
Scanner *scanner = (Scanner *)payload;
deserialize(scanner, state, length);
}
void tree_sitter_bash_external_scanner_destroy(void *payload)
void tree_sitter_sh_external_scanner_destroy(void *payload)
{
Scanner *scanner = (Scanner *)payload;
for (size_t i = 0; i < scanner->heredocs.size; i++)