Update: Work on AST
This commit is contained in:
parent
8f00b8fd9b
commit
5759396eb2
9 changed files with 79 additions and 1297 deletions
|
|
@ -6,50 +6,48 @@
|
|||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/04/25 16:13:52 by maiboyer #+# #+# */
|
||||
/* Updated: 2024/05/01 17:38:14 by maiboyer ### ########.fr */
|
||||
/* Updated: 2024/06/09 21:46:14 by maiboyer ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "../parse_types.h"
|
||||
#include "../static/headers/constants.h"
|
||||
#include "../static/headers/symbols.h"
|
||||
#include "../parse_types.h"
|
||||
|
||||
bool lex_keywords_main(t_lexer *lexer, t_state_id state);
|
||||
bool lex_normal_main(t_lexer *lexer, t_state_id state);
|
||||
bool tree_sitter_sh_external_scanner_scan(void *ctx, t_lexer *lexer, const bool *ret);
|
||||
const bool *create_external_scanner_states(void);
|
||||
const char *const *create_field_names(void);
|
||||
const char *const *create_symbols_names(void);
|
||||
const t_field_map_entry *create_field_map_entries(void);
|
||||
const t_field_map_slice *create_field_map_slices(void);
|
||||
const t_lex_modes *create_lex_modes(void);
|
||||
const t_parse_action_entry *create_parse_actions_entries(void);
|
||||
const t_state_id *create_primary_state_ids(void);
|
||||
const t_symbol *create_alias_sequences(void);
|
||||
const t_symbol *create_external_scanner_symbol_map(void);
|
||||
const t_symbol *create_non_terminal_alias_map(void);
|
||||
const t_symbol *create_unique_symbols_map(void);
|
||||
const t_symbol_metadata *create_symbols_metadata(void);
|
||||
const uint16_t *create_parse_table(void);
|
||||
const uint16_t *create_small_parse_table(void);
|
||||
const uint32_t *create_small_parse_table_map(void);
|
||||
const t_parse_action_entry *create_parse_actions_entries(void);
|
||||
const char *const *create_symbols_names(void);
|
||||
const char *const *create_field_names(void);
|
||||
const t_field_map_slice *create_field_map_slices(void);
|
||||
const t_field_map_entry *create_field_map_entries(void);
|
||||
const t_symbol_metadata *create_symbols_metadata(void);
|
||||
const t_symbol *create_unique_symbols_map(void);
|
||||
const t_symbol *create_non_terminal_alias_map(void);
|
||||
const t_symbol *create_alias_sequences(void);
|
||||
const t_lex_modes *create_lex_modes(void);
|
||||
const t_state_id *create_primary_state_ids(void);
|
||||
const bool *create_external_scanner_states(void);
|
||||
const t_symbol *create_external_scanner_symbol_map(void);
|
||||
bool lex_normal_main(t_lexer *lexer, t_state_id state);
|
||||
bool lex_keywords_main(t_lexer *lexer, t_state_id state);
|
||||
void *tree_sitter_bash_external_scanner_create(void);
|
||||
void tree_sitter_bash_external_scanner_destroy(void *ctx);
|
||||
bool tree_sitter_bash_external_scanner_scan(void *ctx, t_lexer *lexer,
|
||||
const bool *ret);
|
||||
uint32_t tree_sitter_bash_external_scanner_serialize(void *ctx, char *s);
|
||||
void tree_sitter_bash_external_scanner_deserialize(void *ctx, const char *s,
|
||||
uint32_t val);
|
||||
uint32_t tree_sitter_sh_external_scanner_serialize(void *ctx, char *s);
|
||||
void tree_sitter_sh_external_scanner_deserialize(void *ctx, const char *s, uint32_t val);
|
||||
void tree_sitter_sh_external_scanner_destroy(void *ctx);
|
||||
void *tree_sitter_sh_external_scanner_create(void);
|
||||
|
||||
static t_scanner init_scanner(void)
|
||||
{
|
||||
return ((t_scanner){
|
||||
create_external_scanner_states(),
|
||||
create_external_scanner_symbol_map(),
|
||||
tree_sitter_bash_external_scanner_create,
|
||||
tree_sitter_bash_external_scanner_destroy,
|
||||
tree_sitter_bash_external_scanner_scan,
|
||||
tree_sitter_bash_external_scanner_serialize,
|
||||
tree_sitter_bash_external_scanner_deserialize,
|
||||
tree_sitter_sh_external_scanner_create,
|
||||
tree_sitter_sh_external_scanner_destroy,
|
||||
tree_sitter_sh_external_scanner_scan,
|
||||
tree_sitter_sh_external_scanner_serialize,
|
||||
tree_sitter_sh_external_scanner_deserialize,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,40 +28,38 @@ enum TokenType
|
|||
ESAC,
|
||||
ERROR_RECOVERY,
|
||||
};
|
||||
|
||||
/*
|
||||
enum TokenType {
|
||||
HEREDOC_START,
|
||||
SIMPLE_HEREDOC_BODY,
|
||||
HEREDOC_BODY_BEGINNING,
|
||||
HEREDOC_CONTENT,
|
||||
HEREDOC_END,
|
||||
FILE_DESCRIPTOR,
|
||||
EMPTY_VALUE,
|
||||
CONCAT,
|
||||
VARIABLE_NAME,
|
||||
TEST_OPERATOR,
|
||||
REGEX,
|
||||
REGEX_NO_SLASH,
|
||||
REGEX_NO_SPACE,
|
||||
EXPANSION_WORD,
|
||||
EXTGLOB_PATTERN,
|
||||
BARE_DOLLAR,
|
||||
BRACE_START,
|
||||
IMMEDIATE_DOUBLE_HASH,
|
||||
EXTERNAL_EXPANSION_SYM_HASH,
|
||||
EXTERNAL_EXPANSION_SYM_BANG,
|
||||
EXTERNAL_EXPANSION_SYM_EQUAL,
|
||||
CLOSING_BRACE,
|
||||
CLOSING_BRACKET,
|
||||
HEREDOC_ARROW,
|
||||
HEREDOC_ARROW_DASH,
|
||||
NEWLINE,
|
||||
OPENING_PAREN,
|
||||
ESAC,
|
||||
ERROR_RECOVERY,
|
||||
};
|
||||
*/
|
||||
// enum TokenType {
|
||||
// HEREDOC_START,
|
||||
// SIMPLE_HEREDOC_BODY,
|
||||
// HEREDOC_BODY_BEGINNING,
|
||||
// HEREDOC_CONTENT,
|
||||
// HEREDOC_END,
|
||||
// FILE_DESCRIPTOR,
|
||||
// EMPTY_VALUE,
|
||||
// CONCAT,
|
||||
// VARIABLE_NAME,
|
||||
// TEST_OPERATOR,
|
||||
// REGEX,
|
||||
// REGEX_NO_SLASH,
|
||||
// REGEX_NO_SPACE,
|
||||
// EXPANSION_WORD,
|
||||
// EXTGLOB_PATTERN,
|
||||
// BARE_DOLLAR,
|
||||
// BRACE_START,
|
||||
// IMMEDIATE_DOUBLE_HASH,
|
||||
// EXTERNAL_EXPANSION_SYM_HASH,
|
||||
// EXTERNAL_EXPANSION_SYM_BANG,
|
||||
// EXTERNAL_EXPANSION_SYM_EQUAL,
|
||||
// CLOSING_BRACE,
|
||||
// CLOSING_BRACKET,
|
||||
// HEREDOC_ARROW,
|
||||
// HEREDOC_ARROW_DASH,
|
||||
// NEWLINE,
|
||||
// OPENING_PAREN,
|
||||
// ESAC,
|
||||
// ERROR_RECOVERY,
|
||||
// };
|
||||
typedef struct s_lexer_data TSLexer;
|
||||
|
||||
typedef Array(char) String;
|
||||
|
||||
|
|
@ -91,12 +89,12 @@ typedef struct
|
|||
Array(Heredoc) heredocs;
|
||||
} Scanner;
|
||||
|
||||
static inline void advance(t_lexer_data *lexer)
|
||||
static inline void advance(TSLexer *lexer)
|
||||
{
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
static inline void skip(t_lexer_data *lexer)
|
||||
static inline void skip(TSLexer *lexer)
|
||||
{
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
|
@ -215,11 +213,11 @@ static void deserialize(Scanner *scanner, const char *buffer, unsigned length)
|
|||
* POSIX-mandated substitution, and assumes the default value for
|
||||
* IFS.
|
||||
*/
|
||||
static bool advance_word(t_lexer_data *lexer, String *unquoted_word)
|
||||
static bool advance_word(TSLexer *lexer, String *unquoted_word)
|
||||
{
|
||||
bool empty = true;
|
||||
|
||||
bool empty = true;
|
||||
int32_t quote = 0;
|
||||
|
||||
if (lexer->lookahead == '\'' || lexer->lookahead == '"')
|
||||
{
|
||||
quote = lexer->lookahead;
|
||||
|
|
@ -233,9 +231,7 @@ static bool advance_word(t_lexer_data *lexer, String *unquoted_word)
|
|||
{
|
||||
advance(lexer);
|
||||
if (!lexer->lookahead)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
empty = false;
|
||||
array_push(unquoted_word, lexer->lookahead);
|
||||
|
|
@ -244,32 +240,29 @@ static bool advance_word(t_lexer_data *lexer, String *unquoted_word)
|
|||
array_push(unquoted_word, '\0');
|
||||
|
||||
if (quote && lexer->lookahead == quote)
|
||||
{
|
||||
advance(lexer);
|
||||
}
|
||||
|
||||
return !empty;
|
||||
}
|
||||
|
||||
static inline bool scan_bare_dollar(t_lexer_data *lexer)
|
||||
static inline bool scan_bare_dollar(TSLexer *lexer)
|
||||
{
|
||||
while (iswspace(lexer->lookahead) && lexer->lookahead != '\n' && !lexer->eof(lexer))
|
||||
{
|
||||
skip(lexer);
|
||||
}
|
||||
|
||||
|
||||
if (lexer->lookahead == '$')
|
||||
{
|
||||
advance(lexer);
|
||||
lexer->result_symbol = BARE_DOLLAR;
|
||||
lexer->mark_end(lexer);
|
||||
return iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"';
|
||||
return (iswspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"');
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool scan_heredoc_start(Heredoc *heredoc, t_lexer_data *lexer)
|
||||
static bool scan_heredoc_start(Heredoc *heredoc, TSLexer *lexer)
|
||||
{
|
||||
while (iswspace(lexer->lookahead))
|
||||
{
|
||||
|
|
@ -288,7 +281,7 @@ static bool scan_heredoc_start(Heredoc *heredoc, t_lexer_data *lexer)
|
|||
return found_delimiter;
|
||||
}
|
||||
|
||||
static bool scan_heredoc_end_identifier(Heredoc *heredoc, t_lexer_data *lexer)
|
||||
static bool scan_heredoc_end_identifier(Heredoc *heredoc, TSLexer *lexer)
|
||||
{
|
||||
reset_string(&heredoc->current_leading_word);
|
||||
// Scan the first 'n' characters on this line, to see if they match the
|
||||
|
|
@ -308,7 +301,7 @@ static bool scan_heredoc_end_identifier(Heredoc *heredoc, t_lexer_data *lexer)
|
|||
return heredoc->delimiter.size == 0 ? false : strcmp(heredoc->current_leading_word.contents, heredoc->delimiter.contents) == 0;
|
||||
}
|
||||
|
||||
static bool scan_heredoc_content(Scanner *scanner, t_lexer_data *lexer, enum TokenType middle_type, enum TokenType end_type)
|
||||
static bool scan_heredoc_content(Scanner *scanner, TSLexer *lexer, enum TokenType middle_type, enum TokenType end_type)
|
||||
{
|
||||
bool did_advance = false;
|
||||
Heredoc *heredoc = array_back(&scanner->heredocs);
|
||||
|
|
@ -434,7 +427,7 @@ static bool scan_heredoc_content(Scanner *scanner, t_lexer_data *lexer, enum Tok
|
|||
}
|
||||
}
|
||||
|
||||
static bool scan(Scanner *scanner, t_lexer_data *lexer, const bool *valid_symbols)
|
||||
static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
|
||||
{
|
||||
if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols))
|
||||
{
|
||||
|
|
@ -484,11 +477,6 @@ static bool scan(Scanner *scanner, t_lexer_data *lexer, const bool *valid_symbol
|
|||
return true;
|
||||
}
|
||||
}
|
||||
if (iswspace(lexer->lookahead) && !valid_symbols[EXPANSION_WORD])
|
||||
{
|
||||
lexer->result_symbol = CONCAT;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !in_error_recovery(valid_symbols))
|
||||
|
|
@ -739,7 +727,7 @@ static bool scan(Scanner *scanner, t_lexer_data *lexer, const bool *valid_symbol
|
|||
return true;
|
||||
}
|
||||
|
||||
if (valid_symbols[REGEX] && !in_error_recovery(valid_symbols))
|
||||
if ((valid_symbols[REGEX]) && !in_error_recovery(valid_symbols))
|
||||
{
|
||||
if (valid_symbols[REGEX])
|
||||
{
|
||||
|
|
@ -805,32 +793,24 @@ static bool scan(Scanner *scanner, t_lexer_data *lexer, const bool *valid_symbol
|
|||
break;
|
||||
case '{':
|
||||
if (!state.last_was_escape)
|
||||
{
|
||||
state.brace_depth++;
|
||||
}
|
||||
state.last_was_escape = false;
|
||||
break;
|
||||
case ')':
|
||||
if (state.paren_depth == 0)
|
||||
{
|
||||
state.done = true;
|
||||
}
|
||||
state.paren_depth--;
|
||||
state.last_was_escape = false;
|
||||
break;
|
||||
case ']':
|
||||
if (state.bracket_depth == 0)
|
||||
{
|
||||
state.done = true;
|
||||
}
|
||||
state.bracket_depth--;
|
||||
state.last_was_escape = false;
|
||||
break;
|
||||
case '}':
|
||||
if (state.brace_depth == 0)
|
||||
{
|
||||
state.done = true;
|
||||
}
|
||||
state.brace_depth--;
|
||||
state.last_was_escape = false;
|
||||
break;
|
||||
|
|
@ -1220,36 +1200,35 @@ expansion_word:
|
|||
}
|
||||
|
||||
brace_start:
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void *tree_sitter_bash_external_scanner_create()
|
||||
void *tree_sitter_sh_external_scanner_create()
|
||||
{
|
||||
Scanner *scanner = calloc(1, sizeof(Scanner));
|
||||
array_init(&scanner->heredocs);
|
||||
return scanner;
|
||||
}
|
||||
|
||||
bool tree_sitter_bash_external_scanner_scan(void *payload, t_lexer_data *lexer, const bool *valid_symbols)
|
||||
bool tree_sitter_sh_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols)
|
||||
{
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
return scan(scanner, lexer, valid_symbols);
|
||||
}
|
||||
|
||||
unsigned tree_sitter_bash_external_scanner_serialize(void *payload, char *state)
|
||||
unsigned tree_sitter_sh_external_scanner_serialize(void *payload, char *state)
|
||||
{
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
return serialize(scanner, state);
|
||||
}
|
||||
|
||||
void tree_sitter_bash_external_scanner_deserialize(void *payload, const char *state, unsigned length)
|
||||
void tree_sitter_sh_external_scanner_deserialize(void *payload, const char *state, unsigned length)
|
||||
{
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
deserialize(scanner, state, length);
|
||||
}
|
||||
|
||||
void tree_sitter_bash_external_scanner_destroy(void *payload)
|
||||
void tree_sitter_sh_external_scanner_destroy(void *payload)
|
||||
{
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
for (size_t i = 0; i < scanner->heredocs.size; i++)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue