diff --git a/parser/Filelist.parser.mk b/parser/Filelist.parser.mk index 7e63232b..c7b14639 100644 --- a/parser/Filelist.parser.mk +++ b/parser/Filelist.parser.mk @@ -5,11 +5,12 @@ passes/fold_double_amp \ passes/fold_double_carret \ passes/fold_double_pipe \ passes/fold_expansion \ +passes/fold_no_quote \ passes/fold_whitespace \ passes/template_file \ +token_lifetime \ tokenizer \ tokenizer_utils \ -token_lifetime \ GEN_FILES = \ \ diff --git a/parser/include/parser/passes.h b/parser/include/parser/passes.h index 05d7b888..ec85bcf9 100644 --- a/parser/include/parser/passes.h +++ b/parser/include/parser/passes.h @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 18:43:41 by maiboyer #+# #+# */ -/* Updated: 2024/10/04 19:01:40 by rparodi ### ########.fr */ +/* Updated: 2024/10/05 13:10:56 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -40,8 +40,10 @@ t_error ts_double_lcarret(t_vec_token input, t_vec_token *output); t_error ts_double_pipe(t_vec_token input, t_vec_token *output); t_error ts_double_rcarret(t_vec_token input, t_vec_token *output); t_error ts_double_string_pass(t_vec_token input, t_vec_token *output); +t_error ts_fold_no_quote(t_vec_token input, t_vec_token *output); t_error ts_fold_whitespace(t_vec_token input, t_vec_token *output); t_error ts_do_fuck_all(t_vec_token input, t_vec_token *output); t_error ts_fold_expension(t_vec_token input, t_vec_token *output); + #endif /* PASSES_H */ diff --git a/parser/include/parser/token.h b/parser/include/parser/token.h index 95868033..b41024dc 100644 --- a/parser/include/parser/token.h +++ b/parser/include/parser/token.h @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/26 17:59:23 by maiboyer #+# #+# */ -/* Updated: 2024/10/03 22:30:42 by maiboyer ### ########.fr */ +/* Updated: 2024/10/05 13:11:34 by maiboyer ### ########.fr */ /* */ /* 
************************************************************************** */ @@ -37,6 +37,7 @@ enum e_token TOK_SEMICOLON, // semicolor == ; TOK_SQUOTE, // single quote string TOK_WHITESPACE, // whitespace outside of quoted strings + TOK_NALPHANUM, // a non alphanumeric character, used in the expansion folding, then folded back into NQUOTE TOK_WORD, // a meta token, which contains subtokens }; @@ -53,9 +54,11 @@ t_token token_new_meta(enum e_token type); // This create a "simple" token consisting of a string t_token token_new(enum e_token type); t_token token_new_none(void); -void token_free(t_token tok); -bool token_is_meta(t_token tok); t_token token_clone(t_token *tok); +void token_free(t_token tok); + +bool token_is_meta(t_token tok); +bool token_is_noquote(enum e_token tok); /* PARSING */ t_error tokenize(t_const_str s, t_vec_token *out); diff --git a/parser/src/passes.c b/parser/src/passes.c index 4ddf4a37..7b2f41b5 100644 --- a/parser/src/passes.c +++ b/parser/src/passes.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 18:41:16 by maiboyer #+# #+# */ -/* Updated: 2024/10/04 19:41:34 by rparodi ### ########.fr */ +/* Updated: 2024/10/05 13:12:08 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -36,6 +36,7 @@ static const struct s_ts_pass_def g_ts_passes[] = {\ {ts_double_string_pass, "double string parser"}, \ {ts_fold_expension, "fold expansion"}, + {ts_fold_no_quote, "fold no quote"}, {ts_fold_whitespace, "fold whitespace"}, {ts_double_amp, "double amp => and"}, {ts_double_pipe, "double pipe => or"}, @@ -64,8 +65,8 @@ t_error ts_apply_passes(t_vec_token ts, t_vec_token *out) } static const struct s_ts_pass_def g_ts_dq_passes[] = {\ - {ts_do_fuck_all, "does nothing lol"}, \ {ts_fold_expension, "fold expansion"}, + {ts_fold_no_quote, "fold no quote"}, }; t_error ts_dq_apply_passes(t_vec_token ts, t_vec_token *out) diff --git 
a/parser/src/passes/double_quote_parsing.c b/parser/src/passes/double_quote_parsing.c index 17a15999..a6fd5190 100644 --- a/parser/src/passes/double_quote_parsing.c +++ b/parser/src/passes/double_quote_parsing.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ -/* Updated: 2024/10/03 22:43:27 by maiboyer ### ########.fr */ +/* Updated: 2024/10/05 13:06:17 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -21,6 +21,10 @@ void push_token_and_create_new(\ t_vec_token *tokens, t_token *tok, enum e_token ttype, t_const_str s); void push_token_and_set_new(\ t_vec_token *tokens, t_token *tok, enum e_token ttype, t_const_str s); +void push_token_and_create_new_chr(\ + t_vec_token *tokens, t_token *tok, enum e_token ttype, char c); +void push_token_and_set_new_chr(\ + t_vec_token *tokens, t_token *tok, enum e_token ttype, char c); t_error _parse_dquote_inner(t_token dquote, t_vec_token *append) { @@ -46,8 +50,10 @@ t_error _parse_dquote_inner(t_token dquote, t_vec_token *append) } string_push_char(&ctok.string, c); } - else if (c == '$') - push_token_and_create_new(&out.subtokens, &ctok, TOK_DOLLAR, "$"); + else if (c == '$') + push_token_and_create_new(&out.subtokens, &ctok, TOK_DOLLAR, "$"); + else if (!(me_isalnum(c) || c == '_')) + push_token_and_create_new_chr(&out.subtokens, &ctok, TOK_NALPHANUM, c); else { if (ctok.type == TOK_NONE) diff --git a/parser/src/passes/fold_expansion.c b/parser/src/passes/fold_expansion.c index 9f17cbe4..6c3127c1 100644 --- a/parser/src/passes/fold_expansion.c +++ b/parser/src/passes/fold_expansion.c @@ -6,15 +6,25 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ -/* Updated: 2024/10/03 22:50:40 by maiboyer ### ########.fr */ +/* Updated: 2024/10/05 13:19:33 by maiboyer ### ########.fr */ /* */ /*
************************************************************************** */ +#include "me/str/str.h" #include "me/types.h" #include "me/vec/vec_token.h" #include "parser/passes.h" #include "parser/token.h" +bool _can_be_varname(t_token *tok) +{ + if (tok->type == TOK_DOLLAR) + return (true); + if (tok->type != TOK_NALPHANUM) + return (false); + return (str_find_chr("!?#*-", tok->string.buf[0]) != NULL); +} + /// This is a sample pass /// /// There is a few rules the rest of the tokenizer machinery assumes @@ -38,8 +48,8 @@ t_error ts_fold_expension(t_vec_token input, t_vec_token *output) if (i + 1 >= input.len) vec_token_push(&out, token_clone(&input.buffer[i])); else if (input.buffer[i].type == TOK_DOLLAR \ - && (input.buffer[i + 1].type == TOK_DOLLAR \ - || input.buffer[i + 1].type == TOK_NQUOTE)) + && (input.buffer[i + 1].type == TOK_NQUOTE \ + || _can_be_varname(&input.buffer[i+1]))) { tmp = token_clone(&input.buffer[++i]); tmp.type= TOK_EXPENSION; diff --git a/parser/src/passes/fold_no_quote.c b/parser/src/passes/fold_no_quote.c new file mode 100644 index 00000000..bc6456d4 --- /dev/null +++ b/parser/src/passes/fold_no_quote.c @@ -0,0 +1,55 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* fold_no_quote.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ +/* Updated: 2024/10/05 13:19:50 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/string/string.h" +#include "parser/passes.h" +#include "me/types.h" +#include "me/vec/vec_token.h" +#include "parser/token.h" + +/// This is a sample pass +/// +/// There is a few rules the rest of the tokenizer machinery assumes +/// theses function follows: +/// - the input vec WILL be freed when the function return, even in +/// case of error +/// - the output vector isn't 
populated if the function returns an error, +/// thus it shouldn't be freed in case of error +/// - the output tokens may not be direct copy of the input tokens, +/// but need to be cloned (different allocations for stuff) + +t_error ts_fold_no_quote(t_vec_token input, t_vec_token *output) +{ + t_vec_token out; + t_usize i; + t_usize j; + t_token tmp; + + i = 0; + out = vec_token_new(input.len, token_free); + while (i < input.len) + { + if (token_is_noquote(input.buffer[i].type)) + { + j = 0; + tmp = token_new(TOK_NQUOTE); + while (i + j < input.len && token_is_noquote(input.buffer[i + j].type)) + string_push(&tmp.string, input.buffer[i + j++].string.buf); + vec_token_push(&out, tmp); + i += j; + } + else + vec_token_push(&out, token_clone(&input.buffer[i++])); + } + vec_token_free(input); + return (*output = out, NO_ERROR); +} diff --git a/parser/src/token_lifetime.c b/parser/src/token_lifetime.c index c2bb5a1d..3525de9a 100644 --- a/parser/src/token_lifetime.c +++ b/parser/src/token_lifetime.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/28 14:37:13 by maiboyer #+# #+# */ -/* Updated: 2024/10/02 19:11:25 by maiboyer ### ########.fr */ +/* Updated: 2024/10/05 13:13:16 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -64,6 +64,13 @@ t_token token_clone(t_token *tok) return (out); } +bool token_is_noquote(enum e_token ttype) +{ + return (ttype == TOK_NQUOTE \ + || ttype == TOK_DOLLAR \ + || ttype == TOK_NALPHANUM); +} + // TO REMOVE t_str token_name(t_token *token) { @@ -107,5 +114,7 @@ t_str token_name(t_token *token) return ("WHITESPACE"); if (token->type == TOK_WORD) return ("WORD"); + if (token->type == TOK_NALPHANUM) + return ("NALPHANUM"); return (NULL); } diff --git a/parser/src/tokenizer.c b/parser/src/tokenizer.c index 3d1dc635..80d6304d 100644 --- a/parser/src/tokenizer.c +++ b/parser/src/tokenizer.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ 
+#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/30 19:39:39 by maiboyer #+# #+# */ -/* Updated: 2024/10/03 22:44:57 by maiboyer ### ########.fr */ +/* Updated: 2024/10/05 13:02:28 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -20,6 +20,10 @@ void push_token_and_create_new(\ t_vec_token *tokens, t_token *tok, enum e_token ttype, t_const_str s); void push_token_and_set_new(\ t_vec_token *tokens, t_token *tok, enum e_token ttype, t_const_str s); +void push_token_and_create_new_chr(\ + t_vec_token *tokens, t_token *tok, enum e_token ttype, char c); +void push_token_and_set_new_chr(\ + t_vec_token *tokens, t_token *tok, enum e_token ttype, char c); static void handle_quote(t_vec_token *ret, char chr, t_token *tok, char *quote) { @@ -55,6 +59,8 @@ static void handle_noquote(t_vec_token *ret, char chr, t_token *tok, char *quote push_token_and_create_new(ret, tok, TOK_SEMICOLON, ";"); else if (me_isspace(chr)) push_token_and_create_new(ret, tok, TOK_WHITESPACE, " "); + else if (!(me_isalnum(chr) || chr == '_')) + push_token_and_create_new_chr(ret, tok, TOK_NALPHANUM, chr); else { if (tok->type == TOK_NONE) diff --git a/parser/src/tokenizer_utils.c b/parser/src/tokenizer_utils.c index f0a9a17c..8d42583b 100644 --- a/parser/src/tokenizer_utils.c +++ b/parser/src/tokenizer_utils.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/03 22:07:25 by maiboyer #+# #+# */ -/* Updated: 2024/10/03 22:08:14 by maiboyer ### ########.fr */ +/* Updated: 2024/10/05 13:00:52 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -22,6 +22,10 @@ void push_token_and_create_new(\ t_vec_token *tokens, t_token *tok, enum e_token ttype, t_const_str s); void push_token_and_set_new(\ t_vec_token *tokens, t_token *tok, enum e_token ttype, t_const_str s); +void push_token_and_create_new_chr(\ + t_vec_token *tokens, t_token 
*tok, enum e_token ttype, char c); +void push_token_and_set_new_chr(\ + t_vec_token *tokens, t_token *tok, enum e_token ttype, char c); void push_token_and_create_new(\ t_vec_token *tokens, t_token *tok, enum e_token ttype, t_const_str s) @@ -44,3 +48,23 @@ void push_token_and_set_new(\ *tok = token_new(ttype); string_push(&tok->string, s); } + +void push_token_and_create_new_chr(\ + t_vec_token *tokens, t_token *tok, enum e_token ttype, char c) +{ + char tmp[2]; + + tmp[0] = c; + tmp[1] = '\0'; + push_token_and_create_new(tokens, tok, ttype, tmp); +} + +void push_token_and_set_new_chr(\ + t_vec_token *tokens, t_token *tok, enum e_token ttype, char c) +{ + char tmp[2]; + + tmp[0] = c; + tmp[1] = '\0'; + push_token_and_set_new(tokens, tok, ttype, tmp); +}