From b058348d35bf9c8bb16866d6c98aed762e36106b Mon Sep 17 00:00:00 2001 From: maix0 Date: Sun, 6 Oct 2024 13:53:30 +0200 Subject: [PATCH] update: normed stuff --- parser/Filelist.parser.mk | 4 + parser/include/parser/passes.h | 4 +- parser/src/passes.c | 45 ++++++---- parser/src/passes/double_quote_parsing.c | 63 +++++++------- parser/src/passes/fold_double_amp.c | 4 +- parser/src/passes/fold_double_carret.c | 6 +- parser/src/passes/fold_double_paren.c | 25 +++--- parser/src/passes/fold_double_pipe.c | 4 +- parser/src/passes/fold_expansion.c | 10 +-- parser/src/passes/fold_no_quote.c | 5 +- parser/src/passes/fold_redir.c | 21 +++-- parser/src/passes/fold_whitespace.c | 4 +- parser/src/passes/paren_to_nquote.c | 58 +++++++++++++ parser/src/passes/split_double_paren.c | 64 ++++++++++++++ parser/src/token_lifetime.c | 104 +++-------------------- parser/src/token_name.c | 66 ++++++++++++++ parser/src/token_utils.c | 44 ++++++++++ parser/src/tokenizer.c | 25 ++++-- parser/src/tokenizer_utils.c | 10 +-- parser/src/ts_print.c | 15 ++-- 20 files changed, 388 insertions(+), 193 deletions(-) create mode 100644 parser/src/passes/paren_to_nquote.c create mode 100644 parser/src/passes/split_double_paren.c create mode 100644 parser/src/token_name.c create mode 100644 parser/src/token_utils.c diff --git a/parser/Filelist.parser.mk b/parser/Filelist.parser.mk index 615649bd..55854971 100644 --- a/parser/Filelist.parser.mk +++ b/parser/Filelist.parser.mk @@ -9,8 +9,12 @@ passes/fold_expansion \ passes/fold_no_quote \ passes/fold_redir \ passes/fold_whitespace \ +passes/paren_to_nquote \ +passes/split_double_paren \ passes/template_file \ token_lifetime \ +token_name \ +token_utils \ tokenizer \ tokenizer_utils \ ts_print \ diff --git a/parser/include/parser/passes.h b/parser/include/parser/passes.h index 4885ad96..f18cfa1f 100644 --- a/parser/include/parser/passes.h +++ b/parser/include/parser/passes.h @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 18:43:41 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 18:42:17 by maiboyer ### ########.fr */ +/* Updated: 2024/10/06 13:12:04 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -47,6 +47,8 @@ t_error ts_fold_whitespace(t_vec_token input, t_vec_token *output); t_error ts_do_fuck_all(t_vec_token input, t_vec_token *output); t_error ts_fold_expension(t_vec_token input, t_vec_token *output); t_error ts_fold_redir(t_vec_token input, t_vec_token *output); +t_error ts_split_paren(t_vec_token input, t_vec_token *output); +t_error ts_paren_to_noquote(t_vec_token input, t_vec_token *output); #endif /* PASSES_H */ diff --git a/parser/src/passes.c b/parser/src/passes.c index 3cebe65b..9832d39f 100644 --- a/parser/src/passes.c +++ b/parser/src/passes.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 18:41:16 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 18:57:59 by maiboyer ### ########.fr */ +/* Updated: 2024/10/06 13:32:00 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -26,6 +26,17 @@ /// there is a few stuff we want to do, for example: /// - combine any *QUOTE token that are next to eachothers /// into a single metatoken WORD +/// - do somekind of arith expansion that will have any token between +/// DOLLAR DLPAREN and the matching DRPAREN in it +/// - do somekind of parenthesis token will have any token between +/// LPAREN and the matching 
RPAREN in it +/// - do somekind of CMD token that will store every token that consitute +/// command into a single token stopping at the correct ending: +/// semicolon, pipe, or, and +/// - do a smth that will take any into a single token +/// - do a smth that will take any TOK PIPE TOK into a single token and +/// merge if any of the TOK is also a pipeline +/// (merging may be done during the final ast building) // here is the signature easily accessible: // @@ -35,17 +46,18 @@ static const struct s_ts_pass_def g_ts_passes[] = {\ {ts_double_string_pass, "double string parser"}, \ - {ts_fold_expension, "fold expansion"}, - {ts_fold_no_quote, "fold no quote"}, - {ts_fold_whitespace, "fold whitespace"}, - {ts_double_amp, "double amp => and"}, - {ts_double_pipe, "double pipe => or"}, - {ts_double_lparen, "double lparen => dlparen"}, - {ts_double_rparen, "double rparen => drparen"}, - {ts_double_lcarret, "double lcarret => dlcarret"}, - {ts_double_rcarret, "double rcarrer => drcarret"}, + {ts_fold_expension, "fold expansion"}, \ + {ts_fold_no_quote, "fold no quote"}, \ + {ts_fold_whitespace, "fold whitespace"}, \ + {ts_double_amp, "double amp => and"}, \ + {ts_double_pipe, "double pipe => or"}, \ + {ts_double_lparen, "double lparen => dlparen"}, \ + {ts_double_rparen, "double rparen => drparen"}, \ + {ts_double_lcarret, "double lcarret => dlcarret"}, \ + {ts_double_rcarret, "double rcarrer => drcarret"}, \ // there should be an ts_fold_arith here - {ts_fold_redir, "fold redir+argument"}, +{ts_split_paren, "split double parenthesis"}, \ + {ts_fold_redir, "fold redir+argument"}, \ }; t_error ts_apply_passes(t_vec_token ts, t_vec_token *out) @@ -59,7 +71,8 @@ t_error ts_apply_passes(t_vec_token ts, t_vec_token *out) if (g_ts_passes[i].fn == NULL) return (vec_token_free(ts), ERROR); if ((g_ts_passes[i].fn)(ts, &next)) - return (me_eprintf("failed on %s token pass\n", g_ts_passes[i].name), ERROR); + return (me_eprintf("failed on %s token pass\n", \ + g_ts_passes[i].name), ERROR); else me_printf("Applied '%s' pass\n", g_ts_passes[i].name); ts = next; @@ -70,10 +83,12 @@ t_error ts_apply_passes(t_vec_token ts, t_vec_token *out) } static const struct s_ts_pass_def g_ts_dq_passes[] = {\ - {ts_double_lparen, "double lparen => dlparen"}, - {ts_double_rparen, "double rparen => drparen"}, - {ts_fold_expension, "fold expansion"}, + {ts_double_lparen, "double lparen => dlparen"}, \ + {ts_double_rparen, "double rparen => drparen"}, \ + {ts_fold_expension, "fold expansion"}, \ // there should be an ts_fold_arith here +{ts_split_paren, "split double parenthesis"}, \ + {ts_paren_to_noquote, "parenthesis to noquote"}, \ {ts_fold_no_quote, "fold no quote"}, }; diff --git a/parser/src/passes/double_quote_parsing.c b/parser/src/passes/double_quote_parsing.c index aa248411..1d8e4145 100644 --- a/parser/src/passes/double_quote_parsing.c +++ b/parser/src/passes/double_quote_parsing.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 18:56:12 by maiboyer ### ########.fr */ +/* Updated: 2024/10/06 13:51:52 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -26,50 +26,53 @@ void push_token_and_create_new_chr(\ void push_token_and_set_new_chr(\ t_vec_token *tokens, t_token *tok, enum e_token ttype, char c); +static bool _dquote_inner2(t_token *ctok, t_token*out, char c) +{ + if (me_isspace(c)) + { + if (ctok->type == TOK_NONE) + *ctok = 
token_new(TOK_WHITESPACE); + if (ctok->type != TOK_WHITESPACE) + { + vec_token_push(&out->subtokens, *ctok); + *ctok = token_new(TOK_WHITESPACE); + } + string_push_char(&ctok->string, c); + } + else if (c == '$') + push_token_and_create_new(&out->subtokens, ctok, TOK_DOLLAR, "$"); + else if (c == '(') + push_token_and_create_new(&out->subtokens, ctok, TOK_LPAREN, "("); + else if (c == ')') + push_token_and_create_new(&out->subtokens, ctok, TOK_RPAREN, ")"); + else if (!(me_isalnum(c) || c == '_')) + push_token_and_create_new_chr(&out->subtokens, ctok, TOK_NALPHANUM, c); + else + return (false); + return (true); +} + t_error _parse_dquote_inner(t_token dquote, t_vec_token *append) { t_token ctok; t_token out; t_usize i; - char c; out = token_new_meta(TOK_DQUOTE); i = 0; ctok = token_new_none(); while (dquote.string.buf[i] != '\0') { - c = dquote.string.buf[i++]; - if (me_isspace(c)) - { - if (ctok.type == TOK_NONE) - ctok = token_new(TOK_WHITESPACE); - if (ctok.type != TOK_WHITESPACE) - { - vec_token_push(&out.subtokens, ctok); - ctok = token_new(TOK_WHITESPACE); - } - string_push_char(&ctok.string, c); - } - else if (c == '$') - push_token_and_create_new(&out.subtokens, &ctok, TOK_DOLLAR, "$"); - else if (c == '(') - push_token_and_create_new(&out.subtokens, &ctok, TOK_LPAREN, "("); - else if (c == ')') - push_token_and_create_new(&out.subtokens, &ctok, TOK_RPAREN, ")"); - else if (!(me_isalnum(c) || c == '_')) - push_token_and_create_new_chr(&out.subtokens, &ctok, TOK_NALPHANUM, c); - else + if (!_dquote_inner2(&ctok, &out, dquote.string.buf[i++])) { if (ctok.type == TOK_NONE) ctok = token_new(TOK_NQUOTE); if (ctok.type != TOK_NQUOTE) - { - vec_token_push(&out.subtokens, ctok); - ctok = token_new(TOK_NQUOTE); - } - string_push_char(&ctok.string, c); + ctok = (vec_token_push(&out.subtokens, ctok), \ + token_new(TOK_NQUOTE)); + string_push_char(&ctok.string, dquote.string.buf[i - 1]); } - }; + } if (ctok.type != TOK_NONE) vec_token_push(&out.subtokens, ctok); if (ts_dq_apply_passes(out.subtokens, &out.subtokens)) @@ -99,7 +102,7 @@ t_error ts_double_string_pass(t_vec_token input, t_vec_token *output) if (_parse_dquote_inner(input.buffer[i], &out)) return (vec_token_free(input), ERROR); } - else + else vec_token_push(&out, token_clone(&input.buffer[i])); i++; } diff --git a/parser/src/passes/fold_double_amp.c b/parser/src/passes/fold_double_amp.c index 36e75b91..066a2d33 100644 --- a/parser/src/passes/fold_double_amp.c +++ b/parser/src/passes/fold_double_amp.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ -/* Updated: 2024/10/04 19:09:45 by rparodi ### ########.fr */ +/* Updated: 2024/10/06 13:42:09 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -38,7 +38,7 @@ t_error ts_double_amp(t_vec_token input, t_vec_token *output) if (i + 1 >= input.len) vec_token_push(&out, token_clone(&input.buffer[i])); else if (input.buffer[i].type == TOK_AMP - && input.buffer[i + 1].type == TOK_AMP) + && input.buffer[i + 1].type == TOK_AMP) { vec_token_push(&out, token_new(TOK_AND)); i++; diff --git a/parser/src/passes/fold_double_carret.c b/parser/src/passes/fold_double_carret.c index 6181bed2..62e8fb55 100644 --- a/parser/src/passes/fold_double_carret.c +++ b/parser/src/passes/fold_double_carret.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ -/* Updated: 2024/10/04 19:09:30 by rparodi 
### ########.fr */ +/* Updated: 2024/10/06 13:42:24 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -38,7 +38,7 @@ t_error ts_double_lcarret(t_vec_token input, t_vec_token *output) if (i + 1 >= input.len) vec_token_push(&out, token_clone(&input.buffer[i])); else if (input.buffer[i].type == TOK_LCARRET - && input.buffer[i + 1].type == TOK_LCARRET) + && input.buffer[i + 1].type == TOK_LCARRET) { vec_token_push(&out, token_new(TOK_DLCARRET)); i++; @@ -63,7 +63,7 @@ t_error ts_double_rcarret(t_vec_token input, t_vec_token *output) if (i + 1 >= input.len) vec_token_push(&out, token_clone(&input.buffer[i])); else if (input.buffer[i].type == TOK_RCARRET - && input.buffer[i + 1].type == TOK_RCARRET) + && input.buffer[i + 1].type == TOK_RCARRET) { vec_token_push(&out, token_new(TOK_DRCARRET)); i++; diff --git a/parser/src/passes/fold_double_paren.c b/parser/src/passes/fold_double_paren.c index f21b3c96..eba1b94d 100644 --- a/parser/src/passes/fold_double_paren.c +++ b/parser/src/passes/fold_double_paren.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 18:43:52 by maiboyer ### ########.fr */ +/* Updated: 2024/10/06 13:40:36 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -16,6 +16,15 @@ #include "me/vec/vec_token.h" #include "parser/token.h" +static void _fold_parens_helper(t_vec_token *v, enum e_token ty, t_const_str s) +{ + t_token tmp; + + tmp = token_new(ty); + string_push(&tmp.string, s); + vec_token_push(v, tmp); +} + /// This is a sample pass /// /// There is a few rules the rest of the tokenizer machinery assumes @@ -31,7 +40,6 @@ t_error ts_double_lparen(t_vec_token input, t_vec_token *output) { t_vec_token out; t_usize i; - t_token tmp; i = 0; out = vec_token_new(input.len, token_free); @@ -40,11 +48,9 @@ t_error ts_double_lparen(t_vec_token input, t_vec_token *output) if (i + 1 >= input.len) vec_token_push(&out, token_clone(&input.buffer[i])); else if (input.buffer[i].type == TOK_LPAREN - && input.buffer[i + 1].type == TOK_LPAREN) + && input.buffer[i + 1].type == TOK_LPAREN) { - tmp = token_new(TOK_DLPAREN); - string_push(&tmp.string, "(("); - vec_token_push(&out, tmp); + _fold_parens_helper(&out, TOK_DLPAREN, "(("); i++; } else @@ -59,7 +65,6 @@ t_error ts_double_rparen(t_vec_token input, t_vec_token *output) { t_vec_token out; t_usize i; - t_token tmp; i = 0; out = vec_token_new(input.len, token_free); @@ -68,11 +73,9 @@ t_error ts_double_rparen(t_vec_token input, t_vec_token *output) if (i + 1 >= input.len) vec_token_push(&out, token_clone(&input.buffer[i])); else if (input.buffer[i].type == TOK_RPAREN - && input.buffer[i + 1].type == TOK_RPAREN) + && input.buffer[i + 1].type == TOK_RPAREN) { - tmp = token_new(TOK_DRPAREN); - string_push(&tmp.string, "))"); - vec_token_push(&out, tmp); + _fold_parens_helper(&out, TOK_DRPAREN, "))"); i++; } else diff --git a/parser/src/passes/fold_double_pipe.c b/parser/src/passes/fold_double_pipe.c index bfd8abac..11aee6af 100644 --- a/parser/src/passes/fold_double_pipe.c +++ b/parser/src/passes/fold_double_pipe.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ -/* Updated: 2024/10/04 19:08:48 by rparodi ### ########.fr */ +/* Updated: 2024/10/06 13:42:02 by maiboyer ### ########.fr */ /* */ /* 
************************************************************************** */ @@ -38,7 +38,7 @@ t_error ts_double_pipe(t_vec_token input, t_vec_token *output) if (i + 1 >= input.len) vec_token_push(&out, token_clone(&input.buffer[i])); else if (input.buffer[i].type == TOK_PIPE - && input.buffer[i + 1].type == TOK_PIPE) + && input.buffer[i + 1].type == TOK_PIPE) { vec_token_push(&out, token_new(TOK_OR)); i++; diff --git a/parser/src/passes/fold_expansion.c b/parser/src/passes/fold_expansion.c index 6c3127c1..fab3a124 100644 --- a/parser/src/passes/fold_expansion.c +++ b/parser/src/passes/fold_expansion.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 13:19:33 by maiboyer ### ########.fr */ +/* Updated: 2024/10/06 13:43:38 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -16,7 +16,7 @@ #include "parser/passes.h" #include "parser/token.h" -bool _can_be_varname(t_token *tok) +bool _can_be_varname(t_token *tok) { if (tok->type == TOK_DOLLAR) return (true); @@ -35,7 +35,7 @@ bool _can_be_varname(t_token *tok) /// thus it shouldn't be freed in case of error /// - the output tokens may not be direct copy of the input tokens, /// but need to be cloned (different allocations for stuff) -t_error ts_fold_expension(t_vec_token input, t_vec_token *output) +t_error ts_fold_expension(t_vec_token input, t_vec_token *output) { t_vec_token out; t_usize i; @@ -49,10 +49,10 @@ t_error ts_fold_expension(t_vec_token input, t_vec_token *output) vec_token_push(&out, token_clone(&input.buffer[i])); else if (input.buffer[i].type == TOK_DOLLAR \ && (input.buffer[i + 1].type == TOK_NQUOTE \ - || _can_be_varname(&input.buffer[i+1]))) + || _can_be_varname(&input.buffer[i + 1]))) { tmp = token_clone(&input.buffer[++i]); - tmp.type= TOK_EXPENSION; + tmp.type = TOK_EXPENSION; vec_token_push(&out, tmp); } else diff --git a/parser/src/passes/fold_no_quote.c b/parser/src/passes/fold_no_quote.c index 3ce3d698..dcda6ebc 100644 --- a/parser/src/passes/fold_no_quote.c +++ b/parser/src/passes/fold_no_quote.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 18:05:49 by maiboyer ### ########.fr */ +/* Updated: 2024/10/06 13:41:08 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -42,7 +42,8 @@ t_error ts_fold_no_quote(t_vec_token input, t_vec_token *output) { j = 0; tmp = token_new(TOK_NQUOTE); - while (i + j < input.len && token_is_noquote(input.buffer[i + j].type)) + while (i + j < input.len \ + && token_is_noquote(input.buffer[i + j].type)) string_push(&tmp.string, input.buffer[i + j++].string.buf); vec_token_push(&out, tmp); i += j; diff --git a/parser/src/passes/fold_redir.c b/parser/src/passes/fold_redir.c index 705a2a81..c53618db 100644 --- a/parser/src/passes/fold_redir.c +++ b/parser/src/passes/fold_redir.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 18:02:25 by maiboyer ### ########.fr */ +/* Updated: 2024/10/06 13:43:02 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -15,9 +15,12 @@ #include "me/vec/vec_token.h" #include "parser/token.h" -bool _is_token_carret(enum e_token ttype) +bool _is_token_carret(enum 
e_token ttype) { - return (ttype == TOK_LCARRET || ttype == TOK_DLCARRET || ttype == TOK_RCARRET || ttype == TOK_DRCARRET); + return (ttype == TOK_LCARRET \ + || ttype == TOK_DLCARRET \ + || ttype == TOK_RCARRET \ + || ttype == TOK_DRCARRET); } /// This is a sample pass @@ -41,14 +44,18 @@ t_error ts_fold_redir(t_vec_token input, t_vec_token *output) out = vec_token_new(input.len, token_free); while (i < input.len) { - if (vec_token_get(&input, i + 1) != NULL && _is_token_carret(vec_token_get(&input, i)->type) && vec_token_get(&input, i + 1)->type == TOK_WORD) + if (vec_token_get(&input, i + 1) != NULL \ + && _is_token_carret(vec_token_get(&input, i)->type) \ + && vec_token_get(&input, i + 1)->type == TOK_WORD) { tmp = token_new_meta(TOK_REDIR); - vec_token_push(&tmp.subtokens, token_clone(vec_token_get(&input, i++))); - vec_token_push(&tmp.subtokens, token_clone(vec_token_get(&input, i++))); + vec_token_push(&tmp.subtokens, \ + token_clone(vec_token_get(&input, i++))); + vec_token_push(&tmp.subtokens, \ + token_clone(vec_token_get(&input, i++))); vec_token_push(&out, tmp); } - else + else vec_token_push(&out, token_clone(&input.buffer[i++])); } vec_token_free(input); diff --git a/parser/src/passes/fold_whitespace.c b/parser/src/passes/fold_whitespace.c index c7c74ec4..f4bfde41 100644 --- a/parser/src/passes/fold_whitespace.c +++ b/parser/src/passes/fold_whitespace.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ -/* Updated: 2024/10/04 18:32:00 by maiboyer ### ########.fr */ +/* Updated: 2024/10/06 13:34:58 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -38,7 +38,7 @@ t_error ts_fold_whitespace(t_vec_token input, t_vec_token *output) if (i + 1 >= input.len) vec_token_push(&out, token_clone(&input.buffer[i])); else if (input.buffer[i].type == TOK_WHITESPACE - && input.buffer[i + 1].type == TOK_WHITESPACE) + && input.buffer[i + 1].type == TOK_WHITESPACE) ; else vec_token_push(&out, token_clone(&input.buffer[i])); diff --git a/parser/src/passes/paren_to_nquote.c b/parser/src/passes/paren_to_nquote.c new file mode 100644 index 00000000..846272ea --- /dev/null +++ b/parser/src/passes/paren_to_nquote.c @@ -0,0 +1,58 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* paren_to_nquote.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ +/* Updated: 2024/10/06 13:37:07 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/string/string.h" +#include "parser/passes.h" +#include "me/types.h" +#include "me/vec/vec_token.h" +#include "parser/token.h" + +static void _paren_to_nquote_helper(t_vec_token *v, char c) +{ + t_token tmp; + + tmp = token_new(TOK_NQUOTE); + string_push_char(&tmp.string, c); + vec_token_push(v, tmp); +} + +/// This is a sample pass +/// +/// There is a few rules the rest of the tokenizer machinery assumes +/// theses function follows: +/// - the input vec WILL be freed when the function return, even in +/// case of error +/// - the output vector isn't populated if the function returns an error, +/// thus it shouldn't be freed in case of error +/// - the output tokens may not be direct copy of the input tokens, +/// but need to be cloned (different allocations for stuff) + +t_error 
ts_paren_to_noquote(t_vec_token input, t_vec_token *output) +{ + t_vec_token out; + t_usize i; + + i = 0; + out = vec_token_new(input.len, token_free); + while (i < input.len) + { + if (input.buffer[i].type == TOK_LPAREN) + _paren_to_nquote_helper(&out, '('); + else if (input.buffer[i].type == TOK_RPAREN) + _paren_to_nquote_helper(&out, ')'); + else + vec_token_push(&out, token_clone(&input.buffer[i])); + i++; + } + vec_token_free(input); + return (*output = out, NO_ERROR); +} diff --git a/parser/src/passes/split_double_paren.c b/parser/src/passes/split_double_paren.c new file mode 100644 index 00000000..4bddd9d8 --- /dev/null +++ b/parser/src/passes/split_double_paren.c @@ -0,0 +1,64 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* split_double_paren.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ +/* Updated: 2024/10/06 13:38:43 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/string/string.h" +#include "parser/passes.h" +#include "me/types.h" +#include "me/vec/vec_token.h" +#include "parser/token.h" + +static void _split_parens_helper(t_vec_token *v, enum e_token ty, char c) +{ + t_token tmp; + + tmp = token_new(ty); + string_push_char(&tmp.string, c); + vec_token_push(v, tmp); +} + +/// This is a sample pass +/// +/// There is a few rules the rest of the tokenizer machinery assumes +/// theses function follows: +/// - the input vec WILL be freed when the function return, even in +/// case of error +/// - the output vector isn't populated if the function returns an error, +/// thus it shouldn't be freed in case of error +/// - the output tokens may not be direct copy of the input tokens, +/// but need to be cloned (different allocations for stuff) + +t_error ts_split_paren(t_vec_token input, t_vec_token *output) +{ + t_vec_token out; + t_usize i; + + i = 0; + out = vec_token_new(input.len, token_free); + while (i < input.len) + { + if (input.buffer[i].type == TOK_DLPAREN) + { + _split_parens_helper(&out, TOK_LPAREN, '('); + _split_parens_helper(&out, TOK_LPAREN, '('); + } + else if (input.buffer[i].type == TOK_DRPAREN) + { + _split_parens_helper(&out, TOK_RPAREN, ')'); + _split_parens_helper(&out, TOK_RPAREN, ')'); + } + else + vec_token_push(&out, token_clone(&input.buffer[i])); + i++; + } + vec_token_free(input); + return (*output = out, NO_ERROR); +} diff --git a/parser/src/token_lifetime.c b/parser/src/token_lifetime.c index 6ab272e8..ee7eaccb 100644 --- a/parser/src/token_lifetime.c +++ b/parser/src/token_lifetime.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/28 14:37:13 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 18:48:09 by maiboyer ### ########.fr */ +/* Updated: 2024/10/06 13:33:34 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -14,7 +14,7 @@ #include "me/vec/vec_token.h" #include "parser/token.h" -void token_free(t_token tok) +void token_free(t_token tok) { if (tok.string.buf != NULL) string_free(tok.string); @@ -22,105 +22,25 @@ void token_free(t_token tok) vec_token_free(tok.subtokens); } -t_token token_new(enum e_token type) +t_token token_new(enum e_token type) { - return ((t_token){.type = type, .string = string_new(16), .subtokens = {NULL, 0, 0, NULL}}); + return ((t_token){.type = type, .string = 
string_new(16), \ + .subtokens = {NULL, 0, 0, NULL}}); } -t_token token_new_meta(enum e_token type) +t_token token_new_meta(enum e_token type) { - return ((t_token){.type = type, .string = {NULL, 0, 0}, .subtokens = vec_token_new(16, token_free)}); + return ((t_token){.type = type, .string = {NULL, 0, 0}, \ + .subtokens = vec_token_new(16, token_free)}); } -bool token_is_meta(t_token tok) +bool token_is_meta(t_token tok) { return (tok.subtokens.buffer != NULL); } -t_token token_new_none(void) +t_token token_new_none(void) { - return ((t_token){.type = TOK_NONE, .string = {NULL, 0, 0}, .subtokens = {NULL, 0, 0, NULL}}); -} - -t_token token_clone(t_token *tok) -{ - t_token out; - t_usize i; - - out = token_new_none(); - out.type = tok->type; - if (tok->string.buf != NULL) - { - out.string = string_new(tok->string.capacity); - string_push(&out.string, tok->string.buf); - } - if (tok->subtokens.buffer != NULL) - { - out.subtokens = vec_token_new(tok->subtokens.capacity, token_free); - i = 0; - while (i < tok->subtokens.len) - vec_token_push(&out.subtokens, token_clone(&tok->subtokens.buffer[i++])); - } - return (out); -} - -bool token_is_noquote(enum e_token ttype) -{ - return (ttype == TOK_NQUOTE \ - || ttype == TOK_DOLLAR \ - || ttype == TOK_NALPHANUM \ - // false - //|| ttype == TOK_LPAREN \n - //|| ttype == TOK_RPAREN \n - //|| ttype == TOK_DLPAREN \n - //|| ttype == TOK_DRPAREN -); -} - -// TO REMOVE -t_str token_name(t_token *token) -{ - if (token->type == TOK_NONE) - return ("NONE"); - if (token->type == TOK_AMP) - return ("AMP"); - if (token->type == TOK_AND) - return ("AND"); - if (token->type == TOK_CARRET) - return ("CARRET"); - if (token->type == TOK_DLCARRET) - return ("DLCARRET"); - if (token->type == TOK_DOLLAR) - return ("DOLLAR"); - if (token->type == TOK_DQUOTE) - return ("DQUOTE"); - if (token->type == TOK_DRCARRET) - return ("DRCARRET"); - if (token->type == TOK_EXPENSION) - return ("EXPENSION"); - if (token->type == TOK_LCARRET) - return ("LCARRET"); - if (token->type == TOK_LPAREN) - return ("LPAREN"); - if (token->type == TOK_NQUOTE) - return ("NQUOTE"); - if (token->type == TOK_OR) - return ("OR"); - if (token->type == TOK_PIPE) - return ("PIPE"); - if (token->type == TOK_RCARRET) - return ("RCARRET"); - if (token->type == TOK_RPAREN) - return ("RPAREN"); - if (token->type == TOK_SEMICOLON) - return ("SEMICOLON"); - if (token->type == TOK_SQUOTE) - return ("SQUOTE"); - if (token->type == TOK_WHITESPACE) - return ("WHITESPACE"); - if (token->type == TOK_WORD) - return ("WORD"); - if (token->type == TOK_NALPHANUM) - return ("NALPHANUM"); - return (NULL); + return ((t_token){.type = TOK_NONE, .string = {NULL, 0, 0}, \ + .subtokens = {NULL, 0, 0, NULL}}); } diff --git a/parser/src/token_name.c b/parser/src/token_name.c new file mode 100644 index 00000000..1735892b --- /dev/null +++ b/parser/src/token_name.c @@ -0,0 +1,66 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* token_name.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/10/06 13:32:28 by maiboyer #+# #+# */ +/* Updated: 2024/10/06 13:32:39 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/types.h" +#include "parser/token.h" + +// TO REMOVE +t_str token_name(t_token *token) +{ + if (token->type == TOK_NONE) + return ("NONE"); + if (token->type == TOK_AMP) + return ("AMP"); + if (token->type == TOK_AND) + 
return ("AND"); + if (token->type == TOK_CARRET) + return ("CARRET"); + if (token->type == TOK_DLCARRET) + return ("DLCARRET"); + if (token->type == TOK_DOLLAR) + return ("DOLLAR"); + if (token->type == TOK_DQUOTE) + return ("DQUOTE"); + if (token->type == TOK_DRCARRET) + return ("DRCARRET"); + if (token->type == TOK_EXPENSION) + return ("EXPENSION"); + if (token->type == TOK_LCARRET) + return ("LCARRET"); + if (token->type == TOK_LPAREN) + return ("LPAREN"); + if (token->type == TOK_NQUOTE) + return ("NQUOTE"); + if (token->type == TOK_OR) + return ("OR"); + if (token->type == TOK_PIPE) + return ("PIPE"); + if (token->type == TOK_RCARRET) + return ("RCARRET"); + if (token->type == TOK_RPAREN) + return ("RPAREN"); + if (token->type == TOK_SEMICOLON) + return ("SEMICOLON"); + if (token->type == TOK_SQUOTE) + return ("SQUOTE"); + if (token->type == TOK_WHITESPACE) + return ("WHITESPACE"); + if (token->type == TOK_WORD) + return ("WORD"); + if (token->type == TOK_NALPHANUM) + return ("NALPHANUM"); + if (token->type == TOK_DLPAREN) + return ("DLPAREN"); + if (token->type == TOK_DRPAREN) + return ("DRPAREN"); + return (NULL); +} diff --git a/parser/src/token_utils.c b/parser/src/token_utils.c new file mode 100644 index 00000000..3978d910 --- /dev/null +++ b/parser/src/token_utils.c @@ -0,0 +1,44 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* token_utils.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/10/06 13:33:12 by maiboyer #+# #+# */ +/* Updated: 2024/10/06 13:33:48 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/types.h" +#include "parser/token.h" + +t_token token_clone(t_token *tok) +{ + t_token out; + t_usize i; + + out = token_new_none(); + out.type = tok->type; + if (tok->string.buf != NULL) + { + out.string = string_new(tok->string.capacity); + string_push(&out.string, tok->string.buf); + } + if (tok->subtokens.buffer != NULL) + { + out.subtokens = vec_token_new(tok->subtokens.capacity, token_free); + i = 0; + while (i < tok->subtokens.len) + vec_token_push(&out.subtokens, \ + token_clone(&tok->subtokens.buffer[i++])); + } + return (out); +} + +bool token_is_noquote(enum e_token ttype) +{ + return (ttype == TOK_NQUOTE \ + || ttype == TOK_DOLLAR \ + || ttype == TOK_NALPHANUM); +} diff --git a/parser/src/tokenizer.c b/parser/src/tokenizer.c index 80d6304d..4d7f63bc 100644 --- a/parser/src/tokenizer.c +++ b/parser/src/tokenizer.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/30 19:39:39 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 13:02:28 by maiboyer ### ########.fr */ +/* Updated: 2024/10/06 13:51:41 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -38,9 +38,8 @@ static void handle_quote(t_vec_token *ret, char chr, t_token *tok, char *quote) string_push_char(&tok->string, chr); } -static void handle_noquote(t_vec_token *ret, char chr, t_token *tok, char *quote) +static bool _handle_singlechr(t_vec_token *ret, char chr, t_token *tok) { - *quote = '\0'; if (chr == '$') push_token_and_create_new(ret, tok, TOK_DOLLAR, "$"); else if (chr == '>') @@ -62,6 +61,14 @@ static void handle_noquote(t_vec_token *ret, char chr, t_token *tok, char *quote else if (!(me_isalnum(chr) || chr == '_')) push_token_and_create_new_chr(ret, tok, TOK_NALPHANUM, chr); else + return 
(false); + return (true); +} + +static void handle_noquote(t_vec_token *ret, char chr, t_token *tok, char *q) +{ + *q = '\0'; + if (!_handle_singlechr(ret, chr, tok)) { if (tok->type == TOK_NONE) *tok = token_new(TOK_NQUOTE); @@ -69,20 +76,20 @@ static void handle_noquote(t_vec_token *ret, char chr, t_token *tok, char *quote } } -static void tokenize_inner(t_vec_token *ret, char chr, t_token *tok, char *quote) +static void tokenize_inner(t_vec_token *ret, char chr, t_token *tok, char *q) { - if (*quote == '\0') + if (*q == '\0') { - *quote = chr; + *q = chr; if (chr == '\"') push_token_and_set_new(ret, tok, TOK_DQUOTE, ""); else if (chr == '\'') push_token_and_set_new(ret, tok, TOK_SQUOTE, ""); else - handle_noquote(ret, chr, tok, quote); + handle_noquote(ret, chr, tok, q); } - else if (*quote == '\'' || *quote == '\"') - handle_quote(ret, chr, tok, quote); + else if (*q == '\'' || *q == '\"') + handle_quote(ret, chr, tok, q); else me_abort("invalid quote type"); } diff --git a/parser/src/tokenizer_utils.c b/parser/src/tokenizer_utils.c index 8d42583b..dd48b666 100644 --- a/parser/src/tokenizer_utils.c +++ b/parser/src/tokenizer_utils.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/03 22:07:25 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 13:00:52 by maiboyer ### ########.fr */ +/* Updated: 2024/10/06 13:25:32 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -52,19 +52,19 @@ void push_token_and_set_new(\ void push_token_and_create_new_chr(\ t_vec_token *tokens, t_token *tok, enum e_token ttype, char c) { - char tmp[2]; + char tmp[2]; tmp[0] = c; tmp[1] = '\0'; - push_token_and_create_new(tokens, tok, ttype, (t_const_str)&tmp); + push_token_and_create_new(tokens, tok, ttype, (t_const_str) & tmp); } void push_token_and_set_new_chr(\ t_vec_token *tokens, t_token *tok, enum e_token ttype, char c) { - char tmp[2]; + char tmp[2]; tmp[0] = c; tmp[1] = '\0'; - push_token_and_set_new(tokens, tok, ttype, (t_const_str)&tmp); + push_token_and_set_new(tokens, tok, ttype, (t_const_str) & tmp); } diff --git a/parser/src/ts_print.c b/parser/src/ts_print.c index ff5bcf8f..149cdea8 100644 --- a/parser/src/ts_print.c +++ b/parser/src/ts_print.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/05 18:51:50 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 18:53:18 by maiboyer ### ########.fr */ +/* Updated: 2024/10/06 13:31:38 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -16,7 +16,8 @@ #include #include "app/colors.h" -t_str token_name(t_token *out); +t_str token_name(t_token *out); + static void _print_ts_inner(t_usize i, t_token *token, void *vdepth) { t_usize depth; @@ -29,20 +30,20 @@ static void _print_ts_inner(t_usize i, t_token *token, void *vdepth) i = 0; while (i++ < depth) string_push_char(&sdepth, '\t'); - if (token->subtokens.buffer != NULL) { depth++; - printf("%s[" COL_GREEN "%10s"RESET"]\n", sdepth.buf ,token_name(token)); + printf("%s[" COL_GREEN "%10s" RESET "]\n", \ + sdepth.buf, token_name(token)); vec_token_iter(&token->subtokens, _print_ts_inner, &depth); } else - printf("%s[" COL_GREEN "%10s"RESET"] '"COL_YELLOW"%s"RESET"'\n",\ - sdepth.buf ,token_name(token), token->string.buf); + printf("%s[" COL_GREEN "%10s" RESET "] '" COL_YELLOW "%s" RESET "'\n", \ + sdepth.buf, token_name(token), token->string.buf); string_free(sdepth); } -void ts_print(t_vec_token *ts) 
+void ts_print(t_vec_token *ts) { vec_token_iter(ts, _print_ts_inner, NULL); }
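
Note on the pass contract: every pass added or touched by this patch repeats the same rules in its doc comment (the input vector is always freed when the pass returns, even on error; the output vector is only populated on success; emitted tokens must be clones, never aliases of the input buffer). As a reference point, here is a minimal sketch of an identity pass that follows that contract, written in the same style as ts_paren_to_noquote and ts_split_paren above. It is only an illustration: ts_do_fuck_all is declared in passes.h but its body is not part of this patch, so this is an assumed shape, not the actual implementation.

/* Hedged sketch, not part of the patch: an identity pass that clones every
** token into a fresh vector, frees the input, and only assigns *output on
** success, matching the contract stated in the pass doc comments. */

#include "me/types.h"
#include "me/vec/vec_token.h"
#include "parser/passes.h"
#include "parser/token.h"

t_error	ts_do_fuck_all(t_vec_token input, t_vec_token *output)
{
	t_vec_token	out;
	t_usize		i;

	i = 0;
	out = vec_token_new(input.len, token_free);
	while (i < input.len)
	{
		vec_token_push(&out, token_clone(&input.buffer[i]));
		i++;
	}
	vec_token_free(input);
	return (*output = out, NO_ERROR);
}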