From 71d9a201b52feb0f5887c68d8960bd9d3ef38d89 Mon Sep 17 00:00:00 2001 From: maix0 Date: Sat, 5 Oct 2024 18:58:42 +0200 Subject: [PATCH] update: added debug print of ts after every pass --- parser/Filelist.parser.mk | 2 + parser/include/parser/passes.h | 6 +- parser/include/parser/token.h | 12 ++-- parser/src/passes.c | 10 ++- parser/src/passes/double_quote_parsing.c | 8 ++- parser/src/passes/fold_double_paren.c | 84 ++++++++++++++++++++++++ parser/src/token_lifetime.c | 10 ++- parser/src/ts_print.c | 48 ++++++++++++++ sources/main.c | 31 +-------- 9 files changed, 172 insertions(+), 39 deletions(-) create mode 100644 parser/src/passes/fold_double_paren.c create mode 100644 parser/src/ts_print.c diff --git a/parser/Filelist.parser.mk b/parser/Filelist.parser.mk index 361539a3..615649bd 100644 --- a/parser/Filelist.parser.mk +++ b/parser/Filelist.parser.mk @@ -3,6 +3,7 @@ passes \ passes/double_quote_parsing \ passes/fold_double_amp \ passes/fold_double_carret \ +passes/fold_double_paren \ passes/fold_double_pipe \ passes/fold_expansion \ passes/fold_no_quote \ @@ -12,6 +13,7 @@ passes/template_file \ token_lifetime \ tokenizer \ tokenizer_utils \ +ts_print \ GEN_FILES = \ \ diff --git a/parser/include/parser/passes.h b/parser/include/parser/passes.h index cd166ae9..4885ad96 100644 --- a/parser/include/parser/passes.h +++ b/parser/include/parser/passes.h @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 18:43:41 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 18:03:54 by maiboyer ### ########.fr */ +/* Updated: 2024/10/05 18:42:17 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -37,14 +37,16 @@ t_error ts_dq_apply_passes(t_vec_token ts, t_vec_token *out); // this is a example one, does absolutly nothing lol t_error ts_double_amp(t_vec_token input, t_vec_token *output); t_error ts_double_lcarret(t_vec_token input, t_vec_token *output); +t_error 
ts_double_lparen(t_vec_token input, t_vec_token *output); t_error ts_double_pipe(t_vec_token input, t_vec_token *output); t_error ts_double_rcarret(t_vec_token input, t_vec_token *output); +t_error ts_double_rparen(t_vec_token input, t_vec_token *output); t_error ts_double_string_pass(t_vec_token input, t_vec_token *output); t_error ts_fold_no_quote(t_vec_token input, t_vec_token *output); t_error ts_fold_whitespace(t_vec_token input, t_vec_token *output); t_error ts_do_fuck_all(t_vec_token input, t_vec_token *output); -t_error ts_fold_redir(t_vec_token input, t_vec_token *output); t_error ts_fold_expension(t_vec_token input, t_vec_token *output); +t_error ts_fold_redir(t_vec_token input, t_vec_token *output); #endif /* PASSES_H */ diff --git a/parser/include/parser/token.h b/parser/include/parser/token.h index d6a4aa73..dbe163cd 100644 --- a/parser/include/parser/token.h +++ b/parser/include/parser/token.h @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/26 17:59:23 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 18:02:03 by maiboyer ### ########.fr */ +/* Updated: 2024/10/05 18:54:13 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -18,28 +18,31 @@ enum e_token { - TOK_NONE, // NO TOKEN TYPE == INVALID / INEXISTANT TOKEN + TOK_AEXP, // a meta token, arithmetic expansion TOK_AMP, // ampersand == & TOK_AND, // and == && TOK_CARRET, // any carret == < > << >> TOK_DLCARRET, // double left carret == << + TOK_DLPAREN, // double left parenthesis '((' TOK_DOLLAR, // dollar == $ TOK_DQUOTE, // double quote string TOK_DRCARRET, // double right carret == >> + TOK_DRPAREN, // double right parenthesis '))' TOK_EXPENSION, // an expension == $; the $ is not in .string TOK_LCARRET, // left carret == < TOK_LPAREN, // left parenthesis == ( + TOK_NALPHANUM, // a non alphanumeric character, used in the expansion folding, then folded back into NQUOTE + TOK_NONE, // NO TOKEN TYPE
== INVALID / INEXISTANT TOKEN TOK_NQUOTE, // no quote string TOK_OR, // or == || TOK_PIPE, // pipe == | TOK_RCARRET, // right carret == > + TOK_REDIR, // a meta token, which contains being an [D](L|R)CARRET and the arg being a WORD TOK_RPAREN, // right parenthesis == ) TOK_SEMICOLON, // semicolor == ; TOK_SQUOTE, // single quote string TOK_WHITESPACE, // whitespace outside of quoted strings - TOK_NALPHANUM, // a non alphanumeric character, used in the expansion folding, then folded back into NQUOTE TOK_WORD, // a meta token, which contains subtokens - TOK_REDIR, // a meta token, which contains being an [D](L|R)CARRET and the arg being a WORD }; typedef struct s_token @@ -61,6 +64,7 @@ void token_free(t_token tok); bool token_is_meta(t_token tok); bool token_is_noquote(enum e_token tok); +void ts_print(t_vec_token *ts); /* PARSING */ t_error tokenize(t_const_str s, t_vec_token *out); diff --git a/parser/src/passes.c b/parser/src/passes.c index adea93f2..3cebe65b 100644 --- a/parser/src/passes.c +++ b/parser/src/passes.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 18:41:16 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 18:03:39 by maiboyer ### ########.fr */ +/* Updated: 2024/10/05 18:57:59 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -40,8 +40,11 @@ static const struct s_ts_pass_def g_ts_passes[] = {\ {ts_fold_whitespace, "fold whitespace"}, {ts_double_amp, "double amp => and"}, {ts_double_pipe, "double pipe => or"}, + {ts_double_lparen, "double lparen => dlparen"}, + {ts_double_rparen, "double rparen => drparen"}, {ts_double_lcarret, "double lcarret => dlcarret"}, {ts_double_rcarret, "double rcarrer => drcarret"}, + // there should be an ts_fold_arith here {ts_fold_redir, "fold redir+argument"}, }; @@ -60,13 +63,17 @@ t_error ts_apply_passes(t_vec_token ts, t_vec_token *out) else me_printf("Applied '%s' pass\n", g_ts_passes[i].name); ts = 
next; + ts_print(&ts); i++; } return (*out = ts, NO_ERROR); } static const struct s_ts_pass_def g_ts_dq_passes[] = {\ + {ts_double_lparen, "double lparen => dlparen"}, + {ts_double_rparen, "double rparen => drparen"}, {ts_fold_expension, "fold expansion"}, + // there should be an ts_fold_arith here {ts_fold_no_quote, "fold no quote"}, }; @@ -86,6 +93,7 @@ t_error ts_dq_apply_passes(t_vec_token ts, t_vec_token *out) else me_printf("Applied '%s' dq_pass\n", g_ts_dq_passes[i].name); ts = next; + ts_print(&ts); i++; } return (*out = ts, NO_ERROR); diff --git a/parser/src/passes/double_quote_parsing.c b/parser/src/passes/double_quote_parsing.c index a6fd5190..aa248411 100644 --- a/parser/src/passes/double_quote_parsing.c +++ b/parser/src/passes/double_quote_parsing.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 13:06:17 by maiboyer ### ########.fr */ +/* Updated: 2024/10/05 18:56:12 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -50,8 +50,12 @@ t_error _parse_dquote_inner(t_token dquote, t_vec_token *append) } string_push_char(&ctok.string, c); } - else if ('$') + else if (c == '$') push_token_and_create_new(&out.subtokens, &ctok, TOK_DOLLAR, "$"); + else if (c == '(') + push_token_and_create_new(&out.subtokens, &ctok, TOK_LPAREN, "("); + else if (c == ')') + push_token_and_create_new(&out.subtokens, &ctok, TOK_RPAREN, ")"); else if (!(me_isalnum(c) || c == '_')) push_token_and_create_new_chr(&out.subtokens, &ctok, TOK_NALPHANUM, c); else diff --git a/parser/src/passes/fold_double_paren.c b/parser/src/passes/fold_double_paren.c new file mode 100644 index 00000000..f21b3c96 --- /dev/null +++ b/parser/src/passes/fold_double_paren.c @@ -0,0 +1,84 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* fold_double_paren.c :+: :+: :+: */ +/* 
+:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/10/02 19:04:32 by maiboyer #+# #+# */ +/* Updated: 2024/10/05 18:43:52 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/string/string.h" +#include "parser/passes.h" +#include "me/types.h" +#include "me/vec/vec_token.h" +#include "parser/token.h" + +/// This is a sample pass +/// +/// There is a few rules the rest of the tokenizer machinery assumes +/// theses function follows: +/// - the input vec WILL be freed when the function return, even in +/// case of error +/// - the output vector isn't populated if the function returns an error, +/// thus it shouldn't be freed in case of error +/// - the output tokens may not be direct copy of the input tokens, +/// but need to be cloned (different allocations for stuff) + +t_error ts_double_lparen(t_vec_token input, t_vec_token *output) +{ + t_vec_token out; + t_usize i; + t_token tmp; + + i = 0; + out = vec_token_new(input.len, token_free); + while (i < input.len) + { + if (i + 1 >= input.len) + vec_token_push(&out, token_clone(&input.buffer[i])); + else if (input.buffer[i].type == TOK_LPAREN + && input.buffer[i + 1].type == TOK_LPAREN) + { + tmp = token_new(TOK_DLPAREN); + string_push(&tmp.string, "(("); + vec_token_push(&out, tmp); + i++; + } + else + vec_token_push(&out, token_clone(&input.buffer[i])); + i++; + } + vec_token_free(input); + return (*output = out, NO_ERROR); +} + +t_error ts_double_rparen(t_vec_token input, t_vec_token *output) +{ + t_vec_token out; + t_usize i; + t_token tmp; + + i = 0; + out = vec_token_new(input.len, token_free); + while (i < input.len) + { + if (i + 1 >= input.len) + vec_token_push(&out, token_clone(&input.buffer[i])); + else if (input.buffer[i].type == TOK_RPAREN + && input.buffer[i + 1].type == TOK_RPAREN) + { + tmp = token_new(TOK_DRPAREN); + string_push(&tmp.string, "))"); + vec_token_push(&out, tmp); + i++; 
+ } + else + vec_token_push(&out, token_clone(&input.buffer[i])); + i++; + } + vec_token_free(input); + return (*output = out, NO_ERROR); +} diff --git a/parser/src/token_lifetime.c b/parser/src/token_lifetime.c index 3525de9a..6ab272e8 100644 --- a/parser/src/token_lifetime.c +++ b/parser/src/token_lifetime.c @@ -6,7 +6,7 @@ /* By: maiboyer +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/09/28 14:37:13 by maiboyer #+# #+# */ -/* Updated: 2024/10/05 13:13:16 by maiboyer ### ########.fr */ +/* Updated: 2024/10/05 18:48:09 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -68,7 +68,13 @@ bool token_is_noquote(enum e_token ttype) { return (ttype == TOK_NQUOTE \ || ttype == TOK_DOLLAR \ - || ttype == TOK_NALPHANUM); + || ttype == TOK_NALPHANUM \ + // false + //|| ttype == TOK_LPAREN \n + //|| ttype == TOK_RPAREN \n + //|| ttype == TOK_DLPAREN \n + //|| ttype == TOK_DRPAREN +); } // TO REMOVE diff --git a/parser/src/ts_print.c b/parser/src/ts_print.c new file mode 100644 index 00000000..ff5bcf8f --- /dev/null +++ b/parser/src/ts_print.c @@ -0,0 +1,48 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* ts_print.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: maiboyer +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/10/05 18:51:50 by maiboyer #+# #+# */ +/* Updated: 2024/10/05 18:53:18 by maiboyer ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "me/types.h" +#include "me/vec/vec_token.h" +#include "parser/token.h" +#include <stdio.h> +#include "app/colors.h" + +t_str token_name(t_token *out); +static void _print_ts_inner(t_usize i, t_token *token, void *vdepth) +{ + t_usize depth; + t_string sdepth; + + depth = 0; + if (vdepth != NULL) + depth = *(t_usize *)vdepth; + sdepth = string_new(16); + i = 0; + while (i++ < depth) + string_push_char(&sdepth, '\t'); + + if
(token->subtokens.buffer != NULL) + { + depth++; + printf("%s[" COL_GREEN "%10s"RESET"]\n", sdepth.buf ,token_name(token)); + vec_token_iter(&token->subtokens, _print_ts_inner, &depth); + } + else + printf("%s[" COL_GREEN "%10s"RESET"] '"COL_YELLOW"%s"RESET"'\n",\ + sdepth.buf ,token_name(token), token->string.buf); + string_free(sdepth); +} + +void ts_print(t_vec_token *ts) +{ + vec_token_iter(ts, _print_ts_inner, NULL); +} diff --git a/sources/main.c b/sources/main.c index e3445378..b78c827e 100644 --- a/sources/main.c +++ b/sources/main.c @@ -6,7 +6,7 @@ /* By: rparodi +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/03/28 14:40:38 by rparodi #+# #+# */ -/* Updated: 2024/10/03 22:47:05 by maiboyer ### ########.fr */ +/* Updated: 2024/10/05 18:54:45 by maiboyer ### ########.fr */ /* */ /* ************************************************************************** */ @@ -101,32 +101,6 @@ void print_node_data(t_node *t, t_usize depth) } */ -t_str token_name(t_token *out); -void func(t_usize i, t_token *token, void *vdepth) -{ - t_usize depth; - t_string sdepth; - - depth = 0; - if (vdepth != NULL) - depth = *(t_usize *)vdepth; - sdepth = string_new(16); - i = 0; - while (i++ < depth) - string_push_char(&sdepth, '\t'); - - if (token->subtokens.buffer != NULL) - { - depth++; - printf("%s[" COL_GREEN "%10s"RESET"]\n", sdepth.buf ,token_name(token)); - vec_token_iter(&token->subtokens, func, &depth); - } - else - printf("%s[" COL_GREEN "%10s"RESET"] '"COL_YELLOW"%s"RESET"'\n",\ - sdepth.buf ,token_name(token), token->string.buf); - string_free(sdepth); -} - void parse_str(t_state *state) { t_vec_token tokens; @@ -134,7 +108,8 @@ void parse_str(t_state *state) return ; if (ts_apply_passes(tokens, &tokens)) return ; - vec_token_iter(&tokens, func, NULL); + printf("\n\nEND TOKENS\n"); + ts_print(&tokens); vec_token_free(tokens); }