update: base of tokenizer

2024-09-30 20:25:03 +02:00 · 2024-09-30 20:25:03 +02:00 · b5556b9063
commit b5556b9063
parent 24d8bf5fc9
10 changed files with 215 additions and 219 deletions
--- a/parser/Filelist.parser.mk
+++ b/parser/Filelist.parser.mk
@ -1,8 +1,6 @@
 SRC_FILES =                                                                   \
 dollar                                                                        \
 quotes                                                                        \
 token                                                                         \
 token_lifetime                                                                \
 tokenizer                                                                     \
 GEN_FILES =                                                                   \
                                                                              \
--- a/parser/include/parser/token.h
+++ b/parser/include/parser/token.h
@ -6,7 +6,7 @@
 /*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/09/26 17:59:23 by maiboyer          #+#    #+#             */
-/*   Updated: 2024/09/29 13:30:06 by rparodi          ###   ########.fr       */
+/*   Updated: 2024/09/30 19:47:53 by maiboyer         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
@ -18,25 +18,26 @@
 // Kinds of tokens handled by the parser; TOK_NONE marks the absence of a token.
 enum e_token
 {
-	AMP,		// ampersand == &
+	TOK_NONE,		// no token type == invalid / nonexistent token
-	AND,		// and == &&
+	TOK_AMP,		// ampersand == &
-	CARRET,		// any carret == < > << >>
+	TOK_AND,		// and == &&
-	DLCARRET,	// double left carret == <<
+	TOK_CARRET,		// any caret == < > << >>
-	DOLLAR,		// dollar == $
+	TOK_DLCARRET,	// double left caret == <<
-	DQUOTE,		// double quote string
+	TOK_DOLLAR,		// dollar == $
-	DRCARRET,	// double right carret == >>
+	TOK_DQUOTE,		// double quote string
-	EXPENSION,	// an expension == $<no_quote_word>
+	TOK_DRCARRET,	// double right caret == >>
-	LCARRET,	// left carret == <
+	TOK_EXPENSION,	// an expansion == $<no_quote_word>
-	LPAREN,		// left parenthesis == (
+	TOK_LCARRET,	// left caret == <
-	NQUOTE,		// no quote string
+	TOK_LPAREN,		// left parenthesis == (
-	OR,			// or == ||
+	TOK_NQUOTE,		// no quote string
-	PIPE,		// pipe == |
+	TOK_OR,			// or == ||
-	RCARRET,	// right carret == >
+	TOK_PIPE,		// pipe == |
-	RPAREN,		// right parenthesis == )
+	TOK_RCARRET,	// right caret == >
-	SEMICOLON,	// semicolor == ;
+	TOK_RPAREN,		// right parenthesis == )
-	SQUOTE,		// single quote string
+	TOK_SEMICOLON,	// semicolon == ;
-	WHITESPACE, // whitespace outside of quoted strings
+	TOK_SQUOTE,		// single quote string
-	WORD,		// a meta token, which contains subtokens
+	TOK_WHITESPACE, // whitespace outside of quoted strings
 	TOK_WORD,		// a meta token, which contains subtokens
 };
 typedef struct s_token
@ -51,6 +52,7 @@ typedef struct s_token
 t_token	token_new_meta(enum e_token type);
 // This create a "simple" token consisting of a string
 t_token	token_new(enum e_token type);
 t_token	token_new_none(void);
 void	token_free(t_token tok);
 bool	token_is_meta(t_token tok);
--- a/parser/src/dollar.c
+++ b/parser/src/dollar.c
@ -1,36 +0,0 @@
 /* ************************************************************************** */
 /*                                                                            */
 /*                                                        :::      ::::::::   */
 /*   dollar.c                                           :+:      :+:    :+:   */
 /*                                                    +:+ +:+         +:+     */
 /*   By: rparodi <rparodi@student.42.fr>            +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/09/27 22:18:46 by rparodi           #+#    #+#             */
 /*   Updated: 2024/09/28 14:50:56 by maiboyer         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 #include "me/vec/vec_token.h"
 #include "parser/token.h"
 #include "me/string/string.h"
 #include "me/types.h"
 #include <stdio.h>
 // MAIX: Is it really necessary to split this into its own function, given
 //		that it only returns the value of the comparison?
 //		Wouldn't it be better to write a function like
 //		"bool create_single_char_token(char c, t_token *tok)" that creates a
 //		token in `tok` and returns true when it matched a character that
 //		corresponds to a specific token (such as $ or parentheses, for example)?
 /**
 * @brief Tell whether a character is the dollar sign.
 *
 * @param c character to test
 * @return true when c is '$', false for any other character
 */
 bool	is_dollar(char c)
 {
 	return (c == '$');
 }
--- a/parser/src/parentheses.c
+++ b/parser/src/parentheses.c
@ -1,29 +0,0 @@
 /* ************************************************************************** */
 /*                                                                            */
 /*                                                        :::      ::::::::   */
 /*   parentheses.c                                      :+:      :+:    :+:   */
 /*                                                    +:+ +:+         +:+     */
 /*   By: rparodi <rparodi@student.42.fr>            +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/09/30 12:25:22 by rparodi           #+#    #+#             */
 /*   Updated: 2024/09/30 12:28:26 by rparodi          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 #include "me/vec/vec_token.h"
 #include "parser/token.h"
 #include "me/string/string.h"
 #include "me/types.h"
 #include <stdio.h>
 /**
 * @brief Classify an opening parenthesis.
 *
 * @param c current character
 * @param next character following c
 * @return 0 when c is not '(', 2 when c and next are both '(', 1 otherwise
 */
 char	is_parentheses(char c, char next)
 {
 	if (c != '(')
 		return (0);
 	if (next == '(')
 		return (2);
 	return (1);
 }
--- a/parser/src/quotes.c
+++ b/parser/src/quotes.c
@ -1,59 +0,0 @@
 /* ************************************************************************** */
 /*                                                                            */
 /*                                                        :::      ::::::::   */
 /*   quotes.c                                           :+:      :+:    :+:   */
 /*                                                    +:+ +:+         +:+     */
 /*   By: rparodi <rparodi@student.42.fr>            +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/09/27 11:46:45 by rparodi           #+#    #+#             */
 /*   Updated: 2024/09/30 12:28:26 by rparodi          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 #include "me/vec/vec_token.h"
 #include "parser/token.h"
 #include "me/string/string.h"
 #include "me/types.h"
 #include <stdio.h>
 /**
 * @brief Tell whether a character is a quote character.
 *
 * @param c character to test
 * @return true when c is a double quote or a single quote, false otherwise
 */
 bool	is_quote(char c)
 {
 	return (c == '\'' || c == '"');
 }
 /**
 * @brief Copy a quoted string, both quotes included, into a token.
 *
 * The scan starts at raw[*start], which must be a quote character, and stops
 * at the next occurrence of that SAME quote character, so the other kind of
 * quote may appear inside the string (for example "it's").
 *
 * @param raw the input line
 * @param start index of the opening quote; on success it is left on the
 *        closing quote, on an unterminated string it is left on the final '\0'
 * @param output token whose string receives the quoted text
 * @return NO_ERROR when the closing quote was found; ERROR on a NULL argument,
 *         when raw[*start] is not a quote, or when the string is unterminated
 */
 t_error	find_end_string(t_const_str raw, t_usize *start, t_token *output)
 {
 	char	opening;
 	if (!raw || !start || !output)
 		return (ERROR);
 	if (!is_quote(raw[*start]))
 		return (ERROR);
 	// Remember which quote opened the string: only that one may close it.
 	opening = raw[*start];
 	string_push_char(&output->string, opening);
 	(*start)++;
 	while (raw[*start] != '\0')
 	{
 		string_push_char(&output->string, raw[*start]);
 		if (raw[*start] == opening)
 			return (NO_ERROR);
 		(*start)++;
 	}
 	return (ERROR);
 }
--- a/parser/src/token.c
+++ b/parser/src/token.c
@ -1,59 +0,0 @@
 /* ************************************************************************** */
 /*                                                                            */
 /*                                                        :::      ::::::::   */
 /*   token.c                                            :+:      :+:    :+:   */
 /*                                                    +:+ +:+         +:+     */
 /*   By: rparodi <rparodi@student.42.fr>            +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/09/25 16:27:03 by rparodi           #+#    #+#             */
 /*   Updated: 2024/09/29 15:24:11 by rparodi          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 #include "parser/token.h"
 #include "me/string/string.h"
 #include "me/types.h"
 #include "me/char/char.h"
 #include "me/vec/vec_token.h"
 #include <stdio.h>
 #include <stdbool.h>
 #include "me/mem/mem.h"
 // MAIX: you can create one token per "whitespace" character: since we will
 // rework the token list afterwards to make it simpler to process, we will
 // manage not to end up with lots of whitespace tokens :)
 /**
 * @brief Split the raw input into tokens appended to output.
 *
 * A quoted string yields one DQUOTE or SQUOTE token holding the whole string,
 * '$' yields a DOLLAR token, a whitespace character yields a WHITESPACE token
 * and any other character yields an NQUOTE token holding that character.
 *
 * @param raw the input line
 * @param output vector receiving the tokens; tokens pushed before a failure
 *        stay in it
 * @return NO_ERROR on success, ERROR on a NULL argument or an unterminated
 *         quoted string
 */
 t_error	start_analyse(t_const_str raw, t_vec_token *output)
 {
 	t_usize	i;
 	t_token	token;
 	if (!raw || !output)
 		return (ERROR);
 	i = 0;
 	while (raw[i] != '\0')
 	{
 		if (is_quote(raw[i]))
 		{
 			if (raw[i] == '"')
 				token = token_new(DQUOTE);
 			else
 				token = token_new(SQUOTE);
 			// On failure `i` sits on the final '\0': stop here instead of
 			// stepping past the end of the input.
 			if (find_end_string(raw, &i, &token) != NO_ERROR)
 				return (token_free(token), ERROR);
 		}
 		else if (is_dollar(raw[i]))
 			token = token_new(DOLLAR);
 		else if (me_isspace(raw[i]))
 			token = token_new(WHITESPACE);
 		else
 		{
 			// Ordinary character: never push an uninitialised token.
 			token = token_new(NQUOTE);
 			string_push_char(&token.string, raw[i]);
 		}
 		vec_token_push(output, token);
 		i++;
 	}
 	return (NO_ERROR);
 }
 // MAIX: careful, you are not doing anything with the vec_token here :D
 //		also, what is the `list` argument supposed to do?
 //		is it a leftover from an older version?
 /**
 * @brief Tokenise a raw input line.
 *
 * The tokens are built in a local vector that is released before returning;
 * only the status of the analysis is reported to the caller.
 *
 * @param raw the input line
 * @return ERROR when raw is NULL or the analysis fails, NO_ERROR otherwise
 */
 t_error	tokeniser(t_const_str raw)
 {
 	t_vec_token	output;
 	t_error		status;
 	if (!raw)
 		return (ERROR);
 	// token_free is registered so that freeing the vector also frees the tokens.
 	output = vec_token_new(16, token_free);
 	status = start_analyse(raw, &output);
 	vec_token_free(output);
 	return (status);
 }
--- a/parser/src/token_lifetime.c
+++ b/parser/src/token_lifetime.c
@ -6,7 +6,7 @@
 /*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/09/28 14:37:13 by maiboyer          #+#    #+#             */
-/*   Updated: 2024/09/28 15:24:06 by rparodi          ###   ########.fr       */
+/*   Updated: 2024/09/30 20:15:05 by maiboyer         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
@ -23,19 +23,68 @@ void	token_free(t_token tok)
 // Build a "simple" token of the given type: its string comes from
 // string_new(16) and its subtokens vector is left empty ({NULL, 0, 0, NULL}).
 // NOTE(review): the '+' line below already closes the initializer, so the
 // context line that follows it looks like a leftover of the old two-line form.
 t_token token_new(enum e_token type)
 {
-	return ((t_token){.type = type, .string = string_new(16), \
+	return ((t_token){.type = type, .string = string_new(16), .subtokens = {NULL, 0, 0, NULL}});
 		.subtokens = {NULL, 0, 0, NULL}});
 }
 // Build a "meta" token of the given type: its string is left empty
 // ({NULL, 0, 0}) and its subtokens vector is created with
 // vec_token_new(16, token_free).
 // NOTE(review): the '+' line below already closes the initializer, so the
 // context line that follows it looks like a leftover of the old two-line form.
 t_token token_new_meta(enum e_token type)
 {
-	return ((t_token){.type = type, .string = {NULL, 0, 0}, \
+	return ((t_token){.type = type, .string = {NULL, 0, 0}, .subtokens = vec_token_new(16, token_free)});
 		.subtokens = vec_token_new(16, token_free)});
 }
 // Tell whether a token is a meta token: true only when its type is the word
 // type (WORD before this change, TOK_WORD after it), false otherwise.
 bool token_is_meta(t_token tok)
 {
-	if (tok.type == WORD)
+	if (tok.type == TOK_WORD)
 		return (true);
 	return (false);
 }
 /**
 * @brief Create the "no token" sentinel (type TOK_NONE).
 *
 * The sentinel owns nothing: both its string and its subtokens vector are
 * empty, so it can be overwritten or dropped without leaking memory.
 *
 * @return a token of type TOK_NONE with no string and no subtokens
 */
 t_token token_new_none(void)
 {
 	return ((t_token){
 		.type = TOK_NONE,
 		.string = {NULL, 0, 0},
 		.subtokens = {NULL, 0, 0, NULL}});
 }
 // TO REMOVE
 /**
 * @brief Give the printable name of a token's type (debugging helper).
 *
 * @param token token to describe
 * @return the type name without its TOK_ prefix, or NULL for an unknown type
 */
 t_str token_name(t_token *token)
 {
 	switch (token->type)
 	{
 		case TOK_NONE:
 			return ("NONE");
 		case TOK_AMP:
 			return ("AMP");
 		case TOK_AND:
 			return ("AND");
 		case TOK_CARRET:
 			return ("CARRET");
 		case TOK_DLCARRET:
 			return ("DLCARRET");
 		case TOK_DOLLAR:
 			return ("DOLLAR");
 		case TOK_DQUOTE:
 			return ("DQUOTE");
 		case TOK_DRCARRET:
 			return ("DRCARRET");
 		case TOK_EXPENSION:
 			return ("EXPENSION");
 		case TOK_LCARRET:
 			return ("LCARRET");
 		case TOK_LPAREN:
 			return ("LPAREN");
 		case TOK_NQUOTE:
 			return ("NQUOTE");
 		case TOK_OR:
 			return ("OR");
 		case TOK_PIPE:
 			return ("PIPE");
 		case TOK_RCARRET:
 			return ("RCARRET");
 		case TOK_RPAREN:
 			return ("RPAREN");
 		case TOK_SEMICOLON:
 			return ("SEMICOLON");
 		case TOK_SQUOTE:
 			return ("SQUOTE");
 		case TOK_WHITESPACE:
 			return ("WHITESPACE");
 		case TOK_WORD:
 			return ("WORD");
 		default:
 			return (NULL);
 	}
 }
--- a/parser/src/tokenizer.c
+++ b/parser/src/tokenizer.c
@ -0,0 +1,115 @@
 /* ************************************************************************** */
 /*                                                                            */
 /*                                                        :::      ::::::::   */
 /*   tokenizer.c                                        :+:      :+:    :+:   */
 /*                                                    +:+ +:+         +:+     */
 /*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/09/30 19:39:39 by maiboyer          #+#    #+#             */
 /*   Updated: 2024/09/30 20:19:06 by maiboyer         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 #include "me/char/char.h"
 #include "me/string/string.h"
 #include "me/types.h"
 #include "me/vec/vec_token.h"
 #include "parser/token.h"
 /**
 * @brief Flush the pending token, then push a new token made from `s`.
 *
 * When `*tok` holds a real token it is pushed to `tokens` and replaced by a
 * fresh TOK_NONE sentinel. When it is already the sentinel it is left
 * untouched, so the sentinel is not re-created (and its previous instance
 * lost) on every call.
 *
 * @param tokens vector receiving the tokens
 * @param tok pending token; it is of type TOK_NONE when the function returns
 * @param ttype type of the new token
 * @param s text stored in the new token
 */
 static void push_token_and_create_new(t_vec_token *tokens, t_token *tok, enum e_token ttype, t_const_str s)
 {
 	t_token created;
 	if (tok->type != TOK_NONE)
 	{
 		// Ownership of the pending token moves to the vector.
 		vec_token_push(tokens, *tok);
 		*tok = token_new_none();
 	}
 	created = token_new(ttype);
 	string_push(&created.string, s);
 	vec_token_push(tokens, created);
 }
 t_error tokenize(t_const_str s, t_vec_token *out)
 {
 	t_usize		i;
 	char		quote;
 	t_vec_token ret;
 	t_token		tok;
 	if (s == NULL || out == NULL)
 		return (ERROR);
 	i = 0;
 	quote = '\0';
 	tok = token_new_none();
 	ret = vec_token_new(16, token_free);
 	while (s[i] != '\0')
 	{
 		if (quote == '\0')
 		{
 			quote = s[i];
 			if (s[i] == '\"')
 				push_token_and_create_new(&ret, &tok, TOK_DQUOTE, "");
 			else if (s[i] == '\'')
 				push_token_and_create_new(&ret, &tok, TOK_SQUOTE, "");
 			else
 			{
 				quote = '\0';
 				if (s[i] == '$')
 					push_token_and_create_new(&ret, &tok, TOK_DOLLAR, "$");
 				else if (s[i] == '>')
 					push_token_and_create_new(&ret, &tok, TOK_RCARRET, ">");
 				else if (s[i] == '<')
 					push_token_and_create_new(&ret, &tok, TOK_LCARRET, "<");
 				else if (s[i] == '&')
 					push_token_and_create_new(&ret, &tok, TOK_AMP, "&");
 				else if (s[i] == '|')
 					push_token_and_create_new(&ret, &tok, TOK_PIPE, "|");
 				else if (s[i] == '(')
 					push_token_and_create_new(&ret, &tok, TOK_LPAREN, "(");
 				else if (s[i] == ')')
 					push_token_and_create_new(&ret, &tok, TOK_RPAREN, ")");
 				else if (s[i] == ';')
 					push_token_and_create_new(&ret, &tok, TOK_RPAREN, ";");
 				else if (me_isspace(s[i]))
 					push_token_and_create_new(&ret, &tok, TOK_WHITESPACE, " ");
 				else
 				{
 					if (tok.type == TOK_NONE)
 						tok = token_new(TOK_NQUOTE);
 					string_push_char(&tok.string, s[i]);
 				}
 			}
 		}
 		else if (quote == '\'')
 		{
 			if (s[i] == '\'')
 			{
 				quote = '\0';
 				if (tok.type != TOK_NONE)
 					vec_token_push(&ret, tok);
 				tok = token_new_none();
 			}
 			else
 				string_push_char(&tok.string, s[i]);
 		}
 		else if (quote == '\"')
 		{
 			if (s[i] == '\"')
 			{
 				quote = '\0';
 				if (tok.type != TOK_NONE)
 					vec_token_push(&ret, tok);
 				tok = token_new_none();
 			}
 			else
 				string_push_char(&tok.string, s[i]);
 		}
 		else
 			me_abort("invalid quote type");
 		i++;
 	}
 	if (tok.type == TOK_NQUOTE)
 		vec_token_push(&ret, tok);
 	if (tok.type == TOK_NQUOTE || tok.type == TOK_NONE)
 		return (*out = ret, NO_ERROR);
 	else
 		return (vec_token_free(ret), ERROR);
 }
--- a/sources/_helper_main.c
+++ b/sources/_helper_main.c
@ -6,7 +6,7 @@
 /*   By: rparodi <rparodi@student.42.fr>            +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/09/06 16:31:41 by rparodi           #+#    #+#             */
-/*   Updated: 2024/09/26 18:14:19 by maiboyer         ###   ########.fr       */
+/*   Updated: 2024/09/30 20:06:27 by maiboyer         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
@ -47,6 +47,7 @@ t_error	get_user_input(t_state *state)
 		}
 	}
 	line_edit_stop(&lstate);
 	printf("state->str_input = %s\n", state->str_input);
 	return (NO_ERROR);
 }
--- a/sources/main.c
+++ b/sources/main.c
@ -6,7 +6,7 @@
 /*   By: rparodi <rparodi@student.42.fr>            +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/03/28 14:40:38 by rparodi           #+#    #+#             */
-/*   Updated: 2024/09/26 18:14:59 by maiboyer         ###   ########.fr       */
+/*   Updated: 2024/09/30 20:11:12 by maiboyer         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
@ -22,6 +22,8 @@
 #include "me/str/str.h"
 #include "me/string/string.h"
 #include "me/types.h"
 #include "me/vec/vec_str.h"
 #include "me/vec/vec_token.h"
 #include <errno.h>
 #include <sys/types.h>
@ -97,9 +99,21 @@ void print_node_data(t_node *t, t_usize depth)
 }
 */
-void	parse_str(t_state *state)
+t_str token_name(t_token *out);
 // Iteration callback: print one token as "<type name> => <text>".
 // The index and the state pointer are not used.
 void func(t_usize i, t_token *token, void *state)
 {
 	t_str label;
 	(void)(i);
 	(void)(state);
 	label = token_name(token);
 	printf("%s => %s\n", label, token->string.buf);
 }
 t_error tokenize(t_const_str s, t_vec_token *out);
 /**
 * @brief Tokenize the user's input line and print every token.
 *
 * Nothing is printed when tokenization fails. The token vector is released
 * once it has been printed.
 *
 * @param state shell state whose str_input holds the line to tokenize
 */
 void	parse_str(t_state *state)
 {
 	t_vec_token tokens;
 	// On failure tokenize() leaves `tokens` unset, so there is nothing to free.
 	if (tokenize(state->str_input, &tokens) != NO_ERROR)
 		return ;
 	vec_token_iter(&tokens, func, NULL);
 	vec_token_free(tokens);
 }
 t_i32	main(t_i32 argc, t_str argv[], t_str envp[])