update: base of tokenizer
This commit is contained in:
parent
24d8bf5fc9
commit
b5556b9063
10 changed files with 215 additions and 219 deletions
|
|
@ -1,8 +1,6 @@
|
|||
SRC_FILES = \
|
||||
dollar \
|
||||
quotes \
|
||||
token \
|
||||
token_lifetime \
|
||||
tokenizer \
|
||||
|
||||
GEN_FILES = \
|
||||
\
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/09/26 17:59:23 by maiboyer #+# #+# */
|
||||
/* Updated: 2024/09/29 13:30:06 by rparodi ### ########.fr */
|
||||
/* Updated: 2024/09/30 19:47:53 by maiboyer ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
|
|
@ -18,25 +18,26 @@
|
|||
|
||||
/*
** Every kind of token the tokenizer can produce.
** The list carries two spellings of each kind: the unprefixed names, which the
** older tokenizer sources (start_analyse) refer to, and the TOK_-prefixed
** names, which tokenize() and the token helpers refer to.
*/
enum e_token
{
	AMP,			// single ampersand: &
	AND,			// logical and: &&
	CARRET,			// any redirection caret: < > << >>
	DLCARRET,		// double left caret: <<
	DOLLAR,			// dollar sign: $
	DQUOTE,			// double-quoted string
	DRCARRET,		// double right caret: >>
	EXPENSION,		// an expansion: $<no_quote_word>
	LCARRET,		// left caret: <
	LPAREN,			// left parenthesis: (
	NQUOTE,			// unquoted string
	OR,				// logical or: ||
	PIPE,			// pipe: |
	RCARRET,		// right caret: >
	RPAREN,			// right parenthesis: )
	SEMICOLON,		// semicolon: ;
	SQUOTE,			// single-quoted string
	WHITESPACE,		// whitespace outside of quoted strings
	WORD,			// meta token made of subtokens
	TOK_NONE,		// no token type: invalid / nonexistent token
	TOK_AMP,		// single ampersand: &
	TOK_AND,		// logical and: &&
	TOK_CARRET,		// any redirection caret: < > << >>
	TOK_DLCARRET,	// double left caret: <<
	TOK_DOLLAR,		// dollar sign: $
	TOK_DQUOTE,		// double-quoted string
	TOK_DRCARRET,	// double right caret: >>
	TOK_EXPENSION,	// an expansion: $<no_quote_word>
	TOK_LCARRET,	// left caret: <
	TOK_LPAREN,		// left parenthesis: (
	TOK_NQUOTE,		// unquoted string
	TOK_OR,			// logical or: ||
	TOK_PIPE,		// pipe: |
	TOK_RCARRET,	// right caret: >
	TOK_RPAREN,		// right parenthesis: )
	TOK_SEMICOLON,	// semicolon: ;
	TOK_SQUOTE,		// single-quoted string
	TOK_WHITESPACE,	// whitespace outside of quoted strings
	TOK_WORD,		// meta token made of subtokens
};
|
||||
|
||||
typedef struct s_token
|
||||
|
|
@ -51,6 +52,7 @@ typedef struct s_token
|
|||
t_token token_new_meta(enum e_token type);
|
||||
// This create a "simple" token consisting of a string
|
||||
t_token token_new(enum e_token type);
|
||||
t_token token_new_none(void);
|
||||
void token_free(t_token tok);
|
||||
bool token_is_meta(t_token tok);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,36 +0,0 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* dollar.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: rparodi <rparodi@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/09/27 22:18:46 by rparodi #+# #+# */
|
||||
/* Updated: 2024/09/28 14:50:56 by maiboyer ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "me/vec/vec_token.h"
|
||||
#include "parser/token.h"
|
||||
#include "me/string/string.h"
|
||||
#include "me/types.h"
|
||||
#include <stdio.h>
|
||||
|
||||
// MAIX: Is it really necessary to split this into its own function, given
// that it only returns the value of the comparison?
// Wouldn't it be better to write a function such as
// "bool create_single_char_token(char c, t_token *tok)" that creates a token
// in `tok` and returns true when it matched a char that corresponds
// to a specific token (like $ or parentheses for example)
/**
 * @brief Tell whether a character is the dollar sign
 *
 * @param c character to check
 * @return true when c is '$', false for any other character
 */
bool	is_dollar(char c)
{
	return (c == '$');
}
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* parentheses.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: rparodi <rparodi@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/09/30 12:25:22 by rparodi #+# #+# */
|
||||
/* Updated: 2024/09/30 12:28:26 by rparodi ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "me/vec/vec_token.h"
|
||||
#include "parser/token.h"
|
||||
#include "me/string/string.h"
|
||||
#include "me/types.h"
|
||||
#include <stdio.h>
|
||||
|
||||
/**
 * @brief Count the opening parentheses found at the current position
 *
 * @param c current character
 * @param next character that follows c
 * @return 0 when c is not '(', 2 when both c and next are '(', 1 otherwise
 */
char	is_parentheses(char c, char next)
{
	if (c != '(')
		return (0);
	if (next != '(')
		return (1);
	return (2);
}
|
||||
|
|
@ -1,59 +0,0 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* quotes.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: rparodi <rparodi@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/09/27 11:46:45 by rparodi #+# #+# */
|
||||
/* Updated: 2024/09/30 12:28:26 by rparodi ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "me/vec/vec_token.h"
|
||||
#include "parser/token.h"
|
||||
#include "me/string/string.h"
|
||||
#include "me/types.h"
|
||||
#include <stdio.h>
|
||||
|
||||
/**
 * @brief Tell whether a character is a quote
 *
 * @param c character to check
 * @return true when c is a double quote or a single quote, false otherwise
 */
bool	is_quote(char c)
{
	return (c == '\'' || c == '"');
}
|
||||
|
||||
/**
|
||||
* @brief token function that's read the string and return the tokens
|
||||
*
|
||||
* @param raw the input from stdin
|
||||
* @param start the index where the first quote was found
|
||||
* @param output the token of the string
|
||||
* @return Check if there is an error on this function
|
||||
*/
|
||||
t_error find_end_string(t_const_str raw, t_usize *start, t_token *output)
|
||||
{
|
||||
if (!raw || !output)
|
||||
return (ERROR);
|
||||
if (is_quote(raw[(*start)]))
|
||||
{
|
||||
string_push_char(&output->string, raw[(*start)]);
|
||||
(*start)++;
|
||||
if (raw[(*start)] == '\0')
|
||||
return (ERROR);
|
||||
while (raw[(*start)] != '\0')
|
||||
{
|
||||
string_push_char(&output->string, raw[(*start)]);
|
||||
if (is_quote(raw[(*start)]))
|
||||
return (NO_ERROR);
|
||||
(*start)++;
|
||||
}
|
||||
}
|
||||
return (ERROR);
|
||||
}
|
||||
|
|
@ -1,59 +0,0 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* token.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: rparodi <rparodi@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/09/25 16:27:03 by rparodi #+# #+# */
|
||||
/* Updated: 2024/09/29 15:24:11 by rparodi ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "parser/token.h"
|
||||
#include "me/string/string.h"
|
||||
#include "me/types.h"
|
||||
#include "me/char/char.h"
|
||||
#include "me/vec/vec_token.h"
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
#include "me/mem/mem.h"
|
||||
|
||||
// MAIX: tu peux faire un token par character "whitespace", vu qu'on va
|
||||
// manipuler la list de token apres pour faire des truc plus simple a process
|
||||
// on se debrouillera pour plus avoir plein de token whitespace :)
|
||||
t_error start_analyse(t_const_str raw, t_vec_token *output)
|
||||
{
|
||||
t_usize i;
|
||||
t_token token;
|
||||
|
||||
if (!raw || !output)
|
||||
return (ERROR);
|
||||
i = 0;
|
||||
while (raw[i] != '\0')
|
||||
{
|
||||
if (me_isspace(raw[i]))
|
||||
token = token_new(WHITESPACE);
|
||||
if (is_quote(raw[i]))
|
||||
find_end_string(raw, &i, &token);
|
||||
if (is_dollar(raw[i]))
|
||||
token = token_new(DOLLAR);
|
||||
vec_token_push(output, token);
|
||||
i++;
|
||||
}
|
||||
return (NO_ERROR);
|
||||
}
|
||||
|
||||
// MAIX: attention tu ne fais rien avec le vec_token ici :D
|
||||
// aussi l'argument list est censer faire quoi ?
|
||||
// c'est un reste d'une version ancienne ?
|
||||
t_error tokeniser(t_const_str raw)
|
||||
{
|
||||
t_vec_token output;
|
||||
|
||||
if (!raw)
|
||||
return (ERROR);
|
||||
output = vec_token_new(16, NULL);
|
||||
start_analyse(raw, &output);
|
||||
return (NO_ERROR);
|
||||
}
|
||||
|
|
@ -6,14 +6,14 @@
|
|||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/09/28 14:37:13 by maiboyer #+# #+# */
|
||||
/* Updated: 2024/09/28 15:24:06 by rparodi ### ########.fr */
|
||||
/* Updated: 2024/09/30 20:15:05 by maiboyer ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "me/vec/vec_token.h"
|
||||
#include "parser/token.h"
|
||||
|
||||
void token_free(t_token tok)
|
||||
void token_free(t_token tok)
|
||||
{
|
||||
if (tok.string.buf != NULL)
|
||||
string_free(tok.string);
|
||||
|
|
@ -21,21 +21,70 @@ void token_free(t_token tok)
|
|||
vec_token_free(tok.subtokens);
|
||||
}
|
||||
|
||||
t_token token_new(enum e_token type)
|
||||
t_token token_new(enum e_token type)
|
||||
{
|
||||
return ((t_token){.type = type, .string = string_new(16), \
|
||||
.subtokens = {NULL, 0, 0, NULL}});
|
||||
return ((t_token){.type = type, .string = string_new(16), .subtokens = {NULL, 0, 0, NULL}});
|
||||
}
|
||||
|
||||
t_token token_new_meta(enum e_token type)
|
||||
t_token token_new_meta(enum e_token type)
|
||||
{
|
||||
return ((t_token){.type = type, .string = {NULL, 0, 0}, \
|
||||
.subtokens = vec_token_new(16, token_free)});
|
||||
return ((t_token){.type = type, .string = {NULL, 0, 0}, .subtokens = vec_token_new(16, token_free)});
|
||||
}
|
||||
|
||||
/**
 * @brief Tell whether a token is a meta token
 *
 * @param tok token to inspect
 * @return true when the token type is TOK_WORD, false otherwise
 */
bool	token_is_meta(t_token tok)
{
	return (tok.type == TOK_WORD);
}
|
||||
|
||||
t_token token_new_none(void)
|
||||
{
|
||||
return ((t_token){.type = TOK_NONE, .string = {NULL, 0, 0}, .subtokens = vec_token_new(16, token_free)});
|
||||
}
|
||||
|
||||
// TO REMOVE
|
||||
t_str token_name(t_token *token)
|
||||
{
|
||||
if (token->type == TOK_NONE)
|
||||
return ("NONE");
|
||||
if (token->type == TOK_AMP)
|
||||
return ("AMP");
|
||||
if (token->type == TOK_AND)
|
||||
return ("AND");
|
||||
if (token->type == TOK_CARRET)
|
||||
return ("CARRET");
|
||||
if (token->type == TOK_DLCARRET)
|
||||
return ("DLCARRET");
|
||||
if (token->type == TOK_DOLLAR)
|
||||
return ("DOLLAR");
|
||||
if (token->type == TOK_DQUOTE)
|
||||
return ("DQUOTE");
|
||||
if (token->type == TOK_DRCARRET)
|
||||
return ("DRCARRET");
|
||||
if (token->type == TOK_EXPENSION)
|
||||
return ("EXPENSION");
|
||||
if (token->type == TOK_LCARRET)
|
||||
return ("LCARRET");
|
||||
if (token->type == TOK_LPAREN)
|
||||
return ("LPAREN");
|
||||
if (token->type == TOK_NQUOTE)
|
||||
return ("NQUOTE");
|
||||
if (token->type == TOK_OR)
|
||||
return ("OR");
|
||||
if (token->type == TOK_PIPE)
|
||||
return ("PIPE");
|
||||
if (token->type == TOK_RCARRET)
|
||||
return ("RCARRET");
|
||||
if (token->type == TOK_RPAREN)
|
||||
return ("RPAREN");
|
||||
if (token->type == TOK_SEMICOLON)
|
||||
return ("SEMICOLON");
|
||||
if (token->type == TOK_SQUOTE)
|
||||
return ("SQUOTE");
|
||||
if (token->type == TOK_WHITESPACE)
|
||||
return ("WHITESPACE");
|
||||
if (token->type == TOK_WORD)
|
||||
return ("WORD");
|
||||
return (NULL);
|
||||
}
|
||||
|
|
|
|||
115
parser/src/tokenizer.c
Normal file
115
parser/src/tokenizer.c
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* tokenizer.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2024/09/30 19:39:39 by maiboyer #+# #+# */
|
||||
/* Updated: 2024/09/30 20:19:06 by maiboyer ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "me/char/char.h"
|
||||
#include "me/string/string.h"
|
||||
#include "me/types.h"
|
||||
#include "me/vec/vec_token.h"
|
||||
#include "parser/token.h"
|
||||
|
||||
/**
 * @brief Flush the pending token, then append a self-contained token
 *
 * When `*tok` holds a real token (any type but TOK_NONE) it is pushed onto
 * `tokens` and `*tok` is reset to a fresh TOK_NONE placeholder. When `*tok`
 * already is a placeholder it is kept as is: replacing it with another
 * placeholder would drop the old one without releasing it.
 * A new token of type `ttype`, with `s` appended to its string, is then
 * pushed onto `tokens`.
 *
 * @param tokens vector receiving the tokens
 * @param tok token currently being accumulated by the caller
 * @param ttype type of the token to append
 * @param s text of the token to append
 */
static void	push_token_and_create_new(t_vec_token *tokens, t_token *tok,
				enum e_token ttype, t_const_str s)
{
	t_token	tmp;

	if (tok->type != TOK_NONE)
	{
		vec_token_push(tokens, *tok);
		*tok = token_new_none();
	}
	tmp = token_new(ttype);
	string_push(&tmp.string, s);
	vec_token_push(tokens, tmp);
}
|
||||
|
||||
t_error tokenize(t_const_str s, t_vec_token *out)
|
||||
{
|
||||
t_usize i;
|
||||
char quote;
|
||||
t_vec_token ret;
|
||||
t_token tok;
|
||||
|
||||
if (s == NULL || out == NULL)
|
||||
return (ERROR);
|
||||
i = 0;
|
||||
quote = '\0';
|
||||
tok = token_new_none();
|
||||
ret = vec_token_new(16, token_free);
|
||||
while (s[i] != '\0')
|
||||
{
|
||||
if (quote == '\0')
|
||||
{
|
||||
quote = s[i];
|
||||
if (s[i] == '\"')
|
||||
push_token_and_create_new(&ret, &tok, TOK_DQUOTE, "");
|
||||
else if (s[i] == '\'')
|
||||
push_token_and_create_new(&ret, &tok, TOK_SQUOTE, "");
|
||||
else
|
||||
{
|
||||
quote = '\0';
|
||||
if (s[i] == '$')
|
||||
push_token_and_create_new(&ret, &tok, TOK_DOLLAR, "$");
|
||||
else if (s[i] == '>')
|
||||
push_token_and_create_new(&ret, &tok, TOK_RCARRET, ">");
|
||||
else if (s[i] == '<')
|
||||
push_token_and_create_new(&ret, &tok, TOK_LCARRET, "<");
|
||||
else if (s[i] == '&')
|
||||
push_token_and_create_new(&ret, &tok, TOK_AMP, "&");
|
||||
else if (s[i] == '|')
|
||||
push_token_and_create_new(&ret, &tok, TOK_PIPE, "|");
|
||||
else if (s[i] == '(')
|
||||
push_token_and_create_new(&ret, &tok, TOK_LPAREN, "(");
|
||||
else if (s[i] == ')')
|
||||
push_token_and_create_new(&ret, &tok, TOK_RPAREN, ")");
|
||||
else if (s[i] == ';')
|
||||
push_token_and_create_new(&ret, &tok, TOK_RPAREN, ";");
|
||||
else if (me_isspace(s[i]))
|
||||
push_token_and_create_new(&ret, &tok, TOK_WHITESPACE, " ");
|
||||
else
|
||||
{
|
||||
if (tok.type == TOK_NONE)
|
||||
tok = token_new(TOK_NQUOTE);
|
||||
string_push_char(&tok.string, s[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (quote == '\'')
|
||||
{
|
||||
if (s[i] == '\'')
|
||||
{
|
||||
quote = '\0';
|
||||
if (tok.type != TOK_NONE)
|
||||
vec_token_push(&ret, tok);
|
||||
tok = token_new_none();
|
||||
}
|
||||
else
|
||||
string_push_char(&tok.string, s[i]);
|
||||
}
|
||||
else if (quote == '\"')
|
||||
{
|
||||
if (s[i] == '\"')
|
||||
{
|
||||
quote = '\0';
|
||||
if (tok.type != TOK_NONE)
|
||||
vec_token_push(&ret, tok);
|
||||
tok = token_new_none();
|
||||
}
|
||||
else
|
||||
string_push_char(&tok.string, s[i]);
|
||||
}
|
||||
else
|
||||
me_abort("invalid quote type");
|
||||
i++;
|
||||
}
|
||||
if (tok.type == TOK_NQUOTE)
|
||||
vec_token_push(&ret, tok);
|
||||
if (tok.type == TOK_NQUOTE || tok.type == TOK_NONE)
|
||||
return (*out = ret, NO_ERROR);
|
||||
else
|
||||
return (vec_token_free(ret), ERROR);
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue