update: add a pass that collapses adjacent tokens into one where possible

maix0 2024-09-26 22:20:24 +02:00
parent 2e811bcec2
commit 774f374965
5 changed files with 81 additions and 54 deletions

View file

@@ -1,10 +1,12 @@
+import collapse
+import concat
 import str_to_token
-import concat
 import ttoken
 
 s = input("> ")
 print(s)
 
 tokens = str_to_token.str_to_token(s)
 concated_tokens = concat.concat(tokens)
+collapsed_tokens = collapse.collapse(concated_tokens)
-ttoken.print_tokenlist(concated_tokens)
+ttoken.print_tokenlist(collapsed_tokens)
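
The new stage slots between concat and the final print. A minimal sketch of a run with a hardcoded string instead of input() (the example string is made up; the module names are assumed from the imports above):

    import collapse
    import concat
    import str_to_token
    import ttoken

    tokens = str_to_token.str_to_token("echo a || echo b")
    words = concat.concat(tokens)
    collapsed = collapse.collapse(words)
    # the two adjacent PIPE tokens from "||" should now print as a single OR token
    ttoken.print_tokenlist(collapsed)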

View file

@@ -0,0 +1,38 @@
+from ttoken import *
+
+TT = TokenType
+
+
+# This function will transform some tokens into others depending on what follows them
+def collapse(tokens: list[Token]):
+    i = 0
+    out = []
+    while i < len(tokens):
+        tok = tokens[i]
+        peek = tokens[i + 1] if i + 1 < len(tokens) else None
+        if peek is None:
+            out.append(tok)
+            i += 1
+            continue
+        if tok.ty == TT.PIPE and peek.ty == TT.PIPE:
+            out.append(Token(TT.OR, string="||"))
+            i += 2
+        elif tok.ty == TT.AMP and peek.ty == TT.AMP:
+            out.append(Token(TT.AND, string="&&"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == "<" and peek.ty == TT.CARRET and peek.string == "<":
+            out.append(Token(TT.DLCARRET, string="<<"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == ">" and peek.ty == TT.CARRET and peek.string == ">":
+            out.append(Token(TT.DRCARRET, string=">>"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == "<":
+            out.append(Token(TT.LCARRET, string="<"))
+            i += 1
+        elif tok.ty == TT.CARRET and tok.string == ">":
+            out.append(Token(TT.RCARRET, string=">"))
+            i += 1
+        else:
+            out.append(tok)
+            i += 1
+    return out
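
The pairing logic can be exercised in isolation; a minimal sketch, assuming this file is importable as collapse and building the tokens by hand:

    from collapse import collapse
    from ttoken import Token, TokenType

    TT = TokenType
    # two adjacent PIPEs fuse into OR; "<" followed by a word narrows to LCARRET
    toks = [
        Token(TT.PIPE, string="|"),
        Token(TT.PIPE, string="|"),
        Token(TT.CARRET, string="<"),
        Token(TT.NQUOTE, string="file"),
    ]
    for tok in collapse(toks):
        print(tok.ty.name, tok.string)
    # OR ||
    # LCARRET <
    # NQUOTE file

Note that the peek-is-None branch appends the token unchanged, so a lone caret at the very end of the list stays a generic CARRET rather than being narrowed.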

View file

@@ -2,7 +2,7 @@ from ttoken import *
 
 
 # This function will make a "big" token that will represent a word in the shell sense
-def concat(tokens: list[Token]):
+def concat(tokens: list[Token]) -> list[Token]:
     i = 0
     out = []
     while i < len(tokens):
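
The signature now promises a list[Token] back. A hypothetical call, assuming (as the comment above says) that adjacent fragments with no whitespace between them are grouped into one shell word:

    from concat import concat
    from ttoken import Token, TokenType

    TT = TokenType
    # echo'a' is a single word in the shell sense: two fragments, no whitespace
    frags = [Token(TT.NQUOTE, string="echo"), Token(TT.SQUOTE, string="a")]
    words = concat(frags)
    print([w.is_word() for w in words])  # hypothetically: [True]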

View file

@@ -3,12 +3,12 @@ from ttoken import *
 TT = TokenType
 
 
-def is_quote(c: chr):
+def is_quote(c: str) -> bool:
     return c == "'" or c == '"'
 
 
 # This function takes the string and separates it into different tokens depending on the quotes
-def str_to_token(s: str):
+def str_to_token(s: str) -> list[Token]:
     tokens = []
     current_token = None
     quote = 0
@@ -36,6 +36,8 @@ def str_to_token(s: str):
         ):
             tokens.append(current_token)
             current_token = Token(TT.WHITESPACE, string="")
+            i += 1
+            continue
         else:
             # we DON'T have a whitespace; if the current token is a whitespace, push it and start a raw-string token
             if current_token.ty == TT.WHITESPACE:
@@ -64,7 +66,7 @@ def str_to_token(s: str):
         elif c == ";":
             tokens.append(current_token)
             current_token = None
-            tokens.append(Token(TT.CARRET, string=";"))
+            tokens.append(Token(TT.SEMICOLON, string=";"))
         elif c == ">" or c == "<":
             tokens.append(current_token)
             current_token = None
@@ -97,4 +99,9 @@ def str_to_token(s: str):
     # if the current token is not none and the current token is "no quote" then we push it
     if current_token != None and current_token.ty == TT.NQUOTE:
         tokens.append(current_token)
-    return tokens
+    # clean up any empty tokens that may be left behind
+    out = []
+    for tok in tokens:
+        if not (tok.ty == TT.NQUOTE and len(tok.string) == 0):
+            out.append(tok)
+    return out
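
Both fixes in this file show up on a tiny input; a sketch, assuming the module is importable as str_to_token (exact token boundaries depend on the parts of the loop this diff does not touch):

    from str_to_token import str_to_token

    # ";" used to come back mislabelled as CARRET and should now be SEMICOLON;
    # the final pass should drop any empty NQUOTE tokens the operator handling leaves behind
    for tok in str_to_token("echo a;echo b"):
        print(tok.ty.name, repr(tok.string))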

View file

@@ -4,25 +4,25 @@ from dataclasses import dataclass
 
 TokenType = Enum(
     "TokenType",
     [
-        "AMP",
-        "AND",
-        "CARRET",
-        "DOLLAR",
-        "DQUOTE",
-        "EXPENSION",
-        "LCARRET",
-        "LCARRET_DOUBLE",
-        "LPAREN",
-        "NQUOTE",
-        "OR",
-        "PIPE",
-        "RCARRET",
-        "RCARRET_DOUBLE",
-        "RPAREN",
-        "SEMICOLON",
-        "SQUOTE",
-        "WHITESPACE",
-        "WORD",
+        "AMP",  # ampersand == &
+        "AND",  # and == &&
+        "CARRET",  # any caret == < > << >>
+        "DLCARRET",  # double left caret == <<
+        "DOLLAR",  # dollar == $
+        "DQUOTE",  # double quote string
+        "DRCARRET",  # double right caret == >>
+        "EXPENSION",  # an expansion == $<no_quote_word>
+        "LCARRET",  # left caret == <
+        "LPAREN",  # left parenthesis == (
+        "NQUOTE",  # no quote string
+        "OR",  # or == ||
+        "PIPE",  # pipe == |
+        "RCARRET",  # right caret == >
+        "RPAREN",  # right parenthesis == )
+        "SEMICOLON",  # semicolon == ;
+        "SQUOTE",  # single quote string
+        "WHITESPACE",  # whitespace outside of quoted strings
+        "WORD",  # a meta token, which contains subtokens
     ],
 )
@@ -33,12 +33,14 @@ class Token:
     string: str = None
     subtokens: list = None
 
-    def is_subtoken(self) -> bool:
+    def is_metatoken(self) -> bool:
         return self.subtokens != None
 
     def append_char(self, c: str):
         if self.string is None:
-            raise Exception(f"Tried to push a char on a token that contains subtokens, TT={self.ty}")
+            raise Exception(
+                f"Tried to push a char on a token that contains subtokens, TT={self.ty}"
+            )
         self.string += c
 
     def is_word(self):
@@ -50,34 +52,12 @@ class Token:
     )
 
 
-def print_tokenlist(tokens: list[Token], *, between="", end="\n"):
+def print_tokenlist(tokens: list[Token], *, depth=0):
     for tok in tokens:
-        col = "0"
-        if tok.ty == TokenType.SQUOTE:
-            col = "33"
-        if tok.ty == TokenType.DQUOTE:
-            col = "32"
-        if tok.ty == TokenType.WHITESPACE:
-            col = "31;4"
-        if tok.ty == TokenType.DOLLAR:
-            col = "31"
-        if tok.ty == TokenType.LPAREN:
-            col = "35"
-        if tok.ty == TokenType.RPAREN:
-            col = "35"
-        if tok.ty == TokenType.AMP:
-            col = "35"
-        if tok.ty == TokenType.PIPE:
-            col = "35"
-        if tok.ty == TokenType.SEMICOLON:
-            col = "35"
-        if tok.ty == TokenType.CARRET:
-            col = "35"
-        if tok.is_subtoken():
-            print_tokenlist(tok.subtokens, between="\x1b[100m", end="")
+        if tok.is_metatoken():
+            print_tokenlist(tok.subtokens, depth=depth + 1)
         else:
-            print(f"\x1b[{col}m{between}{tok.string}\x1b[0m", end="")
-        #print(end)
+            print(f"{'\t' * depth}{tok.ty.name:>10} => \x1b[31;40m{tok.string}\x1b[0m")
 
 
 __all__ = ["TokenType", "Token", "print_tokenlist"]
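
The reworked printer is simpler than the old color table: metatokens recurse one tab deeper, everything else prints as "TYPE => string". Note that the backslash inside the f-string expression ('\t' * depth) only parses on Python 3.12+ (PEP 701). A minimal sketch of a call, building the tokens by hand:

    from ttoken import Token, TokenType, print_tokenlist

    TT = TokenType
    word = Token(TT.WORD, subtokens=[Token(TT.NQUOTE, string="echo")])
    print_tokenlist([word, Token(TT.SEMICOLON, string=";")])
    # prints something like (the NQUOTE line one tab deeper):
    #     NQUOTE => echo
    #  SEMICOLON => ;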