diff --git a/parser/token.py/app.py b/parser/token.py/app.py
index b6b174b4..473dbaa5 100644
--- a/parser/token.py/app.py
+++ b/parser/token.py/app.py
@@ -1,10 +1,12 @@
+import collapse
+import concat
 import str_to_token
-import concat
 import ttoken
 
 s = input("> ")
 print(s)
 tokens = str_to_token.str_to_token(s)
 concated_tokens = concat.concat(tokens)
+collapsed_tokens = collapse.collapse(concated_tokens)
 
-ttoken.print_tokenlist(concated_tokens)
+ttoken.print_tokenlist(collapsed_tokens)
diff --git a/parser/token.py/collapse.py b/parser/token.py/collapse.py
new file mode 100644
index 00000000..2ff66c00
--- /dev/null
+++ b/parser/token.py/collapse.py
@@ -0,0 +1,38 @@
+from ttoken import *
+
+TT = TokenType
+
+
+# This function will transform some tokens into others depending on what follows them
+def collapse(tokens: list[Token]):
+    i = 0
+    out = []
+    while i < len(tokens):
+        tok = tokens[i]
+        peek = tokens[i + 1] if i + 1 < len(tokens) else None
+        if peek is None:
+            out.append(tok)
+            i += 1
+            continue
+        if tok.ty == TT.PIPE and peek.ty == TT.PIPE:
+            out.append(Token(TT.OR, string="||"))
+            i += 2
+        elif tok.ty == TT.AMP and peek.ty == TT.AMP:
+            out.append(Token(TT.AND, string="&&"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == "<" and peek.ty == TT.CARRET and peek.string == "<":
+            out.append(Token(TT.DLCARRET, string="<<"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == ">" and peek.ty == TT.CARRET and peek.string == ">":
+            out.append(Token(TT.DRCARRET, string=">>"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == "<":
+            out.append(Token(TT.LCARRET, string="<"))
+            i += 1
+        elif tok.ty == TT.CARRET and tok.string == ">":
+            out.append(Token(TT.RCARRET, string=">"))
+            i += 1
+        else:
+            out.append(tok)
+            i += 1
+    return out
diff --git a/parser/token.py/concat.py b/parser/token.py/concat.py
index eb5f0cd2..49546d63 100644
--- a/parser/token.py/concat.py
+++ b/parser/token.py/concat.py
@@ -2,7 +2,7 @@ from ttoken import *
 
 
 # This function will make a "big" token that will represent a word in the shell sense
-def concat(tokens: list[Token]):
+def concat(tokens: list[Token]) -> list[Token]:
     i = 0
     out = []
     while i < len(tokens):
diff --git a/parser/token.py/str_to_token.py b/parser/token.py/str_to_token.py
index cc1c16fb..de103acc 100644
--- a/parser/token.py/str_to_token.py
+++ b/parser/token.py/str_to_token.py
@@ -3,12 +3,12 @@ from ttoken import *
 TT = TokenType
 
 
-def is_quote(c: chr):
+def is_quote(c: str) -> bool:
     return c == "'" or c == '"'
 
 
 # This function takes the string and seperate them into different tokens depending on the quotes
-def str_to_token(s: str):
+def str_to_token(s: str) -> list[Token]:
     tokens = []
     current_token = None
     quote = 0
@@ -36,6 +36,8 @@ def str_to_token(s: str):
             ):
                 tokens.append(current_token)
                 current_token = Token(TT.WHITESPACE, string="")
+                i += 1;
+                continue;
         else:
             # we DON'T have a whitespace, then if the current token is a whitespace, just push it and set the new token to raw_string
             if current_token.ty == TT.WHITESPACE:
@@ -64,7 +66,7 @@ def str_to_token(s: str):
         elif c == ";":
            tokens.append(current_token)
            current_token = None
-           tokens.append(Token(TT.CARRET, string=";"))
+           tokens.append(Token(TT.SEMICOLON, string=";"))
        elif c == ">" or c == "<":
            tokens.append(current_token)
            current_token = None
@@ -97,4 +99,9 @@ def str_to_token(s: str):
     # if the current token is not none and the current token is "no quote" then we push it
     if current_token != None and current_token.ty == TT.NQUOTE:
         tokens.append(current_token)
-    return tokens
+    # cleanup the empty tokens that may be here
+    out = []
+    for tok in tokens:
+        if not (tok.ty == TT.NQUOTE and len(tok.string) == 0):
+            out.append(tok)
+    return out
diff --git a/parser/token.py/ttoken.py b/parser/token.py/ttoken.py
index 4e377d15..4f2f83cb 100644
--- a/parser/token.py/ttoken.py
+++ b/parser/token.py/ttoken.py
@@ -4,25 +4,25 @@ from dataclasses import dataclass
 TokenType = Enum(
     "TokenType",
     [
-        "AMP",
-        "AND",
-        "CARRET",
-        "DOLLAR",
-        "DQUOTE",
-        "EXPENSION",
-        "LCARRET",
-        "LCARRET_DOUBLE",
-        "LPAREN",
-        "NQUOTE",
-        "OR",
-        "PIPE",
-        "RCARRET",
-        "RCARRET_DOUBLE",
-        "RPAREN",
-        "SEMICOLON",
-        "SQUOTE",
-        "WHITESPACE",
-        "WORD",
+        "AMP",  # ampersand == &
+        "AND",  # and == &&
+        "CARRET",  # any carret == < > << >>
+        "DLCARRET",  # double left carret == <<
+        "DOLLAR",  # dollar == $
+        "DQUOTE",  # double quote string
+        "DRCARRET",  # double right carret == >>
+        "EXPENSION",  # an expension == $
+        "LCARRET",  # left carret == <
+        "LPAREN",  # left parenthesis == (
+        "NQUOTE",  # no quote string
+        "OR",  # or == ||
+        "PIPE",  # pipe == |
+        "RCARRET",  # right carret == >
+        "RPAREN",  # right parenthesis == )
+        "SEMICOLON",  # semicolor == ;
+        "SQUOTE",  # single quote string
+        "WHITESPACE",  # whitespace outside of quoted strings
+        "WORD",  # a meta token, which contains subtokens
     ],
 )
 
@@ -33,12 +33,14 @@ class Token:
     ty: TokenType
     string: str = None
     subtokens: list = None
-    def is_subtoken(self) -> bool:
+    def is_metatoken(self) -> bool:
         return self.subtokens != None
 
     def append_char(self, c: str):
         if self.string is None:
-            raise Exception(f"Tried to push a char on a token that contains subtokens, TT={self.ty}")
+            raise Exception(
+                f"Tried to push a char on a token that contains subtokens, TT={self.ty}"
+            )
         self.string += c
 
     def is_word(self):
@@ -50,34 +52,12 @@ class Token:
         )
 
 
-def print_tokenlist(tokens: list[Token], *, between="", end="\n"):
+def print_tokenlist(tokens: list[Token], *, depth=0):
     for tok in tokens:
-        col = "0"
-        if tok.ty == TokenType.SQUOTE:
-            col = "33"
-        if tok.ty == TokenType.DQUOTE:
-            col = "32"
-        if tok.ty == TokenType.WHITESPACE:
-            col = "31;4"
-        if tok.ty == TokenType.DOLLAR:
-            col = "31"
-        if tok.ty == TokenType.LPAREN:
-            col = "35"
-        if tok.ty == TokenType.RPAREN:
-            col = "35"
-        if tok.ty == TokenType.AMP:
-            col = "35"
-        if tok.ty == TokenType.PIPE:
-            col = "35"
-        if tok.ty == TokenType.SEMICOLON:
-            col = "35"
-        if tok.ty == TokenType.CARRET:
-            col = "35"
-        if tok.is_subtoken():
-            print_tokenlist(tok.subtokens, between="\x1b[100m", end="")
+        if tok.is_metatoken():
+            print_tokenlist(tok.subtokens, depth=depth + 1)
         else:
-            print(f"\x1b[{col}m{between}{tok.string}\x1b[0m", end="")
-            #print(end)
+            print(f"{'\t' * depth}{tok.ty.name:>10} => \x1b[31;40m{tok.string}\x1b[0m")
 
 
 __all__ = ["TokenType", "Token", "print_tokenlist"]
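
As a quick sanity check of the new collapse pass, here is a minimal sketch (not part of the patch) of how collapse.collapse could be exercised on a hand-built token list, assuming it is run from the parser/token.py directory so the modules import the same way app.py does; the expected output is read off the branches in collapse.py rather than taken from an actual run.

    from ttoken import Token, TokenType
    import collapse

    # two adjacent PIPE tokens should fold into a single OR ("||") token,
    # everything else should pass through unchanged
    toks = [
        Token(TokenType.NQUOTE, string="ls"),
        Token(TokenType.PIPE, string="|"),
        Token(TokenType.PIPE, string="|"),
        Token(TokenType.NQUOTE, string="wc"),
    ]
    folded = collapse.collapse(toks)
    print([(t.ty.name, t.string) for t in folded])
    # expected: [('NQUOTE', 'ls'), ('OR', '||'), ('NQUOTE', 'wc')]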