update: add a pass that collapses adjacent tokens into one where possible

maix0 2024-09-26 22:20:24 +02:00
parent 2e811bcec2
commit 774f374965
5 changed files with 81 additions and 54 deletions

View file

@@ -1,10 +1,12 @@
+import collapse
+import concat
 import str_to_token
-import concat
 import ttoken
 
 s = input("> ")
 print(s)
 
 tokens = str_to_token.str_to_token(s)
 concated_tokens = concat.concat(tokens)
+collapsed_tokens = collapse.collapse(concated_tokens)
-ttoken.print_tokenlist(concated_tokens)
+ttoken.print_tokenlist(collapsed_tokens)
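
The new stage slots between concat and the final print. A minimal sketch of a run with a hardcoded string instead of input() (the example string is made up; the module names are assumed from the imports above):

    import collapse
    import concat
    import str_to_token
    import ttoken

    tokens = str_to_token.str_to_token("echo a || echo b")
    words = concat.concat(tokens)
    collapsed = collapse.collapse(words)
    # the two adjacent PIPE tokens from "||" should now print as a single OR token
    ttoken.print_tokenlist(collapsed)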

View file

@@ -0,0 +1,38 @@
+from ttoken import *
+
+TT = TokenType
+
+
+# This function will transform some tokens into others depending on what follows them
+def collapse(tokens: list[Token]):
+    i = 0
+    out = []
+    while i < len(tokens):
+        tok = tokens[i]
+        peek = tokens[i + 1] if i + 1 < len(tokens) else None
+        if peek is None:
+            out.append(tok)
+            i += 1
+            continue
+        if tok.ty == TT.PIPE and peek.ty == TT.PIPE:
+            out.append(Token(TT.OR, string="||"))
+            i += 2
+        elif tok.ty == TT.AMP and peek.ty == TT.AMP:
+            out.append(Token(TT.AND, string="&&"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == "<" and peek.ty == TT.CARRET and peek.string == "<":
+            out.append(Token(TT.DLCARRET, string="<<"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == ">" and peek.ty == TT.CARRET and peek.string == ">":
+            out.append(Token(TT.DRCARRET, string=">>"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == "<":
+            out.append(Token(TT.LCARRET, string="<"))
+            i += 1
+        elif tok.ty == TT.CARRET and tok.string == ">":
+            out.append(Token(TT.RCARRET, string=">"))
+            i += 1
+        else:
+            out.append(tok)
+            i += 1
+    return out
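
The pairing logic can be exercised in isolation; a minimal sketch, assuming this file is importable as collapse and building the tokens by hand:

    from collapse import collapse
    from ttoken import Token, TokenType

    TT = TokenType
    # two adjacent PIPEs fuse into OR; "<" followed by a word narrows to LCARRET
    toks = [
        Token(TT.PIPE, string="|"),
        Token(TT.PIPE, string="|"),
        Token(TT.CARRET, string="<"),
        Token(TT.NQUOTE, string="file"),
    ]
    for tok in collapse(toks):
        print(tok.ty.name, tok.string)
    # OR ||
    # LCARRET <
    # NQUOTE file

Note that the peek-is-None branch appends the token unchanged, so a lone caret at the very end of the list stays a generic CARRET rather than being narrowed.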

View file

@@ -2,7 +2,7 @@ from ttoken import *
 
 
 # This function will make a "big" token that will represent a word in the shell sense
-def concat(tokens: list[Token]):
+def concat(tokens: list[Token]) -> list[Token]:
     i = 0
     out = []
     while i < len(tokens):
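
The signature now promises a list[Token] back. A hypothetical call, assuming (as the comment above says) that adjacent fragments with no whitespace between them are grouped into one shell word:

    from concat import concat
    from ttoken import Token, TokenType

    TT = TokenType
    # echo'a' is a single word in the shell sense: two fragments, no whitespace
    frags = [Token(TT.NQUOTE, string="echo"), Token(TT.SQUOTE, string="a")]
    words = concat(frags)
    print([w.is_word() for w in words])  # hypothetically: [True]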

View file

@@ -3,12 +3,12 @@ from ttoken import *
 TT = TokenType
 
 
-def is_quote(c: chr):
+def is_quote(c: str) -> bool:
     return c == "'" or c == '"'
 
 
 # This function takes the string and separates it into different tokens depending on the quotes
-def str_to_token(s: str):
+def str_to_token(s: str) -> list[Token]:
     tokens = []
     current_token = None
     quote = 0
@@ -36,6 +36,8 @@ def str_to_token(s: str):
         ):
             tokens.append(current_token)
             current_token = Token(TT.WHITESPACE, string="")
+            i += 1
+            continue
         else:
             # we DON'T have a whitespace; if the current token is a whitespace, push it and start a raw-string token
             if current_token.ty == TT.WHITESPACE:
@@ -64,7 +66,7 @@ def str_to_token(s: str):
         elif c == ";":
             tokens.append(current_token)
             current_token = None
-            tokens.append(Token(TT.CARRET, string=";"))
+            tokens.append(Token(TT.SEMICOLON, string=";"))
         elif c == ">" or c == "<":
             tokens.append(current_token)
             current_token = None
@@ -97,4 +99,9 @@ def str_to_token(s: str):
     # if the current token is not none and the current token is "no quote" then we push it
     if current_token != None and current_token.ty == TT.NQUOTE:
         tokens.append(current_token)
-    return tokens
+    # clean up any empty tokens that may be left behind
+    out = []
+    for tok in tokens:
+        if not (tok.ty == TT.NQUOTE and len(tok.string) == 0):
+            out.append(tok)
+    return out
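
Both fixes in this file show up on a tiny input; a sketch, assuming the module is importable as str_to_token (exact token boundaries depend on the parts of the loop this diff does not touch):

    from str_to_token import str_to_token

    # ";" used to come back mislabelled as CARRET and should now be SEMICOLON;
    # the final pass should drop any empty NQUOTE tokens the operator handling leaves behind
    for tok in str_to_token("echo a;echo b"):
        print(tok.ty.name, repr(tok.string))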

View file

@@ -4,25 +4,25 @@ from dataclasses import dataclass
 
 TokenType = Enum(
     "TokenType",
     [
-        "AMP",
-        "AND",
-        "CARRET",
-        "DOLLAR",
-        "DQUOTE",
-        "EXPENSION",
-        "LCARRET",
-        "LCARRET_DOUBLE",
-        "LPAREN",
-        "NQUOTE",
-        "OR",
-        "PIPE",
-        "RCARRET",
-        "RCARRET_DOUBLE",
-        "RPAREN",
-        "SEMICOLON",
-        "SQUOTE",
-        "WHITESPACE",
-        "WORD",
+        "AMP",  # ampersand == &
+        "AND",  # and == &&
+        "CARRET",  # any caret == < > << >>
+        "DLCARRET",  # double left caret == <<
+        "DOLLAR",  # dollar == $
+        "DQUOTE",  # double quote string
+        "DRCARRET",  # double right caret == >>
+        "EXPENSION",  # an expansion == $<no_quote_word>
+        "LCARRET",  # left caret == <
+        "LPAREN",  # left parenthesis == (
+        "NQUOTE",  # no quote string
+        "OR",  # or == ||
+        "PIPE",  # pipe == |
+        "RCARRET",  # right caret == >
+        "RPAREN",  # right parenthesis == )
+        "SEMICOLON",  # semicolon == ;
+        "SQUOTE",  # single quote string
+        "WHITESPACE",  # whitespace outside of quoted strings
+        "WORD",  # a meta token, which contains subtokens
     ],
 )
@@ -33,12 +33,14 @@ class Token:
     string: str = None
     subtokens: list = None
 
-    def is_subtoken(self) -> bool:
+    def is_metatoken(self) -> bool:
         return self.subtokens != None
 
     def append_char(self, c: str):
         if self.string is None:
-            raise Exception(f"Tried to push a char on a token that contains subtokens, TT={self.ty}")
+            raise Exception(
+                f"Tried to push a char on a token that contains subtokens, TT={self.ty}"
+            )
         self.string += c
 
     def is_word(self):
@@ -50,34 +52,12 @@ class Token:
     )
 
 
-def print_tokenlist(tokens: list[Token], *, between="", end="\n"):
+def print_tokenlist(tokens: list[Token], *, depth=0):
     for tok in tokens:
-        col = "0"
-        if tok.ty == TokenType.SQUOTE:
-            col = "33"
-        if tok.ty == TokenType.DQUOTE:
-            col = "32"
-        if tok.ty == TokenType.WHITESPACE:
-            col = "31;4"
-        if tok.ty == TokenType.DOLLAR:
-            col = "31"
-        if tok.ty == TokenType.LPAREN:
-            col = "35"
-        if tok.ty == TokenType.RPAREN:
-            col = "35"
-        if tok.ty == TokenType.AMP:
-            col = "35"
-        if tok.ty == TokenType.PIPE:
-            col = "35"
-        if tok.ty == TokenType.SEMICOLON:
-            col = "35"
-        if tok.ty == TokenType.CARRET:
-            col = "35"
-        if tok.is_subtoken():
-            print_tokenlist(tok.subtokens, between="\x1b[100m", end="")
+        if tok.is_metatoken():
+            print_tokenlist(tok.subtokens, depth=depth + 1)
         else:
-            print(f"\x1b[{col}m{between}{tok.string}\x1b[0m", end="")
-        #print(end)
+            print(f"{'\t' * depth}{tok.ty.name:>10} => \x1b[31;40m{tok.string}\x1b[0m")
 
 
 __all__ = ["TokenType", "Token", "print_tokenlist"]
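
The reworked printer is simpler than the old color table: metatokens recurse one tab deeper, everything else prints as "TYPE => string". Note that the backslash inside the f-string expression ('\t' * depth) only parses on Python 3.12+ (PEP 701). A minimal sketch of a call, building the tokens by hand:

    from ttoken import Token, TokenType, print_tokenlist

    TT = TokenType
    word = Token(TT.WORD, subtokens=[Token(TT.NQUOTE, string="echo")])
    print_tokenlist([word, Token(TT.SEMICOLON, string=";")])
    # prints something like (the NQUOTE line one tab deeper):
    #     NQUOTE => echo
    #  SEMICOLON => ;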