update: make something that collapses tokens into one if it can

maix0 2024-09-26 22:20:24 +02:00
parent 2e811bcec2
commit 774f374965
5 changed files with 81 additions and 54 deletions

View file

@@ -1,10 +1,12 @@
+import collapse
 import concat
 import str_to_token
-import concat
 import ttoken
 
 s = input("> ")
 print(s)
 
 tokens = str_to_token.str_to_token(s)
 concated_tokens = concat.concat(tokens)
-ttoken.print_tokenlist(concated_tokens)
+collapsed_tokens = collapse.collapse(concated_tokens)
+ttoken.print_tokenlist(collapsed_tokens)
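
For orientation, the pipeline now has three stages before printing. A sketch of the same wiring with stage comments; the sample input string is an illustrative assumption, not something from this commit:

    import collapse
    import concat
    import str_to_token
    import ttoken

    s = "cat < in.txt && grep x >> out.txt"
    tokens = str_to_token.str_to_token(s)  # characters -> raw tokens (quotes, carets, pipes, ...)
    words = concat.concat(tokens)          # adjacent pieces -> WORD metatokens
    ops = collapse.collapse(words)         # AMP+AMP -> AND, PIPE+PIPE -> OR, caret pairs -> << and >>
    ttoken.print_tokenlist(ops)            # one line per leaf token, indented by nesting depth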

View file

@@ -0,0 +1,38 @@
+from ttoken import *
+
+TT = TokenType
+
+
+# This function will transform some tokens into others depending on what follows them
+def collapse(tokens: list[Token]):
+    i = 0
+    out = []
+    while i < len(tokens):
+        tok = tokens[i]
+        peek = tokens[i + 1] if i + 1 < len(tokens) else None
+        if peek is None:
+            out.append(tok)
+            i += 1
+            continue
+        if tok.ty == TT.PIPE and peek.ty == TT.PIPE:
+            out.append(Token(TT.OR, string="||"))
+            i += 2
+        elif tok.ty == TT.AMP and peek.ty == TT.AMP:
+            out.append(Token(TT.AND, string="&&"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == "<" and peek.ty == TT.CARRET and peek.string == "<":
+            out.append(Token(TT.DLCARRET, string="<<"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == ">" and peek.ty == TT.CARRET and peek.string == ">":
+            out.append(Token(TT.DRCARRET, string=">>"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == "<":
+            out.append(Token(TT.LCARRET, string="<"))
+            i += 1
+        elif tok.ty == TT.CARRET and tok.string == ">":
+            out.append(Token(TT.RCARRET, string=">"))
+            i += 1
+        else:
+            out.append(tok)
+            i += 1
+    return out
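
A quick check of the new pass, assuming this new file is collapse.py (as the import in the first file suggests) and the Token/TokenType definitions from ttoken.py at the bottom of this commit:

    from collapse import collapse
    from ttoken import Token, TokenType

    toks = [
        Token(TokenType.CARRET, string="<"),
        Token(TokenType.AMP, string="&"),
        Token(TokenType.AMP, string="&"),
        Token(TokenType.CARRET, string=">"),
        Token(TokenType.CARRET, string=">"),
        Token(TokenType.PIPE, string="|"),
        Token(TokenType.PIPE, string="|"),
    ]
    out = collapse(toks)
    print([(t.ty.name, t.string) for t in out])
    # [('LCARRET', '<'), ('AND', '&&'), ('DRCARRET', '>>'), ('OR', '||')]

One consequence of the peek-is-None early exit: a lone caret at the very end of the list stays a CARRET instead of becoming LCARRET/RCARRET; whether that is intended is not clear from the commit.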

View file

@@ -2,7 +2,7 @@ from ttoken import *
 
 
 # This function will make a "big" token that will represent a word in the shell sense
-def concat(tokens: list[Token]):
+def concat(tokens: list[Token]) -> list[Token]:
     i = 0
     out = []
     while i < len(tokens):
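
Only the annotation changes here; the body of concat() sits outside the hunk. For context, a rough sketch of the word-grouping idea its comment describes, under the assumption that "stringy" tokens get glued into one WORD metatoken. The real grouping rules are not shown in this commit and may differ:

    from ttoken import Token, TokenType

    # Assumed set of tokens that can fuse into one shell word (illustrative).
    WORDY = {TokenType.NQUOTE, TokenType.SQUOTE, TokenType.DQUOTE, TokenType.EXPENSION}

    def group_words(tokens: list[Token]) -> list[Token]:
        out: list[Token] = []
        run: list[Token] = []
        for tok in tokens:
            if tok.ty in WORDY:
                run.append(tok)  # extend the current word
                continue
            if run:  # word boundary: flush the run as one WORD metatoken
                out.append(Token(TokenType.WORD, subtokens=run))
                run = []
            out.append(tok)  # operators and whitespace pass through unchanged
        if run:
            out.append(Token(TokenType.WORD, subtokens=run))
        return out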

View file

@@ -3,12 +3,12 @@ from ttoken import *
 TT = TokenType
 
 
-def is_quote(c: chr):
+def is_quote(c: str) -> bool:
     return c == "'" or c == '"'
 
 
 # This function takes the string and separates it into different tokens depending on the quotes
-def str_to_token(s: str):
+def str_to_token(s: str) -> list[Token]:
     tokens = []
     current_token = None
     quote = 0
@@ -36,6 +36,8 @@ def str_to_token(s: str):
         ):
             tokens.append(current_token)
             current_token = Token(TT.WHITESPACE, string="")
+            i += 1
+            continue
         else:
             # we DON'T have a whitespace; if the current token is a whitespace, just push it and start a new raw_string token
             if current_token.ty == TT.WHITESPACE:
@@ -64,7 +66,7 @@ def str_to_token(s: str):
         elif c == ";":
             tokens.append(current_token)
             current_token = None
-            tokens.append(Token(TT.CARRET, string=";"))
+            tokens.append(Token(TT.SEMICOLON, string=";"))
         elif c == ">" or c == "<":
             tokens.append(current_token)
             current_token = None
@@ -97,4 +99,9 @@ def str_to_token(s: str):
     # if the current token is not None and is a "no quote" token, push it
     if current_token != None and current_token.ty == TT.NQUOTE:
         tokens.append(current_token)
-    return tokens
+    # clean up any empty tokens that may be left
+    out = []
+    for tok in tokens:
+        if not (tok.ty == TT.NQUOTE and len(tok.string) == 0):
+            out.append(tok)
+    return out
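
The new cleanup loop is a plain filter over the token list; a standalone illustration of what it drops, assuming ttoken's Token and TokenType:

    from ttoken import Token, TokenType

    toks = [
        Token(TokenType.NQUOTE, string=""),      # empty raw string: dropped
        Token(TokenType.NQUOTE, string="echo"),  # non-empty: kept
        Token(TokenType.SEMICOLON, string=";"),  # not an NQUOTE: kept
    ]
    kept = [t for t in toks if not (t.ty == TokenType.NQUOTE and len(t.string) == 0)]
    print([(t.ty.name, t.string) for t in kept])
    # [('NQUOTE', 'echo'), ('SEMICOLON', ';')]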

View file

@@ -4,25 +4,25 @@ from dataclasses import dataclass
 
 TokenType = Enum(
     "TokenType",
     [
-        "AMP",
-        "AND",
-        "CARRET",
-        "DOLLAR",
-        "DQUOTE",
-        "EXPENSION",
-        "LCARRET",
-        "LCARRET_DOUBLE",
-        "LPAREN",
-        "NQUOTE",
-        "OR",
-        "PIPE",
-        "RCARRET",
-        "RCARRET_DOUBLE",
-        "RPAREN",
-        "SEMICOLON",
-        "SQUOTE",
-        "WHITESPACE",
-        "WORD",
+        "AMP",  # ampersand == &
+        "AND",  # and == &&
+        "CARRET",  # any caret == < > << >>
+        "DLCARRET",  # double left caret == <<
+        "DOLLAR",  # dollar == $
+        "DQUOTE",  # double quote string
+        "DRCARRET",  # double right caret == >>
+        "EXPENSION",  # an expansion == $<no_quote_word>
+        "LCARRET",  # left caret == <
+        "LPAREN",  # left parenthesis == (
+        "NQUOTE",  # no quote string
+        "OR",  # or == ||
+        "PIPE",  # pipe == |
+        "RCARRET",  # right caret == >
+        "RPAREN",  # right parenthesis == )
+        "SEMICOLON",  # semicolon == ;
+        "SQUOTE",  # single quote string
+        "WHITESPACE",  # whitespace outside of quoted strings
+        "WORD",  # a meta token, which contains subtokens
     ],
 )
@@ -33,12 +33,14 @@ class Token:
     string: str = None
     subtokens: list = None
 
-    def is_subtoken(self) -> bool:
+    def is_metatoken(self) -> bool:
         return self.subtokens != None
 
     def append_char(self, c: str):
         if self.string is None:
-            raise Exception(f"Tried to push a char on a token that contains subtokens, TT={self.ty}")
+            raise Exception(
+                f"Tried to push a char on a token that contains subtokens, TT={self.ty}"
+            )
         self.string += c
 
     def is_word(self):
@@ -50,34 +52,12 @@ class Token:
         )
 
 
-def print_tokenlist(tokens: list[Token], *, between="", end="\n"):
+def print_tokenlist(tokens: list[Token], *, depth=0):
     for tok in tokens:
-        col = "0"
-        if tok.ty == TokenType.SQUOTE:
-            col = "33"
-        if tok.ty == TokenType.DQUOTE:
-            col = "32"
-        if tok.ty == TokenType.WHITESPACE:
-            col = "31;4"
-        if tok.ty == TokenType.DOLLAR:
-            col = "31"
-        if tok.ty == TokenType.LPAREN:
-            col = "35"
-        if tok.ty == TokenType.RPAREN:
-            col = "35"
-        if tok.ty == TokenType.AMP:
-            col = "35"
-        if tok.ty == TokenType.PIPE:
-            col = "35"
-        if tok.ty == TokenType.SEMICOLON:
-            col = "35"
-        if tok.ty == TokenType.CARRET:
-            col = "35"
-        if tok.is_subtoken():
-            print_tokenlist(tok.subtokens, between="\x1b[100m", end="")
+        if tok.is_metatoken():
+            print_tokenlist(tok.subtokens, depth=depth + 1)
         else:
-            print(f"\x1b[{col}m{between}{tok.string}\x1b[0m", end="")
-    #print(end)
+            print(f"{'\t' * depth}{tok.ty.name:>10} => \x1b[31;40m{tok.string}\x1b[0m")
 
 
 __all__ = ["TokenType", "Token", "print_tokenlist"]
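
Finally, a small example of the reworked printer on a nested token, assuming the definitions above. The expected output in the comment is approximate: one line per leaf, type name right-aligned to 10 characters, one tab per nesting level, ANSI color codes omitted:

    from ttoken import Token, TokenType, print_tokenlist

    word = Token(
        TokenType.WORD,
        subtokens=[
            Token(TokenType.NQUOTE, string="hello"),
            Token(TokenType.DQUOTE, string="world"),
        ],
    )
    print_tokenlist([word, Token(TokenType.SEMICOLON, string=";")])
    #         NQUOTE => hello   (depth 1, one tab)
    #         DQUOTE => world
    #  SEMICOLON => ;           (depth 0)

Note that the new f-string in print_tokenlist puts a backslash escape ('\t') inside a replacement field, which is only valid syntax on Python 3.12 and later.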