update: make something that collapses tokens into one if it can
parent 2e811bcec2
commit 774f374965
5 changed files with 81 additions and 54 deletions
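
In short, the new collapse pass walks the token list and merges adjacent single-character operator tokens into one compound token where it can (illustration only; token names as defined in ttoken.py below):

    PIPE "|"   + PIPE "|"    -> OR "||"
    AMP "&"    + AMP "&"     -> AND "&&"
    CARRET "<" + CARRET "<"  -> DLCARRET "<<"
    CARRET ">" + CARRET ">"  -> DRCARRET ">>"
    lone CARRET "<" or ">"   -> LCARRET or RCARRET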
@@ -1,10 +1,12 @@
+import collapse
+import concat
 import str_to_token
-import concat
 import ttoken

 s = input("> ")
 print(s)
 tokens = str_to_token.str_to_token(s)
 concated_tokens = concat.concat(tokens)
+collapsed_tokens = collapse.collapse(concated_tokens)

-ttoken.print_tokenlist(concated_tokens)
+ttoken.print_tokenlist(collapsed_tokens)

parser/token.py/collapse.py (new file, 38 lines)
@@ -0,0 +1,38 @@
+from ttoken import *
+
+TT = TokenType
+
+
+# This function will transform some tokens into others depending on what follows them
+def collapse(tokens: list[Token]):
+    i = 0
+    out = []
+    while i < len(tokens):
+        tok = tokens[i]
+        peek = tokens[i + 1] if i + 1 < len(tokens) else None
+        if peek is None:
+            out.append(tok)
+            i += 1
+            continue
+        if tok.ty == TT.PIPE and peek.ty == TT.PIPE:
+            out.append(Token(TT.OR, string="||"))
+            i += 2
+        elif tok.ty == TT.AMP and peek.ty == TT.AMP:
+            out.append(Token(TT.AND, string="&&"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == "<" and peek.ty == TT.CARRET and peek.string == "<":
+            out.append(Token(TT.DLCARRET, string="<<"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == ">" and peek.ty == TT.CARRET and peek.string == ">":
+            out.append(Token(TT.DRCARRET, string=">>"))
+            i += 2
+        elif tok.ty == TT.CARRET and tok.string == "<":
+            out.append(Token(TT.LCARRET, string="<"))
+            i += 1
+        elif tok.ty == TT.CARRET and tok.string == ">":
+            out.append(Token(TT.RCARRET, string=">"))
+            i += 1
+        else:
+            out.append(tok)
+            i += 1
+    return out
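
A minimal usage sketch of the new pass (hand-built token list for illustration; not part of the commit):

from ttoken import *
from collapse import collapse

TT = TokenType
toks = [
    Token(TT.NQUOTE, string="ls"),
    Token(TT.PIPE, string="|"),
    Token(TT.PIPE, string="|"),
    Token(TT.NQUOTE, string="wc"),
]
out = collapse(toks)
# out now holds three tokens: NQUOTE "ls", OR "||", NQUOTE "wc"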

concat.py
@@ -2,7 +2,7 @@ from ttoken import *


 # This function will make a "big" token that will represent a word in the shell sense
-def concat(tokens: list[Token]):
+def concat(tokens: list[Token]) -> list[Token]:
     i = 0
     out = []
     while i < len(tokens):

str_to_token.py
@@ -3,12 +3,12 @@ from ttoken import *
 TT = TokenType


-def is_quote(c: chr):
+def is_quote(c: str) -> bool:
     return c == "'" or c == '"'


 # This function takes the string and separates it into different tokens depending on the quotes
-def str_to_token(s: str):
+def str_to_token(s: str) -> list[Token]:
     tokens = []
     current_token = None
     quote = 0
@@ -36,6 +36,8 @@ def str_to_token(s: str):
         ):
             tokens.append(current_token)
             current_token = Token(TT.WHITESPACE, string="")
+            i += 1
+            continue
         else:
             # we DON'T have a whitespace: if the current token is a whitespace, push it and start a new raw-string token
             if current_token.ty == TT.WHITESPACE:
@@ -64,7 +66,7 @@ def str_to_token(s: str):
         elif c == ";":
             tokens.append(current_token)
             current_token = None
-            tokens.append(Token(TT.CARRET, string=";"))
+            tokens.append(Token(TT.SEMICOLON, string=";"))
         elif c == ">" or c == "<":
             tokens.append(current_token)
             current_token = None
@@ -97,4 +99,9 @@ def str_to_token(s: str):
     # if the current token is not none and the current token is "no quote" then we push it
     if current_token != None and current_token.ty == TT.NQUOTE:
         tokens.append(current_token)
-    return tokens
+    # cleanup the empty tokens that may be here
+    out = []
+    for tok in tokens:
+        if not (tok.ty == TT.NQUOTE and len(tok.string) == 0):
+            out.append(tok)
+    return out
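
The cleanup loop added above is a plain filter; an equivalent comprehension (a sketch, not in the commit) would be:

out = [tok for tok in tokens if not (tok.ty == TT.NQUOTE and len(tok.string) == 0)]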

ttoken.py
@@ -4,25 +4,25 @@ from dataclasses import dataclass
 TokenType = Enum(
     "TokenType",
     [
-        "AMP",
-        "AND",
-        "CARRET",
-        "DOLLAR",
-        "DQUOTE",
-        "EXPENSION",
-        "LCARRET",
-        "LCARRET_DOUBLE",
-        "LPAREN",
-        "NQUOTE",
-        "OR",
-        "PIPE",
-        "RCARRET",
-        "RCARRET_DOUBLE",
-        "RPAREN",
-        "SEMICOLON",
-        "SQUOTE",
-        "WHITESPACE",
-        "WORD",
+        "AMP",  # ampersand == &
+        "AND",  # and == &&
+        "CARRET",  # any carret == < > << >>
+        "DLCARRET",  # double left carret == <<
+        "DOLLAR",  # dollar == $
+        "DQUOTE",  # double quote string
+        "DRCARRET",  # double right carret == >>
+        "EXPENSION",  # an expansion == $<no_quote_word>
+        "LCARRET",  # left carret == <
+        "LPAREN",  # left parenthesis == (
+        "NQUOTE",  # no quote string
+        "OR",  # or == ||
+        "PIPE",  # pipe == |
+        "RCARRET",  # right carret == >
+        "RPAREN",  # right parenthesis == )
+        "SEMICOLON",  # semicolon == ;
+        "SQUOTE",  # single quote string
+        "WHITESPACE",  # whitespace outside of quoted strings
+        "WORD",  # a meta token, which contains subtokens
     ],
 )

@@ -33,12 +33,14 @@ class Token:
     string: str = None
     subtokens: list = None

-    def is_subtoken(self) -> bool:
+    def is_metatoken(self) -> bool:
         return self.subtokens != None

     def append_char(self, c: str):
         if self.string is None:
-            raise Exception(f"Tried to push a char on a token that contains subtokens, TT={self.ty}")
+            raise Exception(
+                f"Tried to push a char on a token that contains subtokens, TT={self.ty}"
+            )
         self.string += c

     def is_word(self):
@@ -50,34 +52,12 @@ class Token:
     )


-def print_tokenlist(tokens: list[Token], *, between="", end="\n"):
+def print_tokenlist(tokens: list[Token], *, depth=0):
     for tok in tokens:
-        col = "0"
-        if tok.ty == TokenType.SQUOTE:
-            col = "33"
-        if tok.ty == TokenType.DQUOTE:
-            col = "32"
-        if tok.ty == TokenType.WHITESPACE:
-            col = "31;4"
-        if tok.ty == TokenType.DOLLAR:
-            col = "31"
-        if tok.ty == TokenType.LPAREN:
-            col = "35"
-        if tok.ty == TokenType.RPAREN:
-            col = "35"
-        if tok.ty == TokenType.AMP:
-            col = "35"
-        if tok.ty == TokenType.PIPE:
-            col = "35"
-        if tok.ty == TokenType.SEMICOLON:
-            col = "35"
-        if tok.ty == TokenType.CARRET:
-            col = "35"
-        if tok.is_subtoken():
-            print_tokenlist(tok.subtokens, between="\x1b[100m", end="")
+        if tok.is_metatoken():
+            print_tokenlist(tok.subtokens, depth=depth + 1)
         else:
-            print(f"\x1b[{col}m{between}{tok.string}\x1b[0m", end="")
-    #print(end)
+            print(f"{'\t' * depth}{tok.ty.name:>10} => \x1b[31;40m{tok.string}\x1b[0m")


 __all__ = ["TokenType", "Token", "print_tokenlist"]
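
With the rewritten printer, each leaf token prints as one indented "TYPE => string" line and metatokens recurse one level deeper. For the hand-built list from the collapse sketch above, the output shape would be (ANSI colour codes omitted):

    NQUOTE => ls
        OR => ||
    NQUOTE => wc

Note that the backslash inside the f-string expression ('\t' * depth) is only valid syntax on Python 3.12 and later; on older interpreters the tab prefix would have to be built outside the f-string.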