# plthy/plthy_impl/lexer.py
# Last modified: 2024-02-19 11:21:07 +01:00
# Token definitions and Lexer wrapper for the plthy language.
from string import ascii_letters, digits
from rply import LexerGenerator
# Characters that may appear in an identifier (used by the ID token regex).
VALID_CHARACTERS = ascii_letters + "_" + digits

# Language keywords, emitted as KEYWORD_<NAME> tokens.
# Each pattern ends with \b so a keyword only matches as a whole word:
# without it, "do" would match the prefix of the identifier "double",
# splitting it into KEYWORD_DO + ID("uble").
KEYWORD_TOKENS = [("KEYWORD_" + i.upper(), i + r"\b") for i in [
    "hello",
    "goodbye",
    "maybe",
    "do",
    "if",
    # "because",  # reserved, not yet implemented
    "until",
    "define",
    "as",
    "variable",
    "return",
    "argument"
]]
# Built-in function names, all emitted under the single token name BUILTIN.
# \b keeps "print" from matching the prefix of an identifier like "printer".
BUILTIN_TOKENS = [("BUILTIN", i + r"\b") for i in [
    "print",
    "input",
    "random"
]]

# Literal tokens. DATA_FLOAT must be listed BEFORE DATA_INT: rules are
# tried in order, and with INT first, "3.14" would lex as INT(3) and then
# fail on ".14" — the float rule would never fire.
DATA_TOKENS = [
    ("DATA_STRING", r"\'.*?\'"),      # non-greedy single-quoted string
    ("DATA_FLOAT", r"\d+\.\d+"),
    ("DATA_INT", r"\d+")
]
# Punctuation / operator tokens, one (NAME, regex) pair each.
# Order matters: the two-character "->" rule is listed before the
# single-character "-" and ">" rules so it wins the match.
SYMBOL_TOKENS = [
    ("SYMBOL_SET", r"\-\>"),          # assignment arrow
    # Reserved, currently unused:
    # ("SYMBOL_LPARENS", r"\("),
    # ("SYMBOL_RPARENS", r"\)"),
    ("SYMBOL_LBRACKET", r"\["), ("SYMBOL_RBRACKET", r"\]"),
    ("SYMBOL_LCURL", r"\{"), ("SYMBOL_RCURL", r"\}"),
    # Arithmetic operators.
    ("SYMBOL_PLUS", r"\+"), ("SYMBOL_MINUS", r"\-"),
    ("SYMBOL_TIMES", r"\*"), ("SYMBOL_DIVIDE", r"\/"),
    # Reserved, currently unused:
    # ("SYMBOL_COMMA", r"\,"),
    # ("SYMBOL_COLON", r"\:"),
    ("SYMBOL_SEMICOLON", r"\;"),
    ("SYMBOL_PIPE", r"\|"),
    ("SYMBOL_QUOTE", r"\""),
    # Comparison / misc.
    ("SYMBOL_LT", r"\<"), ("SYMBOL_GT", r"\>"),
    ("SYMBOL_EQUALS", r"\="),
    ("SYMBOL_DOLLAR", r"\$")
]
# Every token rule, in registration order. Order is significant:
# keyword and builtin rules must precede the catch-all ID rule, and
# the ARG rule ("#<digits>") is kept separate so '#' never reaches ID.
ALL_TOKENS = [
    *KEYWORD_TOKENS,
    *BUILTIN_TOKENS,
    *DATA_TOKENS,
    *SYMBOL_TOKENS,
    ("ARG", r"\#\d+"),                     # positional argument marker
    ("ID", f"[{VALID_CHARACTERS}]+"),      # identifiers — must stay last
]
class Lexer:
    """Thin wrapper around rply's LexerGenerator for the plthy language.

    Usage: ``Lexer().get_lexer()`` returns a built rply lexer with every
    rule from ALL_TOKENS registered.
    """

    def __init__(self):
        self.lexer = LexerGenerator()
        # Guard so rules are registered at most once: previously, calling
        # get_lexer() twice re-added every rule to the same generator.
        self._tokens_added = False

    def _add_tokens(self):
        """Register all token rules and the ignore pattern (idempotent)."""
        if self._tokens_added:
            return
        for name, pattern in ALL_TOKENS:
            self.lexer.add(name, pattern)
        # Skip whitespace and // line comments. Using //[^\n]* (instead of
        # //.*\n) lets a comment on the final line, with no trailing
        # newline, still be ignored instead of raising a lexing error.
        self.lexer.ignore(r"[\s\n]+|//[^\n]*")
        self._tokens_added = True

    def get_lexer(self):
        """Build and return the rply lexer; safe to call repeatedly."""
        self._add_tokens()
        return self.lexer.build()