🎉
This commit is contained in:
74
plthy_impl/lexer.py
Normal file
74
plthy_impl/lexer.py
Normal file
@ -0,0 +1,74 @@
|
||||
from string import ascii_letters, digits
|
||||
from rply import LexerGenerator
|
||||
|
||||
# Characters used to build the ID token's character class:
# ASCII letters, the underscore, and decimal digits.
VALID_CHARACTERS = "".join((ascii_letters, "_", digits))
|
||||
|
||||
# Reserved words, as (token name, regex) pairs: "hello" -> "KEYWORD_HELLO".
#
# Each regex carries a negative lookahead so the keyword only matches when it
# is NOT immediately followed by another identifier character (ASCII letter,
# digit or underscore). rply takes the first rule that matches at the current
# position, and these rules are listed ahead of the identifier rule, so a
# bare "do" would otherwise swallow the start of an identifier such as
# "double" and leave "uble" behind as a separate token.
KEYWORD_TOKENS = [
    ("KEYWORD_" + word.upper(), word + r"(?![A-Za-z0-9_])")
    for word in [
        "hello",
        "goodbye",
        "maybe",
        "do",
        "if",
        "because",
        "until",
        "define",
        "as",
        "variable",
        "return",
        "argument",
    ]
]
|
||||
|
||||
# Built-in function names; every entry produces a token named "BUILTIN".
#
# The negative lookahead keeps a builtin name from matching the front of a
# longer identifier (ASCII letters, digits, underscore): rply takes the first
# rule that matches, so a bare "print" would split "printer" into a BUILTIN
# token followed by the leftover "er".
BUILTIN_TOKENS = [
    ("BUILTIN", name + r"(?![A-Za-z0-9_])")
    for name in [
        "print",
    ]
]
|
||||
|
||||
# Literal values, as (token name, regex) pairs.
# A string is single-quoted; the non-greedy body stops at the first closing
# quote, and "." does not cross a newline unless the pattern is compiled with
# DOTALL.
_STRING_LITERAL = r"\'.*?\'"
# A numeral is a run of digits with an optional ".digits" fractional part.
_NUMERAL_LITERAL = r"\d+(\.\d+)?"

DATA_TOKENS = [
    ("DATA_STRING", _STRING_LITERAL),
    ("DATA_NUMERAL", _NUMERAL_LITERAL),
]
|
||||
|
||||
# Punctuation and operators, listed as (token-name suffix, literal text).
# The order is significant to the lexer and is preserved as-is; in particular
# the two-character "->" comes before the single "-" and ">" entries.
_SYMBOL_LITERALS = [
    ("SET", "->"),
    ("LPARENS", "("),
    ("RPARENS", ")"),
    ("LBRACKET", "["),
    ("RBRACKET", "]"),
    ("LCURL", "{"),
    ("RCURL", "}"),
    ("PLUS", "+"),
    ("MINUS", "-"),
    ("TIMES", "*"),
    ("DIVIDE", "/"),
    ("COMMA", ","),
    ("COLON", ":"),
    ("SEMICOLON", ";"),
    ("PIPE", "|"),
    ("QUOTE", '"'),
    ("LT", "<"),
    ("GT", ">"),
    ("EQUALS", "="),
    ("DOLLAR", "$"),
]

# (token name, regex) pairs. Each regex is the literal text with every
# character preceded by a backslash, e.g. "->" becomes r"\-\>".
SYMBOL_TOKENS = [
    ("SYMBOL_" + suffix, "".join("\\" + char for char in text))
    for suffix, text in _SYMBOL_LITERALS
]
|
||||
|
||||
# Every lexer rule as (token name, regex), in the order the rules are
# registered: keywords, builtins, literals, symbols, then the "#<digits>"
# argument reference, and finally the identifier rule built from
# VALID_CHARACTERS.
ALL_TOKENS = [
    *KEYWORD_TOKENS,
    *BUILTIN_TOKENS,
    *DATA_TOKENS,
    *SYMBOL_TOKENS,
    ("ARG", r"\#\d+"),
    ("ID", f"[{VALID_CHARACTERS}]+"),
]
|
||||
|
||||
|
||||
class Lexer:
    """Builds the rply lexer for the language from ``ALL_TOKENS``.

    Typical use is ``Lexer().get_lexer()``, which returns the object produced
    by ``LexerGenerator.build()``.
    """

    def __init__(self):
        self.lexer = LexerGenerator()
        # Set once the rules have been registered, so that calling
        # get_lexer() again does not add every rule a second time.
        self._tokens_added = False

    def _add_tokens(self):
        """Register all token rules and the ignore rules, once per instance."""
        if self._tokens_added:
            return
        for rule in ALL_TOKENS:
            self.lexer.add(*rule)
        # Whitespace, newlines included, is skipped.
        self.lexer.ignore(r"\s+")
        # A line comment runs from "//" up to (not including) the end of the
        # line. The newline is not required, so a comment on the last line of
        # a source with no trailing newline is skipped as well; the newline
        # itself, when present, is consumed by the whitespace rule above.
        self.lexer.ignore(r"//[^\n]*")
        self._tokens_added = True

    def get_lexer(self):
        """Return a lexer built from the registered rules."""
        self._add_tokens()
        return self.lexer.build()
|
Reference in New Issue
Block a user