Files
IPS_G-assignment/Fasto/Lexer.fsl
NikolajDanger 7c20bc1c2c
2022-06-09 12:41:00 +02:00

122 lines
4.8 KiB
Plaintext

{
module Lexer
open System;;
open FSharp.Text.Lexing;;
open System.Text;;
(* A lexer definition for Fasto, for use with fslex. *)
(* boilerplate code for all lexer files... *)
let mutable currentLine = 1
let mutable lineStartPos = [0]
let rec getLineCol pos line = function
| (p1::ps) ->
if pos>=p1
then (line, pos-p1)
else getLineCol pos (line-1) ps
| [] -> (0,0) (* should not happen *)
let getPos (lexbuf : LexBuffer<'char>) =
getLineCol lexbuf.StartPos.pos_cnum
(currentLine)
(lineStartPos)
exception LexicalError of string * (int * int) (* (message, (line, column)) *)
let lexerError lexbuf s =
raise (LexicalError (s, getPos lexbuf))
(* This one is language specific, yet very common. Alternative would
be to encode every keyword as a regexp. This one is much easier.
Note that here we recognize specific keywords, and if none matches
then we assume we have found a user-defined identifier (last case).
*)
let keyword (s, pos) =
match s with
| "if" -> Parser.IF pos
| "then" -> Parser.THEN pos
| "else" -> Parser.ELSE pos
| "let" -> Parser.LET pos
| "in" -> Parser.IN pos
| "int" -> Parser.INT pos
| "bool" -> Parser.BOOL pos
| "char" -> Parser.CHAR pos
| "fun" -> Parser.FUN pos
| "fn" -> Parser.FN pos
| "op" -> Parser.OP pos
(* specials: *)
| "iota" -> Parser.IOTA pos
| "map" -> Parser.MAP pos
| "reduce" -> Parser.REDUCE pos
| "read" -> Parser.READ pos
| "write" -> Parser.WRITE pos
(* added: *)
| "not" -> Parser.NOT pos
| "true" -> Parser.BOOLVAL (true, pos)
| "false" -> Parser.BOOLVAL (false, pos)
| "replicate" -> Parser.REPLICATE pos
| "scan" -> Parser.SCAN pos
| "filter" -> Parser.FILTER pos
| _ -> Parser.ID (s, pos)
}
rule Token = parse
[' ' '\t' '\r']+ { Token lexbuf } (* whitespace *)
| ['\n' '\012'] { currentLine <- currentLine + 1;
lineStartPos <- lexbuf.StartPos.pos_cnum
:: lineStartPos;
Token lexbuf } (* newlines *)
| "//" [^ '\n' '\012']* { Token lexbuf } (* comment *)
| '0' | ['1'-'9']['0'-'9']* { Parser.NUM
( int (Encoding.UTF8.GetString(lexbuf.Lexeme))
, getPos lexbuf )
}
| ['a'-'z' 'A'-'Z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
{ keyword ( Encoding.UTF8.GetString(lexbuf.Lexeme)
, getPos lexbuf ) }
| '\'' ( [' ' '!' '#'-'&' '('-'[' ']'-'~'] | '\\' ['n' 't' '\'' '"' '\\'] ) '\''
{ let str0 = Encoding.UTF8.GetString(lexbuf.Lexeme)
let str1 = str0.Substring (1, (String.length str0) - 2)
let str2 = AbSyn.fromCString str1
Parser.CHARLIT (str2.Chars(0), getPos lexbuf)
}
| '"' ( [' ' '!' '#'-'&' '('-'[' ']'-'~'] | '\\' ['n' 't' '\'' '"' '\\'] )* '"'
{
let str0 = Encoding.UTF8.GetString(lexbuf.Lexeme)
let str1 = str0.Substring (1, (String.length str0) - 2)
Parser.STRINGLIT (AbSyn.fromCString str1, getPos lexbuf)
}
| '+' { Parser.PLUS (getPos lexbuf) }
| '-' { Parser.MINUS (getPos lexbuf) }
| "=>" { Parser.ARROW (getPos lexbuf) }
| "==" { Parser.DEQ (getPos lexbuf) }
| '=' { Parser.EQ (getPos lexbuf) }
| '<' { Parser.LTH (getPos lexbuf) }
| '(' { Parser.LPAR (getPos lexbuf) }
| ')' { Parser.RPAR (getPos lexbuf) }
| '[' { Parser.LBRACKET (getPos lexbuf) }
| ']' { Parser.RBRACKET (getPos lexbuf) }
| '{' { Parser.LCURLY (getPos lexbuf) }
| '}' { Parser.RCURLY (getPos lexbuf) }
| ',' { Parser.COMMA (getPos lexbuf) }
| '*' { Parser.TIMES (getPos lexbuf) }
| '/' { Parser.DIVIDE (getPos lexbuf) }
| '~' { Parser.NUMNEG (getPos lexbuf) }
| "||" { Parser.OR (getPos lexbuf) }
| "&&" { Parser.AND (getPos lexbuf) }
| ';' { Parser.SEMICOLON (getPos lexbuf) }
| eof { Parser.EOF (getPos lexbuf) }
| _ { lexerError lexbuf "Illegal symbol in input" }