////////////////////////////////////////////////////////////////////
// TODO: project task 1
//   implement lexer tokens for the new operators:
//   multiplication (*), division (/), numerical negation (~),
//   logical negation (not), logical and (&&), logical or (||),
//   boolean literals (true, false), semicolon (;)
//
//
// TODO: project task 2
//   implement lexer tokens (keywords) for replicate, filter, scan
//
//
// TODO: project task 4
//   implement the lexer tokens (keywords) for array comprehension
////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
{
|
|
module Lexer
|
|
|
|
open System;;
|
|
open FSharp.Text.Lexing;;
|
|
open System.Text;;
|
|
|
|
(* A lexer definition for Fasto, for use with fslex. *)
|
|
|
|
(* Boilerplate shared by all lexer files: mutable line bookkeeping. *)

(* Number of the line being lexed; the first line is line 1. *)
let mutable currentLine : int = 1

(* Offsets recorded for the lines seen so far, most recent line first.
   The initial entry 0 stands for the first line. *)
let mutable lineStartPos : int list = [ 0 ]
|
|
|
|
(* getLineCol pos line lineStarts turns the absolute offset pos into a
   (line, column) pair.  lineStarts holds recorded line offsets, most
   recent first, and line is the number belonging to its head.  The
   list is walked, decrementing line, until an offset not greater than
   pos is found; the column is the distance from that offset. *)
let rec getLineCol pos line lineStarts =
    match lineStarts with
    | [] -> (0, 0) (* should not happen *)
    | start :: _ when pos >= start -> (line, pos - start)
    | _ :: earlier -> getLineCol pos (line - 1) earlier
|
|
|
|
(* Returns the (line, column) pair of the start of the lexeme currently
   held by lexbuf, computed from the current values of currentLine and
   lineStartPos. *)
let getPos (lexbuf : LexBuffer<'char>) =
    let startOffset = lexbuf.StartPos.pos_cnum
    getLineCol startOffset currentLine lineStartPos
|
|
|
|
(* A lexical error: (message, (line, column)). *)
exception LexicalError of string * (int * int)

(* Aborts lexing by raising LexicalError with message s, positioned at
   the start of the lexeme currently held by lexbuf. *)
let lexerError lexbuf s =
    LexicalError (s, getPos lexbuf) |> raise
|
|
|
|
(* Keyword recognition.  This part is language specific, yet very
   common.  Encoding every keyword as its own regexp would also work,
   but matching one identifier regexp and classifying the lexeme here
   is much easier: a lexeme equal to one of the words below yields the
   token of that word, and any other lexeme is assumed to be a
   user-defined identifier (final case).  Every token carries the
   position that was passed in. *)
let keyword (name, position) =
    match name with
    | "if"     -> Parser.IF     position
    | "then"   -> Parser.THEN   position
    | "else"   -> Parser.ELSE   position
    | "let"    -> Parser.LET    position
    | "in"     -> Parser.IN     position
    | "int"    -> Parser.INT    position
    | "bool"   -> Parser.BOOL   position
    | "char"   -> Parser.CHAR   position
    | "fun"    -> Parser.FUN    position
    | "fn"     -> Parser.FN     position
    | "op"     -> Parser.OP     position

    (* specials: *)
    | "iota"   -> Parser.IOTA   position
    | "map"    -> Parser.MAP    position
    | "reduce" -> Parser.REDUCE position
    | "read"   -> Parser.READ   position
    | "write"  -> Parser.WRITE  position

    (* not a reserved word: an identifier *)
    | other    -> Parser.ID (other, position)
|
|
|
|
}
|
|
|
|
(* Token is the lexer entry point: it matches one lexeme and returns the
   corresponding Parser token together with its (line, column) position.
   Whitespace, newlines and comments yield no token; their actions call
   Token again on the same buffer.  Input that matches no alternative
   raises LexicalError through lexerError.

   NOTE(review): the newline action records StartPos, i.e. the offset of
   the newline character itself, while lineStartPos starts out as [0].
   Columns therefore come out 0-based on the first line and 1-based on
   every later line.  Left unchanged here; confirm which is intended. *)
rule Token = parse
    [' ' '\t' '\r']+      { Token lexbuf } (* whitespace *)
  | ['\n' '\012']         { currentLine  <- currentLine + 1;
                            lineStartPos <- lexbuf.StartPos.pos_cnum
                                            :: lineStartPos;
                            Token lexbuf } (* newlines *)
  | "//" [^ '\n' '\012']* { Token lexbuf } (* comment *)

  (* Integer literals.  The regexp accepts any number of digits, so the
     value may not fit in an int.  TryParse reports that case as a
     LexicalError with a position instead of letting the conversion
     throw an unhandled OverflowException. *)
  | '0' | ['1'-'9']['0'-'9']*
                          { let digits = Encoding.UTF8.GetString(lexbuf.Lexeme)
                            match Int32.TryParse digits with
                            | (true, value) -> Parser.NUM (value, getPos lexbuf)
                            | _ -> lexerError lexbuf "Integer literal out of range"
                          }

  (* Identifiers and keywords share one regexp; keyword tells them apart. *)
  | ['a'-'z' 'A'-'Z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
                          { keyword ( Encoding.UTF8.GetString(lexbuf.Lexeme)
                                    , getPos lexbuf ) }

  (* Character literal: the surrounding quotes are dropped and the
     remaining text is passed through AbSyn.fromCString. *)
  | '\'' ( [' ' '!' '#'-'&' '('-'[' ']'-'~'] | '\\' ['n' 't' '\'' '"' '\\'] ) '\''
                          { let str0 = Encoding.UTF8.GetString(lexbuf.Lexeme)
                            let str1 = str0.Substring (1, (String.length str0) - 2)
                            let str2 = AbSyn.fromCString str1
                            Parser.CHARLIT (str2.Chars(0), getPos lexbuf)
                          }

  (* String literal: same treatment as character literals, any length. *)
  | '"' ( [' ' '!' '#'-'&' '('-'[' ']'-'~'] | '\\' ['n' 't' '\'' '"' '\\'] )* '"'
                          {
                            let str0 = Encoding.UTF8.GetString(lexbuf.Lexeme)
                            let str1 = str0.Substring (1, (String.length str0) - 2)
                            Parser.STRINGLIT (AbSyn.fromCString str1, getPos lexbuf)
                          }

  (* Operators and punctuation. *)
  | '+'                   { Parser.PLUS     (getPos lexbuf) }
  | '-'                   { Parser.MINUS    (getPos lexbuf) }
  | "=>"                  { Parser.ARROW    (getPos lexbuf) }
  | "=="                  { Parser.DEQ      (getPos lexbuf) }
  | '='                   { Parser.EQ       (getPos lexbuf) }
  | '<'                   { Parser.LTH      (getPos lexbuf) }
  | '('                   { Parser.LPAR     (getPos lexbuf) }
  | ')'                   { Parser.RPAR     (getPos lexbuf) }
  | '['                   { Parser.LBRACKET (getPos lexbuf) }
  | ']'                   { Parser.RBRACKET (getPos lexbuf) }
  | '{'                   { Parser.LCURLY   (getPos lexbuf) }
  | '}'                   { Parser.RCURLY   (getPos lexbuf) }
  | ','                   { Parser.COMMA    (getPos lexbuf) }
  | eof                   { Parser.EOF      (getPos lexbuf) }
  | _                     { lexerError lexbuf "Illegal symbol in input" }
|