{ module Lexer open System;; open FSharp.Text.Lexing;; open System.Text;; (* A lexer definition for Fasto, for use with fslex. *) (* boilerplate code for all lexer files... *) let mutable currentLine = 1 let mutable lineStartPos = [0] let rec getLineCol pos line = function | (p1::ps) -> if pos>=p1 then (line, pos-p1) else getLineCol pos (line-1) ps | [] -> (0,0) (* should not happen *) let getPos (lexbuf : LexBuffer<'char>) = getLineCol lexbuf.StartPos.pos_cnum (currentLine) (lineStartPos) exception LexicalError of string * (int * int) (* (message, (line, column)) *) let lexerError lexbuf s = raise (LexicalError (s, getPos lexbuf)) (* This one is language specific, yet very common. Alternative would be to encode every keyword as a regexp. This one is much easier. Note that here we recognize specific keywords, and if none matches then we assume we have found a user-defined identifier (last case). *) let keyword (s, pos) = match s with | "if" -> Parser.IF pos | "then" -> Parser.THEN pos | "else" -> Parser.ELSE pos | "let" -> Parser.LET pos | "in" -> Parser.IN pos | "int" -> Parser.INT pos | "bool" -> Parser.BOOL pos | "char" -> Parser.CHAR pos | "fun" -> Parser.FUN pos | "fn" -> Parser.FN pos | "op" -> Parser.OP pos (* specials: *) | "iota" -> Parser.IOTA pos | "map" -> Parser.MAP pos | "reduce" -> Parser.REDUCE pos | "read" -> Parser.READ pos | "write" -> Parser.WRITE pos (* added: *) | "not" -> Parser.NOT pos | "true" -> Parser.BOOLVAL (true, pos) | "false" -> Parser.BOOLVAL (false, pos) | "replicate" -> Parser.REPLICATE pos | "scan" -> Parser.SCAN pos | "filter" -> Parser.FILTER pos | _ -> Parser.ID (s, pos) } rule Token = parse [' ' '\t' '\r']+ { Token lexbuf } (* whitespace *) | ['\n' '\012'] { currentLine <- currentLine + 1; lineStartPos <- lexbuf.StartPos.pos_cnum :: lineStartPos; Token lexbuf } (* newlines *) | "//" [^ '\n' '\012']* { Token lexbuf } (* comment *) | '0' | ['1'-'9']['0'-'9']* { Parser.NUM ( int (Encoding.UTF8.GetString(lexbuf.Lexeme)) , getPos lexbuf ) } | ['a'-'z' 'A'-'Z']['a'-'z' 'A'-'Z' '0'-'9' '_']* { keyword ( Encoding.UTF8.GetString(lexbuf.Lexeme) , getPos lexbuf ) } | '\'' ( [' ' '!' '#'-'&' '('-'[' ']'-'~'] | '\\' ['n' 't' '\'' '"' '\\'] ) '\'' { let str0 = Encoding.UTF8.GetString(lexbuf.Lexeme) let str1 = str0.Substring (1, (String.length str0) - 2) let str2 = AbSyn.fromCString str1 Parser.CHARLIT (str2.Chars(0), getPos lexbuf) } | '"' ( [' ' '!' '#'-'&' '('-'[' ']'-'~'] | '\\' ['n' 't' '\'' '"' '\\'] )* '"' { let str0 = Encoding.UTF8.GetString(lexbuf.Lexeme) let str1 = str0.Substring (1, (String.length str0) - 2) Parser.STRINGLIT (AbSyn.fromCString str1, getPos lexbuf) } | '+' { Parser.PLUS (getPos lexbuf) } | '-' { Parser.MINUS (getPos lexbuf) } | "=>" { Parser.ARROW (getPos lexbuf) } | "==" { Parser.DEQ (getPos lexbuf) } | '=' { Parser.EQ (getPos lexbuf) } | '<' { Parser.LTH (getPos lexbuf) } | '(' { Parser.LPAR (getPos lexbuf) } | ')' { Parser.RPAR (getPos lexbuf) } | '[' { Parser.LBRACKET (getPos lexbuf) } | ']' { Parser.RBRACKET (getPos lexbuf) } | '{' { Parser.LCURLY (getPos lexbuf) } | '}' { Parser.RCURLY (getPos lexbuf) } | ',' { Parser.COMMA (getPos lexbuf) } | '*' { Parser.TIMES (getPos lexbuf) } | '/' { Parser.DIVIDE (getPos lexbuf) } | '~' { Parser.NUMNEG (getPos lexbuf) } | "||" { Parser.OR (getPos lexbuf) } | "&&" { Parser.AND (getPos lexbuf) } | ';' { Parser.SEMICOLON (getPos lexbuf) } | eof { Parser.EOF (getPos lexbuf) } | _ { lexerError lexbuf "Illegal symbol in input" }