IPS_G-assignment/Fasto/Lexer.fsl

{
module Lexer

open System;;
open FSharp.Text.Lexing;;
open System.Text;;

(* A lexer definition for Fasto, for use with fslex. *)

(* boilerplate code for all lexer files... *)
let mutable currentLine = 1
let mutable lineStartPos = [0]

let rec getLineCol pos line = function
    |  (p1::ps) ->
        if pos>=p1
        then (line, pos-p1)
        else getLineCol pos (line-1) ps
    |  [] -> (0,0) (* should not happen *)

let getPos (lexbuf : LexBuffer<'char>) =
      getLineCol lexbuf.StartPos.pos_cnum
                 (currentLine)
                 (lineStartPos)

exception LexicalError of string * (int * int) (* (message, (line, column)) *)

let lexerError lexbuf s =
     raise (LexicalError (s, getPos lexbuf))

(* This one is language specific, yet very common. Alternative would
   be to encode every keyword as a regexp. This one is much easier.
   Note that here we recognize specific keywords, and if none matches
   then we assume we have found a user-defined identifier (last case).
*)
let keyword (s, pos) =
     match s with
       | "if"           -> Parser.IF pos
       | "then"         -> Parser.THEN pos
       | "else"         -> Parser.ELSE pos
       | "let"          -> Parser.LET pos
       | "in"           -> Parser.IN pos
       | "int"          -> Parser.INT pos
       | "bool"         -> Parser.BOOL pos
       | "char"         -> Parser.CHAR pos
       | "fun"          -> Parser.FUN pos
       | "fn"           -> Parser.FN pos
       | "op"           -> Parser.OP pos

(* specials: *)
       | "iota"         -> Parser.IOTA pos
       | "map"          -> Parser.MAP  pos
       | "reduce"       -> Parser.REDUCE pos
       | "read"         -> Parser.READ pos
       | "write"        -> Parser.WRITE pos

(* added: *)
       | "not"          -> Parser.NOT pos
       | "true"         -> Parser.BOOLVAL (true, pos)
       | "false"        -> Parser.BOOLVAL (false, pos)

       | "replicate"    -> Parser.REPLICATE pos
       | "scan"         -> Parser.SCAN pos
       | "filter"       -> Parser.FILTER pos

       | _              -> Parser.ID (s, pos)

}

rule Token = parse
    [' ' '\t' '\r']+      { Token lexbuf } (* whitespace *)
  | ['\n' '\012']         { currentLine  <- currentLine + 1;
                            lineStartPos <-  lexbuf.StartPos.pos_cnum
                                             :: lineStartPos;
                            Token lexbuf } (* newlines *)
  | "//" [^ '\n' '\012']* { Token lexbuf } (* comment *)

  | '0' | ['1'-'9']['0'-'9']* { Parser.NUM
                                  ( int (Encoding.UTF8.GetString(lexbuf.Lexeme))
                                  , getPos lexbuf )
                              }
  | ['a'-'z' 'A'-'Z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
                          { keyword ( Encoding.UTF8.GetString(lexbuf.Lexeme)
                                    , getPos lexbuf ) }
  | '\'' ( [' ' '!' '#'-'&' '('-'[' ']'-'~'] | '\\' ['n' 't' '\'' '"' '\\'] ) '\''
                          { let str0 = Encoding.UTF8.GetString(lexbuf.Lexeme)
                            let str1 = str0.Substring (1, (String.length str0) - 2)
                            let str2 = AbSyn.fromCString str1
                            Parser.CHARLIT (str2.Chars(0), getPos lexbuf)
                          }
  | '"' ( [' ' '!' '#'-'&' '('-'[' ']'-'~'] | '\\' ['n' 't' '\'' '"' '\\'] )* '"'
                          {
                            let str0 = Encoding.UTF8.GetString(lexbuf.Lexeme)
                            let str1 = str0.Substring (1, (String.length str0) - 2)
                            Parser.STRINGLIT (AbSyn.fromCString str1, getPos lexbuf)
                          }
  | '+'                   { Parser.PLUS   (getPos lexbuf) }
  | '-'                   { Parser.MINUS  (getPos lexbuf) }
  | "=>"                  { Parser.ARROW  (getPos lexbuf) }
  | "=="                  { Parser.DEQ    (getPos lexbuf) }
  | '='                   { Parser.EQ     (getPos lexbuf) }
  | '<'                   { Parser.LTH    (getPos lexbuf) }
  | '('                   { Parser.LPAR   (getPos lexbuf) }
  | ')'                   { Parser.RPAR   (getPos lexbuf) }
  | '['                   { Parser.LBRACKET (getPos lexbuf) }
  | ']'                   { Parser.RBRACKET (getPos lexbuf) }
  | '{'                   { Parser.LCURLY (getPos lexbuf) }
  | '}'                   { Parser.RCURLY (getPos lexbuf) }
  | ','                   { Parser.COMMA  (getPos lexbuf) }

  | '*'                   { Parser.TIMES (getPos lexbuf) }
  | '/'                   { Parser.DIVIDE (getPos lexbuf) }
  | '~'                   { Parser.NUMNEG (getPos lexbuf) }

  | "||"                  { Parser.OR (getPos lexbuf) }
  | "&&"                  { Parser.AND (getPos lexbuf) }

  | ';'                   { Parser.SEMICOLON (getPos lexbuf) }

  | eof                   { Parser.EOF    (getPos lexbuf) }
  | _                     { lexerError lexbuf "Illegal symbol in input" }