(* Analyseur lexical pour Petit Caml *) { open Lexing open Parser exception Lexing_error of string let kwd_tbl = [ "else", ELSE; "false", FALSE; "function", FUNCTION; "if", IF; "in", IN; "let", LET; "match", MATCH; "not", NOT; "rec", REC; "then", THEN; "true", TRUE; "with", WITH ] let id_or_kwd = let h = Hashtbl.create 17 in List.iter (fun (s,t) -> Hashtbl.add h s t) kwd_tbl; fun s -> try List.assoc s kwd_tbl with _ -> IDENT s let newline lexbuf = let pos = lexbuf.lex_curr_p in lexbuf.lex_curr_p <- { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum } let string_buffer = ref "" } let digit = ['0'-'9'] let alpha = ['a'-'z' 'A'-'Z'] let ident = alpha (alpha | "_" | "'" | digit)* let integer = ['0'-'9']+ let space = [' ' '\t'] rule token = parse | '\n' { newline lexbuf; token lexbuf } | space+ { token lexbuf } | integer as s { INTEGER (int_of_string s) } | "\"" { string_buffer := "" ; let start_pos = lexbuf.lex_start_pos and start_p = lexbuf.lex_start_p in lex_string lexbuf ; lexbuf.lex_start_pos <- start_pos; lexbuf.lex_start_p <- start_p; STRING (!string_buffer) } | "()" { UNIT } | "[]" { EMPTYLIST } | ident as id { id_or_kwd id } | "->" { RIGHTARROW } | "::" { TWOCOLONS } | "_" { UNDERSCORE } | '+' { PLUS } | '-' { MINUS } | '*' { TIMES } | '/' { DIV } | "&&" { AND } | "||" { OR } | "|" { CASE } | "<=" { LE } | ">=" { GE } | '<' { LT } | '>' { GT } | "<>" { NEQ } | '=' { EQ } | "[" { LBRACE } | "]" { RBRACE } | '(' { LPAREN } | ')' { RPAREN } | ',' { COMMA } | ";" { SEMICOLON } | "(*" { comment 0 lexbuf } | eof { EOF } | _ as c { raise (Lexing_error ("illegal character: " ^ String.make 1 c)) } and comment depth = parse | '\n' { newline lexbuf; comment depth lexbuf } | "(*" { comment (depth + 1) lexbuf } | "*)" { match depth with | 0 -> token lexbuf | _ -> comment (depth - 1) lexbuf } | eof { raise (Lexing_error "unterminated comment") } | _ { comment depth lexbuf } and lex_string = parse | "\"" { () } | "\\\"" { string_buffer := !string_buffer ^ "\""; lex_string lexbuf } | "\\n" { string_buffer := !string_buffer ^ "\n"; lex_string lexbuf } | "\\\\" { string_buffer := !string_buffer ^ "\\"; lex_string lexbuf } | "\\" { raise (Lexing_error "illegal \\ in string") } | "\n" eof | eof { raise (Lexing_error "unterminated string") } | "\n" { raise (Lexing_error "illegal \\n in string") } | _ as c { string_buffer := !string_buffer ^ (String.make 1 c) ; lex_string lexbuf }