(* Lexical analyzer for Petit Caml *)
{
  open Lexing
  open Parser

  exception Lexing_error of string
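
  (* Reserved words of Petit Caml and the parser tokens they map to. *)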
  let kwd_tbl =
    [
      "else", ELSE; "false", FALSE; "function", FUNCTION; "if", IF;
      "in", IN; "let", LET; "match", MATCH; "not", NOT; "rec", REC;
      "then", THEN; "true", TRUE; "with", WITH
    ]
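
  (* Classify an identifier: return its keyword token if [s] is a
     reserved word, and [IDENT s] otherwise.  The lookup goes through
     a hash table built once from [kwd_tbl]. *)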
  let id_or_kwd =
    let h = Hashtbl.create 17 in
    List.iter (fun (s, t) -> Hashtbl.add h s t) kwd_tbl;
    fun s -> try Hashtbl.find h s with Not_found -> IDENT s
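
  (* Record a line break in the lexbuf positions so that error
     locations report the correct line number. *)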
  let newline lexbuf =
    let pos = lexbuf.lex_curr_p in
    lexbuf.lex_curr_p <-
      { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum }
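
  (* Accumulates the contents of the string literal currently being lexed. *)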
  let string_buffer = ref ""
}
let digit = ['0'-'9']
let alpha = ['a'-'z' 'A'-'Z']
let ident = alpha (alpha | "_" | "'" | digit)*
let integer = digit+
let space = [' ' '\t']
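
(* Main entry point: skips whitespace and comments and returns the next token. *)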
rule token = parse
| '\n' { newline lexbuf; token lexbuf }
| space+ { token lexbuf }
| integer as s { INTEGER (int_of_string s) }
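(* String literals: the contents are read by [lex_string] into
   [string_buffer]; the start position is saved and restored around the
   call so that the resulting STRING token spans the whole literal. *)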
| "\"" { string_buffer := "" ;
let start_pos = lexbuf.lex_start_pos
and start_p = lexbuf.lex_start_p
in
lex_string lexbuf ;
lexbuf.lex_start_pos <- start_pos;
lexbuf.lex_start_p <- start_p;
STRING (!string_buffer) }
| "()" { UNIT }
| "[]" { EMPTYLIST }
| ident as id { id_or_kwd id }
| "->" { RIGHTARROW }
| "::" { TWOCOLONS }
| "_" { UNDERSCORE }
| '+' { PLUS }
| '-' { MINUS }
| '*' { TIMES }
| '/' { DIV }
| "&&" { AND }
| "||" { OR }
| "|" { CASE }
| "<=" { LE }
| ">=" { GE }
| '<' { LT }
| '>' { GT }
| "<>" { NEQ }
| '=' { EQ }
| "[" { LBRACE }
| "]" { RBRACE }
| '(' { LPAREN }
| ')' { RPAREN }
| ',' { COMMA }
| ";" { SEMICOLON }
| "(*" { comment 0 lexbuf }
| eof { EOF }
| _ as c { raise (Lexing_error ("illegal character: " ^ String.make 1 c)) }
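
(* Comments in the OCaml style, with nesting: [depth] is the number of
   still-open comments beyond the outermost one. *)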
and comment depth = parse
| '\n' { newline lexbuf; comment depth lexbuf }
| "(*" { comment (depth + 1) lexbuf }
| "*)" { match depth with
| 0 -> token lexbuf
| _ -> comment (depth - 1) lexbuf }
| eof { raise (Lexing_error "unterminated comment") }
| _ { comment depth lexbuf }
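
(* Body of a string literal: handles the escape sequences for double
   quote, newline and backslash; any other backslash, a bare line break,
   or end of file is an error. *)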
and lex_string = parse
| "\"" { () }
| "\\\"" { string_buffer := !string_buffer ^ "\"";
lex_string lexbuf }
| "\\n" { string_buffer := !string_buffer ^ "\n";
lex_string lexbuf }
| "\\\\" { string_buffer := !string_buffer ^ "\\";
lex_string lexbuf }
| "\\" { raise (Lexing_error "illegal \\ in string") }
| "\n" eof | eof
{ raise (Lexing_error "unterminated string") }
| "\n" { raise (Lexing_error "illegal \\n in string") }
| _ as c { string_buffer := !string_buffer ^ (String.make 1 c) ;
lex_string lexbuf }
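
(* Typical driver, given as a sketch only.  The start symbol
   [Parser.file] and the module name [Lexer] are assumptions; adjust
   them to the actual grammar and file names:

     let lb = Lexing.from_channel stdin in
     let ast = Parser.file Lexer.token lb in
     ...
*)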