summaryrefslogtreecommitdiff
path: root/lexer.mll
diff options
context:
space:
mode:
authorGuillaume Seguin <guillaume@segu.in>2009-01-10 15:18:46 +0100
committerGuillaume Seguin <guillaume@segu.in>2009-01-10 15:18:46 +0100
commit7c9a83390e3094cbed37ccaf98574dff0450c2c5 (patch)
tree27fdf025610858eb2c471a61ddaf08e63b4ead41 /lexer.mll
downloadpetitcaml-7c9a83390e3094cbed37ccaf98574dff0450c2c5.tar.gz
petitcaml-7c9a83390e3094cbed37ccaf98574dff0450c2c5.tar.bz2
[petitcaml] Import compiler frontend
Diffstat (limited to 'lexer.mll')
-rw-r--r--lexer.mll100
1 files changed, 100 insertions, 0 deletions
diff --git a/lexer.mll b/lexer.mll
new file mode 100644
index 0000000..88081cd
--- /dev/null
+++ b/lexer.mll
@@ -0,0 +1,100 @@
+
+(* Analyseur lexical pour Petit Caml *)
+
+{
+ open Lexing
+ open Parser
+
+ (* Raised by the lexer rules of this file on malformed input; the payload
+    is a human-readable description of the problem. *)
+ exception Lexing_error of string
+
+ (* Association list pairing every reserved word with its parser token. *)
+ let kwd_tbl =
+   [ ("else", ELSE);
+     ("false", FALSE);
+     ("function", FUNCTION);
+     ("if", IF);
+     ("in", IN);
+     ("let", LET);
+     ("match", MATCH);
+     ("not", NOT);
+     ("rec", REC);
+     ("then", THEN);
+     ("true", TRUE);
+     ("with", WITH) ]
+
+ (* [id_or_kwd s] returns the keyword token bound to [s] in [kwd_tbl], or
+    [IDENT s] when [s] is not a reserved word.
+    The keyword table is loaded once into a hash table when this binding is
+    evaluated; each call then performs a single hash lookup. *)
+ let id_or_kwd =
+   let h = Hashtbl.create 17 in
+   List.iter (fun (s, t) -> Hashtbl.add h s t) kwd_tbl;
+   fun s ->
+     (* Only Not_found means that s is an ordinary identifier; any other
+        exception is left to propagate instead of being swallowed. *)
+     try Hashtbl.find h s with Not_found -> IDENT s
+
+ (* [newline lexbuf] records a line break in the current position of
+    [lexbuf]: the line number is incremented and the beginning-of-line
+    offset is set to the current character offset. *)
+ let newline lexbuf =
+   let here = lexbuf.lex_curr_p in
+   let next_line =
+     { here with pos_bol = here.pos_cnum; pos_lnum = succ here.pos_lnum }
+   in
+   lexbuf.lex_curr_p <- next_line
+
+ (* Holds the contents of the string literal currently being lexed: it is
+    reset by [token] when a literal starts and extended by [lex_string]. *)
+ let string_buffer = ref ""
+}
+
+(* Named regular expressions shared by the rules of this file. *)
+let digit = ['0'-'9']
+let alpha = ['a'-'z' 'A'-'Z']
+(* An identifier starts with a letter, followed by letters, digits,
+   underscores or primes. *)
+let ident = alpha (alpha | digit | '_' | '\'')*
+let integer = digit+
+let space = [' ' '\t']
+
+(* Main entry point: returns the next token of the input.
+   Blanks, line breaks and comments are skipped; line breaks also update the
+   position through [newline].
+   Raises [Lexing_error] on an illegal character, on an integer literal that
+   does not fit in an [int], and on any error reported by [comment] or
+   [lex_string]. *)
+rule token = parse
+ | '\n' { newline lexbuf; token lexbuf }
+ | space+ { token lexbuf }
+ | integer as s
+     { (* int_of_string raises Failure when the literal is out of range;
+          report that as a lexical error like every other malformed input. *)
+       try INTEGER (int_of_string s)
+       with Failure _ ->
+         raise (Lexing_error ("integer literal too large: " ^ s)) }
+ | "\"" { string_buffer := "" ;
+ (* Remember where the literal starts: every match performed by lex_string
+    moves the start position, and the STRING token must span the whole
+    literal. *)
+ let start_pos = lexbuf.lex_start_pos
+ and start_p = lexbuf.lex_start_p
+ in
+ lex_string lexbuf ;
+ lexbuf.lex_start_pos <- start_pos;
+ lexbuf.lex_start_p <- start_p;
+ STRING (!string_buffer) }
+ | "()" { UNIT }
+ | "[]" { EMPTYLIST }
+ | ident as id { id_or_kwd id }
+ | "->" { RIGHTARROW }
+ | "::" { TWOCOLONS }
+ | "_" { UNDERSCORE }
+ | '+' { PLUS }
+ | '-' { MINUS }
+ | '*' { TIMES }
+ | '/' { DIV }
+ | "&&" { AND }
+ | "||" { OR }
+ | "|" { CASE }
+ | "<=" { LE }
+ | ">=" { GE }
+ | '<' { LT }
+ | '>' { GT }
+ | "<>" { NEQ }
+ | '=' { EQ }
+ | "[" { LBRACE }
+ | "]" { RBRACE }
+ | '(' { LPAREN }
+ | ')' { RPAREN }
+ | ',' { COMMA }
+ | ";" { SEMICOLON }
+ | "(*" { comment 0 lexbuf }
+ | eof { EOF }
+ | _ as c { raise (Lexing_error ("illegal character: " ^ String.make 1 c)) }
+
+(* Skips the body of a comment, nested comments included, then resumes
+   normal lexing and returns the token that follows.
+   [depth] is the number of enclosing comments still to be closed once the
+   current one ends; 0 means the outermost comment.
+   Raises [Lexing_error] if the input ends inside a comment. *)
+and comment depth = parse
+ | "(*" { comment (succ depth) lexbuf }
+ | "*)" { if depth = 0 then token lexbuf
+          else comment (pred depth) lexbuf }
+ | '\n' { newline lexbuf; comment depth lexbuf }
+ | eof { raise (Lexing_error "unterminated comment") }
+ | _ { comment depth lexbuf }
+
+(* Reads the remainder of a string literal, up to and including its closing
+   double quote, appending the decoded characters to [string_buffer].
+   Recognised escapes are backslash-quote, backslash-n and
+   backslash-backslash.
+   Raises [Lexing_error] on any other backslash, on a raw line break, and
+   when the input ends before the closing quote. *)
+and lex_string = parse
+ | '"' { () }
+ | '\\' '"' { let so_far = !string_buffer in
+              string_buffer := so_far ^ "\"";
+              lex_string lexbuf }
+ | '\\' 'n' { let so_far = !string_buffer in
+              string_buffer := so_far ^ "\n";
+              lex_string lexbuf }
+ | '\\' '\\' { let so_far = !string_buffer in
+               string_buffer := so_far ^ "\\";
+               lex_string lexbuf }
+ | '\\' { raise (Lexing_error "illegal \\ in string") }
+ | '\n'? eof
+     { raise (Lexing_error "unterminated string") }
+ | '\n' { raise (Lexing_error "illegal \\n in string") }
+ | _ as ch { let so_far = !string_buffer in
+             string_buffer := so_far ^ String.make 1 ch;
+             lex_string lexbuf }