summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGuillaume Seguin <guillaume@segu.in>2009-01-10 15:18:46 +0100
committerGuillaume Seguin <guillaume@segu.in>2009-01-10 15:18:46 +0100
commit7c9a83390e3094cbed37ccaf98574dff0450c2c5 (patch)
tree27fdf025610858eb2c471a61ddaf08e63b4ead41
downloadpetitcaml-7c9a83390e3094cbed37ccaf98574dff0450c2c5.tar.gz
petitcaml-7c9a83390e3094cbed37ccaf98574dff0450c2c5.tar.bz2
[petitcaml] Import compiler frontend
-rw-r--r--Makefile36
-rw-r--r--ast.mli66
-rw-r--r--lexer.mll100
-rw-r--r--main.ml100
-rw-r--r--parser.mly149
-rw-r--r--typing.ml348
-rw-r--r--utils.ml180
7 files changed, 979 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5bc105c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,36 @@
+CMO=typing.cmo utils.cmo lexer.cmo parser.cmo \
+ mips.cmo closure.cmo compile.cmo main.cmo
+GENERATED = lexer.ml parser.ml parser.mli
+BIN=petit-caml
+FLAGS=
+
+all: $(BIN)
+
+$(BIN):$(CMO)
+ ocamlc $(FLAGS) -o $(BIN) $(CMO)
+
+.SUFFIXES: .mli .ml .cmi .cmo .mll .mly
+
+.mli.cmi:
+ ocamlc $(FLAGS) -c $<
+
+.ml.cmo:
+ ocamlc $(FLAGS) -c $<
+
+.mll.ml:
+ ocamllex $<
+
+.mly.ml:
+ menhir -v $<
+
+.mly.mli:
+ ocamlyacc -v $<
+
+clean:
+ rm -f *.cm[io] *.o *~ $(BIN) $(GENERATED) parser.output
+
+.depend depend:$(GENERATED)
+ rm -f .depend
+ ocamldep *.ml *.mli > .depend
+
+include .depend
diff --git a/ast.mli b/ast.mli
new file mode 100644
index 0000000..814e1c7
--- /dev/null
+++ b/ast.mli
@@ -0,0 +1,66 @@
+open Lexing
+
+(* Types *)
+
+type typ =
+ | Tint
+ | Tstring
+ | Tbool
+ | Tunit
+ | Tarrow of typ * typ
+ | Tproduct of typ list
+ | Tlist of typ
+ | Tvar of tvar
+and tvar =
+ { id : int;
+ mutable def : typ option }
+
+(* AST *)
+
+type ident = string
+
+type func_name = string option
+type pos = { l : int ; c : int ; raw_c : int }
+type loc = { spos : pos ; epos : pos }
+type expr_loc = loc option
+type expr_typ = typ option
+
+type binop = Add | Sub | Mul | Div | And | Or | Eq | Neq | Lt | Le | Gt | Ge
+type unop = Neg | Not
+
+type const =
+ | Cint of int
+ | Cstring of string
+ | Cbool of bool
+ | Cunit
+ | Cemptylist
+
+type motif_raw =
+ | Munderscore
+ | Mident of string
+ | Mtuple of motif list
+and motif = { m : motif_raw ;
+ motif_loc : expr_loc ;
+ mutable motif_t : expr_typ }
+
+type expr_raw =
+ | Econst of const
+ | Eident of ident
+ | Etuple of expr list
+ | Ebinop of expr * binop * expr
+ | Eunop of unop * expr
+ | Eletin of motif * expr * expr
+ | Efunc of func
+ | Eif of expr * expr * expr
+ | Elistcons of expr * expr
+ | Ecall of expr * expr
+ | Ematch of expr * expr * motif * motif * expr
+ | Eclos of ident * ident list
+and expr = { e : expr_raw ;
+ loc : expr_loc ;
+ mutable t : expr_typ }
+and func =
+ { name : func_name ;
+ recursive : bool ;
+ arg : motif ;
+ body : expr }
diff --git a/lexer.mll b/lexer.mll
new file mode 100644
index 0000000..88081cd
--- /dev/null
+++ b/lexer.mll
@@ -0,0 +1,100 @@
+
+(* Analyseur lexical pour Petit Caml *)
+
+{
+ open Lexing
+ open Parser
+
+ exception Lexing_error of string
+
+ let kwd_tbl =
+ [
+ "else", ELSE; "false", FALSE; "function", FUNCTION; "if", IF;
+ "in", IN; "let", LET; "match", MATCH; "not", NOT; "rec", REC;
+ "then", THEN; "true", TRUE; "with", WITH
+ ]
+
+ let id_or_kwd =
+ let h = Hashtbl.create 17 in
+ List.iter (fun (s,t) -> Hashtbl.add h s t) kwd_tbl;
+ fun s ->
+ try List.assoc s kwd_tbl with _ -> IDENT s
+
+ let newline lexbuf =
+ let pos = lexbuf.lex_curr_p in
+ lexbuf.lex_curr_p <-
+ { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum }
+
+ let string_buffer = ref ""
+}
+
+let digit = ['0'-'9']
+let alpha = ['a'-'z' 'A'-'Z']
+let ident = alpha (alpha | "_" | "'" | digit)*
+let integer = ['0'-'9']+
+let space = [' ' '\t']
+
+rule token = parse
+ | '\n' { newline lexbuf; token lexbuf }
+ | space+ { token lexbuf }
+ | integer as s { INTEGER (int_of_string s) }
+ | "\"" { string_buffer := "" ;
+ let start_pos = lexbuf.lex_start_pos
+ and start_p = lexbuf.lex_start_p
+ in
+ lex_string lexbuf ;
+ lexbuf.lex_start_pos <- start_pos;
+ lexbuf.lex_start_p <- start_p;
+ STRING (!string_buffer) }
+ | "()" { UNIT }
+ | "[]" { EMPTYLIST }
+ | ident as id { id_or_kwd id }
+ | "->" { RIGHTARROW }
+ | "::" { TWOCOLONS }
+ | "_" { UNDERSCORE }
+ | '+' { PLUS }
+ | '-' { MINUS }
+ | '*' { TIMES }
+ | '/' { DIV }
+ | "&&" { AND }
+ | "||" { OR }
+ | "|" { CASE }
+ | "<=" { LE }
+ | ">=" { GE }
+ | '<' { LT }
+ | '>' { GT }
+ | "<>" { NEQ }
+ | '=' { EQ }
+ | "[" { LBRACE }
+ | "]" { RBRACE }
+ | '(' { LPAREN }
+ | ')' { RPAREN }
+ | ',' { COMMA }
+ | ";" { SEMICOLON }
+ | "(*" { comment 0 lexbuf }
+ | eof { EOF }
+ | _ as c { raise (Lexing_error ("illegal character: " ^ String.make 1 c)) }
+
+and comment depth = parse
+ | '\n' { newline lexbuf; comment depth lexbuf }
+ | "(*" { comment (depth + 1) lexbuf }
+ | "*)" { match depth with
+ | 0 -> token lexbuf
+ | _ -> comment (depth - 1) lexbuf }
+ | eof { raise (Lexing_error "unterminated comment") }
+ | _ { comment depth lexbuf }
+
+and lex_string = parse
+ | "\"" { () }
+ | "\\\"" { string_buffer := !string_buffer ^ "\"";
+ lex_string lexbuf }
+ | "\\n" { string_buffer := !string_buffer ^ "\n";
+ lex_string lexbuf }
+ | "\\\\" { string_buffer := !string_buffer ^ "\\";
+ lex_string lexbuf }
+ | "\\" { raise (Lexing_error "illegal \\ in string") }
+ | "\n" eof | eof
+ { raise (Lexing_error "unterminated string") }
+ | "\n" { raise (Lexing_error "illegal \\n in string") }
+ | _ as c { string_buffer := !string_buffer ^ (String.make 1 c) ;
+ lex_string lexbuf }
diff --git a/main.ml b/main.ml
new file mode 100644
index 0000000..e75a2d5
--- /dev/null
+++ b/main.ml
@@ -0,0 +1,100 @@
+open Format
+open Lexing
+open Ast
+
+let parse_only = ref false
+let type_only = ref false
+let verbose = ref false
+
+let ifile = ref ""
+let ifile_length = ref 0
+let ofile = ref ""
+
+let set_file f s = f := s
+
+let options =
+ ["-parse-only", Arg.Set parse_only,
+ " Arrête la compilation après la phase d'analyse syntaxique";
+ "-type-only", Arg.Set type_only,
+ " Arrête la compilation après la phase d'analyse sémantique";
+ "-v", Arg.Set verbose,
+ " Affiche des informations supplémentaires sur la compilation";
+ "-o", Arg.String (set_file ofile),
+ "<file> Définit le nom du fichier de sortie)"]
+
+let usage = "Usage: petit-caml [option] file.ml"
+
+let localisation pos =
+ let l = pos.pos_lnum in
+ let c = pos.pos_cnum - pos.pos_bol + 1 in
+ eprintf "File \"%s\", line %d, characters %d-%d:\n" !ifile l (c-1) c
+let localisation_expr = function
+ | None ->
+ eprintf "File \"%s\", line 0, characters 0-%d:\n"
+ !ifile
+ !ifile_length
+ | Some loc when loc.spos.l = loc.spos.l ->
+ eprintf "File \"%s\", line %d, characters %d-%d:\n"
+ !ifile
+ loc.spos.l
+ loc.spos.c
+ loc.epos.c
+ | Some loc ->
+ eprintf "File \"%s\", line %d, characters %d-%d:\n"
+ !ifile
+ loc.spos.l
+ loc.spos.c
+ (loc.spos.c + loc.epos.raw_c - loc.epos.raw_c)
+
+let () =
+ Arg.parse options (set_file ifile) usage;
+
+ if !ifile = "" then begin
+ eprintf "Aucun fichier à compiler\n@?";
+ exit 1
+ end;
+
+ if not (Filename.check_suffix !ifile ".ml") then begin
+ eprintf "Le fichier d'entrée doit avoir l'extension .ml\n@?";
+ Arg.usage options usage;
+ exit 1
+ end;
+
+ if !ofile = "" then
+ ofile := Filename.chop_suffix !ifile ".ml" ^ ".s";
+
+ let f = open_in !ifile in
+
+ ifile_length := in_channel_length f;
+
+ let buf = Lexing.from_channel f in
+
+ try
+ let a = Parser.fichier Lexer.token buf
+ in
+ close_in f;
+
+ if !verbose then Utils.display_ast a;
+
+ if !parse_only then exit 0;
+
+ let _ = Typing.type_ast a
+ in
+ if !verbose then Utils.display_ast a;
+
+ if !type_only then exit 0;
+
+ Compile.compile a !ofile
+ with
+ | Lexer.Lexing_error err ->
+ localisation (Lexing.lexeme_start_p buf);
+ eprintf "Erreur dans l'analyse lexicale : %s@." err;
+ exit 1
+ | Parser.Error ->
+ localisation (Lexing.lexeme_start_p buf);
+ eprintf "Erreur dans l'analyse syntaxique@.";
+ exit 1
+ | Typing.TypingFailure (loc, err) ->
+ localisation_expr loc;
+ eprintf "Erreur dans l'analyse sémantique : %s@." err;
+ exit 1
diff --git a/parser.mly b/parser.mly
new file mode 100644
index 0000000..3b2c0e3
--- /dev/null
+++ b/parser.mly
@@ -0,0 +1,149 @@
+/* Analyseur syntaxique pour Petit Caml */
+
+%{
+open Ast
+open Utils
+%}
+
+%token <int> INTEGER
+%token <string> STRING
+%token TRUE FALSE
+%token UNIT
+%token <string> IDENT
+%token ELSE FUNCTION IF IN LET MATCH NOT REC THEN WITH
+%token EMPTYLIST RIGHTARROW TWOCOLONS CASE UNDERSCORE
+%token PLUS MINUS TIMES DIV AND OR
+%token LE GE LT GT NEQ EQ
+%token LBRACE RBRACE LPAREN RPAREN COMMA SEMICOLON
+%token EOF
+
+/* Dfinitions des priorits et associativits des tokens */
+
+%nonassoc IN
+%nonassoc ELSE
+%nonassoc RIGHTARROW
+%left AND OR
+%left LT LE GT GE EQ NEQ
+%right TWOCOLONS
+%left MINUS PLUS
+%left TIMES DIV
+%nonassoc UMINUS NOT
+
+/* Point d'entre de la grammaire */
+%start fichier
+
+/* Type des valeurs retournes par l'analyseur syntaxique */
+%type <Ast.expr> fichier
+
+%%
+
+fichier:
+ decls = list(decl) EOF
+ {
+ let rec reduce_decls = function
+ | [] -> raw_expr (Econst Cunit)
+ | (m, e, loc)::l -> locd_expr (Eletin (m, e, (reduce_decls l))) loc
+ in
+ reduce_decls decls
+ }
+
+const:
+| a = INTEGER { Cint a }
+| s = STRING { Cstring s }
+| TRUE { Cbool true }
+| FALSE { Cbool false }
+| UNIT { Cunit }
+| EMPTYLIST { Cemptylist }
+;
+
+motif:
+| UNDERSCORE { locd_motif Munderscore (make_loc $startpos $endpos) }
+| id = IDENT { locd_motif (Mident id) (make_loc $startpos $endpos) }
+| LPAREN m = motif COMMA motifs = separated_nonempty_list(COMMA, motif) RPAREN
+ { locd_motif (Mtuple (m::motifs)) (make_loc $startpos $endpos) }
+;
+
+decl:
+| LET name = motif EQ e = expr
+ { (name, e, make_loc $startpos $endpos) }
+| LET recur = ioption(REC) id = IDENT args = nonempty_list(motif) EQ e = expr
+ { (locd_motif (Mident id) (make_loc $startpos(id) $endpos(id)),
+ raw_expr (make_func id recur e args),
+ make_loc $startpos $endpos) }
+;
+
+simple_expr:
+| LPAREN e = expr RPAREN { e }
+| id = IDENT { locd_expr (Eident id) (make_loc $startpos $endpos) }
+| c = const { locd_expr (Econst c) (make_loc $startpos $endpos) }
+| LPAREN e = expr COMMA exprs = separated_nonempty_list(COMMA, expr) RPAREN
+ { locd_expr (Etuple (e::exprs)) (make_loc $startpos $endpos) }
+| LBRACE exprs = separated_list(SEMICOLON, expr) RBRACE
+ { List.fold_right (fun expr base -> raw_expr (Elistcons (expr, base)))
+ exprs
+ (locd_expr (Econst (Cemptylist))
+ (make_loc $startpos $endpos)) }
+;
+
+expr:
+| e = simple_expr { e }
+| func = simple_expr args = nonempty_list(simple_expr)
+ { match args with
+ | [] -> failwith "Unreachable matching"
+ | arg::args ->
+ List.fold_left (fun base arg -> raw_expr (Ecall (base, arg)))
+ (locd_expr (Ecall (func, arg))
+ (make_loc $startpos $endpos))
+ args }
+| FUNCTION m = motif RIGHTARROW e = expr
+ { locd_expr (Efunc ({ name = None ;
+ recursive = false ;
+ arg = m ;
+ body = e }))
+ (make_loc $startpos $endpos) }
+| e = unopexpr { locd_expr e (make_loc $startpos $endpos) }
+| left = expr o = binop right = expr
+ { locd_expr (Ebinop (left, o, right))
+ (make_loc $startpos $endpos) }
+| head = expr TWOCOLONS tail = expr
+ { locd_expr (Elistcons (head, tail))
+ (make_loc $startpos $endpos) }
+| IF cond = expr THEN then_expr = expr ELSE else_expr = expr
+ { locd_expr (Eif (cond, then_expr, else_expr))
+ (make_loc $startpos $endpos) }
+| LET m = motif EQ value = expr IN body = expr
+ { locd_expr (Eletin (m, value, body))
+ (make_loc $startpos $endpos) }
+| LET recur = ioption(REC) id = IDENT args = nonempty_list(motif)
+ EQ fbody = expr IN body = expr
+ { locd_expr (Eletin (locd_motif (Mident id) (make_loc $startpos(id)
+ $endpos(id)),
+ raw_expr (make_func id recur fbody args),
+ body))
+ (make_loc $startpos $endpos) }
+| MATCH e = expr WITH
+ EMPTYLIST RIGHTARROW empty_e = expr
+ CASE head = motif TWOCOLONS tail = motif RIGHTARROW match_expr = expr
+ { locd_expr (Ematch (e, empty_e, head, tail, match_expr))
+ (make_loc $startpos $endpos) }
+;
+
+unopexpr:
+| MINUS e = expr %prec UMINUS { Eunop (Neg, e) }
+| NOT e = expr { Eunop (Not, e) }
+;
+
+%inline binop:
+| PLUS { Add }
+| MINUS { Sub }
+| TIMES { Mul }
+| DIV { Div }
+| AND { And }
+| OR { Or }
+| EQ { Eq }
+| NEQ { Neq }
+| LT { Lt }
+| LE { Le }
+| GT { Gt }
+| GE { Ge }
+;
diff --git a/typing.ml b/typing.ml
new file mode 100644
index 0000000..45008c3
--- /dev/null
+++ b/typing.ml
@@ -0,0 +1,348 @@
+open Ast
+
+module Int =
+ struct
+ type t = int
+ let compare x y = x - y
+ end
+
+module Imap = Map.Make(Int)
+
+let next_id = ref 1
+
+let rec head = function
+ | Tvar tvar ->
+ begin
+ match tvar.def with
+ | None -> Tvar tvar
+ | Some t -> head t
+ end
+ | t -> t
+
+let rec canon t =
+ match head t with
+ | Tarrow (t1, t2) -> Tarrow (canon t1, canon t2)
+ | Tproduct l -> Tproduct (List.map canon l)
+ | Tlist t -> Tlist (canon t)
+ | t -> t
+
+let canon_string t tvar_names =
+ let next_tvar_char = ref 97
+ and tvar_names = ref tvar_names
+ in
+ let rec canon_aux t =
+ match canon t with
+ | Tint -> "int"
+ | Tstring -> "string"
+ | Tunit -> "unit"
+ | Tbool -> "bool"
+ | Tarrow (t1, t2) ->
+ Printf.sprintf "%s -> %s" (canon_aux t1) (canon_aux t2)
+ | Tproduct (t::l) ->
+ List.fold_left (fun base t ->
+ Printf.sprintf "%s * %s" base (canon_aux t))
+ (canon_aux t)
+ l
+ | Tproduct _ -> failwith "Unreachable matching"
+ | Tlist t ->
+ Printf.sprintf "%s list" (canon_aux t)
+ | Tvar tvar ->
+ try
+ Imap.find tvar.id !tvar_names
+ with Not_found ->
+ let tvar_name =
+ (Printf.sprintf "'%c" (char_of_int !next_tvar_char))
+ in
+ incr next_tvar_char;
+ tvar_names := Imap.add tvar.id tvar_name !tvar_names;
+ tvar_name
+ in
+ let s = canon_aux t
+ in
+ s, !tvar_names
+
+exception TypingFailure of loc option * string
+
+let typing_error l err = raise (TypingFailure (l, err))
+
+let unification_error l t1 t2 =
+ let t1s, tvar_names = canon_string t1 Imap.empty
+ in
+ let t2s, _ = canon_string t2 tvar_names
+ in
+ let err = Printf.sprintf "Cette expression a le type %s mais est \
+ ici utilisée avec le type %s" t1s t2s
+ in
+ typing_error l err
+
+let rec occur var t_raw =
+ match head t_raw with
+ | Tarrow (t1, t2) -> (occur var t1) || (occur var t2)
+ | Tproduct l -> List.exists (occur var) l
+ | Tlist t -> occur var t
+ | Tvar tvar when tvar.id = var.id -> true
+ | t -> false
+
+let rec unify loc t1 t2 =
+ match (head t1, head t2) with
+ | (a, b) when a = b -> ()
+ | (Tarrow (t1a, t1b), Tarrow (t2a, t2b)) ->
+ unify loc t1a t2a;
+ unify loc t1b t2b
+ | (Tproduct p1, Tproduct p2) when unify_products loc p1 p2 -> ()
+ | (Tproduct p1, Tproduct p2) -> unification_error loc t1 t2
+ | (Tlist t1, Tlist t2) -> unify loc t1 t2
+ | (Tvar tv1, t2) when not (occur tv1 t2) -> (* tv1.def = None puisque
+ c'est le head de t1 *)
+ tv1.def <- Some t2
+ | (Tvar tv1, t2) -> unification_error loc (Tvar tv1) t2
+ | (t1, Tvar tv2) ->
+ unify loc (Tvar tv2) t1
+ | (t1, t2) -> unification_error loc t1 t2
+and unify_products loc p1 p2 =
+ match (p1, p2) with
+ | [], [] -> true
+ | [], l -> false
+ | l, [] -> false
+ | t1::l1, t2::l2 ->
+ unify loc t1 t2;
+ unify_products loc p1 p2
+
+module V = struct
+ type t = tvar
+ let compare v1 v2 = Pervasives.compare v1.id v2.id
+ let create = let r = ref 0 in fun () -> incr r; { id = !r; def = None }
+end
+
+module Vset = Set.Make(V)
+
+let rec fvars t =
+ match head t with
+ | Tarrow (t1, t2) -> Vset.union (fvars t1) (fvars t2)
+ | Tproduct l ->
+ List.fold_left
+ (fun base t -> Vset.union base (fvars t)) Vset.empty l
+ | Tlist t -> fvars t
+ | Tvar tv -> Vset.singleton tv
+ | _ -> Vset.empty (* Tint, Tstring, Tbool, Tunit *)
+
+type schema = { vars : Vset.t; typ : typ }
+module Smap = Map.Make(String)
+type env = { bindings : schema Smap.t; fvars : Vset.t }
+
+let empty_env = { bindings = Smap.empty ; fvars = Vset.empty }
+
+let add ident t env =
+ { bindings = Smap.add ident { vars = Vset.empty ; typ = t } env.bindings;
+ fvars = Vset.union env.fvars (fvars t) }
+
+let add_gen ident t env =
+ let new_env_fvars =
+ Vset.filter (function tv -> not (occur tv t)) env.fvars
+ in
+ let binding_vars = Vset.diff (fvars t) new_env_fvars
+ in
+ { bindings =
+ Smap.add ident { vars = binding_vars ; typ = t } env.bindings;
+ fvars = new_env_fvars }
+
+let rec replace_var orig dest t =
+ match head t with
+ | Tarrow (t1, t2) ->
+ Tarrow (replace_var orig dest t1, replace_var orig dest t2)
+ | Tproduct l ->
+ Tproduct (List.map (function t -> replace_var orig dest t) l)
+ | Tlist t ->
+ Tlist (replace_var orig dest t)
+ | Tvar tv when tv.id = orig ->
+ Tvar { id = dest ; def = tv.def }
+ | t -> t
+
+let use_freshvar tv t =
+ let new_t = replace_var tv.id !next_id t
+ in
+ incr next_id;
+ new_t
+
+let find ident env =
+ let binding = Smap.find ident env.bindings
+ in
+ Vset.fold use_freshvar binding.vars binding.typ
+
+let alloc_tvar () =
+ let new_t = Tvar { id = !next_id ; def = None }
+ in
+ incr next_id;
+ new_t
+
+module Sset = Set.Make(String)
+
+let rec w env e =
+ let w_aux e =
+ match e.e with
+ | Eident ident ->
+ begin
+ try
+ find ident env
+ with Not_found ->
+ typing_error e.loc
+ (Printf.sprintf "Identifiant invalide : %s"
+ ident)
+ end
+ | Econst (Cint _) -> Tint
+ | Econst (Cstring _) -> Tstring
+ | Econst (Cbool _) -> Tbool
+ | Econst Cunit -> Tunit
+ | Econst (Cemptylist) ->
+ Tlist (alloc_tvar ())
+ | Etuple l ->
+ Tproduct (List.map (w env) l)
+ | Ebinop (e1, And, e2)
+ | Ebinop (e1, Or, e2) ->
+ let t1 = w env e1
+ and t2 = w env e2
+ in
+ unify e1.loc t1 Tbool;
+ unify e2.loc t2 Tbool;
+ Tbool
+ | Ebinop (e1, Eq, e2)
+ | Ebinop (e1, Neq, e2)
+ | Ebinop (e1, Lt, e2)
+ | Ebinop (e1, Le, e2)
+ | Ebinop (e1, Gt, e2)
+ | Ebinop (e1, Ge, e2) ->
+ let t1 = w env e1
+ and t2 = w env e2
+ in
+ unify e1.loc t1 Tint;
+ unify e2.loc t2 Tint;
+ Tbool
+ | Ebinop (e1, Add, e2)
+ | Ebinop (e1, Sub, e2)
+ | Ebinop (e1, Mul, e2)
+ | Ebinop (e1, Div, e2) ->
+ let t1 = w env e1
+ and t2 = w env e2
+ in
+ unify e1.loc t1 Tint;
+ unify e2.loc t2 Tint;
+ Tint
+ | Eunop (Not, e) ->
+ let t = w env e
+ in
+ unify e.loc t Tbool;
+ Tbool
+ | Eunop (Neg, e) ->
+ let t = w env e
+ in
+ unify e.loc t Tint;
+ Tint
+ | Eletin (m, val_expr, body_expr) ->
+ let m_t, env2 = w_motif env m
+ and val_t = w env val_expr
+ in
+ unify val_expr.loc m_t val_t;
+ w env2 body_expr
+ | Elistcons (head, tail) ->
+ let head_t = w env head
+ and tail_t = w env tail
+ in
+ unify head.loc (Tlist head_t) tail_t;
+ Tlist head_t
+ | Eif (cond, then_expr, else_expr) ->
+ let cond_t = w env cond
+ and then_t = w env then_expr
+ and else_t = w env else_expr
+ in
+ unify cond.loc cond_t Tbool;
+ unify else_expr.loc else_t then_t;
+ then_t
+ | Efunc f when not f.recursive ->
+ let arg_t, env2 = w_motif_func env f.arg
+ in
+ let return_t = w env2 f.body
+ in
+ Tarrow (arg_t, return_t)
+ | Efunc f -> (* f.recursive = true *)
+ let arg_t, env2 = w_motif_func env f.arg
+ and return_t = alloc_tvar ()
+ in
+ let env3 =
+ let fname = match f.name with
+ | Some name -> name
+ | None -> failwith "Unreachable matching"
+ in
+ add fname (Tarrow (arg_t, return_t)) env2
+ in
+ unify f.body.loc (w env3 f.body) return_t;
+ Tarrow (arg_t, return_t)
+ | Ecall (func_expr, arg) ->
+ let func_t = w env func_expr
+ and arg_t = w env arg
+ and alpha_t = alloc_tvar ()
+ in
+ unify func_expr.loc func_t (Tarrow (arg_t, alpha_t));
+ alpha_t
+ | Ematch (match_expr, empty_expr, head_m, tail_m, expr) ->
+ let match_t = w env match_expr
+ and empty_t = w env empty_expr
+ and alpha_t = alloc_tvar ()
+ in
+ unify match_expr.loc match_t (Tlist alpha_t);
+ begin
+ match tail_m.m with
+ | Mtuple _ ->
+ typing_error e.loc "Motif interdit dans le \
+ filtrage de la queue de liste"
+ | _ -> ()
+ end;
+ let head_t, env2 = w_motif env head_m
+ in
+ let tail_t, env3 = w_motif env2 tail_m
+ in
+ let result_t = w env3 expr
+ in
+ unify expr.loc result_t empty_t;
+ result_t
+ | Eclos _ -> failwith "Unreachable matching"
+ in
+ let t = w_aux e
+ in
+ e.t <- Some t;
+ t
+and w_motif_base add_func env m =
+ let new_env = ref env
+ and vars = ref Sset.empty
+ in
+ let rec aux m =
+ match m.m with
+ | Munderscore -> alloc_tvar ()
+ | Mident ident when Sset.mem ident !vars ->
+ typing_error m.motif_loc
+ (Printf.sprintf "Variable %s non unique \
+ dans ce motif" ident)
+ | Mident ident ->
+ let t = alloc_tvar ()
+ in
+ vars := Sset.add ident !vars;
+ new_env := add_func ident t (!new_env);
+ t
+ | Mtuple l ->
+ Tproduct (List.map aux l)
+ in
+ let t = aux m
+ in
+ m.motif_t <- Some t;
+ (t, !new_env)
+and w_motif env m = w_motif_base add_gen env m
+and w_motif_func env m = w_motif_base add env m
+
+let base_env =
+ add "print_int" (Tarrow (Tint, Tunit))
+ (add "print_string" (Tarrow (Tstring, Tunit))
+ (add "print_newline" (Tarrow (Tunit, Tunit))
+ (add "read_int" (Tarrow (Tunit, Tint))
+ (add "read_line" (Tarrow (Tunit, Tstring)) empty_env))))
+
+let type_ast a =
+ w base_env a
diff --git a/utils.ml b/utils.ml
new file mode 100644
index 0000000..deea42d
--- /dev/null
+++ b/utils.ml
@@ -0,0 +1,180 @@
+open Ast
+open Lexing
+open Typing
+
+(* Formattage de base des constantes, motifs et opérateurs *)
+
+let format_const = function
+ | Cint i -> Printf.sprintf "Cint %d" i
+ | Cstring s -> Printf.sprintf "Cstring \"%s\"" s
+ | Cbool true -> "Cbool true"
+ | Cbool false -> "Cbool false"
+ | Cunit -> "Cunit ()"
+ | Cemptylist -> "Cemptylist []"
+
+let format_unop = function
+ | Not -> "Not"
+ | Neg -> "Neg"
+
+let format_binop = function
+ | Add -> "Add"
+ | Sub -> "Sub"
+ | Mul -> "Mul"
+ | Div -> "Div"
+ | And -> "And"
+ | Or -> "Or"
+ | Eq -> "Eq"
+ | Neq -> "Neq"
+ | Lt -> "Lt"
+ | Le -> "Le"
+ | Gt -> "Gt"
+ | Ge -> "Ge"
+
+let rec format_motif m =
+ match m.m with
+ | Munderscore -> "Munderscore _"
+ | Mident id -> Printf.sprintf "Mident %s" id
+ | Mtuple (m::motifs) ->
+ List.fold_left motif_tuple_folder (format_motif m) motifs
+ | Mtuple [] -> "" (* Avoids an useless warning *)
+and motif_tuple_folder base motif =
+ Printf.sprintf "%s, %s" base (format_motif motif)
+and motif_list_folder base motif =
+ Printf.sprintf "%s ; %s" base (format_motif motif)
+
+(* Helper pour l'indentation, crée une string de `level` espaces de long *)
+let make_indent level =
+ Printf.sprintf "%s" (String.make level ' ')
+
+let format_string_option = function
+ | None -> "None"
+ | Some x -> Printf.sprintf "Some %s" x
+
+let typeof e =
+ match e.t with
+ | None -> ""
+ | Some t ->
+ let ts, tvar_names = canon_string t Imap.empty
+ in
+ Printf.sprintf "{%s}" ts
+
+let rec format_expr level expr =
+ let expr_t = typeof expr
+ in
+ match expr.e with
+ | Econst c -> Printf.sprintf "Econst%s %s" expr_t
+ (format_const c)
+ | Eident id -> Printf.sprintf "Eident%s %s" (typeof expr) id
+ | Etuple (e1::l) ->
+ Printf.sprintf "Etuple%s (%s,\n%s)"
+ expr_t
+ (format_expr (level + 8) e1)
+ (format_expr_list "," (level + 8) l)
+ | Etuple l -> ""
+ | Eunop (o, e) ->
+ let so = (format_unop o)
+ in
+ Printf.sprintf "Eunop%s (%s, %s)" expr_t so
+ (format_expr (level + 9 + String.length so) e)
+ | Ebinop (e1, o, e2) ->
+ let s1 = format_expr (level + 8) e1 and so = format_binop o
+ and s2 = format_expr_i (level + 8) e2
+ in
+ Printf.sprintf "Ebinop%s (%s,\n%s%s,\n%s)" expr_t s1
+ (make_indent (level + 8)) so s2
+ | Eletin (m, value, body) ->
+ Printf.sprintf "Eletin%s (%s,\n%s,\n%s)" expr_t
+ (format_motif m)
+ (format_expr_i (level + 8) value)
+ (format_expr_i (level + 8) body)
+ | Efunc f ->
+ format_func level expr_t f
+ | Eif (cond, then_expr, else_expr) ->
+ Printf.sprintf "Eif%s (%s,\n%s,\n%s)" expr_t
+ (format_expr (level + 5) cond)
+ (format_expr_i (level + 5) then_expr)
+ (format_expr_i (level + 5) else_expr)
+ | Elistcons (hd_expr, tl_expr) ->
+ Printf.sprintf "Elistcons%s (%s,\n%s)" expr_t
+ (format_expr (level + 11) hd_expr)
+ (format_expr_i (level + 11) tl_expr)
+ | Ecall (func_expr, arg) ->
+ Printf.sprintf "Ecall%s (%s,\n%s%s" expr_t
+ (format_expr (level + 7) func_expr)
+ (make_indent (level + 7))
+ (format_expr (level + 7) arg)
+ | Ematch (matched_expr, empty_expr, m1, m2, m_expr) ->
+ Printf.sprintf "Ematch%s (%s,\n%s,\n%s((%s)::(%s)),\n%s)"
+ expr_t
+ (format_expr (level + 8) matched_expr)
+ (format_expr_i (level + 8) empty_expr)
+ (make_indent (level + 8))
+ (format_motif m1)
+ (format_motif m2)
+ (format_expr_i (level + 8) m_expr)
+ | Eclos _ -> failwith "Unreachable matching"
+and format_expr_i level expr =
+ Printf.sprintf "%s%s" (make_indent level) (format_expr level expr)
+and format_expr_list sep level = function
+ | [] -> ""
+ | [e] -> Printf.sprintf "%s" (format_expr_i level e)
+ | e::l ->
+ Printf.sprintf "%s%s\n%s" (format_expr_i level e) sep
+ (format_expr_list sep level l)
+and format_func level expr_t f =
+ let get_recursive = function
+ | true -> "recursive"
+ | false -> "not recursive"
+ in
+ Printf.sprintf "Efunc%s {name = %s, %s, arg = %s,\n%s}" expr_t
+ (format_string_option f.name) (get_recursive f.recursive)
+ (format_motif f.arg)
+ (format_expr_i (level + 7) f.body)
+
+let display_ast ast =
+ Printf.printf "%s\n" (format_expr 0 ast)
+
+(* Helpers au moment du parsing *)
+
+(* Création d'un record expr/motif non localisé *)
+let raw_expr expr =
+ { e = expr ; loc = None ; t = None }
+let raw_motif m =
+ { m = m ; motif_loc = None ; motif_t = None }
+
+(* Création d'un record loc avec les positions fournies par menhir *)
+let make_loc startpos endpos =
+ let make_pos p =
+ { l = p.pos_lnum ;
+ c = p.pos_cnum - p.pos_bol ;
+ raw_c = p.pos_cnum }
+ in
+ { spos = make_pos startpos ;
+ epos = make_pos endpos }
+
+(* Création d'un record expr/motif localisé *)
+let locd_expr e loc =
+ { e = e ; loc = Some loc ; t = None }
+let locd_motif m loc =
+ { m = m ; motif_loc = Some loc ; motif_t = None }
+
+(* Création d'un record func en séparatant le premier argument des autres,
+ puis en appliquant un fold right afin d'utiliser la curyfication
+ (f a b c est gérée comme f = function a -> function b -> function c) *)
+let make_func id recur fbody = function
+ | [] -> failwith "Unreachable matching"
+ | arg::args ->
+ let body = List.fold_right (fun arg base ->
+ raw_expr (Efunc {
+ name = None ;
+ recursive = false ;
+ arg = arg ;
+ body = base
+ }))
+ args
+ fbody
+ in
+ Efunc { name = Some id ;
+ recursive = recur <> None ;
+ arg = arg ;
+ body = body }