Commit 1af3e9b5513f2c6ded00c414c3b537583cf38003

Authored by Wojciech Jaworski
1 parent be1d1198

konwersja leksykonu do pdf'a

LCGlexicon/.gitignore
1 1 test
  2 +print_lexicon
2 3 results/*
... ...
LCGlexicon/ENIAM_LCGlexicon.ml
... ... @@ -22,25 +22,26 @@ open ENIAM_LCGtypes
22 22 open ENIAM_LCGlexiconTypes
23 23 open ENIAMcategoriesPL
24 24  
25   -let grammar = load_lexicon "data/grammar.sh"
26   -
27   -type rule2 =
28   - Basic of string
29   - | Quant of (selector * string) list * string
30   - | Raised of (selector * string) list * string * selector list
31   - | Quot of (selector * string) list * string
32   - | Inclusion of string
33   - | Conj of (selector * string) list * string
34   - | Bracket of string
35   -
36   -
37 25 let rec find_selector s = function
38 26 (t,Eq,x :: _) :: l -> if t = s then x else find_selector s l
39 27 | (t,_,_) :: l -> if t = s then failwith "find_selector 1" else find_selector s l
40 28 | [] -> failwith "find_selector 2"
41 29  
  30 +let rec get_syntax rev = function
  31 + Syntax syntax :: rule -> syntax, (List.rev rev) @ rule
  32 + | t :: rule -> get_syntax (t :: rev) rule
  33 + | [] -> failwith "get_syntax"
  34 +
  35 +let rec get_quant rev = function
  36 + Quant quant :: rule -> quant, (List.rev rev) @ rule
  37 + | t :: rule -> get_quant (t :: rev) rule
  38 + | [] -> [], List.rev rev
  39 +
  40 +let rec get_bracket rev = function
  41 + Bracket :: rule -> true, (List.rev rev) @ rule
  42 + | t :: rule -> get_bracket (t :: rev) rule
  43 + | [] -> false, List.rev rev
42 44  
43   -(* FIXME: sprawdzić, czy to nie wycina Ctype *)
44 45 let merge_quant pos_quants quants =
45 46 let map = Xlist.fold quants SelectorMap.empty (fun map (k,v) -> SelectorMap.add map k v) in
46 47 let l,map = Xlist.fold pos_quants ([],map) (fun (l,map) (cat,v) ->
... ... @@ -48,6 +49,34 @@ let merge_quant pos_quants quants =
48 49 else (cat,v) :: l, map) in
49 50 List.rev (SelectorMap.fold map l (fun l cat v -> (cat,v) :: l))
50 51  
  52 +let assign_quantifiers (selectors,rule,weight) =
  53 + let pos = find_selector Pos selectors in
  54 + let categories =
  55 + try StringMap.find pos_categories pos
  56 + with Not_found -> failwith ("assign_quantifiers: " ^ pos) in
  57 + let categories = Xlist.map categories (fun s -> s,Top) in
  58 + let syntax,rule = get_syntax [] rule in
  59 + let quant,rule = get_quant [] rule in
  60 + let bracket,rule = get_bracket [] rule in
  61 + let quant = merge_quant categories quant in
  62 + selectors, (bracket,quant,syntax),(rule,weight)
  63 +
  64 +let _ =
  65 + let lexicon = ENIAM_LCGlexiconParser.load_lexicon "resources/lexicon-pl.dic" in
  66 + List.rev (Xlist.rev_map lexicon assign_quantifiers)
  67 +
  68 +(***
  69 +type rule2 =
  70 + Basic of string
  71 + | Quant of (selector * string) list * string
  72 + | Raised of (selector * string) list * string * selector list
  73 + | Quot of (selector * string) list * string
  74 + | Inclusion of string
  75 + | Conj of (selector * string) list * string
  76 + | Bracket of string
  77 +
  78 +
  79 +
51 80 let parse_quants_range quant =
52 81 Xlist.map quant (fun (cats,v) -> cats, parse_quant_range (cats,v))
53 82 (**
... ... @@ -285,3 +314,4 @@ let create (paths,last) tokens lex_sems =
285 314 let chart = IntIntSet.fold set chart (fun chart (i,j) -> LCGchart.make_unique chart i j) in
286 315 chart
287 316 *)
  317 + ***)
... ...
LCGlexicon/ENIAM_LCGlexiconLatexOf.ml
... ... @@ -17,6 +17,10 @@
17 17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18 *)
19 19  
  20 +open ENIAM_LCGtypes
  21 +open ENIAM_LCGlexiconTypes
  22 +open ENIAMcategoriesPL
  23 +
20 24 let rec add_quantifiers t = function
21 25 [] -> t
22 26 | (cat,s) :: l -> add_quantifiers (WithVar(string_of_selector cat,s,"",t)) l
... ... @@ -89,15 +93,19 @@ let latex_of_selectors selectors =
89 93 let rel = if rel = Eq then "=" else "!=" in
90 94 ENIAMcategoriesPL.string_of_selector cat ^ rel ^ (String.concat "|" l)))
91 95  
92   -let print_latex_grammar grammar =
93   - Printf.printf "grammar size: %d\n" (Xlist.size grammar);
94   - Xlatex.latex_file_out "results/" "grammar" "a0" false (fun file ->
95   - Xlist.iter grammar (fun (selectors,(bracket,quant,syntax,semantics),weight) ->
  96 +let print_latex_lexicon lexicon =
  97 + Printf.printf "lexicon size: %d\n" (Xlist.size lexicon);
  98 + Xlatex.latex_file_out "results/" "lexicon" "a0" false (fun file ->
  99 + Xlist.iter lexicon (fun (selectors,(bracket,quant,syntax),semantics) ->
96 100 let syntax = add_quantifiers_simple syntax (List.rev quant) in
97 101 Printf.fprintf file "%s\\\\\n$\\begin{array}{l}%s\\end{array}$\\\\\\;\\\\\\;\\\\\n" (latex_of_selectors selectors) (latex_of_grammar_symbol 0 syntax)));
98   - Xlatex.latex_compile_and_clean "results/" "grammar"
99   -
  102 + Xlatex.latex_compile_and_clean "results/" "lexicon"
100 103  
101   -(* let grammar = parse_grammar grammar *)
102 104  
103   -(* let _ = print_latex_grammar grammar *)
  105 +let _ =
  106 + if Array.length Sys.argv < 3 then
  107 + print_endline "missing argument\nUsage: print_lexicon <input-file> <output-file>"
  108 + else
  109 + let lexicon = ENIAM_LCGlexiconParser.load_lexicon Sys.argv.(1) in
  110 + let lexicon = List.rev (Xlist.rev_map lexicon ENIAM_LCGlexicon.assign_quantifiers) in
  111 + print_latex_lexicon lexicon
... ...
LCGlexicon/ENIAM_LCGlexiconPL.ml deleted
No preview for this file type
LCGlexicon/ENIAM_LCGlexiconParser.ml
... ... @@ -18,8 +18,8 @@
18 18 *)
19 19  
20 20 open Xstd
21   -open ENIAM_LCGlexiconTypes
22 21 open ENIAM_LCGtypes
  22 +open ENIAM_LCGlexiconTypes
23 23 open ENIAMcategoriesPL
24 24  
25 25 let rec get_first n = function
... ... @@ -282,12 +282,6 @@ let parse_quantifiers tokens =
282 282 | t :: _ -> failwith ("parse_quantifiers: unexpected token '" ^ t ^ "'")
283 283 | [] -> failwith "parse_quantifiers: no token")
284 284  
285   -type rule =
286   - Bracket
287   - | Quant of (selector * internal_grammar_symbol) list
288   - | Raised of string list
289   - | Syntax of grammar_symbol
290   -
291 285 let rec parse_rule atoms = function
292 286 "BRACKET" :: tokens -> Bracket :: parse_rule atoms tokens
293 287 | "QUANT" :: "[" :: tokens ->
... ...
LCGlexicon/ENIAM_LCGlexiconTypes.ml
... ... @@ -46,6 +46,13 @@ type rule_sem =
46 46  
47 47 type selector_relation = Eq | Neq (*| StrictEq*)
48 48  
  49 +type rule =
  50 + Bracket
  51 + | Quant of (selector * ENIAM_LCGtypes.internal_grammar_symbol) list
  52 + | Raised of string list
  53 + | Syntax of ENIAM_LCGtypes.grammar_symbol
  54 +
  55 +
49 56 (* x="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedną z wartości atrybutu x, reguła zostanie wykonana dla x z usuniętymi pozostałymi wartościami *)
50 57 (* x!="s" oznacza, że żeby reguła została użyta token musi mieć jako jedną z wartości atrybutu x symbol inny od "s", reguła zostanie wykonana dla x z usuniętą wartością "s" *)
51 58 (* x=="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedyną wartość atrybutu x *)
... ...
LCGlexicon/ENIAM_LCGrendererPL.ml deleted
LCGlexicon/ENIAMcategoriesPL.ml
... ... @@ -526,4 +526,4 @@ let pos_categories = Xlist.fold [
526 526 "burk",[Lemma;];
527 527 "interp",[Lemma;];
528 528 "unk",[Lemma;Number;Case;Gender;Person;];
529   - ] StringMap.empty (fun map (k,l) -> StringMap.add map k (Xlist.map l (fun v -> v,"T")))
  529 + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l)
... ...
LCGlexicon/TODO
1 1 - unumber jako atrybut
2 2  
3 3 "Można było" - brakuje uzgodnienia rodzaju przymiotnika w przypadku predykatywnym, i ogólnie kontroli składniowej
  4 +
  5 +- dodać do wyświetlania leksykonu informacje o bracketach i semantyce
... ...
LCGlexicon/makefile
... ... @@ -27,6 +27,10 @@ test: test.ml $(SOURCES)
27 27 mkdir -p results
28 28 $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) $(SOURCES) test.ml
29 29  
  30 +print_lexicon: ENIAM_LCGlexiconLatexOf.ml $(SOURCES)
  31 + mkdir -p results
  32 + $(OCAMLOPT) -o print_lexicon $(OCAMLOPTFLAGS) $(SOURCES) ENIAM_LCGlexiconLatexOf.ml
  33 +
30 34 .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx
31 35  
32 36 .mll.ml:
... ... @@ -48,4 +52,4 @@ test: test.ml $(SOURCES)
48 52 $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $<
49 53  
50 54 clean:
51   - rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test
  55 + rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test print_lexicon
... ...