Commit 1af3e9b5513f2c6ded00c414c3b537583cf38003
1 parent
be1d1198
konwersja leksykonu do pdf'a
Showing
10 changed files
with
76 additions
and
31 deletions
LCGlexicon/ENIAM_LCGlexicon.ml
... | ... | @@ -22,25 +22,26 @@ open ENIAM_LCGtypes |
22 | 22 | open ENIAM_LCGlexiconTypes |
23 | 23 | open ENIAMcategoriesPL |
24 | 24 | |
25 | -let grammar = load_lexicon "data/grammar.sh" | |
26 | - | |
27 | -type rule2 = | |
28 | - Basic of string | |
29 | - | Quant of (selector * string) list * string | |
30 | - | Raised of (selector * string) list * string * selector list | |
31 | - | Quot of (selector * string) list * string | |
32 | - | Inclusion of string | |
33 | - | Conj of (selector * string) list * string | |
34 | - | Bracket of string | |
35 | - | |
36 | - | |
37 | 25 | let rec find_selector s = function |
38 | 26 | (t,Eq,x :: _) :: l -> if t = s then x else find_selector s l |
39 | 27 | | (t,_,_) :: l -> if t = s then failwith "find_selector 1" else find_selector s l |
40 | 28 | | [] -> failwith "find_selector 2" |
41 | 29 | |
30 | +let rec get_syntax rev = function | |
31 | + Syntax syntax :: rule -> syntax, (List.rev rev) @ rule | |
32 | + | t :: rule -> get_syntax (t :: rev) rule | |
33 | + | [] -> failwith "get_syntax" | |
34 | + | |
35 | +let rec get_quant rev = function | |
36 | + Quant quant :: rule -> quant, (List.rev rev) @ rule | |
37 | + | t :: rule -> get_quant (t :: rev) rule | |
38 | + | [] -> [], List.rev rev | |
39 | + | |
40 | +let rec get_bracket rev = function | |
41 | + Bracket :: rule -> true, (List.rev rev) @ rule | |
42 | + | t :: rule -> get_bracket (t :: rev) rule | |
43 | + | [] -> false, List.rev rev | |
42 | 44 | |
43 | -(* FIXME: sprawdzić, czy to nie wycina Ctype *) | |
44 | 45 | let merge_quant pos_quants quants = |
45 | 46 | let map = Xlist.fold quants SelectorMap.empty (fun map (k,v) -> SelectorMap.add map k v) in |
46 | 47 | let l,map = Xlist.fold pos_quants ([],map) (fun (l,map) (cat,v) -> |
... | ... | @@ -48,6 +49,34 @@ let merge_quant pos_quants quants = |
48 | 49 | else (cat,v) :: l, map) in |
49 | 50 | List.rev (SelectorMap.fold map l (fun l cat v -> (cat,v) :: l)) |
50 | 51 | |
52 | +let assign_quantifiers (selectors,rule,weight) = | |
53 | + let pos = find_selector Pos selectors in | |
54 | + let categories = | |
55 | + try StringMap.find pos_categories pos | |
56 | + with Not_found -> failwith ("assign_quantifiers: " ^ pos) in | |
57 | + let categories = Xlist.map categories (fun s -> s,Top) in | |
58 | + let syntax,rule = get_syntax [] rule in | |
59 | + let quant,rule = get_quant [] rule in | |
60 | + let bracket,rule = get_bracket [] rule in | |
61 | + let quant = merge_quant categories quant in | |
62 | + selectors, (bracket,quant,syntax),(rule,weight) | |
63 | + | |
64 | +let _ = | |
65 | + let lexicon = ENIAM_LCGlexiconParser.load_lexicon "resources/lexicon-pl.dic" in | |
66 | + List.rev (Xlist.rev_map lexicon assign_quantifiers) | |
67 | + | |
68 | +(*** | |
69 | +type rule2 = | |
70 | + Basic of string | |
71 | + | Quant of (selector * string) list * string | |
72 | + | Raised of (selector * string) list * string * selector list | |
73 | + | Quot of (selector * string) list * string | |
74 | + | Inclusion of string | |
75 | + | Conj of (selector * string) list * string | |
76 | + | Bracket of string | |
77 | + | |
78 | + | |
79 | + | |
51 | 80 | let parse_quants_range quant = |
52 | 81 | Xlist.map quant (fun (cats,v) -> cats, parse_quant_range (cats,v)) |
53 | 82 | (** |
... | ... | @@ -285,3 +314,4 @@ let create (paths,last) tokens lex_sems = |
285 | 314 | let chart = IntIntSet.fold set chart (fun chart (i,j) -> LCGchart.make_unique chart i j) in |
286 | 315 | chart |
287 | 316 | *) |
317 | + ***) | |
... | ... |
LCGlexicon/ENIAM_LCGlexiconLatexOf.ml
... | ... | @@ -17,6 +17,10 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | +open ENIAM_LCGtypes | |
21 | +open ENIAM_LCGlexiconTypes | |
22 | +open ENIAMcategoriesPL | |
23 | + | |
20 | 24 | let rec add_quantifiers t = function |
21 | 25 | [] -> t |
22 | 26 | | (cat,s) :: l -> add_quantifiers (WithVar(string_of_selector cat,s,"",t)) l |
... | ... | @@ -89,15 +93,19 @@ let latex_of_selectors selectors = |
89 | 93 | let rel = if rel = Eq then "=" else "!=" in |
90 | 94 | ENIAMcategoriesPL.string_of_selector cat ^ rel ^ (String.concat "|" l))) |
91 | 95 | |
92 | -let print_latex_grammar grammar = | |
93 | - Printf.printf "grammar size: %d\n" (Xlist.size grammar); | |
94 | - Xlatex.latex_file_out "results/" "grammar" "a0" false (fun file -> | |
95 | - Xlist.iter grammar (fun (selectors,(bracket,quant,syntax,semantics),weight) -> | |
96 | +let print_latex_lexicon lexicon = | |
97 | + Printf.printf "lexicon size: %d\n" (Xlist.size lexicon); | |
98 | + Xlatex.latex_file_out "results/" "lexicon" "a0" false (fun file -> | |
99 | + Xlist.iter lexicon (fun (selectors,(bracket,quant,syntax),semantics) -> | |
96 | 100 | let syntax = add_quantifiers_simple syntax (List.rev quant) in |
97 | 101 | Printf.fprintf file "%s\\\\\n$\\begin{array}{l}%s\\end{array}$\\\\\\;\\\\\\;\\\\\n" (latex_of_selectors selectors) (latex_of_grammar_symbol 0 syntax))); |
98 | - Xlatex.latex_compile_and_clean "results/" "grammar" | |
99 | - | |
102 | + Xlatex.latex_compile_and_clean "results/" "lexicon" | |
100 | 103 | |
101 | -(* let grammar = parse_grammar grammar *) | |
102 | 104 | |
103 | -(* let _ = print_latex_grammar grammar *) | |
105 | +let _ = | |
106 | + if Array.length Sys.argv < 3 then | |
107 | + print_endline "missing argument\nUsage: print_lexicon <input-file> <output-file>" | |
108 | + else | |
109 | + let lexicon = ENIAM_LCGlexiconParser.load_lexicon Sys.argv.(1) in | |
110 | + let lexicon = List.rev (Xlist.rev_map lexicon ENIAM_LCGlexicon.assign_quantifiers) in | |
111 | + print_latex_lexicon lexicon | |
... | ... |
LCGlexicon/ENIAM_LCGlexiconPL.ml deleted
No preview for this file type
LCGlexicon/ENIAM_LCGlexiconParser.ml
... | ... | @@ -18,8 +18,8 @@ |
18 | 18 | *) |
19 | 19 | |
20 | 20 | open Xstd |
21 | -open ENIAM_LCGlexiconTypes | |
22 | 21 | open ENIAM_LCGtypes |
22 | +open ENIAM_LCGlexiconTypes | |
23 | 23 | open ENIAMcategoriesPL |
24 | 24 | |
25 | 25 | let rec get_first n = function |
... | ... | @@ -282,12 +282,6 @@ let parse_quantifiers tokens = |
282 | 282 | | t :: _ -> failwith ("parse_quantifiers: unexpected token '" ^ t ^ "'") |
283 | 283 | | [] -> failwith "parse_quantifiers: no token") |
284 | 284 | |
285 | -type rule = | |
286 | - Bracket | |
287 | - | Quant of (selector * internal_grammar_symbol) list | |
288 | - | Raised of string list | |
289 | - | Syntax of grammar_symbol | |
290 | - | |
291 | 285 | let rec parse_rule atoms = function |
292 | 286 | "BRACKET" :: tokens -> Bracket :: parse_rule atoms tokens |
293 | 287 | | "QUANT" :: "[" :: tokens -> |
... | ... |
LCGlexicon/ENIAM_LCGlexiconTypes.ml
... | ... | @@ -46,6 +46,13 @@ type rule_sem = |
46 | 46 | |
47 | 47 | type selector_relation = Eq | Neq (*| StrictEq*) |
48 | 48 | |
49 | +type rule = | |
50 | + Bracket | |
51 | + | Quant of (selector * ENIAM_LCGtypes.internal_grammar_symbol) list | |
52 | + | Raised of string list | |
53 | + | Syntax of ENIAM_LCGtypes.grammar_symbol | |
54 | + | |
55 | + | |
49 | 56 | (* x="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedną z wartości atrybutu x, reguła zostanie wykonana dla x z usuniętymi pozostałymi wartościami *) |
50 | 57 | (* x!="s" oznacza, że żeby reguła została użyta token musi mieć jako jedną z wartości atrybutu x symbol inny od "s", reguła zostanie wykonana dla x z usuniętą wartością "s" *) |
51 | 58 | (* x=="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedyną wartość atrybutu x *)
... | ... |
LCGlexicon/ENIAM_LCGrendererPL.ml deleted
LCGlexicon/ENIAMcategoriesPL.ml
... | ... | @@ -526,4 +526,4 @@ let pos_categories = Xlist.fold [ |
526 | 526 | "burk",[Lemma;]; |
527 | 527 | "interp",[Lemma;]; |
528 | 528 | "unk",[Lemma;Number;Case;Gender;Person;]; |
529 | - ] StringMap.empty (fun map (k,l) -> StringMap.add map k (Xlist.map l (fun v -> v,"T"))) | |
529 | + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l) | |
... | ... |
LCGlexicon/TODO
LCGlexicon/makefile
... | ... | @@ -27,6 +27,10 @@ test: test.ml $(SOURCES) |
27 | 27 | mkdir -p results |
28 | 28 | $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) $(SOURCES) test.ml |
29 | 29 | |
30 | +print_lexicon: ENIAM_LCGlexiconLatexOf.ml $(SOURCES) | |
31 | + mkdir -p results | |
32 | + $(OCAMLOPT) -o print_lexicon $(OCAMLOPTFLAGS) $(SOURCES) ENIAM_LCGlexiconLatexOf.ml | |
33 | + | |
30 | 34 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx |
31 | 35 | |
32 | 36 | .mll.ml: |
... | ... | @@ -48,4 +52,4 @@ test: test.ml $(SOURCES) |
48 | 52 | $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< |
49 | 53 | |
50 | 54 | clean: |
51 | - rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test | |
55 | + rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test print_lexicon | |
... | ... |