diff --git a/LCGlexicon/.gitignore b/LCGlexicon/.gitignore index 3041d59..1b04ae0 100644 --- a/LCGlexicon/.gitignore +++ b/LCGlexicon/.gitignore @@ -1,2 +1,3 @@ test +print_lexicon results/* diff --git a/LCGlexicon/ENIAM_LCGlexicon.ml b/LCGlexicon/ENIAM_LCGlexicon.ml index f844062..7d293dd 100644 --- a/LCGlexicon/ENIAM_LCGlexicon.ml +++ b/LCGlexicon/ENIAM_LCGlexicon.ml @@ -22,25 +22,26 @@ open ENIAM_LCGtypes open ENIAM_LCGlexiconTypes open ENIAMcategoriesPL -let grammar = load_lexicon "data/grammar.sh" - -type rule2 = - Basic of string - | Quant of (selector * string) list * string - | Raised of (selector * string) list * string * selector list - | Quot of (selector * string) list * string - | Inclusion of string - | Conj of (selector * string) list * string - | Bracket of string - - let rec find_selector s = function (t,Eq,x :: _) :: l -> if t = s then x else find_selector s l | (t,_,_) :: l -> if t = s then failwith "find_selector 1" else find_selector s l | [] -> failwith "find_selector 2" +let rec get_syntax rev = function + Syntax syntax :: rule -> syntax, (List.rev rev) @ rule + | t :: rule -> get_syntax (t :: rev) rule + | [] -> failwith "get_syntax" + +let rec get_quant rev = function + Quant quant :: rule -> quant, (List.rev rev) @ rule + | t :: rule -> get_quant (t :: rev) rule + | [] -> [], List.rev rev + +let rec get_bracket rev = function + Bracket :: rule -> true, (List.rev rev) @ rule + | t :: rule -> get_bracket (t :: rev) rule + | [] -> false, List.rev rev -(* FIXME: sprawdzić, czy to nie wycina Ctype *) let merge_quant pos_quants quants = let map = Xlist.fold quants SelectorMap.empty (fun map (k,v) -> SelectorMap.add map k v) in let l,map = Xlist.fold pos_quants ([],map) (fun (l,map) (cat,v) -> @@ -48,6 +49,34 @@ let merge_quant pos_quants quants = else (cat,v) :: l, map) in List.rev (SelectorMap.fold map l (fun l cat v -> (cat,v) :: l)) +let assign_quantifiers (selectors,rule,weight) = + let pos = find_selector Pos selectors in + let categories = + try StringMap.find pos_categories pos + with Not_found -> failwith ("assign_quantifiers: " ^ pos) in + let categories = Xlist.map categories (fun s -> s,Top) in + let syntax,rule = get_syntax [] rule in + let quant,rule = get_quant [] rule in + let bracket,rule = get_bracket [] rule in + let quant = merge_quant categories quant in + selectors, (bracket,quant,syntax),(rule,weight) + +let _ = + let lexicon = ENIAM_LCGlexiconParser.load_lexicon "resources/lexicon-pl.dic" in + List.rev (Xlist.rev_map lexicon assign_quantifiers) + +(*** +type rule2 = + Basic of string + | Quant of (selector * string) list * string + | Raised of (selector * string) list * string * selector list + | Quot of (selector * string) list * string + | Inclusion of string + | Conj of (selector * string) list * string + | Bracket of string + + + let parse_quants_range quant = Xlist.map quant (fun (cats,v) -> cats, parse_quant_range (cats,v)) (** @@ -285,3 +314,4 @@ let create (paths,last) tokens lex_sems = let chart = IntIntSet.fold set chart (fun chart (i,j) -> LCGchart.make_unique chart i j) in chart *) + ***) diff --git a/LCGlexicon/ENIAM_LCGlexiconLatexOf.ml b/LCGlexicon/ENIAM_LCGlexiconLatexOf.ml index b24144a..1db7bb3 100644 --- a/LCGlexicon/ENIAM_LCGlexiconLatexOf.ml +++ b/LCGlexicon/ENIAM_LCGlexiconLatexOf.ml @@ -17,6 +17,10 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. *) +open ENIAM_LCGtypes +open ENIAM_LCGlexiconTypes +open ENIAMcategoriesPL + let rec add_quantifiers t = function [] -> t | (cat,s) :: l -> add_quantifiers (WithVar(string_of_selector cat,s,"",t)) l @@ -89,15 +93,19 @@ let latex_of_selectors selectors = let rel = if rel = Eq then "=" else "!=" in ENIAMcategoriesPL.string_of_selector cat ^ rel ^ (String.concat "|" l))) -let print_latex_grammar grammar = - Printf.printf "grammar size: %d\n" (Xlist.size grammar); - Xlatex.latex_file_out "results/" "grammar" "a0" false (fun file -> - Xlist.iter grammar (fun (selectors,(bracket,quant,syntax,semantics),weight) -> +let print_latex_lexicon lexicon = + Printf.printf "lexicon size: %d\n" (Xlist.size lexicon); + Xlatex.latex_file_out "results/" "lexicon" "a0" false (fun file -> + Xlist.iter lexicon (fun (selectors,(bracket,quant,syntax),semantics) -> let syntax = add_quantifiers_simple syntax (List.rev quant) in Printf.fprintf file "%s\\\\\n$\\begin{array}{l}%s\\end{array}$\\\\\\;\\\\\\;\\\\\n" (latex_of_selectors selectors) (latex_of_grammar_symbol 0 syntax))); - Xlatex.latex_compile_and_clean "results/" "grammar" - + Xlatex.latex_compile_and_clean "results/" "lexicon" -(* let grammar = parse_grammar grammar *) -(* let _ = print_latex_grammar grammar *) +let _ = + if Array.length Sys.argv < 3 then + print_endline "missing argument\nUsage: print_lexicon <input-file> <output-file>" + else + let lexicon = ENIAM_LCGlexiconParser.load_lexicon Sys.argv.(1) in + let lexicon = List.rev (Xlist.rev_map lexicon ENIAM_LCGlexicon.assign_quantifiers) in + print_latex_lexicon lexicon diff --git a/LCGlexicon/ENIAM_LCGlexiconPL.ml b/LCGlexicon/ENIAM_LCGlexiconPL.ml deleted file mode 100644 index 8b13789..0000000 --- a/LCGlexicon/ENIAM_LCGlexiconPL.ml +++ /dev/null @@ -1 +0,0 @@ - diff --git a/LCGlexicon/ENIAM_LCGlexiconParser.ml b/LCGlexicon/ENIAM_LCGlexiconParser.ml index 8f7e58f..f974f6e 100644 --- a/LCGlexicon/ENIAM_LCGlexiconParser.ml +++ b/LCGlexicon/ENIAM_LCGlexiconParser.ml @@ -18,8 +18,8 @@ *) open Xstd -open ENIAM_LCGlexiconTypes open ENIAM_LCGtypes +open ENIAM_LCGlexiconTypes open ENIAMcategoriesPL let rec get_first n = function @@ -282,12 +282,6 @@ let parse_quantifiers tokens = | t :: _ -> failwith ("parse_quantifiers: unexpected token '" ^ t ^ "'") | [] -> failwith "parse_quantifiers: no token") -type rule = - Bracket - | Quant of (selector * internal_grammar_symbol) list - | Raised of string list - | Syntax of grammar_symbol - let rec parse_rule atoms = function "BRACKET" :: tokens -> Bracket :: parse_rule atoms tokens | "QUANT" :: "[" :: tokens -> diff --git a/LCGlexicon/ENIAM_LCGlexiconTypes.ml b/LCGlexicon/ENIAM_LCGlexiconTypes.ml index adb5d35..2e05b84 100644 --- a/LCGlexicon/ENIAM_LCGlexiconTypes.ml +++ b/LCGlexicon/ENIAM_LCGlexiconTypes.ml @@ -46,6 +46,13 @@ type rule_sem = type selector_relation = Eq | Neq (*| StrictEq*) +type rule = + Bracket + | Quant of (selector * ENIAM_LCGtypes.internal_grammar_symbol) list + | Raised of string list + | Syntax of ENIAM_LCGtypes.grammar_symbol + + (* x="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedną z wartości atrybutu x, reguła zostanie wykonana dla x z usuniętymi pozostałymi wartościami *) (* x!="s" oznacza, że żeby reguła została użyta token musi mieć jako jedną z wartości atrybutu x symbol inny od "s", reguła zostanie wykonana dla x z usuniętą wartością "s" *) (* x=="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jednyną z wartość atrybutu x *) diff --git a/LCGlexicon/ENIAM_LCGrendererPL.ml b/LCGlexicon/ENIAM_LCGrendererPL.ml deleted file mode 100644 index e69de29..0000000 --- a/LCGlexicon/ENIAM_LCGrendererPL.ml +++ /dev/null diff --git a/LCGlexicon/ENIAMcategoriesPL.ml b/LCGlexicon/ENIAMcategoriesPL.ml index 12a38b9..7268cba 100644 --- a/LCGlexicon/ENIAMcategoriesPL.ml +++ b/LCGlexicon/ENIAMcategoriesPL.ml @@ -526,4 +526,4 @@ let pos_categories = Xlist.fold [ "burk",[Lemma;]; "interp",[Lemma;]; "unk",[Lemma;Number;Case;Gender;Person;]; - ] StringMap.empty (fun map (k,l) -> StringMap.add map k (Xlist.map l (fun v -> v,"T"))) + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l) diff --git a/LCGlexicon/TODO b/LCGlexicon/TODO index f267515..9c9d0fc 100644 --- a/LCGlexicon/TODO +++ b/LCGlexicon/TODO @@ -1,3 +1,5 @@ - unumber jako atrybut "Można było" - brakuje uzgodnienia rodzaju przymiotnika w przypadku predykatywnym, i ogólnie kontroli składniowej + +- dodać do wyświetlania leksykonu informacje o bracetach i semantyce diff --git a/LCGlexicon/makefile b/LCGlexicon/makefile index 32d8099..be7501f 100755 --- a/LCGlexicon/makefile +++ b/LCGlexicon/makefile @@ -27,6 +27,10 @@ test: test.ml $(SOURCES) mkdir -p results $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) $(SOURCES) test.ml +print_lexicon: ENIAM_LCGlexiconLatexOf.ml $(SOURCES) + mkdir -p results + $(OCAMLOPT) -o print_lexicon $(OCAMLOPTFLAGS) $(SOURCES) ENIAM_LCGlexiconLatexOf.ml + .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx .mll.ml: @@ -48,4 +52,4 @@ test: test.ml $(SOURCES) $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< clean: - rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test + rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test print_lexicon