Commit baf1c22d44d40ac4a047d6e9bfa6f09ca47dafe6
1 parent
173c381a
generowanie interp rules
Showing
4 changed files
with
126 additions
and
168 deletions
guesser/dict.ml
... | ... | @@ -300,7 +300,9 @@ let exceptional_lemmata = StringSet.of_list ([ |
300 | 300 | ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; |
301 | 301 | ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; |
302 | 302 | ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";*) |
303 | - ] @ File.load_lines "data/obce.tab" @ File.load_lines "data/validated_adj.tab" @ File.load_lines "data/validated_noun.tab" @ File.load_lines "data/validated_verb.tab" @ File.load_lines "data/adv_nieodprzymiotnikowe.tab") | |
303 | + ] @ File.load_lines "data/obce.tab" (* @ | |
304 | + File.load_lines "data/validated_adj.tab" @ File.load_lines "data/validated_noun.tab" @ | |
305 | + File.load_lines "data/validated_verb.tab" @ File.load_lines "data/adv_nieodprzymiotnikowe.tab" *)) | |
304 | 306 | |
305 | 307 | let remove_exceptional_lemmata dict = |
306 | 308 | Xlist.fold dict [] (fun dict entry -> |
... | ... | @@ -316,7 +318,7 @@ let generate_stem dict = |
316 | 318 | else ""}) |
317 | 319 | |
318 | 320 | let validate dict = |
319 | - let rules = Rules.interp_compound_rule_trees in | |
321 | + let rules = Rules.compound_rule_trees in | |
320 | 322 | Xlist.rev_map dict (fun entry -> |
321 | 323 | let simple_lemma = Stem.simplify_lemma entry.lemma in |
322 | 324 | let forms = Xlist.rev_map entry.forms (fun form -> |
... | ... | @@ -326,7 +328,22 @@ let validate dict = |
326 | 328 | let candidates = Xlist.fold candidates [] (fun candidates (stem,rule) -> |
327 | 329 | (* printf "R %s\t%s\n" stem (Rules.string_of_rule rule); *) |
328 | 330 | if stem ^ rule.Rules.set = simple_lemma then (stem,rule) :: candidates else candidates) in |
329 | - if candidates = [] then form else {form with validated=true}) in | |
331 | + if candidates = [] then {form with validated=false} else {form with validated=true}) in | |
332 | + {entry with forms=forms}) | |
333 | + | |
334 | +let validate_interp dict = | |
335 | + let rules = Rules.interp_compound_rule_trees in | |
336 | + Xlist.rev_map dict (fun entry -> | |
337 | + let simple_lemma = Stem.simplify_lemma entry.lemma in | |
338 | + let forms = Xlist.rev_map entry.forms (fun form -> | |
339 | + (* printf "E %s\t%s\t%s\n" orth lemma interp; *) | |
340 | + let candidates = Rules.CharTrees.find rules form.orth in | |
341 | + (* printf "S %d\n" (Xlist.size forms); *) | |
342 | + let candidates = Xlist.fold candidates [] (fun candidates (stem,rule) -> | |
343 | + (* printf "R %s\t%s\n" stem (Rules.string_of_rule rule); *) | |
344 | + if stem ^ rule.Rules.set = simple_lemma && form.interp = rule.Rules.interp then | |
345 | + (stem,rule) :: candidates else candidates) in | |
346 | + if candidates = [] then {form with validated=false} else {form with validated=true}) in | |
330 | 347 | {entry with forms=forms}) |
331 | 348 | |
332 | 349 | let remove_validated_forms dict = |
... | ... | @@ -363,3 +380,64 @@ let print_lemmata filename dict = |
363 | 380 | File.file_out filename (fun file -> |
364 | 381 | Xlist.iter dict (fun entry -> |
365 | 382 | fprintf file "%s\n" entry.lemma)) |
383 | + | |
384 | +let remove_sup_neg_forms dict = | |
385 | + Xlist.fold dict [] (fun dict entry -> | |
386 | + let forms = Xlist.fold entry.forms [] (fun forms form -> | |
387 | + if Xstring.check_sufix ":neg" form.interp || Xstring.check_sufix ":sup" form.interp then | |
388 | + forms else form :: forms) in | |
389 | + if forms = [] then dict else {entry with forms=forms} :: dict) | |
390 | + | |
391 | +let generate_rules path filename rules_filename = | |
392 | + let dict = load_tab (path ^ filename) in | |
393 | + (* printf "g1 %d\n%!" (Xlist.size dict); *) | |
394 | + let dict = merge_entries dict in | |
395 | + (* printf "g2 %d\n%!" (Xlist.size dict); *) | |
396 | + let dict = mark_ndm dict in (* FIXME: remove_ndm? *) | |
397 | + (* printf "g3 %d\n%!" (Xlist.size dict); *) | |
398 | + let dict = find_kolwiek_suffixes dict in | |
399 | + (* printf "g4 %d\n%!" (Xlist.size dict); *) | |
400 | + let dict = remove_exceptional_lemmata dict in | |
401 | + (* printf "g5 %d\n%!" (Xlist.size dict); *) | |
402 | + let dict = generate_stem dict in | |
403 | + let dict = validate dict in | |
404 | + let dict = remove_validated_forms dict in | |
405 | + (* printf "g6 %d\n%!" (Xlist.size dict); *) | |
406 | + let dict = remove_sup_neg_forms dict in | |
407 | + (* printf "g7 %d\n%!" (Xlist.size dict); *) | |
408 | + let rules = Xlist.fold dict StringMap.empty (fun rules entry -> | |
409 | + Xlist.fold (RuleGenerator.generate_rules_entry entry) rules (fun rules (key,rule) -> | |
410 | + let rules2 = try StringMap.find rules key with Not_found -> StringMap.empty in | |
411 | + let rules2 = StringMap.add_inc rules2 rule (1,[entry.lemma]) (fun (q,l) -> q+1, if q < 20 then entry.lemma :: l else l) in | |
412 | + StringMap.add rules key rules2)) in | |
413 | + File.file_out rules_filename (fun file -> | |
414 | + StringMap.iter rules (fun interp rules2 -> | |
415 | + fprintf file "\n@RULES %s\n" interp; | |
416 | + StringMap.iter rules2 (fun rule (q,l) -> | |
417 | + fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l)))) | |
418 | + | |
419 | +let generate_interp_rules con_flag group_flag lemma_flag path filename rules_filename = | |
420 | + let dict = load_tab (path ^ filename) in | |
421 | + (* printf "g1 %d\n%!" (Xlist.size dict); *) | |
422 | + let dict = merge_entries dict in | |
423 | + (* printf "g2 %d\n%!" (Xlist.size dict); *) | |
424 | + let dict = mark_ndm dict in (* FIXME: remove_ndm? *) | |
425 | + (* printf "g3 %d\n%!" (Xlist.size dict); *) | |
426 | + let dict = find_kolwiek_suffixes dict in | |
427 | + (* printf "g4 %d\n%!" (Xlist.size dict); *) | |
428 | + let dict = remove_exceptional_lemmata dict in | |
429 | + (* printf "g5 %d\n%!" (Xlist.size dict); *) | |
430 | + let dict = generate_stem dict in | |
431 | + let dict = validate_interp dict in | |
432 | + let dict = remove_validated_forms dict in | |
433 | + (* printf "g6 %d\n%!" (Xlist.size dict); *) | |
434 | + (* printf "g7 %d\n%!" (Xlist.size dict); *) | |
435 | + let interp_rules = Xlist.fold dict StringMap.empty (fun interp_rules entry -> | |
436 | + let simple_lemma = Stem.simplify_lemma entry.lemma in | |
437 | + Xlist.fold entry.forms interp_rules (fun interp_rules form -> | |
438 | + let candidates = RuleGenerator.generate_interp_rules con_flag group_flag lemma_flag simple_lemma form in | |
439 | + Xlist.fold candidates interp_rules (fun interp_rules cand -> | |
440 | + StringMap.add_inc interp_rules cand (1,[entry.lemma]) (fun (q,l) -> q+1, if q < 20 then entry.lemma :: l else l)))) in | |
441 | + File.file_out rules_filename (fun file -> | |
442 | + StringMap.iter interp_rules (fun k (q,l) -> | |
443 | + fprintf file "\t%s\t# %d %s\n" k q (String.concat " " l))) | |
... | ... |
guesser/generate.ml
... | ... | @@ -140,7 +140,7 @@ let check_stem_generation path filename = |
140 | 140 | |
141 | 141 | (* Sprawdzenie dziaลania stemowania *) |
142 | 142 | let _ = |
143 | - check_stem_generation morfeusz_path sgjp_filename; | |
143 | + (* check_stem_generation morfeusz_path sgjp_filename; *) | |
144 | 144 | (* check_stem_generation morfeusz_path ("odm_adj_" ^ sgjp_filename); *) |
145 | 145 | (* check_stem_generation morfeusz_path ("odm_noun_" ^ sgjp_filename); *) |
146 | 146 | () |
... | ... | @@ -160,7 +160,7 @@ let find_not_validated_entries path filename out_filename = |
160 | 160 | |
161 | 161 | (* Wypisanie niezwalidowanych form *) |
162 | 162 | let _ = |
163 | - find_not_validated_forms morfeusz_path odm_adj_sgjp_filename "results/not_validated_odm_adj.tab"; | |
163 | + (* find_not_validated_forms morfeusz_path odm_adj_sgjp_filename "results/not_validated_odm_adj.tab"; *) | |
164 | 164 | (* find_not_validated_entries morfeusz_path ("odm_adj_" ^ sgjp_filename) "results/not_validated_odm_adj.tab"; *) |
165 | 165 | (* find_not_validated_entries "results/" "not_validated_odm_adj.tab" "results/not_validated_odm_adj2.tab"; *) |
166 | 166 | (* find_not_validated_entries morfeusz_path ("odm_noun_" ^ sgjp_filename) "results/not_validated_odm_noun.tab"; *) |
... | ... | @@ -177,11 +177,31 @@ let find_validated_lemmata path filename out_filename = |
177 | 177 | |
178 | 178 | (* Wypisanie zwalidowanych lematรณw *) |
179 | 179 | let _ = |
180 | - find_validated_lemmata morfeusz_path odm_adj_sgjp_filename "results/validated_odm_adj.tab"; | |
180 | + (* find_validated_lemmata morfeusz_path odm_adj_sgjp_filename "results/validated_odm_adj.tab"; | |
181 | 181 | find_validated_lemmata morfeusz_path odm_noun_sgjp_filename "results/validated_odm_noun.tab"; |
182 | - find_validated_lemmata morfeusz_path verb_sgjp_filename "results/validated_verb.tab"; | |
182 | + find_validated_lemmata morfeusz_path verb_sgjp_filename "results/validated_verb.tab"; *) | |
183 | 183 | () |
184 | 184 | |
185 | +(* Generowanie reguล *) | |
186 | +let _ = | |
187 | + (* Dict.generate_rules morfeusz_path odm_adj_sgjp_filename "results/rules-odm-adj.txt"; *) | |
188 | + (* Dict.generate_rules morfeusz_path odm_noun_sgjp_filename "results/rules-odm-noun.txt"; | |
189 | + Dict.generate_rules morfeusz_path adv_sgjp_filename "results/rules-adv.txt"; | |
190 | + Dict.generate_rules morfeusz_path verb_sgjp_filename "results/rules-verb.txt"; *) | |
191 | + () | |
192 | + | |
193 | +(* Generowanie reguล dla interpretacji *) | |
194 | +let _ = | |
195 | + Dict.generate_interp_rules true true true morfeusz_path odm_adj_sgjp_filename "results/interp_rules_odm_adj.tab"; | |
196 | +(* Dict.generate_interp_rules false true true morfeusz_path ("odm_adj_" ^ sgjp_filename) "results/interp_rules_odm_adj2.tab"; *) | |
197 | +(* Dict.generate_interp_rules true true true morfeusz_path ("adv_" ^ sgjp_filename) "results/interp_rules_adv.tab"; *) | |
198 | +(* Dict.generate_interp_rules true true true morfeusz_path ("verb_" ^ sgjp_filename) "results/interp_rules_verb.tab"; | |
199 | +Dict.generate_interp_rules true false true morfeusz_path ("verb_" ^ sgjp_filename) "results/interp_rules_verb2.tab"; *) | |
200 | +(* Dict.generate_interp_rules true true true morfeusz_path ("odm_noun_" ^ sgjp_filename) "results/interp_rules_odm_noun.tab"; | |
201 | +Dict.generate_interp_rules true true false morfeusz_path ("odm_noun_" ^ sgjp_filename) "results/interp_rules_odm_noun2.tab";*) | |
202 | + () | |
203 | + | |
204 | + | |
185 | 205 | (**********************************************************************************) |
186 | 206 | |
187 | 207 | module OrderedStringList = struct |
... | ... | @@ -387,21 +407,6 @@ let _ = |
387 | 407 | |
388 | 408 | (**********************************************************************************) |
389 | 409 | (* |
390 | -let remove_com_sup dict = | |
391 | - List.rev (Xlist.fold dict [] (fun l (orth,lemma,interp) -> | |
392 | - if Xstring.check_sufix ":com" interp || Xstring.check_sufix ":sup" interp then l else (orth,lemma,interp) :: l)) | |
393 | - | |
394 | -let generate_adj_pos_rules rules_filename dict = | |
395 | - let dict = map_of_tab dict in | |
396 | - let dict = find_kolwiek_suffixes dict in | |
397 | - let dict = remove_exceptional_lemmata dict in | |
398 | - let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_entry "adj") in | |
399 | - File.file_out rules_filename (fun file -> | |
400 | - StringMap.iter rules (fun interp rules2 -> | |
401 | - fprintf file "\n@RULES %s\n" interp; | |
402 | - StringMap.iter rules2 (fun rule (q,l) -> | |
403 | - fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l)))) | |
404 | - | |
405 | 410 | let split_into_groups interp_sel dict = |
406 | 411 | Xlist.fold dict StringMap.empty (fun dict (lemma,orth,interp) -> |
407 | 412 | let group = |
... | ... | @@ -420,118 +425,6 @@ let load_interp_sel filename = |
420 | 425 | [group;interp;label] -> StringMap.add interp_sel interp group |
421 | 426 | | line -> failwith ("load_interp_sel: " ^ (String.concat "\t" line))) |
422 | 427 | |
423 | -let generate_adj_rules path filename adj_pos_rules_filename = | |
424 | - let interp_sel = load_interp_sel "data/interps.tab" in | |
425 | - let dict = load_tab (path ^ filename) in | |
426 | - let dict = split_into_groups interp_sel dict in | |
427 | - if StringMap.size dict <> 3 then failwith ("generate_adj_rules: " ^ | |
428 | - String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else | |
429 | - generate_adj_pos_rules adj_pos_rules_filename (StringMap.find dict "adj"); | |
430 | - () | |
431 | - | |
432 | -let generate_adj_com_rules path filename adj_com_rules_filename = | |
433 | - let dict = load_tab (path ^ filename) in | |
434 | - let dict = map_of_tab dict in | |
435 | - let dict = find_kolwiek_suffixes dict in | |
436 | -(* let dict = remove_exceptional_lemmata dict in *) | |
437 | - let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_com_entry "adj") in | |
438 | - File.file_out adj_com_rules_filename (fun file -> | |
439 | - StringMap.iter rules (fun interp rules2 -> | |
440 | - fprintf file "\n@RULES %s\n" interp; | |
441 | - StringMap.iter rules2 (fun rule (q,l) -> | |
442 | - fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l)))) | |
443 | - | |
444 | -let generate_adv_pos_rules rules_filename dict = | |
445 | - let dict = map_of_tab dict in | |
446 | - let dict = find_kolwiek_suffixes dict in | |
447 | - let dict = remove_exceptional_lemmata dict in | |
448 | - let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_entry "adv") in | |
449 | - File.file_out rules_filename (fun file -> | |
450 | - StringMap.iter rules (fun interp rules2 -> | |
451 | - fprintf file "\n@RULES %s\n" interp; | |
452 | - StringMap.iter rules2 (fun rule (q,l) -> | |
453 | - fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l)))) | |
454 | - | |
455 | -let generate_adv_rules path filename adv_pos_rules_filename = | |
456 | - let interp_sel = load_interp_sel "data/interps.tab" in | |
457 | - let dict = load_tab (path ^ filename) in | |
458 | - let dict = split_into_groups interp_sel dict in | |
459 | - if StringMap.size dict <> 3 then failwith ("generate_adv_rules: " ^ | |
460 | - String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else | |
461 | - generate_adv_pos_rules adv_pos_rules_filename (StringMap.find dict "adv"); | |
462 | - () | |
463 | - | |
464 | -let generate_adv_com_rules path filename adv_com_rules_filename = | |
465 | - let dict = load_tab (path ^ filename) in | |
466 | - let dict = map_of_tab dict in | |
467 | - let dict = find_kolwiek_suffixes dict in | |
468 | - let dict = remove_exceptional_lemmata dict in | |
469 | - let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_com_entry "adv") in | |
470 | - File.file_out adv_com_rules_filename (fun file -> | |
471 | - StringMap.iter rules (fun interp rules2 -> | |
472 | - fprintf file "\n@RULES %s\n" interp; | |
473 | - StringMap.iter rules2 (fun rule (q,l) -> | |
474 | - fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l)))) | |
475 | - | |
476 | - | |
477 | -let generate_noun_rules2 rules_filename dict = | |
478 | - let dict = map_of_tab dict in | |
479 | - let dict = find_kolwiek_suffixes dict in | |
480 | - let dict = remove_exceptional_lemmata dict in | |
481 | - let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_entry "noun") in | |
482 | -(* let rules = StringMap.fold dict StringMap.empty (fun rules lemma l -> | |
483 | - let interps = Xlist.fold l StringMap.empty (fun map (orth,interp) -> | |
484 | - StringMap.add_inc map interp [orth] (fun l -> orth :: l)) in | |
485 | - let stem(*,_*) = generate_stem (*0*) interps lemma noun_stem_sel in | |
486 | - let cl = classify_noun lemma stem interps noun_classes in | |
487 | - if cl <> "A" && cl <> "II" (*&& cl <> "ฤ" && cl <> "ฤ"*) then rules else | |
488 | - if has_known_inflexion_noun stem interps then rules else | |
489 | - let interps = select_inflexion rules_a stem interps in | |
490 | - let stem2 = cut_stem_sufix stem in | |
491 | - StringMap.fold interps rules (fun rules interp orths -> | |
492 | - Xlist.fold orths rules (fun rules orth -> | |
493 | - let n = find_common_prefix_length [stem2;orth] in | |
494 | - let rules2 = try StringMap.find rules interp with Not_found -> StringMap.empty in | |
495 | - let a = cut_prefixn n orth in | |
496 | - let b = cut_prefixn n stem in | |
497 | - let c,f = rule_code (a,b) in | |
498 | -(* let rule = sprintf "%s\t%s\t%s\t%s" cl c a b in *) | |
499 | - let rule = cl ^ "\t" ^ if f then "\t" ^ c else sprintf "%s\t%s\t%s" c a b in | |
500 | - let rules2 = StringMap.add_inc rules2 rule (1,[lemma]) (fun (q,l) -> q+1, if q < 20 then lemma :: l else l) in | |
501 | - StringMap.add rules interp rules2))) in*) | |
502 | - File.file_out rules_filename (fun file -> | |
503 | - StringMap.iter rules (fun interp rules2 -> | |
504 | - fprintf file "\n@RULES %s\n" interp; | |
505 | - StringMap.iter rules2 (fun rule (q,l) -> | |
506 | - fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l)))) | |
507 | - | |
508 | -let generate_noun_rules path filename noun_rules_filename = | |
509 | - let interp_sel = load_interp_sel "data/interps.tab" in | |
510 | - let dict = load_tab (path ^ filename) in | |
511 | - let dict = split_into_groups interp_sel dict in | |
512 | - if StringMap.size dict <> 1 then failwith ("generate_noun_rules: " ^ | |
513 | - String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else | |
514 | - generate_noun_rules2 noun_rules_filename (StringMap.find dict "noun"); | |
515 | - () | |
516 | - | |
517 | -let generate_verb_rules2 rules_filename dict = | |
518 | - let dict = map_of_tab dict in | |
519 | - let dict = remove_exceptional_lemmata dict in | |
520 | - let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_entry "verb") in | |
521 | - File.file_out rules_filename (fun file -> | |
522 | - StringMap.iter rules (fun interp rules2 -> | |
523 | - fprintf file "\n@RULES %s\n" interp; | |
524 | - StringMap.iter rules2 (fun rule (q,l) -> | |
525 | - fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l)))) | |
526 | - | |
527 | -let generate_verb_rules path filename rules_filename = | |
528 | - let interp_sel = load_interp_sel "data/interps.tab" in | |
529 | - let dict = load_tab (path ^ filename) in | |
530 | - let dict = split_into_groups interp_sel dict in | |
531 | - if StringMap.size dict <> 2 then failwith ("generate_verb_rules: " ^ | |
532 | - String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else | |
533 | - generate_verb_rules2 rules_filename (StringMap.find dict "verb"); | |
534 | - () | |
535 | 428 | |
536 | 429 | let find_tags_lemata_noun path filename = |
537 | 430 | let dict = load_tab (path ^ filename) in |
... | ... | @@ -559,17 +452,7 @@ let find_tags_lemata_verb path filename = |
559 | 452 | if StringMap.size dict <> 2 then failwith ("generate_verb_rules: " ^ |
560 | 453 | String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else |
561 | 454 | find_tags_lemata_verb2 (StringMap.find dict "verb") |
562 | - | |
563 | -let _ = | |
564 | -(* generate_adj_rules morfeusz_path ("odm_adj_" ^ sgjp_filename) "rules/ADJ-FLEX6.dic"; *) | |
565 | -(* generate_noun_rules morfeusz_path ("odm_noun_" ^ sgjp_filename) "rules/NOUN-FLEX6.dic"; *) | |
566 | -(* generate_adj_com_rules morfeusz_path ("odm_adj_" ^ sgjp_filename) "rules/ADJ-FLEX-COM6.dic"; *) | |
567 | -(* generate_adv_rules morfeusz_path ("adv_" ^ sgjp_filename) "rules/ADV-FLEX6.dic"; *) | |
568 | -(* generate_adv_com_rules morfeusz_path ("adv_" ^ sgjp_filename) "rules/ADV-FLEX-COM6.dic"; *) | |
569 | -(* generate_verb_rules morfeusz_path ("verb_" ^ sgjp_filename) "rules/VERB-FLEX6.dic"; *) | |
570 | -(* generate_verb_rules "data/" "verbs_ex.tab" "rules/VERB-FLEX6.dic"; *) | |
571 | - () | |
572 | - | |
455 | +*) | |
573 | 456 | let _ = |
574 | 457 | (* find_tags_lemata_verb "data/" "verbs_ex.tab"; *) |
575 | 458 | (* find_tags_lemata_noun "data/" "nouns_ex.tab"; *) |
... | ... | @@ -579,7 +462,7 @@ let _ = |
579 | 462 | (* Rules.print "results/rules/"; *) |
580 | 463 | (* Rules.print_compound_rules "results/compounds.dic" (Rules.create_rules ()); *) |
581 | 464 | () |
582 | - | |
465 | +(* | |
583 | 466 | let interp_rule_string tags interp = |
584 | 467 | let tags = Xlist.sort tags compare_tag in |
585 | 468 | String.concat " " (Xlist.map tags (fun (k,v) -> k ^ "=" ^ v)) ^ "\t" ^ interp |
... | ... | @@ -613,7 +496,7 @@ let print_interp_rules con_flag group_flag lemma_flag path filename out_filename |
613 | 496 | File.file_out out_filename (fun file -> |
614 | 497 | StringMap.iter interp_rules (fun k (q,l) -> |
615 | 498 | fprintf file "\t%s\t# %d %s\n" k q (String.concat " " l))) |
616 | - | |
499 | +*) | |
617 | 500 | let _ = |
618 | 501 | (* print_interp_rules true true true morfeusz_path ("odm_adj_" ^ sgjp_filename) "results/interp_rules_odm_adj.tab"; *) |
619 | 502 | (* print_interp_rules false true true morfeusz_path ("odm_adj_" ^ sgjp_filename) "results/interp_rules_odm_adj2.tab"; *) |
... | ... | @@ -623,4 +506,3 @@ let _ = |
623 | 506 | (* print_interp_rules true true true morfeusz_path ("odm_noun_" ^ sgjp_filename) "results/interp_rules_odm_noun.tab"; |
624 | 507 | print_interp_rules true true false morfeusz_path ("odm_noun_" ^ sgjp_filename) "results/interp_rules_odm_noun2.tab"; *) |
625 | 508 | () |
626 | -*) | |
... | ... |
guesser/ruleGenerator.ml
... | ... | @@ -211,21 +211,23 @@ let entry_classes = |
211 | 211 | "subst:sg:nom:n2","um","UM"; |
212 | 212 | ] |
213 | 213 | |
214 | -let generate_rules_entry rules entry = | |
214 | +let generate_rules_entry entry = | |
215 | 215 | let stem_pref = Stem.cut_stem_sufix entry.stem in |
216 | 216 | let cl = classify_entry entry entry_classes in |
217 | - Xlist.fold entry.forms rules (fun rules form -> | |
218 | - let rule = cl ^ "\t" ^ generate_rule entry.stem stem_pref form.orth in | |
219 | - let rules2 = try StringMap.find rules form.interp with Not_found -> StringMap.empty in | |
220 | - let rules2 = StringMap.add_inc rules2 rule (1,[entry.lemma]) (fun (q,l) -> q+1, if q < 20 then entry.lemma :: l else l) in | |
221 | - StringMap.add rules form.interp rules2) | |
217 | + Xlist.map entry.forms (fun form -> | |
218 | + form.interp,cl ^ "\t" ^ generate_rule entry.stem stem_pref form.orth) | |
222 | 219 | |
223 | -let generate_rules_com_entry rules entry = | |
224 | - let stem_pref = Stem.cut_stem_sufix entry.stem in | |
225 | -(* let cl = classify_entry entry entry_classes in *) | |
226 | - Xlist.fold entry.forms rules (fun rules form -> | |
227 | - if not (Xstring.check_sufix ":com" form.interp) then rules else | |
228 | - let rule = "\t" ^ generate_rule entry.stem stem_pref form.orth in | |
229 | - let rules2 = try StringMap.find rules form.interp with Not_found -> StringMap.empty in | |
230 | - let rules2 = StringMap.add_inc rules2 rule (1,[entry.lemma]) (fun (q,l) -> q+1, if q < 20 then entry.lemma :: l else l) in | |
231 | - StringMap.add rules form.interp rules2) | |
220 | +let generate_interp_rules con_flag group_flag lemma_flag simple_lemma form = | |
221 | + let rules = Rules.compound_rule_trees in | |
222 | + let candidates = Rules.CharTrees.find rules form.orth in | |
223 | + (* printf "S %d\n" (Xlist.size forms); *) | |
224 | + let candidates = Xlist.fold candidates [] (fun candidates (stem,rule) -> | |
225 | + (* printf "R %s\t%s\n" stem (Rules.string_of_rule rule); *) | |
226 | + if stem ^ rule.Rules.set = simple_lemma then rule :: candidates else candidates) in | |
227 | + Xlist.rev_map candidates (fun rule -> | |
228 | + let tags = rule.Rules.tags in | |
229 | + let tags = if con_flag then snd (Rules.extract_tag "con" [] tags) else tags in | |
230 | + let tags = if group_flag then snd (Rules.extract_tag "group" [] tags) else tags in | |
231 | + let tags = if lemma_flag then snd (Rules.extract_tag "lemma" [] tags) else tags in | |
232 | + let tags = Xlist.sort tags Rules.compare_tag in | |
233 | + String.concat " " (Xlist.map tags (fun (k,v) -> k ^ "=" ^ v)) ^ "\t" ^ form.interp) | |
... | ... |
guesser/rules.ml
... | ... | @@ -282,11 +282,7 @@ let interp_compound_rule_trees = CharTrees.create interp_compound_rules |
282 | 282 | (**********************************************************************************************) |
283 | 283 | (**********************************************************************************************) |
284 | 284 | |
285 | -(* let rec select_tag tag rev = function | |
286 | - [] -> "", rev | |
287 | - | (k,v) :: l -> if k = tag then v, rev @ l else select_tag tag ((k,v) :: rev) l | |
288 | - | |
289 | -let is_applicable_rule rule s = Xstring.check_sufix rule.find s | |
285 | +(* let is_applicable_rule rule s = Xstring.check_sufix rule.find s | |
290 | 286 | |
291 | 287 | let apply_rule rule s = |
292 | 288 | (Xstring.cut_sufix rule.find s) ^ rule.set |
... | ... |