Commit baf1c22d44d40ac4a047d6e9bfa6f09ca47dafe6

Authored by Wojciech Jaworski
1 parent 173c381a

generowanie interp rules

guesser/dict.ml
... ... @@ -300,7 +300,9 @@ let exceptional_lemmata = StringSet.of_list ([
300 300 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
301 301 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
302 302 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";*)
303   - ] @ File.load_lines "data/obce.tab" @ File.load_lines "data/validated_adj.tab" @ File.load_lines "data/validated_noun.tab" @ File.load_lines "data/validated_verb.tab" @ File.load_lines "data/adv_nieodprzymiotnikowe.tab")
  303 + ] @ File.load_lines "data/obce.tab" (* @
  304 + File.load_lines "data/validated_adj.tab" @ File.load_lines "data/validated_noun.tab" @
  305 + File.load_lines "data/validated_verb.tab" @ File.load_lines "data/adv_nieodprzymiotnikowe.tab" *))
304 306  
305 307 let remove_exceptional_lemmata dict =
306 308 Xlist.fold dict [] (fun dict entry ->
... ... @@ -316,7 +318,7 @@ let generate_stem dict =
316 318 else ""})
317 319  
318 320 let validate dict =
319   - let rules = Rules.interp_compound_rule_trees in
  321 + let rules = Rules.compound_rule_trees in
320 322 Xlist.rev_map dict (fun entry ->
321 323 let simple_lemma = Stem.simplify_lemma entry.lemma in
322 324 let forms = Xlist.rev_map entry.forms (fun form ->
... ... @@ -326,7 +328,22 @@ let validate dict =
326 328 let candidates = Xlist.fold candidates [] (fun candidates (stem,rule) ->
327 329 (* printf "R %s\t%s\n" stem (Rules.string_of_rule rule); *)
328 330 if stem ^ rule.Rules.set = simple_lemma then (stem,rule) :: candidates else candidates) in
329   - if candidates = [] then form else {form with validated=true}) in
  331 + if candidates = [] then {form with validated=false} else {form with validated=true}) in
  332 + {entry with forms=forms})
  333 +
  334 +let validate_interp dict =
  335 + let rules = Rules.interp_compound_rule_trees in
  336 + Xlist.rev_map dict (fun entry ->
  337 + let simple_lemma = Stem.simplify_lemma entry.lemma in
  338 + let forms = Xlist.rev_map entry.forms (fun form ->
  339 + (* printf "E %s\t%s\t%s\n" orth lemma interp; *)
  340 + let candidates = Rules.CharTrees.find rules form.orth in
  341 + (* printf "S %d\n" (Xlist.size forms); *)
  342 + let candidates = Xlist.fold candidates [] (fun candidates (stem,rule) ->
  343 + (* printf "R %s\t%s\n" stem (Rules.string_of_rule rule); *)
  344 + if stem ^ rule.Rules.set = simple_lemma && form.interp = rule.Rules.interp then
  345 + (stem,rule) :: candidates else candidates) in
  346 + if candidates = [] then {form with validated=false} else {form with validated=true}) in
330 347 {entry with forms=forms})
331 348  
332 349 let remove_validated_forms dict =
... ... @@ -363,3 +380,64 @@ let print_lemmata filename dict =
363 380 File.file_out filename (fun file ->
364 381 Xlist.iter dict (fun entry ->
365 382 fprintf file "%s\n" entry.lemma))
  383 +
  384 +let remove_sup_neg_forms dict =
  385 + Xlist.fold dict [] (fun dict entry ->
  386 + let forms = Xlist.fold entry.forms [] (fun forms form ->
  387 + if Xstring.check_sufix ":neg" form.interp || Xstring.check_sufix ":sup" form.interp then
  388 + forms else form :: forms) in
  389 + if forms = [] then dict else {entry with forms=forms} :: dict)
  390 +
  391 +let generate_rules path filename rules_filename =
  392 + let dict = load_tab (path ^ filename) in
  393 + (* printf "g1 %d\n%!" (Xlist.size dict); *)
  394 + let dict = merge_entries dict in
  395 + (* printf "g2 %d\n%!" (Xlist.size dict); *)
  396 + let dict = mark_ndm dict in (* FIXME: remove_ndm? *)
  397 + (* printf "g3 %d\n%!" (Xlist.size dict); *)
  398 + let dict = find_kolwiek_suffixes dict in
  399 + (* printf "g4 %d\n%!" (Xlist.size dict); *)
  400 + let dict = remove_exceptional_lemmata dict in
  401 + (* printf "g5 %d\n%!" (Xlist.size dict); *)
  402 + let dict = generate_stem dict in
  403 + let dict = validate dict in
  404 + let dict = remove_validated_forms dict in
  405 + (* printf "g6 %d\n%!" (Xlist.size dict); *)
  406 + let dict = remove_sup_neg_forms dict in
  407 + (* printf "g7 %d\n%!" (Xlist.size dict); *)
  408 + let rules = Xlist.fold dict StringMap.empty (fun rules entry ->
  409 + Xlist.fold (RuleGenerator.generate_rules_entry entry) rules (fun rules (key,rule) ->
  410 + let rules2 = try StringMap.find rules key with Not_found -> StringMap.empty in
  411 + let rules2 = StringMap.add_inc rules2 rule (1,[entry.lemma]) (fun (q,l) -> q+1, if q < 20 then entry.lemma :: l else l) in
  412 + StringMap.add rules key rules2)) in
  413 + File.file_out rules_filename (fun file ->
  414 + StringMap.iter rules (fun interp rules2 ->
  415 + fprintf file "\n@RULES %s\n" interp;
  416 + StringMap.iter rules2 (fun rule (q,l) ->
  417 + fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l))))
  418 +
  419 +let generate_interp_rules con_flag group_flag lemma_flag path filename rules_filename =
  420 + let dict = load_tab (path ^ filename) in
  421 + (* printf "g1 %d\n%!" (Xlist.size dict); *)
  422 + let dict = merge_entries dict in
  423 + (* printf "g2 %d\n%!" (Xlist.size dict); *)
  424 + let dict = mark_ndm dict in (* FIXME: remove_ndm? *)
  425 + (* printf "g3 %d\n%!" (Xlist.size dict); *)
  426 + let dict = find_kolwiek_suffixes dict in
  427 + (* printf "g4 %d\n%!" (Xlist.size dict); *)
  428 + let dict = remove_exceptional_lemmata dict in
  429 + (* printf "g5 %d\n%!" (Xlist.size dict); *)
  430 + let dict = generate_stem dict in
  431 + let dict = validate_interp dict in
  432 + let dict = remove_validated_forms dict in
  433 + (* printf "g6 %d\n%!" (Xlist.size dict); *)
  434 + (* printf "g7 %d\n%!" (Xlist.size dict); *)
  435 + let interp_rules = Xlist.fold dict StringMap.empty (fun interp_rules entry ->
  436 + let simple_lemma = Stem.simplify_lemma entry.lemma in
  437 + Xlist.fold entry.forms interp_rules (fun interp_rules form ->
  438 + let candidates = RuleGenerator.generate_interp_rules con_flag group_flag lemma_flag simple_lemma form in
  439 + Xlist.fold candidates interp_rules (fun interp_rules cand ->
  440 + StringMap.add_inc interp_rules cand (1,[entry.lemma]) (fun (q,l) -> q+1, if q < 20 then entry.lemma :: l else l)))) in
  441 + File.file_out rules_filename (fun file ->
  442 + StringMap.iter interp_rules (fun k (q,l) ->
  443 + fprintf file "\t%s\t# %d %s\n" k q (String.concat " " l)))
... ...
guesser/generate.ml
... ... @@ -140,7 +140,7 @@ let check_stem_generation path filename =
140 140  
141 141 (* Sprawdzenie dziaล‚ania stemowania *)
142 142 let _ =
143   - check_stem_generation morfeusz_path sgjp_filename;
  143 + (* check_stem_generation morfeusz_path sgjp_filename; *)
144 144 (* check_stem_generation morfeusz_path ("odm_adj_" ^ sgjp_filename); *)
145 145 (* check_stem_generation morfeusz_path ("odm_noun_" ^ sgjp_filename); *)
146 146 ()
... ... @@ -160,7 +160,7 @@ let find_not_validated_entries path filename out_filename =
160 160  
161 161 (* Wypisanie niezwalidowanych form *)
162 162 let _ =
163   - find_not_validated_forms morfeusz_path odm_adj_sgjp_filename "results/not_validated_odm_adj.tab";
  163 + (* find_not_validated_forms morfeusz_path odm_adj_sgjp_filename "results/not_validated_odm_adj.tab"; *)
164 164 (* find_not_validated_entries morfeusz_path ("odm_adj_" ^ sgjp_filename) "results/not_validated_odm_adj.tab"; *)
165 165 (* find_not_validated_entries "results/" "not_validated_odm_adj.tab" "results/not_validated_odm_adj2.tab"; *)
166 166 (* find_not_validated_entries morfeusz_path ("odm_noun_" ^ sgjp_filename) "results/not_validated_odm_noun.tab"; *)
... ... @@ -177,11 +177,31 @@ let find_validated_lemmata path filename out_filename =
177 177  
178 178 (* Wypisanie zwalidowanych lematรณw *)
179 179 let _ =
180   - find_validated_lemmata morfeusz_path odm_adj_sgjp_filename "results/validated_odm_adj.tab";
  180 + (* find_validated_lemmata morfeusz_path odm_adj_sgjp_filename "results/validated_odm_adj.tab";
181 181 find_validated_lemmata morfeusz_path odm_noun_sgjp_filename "results/validated_odm_noun.tab";
182   - find_validated_lemmata morfeusz_path verb_sgjp_filename "results/validated_verb.tab";
  182 + find_validated_lemmata morfeusz_path verb_sgjp_filename "results/validated_verb.tab"; *)
183 183 ()
184 184  
  185 +(* Generowanie reguล‚ *)
  186 +let _ =
  187 + (* Dict.generate_rules morfeusz_path odm_adj_sgjp_filename "results/rules-odm-adj.txt"; *)
  188 + (* Dict.generate_rules morfeusz_path odm_noun_sgjp_filename "results/rules-odm-noun.txt";
  189 + Dict.generate_rules morfeusz_path adv_sgjp_filename "results/rules-adv.txt";
  190 + Dict.generate_rules morfeusz_path verb_sgjp_filename "results/rules-verb.txt"; *)
  191 + ()
  192 +
  193 +(* Generowanie reguล‚ dla interpretacji *)
  194 +let _ =
  195 + Dict.generate_interp_rules true true true morfeusz_path odm_adj_sgjp_filename "results/interp_rules_odm_adj.tab";
  196 +(* Dict.generate_interp_rules false true true morfeusz_path ("odm_adj_" ^ sgjp_filename) "results/interp_rules_odm_adj2.tab"; *)
  197 +(* Dict.generate_interp_rules true true true morfeusz_path ("adv_" ^ sgjp_filename) "results/interp_rules_adv.tab"; *)
  198 +(* Dict.generate_interp_rules true true true morfeusz_path ("verb_" ^ sgjp_filename) "results/interp_rules_verb.tab";
  199 +Dict.generate_interp_rules true false true morfeusz_path ("verb_" ^ sgjp_filename) "results/interp_rules_verb2.tab"; *)
  200 +(* Dict.generate_interp_rules true true true morfeusz_path ("odm_noun_" ^ sgjp_filename) "results/interp_rules_odm_noun.tab";
  201 +Dict.generate_interp_rules true true false morfeusz_path ("odm_noun_" ^ sgjp_filename) "results/interp_rules_odm_noun2.tab";*)
  202 + ()
  203 +
  204 +
185 205 (**********************************************************************************)
186 206  
187 207 module OrderedStringList = struct
... ... @@ -387,21 +407,6 @@ let _ =
387 407  
388 408 (**********************************************************************************)
389 409 (*
390   -let remove_com_sup dict =
391   - List.rev (Xlist.fold dict [] (fun l (orth,lemma,interp) ->
392   - if Xstring.check_sufix ":com" interp || Xstring.check_sufix ":sup" interp then l else (orth,lemma,interp) :: l))
393   -
394   -let generate_adj_pos_rules rules_filename dict =
395   - let dict = map_of_tab dict in
396   - let dict = find_kolwiek_suffixes dict in
397   - let dict = remove_exceptional_lemmata dict in
398   - let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_entry "adj") in
399   - File.file_out rules_filename (fun file ->
400   - StringMap.iter rules (fun interp rules2 ->
401   - fprintf file "\n@RULES %s\n" interp;
402   - StringMap.iter rules2 (fun rule (q,l) ->
403   - fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l))))
404   -
405 410 let split_into_groups interp_sel dict =
406 411 Xlist.fold dict StringMap.empty (fun dict (lemma,orth,interp) ->
407 412 let group =
... ... @@ -420,118 +425,6 @@ let load_interp_sel filename =
420 425 [group;interp;label] -> StringMap.add interp_sel interp group
421 426 | line -> failwith ("load_interp_sel: " ^ (String.concat "\t" line)))
422 427  
423   -let generate_adj_rules path filename adj_pos_rules_filename =
424   - let interp_sel = load_interp_sel "data/interps.tab" in
425   - let dict = load_tab (path ^ filename) in
426   - let dict = split_into_groups interp_sel dict in
427   - if StringMap.size dict <> 3 then failwith ("generate_adj_rules: " ^
428   - String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else
429   - generate_adj_pos_rules adj_pos_rules_filename (StringMap.find dict "adj");
430   - ()
431   -
432   -let generate_adj_com_rules path filename adj_com_rules_filename =
433   - let dict = load_tab (path ^ filename) in
434   - let dict = map_of_tab dict in
435   - let dict = find_kolwiek_suffixes dict in
436   -(* let dict = remove_exceptional_lemmata dict in *)
437   - let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_com_entry "adj") in
438   - File.file_out adj_com_rules_filename (fun file ->
439   - StringMap.iter rules (fun interp rules2 ->
440   - fprintf file "\n@RULES %s\n" interp;
441   - StringMap.iter rules2 (fun rule (q,l) ->
442   - fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l))))
443   -
444   -let generate_adv_pos_rules rules_filename dict =
445   - let dict = map_of_tab dict in
446   - let dict = find_kolwiek_suffixes dict in
447   - let dict = remove_exceptional_lemmata dict in
448   - let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_entry "adv") in
449   - File.file_out rules_filename (fun file ->
450   - StringMap.iter rules (fun interp rules2 ->
451   - fprintf file "\n@RULES %s\n" interp;
452   - StringMap.iter rules2 (fun rule (q,l) ->
453   - fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l))))
454   -
455   -let generate_adv_rules path filename adv_pos_rules_filename =
456   - let interp_sel = load_interp_sel "data/interps.tab" in
457   - let dict = load_tab (path ^ filename) in
458   - let dict = split_into_groups interp_sel dict in
459   - if StringMap.size dict <> 3 then failwith ("generate_adv_rules: " ^
460   - String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else
461   - generate_adv_pos_rules adv_pos_rules_filename (StringMap.find dict "adv");
462   - ()
463   -
464   -let generate_adv_com_rules path filename adv_com_rules_filename =
465   - let dict = load_tab (path ^ filename) in
466   - let dict = map_of_tab dict in
467   - let dict = find_kolwiek_suffixes dict in
468   - let dict = remove_exceptional_lemmata dict in
469   - let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_com_entry "adv") in
470   - File.file_out adv_com_rules_filename (fun file ->
471   - StringMap.iter rules (fun interp rules2 ->
472   - fprintf file "\n@RULES %s\n" interp;
473   - StringMap.iter rules2 (fun rule (q,l) ->
474   - fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l))))
475   -
476   -
477   -let generate_noun_rules2 rules_filename dict =
478   - let dict = map_of_tab dict in
479   - let dict = find_kolwiek_suffixes dict in
480   - let dict = remove_exceptional_lemmata dict in
481   - let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_entry "noun") in
482   -(* let rules = StringMap.fold dict StringMap.empty (fun rules lemma l ->
483   - let interps = Xlist.fold l StringMap.empty (fun map (orth,interp) ->
484   - StringMap.add_inc map interp [orth] (fun l -> orth :: l)) in
485   - let stem(*,_*) = generate_stem (*0*) interps lemma noun_stem_sel in
486   - let cl = classify_noun lemma stem interps noun_classes in
487   - if cl <> "A" && cl <> "II" (*&& cl <> "ฤ˜" && cl <> "ฤ„"*) then rules else
488   - if has_known_inflexion_noun stem interps then rules else
489   - let interps = select_inflexion rules_a stem interps in
490   - let stem2 = cut_stem_sufix stem in
491   - StringMap.fold interps rules (fun rules interp orths ->
492   - Xlist.fold orths rules (fun rules orth ->
493   - let n = find_common_prefix_length [stem2;orth] in
494   - let rules2 = try StringMap.find rules interp with Not_found -> StringMap.empty in
495   - let a = cut_prefixn n orth in
496   - let b = cut_prefixn n stem in
497   - let c,f = rule_code (a,b) in
498   -(* let rule = sprintf "%s\t%s\t%s\t%s" cl c a b in *)
499   - let rule = cl ^ "\t" ^ if f then "\t" ^ c else sprintf "%s\t%s\t%s" c a b in
500   - let rules2 = StringMap.add_inc rules2 rule (1,[lemma]) (fun (q,l) -> q+1, if q < 20 then lemma :: l else l) in
501   - StringMap.add rules interp rules2))) in*)
502   - File.file_out rules_filename (fun file ->
503   - StringMap.iter rules (fun interp rules2 ->
504   - fprintf file "\n@RULES %s\n" interp;
505   - StringMap.iter rules2 (fun rule (q,l) ->
506   - fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l))))
507   -
508   -let generate_noun_rules path filename noun_rules_filename =
509   - let interp_sel = load_interp_sel "data/interps.tab" in
510   - let dict = load_tab (path ^ filename) in
511   - let dict = split_into_groups interp_sel dict in
512   - if StringMap.size dict <> 1 then failwith ("generate_noun_rules: " ^
513   - String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else
514   - generate_noun_rules2 noun_rules_filename (StringMap.find dict "noun");
515   - ()
516   -
517   -let generate_verb_rules2 rules_filename dict =
518   - let dict = map_of_tab dict in
519   - let dict = remove_exceptional_lemmata dict in
520   - let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_entry "verb") in
521   - File.file_out rules_filename (fun file ->
522   - StringMap.iter rules (fun interp rules2 ->
523   - fprintf file "\n@RULES %s\n" interp;
524   - StringMap.iter rules2 (fun rule (q,l) ->
525   - fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l))))
526   -
527   -let generate_verb_rules path filename rules_filename =
528   - let interp_sel = load_interp_sel "data/interps.tab" in
529   - let dict = load_tab (path ^ filename) in
530   - let dict = split_into_groups interp_sel dict in
531   - if StringMap.size dict <> 2 then failwith ("generate_verb_rules: " ^
532   - String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else
533   - generate_verb_rules2 rules_filename (StringMap.find dict "verb");
534   - ()
535 428  
536 429 let find_tags_lemata_noun path filename =
537 430 let dict = load_tab (path ^ filename) in
... ... @@ -559,17 +452,7 @@ let find_tags_lemata_verb path filename =
559 452 if StringMap.size dict <> 2 then failwith ("generate_verb_rules: " ^
560 453 String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else
561 454 find_tags_lemata_verb2 (StringMap.find dict "verb")
562   -
563   -let _ =
564   -(* generate_adj_rules morfeusz_path ("odm_adj_" ^ sgjp_filename) "rules/ADJ-FLEX6.dic"; *)
565   -(* generate_noun_rules morfeusz_path ("odm_noun_" ^ sgjp_filename) "rules/NOUN-FLEX6.dic"; *)
566   -(* generate_adj_com_rules morfeusz_path ("odm_adj_" ^ sgjp_filename) "rules/ADJ-FLEX-COM6.dic"; *)
567   -(* generate_adv_rules morfeusz_path ("adv_" ^ sgjp_filename) "rules/ADV-FLEX6.dic"; *)
568   -(* generate_adv_com_rules morfeusz_path ("adv_" ^ sgjp_filename) "rules/ADV-FLEX-COM6.dic"; *)
569   -(* generate_verb_rules morfeusz_path ("verb_" ^ sgjp_filename) "rules/VERB-FLEX6.dic"; *)
570   -(* generate_verb_rules "data/" "verbs_ex.tab" "rules/VERB-FLEX6.dic"; *)
571   - ()
572   -
  455 +*)
573 456 let _ =
574 457 (* find_tags_lemata_verb "data/" "verbs_ex.tab"; *)
575 458 (* find_tags_lemata_noun "data/" "nouns_ex.tab"; *)
... ... @@ -579,7 +462,7 @@ let _ =
579 462 (* Rules.print "results/rules/"; *)
580 463 (* Rules.print_compound_rules "results/compounds.dic" (Rules.create_rules ()); *)
581 464 ()
582   -
  465 +(*
583 466 let interp_rule_string tags interp =
584 467 let tags = Xlist.sort tags compare_tag in
585 468 String.concat " " (Xlist.map tags (fun (k,v) -> k ^ "=" ^ v)) ^ "\t" ^ interp
... ... @@ -613,7 +496,7 @@ let print_interp_rules con_flag group_flag lemma_flag path filename out_filename
613 496 File.file_out out_filename (fun file ->
614 497 StringMap.iter interp_rules (fun k (q,l) ->
615 498 fprintf file "\t%s\t# %d %s\n" k q (String.concat " " l)))
616   -
  499 +*)
617 500 let _ =
618 501 (* print_interp_rules true true true morfeusz_path ("odm_adj_" ^ sgjp_filename) "results/interp_rules_odm_adj.tab"; *)
619 502 (* print_interp_rules false true true morfeusz_path ("odm_adj_" ^ sgjp_filename) "results/interp_rules_odm_adj2.tab"; *)
... ... @@ -623,4 +506,3 @@ let _ =
623 506 (* print_interp_rules true true true morfeusz_path ("odm_noun_" ^ sgjp_filename) "results/interp_rules_odm_noun.tab";
624 507 print_interp_rules true true false morfeusz_path ("odm_noun_" ^ sgjp_filename) "results/interp_rules_odm_noun2.tab"; *)
625 508 ()
626   -*)
... ...
guesser/ruleGenerator.ml
... ... @@ -211,21 +211,23 @@ let entry_classes =
211 211 "subst:sg:nom:n2","um","UM";
212 212 ]
213 213  
214   -let generate_rules_entry rules entry =
  214 +let generate_rules_entry entry =
215 215 let stem_pref = Stem.cut_stem_sufix entry.stem in
216 216 let cl = classify_entry entry entry_classes in
217   - Xlist.fold entry.forms rules (fun rules form ->
218   - let rule = cl ^ "\t" ^ generate_rule entry.stem stem_pref form.orth in
219   - let rules2 = try StringMap.find rules form.interp with Not_found -> StringMap.empty in
220   - let rules2 = StringMap.add_inc rules2 rule (1,[entry.lemma]) (fun (q,l) -> q+1, if q < 20 then entry.lemma :: l else l) in
221   - StringMap.add rules form.interp rules2)
  217 + Xlist.map entry.forms (fun form ->
  218 + form.interp,cl ^ "\t" ^ generate_rule entry.stem stem_pref form.orth)
222 219  
223   -let generate_rules_com_entry rules entry =
224   - let stem_pref = Stem.cut_stem_sufix entry.stem in
225   -(* let cl = classify_entry entry entry_classes in *)
226   - Xlist.fold entry.forms rules (fun rules form ->
227   - if not (Xstring.check_sufix ":com" form.interp) then rules else
228   - let rule = "\t" ^ generate_rule entry.stem stem_pref form.orth in
229   - let rules2 = try StringMap.find rules form.interp with Not_found -> StringMap.empty in
230   - let rules2 = StringMap.add_inc rules2 rule (1,[entry.lemma]) (fun (q,l) -> q+1, if q < 20 then entry.lemma :: l else l) in
231   - StringMap.add rules form.interp rules2)
  220 +let generate_interp_rules con_flag group_flag lemma_flag simple_lemma form =
  221 + let rules = Rules.compound_rule_trees in
  222 + let candidates = Rules.CharTrees.find rules form.orth in
  223 + (* printf "S %d\n" (Xlist.size forms); *)
  224 + let candidates = Xlist.fold candidates [] (fun candidates (stem,rule) ->
  225 + (* printf "R %s\t%s\n" stem (Rules.string_of_rule rule); *)
  226 + if stem ^ rule.Rules.set = simple_lemma then rule :: candidates else candidates) in
  227 + Xlist.rev_map candidates (fun rule ->
  228 + let tags = rule.Rules.tags in
  229 + let tags = if con_flag then snd (Rules.extract_tag "con" [] tags) else tags in
  230 + let tags = if group_flag then snd (Rules.extract_tag "group" [] tags) else tags in
  231 + let tags = if lemma_flag then snd (Rules.extract_tag "lemma" [] tags) else tags in
  232 + let tags = Xlist.sort tags Rules.compare_tag in
  233 + String.concat " " (Xlist.map tags (fun (k,v) -> k ^ "=" ^ v)) ^ "\t" ^ form.interp)
... ...
guesser/rules.ml
... ... @@ -282,11 +282,7 @@ let interp_compound_rule_trees = CharTrees.create interp_compound_rules
282 282 (**********************************************************************************************)
283 283 (**********************************************************************************************)
284 284  
285   -(* let rec select_tag tag rev = function
286   - [] -> "", rev
287   - | (k,v) :: l -> if k = tag then v, rev @ l else select_tag tag ((k,v) :: rev) l
288   -
289   -let is_applicable_rule rule s = Xstring.check_sufix rule.find s
  285 +(* let is_applicable_rule rule s = Xstring.check_sufix rule.find s
290 286  
291 287 let apply_rule rule s =
292 288 (Xstring.cut_sufix rule.find s) ^ rule.set
... ...