Commit 9f53f85cc3623461c1cbd40e331f2bd4745cb911

Authored by Daniel Oklesiński
1 parent 56fafa1e

dodana statystyka znaków interpunkcyjnych w krzakach.conll, rozszerzenie info_se…

…ntences (do nowego pliku) info_sentences2.txt, zdebugowanie funkcji replace_tokens w tree_change.ml

Too many changes to show.

To preserve performance only 3 of 9 files are displayed.

corpora/CONLL.ml
... ... @@ -173,7 +173,7 @@ let rec establish_lengths text paths tokens =
173 173  
174 174 exception ErrorInfoFile of string
175 175  
176   -let info_file = "../corpora/info_sentences.txt"
  176 +let info_file = "../corpora/info_sentences2.txt"
177 177  
178 178 let info = Xstring.split "\n\n" @@ File.load_file_gen info_file
179 179  
... ... @@ -183,7 +183,7 @@ let add_to_map map info_str =
183 183 | _ -> raise (ErrorInfoFile info_str)
184 184  
185 185 let info_map =
186   - Xlist.fold info StringMap.empty add_to_map
  186 + Xlist.fold (List.tl info) StringMap.empty add_to_map
187 187  
188 188 let match_sentence (p_record,tokens) =
189 189 let rec info_token s = match s with
... ...
corpora/conllParser.ml
1 1 open Xstd
2 2 open Types
3 3  
4   -let skladnica_zaleznosciowa_filename = "resources/skladnica_zaleznosciowa.conll"
  4 +let skladnica_zaleznosciowa_filename = "../../NLP resources/skladnica_zaleznosciowa.conll"
5 5  
6 6 let oc = open_out "../corpora/info_sentences.txt"
7 7  
... ... @@ -79,7 +79,7 @@ let any_difference string1 string2 = if string1 = string2
79 79 let find_info tokens =
80 80 let text_generated = get_sentence tokens in
81 81 try
82   - let sentence = Resources.InfoMap.find (Xlist.map tokens (fun token -> token.c_orth)) Resources.conll_info in
  82 + let sentence = Resources.InfoMap.find (Xlist.map tokens (fun token -> token.c_orth)) (Resources.conll_info ()) in
83 83 let id, text = sentence.s_id, sentence.s_text in
84 84 (*if any_difference text text_generated && text <> "not_found"
85 85 then print_endline (text ^ "\n" ^ text_generated ^ "\n\n");*)
... ...
corpora/generate.ml
1   -(* Generowanie pliku info_sentences.txt *)
2   -let _ =
  1 +(* Generowanie pliku info_sentences.txt na podstawie skladnicy walencyjnej*)
  2 +let _ =
3 3 (*ConllParser.processSkladnica () *)
4 4 ()
  5 +
  6 +(* Generowanie pliku info_sentences2.txt na podstawie krzaków *)
  7 +let _ =
  8 + (* Resources.info_file () *)
  9 + ()
  10 +
  11 +(* Generowanie pliku ../../NLP resources/krzaki_interp_statistics.txt na podstawie krzaków *)
  12 +let _ =
  13 + InterpsInCorpus.print_diagnose ()
... ...