Commit 56c5b9f7eb6000373d9c88f02a23dc67cc052e55
1 parent
7f69c1b8
Wstępna wersja reguł dla ortografii angielskiej
Showing
6 changed files
with
1094 additions
and
890 deletions
morphology/data/akronimy.tab deleted
1 | -XBW | |
2 | -DJ | |
3 | -VIP | |
4 | -AIDS | |
5 | -CMYK | |
6 | -DOS | |
7 | -HIV | |
8 | -HTML | |
9 | -SGML | |
10 | -SMS | |
11 | -STAR | |
12 | -TIR | |
13 | -UNIX | |
14 | -CMYK | |
15 | -HTML | |
16 | -SGML | |
17 | -sms | |
18 | -ADP | |
19 | -AIDS | |
20 | -ANSI | |
21 | -API | |
22 | -APS | |
23 | -ASCII | |
24 | -ATP | |
25 | -AWF | |
26 | -BBK | |
27 | -BC | |
28 | -BDK | |
29 | -BFG | |
30 | -BG | |
31 | -BGK | |
32 | -BH | |
33 | -BHK | |
34 | -BISE | |
35 | -BMW | |
36 | -BP:s1 | |
37 | -BPH | |
38 | -DNA | |
39 | -NSA | |
40 | -RNA | |
41 | -RSHA | |
42 | -TWA | |
43 | -AL | |
44 | -AMERBANK | |
45 | -BIG | |
46 | -BOŚ | |
47 | -BŚ | |
48 | -ChRL | |
49 | -DHL | |
50 | -EUROPOL | |
51 | -FATAH | |
52 | -GL | |
53 | -GUC | |
54 | -HDL | |
55 | -HTML | |
56 | -IBL | |
57 | -KAI | |
58 | -KAL | |
59 | -KUL | |
60 | -LDL | |
61 | -LOK | |
62 | -MAL | |
63 | -BERD | |
64 | -CAD | |
65 | -NEC | |
66 | -OAPEC | |
67 | -OPEC | |
68 | -SAAB | |
69 | -SIMM | |
70 | -AAP | |
71 | -ADM | |
72 | -ADN | |
73 | -AF | |
74 | -AFP | |
75 | -AGH | |
76 | -AI:s1 | |
77 | -AI:s2 | |
78 | -AIF | |
79 | -AK | |
80 | -AL | |
81 | -AM | |
82 | -AON | |
83 | -AP:s1 | |
84 | -AP:s2 | |
85 | -AR | |
86 | -ARiMR | |
87 | -ASP | |
88 | -ATK | |
89 | -AWS | |
90 | -ABS | |
91 | -ADM | |
92 | -ADN | |
93 | -AF | |
94 | -AIDS | |
95 | -AIF | |
96 | -AL | |
97 | -AM | |
98 | -AMERBANK | |
99 | -AON | |
100 | -APS | |
101 | -AR | |
102 | -ARiMR | |
103 | -ASEAN | |
104 | -ASR | |
105 | -AWF | |
106 | -AWS | |
107 | -AZS | |
108 | -BAAS | |
109 | -BASF | |
110 | -ANSA | |
111 | -CEFTA | |
112 | -CIA | |
113 | -DPA | |
114 | -EFTA | |
115 | -ELTA | |
116 | -ENEA | |
117 | -EPA | |
118 | -ETA | |
119 | -INA | |
120 | -IRA | |
121 | -JANA | |
122 | -KNA | |
123 | -NAFTA | |
124 | -NASA | |
125 | -NRA | |
126 | -OPA | |
127 | -PA | |
128 | -PIA | |
129 | -RPA | |
130 | -ABB | |
131 | -ABC | |
132 | -AC | |
133 | -ADP | |
134 | -AFP | |
135 | -AGH | |
136 | -AI:s1 | |
137 | -AIDS | |
138 | -AK | |
139 | -AL | |
140 | -ANSI | |
141 | -AP:s1 | |
142 | -ATK | |
143 | -ATP | |
144 | -ATT | |
145 | -BBC | |
146 | -BBK | |
147 | -BCG | |
148 | -BFG | |
149 | -BG | |
150 | -AGD | |
151 | -LP:s1 | |
152 | -LZS | |
153 | -MTP | |
154 | -MŚ:s2 | |
155 | -OHP | |
156 | -PKP | |
157 | -UN | |
158 | -WSiP | |
159 | -WZiZT | |
160 | -USA | |
161 | -ZEA | |
162 | -CIT | |
163 | -DAT | |
164 | -FAT | |
165 | -LOT | |
166 | -NOT | |
167 | -OIT | |
168 | -PAGART | |
169 | -PIT | |
170 | -VAT | |
171 | -WAT | |
172 | -ZHR | |
173 | -tv | |
174 | -PIT:s1 | |
175 | -ABBA | |
176 | -APA | |
177 | -FAMA | |
178 | -LETTA | |
179 | -MENA | |
180 | -SABENA | |
181 | -UEFA | |
182 | -MKOl | |
183 | -MOK | |
184 | -MPiK | |
185 | -MŚ:s1 | |
186 | -NASK | |
187 | -NHL | |
188 | -NIK | |
189 | -NKOl | |
190 | -PAH | |
191 | -PAL | |
192 | -PESEL | |
193 | -PIH | |
194 | -PKOl | |
195 | -PLL | |
196 | -PŁ | |
197 | -URL | |
198 | -UŚ | |
199 | -UŚl | |
200 | -WIG | |
201 | -BBWR | |
202 | -BGŻ | |
203 | -BIOS | |
204 | -BN | |
205 | -BOR | |
206 | -BPS | |
207 | -BR | |
208 | -BRBM | |
209 | -BRR | |
210 | -BS | |
211 | -BSR | |
212 | -BZ | |
213 | -CAF | |
214 | -CAM | |
215 | -CD-ROM | |
216 | -COCOM | |
217 | -CRZZ | |
218 | -CWKS | |
219 | -DAB | |
220 | -DLS | |
221 | -CAM | |
222 | -CRZZ | |
223 | -ICJ | |
224 | -IPN | |
225 | -ISBN | |
226 | -ISDN | |
227 | -ISSN | |
228 | -KERM | |
229 | -MSZ | |
230 | -NIK | |
231 | -NIP | |
232 | -ONZ | |
233 | -PKS | |
234 | -PSS | |
235 | -RN:s1 | |
236 | -SARS | |
237 | -UJ | |
238 | -UOP | |
239 | -VHF | |
240 | -VHS | |
241 | -WAP | |
242 | -WiP | |
243 | -DM | |
244 | -DS | |
245 | -DVD-ROM | |
246 | -EBOR | |
247 | -EBOiR | |
248 | -EPROM | |
249 | -ERM | |
250 | -ESOP | |
251 | -FADOM | |
252 | -FAS | |
253 | -FOZZ | |
254 | -FPŻ | |
255 | -FSM | |
256 | -GKS | |
257 | -GM | |
258 | -GOPR | |
259 | -GPRS | |
260 | -GPS | |
261 | -GS | |
262 | -GUS | |
263 | -IFOR | |
264 | -IMiD | |
265 | -KBN | |
266 | -KLM | |
267 | -KM:s1 | |
268 | -KOR | |
269 | -KPN | |
270 | -KSERM | |
271 | -LAN | |
272 | -MBOiR | |
273 | -MDM | |
274 | -MEN | |
275 | -MF | |
276 | -MGM:s1 | |
277 | -MKS | |
278 | -MKiDN | |
279 | -MON | |
280 | -MOSiR | |
281 | -MS-DOS | |
282 | -MTS | |
283 | -NEP | |
284 | -NSZ | |
285 | -OBOP | |
286 | -OCR | |
287 | -ONR | |
288 | -ORWN | |
289 | -OZN | |
290 | -PANAM | |
291 | -PBKS | |
292 | -PBR | |
293 | -PFRON | |
294 | -PGR | |
295 | -PIN | |
296 | -PIP:s1 | |
297 | -PIW | |
298 | -PKWN | |
299 | -PKiN | |
300 | -PPS | |
301 | -PR:s1 | |
302 | -PRON | |
303 | -PRS | |
304 | -PUR | |
305 | -PWN | |
306 | -PZPN | |
307 | -PŻM | |
308 | -RAF | |
309 | -RAM | |
310 | -REGON | |
311 | -RFN | |
312 | -ROM | |
313 | -ROP | |
314 | -ROR | |
315 | -RPN | |
316 | -RUM | |
317 | -RUP | |
318 | -RdR | |
319 | -SAS | |
320 | -SCMS | |
321 | -SECAM | |
322 | -SIM | |
323 | -SKS | |
324 | -SPF | |
325 | -SPN | |
326 | -SUDANAIR | |
327 | -TPN | |
328 | -TS | |
329 | -UAM | |
330 | -UKF | |
331 | -UM | |
332 | -UMCS | |
333 | -UMTS | |
334 | -UNICEF | |
335 | -UNZ | |
336 | -UPS | |
337 | -URM | |
338 | -URz | |
339 | -US | |
340 | -UWM | |
341 | -UWr | |
342 | -UZ | |
343 | -VCR | |
344 | -WAN | |
345 | -WBZ | |
346 | -ZZ | |
347 | -ckm | |
348 | -lkm | |
349 | -scs | |
350 | -wf | |
351 | -ŁBR | |
352 | -ŁKS | |
353 | -ŚPN | |
354 | -WEP | |
355 | -WF | |
356 | -WKR | |
357 | -WOP | |
358 | -WOPR | |
359 | -WSM | |
360 | -WiN | |
361 | -ZAIKS | |
362 | -ZASP | |
363 | -ZBOWiD | |
364 | -ZBoWiD | |
365 | -ZMS | |
366 | -ZOM | |
367 | -ZOSP | |
368 | -ZOZ | |
369 | -ZS | |
370 | -ZUS | |
371 | -ZWM | |
372 | -ZWZ | |
373 | -BWZ | |
374 | -HDZ | |
375 | -MWGzZ | |
376 | -MZ | |
377 | -OPZZ | |
378 | -ZSZ | |
379 | -MB | |
380 | -TAB | |
381 | -ŻOB | |
382 | -NRF | |
383 | -PKF | |
384 | -RMF | |
385 | -SPATiF | |
386 | -TKKF | |
387 | -IBM | |
388 | -KM:s2 | |
389 | -MGM:s2 | |
390 | -PAM | |
391 | -PGM | |
392 | -RM | |
393 | -SM:s2 | |
394 | -ŚAM | |
395 | -CNN | |
396 | -CPN | |
397 | -FN | |
398 | -KEN | |
399 | -KRN | |
400 | -LN | |
401 | -MN | |
402 | -PAN | |
403 | -PN | |
404 | -REN | |
405 | -RN:s2 | |
406 | -WRN:s1 | |
407 | -WRON | |
408 | -ZChN | |
409 | -KEP | |
410 | -KSAP | |
411 | -LOP | |
412 | -MEP | |
413 | -MOP | |
414 | -NAP | |
415 | -PAP | |
416 | -PIP:s2 | |
417 | -WOŚP | |
418 | -BOS | |
419 | -CBOS | |
420 | -CBS | |
421 | -ITAR-TASS | |
422 | -KRS | |
423 | -KRUS | |
424 | -MPiPS | |
425 | -MS | |
426 | -NZS | |
427 | -PBS | |
428 | -PDS | |
429 | -PS | |
430 | -PiS | |
431 | -RAS | |
432 | -SGPiS | |
433 | -BAV | |
434 | -BUW | |
435 | -TOZ | |
436 | -BJ | |
437 | -PTJ | |
438 | -TKJ | |
439 | -PAI | |
440 | -RAI | |
441 | -SKOK | |
442 | -SOK | |
443 | -PPL | |
444 | -PRL | |
445 | -PSL | |
446 | -SDKPiL | |
447 | -ZSL | |
448 | -PŚ | |
449 | -KPZR | |
450 | -LPR | |
451 | -MRR | |
452 | -PPR | |
453 | -PZPR | |
454 | -TOPR | |
455 | -TPPR | |
456 | -TUR | |
457 | -UPR | |
458 | -GATT | |
459 | -KRRiT | |
460 | -PAT | |
461 | -PIT:s2 | |
462 | -FIFA | |
463 | -S.A. | |
464 | -AA:s2 | |
465 | -BA | |
466 | -BSA | |
467 | -FIFA | |
468 | -MSWiA | |
469 | -NBA | |
470 | -NWZA | |
471 | -RCA | |
472 | -SAA | |
473 | -SA | |
474 | -UPA | |
475 | -WZA | |
476 | -YMCA | |
477 | -YWCA | |
478 | -KGB | |
479 | -MB | |
480 | -OB | |
481 | -PB | |
482 | -PKB | |
483 | -PNB | |
484 | -SB | |
485 | -TAB | |
486 | -UB | |
487 | -USB | |
488 | -UwB | |
489 | -CFC | |
490 | -C | |
491 | -FC | |
492 | -GMC | |
493 | -KC | |
494 | -LC | |
495 | -NBC | |
496 | -OC | |
497 | -PC | |
498 | -PVC | |
499 | -UC | |
500 | -USC | |
501 | -WC | |
502 | -WTC | |
503 | -FDD | |
504 | -HDD | |
505 | -KLD | |
506 | -KRLD | |
507 | -LCD | |
508 | -LSD | |
509 | -ND | |
510 | -NKWD | |
511 | -NPD | |
512 | -NRD | |
513 | -OECD | |
514 | -OPD | |
515 | -PKD | |
516 | -SChD | |
517 | -SD | |
518 | -SLD | |
519 | -SPD:s1 | |
520 | -SPD:s2 | |
521 | -TPD | |
522 | -WKD | |
523 | -BRE | |
524 | -BSE | |
525 | -BWE | |
526 | -CINTE | |
527 | -IDE | |
528 | -KBWE | |
529 | -LE | |
530 | -OFE | |
531 | -PHARE | |
532 | -PTE | |
533 | -PZE | |
534 | -RE | |
535 | -RWE | |
536 | -UE | |
537 | -dBASE | |
538 | -RMF | |
539 | -SF | |
540 | -SPATiF | |
541 | -TKKF | |
542 | -DBG | |
543 | -EEG | |
544 | -EKG | |
545 | -EMG | |
546 | -EWG | |
547 | -KG | |
548 | -MG | |
549 | -PBG | |
550 | -PGNiG | |
551 | -PG | |
552 | -PZG | |
553 | -RWPG | |
554 | -SG | |
555 | -UG | |
556 | -USG | |
557 | -LH | |
558 | -OSH | |
559 | -PBH | |
560 | -PH | |
561 | -PZH | |
562 | -SGH | |
563 | -ZGH | |
564 | -pH | |
565 | -FBI | |
566 | -ITI | |
567 | -NFI | |
568 | -RAI | |
569 | -SI | |
570 | -TPI | |
571 | -UPI | |
572 | -WSI | |
573 | -PTJ | |
574 | -TKJ | |
575 | -BSK | |
576 | -MPK | |
577 | -MZK | |
578 | -PBK | |
579 | -PCK | |
580 | -PK | |
581 | -PTTK | |
582 | -SDK | |
583 | -UMK | |
584 | -WBK | |
585 | -WSK | |
586 | -ZK | |
587 | -PPL | |
588 | -PSL | |
589 | -ZSL | |
590 | -IBM | |
591 | -PGM | |
592 | -FN | |
593 | -MN | |
594 | -WRN:s1 | |
595 | -ZChN | |
596 | -MPO | |
597 | -NATO | |
598 | -ORMO | |
599 | -PKO | |
600 | -PLO | |
601 | -PO | |
602 | -PeKaO | |
603 | -RGO | |
604 | -ROPCiO | |
605 | -SEATO | |
606 | -SLO | |
607 | -SO | |
608 | -UFO | |
609 | -UNESCO | |
610 | -UNO | |
611 | -UO | |
612 | -WHO | |
613 | -WMO | |
614 | -WTO | |
615 | -ZOMO | |
616 | -IQ | |
617 | -Q | |
618 | -PPP | |
619 | -SDP | |
620 | -SMJP | |
621 | -SPP:s1 | |
622 | -SP | |
623 | -SdRP | |
624 | -TP:s1 | |
625 | -TP:s2 | |
626 | -TWP | |
627 | -TZSP | |
628 | -UP | |
629 | -WNP | |
630 | -WP | |
631 | -WSP | |
632 | -ZHP | |
633 | -ZLP | |
634 | -ZMP | |
635 | -ZNP | |
636 | -ZSMP | |
637 | -ZSP | |
638 | -MRR | |
639 | -PCR | |
640 | -PR:s2 | |
641 | -SGGW-AR | |
642 | -TOPR | |
643 | -TPPR | |
644 | -TUR | |
645 | -ZSRR | |
646 | -ZSSR | |
647 | -BOS | |
648 | -CBOS | |
649 | -CBS | |
650 | -CGS | |
651 | -IS | |
652 | -MPiPS | |
653 | -MS | |
654 | -NZS | |
655 | -PiS | |
656 | -SOS | |
657 | -SS:s1 | |
658 | -DDT | |
659 | -GATT | |
660 | -MT | |
661 | -TNT | |
662 | -UHT | |
663 | -UPT | |
664 | -CDU | |
665 | -CKU | |
666 | -CPU | |
667 | -CSU | |
668 | -EBU | |
669 | -ECU | |
670 | -GPU | |
671 | -PAU | |
672 | -PZU | |
673 | -WKU | |
674 | -CV | |
675 | -PCV | |
676 | -PRiTV | |
677 | -TGV | |
678 | -KW:s3 | |
679 | -MFW | |
680 | -MPW | |
681 | -MRiRW | |
682 | -MSW | |
683 | -NW | |
684 | -PCW | |
685 | -POW | |
686 | -PW | |
687 | -RSW | |
688 | -SGGW | |
689 | -TW | |
690 | -UKSW | |
691 | -UW:s1 | |
692 | -UW:s2 | |
693 | -VW | |
694 | -WFSW | |
695 | -WSW | |
696 | -ŻW:s1 | |
697 | -ŻZW | |
698 | -BWZ | |
699 | -LZ | |
700 | -MWGzZ | |
701 | -MZ | |
702 | -OPZZ | |
703 | -TOZ | |
704 | -UŁ | |
705 | -CPLiA | |
706 | -SAPA | |
707 | -UNRRA | |
708 | -VGA | |
709 | -ChD | |
710 | -FIDE | |
711 | -KE | |
712 | -FK | |
713 | -PTK | |
714 | -NCL | |
715 | -SM:s1 | |
716 | -TVN | |
717 | -FAO | |
718 | -FSO | |
719 | -ISO | |
720 | -KKO:s2 | |
721 | -KRO | |
722 | -MO | |
723 | -BP:s5 | |
724 | -BSP:s2 | |
725 | -DOKP | |
726 | -FDP:s1 | |
727 | -KPP | |
728 | -KWP | |
729 | -NSDAP | |
730 | -OSP | |
731 | -OWP:s2 | |
732 | -PKZP | |
733 | -PPPP | |
734 | -RP | |
735 | -TVP | |
736 | -SS:s2 | |
737 | -WFTU | |
738 | -CTV | |
739 | -MTV | |
740 | -TV | |
741 | -KW:s1 | |
742 | -MW | |
743 | -WWW | |
744 | -ŻW:s2 | |
745 | -NZ | |
746 | -W-Z | |
747 | -HB | |
748 | -ZPB | |
749 | -CD | |
750 | -PKLD | |
751 | -GBH | |
752 | -EBI | |
753 | -PCI | |
754 | -CK | |
755 | -GBGK | |
756 | -MCK | |
757 | -MDK | |
758 | -TK | |
759 | -JAL | |
760 | -NCL | |
761 | -QAL | |
762 | -GSM | |
763 | -KGHM | |
764 | -REM | |
765 | -ON | |
766 | -PEN | |
767 | -PKN | |
768 | -TVN | |
769 | -WRN:s2 | |
770 | -GIODO | |
771 | -KKO:s1 | |
772 | -RPO | |
773 | -BSP:s1 | |
774 | -CKŻP | |
775 | -DBP | |
776 | -EP | |
777 | -HP | |
778 | -HSP | |
779 | -HTTP | |
780 | -IP | |
781 | -KP:s1 | |
782 | -KP:s2 | |
783 | -MP:s1 | |
784 | -NBP | |
785 | -ORP | |
786 | -OWP:s1 | |
787 | -SPP:s2 | |
788 | -UTP | |
789 | -LR | |
790 | -NMT | |
791 | -PDT | |
792 | -RPU | |
793 | -DTV | |
794 | -UV | |
795 | -BW | |
796 | -GBW | |
797 | -KBW:s3 | |
798 | -SW | |
799 | -UKW | |
800 | -NSZZ | |
801 | -NZ | |
802 | -CD | |
803 | -CZD | |
804 | -ChD | |
805 | -DVD | |
806 | -CO | |
807 | -FAO | |
808 | -FSO | |
809 | -HBO | |
810 | -ISO | |
811 | -KKO:s1 | |
812 | -KKO:s2 | |
813 | -KRO | |
814 | -LO | |
815 | -MO | |
816 | -DOKP | |
817 | -DTP | |
818 | -EP | |
819 | -FDP:s1 | |
820 | -FDP:s2 | |
821 | -FTP | |
822 | -HP | |
823 | -IP | |
824 | -KPP | |
825 | -KWP | |
826 | -LP:s3 | |
827 | -MP:s2 | |
828 | -MSP | |
829 | -NBP | |
830 | -NSDAP | |
831 | -OSP | |
832 | -OWP:s1 | |
833 | -OWP:s2 | |
834 | -PKZP | |
835 | -PPPP | |
836 | -BGW | |
837 | -BW | |
838 | -GBW | |
839 | -KBW:s1 | |
840 | -KBW:s3 | |
841 | -KW:s1 | |
842 | -BHP | |
843 | -CZMP | |
844 | -AA:s1 | |
845 | -BCh | |
846 | - | |
847 | - | |
848 | - | |
849 | - | |
850 | - | |
851 | - | |
852 | - | |
853 | - | |
854 | - | |
855 | - | |
856 | - | |
857 | - | |
858 | - | |
859 | - | |
860 | - | |
861 | - | |
862 | - | |
863 | - |
morphology/data/fonetics_en.dic
0 → 100644
1 | +@symbols | |
2 | +ω a ą e ę o ó u | |
3 | +δ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y z ź ż - ε | |
4 | +γ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y ź ż - ε | |
5 | +ξ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y - ε | |
6 | +μ a ą b c ć d e ę f g k l ł m n ń o ó p r s ś t u w y ź ż - ε | |
7 | +λ b c ć d f g h k l ł m n ń p r s ś t w z ź ż - ε | |
8 | +κ b c ć d f g h i j k l ł m n ń p r s ś t w z ź ż - ε | |
9 | +σ a ą b c ć d e ę f g j h k l ł m n ń o ó p r s ś t u w y z ź ż - ε | |
10 | +ρ aε achε amiε ąε eε ęε iε oε omε onε umε ówε uε owiε omε emε yε owieε emuε egoε ε | |
11 | + | |
12 | +@rev_symbols | |
13 | +ω a ą e ę o ó u | |
14 | +δ a ą b c ć č d ʒ ǯ e ę f g h x i k l ł m n ń o ó p r ř s ś š t u v w y z ź ž - ε | |
15 | +λ b c ć č d ʒ ǯ f g h x k l ł m n ń p r ř s ś š t v w z ź ž - ε | |
16 | +κ b c ć č d ʒ ǯ f g h x i j ʲ k l ł m n ń p r ř s ś š t v w z ź ž - ε | |
17 | +ρ aε axε am′iε ąε eε ęε iε oε omε onε umε óvε uε ov′iε omε emε ε | |
18 | + | |
19 | +@rev_rules | |
20 | +#x ch ε | |
21 | +ks x | |
22 | +č tch | |
23 | +aj ay ρ | |
24 | +ej ey ρ | |
25 | +oj oy ρ | |
26 | +ł w | |
27 | +#v v | |
28 | +′ec iec ε | |
29 | +óv ów ε | |
30 | +ov′i owi ε | |
31 | +ov′e owie ε | |
32 | +am′i ami ε | |
33 | +ax ach ε | |
34 | +ře rze ε | |
35 | +t′e cie ε | |
36 | +n′e nie ε | |
37 | +d′e dzie ε | |
38 | +#ǯ ge ρ | |
39 | +#d de ρ | |
40 | +#s se ρ | |
41 | +#l le ρ | |
42 | +#s ce ρ | |
43 | +d de ε | |
44 | +d d ε | |
45 | +f ph ε | |
46 | +k ke ε | |
47 | +k k ε | |
48 | +k kes ε | |
49 | +k c | |
50 | +k ky ε | |
51 | +t te ε | |
52 | +t t ε | |
53 | +t th | |
54 | +t the ε | |
55 | +r re ε | |
56 | +r r ε | |
57 | +r rh ε | |
58 | +m me ε | |
59 | +m m ε | |
60 | +n ne ε | |
61 | +n n ε | |
62 | +s se ε | |
63 | +s s ε | |
64 | +s ce ε | |
65 | +s th ε | |
66 | +s the ε | |
67 | +os′t′ ość ε | |
68 | +ovy owy ε | |
69 | +k′i ki ε | |
70 | + | |
71 | +a a ε | |
72 | +a ah ε | |
73 | +ka kha ε | |
74 | +vja via ε | |
75 | +n′ja nia ε | |
76 | +li ly ε | |
77 | +g gh ε | |
78 | +g g ε | |
79 | + | |
80 | +ʲi ie ε | |
81 | +ʲi i ε | |
82 | +ʲi y ε | |
83 | + | |
84 | +@rules | |
85 | +#x ch ε | |
86 | +ks x | |
87 | +č tch | |
88 | +aj ay ρ | |
89 | +ej ey ρ | |
90 | +oj oy ρ | |
91 | +ł w | |
92 | +#v v | |
93 | +aʲi ai ε | |
94 | +oʲi oi ε | |
95 | +eʲi ei ε | |
96 | +uʲi ui ε | |
97 | +če cze ε | |
98 | +óv ów ε | |
99 | +ov′i owi ε | |
100 | +ov′e owie ε | |
101 | +am′i ami ε | |
102 | +ym′i ymi ε | |
103 | +ax ach ε | |
104 | +yx ych ε | |
105 | +ce ce ε | |
106 | +f′e fie ε | |
107 | +ře rze ε | |
108 | +t′e cie ε | |
109 | +kt′e kcie ε | |
110 | +s′t′e ście ε | |
111 | +m′e mie ε | |
112 | +n′e nie ε | |
113 | +d′e dzie ε | |
114 | +p′e pie ε | |
115 | +s′e sie ε | |
116 | +ks′e ksie ε | |
117 | +v′e vie ε | |
118 | +z′e zie ε | |
119 | + | |
120 | +g′i gi ε | |
121 | +k′ix kich ε | |
122 | +k′e kie ε | |
123 | +k′ej kiej ε | |
124 | +k′im kim ε | |
125 | +k′im′i kimi ε | |
126 | +k′i ki ε | |
127 | +k′i khi ε | |
128 | +ka ka ε | |
129 | +ką ką ε | |
130 | +ko ko ε | |
131 | +ku ku ε | |
132 | +lix lych ε | |
133 | +lim′i lymi ε | |
134 | +lim lym ε | |
135 | +řy rzy ε | |
136 | +m′i mi ε | |
137 | +#g′em ghiem ε | |
138 | +g′i ghi ε | |
139 | + | |
140 | +g′em giem ε | |
141 | +g g iem | |
142 | +k′em kiem ε | |
143 | +k k iem | |
144 | +d dh | |
145 | +g gh | |
146 | +g′ ghi em | |
147 | +l leigh | |
148 | +ʲi ie ch | |
149 | +ʲi ie mi | |
150 | +ʲi ie m | |
151 | +ʲi ee ch | |
152 | +ʲi ee mi | |
153 | +ʲego iego ε | |
154 | +#′ego iego ε | |
155 | +ʲemu iemu ε | |
156 | +ʲem iem ε | |
157 | +ʲim im ε | |
158 | +ʲix ich ε | |
159 | +ʲe ie ε | |
160 | +ʲim′i imi ε | |
161 | +ʲov′e iowie ε | |
162 | +x ch ε | |
163 | +m′i mi ε | |
164 | + | |
165 | +oł oe ’ | |
166 | +b by ’ | |
167 | +k′ ky ’ | |
168 | +r ry ’ | |
169 | +t thy ’ | |
170 | +d dy ’ | |
171 | +m my ’ | |
172 | +s cy ’ | |
173 | +l ly ’ | |
174 | +d dy ’ | |
175 | +s ce ’ | |
176 | +d de ’ | |
177 | +f fe ’ | |
178 | +ǯ ge ’ | |
179 | +k ke ’ | |
180 | +l le ’ | |
181 | +m me ’ | |
182 | +n ne ’ | |
183 | +r re ’ | |
184 | +s se ’ | |
185 | +t te ’ | |
186 | +t the ’ | |
187 | +v ve ’ | |
188 | +ł we ’ | |
189 | + ’ ρ | |
190 | +’ ’ owi | |
191 | + | |
192 | +s ce ε | |
193 | +d de ε | |
194 | +f fe ε | |
195 | +ǯ ge ε | |
196 | +k ke ε | |
197 | +l le ε | |
198 | +le le ε | |
199 | +m me ε | |
200 | +me me ε | |
201 | +n ne ε | |
202 | +r re ε | |
203 | +re re ε | |
204 | +s se ε | |
205 | +t te ε | |
206 | +v ve ε | |
207 | +ł we ε | |
208 | + | |
209 | +vja via ε | |
210 | +vją vią ε | |
211 | +vje vie ε | |
212 | +vję vię ε | |
213 | +vji vii ε | |
214 | +v′ij vij ε | |
215 | +vjom viom ε | |
216 | +vjo vio ε | |
217 | +vjax viach ε | |
218 | +vjam′i viami ε | |
219 | +n′ja nia ε | |
220 | +n′ją nią ε | |
221 | +n′je nie ε | |
222 | +n′ję nię ε | |
223 | +n′ji nii ε | |
224 | +n′ij nij ε | |
225 | +n′jom niom ε | |
226 | +n′jo nio ε | |
227 | +n′jax niach ε | |
228 | +n′jam′i niami ε | |
229 | + | |
230 | +os′t′ ość ε | |
231 | +os′t′ą ością ε | |
232 | +os′t′om ościom ε | |
233 | +os′t′am′i ościami ε | |
234 | +os′t′ax ościach ε | |
235 | +os′t′i ości ε | |
236 | +ovą ową ε | |
237 | +ovym′i owymi ε | |
238 | +ovym owym ε | |
239 | +ovyx owych ε | |
240 | +ovy owy ε | |
241 | +ovo owo ε | |
242 | +ov′i owi ε | |
243 | +ovemu owemu ε | |
244 | +ovej owej ε | |
245 | +ovego owego ε | |
246 | +ove owe ε | |
247 | +ova owa ε | |
248 | +oły owy ε | |
249 | +oła owa ε | |
250 | + | |
... | ... |
morphology/data/obce_langs.tab
... | ... | @@ -214,7 +214,6 @@ speedway en |
214 | 214 | runway en |
215 | 215 | replay en |
216 | 216 | permalloy en |
217 | -longplay en | |
218 | 217 | jersey:s1 en |
219 | 218 | fairway en |
220 | 219 | cockney:s1 en |
... | ... | @@ -227,21 +226,17 @@ joule fr |
227 | 226 | image fr |
228 | 227 | grunge en |
229 | 228 | grisaille fr |
230 | -freestyle en | |
231 | 229 | entourage fr |
232 | -ensemble fr | |
233 | 230 | decoupage fr |
234 | 231 | collége fr |
235 | 232 | college en |
236 | 233 | collage fr |
237 | -chippendale:s1 en | |
238 | 234 | cartridge en |
239 | 235 | assemblage fr |
240 | 236 | penthouse en |
241 | 237 | pedicure fr |
242 | 238 | offside en |
243 | 239 | mainframe en |
244 | -iphone en | |
245 | 240 | interface en |
246 | 241 | house en |
247 | 242 | high-life en |
... | ... | @@ -339,7 +334,6 @@ Java fr |
339 | 334 | Inkatha en |
340 | 335 | Hertha de |
341 | 336 | Nemcova cz |
342 | -Mantegna it | |
343 | 337 | Garcia es |
344 | 338 | casanova it |
345 | 339 | Venclova cz |
... | ... | @@ -987,7 +981,7 @@ xantia fr |
987 | 981 | Xawery pl |
988 | 982 | Xenia pl |
989 | 983 | xero pl |
990 | -XML en | |
984 | +XML acro | |
991 | 985 | Xymena pl |
992 | 986 | banjo en |
993 | 987 | banjola en |
... | ... | @@ -1765,9 +1759,7 @@ Samaranch es |
1765 | 1759 | Toeplitz de |
1766 | 1760 | Wachholz de |
1767 | 1761 | chow-chow en |
1768 | -collie en | |
1769 | 1762 | gourde fr |
1770 | -kelpie en | |
1771 | 1763 | malinois fr |
1772 | 1764 | mirage fr |
1773 | 1765 | Armagnac fr |
... | ... | @@ -1792,7 +1784,6 @@ zombie en |
1792 | 1784 | collie en |
1793 | 1785 | kelpie en |
1794 | 1786 | sheltie en |
1795 | -gourde fr | |
1796 | 1787 | porsche de |
1797 | 1788 | back-office en |
1798 | 1789 | brie fr |
... | ... | @@ -1841,3 +1832,746 @@ scotch en |
1841 | 1832 | cicerone it |
1842 | 1833 | gaucho es |
1843 | 1834 | intermezzo it |
1835 | +XBW acro | |
1836 | +DJ acro | |
1837 | +VIP acro | |
1838 | +DOS acro | |
1839 | +HIV acro | |
1840 | +SMS acro | |
1841 | +STAR acro | |
1842 | +TIR acro | |
1843 | +UNIX acro | |
1844 | +CMYK acro | |
1845 | +SGML acro | |
1846 | +sms acro | |
1847 | +ADP acro | |
1848 | +ANSI acro | |
1849 | +API acro | |
1850 | +APS acro | |
1851 | +ASCII acro | |
1852 | +ATP acro | |
1853 | +AWF acro | |
1854 | +BBK acro | |
1855 | +BC acro | |
1856 | +BDK acro | |
1857 | +BFG acro | |
1858 | +BG acro | |
1859 | +BGK acro | |
1860 | +BH acro | |
1861 | +BHK acro | |
1862 | +BISE acro | |
1863 | +BMW acro | |
1864 | +BP:s1 acro | |
1865 | +BPH acro | |
1866 | +DNA acro | |
1867 | +NSA acro | |
1868 | +RNA acro | |
1869 | +RSHA acro | |
1870 | +TWA acro | |
1871 | +BIG acro | |
1872 | +BOŚ acro | |
1873 | +BŚ acro | |
1874 | +ChRL acro | |
1875 | +DHL acro | |
1876 | +EUROPOL acro | |
1877 | +FATAH acro | |
1878 | +GL acro | |
1879 | +GUC acro | |
1880 | +HDL acro | |
1881 | +HTML acro | |
1882 | +IBL acro | |
1883 | +KAI acro | |
1884 | +KAL acro | |
1885 | +KUL acro | |
1886 | +LDL acro | |
1887 | +LOK acro | |
1888 | +MAL acro | |
1889 | +BERD acro | |
1890 | +CAD acro | |
1891 | +NEC acro | |
1892 | +OAPEC acro | |
1893 | +OPEC acro | |
1894 | +SAAB acro | |
1895 | +SIMM acro | |
1896 | +AAP acro | |
1897 | +AFP acro | |
1898 | +AGH acro | |
1899 | +AI:s1 acro | |
1900 | +AI:s2 acro | |
1901 | +AK acro | |
1902 | +AP:s1 acro | |
1903 | +AP:s2 acro | |
1904 | +AR acro | |
1905 | +ARiMR acro | |
1906 | +ASP acro | |
1907 | +ATK acro | |
1908 | +AWS acro | |
1909 | +ABS acro | |
1910 | +ADM acro | |
1911 | +ADN acro | |
1912 | +AF acro | |
1913 | +AIDS acro | |
1914 | +AIF acro | |
1915 | +AL acro | |
1916 | +AM acro | |
1917 | +AMERBANK acro | |
1918 | +AON acro | |
1919 | +ASEAN acro | |
1920 | +ASR acro | |
1921 | +AZS acro | |
1922 | +BAAS acro | |
1923 | +BASF acro | |
1924 | +ANSA acro | |
1925 | +CEFTA acro | |
1926 | +CIA acro | |
1927 | +DPA acro | |
1928 | +EFTA acro | |
1929 | +ELTA acro | |
1930 | +ENEA acro | |
1931 | +EPA acro | |
1932 | +ETA acro | |
1933 | +INA acro | |
1934 | +IRA acro | |
1935 | +JANA acro | |
1936 | +KNA acro | |
1937 | +NAFTA acro | |
1938 | +NASA acro | |
1939 | +NRA acro | |
1940 | +OPA acro | |
1941 | +PA acro | |
1942 | +PIA acro | |
1943 | +RPA acro | |
1944 | +ABB acro | |
1945 | +ABC acro | |
1946 | +AC acro | |
1947 | +ATT acro | |
1948 | +BBC acro | |
1949 | +BCG acro | |
1950 | +AGD acro | |
1951 | +LP:s1 acro | |
1952 | +LZS acro | |
1953 | +MTP acro | |
1954 | +MŚ:s2 acro | |
1955 | +OHP acro | |
1956 | +PKP acro | |
1957 | +UN acro | |
1958 | +WSiP acro | |
1959 | +WZiZT acro | |
1960 | +USA acro | |
1961 | +ZEA acro | |
1962 | +CIT acro | |
1963 | +DAT acro | |
1964 | +FAT acro | |
1965 | +LOT acro | |
1966 | +NOT acro | |
1967 | +OIT acro | |
1968 | +PAGART acro | |
1969 | +PIT acro | |
1970 | +VAT acro | |
1971 | +WAT acro | |
1972 | +ZHR acro | |
1973 | +tv acro | |
1974 | +PIT:s1 acro | |
1975 | +ABBA acro | |
1976 | +APA acro | |
1977 | +FAMA acro | |
1978 | +LETTA acro | |
1979 | +MENA acro | |
1980 | +SABENA acro | |
1981 | +UEFA acro | |
1982 | +MKOl acro | |
1983 | +MOK acro | |
1984 | +MPiK acro | |
1985 | +MŚ:s1 acro | |
1986 | +NASK acro | |
1987 | +NHL acro | |
1988 | +NIK acro | |
1989 | +NKOl acro | |
1990 | +PAH acro | |
1991 | +PAL acro | |
1992 | +PESEL acro | |
1993 | +PIH acro | |
1994 | +PKOl acro | |
1995 | +PLL acro | |
1996 | +PŁ acro | |
1997 | +URL acro | |
1998 | +UŚ acro | |
1999 | +UŚl acro | |
2000 | +WIG acro | |
2001 | +BBWR acro | |
2002 | +BGŻ acro | |
2003 | +BIOS acro | |
2004 | +BN acro | |
2005 | +BOR acro | |
2006 | +BPS acro | |
2007 | +BR acro | |
2008 | +BRBM acro | |
2009 | +BRR acro | |
2010 | +BS acro | |
2011 | +BSR acro | |
2012 | +BZ acro | |
2013 | +CAF acro | |
2014 | +CAM acro | |
2015 | +CD-ROM acro | |
2016 | +COCOM acro | |
2017 | +CRZZ acro | |
2018 | +CWKS acro | |
2019 | +DAB acro | |
2020 | +DLS acro | |
2021 | +ICJ acro | |
2022 | +IPN acro | |
2023 | +ISBN acro | |
2024 | +ISDN acro | |
2025 | +ISSN acro | |
2026 | +KERM acro | |
2027 | +MSZ acro | |
2028 | +NIP acro | |
2029 | +ONZ acro | |
2030 | +PKS acro | |
2031 | +PSS acro | |
2032 | +RN:s1 acro | |
2033 | +SARS acro | |
2034 | +UJ acro | |
2035 | +UOP acro | |
2036 | +VHF acro | |
2037 | +VHS acro | |
2038 | +WAP acro | |
2039 | +WiP acro | |
2040 | +DM acro | |
2041 | +DS acro | |
2042 | +DVD-ROM acro | |
2043 | +EBOR acro | |
2044 | +EBOiR acro | |
2045 | +EPROM acro | |
2046 | +ERM acro | |
2047 | +ESOP acro | |
2048 | +FADOM acro | |
2049 | +FAS acro | |
2050 | +FOZZ acro | |
2051 | +FPŻ acro | |
2052 | +FSM acro | |
2053 | +GKS acro | |
2054 | +GM acro | |
2055 | +GOPR acro | |
2056 | +GPRS acro | |
2057 | +GPS acro | |
2058 | +GS acro | |
2059 | +GUS acro | |
2060 | +IFOR acro | |
2061 | +IMiD acro | |
2062 | +KBN acro | |
2063 | +KLM acro | |
2064 | +KM:s1 acro | |
2065 | +KOR acro | |
2066 | +KPN acro | |
2067 | +KSERM acro | |
2068 | +LAN acro | |
2069 | +MBOiR acro | |
2070 | +MDM acro | |
2071 | +MEN acro | |
2072 | +MF acro | |
2073 | +MGM:s1 acro | |
2074 | +MKS acro | |
2075 | +MKiDN acro | |
2076 | +MON acro | |
2077 | +MOSiR acro | |
2078 | +MS-DOS acro | |
2079 | +MTS acro | |
2080 | +NEP acro | |
2081 | +NSZ acro | |
2082 | +OBOP acro | |
2083 | +OCR acro | |
2084 | +ONR acro | |
2085 | +ORWN acro | |
2086 | +OZN acro | |
2087 | +PANAM acro | |
2088 | +PBKS acro | |
2089 | +PBR acro | |
2090 | +PFRON acro | |
2091 | +PGR acro | |
2092 | +PIN acro | |
2093 | +PIP:s1 acro | |
2094 | +PIW acro | |
2095 | +PKWN acro | |
2096 | +PKiN acro | |
2097 | +PPS acro | |
2098 | +PR:s1 acro | |
2099 | +PRON acro | |
2100 | +PRS acro | |
2101 | +PUR acro | |
2102 | +PWN acro | |
2103 | +PZPN acro | |
2104 | +PŻM acro | |
2105 | +RAF acro | |
2106 | +RAM acro | |
2107 | +REGON acro | |
2108 | +RFN acro | |
2109 | +ROM acro | |
2110 | +ROP acro | |
2111 | +ROR acro | |
2112 | +RPN acro | |
2113 | +RUM acro | |
2114 | +RUP acro | |
2115 | +RdR acro | |
2116 | +SAS acro | |
2117 | +SCMS acro | |
2118 | +SECAM acro | |
2119 | +SIM acro | |
2120 | +SKS acro | |
2121 | +SPF acro | |
2122 | +SPN acro | |
2123 | +SUDANAIR acro | |
2124 | +TPN acro | |
2125 | +TS acro | |
2126 | +UAM acro | |
2127 | +UKF acro | |
2128 | +UM acro | |
2129 | +UMCS acro | |
2130 | +UMTS acro | |
2131 | +UNICEF acro | |
2132 | +UNZ acro | |
2133 | +UPS acro | |
2134 | +URM acro | |
2135 | +URz acro | |
2136 | +US acro | |
2137 | +UWM acro | |
2138 | +UWr acro | |
2139 | +UZ acro | |
2140 | +VCR acro | |
2141 | +WAN acro | |
2142 | +WBZ acro | |
2143 | +ZZ acro | |
2144 | +ckm acro | |
2145 | +lkm acro | |
2146 | +scs acro | |
2147 | +wf acro | |
2148 | +ŁBR acro | |
2149 | +ŁKS acro | |
2150 | +ŚPN acro | |
2151 | +WEP acro | |
2152 | +WF acro | |
2153 | +WKR acro | |
2154 | +WOP acro | |
2155 | +WOPR acro | |
2156 | +WSM acro | |
2157 | +WiN acro | |
2158 | +ZAIKS acro | |
2159 | +ZASP acro | |
2160 | +ZBOWiD acro | |
2161 | +ZBoWiD acro | |
2162 | +ZMS acro | |
2163 | +ZOM acro | |
2164 | +ZOSP acro | |
2165 | +ZOZ acro | |
2166 | +ZS acro | |
2167 | +ZUS acro | |
2168 | +ZWM acro | |
2169 | +ZWZ acro | |
2170 | +BWZ acro | |
2171 | +HDZ acro | |
2172 | +MWGzZ acro | |
2173 | +MZ acro | |
2174 | +OPZZ acro | |
2175 | +ZSZ acro | |
2176 | +MB acro | |
2177 | +TAB acro | |
2178 | +ŻOB acro | |
2179 | +NRF acro | |
2180 | +PKF acro | |
2181 | +RMF acro | |
2182 | +SPATiF acro | |
2183 | +TKKF acro | |
2184 | +IBM acro | |
2185 | +KM:s2 acro | |
2186 | +MGM:s2 acro | |
2187 | +PAM acro | |
2188 | +PGM acro | |
2189 | +RM acro | |
2190 | +SM:s2 acro | |
2191 | +ŚAM acro | |
2192 | +CNN acro | |
2193 | +CPN acro | |
2194 | +FN acro | |
2195 | +KEN acro | |
2196 | +KRN acro | |
2197 | +LN acro | |
2198 | +MN acro | |
2199 | +PAN acro | |
2200 | +PN acro | |
2201 | +REN acro | |
2202 | +RN:s2 acro | |
2203 | +WRN:s1 acro | |
2204 | +WRON acro | |
2205 | +ZChN acro | |
2206 | +KEP acro | |
2207 | +KSAP acro | |
2208 | +LOP acro | |
2209 | +MEP acro | |
2210 | +MOP acro | |
2211 | +NAP acro | |
2212 | +PAP acro | |
2213 | +PIP:s2 acro | |
2214 | +WOŚP acro | |
2215 | +BOS acro | |
2216 | +CBOS acro | |
2217 | +CBS acro | |
2218 | +ITAR-TASS acro | |
2219 | +KRS acro | |
2220 | +KRUS acro | |
2221 | +MPiPS acro | |
2222 | +MS acro | |
2223 | +NZS acro | |
2224 | +PBS acro | |
2225 | +PDS acro | |
2226 | +PS acro | |
2227 | +PiS acro | |
2228 | +RAS acro | |
2229 | +SGPiS acro | |
2230 | +BAV acro | |
2231 | +BUW acro | |
2232 | +TOZ acro | |
2233 | +BJ acro | |
2234 | +PTJ acro | |
2235 | +TKJ acro | |
2236 | +PAI acro | |
2237 | +RAI acro | |
2238 | +SKOK acro | |
2239 | +SOK acro | |
2240 | +PPL acro | |
2241 | +PRL acro | |
2242 | +PSL acro | |
2243 | +SDKPiL acro | |
2244 | +ZSL acro | |
2245 | +PŚ acro | |
2246 | +KPZR acro | |
2247 | +LPR acro | |
2248 | +MRR acro | |
2249 | +PPR acro | |
2250 | +PZPR acro | |
2251 | +TOPR acro | |
2252 | +TPPR acro | |
2253 | +TUR acro | |
2254 | +UPR acro | |
2255 | +GATT acro | |
2256 | +KRRiT acro | |
2257 | +PAT acro | |
2258 | +PIT:s2 acro | |
2259 | +FIFA acro | |
2260 | +S.A. acro | |
2261 | +AA:s2 acro | |
2262 | +BA acro | |
2263 | +BSA acro | |
2264 | +MSWiA acro | |
2265 | +NBA acro | |
2266 | +NWZA acro | |
2267 | +RCA acro | |
2268 | +SAA acro | |
2269 | +SA acro | |
2270 | +UPA acro | |
2271 | +WZA acro | |
2272 | +YMCA acro | |
2273 | +YWCA acro | |
2274 | +KGB acro | |
2275 | +OB acro | |
2276 | +PB acro | |
2277 | +PKB acro | |
2278 | +PNB acro | |
2279 | +SB acro | |
2280 | +UB acro | |
2281 | +USB acro | |
2282 | +UwB acro | |
2283 | +CFC acro | |
2284 | +C acro | |
2285 | +FC acro | |
2286 | +GMC acro | |
2287 | +KC acro | |
2288 | +LC acro | |
2289 | +NBC acro | |
2290 | +OC acro | |
2291 | +PC acro | |
2292 | +PVC acro | |
2293 | +UC acro | |
2294 | +USC acro | |
2295 | +WC acro | |
2296 | +WTC acro | |
2297 | +FDD acro | |
2298 | +HDD acro | |
2299 | +KLD acro | |
2300 | +KRLD acro | |
2301 | +LCD acro | |
2302 | +LSD acro | |
2303 | +ND acro | |
2304 | +NKWD acro | |
2305 | +NPD acro | |
2306 | +NRD acro | |
2307 | +OECD acro | |
2308 | +OPD acro | |
2309 | +PKD acro | |
2310 | +SChD acro | |
2311 | +SD acro | |
2312 | +SLD acro | |
2313 | +SPD:s1 acro | |
2314 | +SPD:s2 acro | |
2315 | +TPD acro | |
2316 | +WKD acro | |
2317 | +BRE acro | |
2318 | +BSE acro | |
2319 | +BWE acro | |
2320 | +CINTE acro | |
2321 | +IDE acro | |
2322 | +KBWE acro | |
2323 | +LE acro | |
2324 | +OFE acro | |
2325 | +PHARE acro | |
2326 | +PTE acro | |
2327 | +PZE acro | |
2328 | +RE acro | |
2329 | +RWE acro | |
2330 | +UE acro | |
2331 | +dBASE acro | |
2332 | +SF acro | |
2333 | +DBG acro | |
2334 | +EEG acro | |
2335 | +EKG acro | |
2336 | +EMG acro | |
2337 | +EWG acro | |
2338 | +KG acro | |
2339 | +MG acro | |
2340 | +PBG acro | |
2341 | +PGNiG acro | |
2342 | +PG acro | |
2343 | +PZG acro | |
2344 | +RWPG acro | |
2345 | +SG acro | |
2346 | +UG acro | |
2347 | +USG acro | |
2348 | +LH acro | |
2349 | +OSH acro | |
2350 | +PBH acro | |
2351 | +PH acro | |
2352 | +PZH acro | |
2353 | +SGH acro | |
2354 | +ZGH acro | |
2355 | +pH acro | |
2356 | +FBI acro | |
2357 | +ITI acro | |
2358 | +NFI acro | |
2359 | +SI acro | |
2360 | +TPI acro | |
2361 | +UPI acro | |
2362 | +WSI acro | |
2363 | +BSK acro | |
2364 | +MPK acro | |
2365 | +MZK acro | |
2366 | +PBK acro | |
2367 | +PCK acro | |
2368 | +PK acro | |
2369 | +PTTK acro | |
2370 | +SDK acro | |
2371 | +UMK acro | |
2372 | +WBK acro | |
2373 | +WSK acro | |
2374 | +ZK acro | |
2375 | +MPO acro | |
2376 | +NATO acro | |
2377 | +ORMO acro | |
2378 | +PKO acro | |
2379 | +PLO acro | |
2380 | +PO acro | |
2381 | +PeKaO acro | |
2382 | +RGO acro | |
2383 | +ROPCiO acro | |
2384 | +SEATO acro | |
2385 | +SLO acro | |
2386 | +SO acro | |
2387 | +UFO acro | |
2388 | +UNESCO acro | |
2389 | +UNO acro | |
2390 | +UO acro | |
2391 | +WHO acro | |
2392 | +WMO acro | |
2393 | +WTO acro | |
2394 | +ZOMO acro | |
2395 | +IQ acro | |
2396 | +Q acro | |
2397 | +PPP acro | |
2398 | +SDP acro | |
2399 | +SMJP acro | |
2400 | +SPP:s1 acro | |
2401 | +SP acro | |
2402 | +SdRP acro | |
2403 | +TP:s1 acro | |
2404 | +TP:s2 acro | |
2405 | +TWP acro | |
2406 | +TZSP acro | |
2407 | +UP acro | |
2408 | +WNP acro | |
2409 | +WP acro | |
2410 | +WSP acro | |
2411 | +ZHP acro | |
2412 | +ZLP acro | |
2413 | +ZMP acro | |
2414 | +ZNP acro | |
2415 | +ZSMP acro | |
2416 | +ZSP acro | |
2417 | +PCR acro | |
2418 | +PR:s2 acro | |
2419 | +SGGW-AR acro | |
2420 | +ZSRR acro | |
2421 | +ZSSR acro | |
2422 | +CGS acro | |
2423 | +IS acro | |
2424 | +SOS acro | |
2425 | +SS:s1 acro | |
2426 | +DDT acro | |
2427 | +MT acro | |
2428 | +TNT acro | |
2429 | +UHT acro | |
2430 | +UPT acro | |
2431 | +CDU acro | |
2432 | +CKU acro | |
2433 | +CPU acro | |
2434 | +CSU acro | |
2435 | +EBU acro | |
2436 | +ECU acro | |
2437 | +GPU acro | |
2438 | +PAU acro | |
2439 | +PZU acro | |
2440 | +WKU acro | |
2441 | +CV acro | |
2442 | +PCV acro | |
2443 | +PRiTV acro | |
2444 | +TGV acro | |
2445 | +KW:s3 acro | |
2446 | +MFW acro | |
2447 | +MPW acro | |
2448 | +MRiRW acro | |
2449 | +MSW acro | |
2450 | +NW acro | |
2451 | +PCW acro | |
2452 | +POW acro | |
2453 | +PW acro | |
2454 | +RSW acro | |
2455 | +SGGW acro | |
2456 | +TW acro | |
2457 | +UKSW acro | |
2458 | +UW:s1 acro | |
2459 | +UW:s2 acro | |
2460 | +VW acro | |
2461 | +WFSW acro | |
2462 | +WSW acro | |
2463 | +ŻW:s1 acro | |
2464 | +ŻZW acro | |
2465 | +LZ acro | |
2466 | +UŁ acro | |
2467 | +CPLiA acro | |
2468 | +SAPA acro | |
2469 | +UNRRA acro | |
2470 | +VGA acro | |
2471 | +ChD acro | |
2472 | +FIDE acro | |
2473 | +KE acro | |
2474 | +FK acro | |
2475 | +PTK acro | |
2476 | +SM:s1 acro | |
2477 | +TVN acro | |
2478 | +FAO acro | |
2479 | +FSO acro | |
2480 | +ISO acro | |
2481 | +KKO:s2 acro | |
2482 | +KRO acro | |
2483 | +MO acro | |
2484 | +BP:s5 acro | |
2485 | +BSP:s2 acro | |
2486 | +DOKP acro | |
2487 | +FDP:s1 acro | |
2488 | +KPP acro | |
2489 | +KWP acro | |
2490 | +NSDAP acro | |
2491 | +OSP acro | |
2492 | +OWP:s2 acro | |
2493 | +PKZP acro | |
2494 | +PPPP acro | |
2495 | +RP acro | |
2496 | +TVP acro | |
2497 | +SS:s2 acro | |
2498 | +WFTU acro | |
2499 | +CTV acro | |
2500 | +MTV acro | |
2501 | +TV acro | |
2502 | +KW:s1 acro | |
2503 | +MW acro | |
2504 | +WWW acro | |
2505 | +ŻW:s2 acro | |
2506 | +NZ acro | |
2507 | +W-Z acro | |
2508 | +HB acro | |
2509 | +ZPB acro | |
2510 | +CD acro | |
2511 | +PKLD acro | |
2512 | +GBH acro | |
2513 | +EBI acro | |
2514 | +PCI acro | |
2515 | +CK acro | |
2516 | +GBGK acro | |
2517 | +MCK acro | |
2518 | +MDK acro | |
2519 | +TK acro | |
2520 | +JAL acro | |
2521 | +NCL acro | |
2522 | +QAL acro | |
2523 | +GSM acro | |
2524 | +KGHM acro | |
2525 | +REM acro | |
2526 | +ON acro | |
2527 | +PEN acro | |
2528 | +PKN acro | |
2529 | +WRN:s2 acro | |
2530 | +GIODO acro | |
2531 | +KKO:s1 acro | |
2532 | +RPO acro | |
2533 | +BSP:s1 acro | |
2534 | +CKŻP acro | |
2535 | +DBP acro | |
2536 | +EP acro | |
2537 | +HP acro | |
2538 | +HSP acro | |
2539 | +HTTP acro | |
2540 | +IP acro | |
2541 | +KP:s1 acro | |
2542 | +KP:s2 acro | |
2543 | +MP:s1 acro | |
2544 | +NBP acro | |
2545 | +ORP acro | |
2546 | +OWP:s1 acro | |
2547 | +SPP:s2 acro | |
2548 | +UTP acro | |
2549 | +LR acro | |
2550 | +NMT acro | |
2551 | +PDT acro | |
2552 | +RPU acro | |
2553 | +DTV acro | |
2554 | +UV acro | |
2555 | +BW acro | |
2556 | +GBW acro | |
2557 | +KBW:s3 acro | |
2558 | +SW acro | |
2559 | +UKW acro | |
2560 | +NSZZ acro | |
2561 | +CZD acro | |
2562 | +DVD acro | |
2563 | +CO acro | |
2564 | +HBO acro | |
2565 | +LO acro | |
2566 | +DTP acro | |
2567 | +FDP:s2 acro | |
2568 | +FTP acro | |
2569 | +LP:s3 acro | |
2570 | +MP:s2 acro | |
2571 | +MSP acro | |
2572 | +BGW acro | |
2573 | +KBW:s1 acro | |
2574 | +BHP acro | |
2575 | +CZMP acro | |
2576 | +AA:s1 acro | |
2577 | +BCh acro | |
... | ... |
morphology/dict.ml
... | ... | @@ -161,6 +161,26 @@ let split_dict in_path filename out_path = |
161 | 161 | fprintf file "%s\t%s\t%s\n" form.orth entry.lemma form.interp |
162 | 162 | with Not_found -> ())))))) |
163 | 163 | |
(* split_language lang_filename in_path filename out_path
   Splits a dictionary into one output file per language.
   - lang_filename: tab file whose rows must have exactly two fields,
     lemma and language; any other row aborts with failwith.
   - in_path ^ filename: dictionary read with load_tab.
   - out_path: prefix of the generated files, which are named
     out_path ^ lang_ ^ language ^ _ ^ filename (a trailing .gz is cut
     from filename first).
   Entries whose lemma is absent from the language map are dropped. *)
164 | +let split_language lang_filename in_path filename out_path = | |
165 | + let map = File.fold_tab lang_filename StringMap.empty (fun map -> function | |
(* The callback given to add_inc prints the lemma, the newly read language
   and the value it receives, then returns the newly read language.
   NOTE(review): presumably add_inc calls it only when the lemma is already
   bound, so a repeated lemma is reported and overwritten - confirm. *)
166 | + [lemma; lang] -> StringMap.add_inc map lemma lang (fun lang2 -> print_endline ("split_language: " ^ lemma ^ " " ^ lang ^ " " ^ lang2); lang) | |
167 | + | line -> failwith ("split_language: " ^ (String.concat "\t" line))) in | |
168 | + let dict = load_tab (in_path ^ filename) in | |
169 | + let dict = List.rev (assign_entry_cat dict) in | |
170 | + let filename = if Xstring.check_sufix ".gz" filename then | |
171 | + Xstring.cut_sufix ".gz" filename else filename in | |
(* Group the entries by the language assigned to their lemma; Not_found
   from the lookup means the lemma has no language and the entry is skipped. *)
172 | + let dict_map = Xlist.fold dict StringMap.empty (fun dict_map e -> | |
173 | + try | |
174 | + let lang = StringMap.find map e.lemma in | |
175 | + StringMap.add_inc dict_map lang [e] (fun l -> e :: l) | |
176 | + with Not_found -> dict_map) in | |
(* One output file per language; each entry is written as a single
   tab-separated line: orth, lemma, interp of the form given by get_form. *)
177 | + StringMap.iter dict_map (fun lang dict -> | |
178 | + File.file_out (out_path ^ "lang_" ^ lang ^ "_" ^ filename) (fun file -> | |
179 | + Xlist.iter dict (fun entry -> | |
180 | + let form = get_form entry in | |
181 | + fprintf file "%s\t%s\t%s\n" form.orth entry.lemma form.interp))) | |
182 | + | |
183 | + | |
164 | 184 | let merge_entries dict = |
165 | 185 | let dict = assign_entry_cat dict in |
166 | 186 | let map = Xlist.fold dict StringMap.empty (fun map entry -> |
... | ... | @@ -318,6 +338,9 @@ let merge_interps lemma forms = |
318 | 338 | | "y",["subst:pl:gen:m3";"subst:pl:loc:m3"] -> {empty_form with orth=orth; interp="subst:pl:gen.loc:m3"} :: forms |
319 | 339 | | "y",["subst:pl:dat:m3";"subst:sg:inst:m3";"subst:sg:loc:m3"] -> {empty_form with orth=orth; interp="subst:sg:inst.loc:m3|subst:pl:dat:m3"} :: forms |
320 | 340 | | "y",["subst:sg:dat.loc:f";"subst:sg:gen:f";"subst:sg:nom:f";"subst:sg:voc:f"] -> {empty_form with orth=orth; interp="subst:sg:nom.gen.dat.loc.voc:f"} :: forms |
341 | +(* en *) | "y",["subst:sg:loc:m2";"subst:sg:voc:m2"]-> {empty_form with orth=orth; interp="subst:sg:loc.voc:m2"} :: forms | |
342 | +(* en *) | "y",["subst:sg:gen:m3";"subst:sg:loc:m3";"subst:sg:voc:m3"] -> {empty_form with orth=orth; interp="subst:sg:loc.voc:m3"} :: {empty_form with orth=orth; interp="subst:sg:gen:m3"} :: forms | |
343 | +(* en *) | "y",["subst:sg:loc:m3";"subst:sg:voc:m3"]-> {empty_form with orth=orth; interp="subst:sg:loc.voc:m3"} :: forms | |
321 | 344 | | "e",["depr:pl:nom.acc.voc:m2";"subst:sg:nom:m1";"subst:sg:voc:m1"] -> {empty_form with orth=orth; interp="subst:sg:nom.voc:m1|depr:pl:nom.acc.voc:m2"} :: forms |
322 | 345 | | "e",["depr:pl:nom.acc.voc:m2";"subst:sg.pl:nom.gen.dat.acc.inst.loc.voc:m1";"subst:sg:nom:m1";"subst:sg:voc:m1"] -> {empty_form with orth=orth; interp="subst:sg.pl:nom.gen.dat.acc.inst.loc.voc:m1|depr:pl:nom.acc.voc:m2"} :: {empty_form with orth=orth; interp="subst:sg:nom.voc:m1|depr:pl:nom.acc.voc:m2"} :: forms |
323 | 346 | | "e",["subst:sg:gen.acc:m1";"subst:sg:gen:m1"] -> {empty_form with orth=orth; interp="subst:sg:gen.acc:m1"} :: forms |
... | ... | @@ -541,7 +564,14 @@ let exceptional_lemmata = StringSet.of_list ([ |
541 | 564 | ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; |
542 | 565 | ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; |
543 | 566 | ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";*) |
544 | - ] @ File.load_lines "../morphology/data/obce.tab" @ File.load_lines "../morphology/data/akronimy.tab" @ | |
567 | + "unixowy"; "unixowość"; "survivalowy"; "survivalowość"; "survivalowiec"; "software’owy"; "software’owość"; "software’owo"; "rock’n’rollowy"; "rock’n’rollowość"; "rock’n’rollowo"; "qumrańskość"; | |
568 | + "qumrański"; "quizowy"; "quizowość"; "queerowy"; "queerowość"; "quadowy"; "quadowiec"; "oxfordzkość"; "oxfordzki"; "novellowskość"; "novellowski"; "nieunixowy"; | |
569 | + "nieunixowość"; "niesurvivalowy"; "niesurvivalowość"; "niesoftware’owy"; "niesoftware’owość"; "nierock’n’rollowy"; "nierock’n’rollowość"; "nierock’n’rollowo"; "niequmrańskość"; | |
570 | + "niequmrański"; "niequizowy"; "niequizowość"; "niequeerowość"; "niequeerowo"; "niequadowy"; "nieoxfordzkość"; "nieoxfordzki"; "nienovellowskość"; "nienovellowski"; "nienewage'owy"; "nieliverpoolskość"; | |
571 | + "nieliverpoolski"; "niekickboxingowy"; "niekickboxingowość"; "nieheavymetalowy"; "nieheavymetalowość"; "nieheavymetalowo"; "nieharvardzkość"; "nieharvardzki"; "niedeveloperskość"; | |
572 | + "niedeveloperski"; "niedaviscupowy"; "niedaviscupowość"; "niebrexitowość"; "nieampexowy"; "nieampexowość"; "newage'owy"; "newage'owość"; "newage'owo"; "liverpoolskość"; "liverpoolski"; | |
573 | + ] (*@ File.load_lines "../morphology/data/obce.tab" @ File.load_lines "../morphology/data/akronimy.tab" @*) | |
574 | + @ File.fold_tab "../morphology/data/obce_langs.tab" [] (fun l x -> List.hd x :: l) @ | |
545 | 575 | (*File.load_lines "../morphology/data/nieregularne.tab" @ File.load_lines "results/interp_validated_verb.tab" @ *) |
546 | 576 | (*File.load_lines "results/interp_validated_noun.tab" @ File.load_lines "results/interp_validated_adj.tab" @ |
547 | 577 | File.load_lines "../morphology/data/validated_adj.tab" @ File.load_lines "../morphology/data/validated_noun.tab" @ |
... | ... | @@ -557,6 +587,11 @@ let remove_exceptional_lemmata_gen ex dict = |
557 | 587 | if StringSet.mem ex entry.lemma then dict |
558 | 588 | else entry :: dict) |
559 | 589 | |
590 | +(* let select_lemmata set dict = | |
591 | + Xlist.fold dict [] (fun dict entry -> | |
592 | + if StringSet.mem set entry.lemma then entry :: dict | |
593 | + else dict) *) | |
594 | + | |
560 | 595 | let generate_stem dict = |
561 | 596 | Xlist.rev_map dict (fun entry -> |
562 | 597 | {entry with stem= |
... | ... | @@ -589,10 +624,10 @@ let fonetic_translation dict = |
589 | 624 | try |
590 | 625 | (* let lemma = Stem.simplify_lemma e.lemma in *) |
591 | 626 | (* let phon_lemma = Fonetics.translate_and_check true Fonetics.rules Fonetics.rev_rules lemma in *) |
592 | - let phon_stem = Fonetics.translate_and_check true Fonetics.rules Fonetics.rev_rules e.stem in | |
627 | + let phon_stem = Fonetics.translate(*_and_check*) true Fonetics.rules (*Fonetics.rev_rules*) e.stem in | |
593 | 628 | {e with (*phon_lemma = phon_lemma;*) phon_stem=Xlist.map phon_stem (fun s -> s.phon); |
594 | 629 | forms = Xlist.map e.forms (fun f -> |
595 | - let phon_orth = Fonetics.translate_and_check true Fonetics.rules Fonetics.rev_rules f.orth in | |
630 | + let phon_orth = Fonetics.translate(*_and_check*) true Fonetics.rules (*Fonetics.rev_rules*) f.orth in | |
596 | 631 | {f with phon_orth = phon_orth})} :: dict |
597 | 632 | with |
598 | 633 | Fonetics.NotFound(x,s) -> printf "NF %s %s %s\n%!" e.lemma x s; dict |
... | ... | @@ -616,8 +651,9 @@ let phon_validate rules dict = |
616 | 651 | let forms = Xlist.rev_map entry.forms (fun form -> |
617 | 652 | let candidates = Xlist.fold form.phon_orth [] (fun candidates s -> |
618 | 653 | Xlist.fold (Rules.CharTrees.find rules s.phon) candidates (fun candidates (stem,rule) -> |
619 | - let candidate_lemma = Fonetics.rev_translate true Fonetics.rev_rules (stem ^ rule.set) s.mapping in | |
620 | - if candidate_lemma = simple_lemma then (stem,rule) :: candidates else candidates)) in | |
654 | + let candidate_lemmas = Fonetics.rev_translate2 true Fonetics.rev_rules (stem ^ rule.set) s.mapping in | |
655 | + let b = Xlist.fold candidate_lemmas false (fun b candidate_lemma -> candidate_lemma = simple_lemma || b) in | |
656 | + if b then (stem,rule) :: candidates else candidates)) in | |
621 | 657 | if candidates = [] then {form with validated=false} else {form with validated=true}) in |
622 | 658 | {entry with forms=forms}) |
623 | 659 | |
... | ... | @@ -642,10 +678,14 @@ let validate_interp rules dict = |
642 | 678 | Xlist.fold (Rules.CharTrees.find rules s.phon) candidates (fun candidates (stem,rule) -> |
643 | 679 | (* if rule.star = Ndm && not entry.ndm then candidates else |
644 | 680 | if rule.star <> Ndm && entry.ndm then candidates else *) |
645 | - let candidate_lemma = Fonetics.rev_translate true Fonetics.rev_rules (stem ^ rule.set) s.mapping in | |
681 | + (* let candidate_lemma = Fonetics.rev_translate true Fonetics.rev_rules (stem ^ rule.set) s.mapping in | |
646 | 682 | if candidate_lemma = simple_lemma && form.interp = rule.interp then |
683 | + (stem,rule) :: candidates else candidates)) in *) | |
684 | + let candidate_lemmas = Fonetics.rev_translate2 true Fonetics.rev_rules (stem ^ rule.set) s.mapping in | |
685 | + let b = Xlist.fold candidate_lemmas false (fun b candidate_lemma -> candidate_lemma = simple_lemma || b) in | |
686 | + if b && form.interp = rule.interp then | |
647 | 687 | (stem,rule) :: candidates else candidates)) in |
648 | - if candidates = [] then {form with validated=false} else {form with validated=true}) in | |
688 | + if candidates = [] then ((*printf "validate_interp: %s\t%s\t%s\n" form.orth entry.lemma form.interp;*) {form with validated=false}) else {form with validated=true}) in | |
649 | 689 | {entry with forms=forms}) |
650 | 690 | |
651 | 691 | let remove_validated_forms dict = |
... | ... |
morphology/fonetics.ml
... | ... | @@ -65,8 +65,9 @@ let prepare_rules symbol_defs rules = |
65 | 65 | CharTree.create rules |
66 | 66 | |
(* Module-level pair of rule structures: the forward rules and the reverse
   rules, each built by prepare_rules from the symbol definitions and rules
   returned by load_rules. The rows below show both sides of the diff: the
   row marked with a minus loaded data/fonetics_acro.dic, while the rows
   marked with a plus comment that out and load data/fonetics_en.dic. *)
67 | 67 | let rules, rev_rules = |
68 | - let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_acro.dic" in | |
68 | + (* let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_acro.dic" in *) | |
69 | 69 | (* let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_pl.dic" in *) |
70 | + let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_en.dic" in | |
70 | 71 | prepare_rules symbol_defs rules, |
71 | 72 | prepare_rules rev_symbol_defs rev_rules |
72 | 73 | |
... | ... | @@ -143,8 +144,10 @@ let translate closure rules s = |
143 | 144 | (* printf "translate 2: %s\n%!" s; *) |
144 | 145 | s*) |
145 | 146 | |
146 | -let print_phon p = | |
147 | - Printf.printf "%s %s\n" p.Types.phon (String.concat " " (Xlist.map p.Types.mapping (fun (a,b) -> a ^ "->" ^ b))) | |
(* string_of_phon p renders p as text: the phon field, one space, then
   every (a,b) pair of the mapping field written as a->b and joined with
   single spaces. Nothing is printed; the string is returned. *)
147 | +let string_of_phon p = | |
148 | + Printf.sprintf "%s %s" p.Types.phon (String.concat " " (Xlist.map p.Types.mapping (fun (a,b) -> a ^ "->" ^ b))) | |
149 | + | |
(* print_phon p writes string_of_phon p to standard output followed by a
   newline (via print_endline). *)
150 | +let print_phon p = print_endline (string_of_phon p) | |
148 | 151 | |
149 | 152 | (*let _ = translate rules "blafickie" |
150 | 153 | let _ = translate rules "blafiacki" |
... | ... | @@ -183,11 +186,19 @@ let translate_single closure rules x = |
183 | 186 | |
(* rev_translate_rec x s m consumes the string s along the list m of (a,b)
   pairs, accumulating output in x. It returns a triple: the accumulated
   string, the unconsumed rest of s, and the unvisited rest of m.
   Behaviour of the version marked with plus signs:
   - a pair whose second component is the empty string is skipped;
   - if b is a prefix of s, a is appended to x and b is cut from s;
   - at the first pair whose b is not a prefix of s the walk stops, and that
     pair itself is not part of the returned remainder.
   The row marked with a minus is the earlier variant, which matched on a
   and appended b.
   NOTE(review): which component is the orthographic and which the phonetic
   segment is not visible here - confirm against Types.mapping. *)
184 | 187 | let rec rev_translate_rec x s = function |
185 | 188 | [] -> x,s,[] |
189 | + | (_,"") :: m -> rev_translate_rec x s m | |
186 | 190 | | (a,b) :: m -> |
187 | - if Xstring.check_prefix a s then rev_translate_rec (x^b) (Xstring.cut_prefix a s) m | |
191 | + if Xstring.check_prefix b s then rev_translate_rec (x^a) (Xstring.cut_prefix b s) m | |
188 | 192 | else x,s,m |
189 | 193 | |
(* rev_translate closure rev_rules s m maps s back through the mapping m
   using rev_translate_rec, starting from an empty accumulator. If the
   mapping consumed all of s the accumulated string is returned; otherwise
   the unconsumed rest is passed to translate_single with rev_rules and its
   result is appended. Returns a single string. *)
190 | 194 | let rev_translate closure rev_rules s m = |
191 | 195 | let x,s,_ = rev_translate_rec "" s m in |
192 | 196 | if s = "" then x else |
193 | 197 | x ^ (translate_single closure rev_rules s) |
198 | + | |
(* rev_translate2 closure rev_rules s m is the list-returning counterpart of
   rev_translate. The part of s covered by the mapping m is rebuilt with
   rev_translate_rec; if nothing is left the result is the one-element list
   holding that string. Otherwise the rest is passed to translate with
   rev_rules, and the phon field of every result is appended to the rebuilt
   prefix, giving one candidate per result (built with rev_map, so in the
   reverse order of the translate results).
   Raises NotFound (rest, empty string) when translate returns no result. *)
199 | +let rev_translate2 closure rev_rules s m = | |
200 | + let x,s,_ = rev_translate_rec "" s m in | |
201 | + if s = "" then [x] else | |
202 | + let l = translate closure rev_rules s in | |
203 | + if l = [] then raise (NotFound(s,"")) else | |
204 | + Xlist.rev_map l (fun y -> x ^ y.Types.phon) | |
... | ... |
morphology/generate.ml
... | ... | @@ -135,6 +135,7 @@ let _ = |
135 | 135 | (* Dict.split_dict sgjp_path sgjp_filename201607 results_path; *) |
136 | 136 | (* Dict.split_dict sgjp_path sgjp_filename201605 results_path; *) |
137 | 137 | (* Dict.split_dict sgjp_path polimorf_filename results_path; *) |
138 | + (* Dict.split_language "data/obce_langs.tab" sgjp_path sgjp_filename results_path; *) | |
138 | 139 | () |
139 | 140 | |
140 | 141 | (* Usunięcie form z prefixami *) |
... | ... | @@ -274,6 +275,7 @@ let _ = |
274 | 275 | find_not_validated_entries compound_rule_trees results_path verb_polimorf_filename "results/not_validated_p_verb.tab"; |
275 | 276 | find_not_validated_forms compound_rule_trees results_path verb_polimorf_filename "results/not_validated_p_verb2.tab"; *) |
276 | 277 | (* find_not_validated_forms compound_rule_trees results_path "sgjp_selected.tab" "results/not_validated_verb.tab"; *) |
278 | + find_not_validated_forms compound_rule_trees results_path "lang_en_sgjp-20170730.tab" "results/lang_en.tab"; | |
277 | 279 | () |
278 | 280 | |
279 | 281 | let find_not_validated_lemmata rules path filename out_filename = |
... | ... | @@ -313,13 +315,13 @@ let test_lemmatize lemma orth = |
313 | 315 | printf "test_lemmatize: %s %s\n%!" lemma orth; |
314 | 316 | let simple_lemma = Stem.simplify_lemma lemma in |
315 | 317 | let phon_orths = Fonetics.translate(*_and_check*) true Fonetics.rules (*Fonetics.rev_rules*) orth in |
316 | - printf "phon_orths: %s\n%!" (String.concat " " (Xlist.map phon_orths (fun s -> s.phon))); | |
318 | + printf "phon_orths: \n %s\n%!" (String.concat "\n " (Xlist.map phon_orths Fonetics.string_of_phon)); | |
317 | 319 | Xlist.iter phon_orths (fun phon_orth -> |
318 | 320 | Xlist.iter (Rules.CharTrees.find compound_rule_trees phon_orth.phon) (fun (stem,rule) -> |
319 | - let candidate_lemma = Fonetics.rev_translate true Fonetics.rev_rules (stem ^ rule.set) (phon_orth.mapping) in | |
320 | - printf " %s %s %s %s\n%!" phon_orth.phon stem (string_of_rule rule) candidate_lemma; | |
321 | - if candidate_lemma = simple_lemma then | |
322 | - printf "E %s %s %s\n%!" phon_orth.phon stem (string_of_rule rule))) | |
321 | + let candidate_lemmas = Fonetics.rev_translate2 true Fonetics.rev_rules (stem ^ rule.set) (phon_orth.mapping) in | |
322 | + Xlist.iter candidate_lemmas (fun candidate_lemma -> | |
323 | + if candidate_lemma = simple_lemma then printf "E" else printf " "; | |
324 | + printf " %s %s %s %s\n%!" phon_orth.phon stem (string_of_rule rule) candidate_lemma))) | |
323 | 325 | |
324 | 326 | (* Sprawdzenie przebiegu lematyzacji *) |
325 | 327 | let _ = |
... | ... | @@ -380,7 +382,27 @@ let _ = |
380 | 382 | (* test_lemmatize "WAT" "Wacie"; |
381 | 383 | test_lemmatize "WAT" "WACIE"; |
382 | 384 | test_lemmatize "WAT" "WAcie"; *) |
383 | - test_lemmatize "BOŚ" "BOŚ-u"; | |
385 | + (* test_lemmatize "BOŚ" "BOŚ-u"; *) | |
386 | + (* test_lemmatize "upgrade" "upgrade’om"; *) | |
387 | + (* test_lemmatize "software" "software’y"; *) | |
388 | + (* test_lemmatize "spray" "sprayu"; *) | |
389 | + (* test_lemmatize "unixowość" "unixowościach"; *) | |
390 | + (* test_lemmatize "rolls-royce" "rolls-roysie"; *) | |
391 | + (* test_lemmatize "Arrow" "Arrowa";*) | |
392 | + (* test_lemmatize "Boy" "Boyowie"; *) | |
393 | + (* test_lemmatize "Chomsky" "Chomskiego"; *) | |
394 | + (* test_lemmatize "Bradbury" "Bradburych"; *) | |
395 | + (* test_lemmatize "compact" "compakcie"; | |
396 | + test_lemmatize "Dixa" "Diksie"; *) | |
397 | + (* test_lemmatize "developerski" "developersko"; *) | |
398 | + (* test_lemmatize "Jessica" "Jessice"; *) | |
399 | + (* test_lemmatize "Gurkha" "Gurce"; | |
400 | + test_lemmatize "Gurkha" "Gurkhi"; *) | |
401 | + (* test_lemmatize "heavymetalowiec" "heavymetalowca"; *) | |
402 | + (* test_lemmatize "niesoftware’owy" "niesoftware’owi"; *) | |
403 | + (* test_lemmatize "" ""; | |
404 | + test_lemmatize "" ""; | |
405 | + test_lemmatize "" ""; *) | |
384 | 406 | () |
385 | 407 | |
386 | 408 | (* Generowanie reguł *) |
... | ... | @@ -438,6 +460,15 @@ let find_not_interp_validated_entries interp_rules path filename out_filename = |
438 | 460 | let dict = Dict.remove_validated_entries dict in |
439 | 461 | Dict.print out_filename dict |
440 | 462 | |
(* find_not_interp_validated_forms interp_rules path filename out_filename
   loads the dictionary at path ^ filename and runs it through, in order:
   Dict.merge_entries, Dict.process_interps, Dict.fonetic_translation,
   Dict.validate_interp (with interp_rules) and Dict.remove_validated_forms;
   the result is written with Dict.print to out_filename.
   NOTE(review): presumably the output lists only the forms that failed
   interpretation validation - confirm in Dict.remove_validated_forms. *)
463 | +let find_not_interp_validated_forms interp_rules path filename out_filename = | |
464 | + let dict = Dict.load_tab (path ^ filename) in | |
465 | + let dict = Dict.merge_entries dict in | |
466 | + let dict = Dict.process_interps dict in | |
467 | + let dict = Dict.fonetic_translation dict in | |
468 | + let dict = Dict.validate_interp interp_rules dict in | |
469 | + let dict = Dict.remove_validated_forms dict in | |
470 | + Dict.print out_filename dict | |
471 | + | |
441 | 472 | (* Wypisanie lematów ze zwalidowaną interpretacją *) |
442 | 473 | let _ = |
443 | 474 | (* find_interp_validated_lemmata interp_compound_rule_trees results_path noun_sgjp_filename "results/interp_validated_noun.tab"; |
... | ... | @@ -454,6 +485,7 @@ let _ = |
454 | 485 | (* find_not_interp_validated_lemmata interp_compound_rule_trees results_path "sgjp_selected.tab" "results/interp_not_validated_verb.tab"; *) |
455 | 486 | (* find_not_interp_validated_entries interp_compound_rule_trees results_path verb_sgjp_filename "results/selected_verb.tab"; *) |
456 | 487 | (* find_not_interp_validated_entries interp_compound_rule_trees results_path "verb_sgjp_no_pref.tab" "results/selected_verb.tab"; *) |
488 | + (* find_not_interp_validated_forms interp_compound_rule_trees results_path "lang_en_sgjp-20170730.tab" "results/lang_en.tab"; *) | |
457 | 489 | () |
458 | 490 | |
459 | 491 | (* Generowanie reguł dla interpretacji *) |
... | ... |