Commit 56c5b9f7eb6000373d9c88f02a23dc67cc052e55

Authored by Wojciech Jaworski
1 parent 7f69c1b8

Wstępna wersja reguł dla ortografii angielskiej

morphology/data/akronimy.tab deleted
1   -XBW
2   -DJ
3   -VIP
4   -AIDS
5   -CMYK
6   -DOS
7   -HIV
8   -HTML
9   -SGML
10   -SMS
11   -STAR
12   -TIR
13   -UNIX
14   -CMYK
15   -HTML
16   -SGML
17   -sms
18   -ADP
19   -AIDS
20   -ANSI
21   -API
22   -APS
23   -ASCII
24   -ATP
25   -AWF
26   -BBK
27   -BC
28   -BDK
29   -BFG
30   -BG
31   -BGK
32   -BH
33   -BHK
34   -BISE
35   -BMW
36   -BP:s1
37   -BPH
38   -DNA
39   -NSA
40   -RNA
41   -RSHA
42   -TWA
43   -AL
44   -AMERBANK
45   -BIG
46   -BOŚ
47   -BŚ
48   -ChRL
49   -DHL
50   -EUROPOL
51   -FATAH
52   -GL
53   -GUC
54   -HDL
55   -HTML
56   -IBL
57   -KAI
58   -KAL
59   -KUL
60   -LDL
61   -LOK
62   -MAL
63   -BERD
64   -CAD
65   -NEC
66   -OAPEC
67   -OPEC
68   -SAAB
69   -SIMM
70   -AAP
71   -ADM
72   -ADN
73   -AF
74   -AFP
75   -AGH
76   -AI:s1
77   -AI:s2
78   -AIF
79   -AK
80   -AL
81   -AM
82   -AON
83   -AP:s1
84   -AP:s2
85   -AR
86   -ARiMR
87   -ASP
88   -ATK
89   -AWS
90   -ABS
91   -ADM
92   -ADN
93   -AF
94   -AIDS
95   -AIF
96   -AL
97   -AM
98   -AMERBANK
99   -AON
100   -APS
101   -AR
102   -ARiMR
103   -ASEAN
104   -ASR
105   -AWF
106   -AWS
107   -AZS
108   -BAAS
109   -BASF
110   -ANSA
111   -CEFTA
112   -CIA
113   -DPA
114   -EFTA
115   -ELTA
116   -ENEA
117   -EPA
118   -ETA
119   -INA
120   -IRA
121   -JANA
122   -KNA
123   -NAFTA
124   -NASA
125   -NRA
126   -OPA
127   -PA
128   -PIA
129   -RPA
130   -ABB
131   -ABC
132   -AC
133   -ADP
134   -AFP
135   -AGH
136   -AI:s1
137   -AIDS
138   -AK
139   -AL
140   -ANSI
141   -AP:s1
142   -ATK
143   -ATP
144   -ATT
145   -BBC
146   -BBK
147   -BCG
148   -BFG
149   -BG
150   -AGD
151   -LP:s1
152   -LZS
153   -MTP
154   -MŚ:s2
155   -OHP
156   -PKP
157   -UN
158   -WSiP
159   -WZiZT
160   -USA
161   -ZEA
162   -CIT
163   -DAT
164   -FAT
165   -LOT
166   -NOT
167   -OIT
168   -PAGART
169   -PIT
170   -VAT
171   -WAT
172   -ZHR
173   -tv
174   -PIT:s1
175   -ABBA
176   -APA
177   -FAMA
178   -LETTA
179   -MENA
180   -SABENA
181   -UEFA
182   -MKOl
183   -MOK
184   -MPiK
185   -MŚ:s1
186   -NASK
187   -NHL
188   -NIK
189   -NKOl
190   -PAH
191   -PAL
192   -PESEL
193   -PIH
194   -PKOl
195   -PLL
196   -PŁ
197   -URL
198   -UŚ
199   -UŚl
200   -WIG
201   -BBWR
202   -BGŻ
203   -BIOS
204   -BN
205   -BOR
206   -BPS
207   -BR
208   -BRBM
209   -BRR
210   -BS
211   -BSR
212   -BZ
213   -CAF
214   -CAM
215   -CD-ROM
216   -COCOM
217   -CRZZ
218   -CWKS
219   -DAB
220   -DLS
221   -CAM
222   -CRZZ
223   -ICJ
224   -IPN
225   -ISBN
226   -ISDN
227   -ISSN
228   -KERM
229   -MSZ
230   -NIK
231   -NIP
232   -ONZ
233   -PKS
234   -PSS
235   -RN:s1
236   -SARS
237   -UJ
238   -UOP
239   -VHF
240   -VHS
241   -WAP
242   -WiP
243   -DM
244   -DS
245   -DVD-ROM
246   -EBOR
247   -EBOiR
248   -EPROM
249   -ERM
250   -ESOP
251   -FADOM
252   -FAS
253   -FOZZ
254   -FPŻ
255   -FSM
256   -GKS
257   -GM
258   -GOPR
259   -GPRS
260   -GPS
261   -GS
262   -GUS
263   -IFOR
264   -IMiD
265   -KBN
266   -KLM
267   -KM:s1
268   -KOR
269   -KPN
270   -KSERM
271   -LAN
272   -MBOiR
273   -MDM
274   -MEN
275   -MF
276   -MGM:s1
277   -MKS
278   -MKiDN
279   -MON
280   -MOSiR
281   -MS-DOS
282   -MTS
283   -NEP
284   -NSZ
285   -OBOP
286   -OCR
287   -ONR
288   -ORWN
289   -OZN
290   -PANAM
291   -PBKS
292   -PBR
293   -PFRON
294   -PGR
295   -PIN
296   -PIP:s1
297   -PIW
298   -PKWN
299   -PKiN
300   -PPS
301   -PR:s1
302   -PRON
303   -PRS
304   -PUR
305   -PWN
306   -PZPN
307   -PŻM
308   -RAF
309   -RAM
310   -REGON
311   -RFN
312   -ROM
313   -ROP
314   -ROR
315   -RPN
316   -RUM
317   -RUP
318   -RdR
319   -SAS
320   -SCMS
321   -SECAM
322   -SIM
323   -SKS
324   -SPF
325   -SPN
326   -SUDANAIR
327   -TPN
328   -TS
329   -UAM
330   -UKF
331   -UM
332   -UMCS
333   -UMTS
334   -UNICEF
335   -UNZ
336   -UPS
337   -URM
338   -URz
339   -US
340   -UWM
341   -UWr
342   -UZ
343   -VCR
344   -WAN
345   -WBZ
346   -ZZ
347   -ckm
348   -lkm
349   -scs
350   -wf
351   -ŁBR
352   -ŁKS
353   -ŚPN
354   -WEP
355   -WF
356   -WKR
357   -WOP
358   -WOPR
359   -WSM
360   -WiN
361   -ZAIKS
362   -ZASP
363   -ZBOWiD
364   -ZBoWiD
365   -ZMS
366   -ZOM
367   -ZOSP
368   -ZOZ
369   -ZS
370   -ZUS
371   -ZWM
372   -ZWZ
373   -BWZ
374   -HDZ
375   -MWGzZ
376   -MZ
377   -OPZZ
378   -ZSZ
379   -MB
380   -TAB
381   -ŻOB
382   -NRF
383   -PKF
384   -RMF
385   -SPATiF
386   -TKKF
387   -IBM
388   -KM:s2
389   -MGM:s2
390   -PAM
391   -PGM
392   -RM
393   -SM:s2
394   -ŚAM
395   -CNN
396   -CPN
397   -FN
398   -KEN
399   -KRN
400   -LN
401   -MN
402   -PAN
403   -PN
404   -REN
405   -RN:s2
406   -WRN:s1
407   -WRON
408   -ZChN
409   -KEP
410   -KSAP
411   -LOP
412   -MEP
413   -MOP
414   -NAP
415   -PAP
416   -PIP:s2
417   -WOŚP
418   -BOS
419   -CBOS
420   -CBS
421   -ITAR-TASS
422   -KRS
423   -KRUS
424   -MPiPS
425   -MS
426   -NZS
427   -PBS
428   -PDS
429   -PS
430   -PiS
431   -RAS
432   -SGPiS
433   -BAV
434   -BUW
435   -TOZ
436   -BJ
437   -PTJ
438   -TKJ
439   -PAI
440   -RAI
441   -SKOK
442   -SOK
443   -PPL
444   -PRL
445   -PSL
446   -SDKPiL
447   -ZSL
448   -PŚ
449   -KPZR
450   -LPR
451   -MRR
452   -PPR
453   -PZPR
454   -TOPR
455   -TPPR
456   -TUR
457   -UPR
458   -GATT
459   -KRRiT
460   -PAT
461   -PIT:s2
462   -FIFA
463   -S.A.
464   -AA:s2
465   -BA
466   -BSA
467   -FIFA
468   -MSWiA
469   -NBA
470   -NWZA
471   -RCA
472   -SAA
473   -SA
474   -UPA
475   -WZA
476   -YMCA
477   -YWCA
478   -KGB
479   -MB
480   -OB
481   -PB
482   -PKB
483   -PNB
484   -SB
485   -TAB
486   -UB
487   -USB
488   -UwB
489   -CFC
490   -C
491   -FC
492   -GMC
493   -KC
494   -LC
495   -NBC
496   -OC
497   -PC
498   -PVC
499   -UC
500   -USC
501   -WC
502   -WTC
503   -FDD
504   -HDD
505   -KLD
506   -KRLD
507   -LCD
508   -LSD
509   -ND
510   -NKWD
511   -NPD
512   -NRD
513   -OECD
514   -OPD
515   -PKD
516   -SChD
517   -SD
518   -SLD
519   -SPD:s1
520   -SPD:s2
521   -TPD
522   -WKD
523   -BRE
524   -BSE
525   -BWE
526   -CINTE
527   -IDE
528   -KBWE
529   -LE
530   -OFE
531   -PHARE
532   -PTE
533   -PZE
534   -RE
535   -RWE
536   -UE
537   -dBASE
538   -RMF
539   -SF
540   -SPATiF
541   -TKKF
542   -DBG
543   -EEG
544   -EKG
545   -EMG
546   -EWG
547   -KG
548   -MG
549   -PBG
550   -PGNiG
551   -PG
552   -PZG
553   -RWPG
554   -SG
555   -UG
556   -USG
557   -LH
558   -OSH
559   -PBH
560   -PH
561   -PZH
562   -SGH
563   -ZGH
564   -pH
565   -FBI
566   -ITI
567   -NFI
568   -RAI
569   -SI
570   -TPI
571   -UPI
572   -WSI
573   -PTJ
574   -TKJ
575   -BSK
576   -MPK
577   -MZK
578   -PBK
579   -PCK
580   -PK
581   -PTTK
582   -SDK
583   -UMK
584   -WBK
585   -WSK
586   -ZK
587   -PPL
588   -PSL
589   -ZSL
590   -IBM
591   -PGM
592   -FN
593   -MN
594   -WRN:s1
595   -ZChN
596   -MPO
597   -NATO
598   -ORMO
599   -PKO
600   -PLO
601   -PO
602   -PeKaO
603   -RGO
604   -ROPCiO
605   -SEATO
606   -SLO
607   -SO
608   -UFO
609   -UNESCO
610   -UNO
611   -UO
612   -WHO
613   -WMO
614   -WTO
615   -ZOMO
616   -IQ
617   -Q
618   -PPP
619   -SDP
620   -SMJP
621   -SPP:s1
622   -SP
623   -SdRP
624   -TP:s1
625   -TP:s2
626   -TWP
627   -TZSP
628   -UP
629   -WNP
630   -WP
631   -WSP
632   -ZHP
633   -ZLP
634   -ZMP
635   -ZNP
636   -ZSMP
637   -ZSP
638   -MRR
639   -PCR
640   -PR:s2
641   -SGGW-AR
642   -TOPR
643   -TPPR
644   -TUR
645   -ZSRR
646   -ZSSR
647   -BOS
648   -CBOS
649   -CBS
650   -CGS
651   -IS
652   -MPiPS
653   -MS
654   -NZS
655   -PiS
656   -SOS
657   -SS:s1
658   -DDT
659   -GATT
660   -MT
661   -TNT
662   -UHT
663   -UPT
664   -CDU
665   -CKU
666   -CPU
667   -CSU
668   -EBU
669   -ECU
670   -GPU
671   -PAU
672   -PZU
673   -WKU
674   -CV
675   -PCV
676   -PRiTV
677   -TGV
678   -KW:s3
679   -MFW
680   -MPW
681   -MRiRW
682   -MSW
683   -NW
684   -PCW
685   -POW
686   -PW
687   -RSW
688   -SGGW
689   -TW
690   -UKSW
691   -UW:s1
692   -UW:s2
693   -VW
694   -WFSW
695   -WSW
696   -ŻW:s1
697   -ŻZW
698   -BWZ
699   -LZ
700   -MWGzZ
701   -MZ
702   -OPZZ
703   -TOZ
704   -UŁ
705   -CPLiA
706   -SAPA
707   -UNRRA
708   -VGA
709   -ChD
710   -FIDE
711   -KE
712   -FK
713   -PTK
714   -NCL
715   -SM:s1
716   -TVN
717   -FAO
718   -FSO
719   -ISO
720   -KKO:s2
721   -KRO
722   -MO
723   -BP:s5
724   -BSP:s2
725   -DOKP
726   -FDP:s1
727   -KPP
728   -KWP
729   -NSDAP
730   -OSP
731   -OWP:s2
732   -PKZP
733   -PPPP
734   -RP
735   -TVP
736   -SS:s2
737   -WFTU
738   -CTV
739   -MTV
740   -TV
741   -KW:s1
742   -MW
743   -WWW
744   -ŻW:s2
745   -NZ
746   -W-Z
747   -HB
748   -ZPB
749   -CD
750   -PKLD
751   -GBH
752   -EBI
753   -PCI
754   -CK
755   -GBGK
756   -MCK
757   -MDK
758   -TK
759   -JAL
760   -NCL
761   -QAL
762   -GSM
763   -KGHM
764   -REM
765   -ON
766   -PEN
767   -PKN
768   -TVN
769   -WRN:s2
770   -GIODO
771   -KKO:s1
772   -RPO
773   -BSP:s1
774   -CKŻP
775   -DBP
776   -EP
777   -HP
778   -HSP
779   -HTTP
780   -IP
781   -KP:s1
782   -KP:s2
783   -MP:s1
784   -NBP
785   -ORP
786   -OWP:s1
787   -SPP:s2
788   -UTP
789   -LR
790   -NMT
791   -PDT
792   -RPU
793   -DTV
794   -UV
795   -BW
796   -GBW
797   -KBW:s3
798   -SW
799   -UKW
800   -NSZZ
801   -NZ
802   -CD
803   -CZD
804   -ChD
805   -DVD
806   -CO
807   -FAO
808   -FSO
809   -HBO
810   -ISO
811   -KKO:s1
812   -KKO:s2
813   -KRO
814   -LO
815   -MO
816   -DOKP
817   -DTP
818   -EP
819   -FDP:s1
820   -FDP:s2
821   -FTP
822   -HP
823   -IP
824   -KPP
825   -KWP
826   -LP:s3
827   -MP:s2
828   -MSP
829   -NBP
830   -NSDAP
831   -OSP
832   -OWP:s1
833   -OWP:s2
834   -PKZP
835   -PPPP
836   -BGW
837   -BW
838   -GBW
839   -KBW:s1
840   -KBW:s3
841   -KW:s1
842   -BHP
843   -CZMP
844   -AA:s1
845   -BCh
846   -
847   -
848   -
849   -
850   -
851   -
852   -
853   -
854   -
855   -
856   -
857   -
858   -
859   -
860   -
861   -
862   -
863   -
morphology/data/fonetics_en.dic 0 → 100644
  1 +@symbols
  2 +ω a ą e ę o ó u
  3 +δ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y z ź ż - ε
  4 +γ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y ź ż - ε
  5 +ξ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y - ε
  6 +μ a ą b c ć d e ę f g k l ł m n ń o ó p r s ś t u w y ź ż - ε
  7 +λ b c ć d f g h k l ł m n ń p r s ś t w z ź ż - ε
  8 +κ b c ć d f g h i j k l ł m n ń p r s ś t w z ź ż - ε
  9 +σ a ą b c ć d e ę f g j h k l ł m n ń o ó p r s ś t u w y z ź ż - ε
  10 +ρ aε achε amiε ąε eε ęε iε oε omε onε umε ówε uε owiε omε emε yε owieε emuε egoε ε
  11 +
  12 +@rev_symbols
  13 +ω a ą e ę o ó u
  14 +δ a ą b c ć č d ʒ ǯ e ę f g h x i k l ł m n ń o ó p r ř s ś š t u v w y z ź ž - ε
  15 +λ b c ć č d ʒ ǯ f g h x k l ł m n ń p r ř s ś š t v w z ź ž - ε
  16 +κ b c ć č d ʒ ǯ f g h x i j ʲ k l ł m n ń p r ř s ś š t v w z ź ž - ε
  17 +ρ aε axε am′iε ąε eε ęε iε oε omε onε umε óvε uε ov′iε omε emε ε
  18 +
  19 +@rev_rules
  20 +#x ch ε
  21 +ks x
  22 +č tch
  23 +aj ay ρ
  24 +ej ey ρ
  25 +oj oy ρ
  26 +ł w
  27 +#v v
  28 +′ec iec ε
  29 +óv ów ε
  30 +ov′i owi ε
  31 +ov′e owie ε
  32 +am′i ami ε
  33 +ax ach ε
  34 +ře rze ε
  35 +t′e cie ε
  36 +n′e nie ε
  37 +d′e dzie ε
  38 +#ǯ ge ρ
  39 +#d de ρ
  40 +#s se ρ
  41 +#l le ρ
  42 +#s ce ρ
  43 +d de ε
  44 +d d ε
  45 +f ph ε
  46 +k ke ε
  47 +k k ε
  48 +k kes ε
  49 +k c
  50 +k ky ε
  51 +t te ε
  52 +t t ε
  53 +t th
  54 +t the ε
  55 +r re ε
  56 +r r ε
  57 +r rh ε
  58 +m me ε
  59 +m m ε
  60 +n ne ε
  61 +n n ε
  62 +s se ε
  63 +s s ε
  64 +s ce ε
  65 +s th ε
  66 +s the ε
  67 +os′t′ ość ε
  68 +ovy owy ε
  69 +k′i ki ε
  70 +
  71 +a a ε
  72 +a ah ε
  73 +ka kha ε
  74 +vja via ε
  75 +n′ja nia ε
  76 +li ly ε
  77 +g gh ε
  78 +g g ε
  79 +
  80 +ʲi ie ε
  81 +ʲi i ε
  82 +ʲi y ε
  83 +
  84 +@rules
  85 +#x ch ε
  86 +ks x
  87 +č tch
  88 +aj ay ρ
  89 +ej ey ρ
  90 +oj oy ρ
  91 +ł w
  92 +#v v
  93 +aʲi ai ε
  94 +oʲi oi ε
  95 +eʲi ei ε
  96 +uʲi ui ε
  97 +če cze ε
  98 +óv ów ε
  99 +ov′i owi ε
  100 +ov′e owie ε
  101 +am′i ami ε
  102 +ym′i ymi ε
  103 +ax ach ε
  104 +yx ych ε
  105 +ce ce ε
  106 +f′e fie ε
  107 +ře rze ε
  108 +t′e cie ε
  109 +kt′e kcie ε
  110 +s′t′e ście ε
  111 +m′e mie ε
  112 +n′e nie ε
  113 +d′e dzie ε
  114 +p′e pie ε
  115 +s′e sie ε
  116 +ks′e ksie ε
  117 +v′e vie ε
  118 +z′e zie ε
  119 +
  120 +g′i gi ε
  121 +k′ix kich ε
  122 +k′e kie ε
  123 +k′ej kiej ε
  124 +k′im kim ε
  125 +k′im′i kimi ε
  126 +k′i ki ε
  127 +k′i khi ε
  128 +ka ka ε
  129 +ką ką ε
  130 +ko ko ε
  131 +ku ku ε
  132 +lix lych ε
  133 +lim′i lymi ε
  134 +lim lym ε
  135 +řy rzy ε
  136 +m′i mi ε
  137 +#g′em ghiem ε
  138 +g′i ghi ε
  139 +
  140 +g′em giem ε
  141 +g g iem
  142 +k′em kiem ε
  143 +k k iem
  144 +d dh
  145 +g gh
  146 +g′ ghi em
  147 +l leigh
  148 +ʲi ie ch
  149 +ʲi ie mi
  150 +ʲi ie m
  151 +ʲi ee ch
  152 +ʲi ee mi
  153 +ʲego iego ε
  154 +#′ego iego ε
  155 +ʲemu iemu ε
  156 +ʲem iem ε
  157 +ʲim im ε
  158 +ʲix ich ε
  159 +ʲe ie ε
  160 +ʲim′i imi ε
  161 +ʲov′e iowie ε
  162 +x ch ε
  163 +m′i mi ε
  164 +
  165 +oł oe ’
  166 +b by ’
  167 +k′ ky ’
  168 +r ry ’
  169 +t thy ’
  170 +d dy ’
  171 +m my ’
  172 +s cy ’
  173 +l ly ’
  174 +d dy ’
  175 +s ce ’
  176 +d de ’
  177 +f fe ’
  178 +ǯ ge ’
  179 +k ke ’
  180 +l le ’
  181 +m me ’
  182 +n ne ’
  183 +r re ’
  184 +s se ’
  185 +t te ’
  186 +t the ’
  187 +v ve ’
  188 +ł we ’
  189 + ’ ρ
  190 +’ ’ owi
  191 +
  192 +s ce ε
  193 +d de ε
  194 +f fe ε
  195 +ǯ ge ε
  196 +k ke ε
  197 +l le ε
  198 +le le ε
  199 +m me ε
  200 +me me ε
  201 +n ne ε
  202 +r re ε
  203 +re re ε
  204 +s se ε
  205 +t te ε
  206 +v ve ε
  207 +ł we ε
  208 +
  209 +vja via ε
  210 +vją vią ε
  211 +vje vie ε
  212 +vję vię ε
  213 +vji vii ε
  214 +v′ij vij ε
  215 +vjom viom ε
  216 +vjo vio ε
  217 +vjax viach ε
  218 +vjam′i viami ε
  219 +n′ja nia ε
  220 +n′ją nią ε
  221 +n′je nie ε
  222 +n′ję nię ε
  223 +n′ji nii ε
  224 +n′ij nij ε
  225 +n′jom niom ε
  226 +n′jo nio ε
  227 +n′jax niach ε
  228 +n′jam′i niami ε
  229 +
  230 +os′t′ ość ε
  231 +os′t′ą ością ε
  232 +os′t′om ościom ε
  233 +os′t′am′i ościami ε
  234 +os′t′ax ościach ε
  235 +os′t′i ości ε
  236 +ovą ową ε
  237 +ovym′i owymi ε
  238 +ovym owym ε
  239 +ovyx owych ε
  240 +ovy owy ε
  241 +ovo owo ε
  242 +ov′i owi ε
  243 +ovemu owemu ε
  244 +ovej owej ε
  245 +ovego owego ε
  246 +ove owe ε
  247 +ova owa ε
  248 +oły owy ε
  249 +oła owa ε
  250 +
... ...
morphology/data/obce_langs.tab
... ... @@ -214,7 +214,6 @@ speedway en
214 214 runway en
215 215 replay en
216 216 permalloy en
217   -longplay en
218 217 jersey:s1 en
219 218 fairway en
220 219 cockney:s1 en
... ... @@ -227,21 +226,17 @@ joule fr
227 226 image fr
228 227 grunge en
229 228 grisaille fr
230   -freestyle en
231 229 entourage fr
232   -ensemble fr
233 230 decoupage fr
234 231 collége fr
235 232 college en
236 233 collage fr
237   -chippendale:s1 en
238 234 cartridge en
239 235 assemblage fr
240 236 penthouse en
241 237 pedicure fr
242 238 offside en
243 239 mainframe en
244   -iphone en
245 240 interface en
246 241 house en
247 242 high-life en
... ... @@ -339,7 +334,6 @@ Java fr
339 334 Inkatha en
340 335 Hertha de
341 336 Nemcova cz
342   -Mantegna it
343 337 Garcia es
344 338 casanova it
345 339 Venclova cz
... ... @@ -987,7 +981,7 @@ xantia fr
987 981 Xawery pl
988 982 Xenia pl
989 983 xero pl
990   -XML en
  984 +XML acro
991 985 Xymena pl
992 986 banjo en
993 987 banjola en
... ... @@ -1765,9 +1759,7 @@ Samaranch es
1765 1759 Toeplitz de
1766 1760 Wachholz de
1767 1761 chow-chow en
1768   -collie en
1769 1762 gourde fr
1770   -kelpie en
1771 1763 malinois fr
1772 1764 mirage fr
1773 1765 Armagnac fr
... ... @@ -1792,7 +1784,6 @@ zombie en
1792 1784 collie en
1793 1785 kelpie en
1794 1786 sheltie en
1795   -gourde fr
1796 1787 porsche de
1797 1788 back-office en
1798 1789 brie fr
... ... @@ -1841,3 +1832,746 @@ scotch en
1841 1832 cicerone it
1842 1833 gaucho es
1843 1834 intermezzo it
  1835 +XBW acro
  1836 +DJ acro
  1837 +VIP acro
  1838 +DOS acro
  1839 +HIV acro
  1840 +SMS acro
  1841 +STAR acro
  1842 +TIR acro
  1843 +UNIX acro
  1844 +CMYK acro
  1845 +SGML acro
  1846 +sms acro
  1847 +ADP acro
  1848 +ANSI acro
  1849 +API acro
  1850 +APS acro
  1851 +ASCII acro
  1852 +ATP acro
  1853 +AWF acro
  1854 +BBK acro
  1855 +BC acro
  1856 +BDK acro
  1857 +BFG acro
  1858 +BG acro
  1859 +BGK acro
  1860 +BH acro
  1861 +BHK acro
  1862 +BISE acro
  1863 +BMW acro
  1864 +BP:s1 acro
  1865 +BPH acro
  1866 +DNA acro
  1867 +NSA acro
  1868 +RNA acro
  1869 +RSHA acro
  1870 +TWA acro
  1871 +BIG acro
  1872 +BOŚ acro
  1873 +BŚ acro
  1874 +ChRL acro
  1875 +DHL acro
  1876 +EUROPOL acro
  1877 +FATAH acro
  1878 +GL acro
  1879 +GUC acro
  1880 +HDL acro
  1881 +HTML acro
  1882 +IBL acro
  1883 +KAI acro
  1884 +KAL acro
  1885 +KUL acro
  1886 +LDL acro
  1887 +LOK acro
  1888 +MAL acro
  1889 +BERD acro
  1890 +CAD acro
  1891 +NEC acro
  1892 +OAPEC acro
  1893 +OPEC acro
  1894 +SAAB acro
  1895 +SIMM acro
  1896 +AAP acro
  1897 +AFP acro
  1898 +AGH acro
  1899 +AI:s1 acro
  1900 +AI:s2 acro
  1901 +AK acro
  1902 +AP:s1 acro
  1903 +AP:s2 acro
  1904 +AR acro
  1905 +ARiMR acro
  1906 +ASP acro
  1907 +ATK acro
  1908 +AWS acro
  1909 +ABS acro
  1910 +ADM acro
  1911 +ADN acro
  1912 +AF acro
  1913 +AIDS acro
  1914 +AIF acro
  1915 +AL acro
  1916 +AM acro
  1917 +AMERBANK acro
  1918 +AON acro
  1919 +ASEAN acro
  1920 +ASR acro
  1921 +AZS acro
  1922 +BAAS acro
  1923 +BASF acro
  1924 +ANSA acro
  1925 +CEFTA acro
  1926 +CIA acro
  1927 +DPA acro
  1928 +EFTA acro
  1929 +ELTA acro
  1930 +ENEA acro
  1931 +EPA acro
  1932 +ETA acro
  1933 +INA acro
  1934 +IRA acro
  1935 +JANA acro
  1936 +KNA acro
  1937 +NAFTA acro
  1938 +NASA acro
  1939 +NRA acro
  1940 +OPA acro
  1941 +PA acro
  1942 +PIA acro
  1943 +RPA acro
  1944 +ABB acro
  1945 +ABC acro
  1946 +AC acro
  1947 +ATT acro
  1948 +BBC acro
  1949 +BCG acro
  1950 +AGD acro
  1951 +LP:s1 acro
  1952 +LZS acro
  1953 +MTP acro
  1954 +MŚ:s2 acro
  1955 +OHP acro
  1956 +PKP acro
  1957 +UN acro
  1958 +WSiP acro
  1959 +WZiZT acro
  1960 +USA acro
  1961 +ZEA acro
  1962 +CIT acro
  1963 +DAT acro
  1964 +FAT acro
  1965 +LOT acro
  1966 +NOT acro
  1967 +OIT acro
  1968 +PAGART acro
  1969 +PIT acro
  1970 +VAT acro
  1971 +WAT acro
  1972 +ZHR acro
  1973 +tv acro
  1974 +PIT:s1 acro
  1975 +ABBA acro
  1976 +APA acro
  1977 +FAMA acro
  1978 +LETTA acro
  1979 +MENA acro
  1980 +SABENA acro
  1981 +UEFA acro
  1982 +MKOl acro
  1983 +MOK acro
  1984 +MPiK acro
  1985 +MŚ:s1 acro
  1986 +NASK acro
  1987 +NHL acro
  1988 +NIK acro
  1989 +NKOl acro
  1990 +PAH acro
  1991 +PAL acro
  1992 +PESEL acro
  1993 +PIH acro
  1994 +PKOl acro
  1995 +PLL acro
  1996 +PŁ acro
  1997 +URL acro
  1998 +UŚ acro
  1999 +UŚl acro
  2000 +WIG acro
  2001 +BBWR acro
  2002 +BGŻ acro
  2003 +BIOS acro
  2004 +BN acro
  2005 +BOR acro
  2006 +BPS acro
  2007 +BR acro
  2008 +BRBM acro
  2009 +BRR acro
  2010 +BS acro
  2011 +BSR acro
  2012 +BZ acro
  2013 +CAF acro
  2014 +CAM acro
  2015 +CD-ROM acro
  2016 +COCOM acro
  2017 +CRZZ acro
  2018 +CWKS acro
  2019 +DAB acro
  2020 +DLS acro
  2021 +ICJ acro
  2022 +IPN acro
  2023 +ISBN acro
  2024 +ISDN acro
  2025 +ISSN acro
  2026 +KERM acro
  2027 +MSZ acro
  2028 +NIP acro
  2029 +ONZ acro
  2030 +PKS acro
  2031 +PSS acro
  2032 +RN:s1 acro
  2033 +SARS acro
  2034 +UJ acro
  2035 +UOP acro
  2036 +VHF acro
  2037 +VHS acro
  2038 +WAP acro
  2039 +WiP acro
  2040 +DM acro
  2041 +DS acro
  2042 +DVD-ROM acro
  2043 +EBOR acro
  2044 +EBOiR acro
  2045 +EPROM acro
  2046 +ERM acro
  2047 +ESOP acro
  2048 +FADOM acro
  2049 +FAS acro
  2050 +FOZZ acro
  2051 +FPŻ acro
  2052 +FSM acro
  2053 +GKS acro
  2054 +GM acro
  2055 +GOPR acro
  2056 +GPRS acro
  2057 +GPS acro
  2058 +GS acro
  2059 +GUS acro
  2060 +IFOR acro
  2061 +IMiD acro
  2062 +KBN acro
  2063 +KLM acro
  2064 +KM:s1 acro
  2065 +KOR acro
  2066 +KPN acro
  2067 +KSERM acro
  2068 +LAN acro
  2069 +MBOiR acro
  2070 +MDM acro
  2071 +MEN acro
  2072 +MF acro
  2073 +MGM:s1 acro
  2074 +MKS acro
  2075 +MKiDN acro
  2076 +MON acro
  2077 +MOSiR acro
  2078 +MS-DOS acro
  2079 +MTS acro
  2080 +NEP acro
  2081 +NSZ acro
  2082 +OBOP acro
  2083 +OCR acro
  2084 +ONR acro
  2085 +ORWN acro
  2086 +OZN acro
  2087 +PANAM acro
  2088 +PBKS acro
  2089 +PBR acro
  2090 +PFRON acro
  2091 +PGR acro
  2092 +PIN acro
  2093 +PIP:s1 acro
  2094 +PIW acro
  2095 +PKWN acro
  2096 +PKiN acro
  2097 +PPS acro
  2098 +PR:s1 acro
  2099 +PRON acro
  2100 +PRS acro
  2101 +PUR acro
  2102 +PWN acro
  2103 +PZPN acro
  2104 +PŻM acro
  2105 +RAF acro
  2106 +RAM acro
  2107 +REGON acro
  2108 +RFN acro
  2109 +ROM acro
  2110 +ROP acro
  2111 +ROR acro
  2112 +RPN acro
  2113 +RUM acro
  2114 +RUP acro
  2115 +RdR acro
  2116 +SAS acro
  2117 +SCMS acro
  2118 +SECAM acro
  2119 +SIM acro
  2120 +SKS acro
  2121 +SPF acro
  2122 +SPN acro
  2123 +SUDANAIR acro
  2124 +TPN acro
  2125 +TS acro
  2126 +UAM acro
  2127 +UKF acro
  2128 +UM acro
  2129 +UMCS acro
  2130 +UMTS acro
  2131 +UNICEF acro
  2132 +UNZ acro
  2133 +UPS acro
  2134 +URM acro
  2135 +URz acro
  2136 +US acro
  2137 +UWM acro
  2138 +UWr acro
  2139 +UZ acro
  2140 +VCR acro
  2141 +WAN acro
  2142 +WBZ acro
  2143 +ZZ acro
  2144 +ckm acro
  2145 +lkm acro
  2146 +scs acro
  2147 +wf acro
  2148 +ŁBR acro
  2149 +ŁKS acro
  2150 +ŚPN acro
  2151 +WEP acro
  2152 +WF acro
  2153 +WKR acro
  2154 +WOP acro
  2155 +WOPR acro
  2156 +WSM acro
  2157 +WiN acro
  2158 +ZAIKS acro
  2159 +ZASP acro
  2160 +ZBOWiD acro
  2161 +ZBoWiD acro
  2162 +ZMS acro
  2163 +ZOM acro
  2164 +ZOSP acro
  2165 +ZOZ acro
  2166 +ZS acro
  2167 +ZUS acro
  2168 +ZWM acro
  2169 +ZWZ acro
  2170 +BWZ acro
  2171 +HDZ acro
  2172 +MWGzZ acro
  2173 +MZ acro
  2174 +OPZZ acro
  2175 +ZSZ acro
  2176 +MB acro
  2177 +TAB acro
  2178 +ŻOB acro
  2179 +NRF acro
  2180 +PKF acro
  2181 +RMF acro
  2182 +SPATiF acro
  2183 +TKKF acro
  2184 +IBM acro
  2185 +KM:s2 acro
  2186 +MGM:s2 acro
  2187 +PAM acro
  2188 +PGM acro
  2189 +RM acro
  2190 +SM:s2 acro
  2191 +ŚAM acro
  2192 +CNN acro
  2193 +CPN acro
  2194 +FN acro
  2195 +KEN acro
  2196 +KRN acro
  2197 +LN acro
  2198 +MN acro
  2199 +PAN acro
  2200 +PN acro
  2201 +REN acro
  2202 +RN:s2 acro
  2203 +WRN:s1 acro
  2204 +WRON acro
  2205 +ZChN acro
  2206 +KEP acro
  2207 +KSAP acro
  2208 +LOP acro
  2209 +MEP acro
  2210 +MOP acro
  2211 +NAP acro
  2212 +PAP acro
  2213 +PIP:s2 acro
  2214 +WOŚP acro
  2215 +BOS acro
  2216 +CBOS acro
  2217 +CBS acro
  2218 +ITAR-TASS acro
  2219 +KRS acro
  2220 +KRUS acro
  2221 +MPiPS acro
  2222 +MS acro
  2223 +NZS acro
  2224 +PBS acro
  2225 +PDS acro
  2226 +PS acro
  2227 +PiS acro
  2228 +RAS acro
  2229 +SGPiS acro
  2230 +BAV acro
  2231 +BUW acro
  2232 +TOZ acro
  2233 +BJ acro
  2234 +PTJ acro
  2235 +TKJ acro
  2236 +PAI acro
  2237 +RAI acro
  2238 +SKOK acro
  2239 +SOK acro
  2240 +PPL acro
  2241 +PRL acro
  2242 +PSL acro
  2243 +SDKPiL acro
  2244 +ZSL acro
  2245 +PŚ acro
  2246 +KPZR acro
  2247 +LPR acro
  2248 +MRR acro
  2249 +PPR acro
  2250 +PZPR acro
  2251 +TOPR acro
  2252 +TPPR acro
  2253 +TUR acro
  2254 +UPR acro
  2255 +GATT acro
  2256 +KRRiT acro
  2257 +PAT acro
  2258 +PIT:s2 acro
  2259 +FIFA acro
  2260 +S.A. acro
  2261 +AA:s2 acro
  2262 +BA acro
  2263 +BSA acro
  2264 +MSWiA acro
  2265 +NBA acro
  2266 +NWZA acro
  2267 +RCA acro
  2268 +SAA acro
  2269 +SA acro
  2270 +UPA acro
  2271 +WZA acro
  2272 +YMCA acro
  2273 +YWCA acro
  2274 +KGB acro
  2275 +OB acro
  2276 +PB acro
  2277 +PKB acro
  2278 +PNB acro
  2279 +SB acro
  2280 +UB acro
  2281 +USB acro
  2282 +UwB acro
  2283 +CFC acro
  2284 +C acro
  2285 +FC acro
  2286 +GMC acro
  2287 +KC acro
  2288 +LC acro
  2289 +NBC acro
  2290 +OC acro
  2291 +PC acro
  2292 +PVC acro
  2293 +UC acro
  2294 +USC acro
  2295 +WC acro
  2296 +WTC acro
  2297 +FDD acro
  2298 +HDD acro
  2299 +KLD acro
  2300 +KRLD acro
  2301 +LCD acro
  2302 +LSD acro
  2303 +ND acro
  2304 +NKWD acro
  2305 +NPD acro
  2306 +NRD acro
  2307 +OECD acro
  2308 +OPD acro
  2309 +PKD acro
  2310 +SChD acro
  2311 +SD acro
  2312 +SLD acro
  2313 +SPD:s1 acro
  2314 +SPD:s2 acro
  2315 +TPD acro
  2316 +WKD acro
  2317 +BRE acro
  2318 +BSE acro
  2319 +BWE acro
  2320 +CINTE acro
  2321 +IDE acro
  2322 +KBWE acro
  2323 +LE acro
  2324 +OFE acro
  2325 +PHARE acro
  2326 +PTE acro
  2327 +PZE acro
  2328 +RE acro
  2329 +RWE acro
  2330 +UE acro
  2331 +dBASE acro
  2332 +SF acro
  2333 +DBG acro
  2334 +EEG acro
  2335 +EKG acro
  2336 +EMG acro
  2337 +EWG acro
  2338 +KG acro
  2339 +MG acro
  2340 +PBG acro
  2341 +PGNiG acro
  2342 +PG acro
  2343 +PZG acro
  2344 +RWPG acro
  2345 +SG acro
  2346 +UG acro
  2347 +USG acro
  2348 +LH acro
  2349 +OSH acro
  2350 +PBH acro
  2351 +PH acro
  2352 +PZH acro
  2353 +SGH acro
  2354 +ZGH acro
  2355 +pH acro
  2356 +FBI acro
  2357 +ITI acro
  2358 +NFI acro
  2359 +SI acro
  2360 +TPI acro
  2361 +UPI acro
  2362 +WSI acro
  2363 +BSK acro
  2364 +MPK acro
  2365 +MZK acro
  2366 +PBK acro
  2367 +PCK acro
  2368 +PK acro
  2369 +PTTK acro
  2370 +SDK acro
  2371 +UMK acro
  2372 +WBK acro
  2373 +WSK acro
  2374 +ZK acro
  2375 +MPO acro
  2376 +NATO acro
  2377 +ORMO acro
  2378 +PKO acro
  2379 +PLO acro
  2380 +PO acro
  2381 +PeKaO acro
  2382 +RGO acro
  2383 +ROPCiO acro
  2384 +SEATO acro
  2385 +SLO acro
  2386 +SO acro
  2387 +UFO acro
  2388 +UNESCO acro
  2389 +UNO acro
  2390 +UO acro
  2391 +WHO acro
  2392 +WMO acro
  2393 +WTO acro
  2394 +ZOMO acro
  2395 +IQ acro
  2396 +Q acro
  2397 +PPP acro
  2398 +SDP acro
  2399 +SMJP acro
  2400 +SPP:s1 acro
  2401 +SP acro
  2402 +SdRP acro
  2403 +TP:s1 acro
  2404 +TP:s2 acro
  2405 +TWP acro
  2406 +TZSP acro
  2407 +UP acro
  2408 +WNP acro
  2409 +WP acro
  2410 +WSP acro
  2411 +ZHP acro
  2412 +ZLP acro
  2413 +ZMP acro
  2414 +ZNP acro
  2415 +ZSMP acro
  2416 +ZSP acro
  2417 +PCR acro
  2418 +PR:s2 acro
  2419 +SGGW-AR acro
  2420 +ZSRR acro
  2421 +ZSSR acro
  2422 +CGS acro
  2423 +IS acro
  2424 +SOS acro
  2425 +SS:s1 acro
  2426 +DDT acro
  2427 +MT acro
  2428 +TNT acro
  2429 +UHT acro
  2430 +UPT acro
  2431 +CDU acro
  2432 +CKU acro
  2433 +CPU acro
  2434 +CSU acro
  2435 +EBU acro
  2436 +ECU acro
  2437 +GPU acro
  2438 +PAU acro
  2439 +PZU acro
  2440 +WKU acro
  2441 +CV acro
  2442 +PCV acro
  2443 +PRiTV acro
  2444 +TGV acro
  2445 +KW:s3 acro
  2446 +MFW acro
  2447 +MPW acro
  2448 +MRiRW acro
  2449 +MSW acro
  2450 +NW acro
  2451 +PCW acro
  2452 +POW acro
  2453 +PW acro
  2454 +RSW acro
  2455 +SGGW acro
  2456 +TW acro
  2457 +UKSW acro
  2458 +UW:s1 acro
  2459 +UW:s2 acro
  2460 +VW acro
  2461 +WFSW acro
  2462 +WSW acro
  2463 +ŻW:s1 acro
  2464 +ŻZW acro
  2465 +LZ acro
  2466 +UŁ acro
  2467 +CPLiA acro
  2468 +SAPA acro
  2469 +UNRRA acro
  2470 +VGA acro
  2471 +ChD acro
  2472 +FIDE acro
  2473 +KE acro
  2474 +FK acro
  2475 +PTK acro
  2476 +SM:s1 acro
  2477 +TVN acro
  2478 +FAO acro
  2479 +FSO acro
  2480 +ISO acro
  2481 +KKO:s2 acro
  2482 +KRO acro
  2483 +MO acro
  2484 +BP:s5 acro
  2485 +BSP:s2 acro
  2486 +DOKP acro
  2487 +FDP:s1 acro
  2488 +KPP acro
  2489 +KWP acro
  2490 +NSDAP acro
  2491 +OSP acro
  2492 +OWP:s2 acro
  2493 +PKZP acro
  2494 +PPPP acro
  2495 +RP acro
  2496 +TVP acro
  2497 +SS:s2 acro
  2498 +WFTU acro
  2499 +CTV acro
  2500 +MTV acro
  2501 +TV acro
  2502 +KW:s1 acro
  2503 +MW acro
  2504 +WWW acro
  2505 +ŻW:s2 acro
  2506 +NZ acro
  2507 +W-Z acro
  2508 +HB acro
  2509 +ZPB acro
  2510 +CD acro
  2511 +PKLD acro
  2512 +GBH acro
  2513 +EBI acro
  2514 +PCI acro
  2515 +CK acro
  2516 +GBGK acro
  2517 +MCK acro
  2518 +MDK acro
  2519 +TK acro
  2520 +JAL acro
  2521 +NCL acro
  2522 +QAL acro
  2523 +GSM acro
  2524 +KGHM acro
  2525 +REM acro
  2526 +ON acro
  2527 +PEN acro
  2528 +PKN acro
  2529 +WRN:s2 acro
  2530 +GIODO acro
  2531 +KKO:s1 acro
  2532 +RPO acro
  2533 +BSP:s1 acro
  2534 +CKŻP acro
  2535 +DBP acro
  2536 +EP acro
  2537 +HP acro
  2538 +HSP acro
  2539 +HTTP acro
  2540 +IP acro
  2541 +KP:s1 acro
  2542 +KP:s2 acro
  2543 +MP:s1 acro
  2544 +NBP acro
  2545 +ORP acro
  2546 +OWP:s1 acro
  2547 +SPP:s2 acro
  2548 +UTP acro
  2549 +LR acro
  2550 +NMT acro
  2551 +PDT acro
  2552 +RPU acro
  2553 +DTV acro
  2554 +UV acro
  2555 +BW acro
  2556 +GBW acro
  2557 +KBW:s3 acro
  2558 +SW acro
  2559 +UKW acro
  2560 +NSZZ acro
  2561 +CZD acro
  2562 +DVD acro
  2563 +CO acro
  2564 +HBO acro
  2565 +LO acro
  2566 +DTP acro
  2567 +FDP:s2 acro
  2568 +FTP acro
  2569 +LP:s3 acro
  2570 +MP:s2 acro
  2571 +MSP acro
  2572 +BGW acro
  2573 +KBW:s1 acro
  2574 +BHP acro
  2575 +CZMP acro
  2576 +AA:s1 acro
  2577 +BCh acro
... ...
morphology/dict.ml
... ... @@ -161,6 +161,26 @@ let split_dict in_path filename out_path =
161 161 fprintf file "%s\t%s\t%s\n" form.orth entry.lemma form.interp
162 162 with Not_found -> ()))))))
163 163  
  164 +let split_language lang_filename in_path filename out_path =
        (* Splits the dictionary [in_path ^ filename] into one output file per
           language tag, using the lemma -> language table read from
           [lang_filename]. Output files are named
           [out_path ^ lang_ ^ <lang> ^ _ ^ <filename without a .gz suffix>]
           and hold one [orth <TAB> lemma <TAB> interp] line per entry.
           Fails (via [failwith]) when a row of [lang_filename] does not have
           exactly two fields. *)
        (* Build the lemma -> language map; every row must be [lemma; lang]. *)
  165 + let map = File.fold_tab lang_filename StringMap.empty (fun map -> function
        (* NOTE(review): presumably [add_inc] calls the function only when the
           lemma is already bound, passing the previous value as [lang2]; in
           that case the clash is printed and the newly read [lang] is kept.
           Confirm against the StringMap helper. *)
  166 + [lemma; lang] -> StringMap.add_inc map lemma lang (fun lang2 -> print_endline ("split_language: " ^ lemma ^ " " ^ lang ^ " " ^ lang2); lang)
  167 + | line -> failwith ("split_language: " ^ (String.concat "\t" line))) in
        (* Load the dictionary and run it through [assign_entry_cat], then
           reverse the resulting list. *)
  168 + let dict = load_tab (in_path ^ filename) in
  169 + let dict = List.rev (assign_entry_cat dict) in
        (* The input is read under its original name above; only the name used
           for the output files drops a trailing .gz (assuming check_sufix /
           cut_sufix test for and remove the given suffix - TODO confirm). *)
  170 + let filename = if Xstring.check_sufix ".gz" filename then
  171 + Xstring.cut_sufix ".gz" filename else filename in
        (* Group entries by the language of their lemma. Entries whose lemma is
           missing from [map] raise Not_found in [StringMap.find] and are left
           out. The handler also wraps the [add_inc] call, so a Not_found
           raised there would be dropped silently as well. *)
  172 + let dict_map = Xlist.fold dict StringMap.empty (fun dict_map e ->
  173 + try
  174 + let lang = StringMap.find map e.lemma in
  175 + StringMap.add_inc dict_map lang [e] (fun l -> e :: l)
  176 + with Not_found -> dict_map) in
        (* Write one file per language; [get_form] supplies the form whose
           [orth] and [interp] are printed next to the entry lemma. *)
  177 + StringMap.iter dict_map (fun lang dict ->
  178 + File.file_out (out_path ^ "lang_" ^ lang ^ "_" ^ filename) (fun file ->
  179 + Xlist.iter dict (fun entry ->
  180 + let form = get_form entry in
  181 + fprintf file "%s\t%s\t%s\n" form.orth entry.lemma form.interp)))
  182 +
  183 +
164 184 let merge_entries dict =
165 185 let dict = assign_entry_cat dict in
166 186 let map = Xlist.fold dict StringMap.empty (fun map entry ->
... ... @@ -318,6 +338,9 @@ let merge_interps lemma forms =
318 338 | "y",["subst:pl:gen:m3";"subst:pl:loc:m3"] -> {empty_form with orth=orth; interp="subst:pl:gen.loc:m3"} :: forms
319 339 | "y",["subst:pl:dat:m3";"subst:sg:inst:m3";"subst:sg:loc:m3"] -> {empty_form with orth=orth; interp="subst:sg:inst.loc:m3|subst:pl:dat:m3"} :: forms
320 340 | "y",["subst:sg:dat.loc:f";"subst:sg:gen:f";"subst:sg:nom:f";"subst:sg:voc:f"] -> {empty_form with orth=orth; interp="subst:sg:nom.gen.dat.loc.voc:f"} :: forms
  341 +(* en *) | "y",["subst:sg:loc:m2";"subst:sg:voc:m2"]-> {empty_form with orth=orth; interp="subst:sg:loc.voc:m2"} :: forms
  342 +(* en *) | "y",["subst:sg:gen:m3";"subst:sg:loc:m3";"subst:sg:voc:m3"] -> {empty_form with orth=orth; interp="subst:sg:loc.voc:m3"} :: {empty_form with orth=orth; interp="subst:sg:gen:m3"} :: forms
  343 +(* en *) | "y",["subst:sg:loc:m3";"subst:sg:voc:m3"]-> {empty_form with orth=orth; interp="subst:sg:loc.voc:m3"} :: forms
321 344 | "e",["depr:pl:nom.acc.voc:m2";"subst:sg:nom:m1";"subst:sg:voc:m1"] -> {empty_form with orth=orth; interp="subst:sg:nom.voc:m1|depr:pl:nom.acc.voc:m2"} :: forms
322 345 | "e",["depr:pl:nom.acc.voc:m2";"subst:sg.pl:nom.gen.dat.acc.inst.loc.voc:m1";"subst:sg:nom:m1";"subst:sg:voc:m1"] -> {empty_form with orth=orth; interp="subst:sg.pl:nom.gen.dat.acc.inst.loc.voc:m1|depr:pl:nom.acc.voc:m2"} :: {empty_form with orth=orth; interp="subst:sg:nom.voc:m1|depr:pl:nom.acc.voc:m2"} :: forms
323 346 | "e",["subst:sg:gen.acc:m1";"subst:sg:gen:m1"] -> {empty_form with orth=orth; interp="subst:sg:gen.acc:m1"} :: forms
... ... @@ -541,7 +564,14 @@ let exceptional_lemmata = StringSet.of_list ([
541 564 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
542 565 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
543 566 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";*)
544   - ] @ File.load_lines "../morphology/data/obce.tab" @ File.load_lines "../morphology/data/akronimy.tab" @
  567 + "unixowy"; "unixowość"; "survivalowy"; "survivalowość"; "survivalowiec"; "software’owy"; "software’owość"; "software’owo"; "rock’n’rollowy"; "rock’n’rollowość"; "rock’n’rollowo"; "qumrańskość";
  568 + "qumrański"; "quizowy"; "quizowość"; "queerowy"; "queerowość"; "quadowy"; "quadowiec"; "oxfordzkość"; "oxfordzki"; "novellowskość"; "novellowski"; "nieunixowy";
  569 + "nieunixowość"; "niesurvivalowy"; "niesurvivalowość"; "niesoftware’owy"; "niesoftware’owość"; "nierock’n’rollowy"; "nierock’n’rollowość"; "nierock’n’rollowo"; "niequmrańskość";
  570 + "niequmrański"; "niequizowy"; "niequizowość"; "niequeerowość"; "niequeerowo"; "niequadowy"; "nieoxfordzkość"; "nieoxfordzki"; "nienovellowskość"; "nienovellowski"; "nienewage'owy"; "nieliverpoolskość";
  571 + "nieliverpoolski"; "niekickboxingowy"; "niekickboxingowość"; "nieheavymetalowy"; "nieheavymetalowość"; "nieheavymetalowo"; "nieharvardzkość"; "nieharvardzki"; "niedeveloperskość";
  572 + "niedeveloperski"; "niedaviscupowy"; "niedaviscupowość"; "niebrexitowość"; "nieampexowy"; "nieampexowość"; "newage'owy"; "newage'owość"; "newage'owo"; "liverpoolskość"; "liverpoolski";
  573 + ] (*@ File.load_lines "../morphology/data/obce.tab" @ File.load_lines "../morphology/data/akronimy.tab" @*)
  574 + @ File.fold_tab "../morphology/data/obce_langs.tab" [] (fun l x -> List.hd x :: l) @
545 575 (*File.load_lines "../morphology/data/nieregularne.tab" @ File.load_lines "results/interp_validated_verb.tab" @ *)
546 576 (*File.load_lines "results/interp_validated_noun.tab" @ File.load_lines "results/interp_validated_adj.tab" @
547 577 File.load_lines "../morphology/data/validated_adj.tab" @ File.load_lines "../morphology/data/validated_noun.tab" @
... ... @@ -557,6 +587,11 @@ let remove_exceptional_lemmata_gen ex dict =
557 587 if StringSet.mem ex entry.lemma then dict
558 588 else entry :: dict)
559 589  
  590 +(* let select_lemmata set dict =
  591 + Xlist.fold dict [] (fun dict entry ->
  592 + if StringSet.mem set entry.lemma then entry :: dict
  593 + else dict) *)
  594 +
560 595 let generate_stem dict =
561 596 Xlist.rev_map dict (fun entry ->
562 597 {entry with stem=
... ... @@ -589,10 +624,10 @@ let fonetic_translation dict =
589 624 try
590 625 (* let lemma = Stem.simplify_lemma e.lemma in *)
591 626 (* let phon_lemma = Fonetics.translate_and_check true Fonetics.rules Fonetics.rev_rules lemma in *)
592   - let phon_stem = Fonetics.translate_and_check true Fonetics.rules Fonetics.rev_rules e.stem in
  627 + let phon_stem = Fonetics.translate(*_and_check*) true Fonetics.rules (*Fonetics.rev_rules*) e.stem in
593 628 {e with (*phon_lemma = phon_lemma;*) phon_stem=Xlist.map phon_stem (fun s -> s.phon);
594 629 forms = Xlist.map e.forms (fun f ->
595   - let phon_orth = Fonetics.translate_and_check true Fonetics.rules Fonetics.rev_rules f.orth in
  630 + let phon_orth = Fonetics.translate(*_and_check*) true Fonetics.rules (*Fonetics.rev_rules*) f.orth in
596 631 {f with phon_orth = phon_orth})} :: dict
597 632 with
598 633 Fonetics.NotFound(x,s) -> printf "NF %s %s %s\n%!" e.lemma x s; dict
... ... @@ -616,8 +651,9 @@ let phon_validate rules dict =
616 651 let forms = Xlist.rev_map entry.forms (fun form ->
617 652 let candidates = Xlist.fold form.phon_orth [] (fun candidates s ->
618 653 Xlist.fold (Rules.CharTrees.find rules s.phon) candidates (fun candidates (stem,rule) ->
619   - let candidate_lemma = Fonetics.rev_translate true Fonetics.rev_rules (stem ^ rule.set) s.mapping in
620   - if candidate_lemma = simple_lemma then (stem,rule) :: candidates else candidates)) in
  654 + let candidate_lemmas = Fonetics.rev_translate2 true Fonetics.rev_rules (stem ^ rule.set) s.mapping in
  655 + let b = Xlist.fold candidate_lemmas false (fun b candidate_lemma -> candidate_lemma = simple_lemma || b) in
  656 + if b then (stem,rule) :: candidates else candidates)) in
621 657 if candidates = [] then {form with validated=false} else {form with validated=true}) in
622 658 {entry with forms=forms})
623 659  
... ... @@ -642,10 +678,14 @@ let validate_interp rules dict =
642 678 Xlist.fold (Rules.CharTrees.find rules s.phon) candidates (fun candidates (stem,rule) ->
643 679 (* if rule.star = Ndm && not entry.ndm then candidates else
644 680 if rule.star <> Ndm && entry.ndm then candidates else *)
645   - let candidate_lemma = Fonetics.rev_translate true Fonetics.rev_rules (stem ^ rule.set) s.mapping in
  681 + (* let candidate_lemma = Fonetics.rev_translate true Fonetics.rev_rules (stem ^ rule.set) s.mapping in
646 682 if candidate_lemma = simple_lemma && form.interp = rule.interp then
  683 + (stem,rule) :: candidates else candidates)) in *)
  684 + let candidate_lemmas = Fonetics.rev_translate2 true Fonetics.rev_rules (stem ^ rule.set) s.mapping in
  685 + let b = Xlist.fold candidate_lemmas false (fun b candidate_lemma -> candidate_lemma = simple_lemma || b) in
  686 + if b && form.interp = rule.interp then
647 687 (stem,rule) :: candidates else candidates)) in
648   - if candidates = [] then {form with validated=false} else {form with validated=true}) in
  688 + if candidates = [] then ((*printf "validate_interp: %s\t%s\t%s\n" form.orth entry.lemma form.interp;*) {form with validated=false}) else {form with validated=true}) in
649 689 {entry with forms=forms})
650 690  
651 691 let remove_validated_forms dict =
... ...
morphology/fonetics.ml
... ... @@ -65,8 +65,9 @@ let prepare_rules symbol_defs rules =
65 65 CharTree.create rules
66 66  
67 67 let rules, rev_rules =
68   - let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_acro.dic" in
  68 + (* let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_acro.dic" in *)
69 69 (* let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_pl.dic" in *)
  70 + let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_en.dic" in
70 71 prepare_rules symbol_defs rules,
71 72 prepare_rules rev_symbol_defs rev_rules
72 73  
... ... @@ -143,8 +144,10 @@ let translate closure rules s =
143 144 (* printf "translate 2: %s\n%!" s; *)
144 145 s*)
145 146  
146   -let print_phon p =
147   - Printf.printf "%s %s\n" p.Types.phon (String.concat " " (Xlist.map p.Types.mapping (fun (a,b) -> a ^ "->" ^ b)))
  147 +let string_of_phon p =
  148 + Printf.sprintf "%s %s" p.Types.phon (String.concat " " (Xlist.map p.Types.mapping (fun (a,b) -> a ^ "->" ^ b)))
  149 +
  150 +let print_phon p = print_endline (string_of_phon p)
148 151  
149 152 (*let _ = translate rules "blafickie"
150 153 let _ = translate rules "blafiacki"
... ... @@ -183,11 +186,19 @@ let translate_single closure rules x =
183 186  
184 187 let rec rev_translate_rec x s = function
185 188 [] -> x,s,[]
  189 + | (_,"") :: m -> rev_translate_rec x s m
186 190 | (a,b) :: m ->
187   - if Xstring.check_prefix a s then rev_translate_rec (x^b) (Xstring.cut_prefix a s) m
  191 + if Xstring.check_prefix b s then rev_translate_rec (x^a) (Xstring.cut_prefix b s) m
188 192 else x,s,m
189 193  
190 194 let rev_translate closure rev_rules s m =
191 195 let x,s,_ = rev_translate_rec "" s m in
192 196 if s = "" then x else
193 197 x ^ (translate_single closure rev_rules s)
  198 +
  199 +let rev_translate2 closure rev_rules s m =
  200 + let x,s,_ = rev_translate_rec "" s m in
  201 + if s = "" then [x] else
  202 + let l = translate closure rev_rules s in
  203 + if l = [] then raise (NotFound(s,"")) else
  204 + Xlist.rev_map l (fun y -> x ^ y.Types.phon)
... ...
morphology/generate.ml
... ... @@ -135,6 +135,7 @@ let _ =
135 135 (* Dict.split_dict sgjp_path sgjp_filename201607 results_path; *)
136 136 (* Dict.split_dict sgjp_path sgjp_filename201605 results_path; *)
137 137 (* Dict.split_dict sgjp_path polimorf_filename results_path; *)
  138 + (* Dict.split_language "data/obce_langs.tab" sgjp_path sgjp_filename results_path; *)
138 139 ()
139 140  
140 141 (* Usunięcie form z prefixami *)
... ... @@ -274,6 +275,7 @@ let _ =
274 275 find_not_validated_entries compound_rule_trees results_path verb_polimorf_filename "results/not_validated_p_verb.tab";
275 276 find_not_validated_forms compound_rule_trees results_path verb_polimorf_filename "results/not_validated_p_verb2.tab"; *)
276 277 (* find_not_validated_forms compound_rule_trees results_path "sgjp_selected.tab" "results/not_validated_verb.tab"; *)
  278 + find_not_validated_forms compound_rule_trees results_path "lang_en_sgjp-20170730.tab" "results/lang_en.tab";
277 279 ()
278 280  
279 281 let find_not_validated_lemmata rules path filename out_filename =
... ... @@ -313,13 +315,13 @@ let test_lemmatize lemma orth =
313 315 printf "test_lemmatize: %s %s\n%!" lemma orth;
314 316 let simple_lemma = Stem.simplify_lemma lemma in
315 317 let phon_orths = Fonetics.translate(*_and_check*) true Fonetics.rules (*Fonetics.rev_rules*) orth in
316   - printf "phon_orths: %s\n%!" (String.concat " " (Xlist.map phon_orths (fun s -> s.phon)));
  318 + printf "phon_orths: \n %s\n%!" (String.concat "\n " (Xlist.map phon_orths Fonetics.string_of_phon));
317 319 Xlist.iter phon_orths (fun phon_orth ->
318 320 Xlist.iter (Rules.CharTrees.find compound_rule_trees phon_orth.phon) (fun (stem,rule) ->
319   - let candidate_lemma = Fonetics.rev_translate true Fonetics.rev_rules (stem ^ rule.set) (phon_orth.mapping) in
320   - printf " %s %s %s %s\n%!" phon_orth.phon stem (string_of_rule rule) candidate_lemma;
321   - if candidate_lemma = simple_lemma then
322   - printf "E %s %s %s\n%!" phon_orth.phon stem (string_of_rule rule)))
  321 + let candidate_lemmas = Fonetics.rev_translate2 true Fonetics.rev_rules (stem ^ rule.set) (phon_orth.mapping) in
  322 + Xlist.iter candidate_lemmas (fun candidate_lemma ->
  323 + if candidate_lemma = simple_lemma then printf "E" else printf " ";
  324 + printf " %s %s %s %s\n%!" phon_orth.phon stem (string_of_rule rule) candidate_lemma)))
323 325  
324 326 (* Sprawdzenie przebiegu lematyzacji *)
325 327 let _ =
... ... @@ -380,7 +382,27 @@ let _ =
380 382 (* test_lemmatize "WAT" "Wacie";
381 383 test_lemmatize "WAT" "WACIE";
382 384 test_lemmatize "WAT" "WAcie"; *)
383   - test_lemmatize "BOŚ" "BOŚ-u";
  385 + (* test_lemmatize "BOŚ" "BOŚ-u"; *)
  386 + (* test_lemmatize "upgrade" "upgrade’om"; *)
  387 + (* test_lemmatize "software" "software’y"; *)
  388 + (* test_lemmatize "spray" "sprayu"; *)
  389 + (* test_lemmatize "unixowość" "unixowościach"; *)
  390 + (* test_lemmatize "rolls-royce" "rolls-roysie"; *)
  391 + (* test_lemmatize "Arrow" "Arrowa";*)
  392 + (* test_lemmatize "Boy" "Boyowie"; *)
  393 + (* test_lemmatize "Chomsky" "Chomskiego"; *)
  394 + (* test_lemmatize "Bradbury" "Bradburych"; *)
  395 + (* test_lemmatize "compact" "compakcie";
  396 + test_lemmatize "Dixa" "Diksie"; *)
  397 + (* test_lemmatize "developerski" "developersko"; *)
  398 + (* test_lemmatize "Jessica" "Jessice"; *)
  399 + (* test_lemmatize "Gurkha" "Gurce";
  400 + test_lemmatize "Gurkha" "Gurkhi"; *)
  401 + (* test_lemmatize "heavymetalowiec" "heavymetalowca"; *)
  402 + (* test_lemmatize "niesoftware’owy" "niesoftware’owi"; *)
  403 + (* test_lemmatize "" "";
  404 + test_lemmatize "" "";
  405 + test_lemmatize "" ""; *)
384 406 ()
385 407  
386 408 (* Generowanie reguł *)
... ... @@ -438,6 +460,15 @@ let find_not_interp_validated_entries interp_rules path filename out_filename =
438 460 let dict = Dict.remove_validated_entries dict in
439 461 Dict.print out_filename dict
440 462  
  463 +let find_not_interp_validated_forms interp_rules path filename out_filename =
  464 + let dict = Dict.load_tab (path ^ filename) in
  465 + let dict = Dict.merge_entries dict in
  466 + let dict = Dict.process_interps dict in
  467 + let dict = Dict.fonetic_translation dict in
  468 + let dict = Dict.validate_interp interp_rules dict in
  469 + let dict = Dict.remove_validated_forms dict in
  470 + Dict.print out_filename dict
  471 +
441 472 (* Wypisanie lematów ze zwalidowaną interpretacją *)
442 473 let _ =
443 474 (* find_interp_validated_lemmata interp_compound_rule_trees results_path noun_sgjp_filename "results/interp_validated_noun.tab";
... ... @@ -454,6 +485,7 @@ let _ =
454 485 (* find_not_interp_validated_lemmata interp_compound_rule_trees results_path "sgjp_selected.tab" "results/interp_not_validated_verb.tab"; *)
455 486 (* find_not_interp_validated_entries interp_compound_rule_trees results_path verb_sgjp_filename "results/selected_verb.tab"; *)
456 487 (* find_not_interp_validated_entries interp_compound_rule_trees results_path "verb_sgjp_no_pref.tab" "results/selected_verb.tab"; *)
  488 + (* find_not_interp_validated_forms interp_compound_rule_trees results_path "lang_en_sgjp-20170730.tab" "results/lang_en.tab"; *)
457 489 ()
458 490  
459 491 (* Generowanie reguł dla interpretacji *)
... ...