Commit 19e22b7d7cd8316ce41c40a7f15bf71a81dffded
1 parent 72bb98ec
import of adjectives and adverbs from Witek
Showing 2 changed files with 86 additions and 13 deletions
dictionary/export.py
@@ -289,9 +289,9 @@ where ls.slownik in (%(vocabs)s) and %(antivocabs)s %(x_qual)s %(table_clause)s
                 % ', '.join(str(id) for id in self.refls),
             'attr_clauses': ' and '.join(attr_clauses)
         }
-        if tt.name == 'czasowniki':
-            print >>sys.stderr, query
-            print >>sys.stderr, [tt.id] + params + list(tt_c + cell_c + cell_c)
+        # if tt.name == 'czasowniki':
+        #     print >>sys.stderr, query
+        #     print >>sys.stderr, [tt.id] + params + list(tt_c + cell_c + cell_c)
         cursor.execute(
             query, [tt.id] + params + list(tt_c + cell_c + cell_c))
         for row in cursor:
dictionary/management/commands/import_witek.py
@@ -1,10 +1,12 @@
 # -*- coding: utf-8 -*-
+import sys
 from django.core.management.base import BaseCommand
 from django.db import transaction
 from django.db.models import Max
 from common.util import uniopen, no_history
 from dictionary.models import Lexeme, Vocabulary, LexemeInflectionPattern, \
-    Pattern, Qualifier, ClassificationValue, LexemeCV, Gender
+    Pattern, Qualifier, ClassificationValue, LexemeCV, Gender, \
+    LexemeAttributeValue, CrossReferenceType, CrossReference
 
 
 class Command(BaseCommand):
@@ -25,10 +27,31 @@ def import_lexemes(lines, comment):
         elements = line.strip().split(';')
         if elements[1] == 'subst':
             import_subst(elements, comment)
+        elif elements[1] == 'adv':
+            import_adv(elements, comment)
+        elif elements[1] == 'adj':
+            import_adj(elements, comment)
     transaction.commit()
     transaction.leave_transaction_management()
 
 
+next_id = None
+
+def new_lexeme(entry, pos, comment):
+    global next_id
+    if next_id:
+        next_id += 1
+    else:
+        next_id = Lexeme.all_objects.aggregate(Max('id'))['id__max'] + 1
+    l = Lexeme(
+        id=next_id, entry=entry, part_of_speech_id=pos,
+        status=Lexeme.STATUS_DESCRIBED, owner_vocabulary_id=WSJP,
+        comment=comment)
+    l.save()
+    WSJP.add_lexeme(l)
+    return l
+
+
 def import_subst(elements, comment):
     try:
         entry, pos, gender, pattern_data, commonness = elements
@@ -38,14 +61,7 @@ def import_subst(elements, comment):
     assert pos == 'subst'
     gender = Gender.objects.get(symbol=gender)
     lip_data = [p.rsplit(' ', 1) for p in pattern_data.split('/')]
-    next_id = Lexeme.all_objects.aggregate(Max('id'))['id__max'] + 1
-    l = Lexeme(
-        id=next_id, entry=entry, part_of_speech_id='subst',
-        status=Lexeme.STATUS_DESCRIBED, owner_vocabulary_id=WSJP,
-        comment=comment)
-    l.fix_homonym_number()
-    l.save()
-    WSJP.add_lexeme(l)
+    l = new_lexeme(entry, 'subst', comment)
     comm_value = ClassificationValue.objects.get(
         classification__name=u'pospolitość', label=commonness)
     LexemeCV.objects.create(lexeme=l, classification_value=comm_value)
@@ -62,4 +78,61 @@ def import_subst(elements, comment):
             raise ValueError(u"%s: can't find root" % repr(entry))
         lip.save()
         if qualifier:
-            lip.qualifiers.add(Qualifier.objects.get(label=qualifier))
\ No newline at end of file
+            lip.qualifiers.add(Qualifier.objects.get(label=qualifier))
+
+
+NDM = Pattern.objects.get(name='ndm')
+ADVADJ = CrossReferenceType.objects.get(symbol='advadj')
+
+def import_adv(elements, comment):
+    try:
+        entry, pos, pattern_name, adj_entry = elements
+    except ValueError:
+        print elements
+        raise
+    assert pos == 'adv' and pattern_name == 'ndm'
+    l = new_lexeme(entry, 'adv', comment)
+    lip = LexemeInflectionPattern(lexeme=l, index=1, pattern=NDM)
+    lip.root = lip.get_root()
+    lip.save()
+    adjs = Lexeme.objects.filter(entry=adj_entry)
+    if len(adjs) == 1:
+        adj = adjs.get()
+        CrossReference.objects.create(from_lexeme=l, to_lexeme=adj, type=ADVADJ)
+    else:
+        if len(adjs) == 0:
+            print >>sys.stderr, 'Brak przymiotnika: %s (%s)' % (adj_entry, entry)
+        else:
+            print >>sys.stderr, 'Niejednoznaczny przymiotnik: %s (%s)' % (
+                adj_entry, entry)
+
+
+POPRZ = LexemeAttributeValue.objects.get(
+    value=u'obecna', attribute__name=u'forma poprz.')
+NO_POPRZ = LexemeAttributeValue.objects.get(
+    value=u'nieobecna', attribute__name=u'forma poprz.')
+ZLOZ = LexemeAttributeValue.objects.get(
+    value=u'obecna', attribute__name=u'forma złoż.')
+NO_ZLOZ = LexemeAttributeValue.objects.get(
+    value=u'nieobecna', attribute__name=u'forma złoż.')
+
+def import_adj(elements, comment):
+    try:
+        entry, pos, pattern_name, zloz, poprz = elements
+    except ValueError:
+        print elements
+        raise
+    assert pos == 'adj'
+    l = new_lexeme(entry, 'adj', comment)
+    if zloz[0] == '+':
+        ZLOZ.add_lexeme(l)
+    else:
+        NO_ZLOZ.add_lexeme(l)
+    if poprz[0] == '+':
+        POPRZ.add_lexeme(l)
+    else:
+        NO_POPRZ.add_lexeme(l)
+    pattern = Pattern.objects.get(name=pattern_name)
+    lip = LexemeInflectionPattern(lexeme=l, index=1, pattern=pattern)
+    lip.root = lip.get_root()
+    lip.save()
\ No newline at end of file
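For orientation, a minimal sketch of the semicolon-separated lines the new adv/adj branches of import_lexemes expect, inferred from the tuple unpacking and checks in the diff above. The entry words, the pattern name 'P1', and the text after the '+'/'-' flags are illustrative assumptions, not values from the imported data (import_adj only looks at the leading '+'/'-' character).

# Illustrative sample lines, not taken from Witek's file.
sample_lines = [
    u'szybko;adv;ndm;szybki',       # entry;pos;pattern;base adjective for the advadj cross-reference
    u'szybki;adj;P1;+zloz;-poprz',  # entry;pos;pattern;forma zloz. flag;forma poprz. flag
]

for line in sample_lines:
    elements = line.strip().split(';')
    if elements[1] == 'adv':
        # import_adv would create an 'ndm' lexeme and link it to elements[3]
        print 'adv: %s (adjective %s)' % (elements[0], elements[3])
    elif elements[1] == 'adj':
        # import_adj would set the zloz./poprz. attributes from the flags
        print 'adj: %s, pattern %s, zloz %s, poprz %s' % tuple([elements[0]] + elements[2:])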