Commit 7bd2535a24f9ce09632c38a14e54083b99e46e8e
1 parent e0dd37a7
depr, lepsze importowanie atrybutów, łączliwość
Showing 6 changed files with 97 additions and 38 deletions
dictionary/ajax_lexeme_view.py
... | ... | @@ -83,10 +83,9 @@ def attribute_forms(l, part_of_speech=None, ics=None): |
83 | 83 | @render_template('extra_attributes.html') |
84 | 84 | @ajax(method='get', template='extra_attributes.html') |
85 | 85 | def extra_attributes(request, lexeme_id, pos, ics): |
86 | - l = Lexeme.objects.get(pk=lexeme_id) | |
86 | + l = Lexeme.all_objects.get(pk=lexeme_id) | |
87 | 87 | part_of_speech = PartOfSpeech.objects.get(symbol=pos) |
88 | - ics = InflectionCharacteristic.objects.filter( | |
89 | - part_of_speech=part_of_speech, entry__in=ics) | |
88 | + ics = InflectionCharacteristic.objects.filter(pk__in=ics) | |
90 | 89 | return { |
91 | 90 | 'forms': attribute_forms( |
92 | 91 | l, part_of_speech=part_of_speech, ics=ics), |
... | ... | @@ -94,7 +93,7 @@ def extra_attributes(request, lexeme_id, pos, ics): |
94 | 93 | |
95 | 94 | @ajax(method='get') |
96 | 95 | def check_attributes(request, lexeme_id, pos, ics): |
97 | - l = Lexeme.objects.get(pk=lexeme_id) | |
96 | + l = Lexeme.all_objects.get(pk=lexeme_id) | |
98 | 97 | part_of_speech = PartOfSpeech.objects.get(symbol=pos) |
99 | 98 | ics = InflectionCharacteristic.objects.filter( |
100 | 99 | part_of_speech=part_of_speech, entry__in=ics) |
... | ... |
dictionary/forms.py
... | ... | @@ -114,6 +114,7 @@ class LexemeEditForm(ModelForm): |
114 | 114 | 'part_of_speech', |
115 | 115 | 'entry', |
116 | 116 | 'pronunciation', |
117 | + 'valence', | |
117 | 118 | 'status', |
118 | 119 | 'gloss', |
119 | 120 | 'note', |
... | ... | @@ -124,6 +125,7 @@ class LexemeEditForm(ModelForm): |
124 | 125 | 'gloss': TextInput(attrs={'size': 40}), |
125 | 126 | 'note': TextInput(attrs={'size': 40}), |
126 | 127 | 'pronunciation': TextInput(attrs={'size': 40}), |
128 | + 'valence': TextInput(attrs={'size': 40}), | |
127 | 129 | } |
128 | 130 | |
129 | 131 | # abstract |
... | ... |
dictionary/history.py
... | ... | @@ -10,6 +10,9 @@ attribute_translation = { |
10 | 10 | ('leksemy', 'haslo'): u'hasło', |
11 | 11 | ('leksemy', 'haslosuf'): u'sufiks hasła', |
12 | 12 | ('leksemy', 'glosa'): u'glosa', |
13 | + ('leksemy', 'nota'): u'nota', | |
14 | + ('leksemy', 'wymowa'): u'wymowa', | |
15 | + ('leksemy', 'valence'): u'łączliwość', | |
13 | 16 | ('leksemy', 'pos'): u'część mowy', |
14 | 17 | ('leksemy', 'slownik'): u'słownik właściciel', |
15 | 18 | ('leksemy', 'status'): u'status', |
... | ... | @@ -26,6 +29,9 @@ attribute_translation_list = [ |
26 | 29 | ('leksemy', 'haslo', u'hasło'), |
27 | 30 | ('leksemy', 'haslosuf', u'sufiks hasła'), |
28 | 31 | ('leksemy', 'glosa', u'glosa'), |
32 | + ('leksemy', 'nota', u'nota'), | |
33 | + ('leksemy', 'wymowa', u'wymowa'), | |
34 | + ('leksemy', 'valence', u'łączliwość'), | |
29 | 35 | ('leksemy', 'pos', u'część mowy'), |
30 | 36 | ('leksemy', 'slownik', u'słownik właściciel'), |
31 | 37 | ('leksemy', 'status', u'status'), |
... | ... | @@ -40,6 +46,9 @@ lexeme_attribute_order = [ |
40 | 46 | u'hasło', |
41 | 47 | u'sufiks hasła', |
42 | 48 | u'glosa', |
49 | + u'nota', | |
50 | + u'wymowa', | |
51 | + u'łączliwość', | |
43 | 52 | u'część mowy', |
44 | 53 | u'słownik właściciel', |
45 | 54 | u'status', |
... | ... | @@ -57,6 +66,12 @@ def get_lexeme_attr(attr, lexeme): |
57 | 66 | return lexeme.entry_suffix |
58 | 67 | elif attr == 'glosa': |
59 | 68 | return lexeme.gloss |
69 | + elif attr == 'wymowa': | |
70 | + return lexeme.pronunciation | |
71 | + elif attr == 'nota': | |
72 | + return lexeme.note | |
73 | + elif attr == 'valence': | |
74 | + return lexeme.valence | |
60 | 75 | elif attr == 'pos': |
61 | 76 | return lexeme.part_of_speech.symbol |
62 | 77 | elif attr == 'slownik': |
... | ... |
dictionary/management/commands/import_data.py
... | ... | @@ -11,8 +11,9 @@ from dictionary.models import * |
11 | 11 | DEFAULT_DATABASE = 'data/sgjp.db' |
12 | 12 | |
13 | 13 | MINI_MODE = True # do debugowania |
14 | -MINI_LEXEME_COUNT = 40000 | |
15 | -MINI_LEXEME_QUERY = "SELECT %s FROM leksemy WHERE pos IN ('v', 'ger', 'pact') LIMIT ?" | |
14 | +MINI_LEXEME_COUNT = 5000 | |
15 | +#MINI_LEXEME_QUERY = "SELECT %s FROM leksemy WHERE pos IN ('v', 'ger', 'pact') LIMIT ?" | |
16 | +MINI_LEXEME_QUERY = "SELECT %s FROM leksemy l WHERE EXISTS (SELECT * FROM odmieniasie WHERE nr = l.nr AND charfl = 'm1') LIMIT ?" | |
16 | 17 | |
17 | 18 | SQL_MODE = True |
18 | 19 | |
... | ... | @@ -21,7 +22,33 @@ BATCH_SIZE = 5000 |
21 | 22 | OTHER = 'inne' |
22 | 23 | DEFAULT_VOCAB = 'SGJP' |
23 | 24 | |
24 | -REFL = (u'—', u'się', u'(się)', u'sobie', u'(sobie)', u'się/sobie') | |
25 | +ATTRS = { | |
26 | + u'zwrotność': ( | |
27 | + (('v', 'ger', 'pact'), None), | |
28 | + (u'—', u'się', u'(się)', u'sobie', u'(sobie)', u'się/sobie'), | |
29 | + ('haslosuf', lambda suf: suf.strip(' ?') or u'—'), | |
30 | + ), | |
31 | + u'przechodniość': ( | |
32 | + (('v', 'pred'), None), | |
33 | + ('iT', 'qT', 'T'), | |
34 | + ('przechodniosc', lambda x: x), | |
35 | + ), | |
36 | + u'aspekt': ( | |
37 | + (('v', 'pred'), None), | |
38 | + ('dk', 'ndk', 'ndk/dk', 'dk/ndk', 'ndk/(dk)', 'dk/(ndk)'), | |
39 | + ('aspekt', lambda x: x), | |
40 | + ), | |
41 | + u'właściwy': ( | |
42 | + (('v', 'pred'), None), | |
43 | + ('Q', '(Q)', ''), | |
44 | + ('właściwy', lambda x: x), | |
45 | + ), | |
46 | + u'depr': ( | |
47 | + (('subst', 'skrs'), 'm1'), | |
48 | + ('n', 'd', 'nd'), | |
49 | + ('depr', lambda x: x), | |
50 | + ) | |
51 | +} | |
25 | 52 | |
26 | 53 | # tymczasowa tabelka |
27 | 54 | BASIC_FORM_LABELS = { |
... | ... | @@ -179,7 +206,7 @@ class ImportData(object): |
179 | 206 | self.vocabs = dict((v.id, v) for v in Vocabulary.objects.all()) |
180 | 207 | |
181 | 208 | def new_qualifiers(self): |
182 | - sgjp = Vocabulary.objects.get(id=DEFAULT_VOCAB) | |
209 | + default = Vocabulary.objects.get(id=DEFAULT_VOCAB) | |
183 | 210 | query_result = self.sqlite_cursor.execute(""" |
184 | 211 | SELECT okwal FROM odmieniasie |
185 | 212 | UNION |
... | ... | @@ -193,23 +220,29 @@ class ImportData(object): |
193 | 220 | for qualifier_label in row[0].split('|'): |
194 | 221 | if qualifier_label not in added: |
195 | 222 | added.add(qualifier_label) |
196 | - yield Qualifier(label=qualifier_label, vocabulary=sgjp) | |
223 | + yield Qualifier(label=qualifier_label, vocabulary=default) | |
197 | 224 | |
198 | 225 | def cache_qualifiers(self): |
199 | 226 | if 'qual' not in self.__dict__: |
200 | 227 | self.qual = dict((q.label, q) for q in Qualifier.objects.all()) |
201 | 228 | |
202 | - def create_refl_attribute(self): | |
203 | - refl, created = LexemeAttribute.objects.get_or_create( | |
204 | - name=u'zwrotność', closed=True) | |
205 | - for pos in PartOfSpeech.objects.filter(symbol__in=('v', 'ger', 'pact')): | |
206 | - refl.parts_of_speech.add(pos) #add | |
207 | - refl_values = {} | |
208 | - for val in REFL: | |
209 | - refl_values[val], created = LexemeAttributeValue.objects.get_or_create( | |
210 | - value=val, attribute=refl) | |
211 | - refl_values[''] = refl_values[u'—'] | |
212 | - return refl_values | |
229 | + def create_attributes(self): | |
230 | + attr_values = {} | |
231 | + for attr_name, ((poses, ic), values, import_info) in ATTRS.iteritems(): | |
232 | + la, created = LexemeAttribute.objects.get_or_create( | |
233 | + name=attr_name, closed=True, required=True, takes_ic=bool(ic)) | |
234 | + for pos in PartOfSpeech.objects.filter(symbol__in=poses): | |
235 | + la.parts_of_speech.add(pos) #add | |
236 | + pos_ics = InflectionCharacteristic.objects.filter( | |
237 | + part_of_speech=pos, entry=ic) | |
238 | + for ic0 in pos_ics: | |
239 | + la.inflection_characteristics.add(ic0) #add | |
240 | + values_cache = {} | |
241 | + for val in values: | |
242 | + values_cache[val], created = LexemeAttributeValue.objects.get_or_create( | |
243 | + value=val, attribute=la) | |
244 | + attr_values[attr_name] = values_cache | |
245 | + return attr_values | |
213 | 246 | |
214 | 247 | def new_lexemes(self): |
215 | 248 | self.cache_qualifiers() |
... | ... | @@ -218,7 +251,7 @@ class ImportData(object): |
218 | 251 | MINI_LEXEME_QUERY % '*',(MINI_LEXEME_COUNT,)) |
219 | 252 | else: |
220 | 253 | result = self.sqlite_cursor.execute('SELECT * FROM leksemy') |
221 | - refl_values = self.create_refl_attribute() | |
254 | + attr_values = self.create_attributes() | |
222 | 255 | date = datetime.datetime.now() |
223 | 256 | cv_table = dict( |
224 | 257 | (cv.label, cv) for cv in ClassificationValue.objects.all()) |
... | ... | @@ -238,6 +271,7 @@ class ImportData(object): |
238 | 271 | gloss=row['glosa'] or '', |
239 | 272 | note=row['nota'] or '', |
240 | 273 | pronunciation=row['wymowa'] or '', |
274 | + valence=row['łączliwość'] or '', | |
241 | 275 | part_of_speech_id=row['pos'], |
242 | 276 | source='SGJP', |
243 | 277 | status=status, |
... | ... | @@ -251,9 +285,13 @@ class ImportData(object): |
251 | 285 | if row['lkwal']: |
252 | 286 | for qual in row['lkwal'].split('|'): |
253 | 287 | lexeme_qualifiers.append((row['nr'], self.qual[qual])) |
254 | - if row['pos'] in ('v', 'ger', 'pact'): | |
255 | - refl_value = refl_values.get(row['haslosuf'].strip(' ?')) | |
256 | - lexeme_attrs.append((row['nr'], refl_value)) | |
288 | + for attr_name, ((poses, ic), values, (column, f)) in ATTRS.iteritems(): | |
289 | + if row['pos'] in poses: | |
290 | + attr_value = attr_values[attr_name].get(f(row[column])) | |
291 | + if attr_value: | |
292 | + lexeme_attrs.append((row['nr'], attr_value)) | |
293 | + elif row[column]: | |
294 | + print 'unknown value of %s: %s' % (attr_name, row[column]) | |
257 | 295 | return (lexemes, lexeme_associations, lexeme_cvs, lexeme_qualifiers, |
258 | 296 | lexeme_attrs) |
259 | 297 | |
... | ... | @@ -501,7 +539,7 @@ class ImportData(object): |
501 | 539 | Pattern, |
502 | 540 | PatternType, |
503 | 541 | Qualifier, |
504 | - Vocabulary, | |
542 | + #Vocabulary, | |
505 | 543 | InflectionCharacteristic, |
506 | 544 | BaseFormLabel, |
507 | 545 | PartOfSpeech, |
... | ... | @@ -521,7 +559,8 @@ class ImportData(object): |
521 | 559 | bulk_create(InflectionCharacteristic, |
522 | 560 | self.new_inflection_characteristics()) |
523 | 561 | print 'importing vocabularies...' |
524 | - bulk_create(Vocabulary, self.new_vocabularies()) | |
562 | + for v in self.new_vocabularies(): | |
563 | + v.save() | |
525 | 564 | print 'importing qualifiers...' |
526 | 565 | bulk_create(Qualifier, self.new_qualifiers()) |
527 | 566 | print 'importing pattern types...' |
... | ... | @@ -538,26 +577,26 @@ class ImportData(object): |
538 | 577 | print 'importing lexemes...' |
539 | 578 | (lexemes, lexeme_assoc, lexeme_cvs, lexeme_quals, |
540 | 579 | lexeme_attrs) = self.new_lexemes() |
541 | - print '...' | |
580 | + print 'creating...' | |
542 | 581 | bulk_create(Lexeme, lexemes) |
543 | - print '...' | |
582 | + print 'associations...' | |
544 | 583 | bulk_create(LexemeAssociation, lexeme_assoc) |
545 | - print '...' | |
584 | + print 'classifications...' | |
546 | 585 | for lexeme_id, cv in lexeme_cvs: |
547 | 586 | cv.lexemes.add(lexeme_id) #add |
548 | - print '...' | |
587 | + print 'qualifiers...' | |
549 | 588 | for lexeme_id, q in lexeme_quals: |
550 | 589 | q.lexeme_set.add(lexeme_id) #add |
551 | - print '...' | |
590 | + print 'attributes...' | |
552 | 591 | for lexeme_id, attr_val in lexeme_attrs: |
553 | 592 | attr_val.lexemes.add(lexeme_id) |
554 | 593 | import_lexemes() |
555 | 594 | def import_lips(): |
556 | 595 | print 'importing lexeme inflection patterns...' |
557 | 596 | lips, lip_quals = self.new_lexeme_inflection_patterns() |
558 | - print '...' | |
597 | + print 'creating...' | |
559 | 598 | bulk_create(LexemeInflectionPattern, lips) |
560 | - print '...' | |
599 | + print 'qualifiers...' | |
561 | 600 | for lexeme_id, index, q in lip_quals: |
562 | 601 | LexemeInflectionPattern.objects.get( |
563 | 602 | lexeme_id=lexeme_id, index=index).qualifiers.add(q) |
... | ... |
dictionary/models.py
... | ... | @@ -326,6 +326,7 @@ class Lexeme(Model): |
326 | 326 | note = TextField(blank=True, db_column='nota', verbose_name=u'nota') |
327 | 327 | pronunciation = TextField( |
328 | 328 | blank=True, db_column='wymowa', verbose_name=u'wymowa') |
329 | + valence = TextField(blank=True, verbose_name=u'łączliwość') | |
329 | 330 | homonym_number = IntegerField(db_column='hom', default=1) |
330 | 331 | part_of_speech = ForeignKey( |
331 | 332 | PartOfSpeech, db_column='pos', verbose_name=u'cz. mowy') |
... | ... | @@ -443,8 +444,8 @@ class Lexeme(Model): |
443 | 444 | pos = part_of_speech or self.part_of_speech |
444 | 445 | attrs = LexemeAttribute.objects.all() |
445 | 446 | attrs = attrs.filter(parts_of_speech=pos) |
446 | - attrs = (attrs.filter(inflection_characteristic__in=ics) | |
447 | - | attrs.filter(inflection_characteristic=None)) | |
447 | + attrs = (attrs.filter(inflection_characteristics__in=ics) | |
448 | + | attrs.filter(takes_ic=False)) | |
448 | 449 | return attrs |
449 | 450 | |
450 | 451 | def attributes_values(self, part_of_speech=None, ics=None): |
... | ... | @@ -523,8 +524,9 @@ class LexemeAttribute(Model): |
523 | 524 | multiple = BooleanField() |
524 | 525 | required = BooleanField() |
525 | 526 | parts_of_speech = ManyToManyField(PartOfSpeech) |
526 | - inflection_characteristic = ForeignKey( | |
527 | - InflectionCharacteristic, blank=True, null=True) | |
527 | + takes_ic = BooleanField() | |
528 | + inflection_characteristics = ManyToManyField( | |
529 | + InflectionCharacteristic, blank=True) | |
528 | 530 | |
529 | 531 | def __unicode__(self): |
530 | 532 | return self.name |
... | ... |
media/js/lexeme-view.js
... | ... | @@ -237,6 +237,7 @@ function init_form_widgets() { |
237 | 237 | li.remove(); |
238 | 238 | jqgrid.show_changed(); |
239 | 239 | $('#table-preview').html(''); |
240 | + reload_attributes(); | |
240 | 241 | }); |
241 | 242 | $(document).on('click', '#add-row', function() { |
242 | 243 | var id = lexeme_id(); |
... | ... | @@ -364,6 +365,7 @@ function init_form_widgets() { |
364 | 365 | $(document).on('change', '#id_part_of_speech', check_pos); |
365 | 366 | $(document).on('change', '#id_new_owner', reload_classifications); |
366 | 367 | // TODO trzeba też uwzględniać usunięcie odmieniasia |
368 | + // TODO ostrzegać przed znikaniem atrybutów | |
367 | 369 | $(document).on('change', '.inflection-characteristic', reload_attributes); |
368 | 370 | $(document).on('keyup', '#id_entry', show_homonym_count); |
369 | 371 | } |
... | ... | @@ -951,7 +953,7 @@ var check_pos = function() { |
951 | 953 | } |
952 | 954 | select.prop('options').add(option); |
953 | 955 | }); |
954 | - select.selectedIndex = index; | |
956 | + select[0].selectedIndex = index; | |
955 | 957 | } else { |
956 | 958 | var li = $(this); |
957 | 959 | // copypasta... |
... | ... |