Commit 7bd2535a24f9ce09632c38a14e54083b99e46e8e

Authored by janek37
1 parent e0dd37a7

depr, better attribute importing, valence

dictionary/ajax_lexeme_view.py
... ... @@ -83,10 +83,9 @@ def attribute_forms(l, part_of_speech=None, ics=None):
83 83 @render_template('extra_attributes.html')
84 84 @ajax(method='get', template='extra_attributes.html')
85 85 def extra_attributes(request, lexeme_id, pos, ics):
86   - l = Lexeme.objects.get(pk=lexeme_id)
  86 + l = Lexeme.all_objects.get(pk=lexeme_id)
87 87 part_of_speech = PartOfSpeech.objects.get(symbol=pos)
88   - ics = InflectionCharacteristic.objects.filter(
89   - part_of_speech=part_of_speech, entry__in=ics)
  88 + ics = InflectionCharacteristic.objects.filter(pk__in=ics)
90 89 return {
91 90 'forms': attribute_forms(
92 91 l, part_of_speech=part_of_speech, ics=ics),
... ... @@ -94,7 +93,7 @@ def extra_attributes(request, lexeme_id, pos, ics):
94 93  
95 94 @ajax(method='get')
96 95 def check_attributes(request, lexeme_id, pos, ics):
97   - l = Lexeme.objects.get(pk=lexeme_id)
  96 + l = Lexeme.all_objects.get(pk=lexeme_id)
98 97 part_of_speech = PartOfSpeech.objects.get(symbol=pos)
99 98 ics = InflectionCharacteristic.objects.filter(
100 99 part_of_speech=part_of_speech, entry__in=ics)
... ...
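Note: the views above switch from Lexeme.objects to Lexeme.all_objects, which suggests the model carries an unfiltered manager next to a restricted default one. A minimal sketch of that common Django pattern, purely for orientation; the VisibleLexemeManager name and the deleted flag are assumptions, not part of this commit:

    from django.db import models

    class VisibleLexemeManager(models.Manager):
        # hypothetical default manager that hides e.g. soft-deleted lexemes
        def get_queryset(self):  # spelled get_query_set() on older Django versions
            return super(VisibleLexemeManager, self).get_queryset().filter(deleted=False)

    class Lexeme(models.Model):
        deleted = models.BooleanField(default=False)  # assumed soft-delete flag
        objects = VisibleLexemeManager()   # restricted default manager
        all_objects = models.Manager()     # unfiltered manager, as used in the views above

With such a pair, all_objects.get(pk=lexeme_id) also finds lexemes the default manager would hide, which is presumably why extra_attributes and check_attributes were switched over.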
dictionary/forms.py
... ... @@ -114,6 +114,7 @@ class LexemeEditForm(ModelForm):
114 114 'part_of_speech',
115 115 'entry',
116 116 'pronunciation',
  117 + 'valence',
117 118 'status',
118 119 'gloss',
119 120 'note',
... ... @@ -124,6 +125,7 @@ class LexemeEditForm(ModelForm):
124 125 'gloss': TextInput(attrs={'size': 40}),
125 126 'note': TextInput(attrs={'size': 40}),
126 127 'pronunciation': TextInput(attrs={'size': 40}),
  128 + 'valence': TextInput(attrs={'size': 40}),
127 129 }
128 130  
129 131 # abstract
... ...
dictionary/history.py
... ... @@ -10,6 +10,9 @@ attribute_translation = {
10 10 ('leksemy', 'haslo'): u'hasło',
11 11 ('leksemy', 'haslosuf'): u'sufiks hasła',
12 12 ('leksemy', 'glosa'): u'glosa',
  13 + ('leksemy', 'nota'): u'nota',
  14 + ('leksemy', 'wymowa'): u'wymowa',
  15 + ('leksemy', 'valence'): u'łączliwość',
13 16 ('leksemy', 'pos'): u'część mowy',
14 17 ('leksemy', 'slownik'): u'słownik właściciel',
15 18 ('leksemy', 'status'): u'status',
... ... @@ -26,6 +29,9 @@ attribute_translation_list = [
26 29 ('leksemy', 'haslo', u'hasło'),
27 30 ('leksemy', 'haslosuf', u'sufiks hasła'),
28 31 ('leksemy', 'glosa', u'glosa'),
  32 + ('leksemy', 'nota', u'nota'),
  33 + ('leksemy', 'wymowa', u'wymowa'),
  34 + ('leksemy', 'valence', u'łączliwość'),
29 35 ('leksemy', 'pos', u'część mowy'),
30 36 ('leksemy', 'slownik', u'słownik właściciel'),
31 37 ('leksemy', 'status', u'status'),
... ... @@ -40,6 +46,9 @@ lexeme_attribute_order = [
40 46 u'hasło',
41 47 u'sufiks hasła',
42 48 u'glosa',
  49 + u'nota',
  50 + u'wymowa',
  51 + u'łączliwość',
43 52 u'część mowy',
44 53 u'słownik właściciel',
45 54 u'status',
... ... @@ -57,6 +66,12 @@ def get_lexeme_attr(attr, lexeme):
57 66 return lexeme.entry_suffix
58 67 elif attr == 'glosa':
59 68 return lexeme.gloss
  69 + elif attr == 'wymowa':
  70 + return lexeme.pronunciation
  71 + elif attr == 'nota':
  72 + return lexeme.note
  73 + elif attr == 'valence':
  74 + return lexeme.valence
60 75 elif attr == 'pos':
61 76 return lexeme.part_of_speech.symbol
62 77 elif attr == 'slownik':
... ...
dictionary/management/commands/import_data.py
... ... @@ -11,8 +11,9 @@ from dictionary.models import *
11 11 DEFAULT_DATABASE = 'data/sgjp.db'
12 12  
13 13 MINI_MODE = True # do debugowania
14   -MINI_LEXEME_COUNT = 40000
15   -MINI_LEXEME_QUERY = "SELECT %s FROM leksemy WHERE pos IN ('v', 'ger', 'pact') LIMIT ?"
  14 +MINI_LEXEME_COUNT = 5000
  15 +#MINI_LEXEME_QUERY = "SELECT %s FROM leksemy WHERE pos IN ('v', 'ger', 'pact') LIMIT ?"
  16 +MINI_LEXEME_QUERY = "SELECT %s FROM leksemy l WHERE EXISTS (SELECT * FROM odmieniasie WHERE nr = l.nr AND charfl = 'm1') LIMIT ?"
16 17  
17 18 SQL_MODE = True
18 19  
... ... @@ -21,7 +22,33 @@ BATCH_SIZE = 5000
21 22 OTHER = 'inne'
22 23 DEFAULT_VOCAB = 'SGJP'
23 24  
24   -REFL = (u'—', u'się', u'(się)', u'sobie', u'(sobie)', u'się/sobie')
  25 +ATTRS = {
  26 + u'zwrotność': (
  27 + (('v', 'ger', 'pact'), None),
  28 + (u'—', u'się', u'(się)', u'sobie', u'(sobie)', u'się/sobie'),
  29 + ('haslosuf', lambda suf: suf.strip(' ?') or u'—'),
  30 + ),
  31 + u'przechodniość': (
  32 + (('v', 'pred'), None),
  33 + ('iT', 'qT', 'T'),
  34 + ('przechodniosc', lambda x: x),
  35 + ),
  36 + u'aspekt': (
  37 + (('v', 'pred'), None),
  38 + ('dk', 'ndk', 'ndk/dk', 'dk/ndk', 'ndk/(dk)', 'dk/(ndk)'),
  39 + ('aspekt', lambda x: x),
  40 + ),
  41 + u'właściwy': (
  42 + (('v', 'pred'), None),
  43 + ('Q', '(Q)', ''),
  44 + ('właściwy', lambda x: x),
  45 + ),
  46 + u'depr': (
  47 + (('subst', 'skrs'), 'm1'),
  48 + ('n', 'd', 'nd'),
  49 + ('depr', lambda x: x),
  50 + )
  51 +}
25 52  
26 53 # tymczasowa tabelka
27 54 BASIC_FORM_LABELS = {
... ... @@ -179,7 +206,7 @@ class ImportData(object):
179 206 self.vocabs = dict((v.id, v) for v in Vocabulary.objects.all())
180 207  
181 208 def new_qualifiers(self):
182   - sgjp = Vocabulary.objects.get(id=DEFAULT_VOCAB)
  209 + default = Vocabulary.objects.get(id=DEFAULT_VOCAB)
183 210 query_result = self.sqlite_cursor.execute("""
184 211 SELECT okwal FROM odmieniasie
185 212 UNION
... ... @@ -193,23 +220,29 @@ class ImportData(object):
193 220 for qualifier_label in row[0].split('|'):
194 221 if qualifier_label not in added:
195 222 added.add(qualifier_label)
196   - yield Qualifier(label=qualifier_label, vocabulary=sgjp)
  223 + yield Qualifier(label=qualifier_label, vocabulary=default)
197 224  
198 225 def cache_qualifiers(self):
199 226 if 'qual' not in self.__dict__:
200 227 self.qual = dict((q.label, q) for q in Qualifier.objects.all())
201 228  
202   - def create_refl_attribute(self):
203   - refl, created = LexemeAttribute.objects.get_or_create(
204   - name=u'zwrotność', closed=True)
205   - for pos in PartOfSpeech.objects.filter(symbol__in=('v', 'ger', 'pact')):
206   - refl.parts_of_speech.add(pos) #add
207   - refl_values = {}
208   - for val in REFL:
209   - refl_values[val], created = LexemeAttributeValue.objects.get_or_create(
210   - value=val, attribute=refl)
211   - refl_values[''] = refl_values[u'—']
212   - return refl_values
  229 + def create_attributes(self):
  230 + attr_values = {}
  231 + for attr_name, ((poses, ic), values, import_info) in ATTRS.iteritems():
  232 + la, created = LexemeAttribute.objects.get_or_create(
  233 + name=attr_name, closed=True, required=True, takes_ic=bool(ic))
  234 + for pos in PartOfSpeech.objects.filter(symbol__in=poses):
  235 + la.parts_of_speech.add(pos) #add
  236 + pos_ics = InflectionCharacteristic.objects.filter(
  237 + part_of_speech=pos, entry=ic)
  238 + for ic0 in pos_ics:
  239 + la.inflection_characteristics.add(ic0) #add
  240 + values_cache = {}
  241 + for val in values:
  242 + values_cache[val], created = LexemeAttributeValue.objects.get_or_create(
  243 + value=val, attribute=la)
  244 + attr_values[attr_name] = values_cache
  245 + return attr_values
213 246  
214 247 def new_lexemes(self):
215 248 self.cache_qualifiers()
... ... @@ -218,7 +251,7 @@ class ImportData(object):
218 251 MINI_LEXEME_QUERY % '*',(MINI_LEXEME_COUNT,))
219 252 else:
220 253 result = self.sqlite_cursor.execute('SELECT * FROM leksemy')
221   - refl_values = self.create_refl_attribute()
  254 + attr_values = self.create_attributes()
222 255 date = datetime.datetime.now()
223 256 cv_table = dict(
224 257 (cv.label, cv) for cv in ClassificationValue.objects.all())
... ... @@ -238,6 +271,7 @@ class ImportData(object):
238 271 gloss=row['glosa'] or '',
239 272 note=row['nota'] or '',
240 273 pronunciation=row['wymowa'] or '',
  274 + valence=row['łączliwość'] or '',
241 275 part_of_speech_id=row['pos'],
242 276 source='SGJP',
243 277 status=status,
... ... @@ -251,9 +285,13 @@ class ImportData(object):
251 285 if row['lkwal']:
252 286 for qual in row['lkwal'].split('|'):
253 287 lexeme_qualifiers.append((row['nr'], self.qual[qual]))
254   - if row['pos'] in ('v', 'ger', 'pact'):
255   - refl_value = refl_values.get(row['haslosuf'].strip(' ?'))
256   - lexeme_attrs.append((row['nr'], refl_value))
  288 + for attr_name, ((poses, ic), values, (column, f)) in ATTRS.iteritems():
  289 + if row['pos'] in poses:
  290 + attr_value = attr_values[attr_name].get(f(row[column]))
  291 + if attr_value:
  292 + lexeme_attrs.append((row['nr'], attr_value))
  293 + elif row[column]:
  294 + print 'unknown value of %s: %s' % (attr_name, row[column])
257 295 return (lexemes, lexeme_associations, lexeme_cvs, lexeme_qualifiers,
258 296 lexeme_attrs)
259 297  
... ... @@ -501,7 +539,7 @@ class ImportData(object):
501 539 Pattern,
502 540 PatternType,
503 541 Qualifier,
504   - Vocabulary,
  542 + #Vocabulary,
505 543 InflectionCharacteristic,
506 544 BaseFormLabel,
507 545 PartOfSpeech,
... ... @@ -521,7 +559,8 @@ class ImportData(object):
521 559 bulk_create(InflectionCharacteristic,
522 560 self.new_inflection_characteristics())
523 561 print 'importing vocabularies...'
524   - bulk_create(Vocabulary, self.new_vocabularies())
  562 + for v in self.new_vocabularies():
  563 + v.save()
525 564 print 'importing qualifiers...'
526 565 bulk_create(Qualifier, self.new_qualifiers())
527 566 print 'importing pattern types...'
... ... @@ -538,26 +577,26 @@ class ImportData(object):
538 577 print 'importing lexemes...'
539 578 (lexemes, lexeme_assoc, lexeme_cvs, lexeme_quals,
540 579 lexeme_attrs) = self.new_lexemes()
541   - print '...'
  580 + print 'creating...'
542 581 bulk_create(Lexeme, lexemes)
543   - print '...'
  582 + print 'associations...'
544 583 bulk_create(LexemeAssociation, lexeme_assoc)
545   - print '...'
  584 + print 'classifications...'
546 585 for lexeme_id, cv in lexeme_cvs:
547 586 cv.lexemes.add(lexeme_id) #add
548   - print '...'
  587 + print 'qualifiers...'
549 588 for lexeme_id, q in lexeme_quals:
550 589 q.lexeme_set.add(lexeme_id) #add
551   - print '...'
  590 + print 'attributes...'
552 591 for lexeme_id, attr_val in lexeme_attrs:
553 592 attr_val.lexemes.add(lexeme_id)
554 593 import_lexemes()
555 594 def import_lips():
556 595 print 'importing lexeme inflection patterns...'
557 596 lips, lip_quals = self.new_lexeme_inflection_patterns()
558   - print '...'
  597 + print 'creating...'
559 598 bulk_create(LexemeInflectionPattern, lips)
560   - print '...'
  599 + print 'qualifiers...'
561 600 for lexeme_id, index, q in lip_quals:
562 601 LexemeInflectionPattern.objects.get(
563 602 lexeme_id=lexeme_id, index=index).qualifiers.add(q)
... ...
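The old REFL constant is replaced by the ATTRS table, where each entry bundles three things: where the attribute applies, as (parts of speech, inflection characteristic); its closed set of allowed values; and how to read it from the source row, as (column, transform). A self-contained Python 2 sketch of how the import loop above consumes one entry; the sample row is invented, and a plain value tuple stands in for the LexemeAttributeValue cache used by the real code:

    # -*- coding: utf-8 -*-
    ATTRS = {
        u'zwrotność': (
            (('v', 'ger', 'pact'), None),   # applicable POS, no IC restriction
            (u'—', u'się', u'(się)', u'sobie', u'(sobie)', u'się/sobie'),
            ('haslosuf', lambda suf: suf.strip(' ?') or u'—'),  # source column + normalizer
        ),
    }

    row = {'nr': 1, 'pos': 'v', 'haslosuf': u' się ?'}  # invented sample row

    for attr_name, ((poses, ic), values, (column, f)) in ATTRS.iteritems():
        if row['pos'] in poses:
            normalized = f(row[column])        # here: u'się'
            if normalized in values:
                print 'lexeme %d gets %s = %s' % (row['nr'], attr_name, normalized)
            elif row[column]:
                print 'unknown value of %s: %s' % (attr_name, row[column])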
dictionary/models.py
... ... @@ -326,6 +326,7 @@ class Lexeme(Model):
326 326 note = TextField(blank=True, db_column='nota', verbose_name=u'nota')
327 327 pronunciation = TextField(
328 328 blank=True, db_column='wymowa', verbose_name=u'wymowa')
  329 + valence = TextField(blank=True, verbose_name=u'łączliwość')
329 330 homonym_number = IntegerField(db_column='hom', default=1)
330 331 part_of_speech = ForeignKey(
331 332 PartOfSpeech, db_column='pos', verbose_name=u'cz. mowy')
... ... @@ -443,8 +444,8 @@ class Lexeme(Model):
443 444 pos = part_of_speech or self.part_of_speech
444 445 attrs = LexemeAttribute.objects.all()
445 446 attrs = attrs.filter(parts_of_speech=pos)
446   - attrs = (attrs.filter(inflection_characteristic__in=ics)
447   - | attrs.filter(inflection_characteristic=None))
  447 + attrs = (attrs.filter(inflection_characteristics__in=ics)
  448 + | attrs.filter(takes_ic=False))
448 449 return attrs
449 450  
450 451 def attributes_values(self, part_of_speech=None, ics=None):
... ... @@ -523,8 +524,9 @@ class LexemeAttribute(Model):
523 524 multiple = BooleanField()
524 525 required = BooleanField()
525 526 parts_of_speech = ManyToManyField(PartOfSpeech)
526   - inflection_characteristic = ForeignKey(
527   - InflectionCharacteristic, blank=True, null=True)
  527 + takes_ic = BooleanField()
  528 + inflection_characteristics = ManyToManyField(
  529 + InflectionCharacteristic, blank=True)
528 530  
529 531 def __unicode__(self):
530 532 return self.name
... ...
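With LexemeAttribute now pointing at inflection characteristics through a ManyToManyField plus a takes_ic flag, the reworked Lexeme.attributes() lookup offers an attribute either because one of its declared inflection characteristics matches, or because it ignores inflection characteristics entirely. A minimal sketch of that intent, assuming the fields shown above (not code from the commit):

    def applicable_attributes(pos, ics):
        attrs = LexemeAttribute.objects.filter(parts_of_speech=pos)
        # attributes bound to specific inflection characteristics, e.g. 'depr' to 'm1'
        ic_bound = attrs.filter(inflection_characteristics__in=ics)
        # attributes that apply regardless of inflection characteristic
        ic_free = attrs.filter(takes_ic=False)
        return ic_bound | ic_free

In the import, takes_ic is set from whether the ATTRS entry names an inflection characteristic at all: only u'depr' does ('m1' on subst/skrs), so the other attributes stay available for every characteristic of their parts of speech.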
media/js/lexeme-view.js
... ... @@ -237,6 +237,7 @@ function init_form_widgets() {
237 237 li.remove();
238 238 jqgrid.show_changed();
239 239 $('#table-preview').html('');
  240 + reload_attributes();
240 241 });
241 242 $(document).on('click', '#add-row', function() {
242 243 var id = lexeme_id();
... ... @@ -364,6 +365,7 @@ function init_form_widgets() {
364 365 $(document).on('change', '#id_part_of_speech', check_pos);
365 366 $(document).on('change', '#id_new_owner', reload_classifications);
366 367 // TODO trzeba też uwzględniać usunięcie odmieniasia
  368 + // TODO ostrzegać przed znikaniem atrybutów
367 369 $(document).on('change', '.inflection-characteristic', reload_attributes);
368 370 $(document).on('keyup', '#id_entry', show_homonym_count);
369 371 }
... ... @@ -951,7 +953,7 @@ var check_pos = function() {
951 953 }
952 954 select.prop('options').add(option);
953 955 });
954   - select.selectedIndex = index;
  956 + select[0].selectedIndex = index;
955 957 } else {
956 958 var li = $(this);
957 959 // copypasta...
... ...