Commit 7bd2535a24f9ce09632c38a14e54083b99e46e8e

Authored by janek37
1 parent e0dd37a7

depr, better attribute importing, valence

dictionary/ajax_lexeme_view.py
... ... @@ -83,10 +83,9 @@ def attribute_forms(l, part_of_speech=None, ics=None):
83 83 @render_template('extra_attributes.html')
84 84 @ajax(method='get', template='extra_attributes.html')
85 85 def extra_attributes(request, lexeme_id, pos, ics):
86   - l = Lexeme.objects.get(pk=lexeme_id)
  86 + l = Lexeme.all_objects.get(pk=lexeme_id)
87 87 part_of_speech = PartOfSpeech.objects.get(symbol=pos)
88   - ics = InflectionCharacteristic.objects.filter(
89   - part_of_speech=part_of_speech, entry__in=ics)
  88 + ics = InflectionCharacteristic.objects.filter(pk__in=ics)
90 89 return {
91 90 'forms': attribute_forms(
92 91 l, part_of_speech=part_of_speech, ics=ics),
... ... @@ -94,7 +93,7 @@ def extra_attributes(request, lexeme_id, pos, ics):
94 93  
95 94 @ajax(method='get')
96 95 def check_attributes(request, lexeme_id, pos, ics):
97   - l = Lexeme.objects.get(pk=lexeme_id)
  96 + l = Lexeme.all_objects.get(pk=lexeme_id)
98 97 part_of_speech = PartOfSpeech.objects.get(symbol=pos)
99 98 ics = InflectionCharacteristic.objects.filter(
100 99 part_of_speech=part_of_speech, entry__in=ics)
... ...
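Note: the views above switch from Lexeme.objects to Lexeme.all_objects, which suggests the model carries an unfiltered manager next to a restricted default one. A minimal sketch of that common Django pattern, purely for orientation; the VisibleLexemeManager name and the deleted flag are assumptions, not part of this commit:

    from django.db import models

    class VisibleLexemeManager(models.Manager):
        # hypothetical default manager that hides e.g. soft-deleted lexemes
        def get_queryset(self):  # spelled get_query_set() on older Django versions
            return super(VisibleLexemeManager, self).get_queryset().filter(deleted=False)

    class Lexeme(models.Model):
        deleted = models.BooleanField(default=False)  # assumed soft-delete flag
        objects = VisibleLexemeManager()   # restricted default manager
        all_objects = models.Manager()     # unfiltered manager, as used in the views above

With such a pair, all_objects.get(pk=lexeme_id) also finds lexemes the default manager would hide, which is presumably why extra_attributes and check_attributes were switched over.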
dictionary/forms.py
... ... @@ -114,6 +114,7 @@ class LexemeEditForm(ModelForm):
114 114 'part_of_speech',
115 115 'entry',
116 116 'pronunciation',
  117 + 'valence',
117 118 'status',
118 119 'gloss',
119 120 'note',
... ... @@ -124,6 +125,7 @@ class LexemeEditForm(ModelForm):
124 125 'gloss': TextInput(attrs={'size': 40}),
125 126 'note': TextInput(attrs={'size': 40}),
126 127 'pronunciation': TextInput(attrs={'size': 40}),
  128 + 'valence': TextInput(attrs={'size': 40}),
127 129 }
128 130  
129 131 # abstract
... ...
dictionary/history.py
... ... @@ -10,6 +10,9 @@ attribute_translation = {
10 10 ('leksemy', 'haslo'): u'hasło',
11 11 ('leksemy', 'haslosuf'): u'sufiks hasła',
12 12 ('leksemy', 'glosa'): u'glosa',
  13 + ('leksemy', 'nota'): u'nota',
  14 + ('leksemy', 'wymowa'): u'wymowa',
  15 + ('leksemy', 'valence'): u'łączliwość',
13 16 ('leksemy', 'pos'): u'część mowy',
14 17 ('leksemy', 'slownik'): u'słownik właściciel',
15 18 ('leksemy', 'status'): u'status',
... ... @@ -26,6 +29,9 @@ attribute_translation_list = [
26 29 ('leksemy', 'haslo', u'hasło'),
27 30 ('leksemy', 'haslosuf', u'sufiks hasła'),
28 31 ('leksemy', 'glosa', u'glosa'),
  32 + ('leksemy', 'nota', u'nota'),
  33 + ('leksemy', 'wymowa', u'wymowa'),
  34 + ('leksemy', 'valence', u'łączliwość'),
29 35 ('leksemy', 'pos', u'część mowy'),
30 36 ('leksemy', 'slownik', u'słownik właściciel'),
31 37 ('leksemy', 'status', u'status'),
... ... @@ -40,6 +46,9 @@ lexeme_attribute_order = [
40 46 u'hasło',
41 47 u'sufiks hasła',
42 48 u'glosa',
  49 + u'nota',
  50 + u'wymowa',
  51 + u'łączliwość',
43 52 u'część mowy',
44 53 u'słownik właściciel',
45 54 u'status',
... ... @@ -57,6 +66,12 @@ def get_lexeme_attr(attr, lexeme):
57 66 return lexeme.entry_suffix
58 67 elif attr == 'glosa':
59 68 return lexeme.gloss
  69 + elif attr == 'wymowa':
  70 + return lexeme.pronunciation
  71 + elif attr == 'nota':
  72 + return lexeme.note
  73 + elif attr == 'valence':
  74 + return lexeme.valence
60 75 elif attr == 'pos':
61 76 return lexeme.part_of_speech.symbol
62 77 elif attr == 'slownik':
... ...
dictionary/management/commands/import_data.py
... ... @@ -11,8 +11,9 @@ from dictionary.models import *
11 11 DEFAULT_DATABASE = 'data/sgjp.db'
12 12  
13 13 MINI_MODE = True # do debugowania
14   -MINI_LEXEME_COUNT = 40000
15   -MINI_LEXEME_QUERY = "SELECT %s FROM leksemy WHERE pos IN ('v', 'ger', 'pact') LIMIT ?"
  14 +MINI_LEXEME_COUNT = 5000
  15 +#MINI_LEXEME_QUERY = "SELECT %s FROM leksemy WHERE pos IN ('v', 'ger', 'pact') LIMIT ?"
  16 +MINI_LEXEME_QUERY = "SELECT %s FROM leksemy l WHERE EXISTS (SELECT * FROM odmieniasie WHERE nr = l.nr AND charfl = 'm1') LIMIT ?"
16 17  
17 18 SQL_MODE = True
18 19  
... ... @@ -21,7 +22,33 @@ BATCH_SIZE = 5000
21 22 OTHER = 'inne'
22 23 DEFAULT_VOCAB = 'SGJP'
23 24  
24   -REFL = (u'—', u'się', u'(się)', u'sobie', u'(sobie)', u'się/sobie')
  25 +ATTRS = {
  26 + u'zwrotność': (
  27 + (('v', 'ger', 'pact'), None),
  28 + (u'—', u'się', u'(się)', u'sobie', u'(sobie)', u'się/sobie'),
  29 + ('haslosuf', lambda suf: suf.strip(' ?') or u'—'),
  30 + ),
  31 + u'przechodniość': (
  32 + (('v', 'pred'), None),
  33 + ('iT', 'qT', 'T'),
  34 + ('przechodniosc', lambda x: x),
  35 + ),
  36 + u'aspekt': (
  37 + (('v', 'pred'), None),
  38 + ('dk', 'ndk', 'ndk/dk', 'dk/ndk', 'ndk/(dk)', 'dk/(ndk)'),
  39 + ('aspekt', lambda x: x),
  40 + ),
  41 + u'właściwy': (
  42 + (('v', 'pred'), None),
  43 + ('Q', '(Q)', ''),
  44 + ('właściwy', lambda x: x),
  45 + ),
  46 + u'depr': (
  47 + (('subst', 'skrs'), 'm1'),
  48 + ('n', 'd', 'nd'),
  49 + ('depr', lambda x: x),
  50 + )
  51 +}
25 52  
26 53 # tymczasowa tabelka
27 54 BASIC_FORM_LABELS = {
... ... @@ -179,7 +206,7 @@ class ImportData(object):
179 206 self.vocabs = dict((v.id, v) for v in Vocabulary.objects.all())
180 207  
181 208 def new_qualifiers(self):
182   - sgjp = Vocabulary.objects.get(id=DEFAULT_VOCAB)
  209 + default = Vocabulary.objects.get(id=DEFAULT_VOCAB)
183 210 query_result = self.sqlite_cursor.execute("""
184 211 SELECT okwal FROM odmieniasie
185 212 UNION
... ... @@ -193,23 +220,29 @@ class ImportData(object):
193 220 for qualifier_label in row[0].split('|'):
194 221 if qualifier_label not in added:
195 222 added.add(qualifier_label)
196   - yield Qualifier(label=qualifier_label, vocabulary=sgjp)
  223 + yield Qualifier(label=qualifier_label, vocabulary=default)
197 224  
198 225 def cache_qualifiers(self):
199 226 if 'qual' not in self.__dict__:
200 227 self.qual = dict((q.label, q) for q in Qualifier.objects.all())
201 228  
202   - def create_refl_attribute(self):
203   - refl, created = LexemeAttribute.objects.get_or_create(
204   - name=u'zwrotność', closed=True)
205   - for pos in PartOfSpeech.objects.filter(symbol__in=('v', 'ger', 'pact')):
206   - refl.parts_of_speech.add(pos) #add
207   - refl_values = {}
208   - for val in REFL:
209   - refl_values[val], created = LexemeAttributeValue.objects.get_or_create(
210   - value=val, attribute=refl)
211   - refl_values[''] = refl_values[u'—']
212   - return refl_values
  229 + def create_attributes(self):
  230 + attr_values = {}
  231 + for attr_name, ((poses, ic), values, import_info) in ATTRS.iteritems():
  232 + la, created = LexemeAttribute.objects.get_or_create(
  233 + name=attr_name, closed=True, required=True, takes_ic=bool(ic))
  234 + for pos in PartOfSpeech.objects.filter(symbol__in=poses):
  235 + la.parts_of_speech.add(pos) #add
  236 + pos_ics = InflectionCharacteristic.objects.filter(
  237 + part_of_speech=pos, entry=ic)
  238 + for ic0 in pos_ics:
  239 + la.inflection_characteristics.add(ic0) #add
  240 + values_cache = {}
  241 + for val in values:
  242 + values_cache[val], created = LexemeAttributeValue.objects.get_or_create(
  243 + value=val, attribute=la)
  244 + attr_values[attr_name] = values_cache
  245 + return attr_values
213 246  
214 247 def new_lexemes(self):
215 248 self.cache_qualifiers()
... ... @@ -218,7 +251,7 @@ class ImportData(object):
218 251 MINI_LEXEME_QUERY % '*',(MINI_LEXEME_COUNT,))
219 252 else:
220 253 result = self.sqlite_cursor.execute('SELECT * FROM leksemy')
221   - refl_values = self.create_refl_attribute()
  254 + attr_values = self.create_attributes()
222 255 date = datetime.datetime.now()
223 256 cv_table = dict(
224 257 (cv.label, cv) for cv in ClassificationValue.objects.all())
... ... @@ -238,6 +271,7 @@ class ImportData(object):
238 271 gloss=row['glosa'] or '',
239 272 note=row['nota'] or '',
240 273 pronunciation=row['wymowa'] or '',
  274 + valence=row['łączliwość'] or '',
241 275 part_of_speech_id=row['pos'],
242 276 source='SGJP',
243 277 status=status,
... ... @@ -251,9 +285,13 @@ class ImportData(object):
251 285 if row['lkwal']:
252 286 for qual in row['lkwal'].split('|'):
253 287 lexeme_qualifiers.append((row['nr'], self.qual[qual]))
254   - if row['pos'] in ('v', 'ger', 'pact'):
255   - refl_value = refl_values.get(row['haslosuf'].strip(' ?'))
256   - lexeme_attrs.append((row['nr'], refl_value))
  288 + for attr_name, ((poses, ic), values, (column, f)) in ATTRS.iteritems():
  289 + if row['pos'] in poses:
  290 + attr_value = attr_values[attr_name].get(f(row[column]))
  291 + if attr_value:
  292 + lexeme_attrs.append((row['nr'], attr_value))
  293 + elif row[column]:
  294 + print 'unknown value of %s: %s' % (attr_name, row[column])
257 295 return (lexemes, lexeme_associations, lexeme_cvs, lexeme_qualifiers,
258 296 lexeme_attrs)
259 297  
... ... @@ -501,7 +539,7 @@ class ImportData(object):
501 539 Pattern,
502 540 PatternType,
503 541 Qualifier,
504   - Vocabulary,
  542 + #Vocabulary,
505 543 InflectionCharacteristic,
506 544 BaseFormLabel,
507 545 PartOfSpeech,
... ... @@ -521,7 +559,8 @@ class ImportData(object):
521 559 bulk_create(InflectionCharacteristic,
522 560 self.new_inflection_characteristics())
523 561 print 'importing vocabularies...'
524   - bulk_create(Vocabulary, self.new_vocabularies())
  562 + for v in self.new_vocabularies():
  563 + v.save()
525 564 print 'importing qualifiers...'
526 565 bulk_create(Qualifier, self.new_qualifiers())
527 566 print 'importing pattern types...'
... ... @@ -538,26 +577,26 @@ class ImportData(object):
538 577 print 'importing lexemes...'
539 578 (lexemes, lexeme_assoc, lexeme_cvs, lexeme_quals,
540 579 lexeme_attrs) = self.new_lexemes()
541   - print '...'
  580 + print 'creating...'
542 581 bulk_create(Lexeme, lexemes)
543   - print '...'
  582 + print 'associations...'
544 583 bulk_create(LexemeAssociation, lexeme_assoc)
545   - print '...'
  584 + print 'classifications...'
546 585 for lexeme_id, cv in lexeme_cvs:
547 586 cv.lexemes.add(lexeme_id) #add
548   - print '...'
  587 + print 'qualifiers...'
549 588 for lexeme_id, q in lexeme_quals:
550 589 q.lexeme_set.add(lexeme_id) #add
551   - print '...'
  590 + print 'attributes...'
552 591 for lexeme_id, attr_val in lexeme_attrs:
553 592 attr_val.lexemes.add(lexeme_id)
554 593 import_lexemes()
555 594 def import_lips():
556 595 print 'importing lexeme inflection patterns...'
557 596 lips, lip_quals = self.new_lexeme_inflection_patterns()
558   - print '...'
  597 + print 'creating...'
559 598 bulk_create(LexemeInflectionPattern, lips)
560   - print '...'
  599 + print 'qualifiers...'
561 600 for lexeme_id, index, q in lip_quals:
562 601 LexemeInflectionPattern.objects.get(
563 602 lexeme_id=lexeme_id, index=index).qualifiers.add(q)
... ...
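The old REFL constant is replaced by the ATTRS table, where each entry bundles three things: where the attribute applies, as (parts of speech, inflection characteristic); its closed set of allowed values; and how to read it from the source row, as (column, transform). A self-contained Python 2 sketch of how the import loop above consumes one entry; the sample row is invented, and a plain value tuple stands in for the LexemeAttributeValue cache used by the real code:

    # -*- coding: utf-8 -*-
    ATTRS = {
        u'zwrotność': (
            (('v', 'ger', 'pact'), None),   # applicable POS, no IC restriction
            (u'—', u'się', u'(się)', u'sobie', u'(sobie)', u'się/sobie'),
            ('haslosuf', lambda suf: suf.strip(' ?') or u'—'),  # source column + normalizer
        ),
    }

    row = {'nr': 1, 'pos': 'v', 'haslosuf': u' się ?'}  # invented sample row

    for attr_name, ((poses, ic), values, (column, f)) in ATTRS.iteritems():
        if row['pos'] in poses:
            normalized = f(row[column])        # here: u'się'
            if normalized in values:
                print 'lexeme %d gets %s = %s' % (row['nr'], attr_name, normalized)
            elif row[column]:
                print 'unknown value of %s: %s' % (attr_name, row[column])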
dictionary/models.py
... ... @@ -326,6 +326,7 @@ class Lexeme(Model):
326 326 note = TextField(blank=True, db_column='nota', verbose_name=u'nota')
327 327 pronunciation = TextField(
328 328 blank=True, db_column='wymowa', verbose_name=u'wymowa')
  329 + valence = TextField(blank=True, verbose_name=u'łączliwość')
329 330 homonym_number = IntegerField(db_column='hom', default=1)
330 331 part_of_speech = ForeignKey(
331 332 PartOfSpeech, db_column='pos', verbose_name=u'cz. mowy')
... ... @@ -443,8 +444,8 @@ class Lexeme(Model):
443 444 pos = part_of_speech or self.part_of_speech
444 445 attrs = LexemeAttribute.objects.all()
445 446 attrs = attrs.filter(parts_of_speech=pos)
446   - attrs = (attrs.filter(inflection_characteristic__in=ics)
447   - | attrs.filter(inflection_characteristic=None))
  447 + attrs = (attrs.filter(inflection_characteristics__in=ics)
  448 + | attrs.filter(takes_ic=False))
448 449 return attrs
449 450  
450 451 def attributes_values(self, part_of_speech=None, ics=None):
... ... @@ -523,8 +524,9 @@ class LexemeAttribute(Model):
523 524 multiple = BooleanField()
524 525 required = BooleanField()
525 526 parts_of_speech = ManyToManyField(PartOfSpeech)
526   - inflection_characteristic = ForeignKey(
527   - InflectionCharacteristic, blank=True, null=True)
  527 + takes_ic = BooleanField()
  528 + inflection_characteristics = ManyToManyField(
  529 + InflectionCharacteristic, blank=True)
528 530  
529 531 def __unicode__(self):
530 532 return self.name
... ...
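With LexemeAttribute now pointing at inflection characteristics through a ManyToManyField plus a takes_ic flag, the reworked Lexeme.attributes() lookup offers an attribute either because one of its declared inflection characteristics matches, or because it ignores inflection characteristics entirely. A minimal sketch of that intent, assuming the fields shown above (not code from the commit):

    def applicable_attributes(pos, ics):
        attrs = LexemeAttribute.objects.filter(parts_of_speech=pos)
        # attributes bound to specific inflection characteristics, e.g. 'depr' to 'm1'
        ic_bound = attrs.filter(inflection_characteristics__in=ics)
        # attributes that apply regardless of inflection characteristic
        ic_free = attrs.filter(takes_ic=False)
        return ic_bound | ic_free

In the import, takes_ic is set from whether the ATTRS entry names an inflection characteristic at all: only u'depr' does ('m1' on subst/skrs), so the other attributes stay available for every characteristic of their parts of speech.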
media/js/lexeme-view.js
... ... @@ -237,6 +237,7 @@ function init_form_widgets() {
237 237 li.remove();
238 238 jqgrid.show_changed();
239 239 $('#table-preview').html('');
  240 + reload_attributes();
240 241 });
241 242 $(document).on('click', '#add-row', function() {
242 243 var id = lexeme_id();
... ... @@ -364,6 +365,7 @@ function init_form_widgets() {
364 365 $(document).on('change', '#id_part_of_speech', check_pos);
365 366 $(document).on('change', '#id_new_owner', reload_classifications);
366 367 // TODO trzeba też uwzględniać usunięcie odmieniasia
  368 + // TODO ostrzegać przed znikaniem atrybutów
367 369 $(document).on('change', '.inflection-characteristic', reload_attributes);
368 370 $(document).on('keyup', '#id_entry', show_homonym_count);
369 371 }
... ... @@ -951,7 +953,7 @@ var check_pos = function() {
951 953 }
952 954 select.prop('options').add(option);
953 955 });
954   - select.selectedIndex = index;
  956 + select[0].selectedIndex = index;
955 957 } else {
956 958 var li = $(this);
957 959 // copypasta...
... ...