importowanie zwrotności

janek37
1 parent 23aa3c4c
Showing 2 changed files with 41 additions and 11 deletions
accounts/management/commands/create_users.py
dictionary/management/commands/import_data.py
@@ -24,7 +24,10 @@ def create_users():
   SGJP = Vocabulary.objects.get(id='SGJP')
   SJPDor = Vocabulary.objects.get(id='SJPDor')
   zmiotki = Vocabulary.objects.get(id='zmiotki')
-  WSJP = Vocabulary.objects.get(id='WSJP')
+  try:
+    WSJP = Vocabulary.objects.get(id='WSJP')
+  except Vocabulary.DoesNotExist:
+    WSJP = None
   Morfologik, created = Vocabulary.objects.get_or_create(id='Morfologik')
   PoliMorf, created = Vocabulary.objects.get_or_create(id='PoliMorf')
  
@@ -44,7 +47,8 @@ def create_users():
  
   admin.groups.add(admins)
  
-  sgjp_vocabs = (SGJP, SJPDor, zmiotki, WSJP)
+  sgjp_vocabs = (SGJP, SJPDor, zmiotki)
+  if WSJP: sgjp_vocabs += (WSJP,)
   nzm_vocabs = (zmiotki, Morfologik, PoliMorf)
  
   sgjp.groups.add(managers)
@@ -65,7 +69,8 @@ def create_users():
     rednzm.visible_vocabularies.add(vocab)
     rednzm.editable_vocabularies.add(vocab)
   rednzm.visible_vocabularies.add(SGJP)
-  rednzm.visible_vocabularies.add(WSJP)
+  if WSJP:
+    rednzm.visible_vocabularies.add(WSJP)
   rednzm.visible_vocabularies.add(SJPDor)
  
   supersgjp.groups.add(superlexicographers)
@@ -79,7 +84,8 @@ def create_users():
     supernzm.visible_vocabularies.add(vocab)
     supernzm.editable_vocabularies.add(vocab)
   supernzm.visible_vocabularies.add(SGJP)
-  supernzm.visible_vocabularies.add(WSJP)
+  if WSJP:
+    supernzm.visible_vocabularies.add(WSJP)
   supernzm.visible_vocabularies.add(SJPDor)
  
   wzornik.groups.add(superlexicographers)
@@ -10,9 +10,9 @@ from dictionary.models import *
  
 DEFAULT_DATABASE = 'data/sgjp.db'
  
-MINI_MODE = False # do debugowania
+MINI_MODE = True # do debugowania
 MINI_LEXEME_COUNT = 40000
-MINI_LEXEME_QUERY = 'SELECT %s FROM leksemy LIMIT ?'
+MINI_LEXEME_QUERY = "SELECT %s FROM leksemy WHERE pos IN ('v', 'ger', 'pact') LIMIT ?"
  
 SQL_MODE = True
  
@@ -21,6 +21,8 @@ BATCH_SIZE = 5000
 OTHER = 'inne'
 DEFAULT_VOCAB = 'SGJP'
  
+REFL = (u'—', u'się', u'(się)', u'sobie', u'(sobie)', u'się/sobie')
+
 # tymczasowa tabelka
 BASIC_FORM_LABELS = {
   '0-': '1',
@@ -197,6 +199,18 @@ class ImportData(object):
     if 'qual' not in self.__dict__:
       self.qual = dict((q.label, q) for q in Qualifier.objects.all())
  
+  def create_refl_attribute(self):
+    """Ensure the reflexivity attribute and its values exist.
+
+    Gets or creates the closed LexemeAttribute named below, links it to the
+    parts of speech with symbols 'v', 'ger' and 'pact', and gets or creates
+    one LexemeAttributeValue for every string in REFL.
+
+    Returns a dict mapping each REFL string (and additionally the empty
+    string) to its LexemeAttributeValue.
+    """
+    # `created` is overwritten by every get_or_create call and never read.
+    refl, created = LexemeAttribute.objects.get_or_create(
+      name=u'zwrotność', closed=True)
+    for pos in PartOfSpeech.objects.filter(symbol__in=('v', 'ger', 'pact')):
+      refl.parts_of_speech.add(pos) #add
+    refl_values = {}
+    for val in REFL:
+      refl_values[val], created = LexemeAttributeValue.objects.get_or_create(
+        value=val, attribute=refl)
+    # The empty key is an alias for the dash entry of REFL, so a lookup with
+    # an empty string yields the same value object as the dash does.
+    refl_values[''] = refl_values[u'—']
+    return refl_values
+
   def new_lexemes(self):
     self.cache_qualifiers()
     if MINI_MODE:
@@ -204,6 +218,7 @@ class ImportData(object):
         MINI_LEXEME_QUERY % '*',(MINI_LEXEME_COUNT,))
     else:
       result = self.sqlite_cursor.execute('SELECT * FROM leksemy')
+    refl_values = self.create_refl_attribute()
     date = datetime.datetime.now()
     cv_table = dict(
       (cv.label, cv) for cv in ClassificationValue.objects.all())
@@ -211,6 +226,7 @@ class ImportData(object):
     lexeme_associations = []
     lexeme_qualifiers = []
     lexeme_cvs = []
+    lexeme_attrs = []
     for row in result:
       slownik = row['slownik']
       status = 'conf' if slownik != 'zmiotki' else 'cand'
@@ -218,7 +234,7 @@ class ImportData(object):
       lexemes.append(Lexeme(
         id=row['nr'],
         entry=row['haslo'],
-        entry_suffix=row['haslosuf'] or '',
+        entry_suffix=row['haslosuf'] or '', # pozostałość historyczna
         gloss=row['glosa'] or '',
         note=row['nota'] or '',
         pronunciation=row['wymowa'] or '',
@@ -235,7 +251,11 @@ class ImportData(object):
       if row['lkwal']:
         for qual in row['lkwal'].split('|'):
           lexeme_qualifiers.append((row['nr'], self.qual[qual]))
-    return lexemes, lexeme_associations, lexeme_cvs, lexeme_qualifiers
+      if row['pos'] in ('v', 'ger', 'pact'):
+        refl_value = refl_values.get(row['haslosuf'].strip(' ?'))
+        lexeme_attrs.append((row['nr'], refl_value))
+    return (lexemes, lexeme_associations, lexeme_cvs, lexeme_qualifiers,
+      lexeme_attrs)
  
   def new_lexeme_associations(self):
     self.cache_vocabs()
@@ -280,7 +300,7 @@ class ImportData(object):
         'JOIN leksemy l2 on nrdo=l2.nr'
       )
     cr_type_table = dict(
-      ((crt.symbol, crt.from_pos, crt.to_pos), crt)
+      ((crt.symbol, crt.from_pos.symbol, crt.to_pos.symbol), crt)
       for crt in CrossReferenceType.objects.all()
     )
     for row in result:
@@ -516,7 +536,8 @@ class ImportData(object):
         pattern=pattern, base_form_label=bfl, index=index).qualifiers.add(q)
     def import_lexemes():
       print 'importing lexemes...'
-      lexemes, lexeme_assoc, lexeme_cvs, lexeme_quals = self.new_lexemes()
+      (lexemes, lexeme_assoc, lexeme_cvs, lexeme_quals,
+       lexeme_attrs) = self.new_lexemes()
       print '...'
       bulk_create(Lexeme, lexemes)
       print '...'
@@ -527,6 +548,9 @@ class ImportData(object):
       print '...'
       for lexeme_id, q in lexeme_quals:
         q.lexeme_set.add(lexeme_id) #add
+      print '...'
+      for lexeme_id, attr_val in lexeme_attrs:
+        attr_val.lexemes.add(lexeme_id)
     import_lexemes()
     def import_lips():
       print 'importing lexeme inflection patterns...'
@@ -562,4 +586,4 @@ class ImportData(object):
       bulk_create(model, self.__getattribute__(method_name)())
     elif method_name.startswith('import'):
       self.__getattribute__(method_name)()
-    self.close()
 \ No newline at end of file
+    self.close()