poprawki do importu nowej bazy

janek37
1 parent 0254c2f0
Showing 2 changed files with 84 additions and 80 deletions
dictionary/management/commands/import_data.py
dictionary/models.py
@@ -3,7 +3,7 @@
 import sqlite3
 import datetime
 from django.db import connection, transaction
-from django.core.management.base import BaseCommand, CommandError
+from django.core.management.base import BaseCommand
 from django.contrib.auth.models import User
  
 from common.util import no_history
@@ -11,18 +11,17 @@ from dictionary.models import *
  
 DEFAULT_DATABASE = 'data/sgjp.db'
 MINI_LEXEME_COUNT = 500
-mini = False
-sql = True
+MINI_MODE = False # do debugowania
+SQL_MODE = True
 cursor = connection.cursor()
 db = None
-cur = None
+sqlite_cursor = None
 no_history()
  
 class Command(BaseCommand):
   args = '<input db filename>'
   help = 'Imports initial data'
  
-  #TODO opcja -mini (?)
   def handle(self, db_name=DEFAULT_DATABASE, **options):
     global db
     db = db_name
@@ -37,8 +36,8 @@ def get_cursor(db):
 def import_lexical_classes():
   lc = LexicalClass(symbol='inne')
   lc.save()
-  for row in cur.execute('select distinct pos from wzory'):
-    if not sql:
+  for row in sqlite_cursor.execute('select distinct pos from wzory'):
+    if not SQL_MODE:
       lc = LexicalClass(symbol=row['pos'])
       lc.save()
     else:
@@ -47,17 +46,16 @@ def import_lexical_classes():
  
  
 def import_parts_of_speech():
-  cur2 = get_cursor(db)
   other = 'inne'
   lcs = {}
-  for row in cur.execute('select distinct wzory.pos, leksemy.pos from wzory '
+  for row in sqlite_cursor.execute('select distinct wzory.pos, leksemy.pos from wzory '
                          'natural join odmieniasie join leksemy on '
                          'leksemy.nr = odmieniasie.nr'):
     lcs[row[1]] = row[0]
-    print row
-  for row in cur.execute('SELECT pos FROM klasygramatyczne'):
+    #print row
+  for row in sqlite_cursor.execute('SELECT pos FROM klasygramatyczne'):
     lc = lcs.get(row['pos'], other)
-    if not sql:
+    if not SQL_MODE:
       pos = PartOfSpeech(symbol=row['pos'])
       pos.lexical_class = LexicalClass.objects.get(symbol=lc)
       pos.save()
@@ -67,13 +65,13 @@ def import_parts_of_speech():
  
  
 def import_base_form_labels():
-  query_result = cur.execute("""
+  query_result = sqlite_cursor.execute("""
     SELECT efobaz FROM paradygmaty
     UNION
     SELECT efobaz FROM zakonczenia
     """)
   for row in query_result:
-    if not sql:
+    if not SQL_MODE:
       bfl = BaseFormLabel(entry=row[0])
       bfl.save()
     else:
@@ -110,7 +108,7 @@ basic_form_labels_pos = {
 ics = {}
  
 def import_inflection_characteristics():
-  for row in cur.execute('SELECT DISTINCT charfl, pos FROM paradygmaty'):
+  for row in sqlite_cursor.execute('SELECT DISTINCT charfl, pos FROM paradygmaty'):
     if row['charfl'] == '':
       bfl_entry = '1' if row['pos'] in ('adj', 'adjcom') else ''
     else:
@@ -118,7 +116,7 @@ def import_inflection_characteristics():
     if row['pos'] in basic_form_labels_pos:
       bfl_entry = basic_form_labels_pos[row['pos']]
     bfl = BaseFormLabel.objects.get(entry=bfl_entry)
-    if not sql:
+    if not SQL_MODE:
       ic = InflectionCharacteristic(
         entry=row['charfl'], basic_form_label=bfl,
         part_of_speech=PartOfSpeech.objects.get(pk=row['pos']))
@@ -136,9 +134,9 @@ sgjp_domain = ('SGJP', 'WSJP', 'SJPDor', 'zmiotki')
 def import_vocabularies():
   try:
     sgjp = User.objects.get(username='sgjp')
-  except:
+  except User.DoesNotExist:
     sgjp = None
-  result = cur.execute("""
+  result = sqlite_cursor.execute("""
     SELECT slownik FROM leksemy
     UNION
     SELECT slownik_uz FROM slowniki_uzywajace
@@ -153,7 +151,7 @@ def import_vocabularies():
  
 def import_qualifiers():
   sgjp = Vocabulary.objects.get(id='SGJP')
-  query_result = cur.execute("""
+  query_result = sqlite_cursor.execute("""
     SELECT okwal FROM odmieniasie
     UNION
     SELECT zkwal FROM zakonczenia
@@ -166,27 +164,27 @@ def import_qualifiers():
       for qualifier in row[0].split('|'):
         if qualifier not in added:
           added.add(qualifier)
-          if not sql:
+          if not SQL_MODE:
             q = Qualifier(label=qualifier, vocabulary=sgjp)
             q.save()
           else:
-            cursor.execute("INSERT INTO kwalifikatory (kwal, slownik) "
-                           "VALUES (%s, %s)", [qualifier, sgjp.pk])
+            cursor.execute("INSERT INTO kwalifikatory (kwal, slownik, usuniety) "
+                           "VALUES (%s, %s, %s)", [qualifier, sgjp.pk, False])
  
  
 mini_lexeme_query = 'SELECT %s FROM leksemy LIMIT ?'
  
 def import_lexemes():
-  if mini:
-    result = cur.execute(mini_lexeme_query % '*', (MINI_LEXEME_COUNT,))
+  if MINI_MODE:
+    result = sqlite_cursor.execute(mini_lexeme_query % '*', (MINI_LEXEME_COUNT,))
   else:
-    result = cur.execute('SELECT * FROM leksemy')
+    result = sqlite_cursor.execute('SELECT * FROM leksemy')
   date = datetime.datetime.now()
   cv_table = dict(ClassificationValue.objects.values_list('label', 'pk'))
   for row in result:
     slownik = row['slownik']
     status = 'conf' if slownik != 'zmiotki' else 'cand'
-    if not sql:
+    if not SQL_MODE:
       l = Lexeme()
       l.id = row['nr']
       l.entry = row['haslo']
@@ -196,6 +194,7 @@ def import_lexemes():
       l.status = status
       l.gloss = row['glosa'] or ''
       l.entry_suffix = row['haslosuf'] or ''
+      l.note = row['nota'] or ''
       cv = ClassificationValue.objects.get(label=row['pospolitosc'])
       cv.lexemes.add(l) #add
       l.fix_homonym_number()
@@ -209,11 +208,11 @@ def import_lexemes():
     else:
       cv_pk = cv_table[row['pospolitosc']]
       cursor.execute(
-        "INSERT INTO leksemy (id, haslo, haslosuf, glosa, hom, pos, zrodlo, "
-        "status, komentarz, data_modyfikacji, slownik) VALUES (%s, %s, %s, %s, "
-        "%s, %s, %s, %s, %s, %s, %s)", [row['nr'], row['haslo'],
-        row['haslosuf'] or '', row['glosa'] or '', 1, row['pos'], 'SGJP',
-        status, row['komentarz'], date, row['slownik']])
+        "INSERT INTO leksemy (id, haslo, haslosuf, glosa, nota, hom, pos, zrodlo, "
+        "status, komentarz, data_modyfikacji, slownik, usuniety) VALUES (%s, %s, %s, %s, "
+        "%s, %s, %s, %s, %s, %s, %s, %s, %s)", [row['nr'], row['haslo'],
+        row['haslosuf'] or '', row['glosa'] or '', row['nota'] or '', 1, row['pos'], 'SGJP',
+        status, row['komentarz'], date, row['slownik'], False])
       cursor.execute(
         "INSERT INTO leksemy_w_slownikach (l_id, slownik) "
         "VALUES (%s, %s)", [row['nr'], slownik])
@@ -229,13 +228,13 @@ def import_lexemes():
  
  
 def import_lexeme_associations():
-  if mini:
-    result = cur.execute('SELECT * FROM slowniki_uzywajace WHERE nr in (%s)' %
+  if MINI_MODE:
+    result = sqlite_cursor.execute('SELECT * FROM slowniki_uzywajace WHERE nr in (%s)' %
                          (mini_lexeme_query % 'nr'), [MINI_LEXEME_COUNT])
   else:
-    result = cur.execute('SELECT * FROM slowniki_uzywajace')
+    result = sqlite_cursor.execute('SELECT * FROM slowniki_uzywajace')
   for row in result:
-    if not sql:
+    if not SQL_MODE:
       v = Vocabulary.objects.get(pk=row['slownik_uz'])
       l = Lexeme.objects.get(pk=row['nr'])
       LexemeAssociation.objects.create(vocabulary=v, lexeme=l)
@@ -246,10 +245,11 @@ def import_lexeme_associations():
  
  
 def import_cross_reference_types():
-  result = cur.execute('select distinct l1.pos pos1, l2.pos pos2, t.* '
-                       'from odsylacze o join leksemy l1 on nrod=l1.nr '
-                       'join leksemy l2 on nrdo=l2.nr '
-                       'join typyodsylaczy t on t.typods=o.typods')
+  result = sqlite_cursor.execute(
+    'select distinct l1.pos pos1, l2.pos pos2, t.* '
+    'from odsylacze o join leksemy l1 on nrod=l1.nr '
+    'join leksemy l2 on nrdo=l2.nr '
+    'join typyodsylaczy t on t.typods=o.typods')
   for row in result:
     t = CrossReferenceType()
     t.symbol = row['typods']
@@ -261,23 +261,26 @@ def import_cross_reference_types():
  
  
 def import_cross_references():
-  if mini:
-    result = cur.execute('SELECT * FROM odsylacze WHERE '
-                         'nrod in (%(subq)s) and nrdo in (%(subq)s)' %
-                         {'subq': mini_lexeme_query % 'nr'},
-                         [MINI_LEXEME_COUNT, MINI_LEXEME_COUNT])
+  if MINI_MODE:
+    result = sqlite_cursor.execute(
+      'SELECT * FROM odsylacze WHERE nrod in (%(subq)s) and nrdo in (%(subq)s)'
+      % {'subq': mini_lexeme_query % 'nr'},
+      [MINI_LEXEME_COUNT, MINI_LEXEME_COUNT])
   else:
-    result = cur.execute('SELECT * FROM odsylacze')
+    result = sqlite_cursor.execute('SELECT * FROM odsylacze')
   for row in result:
     if row['nrod'] and row['nrdo']: # no bo nie wiem jak to interpretować
       l_from = Lexeme.objects.get(pk=row['nrod'])
       l_to = Lexeme.objects.get(pk=row['nrdo'])
-      cr_type = CrossReferenceType(
-        symbol=row['typods'], from_pos=l_from.part_of_speech,
-        to_pos=l_to.part_of_speech)
-      if not sql:
+      try:
+        cr_type = CrossReferenceType.objects.get(
+          symbol=row['typods'], from_pos=l_from.part_of_speech,
+          to_pos=l_to.part_of_speech)
+      except CrossReferenceType.DoesNotExist:
+        print row['typods'], l_from.part_of_speech, l_to.part_of_speech
+      if not SQL_MODE:
         CrossReference.objects.create(
-          from_lexeme=l_from, to_lexeme=to_lexeme, type=cr_type)
+          from_lexeme=l_from, to_lexeme=l_to, type=cr_type)
       else:
         cursor.execute(
           "INSERT INTO odsylacze (l_id_od, l_id_do, typods_id) "
@@ -285,48 +288,49 @@ def import_cross_references():
  
  
 def import_pattern_types():
-  for row in cur.execute('SELECT DISTINCT typr, pos FROM paradygmaty'):
+  for row in sqlite_cursor.execute('SELECT DISTINCT typr, pos FROM paradygmaty'):
     lc = PartOfSpeech.objects.get(symbol=row['pos']).lexical_class
     PatternType.objects.get_or_create(lexical_class=lc, entry=row['typr'])
   # prowizorka z powodu pustej klasy 'skr'
-  for row in cur.execute('SELECT DISTINCT typr, pos FROM wzory'):
+  for row in sqlite_cursor.execute('SELECT DISTINCT typr, pos FROM wzory'):
     lc = LexicalClass.objects.get(symbol=row['pos'])
     PatternType.objects.get_or_create(lexical_class=lc, entry=row['typr'])
  
  
 def import_patterns():
-  for row in cur.execute('SELECT * FROM wzory'):
+  for row in sqlite_cursor.execute('SELECT * FROM wzory'):
     pt = PatternType.objects.get(
       lexical_class__symbol=row['pos'], entry=row['typr'])
     status = 'temp'
-    example = '-'
-    if not sql:
+    if not SQL_MODE:
       p = Pattern()
       p.name = row['wzor']
       p.type = pt
       p.basic_form_ending = row['zakp']
+      p.example = row['przyklad'] or ''
+      p.comment = row['wkomentarz'] or ''
  
-      p.example = example
       p.status = status
       p.save()
     else:
       cursor.execute(
         "INSERT INTO wzory (w_id, typ, przyklad, zakp, status, komentarz) "
         "VALUES (%s, %s, %s, %s, %s, %s)",
-        [row['wzor'], pt.pk, example, row['zakp'], status, ''])
+        [row['wzor'], pt.pk, row['przyklad'] or '', row['zakp'], status,
+         row['wkomentarz'] or ''])
  
  
 def import_lexeme_inflection_patterns():
-  if mini:
-    result = cur.execute(
+  if MINI_MODE:
+    result = sqlite_cursor.execute(
       'SELECT * FROM odmieniasie WHERE nr IN (%s)' % (mini_lexeme_query % 'nr'),
       (MINI_LEXEME_COUNT,))
   else:
-    result = cur.execute('SELECT * FROM odmieniasie')
+    result = sqlite_cursor.execute('SELECT * FROM odmieniasie')
   pos_table = dict(Lexeme.objects.values_list('pk', 'part_of_speech'))
   pattern_pk_table = dict(Pattern.objects.values_list('name', 'pk'))
   for row in result:
-    if not sql:
+    if not SQL_MODE:
       lip = LexemeInflectionPattern()
       lip.lexeme = Lexeme.objects.get(id=row['nr'])
       lip.index = row['oskl']
@@ -356,12 +360,12 @@ def import_lexeme_inflection_patterns():
             "qualifier_id) VALUES (%s, %s)", [last_id, q_id])
  
 def import_endings():
-  if sql:
+  if SQL_MODE:
     pattern_pk_table = dict(Pattern.objects.values_list('name', 'pk'))
     bfl_table = dict(BaseFormLabel.objects.values_list('entry', 'pk'))
-  for row in cur.execute('SELECT * FROM zakonczenia'):
+  for row in sqlite_cursor.execute('SELECT * FROM zakonczenia'):
     if row['zak'] is not None:
-      if not sql:
+      if not SQL_MODE:
         e = Ending()
         e.pattern = Pattern.objects.get(name=row['wzor'])
         e.base_form_label = BaseFormLabel.objects.get(entry=row['efobaz'])
@@ -390,7 +394,7 @@ def import_endings():
  
 def import_tables():
   bfl_table = dict(BaseFormLabel.objects.values_list('entry', 'pk'))
-  for row in cur.execute('SELECT * FROM paradygmaty'):
+  for row in sqlite_cursor.execute('SELECT * FROM paradygmaty'):
     lc = PartOfSpeech.objects.get(symbol=row['pos']).lexical_class
     variant, _created = Variant.objects.get_or_create(id=row['wariant'])
     tt_data = {
@@ -401,7 +405,7 @@ def import_tables():
         entry=row['charfl'], part_of_speech__symbol=row['pos']),
     }
     tt, _created = TableTemplate.objects.get_or_create(**tt_data)
-    if not sql:
+    if not SQL_MODE:
       c = Cell()
       c.table_template = tt
       c.base_form_label = BaseFormLabel.objects.get(entry=row['efobaz'])
@@ -434,7 +438,7 @@ def import_tables():
  
  
 def import_table_headers():
-  for row in cur.execute('SELECT * FROM naglowkiwierszy'):
+  for row in sqlite_cursor.execute('SELECT * FROM naglowkiwierszy'):
     if row['styl'] != 'b' and row['nagl']:
       tts = TableTemplate.objects.filter(
         variant__id=row['wariant'], pattern_type__entry=row['typr'],
@@ -442,7 +446,7 @@ def import_table_headers():
         inflection_characteristic__part_of_speech__symbol=row['pos'])
       if tts:
         tt = tts[0]
-        if not sql:
+        if not SQL_MODE:
           th = TableHeader()
           th.table_template = tt
           th.row = row['row']
@@ -461,14 +465,14 @@ def import_table_headers():
  
  
 def single_import(fun, db):
-  global cur, cursor
+  global sqlite_cursor, cursor
   transaction.commit_unless_managed()
   transaction.enter_transaction_management()
-  transaction.managed(True)
-  cur = get_cursor(db)
+  transaction.managed()
+  sqlite_cursor = get_cursor(db)
   cursor = connection.cursor()
   fun()
-  cur.close()
+  sqlite_cursor.close()
   cursor.close()
   transaction.commit()
   transaction.leave_transaction_management()
@@ -477,7 +481,7 @@ def single_import(fun, db):
 def delete_and_import():
   transaction.commit_unless_managed()
   transaction.enter_transaction_management()
-  transaction.managed(True)
+  transaction.managed()
   models = (
     Qualifier,
     LexicalClass,
@@ -499,8 +503,8 @@ def delete_and_import():
   for model in models:
     model.objects.all().delete()
  
-  global cur
-  cur = get_cursor(db)
+  global sqlite_cursor
+  sqlite_cursor = get_cursor(db)
   print 'importing lexical classes...'
   import_lexical_classes()
   print 'importing parts of speech'
@@ -533,7 +537,7 @@ def delete_and_import():
   import_tables()
   print 'importing table headers...'
   import_table_headers()
-  cur.close()
+  sqlite_cursor.close()
   cursor.close()
   transaction.commit()
   transaction.leave_transaction_management()
@@ -541,7 +545,7 @@ def delete_and_import():
 import sys
 if __name__ == '__main__':
   if sys.argv[-1] == '-mini':
-    mini = True
+    MINI_MODE = True
     del sys.argv[-1]
   if len(sys.argv) > 1:
     db = sys.argv[1]
@@ -51,7 +51,7 @@ class Qualifier(Model):
   exclusion_class = ForeignKey(
     QualifierExclusionClass, db_column='klasa', null=True, blank=True,
     verbose_name=u'klasa wykluczania')
-  deleted = BooleanField(db_column='usuniety')
+  deleted = BooleanField(db_column='usuniety', default=False)
  
   objects = NotDeletedManager()
   all_objects = Manager()
@@ -128,7 +128,7 @@ class ClassificationValue(Model):
     'self', db_column='rodzic', null=True, blank=True,
     verbose_name=u'rodzic wartości', related_name='child_nodes')
   lexemes = ManyToManyField('Lexeme', blank=True)
-  deleted = BooleanField(db_column='usunieta')
+  deleted = BooleanField(db_column='usunieta', default=False)
  
   objects = NotDeletedManager()
   all_objects = Manager()
@@ -338,7 +338,7 @@ class Lexeme(Model):
   responsible = ForeignKey(
     User, blank=True, null=True, db_column='odpowiedzialny')
   patterns = ManyToManyField(Pattern, through='LexemeInflectionPattern')
-  deleted = BooleanField(db_column='usuniety')
+  deleted = BooleanField(db_column='usuniety', default=False)
  
   objects = NotDeletedManager()
   all_objects = Manager()