Commit f64086813653c91411f760018c256908714a86e7

Authored by janek37
1 parent 0254c2f0

poprawki do importu nowej bazy

dictionary/management/commands/import_data.py
... ... @@ -3,7 +3,7 @@
3 3 import sqlite3
4 4 import datetime
5 5 from django.db import connection, transaction
6   -from django.core.management.base import BaseCommand, CommandError
  6 +from django.core.management.base import BaseCommand
7 7 from django.contrib.auth.models import User
8 8  
9 9 from common.util import no_history
... ... @@ -11,18 +11,17 @@ from dictionary.models import *
11 11  
12 12 DEFAULT_DATABASE = 'data/sgjp.db'
13 13 MINI_LEXEME_COUNT = 500
14   -mini = False
15   -sql = True
  14 +MINI_MODE = False # do debugowania
  15 +SQL_MODE = True
16 16 cursor = connection.cursor()
17 17 db = None
18   -cur = None
  18 +sqlite_cursor = None
19 19 no_history()
20 20  
21 21 class Command(BaseCommand):
22 22 args = '<input db filename>'
23 23 help = 'Imports initial data'
24 24  
25   - #TODO opcja -mini (?)
26 25 def handle(self, db_name=DEFAULT_DATABASE, **options):
27 26 global db
28 27 db = db_name
... ... @@ -37,8 +36,8 @@ def get_cursor(db):
37 36 def import_lexical_classes():
38 37 lc = LexicalClass(symbol='inne')
39 38 lc.save()
40   - for row in cur.execute('select distinct pos from wzory'):
41   - if not sql:
  39 + for row in sqlite_cursor.execute('select distinct pos from wzory'):
  40 + if not SQL_MODE:
42 41 lc = LexicalClass(symbol=row['pos'])
43 42 lc.save()
44 43 else:
... ... @@ -47,17 +46,16 @@ def import_lexical_classes():
47 46  
48 47  
49 48 def import_parts_of_speech():
50   - cur2 = get_cursor(db)
51 49 other = 'inne'
52 50 lcs = {}
53   - for row in cur.execute('select distinct wzory.pos, leksemy.pos from wzory '
  51 + for row in sqlite_cursor.execute('select distinct wzory.pos, leksemy.pos from wzory '
54 52 'natural join odmieniasie join leksemy on '
55 53 'leksemy.nr = odmieniasie.nr'):
56 54 lcs[row[1]] = row[0]
57   - print row
58   - for row in cur.execute('SELECT pos FROM klasygramatyczne'):
  55 + #print row
  56 + for row in sqlite_cursor.execute('SELECT pos FROM klasygramatyczne'):
59 57 lc = lcs.get(row['pos'], other)
60   - if not sql:
  58 + if not SQL_MODE:
61 59 pos = PartOfSpeech(symbol=row['pos'])
62 60 pos.lexical_class = LexicalClass.objects.get(symbol=lc)
63 61 pos.save()
... ... @@ -67,13 +65,13 @@ def import_parts_of_speech():
67 65  
68 66  
69 67 def import_base_form_labels():
70   - query_result = cur.execute("""
  68 + query_result = sqlite_cursor.execute("""
71 69 SELECT efobaz FROM paradygmaty
72 70 UNION
73 71 SELECT efobaz FROM zakonczenia
74 72 """)
75 73 for row in query_result:
76   - if not sql:
  74 + if not SQL_MODE:
77 75 bfl = BaseFormLabel(entry=row[0])
78 76 bfl.save()
79 77 else:
... ... @@ -110,7 +108,7 @@ basic_form_labels_pos = {
110 108 ics = {}
111 109  
112 110 def import_inflection_characteristics():
113   - for row in cur.execute('SELECT DISTINCT charfl, pos FROM paradygmaty'):
  111 + for row in sqlite_cursor.execute('SELECT DISTINCT charfl, pos FROM paradygmaty'):
114 112 if row['charfl'] == '':
115 113 bfl_entry = '1' if row['pos'] in ('adj', 'adjcom') else ''
116 114 else:
... ... @@ -118,7 +116,7 @@ def import_inflection_characteristics():
118 116 if row['pos'] in basic_form_labels_pos:
119 117 bfl_entry = basic_form_labels_pos[row['pos']]
120 118 bfl = BaseFormLabel.objects.get(entry=bfl_entry)
121   - if not sql:
  119 + if not SQL_MODE:
122 120 ic = InflectionCharacteristic(
123 121 entry=row['charfl'], basic_form_label=bfl,
124 122 part_of_speech=PartOfSpeech.objects.get(pk=row['pos']))
... ... @@ -136,9 +134,9 @@ sgjp_domain = ('SGJP', 'WSJP', 'SJPDor', 'zmiotki')
136 134 def import_vocabularies():
137 135 try:
138 136 sgjp = User.objects.get(username='sgjp')
139   - except:
  137 + except User.DoesNotExist:
140 138 sgjp = None
141   - result = cur.execute("""
  139 + result = sqlite_cursor.execute("""
142 140 SELECT slownik FROM leksemy
143 141 UNION
144 142 SELECT slownik_uz FROM slowniki_uzywajace
... ... @@ -153,7 +151,7 @@ def import_vocabularies():
153 151  
154 152 def import_qualifiers():
155 153 sgjp = Vocabulary.objects.get(id='SGJP')
156   - query_result = cur.execute("""
  154 + query_result = sqlite_cursor.execute("""
157 155 SELECT okwal FROM odmieniasie
158 156 UNION
159 157 SELECT zkwal FROM zakonczenia
... ... @@ -166,27 +164,27 @@ def import_qualifiers():
166 164 for qualifier in row[0].split('|'):
167 165 if qualifier not in added:
168 166 added.add(qualifier)
169   - if not sql:
  167 + if not SQL_MODE:
170 168 q = Qualifier(label=qualifier, vocabulary=sgjp)
171 169 q.save()
172 170 else:
173   - cursor.execute("INSERT INTO kwalifikatory (kwal, slownik) "
174   - "VALUES (%s, %s)", [qualifier, sgjp.pk])
  171 + cursor.execute("INSERT INTO kwalifikatory (kwal, slownik, usuniety) "
  172 + "VALUES (%s, %s, %s)", [qualifier, sgjp.pk, False])
175 173  
176 174  
177 175 mini_lexeme_query = 'SELECT %s FROM leksemy LIMIT ?'
178 176  
179 177 def import_lexemes():
180   - if mini:
181   - result = cur.execute(mini_lexeme_query % '*', (MINI_LEXEME_COUNT,))
  178 + if MINI_MODE:
  179 + result = sqlite_cursor.execute(mini_lexeme_query % '*', (MINI_LEXEME_COUNT,))
182 180 else:
183   - result = cur.execute('SELECT * FROM leksemy')
  181 + result = sqlite_cursor.execute('SELECT * FROM leksemy')
184 182 date = datetime.datetime.now()
185 183 cv_table = dict(ClassificationValue.objects.values_list('label', 'pk'))
186 184 for row in result:
187 185 slownik = row['slownik']
188 186 status = 'conf' if slownik != 'zmiotki' else 'cand'
189   - if not sql:
  187 + if not SQL_MODE:
190 188 l = Lexeme()
191 189 l.id = row['nr']
192 190 l.entry = row['haslo']
... ... @@ -196,6 +194,7 @@ def import_lexemes():
196 194 l.status = status
197 195 l.gloss = row['glosa'] or ''
198 196 l.entry_suffix = row['haslosuf'] or ''
  197 + l.note = row['nota'] or ''
199 198 cv = ClassificationValue.objects.get(label=row['pospolitosc'])
200 199 cv.lexemes.add(l) #add
201 200 l.fix_homonym_number()
... ... @@ -209,11 +208,11 @@ def import_lexemes():
209 208 else:
210 209 cv_pk = cv_table[row['pospolitosc']]
211 210 cursor.execute(
212   - "INSERT INTO leksemy (id, haslo, haslosuf, glosa, hom, pos, zrodlo, "
213   - "status, komentarz, data_modyfikacji, slownik) VALUES (%s, %s, %s, %s, "
214   - "%s, %s, %s, %s, %s, %s, %s)", [row['nr'], row['haslo'],
215   - row['haslosuf'] or '', row['glosa'] or '', 1, row['pos'], 'SGJP',
216   - status, row['komentarz'], date, row['slownik']])
  211 + "INSERT INTO leksemy (id, haslo, haslosuf, glosa, nota, hom, pos, zrodlo, "
  212 + "status, komentarz, data_modyfikacji, slownik, usuniety) VALUES (%s, %s, %s, %s, "
  213 + "%s, %s, %s, %s, %s, %s, %s, %s, %s)", [row['nr'], row['haslo'],
  214 + row['haslosuf'] or '', row['glosa'] or '', row['nota'] or '', 1, row['pos'], 'SGJP',
  215 + status, row['komentarz'], date, row['slownik'], False])
217 216 cursor.execute(
218 217 "INSERT INTO leksemy_w_slownikach (l_id, slownik) "
219 218 "VALUES (%s, %s)", [row['nr'], slownik])
... ... @@ -229,13 +228,13 @@ def import_lexemes():
229 228  
230 229  
231 230 def import_lexeme_associations():
232   - if mini:
233   - result = cur.execute('SELECT * FROM slowniki_uzywajace WHERE nr in (%s)' %
  231 + if MINI_MODE:
  232 + result = sqlite_cursor.execute('SELECT * FROM slowniki_uzywajace WHERE nr in (%s)' %
234 233 (mini_lexeme_query % 'nr'), [MINI_LEXEME_COUNT])
235 234 else:
236   - result = cur.execute('SELECT * FROM slowniki_uzywajace')
  235 + result = sqlite_cursor.execute('SELECT * FROM slowniki_uzywajace')
237 236 for row in result:
238   - if not sql:
  237 + if not SQL_MODE:
239 238 v = Vocabulary.objects.get(pk=row['slownik_uz'])
240 239 l = Lexeme.objects.get(pk=row['nr'])
241 240 LexemeAssociation.objects.create(vocabulary=v, lexeme=l)
... ... @@ -246,10 +245,11 @@ def import_lexeme_associations():
246 245  
247 246  
248 247 def import_cross_reference_types():
249   - result = cur.execute('select distinct l1.pos pos1, l2.pos pos2, t.* '
250   - 'from odsylacze o join leksemy l1 on nrod=l1.nr '
251   - 'join leksemy l2 on nrdo=l2.nr '
252   - 'join typyodsylaczy t on t.typods=o.typods')
  248 + result = sqlite_cursor.execute(
  249 + 'select distinct l1.pos pos1, l2.pos pos2, t.* '
  250 + 'from odsylacze o join leksemy l1 on nrod=l1.nr '
  251 + 'join leksemy l2 on nrdo=l2.nr '
  252 + 'join typyodsylaczy t on t.typods=o.typods')
253 253 for row in result:
254 254 t = CrossReferenceType()
255 255 t.symbol = row['typods']
... ... @@ -261,23 +261,26 @@ def import_cross_reference_types():
261 261  
262 262  
263 263 def import_cross_references():
264   - if mini:
265   - result = cur.execute('SELECT * FROM odsylacze WHERE '
266   - 'nrod in (%(subq)s) and nrdo in (%(subq)s)' %
267   - {'subq': mini_lexeme_query % 'nr'},
268   - [MINI_LEXEME_COUNT, MINI_LEXEME_COUNT])
  264 + if MINI_MODE:
  265 + result = sqlite_cursor.execute(
  266 + 'SELECT * FROM odsylacze WHERE nrod in (%(subq)s) and nrdo in (%(subq)s)'
  267 + % {'subq': mini_lexeme_query % 'nr'},
  268 + [MINI_LEXEME_COUNT, MINI_LEXEME_COUNT])
269 269 else:
270   - result = cur.execute('SELECT * FROM odsylacze')
  270 + result = sqlite_cursor.execute('SELECT * FROM odsylacze')
271 271 for row in result:
272 272 if row['nrod'] and row['nrdo']: # no bo nie wiem jak to interpretować
273 273 l_from = Lexeme.objects.get(pk=row['nrod'])
274 274 l_to = Lexeme.objects.get(pk=row['nrdo'])
275   - cr_type = CrossReferenceType(
276   - symbol=row['typods'], from_pos=l_from.part_of_speech,
277   - to_pos=l_to.part_of_speech)
278   - if not sql:
  275 + try:
  276 + cr_type = CrossReferenceType.objects.get(
  277 + symbol=row['typods'], from_pos=l_from.part_of_speech,
  278 + to_pos=l_to.part_of_speech)
  279 + except CrossReferenceType.DoesNotExist:
  280 + print row['typods'], l_from.part_of_speech, l_to.part_of_speech
  281 + if not SQL_MODE:
279 282 CrossReference.objects.create(
280   - from_lexeme=l_from, to_lexeme=to_lexeme, type=cr_type)
  283 + from_lexeme=l_from, to_lexeme=l_to, type=cr_type)
281 284 else:
282 285 cursor.execute(
283 286 "INSERT INTO odsylacze (l_id_od, l_id_do, typods_id) "
... ... @@ -285,48 +288,49 @@ def import_cross_references():
285 288  
286 289  
287 290 def import_pattern_types():
288   - for row in cur.execute('SELECT DISTINCT typr, pos FROM paradygmaty'):
  291 + for row in sqlite_cursor.execute('SELECT DISTINCT typr, pos FROM paradygmaty'):
289 292 lc = PartOfSpeech.objects.get(symbol=row['pos']).lexical_class
290 293 PatternType.objects.get_or_create(lexical_class=lc, entry=row['typr'])
291 294 # prowizorka z powodu pustej klasy 'skr'
292   - for row in cur.execute('SELECT DISTINCT typr, pos FROM wzory'):
  295 + for row in sqlite_cursor.execute('SELECT DISTINCT typr, pos FROM wzory'):
293 296 lc = LexicalClass.objects.get(symbol=row['pos'])
294 297 PatternType.objects.get_or_create(lexical_class=lc, entry=row['typr'])
295 298  
296 299  
297 300 def import_patterns():
298   - for row in cur.execute('SELECT * FROM wzory'):
  301 + for row in sqlite_cursor.execute('SELECT * FROM wzory'):
299 302 pt = PatternType.objects.get(
300 303 lexical_class__symbol=row['pos'], entry=row['typr'])
301 304 status = 'temp'
302   - example = '-'
303   - if not sql:
  305 + if not SQL_MODE:
304 306 p = Pattern()
305 307 p.name = row['wzor']
306 308 p.type = pt
307 309 p.basic_form_ending = row['zakp']
  310 + p.example = row['przyklad'] or ''
  311 + p.comment = row['wkomentarz'] or ''
308 312  
309   - p.example = example
310 313 p.status = status
311 314 p.save()
312 315 else:
313 316 cursor.execute(
314 317 "INSERT INTO wzory (w_id, typ, przyklad, zakp, status, komentarz) "
315 318 "VALUES (%s, %s, %s, %s, %s, %s)",
316   - [row['wzor'], pt.pk, example, row['zakp'], status, ''])
  319 + [row['wzor'], pt.pk, row['przyklad'] or '', row['zakp'], status,
  320 + row['wkomentarz'] or ''])
317 321  
318 322  
319 323 def import_lexeme_inflection_patterns():
320   - if mini:
321   - result = cur.execute(
  324 + if MINI_MODE:
  325 + result = sqlite_cursor.execute(
322 326 'SELECT * FROM odmieniasie WHERE nr IN (%s)' % (mini_lexeme_query % 'nr'),
323 327 (MINI_LEXEME_COUNT,))
324 328 else:
325   - result = cur.execute('SELECT * FROM odmieniasie')
  329 + result = sqlite_cursor.execute('SELECT * FROM odmieniasie')
326 330 pos_table = dict(Lexeme.objects.values_list('pk', 'part_of_speech'))
327 331 pattern_pk_table = dict(Pattern.objects.values_list('name', 'pk'))
328 332 for row in result:
329   - if not sql:
  333 + if not SQL_MODE:
330 334 lip = LexemeInflectionPattern()
331 335 lip.lexeme = Lexeme.objects.get(id=row['nr'])
332 336 lip.index = row['oskl']
... ... @@ -356,12 +360,12 @@ def import_lexeme_inflection_patterns():
356 360 "qualifier_id) VALUES (%s, %s)", [last_id, q_id])
357 361  
358 362 def import_endings():
359   - if sql:
  363 + if SQL_MODE:
360 364 pattern_pk_table = dict(Pattern.objects.values_list('name', 'pk'))
361 365 bfl_table = dict(BaseFormLabel.objects.values_list('entry', 'pk'))
362   - for row in cur.execute('SELECT * FROM zakonczenia'):
  366 + for row in sqlite_cursor.execute('SELECT * FROM zakonczenia'):
363 367 if row['zak'] is not None:
364   - if not sql:
  368 + if not SQL_MODE:
365 369 e = Ending()
366 370 e.pattern = Pattern.objects.get(name=row['wzor'])
367 371 e.base_form_label = BaseFormLabel.objects.get(entry=row['efobaz'])
... ... @@ -390,7 +394,7 @@ def import_endings():
390 394  
391 395 def import_tables():
392 396 bfl_table = dict(BaseFormLabel.objects.values_list('entry', 'pk'))
393   - for row in cur.execute('SELECT * FROM paradygmaty'):
  397 + for row in sqlite_cursor.execute('SELECT * FROM paradygmaty'):
394 398 lc = PartOfSpeech.objects.get(symbol=row['pos']).lexical_class
395 399 variant, _created = Variant.objects.get_or_create(id=row['wariant'])
396 400 tt_data = {
... ... @@ -401,7 +405,7 @@ def import_tables():
401 405 entry=row['charfl'], part_of_speech__symbol=row['pos']),
402 406 }
403 407 tt, _created = TableTemplate.objects.get_or_create(**tt_data)
404   - if not sql:
  408 + if not SQL_MODE:
405 409 c = Cell()
406 410 c.table_template = tt
407 411 c.base_form_label = BaseFormLabel.objects.get(entry=row['efobaz'])
... ... @@ -434,7 +438,7 @@ def import_tables():
434 438  
435 439  
436 440 def import_table_headers():
437   - for row in cur.execute('SELECT * FROM naglowkiwierszy'):
  441 + for row in sqlite_cursor.execute('SELECT * FROM naglowkiwierszy'):
438 442 if row['styl'] != 'b' and row['nagl']:
439 443 tts = TableTemplate.objects.filter(
440 444 variant__id=row['wariant'], pattern_type__entry=row['typr'],
... ... @@ -442,7 +446,7 @@ def import_table_headers():
442 446 inflection_characteristic__part_of_speech__symbol=row['pos'])
443 447 if tts:
444 448 tt = tts[0]
445   - if not sql:
  449 + if not SQL_MODE:
446 450 th = TableHeader()
447 451 th.table_template = tt
448 452 th.row = row['row']
... ... @@ -461,14 +465,14 @@ def import_table_headers():
461 465  
462 466  
463 467 def single_import(fun, db):
464   - global cur, cursor
  468 + global sqlite_cursor, cursor
465 469 transaction.commit_unless_managed()
466 470 transaction.enter_transaction_management()
467   - transaction.managed(True)
468   - cur = get_cursor(db)
  471 + transaction.managed()
  472 + sqlite_cursor = get_cursor(db)
469 473 cursor = connection.cursor()
470 474 fun()
471   - cur.close()
  475 + sqlite_cursor.close()
472 476 cursor.close()
473 477 transaction.commit()
474 478 transaction.leave_transaction_management()
... ... @@ -477,7 +481,7 @@ def single_import(fun, db):
477 481 def delete_and_import():
478 482 transaction.commit_unless_managed()
479 483 transaction.enter_transaction_management()
480   - transaction.managed(True)
  484 + transaction.managed()
481 485 models = (
482 486 Qualifier,
483 487 LexicalClass,
... ... @@ -499,8 +503,8 @@ def delete_and_import():
499 503 for model in models:
500 504 model.objects.all().delete()
501 505  
502   - global cur
503   - cur = get_cursor(db)
  506 + global sqlite_cursor
  507 + sqlite_cursor = get_cursor(db)
504 508 print 'importing lexical classes...'
505 509 import_lexical_classes()
506 510 print 'importing parts of speech'
... ... @@ -533,7 +537,7 @@ def delete_and_import():
533 537 import_tables()
534 538 print 'importing table headers...'
535 539 import_table_headers()
536   - cur.close()
  540 + sqlite_cursor.close()
537 541 cursor.close()
538 542 transaction.commit()
539 543 transaction.leave_transaction_management()
... ... @@ -541,7 +545,7 @@ def delete_and_import():
541 545 import sys
542 546 if __name__ == '__main__':
543 547 if sys.argv[-1] == '-mini':
544   - mini = True
  548 + MINI_MODE = True
545 549 del sys.argv[-1]
546 550 if len(sys.argv) > 1:
547 551 db = sys.argv[1]
... ...
dictionary/models.py
... ... @@ -51,7 +51,7 @@ class Qualifier(Model):
51 51 exclusion_class = ForeignKey(
52 52 QualifierExclusionClass, db_column='klasa', null=True, blank=True,
53 53 verbose_name=u'klasa wykluczania')
54   - deleted = BooleanField(db_column='usuniety')
  54 + deleted = BooleanField(db_column='usuniety', default=False)
55 55  
56 56 objects = NotDeletedManager()
57 57 all_objects = Manager()
... ... @@ -128,7 +128,7 @@ class ClassificationValue(Model):
128 128 'self', db_column='rodzic', null=True, blank=True,
129 129 verbose_name=u'rodzic wartości', related_name='child_nodes')
130 130 lexemes = ManyToManyField('Lexeme', blank=True)
131   - deleted = BooleanField(db_column='usunieta')
  131 + deleted = BooleanField(db_column='usunieta', default=False)
132 132  
133 133 objects = NotDeletedManager()
134 134 all_objects = Manager()
... ... @@ -338,7 +338,7 @@ class Lexeme(Model):
338 338 responsible = ForeignKey(
339 339 User, blank=True, null=True, db_column='odpowiedzialny')
340 340 patterns = ManyToManyField(Pattern, through='LexemeInflectionPattern')
341   - deleted = BooleanField(db_column='usuniety')
  341 + deleted = BooleanField(db_column='usuniety', default=False)
342 342  
343 343 objects = NotDeletedManager()
344 344 all_objects = Manager()
... ...