Commit 2043a7b2d845c55f325cfefd2f0924d51188e9b9
1 parent 3eabc2ee
bardziej zmodyfikowany import SGJP
Showing 3 changed files with 262 additions and 209 deletions
dictionary/management/commands/import_data.py
... | ... | @@ -4,7 +4,6 @@ import sqlite3 |
4 | 4 | import datetime |
5 | 5 | from django.db import connection, transaction |
6 | 6 | from django.core.management.base import BaseCommand |
7 | -from django.contrib.auth.models import User | |
8 | 7 | |
9 | 8 | from common.util import no_history |
10 | 9 | from dictionary.models import * |
... | ... | @@ -12,12 +11,13 @@ from dictionary.models import * |
12 | 11 | DEFAULT_DATABASE = 'data/sgjp.db' |
13 | 12 | |
14 | 13 | MINI_MODE = True # do debugowania |
15 | -MINI_LEXEME_COUNT = 500 | |
14 | +MINI_LEXEME_COUNT = 40000 | |
16 | 15 | MINI_LEXEME_QUERY = 'SELECT %s FROM leksemy LIMIT ?' |
17 | 16 | |
18 | -# UWAGA: aktualnie ustawienie SQL_MODE = False jest niekompletne | |
19 | 17 | SQL_MODE = True |
20 | 18 | |
19 | +BATCH_SIZE = 5000 | |
20 | + | |
21 | 21 | OTHER = 'inne' |
22 | 22 | DEFAULT_VOCAB = 'SGJP' |
23 | 23 | |
... | ... | @@ -60,6 +60,9 @@ def get_cursor(db): |
60 | 60 | conn.row_factory = sqlite3.Row |
61 | 61 | return conn.cursor() |
62 | 62 | |
63 | +def bulk_create(model, objects): | |
64 | + model.objects.bulk_create(objects, batch_size=BATCH_SIZE) | |
65 | + | |
63 | 66 | METHOD_NAMES = { |
64 | 67 | CrossReference: 'import_cross_references', |
65 | 68 | Ending: 'import_endings', |
... | ... | @@ -81,6 +84,9 @@ METHOD_NAMES = { |
81 | 84 | |
82 | 85 | class ImportData(object): |
83 | 86 | def __init__(self, db): |
87 | + transaction.commit_unless_managed() | |
88 | + transaction.enter_transaction_management() | |
89 | + transaction.managed() | |
84 | 90 | self.cursor = connection.cursor() |
85 | 91 | self.sqlite_cursor = get_cursor(db) |
86 | 92 | no_history() |
... | ... | @@ -88,12 +94,19 @@ class ImportData(object): |
88 | 94 | def close(self): |
89 | 95 | self.cursor.close() |
90 | 96 | self.sqlite_cursor.close() |
97 | + transaction.commit() | |
98 | + transaction.leave_transaction_management() | |
99 | + | |
91 | 100 | |
92 | 101 | def new_lexical_classes(self): |
93 | 102 | yield LexicalClass(symbol=OTHER) |
94 | 103 | for row in self.sqlite_cursor.execute('select distinct pos from wzory'): |
95 | 104 | yield LexicalClass(symbol=row['pos']) |
96 | 105 | |
106 | + def cache_lc(self): | |
107 | + if 'lc' not in self.__dict__: | |
108 | + self.lc = dict((lc.symbol, lc) for lc in LexicalClass.objects.all()) | |
109 | + | |
97 | 110 | def new_parts_of_speech(self): |
98 | 111 | lcs = {} |
99 | 112 | for row in self.sqlite_cursor.execute( |
... | ... | @@ -106,6 +119,16 @@ class ImportData(object): |
106 | 119 | yield PartOfSpeech( |
107 | 120 | symbol=row['pos'], lexical_class = LexicalClass.objects.get(symbol=lc)) |
108 | 121 | |
122 | + def cache_pos(self): | |
123 | + if 'pos' not in self.__dict__: | |
124 | + self.pos = dict((pos.symbol, pos) for pos in PartOfSpeech.objects.all()) | |
125 | + | |
126 | + def cache_lc_pos(self): | |
127 | + if 'lc_pos' not in self.__dict__: | |
128 | + self.lc_pos = dict( | |
129 | + (pos.symbol, pos.lexical_class) for pos in PartOfSpeech.objects.all() | |
130 | + ) | |
131 | + | |
109 | 132 | def new_base_form_labels(self): |
110 | 133 | query_result = self.sqlite_cursor.execute(""" |
111 | 134 | SELECT efobaz FROM paradygmaty |
... | ... | @@ -115,6 +138,10 @@ class ImportData(object): |
115 | 138 | for row in query_result: |
116 | 139 | yield BaseFormLabel(entry=row[0]) |
117 | 140 | |
141 | + def cache_bfl(self): | |
142 | + if 'bfls' not in self.__dict__: | |
143 | + self.bfls = dict((bfl.entry, bfl) for bfl in BaseFormLabel.objects.all()) | |
144 | + | |
118 | 145 | def new_inflection_characteristics(self): |
119 | 146 | for row in self.sqlite_cursor.execute( |
120 | 147 | 'SELECT DISTINCT charfl, pos FROM paradygmaty'): |
... | ... | @@ -130,9 +157,11 @@ class ImportData(object): |
130 | 157 | part_of_speech=PartOfSpeech.objects.get(pk=row['pos'])) |
131 | 158 | |
132 | 159 | def cache_ics(self): |
133 | - self.ics = {} | |
134 | - for ic in InflectionCharacteristic.objects.all(): | |
135 | - self.ics[(ic.basic_form_label.entry, ic.part_of_speech.symbol)] = ic | |
160 | + if 'ics' not in self.__dict__: | |
161 | + self.ics = dict( | |
162 | + ((ic.entry, ic.part_of_speech.symbol), ic) | |
163 | + for ic in InflectionCharacteristic.objects.all() | |
164 | + ) | |
136 | 165 | |
137 | 166 | def new_vocabularies(self): |
138 | 167 | result = self.sqlite_cursor.execute(""" |
... | ... | @@ -143,6 +172,10 @@ class ImportData(object): |
143 | 172 | for row in result: |
144 | 173 | yield Vocabulary(id = row[0]) |
145 | 174 | |
175 | + def cache_vocabs(self): | |
176 | + if 'vocabs' not in self.__dict__: | |
177 | + self.vocabs = dict((v.id, v) for v in Vocabulary.objects.all()) | |
178 | + | |
146 | 179 | def new_qualifiers(self): |
147 | 180 | sgjp = Vocabulary.objects.get(id=DEFAULT_VOCAB) |
148 | 181 | query_result = self.sqlite_cursor.execute(""" |
... | ... | @@ -160,51 +193,61 @@ class ImportData(object): |
160 | 193 | added.add(qualifier_label) |
161 | 194 | yield Qualifier(label=qualifier_label, vocabulary=sgjp) |
162 | 195 | |
163 | - def import_lexemes(self): | |
196 | + def cache_qualifiers(self): | |
197 | + if 'qual' not in self.__dict__: | |
198 | + self.qual = dict((q.label, q) for q in Qualifier.objects.all()) | |
199 | + | |
200 | + def new_lexemes(self): | |
201 | + self.cache_qualifiers() | |
164 | 202 | if MINI_MODE: |
165 | 203 | result = self.sqlite_cursor.execute( |
166 | 204 | MINI_LEXEME_QUERY % '*',(MINI_LEXEME_COUNT,)) |
167 | 205 | else: |
168 | 206 | result = self.sqlite_cursor.execute('SELECT * FROM leksemy') |
169 | 207 | date = datetime.datetime.now() |
170 | - cv_table = dict(ClassificationValue.objects.values_list('label', 'pk')) | |
208 | + cv_table = dict( | |
209 | + (cv.label, cv) for cv in ClassificationValue.objects.all()) | |
210 | + lexemes = [] | |
211 | + lexeme_associations = [] | |
212 | + lexeme_qualifiers = [] | |
213 | + lexeme_cvs = [] | |
171 | 214 | for row in result: |
172 | 215 | slownik = row['slownik'] |
173 | 216 | status = 'conf' if slownik != 'zmiotki' else 'cand' |
174 | - cv_pk = cv_table[row['pospolitosc']] | |
175 | - self.cursor.execute( | |
176 | - "INSERT INTO leksemy (id, haslo, haslosuf, glosa, nota, wymowa, hom, " | |
177 | - "pos, zrodlo, status, komentarz, data_modyfikacji, slownik, usuniety) " | |
178 | - "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", | |
179 | - [row['nr'], row['haslo'], row['haslosuf'] or '', row['glosa'] or '', | |
180 | - row['nota'] or '', row['wymowa'] or '', 1, row['pos'], 'SGJP', | |
181 | - status, row['komentarz'], date, row['slownik'], False]) | |
182 | - self.cursor.execute( | |
183 | - "INSERT INTO leksemy_w_slownikach (l_id, slownik) " | |
184 | - "VALUES (%s, %s)", [row['nr'], slownik]) | |
185 | - self.cursor.execute( | |
186 | - "INSERT INTO wartosci_klasyfikacji_lexemes (classificationvalue_id, " | |
187 | - "lexeme_id) VALUES (%s, %s)", [cv_pk, row['nr']]) | |
217 | + cv = cv_table[row['pospolitosc']] | |
218 | + lexemes.append(Lexeme( | |
219 | + id=row['nr'], | |
220 | + entry=row['haslo'], | |
221 | + entry_suffix=row['haslosuf'] or '', | |
222 | + gloss=row['glosa'] or '', | |
223 | + note=row['nota'] or '', | |
224 | + pronunciation=row['wymowa'] or '', | |
225 | + part_of_speech_id=row['pos'], | |
226 | + source='SGJP', | |
227 | + status=status, | |
228 | + comment=row['komentarz'] or '', | |
229 | + last_modified=date, | |
230 | + owner_vocabulary_id=slownik, | |
231 | + )) | |
232 | + lexeme_associations.append(LexemeAssociation( | |
233 | + lexeme_id=row['nr'], vocabulary_id=slownik)) | |
234 | + lexeme_cvs.append((row['nr'], cv)) | |
188 | 235 | if row['lkwal']: |
189 | 236 | for qual in row['lkwal'].split('|'): |
190 | - q_id = Qualifier.objects.get(label=qual).pk | |
191 | - self.cursor.execute( | |
192 | - "INSERT INTO kwalifikatory_leksemow (lexeme_id, " | |
193 | - "qualifier_id) VALUES (%s, %s)", [row['nr'], q_id]) | |
237 | + lexeme_qualifiers.append((row['nr'], self.qual[qual])) | |
238 | + return lexemes, lexeme_associations, lexeme_cvs, lexeme_qualifiers | |
194 | 239 | |
195 | 240 | def new_lexeme_associations(self): |
241 | + self.cache_vocabs() | |
196 | 242 | if MINI_MODE: |
197 | 243 | result = self.sqlite_cursor.execute( |
198 | 244 | 'SELECT * FROM slowniki_uzywajace WHERE nr in (%s)' |
199 | 245 | % (MINI_LEXEME_QUERY % 'nr'), [MINI_LEXEME_COUNT]) |
200 | 246 | else: |
201 | 247 | result = self.sqlite_cursor.execute('SELECT * FROM slowniki_uzywajace') |
202 | - vocab_table = dict( | |
203 | - (v.id, v) for v in Vocabulary.objects.all() | |
204 | - ) | |
205 | 248 | for row in result: |
206 | 249 | yield LexemeAssociation( |
207 | - vocabulary=vocab_table[row['slownik_uz']], lexeme_id=row['nr']) | |
250 | + vocabulary=self.vocabs[row['slownik_uz']], lexeme_id=row['nr']) | |
208 | 251 | |
209 | 252 | def new_cross_reference_types(self): |
210 | 253 | result = self.sqlite_cursor.execute( |
... | ... | @@ -221,7 +264,7 @@ class ImportData(object): |
221 | 264 | to_pos=PartOfSpeech.objects.get(symbol=row['pos2']), |
222 | 265 | ) |
223 | 266 | |
224 | - def import_cross_references(self): | |
267 | + def new_cross_references(self): | |
225 | 268 | if MINI_MODE: |
226 | 269 | result = self.sqlite_cursor.execute( |
227 | 270 | 'SELECT o.*, l1.pos pos1, l2.pos pos2 FROM odsylacze o ' |
... | ... | @@ -249,129 +292,126 @@ class ImportData(object): |
249 | 292 | type=cr_type) |
250 | 293 | |
251 | 294 | def import_pattern_types(self): |
295 | + self.cache_lc_pos() | |
252 | 296 | result = self.sqlite_cursor.execute( |
253 | 297 | 'SELECT DISTINCT typr, pos FROM paradygmaty') |
254 | - lc_pos_table = dict( | |
255 | - (pos.symbol, pos.lexical_class) for pos in PartOfSpeech.objects.all() | |
256 | - ) | |
257 | 298 | for row in result: |
258 | - lc = lc_pos_table[row['pos']] | |
299 | + lc = self.lc_pos[row['pos']] | |
259 | 300 | PatternType.objects.get_or_create(lexical_class=lc, entry=row['typr']) |
260 | 301 | # prowizorka z powodu pustej klasy 'skr' |
261 | - lc_table = dict( | |
262 | - (lc.symbol, lc) for lc in LexicalClass.objects.all() | |
263 | - ) | |
302 | + self.cache_lc() | |
264 | 303 | result = self.sqlite_cursor.execute('SELECT DISTINCT typr, pos FROM wzory') |
265 | 304 | for row in result: |
266 | - lc = lc_table[row['pos']] | |
305 | + lc = self.lc[row['pos']] | |
267 | 306 | PatternType.objects.get_or_create(lexical_class=lc, entry=row['typr']) |
268 | 307 | |
308 | + def cache_ptypes(self): | |
309 | + if 'ptypes' not in self.__dict__: | |
310 | + self.ptypes = dict( | |
311 | + ((pt.lexical_class.symbol, pt.entry), pt) | |
312 | + for pt in PatternType.objects.all() | |
313 | + ) | |
314 | + | |
269 | 315 | def new_patterns(self): |
270 | - pt_table = dict( | |
271 | - ((pt.lexical_class.symbol, pt.entry), pt) | |
272 | - for pt in PatternType.objects.all() | |
273 | - ) | |
316 | + self.cache_ptypes() | |
274 | 317 | for row in self.sqlite_cursor.execute('SELECT * FROM wzory'): |
275 | - pt = pt_table[(row['pos'], row['typr'])] | |
276 | - status = 'temp' | |
277 | 318 | yield Pattern( |
278 | 319 | name=row['wzor'], |
279 | - type=pt, | |
320 | + type=self.ptypes[(row['pos'], row['typr'])], | |
280 | 321 | basic_form_ending=row['zakp'], |
281 | 322 | example=row['przyklad'] or '', |
282 | 323 | comment=row['wkomentarz'] or '', |
283 | - status = status, | |
324 | + status = 'temp', | |
284 | 325 | ) |
285 | 326 | |
286 | - def import_endings(self): | |
287 | - if SQL_MODE: | |
288 | - pattern_pk_table = dict(Pattern.objects.values_list('name', 'pk')) | |
289 | - bfl_table = dict(BaseFormLabel.objects.values_list('entry', 'pk')) | |
327 | + def cache_patterns(self): | |
328 | +  if 'patterns' not in self.__dict__: | |
329 | + self.patterns = dict((p.name, p) for p in Pattern.objects.all()) | |
330 | + | |
331 | + def new_endings(self): | |
332 | + self.cache_qualifiers() | |
333 | + self.cache_patterns() | |
334 | + self.cache_bfl() | |
335 | + endings = [] | |
336 | + ending_quals = [] | |
290 | 337 | for row in self.sqlite_cursor.execute('SELECT * FROM zakonczenia'): |
291 | 338 | if row['zak'] is not None: |
292 | - if not SQL_MODE: | |
293 | - e = Ending( | |
294 | - pattern=Pattern.objects.get(name=row['wzor']), | |
295 | - base_form_label = BaseFormLabel.objects.get(entry=row['efobaz']), | |
296 | - string = row['zak'], | |
297 | - index = row['nrskl'], | |
298 | - ) | |
299 | - e.save() | |
339 | + endings.append(Ending( | |
340 | + pattern=self.patterns[row['wzor']], | |
341 | + base_form_label = self.bfls[row['efobaz']], | |
342 | + string = row['zak'], | |
343 | + index = row['nrskl'], | |
344 | + )) | |
345 | + if row['zkwal']: | |
300 | 346 | for qual in row['zkwal'].split('|'): |
301 | - e.qualifiers.add(Qualifier.objects.get(label=qual)) #add | |
302 | - else: | |
303 | - pattern_pk = pattern_pk_table[row['wzor']] | |
304 | - if pattern_pk: | |
305 | - efobaz_id = bfl_table[row['efobaz']] | |
306 | - self.cursor.execute( | |
307 | - "INSERT INTO zakonczenia (w_id, efobaz, zind, zak) VALUES " | |
308 | - "(%s, %s, %s, %s)", | |
309 | - [pattern_pk, efobaz_id, row['nrskl'], row['zak']]) | |
310 | - if row['zkwal']: | |
311 | - self.cursor.execute("select currval('zakonczenia_id_seq')") | |
312 | - last_id = self.cursor.fetchone()[0] | |
313 | - for qual in row['zkwal'].split('|'): | |
314 | - q_id = Qualifier.objects.get(label=qual).pk | |
315 | - self.cursor.execute( | |
316 | - "INSERT INTO kwalifikatory_zakonczen (ending_id, qualifier_id) " | |
317 | - "VALUES (%s, %s)", [last_id, q_id]) | |
318 | - | |
319 | - def import_lexeme_inflection_patterns(self): | |
347 | + ending_quals.append(( | |
348 | + self.patterns[row['wzor']], | |
349 | + self.bfls[row['efobaz']], | |
350 | + row['nrskl'], | |
351 | + self.qual[qual])) | |
352 | + return endings, ending_quals | |
353 | + | |
354 | + def new_lexeme_inflection_patterns(self): | |
355 | + self.cache_ics() | |
356 | + self.cache_qualifiers() | |
357 | + self.cache_patterns() | |
320 | 358 | if MINI_MODE: |
321 | 359 | result = self.sqlite_cursor.execute( |
322 | - 'SELECT * FROM odmieniasie WHERE nr IN (%s)' % (MINI_LEXEME_QUERY % 'nr'), | |
360 | + 'SELECT o.*, l.pos FROM odmieniasie o ' | |
361 | + 'JOIN leksemy l on o.nr = l.nr ' | |
362 | + 'WHERE l.nr IN (%s)' % (MINI_LEXEME_QUERY % 'nr'), | |
323 | 363 | (MINI_LEXEME_COUNT,)) |
324 | 364 | else: |
325 | 365 | result = self.sqlite_cursor.execute('SELECT * FROM odmieniasie') |
326 | - pos_table = dict(Lexeme.objects.values_list('pk', 'part_of_speech')) | |
327 | - pattern_pk_table = dict(Pattern.objects.values_list('name', 'pk')) | |
366 | + lips = [] | |
367 | + lip_quals = [] | |
328 | 368 | for row in result: |
329 | - if not SQL_MODE: | |
330 | - lip = LexemeInflectionPattern( | |
331 | - lexeme_id=row['nr'], | |
332 | - index=row['oskl'], | |
333 | - pattern=Pattern.objects.get(name=row['wzor']), | |
334 | - inflection_characteristic=self.ics[ | |
335 | - (row['charfl'], lip.lexeme.part_of_speech)], | |
336 | - root=row['rdzen'], | |
337 | - ) | |
338 | - lip.save() | |
339 | - # nieaktualne | |
340 | - if row['okwal']: | |
341 | - lip.qualifiers.add(Qualifier.objects.get(label=row['okwal'])) #add | |
342 | - else: | |
343 | - pos = pos_table[row['nr']] | |
344 | - pattern_pk = pattern_pk_table[row['wzor']] | |
345 | - charfl_id = self.ics[(row['charfl'], pos)].pk | |
346 | - self.cursor.execute( | |
347 | - "INSERT INTO odmieniasie (l_id, oind, w_id, charfl, rdzen) " | |
348 | - "VALUES (%s, %s, %s, %s, %s) ", [row['nr'], row['oskl'], pattern_pk, | |
349 | - charfl_id, row['rdzen']]) | |
350 | - if row['okwal']: | |
351 | - self.cursor.execute("select currval('odmieniasie_id_seq')") | |
352 | - last_id = self.cursor.fetchone()[0] | |
353 | - for qual in row['okwal'].split('|'): | |
354 | - q_id = Qualifier.objects.get(label=qual).pk | |
355 | - self.cursor.execute( | |
356 | - "INSERT INTO kwalifikatory_odmieniasiow (lexemeinflectionpattern_id, " | |
357 | - "qualifier_id) VALUES (%s, %s)", [last_id, q_id]) | |
369 | + lexeme_id = row['nr'] | |
370 | + lips.append(LexemeInflectionPattern( | |
371 | + lexeme_id=lexeme_id, | |
372 | + index=row['oskl'], | |
373 | + pattern=self.patterns[row['wzor']], | |
374 | + inflection_characteristic=self.ics[ | |
375 | + (row['charfl'], row['pos'])], | |
376 | + root=row['rdzen'], | |
377 | + )) | |
378 | + if row['okwal']: | |
379 | + for qual in row['okwal'].split('|'): | |
380 | + lip_quals.append((lexeme_id, row['oskl'], self.qual[qual])) #add | |
381 | + return lips, lip_quals | |
382 | + | |
383 | + def new_variants(self): | |
384 | + result = self.sqlite_cursor.execute( | |
385 | + 'SELECT DISTINCT wariant FROM paradygmaty') | |
386 | + for row in result: | |
387 | + yield Variant(id=row['wariant']) | |
388 | + | |
389 | + def new_table_templates(self): | |
390 | + self.cache_ics() | |
391 | + self.cache_ptypes() | |
392 | + self.cache_lc_pos() | |
393 | + result = self.sqlite_cursor.execute( | |
394 | + 'SELECT DISTINCT wariant, pos, typr, charfl FROM paradygmaty') | |
395 | + for row in result: | |
396 | + yield TableTemplate( | |
397 | + variant_id=row['wariant'], | |
398 | + pattern_type=self.ptypes[(self.lc_pos[row['pos']].symbol, row['typr'])], | |
399 | + inflection_characteristic=self.ics[(row['charfl'], row['pos'])]) | |
358 | 400 | |
401 | + # to zostaje, bo tabelki i tak się pozmieniają | |
359 | 402 | def import_tables(self): |
360 | - bfl_table = dict(BaseFormLabel.objects.values_list('entry', 'pk')) | |
361 | - lc_pos_table = dict( | |
362 | - (pos.symbol, pos.lexical_class) for pos in PartOfSpeech.objects.all() | |
403 | + self.cache_bfl() | |
404 | + tt_table = dict( | |
405 | + (( | |
406 | + tt.variant.id, | |
407 | + tt.pattern_type.entry, | |
408 | + tt.inflection_characteristic.entry, | |
409 | + tt.inflection_characteristic.part_of_speech.symbol, | |
410 | + ), tt) for tt in TableTemplate.objects.all() | |
363 | 411 | ) |
364 | 412 | for row in self.sqlite_cursor.execute('SELECT * FROM paradygmaty'): |
365 | - lc = lc_pos_table[row['pos']] | |
366 | - variant, _created = Variant.objects.get_or_create(id=row['wariant']) | |
367 | - tt_data = { | |
368 | - 'variant': variant, | |
369 | - 'pattern_type': PatternType.objects.get( | |
370 | - entry=row['typr'], lexical_class=lc), | |
371 | - 'inflection_characteristic': InflectionCharacteristic.objects.get( | |
372 | - entry=row['charfl'], part_of_speech_symbol=row['pos']), | |
373 | - } | |
374 | - tt, _created = TableTemplate.objects.get_or_create(**tt_data) | |
413 | + tt = tt_table[ | |
414 | + (unicode(row['wariant']), row['typr'], row['charfl'], row['pos'])] | |
375 | 415 | if not SQL_MODE: |
376 | 416 | c = Cell( |
377 | 417 | table_template=tt, |
... | ... | @@ -392,7 +432,7 @@ class ImportData(object): |
392 | 432 | ) |
393 | 433 | tc.save() |
394 | 434 | else: |
395 | - efobaz_id = bfl_table[row['efobaz']] | |
435 | + efobaz_id = self.bfls[row['efobaz']].id | |
396 | 436 | self.cursor.execute( |
397 | 437 | "INSERT INTO klatki (st_id, efobaz, tag, prefiks, sufiks, kind) " |
398 | 438 | "VALUES (%s, %s, %s, %s, %s, %s)", [tt.pk, efobaz_id, row['morf'], |
... | ... | @@ -421,19 +461,17 @@ class ImportData(object): |
421 | 461 | rowspan=row['rowspan'], |
422 | 462 | colspan=row['colspan'], |
423 | 463 | label=row['nagl'], |
424 | - horizontal=row['styl'] == 'h', | |
464 | + css_class=row['styl'], | |
425 | 465 | ) |
426 | 466 | else: |
427 | 467 | raise Exception('Brak szablonu dla nagłówka: %s', dict(row)) |
428 | 468 | |
429 | 469 | def delete_and_import(self): |
430 | - transaction.commit_unless_managed() | |
431 | - transaction.enter_transaction_management() | |
432 | - transaction.managed() | |
433 | 470 | models = ( |
434 | 471 | TableCell, |
435 | 472 | Cell, |
436 | 473 | TableTemplate, |
474 | + Variant, | |
437 | 475 | CrossReference, |
438 | 476 | CrossReferenceType, |
439 | 477 | LexemeAssociation, |
... | ... | @@ -454,63 +492,74 @@ class ImportData(object): |
454 | 492 | model.objects.all().delete() |
455 | 493 | |
456 | 494 | print 'importing lexical classes...' |
457 | - LexicalClass.objects.bulk_create(self.new_lexical_classes()) | |
458 | - print 'importing parts of speech' | |
459 | - PartOfSpeech.objects.bulk_create(self.new_parts_of_speech()) | |
460 | - print 'importing base form labels' | |
461 | - BaseFormLabel.objects.bulk_create(self.new_base_form_labels()) | |
462 | - print 'importing inflection characteristics' | |
463 | - InflectionCharacteristic.objects.bulk_create( | |
495 | + bulk_create(LexicalClass, self.new_lexical_classes()) | |
496 | + print 'importing parts of speech...' | |
497 | + bulk_create(PartOfSpeech, self.new_parts_of_speech()) | |
498 | + print 'importing base form labels...' | |
499 | + bulk_create(BaseFormLabel, self.new_base_form_labels()) | |
500 | + print 'importing inflection characteristics...' | |
501 | + bulk_create(InflectionCharacteristic, | |
464 | 502 | self.new_inflection_characteristics()) |
465 | 503 | print 'importing vocabularies...' |
466 | - Vocabulary.objects.bulk_create(self.new_vocabularies()) | |
504 | + bulk_create(Vocabulary, self.new_vocabularies()) | |
467 | 505 | print 'importing qualifiers...' |
468 | - Qualifier.objects.bulk_create(self.new_qualifiers()) | |
469 | - print 'importing lexemes...' | |
470 | - self.import_lexemes() | |
471 | - print 'importing lexeme associations...' | |
472 | - LexemeAssociation.objects.bulk_create(self.new_lexeme_associations()) | |
473 | - print 'importing cross-reference types...' | |
474 | - CrossReferenceType.objects.bulk_create( | |
475 | - self.new_cross_reference_types()) | |
476 | - print 'importing cross-references...' | |
477 | - self.import_cross_references() | |
506 | + bulk_create(Qualifier, self.new_qualifiers()) | |
478 | 507 | print 'importing pattern types...' |
479 | 508 | self.import_pattern_types() |
480 | 509 | print 'importing patterns...' |
481 | - Pattern.objects.bulk_create(self.new_patterns()) | |
482 | - print 'importing lexeme inflection patterns...' | |
483 | - self.import_lexeme_inflection_patterns() | |
510 | + bulk_create(Pattern, self.new_patterns()) | |
484 | 511 | print 'importing endings...' |
485 | - self.import_endings() | |
512 | + endings, ending_quals = self.new_endings() | |
513 | + bulk_create(Ending, endings) | |
514 | + for pattern, bfl, index, q in ending_quals: | |
515 | + Ending.objects.get( | |
516 | + pattern=pattern, base_form_label=bfl, index=index).qualifiers.add(q) | |
517 | + def import_lexemes(): | |
518 | + print 'importing lexemes...' | |
519 | + lexemes, lexeme_assoc, lexeme_cvs, lexeme_quals = self.new_lexemes() | |
520 | + print '...' | |
521 | + bulk_create(Lexeme, lexemes) | |
522 | + print '...' | |
523 | + bulk_create(LexemeAssociation, lexeme_assoc) | |
524 | + print '...' | |
525 | + for lexeme_id, cv in lexeme_cvs: | |
526 | + cv.lexemes.add(lexeme_id) #add | |
527 | + print '...' | |
528 | + for lexeme_id, q in lexeme_quals: | |
529 | + q.lexeme_set.add(lexeme_id) #add | |
530 | + import_lexemes() | |
531 | + def import_lips(): | |
532 | + print 'importing lexeme inflection patterns...' | |
533 | + lips, lip_quals = self.new_lexeme_inflection_patterns() | |
534 | + print '...' | |
535 | + bulk_create(LexemeInflectionPattern, lips) | |
536 | + print '...' | |
537 | + for lexeme_id, index, q in lip_quals: | |
538 | + LexemeInflectionPattern.objects.get( | |
539 | + lexeme_id=lexeme_id, index=index).qualifiers.add(q) | |
540 | + import_lips() | |
541 | + print 'importing lexeme associations...' | |
542 | + bulk_create(LexemeAssociation, self.new_lexeme_associations()) | |
543 | + print 'importing cross-reference types...' | |
544 | + bulk_create(CrossReferenceType, | |
545 | + self.new_cross_reference_types()) | |
546 | + print 'importing cross-references...' | |
547 | + bulk_create(CrossReference, self.new_cross_references()) | |
548 | + print 'importing variants...' | |
549 | + bulk_create(Variant, self.new_variants()) | |
486 | 550 | print 'importing table templates...' |
551 | + bulk_create(TableTemplate, self.new_table_templates()) | |
552 | + print 'importing tables...' | |
487 | 553 | self.import_tables() |
488 | 554 | print 'importing table headers...' |
489 | - TableHeader.objects.bulk_create(self.new_table_headers()) | |
555 | + bulk_create(TableHeader, self.new_table_headers()) | |
556 | + print 'committing to database...' | |
490 | 557 | self.close() |
491 | - transaction.commit() | |
492 | - transaction.leave_transaction_management() | |
493 | 558 | |
494 | 559 | def single_import(self, model): |
495 | - transaction.commit_unless_managed() | |
496 | - transaction.enter_transaction_management() | |
497 | - transaction.managed() | |
498 | 560 | method_name = METHOD_NAMES[model] |
499 | 561 | if method_name.startswith('new'): |
500 | - model.objects.bulk_create(self.__getattribute__(method_name)()) | |
562 | + bulk_create(model, self.__getattribute__(method_name)()) | |
501 | 563 | elif method_name.startswith('import'): |
502 | 564 | self.__getattribute__(method_name)() |
503 | - self.close() | |
504 | - transaction.commit() | |
505 | - transaction.leave_transaction_management() | |
506 | - | |
507 | -import sys | |
508 | -if __name__ == '__main__': | |
509 | - if sys.argv[-1] == '-mini': | |
510 | - MINI_MODE = True | |
511 | - del sys.argv[-1] | |
512 | - if len(sys.argv) > 1: | |
513 | - db = sys.argv[1] | |
514 | - else: | |
515 | - db = DEFAULT_DATABASE | |
516 | - ImportData(db).delete_and_import() | |
565 | + self.close() | |
517 | 566 | \ No newline at end of file |
... | ... |
dictionary/models.py
... | ... | @@ -79,7 +79,7 @@ class Qualifier(Model): |
79 | 79 | return self.label |
80 | 80 | |
81 | 81 | class Meta: |
82 | - unique_together = ['label', 'vocabulary'] | |
82 | + unique_together = ('label', 'vocabulary') | |
83 | 83 | db_table = 'kwalifikatory' |
84 | 84 | ordering = ['label'] |
85 | 85 | |
... | ... | @@ -192,7 +192,7 @@ class InflectionCharacteristic(Model): |
192 | 192 | |
193 | 193 | class Meta: |
194 | 194 | db_table = 'charfle' |
195 | - unique_together = ['entry', 'part_of_speech'] | |
195 | + unique_together = ('entry', 'part_of_speech') | |
196 | 196 | |
197 | 197 | |
198 | 198 | class PatternType(Model): |
... | ... | @@ -324,7 +324,7 @@ class Lexeme(Model): |
324 | 324 | gloss = TextField(blank=True, db_column='glosa', verbose_name=u'glosa') |
325 | 325 | note = TextField(blank=True, db_column='nota', verbose_name=u'nota') |
326 | 326 | pronunciation = TextField(blank=True, db_column='wymowa', verbose_name=u'wymowa') |
327 | - homonym_number = IntegerField(db_column='hom') | |
327 | + homonym_number = IntegerField(db_column='hom', default=1) | |
328 | 328 | part_of_speech = ForeignKey(PartOfSpeech, db_column='pos', |
329 | 329 | verbose_name=u'cz. mowy') |
330 | 330 | owner_vocabulary = ForeignKey( |
... | ... | @@ -571,7 +571,7 @@ class LexemeInflectionPattern(Model): |
571 | 571 | table[y][x] = { |
572 | 572 | 'type': 'label', |
573 | 573 | 'label': [header.label], |
574 | - 'row_header': header.row_header, | |
574 | + 'css_class': header.css_class, | |
575 | 575 | 'rowspan': header.rowspan, |
576 | 576 | 'colspan': header.colspan, |
577 | 577 | } |
... | ... | @@ -732,6 +732,8 @@ def managed_vocabularies(user): |
732 | 732 | else: |
733 | 733 | return user.managed_vocabularies.all() |
734 | 734 | |
735 | +# kiedyś miało dodatkowe pole, a teraz istnieje tylko dlatego, | |
736 | +# że zapuściło korzenie | |
735 | 737 | class LexemeAssociation(Model): |
736 | 738 | lexeme = ForeignKey(Lexeme, db_column='l_id') |
737 | 739 | vocabulary = ForeignKey(Vocabulary, db_column='slownik') |
... | ... | @@ -865,10 +867,12 @@ class TableHeader(Model): |
865 | 867 | rowspan = IntegerField() |
866 | 868 | colspan = IntegerField() |
867 | 869 | label = CharField(max_length=64, blank=True, db_column='nagl') |
868 | - row_header = BooleanField(db_column='wierszowy') | |
870 | + row_header = BooleanField(db_column='wierszowy') # tymczasowo | |
871 | + css_class = BooleanField(db_column='styl') | |
869 | 872 | |
870 | 873 | def __unicode__(self): |
871 | - return '%s : %s : %s' % (self.label, self.row, self.col) | |
874 | + return '%s (%s,%s) [%s]' % ( | |
875 | + self.label, self.row, self.col, self.css_class) | |
872 | 876 | |
873 | 877 | class Meta: |
874 | 878 | db_table = 'naglowki_tabel' |
... | ... |
dictionary/templates/inflection_table.html
... | ... | @@ -2,41 +2,41 @@ |
2 | 2 | <caption>{{ inflection_characteristic.entry }}</caption> |
3 | 3 | {% for row in table %} |
4 | 4 | <tr> |
5 | - {% for cell in row %} | |
6 | - {% if cell.type != 'span' %} | |
7 | - {% if cell.type == 'empty' %} | |
8 | - <td class='EmptyCell' ></td> | |
9 | - {% else %} | |
10 | - {% if cell.type == 'forms' %} | |
11 | - <td rowspan="{{cell.rowspan}}" colspan="{{cell.colspan}}"> | |
12 | - <ul class='FormList'> | |
13 | - {% for entry in cell.forms %} | |
14 | - <li> | |
15 | - {{ entry.form }} | |
16 | - <span class="qualifiers"> | |
17 | - {% for q in entry.qualifiers %} | |
18 | - {{ q.label }} | |
19 | - {% endfor %} | |
20 | - </span> | |
21 | - </li> | |
22 | - {% endfor %} | |
23 | - </ul> | |
24 | - </td> | |
5 | + {% for cell in row %} | |
6 | + {% if cell.type != 'span' %} | |
7 | + {% if cell.type == 'empty' %} | |
8 | + <td class='EmptyCell' ></td> | |
25 | 9 | {% else %} |
26 | - <td rowspan="{{cell.rowspan}}" colspan="{{cell.colspan}}" | |
27 | - class="{{cell.row_header|yesno:"RowHeader,ColumnHeader"}}"> | |
28 | - <ul class="FormList"> | |
29 | - {% for label in cell.label %} | |
30 | - <li> | |
31 | - {{ label|safe }} | |
32 | - </li> | |
33 | - {% endfor %} | |
34 | - </ul> | |
35 | - </td> | |
10 | + {% if cell.type == 'forms' %} | |
11 | + <td rowspan="{{ cell.rowspan }}" colspan="{{ cell.colspan }}"> | |
12 | + <ul class='FormList'> | |
13 | + {% for entry in cell.forms %} | |
14 | + <li> | |
15 | + {{ entry.form }} | |
16 | + <span class="qualifiers"> | |
17 | + {% for q in entry.qualifiers %} | |
18 | + {{ q.label }} | |
19 | + {% endfor %} | |
20 | + </span> | |
21 | + </li> | |
22 | + {% endfor %} | |
23 | + </ul> | |
24 | + </td> | |
25 | + {% else %} | |
26 | + <td rowspan="{{ cell.rowspan }}" colspan="{{ cell.colspan }}" | |
27 | + class="{{ cell.css_class }}"> | |
28 | + <ul class="FormList"> | |
29 | + {% for label in cell.label %} | |
30 | + <li> | |
31 | + {{ label|safe }} | |
32 | + </li> | |
33 | + {% endfor %} | |
34 | + </ul> | |
35 | + </td> | |
36 | + {% endif %} | |
36 | 37 | {% endif %} |
37 | 38 | {% endif %} |
38 | - {% endif %} | |
39 | - {% endfor %} | |
39 | + {% endfor %} | |
40 | 40 | </tr> |
41 | 41 | {% endfor %} |
42 | 42 | </table> |
... | ... |