Commit e9f62d5e32b2c847d9c57e035916c27e13570d13
1 parent 23aa3c4c
importowanie zwrotności
Showing 2 changed files with 41 additions and 11 deletions
accounts/management/commands/create_users.py
... | ... | @@ -24,7 +24,10 @@ def create_users(): |
24 | 24 | SGJP = Vocabulary.objects.get(id='SGJP') |
25 | 25 | SJPDor = Vocabulary.objects.get(id='SJPDor') |
26 | 26 | zmiotki = Vocabulary.objects.get(id='zmiotki') |
27 | - WSJP = Vocabulary.objects.get(id='WSJP') | |
27 | + try: | |
28 | + WSJP = Vocabulary.objects.get(id='WSJP') | |
29 | + except Vocabulary.DoesNotExist: | |
30 | + WSJP = None | |
28 | 31 | Morfologik, created = Vocabulary.objects.get_or_create(id='Morfologik') |
29 | 32 | PoliMorf, created = Vocabulary.objects.get_or_create(id='PoliMorf') |
30 | 33 | |
... | ... | @@ -44,7 +47,8 @@ def create_users(): |
44 | 47 | |
45 | 48 | admin.groups.add(admins) |
46 | 49 | |
47 | - sgjp_vocabs = (SGJP, SJPDor, zmiotki, WSJP) | |
50 | + sgjp_vocabs = (SGJP, SJPDor, zmiotki) | |
51 | + if WSJP: sgjp_vocabs += (WSJP,) | |
48 | 52 | nzm_vocabs = (zmiotki, Morfologik, PoliMorf) |
49 | 53 | |
50 | 54 | sgjp.groups.add(managers) |
... | ... | @@ -65,7 +69,8 @@ def create_users(): |
65 | 69 | rednzm.visible_vocabularies.add(vocab) |
66 | 70 | rednzm.editable_vocabularies.add(vocab) |
67 | 71 | rednzm.visible_vocabularies.add(SGJP) |
68 | - rednzm.visible_vocabularies.add(WSJP) | |
72 | + if WSJP: | |
73 | + rednzm.visible_vocabularies.add(WSJP) | |
69 | 74 | rednzm.visible_vocabularies.add(SJPDor) |
70 | 75 | |
71 | 76 | supersgjp.groups.add(superlexicographers) |
... | ... | @@ -79,7 +84,8 @@ def create_users(): |
79 | 84 | supernzm.visible_vocabularies.add(vocab) |
80 | 85 | supernzm.editable_vocabularies.add(vocab) |
81 | 86 | supernzm.visible_vocabularies.add(SGJP) |
82 | - supernzm.visible_vocabularies.add(WSJP) | |
87 | + if WSJP: | |
88 | + supernzm.visible_vocabularies.add(WSJP) | |
83 | 89 | supernzm.visible_vocabularies.add(SJPDor) |
84 | 90 | |
85 | 91 | wzornik.groups.add(superlexicographers) |
... | ... |
dictionary/management/commands/import_data.py
... | ... | @@ -10,9 +10,9 @@ from dictionary.models import * |
10 | 10 | |
11 | 11 | DEFAULT_DATABASE = 'data/sgjp.db' |
12 | 12 | |
13 | -MINI_MODE = False # do debugowania | |
13 | +MINI_MODE = True # do debugowania | |
14 | 14 | MINI_LEXEME_COUNT = 40000 |
15 | -MINI_LEXEME_QUERY = 'SELECT %s FROM leksemy LIMIT ?' | |
15 | +MINI_LEXEME_QUERY = "SELECT %s FROM leksemy WHERE pos IN ('v', 'ger', 'pact') LIMIT ?" | |
16 | 16 | |
17 | 17 | SQL_MODE = True |
18 | 18 | |
... | ... | @@ -21,6 +21,8 @@ BATCH_SIZE = 5000 |
21 | 21 | OTHER = 'inne' |
22 | 22 | DEFAULT_VOCAB = 'SGJP' |
23 | 23 | |
24 | +REFL = (u'—', u'się', u'(się)', u'sobie', u'(sobie)', u'się/sobie') | |
25 | + | |
24 | 26 | # tymczasowa tabelka |
25 | 27 | BASIC_FORM_LABELS = { |
26 | 28 | '0-': '1', |
... | ... | @@ -197,6 +199,18 @@ class ImportData(object): |
197 | 199 | if 'qual' not in self.__dict__: |
198 | 200 | self.qual = dict((q.label, q) for q in Qualifier.objects.all()) |
199 | 201 | |
202 | + def create_refl_attribute(self): | |
203 | + refl, created = LexemeAttribute.objects.get_or_create( | |
204 | + name=u'zwrotność', closed=True) # "zwrotność" is Polish for "reflexivity" | |
205 | + for pos in PartOfSpeech.objects.filter(symbol__in=('v', 'ger', 'pact')): | |
206 | + refl.parts_of_speech.add(pos) # add: link this part of speech to the attribute | |
207 | + refl_values = {} | |
208 | + for val in REFL: | |
209 | + refl_values[val], created = LexemeAttributeValue.objects.get_or_create( | |
210 | + value=val, attribute=refl) | |
211 | + refl_values[''] = refl_values[u'—'] # an empty key maps to the same value object as the dash | |
212 | + return refl_values # dict: each REFL string (plus '') -> its LexemeAttributeValue | |
213 | + | |
200 | 214 | def new_lexemes(self): |
201 | 215 | self.cache_qualifiers() |
202 | 216 | if MINI_MODE: |
... | ... | @@ -204,6 +218,7 @@ class ImportData(object): |
204 | 218 | MINI_LEXEME_QUERY % '*',(MINI_LEXEME_COUNT,)) |
205 | 219 | else: |
206 | 220 | result = self.sqlite_cursor.execute('SELECT * FROM leksemy') |
221 | + refl_values = self.create_refl_attribute() | |
207 | 222 | date = datetime.datetime.now() |
208 | 223 | cv_table = dict( |
209 | 224 | (cv.label, cv) for cv in ClassificationValue.objects.all()) |
... | ... | @@ -211,6 +226,7 @@ class ImportData(object): |
211 | 226 | lexeme_associations = [] |
212 | 227 | lexeme_qualifiers = [] |
213 | 228 | lexeme_cvs = [] |
229 | + lexeme_attrs = [] | |
214 | 230 | for row in result: |
215 | 231 | slownik = row['slownik'] |
216 | 232 | status = 'conf' if slownik != 'zmiotki' else 'cand' |
... | ... | @@ -218,7 +234,7 @@ class ImportData(object): |
218 | 234 | lexemes.append(Lexeme( |
219 | 235 | id=row['nr'], |
220 | 236 | entry=row['haslo'], |
221 | - entry_suffix=row['haslosuf'] or '', | |
237 | + entry_suffix=row['haslosuf'] or '', # pozostałość historyczna | |
222 | 238 | gloss=row['glosa'] or '', |
223 | 239 | note=row['nota'] or '', |
224 | 240 | pronunciation=row['wymowa'] or '', |
... | ... | @@ -235,7 +251,11 @@ class ImportData(object): |
235 | 251 | if row['lkwal']: |
236 | 252 | for qual in row['lkwal'].split('|'): |
237 | 253 | lexeme_qualifiers.append((row['nr'], self.qual[qual])) |
238 | - return lexemes, lexeme_associations, lexeme_cvs, lexeme_qualifiers | |
254 | + if row['pos'] in ('v', 'ger', 'pact'): | |
255 | + refl_value = refl_values.get(row['haslosuf'].strip(' ?')) | |
256 | + lexeme_attrs.append((row['nr'], refl_value)) | |
257 | + return (lexemes, lexeme_associations, lexeme_cvs, lexeme_qualifiers, | |
258 | + lexeme_attrs) | |
239 | 259 | |
240 | 260 | def new_lexeme_associations(self): |
241 | 261 | self.cache_vocabs() |
... | ... | @@ -280,7 +300,7 @@ class ImportData(object): |
280 | 300 | 'JOIN leksemy l2 on nrdo=l2.nr' |
281 | 301 | ) |
282 | 302 | cr_type_table = dict( |
283 | - ((crt.symbol, crt.from_pos, crt.to_pos), crt) | |
303 | + ((crt.symbol, crt.from_pos.symbol, crt.to_pos.symbol), crt) | |
284 | 304 | for crt in CrossReferenceType.objects.all() |
285 | 305 | ) |
286 | 306 | for row in result: |
... | ... | @@ -516,7 +536,8 @@ class ImportData(object): |
516 | 536 | pattern=pattern, base_form_label=bfl, index=index).qualifiers.add(q) |
517 | 537 | def import_lexemes(): |
518 | 538 | print 'importing lexemes...' |
519 | - lexemes, lexeme_assoc, lexeme_cvs, lexeme_quals = self.new_lexemes() | |
539 | + (lexemes, lexeme_assoc, lexeme_cvs, lexeme_quals, | |
540 | + lexeme_attrs) = self.new_lexemes() | |
520 | 541 | print '...' |
521 | 542 | bulk_create(Lexeme, lexemes) |
522 | 543 | print '...' |
... | ... | @@ -527,6 +548,9 @@ class ImportData(object): |
527 | 548 | print '...' |
528 | 549 | for lexeme_id, q in lexeme_quals: |
529 | 550 | q.lexeme_set.add(lexeme_id) #add |
551 | + print '...' | |
552 | + for lexeme_id, attr_val in lexeme_attrs: | |
553 | + attr_val.lexemes.add(lexeme_id) | |
530 | 554 | import_lexemes() |
531 | 555 | def import_lips(): |
532 | 556 | print 'importing lexeme inflection patterns...' |
... | ... | @@ -562,4 +586,4 @@ class ImportData(object): |
562 | 586 | bulk_create(model, self.__getattribute__(method_name)()) |
563 | 587 | elif method_name.startswith('import'): |
564 | 588 | self.__getattribute__(method_name)() |
565 | - self.close() | |
566 | 589 | \ No newline at end of file |
590 | + self.close() | |
... | ... |