Commit e9f62d5e32b2c847d9c57e035916c27e13570d13

Authored by janek37
1 parent 23aa3c4c

importowanie zwrotności

accounts/management/commands/create_users.py
... ... @@ -24,7 +24,10 @@ def create_users():
24 24 SGJP = Vocabulary.objects.get(id='SGJP')
25 25 SJPDor = Vocabulary.objects.get(id='SJPDor')
26 26 zmiotki = Vocabulary.objects.get(id='zmiotki')
27   - WSJP = Vocabulary.objects.get(id='WSJP')
  27 + try:
  28 + WSJP = Vocabulary.objects.get(id='WSJP')
  29 + except Vocabulary.DoesNotExist:
  30 + WSJP = None
28 31 Morfologik, created = Vocabulary.objects.get_or_create(id='Morfologik')
29 32 PoliMorf, created = Vocabulary.objects.get_or_create(id='PoliMorf')
30 33  
... ... @@ -44,7 +47,8 @@ def create_users():
44 47  
45 48 admin.groups.add(admins)
46 49  
47   - sgjp_vocabs = (SGJP, SJPDor, zmiotki, WSJP)
  50 + sgjp_vocabs = (SGJP, SJPDor, zmiotki)
  51 + if WSJP: sgjp_vocabs += (WSJP,)
48 52 nzm_vocabs = (zmiotki, Morfologik, PoliMorf)
49 53  
50 54 sgjp.groups.add(managers)
... ... @@ -65,7 +69,8 @@ def create_users():
65 69 rednzm.visible_vocabularies.add(vocab)
66 70 rednzm.editable_vocabularies.add(vocab)
67 71 rednzm.visible_vocabularies.add(SGJP)
68   - rednzm.visible_vocabularies.add(WSJP)
  72 + if WSJP:
  73 + rednzm.visible_vocabularies.add(WSJP)
69 74 rednzm.visible_vocabularies.add(SJPDor)
70 75  
71 76 supersgjp.groups.add(superlexicographers)
... ... @@ -79,7 +84,8 @@ def create_users():
79 84 supernzm.visible_vocabularies.add(vocab)
80 85 supernzm.editable_vocabularies.add(vocab)
81 86 supernzm.visible_vocabularies.add(SGJP)
82   - supernzm.visible_vocabularies.add(WSJP)
  87 + if WSJP:
  88 + supernzm.visible_vocabularies.add(WSJP)
83 89 supernzm.visible_vocabularies.add(SJPDor)
84 90  
85 91 wzornik.groups.add(superlexicographers)
... ...
dictionary/management/commands/import_data.py
... ... @@ -10,9 +10,9 @@ from dictionary.models import *
10 10  
11 11 DEFAULT_DATABASE = 'data/sgjp.db'
12 12  
13   -MINI_MODE = False # do debugowania
  13 +MINI_MODE = True # do debugowania
14 14 MINI_LEXEME_COUNT = 40000
15   -MINI_LEXEME_QUERY = 'SELECT %s FROM leksemy LIMIT ?'
  15 +MINI_LEXEME_QUERY = "SELECT %s FROM leksemy WHERE pos IN ('v', 'ger', 'pact') LIMIT ?"
16 16  
17 17 SQL_MODE = True
18 18  
... ... @@ -21,6 +21,8 @@ BATCH_SIZE = 5000
21 21 OTHER = 'inne'
22 22 DEFAULT_VOCAB = 'SGJP'
23 23  
  24 +REFL = (u'—', u'się', u'(się)', u'sobie', u'(sobie)', u'się/sobie')
  25 +
24 26 # tymczasowa tabelka
25 27 BASIC_FORM_LABELS = {
26 28 '0-': '1',
... ... @@ -197,6 +199,18 @@ class ImportData(object):
197 199 if 'qual' not in self.__dict__:
198 200 self.qual = dict((q.label, q) for q in Qualifier.objects.all())
199 201  
  202 + def create_refl_attribute(self):  # Ensure the reflexivity attribute and one value per REFL entry exist; return a dict mapping value text -> LexemeAttributeValue.
  203 + refl, created = LexemeAttribute.objects.get_or_create(  # fetch the attribute or create it if missing; the `created` flag is never read in this method
  204 + name=u'zwrotność', closed=True)
  205 + for pos in PartOfSpeech.objects.filter(symbol__in=('v', 'ger', 'pact')):  # attach the attribute to every PartOfSpeech whose symbol is one of these three
  206 + refl.parts_of_speech.add(pos) #add
  207 + refl_values = {}  # lookup table: value text -> LexemeAttributeValue object
  208 + for val in REFL:  # one attribute value per entry of the module-level REFL tuple
  209 + refl_values[val], created = LexemeAttributeValue.objects.get_or_create(
  210 + value=val, attribute=refl)
  211 + refl_values[''] = refl_values[u'—']  # the empty string is an alias for the dash (REFL[0]) value
  212 + return refl_values
  213 +
200 214 def new_lexemes(self):
201 215 self.cache_qualifiers()
202 216 if MINI_MODE:
... ... @@ -204,6 +218,7 @@ class ImportData(object):
204 218 MINI_LEXEME_QUERY % '*',(MINI_LEXEME_COUNT,))
205 219 else:
206 220 result = self.sqlite_cursor.execute('SELECT * FROM leksemy')
  221 + refl_values = self.create_refl_attribute()
207 222 date = datetime.datetime.now()
208 223 cv_table = dict(
209 224 (cv.label, cv) for cv in ClassificationValue.objects.all())
... ... @@ -211,6 +226,7 @@ class ImportData(object):
211 226 lexeme_associations = []
212 227 lexeme_qualifiers = []
213 228 lexeme_cvs = []
  229 + lexeme_attrs = []
214 230 for row in result:
215 231 slownik = row['slownik']
216 232 status = 'conf' if slownik != 'zmiotki' else 'cand'
... ... @@ -218,7 +234,7 @@ class ImportData(object):
218 234 lexemes.append(Lexeme(
219 235 id=row['nr'],
220 236 entry=row['haslo'],
221   - entry_suffix=row['haslosuf'] or '',
  237 + entry_suffix=row['haslosuf'] or '', # pozostałość historyczna
222 238 gloss=row['glosa'] or '',
223 239 note=row['nota'] or '',
224 240 pronunciation=row['wymowa'] or '',
... ... @@ -235,7 +251,11 @@ class ImportData(object):
235 251 if row['lkwal']:
236 252 for qual in row['lkwal'].split('|'):
237 253 lexeme_qualifiers.append((row['nr'], self.qual[qual]))
238   - return lexemes, lexeme_associations, lexeme_cvs, lexeme_qualifiers
  254 + if row['pos'] in ('v', 'ger', 'pact'):
  255 + refl_value = refl_values.get(row['haslosuf'].strip(' ?'))
  256 + lexeme_attrs.append((row['nr'], refl_value))
  257 + return (lexemes, lexeme_associations, lexeme_cvs, lexeme_qualifiers,
  258 + lexeme_attrs)
239 259  
240 260 def new_lexeme_associations(self):
241 261 self.cache_vocabs()
... ... @@ -280,7 +300,7 @@ class ImportData(object):
280 300 'JOIN leksemy l2 on nrdo=l2.nr'
281 301 )
282 302 cr_type_table = dict(
283   - ((crt.symbol, crt.from_pos, crt.to_pos), crt)
  303 + ((crt.symbol, crt.from_pos.symbol, crt.to_pos.symbol), crt)
284 304 for crt in CrossReferenceType.objects.all()
285 305 )
286 306 for row in result:
... ... @@ -516,7 +536,8 @@ class ImportData(object):
516 536 pattern=pattern, base_form_label=bfl, index=index).qualifiers.add(q)
517 537 def import_lexemes():
518 538 print 'importing lexemes...'
519   - lexemes, lexeme_assoc, lexeme_cvs, lexeme_quals = self.new_lexemes()
  539 + (lexemes, lexeme_assoc, lexeme_cvs, lexeme_quals,
  540 + lexeme_attrs) = self.new_lexemes()
520 541 print '...'
521 542 bulk_create(Lexeme, lexemes)
522 543 print '...'
... ... @@ -527,6 +548,9 @@ class ImportData(object):
527 548 print '...'
528 549 for lexeme_id, q in lexeme_quals:
529 550 q.lexeme_set.add(lexeme_id) #add
  551 + print '...'
  552 + for lexeme_id, attr_val in lexeme_attrs:
  553 + attr_val.lexemes.add(lexeme_id)
530 554 import_lexemes()
531 555 def import_lips():
532 556 print 'importing lexeme inflection patterns...'
... ... @@ -562,4 +586,4 @@ class ImportData(object):
562 586 bulk_create(model, self.__getattribute__(method_name)())
563 587 elif method_name.startswith('import'):
564 588 self.__getattribute__(method_name)()
565   - self.close()
566 589 \ No newline at end of file
  590 + self.close()
... ...