Commit 10d042b710bc3cffd87743d87d59a40eed6c6e58
1 parent: 3bc1adf2
Added new copyright text to plain text, pdf, and TEI generators.
Showing 5 changed files with 122 additions and 109 deletions
dictionary/ajax_argument_realizations.py
... | ... | @@ -13,6 +13,7 @@ from common.decorators import render, ajax, AjaxError |
13 | 13 | from dictionary.ajax_argument_form import argument_to_form_values, get_argument_model, \ |
14 | 14 | create_argument_form, all_fields_filled, \ |
15 | 15 | get_argument_from_form, validate_argument_form |
16 | +from dictionary.ajax_vocabulary_management import create_copyrights_str | |
16 | 17 | from dictionary.forms import ArgRealOpinionForm |
17 | 18 | from dictionary.models import Argument, ArgRealization, ArgRealOpinion, AttributeParameterModel, \ |
18 | 19 | Position, RealizationType, \ |
... | ... | @@ -206,6 +207,11 @@ def create_realizations(request, form_data): |
206 | 207 | def create_realizations_file(filename): |
207 | 208 | try: |
208 | 209 | real_file = codecs.open(filename, 'wt', 'utf-8') |
210 | + real_file.write(create_copyrights_str(dictionary_file=False, | |
211 | + frame_opinions_pks=[], | |
212 | + lemma_statuses_pks=[], | |
213 | + poss_pks=[], | |
214 | + add_frame_opinions=[])) | |
209 | 215 | write_phrase_types_extensions(real_file) |
210 | 216 | real_file.write('\n') |
211 | 217 | write_parameters_realizations(real_file) |
... | ... |
dictionary/ajax_vocabulary_management.py
... | ... | @@ -19,52 +19,6 @@ from dictionary.models import Frame_Opinion_Value, Lemma, Lemma_Status, \ |
19 | 19 | POS, Vocabulary, VocabularyFormat, WalentyStat |
20 | 20 | from dictionary.teixml import createteixml |
21 | 21 | |
22 | -TEXT_VOCABULARY_CLAUSE = u""" | |
23 | -% The Polish Valence Dictionary (Walenty) | |
24 | -% <date> | |
25 | -% | |
26 | -% The Polish Valence Dictionary (Walenty) is an adaptation of | |
27 | -% the Syntactic Dictionary of Polish Verbs by Marek Świdziński | |
28 | -% in its electronic version provided by Łukasz Dębowski and | |
29 | -% Elżbieta Hajnicz and further expanded by Witold Kieraś to | |
30 | -% include the most frequent verbs in the 1 million sample of | |
31 | -% NKJP (National Corpus of Polish). | |
32 | -% | |
33 | -% The presented resource results from an automatic conversion | |
34 | -% of the aforementioned dictionary, manually reviewed by Filip | |
35 | -% Skwarski to include correct information about a number of new | |
36 | -% features, including sentential subjects, passivisation, and | |
37 | -% control relations. | |
38 | -% | |
39 | -% The format of the new dictionary has been established by Filip | |
40 | -% Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, | |
41 | -% Marek Świdziński, and Marcin Woliński. | |
42 | -% | |
43 | -% The dictionary has been edited and compiled using a tool | |
44 | -% created by Bartłomiej Nitoń. | |
45 | -% | |
46 | -% The original Syntactic Dictionary of Polish Verbs derives from: | |
47 | -% | |
48 | -% Marek Świdziński | |
49 | -% Institute of Polish | |
50 | -% Warsaw University | |
51 | -% Warsaw, Poland | |
52 | -% | |
53 | -% © Copyright 1998,2012 by Marek Świdziński | |
54 | -% | |
55 | -% This work is distributed under a CC BY-SA license: | |
56 | -% http://creativecommons.org/licenses/by-sa/2.0/ | |
57 | -% | |
58 | -% Parameters: | |
59 | -% Dictionaries: <vocabularies> | |
60 | -% Schema opinions: <opinions> | |
61 | -% Lemma statuses: <statuses> | |
62 | -% Owners: <owners> | |
63 | -% Part of speech: <part of speech> | |
64 | -% Opinions added: <opinions added> | |
65 | -% | |
66 | -""" | |
67 | - | |
68 | 22 | EN_MONTHS = ['January', 'February', 'March', 'April', 'May', 'June', |
69 | 23 | 'July', 'August', 'September', 'October', 'November', 'December'] |
70 | 24 | |
... | ... | @@ -121,8 +75,11 @@ def create_text_walenty(file_name, lemmas, vocabularies, frame_opinions, |
121 | 75 | lemma_statuses, owners, poss, add_frame_opinions): |
122 | 76 | try: |
123 | 77 | f = codecs.open(file_name, 'w+', 'utf-8-sig' ) |
124 | - f.write(create_copyrights_str(vocabularies, frame_opinions, | |
125 | - lemma_statuses, owners, poss, add_frame_opinions)) | |
78 | + f.write(create_copyrights_str(dictionary_file=True, | |
79 | + frame_opinions_pks=frame_opinions, | |
80 | + lemma_statuses_pks=lemma_statuses, | |
81 | + poss_pks=poss, | |
82 | + add_frame_opinions=add_frame_opinions)) | |
126 | 83 | for lemma in lemmas: |
127 | 84 | founded_frame_opinions = lemma.frame_opinions.filter(value__in=frame_opinions) |
128 | 85 | #frame_chars_dict = sorted_frame_char_values_dict() |
... | ... | @@ -150,34 +107,69 @@ def create_text_walenty(file_name, lemmas, vocabularies, frame_opinions, |
150 | 107 | f.close() |
151 | 108 | return file_name |
152 | 109 | |
153 | -def create_copyrights_str(vocabularies_pks, frame_opinions_pks, | |
154 | - lemma_statuses_pks, owners_pks, poss_pks, | |
155 | - add_frame_opinions): | |
110 | +def create_copyrights_str(dictionary_file, frame_opinions_pks, | |
111 | + lemma_statuses_pks, poss_pks, add_frame_opinions): | |
156 | 112 | date = datetime.datetime.now() |
157 | 113 | month = EN_MONTHS[date.month-1] |
158 | - vocabularies = Vocabulary.objects.filter(pk__in=vocabularies_pks).order_by('name') | |
159 | - if not vocabularies.exists(): | |
160 | - vocabularies = Vocabulary.objects.order_by('name') | |
161 | - frame_opinions = Frame_Opinion_Value.objects.filter(pk__in=frame_opinions_pks).order_by('priority') | |
162 | - if not frame_opinions.exists(): | |
163 | - frame_opinions = Frame_Opinion_Value.objects.order_by('priority') | |
164 | - lemma_statuses = Lemma_Status.objects.filter(pk__in=lemma_statuses_pks).order_by('priority') | |
165 | - if not lemma_statuses.exists(): | |
166 | - lemma_statuses = Lemma_Status.objects.order_by('priority') | |
167 | - owners = User.objects.filter(pk__in=owners_pks).order_by('username') | |
168 | - if not owners.exists(): | |
169 | - owners = User.objects.filter(lemmas__old=False).distinct().order_by('username') | |
170 | - poss = POS.objects.filter(pk__in=poss_pks).order_by('priority') | |
171 | - if not poss.exists(): | |
172 | - poss = POS.objects.exclude(tag=u'unk').order_by('priority') | |
173 | - copyrights = (TEXT_VOCABULARY_CLAUSE.replace('<date>', date.strftime(month + ' %d, %Y')). | |
174 | - replace('<vocabularies>', ', '.join([vocab.name for vocab in vocabularies])). | |
175 | - replace('<opinions>', ', '.join([opinion.value for opinion in frame_opinions])). | |
176 | - replace('<statuses>', ', '.join([status.status for status in lemma_statuses])). | |
177 | - replace('<owners>', ', '.join([owner.username for owner in owners])). | |
178 | - replace('<part of speech>', ', '.join([pos.name for pos in poss])). | |
179 | - replace('<opinions added>', ('True' if add_frame_opinions else 'False'))) | |
180 | - return copyrights | |
114 | + | |
115 | + if dictionary_file: | |
116 | + license_clause = [u'% Walenty: a valence dictionary of Polish'] | |
117 | + else: | |
118 | + license_clause = [u'% This file is part of Walenty: a valence dictionary of Polish'] | |
119 | + license_clause.extend([u"% http://zil.ipipan.waw.pl/Walenty", | |
120 | + u"%% version: %s" % date.strftime(month + ' %d, %Y'), | |
121 | + u"%", | |
122 | + u"% © Copyright 2012–2016 by the Institute of Computer Science, Polish", | |
123 | + u"% Academy of Sciences (IPI PAN)", | |
124 | + u"%", | |
125 | + u"% This work is distributed under a CC BY-SA license:", | |
126 | + u"% http://creativecommons.org/licenses/by-sa/4.0/", | |
127 | + u"%", | |
128 | + u"% Walenty is a valence dictionary of Polish developed at the Institute", | |
129 | + u"% of Computer Science, Polish Academy of Sciences (IPI PAN). It currently", | |
130 | + u"%% contains %s schemata and %s frames for %s lemmata." % (WalentyStat.objects.get(label=u'Łączna liczba schematów').value, | |
131 | + WalentyStat.objects.get(label=u'Łączna liczba ram semantycznych').value, | |
132 | + WalentyStat.objects.get(label=u'Łączna liczba haseł').value), | |
133 | + u"%", | |
134 | + u"% The original formalism of Walenty was established by Filip Skwarski,", | |
135 | + u"% Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin", | |
136 | + u"% Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been", | |
137 | + u"% further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam", | |
138 | + u"% Przepiórkowski, and Marcin Woliński. The semantic layer has been", | |
139 | + u"% developed by Elżbieta Hajnicz and Anna Andrzejczuk.", | |
140 | + u"%", | |
141 | + u"% The original seed of Walenty was provided by the automatic", | |
142 | + u"% conversion, manually reviewed by Filip Skwarski, of the verbal valence", | |
143 | + u"% dictionary used by the Świgra2 parser (6396 schemata for 1462 lemmata),", | |
144 | + u"% which was in turn based on SDPV, the Syntactic Dictionary of Polish", | |
145 | + u"% Verbs by Marek Świdziński (4148 schemata for 1064 lemmata). Afterwards,", | |
146 | + u"% Walenty has been developed independently by adding new entries, ", | |
147 | + u"% syntactic schemata, in particular phraseological ones, and semantic", | |
148 | + u"% frames.", | |
149 | + u"%", | |
150 | + u"% Walenty has been edited and compiled using the Slowal tool", | |
151 | + u"% (http://zil.ipipan.waw.pl/Slowal) created by Bartłomiej Nitoń and", | |
152 | + u"% Tomasz Bartosiak."]) | |
153 | + if dictionary_file: | |
154 | + frame_opinions = Frame_Opinion_Value.objects.filter(pk__in=frame_opinions_pks).order_by('priority') | |
155 | + if not frame_opinions.exists(): | |
156 | + frame_opinions = Frame_Opinion_Value.objects.order_by('priority') | |
157 | + lemma_statuses = Lemma_Status.objects.filter(pk__in=lemma_statuses_pks).order_by('priority') | |
158 | + if not lemma_statuses.exists(): | |
159 | + lemma_statuses = Lemma_Status.objects.order_by('priority') | |
160 | + poss = POS.objects.filter(pk__in=poss_pks).order_by('priority') | |
161 | + if not poss.exists(): | |
162 | + poss = POS.objects.exclude(tag=u'unk').order_by('priority') | |
163 | + license_clause.extend([u"%", | |
164 | + u"% Parameters:", | |
165 | + u"%% Schema opinions: %s" % ', '.join([opinion.value for opinion in frame_opinions]), | |
166 | + u"%% Lemma statuses: %s" % ', '.join([status.status for status in lemma_statuses]), | |
167 | + u"%% Part of speech: %s" % ', '.join([pos.tag for pos in poss]), | |
168 | + u"%% Opinions added: %s" % ('True' if add_frame_opinions else 'False'), | |
169 | + u"%\n"]) | |
170 | + else: | |
171 | + license_clause.append(u"%\n") | |
172 | + return u'\n'.join(license_clause) | |
181 | 173 | |
182 | 174 | def create_tex_walenty(lemmas, form_dict): |
183 | 175 | q_frame_opinions = [] |
... | ... | @@ -191,7 +183,10 @@ def create_tex_walenty(lemmas, form_dict): |
191 | 183 | # Pass the TeX template through Django templating engine and into the temp file |
192 | 184 | os.write(tmpfile, smart_str(h.unescape(render_to_string('tex/slowal.tex', {'lemmas': lemmas, |
193 | 185 | 'q_frame_opinions': q_frame_opinions, |
194 | - 'download_dict' : form_dict})))) | |
186 | + 'download_dict' : form_dict, | |
187 | + 'frames_count' : WalentyStat.objects.get(label=u'Łączna liczba ram semantycznych').value, | |
188 | + 'schemata_count' : WalentyStat.objects.get(label=u'Łączna liczba schematów').value, | |
189 | + 'lemmata_count' : WalentyStat.objects.get(label=u'Łączna liczba haseł').value})))) | |
195 | 190 | os.close(tmpfile) |
196 | 191 | file_name = tmpfilename + '.tex' |
197 | 192 | os.rename(tmpfilename, file_name) |
... | ... |
dictionary/management/commands/create_tex_walenty.py
... | ... | @@ -10,7 +10,7 @@ from django.core.management.base import BaseCommand |
10 | 10 | from django.template.loader import render_to_string |
11 | 11 | from django.utils.encoding import smart_str |
12 | 12 | |
13 | -from dictionary.models import Lemma, get_ready_statuses | |
13 | +from dictionary.models import Lemma, WalentyStat, get_ready_statuses | |
14 | 14 | from settings import WALENTY_PATH |
15 | 15 | |
16 | 16 | class Command(BaseCommand): |
... | ... | @@ -41,6 +41,9 @@ def write_tex_walenty(outpath, lemmas): |
41 | 41 | h = HTMLParser.HTMLParser() |
42 | 42 | outfile.write(smart_str(h.unescape(render_to_string('tex/slowal.tex', {'lemmas': lemmas, |
43 | 43 | 'q_frame_opinions': [], |
44 | - 'download_dict' : {'frame_opinions': []}})))) | |
44 | + 'download_dict' : {'frame_opinions': []}, | |
45 | + 'frames_count' : WalentyStat.objects.get(label=u'Łączna liczba ram semantycznych').value, | |
46 | + 'schemata_count' : WalentyStat.objects.get(label=u'Łączna liczba schematów').value, | |
47 | + 'lemmata_count' : WalentyStat.objects.get(label=u'Łączna liczba haseł').value})))) | |
45 | 48 | finally: |
46 | 49 | outfile.close() |
... | ... |
dictionary/teixml.py
... | ... | @@ -6,7 +6,7 @@ import datetime |
6 | 6 | from lxml import etree |
7 | 7 | from xml.sax.saxutils import escape |
8 | 8 | |
9 | -from dictionary.models import Atribute_Model, \ | |
9 | +from dictionary.models import Atribute_Model, WalentyStat, \ | |
10 | 10 | sortArguments, sortatributes, sortPositions, sort_positions |
11 | 11 | |
12 | 12 | XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace' |
... | ... | @@ -33,18 +33,47 @@ def write_header(root): |
33 | 33 | |
34 | 34 | title_stmt = etree.SubElement(file_desc, 'titleStmt') |
35 | 35 | title = etree.SubElement(title_stmt, 'title') |
36 | - title.text = u'Polish Valence Dictionary (Walenty)' | |
36 | + title.text = u'Walenty: a valence dictionary of Polish (http://zil.ipipan.waw.pl/Walenty)' | |
37 | 37 | |
38 | 38 | publication_stmt = etree.SubElement(file_desc, 'publicationStmt') |
39 | 39 | publisher = etree.SubElement(publication_stmt, 'publisher') |
40 | - publisher.text = u'IPI PAN ZIL' | |
40 | + publisher.text = u'Institute of Computer Science, Polish Academy of Sciences (IPI PAN)' | |
41 | + | |
41 | 42 | date = etree.SubElement(publication_stmt, 'date') |
42 | 43 | date.attrib['when'] = datetime.datetime.now().strftime('%Y-%m-%d') |
43 | 44 | |
45 | + write_license_elem(publication_stmt) | |
46 | + | |
44 | 47 | source_desc = etree.SubElement(file_desc, 'sourceDesc') |
45 | 48 | p = etree.SubElement(source_desc, 'p') |
46 | - p.text = u'File generated using Slowal. Mentioned tool available at: walenty.ipipan.waw.pl.' | |
49 | + p.text = u'File generated using Slowal. Mentioned tool available here: http://zil.ipipan.waw.pl/Slowal.' | |
50 | + | |
51 | +def write_license_elem(parent_elem): | |
52 | + availability = etree.SubElement(parent_elem, 'availability') | |
53 | + licence = etree.SubElement(availability, 'licence') | |
54 | + licence.attrib['target'] = u'http://creativecommons.org/licenses/by-sa/4.0/' | |
55 | + | |
56 | + p = etree.SubElement(licence, 'p') | |
57 | + p.text = u'(C) Copyright 2012–2016 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN)' | |
58 | + | |
59 | + p = etree.SubElement(licence, 'p') | |
60 | + p.text = u'This work is distributed under a CC BY-SA license: http://creativecommons.org/licenses/by-sa/4.0/' | |
47 | 61 | |
62 | + p = etree.SubElement(licence, 'p') | |
63 | + p.text = u'Walenty is a valence dictionary of Polish developed at the Institute of Computer Science, Polish Academy of Sciences (IPI PAN). It currently contains %s schemata and %s frames for %s lemmata.' % (WalentyStat.objects.get(label=u'Łączna liczba schematów').value, | |
64 | + WalentyStat.objects.get(label=u'Łączna liczba ram semantycznych').value, | |
65 | + WalentyStat.objects.get(label=u'Łączna liczba haseł').value) | |
66 | + | |
67 | + p = etree.SubElement(licence, 'p') | |
68 | + p.text = u'The original formalism of Walenty was established by Filip Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, and Marcin Woliński. The semantic layer has been developed by Elżbieta Hajnicz and Anna Andrzejczuk.' | |
69 | + | |
70 | + p = etree.SubElement(licence, 'p') | |
71 | + p.text = u'The original seed of Walenty was provided by the automatic conversion, manually reviewed by Filip Skwarski, of the verbal valence dictionary used by the Świgra2 parser (6396 schemata for 1462 lemmata), which was in turn based on SDPV, the Syntactic Dictionary of Polish Verbs by Marek Świdziński (4148 schemata for 1064 lemmata). Afterwards, Walenty has been developed independently by adding new entries, syntactic schemata, in particular phraseological ones, and semantic frames.' | |
72 | + | |
73 | + p = etree.SubElement(licence, 'p') | |
74 | + p.text = u'Walenty has been edited and compiled using the Slowal tool (http://zil.ipipan.waw.pl/Slowal) created by Bartłomiej Nitoń and Tomasz Bartosiak.' | |
75 | + | |
76 | + | |
48 | 77 | def write_entries(root, lemmas, frame_opinion_values): |
49 | 78 | text = etree.SubElement(root, 'text') |
50 | 79 | body = etree.SubElement(text, 'body') |
... | ... |
dictionary/templates/tex/slowal.tex
... | ... | @@ -47,45 +47,25 @@ |
47 | 47 | } |
48 | 48 | |
49 | 49 | |
50 | -\title{\textbf{The Polish Valence Dictionary (Walenty)}} | |
51 | -\author{Institute of Computer Science, Polish Academy of Sciences} | |
50 | +\title{\textbf{Walenty: a valence dictionary of Polish\\\textit{http://zil.ipipan.waw.pl/Walenty}}} | |
51 | +\author{Institute of Computer Science, Polish Academy of Sciences (IPI PAN)} | |
52 | 52 | \date{ {% now "jS F Y" %} } |
53 | 53 | |
54 | 54 | \begin{document} |
55 | 55 | |
56 | 56 | \maketitle |
57 | 57 | |
58 | -The Polish Valence Dictionary (Walenty) is an adaptation of | |
59 | -the Syntactic Dictionary of Polish Verbs by Marek Świdziński | |
60 | -in its electronic version provided by Łukasz Dębowski and | |
61 | -Elżbieta Hajnicz and further expanded by Witold Kieraś to | |
62 | -include the most frequent verbs in the 1 million sample of | |
63 | -NKJP (National Corpus of Polish). | |
58 | +© Copyright 2012–2016 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN) | |
64 | 59 | |
65 | -The presented resource results from an automatic conversion | |
66 | -of the aforementioned dictionary, manually reviewed by Filip | |
67 | -Skwarski to include correct information about a number of new | |
68 | -features, including sentential subjects, passivisation, and | |
69 | -control relations. | |
60 | +This work is distributed under a CC BY-SA license: \textit{http://creativecommons.org/licenses/by-sa/4.0/} | |
70 | 61 | |
71 | -The format of the new dictionary has been established by Filip | |
72 | -Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, | |
73 | -Marek Świdziński, and Marcin Woliński. | |
62 | +Walenty is a valence dictionary of Polish developed at the Institute of Computer Science, Polish Academy of Sciences (IPI PAN). It currently contains {{ schemata_count }} schemata and {{ frames_count }} frames for {{ lemmata_count }} lemmata. | |
74 | 63 | |
75 | -The dictionary has been edited and compiled using a tool | |
76 | -created by Bartłomiej Nitoń. | |
64 | +The original formalism of Walenty was established by Filip Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, and Marcin Woliński. The semantic layer has been developed by Elżbieta Hajnicz and Anna Andrzejczuk. | |
77 | 65 | |
78 | -The original Syntactic Dictionary of Polish Verbs derives from: | |
66 | +The original seed of Walenty was provided by the automatic conversion, manually reviewed by Filip Skwarski, of the verbal valence dictionary used by the Świgra2 parser (6396 schemata for 1462 lemmata), which was in turn based on SDPV, the Syntactic Dictionary of Polish Verbs by Marek Świdziński (4148 schemata for 1064 lemmata). Afterwards, Walenty has been developed independently by adding new entries, syntactic schemata, in particular phraseological ones, and semantic frames. | |
79 | 67 | |
80 | -Marek Świdziński | |
81 | -Institute of Polish | |
82 | -Warsaw University | |
83 | -Warsaw, Poland | |
84 | - | |
85 | -© Copyright 1998,2012 by Marek Świdziński | |
86 | - | |
87 | -This work is distributed under a CC BY-SA license: | |
88 | -\textit{http://creativecommons.org/licenses/by-sa/2.0/} | |
68 | +Walenty has been edited and compiled using the Slowal tool (\textit{http://zil.ipipan.waw.pl/Slowal}) created by Bartłomiej Nitoń and Tomasz Bartosiak. | |
89 | 69 | |
90 | 70 | \newpage |
91 | 71 | |
... | ... |