Commit 10d042b710bc3cffd87743d87d59a40eed6c6e58

Authored by Bartłomiej Nitoń
1 parent 3bc1adf2

Added new copyright text to plain text, PDF, and TEI generators.

dictionary/ajax_argument_realizations.py
... ... @@ -13,6 +13,7 @@ from common.decorators import render, ajax, AjaxError
13 13 from dictionary.ajax_argument_form import argument_to_form_values, get_argument_model, \
14 14 create_argument_form, all_fields_filled, \
15 15 get_argument_from_form, validate_argument_form
  16 +from dictionary.ajax_vocabulary_management import create_copyrights_str
16 17 from dictionary.forms import ArgRealOpinionForm
17 18 from dictionary.models import Argument, ArgRealization, ArgRealOpinion, AttributeParameterModel, \
18 19 Position, RealizationType, \
... ... @@ -206,6 +207,11 @@ def create_realizations(request, form_data):
206 207 def create_realizations_file(filename):
207 208 try:
208 209 real_file = codecs.open(filename, 'wt', 'utf-8')
  210 + real_file.write(create_copyrights_str(dictionary_file=False,
  211 + frame_opinions_pks=[],
  212 + lemma_statuses_pks=[],
  213 + poss_pks=[],
  214 + add_frame_opinions=[]))
209 215 write_phrase_types_extensions(real_file)
210 216 real_file.write('\n')
211 217 write_parameters_realizations(real_file)
... ...
dictionary/ajax_vocabulary_management.py
... ... @@ -19,52 +19,6 @@ from dictionary.models import Frame_Opinion_Value, Lemma, Lemma_Status, \
19 19 POS, Vocabulary, VocabularyFormat, WalentyStat
20 20 from dictionary.teixml import createteixml
21 21  
22   -TEXT_VOCABULARY_CLAUSE = u"""
23   -% The Polish Valence Dictionary (Walenty)
24   -% <date>
25   -%
26   -% The Polish Valence Dictionary (Walenty) is an adaptation of
27   -% the Syntactic Dictionary of Polish Verbs by Marek Świdziński
28   -% in its electronic version provided by Łukasz Dębowski and
29   -% Elżbieta Hajnicz and further expanded by Witold Kieraś to
30   -% include the most frequent verbs in the 1 million sample of
31   -% NKJP (National Corpus of Polish).
32   -%
33   -% The presented resource results from an automatic conversion
34   -% of the aforementioned dictionary, manually reviewed by Filip
35   -% Skwarski to include correct information about a number of new
36   -% features, including sentential subjects, passivisation, and
37   -% control relations.
38   -%
39   -% The format of the new dictionary has been established by Filip
40   -% Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski,
41   -% Marek Świdziński, and Marcin Woliński.
42   -%
43   -% The dictionary has been edited and compiled using a tool
44   -% created by Bartłomiej Nitoń.
45   -%
46   -% The original Syntactic Dictionary of Polish Verbs derives from:
47   -%
48   -% Marek Świdziński
49   -% Institute of Polish
50   -% Warsaw University
51   -% Warsaw, Poland
52   -%
53   -% © Copyright 1998,2012 by Marek Świdziński
54   -%
55   -% This work is distributed under a CC BY-SA license:
56   -% http://creativecommons.org/licenses/by-sa/2.0/
57   -%
58   -% Parameters:
59   -% Dictionaries: <vocabularies>
60   -% Schema opinions: <opinions>
61   -% Lemma statuses: <statuses>
62   -% Owners: <owners>
63   -% Part of speech: <part of speech>
64   -% Opinions added: <opinions added>
65   -%
66   -"""
67   -
68 22 EN_MONTHS = ['January', 'February', 'March', 'April', 'May', 'June',
69 23 'July', 'August', 'September', 'October', 'November', 'December']
70 24  
... ... @@ -121,8 +75,11 @@ def create_text_walenty(file_name, lemmas, vocabularies, frame_opinions,
121 75 lemma_statuses, owners, poss, add_frame_opinions):
122 76 try:
123 77 f = codecs.open(file_name, 'w+', 'utf-8-sig' )
124   - f.write(create_copyrights_str(vocabularies, frame_opinions,
125   - lemma_statuses, owners, poss, add_frame_opinions))
  78 + f.write(create_copyrights_str(dictionary_file=True,
  79 + frame_opinions_pks=frame_opinions,
  80 + lemma_statuses_pks=lemma_statuses,
  81 + poss_pks=poss,
  82 + add_frame_opinions=add_frame_opinions))
126 83 for lemma in lemmas:
127 84 founded_frame_opinions = lemma.frame_opinions.filter(value__in=frame_opinions)
128 85 #frame_chars_dict = sorted_frame_char_values_dict()
... ... @@ -150,34 +107,69 @@ def create_text_walenty(file_name, lemmas, vocabularies, frame_opinions,
150 107 f.close()
151 108 return file_name
152 109  
153   -def create_copyrights_str(vocabularies_pks, frame_opinions_pks,
154   - lemma_statuses_pks, owners_pks, poss_pks,
155   - add_frame_opinions):
  110 +def create_copyrights_str(dictionary_file, frame_opinions_pks,
  111 + lemma_statuses_pks, poss_pks, add_frame_opinions):
156 112 date = datetime.datetime.now()
157 113 month = EN_MONTHS[date.month-1]
158   - vocabularies = Vocabulary.objects.filter(pk__in=vocabularies_pks).order_by('name')
159   - if not vocabularies.exists():
160   - vocabularies = Vocabulary.objects.order_by('name')
161   - frame_opinions = Frame_Opinion_Value.objects.filter(pk__in=frame_opinions_pks).order_by('priority')
162   - if not frame_opinions.exists():
163   - frame_opinions = Frame_Opinion_Value.objects.order_by('priority')
164   - lemma_statuses = Lemma_Status.objects.filter(pk__in=lemma_statuses_pks).order_by('priority')
165   - if not lemma_statuses.exists():
166   - lemma_statuses = Lemma_Status.objects.order_by('priority')
167   - owners = User.objects.filter(pk__in=owners_pks).order_by('username')
168   - if not owners.exists():
169   - owners = User.objects.filter(lemmas__old=False).distinct().order_by('username')
170   - poss = POS.objects.filter(pk__in=poss_pks).order_by('priority')
171   - if not poss.exists():
172   - poss = POS.objects.exclude(tag=u'unk').order_by('priority')
173   - copyrights = (TEXT_VOCABULARY_CLAUSE.replace('<date>', date.strftime(month + ' %d, %Y')).
174   - replace('<vocabularies>', ', '.join([vocab.name for vocab in vocabularies])).
175   - replace('<opinions>', ', '.join([opinion.value for opinion in frame_opinions])).
176   - replace('<statuses>', ', '.join([status.status for status in lemma_statuses])).
177   - replace('<owners>', ', '.join([owner.username for owner in owners])).
178   - replace('<part of speech>', ', '.join([pos.name for pos in poss])).
179   - replace('<opinions added>', ('True' if add_frame_opinions else 'False')))
180   - return copyrights
  114 +
  115 + if dictionary_file:
  116 + license_clause = [u'% Walenty: a valence dictionary of Polish']
  117 + else:
  118 + license_clause = [u'% This file is part of Walenty: a valence dictionary of Polish']
  119 + license_clause.extend([u"% http://zil.ipipan.waw.pl/Walenty",
  120 + u"%% version: %s" % date.strftime(month + ' %d, %Y'),
  121 + u"%",
  122 + u"% © Copyright 2012–2016 by the Institute of Computer Science, Polish",
  123 + u"% Academy of Sciences (IPI PAN)",
  124 + u"%",
  125 + u"% This work is distributed under a CC BY-SA license:",
  126 + u"% http://creativecommons.org/licenses/by-sa/4.0/",
  127 + u"%",
  128 + u"% Walenty is a valence dictionary of Polish developed at the Institute",
  129 + u"% of Computer Science, Polish Academy of Sciences (IPI PAN). It currently",
  130 + u"%% contains %s schemata and %s frames for %s lemmata." % (WalentyStat.objects.get(label=u'Łączna liczba schematów').value,
  131 + WalentyStat.objects.get(label=u'Łączna liczba ram semantycznych').value,
  132 + WalentyStat.objects.get(label=u'Łączna liczba haseł').value),
  133 + u"%",
  134 + u"% The original formalism of Walenty was established by Filip Skwarski,",
  135 + u"% Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin",
  136 + u"% Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been",
  137 + u"% further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam",
  138 + u"% Przepiórkowski, and Marcin Woliński. The semantic layer has been",
  139 + u"% developed by Elżbieta Hajnicz and Anna Andrzejczuk.",
  140 + u"%",
  141 + u"% The original seed of Walenty was provided by the automatic",
  142 + u"% conversion, manually reviewed by Filip Skwarski, of the verbal valence",
  143 + u"% dictionary used by the Świgra2 parser (6396 schemata for 1462 lemmata),",
  144 + u"% which was in turn based on SDPV, the Syntactic Dictionary of Polish",
  145 + u"% Verbs by Marek Świdziński (4148 schemata for 1064 lemmata). Afterwards,",
  146 + u"% Walenty has been developed independently by adding new entries, ",
  147 + u"% syntactic schemata, in particular phraseological ones, and semantic",
  148 + u"% frames.",
  149 + u"%",
  150 + u"% Walenty has been edited and compiled using the Slowal tool",
  151 + u"% (http://zil.ipipan.waw.pl/Slowal) created by Bartłomiej Nitoń and",
  152 + u"% Tomasz Bartosiak."])
  153 + if dictionary_file:
  154 + frame_opinions = Frame_Opinion_Value.objects.filter(pk__in=frame_opinions_pks).order_by('priority')
  155 + if not frame_opinions.exists():
  156 + frame_opinions = Frame_Opinion_Value.objects.order_by('priority')
  157 + lemma_statuses = Lemma_Status.objects.filter(pk__in=lemma_statuses_pks).order_by('priority')
  158 + if not lemma_statuses.exists():
  159 + lemma_statuses = Lemma_Status.objects.order_by('priority')
  160 + poss = POS.objects.filter(pk__in=poss_pks).order_by('priority')
  161 + if not poss.exists():
  162 + poss = POS.objects.exclude(tag=u'unk').order_by('priority')
  163 + license_clause.extend([u"%",
  164 + u"% Parameters:",
  165 + u"%% Schema opinions: %s" % ', '.join([opinion.value for opinion in frame_opinions]),
  166 + u"%% Lemma statuses: %s" % ', '.join([status.status for status in lemma_statuses]),
  167 + u"%% Part of speech: %s" % ', '.join([pos.tag for pos in poss]),
  168 + u"%% Opinions added: %s" % ('True' if add_frame_opinions else 'False'),
  169 + u"%\n"])
  170 + else:
  171 + license_clause.append(u"%\n")
  172 + return u'\n'.join(license_clause)
181 173  
182 174 def create_tex_walenty(lemmas, form_dict):
183 175 q_frame_opinions = []
... ... @@ -191,7 +183,10 @@ def create_tex_walenty(lemmas, form_dict):
191 183 # Pass the TeX template through Django templating engine and into the temp file
192 184 os.write(tmpfile, smart_str(h.unescape(render_to_string('tex/slowal.tex', {'lemmas': lemmas,
193 185 'q_frame_opinions': q_frame_opinions,
194   - 'download_dict' : form_dict}))))
  186 + 'download_dict' : form_dict,
  187 + 'frames_count' : WalentyStat.objects.get(label=u'Łączna liczba ram semantycznych').value,
  188 + 'schemata_count' : WalentyStat.objects.get(label=u'Łączna liczba schematów').value,
  189 + 'lemmata_count' : WalentyStat.objects.get(label=u'Łączna liczba haseł').value}))))
195 190 os.close(tmpfile)
196 191 file_name = tmpfilename + '.tex'
197 192 os.rename(tmpfilename, file_name)
... ...
dictionary/management/commands/create_tex_walenty.py
... ... @@ -10,7 +10,7 @@ from django.core.management.base import BaseCommand
10 10 from django.template.loader import render_to_string
11 11 from django.utils.encoding import smart_str
12 12  
13   -from dictionary.models import Lemma, get_ready_statuses
  13 +from dictionary.models import Lemma, WalentyStat, get_ready_statuses
14 14 from settings import WALENTY_PATH
15 15  
16 16 class Command(BaseCommand):
... ... @@ -41,6 +41,9 @@ def write_tex_walenty(outpath, lemmas):
41 41 h = HTMLParser.HTMLParser()
42 42 outfile.write(smart_str(h.unescape(render_to_string('tex/slowal.tex', {'lemmas': lemmas,
43 43 'q_frame_opinions': [],
44   - 'download_dict' : {'frame_opinions': []}}))))
  44 + 'download_dict' : {'frame_opinions': []},
  45 + 'frames_count' : WalentyStat.objects.get(label=u'Łączna liczba ram semantycznych').value,
  46 + 'schemata_count' : WalentyStat.objects.get(label=u'Łączna liczba schematów').value,
  47 + 'lemmata_count' : WalentyStat.objects.get(label=u'Łączna liczba haseł').value}))))
45 48 finally:
46 49 outfile.close()
... ...
dictionary/teixml.py
... ... @@ -6,7 +6,7 @@ import datetime
6 6 from lxml import etree
7 7 from xml.sax.saxutils import escape
8 8  
9   -from dictionary.models import Atribute_Model, \
  9 +from dictionary.models import Atribute_Model, WalentyStat, \
10 10 sortArguments, sortatributes, sortPositions, sort_positions
11 11  
12 12 XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
... ... @@ -33,18 +33,47 @@ def write_header(root):
33 33  
34 34 title_stmt = etree.SubElement(file_desc, 'titleStmt')
35 35 title = etree.SubElement(title_stmt, 'title')
36   - title.text = u'Polish Valence Dictionary (Walenty)'
  36 + title.text = u'Walenty: a valence dictionary of Polish (http://zil.ipipan.waw.pl/Walenty)'
37 37  
38 38 publication_stmt = etree.SubElement(file_desc, 'publicationStmt')
39 39 publisher = etree.SubElement(publication_stmt, 'publisher')
40   - publisher.text = u'IPI PAN ZIL'
  40 + publisher.text = u'Institute of Computer Science, Polish Academy of Sciences (IPI PAN)'
  41 +
41 42 date = etree.SubElement(publication_stmt, 'date')
42 43 date.attrib['when'] = datetime.datetime.now().strftime('%Y-%m-%d')
43 44  
  45 + write_license_elem(publication_stmt)
  46 +
44 47 source_desc = etree.SubElement(file_desc, 'sourceDesc')
45 48 p = etree.SubElement(source_desc, 'p')
46   - p.text = u'File generated using Slowal. Mentioned tool available at: walenty.ipipan.waw.pl.'
  49 + p.text = u'File generated using Slowal. Mentioned tool available here: http://zil.ipipan.waw.pl/Slowal.'
  50 +
  51 +def write_license_elem(parent_elem):
  52 + availability = etree.SubElement(parent_elem, 'availability')
  53 + licence = etree.SubElement(availability, 'licence')
  54 + licence.attrib['target'] = u'http://creativecommons.org/licenses/by-sa/4.0/'
  55 +
  56 + p = etree.SubElement(licence, 'p')
  57 + p.text = u'(C) Copyright 2012–2016 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN)'
  58 +
  59 + p = etree.SubElement(licence, 'p')
  60 + p.text = u'This work is distributed under a CC BY-SA license: http://creativecommons.org/licenses/by-sa/4.0/'
47 61  
  62 + p = etree.SubElement(licence, 'p')
  63 + p.text = u'Walenty is a valence dictionary of Polish developed at the Institute of Computer Science, Polish Academy of Sciences (IPI PAN). It currently contains %s schemata and %s frames for %s lemmata.' % (WalentyStat.objects.get(label=u'Łączna liczba schematów').value,
  64 + WalentyStat.objects.get(label=u'Łączna liczba ram semantycznych').value,
  65 + WalentyStat.objects.get(label=u'Łączna liczba haseł').value)
  66 +
  67 + p = etree.SubElement(licence, 'p')
  68 + p.text = u'The original formalism of Walenty was established by Filip Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, and Marcin Woliński. The semantic layer has been developed by Elżbieta Hajnicz and Anna Andrzejczuk.'
  69 +
  70 + p = etree.SubElement(licence, 'p')
  71 + p.text = u'The original seed of Walenty was provided by the automatic conversion, manually reviewed by Filip Skwarski, of the verbal valence dictionary used by the Świgra2 parser (6396 schemata for 1462 lemmata), which was in turn based on SDPV, the Syntactic Dictionary of Polish Verbs by Marek Świdziński (4148 schemata for 1064 lemmata). Afterwards, Walenty has been developed independently by adding new entries, syntactic schemata, in particular phraseological ones, and semantic frames.'
  72 +
  73 + p = etree.SubElement(licence, 'p')
  74 + p.text = u'Walenty has been edited and compiled using the Slowal tool (http://zil.ipipan.waw.pl/Slowal) created by Bartłomiej Nitoń and Tomasz Bartosiak.'
  75 +
  76 +
48 77 def write_entries(root, lemmas, frame_opinion_values):
49 78 text = etree.SubElement(root, 'text')
50 79 body = etree.SubElement(text, 'body')
... ...
dictionary/templates/tex/slowal.tex
... ... @@ -47,45 +47,25 @@
47 47 }
48 48  
49 49  
50   -\title{\textbf{The Polish Valence Dictionary (Walenty)}}
51   -\author{Institute of Computer Science, Polish Academy of Sciences}
  50 +\title{\textbf{Walenty: a valence dictionary of Polish\\\textit{http://zil.ipipan.waw.pl/Walenty}}}
  51 +\author{Institute of Computer Science, Polish Academy of Sciences (IPI PAN)}
52 52 \date{ {% now "jS F Y" %} }
53 53  
54 54 \begin{document}
55 55  
56 56 \maketitle
57 57  
58   -The Polish Valence Dictionary (Walenty) is an adaptation of
59   -the Syntactic Dictionary of Polish Verbs by Marek Świdziński
60   -in its electronic version provided by Łukasz Dębowski and
61   -Elżbieta Hajnicz and further expanded by Witold Kieraś to
62   -include the most frequent verbs in the 1 million sample of
63   -NKJP (National Corpus of Polish).
  58 +© Copyright 2012–2016 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN)
64 59  
65   -The presented resource results from an automatic conversion
66   -of the aforementioned dictionary, manually reviewed by Filip
67   -Skwarski to include correct information about a number of new
68   -features, including sentential subjects, passivisation, and
69   -control relations.
  60 +This work is distributed under a CC BY-SA license: \textit{http://creativecommons.org/licenses/by-sa/4.0/}
70 61  
71   -The format of the new dictionary has been established by Filip
72   -Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski,
73   -Marek Świdziński, and Marcin Woliński.
  62 +Walenty is a valence dictionary of Polish developed at the Institute of Computer Science, Polish Academy of Sciences (IPI PAN). It currently contains {{ schemata_count }} schemata and {{ frames_count }} frames for {{ lemmata_count }} lemmata.
74 63  
75   -The dictionary has been edited and compiled using a tool
76   -created by Bartłomiej Nitoń.
  64 +The original formalism of Walenty was established by Filip Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, and Marcin Woliński. The semantic layer has been developed by Elżbieta Hajnicz and Anna Andrzejczuk.
77 65  
78   -The original Syntactic Dictionary of Polish Verbs derives from:
  66 +The original seed of Walenty was provided by the automatic conversion, manually reviewed by Filip Skwarski, of the verbal valence dictionary used by the Świgra2 parser (6396 schemata for 1462 lemmata), which was in turn based on SDPV, the Syntactic Dictionary of Polish Verbs by Marek Świdziński (4148 schemata for 1064 lemmata). Afterwards, Walenty has been developed independently by adding new entries, syntactic schemata, in particular phraseological ones, and semantic frames.
79 67  
80   -Marek Świdziński
81   -Institute of Polish
82   -Warsaw University
83   -Warsaw, Poland
84   -
85   -© Copyright 1998,2012 by Marek Świdziński
86   -
87   -This work is distributed under a CC BY-SA license:
88   -\textit{http://creativecommons.org/licenses/by-sa/2.0/}
  68 +Walenty has been edited and compiled using the Slowal tool (\textit{http://zil.ipipan.waw.pl/Slowal}) created by Bartłomiej Nitoń and Tomasz Bartosiak.
89 69  
90 70 \newpage
91 71  
... ...