Added new copyright text to plain text, pdf, and TEI generators.

Bartłomiej Nitoń
1 parent 3bc1adf2
Showing 5 changed files with 122 additions and 109 deletions
dictionary/ajax_argument_realizations.py
dictionary/ajax_vocabulary_management.py
dictionary/management/commands/create_tex_walenty.py
dictionary/teixml.py
dictionary/templates/tex/slowal.tex
@@ -13,6 +13,7 @@ from common.decorators import render, ajax, AjaxError
 from dictionary.ajax_argument_form import argument_to_form_values, get_argument_model, \
                                           create_argument_form, all_fields_filled, \
                                           get_argument_from_form, validate_argument_form
+from dictionary.ajax_vocabulary_management import create_copyrights_str
 from dictionary.forms import ArgRealOpinionForm
 from dictionary.models import Argument, ArgRealization, ArgRealOpinion, AttributeParameterModel, \
                               Position, RealizationType, \
@@ -206,6 +207,11 @@ def create_realizations(request, form_data):
 def create_realizations_file(filename):
     try:
         real_file = codecs.open(filename, 'wt', 'utf-8')
+        real_file.write(create_copyrights_str(dictionary_file=False,
+                                              frame_opinions_pks=[],
+                                              lemma_statuses_pks=[], 
+                                              poss_pks=[], 
+                                              add_frame_opinions=[]))
         write_phrase_types_extensions(real_file)
         real_file.write('\n')
         write_parameters_realizations(real_file)
@@ -19,52 +19,6 @@ from dictionary.models import Frame_Opinion_Value, Lemma, Lemma_Status, \
                               POS, Vocabulary, VocabularyFormat, WalentyStat
 from dictionary.teixml import createteixml
  
-TEXT_VOCABULARY_CLAUSE = u"""
-% The Polish Valence Dictionary (Walenty)
-% <date>
-%
-% The Polish Valence Dictionary (Walenty) is an adaptation of
-% the Syntactic Dictionary of Polish Verbs by Marek Świdziński
-% in its electronic version provided by Łukasz Dębowski and
-% Elżbieta Hajnicz and further expanded by Witold Kieraś to
-% include the most frequent verbs in the 1 million sample of
-% NKJP (National Corpus of Polish).
-%
-% The presented resource results from an automatic conversion
-% of the aforementioned dictionary, manually reviewed by Filip
-% Skwarski to include correct information about a number of new
-% features, including sentential subjects, passivisation, and
-% control relations.
-%
-% The format of the new dictionary has been established by Filip
-% Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski,
-% Marek Świdziński, and Marcin Woliński.
-%
-% The dictionary has been edited and compiled using a tool
-% created by Bartłomiej Nitoń.
-%
-% The original Syntactic Dictionary of Polish Verbs derives from:
-%
-% Marek Świdziński
-% Institute of Polish
-% Warsaw University
-% Warsaw, Poland
-%
-% © Copyright 1998,2012 by Marek Świdziński
-%
-% This work is distributed under a CC BY-SA license:
-% http://creativecommons.org/licenses/by-sa/2.0/
-%
-% Parameters:
-%     Dictionaries:   <vocabularies>
-%     Schema opinions: <opinions>
-%     Lemma statuses: <statuses>
-%     Owners:         <owners>
-%     Part of speech: <part of speech>
-%     Opinions added: <opinions added>
-%     
-"""
-
 EN_MONTHS = ['January', 'February', 'March', 'April', 'May', 'June', 
              'July', 'August', 'September', 'October', 'November', 'December']
  
@@ -121,8 +75,11 @@ def create_text_walenty(file_name, lemmas, vocabularies, frame_opinions,
                           lemma_statuses, owners, poss, add_frame_opinions):
     try:
         f = codecs.open(file_name, 'w+', 'utf-8-sig' )
-        f.write(create_copyrights_str(vocabularies, frame_opinions,
-                                      lemma_statuses, owners, poss, add_frame_opinions))
+        f.write(create_copyrights_str(dictionary_file=True,
+                                      frame_opinions_pks=frame_opinions,
+                                      lemma_statuses_pks=lemma_statuses, 
+                                      poss_pks=poss, 
+                                      add_frame_opinions=add_frame_opinions))
         for lemma in lemmas:
             founded_frame_opinions = lemma.frame_opinions.filter(value__in=frame_opinions)  
             #frame_chars_dict = sorted_frame_char_values_dict()  
@@ -150,34 +107,69 @@ def create_text_walenty(file_name, lemmas, vocabularies, frame_opinions,
         f.close() 
         return file_name
  
-def create_copyrights_str(vocabularies_pks, frame_opinions_pks,
-                            lemma_statuses_pks, owners_pks, poss_pks, 
-                            add_frame_opinions):
+def create_copyrights_str(dictionary_file, frame_opinions_pks,
+                             lemma_statuses_pks, poss_pks, add_frame_opinions):
     date = datetime.datetime.now()
     month = EN_MONTHS[date.month-1]
-    vocabularies = Vocabulary.objects.filter(pk__in=vocabularies_pks).order_by('name')
-    if not vocabularies.exists():
-        vocabularies = Vocabulary.objects.order_by('name')
-    frame_opinions = Frame_Opinion_Value.objects.filter(pk__in=frame_opinions_pks).order_by('priority')
-    if not frame_opinions.exists():
-        frame_opinions = Frame_Opinion_Value.objects.order_by('priority')
-    lemma_statuses = Lemma_Status.objects.filter(pk__in=lemma_statuses_pks).order_by('priority')
-    if not lemma_statuses.exists():
-        lemma_statuses = Lemma_Status.objects.order_by('priority')
-    owners = User.objects.filter(pk__in=owners_pks).order_by('username')
-    if not owners.exists():
-        owners = User.objects.filter(lemmas__old=False).distinct().order_by('username')
-    poss = POS.objects.filter(pk__in=poss_pks).order_by('priority')
-    if not poss.exists():
-        poss = POS.objects.exclude(tag=u'unk').order_by('priority')
-    copyrights = (TEXT_VOCABULARY_CLAUSE.replace('<date>', date.strftime(month + ' %d, %Y')).
-                                         replace('<vocabularies>', ', '.join([vocab.name for vocab in vocabularies])).
-                                         replace('<opinions>', ', '.join([opinion.value for opinion in frame_opinions])).
-                                         replace('<statuses>', ', '.join([status.status for status in lemma_statuses])).
-                                         replace('<owners>', ', '.join([owner.username for owner in owners])).
-                                         replace('<part of speech>', ', '.join([pos.name for pos in poss])).
-                                         replace('<opinions added>', ('True' if add_frame_opinions else 'False')))
-    return copyrights
+    
+    if dictionary_file:
+        license_clause = [u'% Walenty: a valence dictionary of Polish']
+    else:
+        license_clause = [u'% This file is part of Walenty: a valence dictionary of Polish']
+    license_clause.extend([u"% http://zil.ipipan.waw.pl/Walenty",
+                           u"%% version: %s" % date.strftime(month + ' %d, %Y'),
+                           u"%",
+                           u"% © Copyright 2012–2016 by the Institute of Computer Science, Polish",
+                           u"% Academy of Sciences (IPI PAN)",
+                           u"%",
+                           u"% This work is distributed under a CC BY-SA license:",
+                           u"% http://creativecommons.org/licenses/by-sa/4.0/",
+                           u"%",
+                           u"% Walenty is a valence dictionary of Polish developed at the Institute",
+                           u"% of Computer Science, Polish Academy of Sciences (IPI PAN). It currently",
+                           u"%% contains %s schemata and %s frames for %s lemmata." % (WalentyStat.objects.get(label=u'Łączna liczba schematów').value,
+                                                                                       WalentyStat.objects.get(label=u'Łączna liczba ram semantycznych').value,
+                                                                                       WalentyStat.objects.get(label=u'Łączna liczba haseł').value),
+                           u"%",
+                           u"% The original formalism of Walenty was established by Filip Skwarski,",
+                           u"% Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin",
+                           u"% Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been",
+                           u"% further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam",
+                           u"% Przepiórkowski, and Marcin Woliński. The semantic layer has been",
+                           u"% developed by Elżbieta Hajnicz and Anna Andrzejczuk.",
+                           u"%",
+                           u"% The original seed of Walenty was provided by the automatic",
+                           u"% conversion, manually reviewed by Filip Skwarski, of the verbal valence",
+                           u"% dictionary used by the Świgra2 parser (6396 schemata for 1462 lemmata),",
+                           u"% which was in turn based on SDPV, the Syntactic Dictionary of Polish",
+                           u"% Verbs by Marek Świdziński (4148 schemata for 1064 lemmata). Afterwards,",
+                           u"% Walenty has been developed independently by adding new entries, ",
+                           u"% syntactic schemata, in particular phraseological ones, and semantic",
+                           u"% frames.",
+                           u"%",
+                           u"% Walenty has been edited and compiled using the Slowal tool",
+                           u"% (http://zil.ipipan.waw.pl/Slowal) created by Bartłomiej Nitoń and", 
+                           u"% Tomasz Bartosiak."])
+    if dictionary_file:
+        frame_opinions = Frame_Opinion_Value.objects.filter(pk__in=frame_opinions_pks).order_by('priority')
+        if not frame_opinions.exists():
+            frame_opinions = Frame_Opinion_Value.objects.order_by('priority')
+        lemma_statuses = Lemma_Status.objects.filter(pk__in=lemma_statuses_pks).order_by('priority')
+        if not lemma_statuses.exists():
+            lemma_statuses = Lemma_Status.objects.order_by('priority')
+        poss = POS.objects.filter(pk__in=poss_pks).order_by('priority')
+        if not poss.exists():
+            poss = POS.objects.exclude(tag=u'unk').order_by('priority')
+        license_clause.extend([u"%",
+                               u"% Parameters:",
+                               u"%%     Schema opinions: %s" % ', '.join([opinion.value for opinion in frame_opinions]),
+                               u"%%     Lemma statuses: %s" % ', '.join([status.status for status in lemma_statuses]),
+                               u"%%     Part of speech: %s" % ', '.join([pos.tag for pos in poss]),
+                               u"%%     Opinions added: %s" % ('True' if add_frame_opinions else 'False'),
+                               u"%\n"])
+    else:
+        license_clause.append(u"%\n")
+    return u'\n'.join(license_clause)
  
 def create_tex_walenty(lemmas, form_dict):
     q_frame_opinions = []
@@ -191,7 +183,10 @@ def create_tex_walenty(lemmas, form_dict):
     # Pass the TeX template through Django templating engine and into the temp file
     os.write(tmpfile, smart_str(h.unescape(render_to_string('tex/slowal.tex', {'lemmas': lemmas, 
                                                             'q_frame_opinions': q_frame_opinions,
-                                                            'download_dict'   : form_dict}))))
+                                                            'download_dict'   : form_dict,
+                                                            'frames_count'    : WalentyStat.objects.get(label=u'Łączna liczba ram semantycznych').value,
+                                                            'schemata_count'  : WalentyStat.objects.get(label=u'Łączna liczba schematów').value,
+                                                            'lemmata_count'   : WalentyStat.objects.get(label=u'Łączna liczba haseł').value}))))
     os.close(tmpfile)
     file_name = tmpfilename + '.tex'
     os.rename(tmpfilename, file_name)
@@ -10,7 +10,7 @@ from django.core.management.base import BaseCommand
 from django.template.loader import render_to_string
 from django.utils.encoding import smart_str
  
-from dictionary.models import Lemma, get_ready_statuses
+from dictionary.models import Lemma, WalentyStat, get_ready_statuses
 from settings import WALENTY_PATH
  
 class Command(BaseCommand):
@@ -41,6 +41,9 @@ def write_tex_walenty(outpath, lemmas):
         h = HTMLParser.HTMLParser()
         outfile.write(smart_str(h.unescape(render_to_string('tex/slowal.tex', {'lemmas': lemmas, 
                                                                                'q_frame_opinions': [],
-                                                                               'download_dict'   : {'frame_opinions': []}}))))
+                                                                               'download_dict'   : {'frame_opinions': []},
+                                                                               'frames_count'    : WalentyStat.objects.get(label=u'Łączna liczba ram semantycznych').value,
+                                                                               'schemata_count'  : WalentyStat.objects.get(label=u'Łączna liczba schematów').value,
+                                                                               'lemmata_count'   : WalentyStat.objects.get(label=u'Łączna liczba haseł').value}))))
     finally:
         outfile.close()
@@ -6,7 +6,7 @@ import datetime
 from lxml import etree
 from xml.sax.saxutils import escape
  
-from dictionary.models import Atribute_Model, \
+from dictionary.models import Atribute_Model, WalentyStat, \
                               sortArguments, sortatributes, sortPositions, sort_positions
  
 XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
@@ -33,18 +33,47 @@ def write_header(root):
  
     title_stmt = etree.SubElement(file_desc, 'titleStmt')
     title = etree.SubElement(title_stmt, 'title')
-    title.text = u'Polish Valence Dictionary (Walenty)'
+    title.text = u'Walenty: a valence dictionary of Polish (http://zil.ipipan.waw.pl/Walenty)'
  
     publication_stmt = etree.SubElement(file_desc, 'publicationStmt')
     publisher = etree.SubElement(publication_stmt, 'publisher')
-    publisher.text = u'IPI PAN ZIL'
+    publisher.text = u'Institute of Computer Science, Polish Academy of Sciences (IPI PAN)'
+    
     date = etree.SubElement(publication_stmt, 'date')
     date.attrib['when'] = datetime.datetime.now().strftime('%Y-%m-%d')
  
+    write_license_elem(publication_stmt)
+    
     source_desc = etree.SubElement(file_desc, 'sourceDesc')
     p = etree.SubElement(source_desc, 'p')
-    p.text = u'File generated using Slowal. Mentioned tool available at: walenty.ipipan.waw.pl.'
+    p.text = u'File generated using Slowal. Mentioned tool available here: http://zil.ipipan.waw.pl/Slowal.'
+
+def write_license_elem(parent_elem):
+    availability = etree.SubElement(parent_elem, 'availability')
+    licence = etree.SubElement(availability, 'licence')
+    licence.attrib['target'] = u'http://creativecommons.org/licenses/by-sa/4.0/'
+    
+    p = etree.SubElement(licence, 'p')
+    p.text = u'(C) Copyright 2012–2016 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN)'
+    
+    p = etree.SubElement(licence, 'p')
+    p.text = u'This work is distributed under a CC BY-SA license: http://creativecommons.org/licenses/by-sa/4.0/'
  
+    p = etree.SubElement(licence, 'p')
+    p.text = u'Walenty is a valence dictionary of Polish developed at the Institute of Computer Science, Polish Academy of Sciences (IPI PAN). It currently contains %s schemata and %s frames for %s lemmata.' % (WalentyStat.objects.get(label=u'Łączna liczba schematów').value,
+                                                                                                                                                                                                                   WalentyStat.objects.get(label=u'Łączna liczba ram semantycznych').value,
+                                                                                                                                                                                                                   WalentyStat.objects.get(label=u'Łączna liczba haseł').value)
+    
+    p = etree.SubElement(licence, 'p')
+    p.text = u'The original formalism of Walenty was established by Filip Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, and Marcin Woliński. The semantic layer has been developed by Elżbieta Hajnicz and Anna Andrzejczuk.'
+    
+    p = etree.SubElement(licence, 'p')
+    p.text = u'The original seed of Walenty was provided by the automatic conversion, manually reviewed by Filip Skwarski, of the verbal valence dictionary used by the Świgra2 parser (6396 schemata for 1462 lemmata), which was in turn based on SDPV, the Syntactic Dictionary of Polish Verbs by Marek Świdziński (4148 schemata for 1064 lemmata). Afterwards, Walenty has been developed independently by adding new entries, syntactic schemata, in particular phraseological ones, and semantic frames.'
+
+    p = etree.SubElement(licence, 'p')
+    p.text = u'Walenty has been edited and compiled using the Slowal tool (http://zil.ipipan.waw.pl/Slowal) created by Bartłomiej Nitoń and Tomasz Bartosiak.'
+    
+
 def write_entries(root, lemmas, frame_opinion_values):
     text = etree.SubElement(root, 'text')
     body = etree.SubElement(text, 'body')
@@ -47,45 +47,25 @@
 }
  
  
-\title{\textbf{The Polish Valence Dictionary (Walenty)}}
-\author{Institute of Computer Science, Polish Academy of Sciences}
+\title{\textbf{Walenty: a valence dictionary of Polish\\\textit{http://zil.ipipan.waw.pl/Walenty}}}
+\author{Institute of Computer Science, Polish Academy of Sciences (IPI PAN)}
 \date{ {% now "jS F Y" %} }
  
 \begin{document}
  
 \maketitle
  
-The Polish Valence Dictionary (Walenty) is an adaptation of
-the Syntactic Dictionary of Polish Verbs by Marek Świdziński
-in its electronic version provided by Łukasz Dębowski and
-Elżbieta Hajnicz and further expanded by Witold Kieraś to
-include the most frequent verbs in the 1 million sample of
-NKJP (National Corpus of Polish).
+© Copyright 2012–2016 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN)
  
-The presented resource results from an automatic conversion
-of the aforementioned dictionary, manually reviewed by Filip
-Skwarski to include correct information about a number of new
-features, including sentential subjects, passivisation, and
-control relations.
+This work is distributed under a CC BY-SA license: \textit{http://creativecommons.org/licenses/by-sa/4.0/}
  
-The format of the new dictionary has been established by Filip
-Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski,
-Marek Świdziński, and Marcin Woliński.
+Walenty is a valence dictionary of Polish developed at the Institute of Computer Science, Polish Academy of Sciences (IPI PAN). It currently contains {{ schemata_count }} schemata and {{ frames_count }} frames for {{ lemmata_count }} lemmata.
  
-The dictionary has been edited and compiled using a tool
-created by Bartłomiej Nitoń.
+The original formalism of Walenty was established by Filip Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, and Marcin Woliński. The semantic layer has been developed by Elżbieta Hajnicz and Anna Andrzejczuk.
  
-The original Syntactic Dictionary of Polish Verbs derives from:
+The original seed of Walenty was provided by the automatic conversion, manually reviewed by Filip Skwarski, of the verbal valence dictionary used by the Świgra2 parser (6396 schemata for 1462 lemmata), which was in turn based on SDPV, the Syntactic Dictionary of Polish Verbs by Marek Świdziński (4148 schemata for 1064 lemmata). Afterwards, Walenty has been developed independently by adding new entries, syntactic schemata, in particular phraseological ones, and semantic frames.
  
-Marek Świdziński
-Institute of Polish
-Warsaw University
-Warsaw, Poland
-
-© Copyright 1998,2012 by Marek Świdziński
-
-This work is distributed under a CC BY-SA license:
-\textit{http://creativecommons.org/licenses/by-sa/2.0/}
+Walenty has been edited and compiled using the Slowal tool (\textit{http://zil.ipipan.waw.pl/Slowal}) created by Bartłomiej Nitoń and Tomasz Bartosiak.
  
 \newpage