# speakers.py 1.56 KB
'A list of speakers'

import re
from unicodedata import normalize, combining


class Speakers:
    """Registry of speakers keyed by a generated ID.

    ``names`` maps every raw label passed to ``find_id`` to its ID;
    ``speakers`` maps each ID to the label with its bracketed suffix
    removed. Iterating the object yields ``(speaker_id, name)`` pairs
    taken from ``speakers``.
    """

    def __init__(self):
        self.names = {}
        self.speakers = {}

    def find_id(self, speaker):
        """Return the ID of ``speaker``, registering it on first use.

        A label seen before is answered from ``names``. Otherwise an ID
        is generated, remembered for that exact label, and the formatted
        name is stored under the ID (replacing any name already stored
        under the same ID).
        """
        try:
            return self.names[speaker]
        except KeyError:
            pass  # not seen yet: fall through and register it
        new_id = self._id(speaker)
        self.names[speaker] = new_id
        self.speakers[new_id] = self._format(speaker)
        return new_id

    def role(self, speaker):
        """Classify ``speaker`` as 'chair', 'speaker', 'commentator' or 'guest'.

        The title patterns are anchored at the start of the label only,
        so any text following the title is ignored. The two special
        labels 'komentarz' and 'zdarzenie' must match exactly.
        """
        title_roles = (
            (r'(Marszałek|Wicemarszałek)', 'chair'),
            (r'(Poseł|Posłanka|Głos z sali|Głosy z sali)', 'speaker'),
        )
        for pattern, label in title_roles:
            if re.match(pattern, speaker) is not None:
                return label
        if speaker in ('komentarz', 'zdarzenie'):
            return 'commentator'
        return 'guest'

    def __iter__(self):
        """Yield ``(speaker_id, name)`` pairs for all registered speakers."""
        yield from self.speakers.items()

    def _id(self, speaker):
        """Build a letters-only ID from a multi-word speaker label.

        Labels without a space are returned unchanged (and may therefore
        contain non-ASCII characters). Otherwise the label is title-cased,
        both 'Ł' and 'ł' are replaced by a lowercase 'l', everything from
        the first '(' to the last ')' is dropped, diacritics are removed
        via NFKD decomposition, and any character outside A-Z/a-z is
        discarded.
        """
        if ' ' not in speaker:
            return speaker
        titled = speaker.title()
        # NFKD does not decompose the Polish barred L, so map it by hand.
        for barred_l in ('Ł', 'ł'):
            titled = titled.replace(barred_l, 'l')
        unbracketed = re.sub(r'\(.*\)', '', titled)
        decomposed = normalize('NFKD', unbracketed)
        base_chars = "".join(ch for ch in decomposed if not combining(ch))
        return re.sub(r'[^A-Za-z]', '', base_chars)

    def _format(self, speaker):
        """Return ``speaker`` without its whitespace-preceded bracketed part.

        The match is greedy: it runs from the first ' (' to the last ')'.
        A bracket not preceded by whitespace is left in place.
        """
        bracketed_part = r'\s+\(.*\)'
        return re.sub(bracketed_part, '', speaker)