|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module NLP.Concraft.Polish.Request
(
-- * Request
Request (..)
, Config (..)
-- ** Short
, Short (..)
, short
-- ** Long
, Long (..)
, long
) where
import Control.Applicative ((<$>), (<*>))
import qualified Control.Monad.LazyIO as LazyIO
import qualified Data.Char as Char
import qualified Data.List.Split as Split
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Binary as B
import NLP.Concraft.Polish
import NLP.Concraft.Polish.Maca
import NLP.Concraft.Polish.Morphosyntax hiding (tag)
-------------------------------------------------
-- Configuration
-------------------------------------------------
-- | A request with configuration.
data Request t = Request {
-- | The actuall request.
rqBody :: t
-- | Request configuration.
, rqConf :: Config }
instance B.Binary t => B.Binary (Request t) where
put Request{..} = B.put rqBody >> B.put rqConf
get = Request <$> B.get <*> B.get
-- | Tagging configuration.
newtype Config = Config {
-- | Tag with marginal probabilities.
tagProbs :: Bool
} deriving (B.Binary)
-------------------------------------------------
-- Short request
-------------------------------------------------
-- | A short request.
data Short
= Short T.Text
| Par [Sent Tag]
instance B.Binary Short where
put (Short x) = B.putWord8 0 >> B.put x
put (Par x) = B.putWord8 1 >> B.put x
get = B.getWord8 >>= \x -> case x of
0 -> Short <$> B.get
_ -> Par <$> B.get
-- | Process the short request.
short :: MacaPool -> Concraft -> Request Short -> IO [Sent Tag]
short pool concraft Request{..} = case rqBody of
Short x -> map (tagit concraft) <$> macaPar pool x
Par x -> return $ map (tagit concraft) x
where
tagit = if tagProbs rqConf then marginals else tag
-------------------------------------------------
-- Long request
-------------------------------------------------
-- | A request to parse a long text.
data Long
= Long L.Text
| Doc [[Sent Tag]]
instance B.Binary Long where
put (Long x) = B.putWord8 0 >> B.put x
put (Doc x) = B.putWord8 1 >> B.put x
get = B.getWord8 >>= \x -> case x of
0 -> Long <$> B.get
_ -> Doc <$> B.get
-- | Process the long request given the processor for the
-- short request.
long :: (Request Short -> IO a) -> Request Long -> IO [a]
long handler Request{..} = case rqBody of
Long inp ->
LazyIO.mapM f . map L.unlines
. Split.splitWhen (L.all Char.isSpace)
. L.lines $ inp
Doc inp -> LazyIO.mapM g inp
where
f x = handler . r $ Short $ L.toStrict x
g x = handler . r $ Par x
r x = Request {rqBody = x, rqConf = rqConf}
|