MtasCodecPostingsFormat.java
9.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
package mtas.codec;
import java.io.IOException;
import java.util.TreeSet;
import mtas.analysis.token.MtasToken;
import mtas.analysis.token.MtasTokenString;
import mtas.codec.payload.MtasPayloadDecoder;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
/**
 * Postings format of the MTAS codec.
 *
 * <p>
 * Writing wraps the {@link FieldsConsumer} of a delegate
 * {@link PostingsFormat} in a {@code MtasFieldsConsumer}; reading is done
 * through a {@code MtasFieldsProducer}. The class also holds the version,
 * flag, storage and extension constants of the format, and the static helper
 * {@link #getToken(IndexInput, IndexInput, Long)} that decodes one stored
 * token.
 */
public class MtasCodecPostingsFormat extends PostingsFormat {

  /** The Constant VERSION_START. */
  public static final int VERSION_START = 1;

  /** The Constant VERSION_OLD_1. */
  public static final int VERSION_OLD_1 = 1;

  /** The Constant VERSION_OLD_2. */
  public static final int VERSION_OLD_2 = 2;

  /** The Constant VERSION_CURRENT. */
  public static final int VERSION_CURRENT = 3;

  /** Object flag bit: a parent id is stored for the object. */
  static final int MTAS_OBJECT_HAS_PARENT = 1;

  /** Object flag bit: the position is stored as a range (start, length). */
  static final int MTAS_OBJECT_HAS_POSITION_RANGE = 2;

  /** Object flag bit: the position is stored as a set of positions. */
  static final int MTAS_OBJECT_HAS_POSITION_SET = 4;

  /** Object flag bit: an offset (start, length) is stored. */
  static final int MTAS_OBJECT_HAS_OFFSET = 8;

  /** Object flag bit: a real offset (start, length) is stored. */
  static final int MTAS_OBJECT_HAS_REALOFFSET = 16;

  /** Object flag bit: a payload (length, bytes) is stored. */
  static final int MTAS_OBJECT_HAS_PAYLOAD = 32;

  /** The Constant MTAS_STORAGE_BYTE. */
  public static final int MTAS_STORAGE_BYTE = 0;

  /** The Constant MTAS_STORAGE_SHORT. */
  public static final int MTAS_STORAGE_SHORT = 1;

  /** The Constant MTAS_STORAGE_INTEGER. */
  public static final int MTAS_STORAGE_INTEGER = 2;

  /** The Constant MTAS_STORAGE_LONG. */
  public static final int MTAS_STORAGE_LONG = 3;

  /** The Constant MTAS_TMP_FIELD_EXTENSION. */
  public static final String MTAS_TMP_FIELD_EXTENSION = "mtas.field.tmp";

  /** The Constant MTAS_TMP_OBJECT_EXTENSION. */
  public static final String MTAS_TMP_OBJECT_EXTENSION = "mtas.object.tmp";

  /** The Constant MTAS_TMP_DOCS_EXTENSION. */
  public static final String MTAS_TMP_DOCS_EXTENSION = "mtas.docs.tmp";

  /** The Constant MTAS_TMP_DOC_EXTENSION. */
  public static final String MTAS_TMP_DOC_EXTENSION = "mtas.doc.tmp";

  /** The Constant MTAS_TMP_DOCS_CHAINED_EXTENSION. */
  public static final String MTAS_TMP_DOCS_CHAINED_EXTENSION = "mtas.docs.chained.tmp";

  /** The Constant MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION. */
  public static final String MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION = "mtas.prefix.single.position";

  /** The Constant MTAS_FIELDINFO_ATTRIBUTE_PREFIX_MULTIPLE_POSITION. */
  public static final String MTAS_FIELDINFO_ATTRIBUTE_PREFIX_MULTIPLE_POSITION = "mtas.prefix.multiple.position";

  /** The Constant MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SET_POSITION. */
  public static final String MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SET_POSITION = "mtas.prefix.set.position";

  /** The Constant MTAS_OBJECT_EXTENSION. */
  public static final String MTAS_OBJECT_EXTENSION = "mtas.object";

  /** The Constant MTAS_TERM_EXTENSION. */
  public static final String MTAS_TERM_EXTENSION = "mtas.term";

  /** The Constant MTAS_FIELD_EXTENSION. */
  public static final String MTAS_FIELD_EXTENSION = "mtas.field";

  /** The Constant MTAS_PREFIX_EXTENSION. */
  public static final String MTAS_PREFIX_EXTENSION = "mtas.prefix";

  /** The Constant MTAS_DOC_EXTENSION. */
  public static final String MTAS_DOC_EXTENSION = "mtas.doc";

  /** The Constant MTAS_INDEX_DOC_ID_EXTENSION. */
  public static final String MTAS_INDEX_DOC_ID_EXTENSION = "mtas.index.doc.id";

  /** The Constant MTAS_INDEX_OBJECT_ID_EXTENSION. */
  public static final String MTAS_INDEX_OBJECT_ID_EXTENSION = "mtas.index.object.id";

  /** The Constant MTAS_INDEX_OBJECT_POSITION_EXTENSION. */
  public static final String MTAS_INDEX_OBJECT_POSITION_EXTENSION = "mtas.index.object.position";

  /** The Constant MTAS_INDEX_OBJECT_PARENT_EXTENSION. */
  public static final String MTAS_INDEX_OBJECT_PARENT_EXTENSION = "mtas.index.object.parent";

  /** The Constant MTAS_INDEX_TERM_PREFIX_POSITION_EXTENSION. */
  public static final String MTAS_INDEX_TERM_PREFIX_POSITION_EXTENSION = "mtas.index.term.prefix.position";

  /**
   * Fully qualified names of the classes that every constructor loads up
   * front to prevent NoClassDefFoundErrors later on.
   */
  private static final String[] PRELOAD_CLASS_NAMES = {
      "mtas.codec.payload.MtasPayloadDecoder",
      "mtas.codec.payload.MtasBitInputStream",
      "mtas.analysis.token.MtasPosition",
      "mtas.analysis.token.MtasOffset",
      "mtas.codec.tree.MtasRBTree",
      "mtas.codec.MtasTerms",
      "mtas.codec.util.CodecInfo",
      "mtas.codec.tree.MtasTreeNodeId" };

  /** The payload decoder. */
  MtasPayloadDecoder payloadDecoder;

  /**
   * Name of the delegate: the name of the delegate postings format when one
   * was supplied, otherwise the name of the codec that is looked up on write.
   */
  private final String delegateCodecName;

  /**
   * The delegate postings format, or {@code null} when this instance was
   * created from a codec name.
   */
  private final PostingsFormat delegatePostingsFormat;

  /**
   * Instantiates a new mtas codec postings format using
   * {@code MtasCodec.MTAS_CODEC_NAME} as codec name.
   */
  public MtasCodecPostingsFormat() {
    this(MtasCodec.MTAS_CODEC_NAME);
  }

  /**
   * Instantiates a new mtas codec postings format that writes through the
   * given delegate.
   *
   * @param delegate
   *          the postings format whose fields consumer is wrapped on write;
   *          must not be {@code null}
   */
  public MtasCodecPostingsFormat(PostingsFormat delegate) {
    super(MtasCodec.MTAS_CODEC_NAME);
    delegateCodecName = delegate.getName();
    delegatePostingsFormat = delegate;
    preloadClasses();
  }

  /**
   * Instantiates a new mtas codec postings format without a delegate
   * instance; the delegate is resolved from the codec name on write.
   *
   * @param codecName
   *          the name of this postings format, also used to look up the
   *          codec that provides the delegate postings format
   */
  public MtasCodecPostingsFormat(String codecName) {
    super(codecName);
    delegateCodecName = codecName;
    delegatePostingsFormat = null;
    preloadClasses();
  }

  /**
   * Loads the classes listed in {@link #PRELOAD_CLASS_NAMES}. Loading is best
   * effort: the first class that cannot be found is reported on standard
   * error and the remaining classes are not attempted.
   */
  private static void preloadClasses() {
    try {
      for (String className : PRELOAD_CLASS_NAMES) {
        Class.forName(className);
      }
    } catch (ClassNotFoundException e) {
      // deliberately non-fatal: construction proceeds without the preload
      e.printStackTrace();
    }
  }

  /*
   * (non-Javadoc)
   *
   * @see
   * org.apache.lucene.codecs.PostingsFormat#fieldsProducer(org.apache.lucene
   * .index.SegmentReadState)
   */
  @Override
  public final FieldsProducer fieldsProducer(SegmentReadState state)
      throws IOException {
    return new MtasFieldsProducer(state, getName());
  }

  /*
   * (non-Javadoc)
   *
   * @see
   * org.apache.lucene.codecs.PostingsFormat#fieldsConsumer(org.apache.lucene
   * .index.SegmentWriteState)
   */
  @Override
  public final FieldsConsumer fieldsConsumer(SegmentWriteState state)
      throws IOException {
    // use the supplied delegate if there is one, otherwise resolve the
    // postings format of the codec registered under the stored name
    PostingsFormat delegate = (delegatePostingsFormat != null)
        ? delegatePostingsFormat
        : Codec.forName(delegateCodecName).postingsFormat();
    return new MtasFieldsConsumer(delegate.fieldsConsumer(state), state,
        getName(), delegate.getName());
  }

  /**
   * Reads one token from the object and term inputs.
   *
   * <p>
   * Starting at {@code ref} the object input is read in this order: the token
   * id (VInt), the object flags (VInt), the parent id (VInt) if
   * {@code MTAS_OBJECT_HAS_PARENT} is set, the position, the offset if
   * {@code MTAS_OBJECT_HAS_OFFSET} is set, the real offset if
   * {@code MTAS_OBJECT_HAS_REALOFFSET} is set, the payload (length as VInt
   * followed by that many bytes) if {@code MTAS_OBJECT_HAS_PAYLOAD} is set,
   * and finally the term reference (VLong). The position is a range (start
   * followed by the distance to the end) if
   * {@code MTAS_OBJECT_HAS_POSITION_RANGE} is set, otherwise a set (size
   * followed by that many increments, each relative to the previous position)
   * if {@code MTAS_OBJECT_HAS_POSITION_SET} is set, otherwise a single
   * position. Offsets are stored as start followed by the distance to the
   * end. The token value is the string read from the term input at the term
   * reference.
   *
   * @param inObject
   *          the input holding the object data; it is repositioned by this
   *          call
   * @param inTerm
   *          the input holding the term data; it is repositioned by this call
   * @param ref
   *          the position in {@code inObject} where the token starts; must
   *          not be {@code null}
   * @return the token that was read
   * @throws IOException
   *           if seeking in or reading from one of the inputs fails
   */
  public static MtasToken<String> getToken(IndexInput inObject,
      IndexInput inTerm, Long ref) throws IOException {
    inObject.seek(ref);
    MtasToken<String> token = new MtasTokenString("");
    token.setId(inObject.readVInt());
    token.setTokenRef(ref);
    int objectFlags = inObject.readVInt();

    if (hasFlag(objectFlags, MTAS_OBJECT_HAS_PARENT)) {
      token.setParentId(inObject.readVInt());
    }

    // position: range takes precedence over set, single position otherwise
    if (hasFlag(objectFlags, MTAS_OBJECT_HAS_POSITION_RANGE)) {
      int positionStart = inObject.readVInt();
      int positionEnd = positionStart + inObject.readVInt();
      token.addPositionRange(positionStart, positionEnd);
    } else if (hasFlag(objectFlags, MTAS_OBJECT_HAS_POSITION_SET)) {
      int size = inObject.readVInt();
      TreeSet<Integer> positions = new TreeSet<Integer>();
      int position = 0;
      for (int i = 0; i < size; i++) {
        // each stored value is the increment to the previous position
        position += inObject.readVInt();
        positions.add(position);
      }
      token.addPositions(positions);
    } else {
      token.addPosition(inObject.readVInt());
    }

    if (hasFlag(objectFlags, MTAS_OBJECT_HAS_OFFSET)) {
      int offsetStart = inObject.readVInt();
      int offsetEnd = offsetStart + inObject.readVInt();
      token.setOffset(offsetStart, offsetEnd);
    }

    if (hasFlag(objectFlags, MTAS_OBJECT_HAS_REALOFFSET)) {
      int realOffsetStart = inObject.readVInt();
      int realOffsetEnd = realOffsetStart + inObject.readVInt();
      token.setRealOffset(realOffsetStart, realOffsetEnd);
    }

    if (hasFlag(objectFlags, MTAS_OBJECT_HAS_PAYLOAD)) {
      int length = inObject.readVInt();
      byte[] mtasPayload = new byte[length];
      inObject.readBytes(mtasPayload, 0, length);
      token.setPayload(new BytesRef(mtasPayload));
    }

    // the object data ends with a reference into the term input
    Long termRef = inObject.readVLong();
    inTerm.seek(termRef);
    token.setTermRef(termRef);
    token.setValue(inTerm.readString());
    return token;
  }

  /**
   * Checks whether all bits of a flag are set in a flag field.
   *
   * @param flags
   *          the flag field
   * @param flag
   *          the flag to test for
   * @return true, if every bit of {@code flag} is set in {@code flags}
   */
  private static boolean hasFlag(int flags, int flag) {
    return (flags & flag) == flag;
  }

}