RandomIndex.java
4.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
/**
*
*/
package is2.data;
import java.util.BitSet;
import is2.util.DB;
/**
* @author Dr. Bernd Bohnet, 20.05.2011
*
*
*/
public class RandomIndex implements Long2IntInterface {
final int[] prims = {52349171,199951347,89990,5001,32891,17,19,23,29,31,37,47,53,59,61,67,71};
// final int[] prims = {1,3,5,7,11,17,19,23,29,31,37,47,53,59,61,67,71};
final long hashFunctionModifiers[];
final int kbit,lbit;
final int hsize ; // maximal size of hash
final int bits; // available bits
final int moves; // needed moves to put a number into
/**
* Creates the random functions.
*
* @param kbit The bits to be mapped
* @param lbit The left shift of the bits
* @param hsize The size of the featurs space (not included in the original algorithm)
* @param numberFunctions The number of the hash functions
*/
public RandomIndex(int kbit, int lbit, int hsize, int numberFunctions) {
this.kbit =kbit;
this.lbit =lbit;
if (hsize<=0) this.hsize = 67000001; // default value
else this.hsize = hsize;
bits = (int) Math.ceil(Math.log(this.hsize)/Math.log(2));
moves = (int) Math.ceil(64f/(float)bits);
DB.println("moves "+moves+" bits "+bits+" hsize "+hsize);
hashFunctionModifiers = new long[numberFunctions];
for (int f = 0;f<numberFunctions;f++) hashFunctionModifiers[f] = prims[f];
}
public int[] hash(long x)
{
int[] hvals = new int[hashFunctionModifiers.length];
for(int k=0;k<hashFunctionModifiers.length;k++) {
// the original function: value = ((x+1) * hashFunctionModifiers[k] & m ) >> n;
// the first part of the original function
long value = (x+1) * hashFunctionModifiers[k];
// do the above >> n with a maximal size of the available hash values
// Shift all bits until they have been each xor-ed (^) in the range of the hash
// in order the have all information potentially represented there.
for(int j=1;j<= moves;j++) value = value ^ (value >> (bits*j));
// Map the value to the range of the available space should be the same as (value & m) .
hvals[k] = Math.abs((int)value % hsize);
}
return hvals;
}
public int[] hashU(long x)
{
int[] hvals = new int[hashFunctionModifiers.length];
long y = Long.reverse(x);
for(int k=0;k<hashFunctionModifiers.length;k++) {
// the original function: value = ((x+1) * hashFunctionModifiers[k] & m ) >> n;
// the first part of the original function
long value1 = (((y+1) * hashFunctionModifiers[k]) /* % 2 pow 64 */ ) >> (kbit-lbit);
// I get probably only the first part lets get the second part too
// long value2 = (((y+1>>20) * hashFunctionModifiers[k]) /* % 2 pow 64 */ ) >> (kbit-lbit);
// the modulo (%) 2 pow 64 is done since the long number can not be larger than 2 pow 64.
// System.out.println("value "+value+" shift "+(lbit-kbit));
hvals[k] = Math.abs((int)value1);
}
return hvals;
}
/*
(defun generate-hash-fn (&key (k-bit 32)
(l-bit 8)
verbosep constants (count 4))
(labels ((random-constant ()
(let ((a (+ (random (- (expt 2 k-bit) 1)) 1)))
(logior a 1)))) ;; inclusive OR ensures odd number.
(let ((pdiff (- (- k-bit l-bit)));; neg. sign to do a rightshift, see ash()
(sub1 (- (expt 2 k-bit) 1))
(constants (copy-list constants)))
(unless constants
(loop ;; a = odd number a where 0 < a < u.
until (= count (length constants))
do (pushnew (random-constant) constants)))
(when verbosep
(format t "~&generate-hash-fn(): using random constants: ~a~%"
constants))
(values
#'(lambda (x)
(loop
for a in constants
;;; always add 1 to x to avoid f(0)=0.
collect (ash (logand (* (+ 1 x) a) sub1) pdiff)))
constants))))
*/
/* (non-Javadoc)
* @see is2.data.Long2IntInterface#l2i(long)
*/
@Override
public int l2i(long l) {
// TODO Auto-generated method stub
return 0;
}
/* (non-Javadoc)
* @see is2.data.Long2IntInterface#size()
*/
@Override
public int size() {
return hsize;
}
}