Skip to content

Commit 182c6d2

Browse files
committed
Merge branch 'master' of github.com:lemire/JavaFastPFOR
2 parents 82a4d19 + d75c85f commit 182c6d2

26 files changed

+616
-292
lines changed

.github/workflows/basic.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ jobs:
88
strategy:
99
fail-fast: false
1010
matrix:
11-
java: [ 11, 16 ]
11+
java: [ 17, 21 ]
1212
steps:
13-
- uses: actions/checkout@v2
13+
- uses: actions/checkout@v4.1.1
1414
- name: Set up JDK
15-
uses: actions/setup-java@v2.5.0
15+
uses: actions/setup-java@v4.1.0
1616
with:
1717
java-version: ${{ matrix.java }}
1818
distribution: 'adopt'
@@ -21,4 +21,4 @@ jobs:
2121
- name: Build example
2222
run: javac -cp target/classes/:. example.java
2323
- name: Run example
24-
run: java -cp target/classes/:. example
24+
run: java -cp target/classes/:. example

README.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
JavaFastPFOR: A simple integer compression library in Java
22
==========================================================
33
[![][maven img]][maven] [![][license img]][license] [![docs-badge][]][docs]
4-
[![Code Quality: Cpp](https://img.shields.io/lgtm/grade/java/g/lemire/JavaFastPFOR.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/lemire/JavaFastPFOR/context:java)
54
[![Java CI](https://github.com/lemire/JavaFastPFOR/actions/workflows/basic.yml/badge.svg)](https://github.com/lemire/JavaFastPFOR/actions/workflows/basic.yml)
65

76

@@ -82,9 +81,9 @@ the following code in your pom.xml file:
8281
```xml
8382
<dependencies>
8483
<dependency>
85-
<groupId>me.lemire.integercompression</groupId>
86-
<artifactId>JavaFastPFOR</artifactId>
87-
<version>[0.1,)</version>
84+
<groupId>me.lemire.integercompression</groupId>
85+
<artifactId>JavaFastPFOR</artifactId>
86+
<version>[0.2,)</version>
8887
</dependency>
8988
</dependencies>
9089
```
@@ -222,6 +221,7 @@ He also posted his slides online: http://www.slideshare.net/ikhtearSharif/ikhtea
222221
Other recommended libraries
223222
-----------------------------
224223

224+
* Fast integer compression in Go: https://github.com/ronanh/intcomp
225225
* Encoding: Integer Compression Libraries for Go https://github.com/zhenjl/encoding
226226
* CSharpFastPFOR: A C# integer compression library https://github.com/Genbox/CSharpFastPFOR
227227
* TurboPFor is a C library that offers lots of interesting optimizations and Java wrappers. Well worth checking! (Uses a GPL license.) https://github.com/powturbo/TurboPFor

pom.xml

+5-5
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<modelVersion>4.0.0</modelVersion>
33
<groupId>me.lemire.integercompression</groupId>
44
<artifactId>JavaFastPFOR</artifactId>
5-
<version>0.1.13-SNAPSHOT</version>
5+
<version>0.2.2-SNAPSHOT</version>
66
<packaging>jar</packaging>
77
<properties>
88
<maven.compiler.source>1.8</maven.compiler.source>
@@ -69,10 +69,10 @@
6969
<plugin>
7070
<groupId>org.apache.maven.plugins</groupId>
7171
<artifactId>maven-compiler-plugin</artifactId>
72-
<version>3.8.0</version>
72+
<version>3.12.1</version>
7373
<configuration>
74-
<source>11</source>
75-
<target>11</target>
74+
<source>17</source>
75+
<target>17</target>
7676
</configuration>
7777
<executions>
7878
<execution>
@@ -138,7 +138,7 @@
138138
<plugin>
139139
<groupId>org.apache.maven.plugins</groupId>
140140
<artifactId>maven-javadoc-plugin</artifactId>
141-
<version>3.4.1</version>
141+
<version>3.6.3</version>
142142
<configuration>
143143
<excludePackageNames>me.lemire.integercompression.vector;com.kamikaze.pfordelta:me.lemire.integercompression.benchmarktools</excludePackageNames>
144144
</configuration>

src/main/java/me/lemire/integercompression/BitPacking.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -1690,7 +1690,7 @@ protected static void fastpack9(final int[] in, int inpos,
16901690
}
16911691

16921692
/**
1693-
* Unpack 32 integers
1693+
* Pack without mask 32 integers
16941694
*
16951695
* @param in
16961696
* source array
@@ -3005,7 +3005,7 @@ protected static void fastpackwithoutmask9(final int[] in, int inpos,
30053005
}
30063006

30073007
/**
3008-
* Pack the 32 integers
3008+
* Unpack the 32 integers
30093009
*
30103010
* @param in
30113011
* source array

src/main/java/me/lemire/integercompression/DeltaZigzagVariableByte.java

+3
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ public void uncompress(int[] inBuf, IntWrapper inPos, int inLen,
134134
* In case you need a different way to allocate buffers, you can override this method
135135
* with a custom behavior. The default implementation allocates a new Java direct
136136
* {@link ByteBuffer} on each invocation.
137+
*
138+
* @param sizeInBytes size, in bytes, of the buffer to allocate
139+
* @return a {@link ByteBuffer} of the requested size
137140
*/
138141
protected ByteBuffer makeBuffer(int sizeInBytes) {
139142
return ByteBuffer.allocateDirect(sizeInBytes);

src/main/java/me/lemire/integercompression/FastPFOR.java

+3
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,9 @@ public String toString() {
336336
* In case you need a different way to allocate buffers, you can override this method
337337
* with a custom behavior. The default implementation allocates a new Java direct
338338
* {@link ByteBuffer} on each invocation.
339+
*
340+
* @param sizeInBytes size, in bytes, of the buffer to allocate
341+
* @return a {@link ByteBuffer} of the requested size
339342
*/
340343
protected ByteBuffer makeBuffer(int sizeInBytes) {
341344
return ByteBuffer.allocateDirect(sizeInBytes);

src/main/java/me/lemire/integercompression/FastPFOR128.java

+3
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,9 @@ public String toString() {
317317
* In case you need a different way to allocate buffers, you can override this method
318318
* with a custom behavior. The default implementation allocates a new Java direct
319319
* {@link ByteBuffer} on each invocation.
320+
*
321+
* @param sizeInBytes size, in bytes, of the buffer to allocate
322+
* @return a {@link ByteBuffer} of the requested size
320323
*/
321324
protected ByteBuffer makeBuffer(int sizeInBytes) {
322325
return ByteBuffer.allocateDirect(sizeInBytes);

src/main/java/me/lemire/integercompression/IntCompressor.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ public IntCompressor() {
3636
* @throws UncompressibleInputException if the data is too poorly compressible
3737
*/
3838
public int[] compress(int[] input) {
39-
int [] compressed = new int[input.length + input.length / 100 + 1024];
39+
int[] compressed = new int[input.length + input.length / 100 + 1024];
40+
// Store at index=0 the length of the input, hence enabling .headlessCompress
4041
compressed[0] = input.length;
4142
IntWrapper outpos = new IntWrapper(1);
4243
try {
@@ -58,6 +59,7 @@ public int[] compress(int[] input) {
5859
* @return uncompressed array
5960
*/
6061
public int[] uncompress(int[] compressed) {
62+
// Read at index=0 the length of the input, hence enabling .headlessUncompress
6163
int[] decompressed = new int[compressed[0]];
6264
IntWrapper inpos = new IntWrapper(1);
6365
codec.headlessUncompress(compressed, inpos,

src/main/java/me/lemire/integercompression/IntegerCODEC.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ public interface IntegerCODEC {
2525
* @param in
2626
* input array
2727
* @param inpos
28-
* location in the input array
28+
* where to start reading in the array
2929
* @param inlength
3030
* how many integers to compress
3131
* @param out
@@ -52,7 +52,7 @@ public void compress(int[] in, IntWrapper inpos, int inlength,
5252
* @param out
5353
* array where to write the compressed output
5454
* @param outpos
55-
* where to write the compressed output in out
55+
* where to start writing the uncompressed output in out
5656
*/
5757
public void uncompress(int[] in, IntWrapper inpos, int inlength,
5858
int[] out, IntWrapper outpos);

src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java

+9-6
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
* variation on the IntegerCODEC interface meant to be used for random access
1414
* (i.e., given a large array, you can segment it and decode just the subarray you need).
1515
*
16-
* The main difference is that we must specify the number of integers we wish to
17-
* decode. This information should be stored elsewhere.
16+
* The main difference is that you must specify the number of integers you wish to
17+
* uncompress. This information should be stored elsewhere.
1818
*
1919
* This interface was designed by the Terrier team for their search engine.
2020
*
@@ -30,10 +30,13 @@ public interface SkippableIntegerCODEC {
3030
* inpos will be incremented by 12 while outpos will be incremented by 3. We
3131
* use IntWrapper to pass the values by reference.
3232
*
33+
* Implementation note: contrary to {@link IntegerCODEC#compress},
34+
* this may skip writing information about the number of encoded integers.
35+
*
3336
* @param in
3437
* input array
3538
* @param inpos
36-
* location in the input array
39+
* where to start reading in the array
3740
* @param inlength
3841
* how many integers to compress
3942
* @param out
@@ -57,11 +60,11 @@ public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out
5760
* @param inlength
5861
* length of the compressed data (ignored by some schemes)
5962
* @param out
60-
* array where to write the compressed output
63+
* array where to write the uncompressed output
6164
* @param outpos
62-
* where to write the compressed output in out
65+
* where to start writing the uncompressed output in out
6366
* @param num
64-
* number of integers we want to decode, the actual number of integers decoded can be less
67+
* number of integers we want to decode. May be less than the actual number of compressed integers
6568
*/
6669
public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out,
6770
IntWrapper outpos, int num);

src/main/java/me/lemire/integercompression/VariableByte.java

+3
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,9 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
214214
* In case you need a different way to allocate buffers, you can override this method
215215
* with a custom behavior. The default implementation allocates a new Java direct
216216
* {@link ByteBuffer} on each invocation.
217+
*
218+
* @param sizeInBytes size, in bytes, of the buffer to allocate
219+
* @return a {@link ByteBuffer} of the requested size
217220
*/
218221
protected ByteBuffer makeBuffer(int sizeInBytes) {
219222
return ByteBuffer.allocateDirect(sizeInBytes);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
package me.lemire.longcompression;
2+
3+
import me.lemire.integercompression.BinaryPacking;
4+
import me.lemire.integercompression.IntWrapper;
5+
import me.lemire.integercompression.Util;
6+
7+
/**
8+
* Scheme based on a commonly used idea: can be extremely fast.
9+
* It encodes integers in blocks of 64 longs. For arrays containing
10+
* an arbitrary number of longs, you should use it in conjunction
11+
* with another CODEC:
12+
*
13+
* <pre>LongCODEC ic =
14+
* new Composition(new LongBinaryPacking(), new LongVariableByte()).</pre>
15+
*
16+
* Note that this does not use differential coding: if you are working on sorted
17+
* lists, you must compute the deltas separately.
18+
*
19+
* <p>
20+
* For details, please see {@link BinaryPacking}
21+
* </p>
22+
*
23+
* @author Benoit Lacelle
24+
*/
25+
public final class LongBinaryPacking implements LongCODEC, SkippableLongCODEC {
26+
final static int BLOCK_SIZE = 64;
27+
28+
@Override
29+
public void compress(long[] in, IntWrapper inpos, int inlength,
30+
long[] out, IntWrapper outpos) {
31+
inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
32+
if (inlength == 0)
33+
return;
34+
out[outpos.get()] = inlength;
35+
outpos.increment();
36+
headlessCompress(in, inpos, inlength, out, outpos);
37+
}
38+
39+
@Override
40+
public void headlessCompress(long[] in, IntWrapper inpos, int inlength,
41+
long[] out, IntWrapper outpos) {
42+
inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
43+
int tmpoutpos = outpos.get();
44+
int s = inpos.get();
45+
// Compress by block of 8 * 64 longs as much as possible
46+
for (; s + BLOCK_SIZE * 8 - 1 < inpos.get() + inlength; s += BLOCK_SIZE * 8) {
47+
// maxbits can be anything between 0 and 64 included: expressed within a byte (1 << 6)
48+
final int mbits1 = LongUtil.maxbits(in, s + 0 * BLOCK_SIZE, BLOCK_SIZE);
49+
final int mbits2 = LongUtil.maxbits(in, s + 1 * BLOCK_SIZE, BLOCK_SIZE);
50+
final int mbits3 = LongUtil.maxbits(in, s + 2 * BLOCK_SIZE, BLOCK_SIZE);
51+
final int mbits4 = LongUtil.maxbits(in, s + 3 * BLOCK_SIZE, BLOCK_SIZE);
52+
final int mbits5 = LongUtil.maxbits(in, s + 4 * BLOCK_SIZE, BLOCK_SIZE);
53+
final int mbits6 = LongUtil.maxbits(in, s + 5 * BLOCK_SIZE, BLOCK_SIZE);
54+
final int mbits7 = LongUtil.maxbits(in, s + 6 * BLOCK_SIZE, BLOCK_SIZE);
55+
final int mbits8 = LongUtil.maxbits(in, s + 7 * BLOCK_SIZE, BLOCK_SIZE);
56+
// The first long expressed the maxbits for the 8 buckets
57+
out[tmpoutpos++] = ((long) mbits1 << 56) | ((long) mbits2 << 48) | ((long) mbits3 << 40) | ((long) mbits4 << 32) | (mbits5 << 24) | (mbits6 << 16) | (mbits7 << 8) | (mbits8);
58+
LongBitPacking.fastpackwithoutmask(in, s + 0 * BLOCK_SIZE, out, tmpoutpos, (int) mbits1);
59+
tmpoutpos += mbits1;
60+
LongBitPacking.fastpackwithoutmask(in, s + 1 * BLOCK_SIZE, out, tmpoutpos, (int) mbits2);
61+
tmpoutpos += mbits2;
62+
LongBitPacking.fastpackwithoutmask(in, s + 2 * BLOCK_SIZE, out, tmpoutpos, (int) mbits3);
63+
tmpoutpos += mbits3;
64+
LongBitPacking.fastpackwithoutmask(in, s + 3 * BLOCK_SIZE, out, tmpoutpos, (int) mbits4);
65+
tmpoutpos += mbits4;
66+
LongBitPacking.fastpackwithoutmask(in, s + 4 * BLOCK_SIZE, out, tmpoutpos, (int) mbits5);
67+
tmpoutpos += mbits5;
68+
LongBitPacking.fastpackwithoutmask(in, s + 5 * BLOCK_SIZE, out, tmpoutpos, (int) mbits6);
69+
tmpoutpos += mbits6;
70+
LongBitPacking.fastpackwithoutmask(in, s + 6 * BLOCK_SIZE, out, tmpoutpos, (int) mbits7);
71+
tmpoutpos += mbits7;
72+
LongBitPacking.fastpackwithoutmask(in, s + 7 * BLOCK_SIZE, out, tmpoutpos, (int) mbits8);
73+
tmpoutpos += mbits8;
74+
}
75+
// Then we compress up to 7 blocks of 64 longs
76+
for (; s < inpos.get() + inlength; s += BLOCK_SIZE ) {
77+
final int mbits = LongUtil.maxbits(in, s, BLOCK_SIZE);
78+
out[tmpoutpos++] = mbits;
79+
LongBitPacking.fastpackwithoutmask(in, s, out, tmpoutpos, mbits);
80+
tmpoutpos += mbits;
81+
}
82+
inpos.add(inlength);
83+
outpos.set(tmpoutpos);
84+
}
85+
86+
@Override
87+
public void uncompress(long[] in, IntWrapper inpos, int inlength,
88+
long[] out, IntWrapper outpos) {
89+
if (inlength == 0)
90+
return;
91+
final int outlength = (int) in[inpos.get()];
92+
inpos.increment();
93+
headlessUncompress(in,inpos, inlength,out,outpos,outlength);
94+
}
95+
96+
@Override
97+
public void headlessUncompress(long[] in, IntWrapper inpos, int inlength,
98+
long[] out, IntWrapper outpos, int num) {
99+
final int outlength = Util.greatestMultiple(num, BLOCK_SIZE);
100+
int tmpinpos = inpos.get();
101+
int s = outpos.get();
102+
for (; s + BLOCK_SIZE * 8 - 1 < outpos.get() + outlength; s += BLOCK_SIZE * 8) {
103+
final int mbits1 = (int) ((in[tmpinpos] >>> 56));
104+
final int mbits2 = (int) ((in[tmpinpos] >>> 48) & 0xFF);
105+
final int mbits3 = (int) ((in[tmpinpos] >>> 40) & 0xFF);
106+
final int mbits4 = (int) ((in[tmpinpos] >>> 32) & 0xFF);
107+
final int mbits5 = (int) ((in[tmpinpos] >>> 24) & 0xFF);
108+
final int mbits6 = (int) ((in[tmpinpos] >>> 16) & 0xFF);
109+
final int mbits7 = (int) ((in[tmpinpos] >>> 8) & 0xFF);
110+
final int mbits8 = (int) ((in[tmpinpos]) & 0xFF);
111+
++tmpinpos;
112+
LongBitPacking.fastunpack(in, tmpinpos, out, s + 0 * BLOCK_SIZE, mbits1);
113+
tmpinpos += mbits1;
114+
LongBitPacking.fastunpack(in, tmpinpos, out, s + 1 * BLOCK_SIZE, mbits2);
115+
tmpinpos += mbits2;
116+
LongBitPacking.fastunpack(in, tmpinpos, out, s + 2 * BLOCK_SIZE, mbits3);
117+
tmpinpos += mbits3;
118+
LongBitPacking.fastunpack(in, tmpinpos, out, s + 3 * BLOCK_SIZE, mbits4);
119+
tmpinpos += mbits4;
120+
LongBitPacking.fastunpack(in, tmpinpos, out, s + 4 * BLOCK_SIZE, mbits5);
121+
tmpinpos += mbits5;
122+
LongBitPacking.fastunpack(in, tmpinpos, out, s + 5 * BLOCK_SIZE, mbits6);
123+
tmpinpos += mbits6;
124+
LongBitPacking.fastunpack(in, tmpinpos, out, s + 6 * BLOCK_SIZE, mbits7);
125+
tmpinpos += mbits7;
126+
LongBitPacking.fastunpack(in, tmpinpos, out, s + 7 * BLOCK_SIZE, mbits8);
127+
tmpinpos += mbits8;
128+
}
129+
for (; s < outpos.get() + outlength; s += BLOCK_SIZE ) {
130+
final int mbits = (int) in[tmpinpos];
131+
++tmpinpos;
132+
LongBitPacking.fastunpack(in, tmpinpos, out, s, mbits);
133+
tmpinpos += mbits;
134+
}
135+
outpos.add(outlength);
136+
inpos.set(tmpinpos);
137+
}
138+
139+
@Override
140+
public String toString() {
141+
return this.getClass().getSimpleName();
142+
}
143+
}

0 commit comments

Comments
 (0)