From 37541a4ef897065a46f91f9f00c7bc9d0f9af710 Mon Sep 17 00:00:00 2001 From: Emmanuel Bourg <ebourg@apache.org> Date: Mon, 1 Feb 2021 11:29:32 +0100 Subject: [PATCH] New upstream version 1.1.6 --- CHANGELOG | 6 +- README.md | 16 +- pom.xml | 12 +- .../googlecode/javaewah/FastAggregation.java | 161 +----------------- .../javaewah/datastructure/BitSet.java | 4 +- .../javaewah32/FastAggregation32.java | 160 +---------------- .../javaewah/EWAHCompressedBitmapTest.java | 101 ++++++++++- .../IntIteratorOverIteratingRLWTest.java | 59 ++++++- .../javaewah/IteratorAggregationTest.java | 2 +- .../javaewah/datastructure/BitSetTest.java | 73 ++++++++ .../EWAHCompressedBitmap32Test.java | 101 ++++++++++- .../IntIteratorOverIteratingRLW32Test.java | 56 +++++- .../javaewah32/IteratorAggregation32Test.java | 2 +- 13 files changed, 423 insertions(+), 330 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index cdea644..0f36985 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,9 @@ -upcoming +version 1.1.6 (April 26th 2016) + - Mostly just better testing + +version 1.1.5 (January 8th 2016) - faster shift function (gssiyankai) + - ChunkIterator not iterating correctly #61 version 1.1.4 (December 17th 2015) - Fixed issue 60: bitmap shift then or diff --git a/README.md b/README.md index 47bdfac..dfe8fca 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ JavaEWAH [![][maven img]][maven] [![][license img]][license] [![docs-badge][]][docs] +[](https://coveralls.io/r/lemire/javaewah?branch=master) (c) 2009-2016 @@ -148,6 +149,8 @@ see Section 3 of the following paper: Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages 3-28, 2010. http://arxiv.org/abs/0901.3751 + + (The PDF file is freely available on the arXiv site.) Benchmark @@ -318,10 +321,11 @@ follows: */ boolean is64set = b.get(64); ``` + API Documentation ----------------- -http://lemire.me/docs/javaewah/ +http://www.javadoc.io/doc/com.googlecode.javaewah/JavaEWAH/ @@ -332,16 +336,10 @@ Further reading Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned bitmap indexes, Data & Knowledge Engineering 69 (1), 2010. http://arxiv.org/abs/0901.3751 -Owen Kaser and Daniel Lemire, Compressed bitmap indexes: beyond unions and intersections, Software: Practice and Experience, 2014. +Owen Kaser and Daniel Lemire, Compressed bitmap indexes: beyond unions and intersections, Software: Practice and Experience 46 (2), 2016. http://arxiv.org/abs/1402.4466 -Help needed ------------- - -JavaEWAH has been used in production for many years. However, we still need help writing more tests. - -[](https://coveralls.io/r/lemire/javaewah?branch=master) Acknowledgement --------------- @@ -358,4 +356,4 @@ This work was supported by NSERC grant number 26143. [docs-badge]:https://img.shields.io/badge/API-docs-blue.svg?style=flat-square -[docs]:http://lemire.me/docs/javaewah/ +[docs]:http://www.javadoc.io/doc/com.googlecode.javaewah/JavaEWAH/ diff --git a/pom.xml b/pom.xml index 0c80147..c14d4b4 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ <modelVersion>4.0.0</modelVersion> <groupId>com.googlecode.javaewah</groupId> <artifactId>JavaEWAH</artifactId> - <version>1.1.5</version> + <version>1.1.6</version> <packaging>bundle</packaging> <properties> <maven.compiler.source>1.6</maven.compiler.source> @@ -60,6 +60,16 @@ </parent> <build> <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <version>2.19.1</version> + <configuration> + <forkCount>3</forkCount> + <reuseForks>true</reuseForks> + <argLine>-Xmx1024m ${argLine}</argLine> + </configuration> + </plugin> <plugin> <groupId>org.codehaus.mojo</groupId> <artifactId>animal-sniffer-maven-plugin</artifactId> diff --git a/src/main/java/com/googlecode/javaewah/FastAggregation.java b/src/main/java/com/googlecode/javaewah/FastAggregation.java index 21b7f51..df64aea 100644 --- a/src/main/java/com/googlecode/javaewah/FastAggregation.java +++ b/src/main/java/com/googlecode/javaewah/FastAggregation.java @@ -26,6 +26,9 @@ public final class FastAggregation { /** * Compute the and aggregate using a temporary uncompressed bitmap. * + * This function does not seek to match the "sizeinbits" attributes + * of the input bitmaps. + * * @param bitmaps the source bitmaps * @param bufSize buffer size used during the computation in 64-bit * words (per input bitmap) @@ -40,6 +43,9 @@ public final class FastAggregation { /** * Compute the and aggregate using a temporary uncompressed bitmap. + * + * This function does not seek to match the "sizeinbits" attributes + * of the input bitmaps. * * @param container where the aggregate is written * @param bufSize buffer size used during the computation in 64-bit @@ -444,159 +450,4 @@ public final class FastAggregation { pq.poll().xorToContainer(pq.poll(), container); } - /** - * For internal use. Computes the bitwise or of the provided bitmaps and - * stores the result in the container. (This used to be the default.) - * - * @param container where store the result - * @param bitmaps to be aggregated - * @since 0.4.0 - * @deprecated use EWAHCompressedBitmap.or instead - */ - @Deprecated - public static void legacy_orWithContainer( - final BitmapStorage container, - final EWAHCompressedBitmap... bitmaps) { - if (bitmaps.length == 2) { - // should be more efficient - bitmaps[0].orToContainer(bitmaps[1], container); - return; - } - - // Sort the bitmaps in descending order by sizeInBits. We will - // exhaust the - // sorted bitmaps from right to left. - final EWAHCompressedBitmap[] sortedBitmaps = bitmaps.clone(); - Arrays.sort(sortedBitmaps, - new Comparator<EWAHCompressedBitmap>() { - @Override - public int compare(EWAHCompressedBitmap a, - EWAHCompressedBitmap b) { - return a.sizeInBits() < b.sizeInBits() ? 1 : a.sizeInBits() == b.sizeInBits() ? 0 : -1; - } - } - ); - - final IteratingBufferedRunningLengthWord[] rlws = new IteratingBufferedRunningLengthWord[bitmaps.length]; - int maxAvailablePos = 0; - for (EWAHCompressedBitmap bitmap : sortedBitmaps) { - EWAHIterator iterator = bitmap.getEWAHIterator(); - if (iterator.hasNext()) { - rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord(iterator); - } - } - - if (maxAvailablePos == 0) { // this never happens... - container.setSizeInBitsWithinLastWord(0); - return; - } - - int maxSize = sortedBitmaps[0].sizeInBits(); - - while (true) { - long maxOneRl = 0; - long minZeroRl = Long.MAX_VALUE; - long minSize = Long.MAX_VALUE; - int numEmptyRl = 0; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - long size = rlw.size(); - if (size == 0) { - maxAvailablePos = i; - break; - } - minSize = Math.min(minSize, size); - - if (rlw.getRunningBit()) { - long rl = rlw.getRunningLength(); - maxOneRl = Math.max(maxOneRl, rl); - minZeroRl = 0; - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } else { - long rl = rlw.getRunningLength(); - minZeroRl = Math.min(minZeroRl, rl); - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } - } - - if (maxAvailablePos == 0) { - break; - } else if (maxAvailablePos == 1) { - // only one bitmap is left so just write the - // rest of it out - rlws[0].discharge(container); - break; - } - - if (maxOneRl > 0) { - container.addStreamOfEmptyWords(true, maxOneRl); - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - rlw.discardFirstWords(maxOneRl); - } - } else if (minZeroRl > 0) { - container.addStreamOfEmptyWords(false, - minZeroRl); - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - rlw.discardFirstWords(minZeroRl); - } - } else { - int index = 0; - - if (numEmptyRl == 1) { - // if one rlw has literal words to - // process and the rest have a run of - // 0's we can write them out here - IteratingBufferedRunningLengthWord emptyRl = null; - long minNonEmptyRl = Long.MAX_VALUE; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - long rl = rlw - .getRunningLength(); - if (rl == 0) { - assert emptyRl == null; - emptyRl = rlw; - } else { - minNonEmptyRl = Math - .min(minNonEmptyRl, - rl); - } - } - long wordsToWrite = minNonEmptyRl > minSize ? minSize - : minNonEmptyRl; - if (emptyRl != null) - emptyRl.writeLiteralWords( - (int) wordsToWrite, - container); - index += wordsToWrite; - } - - while (index < minSize) { - long word = 0; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - if (rlw.getRunningLength() <= index) { - word |= rlw - .getLiteralWordAt(index - - (int) rlw - .getRunningLength()); - } - } - container.addWord(word); - index++; - } - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - rlw.discardFirstWords(minSize); - } - } - } - container.setSizeInBitsWithinLastWord(maxSize); - } - } diff --git a/src/main/java/com/googlecode/javaewah/datastructure/BitSet.java b/src/main/java/com/googlecode/javaewah/datastructure/BitSet.java index 22a15f7..efccf07 100644 --- a/src/main/java/com/googlecode/javaewah/datastructure/BitSet.java +++ b/src/main/java/com/googlecode/javaewah/datastructure/BitSet.java @@ -321,7 +321,7 @@ public class BitSet implements Cloneable, Iterable<Integer>, Externalizable ,Wor if (x >= this.getNumberOfWords()) return -1; long w = this.data[x]; - w >>>= (i % 64); + w >>>= i; if (w != 0) { return i + Long.numberOfTrailingZeros(w); } @@ -348,7 +348,7 @@ public class BitSet implements Cloneable, Iterable<Integer>, Externalizable ,Wor if (x >= this.getNumberOfWords()) return -1; long w = ~this.data[x]; - w >>>= (i % 64); + w >>>= i; if (w != 0) { return i + Long.numberOfTrailingZeros(w); } diff --git a/src/main/java/com/googlecode/javaewah32/FastAggregation32.java b/src/main/java/com/googlecode/javaewah32/FastAggregation32.java index 5d27d28..d260490 100644 --- a/src/main/java/com/googlecode/javaewah32/FastAggregation32.java +++ b/src/main/java/com/googlecode/javaewah32/FastAggregation32.java @@ -26,6 +26,9 @@ public final class FastAggregation32 { /** * Compute the and aggregate using a temporary uncompressed bitmap. * + * This function does not seek to match the "sizeinbits" attributes + * of the input bitmaps. + * * @param bitmaps the source bitmaps * @param bufSize buffer size used during the computation in 64-bit * words (per input bitmap) @@ -41,6 +44,9 @@ public final class FastAggregation32 { /** * Compute the and aggregate using a temporary uncompressed bitmap. * + * This function does not seek to match the "sizeinbits" attributes + * of the input bitmaps. + * * @param container where the aggregate is written * @param bufSize buffer size used during the computation in 64-bit * words (per input bitmap) @@ -385,159 +391,5 @@ public final class FastAggregation32 { pq.poll().xorToContainer(pq.poll(), container); } - /** - * For internal use. Computes the bitwise or of the provided bitmaps and - * stores the result in the container. (This used to be the default.) - * - * @param container where store the result - * @param bitmaps to be aggregated - * @since 0.4.0 - * @deprecated use EWAHCompressedBitmap32.or instead - */ - @Deprecated - public static void legacy_orWithContainer( - final BitmapStorage32 container, - final EWAHCompressedBitmap32... bitmaps) { - if (bitmaps.length == 2) { - // should be more efficient - bitmaps[0].orToContainer(bitmaps[1], container); - return; - } - - // Sort the bitmaps in descending order by sizeInBits. We will - // exhaust the - // sorted bitmaps from right to left. - final EWAHCompressedBitmap32[] sortedBitmaps = bitmaps.clone(); - Arrays.sort(sortedBitmaps, - new Comparator<EWAHCompressedBitmap32>() { - @Override - public int compare(EWAHCompressedBitmap32 a, - EWAHCompressedBitmap32 b) { - return a.sizeInBits() < b.sizeInBits() ? 1 - : a.sizeInBits() == b.sizeInBits() ? 0 - : -1; - } - } - ); - - final IteratingBufferedRunningLengthWord32[] rlws = new IteratingBufferedRunningLengthWord32[bitmaps.length]; - int maxAvailablePos = 0; - for (EWAHCompressedBitmap32 bitmap : sortedBitmaps) { - EWAHIterator32 iterator = bitmap.getEWAHIterator(); - if (iterator.hasNext()) { - rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord32( - iterator); - } - } - - if (maxAvailablePos == 0) { // this never happens... - container.setSizeInBitsWithinLastWord(0); - return; - } - - int maxSize = sortedBitmaps[0].sizeInBits(); - - while (true) { - int maxOneRl = 0; - int minZeroRl = Integer.MAX_VALUE; - int minSize = Integer.MAX_VALUE; - int numEmptyRl = 0; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - int size = rlw.size(); - if (size == 0) { - maxAvailablePos = i; - break; - } - minSize = Math.min(minSize, size); - - if (rlw.getRunningBit()) { - int rl = rlw.getRunningLength(); - maxOneRl = Math.max(maxOneRl, rl); - minZeroRl = 0; - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } else { - int rl = rlw.getRunningLength(); - minZeroRl = Math.min(minZeroRl, rl); - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } - } - - if (maxAvailablePos == 0) { - break; - } else if (maxAvailablePos == 1) { - // only one bitmap is left so just write the - // rest of it out - rlws[0].discharge(container); - break; - } - - if (maxOneRl > 0) { - container.addStreamOfEmptyWords(true, maxOneRl); - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - rlw.discardFirstWords(maxOneRl); - } - } else if (minZeroRl > 0) { - container.addStreamOfEmptyWords(false, - minZeroRl); - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - rlw.discardFirstWords(minZeroRl); - } - } else { - int index = 0; - - if (numEmptyRl == 1) { - // if one rlw has literal words to - // process and the rest have a run of - // 0's we can write them out here - IteratingBufferedRunningLengthWord32 emptyRl = null; - int minNonEmptyRl = Integer.MAX_VALUE; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - int rl = rlw.getRunningLength(); - if (rl == 0) { - assert emptyRl == null; - emptyRl = rlw; - } else { - minNonEmptyRl = Math - .min(minNonEmptyRl, - rl); - } - } - int wordsToWrite = minNonEmptyRl > minSize ? minSize - : minNonEmptyRl; - if (emptyRl != null) - emptyRl.writeLiteralWords( - wordsToWrite, container); - index += wordsToWrite; - } - - while (index < minSize) { - int word = 0; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - if (rlw.getRunningLength() <= index) { - word |= rlw - .getLiteralWordAt(index - - rlw.getRunningLength()); - } - } - container.addWord(word); - index++; - } - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - rlw.discardFirstWords(minSize); - } - } - } - container.setSizeInBitsWithinLastWord(maxSize); - } } diff --git a/src/test/java/com/googlecode/javaewah/EWAHCompressedBitmapTest.java b/src/test/java/com/googlecode/javaewah/EWAHCompressedBitmapTest.java index 70c737e..cec96f3 100644 --- a/src/test/java/com/googlecode/javaewah/EWAHCompressedBitmapTest.java +++ b/src/test/java/com/googlecode/javaewah/EWAHCompressedBitmapTest.java @@ -21,6 +21,16 @@ import static com.googlecode.javaewah.EWAHCompressedBitmap.WORD_IN_BITS; */ @SuppressWarnings("javadoc") public class EWAHCompressedBitmapTest { + + @Test + public void swaptest() { + EWAHCompressedBitmap x = EWAHCompressedBitmap.bitmapOf(1,2,3); + EWAHCompressedBitmap y = EWAHCompressedBitmap.bitmapOf(1,2,3,4); + x.swap(y); + Assert.assertEquals(x.cardinality(),4); + Assert.assertEquals(y.cardinality(),3); + } + @Test public void shiftByWordSizeBits() { @@ -1267,9 +1277,98 @@ public class EWAHCompressedBitmapTest { toBeOred); Assert.assertEquals(rightcard, e2.cardinality()); EWAHCompressedBitmap foo = new EWAHCompressedBitmap(); - FastAggregation.legacy_orWithContainer(foo, toBeOred); + FastAggregation.orToContainer(foo, toBeOred); Assert.assertEquals(rightcard, foo.cardinality()); } + + public static Iterator toIterator(final EWAHCompressedBitmap[] bitmaps) { + return new Iterator() { + int k = 0; + + @Override + public boolean hasNext() { + return k < bitmaps.length; + } + + @Override + public Object next() { + return bitmaps[k++]; + } + + @Override + public void remove() { + // nothing + } + }; + } + + @Test + public void fastand() { + int[][] data = { {5, 6, 7, 8, 9}, {1, 5}, {2, 5}}; + + EWAHCompressedBitmap[] bitmaps = new EWAHCompressedBitmap[data.length]; + + for (int i = 0; i < bitmaps.length; ++i) { + bitmaps[i] = new EWAHCompressedBitmap(); + for (int j : data[i]) { + bitmaps[i].set(j); + } + bitmaps[i].setSizeInBits(1000, false); + } + EWAHCompressedBitmap and1 = FastAggregation.bufferedand(1024, bitmaps[0],bitmaps[1],bitmaps[2]); + EWAHCompressedBitmap and2 = new EWAHCompressedBitmap(); + FastAggregation.bufferedandWithContainer(and2, 32, bitmaps[0],bitmaps[1],bitmaps[2]); + EWAHCompressedBitmap and3 = EWAHCompressedBitmap.and(bitmaps[0],bitmaps[1],bitmaps[2]); + System.out.println(and1.sizeInBits()); + System.out.println(and2.sizeInBits()); + System.out.println(and3.sizeInBits()); + assertEqualsPositions(and1, and2); + assertEqualsPositions(and2, and3); + } + + + @Test + public void fastagg() { + int[][] data = {{}, {5, 6, 7, 8, 9}, {1}, {2}}; + + EWAHCompressedBitmap[] bitmaps = new EWAHCompressedBitmap[data.length]; + + for (int i = 0; i < bitmaps.length; ++i) { + bitmaps[i] = new EWAHCompressedBitmap(); + for (int j : data[i]) { + bitmaps[i].set(j); + } + bitmaps[i].setSizeInBits(1000, false); + } + + EWAHCompressedBitmap or1 = FastAggregation.bufferedor(1024, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap or2 = FastAggregation.or(bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap or3 = FastAggregation.bufferedor(1024, bitmaps); + EWAHCompressedBitmap or4 = FastAggregation.or(bitmaps); + EWAHCompressedBitmap or5 = FastAggregation.or(toIterator(bitmaps)); + EWAHCompressedBitmap or6 = new EWAHCompressedBitmap(); + FastAggregation.orToContainer(or6, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + + assertEquals(or1,or2); + assertEquals(or2,or3); + assertEquals(or3,or4); + assertEquals(or4,or5); + assertEquals(or5,or6); + + EWAHCompressedBitmap xor1 = FastAggregation.bufferedxor(1024, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap xor2 = FastAggregation.xor(bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap xor3 = FastAggregation.bufferedxor(1024, bitmaps); + EWAHCompressedBitmap xor4 = FastAggregation.xor(bitmaps); + EWAHCompressedBitmap xor5 = FastAggregation.xor(toIterator(bitmaps)); + EWAHCompressedBitmap xor6 = new EWAHCompressedBitmap(); + FastAggregation.orToContainer(xor6, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + + assertEquals(xor1,xor2); + assertEquals(xor2,xor3); + assertEquals(xor3,xor4); + assertEquals(xor4,xor5); + assertEquals(xor5,xor6); + } @Test public void testSizeInBitsWithAnd() { diff --git a/src/test/java/com/googlecode/javaewah/IntIteratorOverIteratingRLWTest.java b/src/test/java/com/googlecode/javaewah/IntIteratorOverIteratingRLWTest.java index 06bcebf..eac7ee8 100644 --- a/src/test/java/com/googlecode/javaewah/IntIteratorOverIteratingRLWTest.java +++ b/src/test/java/com/googlecode/javaewah/IntIteratorOverIteratingRLWTest.java @@ -2,17 +2,49 @@ package com.googlecode.javaewah; import org.junit.Test; + import static org.junit.Assert.*; /* * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ +import java.util.Iterator; + /** * Tests for utility class. */ @SuppressWarnings("javadoc") public class IntIteratorOverIteratingRLWTest { + + @Test + public void iteratorAggregation() { + EWAHCompressedBitmap e1 = EWAHCompressedBitmap.bitmapOf(0, 2, 1000, 10001); + EWAHCompressedBitmap e2 = new EWAHCompressedBitmap(); + for (int k = 64; k < 450; ++k) + e2.set(k); + EWAHCompressedBitmap e3 = new EWAHCompressedBitmap(); + for (int k = 64; k < 450; ++k) + e2.set(400 * k); + assertEquals(IteratorUtil.materialize( + IteratorAggregation.bufferedand(e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation.bufferedand(1024, e1, e2, e3)); + assertEquals(IteratorUtil.materialize( + IteratorAggregation.bufferedor(e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation.bufferedor(1024, e1, e2, e3)); + assertEquals(IteratorUtil.materialize( + IteratorAggregation.bufferedxor(e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation.bufferedxor(1024, e1, e2, e3)); + assertEquals(IteratorUtil.materialize( + IteratorAggregation.bufferedand(500, e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation.bufferedand(1024, e1, e2, e3)); + assertEquals(IteratorUtil.materialize( + IteratorAggregation.bufferedor(500, e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation.bufferedor(1024, e1, e2, e3)); + assertEquals(IteratorUtil.materialize( + IteratorAggregation.bufferedxor(500, e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation.bufferedxor(1024, e1, e2, e3)); + } @Test // had problems with bitmaps beginning with two consecutive clean runs @@ -22,8 +54,8 @@ public class IntIteratorOverIteratingRLWTest { EWAHCompressedBitmap e = new EWAHCompressedBitmap(); for (int i = 64; i < 128; ++i) e.set(i); - IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( - e.getIteratingRLW()); + + IntIterator ii = IteratorUtil.toSetBitsIntIterator(e.getIteratingRLW()); assertTrue(ii.hasNext()); int ctr = 0; while (ii.hasNext()) { @@ -31,6 +63,29 @@ public class IntIteratorOverIteratingRLWTest { ii.next(); } assertEquals(64, ctr); + Iterator iii = IteratorUtil.toSetBitsIterator(e.getIteratingRLW()); + assertTrue(iii.hasNext()); + ctr = 0; + while (iii.hasNext()) { + ++ctr; + iii.next(); + } + assertEquals(64, ctr); + + } + + @Test + public void testMaterialize() { + EWAHCompressedBitmap e = new EWAHCompressedBitmap(); + for (int i = 64; i < 128; ++i) + e.set(333 * i); + assertEquals(e.cardinality(), IteratorUtil.cardinality(e.getIteratingRLW())); + EWAHCompressedBitmap newe = new EWAHCompressedBitmap(); + IteratorUtil.materialize(e.getIteratingRLW(), newe); + assertEquals(e,newe); + newe.clear(); + IteratorUtil.materialize(e.getIteratingRLW(), newe,4096); + assertEquals(e,newe); } @Test diff --git a/src/test/java/com/googlecode/javaewah/IteratorAggregationTest.java b/src/test/java/com/googlecode/javaewah/IteratorAggregationTest.java index de96733..32bdebf 100644 --- a/src/test/java/com/googlecode/javaewah/IteratorAggregationTest.java +++ b/src/test/java/com/googlecode/javaewah/IteratorAggregationTest.java @@ -127,7 +127,7 @@ public class IteratorAggregationTest { EWAHCompressedBitmap tanswer = EWAHCompressedBitmap .or(x); EWAHCompressedBitmap container = new EWAHCompressedBitmap(); - FastAggregation.legacy_orWithContainer(container, x); + FastAggregation.orToContainer(container, x); assertTrue(container.equals(tanswer)); EWAHCompressedBitmap x1 = IteratorUtil .materialize(IteratorAggregation diff --git a/src/test/java/com/googlecode/javaewah/datastructure/BitSetTest.java b/src/test/java/com/googlecode/javaewah/datastructure/BitSetTest.java index 7af8d34..9041f21 100644 --- a/src/test/java/com/googlecode/javaewah/datastructure/BitSetTest.java +++ b/src/test/java/com/googlecode/javaewah/datastructure/BitSetTest.java @@ -1,5 +1,7 @@ package com.googlecode.javaewah.datastructure; +import static org.junit.Assert.*; + import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; @@ -15,28 +17,83 @@ import junit.framework.Assert; import org.junit.Test; +import com.googlecode.javaewah.IntIterator; + public class BitSetTest { + + + public static ImmutableBitSet toImmutableBitSet(BitSet b) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + b.serialize(new DataOutputStream(bos)); + ByteBuffer bb = ByteBuffer.wrap(bos.toByteArray()); + ImmutableBitSet rmap = new ImmutableBitSet(bb.asLongBuffer()); + System.out.println("bitmap 1 (mapped) : " + rmap); + if (!rmap.equals(b)) + throw new RuntimeException("Will not happen"); + return rmap; + } + @Test + public void simpleImmuExample() throws IOException { + ImmutableBitSet Bitmap1 = toImmutableBitSet(BitSet.bitmapOf(0, 2, 55, 64, 512)); + ImmutableBitSet Bitmap2 = toImmutableBitSet(BitSet.bitmapOf(1, 3, 64, 512)); + System.out.println("bitmap 1: " + Bitmap1); + System.out.println("bitmap 2: " + Bitmap2); + assertEquals(Bitmap1.cardinality(),5); + assertEquals(Bitmap2.cardinality(),4); + assertFalse(Bitmap1.hashCode()==Bitmap2.hashCode()); + IntIterator is = Bitmap1.intIterator(); + int c1 = 0; + while(is.hasNext()) { + c1++; + is.next(); + } + assertEquals(Bitmap1.cardinality(),c1); + + IntIterator iu = Bitmap1.unsetIntIterator(); + int c2 = 0; + while(iu.hasNext()) { + c2++; + iu.next(); + } + assertEquals(Bitmap1.getNumberOfWords() * 64 - Bitmap1.cardinality(),c2); + } @Test public void simpleExample() throws IOException { BitSet Bitmap1 = BitSet.bitmapOf(0, 2, 55, 64, 512); BitSet Bitmap2 = BitSet.bitmapOf(1, 3, 64, 512); + Bitmap1.trim(); + Bitmap2.trim(); + assertTrue(Bitmap1.intersects(Bitmap2)); + assertFalse(Bitmap1.hashCode()==Bitmap2.hashCode()); System.out.println("bitmap 1: " + Bitmap1); System.out.println("bitmap 2: " + Bitmap2); // or BitSet orbitmap = Bitmap1.clone(); + int orcard = Bitmap1.orcardinality(Bitmap2); orbitmap.or(Bitmap2); + assertEquals(orbitmap.cardinality(),orcard); System.out.println("bitmap 1 OR bitmap 2: " + orbitmap); // and BitSet andbitmap = Bitmap1.clone(); + int andcard = Bitmap1.andcardinality(Bitmap2); andbitmap.and(Bitmap2); + assertEquals(andbitmap.cardinality(),andcard); System.out.println("bitmap 1 AND bitmap 2: " + andbitmap); // xor BitSet xorbitmap = Bitmap1.clone(); + int xorcard = Bitmap1.xorcardinality(Bitmap2); xorbitmap.xor(Bitmap2); + assertEquals(xorbitmap.cardinality(),xorcard); System.out.println("bitmap 1 XOR bitmap 2:" + xorbitmap); + BitSet andnotbitmap = Bitmap1.clone(); + int andnotcard = Bitmap1.andNotcardinality(Bitmap2); + andnotbitmap.andNot(Bitmap2); + assertEquals(andnotbitmap.cardinality(),andnotcard); + System.out.println("bitmap 1 ANDNOT bitmap 2:" + andnotbitmap); + // serialization ByteArrayOutputStream bos = new ByteArrayOutputStream(); // Note: you could use a file output steam instead of ByteArrayOutputStream @@ -57,7 +114,23 @@ public class BitSetTest if (!rmap.equals(Bitmap1)) throw new RuntimeException("Will not happen"); + IntIterator is = Bitmap1.intIterator(); + int c1 = 0; + while(is.hasNext()) { + c1++; + is.next(); + } + assertEquals(Bitmap1.cardinality(),c1); + IntIterator iu = Bitmap1.unsetIntIterator(); + int c2 = 0; + while(iu.hasNext()) { + c2++; + iu.next(); + } + assertEquals(Bitmap1.getNumberOfWords() * 64 - Bitmap1.cardinality(),c2); + Bitmap1.clear(); + assertEquals(Bitmap1.cardinality(),0); } @Test diff --git a/src/test/java/com/googlecode/javaewah32/EWAHCompressedBitmap32Test.java b/src/test/java/com/googlecode/javaewah32/EWAHCompressedBitmap32Test.java index 951d1ef..23d4df3 100644 --- a/src/test/java/com/googlecode/javaewah32/EWAHCompressedBitmap32Test.java +++ b/src/test/java/com/googlecode/javaewah32/EWAHCompressedBitmap32Test.java @@ -6,6 +6,7 @@ package com.googlecode.javaewah32; */ import com.googlecode.javaewah.ChunkIterator; +import com.googlecode.javaewah.EWAHCompressedBitmap; import com.googlecode.javaewah.FastAggregation; import com.googlecode.javaewah.IntIterator; import org.junit.Assert; @@ -23,6 +24,15 @@ import static com.googlecode.javaewah32.EWAHCompressedBitmap32.WORD_IN_BITS; */ @SuppressWarnings("javadoc") public class EWAHCompressedBitmap32Test { + + @Test + public void swaptest() { + EWAHCompressedBitmap32 x = EWAHCompressedBitmap32.bitmapOf(1,2,3); + EWAHCompressedBitmap32 y = EWAHCompressedBitmap32.bitmapOf(1,2,3,4); + x.swap(y); + Assert.assertEquals(x.cardinality(),4); + Assert.assertEquals(y.cardinality(),3); + } @Test public void shiftByWordSizeBits() { @@ -1188,6 +1198,95 @@ public class EWAHCompressedBitmap32Test { } } + public static Iterator toIterator(final EWAHCompressedBitmap32[] bitmaps) { + return new Iterator() { + int k = 0; + + @Override + public boolean hasNext() { + return k < bitmaps.length; + } + + @Override + public Object next() { + return bitmaps[k++]; + } + + @Override + public void remove() { + // nothing + } + }; + } + + @Test + public void fastand() { + int[][] data = { {5, 6, 7, 8, 9}, {1, 5}, {2, 5}}; + + EWAHCompressedBitmap32[] bitmaps = new EWAHCompressedBitmap32[data.length]; + + for (int i = 0; i < bitmaps.length; ++i) { + bitmaps[i] = new EWAHCompressedBitmap32(); + for (int j : data[i]) { + bitmaps[i].set(j); + } + bitmaps[i].setSizeInBits(1000, false); + } + EWAHCompressedBitmap32 and1 = FastAggregation32.bufferedand(1024, bitmaps[0],bitmaps[1],bitmaps[2]); + EWAHCompressedBitmap32 and2 = new EWAHCompressedBitmap32(); + FastAggregation32.bufferedandWithContainer(and2, 32, bitmaps[0],bitmaps[1],bitmaps[2]); + EWAHCompressedBitmap32 and3 = EWAHCompressedBitmap32.and(bitmaps[0],bitmaps[1],bitmaps[2]); + System.out.println(and1.sizeInBits()); + System.out.println(and2.sizeInBits()); + System.out.println(and3.sizeInBits()); + assertEqualsPositions(and1, and2); + assertEqualsPositions(and2, and3); + } + + + @Test + public void fastagg() { + int[][] data = {{}, {5, 6, 7, 8, 9}, {1}, {2}}; + + EWAHCompressedBitmap32[] bitmaps = new EWAHCompressedBitmap32[data.length]; + + for (int i = 0; i < bitmaps.length; ++i) { + bitmaps[i] = new EWAHCompressedBitmap32(); + for (int j : data[i]) { + bitmaps[i].set(j); + } + bitmaps[i].setSizeInBits(1000, false); + } + EWAHCompressedBitmap32 or1 = FastAggregation32.bufferedor(1024, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap32 or2 = FastAggregation32.or(bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap32 or3 = FastAggregation32.bufferedor(1024, bitmaps); + EWAHCompressedBitmap32 or4 = FastAggregation32.or(bitmaps); + EWAHCompressedBitmap32 or5 = FastAggregation32.or(toIterator(bitmaps)); + EWAHCompressedBitmap32 or6 = new EWAHCompressedBitmap32(); + FastAggregation32.orToContainer(or6, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + + assertEquals(or1,or2); + assertEquals(or2,or3); + assertEquals(or3,or4); + assertEquals(or4,or5); + assertEquals(or5,or6); + + EWAHCompressedBitmap32 xor1 = FastAggregation32.bufferedxor(1024, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap32 xor2 = FastAggregation32.xor(bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap32 xor3 = FastAggregation32.bufferedxor(1024, bitmaps); + EWAHCompressedBitmap32 xor4 = FastAggregation32.xor(bitmaps); + EWAHCompressedBitmap32 xor5 = FastAggregation32.xor(toIterator(bitmaps)); + EWAHCompressedBitmap32 xor6 = new EWAHCompressedBitmap32(); + FastAggregation32.xorToContainer(xor6, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + + assertEquals(xor1,xor2); + assertEquals(xor2,xor3); + assertEquals(xor3,xor4); + assertEquals(xor4,xor5); + assertEquals(xor5,xor6); + } + + @SuppressWarnings({"deprecation", "boxing"}) @Test public void OKaserBugReportJuly2013() { @@ -1211,7 +1310,7 @@ public class EWAHCompressedBitmap32Test { long rightcard = bruteForceAnswer.size(); EWAHCompressedBitmap32 foo = new EWAHCompressedBitmap32(); - FastAggregation32.legacy_orWithContainer(foo, toBeOred); + FastAggregation32.orToContainer(foo, toBeOred); Assert.assertEquals(rightcard, foo.cardinality()); EWAHCompressedBitmap32 e1 = FastAggregation.or(toBeOred); Assert.assertEquals(rightcard, e1.cardinality()); diff --git a/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32Test.java b/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32Test.java index 1a2355b..c8d572b 100644 --- a/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32Test.java +++ b/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32Test.java @@ -2,8 +2,12 @@ package com.googlecode.javaewah32; import org.junit.Test; +import com.googlecode.javaewah.IntIterator; + import static org.junit.Assert.*; +import java.util.Iterator; + /* * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. @@ -14,7 +18,31 @@ import static org.junit.Assert.*; */ @SuppressWarnings("javadoc") public class IntIteratorOverIteratingRLW32Test { - + @Test + public void iteratorAggregation() { + EWAHCompressedBitmap32 e1 = EWAHCompressedBitmap32.bitmapOf(0, 2, 1000, 10001); + EWAHCompressedBitmap32 e2 = new EWAHCompressedBitmap32(); + for (int k = 64; k < 450; ++k) + e2.set(k); + EWAHCompressedBitmap32 e3 = new EWAHCompressedBitmap32(); + for (int k = 64; k < 450; ++k) + e2.set(400 * k); + assertEquals(IteratorUtil32.materialize( + IteratorAggregation32.bufferedand(e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation32.bufferedand(1024, e1, e2, e3)); + assertEquals(IteratorUtil32.materialize( + IteratorAggregation32.bufferedor(e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation32.bufferedor(1024, e1, e2, e3)); + assertEquals(IteratorUtil32.materialize( + IteratorAggregation32.bufferedxor(e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation32.bufferedxor(1024, e1, e2, e3)); + assertEquals(IteratorUtil32.materialize(IteratorAggregation32.bufferedand(500, e1.getIteratingRLW(), + e2.getIteratingRLW(), e3.getIteratingRLW())), FastAggregation32.bufferedand(1024, e1, e2, e3)); + assertEquals(IteratorUtil32.materialize(IteratorAggregation32.bufferedor(500, e1.getIteratingRLW(), + e2.getIteratingRLW(), e3.getIteratingRLW())), FastAggregation32.bufferedor(1024, e1, e2, e3)); + assertEquals(IteratorUtil32.materialize(IteratorAggregation32.bufferedxor(500, e1.getIteratingRLW(), + e2.getIteratingRLW(), e3.getIteratingRLW())), FastAggregation32.bufferedxor(1024, e1, e2, e3)); + } @Test // had problems with bitmaps beginning with two consecutive clean runs public void testConsecClean() { @@ -22,7 +50,7 @@ public class IntIteratorOverIteratingRLW32Test { EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); for (int i = 32; i < 64; ++i) e.set(i); - IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32(e.getIteratingRLW()); + IntIterator ii = IteratorUtil32.toSetBitsIntIterator(e.getIteratingRLW()); assertTrue(ii.hasNext()); int ctr = 0; while (ii.hasNext()) { @@ -30,6 +58,30 @@ public class IntIteratorOverIteratingRLW32Test { ii.next(); } assertEquals(32, ctr); + Iterator iii = IteratorUtil32.toSetBitsIterator(e.getIteratingRLW()); + assertTrue(iii.hasNext()); + ctr = 0; + while (iii.hasNext()) { + ++ctr; + iii.next(); + } + assertEquals(32, ctr); + + } + + + @Test + public void testMaterialize() { + EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); + for (int i = 64; i < 128; ++i) + e.set(333 * i); + assertEquals(e.cardinality(), IteratorUtil32.cardinality(e.getIteratingRLW())); + EWAHCompressedBitmap32 newe = new EWAHCompressedBitmap32(); + IteratorUtil32.materialize(e.getIteratingRLW(), newe); + assertEquals(e,newe); + newe.clear(); + IteratorUtil32.materialize(e.getIteratingRLW(), newe,4096); + assertEquals(e,newe); } @Test diff --git a/src/test/java/com/googlecode/javaewah32/IteratorAggregation32Test.java b/src/test/java/com/googlecode/javaewah32/IteratorAggregation32Test.java index a64ebd2..5315f20 100644 --- a/src/test/java/com/googlecode/javaewah32/IteratorAggregation32Test.java +++ b/src/test/java/com/googlecode/javaewah32/IteratorAggregation32Test.java @@ -131,7 +131,7 @@ public class IteratorAggregation32Test { .or(x); EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); FastAggregation32 - .legacy_orWithContainer( + .orToContainer( container, x); assertTrue(container.equals(tanswer)); EWAHCompressedBitmap32 x1 = IteratorUtil32 -- GitLab