我应该弃用 java.util.HashSet 而改用 CompactHashSet 吗？ [英] Should I dump java.util.HashSet in favor of CompactHashSet?
问题描述
我发现有一个 Set 的实现使用散列（具有所有有用的特性，例如 contains() 为 O(1) 等），并声称在各方面都比 java.util.HashSet 更高效：
http://ontopia.wordpress.com/2009/09/23/a-faster-and-more-compact-set/
http://alias-i.com/lingpipe/docs/api/com/aliasi/util/CompactHashSet.html
那么，在所有需要 java.util.Set 的地方完全弃用 java.util.HashSet，改用 com.aliasi.util.CompactHashSet，会是个好主意吗？
基准是基于原文的基准,但使用现代工具。
package tests;
import com.carrotsearch.hppc.ObjectOpenHashSet;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.google.common.collect.GuavaCompactHashSet;
import net.ontopia.utils.CompactHashSet;
import net.openhft.koloboke.collect.set.hash.HashObjSet;
import net.openhft.koloboke.collect.set.hash.HashObjSets;
import org.openjdk.jmh.annotations.*;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import static java.util.Arrays.stream;
import static org.openjdk.jol.info.GraphLayout.parseInstance;
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@OperationsPerInvocation(TestHashSet.TIMES)
@Threads(1)
@Fork(1)
@State(Scope.Thread)
public class TestHashSet {
public static final int TIMES = 1000000;
private static final int MAX = 5000000;
private static long ELEMENTS_SIZE;
static Long [] add = new Long [TIMES],lookup = new Long [TIMES],remove = new Long [TIMES];
static {
for(int ix = 0; ix< TIMES; ix ++)
add [ix] = new Long(Math.round(Math.random()* MAX)
ELEMENTS_SIZE = stream(add).distinct()。count()* parseInstance(add [0])。
for(int ix = 0; ix lookup [ix] = new Long(Math.round(Math.random()* MAX)
for(int ix = 0; ix remove [ix] = new Long(Math.round(Math.random()* MAX)
}
@Benchmark
public int hashSet(){
Set< Long> set = new HashSet< Long>();
for(Long o:add){
set.add(o);
}
int r = 0;
for(Long o:lookup){
r ^ = set.contains(o)? 1:0;
}
for(Long o:set){
r + = o.intValue();
}
(Long o:remove){
set.remove(o);
}
return r + set.size();
}
@Benchmark
public int compactHashSet(){
Set< Long> set = new CompactHashSet< Long>();
for(Long o:add){
set.add(o);
}
int r = 0;
for(Long o:lookup){
r ^ = set.contains(o)? 1:0;
}
(Long o:set){
r + = o.intValue();
}
(Long o:remove){
set.remove(o);
}
return r + set.size();
}
@Benchmark
public int hppcSet(){
ObjectOpenHashSet< Long> set = new ObjectOpenHashSet< Long>();
for(Long o:add){
set.add(o);
}
int r = 0;
for(Long o:lookup){
r ^ = set.contains(o)? 1:0;
}
for(ObjectCursor< Long> cur:set){
r + = cur.value.intValue();
}
(Long o:remove){
set.remove(o);
}
return r + set.size();
}
@Benchmark
public int kolobokeSet(){
Set< Long> set = HashObjSets.newMutableSet();
for(Long o:add){
set.add(o);
}
int r = 0;
for(Long o:lookup){
r ^ = set.contains(o)? 1:0;
}
(Long o:set){
r + = o.intValue();
}
(Long o:remove){
set.remove(o);
}
return r + set.size();
}
@Benchmark
public int guavaCompactHashSet(){
//公平比较 - 增长表
Set< Long> set = new GuavaCompactHashSet<>(10);
for(Long o:add){
set.add(o);
}
int r = 0;
for(Long o:lookup){
r ^ = set.contains(o)? 1:0;
}
(Long o:set){
r + = o.intValue();
}
(Long o:remove){
set.remove(o);
}
return r + set.size();
}
public static void main(String [] argv){
HashSet hashSet = new HashSet();
test(HashSet,hashSet,hashSet :: add);
CompactHashSet compactHashSet = new CompactHashSet();
test(CompactHashSet,compactHashSet,compactHashSet :: add);
HashObjSet< Object> kolobokeSet = HashObjSets.newMutableSet();
test(KolobokeSet,kolobokeSet,kolobokeSet :: add);
ObjectOpenHashSet hppcSet = new ObjectOpenHashSet();
test(HPPC set,hppcSet,hppcSet :: add);
GuavaCompactHashSet guavaCompactHashSet = new GuavaCompactHashSet(10);
test(GuavaCompactHashSet,guavaCompactHashSet,guavaCompactHashSet :: add);
}
public static void test(String name,Object set,Consumer setAdd){
for(Long o:add){
setAdd.accept ;
}
System.out.printf(%s:%.1f bytes per element\\\
,name,
((parseInstance(set).totalSize() - ELEMENTS_SIZE)* 1.0 / TIMES));
}
}
结果:
集合实现                 速度         内存占用
                       分数  单位     +UseCompressedOops  -UseCompressedOops
CompactHashSet         828  ns/op    8.4                 16.8 字节/元素
HashSet                676  ns/op    37.4                60.3 字节/元素
HPPC Set               853  ns/op    10.5                18.9 字节/元素
Koloboke Set           587  ns/op    8.4                 16.8 字节/元素
GuavaCompactHashSet    874  ns/op    25.9                37.4 字节/元素
结果显示，尽管 CompactHashSet 占用的内存少得多，它甚至比老牌的 HashSet 还要慢。
I found that there is an implementation of a Set
that uses hashes (with all the useful consequences, like O(1) for contains()
etc) that is claimed to be more efficient than java.util.HashSet
in every aspect:
http://ontopia.wordpress.com/2009/09/23/a-faster-and-more-compact-set/
http://alias-i.com/lingpipe/docs/api/com/aliasi/util/CompactHashSet.html
Would it then be a good idea to quit using java.util.HashSet
completely wherever I need a java.util.Set
in favor of com.aliasi.util.CompactHashSet
?
解决方案 Let's start a little benchmark game.
Benchmarks are based on benchmarks from the original article, but use modern tools.
package tests;
import com.carrotsearch.hppc.ObjectOpenHashSet;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.google.common.collect.GuavaCompactHashSet;
import net.ontopia.utils.CompactHashSet;
import net.openhft.koloboke.collect.set.hash.HashObjSet;
import net.openhft.koloboke.collect.set.hash.HashObjSets;
import org.openjdk.jmh.annotations.*;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import static java.util.Arrays.stream;
import static org.openjdk.jol.info.GraphLayout.parseInstance;
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@OperationsPerInvocation(TestHashSet.TIMES)
@Threads(1)
@Fork(1)
@State(Scope.Thread)
public class TestHashSet {

    /** Operations performed per benchmark invocation (JMH normalizes scores by this). */
    public static final int TIMES = 1000000;

    /** Exclusive upper bound for the random element values. */
    private static final int MAX = 5000000;

    /**
     * Retained heap size of the distinct elements themselves; subtracted from each
     * measured set footprint so only the per-element overhead of the set is reported.
     */
    private static long ELEMENTS_SIZE;

    static Long[] add = new Long[TIMES], lookup = new Long[TIMES], remove = new Long[TIMES];

    static {
        // new Long(..) rather than Long.valueOf(..) is deliberate: every element must be
        // a distinct instance so the small-value Long cache cannot skew the memory figures.
        for (int ix = 0; ix < TIMES; ix++)
            add[ix] = new Long(Math.round(Math.random() * MAX));
        ELEMENTS_SIZE = stream(add).distinct().count() * parseInstance(add[0]).totalSize();
        for (int ix = 0; ix < TIMES; ix++)
            lookup[ix] = new Long(Math.round(Math.random() * MAX));
        for (int ix = 0; ix < TIMES; ix++)
            remove[ix] = new Long(Math.round(Math.random() * MAX));
    }

    /** Benchmarks java.util.HashSet: add, lookup, iterate, remove. */
    @Benchmark
    public int hashSet() {
        Set<Long> set = new HashSet<Long>();
        for (Long o : add) {
            set.add(o);
        }
        int r = 0;
        for (Long o : lookup) {
            r ^= set.contains(o) ? 1 : 0;
        }
        for (Long o : set) {
            r += o.intValue();
        }
        for (Long o : remove) {
            set.remove(o);
        }
        return r + set.size();
    }

    /** Benchmarks net.ontopia.utils.CompactHashSet: add, lookup, iterate, remove. */
    @Benchmark
    public int compactHashSet() {
        Set<Long> set = new CompactHashSet<Long>();
        for (Long o : add) {
            set.add(o);
        }
        int r = 0;
        for (Long o : lookup) {
            r ^= set.contains(o) ? 1 : 0;
        }
        for (Long o : set) {
            r += o.intValue();
        }
        for (Long o : remove) {
            set.remove(o);
        }
        return r + set.size();
    }

    /** Benchmarks HPPC's ObjectOpenHashSet; iteration goes through cursors. */
    @Benchmark
    public int hppcSet() {
        ObjectOpenHashSet<Long> set = new ObjectOpenHashSet<Long>();
        for (Long o : add) {
            set.add(o);
        }
        int r = 0;
        for (Long o : lookup) {
            r ^= set.contains(o) ? 1 : 0;
        }
        for (ObjectCursor<Long> cur : set) {
            r += cur.value.intValue();
        }
        for (Long o : remove) {
            set.remove(o);
        }
        return r + set.size();
    }

    /** Benchmarks Koloboke's mutable hash set: add, lookup, iterate, remove. */
    @Benchmark
    public int kolobokeSet() {
        Set<Long> set = HashObjSets.newMutableSet();
        for (Long o : add) {
            set.add(o);
        }
        int r = 0;
        for (Long o : lookup) {
            r ^= set.contains(o) ? 1 : 0;
        }
        for (Long o : set) {
            r += o.intValue();
        }
        for (Long o : remove) {
            set.remove(o);
        }
        return r + set.size();
    }

    /** Benchmarks Guava's CompactHashSet: add, lookup, iterate, remove. */
    @Benchmark
    public int guavaCompactHashSet() {
        // fair comparison -- growing table
        Set<Long> set = new GuavaCompactHashSet<>(10);
        for (Long o : add) {
            set.add(o);
        }
        int r = 0;
        for (Long o : lookup) {
            r ^= set.contains(o) ? 1 : 0;
        }
        for (Long o : set) {
            r += o.intValue();
        }
        for (Long o : remove) {
            set.remove(o);
        }
        return r + set.size();
    }

    /** Prints the per-element memory footprint of each set implementation. */
    public static void main(String[] argv) {
        Set<Long> hashSet = new HashSet<>();
        test("HashSet", hashSet, hashSet::add);
        Set<Long> compactHashSet = new CompactHashSet<>();
        test("CompactHashSet", compactHashSet, compactHashSet::add);
        HashObjSet<Long> kolobokeSet = HashObjSets.newMutableSet();
        test("KolobokeSet", kolobokeSet, kolobokeSet::add);
        ObjectOpenHashSet<Long> hppcSet = new ObjectOpenHashSet<>();
        test("HPPC set", hppcSet, hppcSet::add);
        Set<Long> guavaCompactHashSet = new GuavaCompactHashSet<>(10);
        test("GuavaCompactHashSet", guavaCompactHashSet, guavaCompactHashSet::add);
    }

    /**
     * Fills {@code set} via {@code setAdd} with the benchmark elements and prints its
     * per-element memory overhead (total retained size minus the elements' own size).
     *
     * @param name   label printed with the result
     * @param set    the set instance to measure with JOL
     * @param setAdd adder bound to {@code set} (kept separate because the set types
     *               share no common interface exposing {@code add})
     */
    public static void test(String name, Object set, Consumer<Long> setAdd) {
        for (Long o : add) {
            setAdd.accept(o);
        }
        System.out.printf("%s: %.1f bytes per element\n", name,
                ((parseInstance(set).totalSize() - ELEMENTS_SIZE) * 1.0 / TIMES));
    }
}
Results:
Set implementation Speed Memory footprint
Score Units +UCOops -UseCompressedOops
CompactHashSet 828 ns/op 8.4 16.8 bytes/elem
HashSet 676 ns/op 37.4 60.3 bytes/elem
HPPC Set 853 ns/op 10.5 18.9 bytes/elem
Koloboke Set 587 ns/op 8.4 16.8 bytes/elem
GuavaCompactHashSet 874 ns/op 25.9 37.4 bytes/elem
Appears that CompactHashSet
is even slower than old good HashSet
, despite it uses much less memory.
这篇关于我应该转储java.util.HashSet有利于CompactHashSet?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!