-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathset_difference.cpp
More file actions
104 lines (91 loc) · 2.56 KB
/
set_difference.cpp
File metadata and controls
104 lines (91 loc) · 2.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <chrono>
#include "projectconfig.h"
#include "constants.h"
#include "shuffle_dictionary.hpp"
#include "difference/naive.hpp"
#include "difference/stl.hpp"
#ifdef __SSE2__
# include "difference/sse.hpp"
#endif
#ifdef __AVX2__
# include "difference/avx2.hpp"
#endif
#if defined(__AVX512F__) && defined(__AVX512CD__) && defined(__AVX512DQ__)
# include "difference/avx512.hpp"
#endif
/**
 * Times one set-difference implementation over all pairs of lists.
 *
 * For every pair (i, j) with i < j the function calls
 * func(lists[i], arraySize, lists[j], arraySize, out) and adds up the returned
 * values; the whole sweep is repeated repeatCount times. Afterwards the elapsed
 * wall clock time in milliseconds and the accumulated sum are printed.
 *
 * @param lists  array of listCount pointers, each pointing to arraySize values
 * @param func   implementation under test; when nullptr nothing is run or printed.
 *               Its return value is summed up as-is (presumably the number of
 *               elements written to the output buffer).
 */
void run(uint32_t **lists,
    size_t (*func)(const uint32_t*,size_t,const uint32_t*,size_t,uint32_t*)=nullptr
){
    if(!func){
        return;
    }

    // aligned_alloc expects the size to be a multiple of the alignment,
    // so round the buffer size up to the next multiple of 64 bytes
    const size_t alignment = 64;
    const size_t bytes = (arraySize*sizeof(uint32_t) + alignment-1) / alignment * alignment;

    // a single output buffer is reused for every call; it is allocated before
    // the clock starts so that allocator cost does not end up in the measurement
    uint32_t *new_list = static_cast<uint32_t*>(aligned_alloc(alignment, bytes));
    if(!new_list){
        puts("couldn't allocate the output buffer");
        return;
    }

    auto t_start = std::chrono::high_resolution_clock::now();
    size_t elements=0;
    for(size_t repeat=0; repeat<repeatCount; ++repeat){
        for(size_t i=0; i<listCount; ++i){
            for(size_t j=i+1; j<listCount; ++j){
                elements += func(
                    lists[i], arraySize,
                    lists[j], arraySize,
                    new_list
                );
            }
        }
    }
    auto t_end = std::chrono::high_resolution_clock::now();

    // %zu is the matching conversion for size_t (%lu is wrong where long != size_t)
    printf("Wall clock time passed: %10.2f ms - %zu\n",
        std::chrono::duration<double, std::milli>(t_end-t_start).count(),
        elements
    );

    free(new_list);
}
int main(void){
auto t_start = std::chrono::high_resolution_clock::now();
// load lists from file which was generated by genLists
FILE *fd = fopen("test.dat", "rb");
if(!fd){
puts("couldn't open test.dat");
return -1;
}
uint32_t **lists = new uint32_t*[listCount];
for(size_t i=0; i<listCount; ++i){
lists[i] = (uint32_t*)aligned_alloc(64, arraySize*sizeof(uint32_t));
fread(lists[i], 4, arraySize, fd);
}
fclose(fd);
auto t_end = std::chrono::high_resolution_clock::now();
printf("preparing lists done - %f ms\n",
std::chrono::duration<double, std::milli>(t_end-t_start).count()
);
puts("scalar:");
run(lists, difference_scalar);
puts("stl set_difference:");
run(lists, difference_scalar_stl);
#if __GNUC__ >= 5
//puts("stl parallel set_difference: uses more than one core, just for reference here");
//run(lists, difference_scalar_stl_parallel);
#endif
#ifdef __SSE2__
puts("128bit SSE vector:");
run(lists, difference_vector_sse);
#endif
#ifdef __AVX2__
puts("256bit AVX2 vector");
run(lists, difference_vector_avx2);
#endif
#if defined(__AVX512F__) && defined(__AVX512CD__) && defined(__AVX512DQ__)
//FIXME: broken
puts("512bit AVX512 vector");
run(lists, difference_vector_avx512_conflict);
#ifndef DISABLE_ASM
puts("512bit AVX512 vector - asm");
run(lists, difference_vector_avx512_conflict_asm);
#endif
#endif
for(size_t i=0; i<listCount; ++i){
free(lists[i]);
}
delete[] lists;
return 0;
}