Skip to content

Commit e106285

Browse files
committed
test commit
1 parent 57677f2 commit e106285

File tree

4 files changed

+90
-0
lines changed

4 files changed

+90
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#include <util/generic/ptr.h>
2+
#include <util/system/cpu_id.h>
3+
#include <util/system/types.h>
4+
#include <util/stream/output.h>
5+
#include <util/generic/string.h>
6+
#include <vector>
7+
#include <immintrin.h>
8+
#include <avxintrin.h>
9+
#include <chrono>
10+
#include <ydb/library/yql/utils/simd/simd.h>
11+
12+
13+
// Number of elements in each column used by the benchmark (6.4 million).
// Written as an integer literal so no floating-point -> size_t conversion is involved.
constexpr size_t size = 6'400'000;
14+
15+
template <typename T>
16+
inline double GetSum(std::vector<std::vector<T>>& columns, std::vector<T>& result) {
17+
const size_t SIZE_OF_TYPE = 256 / (sizeof(T) * 8);
18+
const size_t align_size = columns[0].size();
19+
20+
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
21+
22+
for (size_t i = 0; i < align_size; i += SIZE_OF_TYPE) {
23+
NSimd::NAVX2::TSimd8<T> final_register(&columns[0][i]);
24+
25+
for (size_t j = 1; j < columns.size(); ++j) {
26+
final_register.Add64(&columns[j][i]);
27+
}
28+
29+
final_register.Store(&result[i]);
30+
}
31+
32+
std::chrono::steady_clock::time_point finish = std::chrono::steady_clock::now();
33+
34+
return std::chrono::duration_cast<std::chrono::microseconds>(finish - start).count();
35+
36+
}
37+
38+
double StandartAdding(std::vector<std::vector<ui64>>& columns, std::vector<ui64>& result) {
39+
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
40+
41+
for (size_t j = 0; j < columns[0].size(); ++j) {
42+
43+
for (size_t i = 0; i < columns[i].size(); ++i) {
44+
result[j] += columns[i][j];
45+
}
46+
47+
}
48+
std::chrono::steady_clock::time_point finish = std::chrono::steady_clock::now();
49+
50+
return std::chrono::duration_cast<std::chrono::microseconds>(finish - start).count();
51+
}
52+
53+
int main() {
54+
std::vector<std::vector<ui64>> vec1(10, std::vector<ui64>(size, 1e12 + 3));
55+
56+
std::vector<ui64> result1(size, 0);
57+
std::vector<ui64> result2(size, 0);
58+
59+
double ans1 = GetSum(vec1, result1);
60+
double ans2 = StandartAdding(vec1, result2);
61+
62+
for (size_t i = 0; i < result2.size(); ++i) {
63+
if (result2[i] != result1[i]) {
64+
Cerr << "something went wrong...";
65+
return 0;
66+
}
67+
}
68+
69+
Cerr << "The results are the same. Let's compare times:\n";
70+
Cerr << "Time, using AVX2: " << ans1 << " ms\n";
71+
Cerr << "Time, using standart adding: " << ans2 << "ms";
72+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Build description for the add_columns executable (built from main.cpp).
OWNER(g:yql)
2+
3+
# Declares the target as a program named add_columns.
PROGRAM(add_columns)
4+
5+
# The only source file of this target.
SRCS(main.cpp)
6+
7+
SIZE(MEDIUM)
8+
9+
# main.cpp uses AVX2 intrinsics, so the compiler must be allowed to emit AVX2 code.
CFLAGS(-mavx2)
10+
11+
# Dependency on the SIMD helper library whose simd.h header main.cpp includes.
PEERDIR(ydb/library/yql/utils/simd)
12+
13+
END()

ydb/library/yql/utils/simd/exec/ya.make

+1
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,5 @@ RECURSE(
2828
pack_tuple
2929
tuples_to_bucket
3030
stream_store
31+
add_columns
3132
)

ydb/library/yql/utils/simd/simd_avx2.h

+4
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ struct TSimd8 {
7777
crc = _mm_crc32_u64(crc, *((ui64*) &this->Value + 3));
7878
return crc;
7979
}
80+
81+
// Adds `another` to this register in place, treating both values as packed
// 64-bit integers (one independent addition per 64-bit lane).
inline void Add64(const TSimd8<T>& another) {
    const auto laneSums = _mm256_add_epi64(Value, another.Value);
    Value = laneSums;
}
8084

8185
inline int ToBitMask() const {
8286
return _mm256_movemask_epi8(this->Value);

0 commit comments

Comments
 (0)