Skip to content

[ML] Native compilation and unit test for Linux on aarch64 #1132

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion 3rd_party/3rd_party.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,12 @@ case `uname` in
if [ -z "$CPP_CROSS_COMPILE" ] ; then
BOOST_LOCATION=/usr/local/gcc75/lib
BOOST_COMPILER=gcc
BOOST_EXTENSION=mt-x64-1_71.so.1.71.0
if [ `uname -m` = aarch64 ] ; then
BOOST_ARCH=a64
else
BOOST_ARCH=x64
fi
BOOST_EXTENSION=mt-${BOOST_ARCH}-1_71.so.1.71.0
BOOST_LIBRARIES='atomic chrono date_time filesystem iostreams log log_setup program_options regex system thread'
XML_LOCATION=/usr/local/gcc75/lib
XML_EXTENSION=.so.2
Expand Down
3 changes: 2 additions & 1 deletion build-setup/linux.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

# Machine Learning Build Machine Setup for Linux

These same instructions should work for native compilation on both x86_64 and aarch64 architectures.

To ensure everything is consistent for redistributable builds we build all redistributable components from source with a specific version of gcc.

You will need the following environment variables to be defined:
Expand Down
23 changes: 13 additions & 10 deletions lib/maths/unittest/CBasicStatisticsTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -451,12 +451,13 @@ BOOST_AUTO_TEST_CASE(testCentralMoments) {
maths::CBasicStatistics::count(meansAndVariances[0]),
maths::CBasicStatistics::count(vectorMeanAndVariances));
for (std::size_t j = 0u; j < 4; ++j) {
BOOST_REQUIRE_EQUAL(
BOOST_REQUIRE_CLOSE_ABSOLUTE(
maths::CBasicStatistics::mean(meansAndVariances[j]),
(maths::CBasicStatistics::mean(vectorMeanAndVariances))(j));
BOOST_REQUIRE_EQUAL(
(maths::CBasicStatistics::mean(vectorMeanAndVariances))(j), 1e-14);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
maths::CBasicStatistics::variance(meansAndVariances[j]),
(maths::CBasicStatistics::variance(vectorMeanAndVariances))(j));
(maths::CBasicStatistics::variance(vectorMeanAndVariances))(j),
1e-14);
}
}
}
Expand All @@ -482,15 +483,17 @@ BOOST_AUTO_TEST_CASE(testCentralMoments) {
maths::CBasicStatistics::count(meansVariancesAndSkews[0]),
maths::CBasicStatistics::count(vectorMeanVarianceAndSkew));
for (std::size_t j = 0u; j < 4; ++j) {
BOOST_REQUIRE_EQUAL(
BOOST_REQUIRE_CLOSE_ABSOLUTE(
maths::CBasicStatistics::mean(meansVariancesAndSkews[j]),
(maths::CBasicStatistics::mean(vectorMeanVarianceAndSkew))(j));
BOOST_REQUIRE_EQUAL(
(maths::CBasicStatistics::mean(vectorMeanVarianceAndSkew))(j), 1e-14);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
maths::CBasicStatistics::variance(meansVariancesAndSkews[j]),
(maths::CBasicStatistics::variance(vectorMeanVarianceAndSkew))(j));
BOOST_REQUIRE_EQUAL(
(maths::CBasicStatistics::variance(vectorMeanVarianceAndSkew))(j),
1e-14);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
maths::CBasicStatistics::skewness(meansVariancesAndSkews[j]),
(maths::CBasicStatistics::skewness(vectorMeanVarianceAndSkew))(j));
(maths::CBasicStatistics::skewness(vectorMeanVarianceAndSkew))(j),
1e-14);
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion lib/maths/unittest/CSplineTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,8 @@ BOOST_AUTO_TEST_CASE(testMean) {
LOG_DEBUG(<< "expectedMean = " << expectedMean
<< ", mean = " << spline.mean());
BOOST_REQUIRE_CLOSE_ABSOLUTE(expectedMean, spline.mean(),
std::numeric_limits<double>::epsilon() * expectedMean);
std::numeric_limits<double>::epsilon() *
expectedMean * 1.5);
}
}

Expand Down
4 changes: 3 additions & 1 deletion lib/maths/unittest/CXMeansOnlineTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,8 @@ BOOST_AUTO_TEST_CASE(testLatLongData) {
maths::gaussianLogLikelihood(covariance, x - mean, llj);
ll += w * std::exp(llj);
Z += w;
LOG_TRACE(<< "cluster " << j << " count " << clusters[j].count()
<< " centre " << clusters[j].centre());
}
ll /= Z;
LLC.add(std::log(ll));
Expand All @@ -718,7 +720,7 @@ BOOST_AUTO_TEST_CASE(testLatLongData) {
LOG_DEBUG(<< "gaussian log(L) = " << maths::CBasicStatistics::mean(LLR));
LOG_DEBUG(<< "clustered log(L) = " << maths::CBasicStatistics::mean(LLC));
BOOST_TEST_REQUIRE(maths::CBasicStatistics::mean(LLC) >
0.4 * maths::CBasicStatistics::mean(LLR));
0.5 * maths::CBasicStatistics::mean(LLR));
}

BOOST_AUTO_TEST_CASE(testPersist) {
Expand Down
42 changes: 22 additions & 20 deletions lib/model/unittest/CMetricDataGathererTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -279,14 +279,14 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeries, CTestFixture) {
bucketLength, featureData);
LOG_DEBUG(<< "featureData = " << core::CContainerPrinter::print(featureData));
BOOST_TEST_REQUIRE(!featureData.empty());
BOOST_REQUIRE_EQUAL(
1.5, featureData[0].second[0].second.s_BucketValue->value()[0]);
BOOST_REQUIRE_EQUAL(
0.9, featureData[1].second[0].second.s_BucketValue->value()[0]);
BOOST_REQUIRE_EQUAL(
2.1, featureData[2].second[0].second.s_BucketValue->value()[0]);
BOOST_REQUIRE_EQUAL(
9.0, featureData[3].second[0].second.s_BucketValue->value()[0]);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
1.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-14);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
0.9, featureData[1].second[0].second.s_BucketValue->value()[0], 1e-14);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
2.1, featureData[2].second[0].second.s_BucketValue->value()[0], 1e-14);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
9.0, featureData[3].second[0].second.s_BucketValue->value()[0], 1e-14);
BOOST_REQUIRE_EQUAL(false, featureData[0].second[0].second.s_IsInteger);
BOOST_REQUIRE_EQUAL(false, featureData[1].second[0].second.s_IsInteger);
BOOST_REQUIRE_EQUAL(false, featureData[2].second[0].second.s_IsInteger);
Expand Down Expand Up @@ -315,14 +315,14 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeries, CTestFixture) {
gatherer.sampleNow(startTime + bucketLength);
gatherer.featureData(startTime + bucketLength, bucketLength, featureData);
BOOST_TEST_REQUIRE(!featureData.empty());
BOOST_REQUIRE_EQUAL(
2.0, featureData[0].second[0].second.s_BucketValue->value()[0]);
BOOST_REQUIRE_EQUAL(
1.8, featureData[1].second[0].second.s_BucketValue->value()[0]);
BOOST_REQUIRE_EQUAL(
2.2, featureData[2].second[0].second.s_BucketValue->value()[0]);
BOOST_REQUIRE_EQUAL(
6.0, featureData[3].second[0].second.s_BucketValue->value()[0]);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
2.0, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-14);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
1.8, featureData[1].second[0].second.s_BucketValue->value()[0], 1e-14);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
2.2, featureData[2].second[0].second.s_BucketValue->value()[0], 1e-14);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
6.0, featureData[3].second[0].second.s_BucketValue->value()[0], 1e-14);
BOOST_REQUIRE_EQUAL(true, featureData[3].second[0].second.s_IsInteger);
BOOST_REQUIRE_EQUAL(std::string("[(700 [2.1] 1 2)]"),
core::CContainerPrinter::print(
Expand Down Expand Up @@ -378,10 +378,12 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeries, CTestFixture) {
BOOST_TEST_REQUIRE(!featureData.empty());
BOOST_REQUIRE_CLOSE_ABSOLUTE(
3.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10);
BOOST_REQUIRE_EQUAL(3.2, featureData[1].second[0].second.s_BucketValue->value()[0]);
BOOST_REQUIRE_EQUAL(3.8, featureData[2].second[0].second.s_BucketValue->value()[0]);
BOOST_REQUIRE_EQUAL(
10.5, featureData[3].second[0].second.s_BucketValue->value()[0]);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
3.2, featureData[1].second[0].second.s_BucketValue->value()[0], 1e-14);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
3.8, featureData[2].second[0].second.s_BucketValue->value()[0], 1e-14);
BOOST_REQUIRE_CLOSE_ABSOLUTE(
10.5, featureData[3].second[0].second.s_BucketValue->value()[0], 1e-14);
BOOST_REQUIRE_EQUAL(false, featureData[0].second[0].second.s_IsInteger);
BOOST_REQUIRE_EQUAL(false, featureData[1].second[0].second.s_IsInteger);
BOOST_REQUIRE_EQUAL(false, featureData[2].second[0].second.s_IsInteger);
Expand Down
111 changes: 64 additions & 47 deletions lib/seccomp/CSystemCallFilter_Linux.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@

#include <core/CLogger.h>

#include <cerrno>
#include <cstdint>
#include <cstring>

#include <linux/audit.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <sys/syscall.h>

#include <cerrno>
#include <cstdint>
#include <cstring>

namespace ml {
namespace seccomp {

Expand Down Expand Up @@ -45,53 +45,70 @@ const std::uint32_t SECCOMP_DATA_NR_OFFSET = 0x00;
const struct sock_filter FILTER[] = {
// Load the system call number into accumulator
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SECCOMP_DATA_NR_OFFSET),
// Only applies to X86_64 arch. Jump to disallow for calls using the x32 ABI
BPF_JUMP(BPF_JMP | BPF_JGT | BPF_K, UPPER_NR_LIMIT, 42, 0),

#ifdef __x86_64__
// Only applies to x86_64 arch. Jump to disallow for calls using the x32 ABI
BPF_JUMP(BPF_JMP | BPF_JGT | BPF_K, UPPER_NR_LIMIT, 46, 0),
// If any sys call filters are added or removed then the jump
// destination for each statement including the one above must
// be updated accordingly

// Allowed sys calls, jump to return allow on match
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getpriority, 42, 0), // for nice
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_setpriority, 41, 0), // for nice
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_read, 40, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_write, 39, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_writev, 38, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_lseek, 37, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_lstat, 36, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_time, 35, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_gettime, 34, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_gettimeofday, 33, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_readlink, 32, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_stat, 31, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_fstat, 30, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_open, 29, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_close, 28, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_connect, 27, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clone, 26, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_statfs, 25, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_dup2, 24, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mkdir, 23, 0), // for forecast temp storage
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rmdir, 22, 0), // for forecast temp storage
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_unlinkat, 21, 0), // for forecast temp storage
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getdents, 20, 0), // for forecast temp storage
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getdents64, 19, 0), // for forecast temp storage
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_openat, 18, 0), // for forecast temp storage
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_tgkill, 17, 0), // for the crash handler
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigaction, 16, 0), // for the crash handler
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn, 15, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex, 14, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_madvise, 13, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_unlink, 12, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mknod, 11, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_nanosleep, 10, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_robust_list, 9, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mprotect, 8, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap, 7, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mmap, 6, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getuid, 5, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_exit_group, 4, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_access, 3, 0),
// Allowed architecture-specific sys calls, jump to return allow on match
// Some of these are not used by the latest glibc and are not supported by
// Linux kernels on more recent architectures, but in a few cases different
// sys calls are used on different architectures
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_open, 46, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_dup2, 45, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_unlink, 44, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_stat, 43, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_lstat, 42, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_time, 41, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_readlink, 40, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getdents, 39, 0), // for forecast temp storage
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rmdir, 38, 0), // for forecast temp storage
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mkdir, 37, 0), // for forecast temp storage
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mknod, 36, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_access, 35, 0),
#elif defined(__aarch64__)
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mknodat, 36, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_faccessat, 35, 0),
#else
#error Unsupported hardware architecture
#endif

// Allowed sys calls for all architectures, jump to return allow on match
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_newfstatat, 34, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_readlinkat, 33, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_dup3, 32, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getpriority, 31, 0), // for nice
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_setpriority, 30, 0), // for nice
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_read, 29, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_write, 28, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_writev, 27, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_lseek, 26, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_gettime, 25, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_gettimeofday, 24, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_fstat, 23, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_close, 22, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_connect, 21, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clone, 20, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_statfs, 19, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mkdirat, 18, 0), // for forecast temp storage
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_unlinkat, 17, 0), // for forecast temp storage
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getdents64, 16, 0), // for forecast temp storage
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_openat, 15, 0), // for forecast temp storage
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_tgkill, 14, 0), // for the crash handler
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigaction, 13, 0), // for the crash handler
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn, 12, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex, 11, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_madvise, 10, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_nanosleep, 9, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_robust_list, 8, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mprotect, 7, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap, 6, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mmap, 5, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getuid, 4, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_exit_group, 3, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_brk, 2, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_exit, 1, 0),
// Disallow call with error code EACCES
Expand Down
38 changes: 27 additions & 11 deletions mk/linux.mk
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

OS=Linux

CPP_PLATFORM_HOME=$(CPP_DISTRIBUTION_HOME)/platform/linux-x86_64
HARDWARE_ARCH:=$(shell uname -m)
CPP_PLATFORM_HOME=$(CPP_DISTRIBUTION_HOME)/platform/linux-$(HARDWARE_ARCH)

CC=gcc
CXX=g++ -std=gnu++14
Expand All @@ -23,9 +24,15 @@ COVERAGE=--coverage
endif
endif

ifeq ($(HARDWARE_ARCH),aarch64)
ARCHCFLAGS=-march=armv8-a+crc+crypto
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It turns out that all aarch64 CPUs have SIMD instructions, so there's no need to tell the compiler it can use them - it just will. The Amazon Graviton processors used in EC2 aarch64 instances also support the CRC and Crypto instructions, so I told the compiler it can use them. These were added to the ARMv8 architecture fairly early on, so requiring them should not restrict usage too much. I suspect any hardware that didn't support these instructions would be unable to run ML well anyway, e.g. a 5-year-old Raspberry Pi.

else
ARCHCFLAGS=-msse4.2 -mfpmath=sse
endif

PLATPICFLAGS=-fPIC
PLATPIEFLAGS=-fPIE
CFLAGS=-g $(OPTCFLAGS) -msse4.2 -mfpmath=sse -fstack-protector -fno-math-errno -fno-permissive -Wall -Wcast-align -Wconversion -Wextra -Winit-self -Wparentheses -Wpointer-arith -Wswitch-enum $(COVERAGE)
CFLAGS=-g $(OPTCFLAGS) $(ARCHCFLAGS) -fstack-protector -fno-math-errno -fno-permissive -Wall -Wcast-align -Wconversion -Wextra -Winit-self -Wparentheses -Wpointer-arith -Wswitch-enum $(COVERAGE)
CXXFLAGS=$(CFLAGS) -Wno-ctor-dtor-privacy -Wno-deprecated-declarations -Wold-style-cast -fvisibility-inlines-hidden
CPPFLAGS=-isystem $(CPP_SRC_HOME)/3rd_party/include -isystem /usr/local/gcc75/include -D$(OS) -D_REENTRANT $(OPTCPPFLAGS)
CDEPFLAGS=-MM
Expand All @@ -43,21 +50,30 @@ RESOURCES_DIR=resources
LOCALLIBS=-lm -lpthread -ldl -lrt
NETLIBS=-lnsl
BOOSTVER=1_71
ifeq ($(HARDWARE_ARCH),aarch64)
BOOSTARCH=a64
else
BOOSTARCH=x64
endif
BOOSTGCCVER:=$(shell $(CXX) -dumpversion | awk -F. '{ print $$1; }')
# Use -isystem instead of -I for Boost headers to suppress warnings from Boost
BOOSTINCLUDES=-isystem /usr/local/gcc75/include/boost-$(BOOSTVER)
BOOSTCPPFLAGS=-DBOOST_ALL_DYN_LINK -DBOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
BOOSTLOGLIBS=-lboost_log-gcc$(BOOSTGCCVER)-mt-x64-$(BOOSTVER)
BOOSTLOGSETUPLIBS=-lboost_log_setup-gcc$(BOOSTGCCVER)-mt-x64-$(BOOSTVER)
BOOSTREGEXLIBS=-lboost_regex-gcc$(BOOSTGCCVER)-mt-x64-$(BOOSTVER)
BOOSTIOSTREAMSLIBS=-lboost_iostreams-gcc$(BOOSTGCCVER)-mt-x64-$(BOOSTVER)
BOOSTPROGRAMOPTIONSLIBS=-lboost_program_options-gcc$(BOOSTGCCVER)-mt-x64-$(BOOSTVER)
BOOSTTHREADLIBS=-lboost_thread-gcc$(BOOSTGCCVER)-mt-x64-$(BOOSTVER) -lboost_system-gcc$(BOOSTGCCVER)-mt-x64-$(BOOSTVER)
BOOSTFILESYSTEMLIBS=-lboost_filesystem-gcc$(BOOSTGCCVER)-mt-x64-$(BOOSTVER) -lboost_system-gcc$(BOOSTGCCVER)-mt-x64-$(BOOSTVER)
BOOSTDATETIMELIBS=-lboost_date_time-gcc$(BOOSTGCCVER)-mt-x64-$(BOOSTVER)
BOOSTTESTLIBS=-lboost_unit_test_framework-gcc$(BOOSTGCCVER)-mt-x64-$(BOOSTVER)
BOOSTLOGLIBS=-lboost_log-gcc$(BOOSTGCCVER)-mt-$(BOOSTARCH)-$(BOOSTVER)
BOOSTLOGSETUPLIBS=-lboost_log_setup-gcc$(BOOSTGCCVER)-mt-$(BOOSTARCH)-$(BOOSTVER)
BOOSTREGEXLIBS=-lboost_regex-gcc$(BOOSTGCCVER)-mt-$(BOOSTARCH)-$(BOOSTVER)
BOOSTIOSTREAMSLIBS=-lboost_iostreams-gcc$(BOOSTGCCVER)-mt-$(BOOSTARCH)-$(BOOSTVER)
BOOSTPROGRAMOPTIONSLIBS=-lboost_program_options-gcc$(BOOSTGCCVER)-mt-$(BOOSTARCH)-$(BOOSTVER)
BOOSTTHREADLIBS=-lboost_thread-gcc$(BOOSTGCCVER)-mt-$(BOOSTARCH)-$(BOOSTVER) -lboost_system-gcc$(BOOSTGCCVER)-mt-$(BOOSTARCH)-$(BOOSTVER)
BOOSTFILESYSTEMLIBS=-lboost_filesystem-gcc$(BOOSTGCCVER)-mt-$(BOOSTARCH)-$(BOOSTVER) -lboost_system-gcc$(BOOSTGCCVER)-mt-$(BOOSTARCH)-$(BOOSTVER)
BOOSTDATETIMELIBS=-lboost_date_time-gcc$(BOOSTGCCVER)-mt-$(BOOSTARCH)-$(BOOSTVER)
BOOSTTESTLIBS=-lboost_unit_test_framework-gcc$(BOOSTGCCVER)-mt-$(BOOSTARCH)-$(BOOSTVER)
RAPIDJSONINCLUDES=-isystem $(CPP_SRC_HOME)/3rd_party/rapidjson/include
ifeq ($(HARDWARE_ARCH),aarch64)
RAPIDJSONCPPFLAGS=-DRAPIDJSON_HAS_STDSTRING -DRAPIDJSON_NEON
else
RAPIDJSONCPPFLAGS=-DRAPIDJSON_HAS_STDSTRING -DRAPIDJSON_SSE42
endif
EIGENINCLUDES=-isystem $(CPP_SRC_HOME)/3rd_party/eigen
EIGENCPPFLAGS=-DEIGEN_MPL2_ONLY
XMLINCLUDES=`/usr/local/gcc75/bin/xml2-config --cflags`
Expand Down
Loading