Skip to content

Commit b8c5bf2

Browse files
Raymond Yang, RandySheriffH
Raymond Yang
authored and committed
Cherry pick x86 fix in release 0.3.0 (#606)
* merge x86 linux build * remove wrong argu
1 parent 17ceca2 commit b8c5bf2

15 files changed

+1067
-12
lines changed

cmake/CMakeLists.txt

+5
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,11 @@ else()
129129
string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -march=native -mtune=native")
130130
string(APPEND CMAKE_C_FLAGS_RELWITHDEBINFO " -march=native -mtune=native")
131131
endif()
132+
if(onnxruntime_BUILD_x86)
133+
set (CMAKE_SYSTEM_PROCESSOR "x86")
134+
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse2 -mfpmath=sse -Wno-narrowing")
135+
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2 -mfpmath=sse -Wno-narrowing")
136+
endif()
132137
endif()
133138

134139
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")

cmake/onnxruntime_mlas.cmake

+20-1
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,25 @@ else()
9393
${ONNXRUNTIME_ROOT}/core/mlas/lib/aarch64/sgemma.s
9494
)
9595

96+
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86?)$")
97+
98+
enable_language(ASM)
99+
100+
set(mlas_platform_srcs_sse2
101+
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86/SgemmKernelSse2.S
102+
)
103+
set_source_files_properties(${mlas_platform_srcs_sse2} PROPERTIES COMPILE_FLAGS "-msse2")
104+
105+
set(mlas_platform_srcs_avx
106+
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86/SgemmKernelAvx.S
107+
)
108+
set_source_files_properties(${mlas_platform_srcs_avx} PROPERTIES COMPILE_FLAGS "-mavx")
109+
110+
set(mlas_platform_srcs
111+
${mlas_platform_srcs_sse2}
112+
${mlas_platform_srcs_avx}
113+
)
114+
96115
elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
97116

98117
enable_language(ASM)
@@ -106,7 +125,7 @@ else()
106125
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/SgemmKernelSse2.S
107126
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/SgemmTransposePackB16x4Sse2.S
108127
)
109-
set_source_files_properties(${mlas_platform_srcs_sse} PROPERTIES COMPILE_FLAGS "-msse2")
128+
set_source_files_properties(${mlas_platform_srcs_sse2} PROPERTIES COMPILE_FLAGS "-msse2")
110129

111130
set(mlas_platform_srcs_avx
112131
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/SgemmKernelAvx.S

onnxruntime/core/mlas/lib/mlasi.h

-3
Original file line number | Diff line number | Diff line change
@@ -34,9 +34,6 @@ Module Name:
3434
#include <cpuid.h>
3535
#include <immintrin.h>
3636
#endif
37-
#if defined(__x86_64__)
38-
#include "x86_64/xgetbv.h"
39-
#endif
4037
#endif
4138

4239
//

onnxruntime/core/mlas/lib/platform.cpp

+36-5
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,41 @@ Module Name:
2323

2424
MLAS_PLATFORM MlasPlatform;
2525

26+
#ifdef MLAS_TARGET_AMD64_IX86
27+
28+
//
29+
// Reads the processor extended control register to determine platform
30+
// capabilities.
31+
//
32+
33+
#if !defined(_XCR_XFEATURE_ENABLED_MASK)
34+
#define _XCR_XFEATURE_ENABLED_MASK 0
35+
#endif
36+
37+
inline
38+
uint64_t
39+
MlasReadExtendedControlRegister(
40+
unsigned int ext_ctrl_reg
41+
)
42+
{
43+
#if defined(_WIN32)
44+
return _xgetbv(ext_ctrl_reg);
45+
#else
46+
uint32_t eax, edx;
47+
48+
__asm__
49+
(
50+
"xgetbv"
51+
: "=a" (eax), "=d" (edx)
52+
: "c" (ext_ctrl_reg)
53+
);
54+
55+
return ((uint64_t)edx << 32) | eax;
56+
#endif
57+
}
58+
59+
#endif
60+
2661
MLAS_PLATFORM::MLAS_PLATFORM(
2762
void
2863
)
@@ -74,11 +109,7 @@ Return Value:
74109
// Check if the operating system supports saving SSE and AVX states.
75110
//
76111

77-
#if defined(_WIN32)
78-
uint64_t xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
79-
#else
80-
uint64_t xcr0 = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
81-
#endif
112+
uint64_t xcr0 = MlasReadExtendedControlRegister(_XCR_XFEATURE_ENABLED_MASK);
82113

83114
if ((xcr0 & 0x6) == 0x6) {
84115

0 commit comments

Comments
 (0)