Skip to content

Commit e118f0c

Browse files
q10facebook-github-bot
authored andcommitted
Modularize CMake Code [1/N] (#3385)
Summary: - Currently, the logic for building a .SO file from source files is unorganized and scattered across CMakefiles. This becomes a problem when we have to consider building libraries targeted for CPU-only, CUDA, and HIP backends. - This code brings all the scattered CMake instructions into one function, `gpu_cpp_library()`, where the user specifies all the relevant sources in one place. During the build, the function will determine which sources to use based on the designated build target, apply all the correct source property annotations, HIPification, includes, and linking as needed, and output a single `.SO` file. - This first step is needed for us to be able to break up the FBGEMM build reliably into multiple .SO files Pull Request resolved: #3385 Reviewed By: jianyuh Differential Revision: D66112279 Pulled By: q10 fbshipit-source-id: 32a671ba70b88d642506893841c687938b4b04b6
1 parent fca9f4e commit e118f0c

File tree

11 files changed

+481
-207
lines changed

11 files changed

+481
-207
lines changed

cmake/modules/GpuCppLibrary.cmake

Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules/Utilities.cmake)
8+
9+
function(prepare_target_sources)
  # Selects, annotates, and merges the source files for a single target.
  #
  # This function does the following:
  #   1. Take all the specified project sources for a target
  #   2. Filter the files out based on CPU-only, CUDA, and HIP build modes
  #      (driven by the FBGEMM_CPU_ONLY and USE_ROCM variables)
  #   3. Bucketize them into sets of CXX, CU, and HIP files
  #   4. Apply common source file properties for each bucket
  #   5. Merge the buckets back into a single list of sources
  #   6. Export the file list as ${args_PREFIX}_sources
  #
  # Arguments:
  #   PREFIX              Name prefix used for the output variable (required)
  #   CPU_SRCS            Sources that are always included
  #   GPU_SRCS            Sources for GPU builds; split by extension into
  #                       .cpp, .cu, and (ROCm only) .hip buckets
  #   CUDA_SPECIFIC_SRCS  Sources added to the CU bucket only when NOT USE_ROCM
  #   HIP_SPECIFIC_SRCS   Sources added to the HIP bucket only when USE_ROCM
  #   GPU_FLAGS           COMPILE_OPTIONS applied to the CU bucket
  #   INCLUDE_DIRS        INCLUDE_DIRECTORIES applied to every bucket
  #
  # Output (set in the caller's scope):
  #   ${PREFIX}_sources   The merged list of selected sources

  set(flags)
  set(singleValueArgs PREFIX)
  set(multiValueArgs
    CPU_SRCS
    GPU_SRCS
    CUDA_SPECIFIC_SRCS
    HIP_SPECIFIC_SRCS
    GPU_FLAGS
    INCLUDE_DIRS
  )

  cmake_parse_arguments(
    args
    "${flags}" "${singleValueArgs}" "${multiValueArgs}"
    ${ARGN})

  # Without a prefix the result would be exported under the meaningless name
  # `_sources`, so fail loudly instead.
  if("${args_PREFIX}" STREQUAL "")
    message(FATAL_ERROR "prepare_target_sources: PREFIX is required")
  endif()

  # Start from an empty list so that a same-named variable visible from a
  # calling scope cannot leak into the result.
  set(${args_PREFIX}_sources_combined)

  ############################################################################
  # Collect, Annotate, and Append CXX sources
  ############################################################################

  # Add the CPU CXX sources
  set(${args_PREFIX}_sources_cpp ${args_CPU_SRCS})

  # For GPU mode, add the CXX sources from GPU_SRCS
  if(NOT FBGEMM_CPU_ONLY)
    # NOTE: the backslash is doubled on purpose. Inside a quoted argument
    # CMake treats `\.` as an identity escape and hands the callee a bare `.`
    # (match-any-character); `\\.` is what yields a literal-dot regex.
    LIST_FILTER(
      INPUT ${args_GPU_SRCS}
      OUTPUT gpu_sources_cpp
      REGEX "^.+\\.cpp$"
    )
    list(APPEND ${args_PREFIX}_sources_cpp ${gpu_sources_cpp})
  endif()

  # Set source properties
  set_source_files_properties(${${args_PREFIX}_sources_cpp}
    PROPERTIES INCLUDE_DIRECTORIES
    "${args_INCLUDE_DIRS}")

  # Use the AVX2 flags when CXX_AVX2_FOUND is set; otherwise fall back to
  # plain -fopenmp.
  if(CXX_AVX2_FOUND)
    set_source_files_properties(${${args_PREFIX}_sources_cpp}
      PROPERTIES COMPILE_OPTIONS
      "${AVX2_FLAGS}")
  else()
    set_source_files_properties(${${args_PREFIX}_sources_cpp}
      PROPERTIES COMPILE_OPTIONS
      "-fopenmp")
  endif()

  # Append to the full sources list
  list(APPEND ${args_PREFIX}_sources_combined ${${args_PREFIX}_sources_cpp})

  ############################################################################
  # Collect, Annotate, and Append CU sources
  ############################################################################

  if(NOT FBGEMM_CPU_ONLY)
    # Filter GPU_SRCS for CU sources - these may be HIPified later if building
    # in ROCm mode
    LIST_FILTER(
      INPUT ${args_GPU_SRCS}
      OUTPUT ${args_PREFIX}_sources_cu
      REGEX "^.+\\.cu$"
    )

    # Append CUDA-specific sources, but ONLY when building in CUDA mode
    if(NOT USE_ROCM)
      list(APPEND ${args_PREFIX}_sources_cu ${args_CUDA_SPECIFIC_SRCS})
    endif()

    # Set source properties
    set_source_files_properties(${${args_PREFIX}_sources_cu}
      PROPERTIES COMPILE_OPTIONS
      "${args_GPU_FLAGS}")

    set_source_files_properties(${${args_PREFIX}_sources_cu}
      PROPERTIES INCLUDE_DIRECTORIES
      "${args_INCLUDE_DIRS}")

    # Append to the full sources list
    list(APPEND ${args_PREFIX}_sources_combined ${${args_PREFIX}_sources_cu})
  endif()

  ############################################################################
  # Collect, Annotate, and Append HIP sources
  ############################################################################

  if(NOT FBGEMM_CPU_ONLY AND USE_ROCM)
    # Filter GPU_SRCS for HIP sources
    LIST_FILTER(
      INPUT ${args_GPU_SRCS}
      OUTPUT ${args_PREFIX}_sources_hip
      REGEX "^.+\\.hip$"
    )

    # Append HIP-specific sources, but ONLY when building in HIP mode
    list(APPEND ${args_PREFIX}_sources_hip ${args_HIP_SPECIFIC_SRCS})

    # Set source properties
    set_source_files_properties(${${args_PREFIX}_sources_hip}
      PROPERTIES INCLUDE_DIRECTORIES
      "${args_INCLUDE_DIRS}")

    # Append to the full sources list
    list(APPEND ${args_PREFIX}_sources_combined ${${args_PREFIX}_sources_hip})
  endif()

  ############################################################################
  # Set the Output Variable(s)
  ############################################################################

  set(${args_PREFIX}_sources ${${args_PREFIX}_sources_combined} PARENT_SCOPE)
endfunction()
129+
130+
function(prepare_hipified_target_sources)
  # Resolves a target's sources to their HIPified equivalents.
  #
  # Steps performed:
  #   1. Take all the specified target sources (SRCS)
  #   2. Look up their equivalent HIPified files where applicable, via
  #      get_hipified_list() (presumes that hipify() has already been run)
  #   3. Set the HIP_SOURCE_PROPERTY_FORMAT source property on those files
  #   4. Add INCLUDE_DIRS to the HIP include directories
  #
  # Arguments:
  #   PREFIX        Name prefix used for the output variable
  #   SRCS          Sources to look up
  #   INCLUDE_DIRS  Directories handed to hip_include_directories()
  #
  # Output (set in the caller's scope):
  #   ${PREFIX}_sources_hipified

  cmake_parse_arguments(
    args
    ""
    "PREFIX"
    "SRCS;INCLUDE_DIRS"
    ${ARGN})

  # Swap each source for its HIPified counterpart
  get_hipified_list("${args_SRCS}" hipified_srcs)

  # Annotate the resulting files
  set_source_files_properties(
    ${hipified_srcs}
    PROPERTIES
      HIP_SOURCE_PROPERTY_FORMAT 1)

  # Register the include directories with HIP
  hip_include_directories("${args_INCLUDE_DIRS}")

  ############################################################################
  # Export the result to the caller
  ############################################################################

  set(${args_PREFIX}_sources_hipified ${hipified_srcs} PARENT_SCOPE)
endfunction()
160+
161+
function(gpu_cpp_library)
  # Builds one library target, `<PREFIX>_py`, from a categorized set of sources.
  #
  # This function does the following:
  #   1. Take all the target sources and select relevant sources based on
  #      build type (CPU-only, CUDA, HIP)
  #   2. Apply source file properties as needed
  #   3. HIPify files as needed
  #   4. Build the .SO file
  #   5. Register a `<PREFIX>_py_postbuild` target that runs the post-build
  #      script after the library is built
  #
  # Output (set in the caller's scope):
  #   ${PREFIX}_py   The name of the created library target

  set(flags)
  set(singleValueArgs
    PREFIX              # Desired name prefix for the library target (required)
  )
  set(multiValueArgs
    CPU_SRCS            # Sources for CPU-only build
    GPU_SRCS            # Sources common to both CUDA and HIP builds. .CU files specified here will be HIPified when building a HIP target
    CUDA_SPECIFIC_SRCS  # Sources available only for CUDA build
    HIP_SPECIFIC_SRCS   # Sources available only for HIP build
    OTHER_SRCS          # Sources added to the library as-is, bypassing source selection, property annotation, and HIPification
    GPU_FLAGS           # Compile flags for GPU builds
    INCLUDE_DIRS        # Include directories for compilation
  )

  cmake_parse_arguments(
    args
    "${flags}" "${singleValueArgs}" "${multiValueArgs}"
    ${ARGN})

  # Without a prefix the target would be named `_py`; fail loudly instead.
  if("${args_PREFIX}" STREQUAL "")
    message(FATAL_ERROR "gpu_cpp_library: PREFIX is required")
  endif()

  ############################################################################
  # Prepare CXX and CU sources
  ############################################################################

  prepare_target_sources(
    PREFIX ${args_PREFIX}
    CPU_SRCS ${args_CPU_SRCS}
    GPU_SRCS ${args_GPU_SRCS}
    CUDA_SPECIFIC_SRCS ${args_CUDA_SPECIFIC_SRCS}
    HIP_SPECIFIC_SRCS ${args_HIP_SPECIFIC_SRCS}
    GPU_FLAGS ${args_GPU_FLAGS}
    INCLUDE_DIRS ${args_INCLUDE_DIRS})
  set(lib_sources ${${args_PREFIX}_sources})

  ############################################################################
  # Build the Library
  ############################################################################

  set(lib_name ${args_PREFIX}_py)

  # Reset so the summary printed at the end shows an empty list for non-ROCm
  # builds rather than a stale value visible from a calling scope.
  set(lib_sources_hipified)

  if(USE_ROCM)
    # Fetch the HIPified sources
    prepare_hipified_target_sources(
      PREFIX ${args_PREFIX}
      SRCS ${lib_sources}
      INCLUDE_DIRS ${args_INCLUDE_DIRS})
    set(lib_sources_hipified ${${args_PREFIX}_sources_hipified})

    # Create the HIP library
    hip_add_library(${lib_name} SHARED
      ${lib_sources_hipified}
      ${args_OTHER_SRCS}
      ${FBGEMM_HIP_HCC_LIBRARIES}
      HIPCC_OPTIONS
      ${HIP_HCC_FLAGS})

    # Append ROCM includes
    target_include_directories(${lib_name} PUBLIC
      ${FBGEMM_HIP_INCLUDE}
      ${ROCRAND_INCLUDE}
      ${ROCM_SMI_INCLUDE})

  else()
    # Create the C++/CUDA library
    add_library(${lib_name} MODULE
      ${lib_sources}
      ${args_OTHER_SRCS})
  endif()

  ############################################################################
  # Library Includes and Linking
  ############################################################################

  # Add PyTorch include/
  target_include_directories(${lib_name} PRIVATE
    ${TORCH_INCLUDE_DIRS}
    ${NCCL_INCLUDE_DIRS})

  # Remove `lib` from the output artifact name, i.e. `libfoo.so` -> `foo.so`
  set_target_properties(${lib_name}
    PROPERTIES PREFIX "")

  # Link to PyTorch
  # NOTE(review): the keyword-less signature is kept deliberately; CMake does
  # not allow mixing it with the PRIVATE/PUBLIC signature on one target, and
  # hip_add_library() may already link with the plain form - confirm before
  # adding a visibility keyword here.
  target_link_libraries(${lib_name}
    ${TORCH_LIBRARIES}
    ${NCCL_LIBRARIES}
    ${CUDA_DRIVER_LIBRARIES})

  # Link to NVML if available
  if(NVML_LIB_PATH)
    target_link_libraries(${lib_name} ${NVML_LIB_PATH})
  endif()

  # Silence warnings (in asmjit)
  target_compile_options(${lib_name} PRIVATE
    -Wno-deprecated-anon-enum-enum-conversion
    -Wno-deprecated-declarations)

  ############################################################################
  # Post-Build Steps
  ############################################################################

  # Add a post-build step to remove errant RPATHs from the .SO
  add_custom_target(${lib_name}_postbuild ALL
    WORKING_DIRECTORY ${OUTPUT_DIR}
    COMMAND bash ${FBGEMM}/.github/scripts/fbgemm_gpu_postbuild.bash
    VERBATIM)

  # Run the post-build steps AFTER the build itself
  add_dependencies(${lib_name}_postbuild ${lib_name})

  ############################################################################
  # Set the Output Variable(s)
  ############################################################################

  # PREFIX = `foo` --> Target Library = `foo_py`
  set(${args_PREFIX}_py ${lib_name} PARENT_SCOPE)

  BLOCK_PRINT(
    "GPU CPP Library Target: ${args_PREFIX}"
    " "
    "CPU_SRCS:"
    "${args_CPU_SRCS}"
    " "
    "GPU_SRCS:"
    "${args_GPU_SRCS}"
    " "
    "CUDA_SPECIFIC_SRCS:"
    "${args_CUDA_SPECIFIC_SRCS}"
    " "
    "HIP_SPECIFIC_SRCS"
    "${args_HIP_SPECIFIC_SRCS}"
    " "
    "OTHER_SRCS:"
    "${args_OTHER_SRCS}"
    " "
    "GPU_FLAGS:"
    "${args_GPU_FLAGS}"
    " "
    "INCLUDE_DIRS:"
    "${args_INCLUDE_DIRS}"
    " "
    "Selected Source Files:"
    "${lib_sources}"
    " "
    "HIPified Source Files:"
    "${lib_sources_hipified}"
    " "
    "Output Library:"
    "${lib_name}"
  )
endfunction()

cmake/modules/PyTorchSetup.cmake

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,6 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules/Utilities.cmake)
1313

1414
find_package(Torch REQUIRED)
1515

16-
BLOCK_PRINT(
17-
"PyTorch Flags"
18-
""
19-
"TORCH_INCLUDE_DIRS=${TORCH_INCLUDE_DIRS}"
20-
""
21-
"TORCH_LIBRARIES=${TORCH_LIBRARIES}"
22-
)
23-
2416
#
2517
# PyTorch CUDA Extensions are normally compiled with the flags below. However we
2618
# disabled -D__CUDA_NO_HALF_CONVERSIONS__ here as it caused "error: no suitable
@@ -29,6 +21,21 @@ BLOCK_PRINT(
2921
#
3022

3123
set(TORCH_CUDA_OPTIONS
32-
--expt-relaxed-constexpr -D__CUDA_NO_HALF_OPERATORS__
33-
# -D__CUDA_NO_HALF_CONVERSIONS__
34-
-D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__)
24+
--expt-relaxed-constexpr
25+
-D__CUDA_NO_HALF_OPERATORS__
26+
# -D__CUDA_NO_HALF_CONVERSIONS__
27+
-D__CUDA_NO_BFLOAT16_CONVERSIONS__
28+
-D__CUDA_NO_HALF2_OPERATORS__)
29+
30+
BLOCK_PRINT(
31+
"PyTorch Flags:"
32+
" "
33+
"TORCH_INCLUDE_DIRS:"
34+
"${TORCH_INCLUDE_DIRS}"
35+
" "
36+
"TORCH_LIBRARIES:"
37+
"${TORCH_LIBRARIES}"
38+
" "
39+
"TORCH_CUDA_OPTIONS:"
40+
"${TORCH_CUDA_OPTIONS}"
41+
)

0 commit comments

Comments
 (0)