Skip to content

Commit 5b02e54

Browse files
committed
Revert "Revert "Split & remove extension_parallel (#8983)""
This reverts commit 05a160e.

Revert "Revert "Make serial parallel_for "polyfill" iterate backwards in debug builds (#9044)""
This reverts commit 815eaff.

Revert "Revert "Unbreak optimized kernels buck build (and check it in unittest-buck) (#9159)""
This reverts commit 10bb615.

ghstack-source-id: 2c4a363ff685022b388319f6565b74286fbf783a
ghstack-comment-id: 2718584686
Pull Request resolved: #9190
1 parent 2748b36 commit 5b02e54

26 files changed

+207
-195
lines changed

.ci/scripts/unittest-buck2.sh

+3-1
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@ buck2 query "//backends/apple/... + //backends/example/... + \
1717
//kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
1818
//kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."
1919

20+
UNBUILDABLE_OPTIMIZED_OPS_REGEX="gelu|fft_r2c|log_softmax"
21+
BUILDABLE_OPTIMIZED_OPS=$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)
2022
# TODO: expand the covered scope of Buck targets.
2123
# //runtime/kernel/... is failing because //third-party:torchgen_files's shell script can't find python on PATH.
2224
# //runtime/test/... requires Python torch, which we don't have in our OSS buck setup.
23-
buck2 test //kernels/portable/... //runtime/backend/... //runtime/core/... \
25+
buck2 test $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... //runtime/backend/... //runtime/core/... \
2426
//runtime/executor: //runtime/kernel/... //runtime/platform/...

.lintrunner.toml

+2
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,8 @@ exclude_patterns = [
218218
'examples/**',
219219
'extension/**',
220220
'kernels/optimized/**',
221+
# Justified <functional> include.
222+
'runtime/kernel/thread_parallel_interface.h',
221223
'scripts/**',
222224
'third-party/**',
223225
'util/**',

CMakeLists.txt

-1
Original file line numberDiff line numberDiff line change
@@ -751,7 +751,6 @@ if(EXECUTORCH_BUILD_PTHREADPOOL
751751
AND EXECUTORCH_BUILD_CPUINFO
752752
)
753753
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
754-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/parallel)
755754
endif()
756755

757756
if(EXECUTORCH_BUILD_PYBIND)

Test.cmake

-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ if(BUILD_TESTING)
1313
add_subdirectory(extension/evalue_util/test)
1414
add_subdirectory(extension/kernel_util/test)
1515
add_subdirectory(extension/memory_allocator/test)
16-
add_subdirectory(extension/parallel/test)
1716
add_subdirectory(extension/pytree/test)
1817
add_subdirectory(kernels/portable/cpu/util/test)
1918
add_subdirectory(kernels/prim_ops/test)

build/cmake_deps.toml

+4-18
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ excludes = [
8888
deps = [
8989
"executorch",
9090
"executorch_core",
91-
"extension_parallel",
9291
"extension_threadpool",
9392
"portable_kernels",
9493
]
@@ -131,7 +130,7 @@ excludes = [
131130
deps = [
132131
"executorch_core",
133132
"executorch",
134-
"extension_parallel",
133+
"extension_threadpool",
135134
]
136135

137136
[targets.optimized_native_cpu_ops]
@@ -146,7 +145,6 @@ excludes = [
146145
deps = [
147146
"executorch_core",
148147
"executorch",
149-
"extension_parallel",
150148
"extension_threadpool",
151149
"portable_kernels",
152150
]
@@ -227,19 +225,6 @@ deps = [
227225
"extension_runner_util",
228226
]
229227

230-
[targets.extension_parallel]
231-
buck_targets = [
232-
"//extension/parallel:thread_parallel",
233-
]
234-
filters = [
235-
".cpp$",
236-
]
237-
deps = [
238-
"executorch",
239-
"executorch_core",
240-
"extension_threadpool",
241-
]
242-
243228
[targets.extension_tensor]
244229
buck_targets = [
245230
"//extension/tensor:tensor",
@@ -379,6 +364,7 @@ excludes = [
379364
deps = [
380365
"executorch",
381366
"executorch_core",
367+
"extension_threadpool",
382368
"xnnpack_backend",
383369
"portable_kernels",
384370
]
@@ -393,6 +379,7 @@ filters = [
393379
deps = [
394380
"executorch",
395381
"executorch_core",
382+
"extension_threadpool",
396383
]
397384

398385
[targets.xnnpack_schema]
@@ -427,7 +414,6 @@ deps = [
427414
"executorch",
428415
"executorch_core",
429416
"optimized_kernels",
430-
"extension_parallel",
431417
"extension_threadpool",
432418
"reduce_util",
433419
"xnnpack_backend",
@@ -465,7 +451,7 @@ deps = [
465451
"executorch_core",
466452
"extension_data_loader",
467453
"extension_module",
468-
"extension_parallel",
454+
"extension_threadpool",
469455
"portable_kernels",
470456
"quantized_kernels",
471457
"xnnpack_backend",

build/executorch-config.cmake

+1-7
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ set(lib_list
7575
custom_ops
7676
extension_module
7777
extension_module_static
78-
extension_parallel
7978
extension_runner_util
8079
extension_tensor
8180
extension_threadpool
@@ -131,14 +130,9 @@ endforeach()
131130

132131
# TODO: investigate use of install(EXPORT) to cleanly handle
133132
# target_compile_options/target_compile_definitions for everything.
134-
if(TARGET extension_parallel)
135-
set_target_properties(
136-
extension_parallel PROPERTIES INTERFACE_LINK_LIBRARIES extension_threadpool
137-
)
138-
endif()
139133
if(TARGET cpublas)
140134
set_target_properties(
141-
cpublas PROPERTIES INTERFACE_LINK_LIBRARIES extension_parallel
135+
cpublas PROPERTIES INTERFACE_LINK_LIBRARIES extension_threadpool
142136
)
143137
endif()
144138
if(TARGET extension_threadpool)

extension/llm/custom_ops/op_sdpa.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
#include <vector>
2020

2121
#ifdef ET_USE_THREADPOOL
22-
#include <executorch/extension/parallel/thread_parallel.h>
2322
#include <executorch/extension/threadpool/threadpool.h>
23+
#include <executorch/runtime/kernel/thread_parallel_interface.h>
2424
#endif
2525
#include <executorch/extension/kernel_util/make_boxed_from_unboxed_functor.h>
2626

extension/llm/custom_ops/targets.bzl

-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ def define_common_targets():
3737
"//executorch/kernels/optimized:libblas{}".format(mkl_dep),
3838
"//executorch/kernels/optimized:libvec",
3939
"//executorch/extension/kernel_util:kernel_util",
40-
"//executorch/extension/parallel:thread_parallel",
4140
"//executorch/extension/threadpool:threadpool",
4241
],
4342
deps = [

extension/parallel/CMakeLists.txt

-25
This file was deleted.

extension/parallel/TARGETS

-8
This file was deleted.

extension/parallel/targets.bzl

-26
This file was deleted.

extension/parallel/test/TARGETS

-8
This file was deleted.

extension/parallel/test/targets.bzl

-19
This file was deleted.

extension/parallel/thread_parallel.h

+4-43
Original file line numberDiff line numberDiff line change
@@ -8,46 +8,7 @@
88

99
#pragma once
1010

11-
#include <cstdint>
12-
#include <functional>
13-
14-
namespace executorch {
15-
namespace extension {
16-
17-
/**
18-
* A helper to run function in parallel.
19-
*
20-
* begin, end: describe the extent of the workitems via first and last workitem
21-
* to be processed
22-
* grain_size: number of workitems processed by user callback which is
23-
* described below
24-
* f: user function applied in parallel to the chunks, signature:
25-
* void f(int64_t begin, int64_t end)
26-
* Returns true if all work items are processed successfully, false otherwise
27-
*
28-
* Warning: parallel_for does NOT copy thread local states from the current
29-
* thread to the worker threads. Users need to protect the access to captured
30-
* data if they mutate them in f.
31-
*/
32-
bool parallel_for(
33-
const int64_t begin,
34-
const int64_t end,
35-
const int64_t grain_size,
36-
const std::function<void(int64_t, int64_t)>& f);
37-
38-
int64_t get_thread_num();
39-
40-
void set_thread_num(int64_t thread_num);
41-
42-
} // namespace extension
43-
} // namespace executorch
44-
45-
namespace torch {
46-
namespace executor {
47-
// TODO(T197294990): Remove these deprecated aliases once all users have moved
48-
// to the new `::executorch` namespaces.
49-
using ::executorch::extension::get_thread_num;
50-
using ::executorch::extension::parallel_for;
51-
using ::executorch::extension::set_thread_num;
52-
} // namespace executor
53-
} // namespace torch
11+
// This header is a stub left behind after the move to
12+
// executorch/runtime/kernel. As such, it is deprecated; include and
13+
// use the below header directly instead.
14+
#include <executorch/runtime/kernel/thread_parallel_interface.h>

extension/threadpool/CMakeLists.txt

+6-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ if(NOT CMAKE_CXX_STANDARD)
2121
endif()
2222

2323
add_library(
24-
extension_threadpool threadpool.cpp threadpool_guard.cpp cpuinfo_utils.cpp
24+
extension_threadpool threadpool.cpp threadpool_guard.cpp thread_parallel.cpp
25+
cpuinfo_utils.cpp
2526
)
2627
target_link_libraries(
2728
extension_threadpool PUBLIC executorch_core cpuinfo pthreadpool
@@ -42,3 +43,7 @@ install(
4243
INCLUDES
4344
DESTINATION ${_common_include_directories}
4445
)
46+
47+
if(BUILD_TESTING)
48+
add_subdirectory(test)
49+
endif()

extension/threadpool/targets.bzl

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ def define_common_targets():
99
"""
1010

1111
_THREADPOOL_SRCS = [
12+
"thread_parallel.cpp",
1213
"threadpool.cpp",
1314
"threadpool_guard.cpp",
1415
] + (["fb/threadpool_use_n_threads.cpp"] if not runtime.is_oss else [])
@@ -29,6 +30,8 @@ def define_common_targets():
2930
exported_deps = [
3031
third_party_dep("pthreadpool"),
3132
third_party_dep("cpuinfo"),
33+
# Allow users to use the header without an extra deps entry.
34+
"//executorch/runtime/kernel:thread_parallel_interface",
3235
],
3336
exported_preprocessor_flags = [
3437
"-DET_USE_THREADPOOL",

extension/parallel/test/CMakeLists.txt renamed to extension/threadpool/test/CMakeLists.txt

+4-16
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66

7+
# @generated by test/utils/generate_gtest_cmakelists.py
8+
#
79
# This file should be formatted with
810
# ~~~
911
# cmake-format -i CMakeLists.txt
@@ -12,28 +14,14 @@
1214
#
1315

1416
cmake_minimum_required(VERSION 3.19)
15-
project(extension_parallel_test)
16-
17-
# Use C++17 for test.
18-
set(CMAKE_CXX_STANDARD 17)
1917

2018
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
2119

2220
include(${EXECUTORCH_ROOT}/build/Test.cmake)
2321

24-
set(_test_srcs thread_parallel_test.cpp ../thread_parallel.cpp)
22+
set(_test_srcs thread_parallel_test.cpp threadpool_test.cpp)
2523

2624
et_cxx_test(
27-
extension_parallel_test
28-
SOURCES
29-
${_test_srcs}
30-
EXTRA_LIBS
31-
pthreadpool
32-
cpuinfo
25+
extension_threadpool_test SOURCES ${_test_srcs} EXTRA_LIBS
3326
extension_threadpool
3427
)
35-
target_include_directories(
36-
extension_parallel_test
37-
PRIVATE ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
38-
${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
39-
)

extension/threadpool/test/targets.bzl

+12
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,15 @@ def define_common_targets():
1818
"//executorch/extension/threadpool:threadpool",
1919
],
2020
)
21+
22+
runtime.cxx_test(
23+
name = "thread_parallel_test",
24+
srcs = [
25+
"thread_parallel_test.cpp",
26+
],
27+
deps = [
28+
"//executorch/extension/threadpool:threadpool",
29+
"//executorch/runtime/kernel:thread_parallel_interface",
30+
"//executorch/runtime/platform:platform",
31+
],
32+
)

0 commit comments

Comments
 (0)