Skip to content

Extract parallel_for_each_reduce_over_dim_output_index from argmin parallelization PoC #9139

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 118 commits into from
Mar 12, 2025
Merged
Show file tree
Hide file tree
Changes from 102 commits
Commits
Show all changes
118 commits
Select commit Hold shift + click to select a range
d0b11e8
Update
swolchok Mar 4, 2025
9437be1
Update
swolchok Mar 4, 2025
643e10e
Update
swolchok Mar 4, 2025
6f2842b
Update
swolchok Mar 4, 2025
e47dfeb
Update
swolchok Mar 4, 2025
231ebc3
Update
swolchok Mar 5, 2025
296513c
Update
swolchok Mar 5, 2025
845a01e
Update
swolchok Mar 5, 2025
a92958a
Update
swolchok Mar 5, 2025
3fa99d6
Update
swolchok Mar 5, 2025
a6c69a6
Update
swolchok Mar 5, 2025
3bd6437
Update
swolchok Mar 5, 2025
675f01b
Update
swolchok Mar 5, 2025
5f3a768
Update
swolchok Mar 5, 2025
9fdebee
Update
swolchok Mar 5, 2025
70a7096
Update
swolchok Mar 5, 2025
337dc23
Update
swolchok Mar 5, 2025
f388177
Update
swolchok Mar 5, 2025
2949daf
Update
swolchok Mar 5, 2025
7347915
Update
swolchok Mar 5, 2025
1a8481d
Update
swolchok Mar 5, 2025
e48e816
Update
swolchok Mar 5, 2025
3351d50
Update
swolchok Mar 6, 2025
0102e25
Update
swolchok Mar 6, 2025
956f8a5
Update
swolchok Mar 6, 2025
9f7f0c1
Update
swolchok Mar 6, 2025
a1aeae7
Update
swolchok Mar 6, 2025
c658163
Update
swolchok Mar 6, 2025
7e0ccd4
Update
swolchok Mar 6, 2025
d9cd27c
Update
swolchok Mar 6, 2025
095d4f5
Update
swolchok Mar 6, 2025
e537a59
Update
swolchok Mar 6, 2025
c130224
Update
swolchok Mar 6, 2025
49c2971
Update
swolchok Mar 6, 2025
cd37460
Update
swolchok Mar 6, 2025
754a4f6
Update
swolchok Mar 6, 2025
11c5707
Update
swolchok Mar 6, 2025
7ca7627
Update
swolchok Mar 6, 2025
d428ca2
Update
swolchok Mar 6, 2025
62b6ef2
Update
swolchok Mar 6, 2025
b92ea35
Update
swolchok Mar 6, 2025
b478275
Update
swolchok Mar 7, 2025
0470870
Update
swolchok Mar 7, 2025
5a283c8
Update
swolchok Mar 7, 2025
a8a0e57
Update
swolchok Mar 7, 2025
df93cd4
Update
swolchok Mar 7, 2025
6350e07
Update
swolchok Mar 7, 2025
bd20770
Update
swolchok Mar 7, 2025
e7190a8
Update
swolchok Mar 7, 2025
4dd58a0
Update
swolchok Mar 7, 2025
1b6eb9f
Update
swolchok Mar 7, 2025
450e50b
Update
swolchok Mar 7, 2025
4459a7e
Update
swolchok Mar 7, 2025
fad4ed8
Update
swolchok Mar 7, 2025
085b624
Update
swolchok Mar 7, 2025
c7219a3
Update
swolchok Mar 7, 2025
e4af3bb
Update
swolchok Mar 8, 2025
9c9e31e
Update
swolchok Mar 8, 2025
34423ae
Update
swolchok Mar 8, 2025
379c10e
Update
swolchok Mar 8, 2025
fb5e06c
Update
swolchok Mar 8, 2025
4a0e893
Update
swolchok Mar 8, 2025
40a1bce
Update
swolchok Mar 8, 2025
37e4213
Update
swolchok Mar 8, 2025
ef1a0ce
Update
swolchok Mar 10, 2025
6844013
Update
swolchok Mar 10, 2025
e417a3b
Update
swolchok Mar 10, 2025
adaae97
Update
swolchok Mar 10, 2025
ea335ee
Update
swolchok Mar 10, 2025
2cc4910
Update
swolchok Mar 10, 2025
d7cdfa7
Update
swolchok Mar 10, 2025
98d6d01
Update
swolchok Mar 10, 2025
4a7ba26
Update
swolchok Mar 11, 2025
c0d1daa
Update
swolchok Mar 11, 2025
f408201
Update
swolchok Mar 11, 2025
e2fb689
Update
swolchok Mar 11, 2025
994c5f5
Update
swolchok Mar 11, 2025
b76240d
Update
swolchok Mar 11, 2025
18d5dde
Update
swolchok Mar 11, 2025
8cfdfa6
Update
swolchok Mar 11, 2025
4917358
Update
swolchok Mar 11, 2025
4a43b35
Update
swolchok Mar 11, 2025
3fe478d
Update
swolchok Mar 11, 2025
21d8aac
Update
swolchok Mar 11, 2025
2272c40
Update
swolchok Mar 11, 2025
f5bac6a
Update
swolchok Mar 11, 2025
c44fda6
Update
swolchok Mar 11, 2025
fa9ef9c
Update
swolchok Mar 11, 2025
73f37ee
Update
swolchok Mar 11, 2025
a8dd330
Update
swolchok Mar 11, 2025
0088cd2
Update
swolchok Mar 11, 2025
6b296df
Update
swolchok Mar 11, 2025
854c967
Update
swolchok Mar 11, 2025
03f00ee
Update
swolchok Mar 11, 2025
87085af
Update
swolchok Mar 11, 2025
2ee8846
Update
swolchok Mar 11, 2025
4779960
Update
swolchok Mar 11, 2025
e6be3fe
Update
swolchok Mar 11, 2025
930b2fd
Update
swolchok Mar 11, 2025
c66f533
Update
swolchok Mar 11, 2025
e6d6ad6
Update
swolchok Mar 11, 2025
c0f0bec
Update
swolchok Mar 11, 2025
5781018
Update
swolchok Mar 11, 2025
caac9df
Update
swolchok Mar 11, 2025
66387af
Update
swolchok Mar 11, 2025
051f69c
Update
swolchok Mar 11, 2025
d8f4b13
Update
swolchok Mar 11, 2025
ecfabce
Update
swolchok Mar 11, 2025
a792450
Update
swolchok Mar 11, 2025
cfbd318
Update
swolchok Mar 12, 2025
de50f9b
Update
swolchok Mar 12, 2025
d68ef30
Update
swolchok Mar 12, 2025
4ff9658
Update
swolchok Mar 12, 2025
98e3147
Update
swolchok Mar 12, 2025
cb6aa4d
Update
swolchok Mar 12, 2025
eabae64
Update
swolchok Mar 12, 2025
046f5b6
Update
swolchok Mar 12, 2025
fc9caab
Update
swolchok Mar 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .lintrunner.toml
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ exclude_patterns = [
'examples/**',
'extension/**',
'kernels/optimized/**',
# Justified <functional> include.
'runtime/kernel/thread_parallel_interface.h',
'scripts/**',
'third-party/**',
'util/**',
Expand Down
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,6 @@ if(EXECUTORCH_BUILD_PTHREADPOOL
AND EXECUTORCH_BUILD_CPUINFO
)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/parallel)
endif()

if(EXECUTORCH_BUILD_PYBIND)
Expand Down
1 change: 0 additions & 1 deletion Test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ if(BUILD_TESTING)
add_subdirectory(extension/evalue_util/test)
add_subdirectory(extension/kernel_util/test)
add_subdirectory(extension/memory_allocator/test)
add_subdirectory(extension/parallel/test)
add_subdirectory(extension/pytree/test)
add_subdirectory(kernels/portable/cpu/util/test)
add_subdirectory(kernels/prim_ops/test)
Expand Down
26 changes: 8 additions & 18 deletions build/cmake_deps.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ excludes = [
deps = [
"executorch",
"executorch_core",
"extension_parallel",
"extension_threadpool",
"optimized_cpublas",
"portable_kernels",
]

Expand Down Expand Up @@ -131,7 +131,7 @@ excludes = [
deps = [
"executorch_core",
"executorch",
"extension_parallel",
"extension_threadpool",
]

[targets.optimized_native_cpu_ops]
Expand All @@ -146,8 +146,8 @@ excludes = [
deps = [
"executorch_core",
"executorch",
"extension_parallel",
"extension_threadpool",
"optimized_cpublas",
"portable_kernels",
]
# ---------------------------------- core end ----------------------------------
Expand Down Expand Up @@ -227,19 +227,6 @@ deps = [
"extension_runner_util",
]

[targets.extension_parallel]
buck_targets = [
"//extension/parallel:thread_parallel",
]
filters = [
".cpp$",
]
deps = [
"executorch",
"executorch_core",
"extension_threadpool",
]

[targets.extension_tensor]
buck_targets = [
"//extension/tensor:tensor",
Expand Down Expand Up @@ -379,6 +366,7 @@ excludes = [
deps = [
"executorch",
"executorch_core",
"extension_threadpool",
"xnnpack_backend",
"portable_kernels",
]
Expand All @@ -393,6 +381,7 @@ filters = [
deps = [
"executorch",
"executorch_core",
"extension_threadpool",
]

[targets.xnnpack_schema]
Expand Down Expand Up @@ -426,8 +415,8 @@ excludes = [
deps = [
"executorch",
"executorch_core",
"optimized_cpublas",
"optimized_kernels",
"extension_parallel",
"extension_threadpool",
"reduce_util",
"xnnpack_backend",
Expand Down Expand Up @@ -465,7 +454,8 @@ deps = [
"executorch_core",
"extension_data_loader",
"extension_module",
"extension_parallel",
"extension_threadpool",
"optimized_cpublas",
"portable_kernels",
"quantized_kernels",
"xnnpack_backend",
Expand Down
24 changes: 19 additions & 5 deletions build/executorch-config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ set(lib_list
custom_ops
extension_module
extension_module_static
extension_parallel
extension_runner_util
extension_tensor
extension_threadpool
Expand All @@ -90,6 +89,7 @@ set(lib_list
pthreadpool
vulkan_backend
optimized_kernels
optimized_portable_kernels
cpublas
eigen_blas
optimized_ops_lib
Expand Down Expand Up @@ -131,14 +131,28 @@ endforeach()

# TODO: investigate use of install(EXPORT) to cleanly handle
# target_compile_options/target_compile_definitions for everything.
if(TARGET extension_parallel)
if(TARGET cpublas)
set_target_properties(
extension_parallel PROPERTIES INTERFACE_LINK_LIBRARIES extension_threadpool
cpublas PROPERTIES INTERFACE_LINK_LIBRARIES
"extension_threadpool;eigen_blas"
)
endif()
if(TARGET cpublas)
if(TARGET optimized_kernels)
set_target_properties(
optimized_kernels PROPERTIES INTERFACE_LINK_LIBRARIES
"executorch_core;cpublas;extension_threadpool"
)
endif()
if(TARGET optimized_native_cpu_ops_lib)
if(TARGET optimized_portable_kernels)
set(_maybe_optimized_portable_kernels_lib optimized_portable_kernels)
else()
set(_maybe_optimized_portable_kernels_lib portable_kernels)
endif()
set_target_properties(
cpublas PROPERTIES INTERFACE_LINK_LIBRARIES extension_parallel
optimized_native_cpu_ops_lib
PROPERTIES INTERFACE_LINK_LIBRARIES
"optimized_kernels;${_maybe_optimized_portable_kernels_lib}"
)
endif()
if(TARGET extension_threadpool)
Expand Down
7 changes: 6 additions & 1 deletion configurations/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,17 @@ if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
message("Generated files ${gen_command_sources}")

# optimized_native_cpu_ops_lib: Register optimized op kernels into the runtime
if(TARGET optimized_portable_kernels)
set(_optimized_native_cpu_ops_lib_portable_kernels_lib optimized_portable_kernels)
else()
set(_optimized_native_cpu_ops_lib_portable_kernels_lib portable_kernels)
endif()
gen_operators_lib(
LIB_NAME
"optimized_native_cpu_ops_lib"
KERNEL_LIBS
portable_kernels
optimized_kernels
${_optimized_native_cpu_ops_lib_portable_kernels_lib}
DEPS
executorch
)
Expand Down
4 changes: 0 additions & 4 deletions extension/android/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,6 @@ if(TARGET optimized_native_cpu_ops_lib)
APPEND
link_libraries
optimized_native_cpu_ops_lib
optimized_kernels
portable_kernels
cpublas
eigen_blas
)
target_link_options_shared_lib(optimized_native_cpu_ops_lib)
else()
Expand Down
2 changes: 1 addition & 1 deletion extension/llm/custom_ops/op_sdpa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
#include <vector>

#ifdef ET_USE_THREADPOOL
#include <executorch/extension/parallel/thread_parallel.h>
#include <executorch/extension/threadpool/threadpool.h>
#include <executorch/runtime/kernel/thread_parallel_interface.h>
#endif
#include <executorch/extension/kernel_util/make_boxed_from_unboxed_functor.h>

Expand Down
1 change: 0 additions & 1 deletion extension/llm/custom_ops/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def define_common_targets():
"//executorch/kernels/optimized:libblas{}".format(mkl_dep),
"//executorch/kernels/optimized:libvec",
"//executorch/extension/kernel_util:kernel_util",
"//executorch/extension/parallel:thread_parallel",
"//executorch/extension/threadpool:threadpool",
],
deps = [
Expand Down
25 changes: 0 additions & 25 deletions extension/parallel/CMakeLists.txt

This file was deleted.

8 changes: 0 additions & 8 deletions extension/parallel/TARGETS

This file was deleted.

26 changes: 0 additions & 26 deletions extension/parallel/targets.bzl

This file was deleted.

8 changes: 0 additions & 8 deletions extension/parallel/test/TARGETS

This file was deleted.

19 changes: 0 additions & 19 deletions extension/parallel/test/targets.bzl

This file was deleted.

47 changes: 4 additions & 43 deletions extension/parallel/thread_parallel.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,46 +8,7 @@

#pragma once

#include <cstdint>
#include <functional>

namespace executorch {
namespace extension {

/**
* A helper to run function in parallel.
*
* begin, end: describe the extent of the workitems via first and last workitem
* to be processed
* grain_size: number of workitems processed by user callback which is
* described below
* f: user function applied in parallel to the chunks, signature:
* void f(int64_t begin, int64_t end)
* Returns true if all work items are processed successfully, false otherwise
*
* Warning: parallel_for does NOT copy thread local states from the current
* thread to the worker threads. Users need to protect the access to captured
* data if they mutate them in f.
*/
bool parallel_for(
const int64_t begin,
const int64_t end,
const int64_t grain_size,
const std::function<void(int64_t, int64_t)>& f);

int64_t get_thread_num();

void set_thread_num(int64_t thread_num);

} // namespace extension
} // namespace executorch

namespace torch {
namespace executor {
// TODO(T197294990): Remove these deprecated aliases once all users have moved
// to the new `::executorch` namespaces.
using ::executorch::extension::get_thread_num;
using ::executorch::extension::parallel_for;
using ::executorch::extension::set_thread_num;
} // namespace executor
} // namespace torch
// This header is a stub left behind after the move to
// executorch/runtime/kernel. As such, it is deprecated; include and
// use the below header directly instead.
#include <executorch/runtime/kernel/thread_parallel_interface.h>
7 changes: 6 additions & 1 deletion extension/threadpool/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ if(NOT CMAKE_CXX_STANDARD)
endif()

add_library(
extension_threadpool threadpool.cpp threadpool_guard.cpp cpuinfo_utils.cpp
extension_threadpool threadpool.cpp threadpool_guard.cpp thread_parallel.cpp
cpuinfo_utils.cpp
)
target_link_libraries(
extension_threadpool PUBLIC executorch_core cpuinfo pthreadpool
Expand All @@ -42,3 +43,7 @@ install(
INCLUDES
DESTINATION ${_common_include_directories}
)

if(BUILD_TESTING)
add_subdirectory(test)
endif()
3 changes: 3 additions & 0 deletions extension/threadpool/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def define_common_targets():
"""

_THREADPOOL_SRCS = [
"thread_parallel.cpp",
"threadpool.cpp",
"threadpool_guard.cpp",
] + (["fb/threadpool_use_n_threads.cpp"] if not runtime.is_oss else [])
Expand All @@ -29,6 +30,8 @@ def define_common_targets():
exported_deps = [
third_party_dep("pthreadpool"),
third_party_dep("cpuinfo"),
# Allow users to use the header without an extra deps entry.
"//executorch/runtime/kernel:thread_parallel_interface",
],
exported_preprocessor_flags = [
"-DET_USE_THREADPOOL",
Expand Down
Loading
Loading