Skip to content

Commit e07b97c

Browse files
[AUTO] Check if actual device cached blob exists before model compilation (#29636)
### Details: - Enable AUTO to check whether a cached blob exists at the beginning of compilation. - Disable startup fallback and runtime fallback if a cached blob exists for the actual device. - ~~Add a new property for AUTO to indicate whether the core is querying the caching ability from the virtual device.~~ ### Tickets: - CVS-164790 --------- Co-authored-by: Wanglei Shen <[email protected]>
1 parent dd01689 commit e07b97c

File tree

8 files changed

+133
-86
lines changed

8 files changed

+133
-86
lines changed

src/inference/src/dev/core_impl.cpp

+3-5
Original file line numberDiff line numberDiff line change
@@ -1456,11 +1456,9 @@ bool ov::CoreImpl::device_supports_internal_property(const ov::Plugin& plugin, c
14561456
}
14571457

14581458
bool ov::CoreImpl::device_supports_model_caching(const ov::Plugin& plugin, const ov::AnyMap& arguments) const {
1459-
ov::AnyMap properties;
1460-
if (arguments.count(ov::device::priorities.name())) {
1461-
properties[ov::device::priorities.name()] = arguments.at(ov::device::priorities.name()).as<std::string>();
1462-
}
1463-
return plugin.supports_model_caching(properties);
1459+
ov::AnyMap properties_to_virtual_dev = arguments.empty() ? ov::AnyMap{ov::device::priorities("")} : arguments;
1460+
return ov::is_virtual_device(plugin.get_name()) ? plugin.supports_model_caching(properties_to_virtual_dev)
1461+
: plugin.supports_model_caching();
14641462
}
14651463

14661464
bool ov::CoreImpl::device_supports_cache_dir(const ov::Plugin& plugin) const {

src/plugins/auto/src/auto_schedule.cpp

+14-38
Original file line numberDiff line numberDiff line change
@@ -135,49 +135,25 @@ void AutoSchedule::init() {
135135
auto customize_helper_context_from_cache_setting = [this](bool is_actual_cpu,
136136
AutoCompileContext m_compile_context[],
137137
ScheduleContext::Ptr& m_context) {
138+
m_compile_context[CPU].m_is_enabled = true;
138139
const auto cpu_iter = deviceChecker().check_and_return_if_device_in_list("CPU", m_context->m_device_priorities);
139-
if (cpu_iter == m_context->m_device_priorities.end()) {
140+
if (cpu_iter == m_context->m_device_priorities.end() || is_actual_cpu) {
140141
m_compile_context[CPU].m_is_enabled = false;
141142
return;
142143
}
143-
m_compile_context[CPU].m_is_enabled = true;
144-
if (!is_actual_cpu) {
145-
const auto& device = m_compile_context[ACTUALDEVICE].m_device_info.device_name;
146-
auto& device_config = m_compile_context[ACTUALDEVICE].m_device_info.config;
147-
std::string cache_dir = device_config.count(ov::cache_dir.name())
148-
? device_config[ov::cache_dir.name()].as<std::string>()
149-
: m_context->m_ov_core->get_property("", ov::cache_dir);
150-
151-
if (m_context->m_startup_fallback && !cache_dir.empty()) {
152-
const auto properties =
153-
m_context->m_ov_core->create_compile_config(ov::DeviceIDParser(device).get_device_name(),
154-
device_config);
155-
std::string blobId;
156-
if (m_context->m_model)
157-
blobId = ov::ModelCache::compute_hash(std::const_pointer_cast<const ov::Model>(m_context->m_model),
158-
properties);
159-
else
160-
blobId = ov::ModelCache::compute_hash(m_context->m_model_path, properties);
161-
std::string cached_model_path = ov::util::make_path(cache_dir, blobId + ".blob");
162-
m_compile_context[CPU].m_is_enabled = !ov::util::file_exists(cached_model_path);
163-
LOG_DEBUG_TAG("device: %s %s cached blob: %s ",
164-
device.c_str(),
165-
m_compile_context[CPU].m_is_enabled ? "not found" : "found",
166-
cached_model_path.c_str());
167-
}
168-
}
169-
if (m_compile_context[CPU].m_is_enabled) {
170-
m_compile_context[CPU].m_device_info = *cpu_iter;
171-
m_compile_context[CPU].m_device_info.config[ov::hint::performance_mode.name()] =
172-
ov::hint::PerformanceMode::LATENCY;
173-
if (m_compile_context[ACTUALDEVICE].m_device_info.config.count(ov::cache_dir.name()) &&
174-
(m_context->m_startup_fallback || m_context->m_runtime_fallback)) {
175-
m_compile_context[CPU].m_device_info.config[ov::cache_dir.name()] = "";
176-
LOG_INFO_TAG("Clear cache dir setting for CPU accelerator");
177-
}
178-
m_compile_context[CPU].m_worker_name = "CPU_HELP";
179-
LOG_INFO_TAG("will load CPU for accelerator");
144+
m_compile_context[CPU].m_device_info = *cpu_iter;
145+
m_compile_context[CPU].m_device_info.config[ov::hint::performance_mode.name()] =
146+
ov::hint::PerformanceMode::LATENCY;
147+
std::string cache_dir =
148+
m_compile_context[ACTUALDEVICE].m_device_info.config.count(ov::cache_dir.name())
149+
? m_compile_context[ACTUALDEVICE].m_device_info.config[ov::cache_dir.name()].as<std::string>()
150+
: m_context->m_ov_core->get_property("", ov::cache_dir);
151+
if (!cache_dir.empty() && (m_context->m_startup_fallback || m_context->m_runtime_fallback)) {
152+
m_compile_context[CPU].m_device_info.config[ov::cache_dir.name()] = "";
153+
LOG_INFO_TAG("Clear cache dir setting for CPU accelerator");
180154
}
155+
m_compile_context[CPU].m_worker_name = "CPU_HELP";
156+
LOG_INFO_TAG("will load CPU for accelerator");
181157
};
182158
if (m_compile_context[ACTUALDEVICE].m_is_enabled) {
183159
LOG_INFO_TAG("select device:%s", m_compile_context[ACTUALDEVICE].m_device_info.device_name.c_str());

src/plugins/auto/src/plugin.cpp

+79-19
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
#include "openvino/runtime/device_id_parser.hpp"
2020
#include "openvino/runtime/internal_properties.hpp"
2121
#include "openvino/runtime/iremote_context.hpp"
22+
#include "openvino/runtime/compilation_context.hpp"
23+
#include "openvino/util/file_util.hpp"
2224
#include "plugin.hpp"
2325
#include "auto_schedule.hpp"
2426
#include "auto_compiled_model.hpp"
@@ -78,7 +80,6 @@ namespace auto_plugin {
7880
std::shared_ptr<std::mutex> Plugin::m_mtx = std::make_shared<std::mutex>();
7981
std::shared_ptr<std::map<unsigned int, std::list<std::string>>> Plugin::m_priority_map =
8082
std::make_shared<std::map<unsigned int, std::list<std::string>>>();
81-
8283
ov::SoPtr<ov::IRemoteContext> Plugin::create_context(const ov::AnyMap& remote_properties) const {
8384
OPENVINO_NOT_IMPLEMENTED;
8485
}
@@ -304,14 +305,17 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
304305
} else if (name == ov::device::full_name) {
305306
return decltype(ov::device::full_name)::value_type {get_device_name()};
306307
} else if (name == ov::device::capabilities.name()) {
307-
std::vector<std::string> device_list = arguments.count(ov::device::priorities.name())
308-
? m_plugin_config.parse_priorities_devices(
309-
arguments.at(ov::device::priorities.name()).as<std::string>())
308+
if (arguments.count(ov::device::priorities.name())) {
309+
// By default, all devices are assumed to support caching ability when Core checks caching ability for AUTO.
310+
return std::vector<std::string>{ov::device::capability::EXPORT_IMPORT};
311+
}
312+
std::string priorities = m_plugin_config.get_property(ov::device::priorities.name()).as<std::string>();
313+
std::vector<std::string> device_list = !priorities.empty()
314+
? m_plugin_config.parse_priorities_devices(priorities)
310315
: get_core()->get_available_devices();
316+
311317
std::vector<std::string> capabilities;
312318
for (auto const& device : device_list) {
313-
if (device[0] == '-')
314-
continue;
315319
try {
316320
auto dev_capabilities = get_core()->get_property(device, ov::device::capabilities);
317321
capabilities.insert(capabilities.end(), dev_capabilities.begin(), dev_capabilities.end());
@@ -409,7 +413,16 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model_impl(const std::string
409413
// and set filter configure
410414
auto auto_s_context = std::make_shared<ScheduleContext>();
411415
ov::AnyMap filter_property;
412-
auto str_devices = get_device_list(full_property);
416+
auto str_devices = get_device_list(full_property, model, model_path);
417+
// if the cached blob check changed the startup or runtime fallback setting, propagate it to the load config
418+
if (full_property.count(ov::intel_auto::enable_startup_fallback.name())) {
419+
load_config.set_property(ov::intel_auto::enable_startup_fallback(
420+
full_property.at(ov::intel_auto::enable_startup_fallback.name()).as<bool>()));
421+
}
422+
if (full_property.count(ov::intel_auto::enable_runtime_fallback.name())) {
423+
load_config.set_property(ov::intel_auto::enable_runtime_fallback(
424+
full_property.at(ov::intel_auto::enable_runtime_fallback.name()).as<bool>()));
425+
}
413426
// fill in the context for auto
414427
if (load_config.get_property(ov::enable_profiling)) {
415428
filter_property.insert({ov::enable_profiling(true)});
@@ -694,7 +707,9 @@ void Plugin::register_priority(const unsigned int& priority, const std::string&
694707
}
695708
}
696709

697-
std::string Plugin::get_device_list(const ov::AnyMap& properties) const {
710+
std::string Plugin::get_device_list(ov::AnyMap& properties,
711+
const std::shared_ptr<const ov::Model>& model,
712+
const std::string& model_path) const {
698713
std::string all_devices;
699714
std::string device_architecture;
700715
auto device_list_config = properties.find(ov::device::priorities.name());
@@ -708,10 +723,56 @@ std::string Plugin::get_device_list(const ov::AnyMap& properties) const {
708723
return "";
709724
};
710725
std::vector<std::string> devices_merged;
726+
bool enable_startup_cpu = properties.count(ov::intel_auto::enable_startup_fallback.name())
727+
? properties.at(ov::intel_auto::enable_startup_fallback.name()).as<bool>()
728+
: true;
729+
bool enable_runtime_cpu = properties.count(ov::intel_auto::enable_runtime_fallback.name())
730+
? properties.at(ov::intel_auto::enable_runtime_fallback.name()).as<bool>()
731+
: true;
732+
bool is_cumulative_tput =
733+
get_device_name() != "AUTO" ||
734+
(properties.count(ov::hint::performance_mode.name()) &&
735+
properties.at(ov::hint::performance_mode.name()).as<std::string>() == "CUMULATIVE_THROUGHPUT");
711736
if (device_list_config != properties.end() && !(device_list_config->second.as<std::string>().empty())) {
712737
auto priorities = device_list_config->second;
713738
// parsing the string and splitting the comma-separated tokens
714-
std::vector<std::string> devices_to_be_merged = m_plugin_config.parse_priorities_devices(priorities.as<std::string>());
739+
std::vector<std::string> devices_to_be_merged =
740+
m_plugin_config.parse_priorities_devices(priorities.as<std::string>());
741+
std::size_t num_blob_files = 0;
742+
std::string cache_dir = properties.count(ov::cache_dir.name())
743+
? properties.at(ov::cache_dir.name()).as<std::string>()
744+
: get_core()->get_property("", ov::cache_dir);
745+
bool if_need_cache_check =
746+
!is_cumulative_tput && enable_startup_cpu && (model || !model_path.empty()) && !cache_dir.empty();
747+
if (if_need_cache_check) {
748+
for (auto&& device : devices_to_be_merged) {
749+
ov::DeviceIDParser parsed{device};
750+
if (parsed.get_device_name().find("CPU") != std::string::npos)
751+
continue;
752+
// check if cached model exists for other devices
753+
auto dev_properties = get_core()->get_supported_property(parsed.get_device_name(), properties);
754+
dev_properties = get_core()->create_compile_config(parsed.get_device_name(), dev_properties);
755+
std::string blobId;
756+
757+
if (model)
758+
blobId = ov::ModelCache::compute_hash(std::const_pointer_cast<const ov::Model>(model),
759+
dev_properties);
760+
else
761+
blobId = ov::ModelCache::compute_hash(model_path, dev_properties);
762+
std::string cached_model_path = ov::util::make_path(cache_dir, blobId + ".blob");
763+
bool is_blob_file_exist = ov::util::file_exists(cached_model_path);
764+
num_blob_files += is_blob_file_exist;
765+
LOG_DEBUG_TAG("device: %s %s cached blob: %s ",
766+
device.c_str(),
767+
is_blob_file_exist ? "found" : "not found",
768+
cached_model_path.c_str());
769+
}
770+
771+
if (enable_startup_cpu && num_blob_files) {
772+
LOG_DEBUG_TAG("Disabling CPU fallback as a cached blob file was found for a device in the candidate "
773+
"list. AUTO will work as pass-through mode.");
774+
}
775+
}
715776
std::vector<std::string> devices_to_be_deleted(devices_to_be_merged.size());
716777
const auto& iterDel = std::copy_if(devices_to_be_merged.begin(),
717778
devices_to_be_merged.end(),
@@ -765,17 +826,16 @@ std::string Plugin::get_device_list(const ov::AnyMap& properties) const {
765826
std::vector<std::string> device_list = {};
766827
try {
767828
if (parsed.get_device_name().find("CPU") != std::string::npos) {
768-
bool enable_startup_cpu =
769-
properties.count(ov::intel_auto::enable_startup_fallback.name())
770-
? properties.at(ov::intel_auto::enable_startup_fallback.name()).as<bool>()
771-
: true;
772-
bool enable_runtime_cpu =
773-
properties.count(ov::intel_auto::enable_runtime_fallback.name())
774-
? properties.at(ov::intel_auto::enable_runtime_fallback.name()).as<bool>()
775-
: true;
776-
// Skip to load CPU device if both startup and runtime fallback are disabled
777-
if (!enable_startup_cpu && !enable_runtime_cpu)
829+
// Disable CPU if any blob files found
830+
if (num_blob_files) {
831+
properties[ov::intel_auto::enable_startup_fallback.name()] = false;
832+
properties[ov::intel_auto::enable_runtime_fallback.name()] = false;
833+
continue;
834+
}
835+
// Disable CPU if enable_startup_cpu and enable_runtime_cpu are both disabled
836+
if (!enable_startup_cpu && !enable_runtime_cpu) {
778837
continue;
838+
}
779839
}
780840
auto device_id_list = get_core()
781841
->get_property(parsed.get_device_name(), ov::available_devices.name(), {})

src/plugins/auto/src/plugin.hpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,12 @@ class Plugin : public ov::IPlugin {
4545
MOCKTESTMACRO std::vector<auto_plugin::DeviceInformation> parse_meta_devices(const std::string & devices_requests_cfg,
4646
const ov::AnyMap& properties) const;
4747

48-
MOCKTESTMACRO std::string get_device_list(const ov::AnyMap& properties) const;
48+
MOCKTESTMACRO std::string get_device_list(ov::AnyMap& properties,
49+
const std::shared_ptr<const ov::Model>& model = nullptr,
50+
const std::string& model_path = {}) const;
4951

5052
MOCKTESTMACRO std::list<DeviceInformation> get_valid_device(const std::vector<DeviceInformation>& meta_devices,
51-
const std::string& model_precision = "FP32") const;
53+
const std::string& model_precision = "FP32") const;
5254

5355
MOCKTESTMACRO DeviceInformation select_device(const std::vector<DeviceInformation>& meta_devices,
5456
const std::string& model_precision = "FP32",

src/plugins/auto/tests/functional/behavior/caching_test.cpp

+11-11
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ TEST_F(AutoFuncTests, load_cached_model_to_actual_device_and_disable_CPU_acceler
4343
ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)});
4444
}
4545
// No cached model for actual device
46-
// will cache model for both actual device and CPU plugin
47-
ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 2);
46+
// will cache model only for actual device. CVS-141026
47+
ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 1);
4848
ov::test::utils::removeFilesWithExt(cache_path, "blob");
4949
{
5050
auto compiled_model = core.compile_model(
@@ -70,8 +70,8 @@ TEST_F(AutoFuncTests, load_cached_model_to_actual_device_and_disable_CPU_acceler
7070
ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)});
7171
}
7272
// model hash id changed for actual device
73-
// will cache model for both actual device and CPU as accelerator
74-
ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 3);
73+
// will cache model for actual device with new device id
74+
ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 2);
7575
core.set_property(ov::cache_dir(""));
7676
}
7777

@@ -89,8 +89,8 @@ TEST_F(AutoFuncTests, load_model_path_to_actual_device_and_disable_CPU_accelerat
8989
ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)});
9090
}
9191
// No cached model for actual device
92-
// will cache model for both actual device and CPU plugin
93-
ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 2);
92+
// will cache model only for actual device. CVS-141026
93+
ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 1);
9494
ov::test::utils::removeFilesWithExt(cache_path, "blob");
9595
{
9696
auto compiled_model = core.compile_model(
@@ -116,8 +116,8 @@ TEST_F(AutoFuncTests, load_model_path_to_actual_device_and_disable_CPU_accelerat
116116
ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)});
117117
}
118118
// model hash id changed for actual device
119-
// will cache model for both actual device and CPU as accelerator
120-
ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 3);
119+
// will cache model for actual device with new device id
120+
ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 2);
121121
core.set_property(ov::cache_dir(""));
122122
ov::test::utils::removeIRFiles(m_xml_path, m_bin_path);
123123
}
@@ -132,8 +132,8 @@ TEST_F(AutoFuncTests, load_cached_model_to_actual_device_and_disable_CPU_acceler
132132
ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)});
133133
}
134134
// No cached model for actual device
135-
// will cache model for both actual device and CPU plugin
136-
ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 2);
135+
// will cache model only for actual device. CVS-141026
136+
ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 1);
137137
ov::test::utils::removeFilesWithExt(cache_path, "blob");
138138
{
139139
auto compiled_model = core.compile_model(
@@ -160,7 +160,7 @@ TEST_F(AutoFuncTests, load_cached_model_to_actual_device_and_disable_CPU_acceler
160160
ov::intel_auto::enable_startup_fallback(false)});
161161
}
162162
// model hash id changed for actual device
163-
// will cache 2 models for actual device and no cached model for CPU
163+
// will cache model for actual device with new device id
164164
ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 2);
165165
core.set_property(ov::cache_dir(""));
166166
}

0 commit comments

Comments
 (0)