
Commit 65917b2

[core] Add hint::compiled_blob property (#29354)
### Details:
- Add `ov::hint::compiled_blob`, a property with a tensor hint that contains the compiled model blob.
- The compiled blob hint can be a regular or weightless model.
- For a weightless model, the `WEIGHTS_PATH` property is a hint where to find the model's weights.
- If the model is found in the cache, the weights path is read from the compiled options or from the `WEIGHTS_PATH` property hint.
- If compilation from the blob hint fails, the fallback path (the original model) is used.

### Related PRs:
- #29175
- #29304
- #29530

### Tickets:
- CVS-153070

---------

Signed-off-by: Raasz, Pawel <[email protected]>
Signed-off-by: Pawel Raasz <[email protected]>
1 parent 6063d3f commit 65917b2
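
As a usage illustration (not part of the diff below), here is a minimal sketch of the intended flow: export a compiled model once, wrap the bytes in an ov::Tensor, and pass it back through the new hint. The model path and device name are placeholders; per the commit message, a failed import from the blob hint falls back to compiling the original model.

```cpp
#include <cstring>
#include <sstream>

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    const auto model = core.read_model("model.xml");  // placeholder path

    // First run: compile and export the blob.
    auto compiled = core.compile_model(model, "CPU");
    std::stringstream blob_stream;
    compiled.export_model(blob_stream);

    // Wrap the exported bytes in a 1-D u8 tensor.
    const auto bytes = blob_stream.str();
    ov::Tensor blob{ov::element::u8, ov::Shape{bytes.size()}};
    std::memcpy(blob.data(), bytes.data(), bytes.size());

    // Later run: hand the blob back as a hint; the core imports it instead of
    // recompiling, and falls back to the original model if the import fails.
    auto imported = core.compile_model(model, "CPU", ov::hint::compiled_blob(blob));
    return 0;
}
```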

File tree

25 files changed: +936 -109 lines
openvino/util/variant_visitor.hpp

+15

@@ -0,0 +1,15 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+namespace ov::util {
+
+template <class... Ts>
+struct VariantVisitor : Ts... {
+    using Ts::operator()...;
+};
+
+template <class... Ts>
+VariantVisitor(Ts...) -> VariantVisitor<Ts...>;
+}  // namespace ov::util
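
For context, the new helper is the usual C++17 overload-set pattern for std::visit. A small standalone sketch of how it can be used (the variant alternatives here are illustrative only):

```cpp
#include <iostream>
#include <string>
#include <variant>

#include "openvino/util/variant_visitor.hpp"

int main() {
    std::variant<int, std::string> value{std::string{"model.xml"}};

    // One lambda per alternative; the deduction guide merges them into a
    // single callable whose operator() set is the union of the lambdas'.
    const auto visitor = ov::util::VariantVisitor{
        [](int number) { std::cout << "number: " << number << '\n'; },
        [](const std::string& path) { std::cout << "path: " << path << '\n'; }};

    std::visit(visitor, value);  // prints "path: model.xml"
    return 0;
}
```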

src/inference/dev_api/openvino/runtime/internal_properties.hpp

-6
@@ -37,12 +37,6 @@ static constexpr Property<std::vector<PropertyName>, PropertyMutability::RO> cac
  */
 static constexpr Property<bool, PropertyMutability::RO> caching_with_mmap{"CACHING_WITH_MMAP"};
 
-/**
- * @brief Property to get a ov::AlignedBuffer with cached model
- * @ingroup ov_dev_api_plugin_api
- */
-static constexpr Property<std::shared_ptr<ov::AlignedBuffer>, PropertyMutability::RW> cached_model_buffer{"CACHED_MODEL_BUFFER"};
-
 /**
  * @brief Allow to create exclusive_async_requests with one executor
  * @ingroup ov_dev_api_plugin_api

src/inference/include/openvino/runtime/properties.hpp

+8
@@ -24,6 +24,7 @@
 #include "openvino/core/except.hpp"
 #include "openvino/core/type/element_type.hpp"
 #include "openvino/runtime/common.hpp"
+#include "openvino/runtime/tensor.hpp"
 
 namespace ov {
 
@@ -607,6 +608,13 @@ static constexpr Property<element::Type, PropertyMutability::RW> kv_cache_precis
  */
 static constexpr Property<float, PropertyMutability::RW> activations_scale_factor{"ACTIVATIONS_SCALE_FACTOR"};
 
+/** @brief Hint for the device to use a compiled model blob.
+ * @ingroup ov_runtime_cpp_prop_api
+ *
+ * The property is used to pass a compiled blob as an ov::Tensor.
+ * The blob can be a regular or weightless model. The `weights_path` property is a hint where to look for weights.
+ */
+inline constexpr Property<Tensor, PropertyMutability::RW> compiled_blob{"COMPILED_BLOB"};
 }  // namespace hint
 
 /**
src/inference/src/cache_manager.hpp

+8 -13
@@ -15,6 +15,7 @@
 #include <string>
 
 #include "openvino/runtime/shared_buffer.hpp"
+#include "openvino/runtime/tensor.hpp"
 #include "openvino/util/file_util.hpp"
 #include "openvino/util/mmap_object.hpp"
 
@@ -69,7 +70,7 @@ class ICacheManager {
     /**
      * @brief Function passing created input stream
      */
-    using StreamReader = std::function<void(std::istream&, std::shared_ptr<ov::AlignedBuffer>)>;
+    using StreamReader = std::function<void(std::istream&, ov::Tensor&)>;
 
     /**
      * @brief Callback when OpenVINO intends to read model from cache
@@ -135,19 +136,13 @@ class FileStorageCacheManager final : public ICacheManager {
     void read_cache_entry(const std::string& id, bool enable_mmap, StreamReader reader) override {
         // Fix the bug caused by pugixml, which may return unexpected results if the locale is different from "C".
         ScopedLocale plocal_C(LC_ALL, "C");
-        auto blob_file_name = getBlobFile(id);
+        const auto blob_file_name = getBlobFile(id);
         if (ov::util::file_exists(blob_file_name)) {
-            if (enable_mmap) {
-                auto mmap = ov::load_mmap_object(blob_file_name);
-                auto shared_buffer =
-                    std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(mmap->data(), mmap->size(), mmap);
-                OwningSharedStreamBuffer buf(shared_buffer);
-                std::istream stream(&buf);
-                reader(stream, shared_buffer);
-            } else {
-                std::ifstream stream(blob_file_name, std::ios_base::binary);
-                reader(stream, nullptr);
-            }
+            auto compiled_blob =
+                read_tensor_data(blob_file_name, element::u8, PartialShape::dynamic(1), 0, enable_mmap);
+            SharedStreamBuffer buf{reinterpret_cast<char*>(compiled_blob.data()), compiled_blob.get_byte_size()};
+            std::istream stream(&buf);
+            reader(stream, compiled_blob);
         }
     }

src/inference/src/dev/core_impl.cpp

+94 -13
@@ -5,6 +5,7 @@
 #include "core_impl.hpp"
 
 #include <memory>
+#include <variant>
 
 #include "check_network_batchable.hpp"
 #include "itt.hpp"
@@ -29,6 +30,7 @@
 #include "openvino/util/common_util.hpp"
 #include "openvino/util/file_util.hpp"
 #include "openvino/util/shared_object.hpp"
+#include "openvino/util/variant_visitor.hpp"
 #include "openvino/util/xml_parse_utils.hpp"
 #include "ov_plugins.hpp"
 #ifdef PROXY_PLUGIN_ENABLED
@@ -207,6 +209,64 @@ static const auto core_properties_names =
 
 static const auto auto_batch_properties_names =
     ov::util::make_array(ov::auto_batch_timeout.name(), ov::hint::allow_auto_batching.name());
+
+ov::util::Path extract_weight_path(const std::string& compiled_properties) {
+    if (auto start = compiled_properties.find(ov::weights_path.name()); start != std::string::npos) {
+        start += std::string_view{ov::weights_path.name()}.size() + 1;
+        auto length = compiled_properties.find(",", start);
+        if (length != std::string::npos) {
+            length -= start;
+        }
+        return {compiled_properties.substr(start, length)};
+    } else {
+        return {};
+    }
+}
+
+using model_hint_t = std::variant<std::shared_ptr<const ov::Model>, std::string>;
+
+ov::SoPtr<ov::ICompiledModel> import_compiled_model(const ov::Plugin& plugin,
+                                                    const ov::SoPtr<ov::IRemoteContext>& context,
+                                                    const ov::AnyMap& config) {
+    ov::SoPtr<ov::ICompiledModel> compiled_model;
+    if (auto blob_hint = config.find(ov::hint::compiled_blob.name()); blob_hint != config.end()) {
+        try {
+            auto compiled_blob = blob_hint->second.as<ov::Tensor>();
+            ov::SharedStreamBuffer buffer{reinterpret_cast<char*>(compiled_blob.data()), compiled_blob.get_byte_size()};
+            std::istream stream{&buffer};
+            compiled_model =
+                context ? plugin.import_model(stream, context, config) : plugin.import_model(stream, config);
+        } catch (...) {
+        }
+    }
+    return compiled_model;
+}
+
+ov::SoPtr<ov::ICompiledModel> import_compiled_model(const ov::Plugin& plugin,
+                                                    const ov::SoPtr<ov::IRemoteContext>& context,
+                                                    const ov::AnyMap& config,
+                                                    const model_hint_t& model_hint) {
+    auto cfg = config;
+    const auto apply_model_hint = ov::util::VariantVisitor{
+        [&cfg, &plugin](const std::shared_ptr<const ov::Model>& model_ptr) {
+            if (model_ptr != nullptr &&
+                ov::util::contains(plugin.get_property(ov::supported_properties), ov::hint::model)) {
+                cfg[ov::hint::model.name()] = std::const_pointer_cast<ov::Model>(model_ptr);
+            }
+        },
+        [&cfg, &plugin](const std::string& model_path) {
+            if (cfg.count(ov::weights_path.name()) == 0 &&
+                ov::util::contains(plugin.get_property(ov::supported_properties), ov::weights_path)) {
+                ov::util::Path weights_path{model_path};
+                weights_path.replace_extension(".bin");
+                if (ov::util::file_exists(weights_path)) {
+                    cfg[ov::weights_path.name()] = weights_path.string();
+                }
+            }
+        }};
+    std::visit(apply_model_hint, model_hint);
+    return import_compiled_model(plugin, context, cfg);
+}
 }  // namespace
 
 bool ov::is_config_applicable(const std::string& user_device_name, const std::string& subprop_device_name) {
@@ -770,11 +830,13 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
 
     auto parsed = parseDeviceNameIntoConfig(deviceName, coreConfig, config_with_batch, is_proxy_device(deviceName));
     auto plugin = get_plugin(parsed._deviceName);
-    ov::SoPtr<ov::ICompiledModel> res;
     // will consume ov::cache_dir if plugin not support it
     auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
+    auto res = import_compiled_model(plugin, {}, parsed._config, model);
     // Skip caching for proxy plugin. HW plugin will load network from the cache
-    if (cacheManager && device_supports_model_caching(plugin, parsed._config) && !is_proxy_device(plugin)) {
+    if (res) {
+        // hint::compiled_blob is set and imported skip compilation
+    } else if (cacheManager && device_supports_model_caching(plugin, parsed._config) && !is_proxy_device(plugin)) {
         CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap()};
         cacheContent.blobId = ov::ModelCache::compute_hash(model, create_compile_config(plugin, parsed._config));
         cacheContent.model = std::const_pointer_cast<ov::Model>(model);
@@ -805,11 +867,13 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
 
     auto parsed = parseDeviceNameIntoConfig(deviceName, coreConfig, config_with_batch, is_proxy_device(deviceName));
     auto plugin = get_plugin(parsed._deviceName);
-    ov::SoPtr<ov::ICompiledModel> res;
     // will consume ov::cache_dir if plugin not support it
    auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
+    auto res = import_compiled_model(plugin, context, parsed._config, model);
     // Skip caching for proxy plugin. HW plugin will load network from the cache
-    if (cacheManager && device_supports_model_caching(plugin, parsed._config) && !is_proxy_device(plugin)) {
+    if (res) {
+        // hint::compiled_blob is set and imported skip compilation
+    } else if (cacheManager && device_supports_model_caching(plugin, parsed._config) && !is_proxy_device(plugin)) {
         CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap()};
         cacheContent.blobId = ov::ModelCache::compute_hash(model, create_compile_config(plugin, parsed._config));
         std::unique_ptr<CacheGuardEntry> lock = cacheGuard.get_hash_lock(cacheContent.blobId);
@@ -830,11 +894,13 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::string& mod
     auto parsed = parse_device_config(device_name, coreConfig, config, false);
     // in case of compile_model(file_name), we need to clear-up core-level properties
     auto plugin = get_plugin(parsed._deviceName);
-    ov::SoPtr<ov::ICompiledModel> compiled_model;
     // will consume ov::cache_dir if plugin not support it
     auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
+    auto compiled_model = import_compiled_model(plugin, {}, parsed._config, model_path);
 
-    if (cacheManager && device_supports_model_caching(plugin, parsed._config) && !is_proxy_device(plugin)) {
+    if (compiled_model) {
+        // hint::compiled_blob is set and imported skip compilation
+    } else if (cacheManager && device_supports_model_caching(plugin, parsed._config) && !is_proxy_device(plugin)) {
         // Skip caching for proxy plugin. HW plugin will load network from the cache
         CoreConfig::remove_core_skip_cache_dir(parsed._config);
         CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap(), model_path};
@@ -858,11 +924,13 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::string& mod
     OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::compile_model::from_memory");
     auto parsed = parseDeviceNameIntoConfig(device_name, coreConfig, config);
     auto plugin = get_plugin(parsed._deviceName);
-    ov::SoPtr<ov::ICompiledModel> compiled_model;
     // will consume ov::cache_dir if plugin not support it
     auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
+    auto compiled_model = import_compiled_model(plugin, {}, parsed._config);
     // Skip caching for proxy plugin. HW plugin will load network from the cache
-    if (cacheManager && device_supports_model_caching(plugin, parsed._config) && !is_proxy_device(plugin)) {
+    if (compiled_model) {
+        // hint::compiled_blob is set and imported skip compilation
+    } else if (cacheManager && device_supports_model_caching(plugin, parsed._config) && !is_proxy_device(plugin)) {
         CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap()};
         cacheContent.blobId =
             ov::ModelCache::compute_hash(model_str, weights, create_compile_config(plugin, parsed._config));
@@ -1443,12 +1511,12 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
         cacheContent.blobId,
         cacheContent.mmap_enabled && ov::util::contains(plugin.get_property(ov::internal::supported_properties),
                                                         ov::internal::caching_with_mmap),
-        [&](std::istream& networkStream, std::shared_ptr<ov::AlignedBuffer> model_buffer) {
+        [&](std::istream& networkStream, ov::Tensor& compiled_blob) {
            OV_ITT_SCOPE(FIRST_INFERENCE,
                         ov::itt::domains::LoadTime,
                         "Core::load_model_from_cache::ReadStreamAndImport");
+            ov::CompiledBlobHeader header;
            try {
-                ov::CompiledBlobHeader header;
                networkStream >> header;
                if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) {
                    // Original file is changed, don't use cache
@@ -1477,21 +1545,34 @@
 
                ov::AnyMap update_config = config;
                update_config[ov::loaded_from_cache.name()] = true;
+                if (cacheContent.model &&
+                    util::contains(plugin.get_property(ov::supported_properties), ov::hint::model)) {
+                    update_config[ov::hint::model.name()] = cacheContent.model;
+                }
 
                if (util::contains(plugin.get_property(ov::supported_properties), ov::hint::model) &&
                    cacheContent.model) {
                    update_config[ov::hint::model.name()] = cacheContent.model;
                }
                if (util::contains(plugin.get_property(ov::supported_properties), ov::weights_path)) {
-                    std::filesystem::path weights_path = cacheContent.modelPath;
+                    util::Path weights_path;
+
+                    if (auto&& path_hint = update_config.find(ov::weights_path.name());
+                        path_hint != update_config.end()) {
+                        weights_path = path_hint->second.as<std::string>();
+                    } else if (weights_path = extract_weight_path(header.get_runtime_info()); weights_path.empty()) {
+                        weights_path = cacheContent.modelPath;
+                        weights_path.replace_extension(".bin");
+                    }
                    weights_path.replace_extension(".bin");
 
                    if (ov::util::file_exists(weights_path)) {
                        update_config[ov::weights_path.name()] = weights_path.string();
                    }
                }
-                if (model_buffer) {
-                    update_config[ov::internal::cached_model_buffer.name()] = model_buffer;
+
+                if (compiled_blob) {
+                    update_config[ov::hint::compiled_blob.name()] = compiled_blob;
                }
                compiled_model = context ? plugin.import_model(networkStream, context, update_config)
                                         : plugin.import_model(networkStream, update_config);