Skip to content

Commit dc29c76

Browse files
Reworking filter provider (#5148)
There are a few things here: * The filters should work on `DataIdentifierBorrowed` instead of `DataRequest`; metadata should not be relevant for filtering * This implies a rename from `RequestFilterDataProvider` to `FilterDataProvider` * previously the provider implementations returned `DataErrorKind::Filtered` for filtered ids, however this does not work with fallback. They should return `DataErrorKind::IdentiferNotFound`; to a caller (like the fallback adapter) it should be opaque whether the pipeline contains a filter or not * `filter_by_langid_allowlist_strict` is marked as "will be replaced with a smarter algorithm for locale filtering". This smarter algorithm now exists, so this should be deleted (fixes #834) * The `Filterable` blanket trait adds `.filterable` to every type in this crate's rustdoc, and to every type in dev docs. I replaced it with a normal constructor.
1 parent b021268 commit dc29c76

File tree

12 files changed

+105
-298
lines changed

12 files changed

+105
-298
lines changed

ffi/capi/bindings/c/ICU4XDataError.d.h

Lines changed: 5 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ffi/capi/bindings/cpp/ICU4XDataError.d.hpp

Lines changed: 10 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ffi/capi/bindings/cpp/ICU4XDataError.hpp

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ffi/capi/bindings/dart/DataError.g.dart

Lines changed: 0 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ffi/capi/bindings/js/ICU4XDataError.d.ts

Lines changed: 0 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ffi/capi/bindings/js/ICU4XDataError.mjs

Lines changed: 10 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ffi/capi/src/errors.rs

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,11 @@ pub mod ffi {
1919
MarkerNotFound = 0x01,
2020
IdentifierNotFound = 0x02,
2121
InvalidRequest = 0x03,
22-
FilteredResource = 0x04,
23-
InconsistentData = 0x05,
24-
Downcast = 0x06,
25-
Deserialize = 0x07,
26-
Custom = 0x08,
27-
Io = 0x09,
22+
InconsistentData = 0x04,
23+
Downcast = 0x05,
24+
Deserialize = 0x06,
25+
Custom = 0x07,
26+
Io = 0x08,
2827
}
2928

3029
#[derive(Debug, PartialEq, Eq)]
@@ -133,7 +132,6 @@ impl From<DataError> for ICU4XError {
133132
DataErrorKind::MarkerNotFound => ICU4XError::DataMissingDataMarkerError,
134133
DataErrorKind::IdentifierNotFound => ICU4XError::DataMissingLocaleError,
135134
DataErrorKind::InvalidRequest => ICU4XError::DataExtraneousLocaleError,
136-
DataErrorKind::Filtered => ICU4XError::DataFilteredResourceError,
137135
DataErrorKind::Downcast(..) => ICU4XError::DataMismatchedTypeError,
138136
DataErrorKind::Custom => ICU4XError::DataCustomError,
139137
#[cfg(all(
@@ -152,7 +150,6 @@ impl From<DataError> for ICU4XDataError {
152150
DataErrorKind::MarkerNotFound => Self::MarkerNotFound,
153151
DataErrorKind::IdentifierNotFound => Self::IdentifierNotFound,
154152
DataErrorKind::InvalidRequest => Self::InvalidRequest,
155-
DataErrorKind::Filtered => Self::FilteredResource,
156153
DataErrorKind::InconsistentData(..) => Self::InconsistentData,
157154
DataErrorKind::Downcast(..) => Self::Downcast,
158155
DataErrorKind::Deserialize => Self::Deserialize,

provider/adapters/src/filter/impls.rs

Lines changed: 26 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -6,33 +6,37 @@ use super::*;
66
use alloc::boxed::Box;
77
use icu_provider::prelude::*;
88

9-
use icu_locale::LanguageIdentifier;
10-
11-
type RequestFilterDataProviderOutput<'a, D> =
12-
RequestFilterDataProvider<D, Box<dyn Fn(DataRequest) -> bool + Sync + 'a>>;
9+
impl<D> FilterDataProvider<D, fn(DataIdentifierBorrowed) -> bool> {
10+
/// Creates a [`FilterDataProvider`] that does not do any filtering.
11+
///
12+
/// Filters can be added using [`Self::with_filter`].
13+
pub fn new(provider: D, filter_name: &'static str) -> Self {
14+
Self {
15+
inner: provider,
16+
predicate: |_| true,
17+
filter_name,
18+
}
19+
}
20+
}
1321

14-
impl<D, F> RequestFilterDataProvider<D, F>
22+
impl<D, F> FilterDataProvider<D, F>
1523
where
16-
F: Fn(DataRequest) -> bool + Sync,
24+
F: Fn(DataIdentifierBorrowed) -> bool + Sync,
1725
{
1826
/// Filter out data requests with certain langids according to the predicate function. The
1927
/// predicate should return `true` to allow a langid and `false` to reject a langid.
2028
///
21-
/// Data requests with no langid will be allowed. To reject data requests without a langid,
22-
/// chain this with [`Self::require_langid`].
23-
///
2429
/// # Examples
2530
///
2631
/// ```
2732
/// use icu_locale::LanguageIdentifier;
2833
/// use icu_locale::{langid, subtags::language};
2934
/// use icu_provider::hello_world::*;
3035
/// use icu_provider::prelude::*;
31-
/// use icu_provider_adapters::filter::Filterable;
36+
/// use icu_provider_adapters::filter::FilterDataProvider;
3237
///
33-
/// let provider = HelloWorldProvider
34-
/// .filterable("Demo no-English filter")
35-
/// .filter_by_langid(|langid| langid.language != language!("en"));
38+
/// let provider = FilterDataProvider::new(HelloWorldProvider, "Demo no-English filter")
39+
/// .with_filter(|id| id.locale.language() != language!("en"));
3640
///
3741
/// // German requests should succeed:
3842
/// let de = DataIdentifierCow::from_locale(langid!("de").into());
@@ -58,7 +62,7 @@ where
5862
/// assert!(matches!(
5963
/// response,
6064
/// Err(DataError {
61-
/// kind: DataErrorKind::Filtered,
65+
/// kind: DataErrorKind::IdentifierNotFound,
6266
/// ..
6367
/// })
6468
/// ));
@@ -70,147 +74,22 @@ where
7074
/// assert!(available_ids.contains(&DataIdentifierCow::from_locale(langid!("de").into())));
7175
/// assert!(!available_ids.contains(&DataIdentifierCow::from_locale(langid!("en").into())));
7276
/// ```
73-
pub fn filter_by_langid<'a>(
77+
#[allow(clippy::type_complexity)]
78+
pub fn with_filter<'a>(
7479
self,
75-
predicate: impl Fn(&LanguageIdentifier) -> bool + Sync + 'a,
76-
) -> RequestFilterDataProviderOutput<'a, D>
77-
where
78-
F: 'a,
79-
{
80-
let old_predicate = self.predicate;
81-
RequestFilterDataProvider {
82-
inner: self.inner,
83-
predicate: Box::new(move |request| -> bool {
84-
if !(old_predicate)(request) {
85-
return false;
86-
}
87-
predicate(&request.id.locale.get_langid())
88-
}),
89-
filter_name: self.filter_name,
90-
}
91-
}
92-
93-
/// Filter out data request except those having a language identifier that exactly matches
94-
/// one in the allowlist.
95-
///
96-
/// This will be replaced with a smarter algorithm for locale filtering; see
97-
/// <https://github.com/unicode-org/icu4x/issues/834>
98-
///
99-
/// Data requests with no langid will be allowed. To reject data requests without a langid,
100-
/// chain this with [`Self::require_langid`].
101-
///
102-
/// # Examples
103-
///
104-
/// ```
105-
/// use icu_locale::langid;
106-
/// use icu_provider::hello_world::*;
107-
/// use icu_provider::prelude::*;
108-
/// use icu_provider_adapters::filter::Filterable;
109-
///
110-
/// let allowlist = [langid!("de"), langid!("zh")];
111-
/// let provider = HelloWorldProvider
112-
/// .filterable("Demo German+Chinese filter")
113-
/// .filter_by_langid_allowlist_strict(&allowlist);
114-
///
115-
/// // German requests should succeed:
116-
/// let de = DataIdentifierCow::from_locale(langid!("de").into());
117-
/// let response: Result<DataResponse<HelloWorldV1Marker>, _> =
118-
/// provider.load(DataRequest {
119-
/// id: de.as_borrowed(),
120-
/// ..Default::default()
121-
/// });
122-
/// assert!(matches!(response, Ok(_)));
123-
///
124-
/// // English requests should fail:
125-
/// let en = DataIdentifierCow::from_locale(langid!("en-US").into());
126-
127-
/// let response: Result<DataResponse<HelloWorldV1Marker>, _> =
128-
/// provider.load(DataRequest {
129-
/// id: en.as_borrowed(),
130-
/// ..Default::default()
131-
/// });
132-
/// assert!(matches!(
133-
/// response,
134-
/// Err(DataError {
135-
/// kind: DataErrorKind::Filtered,
136-
/// ..
137-
/// })
138-
/// ));
139-
/// assert_eq!(
140-
/// response.unwrap_err().str_context,
141-
/// Some("Demo German+Chinese filter")
142-
/// );
143-
/// ```
144-
pub fn filter_by_langid_allowlist_strict<'a>(
145-
self,
146-
allowlist: &'a [LanguageIdentifier],
147-
) -> RequestFilterDataProviderOutput<'a, D>
148-
where
149-
F: 'a,
150-
{
151-
let old_predicate = self.predicate;
152-
RequestFilterDataProvider {
153-
inner: self.inner,
154-
predicate: Box::new(move |request| -> bool {
155-
if !(old_predicate)(request) {
156-
return false;
157-
}
158-
request.id.locale.is_langid_und()
159-
|| allowlist.contains(&request.id.locale.get_langid())
160-
}),
161-
filter_name: self.filter_name,
162-
}
163-
}
164-
165-
/// Require that data requests contain a langid.
166-
///
167-
/// # Examples
168-
///
169-
/// ```
170-
/// use icu_locale::langid;
171-
/// use icu_provider::hello_world::*;
172-
/// use icu_provider::prelude::*;
173-
/// use icu_provider_adapters::filter::Filterable;
174-
///
175-
/// let provider = HelloWorldProvider
176-
/// .filterable("Demo require-langid filter")
177-
/// .require_langid();
178-
///
179-
/// // Requests with a data id should succeed:
180-
/// let id = DataIdentifierCow::from_locale(langid!("de").into());
181-
/// let response: Result<DataResponse<HelloWorldV1Marker>, _> =
182-
/// provider.load(DataRequest {
183-
/// id: id.as_borrowed(),
184-
/// ..Default::default()
185-
/// });
186-
/// assert!(matches!(response, Ok(_)));
187-
///
188-
/// // Requests without a data should fail:
189-
/// let response: Result<DataResponse<HelloWorldV1Marker>, _> =
190-
/// provider.load(DataRequest {
191-
/// id: Default::default(),
192-
/// ..Default::default()
193-
/// });
194-
/// assert!(matches!(
195-
/// response,
196-
/// Err(DataError {
197-
/// kind: DataErrorKind::Filtered,
198-
/// ..
199-
/// })
200-
/// ));
201-
/// ```
202-
pub fn require_langid<'a>(self) -> RequestFilterDataProviderOutput<'a, D>
80+
predicate: impl Fn(DataIdentifierBorrowed) -> bool + Sync + 'a,
81+
) -> FilterDataProvider<D, Box<dyn Fn(DataIdentifierBorrowed) -> bool + Sync + 'a>>
20382
where
20483
F: 'a,
20584
{
20685
let old_predicate = self.predicate;
207-
RequestFilterDataProvider {
86+
FilterDataProvider {
20887
inner: self.inner,
209-
predicate: Box::new(move |request| -> bool {
210-
if !(old_predicate)(request) {
88+
predicate: Box::new(move |id| -> bool {
89+
if !(old_predicate)(id) {
21190
return false;
21291
}
213-
!request.id.locale.is_langid_und()
92+
predicate(id)
21493
}),
21594
filter_name: self.filter_name,
21695
}

0 commit comments

Comments
 (0)