Skip to content

Commit b5771e4

Browse files
committed
Merge #48
48: Char property macro 2.0 r=behnam Replaces #41. See #41 for earlier discussion. An example will show better than I can tell: ```rust char_property! { /// Represents the Unicode character /// [*Bidi_Class*](http://www.unicode.org/reports/tr44/#Bidi_Class) property, /// also known as the *bidirectional character type*. /// /// * <http://www.unicode.org/reports/tr9/#Bidirectional_Character_Types> /// * <http://www.unicode.org/reports/tr44/#Bidi_Class_Values> pub enum BidiClass { /// Any strong left-to-right character /// /// ***General Scope*** /// /// LRM, most alphabetic, syllabic, Han ideographs, /// non-European or non-Arabic digits, ... LeftToRight { abbr => L, long => Left_To_Right, display => "Left-to-Right", } /// Any strong right-to-left (non-Arabic-type) character /// /// ***General Scope*** /// /// RLM, Hebrew alphabet, and related punctuation RightToLeft { abbr => R, long => Right_To_Left, display => "Right-to-Left", } /// Any strong right-to-left (Arabic-type) character /// /// ***General Scope*** /// /// ALM, Arabic, Thaana, and Syriac alphabets, /// most punctuation specific to those scripts, ... ArabicLetter { abbr => AL, long => Arabic_Letter, display => "Right-to-Left Arabic", } } } /// Abbreviated name bindings for the `BidiClass` property pub mod abbr_names for abbr; /// Name bindings for the `BidiClass` property as they appear in Unicode documentation pub mod long_names for long; ``` expands to: ```rust /// Represents the Unicode character /// [*Bidi_Class*](http://www.unicode.org/reports/tr44/#Bidi_Class) property, /// also known as the *bidirectional character type*. 
/// /// * <http://www.unicode.org/reports/tr9/#Bidirectional_Character_Types> /// * <http://www.unicode.org/reports/tr44/#Bidi_Class_Values> #[allow(bad_style)] #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] pub enum BidiClass { /// Any strong left-to-right character LeftToRight, /// Any strong right-to-left (non-Arabic-type) character RightToLeft, /// Any strong right-to-left (Arabic-type) character ArabicLetter, } /// Abbreviated name bindings for the `BidiClass` property #[allow(bad_style)] pub mod abbr_names { pub use super::BidiClass::LeftToRight as L; pub use super::BidiClass::RightToLeft as R; pub use super::BidiClass::ArabicLetter as AL; } /// Name bindings for the `BidiClass` property as they appear in Unicode documentation #[allow(bad_style)] pub mod long_names { pub use super::BidiClass::LeftToRight as Left_To_Right; pub use super::BidiClass::RightToLeft as Right_To_Left; pub use super::BidiClass::ArabicLetter as Arabic_Letter; } #[allow(bad_style)] #[allow(unreachable_patterns)] impl ::std::str::FromStr for BidiClass { type Err = (); fn from_str(s: &str) -> Result<Self, Self::Err> { match s { "LeftToRight" => Ok(BidiClass::LeftToRight), "RightToLeft" => Ok(BidiClass::RightToLeft), "ArabicLetter" => Ok(BidiClass::ArabicLetter), "L" => Ok(BidiClass::LeftToRight), "R" => Ok(BidiClass::RightToLeft), "AL" => Ok(BidiClass::ArabicLetter), "Left_To_Right" => Ok(BidiClass::LeftToRight), "Right_To_Left" => Ok(BidiClass::RightToLeft), "Arabic_Letter" => Ok(BidiClass::ArabicLetter), _ => Err(()), } } } #[allow(bad_style)] #[allow(unreachable_patterns)] impl ::std::fmt::Display for BidiClass { fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { match *self { BidiClass::LeftToRight => write!(f, "{}", "Left-to-Right"), BidiClass::RightToLeft => write!(f, "{}", "Right-to-Left"), BidiClass::ArabicLetter => write!(f, "{}", "Right-to-Left Arabic"), BidiClass::LeftToRight => write!(f, "{}", "Left_To_Right".replace('_', " ")), BidiClass::RightToLeft => 
write!(f, "{}", "Right_To_Left".replace('_', " ")), BidiClass::ArabicLetter => write!(f, "{}", "Arabic_Letter".replace('_', " ")), _ => { write!( f, "{}", match *self { BidiClass::LeftToRight => "L", BidiClass::RightToLeft => "R", BidiClass::ArabicLetter => "AL", BidiClass::LeftToRight => "LeftToRight", BidiClass::RightToLeft => "RightToLeft", BidiClass::ArabicLetter => "ArabicLetter", } ) } } } } #[allow(bad_style)] impl ::char_property::EnumeratedCharProperty for BidiClass { fn abbr_name(&self) -> &'static str { match *self { BidiClass::LeftToRight => "L", BidiClass::RightToLeft => "R", BidiClass::ArabicLetter => "AL", } } fn all_values() -> &'static [BidiClass] { const VALUES: &[BidiClass] = &[ BidiClass::LeftToRight, BidiClass::RightToLeft, BidiClass::ArabicLetter, ]; VALUES } } ``` All three of the `abbr`, `long`, and `display` properties of the enum are optional, and have sane fallbacks: `abbr_name` and `long_name` return `None` if unspecified, and `fmt::Display` will check, in order, for `display`, `long_name`, `abbr_name`, and the variant name until it finds one to use (stringified, of course). `FromStr` is defined, matching against any of the provided `abbr`, `long`, and variant name. <hr /> Important notes: - <strike>The current format uses associated consts, so it works on beta but won't work on stable until 1.20 is stable.</strike> - Consts have a slightly different meaning than `pub use` -- `pub use` aliases the type where `const` is a new object and if used in pattern matching is a `==` call and not a pattern match. - For this reason I'm actually slightly leaning towards using `pub use` even once associated consts land; they're compartmentalized (so `use Property::*` doesn't pull in 3x as many symbols as there are variants). After using the const based aliasing for a little bit, I'm inclined to like the current solution of `unic::ucd::bidi::BidiClass::*` + `unic::ucd::bidi::bidi_class::abbr_names::*`. 
These really should be a `pub use` and not a `const`. - Note that I still think `const` are the way to go for cases like `Canonical_Combining_Class`, though. - <strike>The current syntax could easily be adapted to use modules instead of associated consts, but was written with the associated consts so we could get a feel of how it would look with them.</strike> - The zero-or-more meta match before an enum variant conflicts with the ident match before 1.20. See rust-lang/rust#42913, rust-lang/rust#24189 - The only tests of the macro are rather thin and could be expanded. - It's a macro, so the response when you stick stuff not matching the expected pattern is cryptic at best. - The `CharProperty` trait is pretty much the lowest common denominator. It's a starting point, and we can iterate from there. - How and where do we want to make `CharProperty` an externally visible trait? Currently having it in namespace is the only way to access `abbr_name` and `long_name`. - <strike>Earlier discussion suggested putting these into `unic::utils::char_property`. Moving it would be simple, but for now it's living in the root of `unic-utils`</strike> - <strike>The crate `unic-utils` is currently in the workspace by virtue of being a dependency of `unic`, but is not in any way visible to a crate depending on `unic`.</strike> - <strike>Documentation doesn't exist.</strike>
2 parents 3c8ecc6 + 6b60633 commit b5771e4

File tree

3 files changed

+397
-0
lines changed

3 files changed

+397
-0
lines changed

unic/utils/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ pub const PKG_DESCRIPTION: &'static str = env!("CARGO_PKG_DESCRIPTION");
2929

3030
pub mod char_property;
3131
pub mod codepoints;
32+
mod macros;
3233
pub mod tables;
3334

3435

unic/utils/src/macros.rs

+340
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,340 @@
1+
/// Macro for declaring a character property.
2+
///
3+
/// # Syntax (Enumerated Property)
4+
///
5+
/// ```
6+
/// # #[macro_use] extern crate unic_utils;
7+
/// # fn main() {}
8+
/// char_property! {
9+
/// /// Zero or more attributes
10+
/// pub enum PropertyName {
11+
/// /// Exactly one attribute
12+
/// RustName {
13+
/// abbr => AbbrName,
14+
/// long => Long_Name,
15+
/// display => "&'static str that is a nicer presentation of the name",
16+
/// }
17+
///
18+
/// /// All annotations on the variant are optional*
19+
/// Variant2 {
20+
/// abbr => V2, // *abbr is required for Enumerated Properties
21+
/// }
22+
/// }
23+
///
24+
/// /// Zero or more attributes
25+
/// pub mod abbr_names for abbr;
26+
///
27+
/// /// Zero or more attributes
28+
/// pub mod long_names for long;
29+
/// }
30+
///
31+
/// // You must impl (Partial/Complete)CharProperty manually.
32+
/// # impl unic_utils::char_property::PartialCharProperty for PropertyName {
33+
/// # fn of(_: char) -> Option<Self> { None }
34+
/// # }
35+
/// ```
36+
///
37+
/// # Effect
38+
///
39+
/// - Implements `EnumeratedCharProperty` (`abbr_name` from `abbr`, `all_values` from the variants)
40+
/// - Implements `FromStr` accepting any of the rust, abbr, or long names
41+
/// - Implements `Display` using the given string, falling back when not provided on
42+
/// the long name, the short name, and the rust name, in that order
43+
/// - Populates the module `abbr_names` with `pub use` bindings of variants to their abbr names
44+
/// - Populates the module `long_names` with `pub use` bindings of variants to their long names
45+
/// - Maintains all documentation comments and other `#[attributes]` as would be expected
46+
/// (with some caveats, listed below)
47+
///
48+
/// # Limitations
49+
///
50+
/// Due to [rust-lang/rust/#24189](https://github.com/rust-lang/rust/issues/24189), (fixed in
51+
/// [rust-lang/rust/#42913](https://github.com/rust-lang/rust/pull/42913), landing in 1.20),
52+
/// exactly one attribute line must be used on each variant. On 1.20 or higher, one or more may
53+
/// be used, and the restriction can be relaxed back to the intended zero or more by replacing
54+
/// `$(#[$variant_meta:meta])+` with `$(#[$variant_meta:meta])*`, and
55+
/// `$(#[$variant_meta])+` with `$(#[$variant_meta])*`, and
56+
/// `$(#[$ident_meta:meta])+` with `$(#[$ident_meta:meta])*` and
57+
/// `$(#[$ident_meta])+` with `$(#[$ident_meta])*`, and
58+
/// `$(#[$rest_meta:meta])+` with `$(#[$rest_meta:meta])*`, and
59+
/// `$(#[$rest_meta])+` with `$(#[$rest_meta])*`, and
60+
/// `$(#[$queue_meta:meta])+` with `$(#[$queue_meta:meta])*`, and
61+
/// `$(#[$queue_meta])+` with `$(#[$queue_meta])*`
62+
// TODO: Once adopting 1.20, fix the macro to work with zero attributes on variants (see above)
63+
#[macro_export]
64+
macro_rules! char_property {
65+
// Single public arm: one `pub enum` declaration followed by the two name-module
// declarations (`... for abbr;` and `... for long;`), in that order.
(
66+
$(#[$name_meta:meta])* pub enum $name:ident {
67+
// Each variant is: one or more attributes, the variant identifier, and a single
// token tree (the `{ ... }` annotation body), which is captured unparsed.
$( $(#[$variant_meta:meta])+ $variant:ident $tt:tt )*
68+
}
69+
70+
$(#[$abbr_names_meta:meta])* pub mod $abbr_names:ident for abbr;
71+
$(#[$long_names_meta:meta])* pub mod $long_names:ident for long;
72+
) => {
73+
// Delegate to the internal macro: all accumulator lists start empty and every
// variant is placed, untouched, in `queue`.
__char_property_internal! {
74+
$(#[$name_meta])* pub enum $name
75+
$(#[$abbr_names_meta])* pub mod $abbr_names
76+
$(#[$long_names_meta])* pub mod $long_names
77+
78+
variant [ ]
79+
abbr [ ]
80+
long [ ]
81+
display [ ]
82+
83+
buffer [ ]
84+
queue [ $( $(#[$variant_meta])+ $variant $tt )* ]
85+
}
86+
};
87+
}
88+
89+
// Worker macro invoked by `char_property!`. It calls itself recursively, carrying its state
// as bracketed lists:
//   variant / abbr / long / display -- accumulators filled in as variants are processed
//   buffer -- the variant currently being processed (identifier plus remaining annotations)
//   queue  -- variants not yet processed
// NOTE(review): presumably `#[macro_export]`ed only so expansions of `char_property!` in
// other crates can reach it -- confirm before treating it as public API.
#[macro_export]
90+
macro_rules! __char_property_internal {
91+
// == Queue => Buffer == //
// Applies when `buffer` is empty and `queue` holds at least one variant: the first queued
// variant's attributes and identifier are appended to `variant`, and its identifier plus
// annotation body move into `buffer`.
92+
(
93+
$(#[$name_meta:meta])* pub enum $name:ident
94+
$(#[$abbr_names_meta:meta])* pub mod $abbr_names:ident
95+
$(#[$long_names_meta:meta])* pub mod $long_names:ident
96+
97+
variant [ $( $(#[$variant_meta:meta])+ $variant:ident ; )* ]
98+
abbr [ $( $abbr_variant:ident $abbr:ident ; )* ]
99+
long [ $( $long_variant:ident $long:ident ; )* ]
100+
display [ $( $display_variant:ident $display:expr ; )* ]
101+
102+
buffer [ ]
103+
queue [
104+
$(#[$ident_meta:meta])+ $ident:ident $ident_tt:tt
105+
$( $(#[$rest_meta:meta])+ $rest:ident $rest_tt:tt )*
106+
]
107+
) => {
108+
__char_property_internal! {
109+
$(#[$name_meta])* pub enum $name
110+
$(#[$abbr_names_meta])* pub mod $abbr_names
111+
$(#[$long_names_meta])* pub mod $long_names
112+
113+
variant [
114+
$( $(#[$variant_meta])+ $variant ; )*
115+
$(#[$ident_meta])+ $ident ;
116+
]
117+
abbr [ $( $abbr_variant $abbr ; )* ]
118+
long [ $( $long_variant $long ; )* ]
119+
display [ $( $display_variant $display ; )* ]
120+
121+
buffer [ $ident $ident_tt ]
122+
queue [ $( $(#[$rest_meta])+ $rest $rest_tt )* ]
123+
}
124+
};
125+
126+
// == Buffer -- Abbr Name == //
// Applies when the buffered annotation body starts with `abbr => Ident ,` (the trailing
// comma is part of the pattern): the pair is appended to `abbr` and removed from the buffer.
127+
(
128+
$(#[$name_meta:meta])* pub enum $name:ident
129+
$(#[$abbr_names_meta:meta])* pub mod $abbr_names:ident
130+
$(#[$long_names_meta:meta])* pub mod $long_names:ident
131+
132+
variant [ $( $(#[$variant_meta:meta])+ $variant:ident ; )* ]
133+
abbr [ $( $abbr_variant:ident $abbr:ident ; )* ]
134+
long [ $( $long_variant:ident $long:ident ; )* ]
135+
display [ $( $display_variant:ident $display:expr ; )* ]
136+
137+
buffer [ $ident:ident {
138+
abbr => $ident_abbr:ident ,
139+
$( $rest:tt )*
140+
} ]
141+
queue [ $( $(#[$queue_meta:meta])+ $queue:ident $queue_tt:tt )* ]
142+
) => {
143+
__char_property_internal! {
144+
$(#[$name_meta])* pub enum $name
145+
$(#[$abbr_names_meta])* pub mod $abbr_names
146+
$(#[$long_names_meta])* pub mod $long_names
147+
148+
variant [ $( $(#[$variant_meta])+ $variant ; )* ]
149+
abbr [
150+
$( $abbr_variant $abbr ; )*
151+
$ident $ident_abbr ;
152+
]
153+
long [ $( $long_variant $long ; )* ]
154+
display [ $( $display_variant $display ; )* ]
155+
156+
buffer [ $ident { $( $rest )* } ]
157+
queue [ $( $(#[$queue_meta])+ $queue $queue_tt )* ]
158+
}
159+
};
160+
161+
// == Buffer -- Long Name == //
// Applies when the buffered annotation body starts with `long => Ident ,`: the pair is
// appended to `long` and removed from the buffer.
162+
(
163+
$(#[$name_meta:meta])* pub enum $name:ident
164+
$(#[$abbr_names_meta:meta])* pub mod $abbr_names:ident
165+
$(#[$long_names_meta:meta])* pub mod $long_names:ident
166+
167+
variant [ $( $(#[$variant_meta:meta])+ $variant:ident ; )* ]
168+
abbr [ $( $abbr_variant:ident $abbr:ident ; )* ]
169+
long [ $( $long_variant:ident $long:ident ; )* ]
170+
display [ $( $display_variant:ident $display:expr ; )* ]
171+
172+
buffer [ $ident:ident {
173+
long => $ident_long:ident ,
174+
$( $rest:tt )*
175+
} ]
176+
queue [ $( $(#[$queue_meta:meta])+ $queue:ident $queue_tt:tt )* ]
177+
) => {
178+
__char_property_internal! {
179+
$(#[$name_meta])* pub enum $name
180+
$(#[$abbr_names_meta])* pub mod $abbr_names
181+
$(#[$long_names_meta])* pub mod $long_names
182+
183+
variant [ $( $(#[$variant_meta])+ $variant ; )* ]
184+
abbr [ $( $abbr_variant $abbr ; )* ]
185+
long [
186+
$( $long_variant $long ; )*
187+
$ident $ident_long ;
188+
]
189+
display [ $( $display_variant $display ; )* ]
190+
191+
buffer [ $ident { $( $rest )* } ]
192+
queue [ $( $(#[$queue_meta])+ $queue $queue_tt )* ]
193+
}
194+
};
195+
196+
// == Buffer -- Display == //
// Applies when the buffered annotation body starts with `display => expr ,`: the pair is
// appended to `display` and removed from the buffer.
197+
(
198+
$(#[$name_meta:meta])* pub enum $name:ident
199+
$(#[$abbr_names_meta:meta])* pub mod $abbr_names:ident
200+
$(#[$long_names_meta:meta])* pub mod $long_names:ident
201+
202+
variant [ $( $(#[$variant_meta:meta])+ $variant:ident ; )* ]
203+
abbr [ $( $abbr_variant:ident $abbr:ident ; )* ]
204+
long [ $( $long_variant:ident $long:ident ; )* ]
205+
display [ $( $display_variant:ident $display:expr ; )* ]
206+
207+
buffer [ $ident:ident {
208+
display => $ident_display:expr ,
209+
$( $rest:tt )*
210+
} ]
211+
queue [ $( $(#[$queue_meta:meta])+ $queue:ident $queue_tt:tt )* ]
212+
) => {
213+
__char_property_internal! {
214+
$(#[$name_meta])* pub enum $name
215+
$(#[$abbr_names_meta])* pub mod $abbr_names
216+
$(#[$long_names_meta])* pub mod $long_names
217+
218+
variant [ $( $(#[$variant_meta])+ $variant ; )* ]
219+
abbr [ $( $abbr_variant $abbr ; )* ]
220+
long [ $( $long_variant $long ; )* ]
221+
display [
222+
$( $display_variant $display ; )*
223+
$ident $ident_display ;
224+
]
225+
226+
buffer [ $ident { $( $rest )* } ]
227+
queue [ $( $(#[$queue_meta])+ $queue $queue_tt )* ]
228+
}
229+
};
230+
231+
// == Buffer -- Empty == //
// Applies once every annotation of the buffered variant has been consumed (`Ident {}`):
// the buffer is cleared so the next queued variant can be processed.
232+
(
233+
$(#[$name_meta:meta])* pub enum $name:ident
234+
$(#[$abbr_names_meta:meta])* pub mod $abbr_names:ident
235+
$(#[$long_names_meta:meta])* pub mod $long_names:ident
236+
237+
variant [ $( $(#[$variant_meta:meta])+ $variant:ident ; )* ]
238+
abbr [ $( $abbr_variant:ident $abbr:ident ; )* ]
239+
long [ $( $long_variant:ident $long:ident ; )* ]
240+
display [ $( $display_variant:ident $display:expr ; )* ]
241+
242+
buffer [ $ident:ident {} ]
243+
queue [ $( $(#[$queue_meta:meta])+ $queue:ident $queue_tt:tt )* ]
244+
) => {
245+
__char_property_internal! {
246+
$(#[$name_meta])* pub enum $name
247+
$(#[$abbr_names_meta])* pub mod $abbr_names
248+
$(#[$long_names_meta])* pub mod $long_names
249+
250+
variant [ $( $(#[$variant_meta])+ $variant ; )* ]
251+
abbr [ $( $abbr_variant $abbr ; )* ]
252+
long [ $( $long_variant $long ; )* ]
253+
display [ $( $display_variant $display ; )* ]
254+
255+
buffer [ ]
256+
queue [ $( $(#[$queue_meta])+ $queue $queue_tt )* ]
257+
}
258+
};
259+
260+
// == Final formatting == //
// Applies when both `buffer` and `queue` are empty: emits the enum, the two name modules,
// and the `FromStr`, `Display` and `EnumeratedCharProperty` impls from the accumulators.
261+
(
262+
$(#[$name_meta:meta])* pub enum $name:ident
263+
$(#[$abbr_names_meta:meta])* pub mod $abbr_names:ident
264+
$(#[$long_names_meta:meta])* pub mod $long_names:ident
265+
266+
variant [ $( $(#[$variant_meta:meta])+ $variant:ident ; )* ]
267+
abbr [ $( $abbr_variant:ident $abbr:ident ; )* ]
268+
long [ $( $long_variant:ident $long:ident ; )* ]
269+
display [ $( $display_variant:ident $display:expr ; )* ]
270+
271+
buffer [ ]
272+
queue [ ]
273+
) => {
274+
$(#[$name_meta])*
275+
#[allow(bad_style)]
276+
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
277+
pub enum $name {
278+
$( $(#[$variant_meta])+ $variant, )*
279+
}
280+
281+
$(#[$abbr_names_meta])*
282+
#[allow(bad_style)]
283+
pub mod $abbr_names {
284+
// One re-export per variant that declared an `abbr`, named after that abbreviation.
$( pub use super::$name::$abbr_variant as $abbr; )*
285+
}
286+
287+
$(#[$long_names_meta])*
288+
#[allow(bad_style)]
289+
pub mod $long_names {
290+
// One re-export per variant that declared a `long` name.
$( pub use super::$name::$long_variant as $long; )*
291+
}
292+
293+
#[allow(bad_style)]
294+
#[allow(unreachable_patterns)]
295+
impl ::std::str::FromStr for $name {
296+
type Err = ();
297+
fn from_str(s: &str) -> Result<Self, Self::Err> {
298+
// Accepts the stringified variant, abbr and long names; the first matching arm
// wins, and anything else yields `Err(())`.
match s {
299+
$( stringify!($variant) => Ok($name::$variant), )*
300+
$( stringify!($abbr) => Ok($name::$abbr_variant), )*
301+
$( stringify!($long) => Ok($name::$long_variant), )*
302+
_ => Err(()),
303+
}
304+
}
305+
}
306+
307+
#[allow(bad_style)]
308+
#[allow(unreachable_patterns)]
309+
impl ::std::fmt::Display for $name {
310+
fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
311+
// Arm order sets the fallback priority: `display` string, then the long name with
// underscores replaced by spaces, then (in the nested match) the abbr name, then
// the variant name. A variant may appear in several groups; only its first arm is
// reachable, which is why `unreachable_patterns` is allowed on this impl.
match *self {
312+
$( $name::$display_variant => write!(f, "{}", $display), )*
313+
$( $name::$long_variant => write!(f, "{}", stringify!($long).replace('_', " ")), )*
314+
_ => write!(f, "{}", match *self {
315+
$( $name::$abbr_variant => stringify!($abbr), )*
316+
$( $name::$variant => stringify!($variant), )*
317+
})
318+
}
319+
}
320+
}
321+
322+
#[allow(bad_style)]
323+
impl $crate::char_property::EnumeratedCharProperty for $name {
324+
fn abbr_name(&self) -> &'static str {
325+
match *self {
326+
$( $name::$abbr_variant => stringify!($abbr), )*
327+
// No catch all variant
328+
// Abbr name is required on Enumerated properties
329+
}
330+
}
331+
332+
fn all_values() -> &'static [$name] {
333+
// Uses `+` rather than `*`, so this repetition needs at least one variant.
const VALUES: &[$name] = &[
334+
$($name::$variant,)+
335+
];
336+
VALUES
337+
}
338+
}
339+
};
340+
}

0 commit comments

Comments
 (0)