Skip to content

Avoid silent failure for associated types. #16

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions typegen/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ quote = { workspace = true }
scale-info = { workspace = true }
syn = { workspace = true }
thiserror = { workspace = true }
smallvec = { workspace = true }

[dev-dependencies]
scale-bits = { workspace = true }
Expand Down
116 changes: 114 additions & 2 deletions typegen/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::{
},
},
utils::ensure_unique_type_paths,
DerivesRegistry, TypeSubstitutes,
DerivesRegistry, TypeSubstitutes, TypegenError,
};

mod utils;
Expand Down Expand Up @@ -852,7 +852,7 @@ fn range_fields() {
}

#[test]
fn generics() {
fn generics_foo() {
#[allow(unused)]
#[derive(TypeInfo)]
struct Foo<T> {
Expand Down Expand Up @@ -1584,3 +1584,115 @@ fn find_similar_type_paths() {
];
assert_eq!(similar, expected);
}

#[test]
fn assoc_types_skip_params() {
pub trait Config {
type Inner: TypeInfo;
}

pub struct A;
pub struct B;

impl Config for A {
type Inner = ();
}

impl Config for B {
type Inner = u32;
}

#[derive(TypeInfo)]
#[scale_info(skip_type_params(T))]
pub struct X<T: Config> {
pub inner: T::Inner,
}

let res_no_dedup = Testgen::new()
.with::<X<A>>()
.with::<X<B>>()
.try_gen_tests_mod(subxt_settings(), false);
assert!(matches!(
res_no_dedup,
Err(TypegenError::DuplicateTypePath(_))
));

let dedup_code = Testgen::new()
.with::<X<A>>()
.with::<X<B>>()
.try_gen_tests_mod(Default::default(), true)
.unwrap();

let expected_code = quote! {
pub mod tests {
use super::types;
pub struct X1 {
pub inner: (),
}
pub struct X2 {
pub inner: ::core::primitive::u32,
}
}
};

assert_eq!(dedup_code.to_string(), expected_code.to_string());
}

#[test]
fn assoc_types_no_skip_params() {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great to cover this with a test!

pub trait Config {
type Inner: TypeInfo;
}

#[derive(TypeInfo)]
pub struct A;

#[derive(TypeInfo)]
pub struct B;

impl Config for A {
type Inner = ();
}
impl Config for B {
type Inner = u32;
}

#[derive(TypeInfo)]
pub struct X<T: Config> {
pub inner: T::Inner,
}

let res_no_dedup = Testgen::new()
.with::<X<A>>()
.with::<X<B>>()
.try_gen_tests_mod(subxt_settings(), false);
assert!(matches!(
res_no_dedup,
Err(TypegenError::DuplicateTypePath(_))
));

let dedup_code = Testgen::new()
.with::<X<A>>()
.with::<X<B>>()
.try_gen_tests_mod(Default::default(), true)
.unwrap();

// Unfortunately the structs A and B are still part of the type registry and X1 and X2 have generic parameters, linking to them.
let expected_code = quote! {
pub mod tests {
use super::types;
pub struct A;
pub struct B;
pub struct X1<_0> {
pub inner: (),
pub __ignore: ::core::marker::PhantomData<_0>
}
pub struct X2<_0> {
pub inner: ::core::primitive::u32,
pub __ignore: ::core::marker::PhantomData<_0>
}
}
};

assert_eq!(dedup_code.to_string(), expected_code.to_string());
}
26 changes: 20 additions & 6 deletions typegen/src/tests/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use syn::parse_quote;

use crate::typegen::ir::module_ir::ModuleIR;
use crate::utils::ensure_unique_type_paths;
use crate::TypegenError;
use crate::{
typegen::settings::substitutes::absolute_path, DerivesRegistry, TypeGenerator,
TypeGeneratorSettings, TypeSubstitutes,
Expand Down Expand Up @@ -41,12 +42,25 @@ impl Testgen {
module.to_token_stream()
}

pub fn gen_tests_mod(self, settings: TypeGeneratorSettings) -> TokenStream {
let registry: PortableRegistry = self.registry.into();
pub fn try_gen_tests_mod(
self,
settings: TypeGeneratorSettings,
deduplicate: bool,
) -> Result<TokenStream, TypegenError> {
let mut registry: PortableRegistry = self.registry.into();
if deduplicate {
ensure_unique_type_paths(&mut registry)
}
let type_gen = TypeGenerator::new(&registry, &settings);
let module = type_gen.generate_types_mod().unwrap();
let module = get_mod(&module, TESTS_MOD_PATH).unwrap();
module.to_token_stream()
type_gen
.generate_types_mod()
.map(|module| get_mod(&module, TESTS_MOD_PATH).unwrap().to_token_stream())
}

pub fn gen_tests_mod(self, settings: TypeGeneratorSettings) -> TokenStream {
self.try_gen_tests_mod(settings, false)
.unwrap()
.to_token_stream()
}
}

Expand Down Expand Up @@ -160,7 +174,7 @@ pub(super) fn subxt_default_substitutes() -> TypeSubstitutes {

const TESTS_MOD_PATH: &[&str] = &["scale_typegen", "tests"];

fn get_mod<'a>(module: &'a ModuleIR, path_segs: &[&'static str]) -> Option<&'a ModuleIR> {
fn get_mod<'a>(module: &'a ModuleIR, path_segs: &[&'static str]) -> Option<&'a ModuleIR<'a>> {
let (mod_name, rest) = path_segs.split_first()?;
let mod_ident: Ident = syn::parse_str(mod_name).unwrap();
let module = module.children.get(&mod_ident)?;
Expand Down
4 changes: 4 additions & 0 deletions typegen/src/typegen/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ pub enum TypegenError {
/// The settings do not fit the given type registry.
#[error("Settings do not fit the given type registry: {0}")]
SettingsValidation(SettingsValidationError),
/// There are two types with the the same type path but a different structure.
/// Use [`crate::utils::ensure_unique_type_paths`] on your [`scale_info::PortableRegistry`] to deduplicate type paths.
#[error("There are two types with the the same type path {0} but different structure. Use `scale_typegen::utils::ensure_unique_type_paths` on your `PortableRegistry` before, to avoid this error.")]
DuplicateTypePath(String),
}

/// Error attempting to do type substitution.
Expand Down
17 changes: 9 additions & 8 deletions typegen/src/typegen/ir/module_ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,24 @@ use scale_info::form::PortableForm;

/// Represents a Rust `mod`, containing generated types and child `mod`s.
#[derive(Debug, Clone)]
pub struct ModuleIR {
pub struct ModuleIR<'a> {
/// Name of this module.
pub name: Ident,
/// Root module identifier.
pub root_mod: Ident,
/// Submodules of this module.
pub children: BTreeMap<Ident, ModuleIR>,
pub children: BTreeMap<Ident, ModuleIR<'a>>,
/// Types in this module.
pub types: BTreeMap<scale_info::Path<PortableForm>, TypeIR>,
pub types:
BTreeMap<scale_info::Path<PortableForm>, (&'a scale_info::Type<PortableForm>, TypeIR)>,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder whether scale_info::Path<PortableForm> can also be a reference now? Not if you need to alter the path here for de-duping but possible if you've already de-duped by then perhaps?

}

impl ToTokens for ModuleIR {
impl<'a> ToTokens for ModuleIR<'a> {
fn to_tokens(&self, tokens: &mut TokenStream) {
let name = &self.name;
let root_mod = &self.root_mod;
let modules = self.children.values();
let types = self.types.values().clone();
let types = self.types.values().map(|e| &e.1).clone();

tokens.extend(quote! {
pub mod #name {
Expand All @@ -38,7 +39,7 @@ impl ToTokens for ModuleIR {
}
}

impl ModuleIR {
impl<'a> ModuleIR<'a> {
/// Create a new [`Module`], with a reference to the root `mod` for resolving type paths.
pub(crate) fn new(name: Ident, root_mod: Ident) -> Self {
Self {
Expand All @@ -61,7 +62,7 @@ impl ModuleIR {

/// Returns the generated types.
pub fn types(&self) -> impl Iterator<Item = (&scale_info::Path<PortableForm>, &TypeIR)> {
self.types.iter()
self.types.iter().map(|(k, v)| (k, &v.1))
}

/// Returns the root `mod` used for resolving type paths.
Expand All @@ -71,7 +72,7 @@ impl ModuleIR {

/// Recursively creates submodules for the given namespace and returns a mutable reference to the innermost module created this way.
/// Returns itself, if the namespace is empty.
pub fn get_or_insert_submodule(&mut self, namespace: &[String]) -> &mut ModuleIR {
pub fn get_or_insert_submodule(&mut self, namespace: &[String]) -> &mut ModuleIR<'a> {
if namespace.is_empty() {
return self;
}
Expand Down
22 changes: 19 additions & 3 deletions typegen/src/typegen/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::TypegenError;
use std::collections::btree_map::Entry;

use crate::{utils::types_equal_extended_to_params, TypegenError};

use self::{
ir::module_ir::ModuleIR,
Expand Down Expand Up @@ -89,10 +91,24 @@ impl<'a> TypeGenerator<'a> {
}

// if the type is not a builtin type, insert it into the respective module
if let Some(type_ir) = self.create_type_ir(&ty.ty, &flat_derives_registry)? {
let ty = &ty.ty;
if let Some(type_ir) = self.create_type_ir(ty, &flat_derives_registry)? {
// Create the module this type should go into
let innermost_module = root_mod.get_or_insert_submodule(namespace);
innermost_module.types.insert(path.clone(), type_ir);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

match innermost_module.types.entry(path.clone()) {
Entry::Vacant(e) => {
e.insert((ty, type_ir));
}
Entry::Occupied(e) => {
// There is already a type with the same type path present.
// We do not just want to override it, so we check if the two types are semantically similar (structure + generics).
// If not, return an error, if yes, just keep the first one.
let other_ty = &e.get().0;
if !types_equal_extended_to_params(ty, other_ty) {
return Err(TypegenError::DuplicateTypePath(ty.path.to_string()));
}
}
};
}
}

Expand Down
54 changes: 40 additions & 14 deletions typegen/src/utils.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use scale_info::{form::PortableForm, Field, PortableRegistry, Type, TypeDef, TypeParameter};
use std::collections::HashMap;
use smallvec::{smallvec, SmallVec};
use std::collections::{hash_map::Entry, HashMap};

use crate::TypegenError;

Expand Down Expand Up @@ -81,25 +82,50 @@ pub fn ensure_unique_type_paths(types: &mut PortableRegistry) {
/// all type ids mentioned in the TypeDef are either:
/// - equal
/// - or different, but map essentially to the same generic type parameter
fn types_equal_extended_to_params(a: &Type<PortableForm>, b: &Type<PortableForm>) -> bool {
let collect_params = |type_params: &[TypeParameter<PortableForm>]| {
type_params
.iter()
.filter_map(|p| p.ty.map(|ty| (ty.id, p.name.clone())))
.collect::<HashMap<u32, String>>()
};
pub(crate) fn types_equal_extended_to_params(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So right now this function Does a shallow compare, and to use an earlier example an issue is eg if we compare:

struct Foo<T = u64> {
    inner: Vec<u64>
}

struct Foo<T = u32> {
    inner: Vec<u32>
}

We would return false because inner would be pointing to a different type ID in either case, and require those types to be deduplicated into eg Foo and Foo2.

We could be smarter and notice that the T in Foo<T> is used as a param in Vec and so we only need to output one type during codegen, but it's more complicated than just recursively calling types_equal_extended_to_params because we need to track usages of the generic type, and I'm not sure if that's possible offhand?

Ie we could note that since T has type ID 32, then any instances of that ID are actually instances of T being used, but that might not be the case (eg struct Bar<T = u32> { a: T, b: u32 }). We could maybe use the typeName if that is equal to T too or something, in conjunction with the type ID? I'm curious if you have any ideas here @tadeohepperle!

All that said, for now it's perfeclty OK if it is more conservative as long as ensure_unique_type_paths then uses this to rename the type paths appropriately. Better to have more duplicated types being spat out in the codegen than to assume types can be merged when in fact they can't!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would be nice with some simple unit tests for fn types_equal_extended_to_params as well, if we don't have it already.

a: &Type<PortableForm>,
b: &Type<PortableForm>,
) -> bool {
// We map each type ID to all type params if could refer to. Type IDs can refer to multiple parameters:
// E.g. Foo<A,B> can be parameterized as Foo<u8,u8>, so if 42 is the type id of u8, a field with id=42 could refer to either A or B.
fn collect_params(
type_params: &[TypeParameter<PortableForm>],
) -> HashMap<u32, SmallVec<[&str; 2]>> {
let mut hm: HashMap<u32, SmallVec<[&str; 2]>> = HashMap::new();
for p in type_params {
if let Some(ty) = &p.ty {
match hm.entry(ty.id) {
Entry::Occupied(mut e) => {
e.get_mut().push(p.name.as_str());
}
Entry::Vacant(e) => {
e.insert(smallvec![p.name.as_str()]);
}
}
}
}
hm
}

let type_params_a = collect_params(&a.type_params);
let type_params_b = collect_params(&a.type_params);
let type_params_b = collect_params(&b.type_params);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hehe whoops :D

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth unit testing this function?


if type_params_a.len() != type_params_b.len() {
if a.type_params.len() != b.type_params.len() {
return false;
}

// returns true if the ids are the same OR if they point to the same generic parameter.
// Returns true if the ids are the same OR if they point to the same generic parameter.
let ids_equal = |a: u32, b: u32| -> bool {
a == b
|| matches!((type_params_a.get(&a), type_params_b.get(&b)), (Some(a_name), Some(b_name)) if a_name == b_name)
if a == b {
return true;
}
let Some(a_param_names) = type_params_a.get(&a) else {
return false;
};
let Some(b_param_names) = type_params_b.get(&b) else {
return false;
};
// Check if there is any intersection, meaning that both IDs map to the same generic type param:
a_param_names.iter().any(|a_p| b_param_names.contains(a_p))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this will treat struct B<usize, String, bool> == struct A<(), (), usize> because both contains usize? Or am I missing something?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems like like a.name == b.name is checked before this is called on composite/types should fine I think :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it is a bit more subtle: This ids_equal function is called for every field of the type: Let's say we have a generic type like this:

struct A<T,U,V> { 
    f1: T,
    f2: U,
    f3: V
}

Let's say the concrete types A<usize, String, bool> and A<(), (), usize> have been registered in the type registry and the ids of the primitive types are: usize: id=0, String: id=1, bool: id=2, (): id=3. Then A<usize, String, bool> is represented as:

NamedComposite<T: id=0, U: id=1, V: id=2> {
    f1: id=0,
    f2: id=1,
    f3: id=2
}

and A<(), (), usize> is represented as

NamedComposite<T: id=3, U: id=3, V: id=0> {
    f1: id=3,
    f2: id=3,
    f3: id=0
}

Now we compare A<usize, String, bool> and A<(), (), usize> to find out if they are basically the same rust type (they should be, both are parameterizations of a generic A<T,U,V>).
Now for field f1, we see that the type ids do not equal (0 != 3). But: a_param_names will say, oh type id=0 actually maps to [T] and b_param_names will say, oh type_id 3 maps to [T, or U].
Now the a_param_names.iter().any(|a_p| b_param_names.contains(a_p)) returns true, saying "okay this field is just the generic parameter T for both registered types, so they are considered the same".
Then we continue with the next field f2 and so on.

};

let fields_equal = |a: &[Field<PortableForm>], b: &[Field<PortableForm>]| -> bool {
Expand Down