1
1
use std:: collections:: HashMap ;
2
2
3
/// Leading markers that are trimmed from the start of every copyright entry
/// when a license is simplified.
const COPYRIGHT_PREFIXES: &[&str] = &[
    "SPDX-FileCopyrightText:",
    "Copyright",
    "(c)",
    "(C)",
    "©",
];
4
+
3
5
pub ( crate ) struct LicensesInterner {
4
6
by_id : Vec < License > ,
5
7
by_struct : HashMap < License , usize > ,
@@ -10,7 +12,8 @@ impl LicensesInterner {
10
12
LicensesInterner { by_id : Vec :: new ( ) , by_struct : HashMap :: new ( ) }
11
13
}
12
14
13
- pub ( crate ) fn intern ( & mut self , license : License ) -> LicenseId {
15
+ pub ( crate ) fn intern ( & mut self , mut license : License ) -> LicenseId {
16
+ license. simplify ( ) ;
14
17
if let Some ( id) = self . by_struct . get ( & license) {
15
18
LicenseId ( * id)
16
19
} else {
@@ -35,3 +38,28 @@ pub(crate) struct License {
35
38
pub ( crate ) spdx : String ,
36
39
pub ( crate ) copyright : Vec < String > ,
37
40
}
41
+
42
+ impl License {
43
+ fn simplify ( & mut self ) {
44
+ self . remove_copyright_prefixes ( ) ;
45
+ self . copyright . sort ( ) ;
46
+ self . copyright . dedup ( ) ;
47
+ }
48
+
49
+ fn remove_copyright_prefixes ( & mut self ) {
50
+ for copyright in & mut self . copyright {
51
+ let mut stripped = copyright. trim ( ) ;
52
+ let mut previous_stripped;
53
+ loop {
54
+ previous_stripped = stripped;
55
+ for pattern in COPYRIGHT_PREFIXES {
56
+ stripped = stripped. trim_start_matches ( pattern) . trim_start ( ) ;
57
+ }
58
+ if stripped == previous_stripped {
59
+ break ;
60
+ }
61
+ }
62
+ * copyright = stripped. into ( ) ;
63
+ }
64
+ }
65
+ }
0 commit comments