diff --git a/Cargo.lock b/Cargo.lock index 114bec20..8c1f7fc3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,6 +23,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + [[package]] name = "allocator-api2" version = "0.2.18" @@ -301,6 +307,12 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "hermit-abi" version = "0.3.3" @@ -327,6 +339,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -412,6 +433,31 @@ version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +[[package]] +name = "ouroboros" +version = "0.18.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "944fa20996a25aded6b4795c6d63f10014a7a83f8be9828a11860b08c5fc4a67" +dependencies = [ + "aliasable", + "ouroboros_macro", + "static_assertions", +] + +[[package]] +name = "ouroboros_macro" +version = "0.18.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39b0deead1528fd0e5947a8546a9642a9777c25f6e1e26f34c97b204bbb465bd" +dependencies = [ + "heck", + "itertools 0.12.1", + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn", +] + [[package]] name = "outref" version = "0.5.1" @@ -449,6 +495,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "version_check", + "yansi", +] + [[package]] name = "quote" version = "1.0.37" @@ -557,6 +616,7 @@ dependencies = [ "dyn-clone", "itertools 0.13.0", "memchr", + "ouroboros", "regex", "rustc-hash", "serde", @@ -965,6 +1025,12 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + [[package]] name = "zerocopy" version = "0.7.35" diff --git a/Cargo.toml b/Cargo.toml index b135afc4..1002370c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,7 +37,7 @@ rustc-hash = "2.1.0" dashmap = "6.1.0" memchr = "2.7.4" itertools = "0.13" - +ouroboros = "0.18.4" codspeed-criterion-compat = { version = "2.7.2", default-features = false, optional = true } static_assertions = "1.1.0" diff --git a/benches/bench.rs b/benches/bench.rs index f4321745..cadf01f3 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -197,6 +197,43 @@ fn benchmark_replace_large_minified_source(b: &mut Bencher) { }); } +fn benchmark_source_for_replace_large_minified_source_with_cache(b: &mut Bencher) { + let antd_minify = SourceMapSource::new(SourceMapSourceOptions { + value: 
ANTD_MIN_JS,
+    name: "antd.min.js",
+    source_map: SourceMap::from_json(ANTD_MIN_JS_MAP).unwrap(),
+    original_source: None,
+    inner_source_map: None,
+    remove_original_source: false,
+  });
+  let mut replace_source = ReplaceSource::new(antd_minify);
+  replace_source.replace(107, 114, "exports", None);
+  replace_source.replace(130, 143, "'object'", None);
+  replace_source.replace(165, 172, "__webpack_require__", None);
+  replace_source.replace(173, 180, "/*! react */\"./node_modules/.pnpm/react@18.2.0/node_modules/react/index.js\"", None);
+  replace_source.replace(183, 190, "__webpack_require__", None);
+  replace_source.replace(191, 202, "/*! react-dom */\"./node_modules/.pnpm/react-dom@18.2.0_react@18.2.0/node_modules/react-dom/index.js\"", None);
+  replace_source.replace(205, 212, "__webpack_require__", None);
+  replace_source.replace(213, 220, "/*! dayjs */\"./node_modules/.pnpm/dayjs@1.11.10/node_modules/dayjs/dayjs.min.js\"", None);
+  replace_source.replace(363, 370, "exports", None);
+  replace_source.replace(373, 385, "exports.antd", None);
+  replace_source.replace(390, 397, "__webpack_require__", None);
+  replace_source.replace(398, 405, "/*! react */\"./node_modules/.pnpm/react@18.2.0/node_modules/react/index.js\"", None);
+  replace_source.replace(408, 415, "__webpack_require__", None);
+  replace_source.replace(416, 427, "/*! react-dom */\"./node_modules/.pnpm/react-dom@18.2.0_react@18.2.0/node_modules/react-dom/index.js\"", None);
+  replace_source.replace(430, 437, "__webpack_require__", None);
+  replace_source.replace(438, 445, "/*! dayjs */\"./node_modules/.pnpm/dayjs@1.11.10/node_modules/dayjs/dayjs.min.js\"", None);
+  replace_source.replace(494, 498, "this", None);
+  let replace_source = replace_source.boxed();
+
+  let concat_source = ConcatSource::new(vec![replace_source.clone(), replace_source]);
+  let cached = CachedSource::new(concat_source);
+
+  b.iter(|| {
+    cached.source();
+  });
+}
+
 fn benchmark_concat_generate_string_with_cache_as_key(b: &mut Bencher) {
   let sms_minify = SourceMapSource::new(SourceMapSourceOptions {
     value: HELLOWORLD_MIN_JS,
@@ -278,6 +315,10 @@ fn bench_rspack_sources(criterion: &mut Criterion) {
     "concat_generate_string_as_key",
     benchmark_concat_generate_string_as_key,
   );
+  group.bench_function(
+    "source_for_replace_large_minified_source_with_cache",
+    benchmark_source_for_replace_large_minified_source_with_cache,
+  );
   group.finish();
 }
 
diff --git a/src/cached_source.rs b/src/cached_source.rs
index a27af637..7e3fbe6f 100644
--- a/src/cached_source.rs
+++ b/src/cached_source.rs
@@ -49,40 +49,60 @@ use crate::{
 /// "Hello World\nconsole.log('test');\nconsole.log('test2');\nHello2\n"
 /// );
 /// ```
-pub struct CachedSource<T> {
+
+pub struct CachedSource<T> {
+  inner: CachedSourceInner<T>,
+}
+
+#[ouroboros::self_referencing]
+pub struct CachedSourceInner<T> {
   inner: Arc<T>,
+  #[not_covariant]
+  #[borrows(inner)]
+  cached_rope: Arc<OnceCell<Rope<'this>>>,
   cached_hash: Arc<OnceCell<u64>>,
   cached_maps: Arc<DashMap<MapOptions, Option<SourceMap>, BuildHasherDefault<FxHasher>>>,
 }
 
+impl<T: Source> CachedSource<T> {
+  fn get_rope(&self) -> &Rope<'_> {
+    self
+      .inner
+      .with(|cache| cache.cached_rope.get_or_init(|| cache.inner.rope()))
+  }
+}
+
 impl<T> CachedSource<T> {
   /// Create a [CachedSource] with the original [Source].
   pub fn new(inner: T) -> Self {
     Self {
-      inner: Arc::new(inner),
-      cached_hash: Default::default(),
-      cached_maps: Default::default(),
+      inner: CachedSourceInner::new(
+        Arc::new(inner),
+        |_| Default::default(),
+        Default::default(),
+        Default::default(),
+      ),
     }
   }
 
   /// Get the original [Source].
  pub fn original(&self) -> &T {
-    &self.inner
+    self.inner.borrow_inner()
   }
 }
 
 impl<T: Source + Hash + PartialEq + Eq + 'static> Source for CachedSource<T> {
   fn source(&self) -> Cow<str> {
-    self.inner.source()
+    Cow::Owned(self.get_rope().to_string())
   }
 
   fn rope(&self) -> Rope<'_> {
-    self.inner.rope()
+    self.get_rope().clone()
   }
 
   fn buffer(&self) -> Cow<[u8]> {
-    self.inner.buffer()
+    self.inner.borrow_inner().buffer()
   }
 
   fn size(&self) -> usize {
@@ -90,17 +110,20 @@ impl<T: Source + Hash + PartialEq + Eq + 'static> Source for CachedSource<T> {
   fn map(&self, options: &MapOptions) -> Option<SourceMap> {
-    if let Some(map) = self.cached_maps.get(options) {
+    if let Some(map) = self.inner.borrow_cached_maps().get(options) {
       map.clone()
     } else {
-      let map = self.inner.map(options);
-      self.cached_maps.insert(options.clone(), map.clone());
+      let map = self.inner.borrow_inner().map(options);
+      self
+        .inner
+        .borrow_cached_maps()
+        .insert(options.clone(), map.clone());
       map
     }
   }
 
   fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> {
-    self.inner.to_writer(writer)
+    self.inner.borrow_inner().to_writer(writer)
   }
 }
 
@@ -114,7 +137,7 @@ impl<T: Source + Hash + PartialEq + Eq + 'static> StreamChunks
     on_source: crate::helpers::OnSource<'_, 'a>,
     on_name: crate::helpers::OnName<'_, 'a>,
   ) -> crate::helpers::GeneratedInfo {
-    let cached_map = self.cached_maps.entry(options.clone());
+    let cached_map = self.inner.borrow_cached_maps().entry(options.clone());
     match cached_map {
       Entry::Occupied(entry) => {
         let source = self.rope();
@@ -138,7 +161,7 @@ impl<T: Source + Hash + PartialEq + Eq + 'static> StreamChunks
       }
       Entry::Vacant(entry) => {
         let (generated_info, map) = stream_and_get_source_and_map(
-          &self.inner as &T,
+          self.inner.borrow_inner() as &T,
          options,
          on_chunk,
          on_source,
@@ -153,19 +176,21 @@ impl<T: Source + Hash + PartialEq + Eq + 'static> StreamChunks
 
 impl<T> Clone for CachedSource<T> {
   fn clone(&self) -> Self {
-    Self {
-      inner: self.inner.clone(),
-      cached_hash: self.cached_hash.clone(),
-      cached_maps: self.cached_maps.clone(),
-    }
+    // Clones share the cached hash and maps; the rope cache is rebuilt lazily.
+    Self {
+      inner: CachedSourceInner::new(
+        self.inner.borrow_inner().clone(),
+        |_| Default::default(),
+        self.inner.borrow_cached_hash().clone(),
+        self.inner.borrow_cached_maps().clone(),
+      ),
+    }
   }
 }
 
 impl<T: Hash> Hash for CachedSource<T> {
   fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-    (self.cached_hash.get_or_init(|| {
+    (self.inner.borrow_cached_hash().get_or_init(|| {
       let mut hasher = FxHasher::default();
-      self.inner.hash(&mut hasher);
+      self.original().hash(&mut hasher);
       hasher.finish()
     }))
     .hash(state);
@@ -174,7 +199,7 @@ impl<T: Hash> Hash for CachedSource<T> {
 
 impl<T: PartialEq> PartialEq for CachedSource<T> {
   fn eq(&self, other: &Self) -> bool {
-    self.inner == other.inner
+    self.inner.borrow_inner() == other.inner.borrow_inner()
   }
 }
 
@@ -186,9 +211,12 @@ impl<T: std::fmt::Debug> std::fmt::Debug for CachedSource<T> {
     f: &mut std::fmt::Formatter<'_>,
   ) -> Result<(), std::fmt::Error> {
     f.debug_struct("CachedSource")
-      .field("inner", self.inner.as_ref())
-      .field("cached_hash", self.cached_hash.as_ref())
-      .field("cached_maps", &(!self.cached_maps.is_empty()))
+      .field("inner", self.inner.borrow_inner().as_ref())
+      .field("cached_hash", self.inner.borrow_cached_hash().as_ref())
+      .field(
+        "cached_maps",
+        &(!self.inner.borrow_cached_maps().is_empty()),
+      )
       .finish()
   }
 }
 
@@ -236,7 +264,12 @@ mod tests {
     source.map(&map_options);
 
     assert_eq!(
-      *clone.cached_maps.get(&map_options).unwrap().value(),
+      *clone
+        .inner
+        .borrow_cached_maps()
+        .get(&map_options)
+        .unwrap()
+        .value(),
       source.map(&map_options)
     );
   }
diff --git a/src/replace_source.rs b/src/replace_source.rs
index 29eaad34..57c30a90 100644
--- a/src/replace_source.rs
+++ b/src/replace_source.rs
@@ -302,7 +302,7 @@ fn check_content_at_position(
   lines: &[Rope],
   line: u32,
   column: u32,
-  expected: Rope, // FIXME: memory
+  expected: &Rope,
 ) -> bool {
   if let Some(line) = lines.get(line as usize - 1) {
     match line
@@ -311,7 +311,7 @@ fn check_content_at_position(
       .map(|(byte_index, _)| byte_index)
     {
       Some(byte_index) => {
-        line.get_byte_slice(byte_index..byte_index + expected.len())
+        line.get_byte_slice(byte_index..byte_index + expected.len()).as_ref()
           == Some(expected)
       }
       None => false,
@@ -372,7 +372,7 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
     // In this case, we can't split this mapping.
     // webpack-sources also have this function, refer https://github.com/webpack/webpack-sources/blob/main/lib/ReplaceSource.js#L158
     let check_original_content =
-      |source_index: u32, line: u32, column: u32, expected_chunk: Rope| {
+      |source_index: u32, line: u32, column: u32, expected_chunk: &Rope| {
        if let Some(Some(source_content)) =
          source_content_lines.borrow_mut().get_mut(&source_index)
        {
@@ -432,7 +432,7 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
              original.source_index,
              original.original_line,
              original.original_column,
-              chunk.byte_slice(0..chunk_pos as usize),
+              &chunk.byte_slice(0..chunk_pos as usize),
            )
          }) {
            original.original_column += chunk_pos;
@@ -458,8 +458,24 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
            let offset = next_replacement_pos - pos;
            let chunk_slice = chunk
              .byte_slice(chunk_pos as usize..(chunk_pos + offset) as usize);
+
+            let new_original_column = if let Some(original) =
+              mapping.original.as_mut().filter(|original| {
+                check_original_content(
+                  original.source_index,
+                  original.original_line,
+                  original.original_column,
+                  &chunk_slice,
+                )
+              })
+            {
+              Some(original.original_column + chunk_slice.len() as u32)
+            } else {
+              None
+            };
+
            on_chunk(
-              Some(chunk_slice.clone()),
+              Some(chunk_slice),
              Mapping {
                generated_line: line as u32,
                generated_column: ((mapping.generated_column as i64)
@@ -483,17 +499,9 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
            mapping.generated_column += offset;
            chunk_pos += offset;
            pos = next_replacement_pos;
-            if let Some(original) =
-              mapping.original.as_mut().filter(|original| {
-                check_original_content(
-                  original.source_index,
-                  original.original_line,
-                  original.original_column,
-                  chunk_slice.clone(),
-                )
-              })
-            {
-              original.original_column += chunk_slice.len() as u32;
+
+            if let Some(new_original_column) = new_original_column {
+              mapping.original.as_mut().unwrap().original_column = new_original_column;
            }
          }
          // Insert replacement content split into chunks by lines
@@ -611,7 +619,7 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
                original.source_index,
                original.original_line,
                original.original_column,
-                chunk.byte_slice(
+                &chunk.byte_slice(
                  chunk_pos as usize..(chunk_pos + offset as u32) as usize,
                ),
              )
@@ -695,9 +703,12 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
        // Insert remaining replacements content split into chunks by lines
        let mut line = result.generated_line as i64 + generated_line_offset;
        let matches: Vec<Rope> = split_into_lines(&remainder).collect();
-        for (m, content_line) in matches.iter().enumerate() {
+        let matches_len = matches.len();
+        for (m, content_line) in matches.into_iter().enumerate() {
+          let ends_with_newline = content_line.ends_with("\n");
+          let content_line_len = content_line.len();
          on_chunk(
-            Some(content_line.clone()),
+            Some(content_line),
            Mapping {
              generated_line: line as u32,
              generated_column: ((result.generated_column as i64)
@@ -710,11 +721,11 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
            },
          );
 
-          if m == matches_len - 1 && !content_line.ends_with("\n") {
+          if m == matches_len - 1 && !ends_with_newline {
            if generated_column_offset_line == line {
-              generated_column_offset += content_line.len() as i64;
+              generated_column_offset += content_line_len as i64;
            } else {
-              generated_column_offset = content_line.len() as i64;
+              generated_column_offset = content_line_len as i64;
              generated_column_offset_line = line;
            }
          } else {
diff --git a/src/rope.rs b/src/rope.rs
index b18d90cc..763c5cdd 100644
--- a/src/rope.rs
+++ b/src/rope.rs
@@ -5,7 +5,6 @@ use std::{
   collections::VecDeque,
   hash::Hash,
   ops::{Bound, RangeBounds},
-  rc::Rc,
 };
 
 use crate::Error;
@@ -13,7 +12,7 @@ use crate::Error;
 #[derive(Clone, Debug)]
 pub(crate) enum Repr<'a> {
   Light(&'a str),
-  Full(Rc<Vec<(&'a str, usize)>>),
+  Full(Vec<(&'a str, usize)>),
 }
 
 /// A rope data structure.
@@ -42,13 +41,13 @@ impl<'a> Rope<'a> {
     match &mut self.repr {
       Repr::Light(s) => {
         let vec = Vec::from_iter([(*s, 0), (value, s.len())]);
-        self.repr = Repr::Full(Rc::new(vec));
+        self.repr = Repr::Full(vec);
       }
       Repr::Full(data) => {
         let len = data
           .last()
           .map_or(0, |(chunk, start_pos)| *start_pos + chunk.len());
-        Rc::make_mut(data).push((value, len));
+        data.push((value, len));
       }
     }
   }
@@ -60,7 +59,7 @@ impl<'a> Rope<'a> {
     match (&mut self.repr, value.repr) {
       (Repr::Light(s), Repr::Light(other)) => {
         let raw = Vec::from_iter([(*s, 0), (other, s.len())]);
-        self.repr = Repr::Full(Rc::new(raw));
+        self.repr = Repr::Full(raw);
       }
       (Repr::Full(s), Repr::Full(other)) => {
         if !other.is_empty() {
@@ -68,7 +67,7 @@ impl<'a> Rope<'a> {
            .last()
            .map_or(0, |(chunk, start_pos)| *start_pos + chunk.len());
 
-          let cur = Rc::make_mut(s);
+          let cur = s;
          cur.reserve_exact(other.len());
 
          for &(chunk, _) in other.iter() {
@@ -82,7 +81,7 @@ impl<'a> Rope<'a> {
          let len = s
            .last()
            .map_or(0, |(chunk, start_pos)| *start_pos + chunk.len());
-          Rc::make_mut(s).push((other, len));
+          s.push((other, len));
        }
      }
      (Repr::Light(s), Repr::Full(other)) => {
@@ -93,7 +92,7 @@ impl<'a> Rope<'a> {
          raw.push((chunk, len));
          len += chunk.len();
        }
-        self.repr = Repr::Full(Rc::new(raw));
+        self.repr = Repr::Full(raw);
      }
    }
  }
@@ -324,7 +323,7 @@ impl<'a> Rope<'a> {
      })?;
 
    Ok(Rope {
-      repr: Repr::Full(Rc::new(raw)),
+      repr: Repr::Full(raw),
    })
  }
 }
@@ -412,7 +411,7 @@ impl<'a> Rope<'a> {
    });
 
    Rope {
-      repr: Repr::Full(Rc::new(raw)),
+      repr: Repr::Full(raw),
    }
  }
 }
@@ -613,7 +612,7 @@ impl<'a> Iterator for Lines<'_, 'a> {
      // Advance the byte index to the end of the line.
      *byte_idx += len;
      Some(Rope {
-        repr: Repr::Full(Rc::new(raw)),
+        repr: Repr::Full(raw),
      })
    } else {
      // If we did not find a newline in the next few chunks,
@@ -645,7 +644,7 @@ impl<'a> Iterator for Lines<'_, 'a> {
      // Advance the byte index to the end of the rope.
      *byte_idx += len;
      Some(Rope {
-        repr: Repr::Full(Rc::new(raw)),
+        repr: Repr::Full(raw),
      })
    }
  }
@@ -916,7 +915,7 @@ impl<'a> FromIterator<&'a str> for Rope<'a> {
      .collect::<Vec<_>>();
 
    Self {
-      repr: Repr::Full(Rc::new(raw)),
+      repr: Repr::Full(raw),
    }
  }
 }
@@ -941,8 +940,6 @@ fn end_bound_to_range_end(end: Bound<&usize>) -> Option<usize> {
 
 #[cfg(test)]
 mod tests {
-  use std::rc::Rc;
-
   use crate::rope::{Repr, Rope};
 
   impl<'a> PartialEq for Repr<'a> {
@@ -967,7 +964,7 @@ mod tests {
    assert_eq!(simple, "abcdef");
    assert_eq!(
      simple.repr,
-      Repr::Full(Rc::new(Vec::from_iter([("abc", 0), ("def", 3)])))
+      Repr::Full(Vec::from_iter([("abc", 0), ("def", 3)]))
    );
    assert_eq!(simple.len(), 6);
 
@@ -975,11 +972,11 @@ mod tests {
    assert_eq!(simple, "abcdefghi");
    assert_eq!(
      simple.repr,
-      Repr::Full(Rc::new(Vec::from_iter([
+      Repr::Full(Vec::from_iter([
        ("abc", 0),
        ("def", 3),
        ("ghi", 6),
-      ])))
+      ]))
    );
    assert_eq!(simple.len(), 9);
  }
@@ -998,7 +995,7 @@ mod tests {
    assert_eq!(append1, "abcdef");
    assert_eq!(
      append1.repr,
-      Repr::Full(Rc::new(Vec::from_iter([("abc", 0), ("def", 3),])))
+      Repr::Full(Vec::from_iter([("abc", 0), ("def", 3),]))
    );
 
    // simple - complex
@@ -1007,12 +1004,12 @@ mod tests {
    assert_eq!(append2, "abc123");
    assert_eq!(
      append2.repr,
-      Repr::Full(Rc::new(Vec::from_iter([
+      Repr::Full(Vec::from_iter([
        ("abc", 0),
        ("1", 3),
        ("2", 4),
        ("3", 5),
-      ])))
+      ]))
    );
 
    // complex - simple
@@ -1021,12 +1018,12 @@ mod tests {
    assert_eq!(append3, "123abc");
    assert_eq!(
      append3.repr,
-      Repr::Full(Rc::new(Vec::from_iter([
+      Repr::Full(Vec::from_iter([
        ("1", 0),
        ("2", 1),
        ("3", 2),
        ("abc", 3),
-      ])))
+      ]))
    );
 
    // complex - complex
@@ -1035,14 +1032,14 @@ mod tests {
    assert_eq!(append4, "123456");
    assert_eq!(
      append4.repr,
-      Repr::Full(Rc::new(Vec::from_iter([
+      Repr::Full(Vec::from_iter([
        ("1", 0),
        ("2", 1),
        ("3", 2),
        ("4", 3),
        ("5", 4),
        ("6", 5),
-      ])))
+      ]))
    );
  }
 
@@ -1133,7 +1130,7 @@ mod tests {
    assert_eq!(rope, "abcdef");
 
    assert_eq!(
      rope.repr,
-      Repr::Full(Rc::new(Vec::from_iter([("abc", 0), ("def", 3)])))
+      Repr::Full(Vec::from_iter([("abc", 0), ("def", 3)]))
    );
  }
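
A minimal usage sketch, separate from the patch itself, of what the rope caching above is meant to buy: repeated `source()` calls on a `CachedSource` reuse the rope built on first access instead of re-flattening the inner source each time. It assumes the crate's public `RawSource`, `ReplaceSource`, `CachedSource`, and `Source` items as used in the benchmark; the replacement offsets below are illustrative only.

```rust
use rspack_sources::{CachedSource, RawSource, ReplaceSource, Source};

fn main() {
  // Mirror the benchmark above on a tiny input: apply one replacement...
  let mut replaced = ReplaceSource::new(RawSource::from("function hello() {}"));
  replaced.replace(9, 14, "greet", None); // bytes 9..14 cover "hello"

  // ...then wrap it in CachedSource. The first source() call builds the rope
  // and stores it in the OnceCell; the second call reuses the cached value.
  let cached = CachedSource::new(replaced);
  let first = cached.source().into_owned();
  let second = cached.source().into_owned();
  assert_eq!(first, second);
  assert_eq!(first, "function greet() {}");
}
```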