|
1 | 1 | #![forbid(unsafe_code)]
|
2 | 2 |
|
3 |
| -use anyhow::{anyhow, Context as AnyhowContext, Result}; |
4 |
| -use bytesize::ByteSize; |
5 |
| -use git_features::progress::Progress; |
6 |
| -use git_object::{owned, Kind}; |
7 |
| -use git_odb::pack::{self, index}; |
8 | 3 | use std::str::FromStr;
|
9 |
| -use std::{io, path::Path}; |
10 |
| - |
11 |
| -pub use index::verify::Mode as VerifyMode; |
12 | 4 |
|
13 | 5 | #[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
|
14 | 6 | pub enum OutputFormat {
|
@@ -41,223 +33,5 @@ impl FromStr for OutputFormat {
|
41 | 33 | }
|
42 | 34 | }
|
43 | 35 |
|
44 |
| -#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)] |
45 |
| -pub enum VerifyAlgorithm { |
46 |
| - LessTime, |
47 |
| - LessMemory, |
48 |
| -} |
49 |
| - |
50 |
| -impl VerifyAlgorithm { |
51 |
| - pub fn variants() -> &'static [&'static str] { |
52 |
| - &["less-time", "less-memory"] |
53 |
| - } |
54 |
| -} |
55 |
| - |
56 |
| -impl FromStr for VerifyAlgorithm { |
57 |
| - type Err = String; |
58 |
| - |
59 |
| - fn from_str(s: &str) -> Result<Self, Self::Err> { |
60 |
| - let s_lc = s.to_ascii_lowercase(); |
61 |
| - Ok(match s_lc.as_str() { |
62 |
| - "less-memory" => VerifyAlgorithm::LessMemory, |
63 |
| - "less-time" => VerifyAlgorithm::LessTime, |
64 |
| - _ => return Err(format!("Invalid verification algorithm: '{}'", s)), |
65 |
| - }) |
66 |
| - } |
67 |
| -} |
68 |
| - |
69 |
| -impl From<VerifyAlgorithm> for index::verify::Algorithm { |
70 |
| - fn from(v: VerifyAlgorithm) -> Self { |
71 |
| - match v { |
72 |
| - VerifyAlgorithm::LessMemory => index::verify::Algorithm::Lookup, |
73 |
| - VerifyAlgorithm::LessTime => index::verify::Algorithm::DeltaTreeLookup, |
74 |
| - } |
75 |
| - } |
76 |
| -} |
77 |
| - |
78 |
| -/// A general purpose context for many operations provided here |
79 |
| -pub struct Context<W1: io::Write, W2: io::Write> { |
80 |
| - /// If set, provide statistics to `out` in the given format |
81 |
| - pub output_statistics: Option<OutputFormat>, |
82 |
| - /// A stream to which to output operation results |
83 |
| - pub out: W1, |
84 |
| - /// A stream to which to errors |
85 |
| - pub err: W2, |
86 |
| - /// If set, don't use more than this amount of threads. |
87 |
| - /// Otherwise, usually use as many threads as there are logical cores. |
88 |
| - /// A value of 0 is interpreted as no-limit |
89 |
| - pub thread_limit: Option<usize>, |
90 |
| - pub mode: index::verify::Mode, |
91 |
| - pub algorithm: VerifyAlgorithm, |
92 |
| -} |
93 |
| - |
94 |
| -impl Default for Context<Vec<u8>, Vec<u8>> { |
95 |
| - fn default() -> Self { |
96 |
| - Context { |
97 |
| - output_statistics: None, |
98 |
| - thread_limit: None, |
99 |
| - mode: index::verify::Mode::Sha1CRC32, |
100 |
| - algorithm: VerifyAlgorithm::LessMemory, |
101 |
| - out: Vec::new(), |
102 |
| - err: Vec::new(), |
103 |
| - } |
104 |
| - } |
105 |
| -} |
106 |
| - |
107 |
| -pub fn init() -> Result<()> { |
108 |
| - git_repository::init::repository().with_context(|| "Repository initialization failed") |
109 |
| -} |
110 |
| - |
111 |
| -#[allow(clippy::large_enum_variant)] |
112 |
| -enum EitherCache { |
113 |
| - Left(pack::cache::DecodeEntryNoop), |
114 |
| - Right(pack::cache::DecodeEntryLRU), |
115 |
| -} |
116 |
| - |
117 |
| -impl pack::cache::DecodeEntry for EitherCache { |
118 |
| - fn put(&mut self, offset: u64, data: &[u8], kind: Kind, compressed_size: usize) { |
119 |
| - match self { |
120 |
| - EitherCache::Left(v) => v.put(offset, data, kind, compressed_size), |
121 |
| - EitherCache::Right(v) => v.put(offset, data, kind, compressed_size), |
122 |
| - } |
123 |
| - } |
124 |
| - |
125 |
| - fn get(&mut self, offset: u64, out: &mut Vec<u8>) -> Option<(Kind, usize)> { |
126 |
| - match self { |
127 |
| - EitherCache::Left(v) => v.get(offset, out), |
128 |
| - EitherCache::Right(v) => v.get(offset, out), |
129 |
| - } |
130 |
| - } |
131 |
| -} |
132 |
| - |
133 |
| -pub fn verify_pack_or_pack_index<P, W1, W2>( |
134 |
| - path: impl AsRef<Path>, |
135 |
| - progress: Option<P>, |
136 |
| - Context { |
137 |
| - mut out, |
138 |
| - mut err, |
139 |
| - mode, |
140 |
| - output_statistics, |
141 |
| - thread_limit, |
142 |
| - algorithm, |
143 |
| - }: Context<W1, W2>, |
144 |
| -) -> Result<(owned::Id, Option<index::verify::Outcome>)> |
145 |
| -where |
146 |
| - P: Progress, |
147 |
| - <P as Progress>::SubProgress: Send, |
148 |
| - W1: io::Write, |
149 |
| - W2: io::Write, |
150 |
| -{ |
151 |
| - let path = path.as_ref(); |
152 |
| - let ext = path.extension().and_then(|ext| ext.to_str()).ok_or_else(|| { |
153 |
| - anyhow!( |
154 |
| - "Cannot determine data type on path without extension '{}', expecting default extensions 'idx' and 'pack'", |
155 |
| - path.display() |
156 |
| - ) |
157 |
| - })?; |
158 |
| - let res = match ext { |
159 |
| - "pack" => { |
160 |
| - let pack = git_odb::pack::data::File::at(path).with_context(|| "Could not open pack file")?; |
161 |
| - pack.verify_checksum().map(|id| (id, None))? |
162 |
| - } |
163 |
| - "idx" => { |
164 |
| - let idx = git_odb::pack::index::File::at(path).with_context(|| "Could not open pack index file")?; |
165 |
| - let packfile_path = path.with_extension("pack"); |
166 |
| - let pack = git_odb::pack::data::File::at(&packfile_path) |
167 |
| - .map_err(|e| { |
168 |
| - writeln!( |
169 |
| - err, |
170 |
| - "Could not find matching pack file at '{}' - only index file will be verified, error was: {}", |
171 |
| - packfile_path.display(), |
172 |
| - e |
173 |
| - ) |
174 |
| - .ok(); |
175 |
| - e |
176 |
| - }) |
177 |
| - .ok(); |
178 |
| - let cache = || -> EitherCache { |
179 |
| - if output_statistics.is_some() { |
180 |
| - // turn off acceleration as we need to see entire chains all the time |
181 |
| - EitherCache::Left(pack::cache::DecodeEntryNoop) |
182 |
| - } else { |
183 |
| - EitherCache::Right(pack::cache::DecodeEntryLRU::default()) |
184 |
| - } |
185 |
| - }; |
186 |
| - |
187 |
| - idx.verify_checksum_of_index( |
188 |
| - pack.as_ref().map(|p| (p, mode, algorithm.into())), |
189 |
| - thread_limit, |
190 |
| - progress, |
191 |
| - cache, |
192 |
| - ) |
193 |
| - .with_context(|| "Verification failure")? |
194 |
| - } |
195 |
| - ext => return Err(anyhow!("Unknown extension {:?}, expecting 'idx' or 'pack'", ext)), |
196 |
| - }; |
197 |
| - if let Some(stats) = res.1.as_ref() { |
198 |
| - match output_statistics { |
199 |
| - Some(OutputFormat::Human) => drop(print_statistics(&mut out, stats)), |
200 |
| - #[cfg(feature = "serde1")] |
201 |
| - Some(OutputFormat::Json) => drop(serde_json::to_writer_pretty(out, stats)), |
202 |
| - _ => {} |
203 |
| - }; |
204 |
| - } |
205 |
| - Ok(res) |
206 |
| -} |
207 |
| - |
208 |
| -fn print_statistics(out: &mut impl io::Write, stats: &index::verify::Outcome) -> io::Result<()> { |
209 |
| - writeln!(out, "objects per delta chain length")?; |
210 |
| - let mut chain_length_to_object: Vec<_> = stats.objects_per_chain_length.iter().map(|(a, b)| (*a, *b)).collect(); |
211 |
| - chain_length_to_object.sort_by_key(|e| e.0); |
212 |
| - let mut total_object_count = 0; |
213 |
| - for (chain_length, object_count) in chain_length_to_object.into_iter() { |
214 |
| - total_object_count += object_count; |
215 |
| - writeln!(out, "\t{:>2}: {}", chain_length, object_count)?; |
216 |
| - } |
217 |
| - writeln!(out, "\t->: {}", total_object_count)?; |
218 |
| - |
219 |
| - let pack::data::decode::Outcome { |
220 |
| - kind: _, |
221 |
| - num_deltas, |
222 |
| - decompressed_size, |
223 |
| - compressed_size, |
224 |
| - object_size, |
225 |
| - } = stats.average; |
226 |
| - |
227 |
| - let width = 30; |
228 |
| - writeln!(out, "\naverages")?; |
229 |
| - #[rustfmt::skip] |
230 |
| - writeln!( |
231 |
| - out, |
232 |
| - "\t{:<width$} {};\n\t{:<width$} {};\n\t{:<width$} {};\n\t{:<width$} {};", |
233 |
| - "delta chain length:", num_deltas, |
234 |
| - "decompressed entry [B]:", decompressed_size, |
235 |
| - "compressed entry [B]:", compressed_size, |
236 |
| - "decompressed object size [B]:", object_size, |
237 |
| - width = width |
238 |
| - )?; |
239 |
| - |
240 |
| - writeln!(out, "\ncompression")?; |
241 |
| - #[rustfmt::skip] |
242 |
| - writeln!( |
243 |
| - out, "\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}", |
244 |
| - "compressed entries size", ByteSize(stats.total_compressed_entries_size), |
245 |
| - "decompressed entries size", ByteSize(stats.total_decompressed_entries_size), |
246 |
| - "total object size", ByteSize(stats.total_object_size), |
247 |
| - "pack size", ByteSize(stats.pack_size), |
248 |
| - width = width |
249 |
| - )?; |
250 |
| - let compression_ratio = stats.total_decompressed_entries_size as f64 / stats.total_compressed_entries_size as f64; |
251 |
| - let delta_compression_ratio = stats.total_object_size as f64 / stats.total_compressed_entries_size as f64; |
252 |
| - #[rustfmt::skip] |
253 |
| - writeln!( |
254 |
| - out, |
255 |
| - "\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.3}%", |
256 |
| - "compression ratio", compression_ratio, |
257 |
| - "delta compression ratio", delta_compression_ratio, |
258 |
| - "delta gain", delta_compression_ratio / compression_ratio, |
259 |
| - "pack overhead", (1.0 - (stats.total_compressed_entries_size as f64 / stats.pack_size as f64)) * 100.0, |
260 |
| - width = width |
261 |
| - )?; |
262 |
| - Ok(()) |
263 |
| -} |
| 36 | +pub mod repository; |
| 37 | +pub mod verify; |
0 commit comments