Skip to content

Commit 8ab19a6

Browse files
committed
Merge branch 'parse-git-ignore'
2 parents f9c2190 + 9a9115f commit 8ab19a6

File tree

8 files changed

+313
-0
lines changed

8 files changed

+313
-0
lines changed

Diff for: Cargo.lock

+5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: git-attributes/Cargo.toml

+5
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,8 @@ doctest = false
1313
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
1414

1515
[dependencies]
16+
bstr = { version = "0.2.13", default-features = false, features = ["std"]}
17+
bitflags = "1.3.2"
18+
19+
[dev-dependencies]
20+
git-testtools = { path = "../tests/tools"}

Diff for: git-attributes/src/ignore.rs

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
pub mod pattern {
2+
use bitflags::bitflags;
3+
4+
bitflags! {
5+
pub struct Mode: u32 {
6+
/// The pattern does not contain a sub-directory and - it doesn't contain slashes after removing the trailing one.
7+
const NO_SUB_DIR = 1 << 0;
8+
/// A pattern that is '*literal', meaning that it ends with what's given here
9+
const ENDS_WITH = 1 << 1;
10+
/// The pattern must match a directory, and not a file.
11+
const MUST_BE_DIR = 1 << 2;
12+
const NEGATIVE = 1 << 3;
13+
}
14+
}
15+
}

Diff for: git-attributes/src/lib.rs

+4
Original file line numberDiff line numberDiff line change
@@ -1 +1,5 @@
11
#![forbid(unsafe_code, rust_2018_idioms)]
2+
3+
pub mod ignore;
4+
5+
pub mod parse;

Diff for: git-attributes/src/parse/ignore.rs

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
use crate::ignore;
2+
use bstr::{BString, ByteSlice};
3+
4+
/// An iterator over the patterns contained in the bytes of an ignore file.
pub struct Iter<'a> {
5+
// The lines of the input buffer which have not been consumed yet.
lines: bstr::Lines<'a>,
6+
// The amount of lines consumed so far, i.e. the 1-based number of the line read last.
line_no: usize,
7+
}
8+
9+
impl<'a> Iter<'a> {
10+
pub fn new(buf: &'a [u8]) -> Self {
11+
Iter {
12+
lines: buf.lines(),
13+
line_no: 0,
14+
}
15+
}
16+
}
17+
18+
impl<'a> Iterator for Iter<'a> {
19+
type Item = (BString, ignore::pattern::Mode, usize);
20+
21+
fn next(&mut self) -> Option<Self::Item> {
22+
let mut res = None;
23+
for mut line in self.lines.by_ref() {
24+
self.line_no += 1;
25+
let mut mode = ignore::pattern::Mode::empty();
26+
if line.is_empty() {
27+
continue;
28+
};
29+
if line.first() == Some(&b'#') {
30+
continue;
31+
} else if line.first() == Some(&b'!') {
32+
mode |= ignore::pattern::Mode::NEGATIVE;
33+
line = &line[1..];
34+
} else if line.first() == Some(&b'\\') {
35+
let second = line.get(1);
36+
if second == Some(&b'!') || second == Some(&b'#') {
37+
line = &line[1..];
38+
}
39+
}
40+
let mut line = truncate_non_escaped_trailing_spaces(line);
41+
if line.last() == Some(&b'/') {
42+
mode |= ignore::pattern::Mode::MUST_BE_DIR;
43+
line.pop();
44+
}
45+
if !line.contains(&b'/') {
46+
mode |= ignore::pattern::Mode::NO_SUB_DIR;
47+
}
48+
if line.first() == Some(&b'*') && line[1..].find_byteset(br"*?[\").is_none() {
49+
mode |= ignore::pattern::Mode::ENDS_WITH;
50+
}
51+
res = Some((line, mode, self.line_no));
52+
break;
53+
}
54+
res
55+
}
56+
}
57+
58+
/// We always copy just because that's ultimately needed anyway, not because we always have to.
59+
fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString {
60+
match buf.rfind_not_byteset(br"\ ") {
61+
Some(pos) if pos + 1 == buf.len() => buf.into(), // does not end in (escaped) whitespace
62+
None => buf.into(),
63+
Some(start_of_non_space) => {
64+
// This seems a bit strange but attempts to recreate the git implementation while
65+
// actually removing the escape characters before spaces. We leave other backslashes
66+
// for escapes to be handled by `glob/globset`.
67+
let mut res: BString = buf[..start_of_non_space + 1].into();
68+
69+
let mut trailing_bytes = buf[start_of_non_space + 1..].iter();
70+
let mut bare_spaces = 0;
71+
while let Some(b) = trailing_bytes.next() {
72+
match b {
73+
b' ' => {
74+
bare_spaces += 1;
75+
}
76+
b'\\' => {
77+
res.extend(std::iter::repeat(b' ').take(bare_spaces));
78+
bare_spaces = 0;
79+
// Skip what follows, like git does, but keep spaces if possible.
80+
if trailing_bytes.next() == Some(&b' ') {
81+
res.push(b' ');
82+
}
83+
}
84+
_ => unreachable!("BUG: this must be either backslash or space"),
85+
}
86+
}
87+
res
88+
}
89+
}
90+
}

Diff for: git-attributes/src/parse/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pub mod ignore;
2+
3+
/// Return an iterator over the ignore patterns contained in `buf`.
///
/// This is a shorthand for `ignore::Iter::new(buf)`.
pub fn ignore(buf: &[u8]) -> ignore::Iter<'_> {
4+
ignore::Iter::new(buf)
5+
}

Diff for: git-attributes/tests/attributes.rs

+175
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
mod parse {
2+
mod ignore {
3+
use git_attributes::ignore::pattern::Mode;
4+
use git_testtools::fixture_path;
5+
6+
// Parses the `ignore/various.txt` fixture and compares each yielded pattern, mode and line number.
// The expected line numbers have gaps as the lines in between yield no pattern.
#[test]
7+
fn line_numbers_are_counted_correctly() {
8+
let ignore = std::fs::read(fixture_path("ignore/various.txt")).unwrap();
9+
let actual: Vec<_> = git_attributes::parse::ignore(&ignore).collect();
10+
assert_eq!(
11+
actual,
12+
vec![
13+
("*.[oa]".into(), Mode::NO_SUB_DIR, 2),
14+
("*.html".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 5),
15+
("foo.html".into(), Mode::NO_SUB_DIR | Mode::NEGATIVE, 8),
16+
("/*".into(), Mode::empty(), 11),
17+
("/foo".into(), Mode::NEGATIVE, 12),
18+
("/foo/*".into(), Mode::empty(), 13),
19+
("/foo/bar".into(), Mode::NEGATIVE, 14)
20+
]
21+
);
22+
}
23+
24+
// Lines terminated by `\n`, by `\r\n`, or not terminated at all must each yield their pattern
// without any line-ending bytes, numbered consecutively.
#[test]
25+
fn line_endings_can_be_windows_or_unix() {
26+
assert_eq!(
27+
git_attributes::parse::ignore(b"unix\nwindows\r\nlast").collect::<Vec<_>>(),
28+
vec![
29+
(r"unix".into(), Mode::NO_SUB_DIR, 1),
30+
(r"windows".into(), Mode::NO_SUB_DIR, 2),
31+
(r"last".into(), Mode::NO_SUB_DIR, 3)
32+
]
33+
);
34+
}
35+
36+
// `Mode::ENDS_WITH` is expected only for the first input, a leading `*` followed by plain text.
// A second `*`, a `[`, a `?` or a backslash after the leading `*` must leave the flag unset.
#[test]
37+
fn mark_ends_with_pattern_specifically() {
38+
assert_eq!(
39+
git_attributes::parse::ignore(br"*literal").next(),
40+
Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 1))
41+
);
42+
assert_eq!(
43+
git_attributes::parse::ignore(br"**literal").next(),
44+
Some((r"**literal".into(), Mode::NO_SUB_DIR, 1)),
45+
"double-asterisk won't allow for fast comparisons"
46+
);
47+
assert_eq!(
48+
git_attributes::parse::ignore(br"*litera[l]").next(),
49+
Some((r"*litera[l]".into(), Mode::NO_SUB_DIR, 1))
50+
);
51+
assert_eq!(
52+
git_attributes::parse::ignore(br"*litera?").next(),
53+
Some((r"*litera?".into(), Mode::NO_SUB_DIR, 1))
54+
);
55+
assert_eq!(
56+
git_attributes::parse::ignore(br"*litera\?").next(),
57+
Some((r"*litera\?".into(), Mode::NO_SUB_DIR, 1)),
58+
"for now we don't handle escapes properly like git seems to do"
59+
);
60+
}
61+
62+
// A buffer holding nothing but a line starting with `#` must not yield any item.
#[test]
63+
fn comments_are_ignored() {
64+
assert!(git_attributes::parse::ignore(b"# hello world").next().is_none());
65+
}
66+
67+
// A line starting with `\#` is a pattern, not a comment, and is yielded without the backslash.
#[test]
68+
fn backslashes_before_hashes_are_no_comments() {
69+
assert_eq!(
70+
git_attributes::parse::ignore(br"\#hello").next(),
71+
Some((r"#hello".into(), Mode::NO_SUB_DIR, 1))
72+
);
73+
}
74+
75+
// Backslashes that precede neither a leading `!` or `#` nor trailing spaces must be passed
// through unchanged, including one at the very start of the line.
#[test]
76+
fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() {
77+
assert_eq!(
78+
git_attributes::parse::ignore(br"\hello\world").next(),
79+
Some((r"\hello\world".into(), Mode::NO_SUB_DIR, 1))
80+
);
81+
}
82+
83+
// A leading `!` must be removed from the pattern and be recorded as `Mode::NEGATIVE`.
#[test]
84+
fn leading_exclamation_mark_negates_pattern() {
85+
assert_eq!(
86+
git_attributes::parse::ignore(b"!hello").next(),
87+
Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR, 1))
88+
);
89+
}
90+
91+
// A line starting with `\!` must keep the `!` as part of the pattern, lose the backslash,
// and must not be flagged as `Mode::NEGATIVE`.
#[test]
92+
fn leading_exclamation_marks_can_be_escaped_with_backslash() {
93+
assert_eq!(
94+
git_attributes::parse::ignore(br"\!hello").next(),
95+
Some(("!hello".into(), Mode::NO_SUB_DIR, 1))
96+
);
97+
}
98+
99+
// `Mode::NO_SUB_DIR` must be set for a pattern without a slash and be absent for one with a slash.
#[test]
100+
fn absence_of_sub_directories_are_marked() {
101+
assert_eq!(
102+
git_attributes::parse::ignore(br"a/b").next(),
103+
Some(("a/b".into(), Mode::empty(), 1))
104+
);
105+
assert_eq!(
106+
git_attributes::parse::ignore(br"ab").next(),
107+
Some(("ab".into(), Mode::NO_SUB_DIR, 1))
108+
);
109+
}
110+
111+
// A trailing slash must be recorded as `Mode::MUST_BE_DIR` and be removed from the pattern.
// With several trailing slashes only one is removed, so the remaining ones prevent `NO_SUB_DIR`.
#[test]
112+
fn trailing_slashes_are_marked_and_removed() {
113+
assert_eq!(
114+
git_attributes::parse::ignore(b"dir/").next(),
115+
Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR, 1))
116+
);
117+
assert_eq!(
118+
git_attributes::parse::ignore(b"dir///").next(),
119+
Some(("dir//".into(), Mode::MUST_BE_DIR, 1)),
120+
"but only the last slash is removed"
121+
);
122+
}
123+
124+
// Unescaped trailing spaces must be dropped from the pattern, whereas tabs have to be kept.
#[test]
125+
fn trailing_spaces_are_ignored() {
126+
assert_eq!(
127+
git_attributes::parse::ignore(br"a ").next(),
128+
Some(("a".into(), Mode::NO_SUB_DIR, 1))
129+
);
130+
assert_eq!(
131+
git_attributes::parse::ignore(b"a\t\t ").next(),
132+
Some(("a\t\t".into(), Mode::NO_SUB_DIR, 1)),
133+
"trailing tabs are not ignored"
134+
);
135+
}
136+
// Trailing spaces that are preceded by a backslash must survive as literal spaces, with the
// backslash itself being removed from the pattern.
// NOTE(review): the first and fourth assertion are identical as shown here, which suggests that
// runs of spaces inside these literals were collapsed when the diff was rendered - confirm the
// exact inputs and expected values against the repository.
#[test]
137+
fn trailing_spaces_can_be_escaped_to_be_literal() {
138+
assert_eq!(
139+
git_attributes::parse::ignore(br"a \ ").next(),
140+
Some(("a ".into(), Mode::NO_SUB_DIR, 1)),
141+
"a single escape in front of the last desired space is enough"
142+
);
143+
assert_eq!(
144+
git_attributes::parse::ignore(br"a b c ").next(),
145+
Some(("a b c".into(), Mode::NO_SUB_DIR, 1)),
146+
"spaces in the middle are fine"
147+
);
148+
assert_eq!(
149+
git_attributes::parse::ignore(br"a\ \ \ ").next(),
150+
Some(("a ".into(), Mode::NO_SUB_DIR, 1)),
151+
"one can also escape every single one"
152+
);
153+
assert_eq!(
154+
git_attributes::parse::ignore(br"a \ ").next(),
155+
Some(("a ".into(), Mode::NO_SUB_DIR, 1)),
156+
"or just the one in the middle, losing the last actual space"
157+
);
158+
assert_eq!(
159+
git_attributes::parse::ignore(br"a \").next(),
160+
Some(("a ".into(), Mode::NO_SUB_DIR, 1)),
161+
"escaping nothing also works as a whitespace protection"
162+
);
163+
assert_eq!(
164+
git_attributes::parse::ignore(br"a \\\ ").next(),
165+
Some((r"a ".into(), Mode::NO_SUB_DIR, 1)),
166+
"strange things like these work too"
167+
);
168+
assert_eq!(
169+
git_attributes::parse::ignore(br"a \\ ").next(),
170+
Some((r"a ".into(), Mode::NO_SUB_DIR, 1)),
171+
"strange things like these work as well"
172+
);
173+
}
174+
}
175+
}

Diff for: git-attributes/tests/fixtures/ignore/various.txt

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# ignore objects and archives, anywhere in the tree.
2+
*.[oa]
3+
4+
# ignore generated html files,
5+
*.html
6+
7+
# except foo.html which is maintained by hand
8+
!foo.html
9+
10+
# exclude everything except directory foo/bar
11+
/*
12+
!/foo
13+
/foo/*
14+
!/foo/bar

0 commit comments

Comments
 (0)