Skip to content

Commit 0401758

Browse files
committed
Add all tests imaginable for branch name sanitization
1 parent 8077f3e commit 0401758

File tree

4 files changed

+378
-25
lines changed

4 files changed

+378
-25
lines changed

gix-validate/src/reference.rs

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,35 +27,82 @@ pub mod name {
2727
}
2828

2929
use bstr::BStr;
30+
use std::borrow::Cow;
3031

3132
/// Validate a reference name running all the tests in the book. This disallows lower-case references like `lower`, but also allows
3233
/// ones like `HEAD`, and `refs/lower`.
3334
pub fn name(path: &BStr) -> Result<&BStr, name::Error> {
34-
validate(path, Mode::Complete)
35+
match validate(path, Mode::Complete)? {
36+
Cow::Borrowed(inner) => Ok(inner),
37+
Cow::Owned(_) => {
38+
unreachable!("Without sanitization, there is no chance a sanitized version is returned.")
39+
}
40+
}
3541
}
3642

3743
/// Validate a partial reference name. As it is assumed to be partial, names like `some-name` are allowed
3844
/// even though these would be disallowed when using [`name()`].
3945
pub fn name_partial(path: &BStr) -> Result<&BStr, name::Error> {
40-
validate(path, Mode::Partial)
46+
match validate(path, Mode::Partial)? {
47+
Cow::Borrowed(inner) => Ok(inner),
48+
Cow::Owned(_) => {
49+
unreachable!("Without sanitization, there is no chance a sanitized version is returned.")
50+
}
51+
}
52+
}
53+
54+
/// The infallible version of [`name_partial()`] which, instead of failing, alters `path` and returns it as a valid
55+
/// partial name, which would also pass [`name_partial()`].
56+
///
57+
/// Note that an empty `path` is replaced with a `-` in order to be valid.
58+
pub fn name_partial_or_sanitize(path: &BStr) -> Cow<'_, BStr> {
59+
validate(path, Mode::PartialSanitize).expect("BUG: errors cannot happen as any issue is fixed instantly")
4160
}
4261

4362
enum Mode {
4463
Complete,
4564
Partial,
65+
/// Like `Partial`, but instead of failing, a sanitized version is returned.
66+
PartialSanitize,
4667
}
4768

48-
fn validate(path: &BStr, mode: Mode) -> Result<&BStr, name::Error> {
49-
crate::tag::name(path)?;
50-
if path[0] == b'/' {
51-
return Err(name::Error::StartsWithSlash);
69+
fn validate(path: &BStr, mode: Mode) -> Result<Cow<'_, BStr>, name::Error> {
70+
let mut out = crate::tag::name_inner(
71+
path,
72+
match mode {
73+
Mode::Complete | Mode::Partial => crate::tag::Mode::Validate,
74+
Mode::PartialSanitize => crate::tag::Mode::Sanitize,
75+
},
76+
)?;
77+
let sanitize = matches!(mode, Mode::PartialSanitize);
78+
if path.get(0) == Some(&b'/') {
79+
if sanitize {
80+
out.to_mut()[0] = b'-';
81+
} else {
82+
return Err(name::Error::StartsWithSlash);
83+
}
5284
}
5385
let mut previous = 0;
5486
let mut saw_slash = false;
55-
for byte in path.iter() {
87+
let mut out_ofs = 0;
88+
for (mut byte_pos, byte) in path.iter().enumerate() {
89+
byte_pos -= out_ofs;
5690
match *byte {
57-
b'/' if previous == b'/' => return Err(name::Error::RepeatedSlash),
58-
b'.' if previous == b'/' => return Err(name::Error::StartsWithDot),
91+
b'/' if previous == b'/' => {
92+
if sanitize {
93+
out.to_mut().remove(byte_pos);
94+
out_ofs += 1;
95+
} else {
96+
return Err(name::Error::RepeatedSlash);
97+
}
98+
}
99+
b'.' if previous == b'/' => {
100+
if sanitize {
101+
out.to_mut()[byte_pos] = b'-';
102+
} else {
103+
return Err(name::Error::StartsWithDot);
104+
}
105+
}
59106
_ => {}
60107
}
61108

@@ -70,5 +117,5 @@ fn validate(path: &BStr, mode: Mode) -> Result<&BStr, name::Error> {
70117
return Err(name::Error::SomeLowercase);
71118
}
72119
}
73-
Ok(path)
120+
Ok(out)
74121
}

gix-validate/src/tag.rs

Lines changed: 89 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
use bstr::BStr;
1+
use bstr::{BStr, ByteSlice};
2+
use std::borrow::Cow;
23

34
///
45
#[allow(clippy::empty_docs)]
@@ -33,36 +34,110 @@ pub mod name {
3334
/// Assure the given `input` resembles a valid git tag name, which is returned unchanged on success.
3435
/// Tag names are provided as names, like `v1.0` or `alpha-1`, without paths.
3536
pub fn name(input: &BStr) -> Result<&BStr, name::Error> {
37+
match name_inner(input, Mode::Validate)? {
38+
Cow::Borrowed(inner) => Ok(inner),
39+
Cow::Owned(_) => {
40+
unreachable!("When validating, the input isn't changed")
41+
}
42+
}
43+
}
44+
45+
#[derive(Eq, PartialEq)]
46+
pub(crate) enum Mode {
47+
Sanitize,
48+
Validate,
49+
}
50+
51+
pub(crate) fn name_inner(input: &BStr, mode: Mode) -> Result<Cow<'_, BStr>, name::Error> {
52+
let mut out = Cow::Borrowed(input);
53+
let sanitize = matches!(mode, Mode::Sanitize);
3654
if input.is_empty() {
37-
return Err(name::Error::Empty);
55+
return if sanitize {
56+
out.to_mut().push(b'-');
57+
Ok(out)
58+
} else {
59+
Err(name::Error::Empty)
60+
};
3861
}
3962
if *input.last().expect("non-empty") == b'/' {
40-
return Err(name::Error::EndsWithSlash);
63+
if sanitize {
64+
while out.last() == Some(&b'/') {
65+
out.to_mut().pop();
66+
}
67+
let bytes_from_end = out.to_mut().as_bytes_mut().iter_mut().rev();
68+
for b in bytes_from_end.take_while(|b| **b == b'/') {
69+
*b = b'-';
70+
}
71+
} else {
72+
return Err(name::Error::EndsWithSlash);
73+
}
4174
}
4275

4376
let mut previous = 0;
44-
for byte in input.iter() {
77+
let mut out_ofs = 0;
78+
for (mut byte_pos, byte) in input.iter().enumerate() {
79+
byte_pos -= out_ofs;
4580
match byte {
4681
b'\\' | b'^' | b':' | b'[' | b'?' | b' ' | b'~' | b'\0'..=b'\x1F' | b'\x7F' => {
47-
return Err(name::Error::InvalidByte {
48-
byte: (&[*byte][..]).into(),
49-
})
82+
if sanitize {
83+
out.to_mut()[byte_pos] = b'-';
84+
} else {
85+
return Err(name::Error::InvalidByte {
86+
byte: (&[*byte][..]).into(),
87+
});
88+
}
89+
}
90+
b'*' => {
91+
if sanitize {
92+
out.to_mut()[byte_pos] = b'-';
93+
} else {
94+
return Err(name::Error::Asterisk);
95+
}
96+
}
97+
98+
b'.' if previous == b'.' => {
99+
if sanitize {
100+
out.to_mut().remove(byte_pos);
101+
out_ofs += 1;
102+
} else {
103+
return Err(name::Error::DoubleDot);
104+
}
105+
}
106+
b'{' if previous == b'@' => {
107+
if sanitize {
108+
out.to_mut()[byte_pos] = b'-';
109+
} else {
110+
return Err(name::Error::ReflogPortion);
111+
}
50112
}
51-
b'*' => return Err(name::Error::Asterisk),
52-
b'.' if previous == b'.' => return Err(name::Error::DoubleDot),
53-
b'{' if previous == b'@' => return Err(name::Error::ReflogPortion),
54113
_ => {}
55114
}
56115
previous = *byte;
57116
}
58117
if input[0] == b'.' {
59-
return Err(name::Error::StartsWithDot);
118+
if sanitize {
119+
out.to_mut()[0] = b'-';
120+
} else {
121+
return Err(name::Error::StartsWithDot);
122+
}
60123
}
61124
if input[input.len() - 1] == b'.' {
62-
return Err(name::Error::EndsWithDot);
125+
if sanitize {
126+
let last = out.len() - 1;
127+
out.to_mut()[last] = b'-';
128+
} else {
129+
return Err(name::Error::EndsWithDot);
130+
}
63131
}
64132
if input.ends_with(b".lock") {
65-
return Err(name::Error::LockFileSuffix);
133+
if sanitize {
134+
while out.ends_with(b".lock") {
135+
let len_without_suffix = out.len() - b".lock".len();
136+
out.to_mut().truncate(len_without_suffix);
137+
}
138+
} else {
139+
return Err(name::Error::LockFileSuffix);
140+
}
66141
}
67-
Ok(input)
142+
Ok(out)
68143
}

0 commit comments

Comments
 (0)