Skip to content

Commit 89259b3

Browse files
committed
auto merge of #15085 : brson/rust/stridx, r=alexcrichton
Being able to index into the bytes of a string encourages poor UTF-8 hygiene. To get a view of `&[u8]` from either a `String` or `&str` slice, use the `as_bytes()` method. Closes #12710. [breaking-change] If the diffstat is any indication, this shouldn't have a huge impact, but it will have some. Most changes are in the `str` and `path` modules. A lot of the existing usages were in tests where ASCII is expected. There are a number of other legit uses where the characters are known to be ASCII.
2 parents bd893d1 + d21336e commit 89259b3

File tree

26 files changed

+101
-87
lines changed

26 files changed

+101
-87
lines changed

src/libcollections/str.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,8 +1569,8 @@ mod tests {
15691569
let n2: uint = v.len();
15701570
assert_eq!(n1, n2);
15711571
while i < n1 {
1572-
let a: u8 = s1.as_slice()[i];
1573-
let b: u8 = s2.as_slice()[i];
1572+
let a: u8 = s1.as_bytes()[i];
1573+
let b: u8 = s2.as_bytes()[i];
15741574
debug!("{}", a);
15751575
debug!("{}", b);
15761576
assert_eq!(a, b);

src/libcollections/string.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ impl String {
222222
return None
223223
}
224224

225-
let byte = self.as_slice()[len - 1];
225+
let byte = self.as_bytes()[len - 1];
226226
self.vec.set_len(len - 1);
227227
Some(byte)
228228
}

src/libcore/str.rs

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,7 +1743,7 @@ impl<'a> StrSlice<'a> for &'a str {
17431743
fn lines_any(&self) -> AnyLines<'a> {
17441744
self.lines().map(|line| {
17451745
let l = line.len();
1746-
if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
1746+
if l > 0 && line.as_bytes()[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
17471747
else { line }
17481748
})
17491749
}
@@ -1867,26 +1867,26 @@ impl<'a> StrSlice<'a> for &'a str {
18671867
fn is_char_boundary(&self, index: uint) -> bool {
18681868
if index == self.len() { return true; }
18691869
if index > self.len() { return false; }
1870-
let b = self[index];
1870+
let b = self.as_bytes()[index];
18711871
return b < 128u8 || b >= 192u8;
18721872
}
18731873

18741874
#[inline]
18751875
fn char_range_at(&self, i: uint) -> CharRange {
1876-
if self[i] < 128u8 {
1877-
return CharRange {ch: self[i] as char, next: i + 1 };
1876+
if self.as_bytes()[i] < 128u8 {
1877+
return CharRange {ch: self.as_bytes()[i] as char, next: i + 1 };
18781878
}
18791879

18801880
// Multibyte case is a fn to allow char_range_at to inline cleanly
18811881
fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
1882-
let mut val = s[i] as u32;
1882+
let mut val = s.as_bytes()[i] as u32;
18831883
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
18841884
assert!((w != 0));
18851885

18861886
val = utf8_first_byte!(val, w);
1887-
val = utf8_acc_cont_byte!(val, s[i + 1]);
1888-
if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
1889-
if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
1887+
val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
1888+
if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
1889+
if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }
18901890

18911891
return CharRange {ch: unsafe { mem::transmute(val) }, next: i + w};
18921892
}
@@ -1899,23 +1899,25 @@ impl<'a> StrSlice<'a> for &'a str {
18991899
let mut prev = start;
19001900

19011901
prev = prev.saturating_sub(1);
1902-
if self[prev] < 128 { return CharRange{ch: self[prev] as char, next: prev} }
1902+
if self.as_bytes()[prev] < 128 {
1903+
return CharRange{ch: self.as_bytes()[prev] as char, next: prev}
1904+
}
19031905

19041906
// Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
19051907
fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
19061908
// while there is a previous byte == 10......
1907-
while i > 0 && s[i] & 192u8 == TAG_CONT_U8 {
1909+
while i > 0 && s.as_bytes()[i] & 192u8 == TAG_CONT_U8 {
19081910
i -= 1u;
19091911
}
19101912

1911-
let mut val = s[i] as u32;
1913+
let mut val = s.as_bytes()[i] as u32;
19121914
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
19131915
assert!((w != 0));
19141916

19151917
val = utf8_first_byte!(val, w);
1916-
val = utf8_acc_cont_byte!(val, s[i + 1]);
1917-
if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
1918-
if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
1918+
val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
1919+
if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
1920+
if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }
19191921

19201922
return CharRange {ch: unsafe { mem::transmute(val) }, next: i};
19211923
}

src/libgetopts/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ impl Matches {
370370
}
371371

372372
fn is_arg(arg: &str) -> bool {
373-
arg.len() > 1 && arg[0] == '-' as u8
373+
arg.len() > 1 && arg.as_bytes()[0] == '-' as u8
374374
}
375375

376376
fn find_opt(opts: &[Opt], nm: Name) -> Option<uint> {
@@ -553,7 +553,7 @@ pub fn getopts(args: &[String], optgrps: &[OptGroup]) -> Result {
553553
} else {
554554
let mut names;
555555
let mut i_arg = None;
556-
if cur.as_slice()[1] == '-' as u8 {
556+
if cur.as_bytes()[1] == '-' as u8 {
557557
let tail = cur.as_slice().slice(2, curlen);
558558
let tail_eq: Vec<&str> = tail.split('=').collect();
559559
if tail_eq.len() <= 1 {

src/librustc/back/link.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -657,8 +657,8 @@ pub fn sanitize(s: &str) -> String {
657657

658658
// Underscore-qualify anything that didn't start as an ident.
659659
if result.len() > 0u &&
660-
result.as_slice()[0] != '_' as u8 &&
661-
! char::is_XID_start(result.as_slice()[0] as char) {
660+
result.as_bytes()[0] != '_' as u8 &&
661+
! char::is_XID_start(result.as_bytes()[0] as char) {
662662
return format!("_{}", result.as_slice());
663663
}
664664

@@ -737,9 +737,9 @@ pub fn mangle_exported_name(ccx: &CrateContext, path: PathElems,
737737
let extra2 = id % EXTRA_CHARS.len();
738738
let id = id / EXTRA_CHARS.len();
739739
let extra3 = id % EXTRA_CHARS.len();
740-
hash.push_char(EXTRA_CHARS[extra1] as char);
741-
hash.push_char(EXTRA_CHARS[extra2] as char);
742-
hash.push_char(EXTRA_CHARS[extra3] as char);
740+
hash.push_char(EXTRA_CHARS.as_bytes()[extra1] as char);
741+
hash.push_char(EXTRA_CHARS.as_bytes()[extra2] as char);
742+
hash.push_char(EXTRA_CHARS.as_bytes()[extra3] as char);
743743

744744
exported_name(path,
745745
hash.as_slice(),

src/librustc/metadata/decoder.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ fn item_sized(item: ebml::Doc) -> ast::Sized {
181181
fn item_method_sort(item: ebml::Doc) -> char {
182182
let mut ret = 'r';
183183
reader::tagged_docs(item, tag_item_trait_method_sort, |doc| {
184-
ret = doc.as_str_slice()[0] as char;
184+
ret = doc.as_str_slice().as_bytes()[0] as char;
185185
false
186186
});
187187
ret
@@ -757,13 +757,13 @@ fn get_explicit_self(item: ebml::Doc) -> ast::ExplicitSelf_ {
757757
let explicit_self_doc = reader::get_doc(item, tag_item_trait_method_explicit_self);
758758
let string = explicit_self_doc.as_str_slice();
759759

760-
let explicit_self_kind = string[0];
760+
let explicit_self_kind = string.as_bytes()[0];
761761
match explicit_self_kind as char {
762762
's' => ast::SelfStatic,
763763
'v' => ast::SelfValue,
764764
'~' => ast::SelfUniq,
765765
// FIXME(#4846) expl. region
766-
'&' => ast::SelfRegion(None, get_mutability(string[1])),
766+
'&' => ast::SelfRegion(None, get_mutability(string.as_bytes()[1])),
767767
_ => fail!("unknown self type code: `{}`", explicit_self_kind as char)
768768
}
769769
}

src/librustc/middle/dead.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ struct DeadVisitor<'a> {
399399
impl<'a> DeadVisitor<'a> {
400400
fn should_warn_about_field(&mut self, node: &ast::StructField_) -> bool {
401401
let (is_named, has_leading_underscore) = match node.ident() {
402-
Some(ref ident) => (true, token::get_ident(*ident).get()[0] == ('_' as u8)),
402+
Some(ref ident) => (true, token::get_ident(*ident).get().as_bytes()[0] == ('_' as u8)),
403403
_ => (false, false)
404404
};
405405
let field_type = ty::node_id_to_type(self.tcx, node.id);

src/librustc/middle/liveness.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1511,7 +1511,7 @@ impl<'a> Liveness<'a> {
15111511

15121512
fn should_warn(&self, var: Variable) -> Option<String> {
15131513
let name = self.ir.variable_name(var);
1514-
if name.len() == 0 || name.as_slice()[0] == ('_' as u8) {
1514+
if name.len() == 0 || name.as_bytes()[0] == ('_' as u8) {
15151515
None
15161516
} else {
15171517
Some(name)

src/librustc/middle/mem_categorization.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,6 @@ pub enum FieldName {
126126
#[deriving(Clone, PartialEq, Eq, Hash)]
127127
pub enum ElementKind {
128128
VecElement,
129-
StrElement,
130129
OtherElement,
131130
}
132131

@@ -794,7 +793,7 @@ impl<'t,TYPER:Typer> MemCategorizationContext<'t,TYPER> {
794793
//! - `derefs`: the deref number to be used for
795794
//! the implicit index deref, if any (see above)
796795
797-
let element_ty = match ty::index(base_cmt.ty) {
796+
let element_ty = match ty::array_element_ty(base_cmt.ty) {
798797
Some(ref mt) => mt.ty,
799798
None => {
800799
self.tcx().sess.span_bug(
@@ -1137,9 +1136,6 @@ impl<'t,TYPER:Typer> MemCategorizationContext<'t,TYPER> {
11371136
cat_interior(_, InteriorElement(VecElement)) => {
11381137
"vec content".to_string()
11391138
}
1140-
cat_interior(_, InteriorElement(StrElement)) => {
1141-
"str content".to_string()
1142-
}
11431139
cat_interior(_, InteriorElement(OtherElement)) => {
11441140
"indexed content".to_string()
11451141
}
@@ -1320,7 +1316,6 @@ fn element_kind(t: ty::t) -> ElementKind {
13201316
ty::ty_rptr(_, ty::mt{ty:ty, ..}) |
13211317
ty::ty_uniq(ty) => match ty::get(ty).sty {
13221318
ty::ty_vec(_, None) => VecElement,
1323-
ty::ty_str => StrElement,
13241319
_ => OtherElement
13251320
},
13261321
ty::ty_vec(..) => VecElement,

src/librustc/middle/ty.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2563,6 +2563,21 @@ pub fn deref(t: t, explicit: bool) -> Option<mt> {
25632563

25642564
// Returns the type of t[i]
25652565
pub fn index(t: t) -> Option<mt> {
2566+
match get(t).sty {
2567+
ty_vec(mt, Some(_)) => Some(mt),
2568+
ty_ptr(mt{ty: t, ..}) | ty_rptr(_, mt{ty: t, ..}) |
2569+
ty_box(t) | ty_uniq(t) => match get(t).sty {
2570+
ty_vec(mt, None) => Some(mt),
2571+
_ => None,
2572+
},
2573+
_ => None
2574+
}
2575+
}
2576+
2577+
// Returns the type of elements contained within an 'array-like' type.
2578+
// This is exactly the same as the above, except it supports strings,
2579+
// which can't actually be indexed.
2580+
pub fn array_element_ty(t: t) -> Option<mt> {
25662581
match get(t).sty {
25672582
ty_vec(mt, Some(_)) => Some(mt),
25682583
ty_ptr(mt{ty: t, ..}) | ty_rptr(_, mt{ty: t, ..}) |

0 commit comments

Comments
 (0)