Skip to content

Commit 8e06270

Browse files
committed
Refactor parse_file to look more like the spec
1 parent 6a6b9da commit 8e06270

File tree

1 file changed

+148
-190
lines changed

1 file changed

+148
-190
lines changed

src/parser.rs

Lines changed: 148 additions & 190 deletions
Original file line numberDiff line numberDiff line change
@@ -470,15 +470,93 @@ impl<'a> Parser<'a> {
470470
mut self,
471471
input: Input,
472472
scheme_type: SchemeType,
473-
mut base_file_url: Option<&Url>,
473+
base_file_url: Option<&Url>,
474474
) -> ParseResult<Url> {
475475
use SyntaxViolation::Backslash;
476476
// file state
477477
debug_assert!(self.serialization.is_empty());
478478
let (first_char, input_after_first_char) = input.split_first();
479-
match first_char {
480-
None => {
479+
if matches!(first_char, Some('/') | Some('\\')) {
480+
self.log_violation_if(SyntaxViolation::Backslash, || first_char == Some('\\'));
481+
// file slash state
482+
let (next_char, input_after_next_char) = input_after_first_char.split_first();
483+
if matches!(next_char, Some('/') | Some('\\')) {
484+
self.log_violation_if(Backslash, || next_char == Some('\\'));
485+
// file host state
486+
self.serialization.push_str("file://");
487+
let scheme_end = "file".len() as u32;
488+
let host_start = "file://".len() as u32;
489+
let (path_start, mut host, remaining) =
490+
self.parse_file_host(input_after_next_char)?;
491+
let mut host_end = to_u32(self.serialization.len())?;
492+
let mut has_host = !matches!(host, HostInternal::None);
493+
let remaining = if path_start {
494+
self.parse_path_start(SchemeType::File, &mut has_host, remaining)
495+
} else {
496+
let path_start = self.serialization.len();
497+
self.serialization.push('/');
498+
self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
499+
};
500+
// For file URLs that have a host and whose path starts
501+
// with the windows drive letter we just remove the host.
502+
if !has_host {
503+
self.serialization
504+
.drain(host_start as usize..host_end as usize);
505+
host_end = host_start;
506+
host = HostInternal::None;
507+
}
508+
let (query_start, fragment_start) =
509+
self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
510+
return Ok(Url {
511+
serialization: self.serialization,
512+
scheme_end: scheme_end,
513+
username_end: host_start,
514+
host_start: host_start,
515+
host_end: host_end,
516+
host: host,
517+
port: None,
518+
path_start: host_end,
519+
query_start: query_start,
520+
fragment_start: fragment_start,
521+
});
522+
} else {
523+
self.serialization.push_str("file:///");
524+
let scheme_end = "file".len() as u32;
525+
let path_start = "file://".len();
481526
if let Some(base_url) = base_file_url {
527+
let first_segment = base_url.path_segments().unwrap().next().unwrap();
528+
// FIXME: *normalized* drive letter
529+
if is_windows_drive_letter(first_segment) {
530+
self.serialization.push_str(first_segment);
531+
self.serialization.push('/');
532+
}
533+
}
534+
let remaining = self.parse_path(
535+
SchemeType::File,
536+
&mut false,
537+
path_start,
538+
input_after_first_char,
539+
);
540+
let (query_start, fragment_start) =
541+
self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
542+
let path_start = path_start as u32;
543+
return Ok(Url {
544+
serialization: self.serialization,
545+
scheme_end: scheme_end,
546+
username_end: path_start,
547+
host_start: path_start,
548+
host_end: path_start,
549+
host: HostInternal::None,
550+
port: None,
551+
path_start: path_start,
552+
query_start: query_start,
553+
fragment_start: fragment_start,
554+
});
555+
}
556+
}
557+
if let Some(base_url) = base_file_url {
558+
match first_char {
559+
None => {
482560
// Copy everything except the fragment
483561
let before_fragment = match base_url.fragment_start {
484562
Some(i) => &base_url.serialization[..i as usize],
@@ -490,26 +568,8 @@ impl<'a> Parser<'a> {
490568
fragment_start: None,
491569
..*base_url
492570
})
493-
} else {
494-
self.serialization.push_str("file:///");
495-
let scheme_end = "file".len() as u32;
496-
let path_start = "file://".len() as u32;
497-
Ok(Url {
498-
serialization: self.serialization,
499-
scheme_end: scheme_end,
500-
username_end: path_start,
501-
host_start: path_start,
502-
host_end: path_start,
503-
host: HostInternal::None,
504-
port: None,
505-
path_start: path_start,
506-
query_start: None,
507-
fragment_start: None,
508-
})
509571
}
510-
}
511-
Some('?') => {
512-
if let Some(base_url) = base_file_url {
572+
Some('?') => {
513573
// Copy everything up to the query string
514574
let before_query = match (base_url.query_start, base_url.fragment_start) {
515575
(None, None) => &*base_url.serialization,
@@ -524,179 +584,77 @@ impl<'a> Parser<'a> {
524584
fragment_start: fragment_start,
525585
..*base_url
526586
})
527-
} else {
528-
self.serialization.push_str("file:///");
529-
let scheme_end = "file".len() as u32;
530-
let path_start = "file://".len() as u32;
531-
let (query_start, fragment_start) =
532-
self.parse_query_and_fragment(scheme_type, scheme_end, input)?;
533-
Ok(Url {
534-
serialization: self.serialization,
535-
scheme_end: scheme_end,
536-
username_end: path_start,
537-
host_start: path_start,
538-
host_end: path_start,
539-
host: HostInternal::None,
540-
port: None,
541-
path_start: path_start,
542-
query_start: query_start,
543-
fragment_start: fragment_start,
544-
})
545587
}
546-
}
547-
Some('#') => {
548-
if let Some(base_url) = base_file_url {
549-
self.fragment_only(base_url, input)
550-
} else {
551-
self.serialization.push_str("file:///");
552-
let scheme_end = "file".len() as u32;
553-
let path_start = "file://".len() as u32;
554-
let fragment_start = "file:///".len() as u32;
555-
self.serialization.push('#');
556-
self.parse_fragment(input_after_first_char);
557-
Ok(Url {
558-
serialization: self.serialization,
559-
scheme_end: scheme_end,
560-
username_end: path_start,
561-
host_start: path_start,
562-
host_end: path_start,
563-
host: HostInternal::None,
564-
port: None,
565-
path_start: path_start,
566-
query_start: None,
567-
fragment_start: Some(fragment_start),
568-
})
569-
}
570-
}
571-
Some('/') | Some('\\') => {
572-
self.log_violation_if(Backslash, || first_char == Some('\\'));
573-
// file slash state
574-
let (next_char, input_after_next_char) = input_after_first_char.split_first();
575-
self.log_violation_if(Backslash, || next_char == Some('\\'));
576-
if matches!(next_char, Some('/') | Some('\\')) {
577-
// file host state
578-
self.serialization.push_str("file://");
579-
let scheme_end = "file".len() as u32;
580-
let host_start = "file://".len() as u32;
581-
let (path_start, mut host, remaining) =
582-
self.parse_file_host(input_after_next_char)?;
583-
let mut host_end = to_u32(self.serialization.len())?;
584-
let mut has_host = !matches!(host, HostInternal::None);
585-
let remaining = if path_start {
586-
self.parse_path_start(SchemeType::File, &mut has_host, remaining)
588+
Some('#') => self.fragment_only(base_url, input),
589+
_ => {
590+
if !starts_with_windows_drive_letter_segment(&input) {
591+
let before_query = match (base_url.query_start, base_url.fragment_start) {
592+
(None, None) => &*base_url.serialization,
593+
(Some(i), _) | (None, Some(i)) => base_url.slice(..i),
594+
};
595+
self.serialization.push_str(before_query);
596+
self.pop_path(SchemeType::File, base_url.path_start as usize);
597+
let remaining = self.parse_path(
598+
SchemeType::File,
599+
&mut true,
600+
base_url.path_start as usize,
601+
input,
602+
);
603+
self.with_query_and_fragment(
604+
SchemeType::File,
605+
base_url.scheme_end,
606+
base_url.username_end,
607+
base_url.host_start,
608+
base_url.host_end,
609+
base_url.host,
610+
base_url.port,
611+
base_url.path_start,
612+
remaining,
613+
)
587614
} else {
588-
let path_start = self.serialization.len();
589-
self.serialization.push('/');
590-
self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
591-
};
592-
// For file URLs that have a host and whose path starts
593-
// with the windows drive letter we just remove the host.
594-
if !has_host {
595-
self.serialization
596-
.drain(host_start as usize..host_end as usize);
597-
host_end = host_start;
598-
host = HostInternal::None;
599-
}
600-
let (query_start, fragment_start) =
601-
self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
602-
Ok(Url {
603-
serialization: self.serialization,
604-
scheme_end: scheme_end,
605-
username_end: host_start,
606-
host_start: host_start,
607-
host_end: host_end,
608-
host: host,
609-
port: None,
610-
path_start: host_end,
611-
query_start: query_start,
612-
fragment_start: fragment_start,
613-
})
614-
} else {
615-
self.serialization.push_str("file:///");
616-
let scheme_end = "file".len() as u32;
617-
let path_start = "file://".len();
618-
if let Some(base_url) = base_file_url {
619-
let first_segment = base_url.path_segments().unwrap().next().unwrap();
620-
// FIXME: *normalized* drive letter
621-
if is_windows_drive_letter(first_segment) {
622-
self.serialization.push_str(first_segment);
623-
self.serialization.push('/');
624-
}
615+
self.serialization.push_str("file:///");
616+
let scheme_end = "file".len() as u32;
617+
let path_start = "file://".len();
618+
let remaining =
619+
self.parse_path(SchemeType::File, &mut false, path_start, input);
620+
let (query_start, fragment_start) =
621+
self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
622+
let path_start = path_start as u32;
623+
Ok(Url {
624+
serialization: self.serialization,
625+
scheme_end: scheme_end,
626+
username_end: path_start,
627+
host_start: path_start,
628+
host_end: path_start,
629+
host: HostInternal::None,
630+
port: None,
631+
path_start: path_start,
632+
query_start: query_start,
633+
fragment_start: fragment_start,
634+
})
625635
}
626-
let remaining = self.parse_path(
627-
SchemeType::File,
628-
&mut false,
629-
path_start,
630-
input_after_first_char,
631-
);
632-
let (query_start, fragment_start) =
633-
self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
634-
let path_start = path_start as u32;
635-
Ok(Url {
636-
serialization: self.serialization,
637-
scheme_end: scheme_end,
638-
username_end: path_start,
639-
host_start: path_start,
640-
host_end: path_start,
641-
host: HostInternal::None,
642-
port: None,
643-
path_start: path_start,
644-
query_start: query_start,
645-
fragment_start: fragment_start,
646-
})
647-
}
648-
}
649-
_ => {
650-
if starts_with_windows_drive_letter_segment(&input) {
651-
base_file_url = None;
652-
}
653-
if let Some(base_url) = base_file_url {
654-
let before_query = match (base_url.query_start, base_url.fragment_start) {
655-
(None, None) => &*base_url.serialization,
656-
(Some(i), _) | (None, Some(i)) => base_url.slice(..i),
657-
};
658-
self.serialization.push_str(before_query);
659-
self.pop_path(SchemeType::File, base_url.path_start as usize);
660-
let remaining = self.parse_path(
661-
SchemeType::File,
662-
&mut true,
663-
base_url.path_start as usize,
664-
input,
665-
);
666-
self.with_query_and_fragment(
667-
SchemeType::File,
668-
base_url.scheme_end,
669-
base_url.username_end,
670-
base_url.host_start,
671-
base_url.host_end,
672-
base_url.host,
673-
base_url.port,
674-
base_url.path_start,
675-
remaining,
676-
)
677-
} else {
678-
self.serialization.push_str("file:///");
679-
let scheme_end = "file".len() as u32;
680-
let path_start = "file://".len();
681-
let remaining =
682-
self.parse_path(SchemeType::File, &mut false, path_start, input);
683-
let (query_start, fragment_start) =
684-
self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
685-
let path_start = path_start as u32;
686-
Ok(Url {
687-
serialization: self.serialization,
688-
scheme_end: scheme_end,
689-
username_end: path_start,
690-
host_start: path_start,
691-
host_end: path_start,
692-
host: HostInternal::None,
693-
port: None,
694-
path_start: path_start,
695-
query_start: query_start,
696-
fragment_start: fragment_start,
697-
})
698636
}
699637
}
638+
} else {
639+
self.serialization.push_str("file:///");
640+
let scheme_end = "file".len() as u32;
641+
let path_start = "file://".len();
642+
let remaining = self.parse_path(SchemeType::File, &mut false, path_start, input);
643+
let (query_start, fragment_start) =
644+
self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
645+
let path_start = path_start as u32;
646+
Ok(Url {
647+
serialization: self.serialization,
648+
scheme_end: scheme_end,
649+
username_end: path_start,
650+
host_start: path_start,
651+
host_end: path_start,
652+
host: HostInternal::None,
653+
port: None,
654+
path_start: path_start,
655+
query_start: query_start,
656+
fragment_start: fragment_start,
657+
})
700658
}
701659
}
702660

0 commit comments

Comments
 (0)