Skip to content

Commit fb90734

Browse files
nox authored and o0Ignition0o committed
Refactor parse_file to look more like the spec
1 parent b20e852 commit fb90734

File tree

1 file changed

+148
-190
lines changed

1 file changed

+148
-190
lines changed

src/parser.rs

Lines changed: 148 additions & 190 deletions
Original file line number | Diff line number | Diff line change
@@ -488,15 +488,93 @@ impl<'a> Parser<'a> {
488488
mut self,
489489
input: Input,
490490
scheme_type: SchemeType,
491-
mut base_file_url: Option<&Url>,
491+
base_file_url: Option<&Url>,
492492
) -> ParseResult<Url> {
493493
use SyntaxViolation::Backslash;
494494
// file state
495495
debug_assert!(self.serialization.is_empty());
496496
let (first_char, input_after_first_char) = input.split_first();
497-
match first_char {
498-
None => {
497+
if matches!(first_char, Some('/') | Some('\\')) {
498+
self.log_violation_if(SyntaxViolation::Backslash, || first_char == Some('\\'));
499+
// file slash state
500+
let (next_char, input_after_next_char) = input_after_first_char.split_first();
501+
if matches!(next_char, Some('/') | Some('\\')) {
502+
self.log_violation_if(Backslash, || next_char == Some('\\'));
503+
// file host state
504+
self.serialization.push_str("file://");
505+
let scheme_end = "file".len() as u32;
506+
let host_start = "file://".len() as u32;
507+
let (path_start, mut host, remaining) =
508+
self.parse_file_host(input_after_next_char)?;
509+
let mut host_end = to_u32(self.serialization.len())?;
510+
let mut has_host = !matches!(host, HostInternal::None);
511+
let remaining = if path_start {
512+
self.parse_path_start(SchemeType::File, &mut has_host, remaining)
513+
} else {
514+
let path_start = self.serialization.len();
515+
self.serialization.push('/');
516+
self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
517+
};
518+
// For file URLs that have a host and whose path starts
519+
// with the windows drive letter we just remove the host.
520+
if !has_host {
521+
self.serialization
522+
.drain(host_start as usize..host_end as usize);
523+
host_end = host_start;
524+
host = HostInternal::None;
525+
}
526+
let (query_start, fragment_start) =
527+
self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
528+
return Ok(Url {
529+
serialization: self.serialization,
530+
scheme_end: scheme_end,
531+
username_end: host_start,
532+
host_start: host_start,
533+
host_end: host_end,
534+
host: host,
535+
port: None,
536+
path_start: host_end,
537+
query_start: query_start,
538+
fragment_start: fragment_start,
539+
});
540+
} else {
541+
self.serialization.push_str("file:///");
542+
let scheme_end = "file".len() as u32;
543+
let path_start = "file://".len();
499544
if let Some(base_url) = base_file_url {
545+
let first_segment = base_url.path_segments().unwrap().next().unwrap();
546+
// FIXME: *normalized* drive letter
547+
if is_windows_drive_letter(first_segment) {
548+
self.serialization.push_str(first_segment);
549+
self.serialization.push('/');
550+
}
551+
}
552+
let remaining = self.parse_path(
553+
SchemeType::File,
554+
&mut false,
555+
path_start,
556+
input_after_first_char,
557+
);
558+
let (query_start, fragment_start) =
559+
self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
560+
let path_start = path_start as u32;
561+
return Ok(Url {
562+
serialization: self.serialization,
563+
scheme_end: scheme_end,
564+
username_end: path_start,
565+
host_start: path_start,
566+
host_end: path_start,
567+
host: HostInternal::None,
568+
port: None,
569+
path_start: path_start,
570+
query_start: query_start,
571+
fragment_start: fragment_start,
572+
});
573+
}
574+
}
575+
if let Some(base_url) = base_file_url {
576+
match first_char {
577+
None => {
500578
// Copy everything except the fragment
501579
let before_fragment = match base_url.fragment_start {
502580
Some(i) => &base_url.serialization[..i as usize],
@@ -508,26 +586,8 @@ impl<'a> Parser<'a> {
508586
fragment_start: None,
509587
..*base_url
510588
})
511-
} else {
512-
self.serialization.push_str("file:///");
513-
let scheme_end = "file".len() as u32;
514-
let path_start = "file://".len() as u32;
515-
Ok(Url {
516-
serialization: self.serialization,
517-
scheme_end,
518-
username_end: path_start,
519-
host_start: path_start,
520-
host_end: path_start,
521-
host: HostInternal::None,
522-
port: None,
523-
path_start,
524-
query_start: None,
525-
fragment_start: None,
526-
})
527589
}
528-
}
529-
Some('?') => {
530-
if let Some(base_url) = base_file_url {
590+
Some('?') => {
531591
// Copy everything up to the query string
532592
let before_query = match (base_url.query_start, base_url.fragment_start) {
533593
(None, None) => &*base_url.serialization,
@@ -542,179 +602,77 @@ impl<'a> Parser<'a> {
542602
fragment_start,
543603
..*base_url
544604
})
545-
} else {
546-
self.serialization.push_str("file:///");
547-
let scheme_end = "file".len() as u32;
548-
let path_start = "file://".len() as u32;
549-
let (query_start, fragment_start) =
550-
self.parse_query_and_fragment(scheme_type, scheme_end, input)?;
551-
Ok(Url {
552-
serialization: self.serialization,
553-
scheme_end,
554-
username_end: path_start,
555-
host_start: path_start,
556-
host_end: path_start,
557-
host: HostInternal::None,
558-
port: None,
559-
path_start,
560-
query_start,
561-
fragment_start,
562-
})
563605
}
564-
}
565-
Some('#') => {
566-
if let Some(base_url) = base_file_url {
567-
self.fragment_only(base_url, input)
568-
} else {
569-
self.serialization.push_str("file:///");
570-
let scheme_end = "file".len() as u32;
571-
let path_start = "file://".len() as u32;
572-
let fragment_start = "file:///".len() as u32;
573-
self.serialization.push('#');
574-
self.parse_fragment(input_after_first_char);
575-
Ok(Url {
576-
serialization: self.serialization,
577-
scheme_end,
578-
username_end: path_start,
579-
host_start: path_start,
580-
host_end: path_start,
581-
host: HostInternal::None,
582-
port: None,
583-
path_start,
584-
query_start: None,
585-
fragment_start: Some(fragment_start),
586-
})
587-
}
588-
}
589-
Some('/') | Some('\\') => {
590-
self.log_violation_if(Backslash, || first_char == Some('\\'));
591-
// file slash state
592-
let (next_char, input_after_next_char) = input_after_first_char.split_first();
593-
self.log_violation_if(Backslash, || next_char == Some('\\'));
594-
if matches!(next_char, Some('/') | Some('\\')) {
595-
// file host state
596-
self.serialization.push_str("file://");
597-
let scheme_end = "file".len() as u32;
598-
let host_start = "file://".len() as u32;
599-
let (path_start, mut host, remaining) =
600-
self.parse_file_host(input_after_next_char)?;
601-
let mut host_end = to_u32(self.serialization.len())?;
602-
let mut has_host = !matches!(host, HostInternal::None);
603-
let remaining = if path_start {
604-
self.parse_path_start(SchemeType::File, &mut has_host, remaining)
606+
Some('#') => self.fragment_only(base_url, input),
607+
_ => {
608+
if !starts_with_windows_drive_letter_segment(&input) {
609+
let before_query = match (base_url.query_start, base_url.fragment_start) {
610+
(None, None) => &*base_url.serialization,
611+
(Some(i), _) | (None, Some(i)) => base_url.slice(..i),
612+
};
613+
self.serialization.push_str(before_query);
614+
self.pop_path(SchemeType::File, base_url.path_start as usize);
615+
let remaining = self.parse_path(
616+
SchemeType::File,
617+
&mut true,
618+
base_url.path_start as usize,
619+
input,
620+
);
621+
self.with_query_and_fragment(
622+
SchemeType::File,
623+
base_url.scheme_end,
624+
base_url.username_end,
625+
base_url.host_start,
626+
base_url.host_end,
627+
base_url.host,
628+
base_url.port,
629+
base_url.path_start,
630+
remaining,
631+
)
605632
} else {
606-
let path_start = self.serialization.len();
607-
self.serialization.push('/');
608-
self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
609-
};
610-
// For file URLs that have a host and whose path starts
611-
// with the windows drive letter we just remove the host.
612-
if !has_host {
613-
self.serialization
614-
.drain(host_start as usize..host_end as usize);
615-
host_end = host_start;
616-
host = HostInternal::None;
633+
self.serialization.push_str("file:///");
634+
let scheme_end = "file".len() as u32;
635+
let path_start = "file://".len();
636+
let remaining =
637+
self.parse_path(SchemeType::File, &mut false, path_start, input);
638+
let (query_start, fragment_start) =
639+
self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
640+
let path_start = path_start as u32;
641+
Ok(Url {
642+
serialization: self.serialization,
643+
scheme_end: scheme_end,
644+
username_end: path_start,
645+
host_start: path_start,
646+
host_end: path_start,
647+
host: HostInternal::None,
648+
port: None,
649+
path_start: path_start,
650+
query_start: query_start,
651+
fragment_start: fragment_start,
652+
})
617653
}
618-
let (query_start, fragment_start) =
619-
self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
620-
Ok(Url {
621-
serialization: self.serialization,
622-
scheme_end,
623-
username_end: host_start,
624-
host_start,
625-
host_end,
626-
host,
627-
port: None,
628-
path_start: host_end,
629-
query_start,
630-
fragment_start,
631-
})
632-
} else {
633-
self.serialization.push_str("file:///");
634-
let scheme_end = "file".len() as u32;
635-
let path_start = "file://".len();
636-
if let Some(base_url) = base_file_url {
637-
let first_segment = base_url.path_segments().unwrap().next().unwrap();
638-
// FIXME: *normalized* drive letter
639-
if is_windows_drive_letter(first_segment) {
640-
self.serialization.push_str(first_segment);
641-
self.serialization.push('/');
642-
}
643-
}
644-
let remaining = self.parse_path(
645-
SchemeType::File,
646-
&mut false,
647-
path_start,
648-
input_after_first_char,
649-
);
650-
let (query_start, fragment_start) =
651-
self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
652-
let path_start = path_start as u32;
653-
Ok(Url {
654-
serialization: self.serialization,
655-
scheme_end,
656-
username_end: path_start,
657-
host_start: path_start,
658-
host_end: path_start,
659-
host: HostInternal::None,
660-
port: None,
661-
path_start,
662-
query_start,
663-
fragment_start,
664-
})
665-
}
666-
}
667-
_ => {
668-
if starts_with_windows_drive_letter_segment(&input) {
669-
base_file_url = None;
670-
}
671-
if let Some(base_url) = base_file_url {
672-
let before_query = match (base_url.query_start, base_url.fragment_start) {
673-
(None, None) => &*base_url.serialization,
674-
(Some(i), _) | (None, Some(i)) => base_url.slice(..i),
675-
};
676-
self.serialization.push_str(before_query);
677-
self.pop_path(SchemeType::File, base_url.path_start as usize);
678-
let remaining = self.parse_path(
679-
SchemeType::File,
680-
&mut true,
681-
base_url.path_start as usize,
682-
input,
683-
);
684-
self.with_query_and_fragment(
685-
SchemeType::File,
686-
base_url.scheme_end,
687-
base_url.username_end,
688-
base_url.host_start,
689-
base_url.host_end,
690-
base_url.host,
691-
base_url.port,
692-
base_url.path_start,
693-
remaining,
694-
)
695-
} else {
696-
self.serialization.push_str("file:///");
697-
let scheme_end = "file".len() as u32;
698-
let path_start = "file://".len();
699-
let remaining =
700-
self.parse_path(SchemeType::File, &mut false, path_start, input);
701-
let (query_start, fragment_start) =
702-
self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
703-
let path_start = path_start as u32;
704-
Ok(Url {
705-
serialization: self.serialization,
706-
scheme_end,
707-
username_end: path_start,
708-
host_start: path_start,
709-
host_end: path_start,
710-
host: HostInternal::None,
711-
port: None,
712-
path_start,
713-
query_start,
714-
fragment_start,
715-
})
716654
}
717655
}
656+
} else {
657+
self.serialization.push_str("file:///");
658+
let scheme_end = "file".len() as u32;
659+
let path_start = "file://".len();
660+
let remaining = self.parse_path(SchemeType::File, &mut false, path_start, input);
661+
let (query_start, fragment_start) =
662+
self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
663+
let path_start = path_start as u32;
664+
Ok(Url {
665+
serialization: self.serialization,
666+
scheme_end: scheme_end,
667+
username_end: path_start,
668+
host_start: path_start,
669+
host_end: path_start,
670+
host: HostInternal::None,
671+
port: None,
672+
path_start: path_start,
673+
query_start: query_start,
674+
fragment_start: fragment_start,
675+
})
718676
}
719677
}
720678

0 commit comments

Comments
 (0)