Skip to content

Commit 305909d

Browse files
authored
Introduce a utility function to transform a path into a URI (#1706)
Fixes: #1790 Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent 6da2fe3 commit 305909d

File tree

4 files changed

+194
-2
lines changed

4 files changed

+194
-2
lines changed

src/core/uri/include/sourcemeta/core/uri.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
// NOLINTEND(misc-include-cleaner)
1212

1313
#include <cstdint> // std::uint32_t
14+
#include <filesystem> // std::filesystem
15+
#include <istream> // std::istream
1416
#include <memory> // std::unique_ptr
1517
#include <optional> // std::optional
1618
#include <span> // std::span
@@ -439,6 +441,19 @@ class SOURCEMETA_CORE_URI_EXPORT URI {
439441
/// ```
440442
static auto from_fragment(std::string_view fragment) -> URI;
441443

444+
/// Create a `file://` URI from an absolute file system path. Relative paths
/// are rejected with a `URIError`. For example:
445+
///
446+
/// ```cpp
447+
/// #include <sourcemeta/core/uri.h>
448+
/// #include <cassert>
449+
/// #include <filesystem>
450+
///
451+
/// const std::filesystem::path path{"/foo/bar"};
452+
/// const sourcemeta::core::URI uri{sourcemeta::core::URI::from_path(path)};
453+
/// assert(uri.recompose() == "file:///foo/bar");
454+
/// ```
455+
static auto from_path(const std::filesystem::path &path) -> URI;
456+
442457
/// A convenient method to canonicalize and recompose a URI from a string. For
443458
/// example:
444459
///

src/core/uri/uri.cc

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22

33
#include <sourcemeta/core/uri.h>
44

5+
#include <algorithm> // std::replace
56
#include <cassert> // assert
67
#include <cstdint> // std::uint32_t
78
#include <filesystem> // std::filesystem
8-
#include <istream> // std::istream
99
#include <optional> // std::optional
10-
#include <sstream> // std::ostringstream
10+
#include <sstream> // std::ostringstream, std::istringstream
1111
#include <stdexcept> // std::length_error, std::runtime_error
1212
#include <string> // std::stoul, std::string, std::tolower
1313
#include <tuple> // std::tie
@@ -114,6 +114,20 @@ auto canonicalize_path(const std::string &path) -> std::optional<std::string> {
114114
return canonical_path;
115115
}
116116

117+
// Percent-encode a single path component (or host name) so that it can be
// placed inside a `file://` URI, while keeping ":" literal
auto uri_escape_for_path(const std::string &value) -> std::string {
  std::istringstream source{value};
  std::ostringstream sink;
  uri_escape(source, sink, sourcemeta::core::URIEscapeMode::SkipSubDelims);
  std::string escaped{sink.str()};

  // Undo the escaping of ":" so that Windows drive designators such as "C:"
  // survive untouched
  for (auto offset{escaped.find("%3A")}; offset != std::string::npos;
       offset = escaped.find("%3A", offset)) {
    escaped.replace(offset, 3, ":");
  }

  return escaped;
}
}
130+
117131
} // namespace
118132

119133
namespace sourcemeta::core {
@@ -708,4 +722,44 @@ auto URI::canonicalize(const std::string &input) -> std::string {
708722
return URI{input}.canonicalize().recompose();
709723
}
710724

725+
// Build a `file://` URI out of an absolute file system path.
//
// Three absolute forms are recognised: POSIX ("/foo/bar"), Windows
// drive-absolute ("C:\foo") and UNC ("\\server\share\file"). For UNC paths the
// server name becomes the URI host. Every path component goes through
// `uri_escape_for_path` before being stored.
//
// Throws `URIError` if the path is relative (including the empty path and
// drive-relative forms such as "C:foo"), or if a UNC path has no server name.
auto URI::from_path(const std::filesystem::path &path) -> URI {
  auto normalized{path.lexically_normal().string()};
  const auto is_unc{normalized.starts_with("\\\\")};

  // A drive-absolute path needs an ASCII drive letter, a colon, and then a
  // separator. Looking only at the colon would also accept drive-relative
  // paths ("C:foo") and relative POSIX names that contain a colon ("a:b")
  const auto is_drive_letter{[](const char character) -> bool {
    return (character >= 'A' && character <= 'Z') ||
           (character >= 'a' && character <= 'z');
  }};
  const auto is_windows_absolute{
      normalized.size() >= 3 && is_drive_letter(normalized[0]) &&
      normalized[1] == ':' &&
      (normalized[2] == '\\' || normalized[2] == '/')};

  // From here on, only deal with forward slashes
  std::replace(normalized.begin(), normalized.end(), '\\', '/');
  const auto is_unix_absolute{normalized.starts_with("/")};
  if (!is_unix_absolute && !is_windows_absolute && !is_unc) {
    throw URIError(
        "It is not valid to construct a file:// URI out of a relative path");
  }

  // Drop the leading slashes so that the first component is either the UNC
  // server name, the drive designator, or the first directory
  // NOTE(review): for a bare root ("/") nothing is left after this step, so
  // the result carries no path component at all. Confirm that is intended
  normalized.erase(0, normalized.find_first_not_of('/'));
  const std::filesystem::path final_path{normalized};

  auto iterator{final_path.begin()};
  if (is_unc && iterator == final_path.end()) {
    // Something like "\\" alone: there is no server name to use as the host,
    // and dereferencing the end iterator would be undefined behaviour
    throw URIError("It is not valid to construct a file:// URI out of a UNC "
                   "path without a host");
  }

  URI result{"file://"};

  if (is_unc) {
    result.host_ = uri_escape_for_path(iterator->string());
    ++iterator;
  }

  for (; iterator != final_path.end(); ++iterator) {
    if (iterator->empty()) {
      // An empty element is how path iteration reports a trailing separator
      result.append_path("/");
    } else if (*iterator == "/") {
      // A root directory element (it follows a root name such as a drive).
      // It only matters when nothing comes after it, i.e. a drive root
      if (std::next(iterator) == final_path.end()) {
        result.append_path("/");
      }
    } else if (result.path_.has_value()) {
      result.append_path(uri_escape_for_path(iterator->string()));
    } else {
      // The very first component initialises the path
      result.path_ = uri_escape_for_path(iterator->string());
    }
  }

  return result;
}
764+
711765
} // namespace sourcemeta::core

test/uri/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME uri
55
uri_empty_test.cc
66
uri_host_test.cc
77
uri_path_test.cc
8+
uri_from_path_test.cc
89
uri_parse_test.cc
910
uri_port_test.cc
1011
uri_scheme_test.cc

test/uri/uri_from_path_test.cc

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
#include <gtest/gtest.h>
2+
3+
#include <sourcemeta/core/uri.h>
4+
5+
// A plain absolute POSIX path keeps its segments under `file:///`
TEST(URI_from_path, unix_absolute) {
  const auto result{sourcemeta::core::URI::from_path(
      std::filesystem::path{"/foo/bar/baz"})};
  EXPECT_EQ(result.recompose(), "file:///foo/bar/baz");
}
10+
11+
// Spaces, "#", "?" and "%" inside components must come out percent-encoded
TEST(URI_from_path, unix_with_space_and_reserved) {
  const auto result{sourcemeta::core::URI::from_path(
      std::filesystem::path{"/foo/My Folder/has#hash?value%"})};
  EXPECT_EQ(result.recompose(),
            "file:///foo/My%20Folder/has%23hash%3Fvalue%25");
}
16+
17+
// A trailing separator on the input is preserved in the URI
TEST(URI_from_path, unix_trailing_slash) {
  const auto result{
      sourcemeta::core::URI::from_path(std::filesystem::path{"/foo/bar/"})};
  EXPECT_EQ(result.recompose(), "file:///foo/bar/");
}
22+
23+
// Backslashes turn into forward slashes and the drive colon stays literal
TEST(URI_from_path, windows_drive_absolute) {
  const auto result{sourcemeta::core::URI::from_path(
      std::filesystem::path{R"(C:\Program Files\Test)"})};
  EXPECT_EQ(result.recompose(), "file:///C:/Program%20Files/Test");
}
28+
29+
// The case of the drive letter is carried over as given
TEST(URI_from_path, windows_drive_lowercase) {
  const auto result{sourcemeta::core::URI::from_path(
      std::filesystem::path{R"(c:\temp\logs)"})};
  EXPECT_EQ(result.recompose(), "file:///c:/temp/logs");
}
34+
35+
// The root of a drive keeps its trailing slash
TEST(URI_from_path, windows_drive_root) {
  const auto result{
      sourcemeta::core::URI::from_path(std::filesystem::path{R"(D:\)"})};
  EXPECT_EQ(result.recompose(), "file:///D:/");
}
40+
41+
// A trailing backslash is reported as a trailing forward slash
TEST(URI_from_path, windows_trailing_slash) {
  const auto result{sourcemeta::core::URI::from_path(
      std::filesystem::path{R"(C:\foo\bar\)"})};
  EXPECT_EQ(result.recompose(), "file:///C:/foo/bar/");
}
46+
47+
// A literal "%" is encoded as %25, whereas "+" goes through unencoded
TEST(URI_from_path, windows_percent_and_plus) {
  const auto result{sourcemeta::core::URI::from_path(
      std::filesystem::path{R"(C:\path\50%+plus.txt)"})};
  EXPECT_EQ(result.recompose(), "file:///C:/path/50%25+plus.txt");
}
53+
54+
// For UNC input the server is the URI host and the rest is the path, i.e.
// host=server, path=/share/file.txt
TEST(URI_from_path, windows_unc_simple) {
  const auto result{sourcemeta::core::URI::from_path(
      std::filesystem::path{R"(\\server\share\file.txt)"})};
  EXPECT_EQ(result.recompose(), "file://server/share/file.txt");
}
61+
62+
// Spaces in the share and file names of a UNC path are percent-encoded
TEST(URI_from_path, windows_unc_with_space) {
  const auto result{sourcemeta::core::URI::from_path(
      std::filesystem::path{R"(\\srv\My Docs\a b.txt)"})};
  EXPECT_EQ(result.recompose(), "file://srv/My%20Docs/a%20b.txt");
}
67+
68+
// U+00E9 (é) is expected to show up as its UTF-8 bytes, percent-encoded as
// %C3%A9
TEST(URI_from_path, unicode_unix) {
  const auto result{sourcemeta::core::URI::from_path(
      std::filesystem::path{u8"/data/éclair.txt"})};
  EXPECT_EQ(result.recompose(), "file:///data/%C3%A9clair.txt");
}
74+
75+
// U+00E9 (é) is expected to show up as its UTF-8 bytes, percent-encoded as
// %C3%A9, for drive-absolute paths as well
TEST(URI_from_path, unicode_windows) {
  const auto result{sourcemeta::core::URI::from_path(
      std::filesystem::path{u8R"(C:\data\résumé.doc)"})};
  EXPECT_EQ(result.recompose(), "file:///C:/data/r%C3%A9sum%C3%A9.doc");
}
81+
82+
// A relative path cannot be turned into a file URI
TEST(URI_from_path, unix_relative_simple) {
  using sourcemeta::core::URI;
  using sourcemeta::core::URIError;
  const std::filesystem::path relative{"foo/bar/baz"};
  EXPECT_THROW(URI::from_path(relative), URIError);
}
87+
88+
// A leading "./" still denotes a relative path, so it is refused
TEST(URI_from_path, unix_relative_with_dot) {
  using sourcemeta::core::URI;
  using sourcemeta::core::URIError;
  const std::filesystem::path relative{"./foo/bar"};
  EXPECT_THROW(URI::from_path(relative), URIError);
}
93+
94+
// A leading "../" still denotes a relative path, so it is refused
TEST(URI_from_path, unix_relative_with_dotdot) {
  using sourcemeta::core::URI;
  using sourcemeta::core::URIError;
  const std::filesystem::path relative{"../parent/dir"};
  EXPECT_THROW(URI::from_path(relative), URIError);
}
99+
100+
// The empty path is not absolute, so it is refused too
TEST(URI_from_path, unix_empty_path) {
  using sourcemeta::core::URI;
  using sourcemeta::core::URIError;
  const std::filesystem::path nothing{""};
  EXPECT_THROW(URI::from_path(nothing), URIError);
}
105+
106+
// A backslash-separated path with neither drive nor UNC prefix is relative
TEST(URI_from_path, windows_relative_simple) {
  using sourcemeta::core::URI;
  using sourcemeta::core::URIError;
  const std::filesystem::path relative{"folder\\file.txt"};
  EXPECT_THROW(URI::from_path(relative), URIError);
}
111+
112+
// A leading ".\" denotes a relative path, so it is refused
TEST(URI_from_path, windows_relative_with_dot) {
  using sourcemeta::core::URI;
  using sourcemeta::core::URIError;
  const std::filesystem::path relative{".\\foo\\bar"};
  EXPECT_THROW(URI::from_path(relative), URIError);
}
117+
118+
// A leading "..\" denotes a relative path, so it is refused
TEST(URI_from_path, windows_relative_with_dotdot) {
  using sourcemeta::core::URI;
  using sourcemeta::core::URIError;
  const std::filesystem::path relative{"..\\up\\one\\level"};
  EXPECT_THROW(URI::from_path(relative), URIError);
}

0 commit comments

Comments
 (0)