Commit d756ebf

fix code
1 parent: a996e2f

3 files changed: +49 -48 lines changed

README.md

Lines changed: 3 additions & 19 deletions

@@ -265,27 +265,11 @@ The [Hugging Face](https://huggingface.co) platform hosts a [number of LLMs](htt
 
 You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from Hugging Face by using this CLI argument: `-hf <user>/<model>[:quant]`
 
-llama.cpp also supports downloading and running models from [ModelScope](https://www.modelscope.cn/home), there are two ways to use models in ModelScope:
+Alternatively, models can be fetched from [ModelScope](https://www.modelscope.cn) with the CLI argument `-ms <user>/<model>[:quant]`, for example `llama-cli -ms Qwen/QwQ-32B-GGUF`. You can find `llama.cpp`-compatible models on ModelScope through:
 
-1. Add an env variable: `LLAMACPP_USE_MODELSCOPE=True` to your command with the same arguments of Hugging Face(like `-hf <user>/<model>[:quant]`).
+- [Trending] https://www.modelscope.cn/models?libraries=GGUF
 
-```shell
-LLAMACPP_USE_MODELSCOPE=True llama-cli -hf Qwen/QwQ-32B-GGUF
-```
-
-2. Use modelscope arguments instead of the ones of Hugging Face: `-ms <user>/<model>[:quant] -msf xxx.gguf -mst xxx_token`
-
-```shell
-llama-cli -ms Qwen/QwQ-32B-GGUF
-```
-
-Pay attention to change the model repo to the **existing repo** of ModelScope. If you want to use a private repo, please make sure you have the rights of the repo and run with the `--ms_token` argument:
-
-```shell
-llama-cli -ms Qwen/QwQ-32B-GGUF --ms_token xxx
-```
-
-> You can change the endpoint of ModelScope by using `MODELSCOPE_DOMAIN=xxx`(like MODELSCOPE_DOMAIN=www.modelscope.ai).
+> You can change the download endpoint of ModelScope by setting `MODELSCOPE_DOMAIN=xxx` (for example `MODELSCOPE_DOMAIN=www.modelscope.ai`).
 
 After downloading a model, use the CLI tools to run it locally - see below.
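The `<user>/<model>[:quant]` argument introduced above is split on the last `:` in `common/common.cpp` (diffed below). Here is a minimal standalone sketch of that parsing, not code from the commit; `parse_ms_arg` is a hypothetical name, and the simplified slash check stands in for the `string_split` validation in the real code:

```cpp
#include <stdexcept>
#include <string>
#include <utility>

// Hypothetical helper sketching how "<user>/<model>[:quant]" is split;
// a missing quant defaults to q4_k_m, matching the diff below.
static std::pair<std::string, std::string> parse_ms_arg(const std::string & arg) {
    const size_t colon = arg.rfind(':');
    const std::string repo = colon == std::string::npos ? arg : arg.substr(0, colon);
    const std::string tag  = colon == std::string::npos ? "q4_k_m" : arg.substr(colon + 1);
    if (repo.find('/') == std::string::npos) {
        throw std::invalid_argument("error: invalid repo format, expected <user>/<model>[:quant]");
    }
    return { repo, tag };
}

// parse_ms_arg("Qwen/QwQ-32B-GGUF")        -> { "Qwen/QwQ-32B-GGUF", "q4_k_m" }
// parse_ms_arg("Qwen/QwQ-32B-GGUF:Q5_K_M") -> { "Qwen/QwQ-32B-GGUF", "Q5_K_M" }
```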

common/common.cpp

Lines changed: 24 additions & 15 deletions

@@ -1196,7 +1196,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
     return false;
 }
 
-std::filesystem::path create_credential_path() {
+static std::filesystem::path create_credential_path() {
     const char* home_dir = nullptr;
 #ifdef _WIN32
     home_dir = getenv("USERPROFILE");
@@ -1352,9 +1352,13 @@ static bool common_download_file(const std::string & url, const std::string & pa
             }
         }
     } else {
+        // ModelScope does not support etag or last-modified checks.
         should_download = !file_exists;
-        const std::filesystem::path cookie_file = create_credential_path() / "cookies";
-        curl_easy_setopt(curl.get(), CURLOPT_COOKIEFILE, cookie_file.c_str());
+        if (!hf_token.empty()) {
+            // login was already done earlier; reuse its cookie file
+            const std::filesystem::path cookie_file = create_credential_path() / "cookies";
+            curl_easy_setopt(curl.get(), CURLOPT_COOKIEFILE, cookie_file.c_str());
+        }
     }
 }
 
@@ -1624,7 +1628,7 @@ struct llama_model * common_load_model_from_ms(
     if (!ms_token.empty()) {
        ms_login(ms_token);
     }
-    return common_load_model_from_url(model_url, local_path, "", params);
+    return common_load_model_from_url(model_url, local_path, ms_token, params);
 }
 
 /**
@@ -1702,8 +1706,10 @@ std::pair<std::string, std::string> common_get_hf_file(const std::string & hf_re
 }
 
 std::pair<std::string, std::string> common_get_ms_file(const std::string & ms_repo_with_tag, const std::string & ms_token) {
+    // Download from a ModelScope model repository; the quant tag is optional and case-insensitive.
+    // Defaults to the given tag or q4_k_m; falls back to the first GGUF file in the repo if nothing matches.
     auto parts = string_split<std::string>(ms_repo_with_tag, ':');
-    std::string tag = parts.size() > 1 ? parts.back() : "Q4_K_M";
+    std::string tag = parts.size() > 1 ? parts.back() : "q4_k_m";
     std::string hf_repo = parts[0];
     if (string_split<std::string>(hf_repo, '/').size() != 2) {
         throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
@@ -1712,6 +1718,12 @@ std::pair<std::string, std::string> common_get_ms_file(const std::string & ms_re
         ms_login(ms_token);
     }
 
+    std::transform(tag.begin(), tag.end(), std::begin(tag), ::tolower);
+    if (tag == "latest" || tag.empty()) {
+        // ModelScope does not support a "latest" tag
+        tag = "q4_k_m";
+    }
+
     // fetch model info from Hugging Face Hub API
     json model_info;
     curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
@@ -1736,13 +1748,15 @@ std::pair<std::string, std::string> common_get_ms_file(const std::string & ms_re
     http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json");
     curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
 
-    const std::filesystem::path cookie_file = create_credential_path() / "cookies";
-    curl_easy_setopt(curl.get(), CURLOPT_COOKIEFILE, cookie_file.c_str());
+    if (!ms_token.empty()) {
+        const std::filesystem::path cookie_file = create_credential_path() / "cookies";
+        curl_easy_setopt(curl.get(), CURLOPT_COOKIEFILE, cookie_file.c_str());
+    }
 
     CURLcode res = curl_easy_perform(curl.get());
 
     if (res != CURLE_OK) {
-        throw std::runtime_error("error: cannot make GET request to HF API");
+        throw std::runtime_error("error: cannot make GET request to MS API");
     }
 
     long res_code;
@@ -1759,18 +1773,13 @@ std::pair<std::string, std::string> common_get_ms_file(const std::string & ms_re
 
     std::vector<std::string> all_available_files;
     std::string gguf_file;
-    std::string upper_tag;
-    upper_tag.reserve(tag.size());
-    std::string lower_tag;
-    lower_tag.reserve(tag.size());
-    std::transform(tag.begin(), tag.end(), std::back_inserter(upper_tag), ::toupper);
-    std::transform(tag.begin(), tag.end(), std::back_inserter(lower_tag), ::tolower);
     for (const auto & _file : all_files) {
         auto file = _file["Path"].get<std::string>();
+        std::transform(file.begin(), file.end(), std::begin(file), ::tolower);
         if (!string_ends_with(file, ".gguf")) {
            continue;
         }
-        if (file.find(upper_tag) != std::string::npos || file.find(lower_tag) != std::string::npos) {
+        if (file.find(tag) != std::string::npos) {
             gguf_file = file;
         }
         all_available_files.push_back(file);
examples/run/run.cpp

Lines changed: 22 additions & 14 deletions

@@ -719,20 +719,27 @@ class LlamaData {
     }
 
     int modelscope_dl(std::string & model, const std::string & bn) {
-        // Find the second occurrence of '/' after protocol string
+        // Download from a ModelScope model repository; the quant tag is optional and case-insensitive.
+        // Defaults to the given tag or q4_k_m; falls back to the first GGUF file in the repo if nothing matches.
         size_t pos = model.find('/');
         pos = model.find('/', pos + 1);
-        std::string hfr;
-        std::string hff;
-        std::vector<std::string> headers = { "user-agent: llama-cpp", "Accept: application/json"};
+        std::string msr;
+        std::string msf;
+        std::vector<std::string> headers = { "User-Agent: llama-cpp", "Accept: application/json"};
         std::string url;
         auto endpoint = MODELSCOPE_DOMAIN_DEFINITION;
 
         if (pos == std::string::npos) {
-            auto [model_name, _] = extract_model_and_tag(model, "");
-            hfr = model_name;
+            auto [model_name, tag] = extract_model_and_tag(model, "");
+            msr = model_name;
+            rm_until_substring(tag, "/manifests/");
+            std::transform(tag.begin(), tag.end(), std::begin(tag), ::tolower);
+            if (tag == "latest" || tag.empty()) {
+                // ModelScope does not support a "latest" tag
+                tag = "q4_k_m";
+            }
             std::string manifest_str;
-            url = endpoint + "/api/v1/models/" + hfr + "/repo/files?Revision=master&Recursive=True";
+            url = endpoint + "/api/v1/models/" + msr + "/repo/files?Revision=master&Recursive=True";
             if (int ret = download(url, "", false, headers, &manifest_str)) {
                 return ret;
             }
@@ -741,23 +748,24 @@ class LlamaData {
             std::vector<std::string> all_available_files;
             for (const auto & _file : all_files) {
                 auto file = _file["Path"].get<std::string>();
+                std::transform(file.begin(), file.end(), std::begin(file), ::tolower);
                 if (!string_ends_with(file, ".gguf")) {
                     continue;
                 }
-                if (file.find("Q4_K_M") != std::string::npos || file.find("q4_k_m") != std::string::npos) {
-                    hff = file;
+                if (file.find(tag) != std::string::npos) {
+                    msf = file;
                 }
                 all_available_files.push_back(file);
             }
-            if (hff.empty()) {
-                hff = all_available_files[0];
+            if (msf.empty()) {
+                msf = all_available_files[0];
             }
 
         } else {
-            hfr = model.substr(0, pos);
-            hff = model.substr(pos + 1);
+            msr = model.substr(0, pos);
+            msf = model.substr(pos + 1);
         }
-        url = endpoint + "/models/" + hfr + "/resolve/master/" + hff;
+        url = endpoint + "/models/" + msr + "/resolve/master/" + msf;
         return download(url, bn, true, headers);
     }
 