Commit d756ebf

fix code
1 parent: a996e2f

3 files changed: +49 -48 lines changed

README.md

Lines changed: 3 additions & 19 deletions

@@ -265,27 +265,11 @@ The [Hugging Face](https://huggingface.co) platform hosts a [number of LLMs](htt
 
 You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from Hugging Face by using this CLI argument: `-hf <user>/<model>[:quant]`
 
-llama.cpp also supports downloading and running models from [ModelScope](https://www.modelscope.cn/home), there are two ways to use models in ModelScope:
+Alternatively, models can be fetched from [ModelScope](https://www.modelscope.cn) with the CLI argument `-ms <user>/<model>[:quant]`, for example `llama-cli -ms Qwen/QwQ-32B-GGUF`. You can find `llama.cpp`-compatible models on ModelScope through:
 
-1. Add an env variable: `LLAMACPP_USE_MODELSCOPE=True` to your command with the same arguments of Hugging Face(like `-hf <user>/<model>[:quant]`).
+- [Trending] https://www.modelscope.cn/models?libraries=GGUF
 
-```shell
-LLAMACPP_USE_MODELSCOPE=True llama-cli -hf Qwen/QwQ-32B-GGUF
-```
-
-2. Use modelscope arguments instead of the ones of Hugging Face: `-ms <user>/<model>[:quant] -msf xxx.gguf -mst xxx_token`
-
-```shell
-llama-cli -ms Qwen/QwQ-32B-GGUF
-```
-
-Pay attention to change the model repo to the **existing repo** of ModelScope. If you want to use a private repo, please make sure you have the rights of the repo and run with the `--ms_token` argument:
-
-```shell
-llama-cli -ms Qwen/QwQ-32B-GGUF --ms_token xxx
-```
-
-> You can change the endpoint of ModelScope by using `MODELSCOPE_DOMAIN=xxx`(like MODELSCOPE_DOMAIN=www.modelscope.ai).
+> You can change the download endpoint of ModelScope by setting `MODELSCOPE_DOMAIN=xxx` (for example `MODELSCOPE_DOMAIN=www.modelscope.ai`).
 
 After downloading a model, use the CLI tools to run it locally - see below.
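The `<user>/<model>[:quant]` argument introduced above is split on the last `:` in `common/common.cpp` (diffed below). Here is a minimal standalone sketch of that parsing, not code from the commit; `parse_ms_arg` is a hypothetical name, and the simplified slash check stands in for the `string_split` validation in the real code:

```cpp
#include <stdexcept>
#include <string>
#include <utility>

// Hypothetical helper sketching how "<user>/<model>[:quant]" is split;
// a missing quant defaults to q4_k_m, matching the diff below.
static std::pair<std::string, std::string> parse_ms_arg(const std::string & arg) {
    const size_t colon = arg.rfind(':');
    const std::string repo = colon == std::string::npos ? arg : arg.substr(0, colon);
    const std::string tag  = colon == std::string::npos ? "q4_k_m" : arg.substr(colon + 1);
    if (repo.find('/') == std::string::npos) {
        throw std::invalid_argument("error: invalid repo format, expected <user>/<model>[:quant]");
    }
    return { repo, tag };
}

// parse_ms_arg("Qwen/QwQ-32B-GGUF")        -> { "Qwen/QwQ-32B-GGUF", "q4_k_m" }
// parse_ms_arg("Qwen/QwQ-32B-GGUF:Q5_K_M") -> { "Qwen/QwQ-32B-GGUF", "Q5_K_M" }
```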

common/common.cpp

Lines changed: 24 additions & 15 deletions

@@ -1196,7 +1196,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
     return false;
 }
 
-std::filesystem::path create_credential_path() {
+static std::filesystem::path create_credential_path() {
     const char* home_dir = nullptr;
 #ifdef _WIN32
     home_dir = getenv("USERPROFILE");
@@ -1352,9 +1352,13 @@ static bool common_download_file(const std::string & url, const std::string & pa
             }
         }
     } else {
+        // ModelScope does not support etag or last-modified checks.
         should_download = !file_exists;
-        const std::filesystem::path cookie_file = create_credential_path() / "cookies";
-        curl_easy_setopt(curl.get(), CURLOPT_COOKIEFILE, cookie_file.c_str());
+        if (!hf_token.empty()) {
+            // login was already done earlier; reuse its cookie file
+            const std::filesystem::path cookie_file = create_credential_path() / "cookies";
+            curl_easy_setopt(curl.get(), CURLOPT_COOKIEFILE, cookie_file.c_str());
+        }
     }
 }
 
@@ -1624,7 +1628,7 @@ struct llama_model * common_load_model_from_ms(
     if (!ms_token.empty()) {
        ms_login(ms_token);
     }
-    return common_load_model_from_url(model_url, local_path, "", params);
+    return common_load_model_from_url(model_url, local_path, ms_token, params);
 }
 
 /**
@@ -1702,8 +1706,10 @@ std::pair<std::string, std::string> common_get_hf_file(const std::string & hf_re
 }
 
 std::pair<std::string, std::string> common_get_ms_file(const std::string & ms_repo_with_tag, const std::string & ms_token) {
+    // Download from a ModelScope model repository; the quant tag is optional and case-insensitive.
+    // Defaults to the given tag or q4_k_m; falls back to the first GGUF file in the repo if nothing matches.
     auto parts = string_split<std::string>(ms_repo_with_tag, ':');
-    std::string tag = parts.size() > 1 ? parts.back() : "Q4_K_M";
+    std::string tag = parts.size() > 1 ? parts.back() : "q4_k_m";
     std::string hf_repo = parts[0];
     if (string_split<std::string>(hf_repo, '/').size() != 2) {
         throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
@@ -1712,6 +1718,12 @@ std::pair<std::string, std::string> common_get_ms_file(const std::string & ms_re
         ms_login(ms_token);
     }
 
+    std::transform(tag.begin(), tag.end(), std::begin(tag), ::tolower);
+    if (tag == "latest" || tag.empty()) {
+        // ModelScope does not support a "latest" tag
+        tag = "q4_k_m";
+    }
+
     // fetch model info from Hugging Face Hub API
     json model_info;
     curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
@@ -1736,13 +1748,15 @@ std::pair<std::string, std::string> common_get_ms_file(const std::string & ms_re
     http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json");
     curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
 
-    const std::filesystem::path cookie_file = create_credential_path() / "cookies";
-    curl_easy_setopt(curl.get(), CURLOPT_COOKIEFILE, cookie_file.c_str());
+    if (!ms_token.empty()) {
+        const std::filesystem::path cookie_file = create_credential_path() / "cookies";
+        curl_easy_setopt(curl.get(), CURLOPT_COOKIEFILE, cookie_file.c_str());
+    }
 
     CURLcode res = curl_easy_perform(curl.get());
 
     if (res != CURLE_OK) {
-        throw std::runtime_error("error: cannot make GET request to HF API");
+        throw std::runtime_error("error: cannot make GET request to MS API");
     }
 
     long res_code;
@@ -1759,18 +1773,13 @@ std::pair<std::string, std::string> common_get_ms_file(const std::string & ms_re
 
     std::vector<std::string> all_available_files;
     std::string gguf_file;
-    std::string upper_tag;
-    upper_tag.reserve(tag.size());
-    std::string lower_tag;
-    lower_tag.reserve(tag.size());
-    std::transform(tag.begin(), tag.end(), std::back_inserter(upper_tag), ::toupper);
-    std::transform(tag.begin(), tag.end(), std::back_inserter(lower_tag), ::tolower);
     for (const auto & _file : all_files) {
         auto file = _file["Path"].get<std::string>();
+        std::transform(file.begin(), file.end(), std::begin(file), ::tolower);
         if (!string_ends_with(file, ".gguf")) {
            continue;
         }
-        if (file.find(upper_tag) != std::string::npos || file.find(lower_tag) != std::string::npos) {
+        if (file.find(tag) != std::string::npos) {
             gguf_file = file;
         }
         all_available_files.push_back(file);
examples/run/run.cpp

Lines changed: 22 additions & 14 deletions

@@ -719,20 +719,27 @@ class LlamaData {
     }
 
     int modelscope_dl(std::string & model, const std::string & bn) {
-        // Find the second occurrence of '/' after protocol string
+        // Download from a ModelScope model repository; the quant tag is optional and case-insensitive.
+        // Defaults to the given tag or q4_k_m; falls back to the first GGUF file in the repo if nothing matches.
         size_t pos = model.find('/');
         pos = model.find('/', pos + 1);
-        std::string hfr;
-        std::string hff;
-        std::vector<std::string> headers = { "user-agent: llama-cpp", "Accept: application/json"};
+        std::string msr;
+        std::string msf;
+        std::vector<std::string> headers = { "User-Agent: llama-cpp", "Accept: application/json"};
         std::string url;
         auto endpoint = MODELSCOPE_DOMAIN_DEFINITION;
 
         if (pos == std::string::npos) {
-            auto [model_name, _] = extract_model_and_tag(model, "");
-            hfr = model_name;
+            auto [model_name, tag] = extract_model_and_tag(model, "");
+            msr = model_name;
+            rm_until_substring(tag, "/manifests/");
+            std::transform(tag.begin(), tag.end(), std::begin(tag), ::tolower);
+            if (tag == "latest" || tag.empty()) {
+                // ModelScope does not support a "latest" tag
+                tag = "q4_k_m";
+            }
             std::string manifest_str;
-            url = endpoint + "/api/v1/models/" + hfr + "/repo/files?Revision=master&Recursive=True";
+            url = endpoint + "/api/v1/models/" + msr + "/repo/files?Revision=master&Recursive=True";
             if (int ret = download(url, "", false, headers, &manifest_str)) {
                 return ret;
             }
@@ -741,23 +748,24 @@ class LlamaData {
             std::vector<std::string> all_available_files;
             for (const auto & _file : all_files) {
                 auto file = _file["Path"].get<std::string>();
+                std::transform(file.begin(), file.end(), std::begin(file), ::tolower);
                 if (!string_ends_with(file, ".gguf")) {
                     continue;
                 }
-                if (file.find("Q4_K_M") != std::string::npos || file.find("q4_k_m") != std::string::npos) {
-                    hff = file;
+                if (file.find(tag) != std::string::npos) {
+                    msf = file;
                 }
                 all_available_files.push_back(file);
             }
-            if (hff.empty()) {
-                hff = all_available_files[0];
+            if (msf.empty()) {
+                msf = all_available_files[0];
             }
 
         } else {
-            hfr = model.substr(0, pos);
-            hff = model.substr(pos + 1);
+            msr = model.substr(0, pos);
+            msf = model.substr(pos + 1);
         }
-        url = endpoint + "/models/" + hfr + "/resolve/master/" + hff;
+        url = endpoint + "/models/" + msr + "/resolve/master/" + msf;
         return download(url, bn, true, headers);
     }
 