@@ -907,7 +907,11 @@ struct common_init_result common_init_from_params(common_params & params) {
907
907
llama_model * model = nullptr ;
908
908
909
909
if (!params.hf_repo .empty () && !params.hf_file .empty ()) {
910
- model = common_load_model_from_hf (params.hf_repo , params.hf_file , params.model , params.hf_token , mparams);
910
+ if (LLAMACPP_USE_MODELSCOPE_DEFINITION) {
911
+ model = common_load_model_from_ms (params.hf_repo , params.hf_file , params.model , params.hf_token , mparams);
912
+ } else {
913
+ model = common_load_model_from_hf (params.hf_repo , params.hf_file , params.model , params.hf_token , mparams);
914
+ }
911
915
} else if (!params.model_url .empty ()) {
912
916
model = common_load_model_from_url (params.model_url , params.model , params.hf_token , mparams);
913
917
} else {
@@ -1207,6 +1211,12 @@ static bool common_download_file(const std::string & url, const std::string & pa
1207
1211
curl_easy_setopt (curl.get (), CURLOPT_URL, url.c_str ());
1208
1212
curl_easy_setopt (curl.get (), CURLOPT_FOLLOWLOCATION, 1L );
1209
1213
1214
+ std::vector<std::string> _headers = {" User-Agent: llama-cpp" };
1215
+ for (const auto & header : _headers) {
1216
+ http_headers.ptr = curl_slist_append (http_headers.ptr , header.c_str ());
1217
+ }
1218
+ curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers.ptr );
1219
+
1210
1220
// Check if hf-token or bearer-token was specified
1211
1221
if (!hf_token.empty ()) {
1212
1222
std::string auth_header = " Authorization: Bearer " + hf_token;
@@ -1265,6 +1275,7 @@ static bool common_download_file(const std::string & url, const std::string & pa
1265
1275
};
1266
1276
1267
1277
common_load_model_from_url_headers headers;
1278
+ bool should_download = false ;
1268
1279
1269
1280
{
1270
1281
typedef size_t (*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t , size_t , void *);
@@ -1293,32 +1304,35 @@ static bool common_download_file(const std::string & url, const std::string & pa
1293
1304
curl_easy_setopt (curl.get (), CURLOPT_NOPROGRESS, 1L ); // hide head request progress
1294
1305
curl_easy_setopt (curl.get (), CURLOPT_HEADERFUNCTION, static_cast <CURLOPT_HEADERFUNCTION_PTR>(header_callback));
1295
1306
curl_easy_setopt (curl.get (), CURLOPT_HEADERDATA, &headers);
1307
+ if (!LLAMACPP_USE_MODELSCOPE_DEFINITION) {
1308
+ bool was_perform_successful = curl_perform_with_retry (url, curl.get (), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
1309
+ if (!was_perform_successful) {
1310
+ return false ;
1311
+ }
1296
1312
1297
- bool was_perform_successful = curl_perform_with_retry (url, curl.get (), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
1298
- if (!was_perform_successful) {
1299
- return false ;
1300
- }
1301
-
1302
- long http_code = 0 ;
1303
- curl_easy_getinfo (curl.get (), CURLINFO_RESPONSE_CODE, &http_code);
1304
- if (http_code != 200 ) {
1305
- // HEAD not supported, we don't know if the file has changed
1306
- // force trigger downloading
1307
- force_download = true ;
1308
- LOG_ERR (" %s: HEAD invalid http status code received: %ld\n " , __func__, http_code);
1313
+ long http_code = 0 ;
1314
+ curl_easy_getinfo (curl.get (), CURLINFO_RESPONSE_CODE, &http_code);
1315
+ if (http_code != 200 ) {
1316
+ // HEAD not supported, we don't know if the file has changed
1317
+ // force trigger downloading
1318
+ force_download = true ;
1319
+ LOG_ERR (" %s: HEAD invalid http status code received: %ld\n " , __func__, http_code);
1320
+ }
1321
+ should_download = !file_exists || force_download;
1322
+ if (!should_download) {
1323
+ if (!etag.empty () && etag != headers.etag ) {
1324
+ LOG_WRN (" %s: ETag header is different (%s != %s): triggering a new download\n " , __func__, etag.c_str (), headers.etag .c_str ());
1325
+ should_download = true ;
1326
+ } else if (!last_modified.empty () && last_modified != headers.last_modified ) {
1327
+ LOG_WRN (" %s: Last-Modified header is different (%s != %s): triggering a new download\n " , __func__, last_modified.c_str (), headers.last_modified .c_str ());
1328
+ should_download = true ;
1329
+ }
1330
+ }
1331
+ } else {
1332
+ should_download = !file_exists;
1309
1333
}
1310
1334
}
1311
1335
1312
- bool should_download = !file_exists || force_download;
1313
- if (!should_download) {
1314
- if (!etag.empty () && etag != headers.etag ) {
1315
- LOG_WRN (" %s: ETag header is different (%s != %s): triggering a new download\n " , __func__, etag.c_str (), headers.etag .c_str ());
1316
- should_download = true ;
1317
- } else if (!last_modified.empty () && last_modified != headers.last_modified ) {
1318
- LOG_WRN (" %s: Last-Modified header is different (%s != %s): triggering a new download\n " , __func__, last_modified.c_str (), headers.last_modified .c_str ());
1319
- should_download = true ;
1320
- }
1321
- }
1322
1336
if (should_download) {
1323
1337
std::string path_temporary = path + " .downloadInProgress" ;
1324
1338
if (file_exists) {
@@ -1507,6 +1521,20 @@ struct llama_model * common_load_model_from_hf(
1507
1521
return common_load_model_from_url (model_url, local_path, hf_token, params);
1508
1522
}
1509
1523
1524
+ struct llama_model * common_load_model_from_ms (
1525
+ const std::string & repo,
1526
+ const std::string & remote_path,
1527
+ const std::string & local_path,
1528
+ const std::string & ms_token,
1529
+ const struct llama_model_params & params) {
1530
+ std::string model_url = " https://" + MODELSCOPE_DOMAIN_DEFINITION + " /models/" ;
1531
+ model_url += repo;
1532
+ model_url += " /resolve/master/" ;
1533
+ model_url += remote_path;
1534
+ // modelscope does not support token in header
1535
+ return common_load_model_from_url (model_url, local_path, " " , params);
1536
+ }
1537
+
1510
1538
/* *
1511
1539
* Allow getting the HF file from the HF repo with tag (like ollama), for example:
1512
1540
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
@@ -1581,6 +1609,82 @@ std::pair<std::string, std::string> common_get_hf_file(const std::string & hf_re
1581
1609
return std::make_pair (hf_repo, gguf_file.at (" rfilename" ));
1582
1610
}
1583
1611
1612
+ std::pair<std::string, std::string> common_get_ms_file (const std::string & ms_repo_with_tag, const std::string & ms_token) {
1613
+ auto parts = string_split<std::string>(ms_repo_with_tag, ' :' );
1614
+ std::string tag = parts.size () > 1 ? parts.back () : " Q4_K_M" ;
1615
+ std::string hf_repo = parts[0 ];
1616
+ if (string_split<std::string>(hf_repo, ' /' ).size () != 2 ) {
1617
+ throw std::invalid_argument (" error: invalid HF repo format, expected <user>/<model>[:quant]\n " );
1618
+ }
1619
+
1620
+ // fetch model info from Hugging Face Hub API
1621
+ json model_info;
1622
+ curl_ptr curl (curl_easy_init (), &curl_easy_cleanup);
1623
+ curl_slist_ptr http_headers;
1624
+ std::string res_str;
1625
+ auto endpoint = MODELSCOPE_DOMAIN_DEFINITION;
1626
+
1627
+ std::string url = endpoint + " /api/v1/models/" + hf_repo + " /repo/files?Revision=master&Recursive=True" ;
1628
+ curl_easy_setopt (curl.get (), CURLOPT_URL, url.c_str ());
1629
+ curl_easy_setopt (curl.get (), CURLOPT_NOPROGRESS, 1L );
1630
+ typedef size_t (*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
1631
+ auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
1632
+ static_cast <std::string *>(data)->append ((char * ) ptr, size * nmemb);
1633
+ return size * nmemb;
1634
+ };
1635
+ curl_easy_setopt (curl.get (), CURLOPT_WRITEFUNCTION, static_cast <CURLOPT_WRITEFUNCTION_PTR>(write_callback));
1636
+ curl_easy_setopt (curl.get (), CURLOPT_WRITEDATA, &res_str);
1637
+ #if defined(_WIN32)
1638
+ curl_easy_setopt (curl.get (), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
1639
+ #endif
1640
+ // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
1641
+ http_headers.ptr = curl_slist_append (http_headers.ptr , " user-agent: llama-cpp" );
1642
+ http_headers.ptr = curl_slist_append (http_headers.ptr , " Accept: application/json" );
1643
+ curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers.ptr );
1644
+
1645
+ CURLcode res = curl_easy_perform (curl.get ());
1646
+
1647
+ if (res != CURLE_OK) {
1648
+ throw std::runtime_error (" error: cannot make GET request to HF API" );
1649
+ }
1650
+
1651
+ long res_code;
1652
+ curl_easy_getinfo (curl.get (), CURLINFO_RESPONSE_CODE, &res_code);
1653
+ if (res_code == 200 ) {
1654
+ model_info = nlohmann::json::parse (res_str);
1655
+ } else if (res_code == 401 ) {
1656
+ throw std::runtime_error (" error: model is private or does not exist; if you are accessing a gated model, please provide a valid MS token" );
1657
+ } else {
1658
+ throw std::runtime_error (string_format (" error from MS API, response code: %ld, data: %s" , res_code, res_str.c_str ()));
1659
+ }
1660
+
1661
+ auto all_files = model_info[" Data" ][" Files" ];
1662
+
1663
+ std::vector<std::string> all_available_files;
1664
+ std::string gguf_file;
1665
+ std::string upper_tag;
1666
+ upper_tag.reserve (tag.size ());
1667
+ std::string lower_tag;
1668
+ lower_tag.reserve (tag.size ());
1669
+ std::transform (tag.begin (), tag.end (), std::back_inserter (upper_tag), ::toupper);
1670
+ std::transform (tag.begin (), tag.end (), std::back_inserter (lower_tag), ::tolower);
1671
+ for (const auto & _file : all_files) {
1672
+ auto file = _file[" Path" ].get <std::string>();
1673
+ if (!string_ends_with (file, " .gguf" )) {
1674
+ continue ;
1675
+ }
1676
+ if (file.find (upper_tag) != std::string::npos || file.find (lower_tag) != std::string::npos) {
1677
+ gguf_file = file;
1678
+ }
1679
+ all_available_files.push_back (file);
1680
+ }
1681
+ if (gguf_file.empty ()) {
1682
+ gguf_file = all_available_files[0 ];
1683
+ }
1684
+
1685
+ return std::make_pair (hf_repo, gguf_file);
1686
+ }
1687
+
1584
1688
#else
1585
1689
1586
1690
struct llama_model * common_load_model_from_url (
0 commit comments