@@ -815,13 +815,28 @@ void f8_e4m3_to_f16_vec(uint8_t* src, uint16_t* dst, int64_t n) {
         dst[i] = f8_e4m3_to_f16(src[i]);
     }
 }
+
 void f8_e5m2_to_f16_vec(uint8_t* src, uint16_t* dst, int64_t n) {
     // support inplace op
     for (int64_t i = n - 1; i >= 0; i--) {
         dst[i] = f8_e5m2_to_f16(src[i]);
     }
 }
 
+void f64_to_f32_vec(double* src, float* dst, int64_t n) {
+    // support inplace op
+    for (int64_t i = 0; i < n; i++) {
+        dst[i] = (float)src[i];
+    }
+}
+
+void i64_to_i32_vec(int64_t* src, int32_t* dst, int64_t n) {
+    // support inplace op
+    for (int64_t i = 0; i < n; i++) {
+        dst[i] = (int32_t)src[i];
+    }
+}
+
 void convert_tensor(void* src,
                     ggml_type src_type,
                     void* dst,
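Both new helpers convert in place, like the existing f8 ones, but note the loop direction: f8_e5m2_to_f16_vec widens (1 byte -> 2 bytes) and therefore walks backward, while f64_to_f32_vec and i64_to_i32_vec narrow (8 bytes -> 4 bytes) and can safely walk forward, since element i of the output never extends past element i of the input. A standalone sketch of the narrowing case (the buffer contents and sizes are illustrative, not from the patch):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    // Same shape as the patch's helper: narrowing conversion, forward loop.
    static void f64_to_f32_vec(double* src, float* dst, int64_t n) {
        for (int64_t i = 0; i < n; i++) {
            dst[i] = (float)src[i];  // bytes [4i, 4i+4) are written only after bytes [8i, 8i+8) were read
        }
    }

    int main() {
        // One byte buffer holding 4 doubles, converted to 4 floats in place,
        // mirroring how the loader reuses read_buffer / dst_tensor->data.
        double vals[4] = {1.5, 2.25, -3.0, 4.125};
        std::vector<uint8_t> buf(sizeof(vals));
        memcpy(buf.data(), vals, sizeof(vals));

        f64_to_f32_vec((double*)buf.data(), (float*)buf.data(), 4);

        float* out = (float*)buf.data();
        for (int i = 0; i < 4; i++) {
            printf("%g\n", out[i]);  // 1.5 2.25 -3 4.125
        }
        return 0;
    }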
@@ -1057,13 +1072,13 @@ ggml_type str_to_ggml_type(const std::string& dtype) {
     } else if (dtype == "F32") {
         ttype = GGML_TYPE_F32;
     } else if (dtype == "F64") {
-        ttype = GGML_TYPE_F64;
+        ttype = GGML_TYPE_F32;
     } else if (dtype == "F8_E4M3") {
         ttype = GGML_TYPE_F16;
     } else if (dtype == "F8_E5M2") {
         ttype = GGML_TYPE_F16;
     } else if (dtype == "I64") {
-        ttype = GGML_TYPE_I64;
+        ttype = GGML_TYPE_I32;
     }
     return ttype;
 }
@@ -1185,6 +1200,14 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
             tensor_storage.is_f8_e5m2 = true;
             // f8 -> f16
             GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size * 2);
+        } else if (dtype == "F64") {
+            tensor_storage.is_f64 = true;
+            // f64 -> f32
+            GGML_ASSERT(tensor_storage.nbytes() * 2 == tensor_data_size);
+        } else if (dtype == "I64") {
+            tensor_storage.is_i64 = true;
+            // i64 -> i32
+            GGML_ASSERT(tensor_storage.nbytes() * 2 == tensor_data_size);
         } else {
             GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size);
         }
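The size checks follow from str_to_ggml_type above: an F64 or I64 tensor is declared as GGML_TYPE_F32 / GGML_TYPE_I32, so tensor_storage.nbytes() counts 4 bytes per element while the safetensors payload holds 8, hence nbytes() * 2 == tensor_data_size (the F8 branches are the mirror image: 1 byte on disk, 2 bytes as F16). A worked example with an illustrative shape:

    // Illustrative only: a 320 x 1280 tensor stored as F64 in a .safetensors file.
    int64_t nelements        = 320 * 1280;     // 409,600 elements
    size_t  tensor_data_size = nelements * 8;  // 3,276,800 bytes on disk (F64)
    size_t  nbytes           = nelements * 4;  // 1,638,400 bytes once typed as GGML_TYPE_F32
    // The patch's assertion: nbytes * 2 == tensor_data_size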
@@ -1945,7 +1968,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
         // for the CPU and Metal backend, we can copy directly into the tensor
         if (tensor_storage.type == dst_tensor->type) {
             GGML_ASSERT(ggml_nbytes(dst_tensor) == tensor_storage.nbytes());
-            read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
+            if (tensor_storage.is_f64 || tensor_storage.is_i64) {
+                read_buffer.resize(tensor_storage.nbytes_to_read());
+                read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
+            } else {
+                read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
+            }
 
             if (tensor_storage.is_bf16) {
                 // inplace op
@@ -1956,9 +1984,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
             } else if (tensor_storage.is_f8_e5m2) {
                 // inplace op
                 f8_e5m2_to_f16_vec((uint8_t*)dst_tensor->data, (uint16_t*)dst_tensor->data, tensor_storage.nelements());
+            } else if (tensor_storage.is_f64) {
+                f64_to_f32_vec((double*)read_buffer.data(), (float*)dst_tensor->data, tensor_storage.nelements());
+            } else if (tensor_storage.is_i64) {
+                i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)dst_tensor->data, tensor_storage.nelements());
             }
         } else {
-            read_buffer.resize(tensor_storage.nbytes());
+            read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
             read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
 
             if (tensor_storage.is_bf16) {
@@ -1970,13 +2002,19 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
             } else if (tensor_storage.is_f8_e5m2) {
                 // inplace op
                 f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
+            } else if (tensor_storage.is_f64) {
+                // inplace op
+                f64_to_f32_vec((double*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
+            } else if (tensor_storage.is_i64) {
+                // inplace op
+                i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)read_buffer.data(), tensor_storage.nelements());
             }
 
             convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data,
                            dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
         }
     } else {
-        read_buffer.resize(tensor_storage.nbytes());
+        read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
         read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
 
         if (tensor_storage.is_bf16) {
@@ -1988,6 +2026,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
         } else if (tensor_storage.is_f8_e5m2) {
             // inplace op
             f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
+        } else if (tensor_storage.is_f64) {
+            // inplace op
+            f64_to_f32_vec((double*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
+        } else if (tensor_storage.is_i64) {
+            // inplace op
+            i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)read_buffer.data(), tensor_storage.nelements());
         }
 
         if (tensor_storage.type == dst_tensor->type) {
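With F64/I64 in the mix, the staging buffer can no longer be sized by tensor_storage.nbytes() alone: for the F8 formats the converted F16 data is larger than what is read from disk, while for F64/I64 the raw bytes read are larger than the converted result, which is why the resize now takes std::max(nbytes(), nbytes_to_read()). A minimal sketch of that sizing rule (staging_buffer_size is a hypothetical name, not from the patch; element counts are illustrative):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>

    // The buffer must hold both the raw bytes read from disk and the
    // in-place converted result, whichever is larger.
    static size_t staging_buffer_size(size_t nbytes, size_t nbytes_to_read) {
        return std::max(nbytes, nbytes_to_read);
    }

    int main() {
        const size_t n = 1000;                              // elements, illustrative
        assert(staging_buffer_size(n * 2, n * 1) == n * 2); // F8_E5M2 -> F16: converted size wins
        assert(staging_buffer_size(n * 4, n * 8) == n * 8); // F64     -> F32: on-disk size wins
        return 0;
    }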