Skip to content

Commit 48b7ff1

Browse files
authored
llama : fix platforms without mmap (#4578)
* llama : fix platforms without mmap
* win32 : limit prefetch size to the file size
* fix win32 error clobber, unnecessary std::string in std::runtime_error
1 parent 48b24b1 commit 48b7ff1

File tree

3 files changed

+24
-21
lines changed

3 files changed

+24
-21
lines changed

ggml-cuda.cu

Lines changed: 2 additions & 1 deletion
```diff
@@ -7702,7 +7702,8 @@ inline void ggml_cuda_op_scale(
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
-    const float scale = ((float *) dst->op_params)[0];
+    float scale;
+    memcpy(&scale, dst->op_params, sizeof(float));
 
     scale_f32_cuda(src0_dd, dst_dd, scale, ggml_nelements(src0), main_stream);
     CUDA_CHECK(cudaGetLastError());
```

ggml.c

Lines changed: 4 additions & 2 deletions
```diff
@@ -10335,7 +10335,8 @@ static void ggml_compute_forward_scale_f32(
     }
 
     // scale factor
-    const float v = *(float *) dst->op_params;
+    float v;
+    memcpy(&v, dst->op_params, sizeof(float));
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -15152,7 +15153,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 // necessary for llama
                 if (src0->grad) {
-                    const float s = ((float *) tensor->op_params)[0];
+                    float s;
+                    memcpy(&s, tensor->op_params, sizeof(float));
 
                     src0->grad =
                         ggml_add_or_set(ctx,
```

llama.cpp

Lines changed: 18 additions & 18 deletions
```diff
@@ -778,7 +778,7 @@ struct llama_file {
             throw std::runtime_error(format("read error: %s", strerror(errno)));
         }
         if (ret != 1) {
-            throw std::runtime_error(std::string("unexpectedly reached end of file"));
+            throw std::runtime_error("unexpectedly reached end of file");
         }
     }
 
@@ -931,29 +931,29 @@ struct llama_mmap {
 #elif defined(_WIN32)
     static constexpr bool SUPPORTED = true;
 
-    llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
-        (void) numa;
+    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1, bool numa = false) {
+        GGML_UNUSED(numa);
 
         size = file->size;
 
         HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
 
         HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
-        DWORD error = GetLastError();
 
         if (hMapping == NULL) {
+            DWORD error = GetLastError();
             throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
         }
 
         addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
-        error = GetLastError();
+        DWORD error = GetLastError();
         CloseHandle(hMapping);
 
         if (addr == NULL) {
             throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
         }
 
-        if (prefetch) {
+        if (prefetch > 0) {
             // PrefetchVirtualMemory is only present on Windows 8 and above, so we dynamically load it
             BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
             HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");
@@ -965,9 +965,9 @@ struct llama_mmap {
             // advise the kernel to preload the mapped memory
             WIN32_MEMORY_RANGE_ENTRY range;
             range.VirtualAddress = addr;
-            range.NumberOfBytes = (SIZE_T)size;
+            range.NumberOfBytes = (SIZE_T) std::min(size, prefetch);
             if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
-                fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n",
+                LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
                         llama_format_win_err(GetLastError()).c_str());
             }
         }
@@ -982,26 +982,26 @@ struct llama_mmap {
 
     ~llama_mmap() {
         if (!UnmapViewOfFile(addr)) {
-            fprintf(stderr, "warning: UnmapViewOfFile failed: %s\n",
+            LLAMA_LOG_WARN("warning: UnmapViewOfFile failed: %s\n",
                     llama_format_win_err(GetLastError()).c_str());
        }
    }
 #else
     static constexpr bool SUPPORTED = false;
 
-    llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
-        (void) file;
-        (void) prefetch;
-        (void) numa;
+    llama_mmap(struct llama_file * file, size_t prefetch = -1, bool numa = false) {
+        GGML_UNUSED(file);
+        GGML_UNUSED(prefetch);
+        GGML_UNUSED(numa);
 
-        throw std::runtime_error(std::string("mmap not supported"));
+        throw std::runtime_error("mmap not supported");
     }
 
-    void unmap(size_t offset, size_t len) {
-        (void) offset;
-        (void) len;
+    void unmap_fragment(size_t first, size_t last) {
+        GGML_UNUSED(first);
+        GGML_UNUSED(last);
 
-        throw std::runtime_error(std::string("mmap not supported"));
+        throw std::runtime_error("mmap not supported");
     }
 #endif
 };
```

0 commit comments

Comments (0)