Skip to content

Commit 6416140

Browse files
committed
[GGUF] typed metadata
1 parent 2a2c912 commit 6416140

File tree

3 files changed

+238
-12
lines changed

3 files changed

+238
-12
lines changed

packages/gguf/src/gguf.spec.ts

Lines changed: 141 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import { beforeAll, describe, expect, it } from "vitest";
2-
import type { GGUFParseOutput } from "./gguf";
2+
import type { GGUFParseOutput, MetadataValue } from "./gguf";
33
import {
44
GGMLFileQuantizationType,
55
GGMLQuantizationType,
6+
GGUFValueType,
67
gguf,
78
ggufAllShards,
89
parseGgufShardFilename,
@@ -325,4 +326,143 @@ describe("gguf", () => {
325326
nearestQuant = findNearestQuantType(GGMLFileQuantizationType.F16, visionQuants);
326327
expect(nearestQuant).toEqual(GGMLFileQuantizationType.F16);
327328
});
329+
330+
// Default call (no options object): the result must not carry a
// `typedMetadata` property, while the three base output fields
// (`metadata`, `tensorInfos`, `tensorDataOffset`) are still present.
it("should not return typedMetadata by default", async () => {
331+
const result = await gguf(URL_LLAMA);
332+
expect(result).not.toHaveProperty("typedMetadata");
333+
expect(result).toHaveProperty("metadata");
334+
expect(result).toHaveProperty("tensorInfos");
335+
expect(result).toHaveProperty("tensorDataOffset");
336+
});
337+
338+
// With `{ typedMetadata: true }` the result exposes `typedMetadata` next to
// the plain `metadata`. Every typed entry is asserted to be a
// `{ value, type }` pair, where `type` is a `GGUFValueType` member.
it("should return typedMetadata when requested", async () => {
339+
const { metadata, typedMetadata, tensorInfos } = await gguf(URL_LLAMA, { typedMetadata: true });
340+
341+
// Should have both metadata and typedMetadata
342+
expect(metadata).toBeDefined();
343+
expect(typedMetadata).toBeDefined();
344+
expect(tensorInfos).toBeDefined();
345+
346+
// Basic structure checks
// Header fields: `version` is a plain number, while the two counts are
// bigint values tagged UINT64 for this (version 2) file.
347+
expect(typedMetadata.version).toEqual({
348+
value: 2,
349+
type: GGUFValueType.UINT32,
350+
});
351+
expect(typedMetadata.tensor_count).toEqual({
352+
value: 291n,
353+
type: GGUFValueType.UINT64,
354+
});
355+
expect(typedMetadata.kv_count).toEqual({
356+
value: 19n,
357+
type: GGUFValueType.UINT64,
358+
});
359+
360+
// Check string metadata
361+
expect(typedMetadata["general.architecture"]).toEqual({
362+
value: "llama",
363+
type: GGUFValueType.STRING,
364+
});
365+
expect(typedMetadata["general.name"]).toEqual({
366+
value: "LLaMA v2",
367+
type: GGUFValueType.STRING,
368+
});
369+
370+
// Check numeric metadata
371+
expect(typedMetadata["general.file_type"]).toEqual({
372+
value: GGMLFileQuantizationType.Q2_K,
373+
type: GGUFValueType.UINT32,
374+
});
375+
expect(typedMetadata["llama.attention.head_count"]).toEqual({
376+
value: 32,
377+
type: GGUFValueType.UINT32,
378+
});
379+
380+
// Check float metadata
// NOTE(review): 9.999999974752427e-7 looks like 1e-6 after a float32
// round-trip into a JS number — confirm against the fixture file.
381+
expect(typedMetadata["llama.attention.layer_norm_rms_epsilon"]).toEqual({
382+
value: 9.999999974752427e-7,
383+
type: GGUFValueType.FLOAT32,
384+
});
385+
});
386+
387+
// Both options at once: `typedMetadata: true` together with
// `computeParametersCount: true` must yield `parameterCount` and
// `typedMetadata` in the same result object.
it("should return typedMetadata with parameter count", async () => {
388+
const { metadata, typedMetadata, tensorInfos, parameterCount } = await gguf(URL_LLAMA, {
389+
typedMetadata: true,
390+
computeParametersCount: true,
391+
});
392+
393+
expect(metadata).toBeDefined();
394+
expect(typedMetadata).toBeDefined();
395+
expect(tensorInfos).toBeDefined();
// Exact expected parameter total for the URL_LLAMA fixture.
396+
expect(parameterCount).toEqual(6_738_415_616);
397+
398+
// Verify typedMetadata structure is still correct
399+
expect(typedMetadata.version).toEqual({
400+
value: 2,
401+
type: GGUFValueType.UINT32,
402+
});
403+
expect(typedMetadata["general.architecture"]).toEqual({
404+
value: "llama",
405+
type: GGUFValueType.STRING,
406+
});
407+
});
408+
409+
// Version-1 fixture (URL_V1): the header counts are expected to be tagged
// UINT32 rather than UINT64, while their values are still compared as
// bigint (48n, 18n).
it("should handle typedMetadata for V1 files", async () => {
410+
const { typedMetadata } = await gguf(URL_V1, { typedMetadata: true });
411+
412+
// V1 files use UINT32 for counts instead of UINT64
413+
expect(typedMetadata.version).toEqual({
414+
value: 1,
415+
type: GGUFValueType.UINT32,
416+
});
417+
expect(typedMetadata.tensor_count).toEqual({
418+
value: 48n,
419+
type: GGUFValueType.UINT32,
420+
});
421+
expect(typedMetadata.kv_count).toEqual({
422+
value: 18n,
423+
type: GGUFValueType.UINT32,
424+
});
425+
426+
// Check other fields are properly typed
427+
expect(typedMetadata["general.architecture"]).toEqual({
428+
value: "llama",
429+
type: GGUFValueType.STRING,
430+
});
431+
expect(typedMetadata["llama.attention.head_count"]).toEqual({
432+
value: 8,
433+
type: GGUFValueType.UINT32,
434+
});
435+
});
436+
437+
// Array-valued entries: when a tokenizer array key is present, its typed
// entry must be tagged ARRAY and hold a JS array as its value.
// NOTE(review): both assertion groups sit inside `if` guards, so they are
// skipped (and the test still passes) when the key is absent from the file.
it("should handle array metadata types in typedMetadata", async () => {
438+
const { typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });
439+
440+
// Check if tokens array is properly handled
441+
if (typedMetadata["tokenizer.ggml.tokens"]) {
442+
expect(typedMetadata["tokenizer.ggml.tokens"].type).toEqual(GGUFValueType.ARRAY);
443+
expect(Array.isArray(typedMetadata["tokenizer.ggml.tokens"].value)).toBe(true);
444+
}
445+
446+
// Check if scores array is properly handled
447+
if (typedMetadata["tokenizer.ggml.scores"]) {
448+
expect(typedMetadata["tokenizer.ggml.scores"].type).toEqual(GGUFValueType.ARRAY);
449+
expect(Array.isArray(typedMetadata["tokenizer.ggml.scores"].value)).toBe(true);
450+
}
451+
});
452+
453+
// Cross-check: `metadata` and `typedMetadata` must expose the same key set,
// and each typed entry's `value` must equal the plain metadata value.
it("should maintain consistency between metadata and typedMetadata values", async () => {
454+
const { metadata, typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });
455+
456+
// All keys should be present in both
457+
const metadataKeys = Object.keys(metadata);
458+
const typedMetadataKeys = Object.keys(typedMetadata);
459+
// sort() reorders both arrays in place, making the comparison independent
// of key insertion order.
460+
expect(metadataKeys.sort()).toEqual(typedMetadataKeys.sort());
461+
462+
// Values should match for all keys
// The cast views `metadata` as a string-keyed record so it can be indexed
// with an arbitrary key inside the loop.
463+
const metadataAsRecord = metadata as Record<string, MetadataValue>;
464+
for (const key of metadataKeys) {
465+
expect(typedMetadata[key].value).toEqual(metadataAsRecord[key]);
466+
}
467+
});
328468
});

packages/gguf/src/gguf.ts

Lines changed: 71 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
1+
import type { MetadataValue, Version, GGUFMetadata, GGUFTypedMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
22
import { GGUFValueType } from "./types";
33
import { isBackend } from "./utils/isBackend";
44
import { promisesQueue } from "./utils/promisesQueue";
@@ -8,6 +8,7 @@ export type {
88
MetadataValue,
99
Version,
1010
GGUFMetadata,
11+
GGUFTypedMetadata,
1112
GGUFTensorInfo,
1213
GGUFParseOutput,
1314
GGUFMetadataOptions,
@@ -245,9 +246,25 @@ function readMetadataValue(
245246
export async function gguf(
246247
uri: string,
247248
params: {
248-
/**
249-
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
250-
*/
249+
fetch?: typeof fetch;
250+
additionalFetchHeaders?: Record<string, string>;
251+
typedMetadata: true;
252+
allowLocalFile?: boolean;
253+
}
254+
): Promise<GGUFParseOutput & { typedMetadata: GGUFTypedMetadata }>;
255+
export async function gguf(
256+
uri: string,
257+
params: {
258+
fetch?: typeof fetch;
259+
additionalFetchHeaders?: Record<string, string>;
260+
typedMetadata: true;
261+
computeParametersCount: true;
262+
allowLocalFile?: boolean;
263+
}
264+
): Promise<GGUFParseOutput & { parameterCount: number; typedMetadata: GGUFTypedMetadata }>;
265+
export async function gguf(
266+
uri: string,
267+
params: {
251268
fetch?: typeof fetch;
252269
additionalFetchHeaders?: Record<string, string>;
253270
computeParametersCount: true;
@@ -257,9 +274,6 @@ export async function gguf(
257274
export async function gguf(
258275
uri: string,
259276
params?: {
260-
/**
261-
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
262-
*/
263277
fetch?: typeof fetch;
264278
additionalFetchHeaders?: Record<string, string>;
265279
allowLocalFile?: boolean;
@@ -273,10 +287,11 @@ export async function gguf(
273287
*/
274288
fetch?: typeof fetch;
275289
additionalFetchHeaders?: Record<string, string>;
290+
typedMetadata?: boolean;
276291
computeParametersCount?: boolean;
277292
allowLocalFile?: boolean;
278293
}
279-
): Promise<GGUFParseOutput & { parameterCount?: number }> {
294+
): Promise<GGUFParseOutput & { parameterCount?: number; typedMetadata?: GGUFTypedMetadata }> {
280295
let r: RangeView;
281296
if (isBackend) {
282297
/// On backend, we switch between remote/local file based on protocol
@@ -336,6 +351,21 @@ export async function gguf(
336351
kv_count: numKv.value,
337352
};
338353

354+
let typedMetadata: GGUFTypedMetadata | undefined;
355+
if (params?.typedMetadata) {
356+
typedMetadata = {
357+
version: { value: version, type: GGUFValueType.UINT32 },
358+
tensor_count: {
359+
value: tensorCount.value,
360+
type: version === 1 ? GGUFValueType.UINT32 : GGUFValueType.UINT64,
361+
},
362+
kv_count: {
363+
value: numKv.value,
364+
type: version === 1 ? GGUFValueType.UINT32 : GGUFValueType.UINT64,
365+
},
366+
};
367+
}
368+
339369
for (let i = 0; i < numKv.value; i++) {
340370
await r.fetchChunkIfNeeded(offset);
341371

@@ -366,6 +396,12 @@ export async function gguf(
366396
}
367397
offset += valueResult.length;
368398
metadata[keyResult.value] = valueResult.value;
399+
if (typedMetadata) {
400+
typedMetadata[keyResult.value] = {
401+
value: valueResult.value,
402+
type: valueType,
403+
};
404+
}
369405
}
370406

371407
const tensorInfos: GGUFTensorInfo[] = [];
@@ -405,14 +441,38 @@ export async function gguf(
405441
const alignment: number = Number(metadata["general.alignment"] ?? GGUF_DEFAULT_ALIGNMENT);
406442
const tensorDataOffset = BigInt(GGML_PAD(offset, alignment));
407443

408-
if (params?.computeParametersCount) {
444+
if (params?.computeParametersCount && params?.typedMetadata) {
445+
const parameterCount = tensorInfos
446+
.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
447+
.reduce((acc, val) => acc + val, 0);
448+
449+
return {
450+
metadata,
451+
tensorInfos,
452+
tensorDataOffset,
453+
parameterCount,
454+
typedMetadata: typedMetadata as GGUFTypedMetadata,
455+
} as GGUFParseOutput & { parameterCount: number; typedMetadata: GGUFTypedMetadata };
456+
} else if (params?.computeParametersCount) {
409457
const parameterCount = tensorInfos
410458
.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
411459
.reduce((acc, val) => acc + val, 0);
412460

413-
return { metadata, tensorInfos, tensorDataOffset, parameterCount };
461+
return {
462+
metadata,
463+
tensorInfos,
464+
tensorDataOffset,
465+
parameterCount,
466+
} as GGUFParseOutput & { parameterCount: number };
467+
} else if (params?.typedMetadata) {
468+
return {
469+
metadata,
470+
tensorInfos,
471+
tensorDataOffset,
472+
typedMetadata: typedMetadata as GGUFTypedMetadata,
473+
} as GGUFParseOutput & { typedMetadata: GGUFTypedMetadata };
414474
} else {
415-
return { metadata, tensorInfos, tensorDataOffset };
475+
return { metadata, tensorInfos, tensorDataOffset } as GGUFParseOutput;
416476
}
417477
}
418478

packages/gguf/src/types.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,32 @@ export type GGUFMetadata<Options extends GGUFMetadataOptions = { strict: true }>
103103
} & GGUFModelKV &
104104
(Options extends { strict: true } ? unknown : Record<string, MetadataValue>);
105105

106+
/**
 * GGUF metadata in which every entry is a `{ value, type }` pair, i.e. the
 * value together with the `GGUFValueType` tag it is associated with.
 *
 * The type is an intersection of three parts:
 * - the required header entries `version`, `tensor_count` and `kv_count`
 *   (the two counts are `bigint` and may be tagged UINT32 or UINT64);
 * - optional entries for each key of `GGUFModelKV`, whose `value` keeps the
 *   key-specific type from `GGUFModelKV`;
 * - a catch-all string index whose entries hold a generic `MetadataValue`.
 */
export type GGUFTypedMetadata = {
107+
version: {
108+
value: Version;
109+
type: GGUFValueType.UINT32;
110+
};
111+
tensor_count: {
112+
value: bigint;
113+
type: GGUFValueType.UINT32 | GGUFValueType.UINT64;
114+
};
115+
kv_count: {
116+
value: bigint;
117+
type: GGUFValueType.UINT32 | GGUFValueType.UINT64;
118+
};
119+
} & {
120+
[K in keyof GGUFModelKV]?: {
121+
value: GGUFModelKV[K];
122+
type: GGUFValueType;
123+
};
124+
} & Record<
125+
string,
126+
{
127+
value: MetadataValue;
128+
type: GGUFValueType;
129+
}
130+
>;
131+
106132
export type GGUFModelKV = (NoModelMetadata | ModelMetadata) & (NoTokenizer | Tokenizer);
107133

108134
export interface GGUFTensorInfo {

0 commit comments

Comments
 (0)