Skip to content

Commit ca155c1

Browse files
authored
[GGUF] typed metadata (#1649)
### Description

Enhance GGUF functionality by adding typedMetadata support.

This update introduces typedMetadata to the gguf function, allowing users to request structured metadata alongside the standard output. The implementation includes checks for both V1 and V2 file formats, ensuring compatibility and consistency in metadata retrieval. Additionally, tests have been added to validate the new functionality and ensure that metadata values align correctly between standard and typed formats.

### Usage

```ts
import { GGMLQuantizationType, GGUFValueType, gguf } from "@huggingface/gguf";

const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";

const { metadata, typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });

console.log(typedMetadata);
// {
//   version: { value: 2, type: GGUFValueType.UINT32 },
//   tensor_count: { value: 291n, type: GGUFValueType.UINT64 },
//   kv_count: { value: 19n, type: GGUFValueType.UINT64 },
//   "general.architecture": { value: "llama", type: GGUFValueType.STRING },
//   "general.file_type": { value: 10, type: GGUFValueType.UINT32 },
//   "general.name": { value: "LLaMA v2", type: GGUFValueType.STRING },
//   "llama.attention.head_count": { value: 32, type: GGUFValueType.UINT32 },
//   "llama.attention.layer_norm_rms_epsilon": { value: 9.999999974752427e-7, type: GGUFValueType.FLOAT32 },
//   "tokenizer.ggml.tokens": { value: ["<unk>", "<s>", "</s>", ...], type: GGUFValueType.ARRAY },
//   ...
// }

// Access both value and type information
console.log(typedMetadata["general.architecture"].value); // "llama"
console.log(typedMetadata["general.architecture"].type); // GGUFValueType.STRING (8)
```
1 parent 38d21de commit ca155c1

File tree

4 files changed

+301
-12
lines changed

4 files changed

+301
-12
lines changed

packages/gguf/README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,41 @@ const { metadata, tensorInfos } = await gguf(
6868
);
6969
```
7070

71+
### Typed metadata
72+
73+
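You can get metadata with type information by setting `typedMetadata: true`. The result then includes a `typedMetadata` object in addition to `metadata`, in which each entry holds the original `value` and its GGUF data `type`; array entries also carry a `subType` giving the type of their elements: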
74+
75+
```ts
76+
import { GGMLQuantizationType, GGUFValueType, gguf } from "@huggingface/gguf";
77+
78+
const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";
79+
80+
const { metadata, typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });
81+
82+
console.log(typedMetadata);
83+
// {
84+
// version: { value: 2, type: GGUFValueType.UINT32 },
85+
// tensor_count: { value: 291n, type: GGUFValueType.UINT64 },
86+
// kv_count: { value: 19n, type: GGUFValueType.UINT64 },
87+
// "general.architecture": { value: "llama", type: GGUFValueType.STRING },
88+
// "general.file_type": { value: 10, type: GGUFValueType.UINT32 },
89+
// "general.name": { value: "LLaMA v2", type: GGUFValueType.STRING },
90+
// "llama.attention.head_count": { value: 32, type: GGUFValueType.UINT32 },
91+
// "llama.attention.layer_norm_rms_epsilon": { value: 9.999999974752427e-7, type: GGUFValueType.FLOAT32 },
92+
// "tokenizer.ggml.tokens": { value: ["<unk>", "<s>", "</s>", ...], type: GGUFValueType.ARRAY, subType: GGUFValueType.STRING },
93+
// "tokenizer.ggml.scores": { value: [0.0, -1000.0, -1000.0, ...], type: GGUFValueType.ARRAY, subType: GGUFValueType.FLOAT32 },
94+
// ...
95+
// }
96+
97+
// Access both value and type information
98+
console.log(typedMetadata["general.architecture"].value); // "llama"
99+
console.log(typedMetadata["general.architecture"].type); // GGUFValueType.STRING (8)
100+
101+
// For arrays, subType indicates the type of array elements
102+
console.log(typedMetadata["tokenizer.ggml.tokens"].type); // GGUFValueType.ARRAY (9)
103+
console.log(typedMetadata["tokenizer.ggml.tokens"].subType); // GGUFValueType.STRING (8)
104+
```
105+
71106
### Strictly typed
72107

73108
By default, known fields in `metadata` are typed. This includes various fields found in [llama.cpp](https://github.com/ggerganov/llama.cpp), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and [ggml](https://github.com/ggerganov/ggml).

packages/gguf/src/gguf.spec.ts

Lines changed: 150 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import { beforeAll, describe, expect, it } from "vitest";
2-
import type { GGUFParseOutput } from "./gguf";
2+
import type { GGUFParseOutput, MetadataValue } from "./gguf";
33
import {
44
GGMLFileQuantizationType,
55
GGMLQuantizationType,
6+
GGUFValueType,
67
gguf,
78
ggufAllShards,
89
parseGgufShardFilename,
@@ -325,4 +326,152 @@ describe("gguf", () => {
325326
nearestQuant = findNearestQuantType(GGMLFileQuantizationType.F16, visionQuants);
326327
expect(nearestQuant).toEqual(GGMLFileQuantizationType.F16);
327328
});
329+
330+
it("should not return typedMetadata by default", async () => {
331+
const result = await gguf(URL_LLAMA);
332+
expect(result).not.toHaveProperty("typedMetadata");
333+
expect(result).toHaveProperty("metadata");
334+
expect(result).toHaveProperty("tensorInfos");
335+
expect(result).toHaveProperty("tensorDataOffset");
336+
});
337+
338+
it("should return typedMetadata when requested", async () => {
339+
const { metadata, typedMetadata, tensorInfos } = await gguf(URL_LLAMA, { typedMetadata: true });
340+
341+
// Should have both metadata and typedMetadata
342+
expect(metadata).toBeDefined();
343+
expect(typedMetadata).toBeDefined();
344+
expect(tensorInfos).toBeDefined();
345+
346+
// Basic structure checks
347+
expect(typedMetadata.version).toEqual({
348+
value: 2,
349+
type: GGUFValueType.UINT32,
350+
});
351+
expect(typedMetadata.tensor_count).toEqual({
352+
value: 291n,
353+
type: GGUFValueType.UINT64,
354+
});
355+
expect(typedMetadata.kv_count).toEqual({
356+
value: 19n,
357+
type: GGUFValueType.UINT64,
358+
});
359+
360+
// Check string metadata
361+
expect(typedMetadata["general.architecture"]).toEqual({
362+
value: "llama",
363+
type: GGUFValueType.STRING,
364+
});
365+
expect(typedMetadata["general.name"]).toEqual({
366+
value: "LLaMA v2",
367+
type: GGUFValueType.STRING,
368+
});
369+
370+
// Check numeric metadata
371+
expect(typedMetadata["general.file_type"]).toEqual({
372+
value: GGMLFileQuantizationType.Q2_K,
373+
type: GGUFValueType.UINT32,
374+
});
375+
expect(typedMetadata["llama.attention.head_count"]).toEqual({
376+
value: 32,
377+
type: GGUFValueType.UINT32,
378+
});
379+
380+
// Check float metadata
381+
expect(typedMetadata["llama.attention.layer_norm_rms_epsilon"]).toEqual({
382+
value: 9.999999974752427e-7,
383+
type: GGUFValueType.FLOAT32,
384+
});
385+
});
386+
387+
it("should return typedMetadata with parameter count", async () => {
388+
const { metadata, typedMetadata, tensorInfos, parameterCount } = await gguf(URL_LLAMA, {
389+
typedMetadata: true,
390+
computeParametersCount: true,
391+
});
392+
393+
expect(metadata).toBeDefined();
394+
expect(typedMetadata).toBeDefined();
395+
expect(tensorInfos).toBeDefined();
396+
expect(parameterCount).toEqual(6_738_415_616);
397+
398+
// Verify typedMetadata structure is still correct
399+
expect(typedMetadata.version).toEqual({
400+
value: 2,
401+
type: GGUFValueType.UINT32,
402+
});
403+
expect(typedMetadata["general.architecture"]).toEqual({
404+
value: "llama",
405+
type: GGUFValueType.STRING,
406+
});
407+
});
408+
409+
it("should handle typedMetadata for V1 files", async () => {
410+
const { typedMetadata } = await gguf(URL_V1, { typedMetadata: true });
411+
412+
// V1 files use UINT32 for counts instead of UINT64
413+
expect(typedMetadata.version).toEqual({
414+
value: 1,
415+
type: GGUFValueType.UINT32,
416+
});
417+
expect(typedMetadata.tensor_count).toEqual({
418+
value: 48n,
419+
type: GGUFValueType.UINT32,
420+
});
421+
expect(typedMetadata.kv_count).toEqual({
422+
value: 18n,
423+
type: GGUFValueType.UINT32,
424+
});
425+
426+
// Check other fields are properly typed
427+
expect(typedMetadata["general.architecture"]).toEqual({
428+
value: "llama",
429+
type: GGUFValueType.STRING,
430+
});
431+
expect(typedMetadata["llama.attention.head_count"]).toEqual({
432+
value: 8,
433+
type: GGUFValueType.UINT32,
434+
});
435+
});
436+
437+
it("should handle array metadata types in typedMetadata", async () => {
438+
const { typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });
439+
440+
// Check if tokens array is properly handled
441+
if (typedMetadata["tokenizer.ggml.tokens"]) {
442+
expect(typedMetadata["tokenizer.ggml.tokens"].type).toEqual(GGUFValueType.ARRAY);
443+
expect(typedMetadata["tokenizer.ggml.tokens"].subType).toEqual(GGUFValueType.STRING);
444+
expect(Array.isArray(typedMetadata["tokenizer.ggml.tokens"].value)).toBe(true);
445+
}
446+
447+
// Check if scores array is properly handled
448+
if (typedMetadata["tokenizer.ggml.scores"]) {
449+
expect(typedMetadata["tokenizer.ggml.scores"].type).toEqual(GGUFValueType.ARRAY);
450+
expect(typedMetadata["tokenizer.ggml.scores"].subType).toEqual(GGUFValueType.FLOAT32);
451+
expect(Array.isArray(typedMetadata["tokenizer.ggml.scores"].value)).toBe(true);
452+
}
453+
454+
// Check if token_type array is properly handled
455+
if (typedMetadata["tokenizer.ggml.token_type"]) {
456+
expect(typedMetadata["tokenizer.ggml.token_type"].type).toEqual(GGUFValueType.ARRAY);
457+
expect(typedMetadata["tokenizer.ggml.token_type"].subType).toEqual(GGUFValueType.INT32);
458+
expect(Array.isArray(typedMetadata["tokenizer.ggml.token_type"].value)).toBe(true);
459+
}
460+
});
461+
462+
it("should maintain consistency between metadata and typedMetadata values", async () => {
463+
const { metadata, typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });
464+
465+
// All keys should be present in both
466+
const metadataKeys = Object.keys(metadata);
467+
const typedMetadataKeys = Object.keys(typedMetadata);
468+
469+
expect(metadataKeys.sort()).toEqual(typedMetadataKeys.sort());
470+
471+
// Values should match for all keys
472+
const metadataAsRecord = metadata as Record<string, MetadataValue>;
473+
for (const key of metadataKeys) {
474+
expect(typedMetadata[key].value).toEqual(metadataAsRecord[key]);
475+
}
476+
});
328477
});

packages/gguf/src/gguf.ts

Lines changed: 88 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
1+
import type { MetadataValue, Version, GGUFMetadata, GGUFTypedMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
22
import { GGUFValueType } from "./types";
33
import { isBackend } from "./utils/isBackend";
44
import { promisesQueue } from "./utils/promisesQueue";
@@ -8,6 +8,7 @@ export type {
88
MetadataValue,
99
Version,
1010
GGUFMetadata,
11+
GGUFTypedMetadata,
1112
GGUFTensorInfo,
1213
GGUFParseOutput,
1314
GGUFMetadataOptions,
@@ -245,9 +246,25 @@ function readMetadataValue(
245246
export async function gguf(
246247
uri: string,
247248
params: {
248-
/**
249-
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
250-
*/
249+
fetch?: typeof fetch;
250+
additionalFetchHeaders?: Record<string, string>;
251+
typedMetadata: true;
252+
allowLocalFile?: boolean;
253+
}
254+
): Promise<GGUFParseOutput & { typedMetadata: GGUFTypedMetadata }>;
255+
export async function gguf(
256+
uri: string,
257+
params: {
258+
fetch?: typeof fetch;
259+
additionalFetchHeaders?: Record<string, string>;
260+
typedMetadata: true;
261+
computeParametersCount: true;
262+
allowLocalFile?: boolean;
263+
}
264+
): Promise<GGUFParseOutput & { parameterCount: number; typedMetadata: GGUFTypedMetadata }>;
265+
export async function gguf(
266+
uri: string,
267+
params: {
251268
fetch?: typeof fetch;
252269
additionalFetchHeaders?: Record<string, string>;
253270
computeParametersCount: true;
@@ -257,9 +274,6 @@ export async function gguf(
257274
export async function gguf(
258275
uri: string,
259276
params?: {
260-
/**
261-
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
262-
*/
263277
fetch?: typeof fetch;
264278
additionalFetchHeaders?: Record<string, string>;
265279
allowLocalFile?: boolean;
@@ -273,10 +287,11 @@ export async function gguf(
273287
*/
274288
fetch?: typeof fetch;
275289
additionalFetchHeaders?: Record<string, string>;
290+
typedMetadata?: boolean;
276291
computeParametersCount?: boolean;
277292
allowLocalFile?: boolean;
278293
}
279-
): Promise<GGUFParseOutput & { parameterCount?: number }> {
294+
): Promise<GGUFParseOutput & { parameterCount?: number; typedMetadata?: GGUFTypedMetadata }> {
280295
let r: RangeView;
281296
if (isBackend) {
282297
/// On backend, we switch between remote/local file based on protocol
@@ -336,6 +351,21 @@ export async function gguf(
336351
kv_count: numKv.value,
337352
};
338353

354+
let typedMetadata: GGUFTypedMetadata | undefined;
355+
if (params?.typedMetadata) {
356+
typedMetadata = {
357+
version: { value: version, type: GGUFValueType.UINT32 },
358+
tensor_count: {
359+
value: tensorCount.value,
360+
type: version === 1 ? GGUFValueType.UINT32 : GGUFValueType.UINT64,
361+
},
362+
kv_count: {
363+
value: numKv.value,
364+
type: version === 1 ? GGUFValueType.UINT32 : GGUFValueType.UINT64,
365+
},
366+
};
367+
}
368+
339369
for (let i = 0; i < numKv.value; i++) {
340370
await r.fetchChunkIfNeeded(offset);
341371

@@ -366,6 +396,29 @@ export async function gguf(
366396
}
367397
offset += valueResult.length;
368398
metadata[keyResult.value] = valueResult.value;
399+
if (typedMetadata) {
400+
const typedEntry: {
401+
value: MetadataValue;
402+
type: GGUFValueType;
403+
subType?: GGUFValueType;
404+
} = {
405+
value: valueResult.value,
406+
type: valueType,
407+
};
408+
409+
// For arrays, read the subType (element type)
410+
if (valueType === GGUFValueType.ARRAY) {
411+
// Array type is stored at the beginning of the value data
412+
// We need to read it from the original offset (before reading the value)
413+
const arrayTypeOffset = offset - valueResult.length;
414+
const arraySubType = r.view.getUint32(arrayTypeOffset, littleEndian);
415+
if (isGGUFValueType(arraySubType)) {
416+
typedEntry.subType = arraySubType;
417+
}
418+
}
419+
420+
typedMetadata[keyResult.value] = typedEntry;
421+
}
369422
}
370423

371424
const tensorInfos: GGUFTensorInfo[] = [];
@@ -405,14 +458,38 @@ export async function gguf(
405458
const alignment: number = Number(metadata["general.alignment"] ?? GGUF_DEFAULT_ALIGNMENT);
406459
const tensorDataOffset = BigInt(GGML_PAD(offset, alignment));
407460

408-
if (params?.computeParametersCount) {
461+
if (params?.computeParametersCount && params?.typedMetadata) {
462+
const parameterCount = tensorInfos
463+
.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
464+
.reduce((acc, val) => acc + val, 0);
465+
466+
return {
467+
metadata,
468+
tensorInfos,
469+
tensorDataOffset,
470+
parameterCount,
471+
typedMetadata: typedMetadata as GGUFTypedMetadata,
472+
} as GGUFParseOutput & { parameterCount: number; typedMetadata: GGUFTypedMetadata };
473+
} else if (params?.computeParametersCount) {
409474
const parameterCount = tensorInfos
410475
.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
411476
.reduce((acc, val) => acc + val, 0);
412477

413-
return { metadata, tensorInfos, tensorDataOffset, parameterCount };
478+
return {
479+
metadata,
480+
tensorInfos,
481+
tensorDataOffset,
482+
parameterCount,
483+
} as GGUFParseOutput & { parameterCount: number };
484+
} else if (params?.typedMetadata) {
485+
return {
486+
metadata,
487+
tensorInfos,
488+
tensorDataOffset,
489+
typedMetadata: typedMetadata as GGUFTypedMetadata,
490+
} as GGUFParseOutput & { typedMetadata: GGUFTypedMetadata };
414491
} else {
415-
return { metadata, tensorInfos, tensorDataOffset };
492+
return { metadata, tensorInfos, tensorDataOffset } as GGUFParseOutput;
416493
}
417494
}
418495

0 commit comments

Comments
 (0)