[GGUF] typed metadata (#1649)

mishig25 · web-flow · commit ca155c16fe2a · 2025-07-24T13:47:09.000+02:00
### Description

Enhance GGUF functionality by adding typedMetadata support.

This update introduces typedMetadata to the gguf function, allowing users to request structured metadata alongside the standard output. The implementation includes checks for both V1 and V2 file formats, ensuring compatibility and consistency in metadata retrieval. Additionally, tests have been added to validate the new functionality and ensure that metadata values align correctly between standard and typed formats.

### Usage

```ts
import { GGMLQuantizationType, GGUFValueType, gguf } from "@huggingface/gguf";

const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";

const { metadata, typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });

console.log(typedMetadata);
// {
//     version: { value: 2, type: GGUFValueType.UINT32 },
//     tensor_count: { value: 291n, type: GGUFValueType.UINT64 },
//     kv_count: { value: 19n, type: GGUFValueType.UINT64 },
//     "general.architecture": { value: "llama", type: GGUFValueType.STRING },
//     "general.file_type": { value: 10, type: GGUFValueType.UINT32 },
//     "general.name": { value: "LLaMA v2", type: GGUFValueType.STRING },
//     "llama.attention.head_count": { value: 32, type: GGUFValueType.UINT32 },
//     "llama.attention.layer_norm_rms_epsilon": { value: 9.999999974752427e-7, type: GGUFValueType.FLOAT32 },
//     "tokenizer.ggml.tokens": { value: ["<unk>", "<s>", "</s>", ...], type: GGUFValueType.ARRAY },
//     ...
// }

// Access both value and type information
console.log(typedMetadata["general.architecture"].value); // "llama"
console.log(typedMetadata["general.architecture"].type); // GGUFValueType.STRING (8)
```
diff --git a/packages/gguf/README.md b/packages/gguf/README.md
@@ -68,6 +68,41 @@ const { metadata, tensorInfos } = await gguf(
 );
 ```
 
+### Typed metadata
+
+You can get metadata with type information by setting `typedMetadata: true`. Each entry then provides both the original value and its GGUF value type; array entries also carry a `subType` giving the type of their elements:
+
+```ts
+import { GGMLQuantizationType, GGUFValueType, gguf } from "@huggingface/gguf";
+
+const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";
+
+const { metadata, typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });
+
+console.log(typedMetadata);
+// {
+//     version: { value: 2, type: GGUFValueType.UINT32 },
+//     tensor_count: { value: 291n, type: GGUFValueType.UINT64 },
+//     kv_count: { value: 19n, type: GGUFValueType.UINT64 },
+//     "general.architecture": { value: "llama", type: GGUFValueType.STRING },
+//     "general.file_type": { value: 10, type: GGUFValueType.UINT32 },
+//     "general.name": { value: "LLaMA v2", type: GGUFValueType.STRING },
+//     "llama.attention.head_count": { value: 32, type: GGUFValueType.UINT32 },
+//     "llama.attention.layer_norm_rms_epsilon": { value: 9.999999974752427e-7, type: GGUFValueType.FLOAT32 },
+//     "tokenizer.ggml.tokens": { value: ["<unk>", "<s>", "</s>", ...], type: GGUFValueType.ARRAY, subType: GGUFValueType.STRING },
+//     "tokenizer.ggml.scores": { value: [0.0, -1000.0, -1000.0, ...], type: GGUFValueType.ARRAY, subType: GGUFValueType.FLOAT32 },
+//     ...
+// }
+
+// Access both value and type information
+console.log(typedMetadata["general.architecture"].value); // "llama"
+console.log(typedMetadata["general.architecture"].type);  // GGUFValueType.STRING (8)
+
+// For arrays, subType indicates the type of array elements
+console.log(typedMetadata["tokenizer.ggml.tokens"].type);    // GGUFValueType.ARRAY (9)
+console.log(typedMetadata["tokenizer.ggml.tokens"].subType); // GGUFValueType.STRING (8)
+```
+
 ### Strictly typed
 
 By default, known fields in `metadata` are typed. This includes various fields found in [llama.cpp](https://github.com/ggerganov/llama.cpp), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and [ggml](https://github.com/ggerganov/ggml).
diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts
@@ -1,8 +1,9 @@
 import { beforeAll, describe, expect, it } from "vitest";
-import type { GGUFParseOutput } from "./gguf";
+import type { GGUFParseOutput, MetadataValue } from "./gguf";
 import {
 	GGMLFileQuantizationType,
 	GGMLQuantizationType,
+	GGUFValueType,
 	gguf,
 	ggufAllShards,
 	parseGgufShardFilename,
@@ -325,4 +326,152 @@ describe("gguf", () => {
 		nearestQuant = findNearestQuantType(GGMLFileQuantizationType.F16, visionQuants);
 		expect(nearestQuant).toEqual(GGMLFileQuantizationType.F16);
 	});
+
+	it("should not return typedMetadata by default", async () => {
+		const result = await gguf(URL_LLAMA);
+		expect(result).not.toHaveProperty("typedMetadata");
+		expect(result).toHaveProperty("metadata");
+		expect(result).toHaveProperty("tensorInfos");
+		expect(result).toHaveProperty("tensorDataOffset");
+	});
+
+	it("should return typedMetadata when requested", async () => {
+		const { metadata, typedMetadata, tensorInfos } = await gguf(URL_LLAMA, { typedMetadata: true });
+
+		// Should have both metadata and typedMetadata
+		expect(metadata).toBeDefined();
+		expect(typedMetadata).toBeDefined();
+		expect(tensorInfos).toBeDefined();
+
+		// Basic structure checks
+		expect(typedMetadata.version).toEqual({
+			value: 2,
+			type: GGUFValueType.UINT32,
+		});
+		expect(typedMetadata.tensor_count).toEqual({
+			value: 291n,
+			type: GGUFValueType.UINT64,
+		});
+		expect(typedMetadata.kv_count).toEqual({
+			value: 19n,
+			type: GGUFValueType.UINT64,
+		});
+
+		// Check string metadata
+		expect(typedMetadata["general.architecture"]).toEqual({
+			value: "llama",
+			type: GGUFValueType.STRING,
+		});
+		expect(typedMetadata["general.name"]).toEqual({
+			value: "LLaMA v2",
+			type: GGUFValueType.STRING,
+		});
+
+		// Check numeric metadata
+		expect(typedMetadata["general.file_type"]).toEqual({
+			value: GGMLFileQuantizationType.Q2_K,
+			type: GGUFValueType.UINT32,
+		});
+		expect(typedMetadata["llama.attention.head_count"]).toEqual({
+			value: 32,
+			type: GGUFValueType.UINT32,
+		});
+
+		// Check float metadata
+		expect(typedMetadata["llama.attention.layer_norm_rms_epsilon"]).toEqual({
+			value: 9.999999974752427e-7,
+			type: GGUFValueType.FLOAT32,
+		});
+	});
+
+	it("should return typedMetadata with parameter count", async () => {
+		const { metadata, typedMetadata, tensorInfos, parameterCount } = await gguf(URL_LLAMA, {
+			typedMetadata: true,
+			computeParametersCount: true,
+		});
+
+		expect(metadata).toBeDefined();
+		expect(typedMetadata).toBeDefined();
+		expect(tensorInfos).toBeDefined();
+		expect(parameterCount).toEqual(6_738_415_616);
+
+		// Verify typedMetadata structure is still correct
+		expect(typedMetadata.version).toEqual({
+			value: 2,
+			type: GGUFValueType.UINT32,
+		});
+		expect(typedMetadata["general.architecture"]).toEqual({
+			value: "llama",
+			type: GGUFValueType.STRING,
+		});
+	});
+
+	it("should handle typedMetadata for V1 files", async () => {
+		const { typedMetadata } = await gguf(URL_V1, { typedMetadata: true });
+
+		// V1 files use UINT32 for counts instead of UINT64
+		expect(typedMetadata.version).toEqual({
+			value: 1,
+			type: GGUFValueType.UINT32,
+		});
+		expect(typedMetadata.tensor_count).toEqual({
+			value: 48n,
+			type: GGUFValueType.UINT32,
+		});
+		expect(typedMetadata.kv_count).toEqual({
+			value: 18n,
+			type: GGUFValueType.UINT32,
+		});
+
+		// Check other fields are properly typed
+		expect(typedMetadata["general.architecture"]).toEqual({
+			value: "llama",
+			type: GGUFValueType.STRING,
+		});
+		expect(typedMetadata["llama.attention.head_count"]).toEqual({
+			value: 8,
+			type: GGUFValueType.UINT32,
+		});
+	});
+
+	it("should handle array metadata types in typedMetadata", async () => {
+		const { typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });
+
+		// Check if tokens array is properly handled
+		if (typedMetadata["tokenizer.ggml.tokens"]) {
+			expect(typedMetadata["tokenizer.ggml.tokens"].type).toEqual(GGUFValueType.ARRAY);
+			expect(typedMetadata["tokenizer.ggml.tokens"].subType).toEqual(GGUFValueType.STRING);
+			expect(Array.isArray(typedMetadata["tokenizer.ggml.tokens"].value)).toBe(true);
+		}
+
+		// Check if scores array is properly handled
+		if (typedMetadata["tokenizer.ggml.scores"]) {
+			expect(typedMetadata["tokenizer.ggml.scores"].type).toEqual(GGUFValueType.ARRAY);
+			expect(typedMetadata["tokenizer.ggml.scores"].subType).toEqual(GGUFValueType.FLOAT32);
+			expect(Array.isArray(typedMetadata["tokenizer.ggml.scores"].value)).toBe(true);
+		}
+
+		// Check if token_type array is properly handled
+		if (typedMetadata["tokenizer.ggml.token_type"]) {
+			expect(typedMetadata["tokenizer.ggml.token_type"].type).toEqual(GGUFValueType.ARRAY);
+			expect(typedMetadata["tokenizer.ggml.token_type"].subType).toEqual(GGUFValueType.INT32);
+			expect(Array.isArray(typedMetadata["tokenizer.ggml.token_type"].value)).toBe(true);
+		}
+	});
+
+	it("should maintain consistency between metadata and typedMetadata values", async () => {
+		const { metadata, typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });
+
+		// All keys should be present in both
+		const metadataKeys = Object.keys(metadata);
+		const typedMetadataKeys = Object.keys(typedMetadata);
+
+		expect(metadataKeys.sort()).toEqual(typedMetadataKeys.sort());
+
+		// Values should match for all keys
+		const metadataAsRecord = metadata as Record<string, MetadataValue>;
+		for (const key of metadataKeys) {
+			expect(typedMetadata[key].value).toEqual(metadataAsRecord[key]);
+		}
+	});
 });
diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts
@@ -1,4 +1,4 @@
-import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
+import type { MetadataValue, Version, GGUFMetadata, GGUFTypedMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
 import { GGUFValueType } from "./types";
 import { isBackend } from "./utils/isBackend";
 import { promisesQueue } from "./utils/promisesQueue";
@@ -8,6 +8,7 @@ export type {
 	MetadataValue,
 	Version,
 	GGUFMetadata,
+	GGUFTypedMetadata,
 	GGUFTensorInfo,
 	GGUFParseOutput,
 	GGUFMetadataOptions,
@@ -245,9 +246,25 @@ function readMetadataValue(
 export async function gguf(
 	uri: string,
 	params: {
-		/**
-		 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
-		 */
+		fetch?: typeof fetch;
+		additionalFetchHeaders?: Record<string, string>;
+		typedMetadata: true;
+		allowLocalFile?: boolean;
+	}
+): Promise<GGUFParseOutput & { typedMetadata: GGUFTypedMetadata }>;
+export async function gguf(
+	uri: string,
+	params: {
+		fetch?: typeof fetch;
+		additionalFetchHeaders?: Record<string, string>;
+		typedMetadata: true;
+		computeParametersCount: true;
+		allowLocalFile?: boolean;
+	}
+): Promise<GGUFParseOutput & { parameterCount: number; typedMetadata: GGUFTypedMetadata }>;
+export async function gguf(
+	uri: string,
+	params: {
 		fetch?: typeof fetch;
 		additionalFetchHeaders?: Record<string, string>;
 		computeParametersCount: true;
@@ -257,9 +274,6 @@ export async function gguf(
 export async function gguf(
 	uri: string,
 	params?: {
-		/**
-		 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
-		 */
 		fetch?: typeof fetch;
 		additionalFetchHeaders?: Record<string, string>;
 		allowLocalFile?: boolean;
@@ -273,10 +287,11 @@ export async function gguf(
 		 */
 		fetch?: typeof fetch;
 		additionalFetchHeaders?: Record<string, string>;
+		typedMetadata?: boolean;
 		computeParametersCount?: boolean;
 		allowLocalFile?: boolean;
 	}
-): Promise<GGUFParseOutput & { parameterCount?: number }> {
+): Promise<GGUFParseOutput & { parameterCount?: number; typedMetadata?: GGUFTypedMetadata }> {
 	let r: RangeView;
 	if (isBackend) {
 		/// On backend, we switch between remote/local file based on protocol
@@ -336,6 +351,21 @@ export async function gguf(
 		kv_count: numKv.value,
 	};
 
+	let typedMetadata: GGUFTypedMetadata | undefined;
+	if (params?.typedMetadata) {
+		typedMetadata = {
+			version: { value: version, type: GGUFValueType.UINT32 },
+			tensor_count: {
+				value: tensorCount.value,
+				type: version === 1 ? GGUFValueType.UINT32 : GGUFValueType.UINT64,
+			},
+			kv_count: {
+				value: numKv.value,
+				type: version === 1 ? GGUFValueType.UINT32 : GGUFValueType.UINT64,
+			},
+		};
+	}
+
 	for (let i = 0; i < numKv.value; i++) {
 		await r.fetchChunkIfNeeded(offset);
 
@@ -366,6 +396,29 @@ export async function gguf(
 		}
 		offset += valueResult.length;
 		metadata[keyResult.value] = valueResult.value;
+		if (typedMetadata) {
+			const typedEntry: {
+				value: MetadataValue;
+				type: GGUFValueType;
+				subType?: GGUFValueType;
+			} = {
+				value: valueResult.value,
+				type: valueType,
+			};
+
+			// For arrays, read the subType (element type)
+			if (valueType === GGUFValueType.ARRAY) {
+				// Array type is stored at the beginning of the value data
+				// We need to read it from the original offset (before reading the value)
+				const arrayTypeOffset = offset - valueResult.length;
+				const arraySubType = r.view.getUint32(arrayTypeOffset, littleEndian);
+				if (isGGUFValueType(arraySubType)) {
+					typedEntry.subType = arraySubType;
+				}
+			}
+
+			typedMetadata[keyResult.value] = typedEntry;
+		}
 	}
 
 	const tensorInfos: GGUFTensorInfo[] = [];
@@ -405,14 +458,38 @@ export async function gguf(
 	const alignment: number = Number(metadata["general.alignment"] ?? GGUF_DEFAULT_ALIGNMENT);
 	const tensorDataOffset = BigInt(GGML_PAD(offset, alignment));
 
-	if (params?.computeParametersCount) {
+	if (params?.computeParametersCount && params?.typedMetadata) {
+		const parameterCount = tensorInfos
+			.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
+			.reduce((acc, val) => acc + val, 0);
+
+		return {
+			metadata,
+			tensorInfos,
+			tensorDataOffset,
+			parameterCount,
+			typedMetadata: typedMetadata as GGUFTypedMetadata,
+		} as GGUFParseOutput & { parameterCount: number; typedMetadata: GGUFTypedMetadata };
+	} else if (params?.computeParametersCount) {
 		const parameterCount = tensorInfos
 			.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
 			.reduce((acc, val) => acc + val, 0);
 
-		return { metadata, tensorInfos, tensorDataOffset, parameterCount };
+		return {
+			metadata,
+			tensorInfos,
+			tensorDataOffset,
+			parameterCount,
+		} as GGUFParseOutput & { parameterCount: number };
+	} else if (params?.typedMetadata) {
+		return {
+			metadata,
+			tensorInfos,
+			tensorDataOffset,
+			typedMetadata: typedMetadata as GGUFTypedMetadata,
+		} as GGUFParseOutput & { typedMetadata: GGUFTypedMetadata };
 	} else {
-		return { metadata, tensorInfos, tensorDataOffset };
+		return { metadata, tensorInfos, tensorDataOffset } as GGUFParseOutput;
 	}
 }
 
diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts