[GGUF] typed metadata #1649

Merged (3 commits) on Jul 24, 2025
35 changes: 35 additions & 0 deletions packages/gguf/README.md
@@ -68,6 +68,41 @@ const { metadata, tensorInfos } = await gguf(
);
```

### Typed metadata

To get metadata together with type information, pass `typedMetadata: true`. Each entry then contains both the original value and its GGUF value type:

```ts
import { GGUFValueType, gguf } from "@huggingface/gguf";

const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";

const { metadata, typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });

console.log(typedMetadata);
// {
// version: { value: 2, type: GGUFValueType.UINT32 },
// tensor_count: { value: 291n, type: GGUFValueType.UINT64 },
// kv_count: { value: 19n, type: GGUFValueType.UINT64 },
// "general.architecture": { value: "llama", type: GGUFValueType.STRING },
// "general.file_type": { value: 10, type: GGUFValueType.UINT32 },
// "general.name": { value: "LLaMA v2", type: GGUFValueType.STRING },
// "llama.attention.head_count": { value: 32, type: GGUFValueType.UINT32 },
// "llama.attention.layer_norm_rms_epsilon": { value: 9.999999974752427e-7, type: GGUFValueType.FLOAT32 },
// "tokenizer.ggml.tokens": { value: ["<unk>", "<s>", "</s>", ...], type: GGUFValueType.ARRAY, subType: GGUFValueType.STRING },
// "tokenizer.ggml.scores": { value: [0.0, -1000.0, -1000.0, ...], type: GGUFValueType.ARRAY, subType: GGUFValueType.FLOAT32 },
// ...
// }

// Access both value and type information
console.log(typedMetadata["general.architecture"].value); // "llama"
console.log(typedMetadata["general.architecture"].type); // GGUFValueType.STRING (8)

// For arrays, subType indicates the type of array elements
console.log(typedMetadata["tokenizer.ggml.tokens"].type); // GGUFValueType.ARRAY (9)
console.log(typedMetadata["tokenizer.ggml.tokens"].subType); // GGUFValueType.STRING (8)
```
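
If you want to act on the types programmatically, for example to list every array-valued field and its element type, a small loop over the typed entries is enough. This is a minimal sketch, reusing the same `URL_LLAMA` file as above:

```ts
import { GGUFValueType, gguf } from "@huggingface/gguf";

const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";

const { typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });

// Print every array-valued field together with its element type,
// e.g. "tokenizer.ggml.tokens: STRING" or "tokenizer.ggml.scores: FLOAT32".
for (const [key, entry] of Object.entries(typedMetadata)) {
	if (entry.type === GGUFValueType.ARRAY && entry.subType !== undefined) {
		console.log(`${key}: ${GGUFValueType[entry.subType]}`);
	}
}
```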

### Strictly typed

By default, known fields in `metadata` are typed. This includes various fields found in [llama.cpp](https://github.com/ggerganov/llama.cpp), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and [ggml](https://github.com/ggerganov/ggml).
151 changes: 150 additions & 1 deletion packages/gguf/src/gguf.spec.ts
@@ -1,8 +1,9 @@
import { beforeAll, describe, expect, it } from "vitest";
import type { GGUFParseOutput } from "./gguf";
import type { GGUFParseOutput, MetadataValue } from "./gguf";
import {
GGMLFileQuantizationType,
GGMLQuantizationType,
GGUFValueType,
gguf,
ggufAllShards,
parseGgufShardFilename,
Expand Down Expand Up @@ -325,4 +326,152 @@ describe("gguf", () => {
nearestQuant = findNearestQuantType(GGMLFileQuantizationType.F16, visionQuants);
expect(nearestQuant).toEqual(GGMLFileQuantizationType.F16);
});

it("should not return typedMetadata by default", async () => {
const result = await gguf(URL_LLAMA);
expect(result).not.toHaveProperty("typedMetadata");
expect(result).toHaveProperty("metadata");
expect(result).toHaveProperty("tensorInfos");
expect(result).toHaveProperty("tensorDataOffset");
});

it("should return typedMetadata when requested", async () => {
const { metadata, typedMetadata, tensorInfos } = await gguf(URL_LLAMA, { typedMetadata: true });

// Should have both metadata and typedMetadata
expect(metadata).toBeDefined();
expect(typedMetadata).toBeDefined();
expect(tensorInfos).toBeDefined();

// Basic structure checks
expect(typedMetadata.version).toEqual({
value: 2,
type: GGUFValueType.UINT32,
});
expect(typedMetadata.tensor_count).toEqual({
value: 291n,
type: GGUFValueType.UINT64,
});
expect(typedMetadata.kv_count).toEqual({
value: 19n,
type: GGUFValueType.UINT64,
});

// Check string metadata
expect(typedMetadata["general.architecture"]).toEqual({
value: "llama",
type: GGUFValueType.STRING,
});
expect(typedMetadata["general.name"]).toEqual({
value: "LLaMA v2",
type: GGUFValueType.STRING,
});

// Check numeric metadata
expect(typedMetadata["general.file_type"]).toEqual({
value: GGMLFileQuantizationType.Q2_K,
type: GGUFValueType.UINT32,
});
expect(typedMetadata["llama.attention.head_count"]).toEqual({
value: 32,
type: GGUFValueType.UINT32,
});

// Check float metadata
expect(typedMetadata["llama.attention.layer_norm_rms_epsilon"]).toEqual({
value: 9.999999974752427e-7,
type: GGUFValueType.FLOAT32,
});
});

it("should return typedMetadata with parameter count", async () => {
const { metadata, typedMetadata, tensorInfos, parameterCount } = await gguf(URL_LLAMA, {
typedMetadata: true,
computeParametersCount: true,
});

expect(metadata).toBeDefined();
expect(typedMetadata).toBeDefined();
expect(tensorInfos).toBeDefined();
expect(parameterCount).toEqual(6_738_415_616);

// Verify typedMetadata structure is still correct
expect(typedMetadata.version).toEqual({
value: 2,
type: GGUFValueType.UINT32,
});
expect(typedMetadata["general.architecture"]).toEqual({
value: "llama",
type: GGUFValueType.STRING,
});
});

it("should handle typedMetadata for V1 files", async () => {
const { typedMetadata } = await gguf(URL_V1, { typedMetadata: true });

// V1 files use UINT32 for counts instead of UINT64
expect(typedMetadata.version).toEqual({
value: 1,
type: GGUFValueType.UINT32,
});
expect(typedMetadata.tensor_count).toEqual({
value: 48n,
type: GGUFValueType.UINT32,
});
expect(typedMetadata.kv_count).toEqual({
value: 18n,
type: GGUFValueType.UINT32,
});

// Check other fields are properly typed
expect(typedMetadata["general.architecture"]).toEqual({
value: "llama",
type: GGUFValueType.STRING,
});
expect(typedMetadata["llama.attention.head_count"]).toEqual({
value: 8,
type: GGUFValueType.UINT32,
});
});

it("should handle array metadata types in typedMetadata", async () => {
const { typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });

// Check if tokens array is properly handled
if (typedMetadata["tokenizer.ggml.tokens"]) {
expect(typedMetadata["tokenizer.ggml.tokens"].type).toEqual(GGUFValueType.ARRAY);
expect(typedMetadata["tokenizer.ggml.tokens"].subType).toEqual(GGUFValueType.STRING);
expect(Array.isArray(typedMetadata["tokenizer.ggml.tokens"].value)).toBe(true);
}

// Check if scores array is properly handled
if (typedMetadata["tokenizer.ggml.scores"]) {
expect(typedMetadata["tokenizer.ggml.scores"].type).toEqual(GGUFValueType.ARRAY);
Member: Just wondering, should we now separate GGUFValueType.ARRAY into GGUFValueType.ARRAY_INT32, GGUFValueType.ARRAY_STRING, etc.? It would come in handy when we want to distinguish among arrays of uint, int, or float.

Collaborator Author: That would be diverging from the GGUF spec, no?
https://github.com/ggml-org/ggml/blob/master/docs/gguf.md?plain=1#L191
There is no ARRAY_STRING or ARRAY_INT32 in enum gguf_metadata_value_type.

Collaborator Author: Pushed 0ba56b1. If the type is ARRAY, there is now a subType property as well:
//     "tokenizer.ggml.tokens": { value: ["<unk>", "<s>", "</s>", ...], type: GGUFValueType.ARRAY, subType: GGUFValueType.STRING },
//     "tokenizer.ggml.scores": { value: [0.0, -1000.0, -1000.0, ...], type: GGUFValueType.ARRAY, subType: GGUFValueType.FLOAT32 },

Member: I would keep things simple personally, but 🤷

Member: Yes, subType would work too! This is necessary because we need to reconstruct the array with the original type; otherwise GGUF will fail to load if the element type is mismatched (which can be the case for float/uint/int).

expect(typedMetadata["tokenizer.ggml.scores"].subType).toEqual(GGUFValueType.FLOAT32);
expect(Array.isArray(typedMetadata["tokenizer.ggml.scores"].value)).toBe(true);
}

// Check if token_type array is properly handled
if (typedMetadata["tokenizer.ggml.token_type"]) {
expect(typedMetadata["tokenizer.ggml.token_type"].type).toEqual(GGUFValueType.ARRAY);
expect(typedMetadata["tokenizer.ggml.token_type"].subType).toEqual(GGUFValueType.INT32);
expect(Array.isArray(typedMetadata["tokenizer.ggml.token_type"].value)).toBe(true);
}
});

it("should maintain consistency between metadata and typedMetadata values", async () => {
const { metadata, typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });

// All keys should be present in both
const metadataKeys = Object.keys(metadata);
const typedMetadataKeys = Object.keys(typedMetadata);

expect(metadataKeys.sort()).toEqual(typedMetadataKeys.sort());

// Values should match for all keys
const metadataAsRecord = metadata as Record<string, MetadataValue>;
for (const key of metadataKeys) {
expect(typedMetadata[key].value).toEqual(metadataAsRecord[key]);
}
});
});
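
As the review thread above notes, `subType` exists so that an array can later be written back with its original element type: in the GGUF layout, an array value starts with a uint32 element type followed by a uint64 element count, so a writer that has lost the element type can easily emit a file the loader rejects. Below is a minimal sketch of that header layout; the `TypedArrayEntry` interface and `writeArrayHeader` helper are illustrative, not part of the package:

```ts
import { GGUFValueType } from "@huggingface/gguf";

// Hypothetical shape of one array entry from typedMetadata, mirroring this PR's output.
interface TypedArrayEntry {
	value: (string | number)[];
	type: GGUFValueType.ARRAY;
	subType: GGUFValueType;
}

// Serialize an ARRAY entry's header as the GGUF spec lays it out:
// a uint32 element type followed by a uint64 element count (little-endian).
function writeArrayHeader(entry: TypedArrayEntry): Uint8Array {
	const buf = new ArrayBuffer(4 + 8);
	const view = new DataView(buf);
	view.setUint32(0, entry.subType, true); // element type, preserved from parsing
	view.setBigUint64(4, BigInt(entry.value.length), true); // element count
	return new Uint8Array(buf);
}
```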
99 changes: 88 additions & 11 deletions packages/gguf/src/gguf.ts
@@ -1,4 +1,4 @@
import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
import type { MetadataValue, Version, GGUFMetadata, GGUFTypedMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
import { GGUFValueType } from "./types";
import { isBackend } from "./utils/isBackend";
import { promisesQueue } from "./utils/promisesQueue";
@@ -8,6 +8,7 @@ export type {
MetadataValue,
Version,
GGUFMetadata,
GGUFTypedMetadata,
GGUFTensorInfo,
GGUFParseOutput,
GGUFMetadataOptions,
Expand Down Expand Up @@ -245,9 +246,25 @@ function readMetadataValue(
export async function gguf(
uri: string,
params: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
typedMetadata: true;
allowLocalFile?: boolean;
}
): Promise<GGUFParseOutput & { typedMetadata: GGUFTypedMetadata }>;
export async function gguf(
uri: string,
params: {
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
typedMetadata: true;
computeParametersCount: true;
allowLocalFile?: boolean;
}
): Promise<GGUFParseOutput & { parameterCount: number; typedMetadata: GGUFTypedMetadata }>;
export async function gguf(
uri: string,
params: {
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
computeParametersCount: true;
@@ -257,9 +274,6 @@ export async function gguf(
export async function gguf(
uri: string,
params?: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
allowLocalFile?: boolean;
@@ -273,10 +287,11 @@ export async function gguf(
*/
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
typedMetadata?: boolean;
computeParametersCount?: boolean;
allowLocalFile?: boolean;
}
): Promise<GGUFParseOutput & { parameterCount?: number }> {
): Promise<GGUFParseOutput & { parameterCount?: number; typedMetadata?: GGUFTypedMetadata }> {
let r: RangeView;
if (isBackend) {
/// On backend, we switch between remote/local file based on protocol
Expand Down Expand Up @@ -336,6 +351,21 @@ export async function gguf(
kv_count: numKv.value,
};

let typedMetadata: GGUFTypedMetadata | undefined;
if (params?.typedMetadata) {
typedMetadata = {
version: { value: version, type: GGUFValueType.UINT32 },
tensor_count: {
value: tensorCount.value,
type: version === 1 ? GGUFValueType.UINT32 : GGUFValueType.UINT64,
},
kv_count: {
value: numKv.value,
type: version === 1 ? GGUFValueType.UINT32 : GGUFValueType.UINT64,
},
};
}

for (let i = 0; i < numKv.value; i++) {
await r.fetchChunkIfNeeded(offset);

Expand Down Expand Up @@ -366,6 +396,29 @@ export async function gguf(
}
offset += valueResult.length;
metadata[keyResult.value] = valueResult.value;
if (typedMetadata) {
const typedEntry: {
value: MetadataValue;
type: GGUFValueType;
subType?: GGUFValueType;
} = {
value: valueResult.value,
type: valueType,
};

// For arrays, read the subType (element type)
if (valueType === GGUFValueType.ARRAY) {
// Array type is stored at the beginning of the value data
// We need to read it from the original offset (before reading the value)
const arrayTypeOffset = offset - valueResult.length;
const arraySubType = r.view.getUint32(arrayTypeOffset, littleEndian);
if (isGGUFValueType(arraySubType)) {
typedEntry.subType = arraySubType;
}
}

typedMetadata[keyResult.value] = typedEntry;
}
}

const tensorInfos: GGUFTensorInfo[] = [];
Expand Down Expand Up @@ -405,14 +458,38 @@ export async function gguf(
const alignment: number = Number(metadata["general.alignment"] ?? GGUF_DEFAULT_ALIGNMENT);
const tensorDataOffset = BigInt(GGML_PAD(offset, alignment));

if (params?.computeParametersCount) {
if (params?.computeParametersCount && params?.typedMetadata) {
const parameterCount = tensorInfos
.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
.reduce((acc, val) => acc + val, 0);

return {
metadata,
tensorInfos,
tensorDataOffset,
parameterCount,
typedMetadata: typedMetadata as GGUFTypedMetadata,
} as GGUFParseOutput & { parameterCount: number; typedMetadata: GGUFTypedMetadata };
} else if (params?.computeParametersCount) {
const parameterCount = tensorInfos
.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
.reduce((acc, val) => acc + val, 0);

return { metadata, tensorInfos, tensorDataOffset, parameterCount };
return {
metadata,
tensorInfos,
tensorDataOffset,
parameterCount,
} as GGUFParseOutput & { parameterCount: number };
} else if (params?.typedMetadata) {
return {
metadata,
tensorInfos,
tensorDataOffset,
typedMetadata: typedMetadata as GGUFTypedMetadata,
} as GGUFParseOutput & { typedMetadata: GGUFTypedMetadata };
} else {
return { metadata, tensorInfos, tensorDataOffset };
return { metadata, tensorInfos, tensorDataOffset } as GGUFParseOutput;
}
}
