[GGUF] typed metadata

mishig25 · mishig25 · commit 6416140f0d31 · 2025-07-23T23:20:07.000+02:00
diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts
@@ -1,8 +1,9 @@
 import { beforeAll, describe, expect, it } from "vitest";
-import type { GGUFParseOutput } from "./gguf";
+import type { GGUFParseOutput, MetadataValue } from "./gguf";
 import {
 	GGMLFileQuantizationType,
 	GGMLQuantizationType,
+	GGUFValueType,
 	gguf,
 	ggufAllShards,
 	parseGgufShardFilename,
@@ -325,4 +326,143 @@ describe("gguf", () => {
 		nearestQuant = findNearestQuantType(GGMLFileQuantizationType.F16, visionQuants);
 		expect(nearestQuant).toEqual(GGMLFileQuantizationType.F16);
 	});
+
+	it("should not return typedMetadata by default", async () => {
+		const parsed = await gguf(URL_LLAMA); // no options: typedMetadata is opt-in
+		expect(parsed).not.toHaveProperty("typedMetadata");
+		for (const field of ["metadata", "tensorInfos", "tensorDataOffset"]) {
+			expect(parsed).toHaveProperty(field);
+		}
+	});
+
+	it("should return typedMetadata when requested", async () => {
+		const parsed = await gguf(URL_LLAMA, { typedMetadata: true });
+		const typed = parsed.typedMetadata;
+
+		// The untyped outputs are still returned next to the typed view.
+		expect(parsed.metadata).toBeDefined();
+		expect(typed).toBeDefined();
+		expect(parsed.tensorInfos).toBeDefined();
+
+		// Expected { value, type } pair per key: header fields, strings, integers, then a float.
+		const expected = {
+			version: {
+				type: GGUFValueType.UINT32,
+				value: 2,
+			},
+			tensor_count: {
+				type: GGUFValueType.UINT64,
+				value: 291n,
+			},
+			kv_count: {
+				type: GGUFValueType.UINT64,
+				value: 19n,
+			},
+			"general.architecture": {
+				type: GGUFValueType.STRING,
+				value: "llama",
+			},
+			"general.name": {
+				type: GGUFValueType.STRING,
+				value: "LLaMA v2",
+			},
+			"general.file_type": {
+				type: GGUFValueType.UINT32,
+				value: GGMLFileQuantizationType.Q2_K,
+			},
+			"llama.attention.head_count": {
+				type: GGUFValueType.UINT32,
+				value: 32,
+			},
+			"llama.attention.layer_norm_rms_epsilon": {
+				type: GGUFValueType.FLOAT32,
+				value: 9.999999974752427e-7,
+			},
+		};
+
+		// Compare entry by entry, in the order listed above.
+		Object.entries(expected).forEach(([key, entry]) => expect(typed[key]).toEqual(entry));
+	});
+
+	it("should return typedMetadata with parameter count", async () => {
+		const parsed = await gguf(URL_LLAMA, {
+			computeParametersCount: true,
+			typedMetadata: true,
+		});
+
+		expect(parsed.metadata).toBeDefined();
+		expect(parsed.typedMetadata).toBeDefined();
+		expect(parsed.tensorInfos).toBeDefined();
+		expect(parsed.parameterCount).toEqual(6_738_415_616);
+
+		// Requesting the parameter count must leave the typed entries unchanged.
+		expect(parsed.typedMetadata.version).toEqual({
+			type: GGUFValueType.UINT32,
+			value: 2,
+		});
+		expect(parsed.typedMetadata["general.architecture"]).toEqual({
+			type: GGUFValueType.STRING,
+			value: "llama",
+		});
+	});
+
+	it("should handle typedMetadata for V1 files", async () => {
+		const { typedMetadata: typed } = await gguf(URL_V1, { typedMetadata: true });
+		// In V1 files the header counts are typed UINT32 rather than UINT64.
+		const expected = {
+			version: {
+				type: GGUFValueType.UINT32,
+				value: 1,
+			},
+			tensor_count: {
+				type: GGUFValueType.UINT32,
+				value: 48n,
+			},
+			kv_count: {
+				type: GGUFValueType.UINT32,
+				value: 18n,
+			},
+			"general.architecture": {
+				type: GGUFValueType.STRING,
+				value: "llama",
+			},
+			"llama.attention.head_count": {
+				type: GGUFValueType.UINT32,
+				value: 8,
+			},
+		};
+		Object.entries(expected).forEach(([key, entry]) => expect(typed[key]).toEqual(entry));
+	});
+
+	it("should handle array metadata types in typedMetadata", async () => {
+		const { typedMetadata: typed } = await gguf(URL_LLAMA, { typedMetadata: true });
+
+		// The tokenizer token and score lists are expected to be typed as arrays.
+		// NOTE(review): a missing key is skipped, not failed, so the test can pass
+		// without asserting anything if the file lacks these entries.
+		for (const key of ["tokenizer.ggml.tokens", "tokenizer.ggml.scores"]) {
+			const entry = typed[key];
+			if (!entry) {
+				continue;
+			}
+			expect(entry.type).toEqual(GGUFValueType.ARRAY);
+			expect(Array.isArray(entry.value)).toBe(true);
+		}
+	});
+
+	it("should maintain consistency between metadata and typedMetadata values", async () => {
+		const parsed = await gguf(URL_LLAMA, { typedMetadata: true });
+		const plain = parsed.metadata as Record<string, MetadataValue>;
+		const typed = parsed.typedMetadata;
+
+		// Both views must expose exactly the same set of keys (order is irrelevant).
+		const plainKeys = Object.keys(plain).sort();
+		const typedKeys = Object.keys(typed).sort();
+		expect(plainKeys).toEqual(typedKeys);
+
+		// Each typed entry must wrap the very value stored in the plain metadata.
+		for (const key of plainKeys) {
+			expect(typed[key].value).toEqual(plain[key]);
+		}
+	});
 });
diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts
@@ -1,4 +1,4 @@
-import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
+import type { MetadataValue, Version, GGUFMetadata, GGUFTypedMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
 import { GGUFValueType } from "./types";
 import { isBackend } from "./utils/isBackend";
 import { promisesQueue } from "./utils/promisesQueue";
@@ -8,6 +8,7 @@ export type {
 	MetadataValue,
 	Version,
 	GGUFMetadata,
+	GGUFTypedMetadata,
 	GGUFTensorInfo,
 	GGUFParseOutput,
 	GGUFMetadataOptions,
@@ -245,9 +246,25 @@ function readMetadataValue(
 export async function gguf(
 	uri: string,
 	params: {
-		/**
-		 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
-		 */
+		fetch?: typeof fetch;
+		additionalFetchHeaders?: Record<string, string>;
+		typedMetadata: true; // also return every metadata entry paired with its GGUFValueType
+		computeParametersCount: true; // also return parameterCount; strictest overload, so it is declared first
+		allowLocalFile?: boolean;
+	}
+): Promise<GGUFParseOutput & { parameterCount: number; typedMetadata: GGUFTypedMetadata }>;
+export async function gguf(
+	uri: string,
+	params: {
+		fetch?: typeof fetch;
+		additionalFetchHeaders?: Record<string, string>;
+		typedMetadata: true; // typed metadata only; must follow the overload that requires both flags
+		allowLocalFile?: boolean;
+	}
+): Promise<GGUFParseOutput & { typedMetadata: GGUFTypedMetadata }>;
+export async function gguf(
+	uri: string,
+	params: {
 		fetch?: typeof fetch;
 		additionalFetchHeaders?: Record<string, string>;
 		computeParametersCount: true;
@@ -257,9 +274,6 @@ export async function gguf(
 export async function gguf(
 	uri: string,
 	params?: {
-		/**
-		 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
-		 */
 		fetch?: typeof fetch;
 		additionalFetchHeaders?: Record<string, string>;
 		allowLocalFile?: boolean;
@@ -273,10 +287,11 @@ export async function gguf(
 		 */
 		fetch?: typeof fetch;
 		additionalFetchHeaders?: Record<string, string>;
+		typedMetadata?: boolean;
 		computeParametersCount?: boolean;
 		allowLocalFile?: boolean;
 	}
-): Promise<GGUFParseOutput & { parameterCount?: number }> {
+): Promise<GGUFParseOutput & { parameterCount?: number; typedMetadata?: GGUFTypedMetadata }> {
 	let r: RangeView;
 	if (isBackend) {
 		/// On backend, we switch between remote/local file based on protocol
@@ -336,6 +351,21 @@ export async function gguf(
 		kv_count: numKv.value,
 	};
 
+	let typedMetadata: GGUFTypedMetadata | undefined;
+	if (params?.typedMetadata) {
+		typedMetadata = {
+			version: { value: version, type: GGUFValueType.UINT32 },
+			tensor_count: {
+				value: tensorCount.value,
+				type: version === 1 ? GGUFValueType.UINT32 : GGUFValueType.UINT64,
+			},
+			kv_count: {
+				value: numKv.value,
+				type: version === 1 ? GGUFValueType.UINT32 : GGUFValueType.UINT64,
+			},
+		};
+	}
+
 	for (let i = 0; i < numKv.value; i++) {
 		await r.fetchChunkIfNeeded(offset);
 
@@ -366,6 +396,12 @@ export async function gguf(
 		}
 		offset += valueResult.length;
 		metadata[keyResult.value] = valueResult.value;
+		if (typedMetadata) {
+			typedMetadata[keyResult.value] = {
+				value: valueResult.value,
+				type: valueType,
+			};
+		}
 	}
 
 	const tensorInfos: GGUFTensorInfo[] = [];
@@ -405,14 +441,38 @@ export async function gguf(
 	const alignment: number = Number(metadata["general.alignment"] ?? GGUF_DEFAULT_ALIGNMENT);
 	const tensorDataOffset = BigInt(GGML_PAD(offset, alignment));
 
-	if (params?.computeParametersCount) {
+	if (params?.computeParametersCount && params?.typedMetadata) {
+		const parameterCount = tensorInfos
+			.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
+			.reduce((acc, val) => acc + val, 0);
+
+		return {
+			metadata,
+			tensorInfos,
+			tensorDataOffset,
+			parameterCount,
+			typedMetadata: typedMetadata as GGUFTypedMetadata,
+		} as GGUFParseOutput & { parameterCount: number; typedMetadata: GGUFTypedMetadata };
+	} else if (params?.computeParametersCount) {
 		const parameterCount = tensorInfos
 			.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
 			.reduce((acc, val) => acc + val, 0);
 
-		return { metadata, tensorInfos, tensorDataOffset, parameterCount };
+		return {
+			metadata,
+			tensorInfos,
+			tensorDataOffset,
+			parameterCount,
+		} as GGUFParseOutput & { parameterCount: number };
+	} else if (params?.typedMetadata) {
+		return {
+			metadata,
+			tensorInfos,
+			tensorDataOffset,
+			typedMetadata: typedMetadata as GGUFTypedMetadata,
+		} as GGUFParseOutput & { typedMetadata: GGUFTypedMetadata };
 	} else {
-		return { metadata, tensorInfos, tensorDataOffset };
+		return { metadata, tensorInfos, tensorDataOffset } as GGUFParseOutput;
 	}
 }
 
diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts
@@ -103,6 +103,32 @@ export type GGUFMetadata<Options extends GGUFMetadataOptions = { strict: true }>
 } & GGUFModelKV &
 	(Options extends { strict: true } ? unknown : Record<string, MetadataValue>);
 
+/** A parsed metadata value paired with the GGUF value type it was stored as. */
+type TypedEntry<Value, Kind extends GGUFValueType = GGUFValueType> = {
+	value: Value;
+	type: Kind;
+};
+
+/** Header counts are reported as either UINT32 or UINT64. */
+type TypedCount = TypedEntry<bigint, GGUFValueType.UINT32 | GGUFValueType.UINT64>;
+
+/**
+ * Typed view of the metadata: each entry is a `{ value, type }` pair instead of
+ * the bare value.
+ *
+ * - `version` is always reported as UINT32.
+ * - `tensor_count` and `kv_count` are reported as UINT32 or UINT64.
+ * - Keys in `keyof GGUFModelKV` are optional and keep their declared value type.
+ * - Any other string key falls back to the generic `MetadataValue`.
+ */
+export type GGUFTypedMetadata = {
+	version: TypedEntry<Version, GGUFValueType.UINT32>;
+	tensor_count: TypedCount;
+	kv_count: TypedCount;
+} & {
+	[K in keyof GGUFModelKV]?: TypedEntry<GGUFModelKV[K]>;
+} & Record<string, TypedEntry<MetadataValue>>;
+
 export type GGUFModelKV = (NoModelMetadata | ModelMetadata) & (NoTokenizer | Tokenizer);
 
 export interface GGUFTensorInfo {