[GGUF] typed metadata #1649

Merged (3 commits) on Jul 24, 2025
35 changes: 35 additions & 0 deletions packages/gguf/README.md
@@ -68,6 +68,41 @@ const { metadata, tensorInfos } = await gguf(
);
```

### Typed metadata

To get metadata together with type information, pass `typedMetadata: true`. Each entry then contains both the original value and its GGUF value type:

```ts
import { GGUFValueType, gguf } from "@huggingface/gguf";

const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";

const { metadata, typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });

console.log(typedMetadata);
// {
// version: { value: 2, type: GGUFValueType.UINT32 },
// tensor_count: { value: 291n, type: GGUFValueType.UINT64 },
// kv_count: { value: 19n, type: GGUFValueType.UINT64 },
// "general.architecture": { value: "llama", type: GGUFValueType.STRING },
// "general.file_type": { value: 10, type: GGUFValueType.UINT32 },
// "general.name": { value: "LLaMA v2", type: GGUFValueType.STRING },
// "llama.attention.head_count": { value: 32, type: GGUFValueType.UINT32 },
// "llama.attention.layer_norm_rms_epsilon": { value: 9.999999974752427e-7, type: GGUFValueType.FLOAT32 },
// "tokenizer.ggml.tokens": { value: ["<unk>", "<s>", "</s>", ...], type: GGUFValueType.ARRAY, subType: GGUFValueType.STRING },
// "tokenizer.ggml.scores": { value: [0.0, -1000.0, -1000.0, ...], type: GGUFValueType.ARRAY, subType: GGUFValueType.FLOAT32 },
// ...
// }

// Access both value and type information
console.log(typedMetadata["general.architecture"].value); // "llama"
console.log(typedMetadata["general.architecture"].type); // GGUFValueType.STRING (8)

// For arrays, subType indicates the type of array elements
console.log(typedMetadata["tokenizer.ggml.tokens"].type); // GGUFValueType.ARRAY (9)
console.log(typedMetadata["tokenizer.ggml.tokens"].subType); // GGUFValueType.STRING (8)
```
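
If you want to act on the types programmatically, for example to list every array-valued field and its element type, a small loop over the typed entries is enough. This is a minimal sketch, reusing the same `URL_LLAMA` file as above:

```ts
import { GGUFValueType, gguf } from "@huggingface/gguf";

const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";

const { typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });

// Print every array-valued field together with its element type,
// e.g. "tokenizer.ggml.tokens: STRING" or "tokenizer.ggml.scores: FLOAT32".
for (const [key, entry] of Object.entries(typedMetadata)) {
	if (entry.type === GGUFValueType.ARRAY && entry.subType !== undefined) {
		console.log(`${key}: ${GGUFValueType[entry.subType]}`);
	}
}
```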

### Strictly typed

By default, known fields in `metadata` are typed. This includes various fields found in [llama.cpp](https://github.com/ggerganov/llama.cpp), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and [ggml](https://github.com/ggerganov/ggml).
151 changes: 150 additions & 1 deletion packages/gguf/src/gguf.spec.ts
@@ -1,8 +1,9 @@
import { beforeAll, describe, expect, it } from "vitest";
import type { GGUFParseOutput } from "./gguf";
import type { GGUFParseOutput, MetadataValue } from "./gguf";
import {
GGMLFileQuantizationType,
GGMLQuantizationType,
GGUFValueType,
gguf,
ggufAllShards,
parseGgufShardFilename,
Expand Down Expand Up @@ -325,4 +326,152 @@ describe("gguf", () => {
nearestQuant = findNearestQuantType(GGMLFileQuantizationType.F16, visionQuants);
expect(nearestQuant).toEqual(GGMLFileQuantizationType.F16);
});

it("should not return typedMetadata by default", async () => {
const result = await gguf(URL_LLAMA);
expect(result).not.toHaveProperty("typedMetadata");
expect(result).toHaveProperty("metadata");
expect(result).toHaveProperty("tensorInfos");
expect(result).toHaveProperty("tensorDataOffset");
});

it("should return typedMetadata when requested", async () => {
const { metadata, typedMetadata, tensorInfos } = await gguf(URL_LLAMA, { typedMetadata: true });

// Should have both metadata and typedMetadata
expect(metadata).toBeDefined();
expect(typedMetadata).toBeDefined();
expect(tensorInfos).toBeDefined();

// Basic structure checks
expect(typedMetadata.version).toEqual({
value: 2,
type: GGUFValueType.UINT32,
});
expect(typedMetadata.tensor_count).toEqual({
value: 291n,
type: GGUFValueType.UINT64,
});
expect(typedMetadata.kv_count).toEqual({
value: 19n,
type: GGUFValueType.UINT64,
});

// Check string metadata
expect(typedMetadata["general.architecture"]).toEqual({
value: "llama",
type: GGUFValueType.STRING,
});
expect(typedMetadata["general.name"]).toEqual({
value: "LLaMA v2",
type: GGUFValueType.STRING,
});

// Check numeric metadata
expect(typedMetadata["general.file_type"]).toEqual({
value: GGMLFileQuantizationType.Q2_K,
type: GGUFValueType.UINT32,
});
expect(typedMetadata["llama.attention.head_count"]).toEqual({
value: 32,
type: GGUFValueType.UINT32,
});

// Check float metadata
expect(typedMetadata["llama.attention.layer_norm_rms_epsilon"]).toEqual({
value: 9.999999974752427e-7,
type: GGUFValueType.FLOAT32,
});
});

it("should return typedMetadata with parameter count", async () => {
const { metadata, typedMetadata, tensorInfos, parameterCount } = await gguf(URL_LLAMA, {
typedMetadata: true,
computeParametersCount: true,
});

expect(metadata).toBeDefined();
expect(typedMetadata).toBeDefined();
expect(tensorInfos).toBeDefined();
expect(parameterCount).toEqual(6_738_415_616);

// Verify typedMetadata structure is still correct
expect(typedMetadata.version).toEqual({
value: 2,
type: GGUFValueType.UINT32,
});
expect(typedMetadata["general.architecture"]).toEqual({
value: "llama",
type: GGUFValueType.STRING,
});
});

it("should handle typedMetadata for V1 files", async () => {
const { typedMetadata } = await gguf(URL_V1, { typedMetadata: true });

// V1 files use UINT32 for counts instead of UINT64
expect(typedMetadata.version).toEqual({
value: 1,
type: GGUFValueType.UINT32,
});
expect(typedMetadata.tensor_count).toEqual({
value: 48n,
type: GGUFValueType.UINT32,
});
expect(typedMetadata.kv_count).toEqual({
value: 18n,
type: GGUFValueType.UINT32,
});

// Check other fields are properly typed
expect(typedMetadata["general.architecture"]).toEqual({
value: "llama",
type: GGUFValueType.STRING,
});
expect(typedMetadata["llama.attention.head_count"]).toEqual({
value: 8,
type: GGUFValueType.UINT32,
});
});

it("should handle array metadata types in typedMetadata", async () => {
const { typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });

// Check if tokens array is properly handled
if (typedMetadata["tokenizer.ggml.tokens"]) {
expect(typedMetadata["tokenizer.ggml.tokens"].type).toEqual(GGUFValueType.ARRAY);
expect(typedMetadata["tokenizer.ggml.tokens"].subType).toEqual(GGUFValueType.STRING);
expect(Array.isArray(typedMetadata["tokenizer.ggml.tokens"].value)).toBe(true);
}

// Check if scores array is properly handled
if (typedMetadata["tokenizer.ggml.scores"]) {
expect(typedMetadata["tokenizer.ggml.scores"].type).toEqual(GGUFValueType.ARRAY);
Member: Just wondering, should we now separate GGUFValueType.ARRAY into GGUFValueType.ARRAY_INT32, GGUFValueType.ARRAY_STRING, etc.? It would come in handy when we want to distinguish among arrays of uint, int, or float.

Collaborator Author: That would be diverging from the GGUF spec, no?
https://github.com/ggml-org/ggml/blob/master/docs/gguf.md?plain=1#L191
There is no ARRAY_STRING or ARRAY_INT32 in enum gguf_metadata_value_type.

Collaborator Author: Pushed 0ba56b1. If the type is ARRAY, there is now a subType property as well:
//     "tokenizer.ggml.tokens": { value: ["<unk>", "<s>", "</s>", ...], type: GGUFValueType.ARRAY, subType: GGUFValueType.STRING },
//     "tokenizer.ggml.scores": { value: [0.0, -1000.0, -1000.0, ...], type: GGUFValueType.ARRAY, subType: GGUFValueType.FLOAT32 },

Member: I would keep things simple personally, but 🤷

Member: Yes, subType would work too! This is necessary because we need to reconstruct the array with the original type; otherwise GGUF will fail to load if the element type is mismatched (which can be the case for float/uint/int).

expect(typedMetadata["tokenizer.ggml.scores"].subType).toEqual(GGUFValueType.FLOAT32);
expect(Array.isArray(typedMetadata["tokenizer.ggml.scores"].value)).toBe(true);
}

// Check if token_type array is properly handled
if (typedMetadata["tokenizer.ggml.token_type"]) {
expect(typedMetadata["tokenizer.ggml.token_type"].type).toEqual(GGUFValueType.ARRAY);
expect(typedMetadata["tokenizer.ggml.token_type"].subType).toEqual(GGUFValueType.INT32);
expect(Array.isArray(typedMetadata["tokenizer.ggml.token_type"].value)).toBe(true);
}
});

it("should maintain consistency between metadata and typedMetadata values", async () => {
const { metadata, typedMetadata } = await gguf(URL_LLAMA, { typedMetadata: true });

// All keys should be present in both
const metadataKeys = Object.keys(metadata);
const typedMetadataKeys = Object.keys(typedMetadata);

expect(metadataKeys.sort()).toEqual(typedMetadataKeys.sort());

// Values should match for all keys
const metadataAsRecord = metadata as Record<string, MetadataValue>;
for (const key of metadataKeys) {
expect(typedMetadata[key].value).toEqual(metadataAsRecord[key]);
}
});
});
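
As the review thread above notes, `subType` exists so that an array can later be written back with its original element type: in the GGUF layout, an array value starts with a uint32 element type followed by a uint64 element count, so a writer that has lost the element type can easily emit a file the loader rejects. Below is a minimal sketch of that header layout; the `TypedArrayEntry` interface and `writeArrayHeader` helper are illustrative, not part of the package:

```ts
import { GGUFValueType } from "@huggingface/gguf";

// Hypothetical shape of one array entry from typedMetadata, mirroring this PR's output.
interface TypedArrayEntry {
	value: (string | number)[];
	type: GGUFValueType.ARRAY;
	subType: GGUFValueType;
}

// Serialize an ARRAY entry's header as the GGUF spec lays it out:
// a uint32 element type followed by a uint64 element count (little-endian).
function writeArrayHeader(entry: TypedArrayEntry): Uint8Array {
	const buf = new ArrayBuffer(4 + 8);
	const view = new DataView(buf);
	view.setUint32(0, entry.subType, true); // element type, preserved from parsing
	view.setBigUint64(4, BigInt(entry.value.length), true); // element count
	return new Uint8Array(buf);
}
```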
99 changes: 88 additions & 11 deletions packages/gguf/src/gguf.ts
@@ -1,4 +1,4 @@
import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
import type { MetadataValue, Version, GGUFMetadata, GGUFTypedMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
import { GGUFValueType } from "./types";
import { isBackend } from "./utils/isBackend";
import { promisesQueue } from "./utils/promisesQueue";
@@ -8,6 +8,7 @@ export type {
MetadataValue,
Version,
GGUFMetadata,
GGUFTypedMetadata,
GGUFTensorInfo,
GGUFParseOutput,
GGUFMetadataOptions,
Expand Down Expand Up @@ -245,9 +246,25 @@ function readMetadataValue(
export async function gguf(
uri: string,
params: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
typedMetadata: true;
allowLocalFile?: boolean;
}
): Promise<GGUFParseOutput & { typedMetadata: GGUFTypedMetadata }>;
export async function gguf(
uri: string,
params: {
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
typedMetadata: true;
computeParametersCount: true;
allowLocalFile?: boolean;
}
): Promise<GGUFParseOutput & { parameterCount: number; typedMetadata: GGUFTypedMetadata }>;
export async function gguf(
uri: string,
params: {
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
computeParametersCount: true;
@@ -257,9 +274,6 @@ export async function gguf(
export async function gguf(
uri: string,
params?: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
allowLocalFile?: boolean;
@@ -273,10 +287,11 @@ export async function gguf(
*/
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
typedMetadata?: boolean;
computeParametersCount?: boolean;
allowLocalFile?: boolean;
}
): Promise<GGUFParseOutput & { parameterCount?: number }> {
): Promise<GGUFParseOutput & { parameterCount?: number; typedMetadata?: GGUFTypedMetadata }> {
let r: RangeView;
if (isBackend) {
/// On backend, we switch between remote/local file based on protocol
Expand Down Expand Up @@ -336,6 +351,21 @@ export async function gguf(
kv_count: numKv.value,
};

let typedMetadata: GGUFTypedMetadata | undefined;
if (params?.typedMetadata) {
typedMetadata = {
version: { value: version, type: GGUFValueType.UINT32 },
tensor_count: {
value: tensorCount.value,
type: version === 1 ? GGUFValueType.UINT32 : GGUFValueType.UINT64,
},
kv_count: {
value: numKv.value,
type: version === 1 ? GGUFValueType.UINT32 : GGUFValueType.UINT64,
},
};
}

for (let i = 0; i < numKv.value; i++) {
await r.fetchChunkIfNeeded(offset);

Expand Down Expand Up @@ -366,6 +396,29 @@ export async function gguf(
}
offset += valueResult.length;
metadata[keyResult.value] = valueResult.value;
if (typedMetadata) {
const typedEntry: {
value: MetadataValue;
type: GGUFValueType;
subType?: GGUFValueType;
} = {
value: valueResult.value,
type: valueType,
};

// For arrays, read the subType (element type)
if (valueType === GGUFValueType.ARRAY) {
// Array type is stored at the beginning of the value data
// We need to read it from the original offset (before reading the value)
const arrayTypeOffset = offset - valueResult.length;
const arraySubType = r.view.getUint32(arrayTypeOffset, littleEndian);
if (isGGUFValueType(arraySubType)) {
typedEntry.subType = arraySubType;
}
}

typedMetadata[keyResult.value] = typedEntry;
}
}

const tensorInfos: GGUFTensorInfo[] = [];
Expand Down Expand Up @@ -405,14 +458,38 @@ export async function gguf(
const alignment: number = Number(metadata["general.alignment"] ?? GGUF_DEFAULT_ALIGNMENT);
const tensorDataOffset = BigInt(GGML_PAD(offset, alignment));

if (params?.computeParametersCount) {
if (params?.computeParametersCount && params?.typedMetadata) {
const parameterCount = tensorInfos
.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
.reduce((acc, val) => acc + val, 0);

return {
metadata,
tensorInfos,
tensorDataOffset,
parameterCount,
typedMetadata: typedMetadata as GGUFTypedMetadata,
} as GGUFParseOutput & { parameterCount: number; typedMetadata: GGUFTypedMetadata };
} else if (params?.computeParametersCount) {
const parameterCount = tensorInfos
.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
.reduce((acc, val) => acc + val, 0);

return { metadata, tensorInfos, tensorDataOffset, parameterCount };
return {
metadata,
tensorInfos,
tensorDataOffset,
parameterCount,
} as GGUFParseOutput & { parameterCount: number };
} else if (params?.typedMetadata) {
return {
metadata,
tensorInfos,
tensorDataOffset,
typedMetadata: typedMetadata as GGUFTypedMetadata,
} as GGUFParseOutput & { typedMetadata: GGUFTypedMetadata };
} else {
return { metadata, tensorInfos, tensorDataOffset };
return { metadata, tensorInfos, tensorDataOffset } as GGUFParseOutput;
}
}
