diff --git a/src/KernelMemory.DashScope/DashScopeTextEmbeddingGenerator.cs b/src/KernelMemory.DashScope/DashScopeTextEmbeddingGenerator.cs index d76009a..ca8cd9d 100644 --- a/src/KernelMemory.DashScope/DashScopeTextEmbeddingGenerator.cs +++ b/src/KernelMemory.DashScope/DashScopeTextEmbeddingGenerator.cs @@ -24,6 +24,12 @@ public int CountTokens(string text) return tokenizer?.CountTokens(text) ?? text.Length; } + /// + public IReadOnlyList GetTokens(string text) + { + return tokenizer?.GetTokens(text) ?? [text]; + } + /// public async Task GenerateEmbeddingAsync( string text, diff --git a/src/KernelMemory.DashScope/DashScopeTextGenerator.cs b/src/KernelMemory.DashScope/DashScopeTextGenerator.cs index cb05242..788bac0 100644 --- a/src/KernelMemory.DashScope/DashScopeTextGenerator.cs +++ b/src/KernelMemory.DashScope/DashScopeTextGenerator.cs @@ -30,6 +30,12 @@ public int CountTokens(string text) return tokenizer?.CountTokens(text) ?? QWenTokenizer.CountTokensStatic(text); } + /// + public IReadOnlyList GetTokens(string text) + { + return tokenizer?.GetTokens(text) ?? 
QWenTokenizer.GetTokensStatic(text); + } + /// public async IAsyncEnumerable GenerateTextAsync( string prompt, diff --git a/src/KernelMemory.DashScope/KernelMemory.DashScope.csproj b/src/KernelMemory.DashScope/KernelMemory.DashScope.csproj index ddbf3a2..b0f9cb1 100644 --- a/src/KernelMemory.DashScope/KernelMemory.DashScope.csproj +++ b/src/KernelMemory.DashScope/KernelMemory.DashScope.csproj @@ -19,8 +19,8 @@ - - + + diff --git a/src/KernelMemory.DashScope/LengthTokenizer.cs b/src/KernelMemory.DashScope/LengthTokenizer.cs index 88aafe0..dd48306 100644 --- a/src/KernelMemory.DashScope/LengthTokenizer.cs +++ b/src/KernelMemory.DashScope/LengthTokenizer.cs @@ -12,4 +12,10 @@ public int CountTokens(string text) { return text.Length; } + + /// + public IReadOnlyList GetTokens(string text) + { + return text.Select(x => $"{x}").ToList(); + } } diff --git a/src/KernelMemory.DashScope/QWenTokenizer.cs b/src/KernelMemory.DashScope/QWenTokenizer.cs index 36dd0f8..bf7bc16 100644 --- a/src/KernelMemory.DashScope/QWenTokenizer.cs +++ b/src/KernelMemory.DashScope/QWenTokenizer.cs @@ -13,7 +13,6 @@ public class QWenTokenizer : ITextTokenizer .Concat(Enumerable.Range(0, 205).Select(x => $"<|extra_{x}|>")) .Select((x, i) => new KeyValuePair(x, 151643 + i)) .ToDictionary(); - private static readonly ITokenizer Tokenizer = TokenizerBuilder.CreateTokenizer( DashScopeEmbeddedResource.ReadBpeFile(), SpecialTokens, @@ -45,6 +44,12 @@ public int CountTokens(string text) return Tokenizer.Encode(text).Count; } + /// + public IReadOnlyList GetTokens(string text) + { + return Tokenizer.Encode(text).Select(x => Tokenizer.Decode([x])).ToList(); + } + /// /// Count tokens. /// @@ -54,4 +59,14 @@ public static int CountTokensStatic(string text) { return Tokenizer.Encode(text).Count; } + + /// + /// Get tokens. + /// + /// The text to tokenize. 
+ /// + public static IReadOnlyList GetTokensStatic(string text) + { + return Tokenizer.Encode(text).Select(x => Tokenizer.Decode([x])).ToList(); + } } diff --git a/src/SemanticKernel.DashScope/SemanticKernel.DashScope.csproj b/src/SemanticKernel.DashScope/SemanticKernel.DashScope.csproj index 1410f1d..a203fdf 100644 --- a/src/SemanticKernel.DashScope/SemanticKernel.DashScope.csproj +++ b/src/SemanticKernel.DashScope/SemanticKernel.DashScope.csproj @@ -19,9 +19,9 @@ - - - + + + diff --git a/test/KernelMemory.DashScope.UnitTests/KernelMemory.DashScope.UnitTests.csproj b/test/KernelMemory.DashScope.UnitTests/KernelMemory.DashScope.UnitTests.csproj index d773f79..7a7872c 100644 --- a/test/KernelMemory.DashScope.UnitTests/KernelMemory.DashScope.UnitTests.csproj +++ b/test/KernelMemory.DashScope.UnitTests/KernelMemory.DashScope.UnitTests.csproj @@ -14,10 +14,10 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + - - + + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/test/SemanticKernel.DashScope.UnitTest/SemanticKernel.DashScope.UnitTest.csproj b/test/SemanticKernel.DashScope.UnitTest/SemanticKernel.DashScope.UnitTest.csproj index f349475..a7372f3 100644 --- a/test/SemanticKernel.DashScope.UnitTest/SemanticKernel.DashScope.UnitTest.csproj +++ b/test/SemanticKernel.DashScope.UnitTest/SemanticKernel.DashScope.UnitTest.csproj @@ -5,8 +5,8 @@ runtime; build; native; contentfiles; analyzers; buildtransitive - - + + all runtime; build; native; contentfiles; analyzers; buildtransitive