diff --git a/src/KernelMemory.DashScope/DashScopeTextEmbeddingGenerator.cs b/src/KernelMemory.DashScope/DashScopeTextEmbeddingGenerator.cs
index d76009a..ca8cd9d 100644
--- a/src/KernelMemory.DashScope/DashScopeTextEmbeddingGenerator.cs
+++ b/src/KernelMemory.DashScope/DashScopeTextEmbeddingGenerator.cs
@@ -24,6 +24,12 @@ public int CountTokens(string text)
return tokenizer?.CountTokens(text) ?? text.Length;
}
+ /// <summary>Returns the tokens of <paramref name="text"/>; with no tokenizer each char becomes one token, so the count matches the CountTokens fallback (text.Length).</summary>
+ public IReadOnlyList GetTokens(string text)
+ {
+ return tokenizer?.GetTokens(text) ?? System.Array.ConvertAll(text.ToCharArray(), c => c.ToString());
+ }
+
///
public async Task GenerateEmbeddingAsync(
string text,
diff --git a/src/KernelMemory.DashScope/DashScopeTextGenerator.cs b/src/KernelMemory.DashScope/DashScopeTextGenerator.cs
index cb05242..788bac0 100644
--- a/src/KernelMemory.DashScope/DashScopeTextGenerator.cs
+++ b/src/KernelMemory.DashScope/DashScopeTextGenerator.cs
@@ -30,6 +30,12 @@ public int CountTokens(string text)
return tokenizer?.CountTokens(text) ?? QWenTokenizer.CountTokensStatic(text);
}
+ /// <summary>Returns the tokens of <paramref name="text"/> from the configured tokenizer, or from QWenTokenizer.GetTokensStatic when the tokenizer is null or returns null.</summary>
+ public IReadOnlyList GetTokens(string text)
+ {
+ return tokenizer?.GetTokens(text) ?? QWenTokenizer.GetTokensStatic(text);
+ }
+
///
public async IAsyncEnumerable GenerateTextAsync(
string prompt,
diff --git a/src/KernelMemory.DashScope/KernelMemory.DashScope.csproj b/src/KernelMemory.DashScope/KernelMemory.DashScope.csproj
index ddbf3a2..b0f9cb1 100644
--- a/src/KernelMemory.DashScope/KernelMemory.DashScope.csproj
+++ b/src/KernelMemory.DashScope/KernelMemory.DashScope.csproj
@@ -19,8 +19,8 @@
-
-
+
+
diff --git a/src/KernelMemory.DashScope/LengthTokenizer.cs b/src/KernelMemory.DashScope/LengthTokenizer.cs
index 88aafe0..dd48306 100644
--- a/src/KernelMemory.DashScope/LengthTokenizer.cs
+++ b/src/KernelMemory.DashScope/LengthTokenizer.cs
@@ -12,4 +12,10 @@ public int CountTokens(string text)
{
return text.Length;
}
+
+ /// <summary>Returns every char of <paramref name="text"/> as its own one-character string, in order.</summary>
+ public IReadOnlyList GetTokens(string text)
+ {
+ return text.Select(x => $"{x}").ToList();
+ }
}
diff --git a/src/KernelMemory.DashScope/QWenTokenizer.cs b/src/KernelMemory.DashScope/QWenTokenizer.cs
index 36dd0f8..bf7bc16 100644
--- a/src/KernelMemory.DashScope/QWenTokenizer.cs
+++ b/src/KernelMemory.DashScope/QWenTokenizer.cs
@@ -13,7 +13,6 @@ public class QWenTokenizer : ITextTokenizer
.Concat(Enumerable.Range(0, 205).Select(x => $"<|extra_{x}|>"))
.Select((x, i) => new KeyValuePair(x, 151643 + i))
.ToDictionary();
-
private static readonly ITokenizer Tokenizer = TokenizerBuilder.CreateTokenizer(
DashScopeEmbeddedResource.ReadBpeFile(),
SpecialTokens,
@@ -45,6 +44,12 @@ public int CountTokens(string text)
return Tokenizer.Encode(text).Count;
}
+ /// <summary>Encodes <paramref name="text"/> with the shared tokenizer and decodes each token id on its own, returning one string per token.</summary>
+ public IReadOnlyList GetTokens(string text)
+ {
+ return Tokenizer.Encode(text).Select(x => Tokenizer.Decode([x])).ToList();
+ }
+
///
/// Count tokens.
///
@@ -54,4 +59,14 @@ public static int CountTokensStatic(string text)
{
return Tokenizer.Encode(text).Count;
}
+
+ /// <summary>
+ /// Splits text into tokens using the shared static tokenizer, without needing a QWenTokenizer instance.
+ /// </summary>
+ /// <param name="text">The text to tokenize.</param>
+ /// <returns>One decoded string per encoded token id, in encoding order. NOTE(review): ids are decoded one at a time; confirm this is safe for characters spanning several tokens.</returns>
+ public static IReadOnlyList GetTokensStatic(string text)
+ {
+ return Tokenizer.Encode(text).Select(x => Tokenizer.Decode([x])).ToList();
+ }
}
diff --git a/src/SemanticKernel.DashScope/SemanticKernel.DashScope.csproj b/src/SemanticKernel.DashScope/SemanticKernel.DashScope.csproj
index 1410f1d..a203fdf 100644
--- a/src/SemanticKernel.DashScope/SemanticKernel.DashScope.csproj
+++ b/src/SemanticKernel.DashScope/SemanticKernel.DashScope.csproj
@@ -19,9 +19,9 @@
-
-
-
+
+
+
diff --git a/test/KernelMemory.DashScope.UnitTests/KernelMemory.DashScope.UnitTests.csproj b/test/KernelMemory.DashScope.UnitTests/KernelMemory.DashScope.UnitTests.csproj
index d773f79..7a7872c 100644
--- a/test/KernelMemory.DashScope.UnitTests/KernelMemory.DashScope.UnitTests.csproj
+++ b/test/KernelMemory.DashScope.UnitTests/KernelMemory.DashScope.UnitTests.csproj
@@ -14,10 +14,10 @@
all
runtime; build; native; contentfiles; analyzers; buildtransitive
-
+
-
-
+
+
all
runtime; build; native; contentfiles; analyzers; buildtransitive
diff --git a/test/SemanticKernel.DashScope.UnitTest/SemanticKernel.DashScope.UnitTest.csproj b/test/SemanticKernel.DashScope.UnitTest/SemanticKernel.DashScope.UnitTest.csproj
index f349475..a7372f3 100644
--- a/test/SemanticKernel.DashScope.UnitTest/SemanticKernel.DashScope.UnitTest.csproj
+++ b/test/SemanticKernel.DashScope.UnitTest/SemanticKernel.DashScope.UnitTest.csproj
@@ -5,8 +5,8 @@
runtime; build; native; contentfiles; analyzers; buildtransitive
-
-
+
+
all
runtime; build; native; contentfiles; analyzers; buildtransitive