Skip to content

Added tokens, tokens_arr and json for offline recognizer result #936

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 44 additions & 6 deletions sherpa-onnx/c-api/c-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -444,14 +444,49 @@ const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
pText[text.size()] = 0;
r->text = pText;

if (!result.timestamps.empty()) {
r->timestamps = new float[result.timestamps.size()];
std::copy(result.timestamps.begin(), result.timestamps.end(),
r->timestamps);
r->count = result.timestamps.size();
// copy json
const auto &json = result.AsJsonString();
char *pJson = new char[json.size() + 1];
std::copy(json.begin(), json.end(), pJson);
pJson[json.size()] = 0;
r->json = pJson;

// copy tokens
auto count = result.tokens.size();
if (count > 0) {
size_t total_length = 0;
for (const auto &token : result.tokens) {
// +1 for the null character at the end of each token
total_length += token.size() + 1;
}

r->count = count;
// Each token ends with a null character ('\0')
char *tokens = new char[total_length]{};
char **tokens_temp = new char *[r->count];
int32_t pos = 0;
for (int32_t i = 0; i < r->count; ++i) {
tokens_temp[i] = tokens + pos;
memcpy(tokens + pos, result.tokens[i].c_str(), result.tokens[i].size());
// +1 to move past the null character
pos += result.tokens[i].size() + 1;
}
r->tokens_arr = tokens_temp;

if (!result.timestamps.empty()) {
r->timestamps = new float[r->count];
std::copy(result.timestamps.begin(), result.timestamps.end(),
r->timestamps);
} else {
r->timestamps = nullptr;
}

r->tokens = tokens;
} else {
r->timestamps = nullptr;
r->count = 0;
r->timestamps = nullptr;
r->tokens = nullptr;
r->tokens_arr = nullptr;
}

return r;
Expand All @@ -462,6 +497,9 @@ void DestroyOfflineRecognizerResult(
if (r) {
delete[] r->text;
delete[] r->timestamps;
delete[] r->tokens;
delete[] r->tokens_arr;
delete[] r->json;
delete r;
}
}
Expand Down
22 changes: 21 additions & 1 deletion sherpa-onnx/c-api/c-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,27 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult {

// number of tokens in tokens_arr; timestamps, when not NULL, has the same
// number of entries
int32_t count;
// TODO(fangjun): Add more fields

// Pointer to contiguous memory which holds the string-based tokens,
// each of which is terminated by \0
const char *tokens;

// An array of count pointers; entry i is the address of the first
// character of the i-th token inside tokens
const char *const *tokens_arr;

/** A JSON string describing the recognition result.
*
* The returned string contains:
* {
* "text": "The recognition result",
* "tokens": [x, x, x],
* "timestamps": [x, x, x],
* "segment": x,
* "start_time": x,
* "is_final": true|false
* }
*/
const char *json;
} SherpaOnnxOfflineRecognizerResult;

/// Get the result of the offline stream.
Expand Down
Loading