Skip to content

args, server: Simplify and fix bugs #86

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.14)
project("tts.cpp" C CXX)
include(CheckIncludeFileCXX)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Additional Model support will initially be added based on open source model perf
#### Requirements:

* Local GGUF format model file (see [py-gguf](./py-ggufs/README.md) for information on how to convert the hugging face models to GGUF).
* C++17 and C17
* C++23 and C11
* XCode Command Line Tools (via `xcode-select --install`) should suffice for OS X
* CMake (>=3.14)
* GGML pulled locally
Expand All @@ -60,7 +60,7 @@ We are currently [working on upstreaming some of these operations inorder to dep
#### Build:

Assuming that the above requirements are met, the library and basic CLI example can be built by running the following commands in the repository's base directory:
```commandline
```bash
cmake -B build
cmake --build build --config Release
```
Expand Down
15 changes: 11 additions & 4 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
# examples

include_directories(${CMAKE_CURRENT_SOURCE_DIR})
add_library(examples_common
args.cpp
args.h
args_common.cpp
args_common.h
audio_file.h
)
target_include_directories(examples_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(examples_common PUBLIC ggml tts)

if (EMSCRIPTEN)
else()
if (NOT EMSCRIPTEN)
add_subdirectory(cli)
add_subdirectory(perf_battery)
add_subdirectory(quantize)
add_subdirectory(server)
add_subdirectory(phonemize)
endif()
endif ()
76 changes: 76 additions & 0 deletions examples/args.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#include "args.h"

#include <iostream>
#include <sstream>

// Writes this argument's help entry to `cout`: "--<full_name>", followed by the abbreviation in
// parentheses when it is non-empty, followed by either the REQUIRED/OPTIONAL-tagged description
// or, when the description is empty, a generic "is a required/optional parameter" sentence.
void arg::print_help() const {
cout << "--" << full_name;
if (*abbreviation) {
cout << " (-" << abbreviation << ")";
}
if (*description) {
// The final period and newline are appended here rather than being part of the description.
cout << (required ? ":\n (REQUIRED) " : ":\n (OPTIONAL) ") << description << ".\n";
Copy link
Owner

@mmwillet mmwillet Jun 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: it is strange to have the last period added here given that all other punctuation is added by the description.

} else {
cout << (required ? " is a required parameter.\n" : " is an optional parameter.\n");
}
}

/**
 * Consumes this argument's value from the front of `argv`.
 *
 * Clears `required`, recording that the argument was supplied. Boolean arguments are flags:
 * they are set to true and consume nothing from `argv`. Every other kind takes `argv[0]` as its
 * value and advances `argv` past it.
 *
 * The process exits with status 1 when the value is missing, or when a numeric argument is given
 * text that is not entirely a valid number (previously such input was silently accepted, e.g.
 * "abc" became 0 and "12abc" became 12).
 *
 * @param argv the not yet consumed command-line tokens; advanced past any value consumed here.
 */
void arg::parse(span<str> & argv) {
    required = false;
    if (const auto bool_param{get_if<bool>(&value)}) {
        *bool_param = true;
        return;
    }
    if (argv.empty()) {
        fprintf(stderr, "The option '--%s' requires an argument\n", full_name);
        exit(1);
    }
    const str a = argv[0];
    argv = argv.subspan(1);
    if (const auto string_param{get_if<str>(&value)}) {
        *string_param = a;
        return;
    }

    istringstream in{a};
    if (const auto int_param{get_if<int>(&value)}) {
        in >> *int_param;
    } else if (const auto float_param{get_if<float>(&value)}) {
        in >> *float_param;
    } else {
        return;
    }
    // A valid number must both parse successfully and consume the entire token.
    if (in.fail() || in.peek() != istringstream::traits_type::eof()) {
        fprintf(stderr, "The option '--%s' requires a numeric argument, but got '%s'\n", full_name, a);
        exit(1);
    }
}

void arg_list::parse(int argc, str argv_[]) {
TTS_ASSERT(argc);
span<str> argv{argv_, static_cast<size_t>(argc)};
argv = argv.subspan(1);
while (!argv.empty()) {
str name{argv[0]};
if (*name != '-') {
fprintf(stderr, "Only named arguments are supported\n");
exit(1);
}
++name;
const map<sv, size_t> * lookup = &abbreviations;
if (*name == '-') {
++name;
lookup = &full_names;
if (name == "help"sv) {
for (const size_t i : full_names | views::values) {
args[i].print_help();
}
exit(0);
}
}
const auto found = lookup->find(sv{name});
if (found == lookup->end()) {
fprintf(stderr, "argument '%s' is not a valid argument. "
"Call '--help' for information on all valid arguments.\n", argv[0]);
exit(1);
}
argv = argv.subspan(1);
args[found->second].parse(argv);
}
for (const arg & x : args) {
if (x.required) {
fprintf(stderr, "argument '--%s' is required.\n", x.full_name);
exit(1);
}
}
}
65 changes: 65 additions & 0 deletions examples/args.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#pragma once

#include <map>
#include <thread>
#include <vector>

#include "imports.h"

/**
 * Holder of one command-line argument.
 *
 * Stores the argument's current value (initially the default passed to the constructor) together
 * with its full name, abbreviation and description. Instances are registered with an arg_list,
 * which is the only class allowed to parse them or print their help entry.
 */
class arg {
// Current value. The alternative chosen at construction fixes the argument's kind; a bool
// argument is treated by parse() as a flag that takes no value.
variant<bool, str, int, float> value;
// True while the argument is mandatory and has not been supplied yet; parse() clears it.
bool required;

void print_help() const;

void parse(span<str> & argv);

friend class arg_list;

public:
// Name used after "--" on the command line; must not start with '-'.
const str full_name;
// Short name used after a single "-"; an empty string means there is no abbreviation.
const str abbreviation;
// Help text; an empty string makes print_help() fall back to a generic sentence.
const str description;

// Creates an argument whose kind is determined by the type of `default_value`.
template <typename T>
constexpr arg(T default_value, str full_name, str abbreviation, str description, bool required = false)
: value{default_value}, required{required},
full_name{full_name}, abbreviation{abbreviation}, description{description} {
TTS_ASSERT(full_name[0] != '-');
TTS_ASSERT(abbreviation[0] != '-');
}

// Implicit read access to the stored value. T must be the alternative currently held by
// `value`; otherwise get<T> throws.
template <typename T>
requires is_same_v<T, bool> || is_same_v<T, str> || is_same_v<T, int> || is_same_v<T, float>
// ReSharper disable once CppNonExplicitConversionOperator // We want this to automatically cast
constexpr operator T() const { // NOLINT(*-explicit-constructor)
return get<T>(value);
}
};

class arg_list {
vector<arg> args{};
map<sv, size_t> full_names{};
map<sv, size_t> abbreviations{};

public:
void add(const arg & x) {
const size_t i{args.size()};
args.push_back(x);
TTS_ASSERT(!full_names.contains(args[i].full_name));
full_names[args[i].full_name] = i;
if (*args[i].abbreviation) {
abbreviations[args[i].abbreviation] = i;
}
}

void parse(int argc, str argv_[]);

constexpr const arg & operator [](sv full_name) const noexcept {
TTS_ASSERT(full_name[0] != '-');
return args[full_names.at(full_name)];
}
};
86 changes: 86 additions & 0 deletions examples/args_common.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#include "args_common.h"

#include "tts.h"

// Registers the arguments consumed by runner_from_args when loading a model: the required
// model path and the optional CPU thread count.
void add_baseline_args(arg_list & args) {
    // hardware_concurrency() may report 0 when it cannot be determined; fall back to one thread.
    const int detected_threads{static_cast<int>(thread::hardware_concurrency())};
    const int default_threads{detected_threads < 1 ? 1 : detected_threads};

    // runner_from_file
    args.add(arg{"", "model-path", "mp", "The local path of the gguf model(s) to load", true});
    args.add(arg{
        default_threads, "n-threads", "nt",
        "The number of CPU threads to run calculations with. Defaults to known hardware concurrency. "
        "If hardware concurrency cannot be determined then it defaults to 1"
    });
}

// Default-constructed configuration whose field values are used as the defaults of the
// command-line arguments registered in this file.
static constexpr generation_configuration default_config{};

// Registers the baseline arguments plus every generation option read by
// parse_generation_config, the eSpeak voice id, and the metal acceleration flag.
// Defaults are taken from a default-constructed generation_configuration.
void add_common_args(arg_list & args) {
    add_baseline_args(args);
    // generation_configuration
    args.add({!default_config.use_cross_attn, "no-cross-attn", "ca", "Whether to not include cross attention"});
    args.add({default_config.temperature, "temperature", "t", "The temperature to use when generating outputs"});
    args.add({
        default_config.repetition_penalty, "repetition-penalty", "r",
        "The per-channel repetition penalty to be applied the sampled output of the model"
    });
    // The abbreviation must be unique: it used to be "mt", which collided with "max-tokens"
    // below, so "-mt" always resolved to max-tokens and top-p had no working short form.
    args.add({
        default_config.top_p, "top-p", "tp",
        "The sum of probabilities to sample over. Must be a value between 0.0 and 1.0. Defaults to 1.0"
    });
    args.add({
        default_config.top_k, "topk", "tk",
        "When set to an integer value greater than 0 generation uses nucleus sampling over topk nucleus size. "
        "Defaults to 50"
    });
    args.add({
        default_config.max_tokens, "max-tokens", "mt",
        "The max audio tokens or token batches to generate where each represents approximates 11 ms of audio. "
        "Only applied to Dia generation. If set to zero as is its default then the default max generation size. "
        "Warning values under 15 are not supported"
    });
    args.add({
        default_config.voice, "voice", "v",
        "The voice to use to generate the audio. This is only used for models with voice packs"
    });
    add_espeak_voice_arg(args);
    // runner_from_file
    args.add({false, "use-metal", "m", "Whether to use metal acceleration"});
}

// Builds a generation_configuration from the parsed command-line arguments.
// Exits the process with status 1 when --top-p is greater than 1.0 or not greater than 0.0.
generation_configuration parse_generation_config(const arg_list & args) {
    const generation_configuration parsed{
        .use_cross_attn{!args["no-cross-attn"]},
        .temperature{args["temperature"]},
        .repetition_penalty{args["repetition-penalty"]},
        .top_p{args["top-p"]},
        .top_k{args["topk"]},
        .max_tokens{args["max-tokens"]},
        .voice{args["voice"]},
        .espeak_voice_id{args["espeak-voice-id"]}
    };

    const bool top_p_too_small{parsed.top_p <= 0.0f};
    const bool top_p_too_large{parsed.top_p > 1.0f};
    if (top_p_too_small || top_p_too_large) {
        fprintf(stderr, "The '--top-p' value must be between 0.0 and 1.0. It was set to '%.6f'.\n", parsed.top_p);
        exit(1);
    }
    return parsed;
}

// Creates a runner through runner_from_file using the --model-path and --n-threads arguments,
// the given generation configuration, and the negation of --use-metal as the last parameter.
tts_runner * runner_from_args(const arg_list & args, const generation_configuration & config) {
    const arg & model_path{args["model-path"]};
    const arg & n_threads{args["n-threads"]};
    const bool metal_disabled{!args["use-metal"]};
    return runner_from_file(model_path, n_threads, config, metal_disabled);
}

// Registers the optional --text-encoder-path (-tep) argument, which defaults to an empty string.
void add_text_encoder_arg(arg_list & args) {
    const arg text_encoder_path{
        "", "text-encoder-path", "tep",
        "The local path of the text encoder gguf model for conditional generation"
    };
    args.add(text_encoder_path);
}

// Registers the optional --espeak-voice-id (-eid) argument, defaulting to the value held by
// the default generation configuration.
void add_espeak_voice_arg(arg_list & args) {
    const arg espeak_voice_id{
        default_config.espeak_voice_id, "espeak-voice-id", "eid",
        "The eSpeak voice id to use for phonemization. "
        "This should only be specified when the correct eSpeak voice cannot be inferred from the Kokoro voice. "
        "See MultiLanguage Configuration in the README for more info"
    };
    args.add(espeak_voice_id);
}
13 changes: 13 additions & 0 deletions examples/args_common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

#include "args.h"
#include "common.h"

// Registers the model loading arguments: --model-path (required) and --n-threads.
void add_baseline_args(arg_list & args);
// Registers the baseline arguments plus the generation options, --espeak-voice-id and --use-metal.
void add_common_args(arg_list & args);

// Builds a generation_configuration from parsed arguments; exits with status 1 when --top-p is
// greater than 1.0 or not greater than 0.0.
generation_configuration parse_generation_config(const arg_list & args);
// Creates a runner via runner_from_file from --model-path, --n-threads, `config` and --use-metal.
tts_runner * runner_from_args(const arg_list & args, const generation_configuration & config);

// Registers the optional --text-encoder-path argument.
void add_text_encoder_arg(arg_list & args);
// Registers the optional --espeak-voice-id argument.
void add_espeak_voice_arg(arg_list & args);
File renamed without changes.
2 changes: 1 addition & 1 deletion examples/cli/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ if (SDL2_FOUND)
set_source_files_properties(playback.cpp PROPERTIES COMPILE_FLAGS -DSDL2_INSTALL=1)
endif()

target_link_libraries(${TARGET} PRIVATE ggml tts)
target_link_libraries(${TARGET} PRIVATE examples_common)
Loading