Skip to content

Commit edaac49

Browse files
authored
Merge pull request #19 from umjammer/0.0.16
0.0.16
2 parents 23fe344 + 5d05505 commit edaac49

File tree

15 files changed

+984
-163
lines changed

15 files changed

+984
-163
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Text to Speech and Speech to Text (JSAPI2) engines for Java
1616
| Cocoa | [Rococoa](https://github.com/iterate-ch/rococoa/blob/d5fdd3b884d5f044bc0b168aff66e5f52a014da8/rococoa/rococoa-contrib/src/test/java/org/rococoa/contrib/appkit/NSSpeechSynthesizerTest.java), JNA || 🚫 | 😃 | |
1717
| Open JTalk | [jtalkdll](https://github.com/rosmarinus/jtalkdll), JNA || - | 💩 | |
1818
| VoiceVox | [VOICEVOX](https://voicevox.hiroshiba.jp/), REST || - | 😃 | ずんだもん |
19+
| CoeiroInk | [CoeiroInk](https://coeiroink.com/), REST || - | 😃 | つくよみちゃん |
1920
| Gyutan (Open JTalk in Java) | [Gyutan](https://github.com/umjammer/Gyutan), Library || - | 💩 | |
2021

2122
## Install
@@ -83,7 +84,9 @@ Text to Speech and Speech to Text (JSAPI2) engines for Java
8384
* wave lipsync
8485
* https://github.com/hecomi/MMD4Mecanim-LipSync-Plugin/blob/master/Assets/LipSync/Core/LipSyncCore.cs
8586
* VoiceVox editor compatible
86-
* [CoeiroInk](https://coeiroink.com/) ... api doesn't work
87+
* ~~[CoeiroInk](https://coeiroink.com/)~~ ... ~~api doesn't work~~ ~~api is different from VoiceVox?~~ yes
88+
* https://github.com/sevenc-nanashi/coeiroink-v2-bridge 🎯
89+
* ~~https://github.com/sinsen9000/MultiSpeech~~ api is old
8790
* [LMROID](https://lmroidsoftware.wixsite.com/nhoshio)
8891
* [SHAREVOX](https://www.sharevox.app)
8992
* [http://itvoice.starfree.jp/](http://itvoice.starfree.jp/)

pom.xml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
<groupId>vavi</groupId>
1010
<artifactId>vavi-speech2</artifactId>
11-
<version>0.0.15</version>
11+
<version>0.0.16</version>
1212

1313
<name>vavi-speech2</name>
1414
<description/>
@@ -108,7 +108,6 @@
108108
<configuration>
109109
<argLine>
110110
-Djava.util.logging.config.file=${project.build.testOutputDirectory}/logging.properties
111-
-Dvavi.util.logging.VaviFormatter.extraClassMethod=sun\.util\.logging\.internal\.LoggingProviderImpl\$JULWrapper#log
112111
</argLine>
113112
<reuseForks>false</reuseForks>
114113
<trimStackTrace>false</trimStackTrace>
@@ -216,7 +215,7 @@
216215
<dependency>
217216
<groupId>com.github.umjammer.rococoa</groupId> <!-- org.rococoa / com.github.umjammer.rococoa -->
218217
<artifactId>rococoa-core</artifactId>
219-
<version>0.8.11</version>
218+
<version>0.8.12</version>
220219
</dependency>
221220

222221
<dependency>
Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
/*
2+
* Copyright (c) 2024 by Naohide Sano, All rights reserved.
3+
*
4+
* Programmed by Naohide Sano
5+
*/
6+
7+
package vavi.speech.coeiroink;
8+
9+
import java.io.Closeable;
10+
import java.io.IOException;
11+
import java.io.InputStream;
12+
import java.util.ArrayList;
13+
import java.util.Arrays;
14+
import java.util.Collection;
15+
import java.util.List;
16+
import java.util.StringJoiner;
17+
import java.util.concurrent.Callable;
18+
import java.util.function.Function;
19+
import java.util.logging.Level;
20+
import java.util.logging.Logger;
21+
22+
import com.google.api.client.util.ExponentialBackOff;
23+
import com.google.gson.Gson;
24+
import com.google.gson.GsonBuilder;
25+
import jakarta.ws.rs.client.Client;
26+
import jakarta.ws.rs.client.ClientBuilder;
27+
import jakarta.ws.rs.client.Entity;
28+
import jakarta.ws.rs.client.WebTarget;
29+
import jakarta.ws.rs.core.MediaType;
30+
import jakarta.ws.rs.core.Response;
31+
import jakarta.ws.rs.core.Response.Status.Family;
32+
import vavi.speech.voicevox.VoiceVox;
33+
import vavi.util.CharNormalizerJa;
34+
import vavi.util.Debug;
35+
36+
37+
/**
38+
* CoeiroInk.
39+
*
40+
* @author <a href="mailto:umjammer@gmail.com">Naohide Sano</a> (nsano)
41+
* @version 0.00 2024-04-02 nsano initial version <br>
42+
*/
43+
public class CoeiroInk implements Closeable {
44+
45+
private static final Logger logger = Logger.getLogger(CoeiroInk.class.getName());
46+
47+
/** CoeiroInk application web api */
48+
private static String url = "http://127.0.0.1:50032/";
49+
50+
/** */
51+
private static final Gson gson = new GsonBuilder().create();
52+
53+
/* lets the "vavi.speech.coeiroink.url" system property override the default endpoint */
static {
    // getProperty returns the second argument when the property is not set,
    // so the built-in default stays in place unless an override is supplied
    url = System.getProperty("vavi.speech.coeiroink.url", url);
}
60+
61+
/** */
62+
private final WebTarget target;
63+
64+
/** */
65+
private VoiceVox.Speaker[] speakers;
66+
67+
/** */
68+
private final Client client;
69+
70+
/** */
71+
public CoeiroInk() {
72+
try {
73+
client = ClientBuilder.newClient(); // DON'T CLOSE
74+
target = client.target(url);
75+
76+
String json = target
77+
.path("/v1/engine_info")
78+
.request()
79+
.get(String.class);
80+
EngineInfo engineInfo = gson.fromJson(json, EngineInfo.class);
81+
Debug.println(Level.FINE, "version: " + engineInfo.version);
82+
} catch (Exception e) {
83+
throw new IllegalStateException("CoeiroInk is not available at " + url, e);
84+
}
85+
}
86+
87+
@Override
88+
public void close() throws IOException {
89+
client.close();
90+
}
91+
92+
/** */
93+
public Prosody getProsody(String text) {
94+
Entity<String> entity = Entity.entity("{ \"text\": \"" + text + "\"}", MediaType.APPLICATION_JSON);
95+
String json = target
96+
.path("v1/estimate_prosody")
97+
.request()
98+
.post(entity, String.class);
99+
return gson.fromJson(json, Prosody.class);
100+
}
101+
102+
/** */
103+
public InputStream synthesize(Synthesis synthesis) {
104+
Entity<String> syntheEntity = Entity.entity(gson.toJson(synthesis), MediaType.APPLICATION_JSON);
105+
return target.path("v1/synthesis")
106+
.request()
107+
.post(syntheEntity, InputStream.class);
108+
}
109+
110+
/** */
111+
public Speaker[] getAllVoices() {
112+
String speakersJson = target
113+
.path("/v1/speakers")
114+
.request()
115+
.get(String.class);
116+
return gson.fromJson(speakersJson, Speaker[].class);
117+
}
118+
119+
//----
120+
121+
public static class EngineInfo {
122+
public String device;
123+
public String version;
124+
}
125+
126+
public static class Speaker {
127+
public String speakerName;
128+
public String speakerUuid;
129+
public static class Style {
130+
public String styleName;
131+
public int styleId;
132+
public String base64Icon;
133+
public String base64Portrait;
134+
@Override public String toString() {
135+
return new StringJoiner(", ", Style.class.getSimpleName() + "[", "]")
136+
.add("styleName='" + styleName + "'")
137+
.add("styleId=" + styleId)
138+
.toString();
139+
}
140+
}
141+
public Style[] styles;
142+
public String version;
143+
public String base64Portrait;
144+
@Override public String toString() {
145+
return new StringJoiner(", ", Speaker.class.getSimpleName() + "[", "]")
146+
.add("speakerName='" + speakerName + "'")
147+
.add("speakerUuid='" + speakerUuid + "'")
148+
.add("styles=" + Arrays.toString(styles))
149+
.add("version='" + version + "'")
150+
.toString();
151+
}
152+
}
153+
154+
public static class Detail {
155+
public String phoneme;
156+
public String hira;
157+
public int accent;
158+
159+
public Detail(String phoneme, String hira, int accent) {
160+
this.phoneme = phoneme;
161+
this.hira = hira;
162+
this.accent = accent;
163+
}
164+
}
165+
166+
public static class Prosody {
167+
public String[] plain;
168+
public Detail[][] detail;
169+
170+
@Override public String toString() {
171+
return new StringJoiner(", ", Prosody.class.getSimpleName() + "[", "]")
172+
.add("plain=" + Arrays.toString(plain))
173+
.add("detail=" + Arrays.toString(detail))
174+
.toString();
175+
}
176+
}
177+
178+
public static class Mora {
179+
public String text;
180+
public String consonant;
181+
public Integer consonant_length;
182+
public String vowel;
183+
public int vowel_length;
184+
public int pitch;
185+
186+
public Mora(String text, String consonant, Integer consonant_length, String vowel, int vowel_length, int pitch) {
187+
this.text = text;
188+
this.consonant = consonant;
189+
this.consonant_length = consonant_length;
190+
this.vowel = vowel;
191+
this.vowel_length = vowel_length;
192+
this.pitch = pitch;
193+
}
194+
}
195+
196+
public static class AccentPhrase {
197+
public Mora[] moras;
198+
public int accent;
199+
public boolean is_interrogative;
200+
public Mora pause_mora;
201+
202+
public AccentPhrase(Mora[] moras, int accent, boolean is_interrogative, Mora pause_mora) {
203+
this.moras = moras;
204+
this.accent = accent;
205+
this.is_interrogative = is_interrogative;
206+
this.pause_mora = pause_mora;
207+
}
208+
}
209+
210+
Function<Prosody, AccentPhrase[]> prosodyToAccentPhrases = prosody -> {
211+
List<AccentPhrase> result = new ArrayList<>();
212+
for (var d : prosody.detail) {
213+
int accentPosition = -1;
214+
List<Mora> _moras = new ArrayList<>();
215+
int moraIndex = -1;
216+
for (var m : d) {
217+
moraIndex++;
218+
if (m.hira.equals("、")) {
219+
result.get(result.size() - 1).pause_mora = new Mora(
220+
"、",
221+
null,
222+
null,
223+
"pau",
224+
0,
225+
0
226+
);
227+
} else {
228+
String _vowel, consonant;
229+
if (m.phoneme.contains("-")) {
230+
String[] pair = m.phoneme.split("-");
231+
consonant = pair[0];
232+
_vowel = pair[1];
233+
} else {
234+
consonant = null;
235+
_vowel = m.phoneme;
236+
}
237+
_moras.add(new Mora(
238+
CharNormalizerJa.ToKatakana.normalize(m.hira),
239+
consonant,
240+
consonant != null ? 0 : null,
241+
_vowel,
242+
0,
243+
0
244+
));
245+
if (m.accent == 1) {
246+
accentPosition = moraIndex;
247+
}
248+
}
249+
}
250+
if (_moras.isEmpty()) {
251+
continue;
252+
}
253+
result.add(new AccentPhrase(
254+
_moras.toArray(Mora[]::new),
255+
accentPosition + 1,
256+
false,
257+
null
258+
));
259+
}
260+
return result.toArray(AccentPhrase[]::new);
261+
};
262+
263+
Function<AccentPhrase[], Detail[]> accentPhrasesToProsody = accentPhrases -> {
264+
return Arrays.stream(accentPhrases).map(accentPhrase -> {
265+
List<Detail> detail = new ArrayList<>();
266+
267+
int i = 0;
268+
Arrays.stream(accentPhrase.moras).forEach(mora -> {
269+
String phoneme;
270+
if (mora.consonant != null && !mora.consonant.isEmpty()) {
271+
phoneme = mora.consonant + "-" + mora.vowel;
272+
} else {
273+
phoneme = mora.vowel;
274+
}
275+
276+
var accent = 0;
277+
if (i == accentPhrase.accent - 1 || (i != 0 && i <= accentPhrase.accent - 1)) {
278+
accent = 1;
279+
}
280+
281+
detail.add(new Detail(
282+
CharNormalizerJa.ToHiragana.normalize(mora.text),
283+
phoneme,
284+
accent
285+
));
286+
});
287+
288+
if (accentPhrase.pause_mora != null) {
289+
detail.add(new Detail(
290+
"、",
291+
"_",
292+
0
293+
));
294+
}
295+
296+
return detail;
297+
}).flatMap(Collection::stream).toArray(Detail[]::new);
298+
};
299+
300+
public static class Synthesis {
301+
public String speakerUuid;
302+
public int styleId;
303+
public String text;
304+
public Detail[][] prosodyDetail;
305+
/** 0.5 ~ 1 ~ 2 */
306+
public float speedScale = 1;
307+
/** 0 ~ 1 ~ +2 */
308+
public float volumeScale = 1;
309+
/** -0.15 ~ 0 ~ +0.15 */
310+
public float pitchScale = 0;
311+
/** 0 ~ 1 ~ +2 */
312+
public float intonationScale = 1;
313+
/** 0 ~ 0.1 ~ +1.50 */
314+
public float prePhonemeLength = 0.1f;
315+
/** 0 ~ 0.1 ~ +1.50 */
316+
public float postPhonemeLength = 0.1f;
317+
public int outputSamplingRate = 22050;
318+
}
319+
320+
public static Response execWithBackoff(Callable<Response> i) {
321+
ExponentialBackOff backoff = new ExponentialBackOff.Builder().build();
322+
323+
long delay = 0;
324+
325+
Response response;
326+
do {
327+
try {
328+
Thread.sleep(delay);
329+
330+
response = i.call();
331+
332+
if (response.getStatusInfo().getFamily() == Family.SERVER_ERROR) {
333+
logger.warning(String.format("Server error %s when accessing path %s. Delaying %dms", response.getStatus(), response.getLocation() != null ? response.getLocation().toASCIIString() : "null", delay));
334+
}
335+
336+
delay = backoff.nextBackOffMillis();
337+
} catch (Exception e) { //callable throws exception
338+
throw new RuntimeException("Client request failed", e);
339+
}
340+
341+
} while (delay != ExponentialBackOff.STOP && response.getStatusInfo().getFamily() == Family.SERVER_ERROR);
342+
343+
if (response.getStatusInfo().getFamily() == Family.SERVER_ERROR) {
344+
throw new IllegalStateException("Client request failed for " + response.getLocation().toASCIIString());
345+
}
346+
347+
return response;
348+
}
349+
}

0 commit comments

Comments
 (0)