Adding Sanskrit support

krishnshyam · krishnshyam · commit f3a529b847b2 · 2025-10-12T20:43:01.000+05:30
- Adding support for Sanskrit in the addon and the DLL
- Correcting the Unicode data for Sinhala to fix consonant cluster recognition
- Adding a fix that deletes duplicate voices so each language keeps a single voice
diff --git a/globalPlugins/hear2readng_global_plugin/__init__.py b/globalPlugins/hear2readng_global_plugin/__init__.py
@@ -209,7 +209,7 @@ def on_voice_update(self, lang):
     def _startup(self):
 
         if _h2r_config[SCT_General][ID_ShowStartupPopup]:
-            log.info("_start_checks: showNewUserMessage")
+            # log.info("_start_checks: showNewUserMessage")
             startupdialog = _StartupInfoDialog()
             gui.runScriptModalDialog(startupdialog,
                                      callback=self._on_startupinfo_closed)
@@ -220,7 +220,7 @@ def _on_startupinfo_closed(self, res):
         self._perform_checks()
 
     def _perform_checks(self):
-        log.info("_perform_checks")
+        # log.info("_perform_checks")
         postUpdateCheck()
         self._perform_voice_check()
         
@@ -312,7 +312,7 @@ def script_h2r_review_currentCharacter(self, gesture: inputCore.InputGesture):
 
         info.expand(textInfos.UNIT_CHARACTER)
         scriptCount = scriptHandler.getLastScriptRepeatCount()
-        log.info(f"script_review_currentCharacter interrupt: {scriptCount}, info: {info.text}")
+        # log.info(f"script_review_currentCharacter interrupt: {scriptCount}, info: {info.text}")
         
         if scriptCount == 1:
             try:
diff --git a/globalPlugins/hear2readng_global_plugin/h2rutils.py b/globalPlugins/hear2readng_global_plugin/h2rutils.py
@@ -7,6 +7,7 @@
 import sys
 import urllib.request
 from dataclasses import dataclass
+from glob import glob
 from io import StringIO
 from threading import Thread
 
@@ -53,6 +54,7 @@
                 "ne":"Nepali", 
                 "or":"Odia", 
                 "pa":"Punjabi", 
+                "sa":"Sanskrit",
                 "si":"Sinhala",
                 "ta":"Tamil", 
                 "te":"Telugu", 
@@ -376,10 +378,16 @@ def parse_server_voices(resp_str):
 
 
 def populateVoices():
-    pathName = os.path.join(H2RNG_VOICES_DIR)
+    """Checks and populates voice list based on the files present in the voice
+    directory
+
+    @return: Dictionary of voice files keyed by the iso2 code of the language
+    @rtype: dict
+    """
+    remove_duplicate_voices()
     voices = dict()
     #list all files in Language directory
-    file_list = os.listdir(pathName)
+    file_list = os.listdir(H2RNG_VOICES_DIR)
     #FIXME: the english voice is obsolete, maybe remove the voiceid?
     en_voice = EN_VOICE_ALOK
     voices[en_voice] = "English"
@@ -403,6 +411,29 @@ def populateVoices():
 
     return voices
 
+def remove_duplicate_voices():
+    """Ensure only one voice per language is kept in H2RNG_VOICES_DIR.
+
+    Voice files are grouped by the text before the first "-" of their name.
+    In each group the alphabetically last ".onnx" file is kept; the others
+    are deleted together with their ".onnx.json" companion, if present.
+    """
+    # os.listdir instead of glob(root_dir=...): root_dir needs Python 3.10+
+    by_lang = {}
+    for name in sorted(os.listdir(H2RNG_VOICES_DIR)):
+        if name.lower().endswith(".onnx"):
+            by_lang.setdefault(name.split("-")[0], []).append(name)
+
+    # Names were sorted above, so the last entry of each group is the keeper
+    for lang_voices in by_lang.values():
+        for f in lang_voices[:-1]:
+            log.warning(f"Hear2Read NG: Found duplicate voice, deleting: {f}")
+            for stale in (f, f + ".json"):
+                stale_path = os.path.join(H2RNG_VOICES_DIR, stale)
+                if os.path.exists(stale_path):
+                    os.remove(stale_path)
+
+
 def move_old_voices():
     """Tries to move voices downloaded in addon version 1.4 and lower to the 
     new dir structure to be usable by this addon. This is slightly different 
diff --git a/globalPlugins/hear2readng_global_plugin/voice_manager.py b/globalPlugins/hear2readng_global_plugin/voice_manager.py
@@ -16,6 +16,7 @@
 import gui
 import synthDriverHandler
 import wx
+from addonHandler import getCodeAddon
 from logHandler import log
 
 from synthDrivers._H2R_NG_Speak import H2RNG_DATA_DIR, H2RNG_VOICES_DIR
@@ -109,6 +110,16 @@ def __init__(self, parent=gui.mainFrame, title="Hear2Read Indic Voice Manager"):
         # progress dialog to show download progress
         self.progress_dialog = None
 
+        try:
+            version = getCodeAddon().manifest.version
+        except Exception:  # best effort; do not trap SystemExit/KeyboardInterrupt
+            log.warn("Hear2Read NG: Unable to read manifest, assuming default version number")
+            version = "1.7.3"
+
+        # Pad with "0" so a version lacking a minor part cannot raise IndexError
+        version_split = (str(version).split(".") + ["0", "0"])[:2]
+        self.major_version, self.minor_version = version_split
+
         self.get_display_voices()
 
         if not self.display_voices:
@@ -621,6 +632,13 @@ def get_display_voices(self):
         
         for key in set(self.installed_voices.keys()).union(
                                                     self.server_voices.keys()):
+            # int(): parts are str. NOTE(review): skips "sa" when minor > 7 - intended?
+            # TODO: this is redundant now as it will be updated post fact. will
+            # need a file on the server informing this. Maybe move voices to a
+            # new location to prevent access by old versions?
+            if key == "sa" and int(self.major_version) > 0 and int(self.minor_version) > 7:
+                continue
+
             local_voice = self.installed_voices.get(key)
             server_voice = self.server_voices.get(key)
 
diff --git a/manifest.ini b/manifest.ini
@@ -1,9 +1,9 @@
 name = "Hear2ReadNG"
 summary = "Hear2Read Indic Speech Synthesizer"
-version = "1.7.3"
+version = "1.8.0"
 description = "This is a speech synthesizer for 11 Indic languages and English (with Indian accent) that generates natural human speech. It is based on the work done by the piper TTS team (https://github.com/rhasspy/piper). The addon includes a voice manager for installing one or more Indic voices. Supported languages: Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Nepali, Odia, Punjabi, Tamil, Telugu"
 author = "Hear2Read Contributers<info@Hear2Read.org>"
 url = "https://hear2read.org"
 docFileName = "README.txt"
 minimumNVDAVersion = "2022.1.0"
-lastTestedNVDAVersion = "2025.1"
+lastTestedNVDAVersion = "2025.3"
diff --git a/synthDrivers/Hear2ReadNG.py b/synthDrivers/Hear2ReadNG.py
@@ -30,6 +30,7 @@
 from synthDriverHandler import (
     SynthDriver,
     VoiceInfo,
+    getSynth,
     synthDoneSpeaking,
     synthIndexReached,
 )
@@ -89,7 +90,7 @@ class SynthDriver(SynthDriver):
 #        SynthDriver.VariantSetting(),
         SynthDriver.RateSetting(),
 #        SynthDriver.RateBoostSetting(),
-#        SynthDriver.PitchSetting(),
+        # SynthDriver.PitchSetting(),
 #        SynthDriver.InflectionSetting(),
         SynthDriver.VolumeSetting(),
     )
@@ -124,27 +125,26 @@ def __init__(self):
         if not self.check():
             return
         log.info("H2R NG: init started")
-        # confspec = {
-        #     "engSynth": "string(default='oneCore')",
-        #     "engVoice": "string(default='')",
-        #     "engVariant": "string(default='')",
-        #     "engRate": "integer(default=50)",
-        #     "engPitch": "integer(default=50)",
-        #     "engVolume": "integer(default=100)",
-        #     "engInflection": "integer(default=80)",
-        #     "showStartupMsg": "boolean(default=True)"
-        # }
-        
-        # config.conf.spec["hear2read"] = confspec
-        # config.conf.save()
 
         # Have H2R pitch be set to the engsynth value to allow PitchCommand
         # to be used for capitals
+        confspec_default = {
+            "voice": f"string(default='{_H2R_NG_Speak.en_voice}')",
+            "rate": "integer(default=50)",
+            "pitch": "integer(default=50)",
+            "volume": "integer(default=100)",
+            "capPitchChange": "integer(default=30)",
+        }
+        config.conf.spec["speech"][self.name] = confspec_default
+        config.conf.save()
+
         try:
-            config.conf["speech"][self.name]["pitch"] = _h2r_config[SCT_EngSynth][ID_EnglishSynthPitch]
+            tempPitch = _h2r_config[SCT_EngSynth][ID_EnglishSynthPitch]
+            # log.info(f"H2R NG got eng pitch: {tempPitch}, {type(tempPitch)}")
+            config.conf["speech"][self.name]["pitch"] = int(tempPitch)#_h2r_config[SCT_EngSynth][ID_EnglishSynthPitch]
         except KeyError as e:
             if self.name in str(e):
-                log.warn("Hear2Read no config found, updating default config")
+                log.warn("Hear2Read no config found, retrying default config")
                 confspec_default = {
                     "voice": f"string(default='{_H2R_NG_Speak.en_voice}')",
                     "rate": "integer(default=50)",
@@ -154,6 +154,12 @@ def __init__(self):
                 }
                 config.conf.spec["speech"][self.name] = confspec_default
                 config.conf.save()
+
+        h2rpitch = config.conf["speech"][self.name]["pitch"]
+        
+        # log.info(f"Hear2ReadNG: set pitch to: {h2rpitch}, {type(h2rpitch)}")
+
+        config.conf.save()
                 
         _H2R_NG_Speak.initialize(self._onIndexReached)
 
@@ -206,6 +212,7 @@ def speak(self, speechSequence: SpeechSequence):
         # log.info("H2R speak")
         # log.info(f"speech sequence: {speechSequence}")
         self.subsequences = []
+        self.first_subseq = True
         if self.is_curr_voice_eng() or not self._get_voice():
             # self.subsequences.append(speechSequence)
             _H2R_NG_Speak.speak_eng(speech_sequence=speechSequence)
@@ -282,8 +289,12 @@ def speak(self, speechSequence: SpeechSequence):
                 subSequence.append(item)
                 # pass
             elif isinstance(item, PitchCommand):
-                # log.info(f"Hear2Read got PitchCommand: {PitchCommand}")
+                # log.info(f"Hear2Read got PitchCommand: {item}")
                 subSequence.append(item)
+                # synth = getSynth()
+                # synthConf = config.conf["speech"][synth.name]
+                # h2rpitch = synthConf["pitch"]
+                # log.info(f"Hear2ReadNG: current pitch at: {h2rpitch}, {type(h2rpitch)}")
                 # pass
             elif isinstance(item, VolumeCommand):
                 subSequence.append(item)
@@ -389,6 +400,12 @@ def _processSubSequences(self):
         # log.info(f"_processSubSequences: isASCII: {isASCII}")
         # log.info(f"_processSubsequence: subsequence {subSequence}")
 
+        # Play a short silence while switching from Indian language to English
+        if not self.first_subseq and isASCII:
+            _H2R_NG_Speak.speak_silence(500)
+
+        self.first_subseq = False
+
         if isinstance(subSequence[-1], IndexCommand):
             self.currIndex = subSequence[-1].index
             # log.info(f"index boundary at: {self.currIndex}")
@@ -403,7 +420,7 @@ def _process_non_native_unicode(self, text):
         split_texts = []
         prev_range = ()
         text_bit = ""
-        has_curr_lang = False
+        # has_curr_lang = False
         is_prev_valid_lang = False
 
         # log.info(f"_process_non_native_unicode: {text}")
@@ -413,7 +430,7 @@ def _process_non_native_unicode(self, text):
             if self._script_range[0] <= ord(c) <=self._script_range[1] or c in "।॥":
                 # log.info(f"adding: {c}")
                 text_bit += c
-                has_curr_lang = True
+                # has_curr_lang = True
                 is_prev_valid_lang = True
                 continue
 
@@ -650,7 +667,7 @@ def _set_script_range(self):
             self._script_range = unicode_ranges["english"]
             return
 
-        if lang_iso in ["hi", "mr", "ne"]:
+        if lang_iso in ["hi", "mr", "ne", "sa"]:
             self._script_range = unicode_ranges["devanagari"]
         elif lang_iso in ["as", "bn"]:
             self._script_range = unicode_ranges["bengali"]
diff --git a/synthDrivers/_H2R_NG_Speak.py b/synthDrivers/_H2R_NG_Speak.py
@@ -352,6 +352,16 @@ def speak(text, params):
         _execWhenDone(_speak, text.replace("।", "."), params, mustBeAsync=True)
         return
 
+def speak_silence(time: int):
+    """Feed zero-valued bytes to the player as a pause, e.g. for the break
+    between Indic and English text.
+
+    @param time: nominal pause length in milliseconds
+    @type time: int
+    """
+    # NOTE(review): one byte per qual_to_hz unit - confirm the sample width
+    player.feed(bytes(int(qual_to_hz[curr_qual] * time/1000)))
+
 def stop():
     global isSpeaking
     # Kill all speech from now.