initial commit

2026-03-05 07:19:38 +00:00
commit 0203858a6d
5 changed files with 210 additions and 0 deletions
--- a/narrate.py
+++ b/narrate.py
@@ -0,0 +1,99 @@
+import os
+import sys
+import signal
+import re
+import queue
+import threading
+import time
+import numpy as np
+
+# SETTINGS
+# Runtime tuning knobs, exported as environment variables. They are applied
+# here, ahead of the onnxruntime import further down (presumably so the runtime
+# picks them up while it loads -- confirm before reordering).
+os.environ.update({
+    "HSA_OVERRIDE_GFX_VERSION": "10.3.0",
+    "ORT_LOGGING_LEVEL": "3",
+    "MIGRAPHX_ENABLE_CACHE": "1",
+    # expanduser() is a no-op for a path without "~"; the cache directory is
+    # therefore relative to the current working directory.
+    "MIGRAPHX_CACHE_PATH": os.path.expanduser("./migraphx_cache"),
+})
+
+def signal_handler(sig, frame):
+    """Terminate the process immediately when SIGINT (Ctrl+C) arrives.
+
+    Args:
+        sig: Signal number passed in by the ``signal`` module (unused).
+        frame: Stack frame that was interrupted (unused).
+
+    The process is ended with ``os._exit(0)``, which skips ``finally`` blocks,
+    ``atexit`` handlers and stdio flushing.
+    """
+    # os._exit() does not flush stdio buffers, so flush explicitly; otherwise
+    # the message is lost whenever stdout is block-buffered (pipe, file).
+    print("\n[Ctrl+C] Stopping...", flush=True)
+    os._exit(0)
+
+# Installed before the heavy third-party imports below, so Ctrl+C is handled
+# by signal_handler from this point on.
+signal.signal(signal.SIGINT, signal_handler)
+
+import onnxruntime as ort
+import sounddevice as sd
+from kokoro_onnx import Kokoro
+
+# Paths
+# The model, the voice bank and the text to narrate are all resolved against
+# base_dir. expanduser() leaves "./" untouched (there is no "~" to expand), so
+# everything is looked up relative to the current working directory.
+base_dir = os.path.expanduser("./")
+model_path, voices_path, input_text_path = (
+    os.path.join(base_dir, filename)
+    for filename in ("kokoro-v1.0.onnx", "voices-v1.0.bin", "narrate.txt")
+)
+
+# Initialize Session
+# The ONNX session is created with MIGraphX listed first and the CPU provider
+# listed after it; the Kokoro front-end is then built on top of that session.
+# Any failure during either step is reported and ends the script with status 1.
+try:
+    migraphx_provider = (
+        'MIGraphXExecutionProvider',
+        {'device_id': 0, 'migraphx_fp16_enable': True},
+    )
+    session = ort.InferenceSession(
+        model_path,
+        providers=[migraphx_provider, 'CPUExecutionProvider'],
+    )
+    kokoro = Kokoro.from_session(session, voices_path)
+except Exception as e:
+    print(f"Init Error: {e}")
+    sys.exit(1)
+
+# Hand-off channel from the synthesis loop to playback_worker. Items are
+# (samples, text_preview) tuples; a single None marks the end of the stream.
+audio_queue = queue.Queue()
+
+def playback_worker():
+    """Play queued audio chunks until the ``None`` sentinel is received.
+
+    Runs on a background thread. Each item taken from ``audio_queue`` is either
+    a ``(samples, text_preview)`` tuple or ``None``. Samples are written to a
+    mono float32 output stream at 24 kHz, followed by 0.5 s of silence.
+
+    Every item taken from the queue is acknowledged with ``task_done()``, even
+    when playback fails. After a playback error the remaining items are
+    discarded (not played) up to and including the sentinel, so a producer
+    blocked in ``audio_queue.join()`` is always released instead of hanging.
+    """
+    samplerate = 24000
+    finished = False  # becomes True once the None sentinel has been consumed
+    try:
+        with sd.OutputStream(samplerate=samplerate, channels=1, dtype='float32') as stream:
+            # Physical silence (0.5s), built once and reused after every chunk
+            silence = np.zeros((int(samplerate * 0.5), 1), dtype='float32')
+
+            while not finished:
+                item = audio_queue.get()
+                try:
+                    if item is None:
+                        finished = True
+                        continue
+
+                    samples, text_preview = item
+                    # The stream has one channel, so feed it a (frames, 1) array
+                    samples_reshaped = samples.reshape(-1, 1).astype('float32')
+
+                    print(f"Playing: {text_preview}...")
+                    stream.write(samples_reshaped)
+                    stream.write(silence)
+                finally:
+                    # Acknowledge the item no matter what happened, so that
+                    # audio_queue.join() can never wait on a lost item.
+                    audio_queue.task_done()
+    except Exception as e:
+        print(f"Playback Error: {e}")
+        # Playback is no longer possible: drain whatever is (or will be)
+        # queued so the producer is not left waiting on unfinished tasks.
+        while not finished:
+            finished = audio_queue.get() is None
+            audio_queue.task_done()
+
+# Playback runs concurrently with synthesis. The thread is a daemon, so it
+# cannot keep the interpreter alive on its own once the main thread is done.
+play_thread = threading.Thread(
+    target=playback_worker,
+    daemon=True,
+)
+play_thread.start()
+
+# Read and Split Text
+# Open the file directly and handle its absence, instead of checking for
+# existence first (the file could vanish between the check and the open).
+# The encoding is explicit so the result does not depend on the locale.
+try:
+    with open(input_text_path, "r", encoding="utf-8") as f:
+        full_text = f.read()
+except FileNotFoundError:
+    # Spoken as-is, so the user hears that the input is missing.
+    full_text = "No input file found."
+
+# Split after sentence-ending punctuation that is FOLLOWED BY WHITESPACE, or on
+# runs of newlines. Requiring at least one whitespace character keeps tokens
+# such as "3.14" or "example.com" in one piece, and the whole whitespace run
+# (including newlines) is consumed by a single split, so there are no empty
+# units and no double pauses. For a run like "..." or "?!" only the last mark
+# can be followed by whitespace, so the run stays attached to its sentence.
+# Known limitation: abbreviations ("Dr. Smith") are still split.
+sentences = [
+    unit
+    for unit in map(str.strip, re.split(r'(?<=[.!?])\s+|\n+', full_text))
+    if unit
+]
+print(f"Detected {len(sentences)} units (sentences/lines).")
+
+# Synthesize each unit and hand it to the playback thread. Synthesis of the
+# next unit overlaps with playback of the previous ones.
+try:
+    total = len(sentences)
+    for position, sentence in enumerate(sentences, start=1):
+        samples, _ = kokoro.create(sentence, voice="af_sky", speed=1.3, lang="en-us")
+        # The first 50 characters serve as the "Playing: ..." preview
+        audio_queue.put((samples, sentence[:50]))
+        print(f"[{position}/{total}] Buffered.")
+
+    print("Generation complete. Waiting for playback to finish...")
+    audio_queue.put(None)  # sentinel: tells the playback thread to stop
+    # Wait for the thread itself rather than for the queue to empty:
+    #  * if the playback thread has already died, join() returns at once,
+    #    whereas audio_queue.join() would block forever on unfinished items;
+    #  * the thread only ends after its output stream has been stopped and
+    #    closed, so no extra sleep is needed to let the last audio play out.
+    play_thread.join()
+
+except Exception as e:
+    print(f"Process Error: {e}")
+finally:
+    print("Finished.")