# tts_ui/utils/__init__.py
import base64
import uuid
import shutil
from pathlib import Path
from typing import Iterator
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup
from langchain_text_splitters import RecursiveCharacterTextSplitter
from yakinori import Yakinori
import regex as re
import numpy as np
import jaconv
import bunkai
# Create a temporary directory to store short-named copies of input files
tmp_dir = Path("/tmp/auralis")
tmp_dir.mkdir(parents=True, exist_ok=True)
def shorten_filename(original_path: str) -> str:
"""Copies the given file to a temporary directory with a shorter, random filename."""
ext: str = Path(original_path).suffix
short_name: str = "file_" + uuid.uuid4().hex[:8] + ext
short_path: Path = tmp_dir / short_name
shutil.copyfile(original_path, short_path)
return str(short_path)
def extract_text_from_epub(epub_path: str, output_path=None) -> str:
"""
Extracts text from an EPUB file and optionally saves it to a text file.
Args:
epub_path (str): Path to the EPUB file
output_path (str, optional): Path where to save the text file
Returns:
str: The extracted text
"""
# Load the book
book: epub.EpubBook = epub.read_epub(epub_path)
# List to hold extracted text
chapters: list[str] = []
# Extract text from each chapter
for item in book.get_items():
if item.get_type() == ebooklib.ITEM_DOCUMENT:
# Get HTML content
html_content = item.get_content().decode("utf-8")
# Use BeautifulSoup to extract text
soup = BeautifulSoup(html_content, "html.parser")
# Remove scripts and styles
for script in soup(["script", "style"]):
script.decompose()
# Get text
text: str = soup.get_text()
            # Clean text: strip each line, split on double-space runs,
            # and drop empty fragments
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            text = "\n".join(chunk for chunk in chunks if chunk)
chapters.append(text)
# Join all chapters
full_text: str = "\n\n".join(chapters)
# Save text if output path is specified
if output_path:
with open(output_path, "w", encoding="utf-8") as f:
f.write(full_text)
    # Normalize guillemets to plain double quotes
    return full_text.replace("»", '"').replace("«", '"')
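

# Usage sketch (hypothetical paths, illustrative only):
#   full_text = extract_text_from_epub("/path/to/book.epub", output_path="/tmp/book.txt")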
def text_from_file(txt_file_path: str) -> str:
    """Read a plain-text file, copying it to a short temporary path first."""
    txt_short_path: str = shorten_filename(txt_file_path)
    with open(txt_short_path, "r", encoding="utf-8") as f:
        text: str = f.read()
    return text
def clone_voice(audio_path: str) -> str:
"""Clone a voice from an audio path."""
# Shorten filename before reading
audio_short_path: str = shorten_filename(audio_path)
with open(audio_short_path, "rb") as f:
audio_data: str = base64.b64encode(f.read()).decode("utf-8")
return audio_data
def calculate_byte_size(text: str) -> int:
"""Calculate UTF-8 encoded byte size of text"""
return len(text.encode("utf-8"))
def is_japanese(text: str) -> bool:
    """Return True if the text contains Hiragana or Katakana.

    Kanji-only text is deliberately not matched: the CJK unified block is
    shared with Chinese and would produce false positives.
    """
    hiragana = r"[\p{Hiragana}]"
    katakana = r"[\p{Katakana}]"
    return bool(re.search(hiragana, text) or re.search(katakana, text))
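

# Quick sanity checks (illustrative only):
#   is_japanese("こんにちは")    -> True   (Hiragana)
#   is_japanese("カタカナ")      -> True   (Katakana)
#   is_japanese("漢字")          -> False  (kanji only; see docstring)
#   is_japanese("Hello, world")  -> False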
def preprocess_japanese_text(text: str) -> str:
    """Convert Japanese text to a hiragana-only form for TTS.

    Romaji is first mapped to kana and the text is normalized; each
    sentence is then converted from kanji to its hiragana reading.
    """
    alpha2kana: str = jaconv.alphabet2kana(text)
    normalized_jp: str = jaconv.normalize(alpha2kana)
    yakinori = Yakinori()
    splitter = bunkai.Bunkai()
    sentences: Iterator[str] = splitter(normalized_jp)
    final: str = ""
    for sentence in sentences:
        parsed_list: list[str] = yakinori.get_parsed_list(sentence)
        final += yakinori.get_hiragana_sentence(parsed_list, is_hatsuon=True)
    return final
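

# Example (hedged; exact readings depend on the installed Yakinori/MeCab
# dictionary):
#   preprocess_japanese_text("今日は良い天気です。")
#   -> a hiragana-only string such as "きょうはよいてんきです。"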
def convert_audio(data: np.ndarray) -> np.ndarray:
    """Convert float audio data to 16-bit PCM."""
    if data.dtype in [np.float16, np.float32, np.float64]:
        # Normalize to [-1, 1], guarding against division by zero on silence
        peak = np.max(np.abs(data))
        if peak > 0:
            data = data.astype(np.float32) / peak
        # Scale to the 16-bit integer range
        data = (data * 32767).astype(np.int16)
    return data
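

# A minimal check of the PCM conversion (illustrative only): the peak sample
# is normalized to full scale before the int16 cast.
#   convert_audio(np.array([0.0, 0.25, -0.5], dtype=np.float32))
#   -> array([     0,  16383, -32767], dtype=int16)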
def split_text_into_chunks(
text: str, chunk_size: int = 2000, chunk_overlap: int = 100
) -> list[str]:
"""
Split text into chunks respecting byte limits and natural boundaries.
This function also automatically converts Japanese Kanji into Kana for better readability.
"""
text_to_process = text
text_separators: list[str] = [
"\n\n",
"\n",
"。",
".",
"?",
"!",
"?",
"!",
",",
"、",
",",
"」",
"』",
"\u3002",
"\uff0c",
"\u3001",
"\uff0e",
"",
]
if is_japanese(text_to_process):
text_to_process = preprocess_japanese_text(text_to_process)
splitter = RecursiveCharacterTextSplitter(
separators=text_separators,
chunk_size=chunk_size, # Optimized for TTS context windows
chunk_overlap=chunk_overlap,
length_function=len,
is_separator_regex=False,
)
return splitter.split_text(text)
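

if __name__ == "__main__":
    # Minimal smoke test of the chunking pipeline (illustrative only; the
    # sample text and sizes are arbitrary, not from the project).
    sample = (
        "This is a short paragraph used to exercise the splitter.\n\n"
        "It contains several sentences. Each one should land on a natural "
        "boundary when the chunk size is small enough."
    )
    for i, chunk in enumerate(
        split_text_into_chunks(sample, chunk_size=80, chunk_overlap=10)
    ):
        print(f"chunk {i}: {chunk!r}")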