import os
import random
import shutil
import string
from datetime import datetime

# Redirect the Matplotlib and Hugging Face caches to a writable location.
# These are set BEFORE the third-party imports below because libraries such
# as huggingface_hub read their cache environment variables when they are
# first imported; setting them afterwards may have no effect.
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/huggingface"

from faster_whisper import WhisperModel  # noqa: E402
from pydub import AudioSegment  # noqa: E402


class TranscriptionMaker:
    """Transcribe WAV files with a Whisper model and merge audio segments."""

    def __init__(self, output_dir="/tmp/data/transcriptions"):
        """Load the Whisper model and make sure the output directory exists.

        Args:
            output_dir: Directory the transcription file is written to.
        """
        self.model = WhisperModel(
            "base", device="cpu", download_root="/tmp/huggingface"
        )
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

    def create_transcription(self, audio_directory):
        """Transcribe every ``.wav`` file in a directory into one text file.

        Files are processed in sorted filename order and, within each file,
        segments are written in order of their start time, one per line.

        Args:
            audio_directory: Directory containing the audio files.

        Returns:
            Path of the written ``transcription.txt`` inside ``self.output_dir``.

        Raises:
            ValueError: If ``audio_directory`` is not an existing directory.
            OSError: If the transcription file cannot be written.
            Exception: Any error raised while transcribing is logged and
                re-raised unchanged.
        """
        if not os.path.isdir(audio_directory):
            raise ValueError(f"The specified path is not a valid directory: {audio_directory}")

        lines = []
        for audio_file in sorted(os.listdir(audio_directory)):
            # Extension check is case-insensitive; everything else is skipped.
            if os.path.splitext(audio_file)[-1].lower() != '.wav':
                continue
            audio_path = os.path.join(audio_directory, audio_file)
            try:
                segments, _info = self.model.transcribe(audio_path)
                # The segments are produced lazily, so consume them inside the
                # try block: decoding errors are then reported like any other
                # transcription failure instead of escaping the handler.
                segments = list(segments)
            except Exception as e:
                print(f"Error transcripting file {audio_path}: {e}")
                raise
            lines.extend(
                f"{segment.text}\n"
                for segment in sorted(segments, key=lambda s: s.start)
            )

        # The output file name is always "transcription.txt".
        output_file = os.path.join(self.output_dir, "transcription.txt")
        try:
            with open(output_file, "w", encoding="utf-8") as f:
                f.writelines(lines)
        except OSError as e:
            print(f"Error writing transcription file: {e}")
            raise
        return output_file

    def merge_segments(self, segments_dir, output_dir="/tmp/data/merged_segment"):
        """Concatenate the ``.wav`` files of a directory into a single file.

        Files are joined in sorted filename order. When the directory holds a
        single ``.wav`` file it is copied as-is; when it holds none, nothing
        is written.

        Args:
            segments_dir: Directory containing the audio segments.
            output_dir: Directory the merged (or copied) file is placed in.

        Returns:
            ``output_dir``.
        """
        os.makedirs(output_dir, exist_ok=True)
        files = sorted(f for f in os.listdir(segments_dir) if f.endswith('.wav'))

        if len(files) <= 1:
            print('No need to merge')
            # An empty directory previously crashed on files[0]; there is
            # simply nothing to copy in that case.
            if files:
                single_file_path = os.path.join(segments_dir, files[0])
                destination_path = os.path.join(output_dir, files[0])
                # Note: the file is copied (the source is kept), even though
                # the message below says "moved".
                shutil.copy(single_file_path, destination_path)
                print(f"ファイル {files[0]} を {output_dir} に移動しました。")
            return output_dir

        combined_audio = AudioSegment.empty()
        for file in files:
            file_path = os.path.join(segments_dir, file)
            combined_audio += AudioSegment.from_file(file_path)

        output_file = os.path.join(output_dir, self.generate_filename())
        combined_audio.export(output_file, format="wav")
        return output_dir

    def generate_filename(self):
        """Return a ``.wav`` file name built from the current local time.

        The name has the form ``YYYYMMDDHHMMSS.wav``, so two calls within the
        same second produce the same name.
        """
        current_time = datetime.now().strftime("%Y%m%d%H%M%S")
        return f"{current_time}.wav"