Yazael committed on
Commit
f17aa72
·
verified ·
1 Parent(s): 5d0da6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -6
app.py CHANGED
@@ -49,33 +49,37 @@ def transcribe_common(audio: str, model: str) -> str:
49
  filename = Path(audio).name
50
  logger.info(f"Model: {model}")
51
  logger.info(f"Audio: {filename}")
52
- # Read and resample audio to 16kHz
53
  try:
54
  y, sr = librosa.load(audio, mono=True, sr=16000)
55
  except Exception as e:
56
- # First convert to wav if librosa cannot read the file
57
- logger.error(f"Error reading file: {e}")
58
  from pydub import AudioSegment
59
-
60
  audio = AudioSegment.from_file(audio)
61
  audio.export("temp.wav", format="wav")
62
  y, sr = librosa.load("temp.wav", mono=True, sr=16000)
63
  Path("temp.wav").unlink()
64
 
65
- # Calculate duration and decide parameters
66
  duration = librosa.get_duration(y=y, sr=sr)
67
  logger.info(f"Duration: {duration:.2f}s")
68
  kwargs = generate_kwargs.copy()
69
- if duration > 30: # Activar `return_timestamps` para audios largos
70
  kwargs["return_timestamps"] = True
71
 
72
  start_time = time.time()
73
  result = pipe_dict[model](y, generate_kwargs=kwargs)["text"]
74
  end_time = time.time()
75
  logger.success(f"Finished in {end_time - start_time:.2f}s\n{result}")
 
 
 
 
 
 
 
76
  return result
77
 
78
 
 
79
  def transcribe_others(audio) -> tuple[str, str]:
80
  result_v3 = transcribe_common(audio, "whisper-large-v3-turbo")
81
  result_kotoba_v2 = transcribe_common(audio, "kotoba-whisper-v2.0")
 
49
  filename = Path(audio).name
50
  logger.info(f"Model: {model}")
51
  logger.info(f"Audio: {filename}")
52
+
53
  try:
54
  y, sr = librosa.load(audio, mono=True, sr=16000)
55
  except Exception as e:
 
 
56
  from pydub import AudioSegment
 
57
  audio = AudioSegment.from_file(audio)
58
  audio.export("temp.wav", format="wav")
59
  y, sr = librosa.load("temp.wav", mono=True, sr=16000)
60
  Path("temp.wav").unlink()
61
 
 
62
  duration = librosa.get_duration(y=y, sr=sr)
63
  logger.info(f"Duration: {duration:.2f}s")
64
  kwargs = generate_kwargs.copy()
65
+ if duration > 30:
66
  kwargs["return_timestamps"] = True
67
 
68
  start_time = time.time()
69
  result = pipe_dict[model](y, generate_kwargs=kwargs)["text"]
70
  end_time = time.time()
71
  logger.success(f"Finished in {end_time - start_time:.2f}s\n{result}")
72
+
73
+ # Guardar resultado en un archivo .str
74
+ output_path = f"{Path(filename).stem}.str"
75
+ with open(output_path, "w", encoding="utf-8") as f:
76
+ f.write(result)
77
+
78
+ logger.info(f"Transcription saved to {output_path}")
79
  return result
80
 
81
 
82
+
83
  def transcribe_others(audio) -> tuple[str, str]:
84
  result_v3 = transcribe_common(audio, "whisper-large-v3-turbo")
85
  result_kotoba_v2 = transcribe_common(audio, "kotoba-whisper-v2.0")