mulasagg commited on
Commit
b4db241
·
1 Parent(s): b4aa0c5

add transcribe in hindi

Browse files
Files changed (2) hide show
  1. app.py +7 -6
  2. transcribe.py +23 -21
app.py CHANGED
@@ -289,14 +289,14 @@ import time
289
 
290
 
291
  @app.post('/transcribe/')
292
- async def transcribe(file: UploadFile):
293
  """
294
  Endpoint to transcribe an uploaded audio file (.wav or .mp3).
295
  """
296
  #calculate time to transcribe
297
  start_time = time.time()
298
- if not file.filename.endswith(('.wav', '.mp3')):
299
- raise HTTPException(status_code=400, detail="Invalid file type. Only .wav and .mp3 files are supported.")
300
 
301
  # Generate a safe temporary file path
302
  temp_filename = f"temp_{uuid.uuid4()}{os.path.splitext(file.filename)[1]}"
@@ -310,7 +310,7 @@ async def transcribe(file: UploadFile):
310
  shutil.copyfileobj(file.file, buffer)
311
 
312
  # Transcribe using your custom function
313
- result = transcribe_audio(temp_filepath)
314
  end_time = time.time()
315
  transcription_time = end_time - start_time
316
  response = {
@@ -329,9 +329,10 @@ async def transcribe(file: UploadFile):
329
  os.remove(temp_filepath)
330
 
331
 
 
332
 
333
  @app.post('/analyze_all/')
334
- async def analyze_all(file: UploadFile):
335
  """
336
  Endpoint to analyze all aspects of an uploaded audio file (.wav or .mp3).
337
  """
@@ -358,7 +359,7 @@ async def analyze_all(file: UploadFile):
358
  vps_result = analyze_vps_main(temp_filepath)
359
  ves_result = calc_voice_engagement_score(temp_filepath)
360
  filler_count = analyze_fillers(temp_filepath) # Assuming this function returns a dict with filler count
361
- transcript = transcribe_audio(temp_filepath)
362
 
363
  # Combine results into a single response
364
  combined_result = {
 
289
 
290
 
291
  @app.post('/transcribe/')
292
+ async def transcribe(file: UploadFile, language: str = Form(...)):
293
  """
294
  Endpoint to transcribe an uploaded audio file (.wav or .mp3).
295
  """
296
  #calculate time to transcribe
297
  start_time = time.time()
298
+ if not file.filename.endswith(('.wav', '.mp3','mp4')):
299
+ raise HTTPException(status_code=400, detail="Invalid file type. Only .wav ,mp4 and .mp3 files are supported.")
300
 
301
  # Generate a safe temporary file path
302
  temp_filename = f"temp_{uuid.uuid4()}{os.path.splitext(file.filename)[1]}"
 
310
  shutil.copyfileobj(file.file, buffer)
311
 
312
  # Transcribe using your custom function
313
+ result = transcribe_audio(temp_filepath, language=language, model_size="base")
314
  end_time = time.time()
315
  transcription_time = end_time - start_time
316
  response = {
 
329
  os.remove(temp_filepath)
330
 
331
 
332
+ from fastapi import UploadFile, Form
333
 
334
  @app.post('/analyze_all/')
335
+ async def analyze_all(file: UploadFile, language: str = Form(...)):
336
  """
337
  Endpoint to analyze all aspects of an uploaded audio file (.wav or .mp3).
338
  """
 
359
  vps_result = analyze_vps_main(temp_filepath)
360
  ves_result = calc_voice_engagement_score(temp_filepath)
361
  filler_count = analyze_fillers(temp_filepath) # Assuming this function returns a dict with filler count
362
+ transcript = transcribe_audio(temp_filepath, language, "base") #fix this
363
 
364
  # Combine results into a single response
365
  combined_result = {
transcribe.py CHANGED
@@ -1,24 +1,26 @@
1
- # using whisper to transcribe audio files
2
 
3
- import whisper
4
- import os
5
 
6
- def transcribe_audio(file_path, model_size="base"):
7
- """
8
- Transcribe audio file using Whisper model.
9
-
10
- Args:
11
- file_path (str): Path to the audio file.
12
- model_size (str): Size of the Whisper model to use. Options are "tiny", "base", "small", "medium", "large".
13
-
14
- Returns:
15
- str: Transcription of the audio file.
16
- """
17
- # Load the Whisper model
18
- model = whisper.load_model(model_size)
19
-
20
- # Transcribe the audio file
21
- result = model.transcribe(file_path, fp16=False)
22
 
23
- # Return the transcription
24
- return result["text"]
 
 
 
 
 
1
import os

import assemblyai as aai

# SECURITY: the API key must come from the environment, never from source.
# A previous revision committed a live key to version control — that key
# should be rotated immediately.
aai.settings.api_key = os.environ.get("ASSEMBLYAI_API_KEY", "")


def transcribe_audio(file_path: str, language: str = "en", model_size=None) -> str:
    """Transcribe an audio file using the AssemblyAI API.

    Args:
        file_path: Path to the audio file to transcribe.
        language: Language code forwarded to AssemblyAI (e.g. "en", "hi").
            Defaults to "en" so callers of the older single-argument
            signature keep working.
        model_size: Unused. Retained only for backward compatibility with
            the previous Whisper-based signature (callers still pass "base").

    Returns:
        The transcribed text.

    Raises:
        RuntimeError: If the transcription job finishes in an error state.
    """
    print(f"Transcribing audio file: {file_path} with language: {language}")

    # Configure the transcription for the requested language (not Hindi-only).
    config = aai.TranscriptionConfig(
        speech_model=aai.SpeechModel.best,
        language_code=language,
    )
    transcriber = aai.Transcriber(config=config)

    transcript = transcriber.transcribe(file_path)

    # transcript.status is a str-valued enum, so comparing to "error" matches
    # aai.TranscriptStatus.error.
    if transcript.status == "error":
        raise RuntimeError(f"Transcription failed: {transcript.error}")

    return transcript.text