Add Hindi transcription support
Browse files- app.py +7 -6
- transcribe.py +23 -21
app.py
CHANGED
@@ -289,14 +289,14 @@ import time
|
|
289 |
|
290 |
|
291 |
@app.post('/transcribe/')
|
292 |
-
async def transcribe(file: UploadFile):
|
293 |
"""
|
294 |
Endpoint to transcribe an uploaded audio file (.wav or .mp3).
|
295 |
"""
|
296 |
#calculate time to transcribe
|
297 |
start_time = time.time()
|
298 |
-
if not file.filename.endswith(('.wav', '.mp3')):
|
299 |
-
raise HTTPException(status_code=400, detail="Invalid file type. Only .wav and .mp3 files are supported.")
|
300 |
|
301 |
# Generate a safe temporary file path
|
302 |
temp_filename = f"temp_{uuid.uuid4()}{os.path.splitext(file.filename)[1]}"
|
@@ -310,7 +310,7 @@ async def transcribe(file: UploadFile):
|
|
310 |
shutil.copyfileobj(file.file, buffer)
|
311 |
|
312 |
# Transcribe using your custom function
|
313 |
-
result = transcribe_audio(temp_filepath)
|
314 |
end_time = time.time()
|
315 |
transcription_time = end_time - start_time
|
316 |
response = {
|
@@ -329,9 +329,10 @@ async def transcribe(file: UploadFile):
|
|
329 |
os.remove(temp_filepath)
|
330 |
|
331 |
|
|
|
332 |
|
333 |
@app.post('/analyze_all/')
|
334 |
-
async def analyze_all(file: UploadFile):
|
335 |
"""
|
336 |
Endpoint to analyze all aspects of an uploaded audio file (.wav or .mp3).
|
337 |
"""
|
@@ -358,7 +359,7 @@ async def analyze_all(file: UploadFile):
|
|
358 |
vps_result = analyze_vps_main(temp_filepath)
|
359 |
ves_result = calc_voice_engagement_score(temp_filepath)
|
360 |
filler_count = analyze_fillers(temp_filepath) # Assuming this function returns a dict with filler count
|
361 |
-
transcript = transcribe_audio(temp_filepath)
|
362 |
|
363 |
# Combine results into a single response
|
364 |
combined_result = {
|
|
|
289 |
|
290 |
|
291 |
@app.post('/transcribe/')
|
292 |
+
async def transcribe(file: UploadFile, language: str = Form(...)):
|
293 |
"""
|
294 |
Endpoint to transcribe an uploaded audio file (.wav, .mp3 or .mp4) in the requested language.
|
295 |
"""
|
296 |
#calculate time to transcribe
|
297 |
start_time = time.time()
|
298 |
+
if not file.filename.endswith(('.wav', '.mp3','mp4')):
|
299 |
+
raise HTTPException(status_code=400, detail="Invalid file type. Only .wav ,mp4 and .mp3 files are supported.")
|
300 |
|
301 |
# Generate a safe temporary file path
|
302 |
temp_filename = f"temp_{uuid.uuid4()}{os.path.splitext(file.filename)[1]}"
|
|
|
310 |
shutil.copyfileobj(file.file, buffer)
|
311 |
|
312 |
# Transcribe using your custom function
|
313 |
+
result = transcribe_audio(temp_filepath, language=language, model_size="base")
|
314 |
end_time = time.time()
|
315 |
transcription_time = end_time - start_time
|
316 |
response = {
|
|
|
329 |
os.remove(temp_filepath)
|
330 |
|
331 |
|
332 |
+
from fastapi import UploadFile, Form
|
333 |
|
334 |
@app.post('/analyze_all/')
|
335 |
+
async def analyze_all(file: UploadFile, language: str = Form(...)):
|
336 |
"""
|
337 |
Endpoint to analyze all aspects of an uploaded audio file (.wav or .mp3).
|
338 |
"""
|
|
|
359 |
vps_result = analyze_vps_main(temp_filepath)
|
360 |
ves_result = calc_voice_engagement_score(temp_filepath)
|
361 |
filler_count = analyze_fillers(temp_filepath) # Assuming this function returns a dict with filler count
|
362 |
+
transcript = transcribe_audio(temp_filepath, language, "base") #fix this
|
363 |
|
364 |
# Combine results into a single response
|
365 |
combined_result = {
|
transcribe.py
CHANGED
@@ -1,24 +1,26 @@
|
|
1 |
-
|
2 |
|
3 |
-
|
4 |
-
|
5 |
|
6 |
-
def transcribe_audio(file_path, model_size=
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
# Transcribe the audio file
|
21 |
-
result = model.transcribe(file_path, fp16=False)
|
22 |
|
23 |
-
#
|
24 |
-
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import assemblyai as aai

# SECURITY: an AssemblyAI API key used to be hard-coded on this line. A key
# that has been committed must be treated as leaked and rotated. The key is
# now read from the ASSEMBLYAI_API_KEY environment variable so that no secret
# lives in source control.
aai.settings.api_key = os.environ.get("ASSEMBLYAI_API_KEY")


def transcribe_audio(file_path: str, language: str, model_size=None) -> str:
    """Transcribe an audio file with AssemblyAI and return the transcript text.

    Args:
        file_path: Path of the audio file handed to the AssemblyAI transcriber.
        language: Language code forwarded to AssemblyAI as ``language_code``
            (for example ``"hi"`` for Hindi or ``"en"`` for English).
        model_size: Ignored. Accepted only so that callers which still pass a
            model size (as the endpoints in app.py do) keep working.

    Returns:
        The text of the finished transcript.

    Raises:
        RuntimeError: If no API key is configured, or if AssemblyAI reports
            the transcription as failed.
    """
    # Fail early with an actionable message instead of an opaque API error.
    if not aai.settings.api_key:
        raise RuntimeError(
            "AssemblyAI API key is not configured; "
            "set the ASSEMBLYAI_API_KEY environment variable."
        )

    print(f"Transcribing audio file: {file_path} with language: {language}")

    # The language is chosen by the caller; nothing here is Hindi-specific.
    config = aai.TranscriptionConfig(
        speech_model=aai.SpeechModel.best,
        language_code=language,
    )

    transcript = aai.Transcriber(config=config).transcribe(file_path)

    # transcribe() returns a transcript object even on failure, so the status
    # has to be checked explicitly before the text is used.
    if transcript.status == aai.TranscriptStatus.error:
        raise RuntimeError(f"Transcription failed: {transcript.error}")

    return transcript.text
|