from transformers import pipeline
import gradio as gr
import requests
import asyncio
from gtts import gTTS
from dotenv import load_dotenv
import os

# Load environment variables (expects HUGGING_FACE_TOKEN in a .env file)
load_dotenv()

model_id = "sanchit-gandhi/whisper-small-dv"  # update with your model id
pipe = pipeline("automatic-speech-recognition", model=model_id)

hugging_face_token = os.getenv("HUGGING_FACE_TOKEN")
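
# query(): send the transcribed text to a hosted instruct model via the
# Hugging Face Inference API and return the line of generated text that
# follows the echoed prompt.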
async def query(text, model_id="tiiuae/falcon-7b-instruct"):
    api_url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {"Authorization": f"Bearer {hugging_face_token}"}
    payload = {"inputs": text}
    print(f"Querying...: {text}")
    # requests is blocking, so run the POST in the default thread-pool executor
    loop = asyncio.get_event_loop()
    response = await loop.run_in_executor(
        None, lambda: requests.post(api_url, headers=headers, json=payload)
    )
    print(response.json())
    # generated_text echoes the prompt on its first line; return the line after it
    return response.json()[0]["generated_text"].split("\n")[1]
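
# transcribe_speech(): run Whisper ASR on the recorded audio, then pass the
# transcript to query() for a generated response.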
async def transcribe_speech(filepath):
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "english",
        },  # update with the language you've fine-tuned on
        chunk_length_s=30,
        batch_size=8,
    )
    return await query(output["text"])
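
# final()/main(): synchronous wrappers for Gradio. final() drives the async
# pipeline; main() converts the text reply to speech with gTTS and returns an
# audio filepath for playback.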
def final(filepath):
    answer = asyncio.run(transcribe_speech(filepath))
    return answer


def main(filepath):
    response = final(filepath)
    print(response)
    myobj = gTTS(text=response, lang='en', slow=False)
    # Note: this overwrites the input audio file with the synthesized MP3 reply
    myobj.save(filepath)
    return filepath
    # return response
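
# Build the Gradio UI: one tab records from the microphone, the other accepts
# an uploaded audio file; both return the spoken reply as audio.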
mic_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs="audio",
)

file_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs="audio",
)

demo = gr.TabbedInterface(
    [mic_transcribe, file_transcribe],
    ["Transcribe Microphone", "Transcribe Audio File"],
)

demo.launch(debug=True, share=True)
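
# Optional local sanity check (an assumption, not part of the original app):
# with a valid HUGGING_FACE_TOKEN set, the LLM call can be tested on its own, e.g.
#   print(asyncio.run(query("What is the capital of France?")))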