	Update app.py
    	
app.py CHANGED

@@ -3,6 +3,7 @@ import librosa
 import gradio as gr
 import spaces
 
+# Load the model pipeline on GPU:0
 pipe = transformers.pipeline(
     model='sarvamai/shuka_v1',
     trust_remote_code=True,
@@ -12,17 +13,28 @@ pipe = transformers.pipeline(
 
 @spaces.GPU(duration=120)
 def transcribe_and_respond(audio_file):
-
-
-
-
-
-
-
-
-
-
+    try:
+        # Check if the audio file is valid and exists
+        if audio_file is None or not isinstance(audio_file, str):
+            raise ValueError("Invalid audio file input.")
+
+        # Load the audio using librosa
+        audio, sr = librosa.load(audio_file, sr=16000)
+
+        # Prepare the conversation turns
+        turns = [
+            {'role': 'system', 'content': 'Respond naturally and informatively.'},
+            {'role': 'user', 'content': ''}
+        ]
+
+        # Run inference with the pipeline
+        response = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
+
+        return response
+    except Exception as e:
+        return f"Error processing audio: {str(e)}"
 
+# Create the Gradio interface with microphone input
 iface = gr.Interface(
     fn=transcribe_and_respond,
     inputs=gr.Audio(sources="microphone", type="filepath"),  # Use the microphone for audio input
@@ -31,4 +43,5 @@ iface = gr.Interface(
     description="Record your voice, and the model will respond naturally and informatively."
 )
 
+# Launch the Gradio app
 iface.launch()
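For quick local verification outside the Space, the updated handler logic can be exercised without launching the Gradio UI. The following is a minimal sketch, not part of the commit: it mirrors the pipeline setup and inference call shown in the diff, sample.wav is a hypothetical local recording, and device=0 is an assumption for a single-GPU machine (the diff only shows the first two pipeline arguments).

    import librosa
    import transformers

    # Mirrors the pipeline setup from app.py; 'sarvamai/shuka_v1' ships custom
    # code, hence trust_remote_code=True. device=0 assumes a CUDA GPU (drop it
    # to run on CPU).
    pipe = transformers.pipeline(
        model='sarvamai/shuka_v1',
        trust_remote_code=True,
        device=0,
    )

    # 'sample.wav' is a hypothetical local recording; resampling to 16 kHz
    # matches the librosa.load(..., sr=16000) call in app.py.
    audio, sr = librosa.load('sample.wav', sr=16000)

    # Same conversation turns the updated handler builds internally.
    turns = [
        {'role': 'system', 'content': 'Respond naturally and informatively.'},
        {'role': 'user', 'content': ''},
    ]

    # Same payload transcribe_and_respond passes to the pipeline.
    response = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr},
                    max_new_tokens=512)
    print(response)

If the call succeeds, response holds the model's reply; if it raises, the Space's new try/except would instead surface the error string to the Gradio UI.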