Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	+v
Browse files
    	
        app.py
    CHANGED
    
    | @@ -45,11 +45,11 @@ logging.info(f"Using device: {device}") | |
| 45 | 
             
            def download_audio(url, method_choice):
         | 
| 46 | 
             
                """
         | 
| 47 | 
             
                Downloads audio from a given URL using the specified method.
         | 
| 48 | 
            -
             | 
| 49 | 
             
                Args:
         | 
| 50 | 
             
                    url (str): The URL of the audio.
         | 
| 51 | 
             
                    method_choice (str): The method to use for downloading audio.
         | 
| 52 | 
            -
             | 
| 53 | 
             
                Returns:
         | 
| 54 | 
             
                    tuple: (path to the downloaded audio file, is_temp_file), or (error message, False).
         | 
| 55 | 
             
                """
         | 
| @@ -64,11 +64,14 @@ def download_audio(url, method_choice): | |
| 64 | 
             
                        audio_file = download_direct_audio(url, method_choice)
         | 
| 65 |  | 
| 66 | 
             
                    if not audio_file or not os.path.exists(audio_file):
         | 
| 67 | 
            -
                         | 
|  | |
|  | |
| 68 | 
             
                    return audio_file, True
         | 
| 69 | 
             
                except Exception as e:
         | 
| 70 | 
            -
                     | 
| 71 | 
            -
                     | 
|  | |
| 72 |  | 
| 73 | 
             
            def download_youtube_audio(url, method_choice):
         | 
| 74 | 
             
                """
         | 
| @@ -114,15 +117,20 @@ def yt_dlp_method(url): | |
| 114 | 
             
                        'preferredcodec': 'mp3',
         | 
| 115 | 
             
                        'preferredquality': '192',
         | 
| 116 | 
             
                    }],
         | 
| 117 | 
            -
                    'quiet':  | 
| 118 | 
             
                    'no_warnings': True,
         | 
|  | |
| 119 | 
             
                }
         | 
| 120 | 
            -
                 | 
| 121 | 
            -
                     | 
| 122 | 
            -
             | 
| 123 | 
            -
             | 
| 124 | 
            -
             | 
| 125 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
| 126 |  | 
| 127 | 
             
            def pytube_method(url):
         | 
| 128 | 
             
                """
         | 
| @@ -136,15 +144,24 @@ def pytube_method(url): | |
| 136 | 
             
                """
         | 
| 137 | 
             
                logging.info("Using pytube method")
         | 
| 138 | 
             
                from pytube import YouTube
         | 
| 139 | 
            -
                 | 
| 140 | 
            -
             | 
| 141 | 
            -
             | 
| 142 | 
            -
             | 
| 143 | 
            -
             | 
| 144 | 
            -
             | 
| 145 | 
            -
             | 
| 146 | 
            -
             | 
| 147 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 148 |  | 
| 149 | 
             
            def download_rtsp_audio(url):
         | 
| 150 | 
             
                """
         | 
| @@ -173,11 +190,11 @@ def download_rtsp_audio(url): | |
| 173 | 
             
            def download_direct_audio(url, method_choice):
         | 
| 174 | 
             
                """
         | 
| 175 | 
             
                Downloads audio from a direct URL using the specified method.
         | 
| 176 | 
            -
             | 
| 177 | 
             
                Args:
         | 
| 178 | 
             
                    url (str): The direct URL of the audio file.
         | 
| 179 | 
             
                    method_choice (str): The method to use for downloading.
         | 
| 180 | 
            -
             | 
| 181 | 
             
                Returns:
         | 
| 182 | 
             
                    str: Path to the downloaded audio file, or None if failed.
         | 
| 183 | 
             
                """
         | 
| @@ -191,9 +208,14 @@ def download_direct_audio(url, method_choice): | |
| 191 | 
             
                }
         | 
| 192 | 
             
                method = methods.get(method_choice, requests_method)
         | 
| 193 | 
             
                try:
         | 
| 194 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 195 | 
             
                except Exception as e:
         | 
| 196 | 
            -
                    logging.error(f"Error downloading direct audio: {str(e)}")
         | 
| 197 | 
             
                    return None
         | 
| 198 |  | 
| 199 | 
             
            def requests_method(url):
         | 
| @@ -402,10 +424,10 @@ loaded_models = {} | |
| 402 |  | 
| 403 | 
             
            def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
         | 
| 404 | 
             
                """
         | 
| 405 | 
            -
                Transcribes audio from a given  | 
| 406 |  | 
| 407 | 
             
                Args:
         | 
| 408 | 
            -
                    input_source (str): URL of  | 
| 409 | 
             
                    pipeline_type (str): Type of pipeline to use ('faster-batched', 'faster-sequenced', or 'transformers').
         | 
| 410 | 
             
                    model_id (str): The ID of the model to use.
         | 
| 411 | 
             
                    dtype (str): Data type for model computations ('int8', 'float16', or 'float32').
         | 
| @@ -430,22 +452,36 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d | |
| 430 | 
             
                    if verbose:
         | 
| 431 | 
             
                        yield verbose_messages, "", None
         | 
| 432 |  | 
| 433 | 
            -
                    #  | 
| 434 | 
            -
                     | 
| 435 | 
            -
             | 
| 436 | 
            -
             | 
| 437 | 
            -
             | 
| 438 | 
            -
             | 
| 439 | 
            -
             | 
| 440 | 
            -
             | 
| 441 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 442 | 
             
                        return
         | 
| 443 |  | 
| 444 | 
             
                    # Convert start_time and end_time to float or None
         | 
| 445 | 
             
                    start_time = float(start_time) if start_time else None
         | 
| 446 | 
             
                    end_time = float(end_time) if end_time else None
         | 
| 447 |  | 
| 448 | 
            -
                    # Trim the audio if start or end times are provided
         | 
| 449 | 
             
                    if start_time is not None or end_time is not None:
         | 
| 450 | 
             
                        audio_path = trim_audio(audio_path, start_time, end_time)
         | 
| 451 | 
             
                        is_temp_file = True  # The trimmed audio is a temporary file
         | 
| @@ -459,7 +495,6 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d | |
| 459 | 
             
                        model_or_pipeline = loaded_models[model_key]
         | 
| 460 | 
             
                        logging.info("Loaded model from cache")
         | 
| 461 | 
             
                    else:
         | 
| 462 | 
            -
                        # Load the appropriate model or pipeline based on the pipeline type
         | 
| 463 | 
             
                        if pipeline_type == "faster-batched":
         | 
| 464 | 
             
                            model = WhisperModel(model_id, device=device, compute_type=dtype)
         | 
| 465 | 
             
                            model_or_pipeline = BatchedInferencePipeline(model=model)
         | 
| @@ -489,10 +524,11 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d | |
| 489 | 
             
                                device=device,
         | 
| 490 | 
             
                            )
         | 
| 491 | 
             
                        else:
         | 
| 492 | 
            -
                             | 
|  | |
|  | |
| 493 | 
             
                        loaded_models[model_key] = model_or_pipeline  # Cache the model or pipeline
         | 
| 494 |  | 
| 495 | 
            -
                    # Perform the transcription
         | 
| 496 | 
             
                    start_time_perf = time.time()
         | 
| 497 | 
             
                    if pipeline_type == "faster-batched":
         | 
| 498 | 
             
                        segments, info = model_or_pipeline.transcribe(audio_path, batch_size=batch_size)
         | 
| @@ -503,7 +539,6 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d | |
| 503 | 
             
                        segments = result["chunks"]
         | 
| 504 | 
             
                    end_time_perf = time.time()
         | 
| 505 |  | 
| 506 | 
            -
                    # Calculate metrics
         | 
| 507 | 
             
                    transcription_time = end_time_perf - start_time_perf
         | 
| 508 | 
             
                    audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)
         | 
| 509 |  | 
| @@ -515,7 +550,6 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d | |
| 515 | 
             
                    if verbose:
         | 
| 516 | 
             
                        yield verbose_messages + metrics_output, "", None
         | 
| 517 |  | 
| 518 | 
            -
                    # Compile the transcription text
         | 
| 519 | 
             
                    transcription = ""
         | 
| 520 |  | 
| 521 | 
             
                    for segment in segments:
         | 
| @@ -527,13 +561,13 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d | |
| 527 | 
             
                        if verbose:
         | 
| 528 | 
             
                            yield verbose_messages + metrics_output, transcription, None
         | 
| 529 |  | 
| 530 | 
            -
                    # Save the transcription to a file
         | 
| 531 | 
             
                    transcription_file = save_transcription(transcription)
         | 
| 532 | 
             
                    yield verbose_messages + metrics_output, transcription, transcription_file
         | 
| 533 |  | 
| 534 | 
             
                except Exception as e:
         | 
| 535 | 
            -
                     | 
| 536 | 
            -
                     | 
|  | |
| 537 |  | 
| 538 | 
             
                finally:
         | 
| 539 | 
             
                    # Clean up temporary audio files
         | 
|  | |
| 45 | 
             
            def download_audio(url, method_choice):
         | 
| 46 | 
             
                """
         | 
| 47 | 
             
                Downloads audio from a given URL using the specified method.
         | 
| 48 | 
            +
             | 
| 49 | 
             
                Args:
         | 
| 50 | 
             
                    url (str): The URL of the audio.
         | 
| 51 | 
             
                    method_choice (str): The method to use for downloading audio.
         | 
| 52 | 
            +
             | 
| 53 | 
             
                Returns:
         | 
| 54 | 
             
                    tuple: (path to the downloaded audio file, is_temp_file), or (error message, False).
         | 
| 55 | 
             
                """
         | 
|  | |
| 64 | 
             
                        audio_file = download_direct_audio(url, method_choice)
         | 
| 65 |  | 
| 66 | 
             
                    if not audio_file or not os.path.exists(audio_file):
         | 
| 67 | 
            +
                        error_msg = f"Failed to download audio from {url} using method {method_choice}"
         | 
| 68 | 
            +
                        logging.error(error_msg)
         | 
| 69 | 
            +
                        return error_msg, False
         | 
| 70 | 
             
                    return audio_file, True
         | 
| 71 | 
             
                except Exception as e:
         | 
| 72 | 
            +
                    error_msg = f"Error downloading audio from {url} using method {method_choice}: {str(e)}"
         | 
| 73 | 
            +
                    logging.error(error_msg)
         | 
| 74 | 
            +
                    return error_msg, False
         | 
| 75 |  | 
| 76 | 
             
            def download_youtube_audio(url, method_choice):
         | 
| 77 | 
             
                """
         | 
|  | |
| 117 | 
             
                        'preferredcodec': 'mp3',
         | 
| 118 | 
             
                        'preferredquality': '192',
         | 
| 119 | 
             
                    }],
         | 
| 120 | 
            +
                    'quiet': False,
         | 
| 121 | 
             
                    'no_warnings': True,
         | 
| 122 | 
            +
                    'logger': logging.getLogger(),  # Capture yt-dlp logs
         | 
| 123 | 
             
                }
         | 
| 124 | 
            +
                try:
         | 
| 125 | 
            +
                    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         | 
| 126 | 
            +
                        info = ydl.extract_info(url, download=True)
         | 
| 127 | 
            +
                        output_file = ydl.prepare_filename(info)
         | 
| 128 | 
            +
                        output_file = os.path.splitext(output_file)[0] + '.mp3'
         | 
| 129 | 
            +
                        logging.info(f"Downloaded YouTube audio: {output_file}")
         | 
| 130 | 
            +
                        return output_file
         | 
| 131 | 
            +
                except Exception as e:
         | 
| 132 | 
            +
                    logging.error(f"Error in yt_dlp_method: {str(e)}")
         | 
| 133 | 
            +
                    raise Exception(f"yt-dlp failed to download audio: {str(e)}")
         | 
| 134 |  | 
| 135 | 
             
            def pytube_method(url):
         | 
| 136 | 
             
                """
         | 
|  | |
| 144 | 
             
                """
         | 
| 145 | 
             
                logging.info("Using pytube method")
         | 
| 146 | 
             
                from pytube import YouTube
         | 
| 147 | 
            +
                try:
         | 
| 148 | 
            +
                    yt = YouTube(url)
         | 
| 149 | 
            +
                    audio_stream = yt.streams.filter(only_audio=True).first()
         | 
| 150 | 
            +
                    if audio_stream is None:
         | 
| 151 | 
            +
                        error_msg = "No audio streams available with pytube."
         | 
| 152 | 
            +
                        logging.error(error_msg)
         | 
| 153 | 
            +
                        raise Exception(error_msg)
         | 
| 154 | 
            +
                    temp_dir = tempfile.mkdtemp()
         | 
| 155 | 
            +
                    out_file = audio_stream.download(output_path=temp_dir)
         | 
| 156 | 
            +
                    base, ext = os.path.splitext(out_file)
         | 
| 157 | 
            +
                    new_file = base + '.mp3'
         | 
| 158 | 
            +
                    os.rename(out_file, new_file)
         | 
| 159 | 
            +
                    logging.info(f"Downloaded and converted audio to: {new_file}")
         | 
| 160 | 
            +
                    return new_file
         | 
| 161 | 
            +
                except Exception as e:
         | 
| 162 | 
            +
                    logging.error(f"Error in pytube_method: {str(e)}")
         | 
| 163 | 
            +
                    raise Exception(f"pytube failed to download audio: {str(e)}")
         | 
| 164 | 
            +
             | 
| 165 |  | 
| 166 | 
             
            def download_rtsp_audio(url):
         | 
| 167 | 
             
                """
         | 
|  | |
| 190 | 
             
            def download_direct_audio(url, method_choice):
         | 
| 191 | 
             
                """
         | 
| 192 | 
             
                Downloads audio from a direct URL using the specified method.
         | 
| 193 | 
            +
             | 
| 194 | 
             
                Args:
         | 
| 195 | 
             
                    url (str): The direct URL of the audio file.
         | 
| 196 | 
             
                    method_choice (str): The method to use for downloading.
         | 
| 197 | 
            +
             | 
| 198 | 
             
                Returns:
         | 
| 199 | 
             
                    str: Path to the downloaded audio file, or None if failed.
         | 
| 200 | 
             
                """
         | 
|  | |
| 208 | 
             
                }
         | 
| 209 | 
             
                method = methods.get(method_choice, requests_method)
         | 
| 210 | 
             
                try:
         | 
| 211 | 
            +
                    audio_file = method(url)
         | 
| 212 | 
            +
                    if not audio_file or not os.path.exists(audio_file):
         | 
| 213 | 
            +
                        error_msg = f"Failed to download direct audio from {url} using method {method_choice}"
         | 
| 214 | 
            +
                        logging.error(error_msg)
         | 
| 215 | 
            +
                        return None
         | 
| 216 | 
            +
                    return audio_file
         | 
| 217 | 
             
                except Exception as e:
         | 
| 218 | 
            +
                    logging.error(f"Error downloading direct audio with {method_choice}: {str(e)}")
         | 
| 219 | 
             
                    return None
         | 
| 220 |  | 
| 221 | 
             
            def requests_method(url):
         | 
|  | |
| 424 |  | 
| 425 | 
             
            def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
         | 
| 426 | 
             
                """
         | 
| 427 | 
            +
                Transcribes audio from a given source using the specified pipeline and model.
         | 
| 428 |  | 
| 429 | 
             
                Args:
         | 
| 430 | 
            +
                    input_source (str or file): URL of audio, path to local file, or uploaded file object.
         | 
| 431 | 
             
                    pipeline_type (str): Type of pipeline to use ('faster-batched', 'faster-sequenced', or 'transformers').
         | 
| 432 | 
             
                    model_id (str): The ID of the model to use.
         | 
| 433 | 
             
                    dtype (str): Data type for model computations ('int8', 'float16', or 'float32').
         | 
|  | |
| 452 | 
             
                    if verbose:
         | 
| 453 | 
             
                        yield verbose_messages, "", None
         | 
| 454 |  | 
| 455 | 
            +
                    # Determine if input_source is a URL or file
         | 
| 456 | 
            +
                    audio_path = None
         | 
| 457 | 
            +
                    is_temp_file = False
         | 
| 458 | 
            +
             | 
| 459 | 
            +
                    if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
         | 
| 460 | 
            +
                        # Input source is a URL
         | 
| 461 | 
            +
                        audio_path, is_temp_file = download_audio(input_source, download_method)
         | 
| 462 | 
            +
                        if not audio_path or audio_path.startswith("Error"):
         | 
| 463 | 
            +
                            error_msg = f"Error downloading audio: {audio_path}"
         | 
| 464 | 
            +
                            logging.error(error_msg)
         | 
| 465 | 
            +
                            yield error_msg, "", None
         | 
| 466 | 
            +
                            return
         | 
| 467 | 
            +
                    elif isinstance(input_source, str) and os.path.exists(input_source):
         | 
| 468 | 
            +
                        # Input source is a local file path
         | 
| 469 | 
            +
                        audio_path = input_source
         | 
| 470 | 
            +
                        is_temp_file = False
         | 
| 471 | 
            +
                    elif hasattr(input_source, 'name'):
         | 
| 472 | 
            +
                        # Input source is an uploaded file object
         | 
| 473 | 
            +
                        audio_path = input_source.name
         | 
| 474 | 
            +
                        is_temp_file = False
         | 
| 475 | 
            +
                    else:
         | 
| 476 | 
            +
                        error_msg = "No valid audio source provided."
         | 
| 477 | 
            +
                        logging.error(error_msg)
         | 
| 478 | 
            +
                        yield error_msg, "", None
         | 
| 479 | 
             
                        return
         | 
| 480 |  | 
| 481 | 
             
                    # Convert start_time and end_time to float or None
         | 
| 482 | 
             
                    start_time = float(start_time) if start_time else None
         | 
| 483 | 
             
                    end_time = float(end_time) if end_time else None
         | 
| 484 |  | 
|  | |
| 485 | 
             
                    if start_time is not None or end_time is not None:
         | 
| 486 | 
             
                        audio_path = trim_audio(audio_path, start_time, end_time)
         | 
| 487 | 
             
                        is_temp_file = True  # The trimmed audio is a temporary file
         | 
|  | |
| 495 | 
             
                        model_or_pipeline = loaded_models[model_key]
         | 
| 496 | 
             
                        logging.info("Loaded model from cache")
         | 
| 497 | 
             
                    else:
         | 
|  | |
| 498 | 
             
                        if pipeline_type == "faster-batched":
         | 
| 499 | 
             
                            model = WhisperModel(model_id, device=device, compute_type=dtype)
         | 
| 500 | 
             
                            model_or_pipeline = BatchedInferencePipeline(model=model)
         | 
|  | |
| 524 | 
             
                                device=device,
         | 
| 525 | 
             
                            )
         | 
| 526 | 
             
                        else:
         | 
| 527 | 
            +
                            error_msg = "Invalid pipeline type"
         | 
| 528 | 
            +
                            logging.error(error_msg)
         | 
| 529 | 
            +
                            raise ValueError(error_msg)
         | 
| 530 | 
             
                        loaded_models[model_key] = model_or_pipeline  # Cache the model or pipeline
         | 
| 531 |  | 
|  | |
| 532 | 
             
                    start_time_perf = time.time()
         | 
| 533 | 
             
                    if pipeline_type == "faster-batched":
         | 
| 534 | 
             
                        segments, info = model_or_pipeline.transcribe(audio_path, batch_size=batch_size)
         | 
|  | |
| 539 | 
             
                        segments = result["chunks"]
         | 
| 540 | 
             
                    end_time_perf = time.time()
         | 
| 541 |  | 
|  | |
| 542 | 
             
                    transcription_time = end_time_perf - start_time_perf
         | 
| 543 | 
             
                    audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)
         | 
| 544 |  | 
|  | |
| 550 | 
             
                    if verbose:
         | 
| 551 | 
             
                        yield verbose_messages + metrics_output, "", None
         | 
| 552 |  | 
|  | |
| 553 | 
             
                    transcription = ""
         | 
| 554 |  | 
| 555 | 
             
                    for segment in segments:
         | 
|  | |
| 561 | 
             
                        if verbose:
         | 
| 562 | 
             
                            yield verbose_messages + metrics_output, transcription, None
         | 
| 563 |  | 
|  | |
| 564 | 
             
                    transcription_file = save_transcription(transcription)
         | 
| 565 | 
             
                    yield verbose_messages + metrics_output, transcription, transcription_file
         | 
| 566 |  | 
| 567 | 
             
                except Exception as e:
         | 
| 568 | 
            +
                    error_msg = f"An error occurred during transcription: {str(e)}"
         | 
| 569 | 
            +
                    logging.error(error_msg)
         | 
| 570 | 
            +
                    yield error_msg, "", None
         | 
| 571 |  | 
| 572 | 
             
                finally:
         | 
| 573 | 
             
                    # Clean up temporary audio files
         |