Update app.py
app.py
CHANGED
@@ -45,23 +45,22 @@ import yt_dlp
 #
 #
 # Usage:
-#   Transcribe a single URL:
-#       python diarize.py https://example.com/video.mp4
 #
-#
-#
+#   Download Audio only from URL -> Transcribe audio:
+#       python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s`
 #
-#
-#
+#   Download Audio+Video from URL -> Transcribe audio from Video:**
+#       python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s`
 #
-#
-#
-#
-#
-#
-#
-#
-#
+#   Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:**
+#       python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API>` - Make sure to put your API key into `config.txt` under the appropriate API variable
+#
+#   Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
+#       python summarize.py ./local/file_on_your/system --api_name <API_name>`
+#
+#   Run it as a WebApp**
+#       python summarize.py -gui` - This requires you to either stuff your API keys into the `config.txt` file, or pass them into the app every time you want to use it.
+#       Can be helpful for setting up a shared instance, but not wanting people to perform inference on your server.
 #
 ###
 
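The usage block above documents the CLI flags (-v, -api/--api_name, -gui). As a rough illustration only, a minimal argparse setup that would accept those flags might look like the sketch below; every name except the flag strings themselves is a hypothetical placeholder, not code from app.py.

import argparse

# Hedged sketch: flag strings come from the usage comments above; dest names,
# help text, and defaults are guesses for illustration only.
parser = argparse.ArgumentParser(description="Transcribe and optionally summarize a video URL or local file")
parser.add_argument("input_path", nargs="?", help="URL, local file path, or a text file listing several inputs")
parser.add_argument("-v", "--video", action="store_true", help="Download audio+video instead of audio only")
parser.add_argument("-api", "--api_name", default=None, help="Summarization backend: anthropic/cohere/openai/llama/ooba/kobold/tabby")
parser.add_argument("-gui", "--user_interface", action="store_true", help="Launch the Gradio web app instead of the CLI flow")
args = parser.parse_args()
print(args)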
@@ -350,7 +349,7 @@ def process_local_file(file_path):
 # Video Download/Handling
 #
 
-def process_url(input_path, num_speakers=2, whisper_model="small.en", offset=0, api_name=None, api_key=None, vad_filter=False, download_video_flag=False, demo_mode=True):
+def process_url(input_path, num_speakers=2, whisper_model="small.en", offset=0, api_name=None, api_key=None, vad_filter=False, download_video_flag=False, custom_prompt=None, demo_mode=True):
     if demo_mode:
         api_name = "huggingface"
         api_key = os.environ.get(HF_TOKEN)
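For orientation, a hypothetical call showing where the new custom_prompt keyword slots into process_url (all values below are placeholders; in demo mode the function overrides api_name and api_key as the hunk shows).

import os

# Illustrative call only; process_url is the function changed in the hunk above.
summary = process_url(
    "https://www.youtube.com/watch?v=4nd1CDZP21s",
    num_speakers=2,
    whisper_model="small.en",
    offset=0,
    api_name="openai",
    api_key=os.environ.get("OPENAI_API_KEY"),
    custom_prompt="Please provide a detailed, bulleted list of the points made in the video.",
    demo_mode=False,
)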
@@ -793,7 +792,7 @@ def summarize_with_openai(api_key, file_path, model):
     }
 
     logging.debug("openai: Preparing data + prompt for submittal")
-
+    openai_prompt = f"{text} \n\n\n\n{prompt_text}"
     data = {
         "model": model,
         "messages": [
@@ -803,7 +802,7 @@ def summarize_with_openai(api_key, file_path, model):
             },
             {
                 "role": "user",
-                "content":
+                "content": openai_prompt
             }
         ],
         "max_tokens": 4096,  # Adjust tokens as needed
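Taken together, the two summarize_with_openai hunks append the custom prompt to the transcript and place the result in the user message of a Chat Completions payload. Below is a self-contained sketch of the request being assembled; the endpoint, system message, and all placeholder values are assumptions for illustration, not code from this file.

import requests

api_key = "sk-placeholder"                 # placeholder
model = "gpt-4"                            # placeholder
text = "transcript goes here"              # stands in for the transcription
prompt_text = "Please provide a detailed, bulleted list of the points made."

openai_prompt = f"{text} \n\n\n\n{prompt_text}"
headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json",
}
data = {
    "model": model,
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},  # assumed; the real system message is outside this hunk
        {"role": "user", "content": openai_prompt},
    ],
    "max_tokens": 4096,
}
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=data)
if response.ok:
    print(response.json()["choices"][0]["message"]["content"])
else:
    print("openai: request failed:", response.status_code, response.text)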
@@ -846,7 +845,7 @@ def summarize_with_claude(api_key, file_path, model):
     logging.debug("anthropic: Prepping data + prompt for submittal")
     user_message = {
         "role": "user",
-        "content": f"{text} \n\n\n\
+        "content": f"{text} \n\n\n\n{prompt_text}"
     }
 
     data = {
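The anthropic hunk rebuilds the user message as transcript plus custom prompt. A hedged sketch of how such a message is typically sent to Anthropic's Messages API follows; the endpoint, headers, and model string are assumptions based on Anthropic's public API docs, not lines from this file.

import requests

api_key = "placeholder-anthropic-key"
model = "claude-3-sonnet-20240229"         # placeholder model name
text = "transcript goes here"
prompt_text = "Please summarize the main points."

user_message = {
    "role": "user",
    "content": f"{text} \n\n\n\n{prompt_text}",
}
headers = {
    "x-api-key": api_key,
    "anthropic-version": "2023-06-01",
    "content-type": "application/json",
}
data = {
    "model": model,
    "max_tokens": 4096,
    "messages": [user_message],
}
response = requests.post("https://api.anthropic.com/v1/messages", headers=headers, json=data)
if response.ok:
    print(response.json()["content"][0]["text"])
else:
    print("anthropic: request failed:", response.status_code, response.text)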
@@ -913,10 +912,10 @@ def summarize_with_cohere(api_key, file_path, model):
         'Authorization': f'Bearer {api_key}'
     }
 
-
+    cohere_prompt = f"{text} \n\n\n\n{prompt_text}"
     data = {
         "chat_history": [
-            {"role": "USER", "message":
+            {"role": "USER", "message": cohere_prompt}
         ],
         "message": "Please provide a summary.",
         "model": model,
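In the cohere hunk, the combined transcript-plus-prompt goes into chat_history while the standing instruction "Please provide a summary." is sent as the current message. A hedged sketch of the corresponding /v1/chat request; the endpoint URL and model value are assumptions.

import requests

api_key = "placeholder-cohere-key"
model = "command-r"                        # placeholder model name
text = "transcript goes here"
prompt_text = "Please summarize the main points."

cohere_prompt = f"{text} \n\n\n\n{prompt_text}"
headers = {
    "accept": "application/json",
    "content-type": "application/json",
    "Authorization": f"Bearer {api_key}",
}
data = {
    "chat_history": [
        {"role": "USER", "message": cohere_prompt},
    ],
    "message": "Please provide a summary.",
    "model": model,
}
response = requests.post("https://api.cohere.ai/v1/chat", headers=headers, json=data)
if response.ok:
    print(response.json().get("text"))
else:
    print("cohere: request failed:", response.status_code, response.text)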
@@ -964,12 +963,12 @@ def summarize_with_groq(api_key, file_path, model):
         'Content-Type': 'application/json'
     }
 
-
+    groq_prompt = f"{text} \n\n\n\n{prompt_text}"
     data = {
         "messages": [
             {
                 "role": "user",
-                "content":
+                "content": groq_prompt
             }
         ],
         "model": model
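Groq exposes an OpenAI-compatible Chat Completions endpoint, so the groq hunk mirrors the OpenAI payload with the combined prompt as the user message. A sketch of the full request, with the endpoint URL and model string as assumptions rather than values taken from this file:

import requests

api_key = "placeholder-groq-key"
model = "llama3-70b-8192"                  # placeholder model name
text = "transcript goes here"
prompt_text = "Please summarize the main points."

groq_prompt = f"{text} \n\n\n\n{prompt_text}"
headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json",
}
data = {
    "messages": [
        {"role": "user", "content": groq_prompt},
    ],
    "model": model,
}
response = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=headers, json=data)
if response.ok:
    print(response.json()["choices"][0]["message"]["content"])
else:
    print("groq: request failed:", response.status_code, response.text)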
@@ -1021,12 +1020,13 @@ def summarize_with_llama(api_url, file_path, token):
         headers['Authorization'] = f'Bearer {token}'
 
 
-
+    llama_prompt = f"{text} \n\n\n\n{prompt_text}"
+    logging.debug(f"llama: Complete prompt is: {llama_prompt}")
     data = {
-        "prompt":
+        "prompt": llama_prompt
     }
 
-    logging.debug("llama: Submitting request to API endpoint")
+    #logging.debug(f"llama: Submitting request to API endpoint {llama_prompt}")
     print("llama: Submitting request to API endpoint")
     response = requests.post(api_url, headers=headers, json=data)
     response_data = response.json()
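The llama hunk posts a bare {"prompt": ...} body to whatever api_url is configured. A hedged sketch against a locally running llama.cpp server follows; the URL, the n_predict cap, and the response field are assumptions about llama.cpp's /completion endpoint, not this file.

import requests

api_url = "http://127.0.0.1:8080/completion"   # assumed llama.cpp server endpoint; the script takes this from config
token = ""                                     # optional bearer token
text = "transcript goes here"
prompt_text = "Please summarize the main points."

headers = {"Content-Type": "application/json"}
if token:
    headers["Authorization"] = f"Bearer {token}"

llama_prompt = f"{text} \n\n\n\n{prompt_text}"
data = {
    "prompt": llama_prompt,
    "n_predict": 512,                          # assumed generation cap; the hunk above sends only "prompt"
}
response = requests.post(api_url, headers=headers, json=data)
if response.ok:
    print(response.json().get("content"))
else:
    print("llama: request failed:", response.status_code, response.text)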
@@ -1064,13 +1064,13 @@ def summarize_with_kobold(api_url, file_path):
         'content-type': 'application/json',
     }
     # FIXME
-
-    logging.debug(
+    kobold_prompt = f"{text} \n\n\n\n{prompt_text}"
+    logging.debug(kobold_prompt)
     # Values literally c/p from the api docs....
     data = {
         "max_context_length": 8096,
         "max_length": 4096,
-        "prompt":
+        "prompt": kobold_prompt,
     }
 
     logging.debug("kobold: Submitting request to API endpoint")
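The kobold hunk fills in the prompt field of a payload whose other keys are, per the comment, copied from the KoboldCpp API docs. A hedged sketch of the same generate call against a local KoboldCpp instance; the URL and response parsing are assumptions.

import requests

api_url = "http://127.0.0.1:5001/api/v1/generate"   # assumed default KoboldCpp endpoint
text = "transcript goes here"
prompt_text = "Please summarize the main points."

kobold_prompt = f"{text} \n\n\n\n{prompt_text}"
headers = {
    "accept": "application/json",
    "content-type": "application/json",
}
data = {
    "max_context_length": 8096,
    "max_length": 4096,
    "prompt": kobold_prompt,
}
response = requests.post(api_url, headers=headers, json=data)
if response.ok:
    results = response.json().get("results", [])
    print(results[0]["text"] if results else "")
else:
    print("kobold: request failed:", response.status_code, response.text)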
@@ -1114,9 +1114,9 @@ def summarize_with_oobabooga(api_url, file_path):
         'content-type': 'application/json',
     }
 
-    prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a french bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite."
-    #
-
+    #prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a french bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite."
+    #prompt_text += f"\n\n{text}"  # Uncomment this line if you want to include the text variable
+    ooba_prompt = f"{text}\n\n\n\n{prompt_text}"
 
     data = {
         "mode": "chat",
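The oobabooga hunk comments out the bakery test prompt and builds ooba_prompt from the transcript plus the custom prompt, feeding a payload that starts with "mode": "chat". A loose sketch of how such a payload might be sent to text-generation-webui's OpenAI-compatible chat endpoint; the URL, the messages list, and the response parsing are assumptions, and only "mode": "chat" and the prompt construction come from the hunk.

import requests

api_url = "http://127.0.0.1:5000/v1/chat/completions"   # assumed text-generation-webui endpoint
text = "transcript goes here"
prompt_text = "Please summarize the main points."

ooba_prompt = f"{text}\n\n\n\n{prompt_text}"
headers = {"content-type": "application/json"}
data = {
    "mode": "chat",
    "messages": [{"role": "user", "content": ooba_prompt}],
}
response = requests.post(api_url, headers=headers, json=data)
if response.ok:
    print(response.json()["choices"][0]["message"]["content"])
else:
    print("ooba: request failed:", response.status_code, response.text)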
@@ -1268,6 +1268,7 @@ def launch_ui(demo_mode=False):
         gr.components.Textbox(label="URL of video to be Transcribed/Summarized"),
         gr.components.Number(value=2, label="Number of Speakers (for Diarization)"),
         gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model (Can ignore this)"),
+        gr.components.Textbox(label="Custom Prompt", value="Please provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points", lines=3),
         gr.components.Number(value=0, label="Offset time to start transcribing from\n\n (helpful if you only want part of a video/lecture)")
     ]
 
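The launch_ui hunk adds a Custom Prompt textbox between the Whisper-model dropdown and the offset field. A hedged sketch of how such an input list is typically wired into gr.Interface; the wrapper function, output component, and model choices below are illustrative, not the file's actual launch_ui code.

import gradio as gr

def transcribe_and_summarize(url, num_speakers, whisper_model, custom_prompt, offset):
    # Hypothetical handler: in app.py this work is done via process_url and friends.
    return f"Would process {url} with model {whisper_model}, {int(num_speakers)} speakers, offset {int(offset)}, prompt: {custom_prompt}"

inputs = [
    gr.components.Textbox(label="URL of video to be Transcribed/Summarized"),
    gr.components.Number(value=2, label="Number of Speakers (for Diarization)"),
    gr.components.Dropdown(choices=["small.en", "medium.en", "large"], value="small.en", label="Whisper Model"),  # choices assumed
    gr.components.Textbox(label="Custom Prompt", value="Please provide a detailed, bulleted list of the points made throughout the transcribed video", lines=3),
    gr.components.Number(value=0, label="Offset time to start transcribing from"),
]

iface = gr.Interface(fn=transcribe_and_summarize, inputs=inputs, outputs=gr.components.Textbox(label="Transcription and Summary"))
iface.launch()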
@@ -1316,6 +1317,7 @@ def launch_ui(demo_mode=False):
 ####################################################################################################################################
 # Main()
 #
+
 def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False, demo_mode=False):
     if input_path is None and args.user_interface:
         return []
|