Spaces:
Runtime error
Runtime error
Commit
·
fb4483d
1
Parent(s):
59e1d08
remove secrets
Browse files
- .gitignore +2 -1
- .vscode/PythonImportHelper-v2-Completion.json +2 -2
- main/diarization.py +1 -3
- main/summary.py +2 -4
- main/transcribe.py +0 -1
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
__pycache__
|
|
|
|
1 |
+
__pycache__
|
2 |
+
.vscode
|
.vscode/PythonImportHelper-v2-Completion.json
CHANGED
@@ -487,7 +487,7 @@
|
|
487 |
"kind": 5,
|
488 |
"importPath": "main.diarization",
|
489 |
"description": "main.diarization",
|
490 |
-
"peekOfCode": "hugging_face_token = \"
|
491 |
"detail": "main.diarization",
|
492 |
"documentation": {}
|
493 |
},
|
@@ -577,7 +577,7 @@
|
|
577 |
"kind": 5,
|
578 |
"importPath": "main.summary",
|
579 |
"description": "main.summary",
|
580 |
-
"peekOfCode": "openai.api_key = \"
|
581 |
"detail": "main.summary",
|
582 |
"documentation": {}
|
583 |
},
|
|
|
487 |
"kind": 5,
|
488 |
"importPath": "main.diarization",
|
489 |
"description": "main.diarization",
|
490 |
+
"peekOfCode": "hugging_face_token = os.environ[\"HUGGING_FACE_TOKEN\"]\npipeline = Pipeline.from_pretrained(\n 'pyannote/speaker-diarization', use_auth_token=hugging_face_token)\nuse_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\npipeline.to(use_device)\ndef start_diarization(input_file):\n diarization = pipeline(input_file)\n sample_groups = []\n speaker_groups = {}\n for turn, _, speaker in diarization.itertracks(yield_label=True):",
|
491 |
"detail": "main.diarization",
|
492 |
"documentation": {}
|
493 |
},
|
|
|
577 |
"kind": 5,
|
578 |
"importPath": "main.summary",
|
579 |
"description": "main.summary",
|
580 |
+
"peekOfCode": "openai.api_key = os.environ[\"OPEN_AI_KEY\"]\ndef get_summary(lang_choice: int):\n transcribe_list = text_file_tool.read_simple_transcribe_file()\n transcribe = \"\\n\".join(transcribe_list)\n encoding = tiktoken.get_encoding(\"cl100k_base\")\n token_num = len(encoding.encode(transcribe))\n print(f\"Token number is {token_num}\")\n language = [\"English\", \"Bahasa Indonesia\", \"Any\"]\n result = openai.ChatCompletion.create(\n model=\"gpt-3.5-turbo\" if token_num < 4097 else \"gpt-3.5-turbo-16k\",",
|
581 |
"detail": "main.summary",
|
582 |
"documentation": {}
|
583 |
},
|
main/diarization.py
CHANGED
@@ -9,9 +9,7 @@ import os
|
|
9 |
gc.collect()
|
10 |
torch.cuda.empty_cache()
|
11 |
|
12 |
-
|
13 |
-
# hugging_face_token = os.environ["HUGGING_FACE_TOKEN"]
|
14 |
-
hugging_face_token = "hf_<REDACTED>"
|
15 |
pipeline = Pipeline.from_pretrained(
|
16 |
'pyannote/speaker-diarization', use_auth_token=hugging_face_token)
|
17 |
use_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
|
|
9 |
gc.collect()
|
10 |
torch.cuda.empty_cache()
|
11 |
|
12 |
+
hugging_face_token = os.environ["HUGGING_FACE_TOKEN"]
|
|
|
|
|
13 |
pipeline = Pipeline.from_pretrained(
|
14 |
'pyannote/speaker-diarization', use_auth_token=hugging_face_token)
|
15 |
use_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
main/summary.py
CHANGED
@@ -1,11 +1,9 @@
|
|
1 |
import openai
|
2 |
import tiktoken
|
3 |
import tool.text_file_tool as text_file_tool
|
|
|
4 |
|
5 |
-
|
6 |
-
# FIXME REMEMBER TO DELETE ENVIRON BEFORE COMMIT
|
7 |
-
# openai.api_key = os.environ["OPEN_AI_KEY"]
|
8 |
-
openai.api_key = "sk-<REDACTED>"
|
9 |
|
10 |
|
11 |
def get_summary(lang_choice: int):
|
|
|
1 |
import openai
|
2 |
import tiktoken
|
3 |
import tool.text_file_tool as text_file_tool
|
4 |
+
import os
|
5 |
|
6 |
+
openai.api_key = os.environ["OPEN_AI_KEY"]
|
|
|
|
|
|
|
7 |
|
8 |
|
9 |
def get_summary(lang_choice: int):
|
main/transcribe.py
CHANGED
@@ -42,7 +42,6 @@ def start_transcribe(lang_choice: int, model_size_choice: int, progress):
|
|
42 |
segment_txt = segment.text
|
43 |
speaker_txt_list.append(segment_txt)
|
44 |
|
45 |
-
# TODO CHECK FOR TRIPLE QUOTES
|
46 |
subtitle = f"{len(subtitle_txt_list) + 1}\n{start} --> {end}\n[{name}] {segment_txt}\n\n"
|
47 |
subtitle_txt_list.append(subtitle)
|
48 |
|
|
|
42 |
segment_txt = segment.text
|
43 |
speaker_txt_list.append(segment_txt)
|
44 |
|
|
|
45 |
subtitle = f"{len(subtitle_txt_list) + 1}\n{start} --> {end}\n[{name}] {segment_txt}\n\n"
|
46 |
subtitle_txt_list.append(subtitle)
|
47 |
|