Upload app.py

app.py CHANGED
@@ -2089,7 +2089,7 @@ def get_chat_completion(messages, model='gpt-4-turbo'):
 # This function chunks a text into smaller pieces based on a maximum token count and a delimiter
 def chunk_on_delimiter(input_string: str,
                        max_tokens: int,
-                       delimiter: str) ->
+                       delimiter: str) -> list[str]:
     chunks = input_string.split(delimiter)
     combined_chunks, _, dropped_chunk_count = combine_chunks_with_no_minimum(
         chunks, max_tokens, chunk_delimiter=delimiter, add_ellipsis_for_overflow=True)
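Note: combine_chunks_with_no_minimum is defined elsewhere in app.py and falls outside this diff's context. A minimal self-contained sketch of the same delimiter-plus-token-budget idea, with greedy merging standing in for that helper and cl100k_base assumed as the encoding, might look like:

import tiktoken

def chunk_on_delimiter_sketch(text: str, max_tokens: int, delimiter: str) -> list[str]:
    # Count tokens with the same tokenizer family the file already uses.
    enc = tiktoken.get_encoding('cl100k_base')
    pieces = text.split(delimiter)
    chunks: list[str] = []
    current = ''
    for piece in pieces:
        candidate = current + delimiter + piece if current else piece
        if len(enc.encode(candidate)) <= max_tokens:
            current = candidate          # still under budget: keep merging
        else:
            if current:
                chunks.append(current)   # flush the completed chunk
            current = piece              # an oversized piece passes through as-is here
    if current:
        chunks.append(current)
    return chunks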
@@ -4727,7 +4727,7 @@ Sample commands:
 
 ######### Words-per-second Chunking #########
 # FIXME - Whole section needs to be re-written
-def chunk_transcript(transcript: str, chunk_duration: int, words_per_second) ->
+def chunk_transcript(transcript: str, chunk_duration: int, words_per_second) -> list[str]:
     words = transcript.split()
     words_per_chunk = chunk_duration * words_per_second
     chunks = [' '.join(words[i:i + words_per_chunk]) for i in range(0, len(words), words_per_chunk)]
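Assuming the function returns chunks (the return statement falls outside the diff context), the chunk-size arithmetic is easy to sanity-check: chunk_duration seconds at words_per_second words per second gives chunk_duration * words_per_second words per chunk.

# 150 words, 30-second chunks at an assumed 2 words/second -> 60-word chunks.
transcript = ' '.join(f'w{i}' for i in range(150))
chunks = chunk_transcript(transcript, chunk_duration=30, words_per_second=2)
print(len(chunks))             # 3 (60 + 60 + 30 words)
print(len(chunks[0].split()))  # 60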
@@ -5105,7 +5105,7 @@ def add_media_with_keywords(url, title, media_type, content, keywords, prompt, s
         raise DatabaseError(f"Unexpected error: {e}")
 
 
-def fetch_all_keywords() ->
+def fetch_all_keywords() -> list[str]:
     try:
         with db.get_connection() as conn:
             cursor = conn.cursor()
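The body is cut off by the diff context. A hedged sketch of how it plausibly continues follows; the table and column names (Keywords, keyword) are assumptions and do not come from this diff, while db and DatabaseError do appear elsewhere in the file:

def fetch_all_keywords() -> list[str]:
    try:
        with db.get_connection() as conn:
            cursor = conn.cursor()
            # Assumed schema: a Keywords table with a keyword column.
            cursor.execute('SELECT keyword FROM Keywords')
            return [row[0] for row in cursor.fetchall()]
    except Exception as e:
        raise DatabaseError(f"Error fetching keywords: {e}")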
@@ -6329,7 +6329,7 @@ def download_ffmpeg():
 #
 #######################################################################################################################
 import tiktoken
-def openai_tokenize(text: str) ->
+def openai_tokenize(text: str) -> list[str]:
     encoding = tiktoken.encoding_for_model('gpt-4-turbo')
     return encoding.encode(text)
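One note on this last hunk: tiktoken's Encoding.encode returns token IDs, so the accurate annotation here would be list[int], not list[str]:

import tiktoken

# Recent tiktoken versions map the 'gpt-4-' model prefix to cl100k_base.
encoding = tiktoken.encoding_for_model('gpt-4-turbo')
tokens = encoding.encode('hello world')
print(tokens)                   # e.g. [15339, 1917] -- integers, not strings
print(encoding.decode(tokens))  # 'hello world'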