from pathlib import Path
import gradio as gr
import openai
import os
import tiktoken

# Set the OpenAI API key (read from the "NextStar" environment variable / Space secret)
HF_TOKEN = os.getenv("NextStar")
openai.api_key = HF_TOKEN

# Set prompt-engineering file paths (so globally available)
inStructionPath = "intro_instructions_combine.txt"
inRulesPath = "formatting_rules_expanded.txt"
inExamplesPath = "examples_longer1.txt"
inDialoguesPath = "examples_dialogues.txt"

# Read a prompt-engineering file from disk
def openReadFiles(inpath):
    infile = Path(inpath)
    with open(infile) as f:
        data = f.read()
    return data

# Set up prompting data (so globally available)
instruct = openReadFiles(inStructionPath)
rules = openReadFiles(inRulesPath)
examples = openReadFiles(inExamplesPath)
exampleDialogues = openReadFiles(inDialoguesPath)

### In case we eventually want to upload files
# def uploadText():
#     '''In case you want to upload a .txt file to translate to ASL gloss'''
#     readFile = input("Enter the file path of the .txt you'd like to translate to ASL gloss: ")
#     inFile = open(readFile, "r")
#     data = inFile.read()
#     inFile.close()
#     print(f"Your file {readFile} has been uploaded")
#     return data
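
# A minimal sketch (not wired into the app) of how an uploaded .txt file could be
# fed through the same pipeline via a Gradio gr.File input. The helper name and the
# isinstance check are assumptions: depending on the Gradio version, the File
# component passes either a file path (str) or a tempfile-like object with .name.
# def glossFromUpload(uploadedFile):
#     path = uploadedFile if isinstance(uploadedFile, str) else uploadedFile.name
#     engText = Path(path).read_text()
#     return getGlossFromText(engText)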

def formatQuery(engText):
    """Add the translation instruction to the English text for GPT-4"""
    # Local name chosen so we don't shadow the global `instruct` prompt text
    prefix = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
    query = prefix + engText
    return query

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    # cl100k_base is the tiktoken encoding used by GPT-4
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

def checkTokens(tokens):
    """Checks the token count to ensure we can translate to ASL gloss"""
    goAhead = None
    if tokens >= 553:
        print(f"Cannot translate to ASL gloss at this time: too many tokens ({tokens})")
        goAhead = False
    else:
        goAhead = True
        print("Has fewer than 553 tokens - can continue translating")
    return goAhead

def getGlossFromText(query):
    """Full pipeline: format the prompt, check the token count, then request the ASL gloss"""
    text = formatQuery(query)
    tokens = num_tokens_from_string(text, "cl100k_base")
    goAhead = checkTokens(tokens)
    if goAhead:
        results = getASLGloss(text)
    else:
        results = "Too many tokens: cannot translate"
    return results

def getASLGloss(testQs):
    """Get ASL gloss from OpenAI using our prompt engineering"""
    # The prompt-engineering files are sent as system messages; the text to
    # translate is the user message. temperature=0 keeps output near-deterministic.
    completion = openai.ChatCompletion.create(
        model='gpt-4',
        messages=[
            {"role": "system", "content": instruct},
            {"role": "system", "content": rules},
            {"role": "system", "content": examples},
            {"role": "system", "content": exampleDialogues},
            {"role": "user", "content": testQs},
        ],
        temperature=0
    )
    results = completion['choices'][0]['message']['content']
    return results
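
# The call above uses the legacy (pre-1.0) openai package interface. If the Space
# were upgraded to openai>=1.0, an equivalent body for getASLGloss would look
# roughly like this sketch (same messages and temperature; only the client object
# and response access change):
# from openai import OpenAI
# client = OpenAI(api_key=HF_TOKEN)
# completion = client.chat.completions.create(
#     model="gpt-4",
#     messages=[
#         {"role": "system", "content": instruct},
#         {"role": "system", "content": rules},
#         {"role": "system", "content": examples},
#         {"role": "system", "content": exampleDialogues},
#         {"role": "user", "content": testQs},
#     ],
#     temperature=0,
# )
# results = completion.choices[0].message.content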

def main():
    title = "English to ASL Gloss"
    description = """Translate English text to ASL Gloss"""
    interface = gr.Interface(
        fn=getGlossFromText,
        inputs="textbox",
        outputs="text",
        title=title,
        description=description,
        examples=[["Every year I buy my dad a gift"],
                  ["I always look forward to the family vacation"],
                  ["If I don't travel often, I am sad."]])
    interface.launch()

if __name__ == "__main__":
    main()
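
# For a quick check without the UI (assuming a valid key is set), the pipeline can
# also be called directly, e.g.:
# print(getGlossFromText("Every year I buy my dad a gift"))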

# Not used by this app: commented-out code for a separate document-QA interface
# ("Query the S Drive!") built on FAISS + LangChain, kept here for reference.
# def getAnswer(query, texts = texts, embeddings = embeddings):
#     docsearch = FAISS.from_texts(texts, embeddings)
#     docs = docsearch.similarity_search(query)
#     chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
#     response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
#     # interum_q = list(response.keys())
#     interum_a = list(response.values())
#     q = query
#     a = interum_a[0]
#     return a

# # query = "describe the fisher database"
# # docs = docsearch.similarity_search(query)
# # chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
# # chain({"input_documents": docs, "question": query}, return_only_outputs=True)

# title = "Query the S Drive!"
# description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""
# interface = gr.Interface(
#     fn=getAnswer,
#     inputs="textbox",
#     outputs="text",
#     title=title,
#     description=description,
#     examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"],
#               ["How many audio files do we have in the CallHome database?"]])
# interface.launch()

# if __name__ == "__main__":
#     main()

# def main():
#     results = setMode()
#     print(results)
# main()