# EngTexToASLGloss / eng_to_aslGloss_app.py
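"""Gradio app that translates English text to ASL gloss using GPT-4.

Prompt-engineering files (instructions, formatting rules, example translations,
and example dialogues) are sent to GPT-4 as system messages, followed by the
user's English text to translate into ASL gloss.
"""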
from pathlib import Path
import gradio as gr
import openai
import os
import tiktoken
# Read the OpenAI API key from the "NextStar" secret (environment variable)
HF_TOKEN = os.getenv("NextStar")
#Set prompt engineering paths (so globally available)
inStructionPath = "intro_instructions_combine.txt"
inRulesPath = "formatting_rules_expanded.txt"
inExamplesPath = "examples_longer1.txt"
inDialoguesPath = "examples_dialogues.txt"
# Read in a prompting file
def openReadFiles(inpath):
    """Read a prompt file and return its contents as a string."""
    infile = Path(inpath)
    with open(infile) as f:
        data = f.read()
    return data
# Set up prompting data (so globally available)
instruct = openReadFiles(inStructionPath)
rules = openReadFiles(inRulesPath)
examples = openReadFiles(inExamplesPath)
exampleDialogues = openReadFiles(inDialoguesPath)
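# These four prompt components are sent to GPT-4 as system messages in getASLGloss() below.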
def formatQuery(engText):
    """Add prompt instructions to English text for GPT-4"""
    prompt = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
    query = prompt + engText
    return query
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens
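# Usage sketch (assuming tiktoken is installed; "cl100k_base" is the encoding
# used for GPT-4 below), e.g.:
#     num_tokens_from_string("ME BUY GIFT", "cl100k_base")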
def checkTokens(tokens):
    """Check the token count to ensure we can translate to ASL gloss"""
    goAhead = None
    if tokens >= 553:
        print(f"Cannot translate to ASL gloss at this time: too many tokens ({tokens})")
        goAhead = False
    else:
        goAhead = True
        print("Has fewer than 553 tokens - can continue translating")
    return goAhead
def getGlossFromText(query):
    """Format the query, check its token count, and get the ASL gloss"""
    text = formatQuery(query)
    tokens = num_tokens_from_string(text, "cl100k_base")
    goAhead = checkTokens(tokens)
    if goAhead:
        results = getASLGloss(text)
    else:
        results = "Too many tokens: cannot translate"
    return results
def getASLGloss(testQs):
    """Get ASL gloss from OpenAI using our prompt engineering"""
    openai.api_key = HF_TOKEN
    # Legacy (pre-1.0) openai API: the prompt files are passed as system
    # messages, followed by the user's formatted query.
    completion = openai.ChatCompletion.create(
        model='gpt-4',
        messages=[
            {"role": "system", "content": instruct},
            {"role": "system", "content": rules},
            {"role": "system", "content": examples},
            {"role": "system", "content": exampleDialogues},
            {"role": "user", "content": testQs},
        ],
        temperature=0
    )
    results = completion['choices'][0]['message']['content']
    return results
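# Usage sketch (assumes the "NextStar" secret holds a valid OpenAI API key and
# the four prompt files sit alongside this script):
#     getGlossFromText("Every year I buy my dad a gift")
# returns the model's ASL gloss translation as a string.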
def main():
    title = "English to ASL Gloss"
    description = ("This program uses GPT-4 to translate English text to ASL gloss. "
                   "It uses prompt engineering to first describe the task it wants "
                   "GPT-4 to complete (along with examples of English text and its "
                   "ASL gloss translations), and then provides GPT-4 with the English "
                   "text to translate to ASL gloss. Using these prompts, GPT-4 "
                   "translates the input text into ASL gloss.")
    interface = gr.Interface(
        fn=getGlossFromText,
        inputs="textbox",
        outputs="text",
        title=title,
        description=description,
        # Each example is a one-element list matching the single textbox input.
        # Expected gloss for the first example: EVERY-YEAR<Ti>, MY DAD GIFT<T>, ME BUY<A>
        examples=[["Every year I buy my dad a gift"],
                  ["I always look forward to the family vacation"],
                  ["If I don't travel often, I am sad."]])
    interface.launch()
if __name__ == "__main__":
    main()