# Hugging Face Spaces status shown when this file was captured: "Runtime error".
import os
from pathlib import Path

import gradio as gr
import openai
import tiktoken
# Secret key, read from the "NextStar" environment variable. Despite the
# HF_TOKEN name, this value is assigned to openai.api_key in getASLGloss().
# os.getenv returns None when the variable is not set.
HF_TOKEN = os.getenv("NextStar")

# Prompt-engineering file paths (module level so they are globally available).
# They are relative paths, so they resolve against the working directory.
inStructionPath = "intro_instructions_combine.txt"
inRulesPath = "formatting_rules_expanded.txt"
inExamplesPath = "examples_longer1.txt"
inDialoguesPath = "examples_dialogues.txt"
# Helper used to read in the prompting files
def openReadFiles(inpath):
    """Return the full text content of the file at ``inpath``.

    Args:
        inpath: Location of the file, as a string or path-like object.

    Returns:
        The whole file content as a single ``str``.

    Raises:
        OSError: If the file does not exist or cannot be read.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the prompts are read the same way on every
    # host instead of depending on the locale's default encoding.
    return Path(inpath).read_text(encoding="utf-8")
# Load the prompting data once at import time (so globally available).
# getASLGloss() sends each of these four texts as a system message.
instruct = openReadFiles(inStructionPath)
rules = openReadFiles(inRulesPath)
examples = openReadFiles(inExamplesPath)
exampleDialogues = openReadFiles(inDialoguesPath)
def formatQuery(engText):
    """Add prompt instructions to English text for GPT4.

    Args:
        engText: English text the user wants translated.

    Returns:
        The translation instruction followed directly by ``engText``.
    """
    # Deliberately not called ``instruct``: that name is the module-level
    # system prompt, and shadowing it here made the two easy to confuse.
    translatePrefix = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
    return translatePrefix + engText
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return the number of tokens in a text string.

    Args:
        string: Text to tokenize.
        encoding_name: Name of the tiktoken encoding to look up; the caller in
            this file passes ``"cl100k_base"``.

    Returns:
        The number of tokens produced by encoding ``string``.
    """
    encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(string))
def checkTokens(tokens, maxTokens=553):
    """Check a token count to ensure we can translate to ASL gloss.

    Args:
        tokens: Number of tokens in the formatted query.
        maxTokens: Exclusive upper bound on ``tokens``; a count equal to or
            above it is rejected. Defaults to 553.

    Returns:
        True when ``tokens`` is below ``maxTokens``, otherwise False. A status
        message is printed in both cases.
    """
    if tokens >= maxTokens:
        print(f"Cannot translate to ASL gloss at this time: too many tokens {tokens}")
        return False
    print(f"Has less than {maxTokens} tokens - can continue translating")
    return True
def getGlossFromText(query):
    """Translate English text to ASL gloss if it fits the token limit.

    The text is wrapped with the translation instruction, its tokens are
    counted with the "cl100k_base" encoding, and the request is only sent on
    when checkTokens() accepts that count.

    Args:
        query: English text entered by the user.

    Returns:
        The ASL gloss returned by getASLGloss(), or the message
        "Too many tokens: cannot translate" when the limit is exceeded.
    """
    text = formatQuery(query)
    tokens = num_tokens_from_string(text, "cl100k_base")
    # Guard clause: stop before calling the API when the query is too long.
    if not checkTokens(tokens):
        return "Too many tokens: cannot translate"
    return getASLGloss(text)
def getASLGloss(testQs):
    """Get ASL gloss from OpenAI using our prompt engineering.

    The module-level prompt texts (instruct, rules, examples,
    exampleDialogues) are sent as four system messages, followed by
    ``testQs`` as the user message.

    Args:
        testQs: The fully formatted query to translate.

    Returns:
        The text content of the first choice in the completion.
    """
    openai.api_key = HF_TOKEN
    # NOTE(review): openai.ChatCompletion is the pre-1.0 openai SDK interface;
    # confirm the deployment pins openai<1.0.
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": instruct},
            {"role": "system", "content": rules},
            {"role": "system", "content": examples},
            {"role": "system", "content": exampleDialogues},
            {"role": "user", "content": testQs},
        ],
        temperature=0,
    )
    return completion["choices"][0]["message"]["content"]
def main():
    """Build the Gradio interface for English to ASL gloss and launch it.

    The interface takes one textbox input, passes it to getGlossFromText(),
    and shows the returned text.
    """
    title = "English to ASL Gloss"
    #description = """Translate English text to ASL Gloss"""
    # Adjacent string literals are used instead of backslash line
    # continuations so the indentation of this code cannot leak into the text.
    # Literal backslashes are written as "\\" because "\<", "\>", "\#" and
    # "\-" are invalid escape sequences in a normal string.
    description = (
        "This program uses GPT4 alongside prompt engineering to "
        "translate English text to ASL gloss.\n "
        "<b>Type in the English sentence you would like to translate into ASL Gloss.</b> "
        "\n \n This version of EngToASLGloss contains superscript notation which adds "
        "grammatical context to assist in ASL generation. "
        "\n Below are the guidelines we are using to express grammatical concepts "
        "in ASL gloss. "
        "Anything within the angle brackets < > indicates this additional grammatical notation. "
        "If the angle brackets are directly next to a word, the notation inside "
        "the angle brackets is associate with just that word, e.g. WILL < A >. "
        "If the angle brackets are next to a whitespace after a word, "
        "the notation inside the angle bracket is associated with all of the words "
        "before it, up until a comma, another angle bracket, or a double space."
        "\n \n This sentence is an example of this rule:"
        "\n NEXT-YEAR < Ti >, MY FIANCE < T >, TWO-OF-US MARRY \\< A \\>."
        "\n\r "
        "\n The superscript notation options that will appear in results are as follows:"
        "\n Ti marks time"
        "\n T marks topic"
        "\n A marks comment"
        "\n Y/N marks yes-no question"
        "\n WHQ marks wh-question"
        "\n RHQ marks rhetorical question"
        "\n < Cond > marks conditional sentences"
        "\n lower case marks directional verbs"
        "\n ++ marks emphesis ('very' or 'a lot of')"
        "\n \\# marks lexical fingerspelling "
        "\n \\- marks space between individual letters of fingerspelling"
        "\n \n <b>Note: This is a prototype and is still in development. "
        "Do not use it in a production deployment.</b> "
        "\n For additional details on how the program works, please see "
        "[the README](https://huggingface.co/spaces/rrakov/EngTexToASLGloss/blob/main/README.md)"
    )
    interface = gr.Interface(
        fn=getGlossFromText,
        inputs="textbox",
        outputs="text",
        title=title,
        description=description,
    )
    #examples = [[("Prompt: Every year I buy my dad a gift \n", "Result: EVERY-YEAR<Ti>, MY DAD GIFT<T>, ME BUY<A>")]])
    # examples=[["Every year I buy my dad a gift"], ["I always look forward to the family vacation"],
    # ["If I don't travel often, I am sad."]])
    interface.launch()
# Launch the interface only when this file is run as a script.
if __name__ == "__main__":
    main()
# def getAnswer(query, texts = texts, embeddings = embeddings): | |
# docsearch = FAISS.from_texts(texts, embeddings) | |
# docs = docsearch.similarity_search(query) | |
# chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False) | |
# response = chain({"input_documents": docs, "question": query}, return_only_outputs=True) | |
# #interum_q = list(response.keys()) | |
# interum_a = list(response.values()) | |
# q = query | |
# a = interum_a[0] | |
# return a | |
# # query = "describe the fisher database" | |
# # docs = docsearch.similarity_search(query) | |
# # chain = load_qa_chain(OpenAI(openai_api_key = "<REDACTED: a literal API key was committed here; revoke it and load keys from the environment>", temperature=0), chain_type="map_reduce", return_map_steps=False)
# # chain({"input_documents": docs, "question": query}, return_only_outputs=True) | |
# title = "Query the S Drive!" | |
# description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)""" | |
# interface = gr.Interface( | |
# fn=getAnswer, | |
# inputs="textbox", | |
# outputs="text", | |
# title = title, | |
# description = description, | |
# examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"], | |
# ["How many audio files do we have in the CallHome database?"]]) | |
# interface.launch() | |
# if __name__ == "__main__": | |
# main() | |
# def main(): | |
# results = setMode() | |
# print (results) | |
# main() | |