from pathlib import Path
import os

import gradio as gr
import openai
import tiktoken

# Secret key for the OpenAI API, read from the "NextStar" environment variable.
HF_TOKEN = os.getenv("NextStar")

# Prompt-engineering file paths (module level so they are globally available).
inStructionPath = "intro_instructions_combine.txt"
inRulesPath = "formatting_rules_expanded.txt"
inExamplesPath = "examples_longer1.txt"
inDialoguesPath = "examples_dialogues.txt"

# Token budget for a single user query (prompt files are not counted here).
MAX_QUERY_TOKENS = 553


def openReadFiles(inpath: str) -> str:
    """Return the full text content of the file at *inpath*."""
    return Path(inpath).read_text()


# Prompting data, loaded once at import time (globally available).
instruct = openReadFiles(inStructionPath)
rules = openReadFiles(inRulesPath)
examples = openReadFiles(inExamplesPath)
exampleDialogues = openReadFiles(inDialoguesPath)


def formatQuery(engText: str) -> str:
    """Prepend the translate-to-ASL-gloss instruction to *engText*.

    The local prefix previously shadowed the module-level ``instruct``
    prompt; it is a distinct string and is renamed here for clarity.
    """
    prefix = ("Now, translate the following sentences to perfect ASL gloss "
              "using the grammatical, syntactic, and notation rules you just "
              "learned. \n\n")
    return prefix + engText


def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(string))


def checkTokens(tokens: int, limit: int = MAX_QUERY_TOKENS) -> bool:
    """Check the token count to ensure we can translate to ASL gloss.

    Returns True when *tokens* is below *limit* (default 553), else False.
    """
    if tokens >= limit:
        print(f"Cannot translate to ASL gloss at this time: too many tokens {tokens}")
        return False
    print(f"Has less than {limit} tokens - can continue translating")
    return True


def getGlossFromText(query: str) -> str:
    """Format *query*, verify its token budget, and return the ASL gloss.

    Falls back to an explanatory message when the query is too long.
    """
    text = formatQuery(query)
    tokens = num_tokens_from_string(text, "cl100k_base")
    if checkTokens(tokens):
        return getASLGloss(text)
    return "Too many tokens: cannot translate"


def getASLGloss(testQs: str) -> str:
    """Get ASL gloss from OpenAI GPT-4 using our prompt engineering.

    The system messages carry the instruction/rules/example prompts loaded
    at import time; *testQs* is the already-formatted user query.
    """
    openai.api_key = HF_TOKEN
    completion = openai.ChatCompletion.create(
        model='gpt-4',
        messages=[
            {"role": "system", "content": instruct},
            {"role": "system", "content": rules},
            {"role": "system", "content": examples},
            {"role": "system", "content": exampleDialogues},
            {"role": "user", "content": testQs},
        ],
        temperature=0,
    )
    return completion['choices'][0]['message']['content']


def main() -> None:
    """Build and launch the Gradio English-to-ASL-gloss demo."""
    title = "English to ASL Gloss"
    description = (
        "This program uses GPT4 to translate English text to ASL gloss. "
        "This program makes use of prompt engineering to first describe "
        "the task it wants GPT4 to complete (along with examples of "
        "translations of English text and its ASL gloss translations), "
        "and then providing GPT4 with the English text to translate to "
        "ASL gloss. Using these prompts, GPT4 translates the input text "
        "into ASL gloss."
    )
    interface = gr.Interface(
        fn=getGlossFromText,
        inputs="textbox",
        outputs="text",
        title=title,
        description=description,
        # BUG FIX: `examples` must be a list of input rows matching the
        # input components. The original wrapped an (input, expected-output)
        # tuple inside the row, which Gradio cannot render for a single
        # textbox input; only the English sentence belongs here.
        examples=[["Every year I buy my dad a gift"]],
    )
    interface.launch()


if __name__ == "__main__":
    main()