from pathlib import Path
import gradio as gr
import openai
import os
import tiktoken

# Set OpenAI key.
# NOTE: despite its name, HF_TOKEN is what getASLGloss() assigns to
# openai.api_key. It is read from the "NextStar" environment variable and is
# None when that variable is not set.
HF_TOKEN = os.getenv("NextStar")

# Set prompt engineering paths (so globally available)
inStructionPath = "intro_instructions_combine.txt"
inRulesPath = "formatting_rules_expanded.txt"
inExamplesPath = "examples_longer1.txt"
inDialoguesPath = "examples_dialogues.txt"

# Queries whose token count (instruction prefix + user text) reaches this
# value are rejected by checkTokens().
# NOTE(review): presumably sized so the system prompts plus the query fit the
# model's context window -- confirm before changing.
MAX_QUERY_TOKENS = 553


# Set to read in prompting files
def openReadFiles(inpath):
    """Return the full text content of the file at ``inpath``.

    Args:
        inpath: Path (str or path-like) of the prompt file to read.

    Returns:
        The file content as a single string.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # Decode explicitly as UTF-8 so the prompts read the same way regardless
    # of the platform's default encoding.
    return Path(inpath).read_text(encoding="utf-8")


# Set up prompting data (so globally available)
instruct = openReadFiles(inStructionPath)
rules = openReadFiles(inRulesPath)
examples = openReadFiles(inExamplesPath)
exampleDialogues = openReadFiles(inDialoguesPath)


### In case we eventually want to upload files
# def uploadText():
#     '''In case you want to upload a .txt file to translate to ASL gloss'''
#     readFile = input("Enter the file path of the .txt you'd like to translate to ASL gloss: ")
#     inFile = open(readFile, "r")
#     data = inFile.read()
#     inFile.close()
#     print(f"Your file {readFile} has been uploaded")
#     return data


def formatQuery(engText):
    """Add prompt instructions to English text for GPT4.

    Args:
        engText: The English text to translate.

    Returns:
        The translation instruction followed by ``engText``.
    """
    # Named differently from the module-level ``instruct`` system prompt so
    # the global is not shadowed inside this function.
    translationPrompt = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
    return translationPrompt + engText


def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string.

    Args:
        string: Text to tokenize.
        encoding_name: Name of the tiktoken encoding (e.g. "cl100k_base").

    Returns:
        Number of tokens produced by encoding ``string``.
    """
    encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(string))


def checkTokens(tokens):
    """Checks tokens to ensure we can translate to ASL gloss.

    Prints a status message in both cases.

    Args:
        tokens: Token count of the formatted query.

    Returns:
        False when ``tokens`` is at or above MAX_QUERY_TOKENS, True otherwise.
    """
    if tokens >= MAX_QUERY_TOKENS:
        print(f"Cannot translate to ASL gloss at this time: too many tokens {tokens}")
        return False
    print(f"Has less than {MAX_QUERY_TOKENS} tokens - can continue translating")
    return True


def getGlossFromText(query):
    """Sets all for getting ASL gloss.

    Formats the query, checks its token count and, when it is within the
    limit, requests the gloss from OpenAI.

    Args:
        query: English text entered by the user.

    Returns:
        The ASL gloss returned by the model, or an explanatory message when
        the input is empty or has too many tokens.
    """
    # Nothing to translate: avoid a pointless (and billable) API call.
    if not query or not query.strip():
        return "No text provided: cannot translate"

    text = formatQuery(query)
    tokens = num_tokens_from_string(text, "cl100k_base")
    if checkTokens(tokens):
        return getASLGloss(text)
    return "Too many tokens: cannot translate"


def getASLGloss(testQs):
    """Get ASL gloss from OpenAI using our prompt engineering.

    Args:
        testQs: The fully formatted user query (see formatQuery).

    Returns:
        The content of the first choice's message in the chat completion.
    """
    openai.api_key = HF_TOKEN
    completion = openai.ChatCompletion.create(
        model='gpt-4',
        messages=[
            # The four prompt files are sent as separate system messages,
            # followed by the user's query.
            {"role": "system", "content": instruct},
            {"role": "system", "content": rules},
            {"role": "system", "content": examples},
            {"role": "system", "content": exampleDialogues},
            {"role": "user", "content": testQs},
        ],
        temperature=0,
    )
    return completion['choices'][0]['message']['content']


def main():
    """Build and launch the Gradio interface for the translator."""
    title = "English to ASL Gloss"
    description = """Translate English text to ASL Gloss"""

    interface = gr.Interface(
        fn=getGlossFromText,
        inputs="textbox",
        outputs="text",
        title=title,
        description=description,
        examples=[["Every year I buy my dad a gift"],
                  ["I always look forward to the family vacation"],
                  ["If I don't travel often, I am sad."]])
    interface.launch()


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# Legacy commented-out code kept for reference.
# NOTE(security): one of the lines below used to contain a literal OpenAI API
# key. It has been redacted here; because it was committed in plain text, that
# key should be revoked/rotated.
# ---------------------------------------------------------------------------

# def getAnswer(query, texts = texts, embeddings = embeddings):
#     docsearch = FAISS.from_texts(texts, embeddings)
#     docs = docsearch.similarity_search(query)
#     chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
#     response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
#     #interum_q = list(response.keys())
#     interum_a = list(response.values())
#     q = query
#     a = interum_a[0]
#     return a

# # query = "describe the fisher database"
# # docs = docsearch.similarity_search(query)
# # chain = load_qa_chain(OpenAI(openai_api_key = "<REDACTED>", temperature=0), chain_type="map_reduce", return_map_steps=False)
# # chain({"input_documents": docs, "question": query}, return_only_outputs=True)

# title = "Query the S Drive!"
# description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""

# interface = gr.Interface(
#     fn=getAnswer,
#     inputs="textbox",
#     outputs="text",
#     title = title,
#     description = description,
#     examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"],
#     ["How many audio files do we have in the CallHome database?"]])
# interface.launch()

# if __name__ == "__main__":
#     main()

# def main():
#     results = setMode()
#     print (results)
# main()