EngTexToASLGloss

Runtime error

App Files Files Community

Rachel Rakov committed on Aug 22, 2023

Commit

8eb9635

1 Parent(s): 5b162f6

Added app file and requirements

Browse files

Files changed (2) hide show

eng_to_aslGloss_app.py +159 -0
requirements.txt +5 -0

eng_to_aslGloss_app.py ADDED Viewed

	@@ -0,0 +1,159 @@

+from pathlib import Path
+import gradio as gr
+import openai
+import os
+import tiktoken
+# Set the OpenAI API key.
+# NOTE: despite its name, HF_TOKEN holds the value of the "NextStar" environment
+# variable and is handed to openai as the API key. os.getenv returns None when
+# the variable is not set.
+HF_TOKEN = os.getenv("NextStar")
+openai.api_key = HF_TOKEN
+# Paths of the prompt-engineering text files (module level so globally available)
+inStructionPath = "intro_instructions_combine.txt"
+inRulesPath = "formatting_rules_expanded.txt"
+inExamplesPath = "examples_longer1.txt"
+inDialoguesPath = "examples_dialogues.txt"
+# Helper for reading the prompting files
+def openReadFiles(inpath):
+    """Return the full text of the file at ``inpath``.
+
+    Args:
+        inpath: Path of the text file to read (``str`` or path-like).
+
+    Returns:
+        The file contents as one string.
+
+    Raises:
+        OSError: If the file cannot be opened (for example, it does not exist).
+        UnicodeDecodeError: If the file is not valid UTF-8.
+    """
+    # Decode explicitly as UTF-8: the platform default encoding differs between
+    # machines and can garble or reject non-ASCII characters in the prompts.
+    return Path(inpath).read_text(encoding="utf-8")
+# Read every prompt component once at import time (so globally available)
+instruct, rules, examples, exampleDialogues = (
+    openReadFiles(promptPath)
+    for promptPath in (inStructionPath, inRulesPath, inExamplesPath, inDialoguesPath)
+)
+### In case we eventually want to upload files
+# def uploadText():
+#     '''In case you want to upload a .txt file to translate to ASL gloss'''
+#     readFile = input("Enter the file path of the .txt you'd like to translate to ASL gloss: ")
+#     inFile = open(readFile, "r")
+#     data = inFile.read()
+#     inFile.close()
+#     print(f"Your file {readFile} has been uploaded")
+#     return data
+def formatQuery(engText):
+    """Prefix the English text with the translation request sent to GPT4.
+
+    Returns the fixed request sentence, two newlines, and then ``engText``
+    unchanged.
+    """
+    # Named differently from the module-level ``instruct`` prompt on purpose,
+    # so the two cannot be confused.
+    translationRequest = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
+    return translationRequest + engText
+def num_tokens_from_string(string: str, encoding_name: str) -> int:
+    """Count the tokens ``string`` encodes to under a named tiktoken encoding.
+
+    ``encoding_name`` is passed to ``tiktoken.get_encoding``; the result is the
+    length of the encoded token sequence.
+    """
+    tokenIds = tiktoken.get_encoding(encoding_name).encode(string)
+    return len(tokenIds)
+def checkTokens(tokens, max_tokens=553):
+    """Check whether a query is short enough to translate to ASL gloss.
+
+    Args:
+        tokens: Number of tokens in the formatted query.
+        max_tokens: Exclusive upper bound; the query is rejected once it
+            reaches this many tokens. Defaults to 553, the limit this app
+            has always applied.
+
+    Returns:
+        True if ``tokens`` is below ``max_tokens``, otherwise False. A status
+        line is printed in both cases.
+    """
+    goAhead = tokens < max_tokens
+    if goAhead:
+        print(f"Has less than {max_tokens} tokens - can continue translating")
+    else:
+        print(f"Cannot translate to ASL gloss at this time: too many tokens {tokens}")
+    return goAhead
+def getGlossFromText(query):
+    """Translate English text to ASL gloss, or say why it cannot be done.
+
+    Args:
+        query: English text entered by the user.
+
+    Returns:
+        The text returned by ``getASLGloss`` for the formatted query, or a
+        short message when the input is blank or ``checkTokens`` rejects it.
+    """
+    # Blank input: answer locally rather than spending a GPT-4 request on it.
+    if not query or not query.strip():
+        return "No text entered: nothing to translate"
+    text = formatQuery(query)
+    tokens = num_tokens_from_string(text, "cl100k_base")
+    if not checkTokens(tokens):
+        return "Too many tokens: cannot translate"
+    return getASLGloss(text)
+def getASLGloss(testQs):
+    """Request the ASL gloss for an already formatted query from OpenAI.
+
+    The module-level prompt texts (instruct, rules, examples,
+    exampleDialogues) are sent as system messages in that order, followed by
+    ``testQs`` as the user message, to the 'gpt-4' model at temperature 0.
+    Returns the message content of the first choice in the response.
+    """
+    systemPrompts = (instruct, rules, examples, exampleDialogues)
+    chatMessages = [
+        {"role": "system", "content": promptText} for promptText in systemPrompts
+    ]
+    chatMessages.append({"role": "user", "content": testQs})
+    completion = openai.ChatCompletion.create(
+        model='gpt-4',
+        messages=chatMessages,
+        temperature=0,
+    )
+    firstChoice = completion['choices'][0]
+    return firstChoice['message']['content']
+def main():
+    """Build the Gradio interface around getGlossFromText and launch it."""
+    exampleInputs = [
+        ["Every year I buy my dad a gift"],
+        ["I always look forward to the family vacation"],
+        ["If I don't travel often, I am sad."],
+    ]
+    demo = gr.Interface(
+        fn=getGlossFromText,
+        inputs="textbox",
+        outputs="text",
+        title="English to ASL Gloss",
+        description="Translate English text to ASL Gloss",
+        examples=exampleInputs,
+    )
+    demo.launch()
+# Start the app only when this file is executed as a script
+if __name__ == "__main__":
+    main()
+#     def getAnswer(query, texts = texts, embeddings = embeddings):
+#         docsearch = FAISS.from_texts(texts, embeddings)
+#         docs = docsearch.similarity_search(query)
+#         chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
+#         response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
+#             #interum_q = list(response.keys())
+#         interum_a = list(response.values())
+#         q = query
+#         a = interum_a[0]
+#         return a
+#     # query = "describe the fisher database"
+#     # docs = docsearch.similarity_search(query)
+#     # chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
+#     # chain({"input_documents": docs, "question": query}, return_only_outputs=True)
+#     title = "Query the S Drive!"
+#     description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""
+#     interface = gr.Interface(
+#         fn=getAnswer,
+#         inputs="textbox",
+#         outputs="text",
+#         title = title,
+#         description = description,
+#         examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"],
+#                 ["How many audio files do we have in the CallHome database?"]])
+#     interface.launch()
+# if __name__ == "__main__":
+#     main()
+# def main():
+#     results = setMode()
+#     print (results)
+# main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+# os and pathlib ship with Python itself; do not list them here (pip cannot install "os").
+# openai is pinned below 1.0 because the app calls the legacy openai.ChatCompletion API.
+openai<1.0
+tiktoken
+gradio==3.26.0