Spaces:
Runtime error
Runtime error
Rachel Rakov
committed on
Commit
·
8eb9635
1
Parent(s):
5b162f6
Added app file and requirements
Browse files- eng_to_aslGloss_app.py +159 -0
- requirements.txt +5 -0
eng_to_aslGloss_app.py
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
import gradio as gr
|
3 |
+
import openai
|
4 |
+
import os
|
5 |
+
import tiktoken
|
6 |
+
|
7 |
+
# Set the OpenAI API key.
# NOTE: despite its name, HF_TOKEN holds the value handed to the OpenAI client
# on the next line. It is read from the "NextStar" environment variable and is
# None when that variable is not set.
HF_TOKEN = os.getenv("NextStar")
openai.api_key = HF_TOKEN
|
10 |
+
|
11 |
+
# Set prompt engineering paths (so globally available).
# These are relative paths, so they are resolved against the process's current
# working directory at the time the files are opened.
inStructionPath = "intro_instructions_combine.txt"
inRulesPath = "formatting_rules_expanded.txt"
inExamplesPath = "examples_longer1.txt"
inDialoguesPath = "examples_dialogues.txt"
|
16 |
+
|
17 |
+
#Set to read in prompting files
|
18 |
+
def openReadFiles(inpath):
    """Read a prompt file and return its entire contents as a string.

    Args:
        inpath: Path (str or path-like) of the text file to read.

    Returns:
        The full text of the file.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: without an explicit encoding, open() falls
    # back to a locale-dependent default, so the same prompt file could read
    # differently (or fail to decode) from one host to another.
    return Path(inpath).read_text(encoding="utf-8")
|
23 |
+
|
24 |
+
|
25 |
+
# Set up prompting data (so globally available).
# Each file is read once, when this module is executed; getASLGloss() sends
# these four strings as the system messages of every request.
instruct = openReadFiles(inStructionPath)
rules = openReadFiles(inRulesPath)
examples = openReadFiles(inExamplesPath)
exampleDialogues = openReadFiles(inDialoguesPath)
|
30 |
+
|
31 |
+
### In case we eventually want to upload files
|
32 |
+
# def uploadText():
|
33 |
+
# '''In case you want to upload a .txt file to translate to ASL gloss'''
|
34 |
+
# readFile = input("Enter the file path of the .txt you'd like to translate to ASL gloss: ")
|
35 |
+
# inFile = open(readFile, "r")
|
36 |
+
# data = inFile.read()
|
37 |
+
# inFile.close()
|
38 |
+
# print(f"Your file {readFile} has been uploaded")
|
39 |
+
# return data
|
40 |
+
|
41 |
+
def formatQuery(engText):
    """Prepend the translation request to the English text for GPT-4.

    Returns the fixed instruction sentence, a blank line, and then
    ``engText``.
    """
    # Named ``prefix`` rather than ``instruct`` so it does not shadow the
    # module-level prompt text of that name.
    prefix = (
        "Now, translate the following sentences to perfect ASL gloss using "
        "the grammatical, syntactic, and notation rules you just learned. \n\n"
    )
    return prefix + engText
|
46 |
+
|
47 |
+
|
48 |
+
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens that ``string`` encodes to under a tiktoken encoding.

    ``encoding_name`` is the name passed to ``tiktoken.get_encoding``
    (the caller in this file uses "cl100k_base").
    """
    return len(tiktoken.get_encoding(encoding_name).encode(string))
|
53 |
+
|
54 |
+
|
55 |
+
def checkTokens(tokens, maxTokens=553):
    """Check that a query is short enough to be translated to ASL gloss.

    Args:
        tokens: Number of tokens in the formatted query.
        maxTokens: Exclusive upper bound on the token count. Defaults to 553,
            the limit that used to be hard-coded in this function.
            NOTE(review): the origin of 553 is not visible in this file;
            presumably it is the budget left after the system prompts.

    Returns:
        True if ``tokens`` is below ``maxTokens``, otherwise False. A status
        line is printed to stdout in either case.
    """
    if tokens >= maxTokens:
        print(f"Cannot translate to ASL gloss at this time: too many tokens {tokens}")
        return False
    print(f"Has less than {maxTokens} tokens - can continue translating")
    return True
|
65 |
+
|
66 |
+
def getGlossFromText(query):
    """Translate English text to ASL gloss, refusing over-long input.

    The text is wrapped with the translation request, its tokens are counted
    with the "cl100k_base" encoding, and it is sent to the model only if
    checkTokens() accepts the count. Otherwise a fixed refusal message is
    returned instead of a translation.
    """
    prompt = formatQuery(query)
    tokenCount = num_tokens_from_string(prompt, "cl100k_base")
    if not checkTokens(tokenCount):
        return "Too many tokens: cannot translate"
    return getASLGloss(prompt)
|
76 |
+
|
77 |
+
|
78 |
+
|
79 |
+
def getASLGloss(testQs):
    """Request an ASL gloss translation from OpenAI's chat completion API.

    The module-level prompt texts (instruct, rules, examples and
    exampleDialogues) are sent as system messages, in that order, followed
    by ``testQs`` as the single user message.

    Returns the message content of the first choice in the response.
    """
    systemPrompts = (instruct, rules, examples, exampleDialogues)
    conversation = [{"role": "system", "content": text} for text in systemPrompts]
    conversation.append({"role": "user", "content": testQs})

    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=conversation,
        temperature=0,
    )
    return response["choices"][0]["message"]["content"]
|
96 |
+
|
97 |
+
|
98 |
+
|
99 |
+
|
100 |
+
def main():
    """Build the Gradio interface for the translator and launch it."""
    sampleSentences = [
        ["Every year I buy my dad a gift"],
        ["I always look forward to the family vacation"],
        ["If I don't travel often, I am sad."],
    ]

    demo = gr.Interface(
        fn=getGlossFromText,
        inputs="textbox",
        outputs="text",
        title="English to ASL Gloss",
        description="""Translate English text to ASL Gloss""",
        examples=sampleSentences,
    )
    demo.launch()
|
114 |
+
|
115 |
+
|
116 |
+
|
117 |
+
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
119 |
+
|
120 |
+
|
121 |
+
# def getAnswer(query, texts = texts, embeddings = embeddings):
|
122 |
+
# docsearch = FAISS.from_texts(texts, embeddings)
|
123 |
+
# docs = docsearch.similarity_search(query)
|
124 |
+
# chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
|
125 |
+
# response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
126 |
+
# #interum_q = list(response.keys())
|
127 |
+
# interum_a = list(response.values())
|
128 |
+
# q = query
|
129 |
+
# a = interum_a[0]
|
130 |
+
# return a
|
131 |
+
|
132 |
+
# # query = "describe the fisher database"
|
133 |
+
# # docs = docsearch.similarity_search(query)
|
134 |
+
# # NOTE: a hard-coded OpenAI API key was removed from the next line; it is in the commit history and should be revoked.
# # chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
|
135 |
+
# # chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
136 |
+
# title = "Query the S Drive!"
|
137 |
+
# description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""
|
138 |
+
|
139 |
+
# interface = gr.Interface(
|
140 |
+
# fn=getAnswer,
|
141 |
+
# inputs="textbox",
|
142 |
+
# outputs="text",
|
143 |
+
# title = title,
|
144 |
+
# description = description,
|
145 |
+
# examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"],
|
146 |
+
# ["How many audio files do we have in the CallHome database?"]])
|
147 |
+
# interface.launch()
|
148 |
+
|
149 |
+
|
150 |
+
|
151 |
+
# if __name__ == "__main__":
|
152 |
+
# main()
|
153 |
+
|
154 |
+
# def main():
|
155 |
+
# results = setMode()
|
156 |
+
# print (results)
|
157 |
+
# main()
|
158 |
+
|
159 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai<1.0
|
2 |
+
# os is part of the Python standard library; pip cannot install it, so it must not be listed here
|
3 |
+
tiktoken
|
4 |
+
# pathlib is part of the Python 3 standard library; the PyPI package of that name is an obsolete backport
|
5 |
+
gradio==3.26.0
|