Rachel Rakov committed on
Commit
8eb9635
·
1 Parent(s): 5b162f6

Added app file and requirements

Browse files
Files changed (2) hide show
  1. eng_to_aslGloss_app.py +159 -0
  2. requirements.txt +5 -0
eng_to_aslGloss_app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import gradio as gr
3
+ import openai
4
+ import os
5
+ import tiktoken
6
+
7
# The OpenAI API key is taken from the "NextStar" environment variable.
# If the variable is unset the key is None.
HF_TOKEN = os.environ.get("NextStar")
openai.api_key = HF_TOKEN

# Paths of the prompt-engineering text files, defined at module level so
# they are available to everything below.
inStructionPath = "intro_instructions_combine.txt"
inRulesPath = "formatting_rules_expanded.txt"
inExamplesPath = "examples_longer1.txt"
inDialoguesPath = "examples_dialogues.txt"
16
+
17
+ #Set to read in prompting files
18
def openReadFiles(inpath):
    """Return the entire text content of a prompting file.

    Args:
        inpath: Path to the file, as a string or ``Path``.

    Returns:
        The file's content as a single string.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the prompt text is read the same way on
    # every platform instead of depending on the machine's locale default.
    return Path(inpath).read_text(encoding="utf-8")
23
+
24
+
25
# Read every prompting file once at import time; the resulting strings are
# module-level globals (read in the same order as the paths are listed).
instruct, rules, examples, exampleDialogues = (
    openReadFiles(promptPath)
    for promptPath in (
        inStructionPath,
        inRulesPath,
        inExamplesPath,
        inDialoguesPath,
    )
)
30
+
31
+ ### In case we eventually want to upload files
32
+ # def uploadText():
33
+ # '''In case you want to upload a .txt file to translate to ASL gloss'''
34
+ # readFile = input("Enter the file path of the .txt you'd like to translate to ASL gloss: ")
35
+ # inFile = open(readFile, "r")
36
+ # data = inFile.read()
37
+ # inFile.close()
38
+ # print(f"Your file {readFile} has been uploaded")
39
+ # return data
40
+
41
def formatQuery(engText):
    """Prefix English text with the translation request sent to the model.

    Args:
        engText: The English text to translate.

    Returns:
        The request sentence, a blank line, then ``engText``.
    """
    # Deliberately not named ``instruct``: that name is the module-level
    # system prompt, and shadowing it here made the two easy to confuse.
    translationRequest = (
        "Now, translate the following sentences to perfect ASL gloss using "
        "the grammatical, syntactic, and notation rules you just learned. \n\n"
    )
    return translationRequest + engText
46
+
47
+
48
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens ``string`` encodes to under a named tiktoken encoding.

    Args:
        string: The text to tokenize.
        encoding_name: Name passed to ``tiktoken.get_encoding``.

    Returns:
        The length of the encoded token sequence.
    """
    token_ids = tiktoken.get_encoding(encoding_name).encode(string)
    return len(token_ids)
53
+
54
+
55
def checkTokens(tokens, max_tokens=553):
    """Check the token count to ensure we can translate to ASL gloss.

    Prints a short status message either way.

    Args:
        tokens: Number of tokens in the formatted query.
        max_tokens: Exclusive upper bound on ``tokens``. Defaults to 553,
            the cutoff this app has always used.

    Returns:
        True when ``tokens`` is below ``max_tokens``; False otherwise.
    """
    if tokens >= max_tokens:
        print(f"Cannot translate to ASL gloss at this time: too many tokens {tokens}")
        return False
    # The limit is interpolated so the message always matches the check.
    print(f"Has less than {max_tokens} tokens - can continue translating")
    return True
65
+
66
def getGlossFromText(query):
    """Translate English text to ASL gloss, refusing over-long input.

    The text is wrapped by ``formatQuery``, its tokens are counted with the
    "cl100k_base" encoding, and ``checkTokens`` decides whether to proceed.

    Args:
        query: The English text to translate.

    Returns:
        The result of ``getASLGloss`` for the formatted query, or the string
        "Too many tokens: cannot translate" when the token check fails.
    """
    prompt = formatQuery(query)
    tokenCount = num_tokens_from_string(prompt, "cl100k_base")
    # Guard clause: stop before calling the model when the check fails.
    if not checkTokens(tokenCount):
        return "Too many tokens: cannot translate"
    return getASLGloss(prompt)
76
+
77
+
78
+
79
def getASLGloss(testQs):
    """Request an ASL gloss translation from OpenAI using the loaded prompts.

    The module-level ``instruct``, ``rules``, ``examples`` and
    ``exampleDialogues`` strings are sent as four system messages, in that
    order, followed by ``testQs`` as the user message.

    Args:
        testQs: The formatted query to send as the user message.

    Returns:
        The message content of the first choice in the completion.
    """
    systemPrompts = (instruct, rules, examples, exampleDialogues)
    conversation = [
        {"role": "system", "content": prompt} for prompt in systemPrompts
    ]
    conversation.append({"role": "user", "content": testQs})

    response = openai.ChatCompletion.create(
        model='gpt-4',
        messages=conversation,
        temperature=0,
    )
    firstChoice = response['choices'][0]
    return firstChoice['message']['content']
96
+
97
+
98
+
99
+
100
def main():
    """Build the Gradio interface around ``getGlossFromText`` and launch it."""
    sampleSentences = [
        ["Every year I buy my dad a gift"],
        ["I always look forward to the family vacation"],
        ["If I don't travel often, I am sad."],
    ]
    app = gr.Interface(
        fn=getGlossFromText,
        inputs="textbox",
        outputs="text",
        title="English to ASL Gloss",
        description="""Translate English text to ASL Gloss""",
        examples=sampleSentences,
    )
    app.launch()


# Only start the app when this file is run as a script.
if __name__ == "__main__":
    main()
119
+
120
+
121
+ # def getAnswer(query, texts = texts, embeddings = embeddings):
122
+ # docsearch = FAISS.from_texts(texts, embeddings)
123
+ # docs = docsearch.similarity_search(query)
124
+ # chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
125
+ # response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
126
+ # #interum_q = list(response.keys())
127
+ # interum_a = list(response.values())
128
+ # q = query
129
+ # a = interum_a[0]
130
+ # return a
131
+
132
+ # # query = "describe the fisher database"
133
+ # # docs = docsearch.similarity_search(query)
134
+ # # chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
135
+ # # chain({"input_documents": docs, "question": query}, return_only_outputs=True)
136
+ # title = "Query the S Drive!"
137
+ # description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""
138
+
139
+ # interface = gr.Interface(
140
+ # fn=getAnswer,
141
+ # inputs="textbox",
142
+ # outputs="text",
143
+ # title = title,
144
+ # description = description,
145
+ # examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"],
146
+ # ["How many audio files do we have in the CallHome database?"]])
147
+ # interface.launch()
148
+
149
+
150
+
151
+ # if __name__ == "__main__":
152
+ # main()
153
+
154
+ # def main():
155
+ # results = setMode()
156
+ # print (results)
157
+ # main()
158
+
159
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openai<1.0  # the app calls the legacy openai.ChatCompletion API, which was removed in openai 1.0
2
+ # os is part of the Python standard library; it is not a pip-installable requirement
3
+ tiktoken
4
+ # pathlib is part of the Python 3 standard library; the PyPI "pathlib" package is an obsolete backport
5
+ gradio==3.26.0