from pathlib import Path
import gradio as gr
import openai
import os
import tiktoken 


# Set the OpenAI API key (read from the "NextStar" environment variable)
HF_TOKEN = os.getenv("NextStar")
openai.api_key = HF_TOKEN

# Set prompt engineering paths (so they are globally available)
inStructionPath = "intro_instructions_combine.txt"
inRulesPath = "formatting_rules_expanded.txt"
inExamplesPath = "examples_longer1.txt"
inDialoguesPath = "examples_dialogues.txt"  
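# Note: these are relative paths, resolved against the current working directory
# when the app starts, so the four .txt prompt files must be present there.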

# Helper to read in the prompting files
def openReadFiles(inpath):
    """Read a prompt file from disk and return its contents as a string."""
    infile = Path(inpath)
    with open(infile) as f:
        data = f.read()
    return data


# Load the prompting data (so it is globally available)
instruct = openReadFiles(inStructionPath)
rules = openReadFiles(inRulesPath)
examples = openReadFiles(inExamplesPath)
exampleDialogues = openReadFiles(inDialoguesPath)

### In case we eventually want to upload files
# def uploadText():
#     '''In case you want to upload a .txt file to translate to ASL gloss'''
#     readFile = input("Enter the file path of the .txt you'd like to translate to ASL gloss: ")
#     inFile = open(readFile, "r")
#     data = inFile.read()
#     inFile.close()
#     print(f"Your file {readFile} has been uploaded")
#     return data

def formatQuery(engText):
    """Add prompt instructions to the English text before sending it to GPT-4."""
    translateInstruction = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
    query = translateInstruction + engText
    return query


def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens
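
# Example usage (token counts depend on the encoding; verify locally):
# num_tokens_from_string("tiktoken is great!", "cl100k_base")  # expected to return 6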


def checkTokens(tokens):
    """Checks the token count to ensure we can translate to ASL gloss"""
    goAhead = None
    if tokens >= 553:
        print(f"Cannot translate to ASL gloss at this time: too many tokens ({tokens})")
        goAhead = False
    else:
        goAhead = True
        print("Has fewer than 553 tokens - can continue translating")
    return goAhead
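
# e.g. checkTokens(600) -> False (prints a warning); checkTokens(100) -> True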
    
def getGlossFromText(query):
    """Format the query, check the token count, and request the ASL gloss."""
    text = formatQuery(query)
    tokens = num_tokens_from_string(text, "cl100k_base")
    goAhead = checkTokens(tokens)
    if goAhead:
        results = getASLGloss(text)
    else:
        results = "Too many tokens: cannot translate"
    return results
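
# Direct usage sketch (no Gradio UI), assuming the "NextStar" env var holds a valid OpenAI key:
# print(getGlossFromText("Every year I buy my dad a gift"))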



def getASLGloss(testQs):
    """Get ASL gloss from OpenAI using our prompt engineering"""
    
    completion = openai.ChatCompletion.create(
      model = 'gpt-4',
      messages = [
          {"role": "system", "content": instruct},
          {"role": "system", "content": rules},
          {"role": "system", "content": examples},
          {"role": "system", "content": exampleDialogues},
          {"role": "user", "content": testQs},
      ],
    
      temperature = 0  
    )
    results = completion['choices'][0]['message']['content']
    return results
     

    

def main():
    
    title = "English to ASL Gloss"
    description = """Translate English text to ASL Gloss"""

    interface = gr.Interface(
        fn=getGlossFromText, 
        inputs="textbox", 
        outputs="text",
        title = title,
        description = description,
        examples=[["Every year I buy my dad a gift"], ["I always look forward to the family vacation"], 
                ["If I don't travel often, I am sad."]])
    interface.launch()
    

    
if __name__ == "__main__":
    main()

    
#     def getAnswer(query, texts = texts, embeddings = embeddings):
#         docsearch = FAISS.from_texts(texts, embeddings)
#         docs = docsearch.similarity_search(query)
#         chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
#         response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
#             #interum_q = list(response.keys())
#         interum_a = list(response.values())
#         q = query
#         a = interum_a[0]
#         return a

#     # query = "describe the fisher database"
#     # docs = docsearch.similarity_search(query)
#     # chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
#     # chain({"input_documents": docs, "question": query}, return_only_outputs=True)
#     title = "Query the S Drive!"
#     description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""

#     interface = gr.Interface(
#         fn=getAnswer, 
#         inputs="textbox", 
#         outputs="text",
#         title = title,
#         description = description,
#         examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"], 
#                 ["How many audio files do we have in the CallHome database?"]])
#     interface.launch()
    

    
# if __name__ == "__main__":
#     main()

# def main():
#     results = setMode()
#     print (results)
# main()