Spaces:
Runtime error
Runtime error
File size: 7,081 Bytes
8eb9635 fc90ecf dd1a64f 8eb9635 04433f3 8eb9635 a336ee3 8eb9635 a336ee3 8eb9635 04433f3 8eb9635 a336ee3 8eb9635 dd1a64f 681eeb9 69cbf5e a336ee3 69cbf5e a7ddb5b 406bc2a a7ddb5b 677a83f 406bc2a 69cbf5e 677a83f a7ddb5b 677a83f a7ddb5b 677a83f 69cbf5e 839440d 8eb9635 5863902 adec782 8eb9635 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
from pathlib import Path
import gradio as gr
import openai
import os
import tiktoken
# Secret key, taken from the "NextStar" environment variable (None when unset)
HF_TOKEN = os.environ.get("NextStar")

# Prompt-engineering file paths, kept at module level so they are globally available
inStructionPath = "intro_instructions_combine.txt"
inRulesPath = "formatting_rules_expanded.txt"
inExamplesPath = "examples_longer1.txt"
inDialoguesPath = "examples_dialogues.txt"
#Set to read in prompting files
def openReadFiles(inpath):
    """Read a prompting file and return its entire contents as one string.

    Args:
        inpath: Location of the text file (string or path-like).

    Returns:
        The full text of the file.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the prompt text is read the same way on
    # every platform instead of depending on the locale's default encoding.
    return Path(inpath).read_text(encoding="utf-8")
# Set up prompting data (so globally available)
# Each file is read once, when the module is loaded; a missing file raises
# here, before the interface is built. getASLGloss sends these four texts,
# in this order, as the system messages of every request.
instruct = openReadFiles(inStructionPath)
rules = openReadFiles(inRulesPath)
examples = openReadFiles(inExamplesPath)
exampleDialogues = openReadFiles(inDialoguesPath)
def formatQuery(engText):
    """Prefix the English text with the translation request sent to GPT4."""
    # Local name chosen so it does not shadow the module-level `instruct`.
    request_prefix = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
    return request_prefix + engText
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens `string` encodes to under the named tiktoken encoding."""
    tokenizer = tiktoken.get_encoding(encoding_name)
    return len(tokenizer.encode(string))
def checkTokens(tokens: int, max_tokens: int = 553) -> bool:
    """Check the token count to ensure we can translate to ASL gloss.

    Prints a short status message either way.

    Args:
        tokens: Number of tokens in the formatted query.
        max_tokens: Exclusive upper bound on the token count; a count equal
            to or above it is rejected. Defaults to 553, the limit that was
            previously hard-coded.

    Returns:
        True when `tokens` is below `max_tokens`, otherwise False.
    """
    if tokens >= max_tokens:
        print(f"Cannot translate to ASL gloss at this time: too many tokens ({tokens})")
        return False
    print("Number of tokens is acceptable: can continue translating")
    return True
def getGlossFromText(query):
    """Run the full English-to-ASL-gloss pipeline for one piece of input text.

    The text is wrapped in the translation request, its token count is
    measured with the "cl100k_base" encoding, and it is only sent on for
    translation when checkTokens accepts that count.
    """
    prompt = formatQuery(query)
    token_count = num_tokens_from_string(prompt, "cl100k_base")
    # Guard clause: over-long prompts get a fixed message instead of a request
    if not checkTokens(token_count):
        return "Too many tokens: cannot translate"
    return getASLGloss(prompt)
def getASLGloss(testQs):
    """Request the ASL gloss for an already-formatted query from OpenAI.

    The four module-level prompt texts are sent as system messages, in
    order, followed by `testQs` as the single user message. Returns the
    message content of the first choice in the response.
    """
    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 openai SDK
    # interface -- confirm the openai version this app is pinned to.
    openai.api_key = HF_TOKEN
    system_prompts = (instruct, rules, examples, exampleDialogues)
    conversation = [{"role": "system", "content": prompt} for prompt in system_prompts]
    conversation.append({"role": "user", "content": testQs})
    response = openai.ChatCompletion.create(
        model='gpt-4-0125-preview',
        messages=conversation,
        temperature=0,
    )
    return response['choices'][0]['message']['content']
def main():
    """Build the Gradio interface for the English-to-ASL-gloss translator and launch it.

    The interface has one text box in and one text field out, and calls
    getGlossFromText for each submission.
    """
    title = "English to ASL Gloss"
    # Description passed to gr.Interface. Written as adjacent string literals
    # (joined at compile time) instead of one backslash-continued literal.
    # The backslashes before <, >, # and - are doubled: the single-backslash
    # forms are invalid escape sequences that newer Python versions warn
    # about, while the resulting text (a literal backslash followed by the
    # character) is unchanged.
    # NOTE(review): the text still contains the typos "associate" and
    # "emphesis"; left untouched so the displayed text does not change here.
    description = (
        "This program uses GPT4 alongside prompt engineering to "
        "translate English text to ASL gloss.\n "
        "<b>Type in the English sentence you would like to translate into ASL Gloss.</b> "
        "\n \n This program was last updated on February 27, 2024, and uses GPT4-Turbo (0125 preview version) "
        "\n\n "
        "\n \n This version of EngToASLGloss contains superscript notation which adds "
        "grammatical context to assist in ASL generation. "
        "\n Below are the guidelines we are using to express grammatical concepts "
        "in ASL gloss."
        "Anything within the angle brackets < > indicates this additional grammatical notation."
        "If the angle brackets are directly next to a word, the notation inside "
        "the angle brackets is associate with just that word, e.g. WILL < A >. "
        "If the angle brackets are next to a whitespace after a word,"
        "the notation inside the angle bracket is associated with all of the words"
        "before it, up until a comma, another angle bracket, or a double space."
        "\n \n This sentence is an example of this rule:"
        "\n NEXT-YEAR < Ti >, MY FIANCE < T >, TWO-OF-US MARRY \\< A \\>."
        "\n\r "
        "\n The superscript notation options that will appear in results are as follows:"
        "\n Ti marks time"
        "\n T marks topic"
        "\n A marks comment"
        "\n Y/N marks yes-no question"
        "\n WHQ marks wh-question"
        "\n RHQ marks rhetorical question"
        "\n < Cond > marks conditional sentences"
        "\n lower case marks directional verbs"
        "\n ++ marks emphesis ('very' or 'a lot of')"
        "\n \\# marks lexical fingerspelling "
        "\n \\- marks space between individual letters of fingerspelling"
        "\n \n <b>Note: This is a prototype and is still in development. "
        "Do not use it in a production deployment.</b> "
        "\n For additional details on how the program works, please see "
        "[the README](https://huggingface.co/spaces/rrakov/EngTexToASLGloss/blob/main/README.md)"
    )
    interface = gr.Interface(
        fn=getGlossFromText,
        inputs="textbox",
        outputs="text",
        title=title,
        description=description,
    )
    # Example prompts, currently disabled:
    # examples = [[("Prompt: Every year I buy my dad a gift \n", "Result: EVERY-YEAR<Ti>, MY DAD GIFT<T>, ME BUY<A>")]])
    # examples=[["Every year I buy my dad a gift"], ["I always look forward to the family vacation"],
    #           ["If I don't travel often, I am sad."]])
    interface.launch()
# Build and launch the interface only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
# def getAnswer(query, texts = texts, embeddings = embeddings):
# docsearch = FAISS.from_texts(texts, embeddings)
# docs = docsearch.similarity_search(query)
# chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
# response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
# #interum_q = list(response.keys())
# interum_a = list(response.values())
# q = query
# a = interum_a[0]
# return a
# # query = "describe the fisher database"
# # docs = docsearch.similarity_search(query)
# # chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
# # chain({"input_documents": docs, "question": query}, return_only_outputs=True)
# title = "Query the S Drive!"
# description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""
# interface = gr.Interface(
# fn=getAnswer,
# inputs="textbox",
# outputs="text",
# title = title,
# description = description,
# examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"],
# ["How many audio files do we have in the CallHome database?"]])
# interface.launch()
# if __name__ == "__main__":
# main()
# def main():
# results = setMode()
# print (results)
# main()
|