# EngTexToASLGloss / eng_to_aslGloss_app.py
# Author: Rachel Rakov
# (Header reconstructed from Hugging Face file-viewer chrome so the file is valid Python.)
from pathlib import Path
import gradio as gr
import openai
import os
import tiktoken
# Set openAI key
# NOTE(review): the key is read from the "NextStar" environment variable
# (presumably a Hugging Face Space secret — confirm); os.getenv returns None if unset.
HF_TOKEN = os.getenv("NextStar")
openai.api_key = HF_TOKEN
#Set prompt engineering paths (so globally available)
# Prompt-engineering text files shipped alongside this script; each is read
# once below and sent to GPT-4 as a system message in getASLGloss.
inStructionPath = "intro_instructions_combine.txt"
inRulesPath = "formatting_rules_expanded.txt"
inExamplesPath = "examples_longer1.txt"
inDialoguesPath = "examples_dialogues.txt"
# Helper to read in the prompting files
def openReadFiles(inpath):
    """Read and return the full text contents of the prompt file at *inpath*.

    Args:
        inpath: Path (str or os.PathLike) to a UTF-8 text file.

    Returns:
        The file's entire contents as a single string.
    """
    # pathlib handles open/close for us, and an explicit encoding keeps
    # behavior stable across platforms (the original relied on the locale default).
    return Path(inpath).read_text(encoding="utf-8")
# Set up prompting data (so globally available)
# Loaded once at import time; getASLGloss sends these four strings to GPT-4
# as system messages, in this order, before the user's query.
instruct = openReadFiles(inStructionPath)
rules = openReadFiles(inRulesPath)
examples = openReadFiles(inExamplesPath)
exampleDialogues = openReadFiles(inDialoguesPath)
### In case we eventually want to upload files
# def uploadText():
# '''In case you want to upload a .txt file to translate to ASL gloss'''
# readFile = input("Enter the file path of the .txt you'd like to translate to ASL gloss: ")
# inFile = open(readFile, "r")
# data = inFile.read()
# inFile.close()
# print(f"Your file {readFile} has been uploaded")
# return data
def formatQuery(engText):
    """Prepend the translation instruction prompt to *engText* for GPT-4.

    Args:
        engText: English text to be translated to ASL gloss.

    Returns:
        The instruction header followed immediately by *engText*.
    """
    # Named `prompt_header` rather than `instruct` (as in the original) to
    # avoid shadowing the module-level `instruct` prompt loaded from file.
    prompt_header = (
        "Now, translate the following sentences to perfect ASL gloss using "
        "the grammatical, syntactic, and notation rules you just learned. \n\n"
    )
    return prompt_header + engText
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return how many tokens *string* encodes to under the named tiktoken encoding.

    Args:
        string: Text to tokenize.
        encoding_name: A tiktoken encoding name, e.g. "cl100k_base".

    Returns:
        The token count as an int.
    """
    codec = tiktoken.get_encoding(encoding_name)
    return len(codec.encode(string))
def checkTokens(tokens):
    """Check tokens to ensure we can translate to ASL gloss.

    Args:
        tokens: Token count of the fully formatted query.

    Returns:
        True if the query is under the 553-token budget, False otherwise.
    """
    # Guard clause instead of flag variable; the prints are kept verbatim
    # since they are part of the app's observable (log) behavior.
    if tokens >= 553:
        print(f"Cannot translate to ASL gloss at this time: too many tokens {tokens}")
        return False
    # No f-prefix needed here: the message has no placeholders.
    print("Has less than 553 tokens - can continue translating")
    return True
def getGlossFromText(query):
    """Format *query*, verify its token budget, and return its ASL gloss.

    This is the Gradio callback: it builds the prompt, counts its tokens
    with the "cl100k_base" encoding, and only calls the OpenAI API when
    the prompt fits the budget enforced by checkTokens.

    Args:
        query: English text entered by the user.

    Returns:
        The ASL gloss string from GPT-4, or an error message when the
        prompt is too long.
    """
    text = formatQuery(query)
    tokens = num_tokens_from_string(text, "cl100k_base")
    # Truthiness check instead of the original `== True` comparison.
    if checkTokens(tokens):
        return getASLGloss(text)
    return "Too many tokens: cannot translate"
def getASLGloss(testQs):
    """Get ASL gloss for *testQs* from GPT-4 using our prompt engineering.

    Args:
        testQs: The fully formatted user query (instructions + English text).

    Returns:
        The model's ASL-gloss translation as a string.
    """
    # System messages carry the prompt-engineering context loaded at module
    # import time: instructions, formatting rules, and worked examples.
    prompt_messages = [
        {"role": "system", "content": instruct},
        {"role": "system", "content": rules},
        {"role": "system", "content": examples},
        {"role": "system", "content": exampleDialogues},
        {"role": "user", "content": testQs},
    ]
    # temperature=0 keeps the translation deterministic.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=prompt_messages,
        temperature=0,
    )
    return response["choices"][0]["message"]["content"]
def main():
    """Build and launch the Gradio English-to-ASL-gloss demo."""
    demo = gr.Interface(
        fn=getGlossFromText,
        inputs="textbox",
        outputs="text",
        title="English to ASL Gloss",
        description="""Translate English text to ASL Gloss""",
        examples=[
            ["Every year I buy my dad a gift"],
            ["I always look forward to the family vacation"],
            ["If I don't travel often, I am sad."],
        ],
    )
    demo.launch()


if __name__ == "__main__":
    main()
# def getAnswer(query, texts = texts, embeddings = embeddings):
# docsearch = FAISS.from_texts(texts, embeddings)
# docs = docsearch.similarity_search(query)
# chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
# response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
# #interum_q = list(response.keys())
# interum_a = list(response.values())
# q = query
# a = interum_a[0]
# return a
# # query = "describe the fisher database"
# # docs = docsearch.similarity_search(query)
# # chain = load_qa_chain(OpenAI(openai_api_key = "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", temperature=0), chain_type="map_reduce", return_map_steps=False)
# # chain({"input_documents": docs, "question": query}, return_only_outputs=True)
# title = "Query the S Drive!"
# description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""
# interface = gr.Interface(
# fn=getAnswer,
# inputs="textbox",
# outputs="text",
# title = title,
# description = description,
# examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"],
# ["How many audio files do we have in the CallHome database?"]])
# interface.launch()
# if __name__ == "__main__":
# main()
# def main():
# results = setMode()
# print (results)
# main()