# NOTE(review): the three lines here ("Spaces:" / "Running" / "Running") were
# Hugging Face Spaces page-status text captured by the scrape, not code.
# Dependencies: OpenAI client for chat completions, and a GPT-2 tokenizer
# used only to truncate article text to a token budget before sending it.
import ast
import openai
from transformers import GPT2Tokenizer

# Module-level tokenizer, loaded once at import time and shared by limit_tokens().
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Prompt engineering | |
def get_prompt(text):
    """Build the chat prompt asking for 3 discussion questions about *text*.

    Args:
        text: Article content (expected to be pre-truncated by limit_tokens).

    Returns:
        str: Instruction block asking for 3 Q&A pairs in markdown, followed
        by the article quoted in triple double-quotes.
    """
    # (An earlier JSON-list output format was commented out here; removed as
    # dead code — the markdown format below is the one in use.)
    prompt_prefix = """Generate exactly 3 different and thought provoking discussion questions about given article below, and return the answers of these questions with the evidence.
Desired output should be a markdown format like this:
## Q1: <question>
<answer>
## Q2: <question>
<answer>
## Q3: <question>
<answer>
"""
    prompt_postfix = """
Given article content: \"""{}.\"""
"""
    return prompt_prefix + prompt_postfix.format(text)
def limit_tokens(text, n=3000):
    """Truncate *text* to at most *n* GPT-2 tokens.

    Encodes with the module-level tokenizer, keeps only the first *n* token
    ids, and decodes them back into a plain string.

    Args:
        text: Input text of arbitrary length.
        n: Maximum number of tokens to keep (default 3000).

    Returns:
        str: The decoded prefix of *text*, at most *n* tokens long.
    """
    token_ids = tokenizer.encode(text, return_tensors="pt")
    # token_ids is a (1, seq_len) tensor; slice the sequence dimension.
    truncated = token_ids[0][:n]
    return tokenizer.decode(truncated, skip_special_tokens=True)
# Chat completion | |
def get_openai_chatcompletion(text):
    """Get OpenAI Chat Completion result for an article analysis prompt.

    Truncates *text* to the token budget, wraps it in the 3-question
    analysis prompt, and sends it as a single user message to
    ``gpt-3.5-turbo``.

    Args:
        text: Raw article content.

    Returns:
        The ChatCompletion response object from the OpenAI client.

    Raises:
        Whatever the OpenAI client raises on failure — the original
        ``try/except: raise`` was a no-op, so errors simply propagate.
    """
    augmented_prompt = get_prompt(limit_tokens(text))
    messages = [{"role": "user", "content": augmented_prompt}]
    # NOTE(review): this uses the legacy pre-1.0 openai API surface
    # (openai.ChatCompletion); kept as-is to match the installed client.
    return openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0.7,
    )
def get_analyze(result):
    """Extract the assistant's reply text from a ChatCompletion *result*.

    Args:
        result: Mapping shaped like an OpenAI ChatCompletion response, i.e.
            ``result["choices"][0]["message"]["content"]`` must exist.

    Returns:
        str: The content of the first choice's message.

    Raises:
        KeyError / IndexError / TypeError: if *result* is not shaped as
        expected (propagated unchanged — the original ``except: raise``
        was a no-op, and the commented-out ``eval``/``literal_eval``
        variants were dead code and have been removed).
    """
    return result["choices"][0]["message"]["content"]
def get_analyze_result(text):
    """Convenience wrapper: run the chat completion for *text* and return
    the extracted answer text."""
    return get_analyze(get_openai_chatcompletion(text))