Spaces:

bertugmirasyedi
/

aristotle-api

Sleeping

App Files Files Community

aristotle-api / app.py

bertugmirasyedi

Changed subparts to functions

d9187f0 over 2 years ago

raw

history blame

12.6 kB

	import functools
	import logging
	import os
	import re

	from fastapi import FastAPI
	from fastapi.middleware.cors import CORSMiddleware

	# Create the application object. Passing docs_url="/" moves the interactive
	# API documentation page to the root path.
	app = FastAPI(docs_url="/")

	# CORS policy: any origin, any method and any header are accepted, and
	# credentials are allowed.
	# NOTE(review): the FastAPI CORS documentation advises against combining
	# wildcard origins with allow_credentials=True -- confirm whether credentials
	# are actually needed by the front end.
	_cors_options = {
	    "allow_origins": ["*"],
	    "allow_credentials": True,
	    "allow_methods": ["*"],
	    "allow_headers": ["*"],
	}
	app.add_middleware(CORSMiddleware, **_cors_options)


	# Cover image used whenever a source does not provide one.
	_PLACEHOLDER_IMAGE = (
	    "https://bookstoreromanceday.org/wp-content/uploads/2020/08/"
	    "book-cover-placeholder.png"
	)

	# Marker stored in place of a missing text field.
	_MISSING = "Null"

	# Matches one ChatGPT recommendation line. Case-insensitive because the prompt
	# asks for lowercase keys while the model may answer with capitalised ones;
	# non-greedy groups keep commas that appear inside a title or publisher name.
	_CHATGPT_LINE = re.compile(
	    r"title:\s*(?P<title>.+?),\s*author:\s*(?P<author>.+?),"
	    r"\s*publisher:\s*(?P<publisher>.+?),\s*summary:\s*(?P<summary>.+)",
	    re.IGNORECASE,
	)


	def _book(title, author, publisher, description, image=_PLACEHOLDER_IMAGE):
	    """Bundle one search hit into the record shape shared by all sources."""
	    return {
	        "title": title,
	        "author": author,
	        "publisher": publisher,
	        "description": description,
	        "image": image,
	    }


	def _lookup(getter, default=_MISSING):
	    """Return ``getter()``, or ``default`` when a nested field is absent.

	    Covers a missing key, an empty list and a ``None`` intermediate object,
	    all of which can occur when drilling into API responses.
	    """
	    try:
	        return getter()
	    except (KeyError, IndexError, TypeError):
	        return default


	def _gbooks_search(query, n_results=30):
	    """Query the Google Books API and return one record per volume.

	    Args:
	        query: Search terms; converted to ``str`` before being sent.
	        n_results: Value sent as the ``maxResults`` request parameter.

	    Returns:
	        A list of book records (see ``_book``). Empty when the response
	        carries no ``"items"`` entry.

	    Raises:
	        requests.HTTPError: If the API answers with an error status.
	    """
	    import requests

	    response = requests.get(
	        "https://www.googleapis.com/books/v1/volumes",
	        params={"q": str(query), "printType": "books", "maxResults": n_results},
	        timeout=30,
	    )
	    response.raise_for_status()

	    books = []
	    for item in response.json().get("items", []):
	        volume_info = item.get("volumeInfo", {})

	        title = volume_info.get("title", _MISSING)
	        if "subtitle" in volume_info:
	            title = f"{title}: {volume_info['subtitle']}"

	        books.append(
	            _book(
	                title=title,
	                # Only the first listed author is kept.
	                author=_lookup(lambda: volume_info["authors"][0]),
	                publisher=volume_info.get("publisher", _MISSING),
	                description=volume_info.get("description", _MISSING),
	                image=_lookup(
	                    lambda: volume_info["imageLinks"]["thumbnail"],
	                    default=_PLACEHOLDER_IMAGE,
	                ),
	            )
	        )
	    return books


	def _openalex_search(query, n_results=10):
	    """Run a search on OpenAlex and return one record per work.

	    Args:
	        query: Search terms; converted to ``str`` before being sent.
	        n_results: Page size and overall cap passed to the pager.

	    Returns:
	        A list of book records built from the first result page. Empty when
	        the pager yields nothing.
	    """
	    import pyalex
	    from pyalex import Works

	    # NOTE(review): this address looks like an e-mail-obfuscation artefact of
	    # the page the file was copied from -- restore the real contact address.
	    pyalex.config.email = "[email protected]"

	    pager = Works().search(str(query)).paginate(per_page=n_results, n_max=n_results)

	    # Only the first page is used; a default avoids an IndexError on no results.
	    first_page = next(iter(pager), [])

	    books = []
	    for result in first_page:
	        # Subscript access (not .get) is kept on purpose so any custom item
	        # lookup implemented by the result object still applies.
	        # The description may come back as None; _predict handles that case.
	        books.append(
	            _book(
	                title=_lookup(lambda: result["title"]),
	                author=_lookup(
	                    lambda: result["authorships"][0]["author"]["display_name"]
	                ),
	                publisher=_lookup(lambda: result["host_venue"]["publisher"]),
	                description=_lookup(lambda: result["abstract"]),
	            )
	        )
	    return books


	def _openai_search(query, n_results=10):
	    """Ask the OpenAI chat API for recommendations and parse the reply.

	    The API key is read from the ``OPENAI_API_KEY`` environment variable; it
	    must never be committed to the source. When the variable is unset the
	    source is skipped and a warning is logged.

	    Args:
	        query: Topic inserted into the prompt.
	        n_results: Number of books requested in the prompt.

	    Returns:
	        A list of book records, one per reply line that matches the
	        ``title/author/publisher/summary`` layout. Lines that do not match
	        (introductions, blank lines, truncated entries) are ignored.
	    """
	    import openai

	    api_key = os.environ.get("OPENAI_API_KEY")
	    if not api_key:
	        logging.getLogger(__name__).warning(
	            "OPENAI_API_KEY is not set; skipping ChatGPT recommendations"
	        )
	        return []
	    openai.api_key = api_key

	    chatgpt_response = openai.ChatCompletion.create(
	        model="gpt-3.5-turbo",
	        messages=[
	            {
	                "role": "system",
	                "content": "You are a librarian. You are helping a patron find a book.",
	            },
	            {
	                "role": "user",
	                "content": f"Recommend me {n_results} books about {query}. Your response should be like: 'title: <title>, author: <author>, publisher: <publisher>, summary: <summary>'",
	            },
	        ],
	    )
	    content = chatgpt_response["choices"][0]["message"]["content"]

	    books = []
	    for line in content.splitlines():
	        match = _CHATGPT_LINE.search(line)
	        if match is None:
	            continue
	        books.append(
	            _book(
	                title=match["title"].strip(),
	                author=match["author"].strip(),
	                publisher=match["publisher"].strip(),
	                description=match["summary"].strip(),
	            )
	        )
	    return books


	@functools.lru_cache(maxsize=1)
	def _load_pipelines():
	    """Build the summarization and zero-shot pipelines once per process.

	    Cached so the models are not reloaded on every request.

	    Returns:
	        A ``(summarizer_pipeline, zs_classifier)`` tuple.
	    """
	    from transformers import (
	        AutoModelForSeq2SeqLM,
	        AutoModelForSequenceClassification,
	        AutoTokenizer,
	        pipeline,
	    )

	    summarizer_pipeline = pipeline(
	        "summarization",
	        model=AutoModelForSeq2SeqLM.from_pretrained("pszemraj/led-base-book-summary"),
	        tokenizer=AutoTokenizer.from_pretrained("pszemraj/led-base-book-summary"),
	        batch_size=64,
	    )

	    zs_classifier = pipeline(
	        "zero-shot-classification",
	        model=AutoModelForSequenceClassification.from_pretrained(
	            "sileod/deberta-v3-base-tasksource-nli"
	        ),
	        tokenizer=AutoTokenizer.from_pretrained(
	            "sileod/deberta-v3-base-tasksource-nli"
	        ),
	        batch_size=64,
	        hypothesis_template="This book is {}.",
	        multi_label=True,
	    )
	    return summarizer_pipeline, zs_classifier


	@functools.lru_cache(maxsize=1)
	def _load_sentence_transformer():
	    """Load the sentence-embedding model once per process."""
	    from sentence_transformers import SentenceTransformer

	    return SentenceTransformer("all-MiniLM-L6-v2")


	def _predict(titles, descriptions, publishers, similarity="false"):
	    """Summarize, classify and optionally cross-compare the books.

	    Args:
	        titles: Book titles.
	        descriptions: Book descriptions, parallel to ``titles``; an entry may
	            be ``None``, in which case its summary is ``"Null"``.
	        publishers: Publisher names, parallel to ``titles``.
	        similarity: Any value other than the string ``"false"`` enables the
	            similarity ranking.

	    Returns:
	        A ``(summaries, classes, similar_books)`` tuple of lists parallel to
	        ``titles``.
	    """
	    summarizer_pipeline, zs_classifier = _load_pipelines()

	    # One text per book, fed to both the classifier and the embedder.
	    combined_data = [
	        f"The book's title is {title}. It is published by {publisher}. This book is about {description}"
	        for title, description, publisher in zip(titles, descriptions, publishers)
	    ]

	    # Only the first 1024 characters of each description are summarized.
	    summaries = [
	        summarizer_pipeline(description[0:1024])
	        if description is not None
	        else [{"summary_text": "Null"}]
	        for description in descriptions
	    ]

	    candidate_labels = [
	        "Introductory",
	        "Advanced",
	        "Academic",
	        "Not Academic",
	        "Manual",
	    ]
	    classes = [zs_classifier(doc, candidate_labels) for doc in combined_data]

	    if similarity != "false":
	        from sentence_transformers import util

	        book_embeddings = _load_sentence_transformer().encode(
	            combined_data, convert_to_tensor=True
	        )
	        # One batched call ranks every book against the whole collection.
	        hits_per_book = util.semantic_search(
	            book_embeddings, book_embeddings, top_k=20
	        )
	        # The top hit is dropped -- presumably it is the book itself.
	        similar_books = [{"sorted_by_similarity": hits[1:]} for hits in hits_per_book]
	    else:
	        similar_books = [{"sorted_by_similarity": []} for _ in titles]

	    return summaries, classes, similar_books


	@app.get("/search={query}&similarity={similarity}")
	def search(query, similarity="false"):
	    """Search several sources for books and enrich the hits with ML output.

	    Results from Google Books, OpenAlex and the OpenAI chat API are gathered
	    in that order, each exactly once, then summarized and classified.

	    Args:
	        query: Search terms taken from the request path.
	        similarity: Taken from the request path; any value other than the
	            string ``"false"`` adds a similarity ranking to each result.

	    Returns:
	        A list with one dict per book holding ``id``, ``title``, ``author``,
	        ``publisher``, ``image_link``, the two top ``labels`` with their
	        ``label_confidences``, ``summary``, ``similar_books``, the per-stage
	        ``checkpoints`` (whole seconds) and the total ``runtime`` string.
	        Empty when no source returned anything.
	    """
	    import time

	    start_time = time.time()

	    books = _gbooks_search(query)
	    first_checkpoint = time.time()
	    first_checkpoint_time = int(first_checkpoint - start_time)

	    books.extend(_openalex_search(query))
	    second_checkpoint = time.time()
	    second_checkpoint_time = int(second_checkpoint - first_checkpoint)

	    books.extend(_openai_search(query))
	    third_checkpoint = time.time()
	    third_checkpoint_time = int(third_checkpoint - second_checkpoint)

	    # Nothing to enrich: skip the model work entirely.
	    if not books:
	        return []

	    summaries, classes, similar_books = _predict(
	        [book["title"] for book in books],
	        [book["description"] for book in books],
	        [book["publisher"] for book in books],
	        similarity=similarity,
	    )
	    fourth_checkpoint = time.time()
	    fourth_checkpoint_time = int(fourth_checkpoint - third_checkpoint)

	    runtime = f"{time.time() - start_time:.2f} seconds"
	    checkpoints = [
	        first_checkpoint_time,
	        second_checkpoint_time,
	        third_checkpoint_time,
	        fourth_checkpoint_time,
	    ]

	    return [
	        {
	            "id": index,
	            "title": book["title"],
	            "author": book["author"],
	            "publisher": book["publisher"],
	            "image_link": book["image"],
	            "labels": book_classes["labels"][0:2],
	            "label_confidences": book_classes["scores"][0:2],
	            "summary": summary[0]["summary_text"],
	            "similar_books": similar["sorted_by_similarity"],
	            "checkpoints": checkpoints,
	            "runtime": runtime,
	        }
	        for index, (book, book_classes, summary, similar) in enumerate(
	            zip(books, classes, summaries, similar_books)
	        )
	    ]