import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load your hosted model and tokenizer from Hugging Face.
model_name = "Samurai719214/gptneo-mythology-storyteller"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Use GPU if available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)


def generate_full_story(excerpt: str) -> str:
    """
    Given an incomplete story excerpt (without header details), call the model to
    generate the complete story, including the Parv, Key Event, Section, and the
    story continuation.
    """
    # Tokenize the user-provided excerpt.
    encoded_input = tokenizer(excerpt, return_tensors="pt")
    # Move tensors to the same device as the model.
    encoded_input = {k: v.to(device) for k, v in encoded_input.items()}
    # Generate tokens; the sampling parameters below control length and creativity.
    output = model.generate(
        encoded_input["input_ids"],
        attention_mask=encoded_input["attention_mask"],
        max_new_tokens=200,  # Generate up to 200 new tokens on top of the input.
        do_sample=True,
        temperature=0.8,
        top_p=0.95,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id,  # GPT-Neo has no pad token; reuse EOS to avoid a warning.
        return_dict_in_generate=True,
    )
    # Decode the generated sequence (prompt plus continuation).
    generated_text = tokenizer.decode(output.sequences[0], skip_special_tokens=True)
    return generated_text
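

# Optional sanity check: the helper can be called directly, without the UI.
# The excerpt below is only an illustrative placeholder; actual output will differ
# on every run because sampling (do_sample=True) is enabled.
#
#     print(generate_full_story("Arjuna raised his bow as the armies gathered at Kurukshetra"))
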
# Build the Gradio interface.
interface = gr.Interface(
    fn=generate_full_story,
    inputs=gr.Textbox(
        lines=5,
        label="Incomplete story excerpt",
        placeholder="Enter an excerpt from the Mahabharata here...",
    ),
    outputs=gr.Textbox(label="Chapter summary"),
    title="🏺 Mythology Storyteller",
    description=(
        "Enter a phrase from a chapter of your choice (if possible, include the Parv, "
        "Key Event, and Section for a more accurate answer). "
        "The model will generate a summary of the respective chapter."
    ),
)
# Launch the Gradio app.
interface.launch(share=True)