# app.py -- "Update app.py" by gokilashree (commit 1e434ee, verified)
# Hugging Face file view: raw / history / blame, 3.38 kB
from transformers import MBartForConditionalGeneration, MBart50Tokenizer, AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr
import requests
import io
from PIL import Image
import os
# Translation stack: one mBART-50 checkpoint name feeds both the seq2seq model
# and its tokenizer.
model_name = "facebook/mbart-large-50-many-to-one-mmt"
model = MBartForConditionalGeneration.from_pretrained(model_name)
tokenizer = MBart50Tokenizer.from_pretrained(model_name)
# Read the Hugging Face API token for the text-to-image model from the
# environment and build the request headers from it.
hf_api_key = os.getenv("new_hf_token")
# An empty value is treated like a missing one: it would only produce a
# token-less "Bearer " header.
if not hf_api_key:
    raise ValueError("Hugging Face API key not found! Please set 'new_hf_token' environment variable.")
headers = {"Authorization": f"Bearer {hf_api_key}"}

# Define the text-to-image model URL (using a faster text-to-image model)
API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4"
# Text generation: a smaller causal language model (picked to reduce
# generation time) wrapped in a "text-generation" pipeline.
# NOTE(review): nothing in the visible part of this file calls
# `text_generator`; confirm it is still needed before paying its load cost.
text_generation_model_name = "EleutherAI/gpt-neo-1.3B"
text_model = AutoModelForCausalLM.from_pretrained(text_generation_model_name)
text_tokenizer = AutoTokenizer.from_pretrained(text_generation_model_name)
text_generator = pipeline(
    task="text-generation",
    model=text_model,
    tokenizer=text_tokenizer,
)
# Function to generate an image using Hugging Face's text-to-image model
def generate_image_from_text(translated_text, timeout=120):
    """Request an image from the hosted text-to-image model.

    Args:
        translated_text: Text produced by the translation step.
            NOTE(review): the prompt sent to the model is a fixed string and
            does not include this value -- confirm whether that is intended.
        timeout: Seconds to wait for the inference API before giving up.
            Without a timeout a stalled request would block the caller
            indefinitely.

    Returns:
        The image opened from the response bytes, or ``None`` when the
        request or the decoding fails (the error is printed).
    """
    # Enhanced prompt to focus on details and clarity
    enhanced_prompt = (
        "A high-quality image of a person doing yoga with clear facial features and correct body proportions in a tranquil outdoor setting. "
        "Include detailed mountains, flowing river, and vibrant greenery, captured in soft sunrise light. Ensure the face and body are realistic and proportional."
    )
    print(f"Generating image with prompt: {enhanced_prompt}")

    try:
        # Sending the enhanced prompt to the text-to-image model
        response = requests.post(
            API_URL,
            headers=headers,
            json={"inputs": enhanced_prompt},
            timeout=timeout,
        )
        if response.status_code != 200:
            raise ValueError(f"Error in image generation: {response.text}")
        return Image.open(io.BytesIO(response.content))
    except Exception as e:
        # Deliberate best-effort boundary: any failure (network, timeout,
        # non-200 reply, undecodable bytes) is reported and turned into None
        # instead of propagating to the caller.
        print(f"Error: {e}")
        return None
# Translation Function
def translate_text(input_text, src_lang="en_XX", tgt_lang="hi_IN"):
    """Translate ``input_text`` with the module-level mBART model.

    Args:
        input_text: Text to translate.
        src_lang: Language code assigned to ``tokenizer.src_lang`` before
            encoding.
        tgt_lang: Language code whose token id is forced as the first
            generated token.
            NOTE(review): the loaded checkpoint is named "many-to-one";
            confirm it actually supports ``hi_IN`` as a target language.

    Returns:
        The first decoded sequence with special tokens removed, or ``""``
        when ``input_text`` is empty or whitespace-only.

    Raises:
        KeyError: If ``tgt_lang`` is not in ``tokenizer.lang_code_to_id``.
    """
    # Nothing to translate: skip the model call instead of decoding whatever
    # it would generate for an empty prompt.
    if not input_text or not input_text.strip():
        return ""

    tokenizer.src_lang = src_lang
    encoded_input = tokenizer(input_text, return_tensors="pt")
    # Unpack the whole encoding so the attention mask reaches generate()
    # together with the input ids.
    generated_tokens = model.generate(
        **encoded_input,
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
    )
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
# Gradio callback for image generation
def translate_and_generate_image(input_text):
    """Translate ``input_text`` and pass the result to the image generator.

    Returns whatever ``generate_image_from_text`` returns for the translated
    text.
    """
    return generate_image_from_text(translate_text(input_text))
# Gradio UI: a text input wired to `translate_and_generate_image`, with an
# image output.
iface = gr.Interface(
    fn=translate_and_generate_image,
    inputs="text",
    outputs="image",
    title="Yoga Image Generator",
    description="Enter a description to translate and generate a high-quality yoga image.",
)

# Start the interface.
iface.launch()