"""Gradio app: translate Tamil to English, expand it with LLaMA, then render an image.

Pipeline per request:
  1. Tamil -> English translation with ``facebook/mbart-large-50-many-to-one-mmt``.
  2. Descriptive text generation with ``meta-llama/Llama-3.2-1B``.
  3. Image generation through the Hugging Face Inference API.
"""

import io
import os

import gradio as gr
import requests
import torch  # For LLaMA text generation
from PIL import Image
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
from transformers import MBartForConditionalGeneration, MBart50Tokenizer
from transformers import pipeline

# Load the translation model and tokenizer
model_name = "facebook/mbart-large-50-many-to-one-mmt"
tokenizer = MBart50Tokenizer.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(model_name)

# Load the LLaMA model for text generation
model_id = "meta-llama/Llama-3.2-1B"
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,  # Using bfloat16 for reduced memory footprint
    device_map="auto",  # Automatically assign devices for multi-GPU or CPU fallback
)

# Text-to-image model served by the Hugging Face Inference API
API_URL = "https://api-inference.huggingface.co/models/ZB-Tech/Text-to-Image"

# Only send an Authorization header when the token is actually configured;
# otherwise the request would carry the literal string "Bearer None".
_hf_token = os.getenv('hf_tokens')
headers = {"Authorization": f"Bearer {_hf_token}"} if _hf_token else {}

# Upper bound (seconds) on a single image-generation request so a stalled
# endpoint cannot hang the Gradio worker indefinitely.
REQUEST_TIMEOUT_SECONDS = 120


def _query_image_api(payload):
    """POST *payload* to the text-to-image endpoint and return the raw body bytes.

    Raises:
        gr.Error: if the request fails, times out, or returns a non-2xx status.
    """
    try:
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
    except requests.RequestException as err:
        raise gr.Error(f"Image generation request failed: {err}") from err
    return response.content


def translate_and_generate_image(tamil_text: str):
    """Translate Tamil text, expand it into a description, and generate an image.

    Args:
        tamil_text: Tamil input text entered by the user.

    Returns:
        A 3-tuple ``(translated_text, generated_text, image)`` where the first
        two items are strings and ``image`` is a ``PIL.Image.Image``.

    Raises:
        gr.Error: if the input is blank, the image API request fails, or the
            API response cannot be decoded as an image.
    """
    if not tamil_text or not tamil_text.strip():
        raise gr.Error("Please enter some Tamil text.")

    # Step 1: Translate Tamil text to English using mbart-large-50
    tokenizer.src_lang = "ta_IN"
    inputs = tokenizer(tamil_text, return_tensors="pt")
    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"],
    )
    translated_text = tokenizer.batch_decode(
        translated_tokens, skip_special_tokens=True
    )[0]

    # Step 2: Generate descriptive English text using the LLaMA model.
    # max_new_tokens bounds only the continuation; max_length would also count
    # the prompt tokens and leave no room to generate for long translations.
    generated_text = pipe(
        translated_text,
        max_new_tokens=100,
        num_return_sequences=1,
    )[0]['generated_text']

    # Step 3: Use the generated English text to create an image
    image_bytes = _query_image_api({"inputs": generated_text})
    try:
        image = Image.open(io.BytesIO(image_bytes))
    except OSError as err:
        # PIL raises UnidentifiedImageError (an OSError) for non-image bodies.
        raise gr.Error("Image API did not return a valid image.") from err

    return translated_text, generated_text, image


# Gradio interface setup
iface = gr.Interface(
    fn=translate_and_generate_image,
    inputs=gr.Textbox(lines=2, placeholder="Enter Tamil text here..."),
    outputs=[
        gr.Textbox(label="Translated English Text"),
        gr.Textbox(label="Generated Descriptive Text"),
        gr.Image(label="Generated Image"),
    ],
    title="Tamil to English Translation, Text Generation with LLaMA, and Image Creation",
    description="Translate Tamil text to English using Facebook's mbart-large-50 model, generate descriptive text using Meta's LLaMA model, and create an image using the generated text.",
)

# Launch Gradio app with a shareable link
iface.launch(share=True)