import gradio as gr
from PIL import Image
import requests
import os
from together import Together
import base64
from threading import Thread
import time
import io

# Global Together client, created lazily on the first chat request.
client = None


def initialize_client(api_key=None):
    """Create the global Together client.

    If *api_key* is given it is stored in the process environment so the
    Together SDK can pick it up.

    Raises:
        ValueError: if no API key is available in the environment.
    """
    global client
    if api_key:
        os.environ["TOGETHER_API_KEY"] = api_key
    if "TOGETHER_API_KEY" in os.environ:
        client = Together()
    else:
        raise ValueError("Please provide a Together API Key")


def encode_image(image_path, max_size=(800, 800), quality=85):
    """Downscale the image at *image_path* and return it base64-encoded as JPEG.

    Args:
        image_path: Path to the image file on disk.
        max_size: Maximum (width, height); the image is shrunk in place,
            preserving aspect ratio.
        quality: JPEG quality (1-95).

    Returns:
        The JPEG bytes encoded as a base64 ASCII string.
    """
    with Image.open(image_path) as img:
        img.thumbnail(max_size)  # in-place resize, preserves aspect ratio
        if img.mode in ("RGBA", "LA"):
            # JPEG has no alpha channel: composite onto a white background.
            # BUG FIX: the original built the background with mode
            # img.mode[:-1] and a 3-tuple fill, which raises for "LA"
            # (single-band mode cannot take an RGB tuple).
            background = Image.new("RGB", img.size, (255, 255, 255))
            background.paste(img, mask=img.split()[-1])
            img = background
        elif img.mode != "RGB":
            # BUG FIX: palette ("P") and other modes cannot be saved as JPEG.
            img = img.convert("RGB")
        buffered = io.BytesIO()
        img.save(buffered, format="JPEG", quality=quality)
        return base64.b64encode(buffered.getvalue()).decode("utf-8")


def bot_streaming(message, history, together_api_key, max_new_tokens=250, max_history=5):
    """Stream a chat completion from Llama-Vision for a (multimodal) message.

    Args:
        message: Gradio multimodal payload: {"text": str, "files": [...]}.
        history: Gradio tuple-format chat history; an image turn is stored as
            a ``(path, ...)`` tuple in one entry with its text in the next
            entry (assumed from the original indexing — TODO confirm against
            the Gradio version in use).
        together_api_key: API key entered in the UI textbox.
        max_new_tokens: Token budget for the completion.
        max_history: Number of most-recent history entries to replay.

    Yields:
        The accumulated response text after each streamed chunk.
    """
    if client is None:
        initialize_client(together_api_key)

    txt = message["text"]
    messages = []

    # BUG FIX: the original iterated history[-max_history:] but indexed
    # history[i+1] / history[i-1] on the FULL list, so indices were wrong
    # whenever the history was truncated, the last element could raise
    # IndexError, and i-1 at i == 0 silently wrapped to the final entry.
    recent = history[-max_history:]
    for i, (user_part, assistant_part) in enumerate(recent):
        if isinstance(user_part, tuple):
            # Image turn: pair the image with the text of the FOLLOWING
            # history entry (how Gradio splits a multimodal submission).
            if i + 1 < len(recent):
                follow_user, follow_assistant = recent[i + 1]
                messages.append({
                    "role": "user",
                    "content": [
                        {"type": "text", "text": follow_user},
                        {"type": "image_url",
                         "image_url": {"url": f"data:image/jpeg;base64,{encode_image(user_part[0])}"}},
                    ],
                })
                messages.append({
                    "role": "assistant",
                    "content": [{"type": "text", "text": follow_assistant}],
                })
        elif isinstance(user_part, str):
            # Skip the text entry that was already consumed with the
            # preceding image turn.
            if i > 0 and isinstance(recent[i - 1][0], tuple):
                continue
            messages.append({"role": "user", "content": [{"type": "text", "text": user_part}]})
            messages.append({"role": "assistant", "content": [{"type": "text", "text": assistant_part}]})

    if len(message["files"]) == 1:
        if isinstance(message["files"][0], str):  # examples
            image_path = message["files"][0]
        else:  # regular input
            image_path = message["files"][0]["path"]
        messages.append({
            "role": "user",
            "content": [
                {"type": "text", "text": txt},
                {"type": "image_url",
                 "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"}},
            ],
        })
    else:
        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})

    try:
        stream = client.chat.completions.create(
            model="meta-llama/Llama-Vision-Free",
            messages=messages,
            max_tokens=max_new_tokens,
            stream=True,
        )
        buffer = ""
        for chunk in stream:
            if chunk.choices[0].delta.content is not None:
                buffer += chunk.choices[0].delta.content
                # Small pause smooths the visible streaming in the UI.
                time.sleep(0.01)
                yield buffer
    except Exception as e:
        if "Request Entity Too Large" in str(e):
            yield "The image is too large. Please try with a smaller image or compress the existing one."
        else:
            yield f"An error occurred: {str(e)}"


with gr.Blocks() as demo:
    gr.Markdown("# Meta Llama-3.2-11B-Vision-Instruct (FREE)")
    gr.Markdown("Try the new Llama 3.2 11B Vision API by Meta for free through Together AI. Upload an image, and start chatting about it. Just paste in your [Together AI API key](https://api.together.xyz/settings/api-keys) and get started!")

    together_api_key = gr.Textbox(
        label="Together API Key",
        placeholder="Enter your TOGETHER_API_KEY here",
        type="password",
    )

    chatbot = gr.ChatInterface(
        fn=bot_streaming,
        textbox=gr.MultimodalTextbox(),
        # BUG FIX: the API-key textbox must be an additional input so that
        # bot_streaming's `together_api_key` parameter actually receives the
        # key. The original passed only the slider, so the slider's integer
        # landed in `together_api_key` (crashing initialize_client, which
        # cannot write a non-string into os.environ) and the token count was
        # never forwarded at all. The original also wired
        # together_api_key.change(...) to overwrite the token slider with the
        # key text — that mis-wiring is removed here.
        additional_inputs=[
            together_api_key,
            gr.Slider(
                minimum=10,
                maximum=500,
                value=250,
                step=10,
                label="Maximum number of new tokens to generate",
            ),
        ],
        cache_examples=False,
        stop_btn="Stop Generation",
        fill_height=True,
        multimodal=True,
    )

if __name__ == "__main__":
    demo.launch(debug=True)