import gradio as gr
import spaces
from transformers import pipeline
import torch

# Global pipeline cache: the model is loaded at most once per worker process.
pipe = None


@spaces.GPU
def initialize_model():
    """Lazily create and cache the text-generation pipeline.

    Must run inside a ``@spaces.GPU``-decorated call so ZeroGPU allocates
    a device before the weights are placed via ``device_map="auto"``.

    Returns:
        The cached transformers pipeline for ``apexion-ai/Orion-V1-4B``.
    """
    global pipe
    if pipe is None:
        pipe = pipeline(
            "text-generation",
            model="apexion-ai/Orion-V1-4B",
            torch_dtype=torch.float16,
            device_map="auto",
        )
    return pipe


@spaces.GPU
def generate_response(message, history, max_length=512, temperature=0.7, top_p=0.9):
    """Generate an assistant reply with the Orion model.

    Args:
        message: The newest user message.
        history: Prior turns as ``[user, assistant]`` pairs (Gradio chatbot
            "tuples" format); ``assistant`` may be ``None`` for the pending turn.
        max_length: Upper bound on the number of *newly generated* tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The assistant's reply text, or an ``"Error generating response: ..."``
        string if generation fails.
    """
    # Initialize the model inside the GPU-decorated function (ZeroGPU requirement).
    model_pipe = initialize_model()

    # Rebuild the conversation as a chat-template message list.
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    try:
        # BUGFIX: use max_new_tokens rather than max_length. max_length counts
        # the prompt tokens as well, so once the history grew past the slider
        # value there was no room left to generate (or the call errored out).
        response = model_pipe(
            messages,
            max_new_tokens=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=model_pipe.tokenizer.eos_token_id,
        )

        generated_text = response[0]['generated_text']
        # Chat pipelines return the full message list; the reply is the last entry.
        if isinstance(generated_text, list):
            return generated_text[-1]['content']
        # Fallback parsing when the pipeline returns a flat string.
        return str(generated_text).split("assistant")[-1].strip()
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing the app.
        return f"Error generating response: {str(e)}"


def create_interface():
    """Build and return the Gradio Blocks chat UI."""
    with gr.Blocks(title="Orion-V1-4B Chat", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🚀 Orion-V1-4B Chat

        Chat with the Orion-V1-4B model by Apexion AI. This is a 4B parameter language model optimized for conversation.

        **Model:** `apexion-ai/Orion-V1-4B`
        """)

        chatbot = gr.Chatbot(
            height=400,
            placeholder="Start chatting with Orion-V1-4B...",
            label="Chat"
        )

        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Message",
            lines=2
        )

        with gr.Row():
            submit_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")

        with gr.Accordion("Advanced Settings", open=False):
            max_length = gr.Slider(
                minimum=50,
                maximum=2048,
                value=512,
                step=50,
                label="Max Length"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.1,
                label="Top P"
            )

        # Event handlers
        def user_message(message, history):
            # Append the user's turn with a pending (None) assistant slot and
            # clear the textbox.
            return "", history + [[message, None]]

        def bot_response(history, max_len, temp, top_p):
            # Fill in the pending assistant slot for the last turn.
            if history:
                # Renamed from `user_message` to avoid shadowing the sibling
                # handler function of that name.
                last_user_msg = history[-1][0]
                bot_message = generate_response(
                    last_user_msg,
                    history[:-1],
                    max_len,
                    temp,
                    top_p
                )
                history[-1][1] = bot_message
            return history

        # Wire up the events: add the user turn first, then stream in the reply.
        msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, max_length, temperature, top_p], chatbot
        )
        submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, max_length, temperature, top_p], chatbot
        )
        clear_btn.click(lambda: None, None, chatbot, queue=False)

        gr.Markdown("""
        ---
        ### About Orion-V1-4B

        Orion-V1-4B is a 4 billion parameter language model developed by Apexion AI. It's designed for efficient text generation and conversation.

        **Features:**
        - 4B parameters for efficient inference
        - Optimized for conversational AI
        - Supports various text generation tasks

        This Space uses ZeroGPU for efficient GPU allocation.
        """)

    return demo


# Launch the app
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()