# BlackBerry-1 demo Space — "Little Thinking" wrapper around Qwen/Qwen2.5-0.5B.
# (Non-code page-listing residue removed so the file parses as Python.)
import os
import re

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load the model and tokenizer.
# NOTE(review): the original code embedded a hard-coded Hugging Face access
# token here — a leaked credential that must be revoked. Qwen/Qwen2.5-0.5B is
# a public model, so no token is required; for gated models, supply one via
# the HF_TOKEN environment variable instead of committing it to source.
model_name = "Qwen/Qwen2.5-0.5B"
_hf_token = os.environ.get("HF_TOKEN")  # None is fine for public models
tokenizer = AutoTokenizer.from_pretrained(model_name, token=_hf_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=_hf_token)
# System prompt prepended to every query (see generate_response).
system_prompt = """You are BlackBerry, an advanced AI model with the "Little Thinking" technique. You use four "Berry" thinkers to analyze queries and provide accurate responses."""
def generate_response(prompt, max_length=100):
    """Generate one reply from the model for *prompt*.

    Args:
        prompt: User text spliced into the BlackBerry chat scaffold.
        max_length: Maximum number of NEW tokens to generate.

    Returns:
        The decoded reply with everything up to the last "BlackBerry:"
        marker stripped off.
    """
    full_prompt = f"{system_prompt}\n\nUser: {prompt}\n\nBlackBerry:"
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        # Bug fix: the original passed max_length=..., which caps prompt +
        # generated tokens combined — with a near-512-token prompt and
        # max_length=50 nothing new is generated. max_new_tokens caps only
        # the generated continuation. do_sample=True is required for
        # temperature to take effect (it is silently ignored under greedy
        # decoding), and pad_token_id silences the missing-pad warning.
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("BlackBerry:")[-1].strip()
def little_thinking(prompt):
    """Run the four 'Berry' thinkers over *prompt* and join their notes.

    Each Berry contributes one short, italicized analysis paragraph.
    """
    parts = []
    berry_no = 1
    while berry_no <= 4:
        analysis = generate_response(
            f'As Berry-{berry_no}, briefly analyze: {prompt}', max_length=50
        )
        parts.append(f"*Berry-{berry_no}: {analysis}*\n\n")
        berry_no += 1
    return "".join(parts)
def reviewer_thinking(prompt):
    """Return one italicized Reviewer pass over the material in *prompt*."""
    review = generate_response(f'As a Reviewer, briefly check: {prompt}', max_length=50)
    return f"*Reviewer: {review}*\n\n"
def second_reviewer_thinking(prompt):
    """Return one italicized Second-Reviewer verification pass over *prompt*."""
    verification = generate_response(f'As a Second Reviewer, briefly verify: {prompt}', max_length=50)
    return f"*Second Reviewer: {verification}*\n\n"
def blackberry_response(prompt):
    """Full BlackBerry pipeline: Berry analyses, draft answer, review
    pass(es), then a final answer — returned as one running transcript.

    Note the reviewers are fed the transcript accumulated so far, not the
    raw user prompt.
    """
    transcript = "BlackBerry: Analyzing with Little Thinking technique.\n\n"

    # Four-Berry "Little Thinking" analysis.
    transcript += little_thinking(prompt)

    # First draft of the answer.
    transcript += f"BlackBerry: Initial answer:\n{generate_response(prompt, max_length=100)}\n\n"

    # Reviewer examines everything produced so far.
    transcript += reviewer_thinking(transcript)

    # Hard-looking questions get a second verification pass.
    if re.search(r'\b(physics|science|coordinate|hard|difficult)\b', prompt, re.IGNORECASE):
        transcript += second_reviewer_thinking(transcript)

    # Final answer, with a larger generation budget.
    transcript += f"BlackBerry: Final answer:\n{generate_response(prompt, max_length=150)}"
    return transcript
# Wire the pipeline into a single-textbox Gradio UI and start serving.
iface = gr.Interface(
    blackberry_response,
    gr.Textbox(lines=5, label="Enter your query"),
    gr.Textbox(label="BlackBerry's Response"),
    title="Blackberry-1 LLM",
    description="Powered by Qwen/Qwen2.5-0.5B with 'Little Thinking' technique",
)
iface.launch()