Spaces:

suayptalha
/

QwQ-32B-Preview-Vision

Running

File size: 3,104 Bytes

87ce80f
d8ff437
77d3dbe
87ce80f
8f1cf32
7b6d332
fab8ffe
8f1cf32
 
 
 
 
 
 
 
 
 
 
 
 
 
fab8ffe
 
 
 
8f1cf32
 
 
cc9d2fe
8f1cf32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e719a30
8f1cf32
b5de101
8f1cf32
 
b5de101
7b6d332
8f1cf32
 
 
 
 
 
b5de101
8f1cf32
 
 
 
 
 
 
 
b5de101
e719a30
7c54255
8f1cf32

import gradio as gr
from gradio_client import Client, handle_file
from huggingface_hub import InferenceClient

# Moondream2 (image question answering) is reached through a gradio_client
# Client created for "vikhyatk/moondream2".
moondream_client = Client("vikhyatk/moondream2")

# Qwen/QwQ-32B-Preview (text chat) is reached through a huggingface_hub
# InferenceClient. NOTE: despite the name `llama_client`, the model is Qwen.
llama_client = InferenceClient("Qwen/QwQ-32B-Preview")

# Chat history.
# NOTE(review): every function visible in this file takes `history` as a
# parameter, which shadows this global; it appears to be unused here.
history = []

# Image question-answering helper
def describe_image(image, user_message, history):
    """Ask Moondream2 about an image and record the exchange in ``history``.

    Args:
        image: Location of the image (forwarded to ``handle_file``). When it
            is ``None`` no request is made.
        user_message: The text the user sent with the image. It is used as
            the prompt for Moondream2; when it is blank (or not a string) the
            generic prompt ``"Describe this image."`` is used instead.
        history: List of ``{"role": ..., "content": ...}`` dicts. It is
            mutated in place: the user message and the model's answer are
            appended.

    Returns:
        A ``(reply, history)`` tuple. ``reply`` is the value returned by
        Moondream2, or ``"No image provided"`` when ``image`` is ``None`` (in
        which case ``history`` is returned unchanged).
    """
    # Without an image there is nothing to send; report it to the caller.
    if image is None:
        return "No image provided", history

    # Previously the prompt was always "Describe this image.", so a question
    # typed alongside the upload was silently ignored. Prefer the user's text.
    prompt = user_message.strip() if isinstance(user_message, str) else ""
    if not prompt:
        prompt = "Describe this image."

    # Send the image and the prompt to the Moondream2 endpoint.
    description = moondream_client.predict(
        img=handle_file(image),
        prompt=prompt,
        api_name="/answer_question",
    )

    # Record both sides of the exchange as plain role/content entries.
    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": description})

    return description, history

# Text-only chat turn using the accumulated history
def chat_with_text(user_message, history, max_new_tokens=250):
    """Send the conversation to Qwen/QwQ-32B-Preview and record the reply.

    Args:
        user_message: The new user text; appended to ``history`` before the
            request is made.
        history: List of dicts with ``"role"`` and ``"content"`` keys. It is
            mutated in place (user message, then assistant reply).
        max_new_tokens: Passed to the chat completion call as ``max_tokens``.

    Returns:
        A ``(assistant_reply, history)`` tuple.
    """
    # The new user turn becomes part of the context sent to the model.
    history.append(dict(role="user", content=user_message))

    # Forward the whole history, keeping only the role/content fields.
    payload = []
    for turn in history:
        payload.append({"role": turn["role"], "content": turn["content"]})

    completion = llama_client.chat_completion(
        messages=payload,
        max_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.95,
    )

    # Pull the text of the first choice and store it as the assistant turn.
    first_choice = completion["choices"][0]
    reply = first_choice["message"]["content"]
    history.append(dict(role="assistant", content=reply))

    return reply, history

# Image and/or text chat entry point used by the Gradio interface
def _history_as_messages(history):
    """Convert a Gradio chat history into text-only role/content dicts.

    Accepts both the "messages" format (dicts with ``role``/``content``) and
    the pair format (``[user, assistant]`` per turn). Entries whose content is
    not a non-empty string (e.g. uploaded files, missing replies) are skipped
    because the text model can only consume plain text.
    """
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            turns = [(entry.get("role"), entry.get("content"))]
        else:
            turns = zip(("user", "assistant"), entry)
        for role, content in turns:
            if isinstance(role, str) and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
    return messages


def _extract_text_and_image(message):
    """Return ``(text, image)`` from a plain-string or dict chat message.

    For dict messages the image is taken from the legacy ``"image"`` key if
    present, otherwise from the first entry of ``"files"`` (either a path or a
    dict carrying a ``"path"`` key). ``image`` is ``None`` when nothing was
    attached.
    """
    if isinstance(message, str):
        return message, None

    text = message.get("text") or ""
    image = message.get("image")
    if not image:
        files = message.get("files") or []
        if files:
            first = files[0]
            image = first.get("path") if isinstance(first, dict) else first
    return text, image


def bot_streaming(message, history=None, max_new_tokens=250):
    """Answer one chat turn, routing to the image or the text model.

    Args:
        message: The submitted message: either a plain string or a dict with
            a ``"text"`` entry and optionally ``"files"`` (or ``"image"``).
        history: Previous turns as supplied by the chat UI, in either pair or
            role/content format. It is not modified; a normalized copy is
            passed to the model helpers.
        max_new_tokens: Generation limit forwarded to the text model.

    Returns:
        The reply text only. The chat UI keeps track of the history itself,
        so returning a ``(reply, history)`` tuple here would be rendered
        incorrectly.
    """
    user_message, image = _extract_text_and_image(message)

    # Nothing to answer: avoid sending an empty prompt to the model.
    if not image and not user_message.strip():
        return "Please enter a message or upload an image."

    chat_history = _history_as_messages(history)

    if image:  # An image was attached: ask the vision model.
        response, _ = describe_image(image, user_message, chat_history)
    else:  # Text only: continue the conversation with the text model.
        response, _ = chat_with_text(user_message, chat_history, max_new_tokens)

    # Only the text is returned, not the history.
    return response

# Gradio interface: a chat UI whose turns are handled by `bot_streaming`.
demo = gr.ChatInterface(
    fn=bot_streaming,
    title="Multimodal Chat Assistant",
    # Extra controls; their values are passed to `fn` after the message and
    # history, so this slider feeds `max_new_tokens` (default 250).
    additional_inputs=[
        gr.Slider(
            minimum=10,
            maximum=500,
            value=250,
            step=10,
            label="Maximum number of new tokens to generate",
        )
    ],
    description=(
        "This demo combines text and image understanding using Moondream2 for visual "
        "tasks and Qwen/QwQ-32B-Preview for conversational AI. Upload an image, ask questions, "
        "or just chat!"
    ),
    stop_btn="Stop Generation",
    fill_height=True,
    # NOTE(review): with multimodal=True the message given to `fn` is expected
    # to be a dict rather than a plain string — confirm the exact keys against
    # the installed Gradio version.
    multimodal=True,
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)