File size: 2,667 Bytes
87ce80f
d8ff437
77d3dbe
87ce80f
e719a30
fab8ffe
e719a30
 
9bd7332
fab8ffe
cc9d2fe
fab8ffe
 
e719a30
fab8ffe
 
cc9d2fe
e719a30
fab8ffe
 
 
 
 
 
cc9d2fe
3001d0c
 
e719a30
cc9d2fe
 
 
 
 
 
 
3001d0c
cc9d2fe
 
3001d0c
e719a30
cc9d2fe
 
 
 
e719a30
fab8ffe
cc9d2fe
 
3001d0c
e719a30
cc9d2fe
 
 
4dff0d4
9aad660
 
cc9d2fe
 
 
9aad660
cc9d2fe
9aad660
cc9d2fe
 
 
e719a30
 
cc9d2fe
 
 
 
 
 
 
 
 
 
 
e719a30
cc9d2fe
 
 
 
 
 
 
 
e719a30
 
7c54255
cc9d2fe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import gradio as gr
from gradio_client import Client, handle_file
from huggingface_hub import InferenceClient

# Gradio client for the Moondream2 Space — image Q&A via its "/answer_question" API.
moondream_client = Client("vikhyatk/moondream2")

# Hugging Face serverless inference client for the chat model.
# NOTE(review): comment/variable say "LLaMA" but the model is Qwen/QwQ-32B-Preview.
llama_client = InferenceClient("Qwen/QwQ-32B-Preview")

# Module-level conversation history: list of {"role": ..., "content": ...} dicts.
history = []

# Image-description helper
def describe_image(image, user_message):
    """Ask Moondream2 about *image* and record the exchange in ``history``.

    Parameters
    ----------
    image : str
        Path (or URL) of the uploaded image, passed through ``handle_file``.
    user_message : str
        The user's question about the image. Previously this was accepted
        but ignored in favor of a hard-coded prompt — bug fixed: it is now
        sent as the prompt, with the old default kept as a fallback.

    Returns
    -------
    str
        The model's answer; also appended to the module-level ``history``.
    """
    global history

    # Use the user's own question when one was typed; otherwise keep the
    # original default prompt so behavior for empty messages is unchanged.
    prompt = user_message if user_message else "Describe this image."

    result = moondream_client.predict(
        img=handle_file(image),
        prompt=prompt,
        api_name="/answer_question"
    )

    description = result
    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": description})

    return description

# Text-only chat using the accumulated history
def chat_with_text(user_message, max_new_tokens=250):
    """Send the running conversation plus *user_message* to the chat model.

    Parameters
    ----------
    user_message : str
        The new user turn.
    max_new_tokens : int, optional
        Token budget forwarded as ``max_tokens`` (default 250).

    Returns
    -------
    str
        The assistant's reply; both turns are appended to ``history``.
    """
    global history

    # Build the outbound message list WITHOUT mutating history first, so a
    # failed API call cannot leave a dangling unanswered user turn that would
    # be re-sent on every later request. This also replaces the previous
    # element-by-element copy of history, which served no purpose.
    messages = history + [{"role": "user", "content": user_message}]

    llama_result = llama_client.chat_completion(
        messages=messages,
        max_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.95
    )

    assistant_reply = llama_result["choices"][0]["message"]["content"]

    # Commit both turns only after a successful completion.
    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": assistant_reply})

    return assistant_reply

# Router: image-and/or-text chat entry point for the Gradio interface
def bot_streaming(message, chat_history=None, max_new_tokens=250):
    """Route a ChatInterface message to the image or text pipeline.

    Gradio's ``ChatInterface`` invokes ``fn(message, history, *additional_inputs)``.
    Bug fixed: the original signature ``(message, max_new_tokens=250)`` meant the
    chat history landed in ``max_new_tokens`` and the slider value was dropped;
    ``chat_history`` (unused — we keep our own module-level history) now absorbs
    that positional argument. The default keeps direct calls backward-compatible.

    Parameters
    ----------
    message : dict
        Multimodal payload: ``{"text": str, "files": [paths]}``.
    chat_history : list, optional
        History supplied by Gradio; ignored in favor of the module-level one.
    max_new_tokens : int, optional
        Forwarded to the text pipeline (default 250).

    Returns
    -------
    str
        The assistant's response.
    """
    user_message = message.get("text", "")

    # Bug fixed: ChatInterface(multimodal=True) delivers uploads under the
    # "files" key (a list of paths), never "image" — the old lookup meant the
    # image branch was unreachable. Keep "image" as a fallback for direct calls.
    files = message.get("files") or []
    image = files[0] if files else message.get("image")

    if image:  # an image was attached
        return describe_image(image, user_message)
    # text-only turn
    return chat_with_text(user_message, max_new_tokens)

# Gradio UI: multimodal chat interface wired to bot_streaming.
demo = gr.ChatInterface(
    fn=bot_streaming,
    title="Multimodal Chat Assistant",
    additional_inputs=[
        # Forwarded to fn as an extra positional argument after the history.
        gr.Slider(
            minimum=10,
            maximum=500,
            value=250,
            step=10,
            label="Maximum number of new tokens to generate",
        )
    ],
    description=(
        "This demo combines text and image understanding using Moondream2 for visual "
        "tasks and LLaMA for conversational AI. Upload an image, ask questions, "
        "or just chat!"
    ),
    stop_btn="Stop Generation",
    fill_height=True,
    multimodal=True,  # messages arrive as {"text": ..., "files": [...]}
)

if __name__ == "__main__":
    demo.launch(debug=True)  # debug=True surfaces tracebacks in the console