Update app.py
Browse files
app.py
CHANGED
@@ -8,13 +8,13 @@ moondream_client = Client("vikhyatk/moondream2")
|
|
8 |
# Use InferenceClient for the chat LLM (named "llama", but the model is Qwen/QwQ-32B-Preview)
|
9 |
llama_client = InferenceClient("Qwen/QwQ-32B-Preview")
|
10 |
|
11 |
-
# Sohbet
|
12 |
history = []
|
13 |
|
14 |
# Image description function
|
15 |
def describe_image(image, user_message):
|
16 |
global history
|
17 |
-
|
18 |
# Send the image to the Moondream2 API
|
19 |
result = moondream_client.predict(
|
20 |
img=handle_file(image),
|
@@ -22,52 +22,72 @@ def describe_image(image, user_message):
|
|
22 |
api_name="/answer_question"
|
23 |
)
|
24 |
|
25 |
-
# Moondream2'den
|
26 |
-
|
|
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
llama_result = llama_client.chat_completion(
|
34 |
-
messages=
|
35 |
-
max_tokens=
|
36 |
-
temperature=0.7,
|
37 |
-
top_p=0.95
|
38 |
)
|
39 |
-
|
40 |
-
# Return the result
|
41 |
-
return description + "\n\nAssistant: " + llama_result['choices'][0]['message']['content']
|
42 |
|
43 |
-
#
|
44 |
-
|
45 |
-
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
return llama_result['choices'][0]['message']['content']
|
61 |
|
62 |
# Gradio interface
|
63 |
-
demo = gr.
|
64 |
-
fn=
|
65 |
-
|
66 |
-
|
67 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
],
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
)
|
71 |
|
72 |
if __name__ == "__main__":
|
73 |
-
demo.launch(
|
|
|
8 |
# Use InferenceClient for the chat LLM (named "llama", but the model is Qwen/QwQ-32B-Preview)
|
9 |
llama_client = InferenceClient("Qwen/QwQ-32B-Preview")
|
10 |
|
11 |
+
# Chat history
|
12 |
history = []
|
13 |
|
14 |
# Image description function
|
15 |
def describe_image(image, user_message):
|
16 |
global history
|
17 |
+
|
18 |
# Send the image to the Moondream2 API
|
19 |
result = moondream_client.predict(
|
20 |
img=handle_file(image),
|
|
|
22 |
api_name="/answer_question"
|
23 |
)
|
24 |
|
25 |
+
description = result # Moondream2'den açıklama alıyoruz
|
26 |
+
history.append({"role": "user", "content": [{"type": "text", "text": user_message}]})
|
27 |
+
history.append({"role": "assistant", "content": [{"type": "text", "text": description}]})
|
28 |
|
29 |
+
return description
|
30 |
+
|
31 |
+
# Chat function that uses the text message and the stored history
|
32 |
+
def chat_with_text(user_message, max_new_tokens=250):
    """Send a text message, together with the stored conversation, to the chat model.

    Args:
        user_message: The text the user typed.
        max_new_tokens: Upper bound on generated tokens, forwarded as
            ``max_tokens`` to ``llama_client.chat_completion``.

    Returns:
        The assistant's reply text taken from the first returned choice.

    Raises:
        Whatever ``llama_client.chat_completion`` raises. The module-level
        ``history`` is left untouched in that case.
    """
    user_turn = {"role": "user", "content": [{"type": "text", "text": user_message}]}

    # History entries store content as a list of {"type": "text", "text": ...}
    # parts; flatten each one into the single string sent to the chat endpoint.
    texts = [
        {"role": msg["role"], "content": " ".join(part["text"] for part in msg["content"])}
        for msg in [*history, user_turn]
    ]

    llama_result = llama_client.chat_completion(
        messages=texts,
        max_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.95,
    )
    assistant_reply = llama_result["choices"][0]["message"]["content"]

    # Commit both turns only after the request succeeded. Appending the user
    # turn up front would leave a dangling user message in the shared history
    # whenever the call fails, so every later request would carry two
    # consecutive user turns.
    history.extend(
        [
            user_turn,
            {"role": "assistant", "content": [{"type": "text", "text": assistant_reply}]},
        ]
    )

    return assistant_reply
|
55 |
+
|
56 |
+
# Chat function for image- and/or text-based messages
|
57 |
+
def bot_streaming(message, history, max_new_tokens=250):
    """Route one chat message to image description or to plain text chat.

    Args:
        message: The incoming chat message. A multimodal Gradio chat passes a
            dict with a ``"text"`` entry and a ``"files"`` list of uploads; a
            plain string is also accepted and treated as text only.
        history: Conversation history supplied by the caller. It is not used
            here (the helpers keep their own module-level history) and it
            shadows that module-level name inside this function.
        max_new_tokens: Generation limit forwarded to ``chat_with_text``.

    Returns:
        The response text from ``describe_image`` when an image is attached,
        otherwise the reply from ``chat_with_text``.
    """
    if isinstance(message, str):
        # Non-multimodal callers hand over the bare text.
        return chat_with_text(message, max_new_tokens)

    user_message = message.get("text", "")

    # Uploads arrive under "files"; the legacy "image" key is still honoured
    # so existing callers that pass it keep working.
    image = message.get("image")
    if not image:
        files = message.get("files") or []
        if files:
            latest = files[-1]
            # Depending on the Gradio version an entry is either a file path
            # or a dict carrying the path under "path".
            image = latest["path"] if isinstance(latest, dict) else latest

    if image:  # An image was uploaded
        return describe_image(image, user_message)

    # Text-only message
    return chat_with_text(user_message, max_new_tokens)
|
|
|
67 |
|
68 |
# Gradio interface
|
69 |
+
# Chat UI wired to bot_streaming. Images are attached through the multimodal
# textbox itself (multimodal=True), so no separate gr.Image input is declared:
# every additional input is passed to the callback as one more positional
# argument, and bot_streaming only accepts max_new_tokens after
# (message, history). The former `optional=True` keyword is also not accepted
# by current Gradio components.
demo = gr.ChatInterface(
    fn=bot_streaming,
    title="Multimodal Chat Assistant",
    additional_inputs=[
        gr.Slider(
            minimum=10,
            maximum=500,
            value=250,
            step=10,
            label="Maximum number of new tokens to generate",
        ),
    ],
    description=(
        "This demo combines text and image understanding using Moondream2 for visual "
        "tasks and LLaMA for conversational AI. Upload an image, ask questions, "
        "or just chat!"
    ),
    stop_btn="Stop Generation",
    fill_height=True,
    multimodal=True,
)

if __name__ == "__main__":
    # debug=True keeps the process attached and prints errors to the console.
    demo.launch(debug=True)
|