import subprocess

import gradio as gr
from ollama import chat, ChatResponse

# Single source of truth for the model tag used by both the chat calls
# and the startup download.
MODEL = "deepseek-r1:1.5b"


def interact(message: str, history: list):
    """Stream a deepseek-r1 reply for the Gradio ChatInterface.

    The model emits its chain-of-thought wrapped in literal ``<think>`` /
    ``</think>`` delimiter tokens.  Those spans are routed into a separate
    collapsible ``gr.ChatMessage`` (titled "Thinking Cloud") while the rest
    of the tokens accumulate into the visible answer.

    Args:
        message: The user's newest message.
        history: Prior conversation in Gradio "messages" format
            (list of ``{"role": ..., "content": ...}`` dicts).

    Yields:
        ``[thinking_message, answer_text]`` pairs, re-yielded on every
        streamed chunk so the UI updates incrementally.
    """
    chat_history = list(history)
    chat_history.append({"role": "user", "content": message})

    response: ChatResponse = chat(
        model=MODEL,
        messages=chat_history,
        stream=True,
    )

    text_response = ""
    thinking_response = gr.ChatMessage(
        content="",
        metadata={"title": "Thinking Cloud"},
    )
    thinking = False

    for chunk in response:
        bit = chunk["message"]["content"]
        # BUG FIX: the original compared ``bit`` against "" in both branches
        # (the tag text was evidently stripped at some point), which made the
        # elif unreachable and left ``thinking`` permanently False — the
        # thinking bubble never received any content.  Deepseek-R1 delimits
        # its reasoning with <think>...</think> tokens; toggle on those.
        if bit == "<think>":
            thinking = True
            continue
        elif bit == "</think>":
            thinking = False
            continue
        if thinking:
            thinking_response.content += bit
        else:
            text_response += bit
        yield [thinking_response, text_response]


interface = gr.ChatInterface(
    fn=interact,
    type="messages",
    title="Deepseek-R1 Chat Interface",
)

if __name__ == "__main__":
    # BUG FIX: ``ollama run <model>`` starts an interactive REPL and blocks
    # forever, so interface.launch() below was never reached.  ``ollama pull``
    # just ensures the model is downloaded locally and then exits.
    # check=False: a failed pull (e.g. offline but model cached) should not
    # crash the app before the UI even starts.
    subprocess.run(["ollama", "pull", MODEL], check=False)
    interface.launch()