import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModel

device = "cuda" if torch.cuda.is_available() else "cpu"

# ChatGLM2-6B (int4-quantized); trust_remote_code is required because the repo
# ships its own modeling code. Run in fp16 on GPU, fp32 on CPU.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b-int4", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm2-6b-int4", trust_remote_code=True)
model = (model.half().cuda() if device == "cuda" else model.float()).eval()
def chat(message, history):
    # stream_chat yields progressively longer responses along with the updated
    # history; yielding each partial string lets gr.ChatInterface stream tokens.
    for response, history in model.stream_chat(tokenizer, message, history,
                                               max_length=2048, top_p=0.7, temperature=1.0):
        yield response
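
# A quick smoke test (hypothetical, not part of the original Space): drain the
# generator outside Gradio to confirm streaming works before launching the UI.
#     for partial in chat("Hello!", []):
#         print(partial)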
# Queue requests one at a time so the single model instance is not oversubscribed.
gr.ChatInterface(
    chat,
    title="ProChat (A Chatbot that's FREE!)",
    description="Hi guys! I am a solo developer and I made an app: __ProChat__.",
).queue(concurrency_count=1).launch()
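
# Dependency sketch (an assumption, not pinned by the original): a
# requirements.txt for this Space would need at least:
#   torch
#   transformers
#   gradio
#   sentencepiece   # ChatGLM2 tokenizer
#   cpm_kernels     # int4 quantization kernels used by the ChatGLM2 repo code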