from transformers import AutoModelForCausalLM, GemmaTokenizerFast
from flask import Flask, request, jsonify
# hugchat provides the HuggingChat client used in generate_response below
from hugchat import hugchat
from hugchat.login import Login
import spaces
import torch
import os
app = Flask(__name__)
print("Hello welcome to Sema AI", flush=True) # Flush to ensure immediate output
# Get Hugging Face credentials from environment variables (Space secrets)
email = os.getenv('HF_EMAIL')
password = os.getenv('HF_PASS')
GEMMA_TOKEN = os.getenv("GEMMA_TOKEN")
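# Generation limits; MAX_INPUT_TOKEN_LENGTH is configurable via the environment.
# These are defined for the local Gemma model and are not yet wired into a route.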
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_id = "google/gemma-2-2b-it"
tokenizer = GemmaTokenizerFast.from_pretrained(model_id, token=GEMMA_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    token=GEMMA_TOKEN,  # Gemma is gated on the Hub; an access token is required
)
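# Cap the attention sliding window and switch to inference mode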
model.config.sliding_window = 4096
model.eval()
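# NOTE: the Gemma model above is loaded locally, but the /chat route below
# answers via HuggingChat (hugchat) rather than this local model.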
@app.route("/")
def hello():
return "hello 🤗, Welcome to Sema AI Chat Service."
# Flask route to handle incoming chat requests
@app.route('/chat', methods=['POST'])
def chat():
    # Get JSON data from the POST request
    data = request.json
    prompt = data.get('prompt')
    email = data.get('email')
    password = data.get('password')
    print(f"The user wants to know: {prompt}", flush=True)  # avoid logging credentials
    # Validate required fields; reject the request if any is missing
    if not password:
        return jsonify({"error": "Missing password"}), 400
    if not prompt:
        return jsonify({"error": "Missing prompt"}), 400
    if not email:
        return jsonify({"error": "Missing email"}), 400
    # Generate the response
    response = generate_response(prompt, email, password)
    # Return the response as JSON
    return jsonify({"response": response})
# Generate the LLM response via HuggingChat (hugchat)
def generate_response(prompt_input, email, passwd):
    # Log in to Hugging Face and reuse the session cookies
    sign = Login(email, passwd)
    cookies = sign.login()
    # Create the ChatBot with the authenticated session
    chatbot = hugchat.ChatBot(cookies=cookies.get_dict())
    # Simple dialogue structure
    string_dialogue = "You are a helpful assistant."
    string_dialogue += f"\n\nUser: {prompt_input}\n\nAssistant: "
    # Generate and return the response
    return chatbot.chat(string_dialogue)
if __name__ == '__main__':
    app.run(debug=True)
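# Example request (assuming the app runs on Flask's default port 5000):
#   curl -X POST http://localhost:5000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Hello", "email": "you@example.com", "password": "..."}'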