File size: 2,551 Bytes
6ab5056
e43cdcf
6ab5056
 
 
 
 
 
e43cdcf
 
c9319f3
cc29585
 
 
e43cdcf
d3e16e4
 
6ab5056
 
 
 
 
 
 
 
92dcf0e
6ab5056
 
 
 
 
 
 
 
 
e43cdcf
c9319f3
 
e43cdcf
 
 
 
 
 
 
 
 
 
 
cc29585
92dcf0e
daf235b
 
 
 
 
 
 
e43cdcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from transformers import AutoModelForCausalLM, GemmaTokenizerFast, TextIteratorStreamer
from flask import Flask, request, jsonify

from threading import Thread
from typing import Iterator

import spaces
import torch
import os

app = Flask(__name__)

print("Hello welcome to Sema AI", flush=True)  # Flush to ensure immediate output

# Get Hugging Face credentials from environment variables
email = os.getenv('HF_EMAIL')
password = os.getenv('HF_PASS')
GEMMA_TOKEN = os.getenv("GEMMA_TOKEN")
#print(f"email is {email} and password is {password}", flush=True)

MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_id = "google/gemma-2-2b-it"
tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
model.config.sliding_window = 4096
model.eval()

@app.route("/")
def hello():
    return "hello 🤗, Welcome to Sema AI Chat Service."
    
# Flask route to handle incoming chat requests
@app.route('/chat', methods=['POST'])
def chat():
    # Get JSON data from the POST request
    data = request.json
    prompt = data.get('prompt')
    email = data.get('email')
    password = data.get('password')

    print(f"email 2 is {email} and password 2 is {password} and The user wants to Know: {prompt}", flush=True)
    
    if not (password):
        return jsonify({"error": "Missing password"}), 400
    elif not (prompt):
        return jsonify({"error": "Missing prompt"}), 400
    elif not (email):
        return jsonify({"error": "Missing email"}), 400
    else:
        return jsonify({"error": "Missing prompt, email, or password"}), 400

    # Generate the response
    response = generate_response(prompt, email, password)
    
    # Return the response as JSON
    return jsonify({"response": response})

# Function for generating LLM response
def generate_response(prompt_input, email, passwd):
    # Hugging Face Login
    sign = Login(email, passwd)
    cookies = sign.login()
    # Create ChatBot                        
    chatbot = hugchat.ChatBot(cookies=cookies.get_dict())

    # Simple dialogue structure
    string_dialogue = "You are a helpful assistant."
    string_dialogue += f"\n\nUser: {prompt_input}\n\nAssistant: "

    # Generate and return the response
    return chatbot.chat(string_dialogue)

if __name__ == '__main__':
    app.run(debug=True)