Spaces:

jikoni
/

llamaSMS

Sleeping

File size: 2,551 Bytes

6ab5056
e43cdcf
6ab5056
 
 
 
 
 
e43cdcf
 
c9319f3
cc29585
 
 
e43cdcf
d3e16e4
 
6ab5056
 
 
 
 
 
 
 
92dcf0e
6ab5056
 
 
 
 
 
 
 
 
e43cdcf
c9319f3
 
e43cdcf
 
 
 
 
 
 
 
 
 
 
cc29585
92dcf0e
daf235b
 
 
 
 
 
 
e43cdcf

from transformers import AutoModelForCausalLM, GemmaTokenizerFast, TextIteratorStreamer
from flask import Flask, request, jsonify

from threading import Thread
from typing import Iterator

import spaces
import torch
import os

app = Flask(__name__)

print("Hello welcome to Sema AI", flush=True)  # Flush to ensure immediate output

# Get Hugging Face credentials from environment variables
email = os.getenv('HF_EMAIL')
password = os.getenv('HF_PASS')
GEMMA_TOKEN = os.getenv("GEMMA_TOKEN")
#print(f"email is {email} and password is {password}", flush=True)

MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_id = "google/gemma-2-2b-it"
tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
model.config.sliding_window = 4096
model.eval()

@app.route("/")
def hello():
    return "hello 🤗, Welcome to Sema AI Chat Service."
    
# Flask route to handle incoming chat requests
@app.route('/chat', methods=['POST'])
def chat():
    # Get JSON data from the POST request
    data = request.json
    prompt = data.get('prompt')
    email = data.get('email')
    password = data.get('password')

    print(f"email 2 is {email} and password 2 is {password} and The user wants to Know: {prompt}", flush=True)
    
    if not (password):
        return jsonify({"error": "Missing password"}), 400
    elif not (prompt):
        return jsonify({"error": "Missing prompt"}), 400
    elif not (email):
        return jsonify({"error": "Missing email"}), 400
    else:
        return jsonify({"error": "Missing prompt, email, or password"}), 400

    # Generate the response
    response = generate_response(prompt, email, password)
    
    # Return the response as JSON
    return jsonify({"response": response})

# Function for generating LLM response
def generate_response(prompt_input, email, passwd):
    # Hugging Face Login
    sign = Login(email, passwd)
    cookies = sign.login()
    # Create ChatBot                        
    chatbot = hugchat.ChatBot(cookies=cookies.get_dict())

    # Simple dialogue structure
    string_dialogue = "You are a helpful assistant."
    string_dialogue += f"\n\nUser: {prompt_input}\n\nAssistant: "

    # Generate and return the response
    return chatbot.chat(string_dialogue)

if __name__ == '__main__':
    app.run(debug=True)