from transformers import AutoModelForCausalLM, GemmaTokenizerFast, TextIteratorStreamer
from flask import Flask, request, jsonify
from threading import Thread
from typing import Iterator
from hugchat import hugchat          # hugging-chat-api client, used in generate_response()
from hugchat.login import Login      # handles the Hugging Face login flow
import spaces
import torch
import os

app = Flask(__name__)

print("Hello, welcome to Sema AI", flush=True)  # Flush to ensure immediate output
# Get Hugging Face credentials from environment variables
email = os.getenv('HF_EMAIL')
password = os.getenv('HF_PASS')
GEMMA_TOKEN = os.getenv("GEMMA_TOKEN")
#print(f"email is {email} and password is {password}", flush=True)

MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# google/gemma-2-2b-it is a gated model, so pass the access token when loading
model_id = "google/gemma-2-2b-it"
tokenizer = GemmaTokenizerFast.from_pretrained(model_id, token=GEMMA_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    token=GEMMA_TOKEN,
)
model.config.sliding_window = 4096
model.eval()
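
# NOTE: the Gemma model loaded above is never called by the routes below, which
# delegate generation to HuggingChat instead. The helper below is an illustrative
# sketch (not part of the original service) showing how the loaded tokenizer/model
# and the token limits defined above would typically be wired together.
def generate_local_response(prompt_input: str, max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS) -> str:
    conversation = [{"role": "user", "content": prompt_input}]
    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    )
    # Truncate from the left so the most recent tokens are kept
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
    input_ids = input_ids.to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=min(max_new_tokens, MAX_MAX_NEW_TOKENS),
            do_sample=True,
            temperature=0.7,
        )
    # Decode only the newly generated tokens, skipping the echoed prompt
    return tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)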
@app.route('/')
def hello():
    return "hello 🤗, Welcome to Sema AI Chat Service."
# Flask route to handle incoming chat requests
@app.route('/chat', methods=['POST'])
def chat():
    # Get JSON data from the POST request (empty dict if the body is not valid JSON)
    data = request.get_json(silent=True) or {}
    prompt = data.get('prompt')
    email = data.get('email')
    password = data.get('password')

    print(f"The user wants to know: {prompt}", flush=True)  # Debug output; avoid logging credentials

    # Validate that all required fields are present
    if not password:
        return jsonify({"error": "Missing password"}), 400
    elif not prompt:
        return jsonify({"error": "Missing prompt"}), 400
    elif not email:
        return jsonify({"error": "Missing email"}), 400

    # Generate the response
    response = generate_response(prompt, email, password)

    # Return the response as JSON
    return jsonify({"response": response})
# Function for generating the LLM response via HuggingChat
def generate_response(prompt_input, email, passwd):
    # Hugging Face login
    sign = Login(email, passwd)
    cookies = sign.login()

    # Create the chatbot with the session cookies
    chatbot = hugchat.ChatBot(cookies=cookies.get_dict())

    # Simple dialogue structure
    string_dialogue = "You are a helpful assistant."
    string_dialogue += f"\n\nUser: {prompt_input}\n\nAssistant: "

    # Generate the response; newer hugchat versions return a Message object,
    # so coerce to str so jsonify can serialize it
    return str(chatbot.chat(string_dialogue))
if __name__ == '__main__':
    app.run(debug=True)
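
# Example request (assumed local run on Flask's default port 5000; values are placeholders):
#   curl -X POST http://localhost:5000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "What is Sema AI?", "email": "you@example.com", "password": "hf-password"}'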