from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig, pipeline
from flask import Flask, request, jsonify
from threading import Thread
from typing import Iterator
import spaces
import torch
import os

app = Flask(__name__)

print("Hello welcome to Sema AI", flush=True)  # Flush to ensure immediate output
@app.route('/')  # greeting endpoint; the root path is assumed
def hello():
    return "hello 🤗, Welcome to Sema AI Chat Service."
# Get Hugging Face credentials from environment variables
email = os.getenv('HF_EMAIL')
password = os.getenv('HF_PASS')
GEMMA_TOKEN = os.getenv("GEMMA_TOKEN")
#print(f"email is {email} and password is {password}", flush=True)

# Warn if any required credential is missing
if not (email and password and GEMMA_TOKEN):
    print("Missing HF_EMAIL, HF_PASS, or GEMMA_TOKEN environment variable", flush=True)
""" | |
MAX_MAX_NEW_TOKENS = 2048 | |
DEFAULT_MAX_NEW_TOKENS = 1024 | |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096")) | |
model_id = "google/gemma-2-2b-it" | |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") | |
tokenizer = GemmaTokenizerFast.from_pretrained(model_id) | |
model = AutoModelForCausalLM.from_pretrained( | |
model_id, | |
device_map="auto", | |
torch_dtype=torch.bfloat16, | |
) | |
model.config.sliding_window = 4096 | |
model.eval() | |
""" | |
model_id = "google/gemma-2-2b-it"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_id, token=GEMMA_TOKEN)
quantization_config = GPTQConfig(
    bits=4,
    group_size=128,
    dataset="c4",  # calibration datasets from the GPTQ paper: 'wikitext2', 'c4', 'c4-new', 'ptb', 'ptb-new'
    desc_act=False,
    tokenizer=tokenizer,
    batch_size=1,
)
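# Note: passing this config to from_pretrained() quantizes the model on the
# fly, calibrating on the "c4" dataset (this requires the optimum and
# auto-gptq packages and can take a while). A sketch, assuming a local output
# path, of saving the quantized weights once so later runs can reload them
# without re-quantizing:
#   model.save_pretrained("gemma-2-2b-it-gptq")      # path is illustrative
#   tokenizer.save_pretrained("gemma-2-2b-it-gptq")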
quantized = False
if quantized:
    # 4-bit GPTQ model, calibrated at load time via quantization_config
    model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_id,
        token=GEMMA_TOKEN,
        quantization_config=quantization_config,
        device_map=device,
    )
else:
    # Full model in float16
    model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_id,
        token=GEMMA_TOKEN,
        torch_dtype=torch.float16,
        device_map=device,
    )
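# Optional sanity check: get_memory_footprint() (a transformers utility on
# loaded models) reports how much memory the weights occupy, which differs
# between the 4-bit and float16 branches above.
# print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")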
app_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
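# Quick smoke test (hypothetical prompt) to confirm the pipeline generates;
# a text-generation pipeline returns a list of dicts with "generated_text".
# print(app_pipeline("Hello", max_new_tokens=10)[0]["generated_text"])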
# POST endpoint that runs the model on a JSON prompt (the /generate path is assumed)
@app.route('/generate', methods=['POST'])
def generate_text():
    data = request.json
    prompt = data.get("prompt", "")
    max_new_tokens = data.get("max_new_tokens", 1000)
    do_sample = data.get("do_sample", True)
    temperature = data.get("temperature", 0.1)
    top_k = data.get("top_k", 50)
    top_p = data.get("top_p", 0.95)

    # apply_chat_template expects a list of messages, not a bare string
    messages = [{"role": "user", "content": prompt}]
    tokenized_prompt = app_pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True)
    outputs = app_pipeline(
        tokenized_prompt,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
    )
    # Strip the echoed prompt so only the model's reply is returned
    return jsonify({"response": outputs[0]["generated_text"][len(tokenized_prompt):]})
if __name__ == "__main__":
    app.run(debug=False, port=8888)
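# Example request (assuming the server is running locally on port 8888, as
# configured above):
#   curl -X POST http://localhost:8888/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "What is Sema AI?", "max_new_tokens": 200}'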
""" | |
# Flask route to handle incoming chat requests | |
@app.route('/chat', methods=['POST']) | |
def chat(): | |
# Get JSON data from the POST request | |
data = request.json | |
prompt = data.get('prompt') | |
email = data.get('email') | |
password = data.get('password') | |
print(f"email 2 is {email} and password 2 is {password} and The user wants to Know: {prompt}", flush=True) | |
    # Validate inputs; the success path below is reached only when all three
    # fields are present
    if not password:
        return jsonify({"error": "Missing password"}), 400
    elif not prompt:
        return jsonify({"error": "Missing prompt"}), 400
    elif not email:
        return jsonify({"error": "Missing email"}), 400

    # Generate the response
    response = generate_response(prompt, email, password)

    # Return the response as JSON
    return jsonify({"response": response})
# Function for generating LLM response
def generate_response(prompt_input, email, passwd):
    # Hugging Face Login
    sign = Login(email, passwd)
    cookies = sign.login()

    # Create ChatBot
    chatbot = hugchat.ChatBot(cookies=cookies.get_dict())

    # Simple dialogue structure
    string_dialogue = "You are a helpful assistant."
    string_dialogue += f"\n\nUser: {prompt_input}\n\nAssistant: "

    # Generate and return the response
    return chatbot.chat(string_dialogue)
if __name__ == '__main__':
    app.run(debug=True)
"""