llamaSMS / main.py
from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig, pipeline
from flask import Flask, request, jsonify
from threading import Thread
from typing import Iterator
import spaces
import torch
import os
app = Flask(__name__)
print("Hello welcome to Sema AI", flush=True) # Flush to ensure immediate output
@app.route("/")
def hello():
return "hello 🤗, Welcome to Sema AI Chat Service."
# Get Hugging Face credentials from environment variables
email = os.getenv('HF_EMAIL')
password = os.getenv('HF_PASS')
GEMMA_TOKEN = os.getenv("GEMMA_TOKEN")
#print(f"email is {email} and password is {password}", flush=True)
if not (email and password and GEMMA_TOKEN):
    print("Missing credentials: HF_EMAIL, HF_PASS, or GEMMA_TOKEN", flush=True)
"""
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
model_id = "google/gemma-2-2b-it"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
model.config.sliding_window = 4096
model.eval()
"""
model_id = "google/gemma-2-2b-it"
device = "cuda:0" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_id, token=GEMMA_TOKEN)
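# Optional 4-bit GPTQ quantization settings; they are only applied when `quantized` is set to True below.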
quantization_config = GPTQConfig(
    bits=4,
    group_size=128,
    dataset="c4",  # the original datasets used in the GPTQ paper: ['wikitext2', 'c4', 'c4-new', 'ptb', 'ptb-new']
    desc_act=False,
    tokenizer=tokenizer,
    batch_size=1,
)
quantized = False
if quantized:
    model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_id,
        token=GEMMA_TOKEN,
        quantization_config=quantization_config,
        device_map=device,
    )
else:
    model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_id,
        token=GEMMA_TOKEN,
        torch_dtype=torch.float16,
        device_map=device,
    )
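# Wrap the loaded Gemma model and its tokenizer in a text-generation pipeline used by the /generate_text route below.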
app_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
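# POST /generate_text expects a JSON body such as:
#   {"prompt": "...", "max_new_tokens": 1000, "do_sample": true,
#    "temperature": 0.1, "top_k": 50, "top_p": 0.95}
# Only "prompt" is strictly needed; the other fields fall back to the defaults read below.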
@app.route("/generate_text", methods=["POST"])
def generate_Text():
data = request.json
prompt = data.get("prompt", "")
max_new_tokens = data.get("max_new_tokens", 1000)
do_sample = data.get("do_sample", True)
temperature = data.get("temperature", 0.1)
top_k = data.get("top_k", 50)
top_p = data.get("top_p", 0.95)
tokenized_prompt = app_pipeline.tokenizer.apply_chat_template(
prompt, tokenize=False, add_generation_prompt=True)
outputs = app_pipeline(
tokenized_prompt,
max_new_tokens=max_new_tokens,
do_sample=do_sample,
temperature=temperature,
top_k=top_k,
top_p=top_p
)
return jsonify({"response": outputs[0]["generated_text"][len(tokenized_prompt):]})
if __name__ == "__main__":
    app.run(debug=False, port=8888)
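# Example client call (a minimal sketch, assuming the service is reachable at
# http://localhost:8888 as configured above; the `requests` package is an extra
# dependency not imported by this app, and the prompt text is illustrative):
#
#   import requests
#
#   payload = {"prompt": "What can Sema AI do?", "max_new_tokens": 200}
#   resp = requests.post("http://localhost:8888/generate_text", json=payload)
#   print(resp.json()["response"])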
"""
# Flask route to handle incoming chat requests
@app.route('/chat', methods=['POST'])
def chat():
    # Get JSON data from the POST request
    data = request.json
    prompt = data.get('prompt')
    email = data.get('email')
    password = data.get('password')
    print(f"email 2 is {email} and password 2 is {password}; the user wants to know: {prompt}", flush=True)
    if not password:
        return jsonify({"error": "Missing password"}), 400
    elif not prompt:
        return jsonify({"error": "Missing prompt"}), 400
    elif not email:
        return jsonify({"error": "Missing email"}), 400
    # Generate the response
    response = generate_response(prompt, email, password)
    # Return the response as JSON
    return jsonify({"response": response})
# Function for generating the LLM response (requires the `hugchat` package:
# from hugchat import hugchat; from hugchat.login import Login)
def generate_response(prompt_input, email, passwd):
    # Hugging Face login
    sign = Login(email, passwd)
    cookies = sign.login()
    # Create the ChatBot from the login cookies
    chatbot = hugchat.ChatBot(cookies=cookies.get_dict())
    # Simple dialogue structure
    string_dialogue = "You are a helpful assistant."
    string_dialogue += f"\n\nUser: {prompt_input}\n\nAssistant: "
    # Generate and return the response
    return chatbot.chat(string_dialogue)
if __name__ == '__main__':
    app.run(debug=True)
"""