"""Sema AI chat service: a Flask app backed by the Gemma-2 causal LM."""
from transformers import AutoModelForCausalLM, GemmaTokenizerFast, TextIteratorStreamer
from flask import Flask, request, jsonify
from threading import Thread
from typing import Iterator
import spaces
import torch
import os

app = Flask(__name__)

# flush=True so the startup banner shows up immediately in container logs
print("Hello welcome to Sema AI", flush=True)

# Hugging Face credentials / token, injected via environment variables.
email = os.getenv('HF_EMAIL')
password = os.getenv('HF_PASS')
GEMMA_TOKEN = os.getenv("GEMMA_TOKEN")

# Generation limits for the local Gemma model.
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

# Prefer the first CUDA device when one is available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_id = "google/gemma-2-2b-it"
tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Cap the attention sliding window, then switch to inference mode.
model.config.sliding_window = 4096
model.eval()


@app.route("/")
def hello():
    """Health-check / landing endpoint."""
    return "hello 🤗, Welcome to Sema AI Chat Service."
# Flask route to handle incoming chat requests
@app.route('/chat', methods=['POST'])
def chat():
    """Validate a chat request and return the assistant's reply as JSON.

    Expects a JSON body with "prompt", "email", and "password" keys.
    Returns HTTP 400 with an error message when any field is missing,
    otherwise 200 with {"response": <reply>}.
    """
    # Tolerate a missing or malformed JSON body instead of raising.
    data = request.get_json(silent=True) or {}
    prompt = data.get('prompt')
    email = data.get('email')
    password = data.get('password')

    # NOTE(security): never log the password — log only non-sensitive fields.
    print(f"email 2 is {email} and The user wants to Know: {prompt}", flush=True)

    # BUG FIX: the original if/elif/else chain ended with an unconditional
    # `else: return ..., 400`, so every fully-populated request was rejected
    # and the success path below was unreachable. Guard clauses now reject
    # only genuinely missing fields.
    if not prompt:
        return jsonify({"error": "Missing prompt"}), 400
    if not email:
        return jsonify({"error": "Missing email"}), 400
    if not password:
        return jsonify({"error": "Missing password"}), 400

    # Generate the response
    response = generate_response(prompt, email, password)
    # Return the response as JSON
    return jsonify({"response": response})


# Function for generating LLM response
def generate_response(prompt_input, email, passwd):
    """Log in to HuggingChat and return its reply to *prompt_input*.

    Raises whatever `hugchat` raises on failed login or chat errors;
    callers see those propagate as a 500 from Flask.
    """
    # BUG FIX: `Login` and `hugchat` were referenced but never imported
    # anywhere in the file, so the original raised NameError at call time.
    # Imported locally to keep module import side effects unchanged.
    from hugchat import hugchat
    from hugchat.login import Login

    # Hugging Face login; Login.login() yields the session cookie jar.
    sign = Login(email, passwd)
    cookies = sign.login()

    # Create the ChatBot bound to the authenticated session.
    chatbot = hugchat.ChatBot(cookies=cookies.get_dict())

    # Simple dialogue framing around the user's prompt.
    string_dialogue = "You are a helpful assistant."
    string_dialogue += f"\n\nUser: {prompt_input}\n\nAssistant: "

    # Generate and return the response
    return chatbot.chat(string_dialogue)


if __name__ == '__main__':
    app.run(debug=True)