from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer, util
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import numpy as np
import pandas as pd

# Initialize the FastAPI app
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all origins; restrict this in production
    allow_credentials=True,
    allow_methods=["*"],  # Allow all HTTP methods
    allow_headers=["*"],  # Allow all HTTP headers
)

# Load the pre-trained SentenceTransformer model (shared by all endpoints)
model = SentenceTransformer("Alibaba-NLP/gte-base-en-v1.5", trust_remote_code=True)


# Define the request body schema
class TextInput(BaseModel):
    text: str


# Home route
@app.get("/")
async def home():
    return {"message": "Welcome to the home page"}


# Define the API endpoint for generating embeddings
@app.post("/embed")
async def generate_embedding(text_input: TextInput):
    """
    Generate a 768-dimensional embedding for the input text.
    Returns the embedding in a structured format with rounded values.
    """
    try:
        # Generate the embedding
        embedding = model.encode(text_input.text, convert_to_tensor=True).cpu().numpy()

        # Round embedding values to 2 decimal places
        rounded_embedding = np.round(embedding, 2).tolist()

        # Return structured response
        return {
            "dimensions": len(rounded_embedding),
            "embeddings": [rounded_embedding],
        }
    except Exception as e:
        # Handle any errors
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")


# Load the labelled SMS dataset and train a classifier once at startup
df = pd.read_excel("sms_process_data_main.xlsx")

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    df["MessageText"], df["label"], test_size=0.2, random_state=42
)

# Generate embeddings for the training data
X_train_embeddings = model.encode(X_train.tolist(), show_progress_bar=True)

# Initialize and train the Logistic Regression model
# (max_iter raised above the default so the solver converges on 768-dim embeddings)
logreg_model = LogisticRegression(max_iter=1000)
logreg_model.fit(X_train_embeddings, y_train)


@app.post("/prediction")
async def generate_prediction(text_input: TextInput):
    """
    Predict the label for the given text input using the trained model.
    """
    try:
        # Check if input text is provided
        if not text_input.text.strip():
            raise ValueError("Input text cannot be empty.")

        # Generate embedding for the input text
        new_embedding = model.encode([text_input.text])

        # Predict the label using the trained Logistic Regression model
        prediction = logreg_model.predict(new_embedding).tolist()[0]  # Extract single prediction

        # Return structured response
        return {"predicted_label": prediction}
    except ValueError as ve:
        raise HTTPException(status_code=400, detail=str(ve))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")


class SentencesInput(BaseModel):
    sentence1: str
    sentence2: str


@app.post("/text_to_tensor")
def text_to_tensor(input: SentencesInput):
    try:
        # Generate embeddings for both sentences
        embeddings = model.encode([input.sentence1, input.sentence2])

        # Compute cosine similarity between the two embeddings
        cosine_similarity = util.cos_sim(embeddings[0], embeddings[1]).item()

        return {"cosine_similarity": round(cosine_similarity, 3)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
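
# --- Example requests (illustrative only) ---
# The payloads below are inferred from the Pydantic schemas above, assuming the
# server is running locally on port 7860; adjust host/port to your deployment.
#
#   curl -X POST http://localhost:7860/embed \
#        -H "Content-Type: application/json" \
#        -d '{"text": "hello world"}'
#
#   curl -X POST http://localhost:7860/prediction \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Congratulations, you have won a free prize!"}'
#
#   curl -X POST http://localhost:7860/text_to_tensor \
#        -H "Content-Type: application/json" \
#        -d '{"sentence1": "The cat sat on the mat.", "sentence2": "A cat was sitting on a mat."}'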