# FastAPI service exposing text-embedding, SMS-label prediction, and
# sentence-similarity endpoints backed by a SentenceTransformer model.
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer,util
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import numpy as np
import pandas as pd

# Initialize the FastAPI application.
app = FastAPI()

# Allow cross-origin requests from browser clients.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all origins; restrict this in production
    allow_credentials=True,
    allow_methods=["*"],  # Allow all HTTP methods
    allow_headers=["*"],  # Allow all HTTP headers
)




# Load the pre-trained SentenceTransformer embedding model once at startup.
# trust_remote_code=True is required because this model ships custom code.
model = SentenceTransformer("Alibaba-NLP/gte-base-en-v1.5", trust_remote_code=True)

# Define the request body schema
class TextInput(BaseModel):
    """Request body carrying the raw text to embed or classify."""

    # The input string; no length or emptiness constraint is enforced here.
    text: str

# Root endpoint: static welcome/liveness message.
@app.get("/")
async def home():
    """Return a fixed welcome payload for the service root."""
    payload = {"message": "welcome to home page"}
    return payload

# Embedding endpoint: text in, rounded embedding vector out.
@app.post("/embed")
async def generate_embedding(text_input: TextInput):
    """
    Generate a 768-dimensional embedding for the input text.

    The vector is rounded to 2 decimal places and returned alongside its
    dimensionality; any failure surfaces as an HTTP 500.
    """
    try:
        # Encode on whatever device the model uses, then move to CPU/NumPy.
        vector = model.encode(text_input.text, convert_to_tensor=True).cpu().numpy()

        # np.round keeps the same numeric behavior as the original pipeline.
        values = np.round(vector, 2).tolist()

        return {
            "dimensions": len(values),
            "embeddings": [values],
        }

    except Exception as e:
        # Surface any failure (model, device, serialization) as a 500.
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
    


# --- Offline training performed once at import time -------------------------
# NOTE(fix): the SentenceTransformer was previously re-instantiated here even
# though the identical model is already bound to `model` above — that doubled
# startup time and memory, so the existing instance is reused.

# Labeled SMS dataset; expected columns: "MessageText" and "label".
df = pd.read_excel("sms_process_data_main.xlsx")

# Split the data into training and testing sets (20% held out);
# the fixed seed keeps the split reproducible across restarts.
X_train, X_test, y_train, y_test = train_test_split(
    df["MessageText"], df["label"], test_size=0.2, random_state=42
)

# Embed the training messages with the shared SentenceTransformer.
X_train_embeddings = model.encode(X_train.tolist(), show_progress_bar=True)

# Train the Logistic Regression classifier on the embeddings.
logreg_model = LogisticRegression(max_iter=100)
logreg_model.fit(X_train_embeddings, y_train)

# NOTE(fix): a verbatim duplicate of the TextInput schema was defined here;
# the single definition earlier in the file is the one in use.

@app.post("/prediction")
async def generate_prediction(text_input: TextInput):
    """
    Predict the label for the given text input using the trained model.

    Responds 400 for blank input and 500 for any unexpected failure.
    """
    try:
        # Reject input that is empty or whitespace-only.
        if not text_input.text.strip():
            raise ValueError("Input text cannot be empty.")

        # Embed the raw text exactly as a one-element batch.
        embedding = model.encode([text_input.text])

        # Single-element batch -> take the first (and only) predicted label.
        label = logreg_model.predict(embedding).tolist()[0]

        return {"predicted_label": label}
    except ValueError as ve:
        # Client error: blank input.
        raise HTTPException(status_code=400, detail=str(ve))
    except Exception as e:
        # Anything else is a server-side failure.
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
    
class SentencesInput(BaseModel):
    """Request body with two sentences to compare for semantic similarity."""

    # The two texts whose embeddings are compared via cosine similarity.
    sentence1: str
    sentence2: str
# Similarity endpoint: embeds both sentences and reports cosine similarity.
@app.post("/text_to_tensor")
def text_to_tensor(input: SentencesInput):
    """Compute the cosine similarity between the two sentences' embeddings."""
    try:
        # Encode both sentences in a single batch.
        vectors = model.encode([input.sentence1, input.sentence2])

        # Cosine similarity between the two embedding vectors, as a float.
        score = util.cos_sim(vectors[0], vectors[1]).item()

        return {"cosine_similarity": round(score, 3)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")


# Run the API with uvicorn when executed directly (binds all interfaces on 7860).
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)