Spaces:

Gsainath
/

core-api

Sleeping

App Files Files Community

Gsainath committed on Feb 28

Commit

4600230

0 Parent(s):

Re-added model with LFS

Browse files

Files changed (5) hide show

.DS_Store +0 -0
Dockerfile +20 -0
app.py +77 -0
codebert_model.pth +3 -0
requirements.txt +7 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

Dockerfile ADDED Viewed

	@@ -0,0 +1,20 @@

+# Use an official Python runtime as a parent image
+FROM python:3.9
+# Hugging Face Spaces runs the container as user ID 1000, so create that user
+# and switch to it before copying files to avoid permission problems at runtime
+RUN useradd -m -u 1000 user
+USER user
+# Make console scripts installed by the user-level pip (e.g. uvicorn) resolvable
+ENV PATH="/home/user/.local/bin:$PATH"
+# Set the working directory in the container
+WORKDIR /app
+# Copy the requirements file first so the dependency layer is cached across code changes
+COPY --chown=user requirements.txt .
+# Install dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the entire project into the container
+COPY --chown=user . .
+# Expose the FastAPI port (7860 for Hugging Face Spaces)
+EXPOSE 7860
+# Command to run the application
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import os
+import uvicorn
+from fastapi import FastAPI
+from pydantic import BaseModel
+import torch
+from transformers import RobertaTokenizer, RobertaForSequenceClassification
+# Set Hugging Face cache directory to a writable location
+os.environ["HF_HOME"] = "/tmp/huggingface_cache"
+os.environ["TRANSFORMERS_CACHE"] = os.environ["HF_HOME"]
+os.makedirs(os.environ["HF_HOME"], exist_ok=True)  # Ensure the directory exists
+# Initialize FastAPI
+app = FastAPI()
+# Load CodeBERT Model
+class CodeBERTClassifier(torch.nn.Module):
+    def __init__(self):
+        super(CodeBERTClassifier, self).__init__()
+        self.model = RobertaForSequenceClassification.from_pretrained(
+            "microsoft/codebert-base",
+            num_labels=2,
+            cache_dir=os.environ["HF_HOME"]  # Use the custom cache directory
+        )
+    def forward(self, input_ids, attention_mask=None):
+        outputs = self.model(input_ids, attention_mask=attention_mask)
+        return outputs.logits
+def load_model():
+    model = CodeBERTClassifier().to('cpu')
+    model.load_state_dict(torch.load('codebert_model.pth', map_location='cpu'), strict=False)
+    model.eval()
+    tokenizer = RobertaTokenizer.from_pretrained(
+        "microsoft/codebert-base",
+        cache_dir=os.environ["HF_HOME"]  # Use the custom cache directory
+    )
+    return model, tokenizer
+model, tokenizer = load_model()
+# Request model
+class CodeRequest(BaseModel):
+    """Request body accepted by the prediction endpoints."""
+    # Source-code snippets to classify; each one yields one prediction entry
+    code_samples: list[str]
+def preprocess_input_code(code_samples):
+    inputs = tokenizer(code_samples, padding="max_length", truncation=True, max_length=512, return_tensors="pt")
+    return inputs["input_ids"], inputs["attention_mask"]
+# Predict function
+def predict(code_samples):
+    tokens, masks = preprocess_input_code(code_samples)
+    with torch.no_grad():
+        logits = model(tokens, attention_mask=masks)
+        probabilities = torch.nn.functional.softmax(logits, dim=1).numpy()
+    return probabilities
+@app.get("/")  # This route ensures the API is reachable
+def home():
+    return {"message": "API is running!"}
+# API endpoint for prediction
+@app.post("/predict/")
+async def predict_code(request: CodeRequest):
+    probabilities = predict(request.code_samples)
+    results = [{"AI": f"{prob[1]*100:.2f}%", "Human": f"{prob[0]*100:.2f}%"} for prob in probabilities]
+    return {"predictions": results}
+@app.post("/detect/")
+async def predict_code(request: CodeRequest):
+    probabilities = predict(request.code_samples)
+    results = [{"AI": f"{prob[1]*100:.2f}%", "Human": f"{prob[0]*100:.2f}%"} for prob in probabilities]
+    return {"predictions": results}
+if __name__ == "__main__":
+    port = int(os.environ.get("PORT", 7860))  # Ensure it uses 7860
+    uvicorn.run(app, host="0.0.0.0", port=port)

codebert_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c3b538e9b12be4e3aa206abd48c14cdc89dde3e1d542d31985df1b5cb73f6851
+size 498682677

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+# Runtime dependencies for app.py (all versions are unpinned)
+fastapi
+pydantic
+torch
+transformers
+# NOTE(review): gdown is not imported by app.py in this commit — confirm it is still needed
+gdown
+uvicorn
+# NOTE(review): huggingface-hub is not imported directly by app.py; presumably listed for transformers — confirm
+huggingface-hub