Gsainath committed on
Commit
4600230
·
0 Parent(s):

Re-added model with LFS

Browse files
Files changed (5) hide show
  1. .DS_Store +0 -0
  2. Dockerfile +20 -0
  3. app.py +77 -0
  4. codebert_model.pth +3 -0
  5. requirements.txt +7 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.9

# Hugging Face Spaces runs the container as user ID 1000; create that user
# up front so the application files can be owned by the account that runs them.
RUN useradd -m -u 1000 user

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file on its own first so the dependency layer
# is reused when only application code changes
COPY requirements.txt .

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the entire project into the container, owned by the runtime user
COPY --chown=user . .

# Drop root privileges for the running application
USER user

# Expose the FastAPI port (7860 for Hugging Face Spaces)
EXPOSE 7860

# Command to run the application
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Point the Hugging Face cache at a writable location BEFORE importing
# transformers: the library reads these variables when it is imported, so
# assigning them afterwards does not change its default cache paths.
os.environ["HF_HOME"] = "/tmp/huggingface_cache"
os.environ["TRANSFORMERS_CACHE"] = os.environ["HF_HOME"]
os.makedirs(os.environ["HF_HOME"], exist_ok=True)  # Ensure the directory exists

# These imports intentionally follow the environment setup above.
import uvicorn  # noqa: E402
from fastapi import FastAPI  # noqa: E402
from pydantic import BaseModel  # noqa: E402
import torch  # noqa: E402
from transformers import RobertaTokenizer, RobertaForSequenceClassification  # noqa: E402

# Initialize FastAPI
app = FastAPI()
15
+
16
+ # Load CodeBERT Model
17
class CodeBERTClassifier(torch.nn.Module):
    """Two-label sequence classifier built on ``microsoft/codebert-base``."""

    def __init__(self):
        super().__init__()
        # Pretrained weights are fetched into (or reused from) the custom
        # cache directory named by HF_HOME.
        self.model = RobertaForSequenceClassification.from_pretrained(
            "microsoft/codebert-base",
            cache_dir=os.environ["HF_HOME"],
            num_labels=2,
        )

    def forward(self, input_ids, attention_mask=None):
        """Run the wrapped model and return only its ``logits``."""
        return self.model(input_ids, attention_mask=attention_mask).logits
29
+
30
def load_model():
    """Build the classifier, load the fine-tuned weights, and fetch the tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple: the classifier placed on the CPU in
        eval mode, and the ``microsoft/codebert-base`` tokenizer.
    """
    import logging  # local import keeps this block self-contained

    logger = logging.getLogger(__name__)

    model = CodeBERTClassifier().to('cpu')

    # NOTE: torch.load unpickles the file unless weights-only loading is in
    # effect; the checkpoint here is a file shipped alongside the app.
    state_dict = torch.load('codebert_model.pth', map_location='cpu')

    # strict=False tolerates key mismatches between the checkpoint and the
    # module. Keep that leniency, but report what did not line up instead of
    # discarding the information silently.
    load_result = model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys:
        logger.warning(
            "Checkpoint is missing %d key(s): %s",
            len(load_result.missing_keys),
            load_result.missing_keys,
        )
    if load_result.unexpected_keys:
        logger.warning(
            "Checkpoint has %d unexpected key(s): %s",
            len(load_result.unexpected_keys),
            load_result.unexpected_keys,
        )

    model.eval()
    tokenizer = RobertaTokenizer.from_pretrained(
        "microsoft/codebert-base",
        cache_dir=os.environ["HF_HOME"]  # Use the custom cache directory
    )
    return model, tokenizer


# Loaded once at import time and shared by every request.
model, tokenizer = load_model()
41
+
42
+ # Request model
43
class CodeRequest(BaseModel):
    # Request body for the prediction endpoints: the code snippets to
    # classify, one string per sample.
    code_samples: list[str]
45
+
46
def preprocess_input_code(code_samples):
    """Tokenize code samples into PyTorch tensors of length 512.

    Inputs are truncated and padded to ``max_length``. Returns the
    ``input_ids`` and ``attention_mask`` entries as a pair.
    """
    encoded = tokenizer(
        code_samples,
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding="max_length",
    )
    return encoded["input_ids"], encoded["attention_mask"]
49
+
50
+ # Predict function
51
def predict(code_samples):
    """Return per-class probabilities for each code sample.

    Args:
        code_samples: The source-code strings to classify.

    Returns:
        A NumPy array with one row per sample, holding the softmax of the
        model's logits across the label dimension. An empty list or tuple
        yields an empty array of shape ``(0, 2)``.
    """
    # An empty batch has nothing to tokenize; answer with an empty result
    # (two columns, matching the classifier's two labels) rather than
    # handing it to the tokenizer and model.
    if isinstance(code_samples, (list, tuple)) and not code_samples:
        return torch.empty((0, 2)).numpy()

    tokens, masks = preprocess_input_code(code_samples)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(tokens, attention_mask=masks)
        probabilities = torch.nn.functional.softmax(logits, dim=1).numpy()
    return probabilities
57
+
58
+
59
@app.get("/")  # Root route: lets callers confirm the API is reachable
def home():
    status = {"message": "API is running!"}
    return status
62
+
63
# API endpoints for prediction. "/predict/" and "/detect/" share one handler;
# decorators apply bottom-up, so "/predict/" is still registered first.
@app.post("/detect/")
@app.post("/predict/")
async def predict_code(request: CodeRequest):
    """Score each submitted code sample and report AI/Human percentages.

    Args:
        request: Request body carrying ``code_samples``.

    Returns:
        ``{"predictions": [...]}`` with one ``{"AI": "..%", "Human": "..%"}``
        entry per sample, formatted to two decimal places. Column 1 of the
        probabilities is reported as "AI" and column 0 as "Human".
    """
    # fastapi is already a dependency of this file; imported locally so the
    # handler is self-contained.
    from fastapi.concurrency import run_in_threadpool

    # Inference is blocking, CPU-bound work. Running it on a worker thread
    # keeps the event loop free to serve other requests in the meantime.
    probabilities = await run_in_threadpool(predict, request.code_samples)
    results = [
        {"AI": f"{prob[1]*100:.2f}%", "Human": f"{prob[0]*100:.2f}%"}
        for prob in probabilities
    ]
    return {"predictions": results}
74
+
75
if __name__ == "__main__":
    # Use the PORT environment variable when it is set; otherwise serve on 7860.
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
codebert_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3b538e9b12be4e3aa206abd48c14cdc89dde3e1d542d31985df1b5cb73f6851
3
+ size 498682677
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ pydantic
3
+ torch
4
+ transformers
5
+ gdown
6
+ uvicorn
7
+ huggingface-hub