Akhilgautam30 committed on
Commit 52c6760 · 1 Parent(s): 8243432

added code based on rogelio's changes

Files changed (3)
  1. .github/workflows/main.yml +1 -1
  2. main.py +20 -13
  3. model_utils.py +69 -20
.github/workflows/main.yml CHANGED
@@ -16,4 +16,4 @@ jobs:
       - name: Push to hub
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        run: git push --force https://akhilgautam2011%40gmail.com:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/Akhilgautam30/personality_assesment main
+        run: git push https://akhilgautam2011%40gmail.com:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/Akhilgautam30/personality_assesment main
main.py CHANGED
@@ -1,31 +1,38 @@
-# API/main.py
-
-# main.py (in the root directory)
-#test command
+# main.py
+
 import sys
 import os
-from model_utils import predict_personality
 from fastapi import FastAPI
+from model_utils import load_model_and_weights, single_predict
 
 app = FastAPI()
 
+# Load the model and tokenizer
+output_folder = '.'  # Adjust this path as needed
+hugging_model = 'roberta-base'
+model = load_model_and_weights(hugging_model, output_folder)
+
+# Root path handler for unit test
 @app.get("/")
 async def root():
-    return {"message": "Personality Assessment API is running"}
+    test_text = ("always a problem. My hair is really wet and I should go dry it, but this assignment is what I need to do now. "
+                 "I almost slept through my eight o clock class, but I somehow made it. Ok this show keeps getting cheezier and cheezier "
+                 "oh dear. I have to cash a check and deposit it so my check book balances, which is something that needs to be done and "
+                 "really quickly because I will have to pay extra for all the hot checks I have written- uh oh. My twenty minutes probably "
+                 "seems shorter because I am a slower typist than most people. PROPNAME is a psycho whore, I hate hate her. Something shocking "
+                 "happens on this show every 0 seconds. I don't think that Days of our lives is a good show, but I seem to be addicted to it "
+                 "anyway. PROPNAME is so nice and her and LOCNAME are finally together, but probably not for long because there is")
+    predictions = single_predict(model, test_text)
+    return {"predictions": predictions}
 
 @app.get("/predict")
 async def predict_personality_get(text: str):
-    try:
-        print("--------------------------")
-        predictions = predict_personality(text)
-        return {"predictions": predictions}
-    except NameError:
-        return {"error": "predict_personality function not available"}
-
+    predictions = single_predict(model, text)
+    return {"predictions": predictions}
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=7860)

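The updated main.py loads the classifier once at import time and serves two GET endpoints. Below is a minimal client-side sketch of exercising them; it assumes the app is running locally on port 7860 (matching the uvicorn call above) and that the third-party requests package is installed, neither of which is part of this commit.

```python
# Hypothetical client sketch, not part of this commit.
import requests

BASE_URL = "http://localhost:7860"  # assumed local uvicorn instance; swap in the Space URL if deployed

# Root endpoint: runs single_predict on the hard-coded sample essay (doubles as a smoke test).
print(requests.get(f"{BASE_URL}/").json())

# /predict endpoint: scores caller-supplied text against the five trait labels.
resp = requests.get(
    f"{BASE_URL}/predict",
    params={"text": "I enjoy quiet evenings and detailed planning."},
)
print(resp.json())
# Expected shape: {"predictions": {"probability": {...}, "predicted_label": {...}}}
```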
model_utils.py CHANGED
@@ -3,21 +3,81 @@
 import os
 import tensorflow as tf
 from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
+import numpy as np
+from nltk.corpus import stopwords
+from keras.preprocessing.text import Tokenizer
 
 # Define the personality trait labels
 traits = ['cAGR', 'cCON', 'cEXT', 'cOPN', 'cNEU']
 
-def load_model_and_weights():
-    model_name = "roberta-base"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
+def preprocess(docs):
+    stopwrd = set(stopwords.words('english'))
+    t = Tokenizer(num_words=20000, filters='!"#$%&()*+,-./:;<=>?@[\\]^`{|}~\t\n')
+    t.fit_on_texts(docs)
+    encoded_docs = t.texts_to_sequences(docs)
+    idx2word = {v: k for k, v in t.word_index.items()}
+
+    def abbreviation_handler(text):
+        ln = text.lower()
+        ln = ln.replace(r"'t", " not")
+        ln = ln.replace(r"'s", " is")
+        ln = ln.replace(r"'ll", " will")
+        ln = ln.replace(r"'ve", " have")
+        ln = ln.replace(r"'re", " are")
+        ln = ln.replace(r"'m", " am")
+        ln = ln.replace(r"'", " ")
+        return ln
+
+    def stopwords_handler(text):
+        words = text.split()
+        new_words = [w for w in words if w not in stopwrd]
+        return ' '.join(new_words)
+
+    def sequence_to_text(listOfSequences):
+        tokenized_list = []
+        for text in listOfSequences:
+            newText = ''
+            for num in text:
+                newText += idx2word[num] + ' '
+            newText = abbreviation_handler(newText)
+            newText = stopwords_handler(newText)
+            tokenized_list.append(newText)
+        return tokenized_list
+
+    newLists = sequence_to_text(encoded_docs)
+    return newLists
+
+def tokenize_text(text, hugging_model='roberta-base'):
+    clean_text = preprocess(text)
+    tokenizer = AutoTokenizer.from_pretrained(hugging_model)
+    inputs = tokenizer(clean_text, padding=True, truncation=True, return_tensors='tf')
+    x = dict(inputs)
+    return x
+
+def single_predict(model, text, traits=['cAGR', 'cCON', 'cEXT', 'cOPN', 'cNEU']):
+    traits_scores = dict()
+    predicted_labels = dict()
+    x = tokenize_text([text])
+    logits = model.predict(x, verbose=0).logits
+    probs = tf.math.sigmoid(logits).numpy()
+    predictions = np.where(probs > 0.5, 1, 0)
+    for t, s in zip(traits, probs[0]):
+        traits_scores[t] = s
+    for t, l in zip(traits, predictions[0]):
+        predicted_labels[t] = l
+    final_dic = {'probability': traits_scores, 'predicted_label': predicted_labels}
+    return final_dic
+
+def load_model_and_weights(hugging_model='roberta-base', output_folder='.'):
     model = TFAutoModelForSequenceClassification.from_pretrained(
-        model_name,
-        num_labels=len(traits),
-        problem_type="multi_label_classification"
+        hugging_model, num_labels=len(traits), problem_type="multi_label_classification"
     )
-
-    # Load custom weights
-    weights_path = os.path.join(os.getcwd(), 'weights-roberta-base.h5')
+    if len(hugging_model.split('/')) > 1:
+        _hugging_model = hugging_model.split('/')[1]
+    else:
+        _hugging_model = hugging_model.split('/')[0]
 
+    weights_path = os.path.join(output_folder, f'weights-{_hugging_model}.h5')
     if os.path.exists(weights_path):
         try:
             model.load_weights(weights_path)
@@ -28,15 +88,4 @@ def load_model_and_weights():
     else:
         print(f"Warning: Custom weights file not found at {weights_path}")
         print("Using default weights.")
-
-    return tokenizer, model
-
-# Load the model and tokenizer
-tokenizer, model = load_model_and_weights()
-
-def predict_personality(text):
-    inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True, max_length=512)
-    outputs = model(inputs)
-    probabilities = tf.nn.sigmoid(outputs.logits)[0]  # Using sigmoid for multi-label
-    predictions = [{"trait": trait, "score": float(prob)} for trait, prob in zip(traits, probabilities)]
-    return predictions
+    return model
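
The new model_utils.py splits the pipeline into preprocess (Keras tokenization round-trip, contraction expansion, NLTK stopword removal), tokenize_text (RoBERTa encoding), single_predict (sigmoid probabilities thresholded at 0.5 per trait), and load_model_and_weights (builds the multi-label head and restores weights-<model>.h5 from output_folder when present). A minimal sketch of driving the module directly, assuming the NLTK stopword corpus has already been downloaded (that download is not handled by this commit):

```python
# Hypothetical usage sketch, not part of this commit.
import nltk
nltk.download("stopwords")  # preprocess() reads nltk.corpus.stopwords at call time

from model_utils import load_model_and_weights, single_predict

# Without weights-roberta-base.h5 in '.', this falls back to the untrained classification head.
model = load_model_and_weights(hugging_model="roberta-base", output_folder=".")

result = single_predict(model, "I stayed up all night reorganizing my study schedule.")
print(result["probability"])      # per-trait sigmoid scores, e.g. {'cAGR': 0.51, ...} (numpy float32)
print(result["predicted_label"])  # 0/1 labels from the 0.5 threshold, e.g. {'cAGR': 1, ...}
```

One caveat worth flagging: single_predict returns numpy float32/int64 scalars, which JSON encoders may reject when the FastAPI endpoints serialize them; casting with float()/int() before returning would be a safe hardening step.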