Ajay Karthick Senthil Kumar committed on
Commit
dc66f8e
·
0 Parent(s):

New branch for app

Browse files
Files changed (6) hide show
  1. .gitattributes +35 -0
  2. .github/workflows/main.yml +21 -0
  3. .gitignore +6 -0
  4. README.md +12 -0
  5. app.py +110 -0
  6. requirements.txt +2 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+
.github/workflows/main.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ name: Sync to Hugging Face hub
3
+ on:
4
+ push:
5
+ branches: [app]
6
+
7
+ # to run this workflow manually from the Actions tab
8
+ workflow_dispatch:
9
+
10
+ jobs:
11
+ sync-to-hub:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v3
15
+ with:
16
+ ref: app
17
+ fetch-depth: 0
18
+ - name: Push to hub
19
+ env:
20
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
21
+ run: git push --force https://ajaykarthick:[email protected]/spaces/ajaykarthick/text-classifier-naive-bayes app:main
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ data
2
+ .DS_Store
3
+ .ipynb_checkpoints
4
+
5
+ notebooks
6
+ model
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Text Classifier Naive Bayes
3
+ emoji: 📈
4
+ colorFrom: green
5
+ colorTo: gray
6
+ sdk: gradio
7
+ sdk_version: 3.17.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # Naive Bayes Text Classifier Application
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import string
3
+ import re
4
+ import pickle
5
+ import huggingface_hub
6
+
7
+ import numpy as np
8
+ import nltk
9
+ nltk.download('stopwords')
10
+ nltk.download('wordnet')
11
+ nltk.download('omw-1.4')
12
+ from nltk.corpus import stopwords
13
+
14
+
15
+
16
def clean_review(review):
    """Normalize a raw review string for the Naive Bayes classifier.

    Steps: lowercase, strip URLs and HTML tags, turn periods into spaces,
    drop punctuation, remove English stopwords, and lemmatize each
    remaining token.

    Args:
        review: Raw review text.

    Returns:
        A single space-joined string of cleaned, lemmatized tokens.
    """
    review = review.lower()
    # Remove URLs. The dot in "www\." is escaped so plain words starting
    # with "www" followed by any character are no longer swallowed.
    review = re.sub(r"http\S+|www\.\S+", "", review)
    # Strip HTML tags such as <br />.
    review = re.sub(r"<[^>]*>", "", review)
    # Periods become spaces so sentence boundaries don't glue words together.
    review = review.replace(".", " ")

    review = "".join(c for c in review if c not in string.punctuation)
    # Build the stopword set once instead of calling stopwords.words()
    # for every token — the original re-read the corpus list per word.
    stop_words = set(stopwords.words('english'))
    review = " ".join(word for word in re.split(r"\W+", review)
                      if word not in stop_words)
    wn = nltk.WordNetLemmatizer()
    # NOTE(review): pos='r' lemmatizes every token as an adverb, which is
    # unusual — kept as-is because the trained model was built with it.
    review = " ".join(wn.lemmatize(word, 'r') for word in re.split(r"\W+", review))

    return review
29
+
30
def find_occurrence(frequency, word, label):
    """Return how many times (word, label) was counted during training.

    Args:
        frequency: dict mapping (word, label) pairs to counts.
        word: Token to look up.
        label: Class label (1 = positive, 0 = negative).

    Returns:
        The stored count, or 0 when the pair is absent.
    """
    return frequency.get((word, label), 0)
36
+
37
def classify_text(freqs, logprior, text):
    """Classify *text* with a trained multinomial Naive Bayes model.

    Args:
        freqs: dict mapping (word, label) -> training count, where
            label 1 = positive and label 0 = negative.
        logprior: log(P(pos) / P(neg)) computed during training.
        text: Raw review text to classify.

    Returns:
        1 if the review is classified positive (log-odds strictly > 0),
        0 otherwise.
    """
    # V: size of the vocabulary (unique words across both classes).
    vocab = {word for word, _ in freqs}
    V = len(vocab)

    # Total token counts per class, needed for Laplace-smoothed likelihoods.
    num_pos = num_neg = 0
    for (word, label), count in freqs.items():
        if label > 0:
            num_pos += count
        else:
            num_neg += count

    # Start from the prior, then add each known word's log-likelihood ratio.
    total_prob = logprior

    for word in clean_review(text).split():
        freq_pos = find_occurrence(freqs, word, 1)
        freq_neg = find_occurrence(freqs, word, 0)

        # Words never seen in training are skipped entirely (the original
        # stored an unused '' sentinel for them; that dead write is removed).
        if freq_pos + freq_neg > 0:
            # Laplace (add-one) smoothed per-class word probabilities.
            p_w_pos = (freq_pos + 1) / (num_pos + V)
            p_w_neg = (freq_neg + 1) / (num_neg + V)
            total_prob += np.log(p_w_pos / p_w_neg)

    # Collapse the accumulated log-odds into a hard decision; a value of
    # exactly 0 maps to the negative class, matching the original.
    return 1 if total_prob > 0 else 0
96
+
97
# Download the trained model artifact from the Hugging Face Hub and load
# its parameters once at startup.
# NOTE(review): pickle.load on a downloaded artifact executes arbitrary
# code — acceptable only because this model repo is owned/trusted.
model_path = huggingface_hub.hf_hub_download("ajaykarthick/naive-bayes-review-classify-model", "naive-bayes-text-classifier-model")

# Use a context manager so the file handle is closed deterministically
# (the original `pickle.load(open(...))` leaked the handle).
with open(model_path, mode='rb') as f:
    model_params = pickle.load(f)
freqs = model_params['freqs_dict']    # (word, label) -> training count
logprior = model_params['logprior']   # log(P(pos) / P(neg))
102
+
103
+
104
def greet(name):
    """Gradio handler: classify the input text as POSITIVE or NEGATIVE.

    Args:
        name: The review text typed into the Gradio textbox.

    Returns:
        'POSITIVE' when classify_text returns 1, 'NEGATIVE' otherwise.
    """
    total_prob = classify_text(freqs, logprior, name)
    print(name, str(total_prob))
    # BUG FIX: classify_text returns 1 when the log-odds favor the positive
    # class, but the original mapped total_prob == 0 to 'POSITIVE',
    # inverting every prediction shown to the user.
    return 'POSITIVE' if total_prob == 1 else 'NEGATIVE'
108
+
109
# Wire the classifier into a minimal text-in / text-out Gradio UI and
# start the app server (launch() blocks until the process is stopped).
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
nltk
huggingface_hub
numpy