Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,56 +2,84 @@ import streamlit as st
|
|
2 |
from transformers import pipeline
|
3 |
from PyPDF2 import PdfReader
|
4 |
import docx
|
|
|
5 |
|
6 |
# Page setup
|
7 |
st.set_page_config(layout="wide")
|
8 |
st.title("π AI Content Analyzer")
|
9 |
-
st.markdown("Upload PDF/Word files
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
# Load AI detection model
@st.cache_resource
def load_model():
    """Build (and cache across Streamlit reruns) the AI-text classifier.

    Returns a transformers text-classification pipeline backed by
    roberta-base-openai-detector.
    """
    return pipeline(
        "text-classification",
        model="roberta-base-openai-detector",
    )

detector = load_model()
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
# File uploader
|
19 |
-
uploaded_file = st.file_uploader("
|
20 |
|
21 |
if uploaded_file:
|
22 |
# Extract text
|
23 |
text = ""
|
24 |
if uploaded_file.name.endswith(".pdf"):
|
25 |
reader = PdfReader(uploaded_file)
|
26 |
-
text = "".join([page.extract_text() or "" for page in reader.pages])
|
27 |
elif uploaded_file.name.endswith(".docx"):
|
28 |
doc = docx.Document(uploaded_file)
|
29 |
-
text = "
|
30 |
else:
|
31 |
text = uploaded_file.read().decode("utf-8")
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
37 |
else:
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
40 |
|
41 |
-
#
|
42 |
-
st.subheader("
|
|
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
|
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
- **AI Probability:** {ai_prob:.2f}%
|
53 |
-
""")
|
54 |
-
|
55 |
-
# Text preview
|
56 |
-
with st.expander("View extracted text"):
|
57 |
-
st.text(text[:1000] + "...") # First 1000 chars
|
|
|
2 |
from transformers import pipeline
|
3 |
from PyPDF2 import PdfReader
|
4 |
import docx
|
5 |
+
import plotly.graph_objects as go
|
6 |
|
7 |
# Page setup
st.set_config = None if False else None  # no-op removed: keep setup minimal
st.set_page_config(layout="wide")
# NOTE(review): "π" appears to be a mis-encoded (mojibake) emoji lead byte —
# recover the intended character from the original source; kept verbatim here.
st.title("π AI Content Analyzer")
st.markdown("Upload PDF/Word files (Max 1000 words analysis)")

# Constants
MAX_WORDS = 1000  # Word limit: only this many words are sent to the detector
WORD_LIMIT_WARNING = 1500  # Warning threshold
|
15 |
|
|
|
16 |
@st.cache_resource
def load_model():
    """Return the cached text-classification pipeline used for AI detection."""
    classifier = pipeline(
        "text-classification",
        model="roberta-base-openai-detector",
    )
    return classifier

detector = load_model()
|
21 |
|
22 |
+
def count_words(text):
    """Return the number of whitespace-separated words in *text*."""
    words = text.split()
    return len(words)
|
24 |
+
|
25 |
+
def create_gauge(score):
    """Render a Plotly gauge chart for the AI-probability *score* (0-100)."""
    # Color bands: green = likely human, yellow = uncertain, red = likely AI.
    bands = [
        {'range': [0, 50], 'color': 'green'},
        {'range': [50, 75], 'color': 'yellow'},
        {'range': [75, 100], 'color': 'red'},
    ]
    indicator = go.Indicator(
        mode="gauge+number",
        value=score,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "AI Probability", 'font': {'size': 20}},
        gauge={
            'axis': {'range': [None, 100], 'tickwidth': 1},
            'bar': {'color': "darkblue"},
            'steps': bands,
        },
    )
    fig = go.Figure(indicator)
    st.plotly_chart(fig, use_container_width=True)
|
40 |
+
|
41 |
# File uploader
uploaded_file = st.file_uploader("Choose file", type=["pdf", "docx", "txt"])

if uploaded_file:
    # Extract text from the uploaded file, dispatching on its extension.
    text = ""
    if uploaded_file.name.endswith(".pdf"):
        reader = PdfReader(uploaded_file)
        # extract_text() may return None for image-only pages; coalesce to "".
        text = " ".join([page.extract_text() or "" for page in reader.pages])
    elif uploaded_file.name.endswith(".docx"):
        doc = docx.Document(uploaded_file)
        text = " ".join([para.text for para in doc.paragraphs])
    else:
        # NOTE(review): assumes .txt uploads are UTF-8; a non-UTF-8 file will
        # raise UnicodeDecodeError here — confirm whether that is acceptable.
        text = uploaded_file.read().decode("utf-8")

    word_count = count_words(text)

    # FIX: warn whenever the text exceeds the analysis limit (MAX_WORDS).
    # Previously the guard used WORD_LIMIT_WARNING (1500), so files of
    # 1001-1500 words were silently truncated with no warning.
    if word_count > MAX_WORDS:
        st.warning(f"⚠️ File has {word_count} words (Analyzing first {MAX_WORDS} words only)")

    if st.button("Analyze"):
        if word_count < 50:
            st.error("❌ Not enough text (min 50 words required)")
        else:
            # Truncate to the first MAX_WORDS words before classification.
            processed_text = " ".join(text.split()[:MAX_WORDS])

            # Run the detector and convert its score to an AI probability in %.
            result = detector(processed_text)
            # NOTE(review): label strings depend on the model card — confirm
            # the pipeline emits 'FAKE' (some builds use 'Fake'/'Real').
            label = result[0]['label']
            score_pct = result[0]['score'] * 100
            ai_prob = score_pct if label == 'FAKE' else 100 - score_pct

            # Results
            st.subheader("Results")
            create_gauge(ai_prob)

            col1, col2 = st.columns(2)
            with col1:
                st.metric("Words Analyzed", f"{min(word_count, MAX_WORDS)}/{word_count}")
            with col2:
                st.metric("AI Probability", f"{ai_prob:.1f}%")

            with st.expander("View analysis details"):
                st.write(f"**File:** {uploaded_file.name}")
                # FIX: was an f-string with no placeholders.
                st.write("**Model:** roberta-base-openai-detector")
                st.text_area("Sample text", processed_text[:500] + "...", height=150)
|
|
|
|
|
|
|
|
|
|
|
|