Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,21 +1,24 @@
|
|
1 |
import streamlit as st
|
2 |
-
from transformers import pipeline
|
3 |
from PyPDF2 import PdfReader
|
4 |
import docx
|
5 |
import plotly.graph_objects as go
|
6 |
|
7 |
-
# Page
|
8 |
st.set_page_config(layout="wide")
|
9 |
st.title("π AI Content Analyzer")
|
10 |
-
st.markdown("Upload PDF/Word files
|
11 |
|
12 |
# Constants
|
13 |
-
MAX_WORDS = 1000 #
|
14 |
-
|
15 |
|
|
|
16 |
@st.cache_resource
|
17 |
def load_model():
|
18 |
-
|
|
|
|
|
19 |
|
20 |
detector = load_model()
|
21 |
|
@@ -27,7 +30,7 @@ def create_gauge(score):
|
|
27 |
mode = "gauge+number",
|
28 |
value = score,
|
29 |
domain = {'x': [0, 1], 'y': [0, 1]},
|
30 |
-
title = {'text': "AI Probability", 'font': {'size': 20}},
|
31 |
gauge = {
|
32 |
'axis': {'range': [None, 100], 'tickwidth': 1},
|
33 |
'bar': {'color': "darkblue"},
|
@@ -39,7 +42,7 @@ def create_gauge(score):
|
|
39 |
st.plotly_chart(fig, use_container_width=True)
|
40 |
|
41 |
# File uploader
|
42 |
-
uploaded_file = st.file_uploader("
|
43 |
|
44 |
if uploaded_file:
|
45 |
# Extract text
|
@@ -47,30 +50,29 @@ if uploaded_file:
|
|
47 |
if uploaded_file.name.endswith(".pdf"):
|
48 |
reader = PdfReader(uploaded_file)
|
49 |
text = " ".join([page.extract_text() or "" for page in reader.pages])
|
50 |
-
|
51 |
doc = docx.Document(uploaded_file)
|
52 |
text = " ".join([para.text for para in doc.paragraphs])
|
53 |
-
else:
|
54 |
-
text = uploaded_file.read().decode("utf-8")
|
55 |
|
56 |
word_count = count_words(text)
|
57 |
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
62 |
if word_count < 50:
|
63 |
-
st.error("β
|
64 |
else:
|
65 |
# Process first 1000 words
|
66 |
processed_text = " ".join(text.split()[:MAX_WORDS])
|
67 |
|
68 |
-
#
|
69 |
result = detector(processed_text)
|
70 |
ai_prob = result[0]['score']*100 if result[0]['label']=='FAKE' else 100-result[0]['score']*100
|
71 |
|
72 |
-
#
|
73 |
-
st.subheader("Results")
|
74 |
create_gauge(ai_prob)
|
75 |
|
76 |
col1, col2 = st.columns(2)
|
@@ -79,7 +81,5 @@ if uploaded_file:
|
|
79 |
with col2:
|
80 |
st.metric("AI Probability", f"{ai_prob:.1f}%")
|
81 |
|
82 |
-
with st.expander("View
|
83 |
-
st.
|
84 |
-
st.write(f"**Model:** roberta-base-openai-detector")
|
85 |
-
st.text_area("Sample text", processed_text[:500]+"...", height=150)
|
|
|
1 |
import streamlit as st
|
2 |
+
from transformers import pipeline, AutoTokenizer
|
3 |
from PyPDF2 import PdfReader
|
4 |
import docx
|
5 |
import plotly.graph_objects as go
|
6 |
|
7 |
+
# --- Page chrome -------------------------------------------------------
# set_page_config must be the first Streamlit call executed in the script.
st.set_page_config(layout="wide")
# NOTE(review): "π" looks like a mis-decoded emoji from the original
# source — confirm the intended glyph before changing it.
st.title("π AI Content Analyzer")
st.markdown("Upload PDF/Word files to detect AI-generated content")

# --- Tunables ----------------------------------------------------------
MAX_WORDS = 1000  # at most this many words are fed to the detector
WARNING_THRESHOLD = 1200  # above this count, warn that input will be clipped
|
15 |
|
16 |
+
# --- Model bootstrap ---------------------------------------------------
@st.cache_resource
def load_model():
    """Build and cache the AI-text detection pipeline.

    Decorated with ``st.cache_resource`` so the model weights are
    downloaded and loaded once per server process instead of on every
    Streamlit rerun.

    Returns:
        A ``transformers`` text-classification pipeline backed by the
        ``roberta-base-openai-detector`` checkpoint.
    """
    repo = "roberta-base-openai-detector"
    tok = AutoTokenizer.from_pretrained(repo)
    return pipeline("text-classification", model=repo, tokenizer=tok)


detector = load_model()
|
24 |
|
|
|
30 |
mode = "gauge+number",
|
31 |
value = score,
|
32 |
domain = {'x': [0, 1], 'y': [0, 1]},
|
33 |
+
title = {'text': "AI Content Probability", 'font': {'size': 20}},
|
34 |
gauge = {
|
35 |
'axis': {'range': [None, 100], 'tickwidth': 1},
|
36 |
'bar': {'color': "darkblue"},
|
|
|
42 |
st.plotly_chart(fig, use_container_width=True)
|
43 |
|
44 |
# File uploader — restricted to the two formats the extraction code
# below knows how to parse.
uploaded_file = st.file_uploader(
    "Upload file (PDF or Word)",
    type=["pdf", "docx"],
)
|
46 |
|
47 |
if uploaded_file:
|
48 |
# Extract text
|
|
|
50 |
if uploaded_file.name.endswith(".pdf"):
|
51 |
reader = PdfReader(uploaded_file)
|
52 |
text = " ".join([page.extract_text() or "" for page in reader.pages])
|
53 |
+
else:
|
54 |
doc = docx.Document(uploaded_file)
|
55 |
text = " ".join([para.text for para in doc.paragraphs])
|
|
|
|
|
56 |
|
57 |
word_count = count_words(text)
|
58 |
|
59 |
+
# Word limit warning
|
60 |
+
if word_count > WARNING_THRESHOLD:
|
61 |
+
st.warning(f"β οΈ File contains {word_count} words (Analyzing first {MAX_WORDS} words only)")
|
62 |
+
|
63 |
+
if st.button("Analyze Content"):
|
64 |
if word_count < 50:
|
65 |
+
st.error("β Insufficient text for analysis (minimum 50 words required)")
|
66 |
else:
|
67 |
# Process first 1000 words
|
68 |
processed_text = " ".join(text.split()[:MAX_WORDS])
|
69 |
|
70 |
+
# Perform analysis
|
71 |
result = detector(processed_text)
|
72 |
ai_prob = result[0]['score']*100 if result[0]['label']=='FAKE' else 100-result[0]['score']*100
|
73 |
|
74 |
+
# Display results
|
75 |
+
st.subheader("Analysis Results")
|
76 |
create_gauge(ai_prob)
|
77 |
|
78 |
col1, col2 = st.columns(2)
|
|
|
81 |
with col2:
|
82 |
st.metric("AI Probability", f"{ai_prob:.1f}%")
|
83 |
|
84 |
+
with st.expander("View Text Sample"):
|
85 |
+
st.text(processed_text[:1000] + ("..." if word_count>1000 else ""))
|
|
|
|