Kavinda2000003 committed on
Commit
517c66b
·
verified ·
1 Parent(s): af30526

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -26
app.py CHANGED
@@ -2,56 +2,84 @@ import streamlit as st
2
  from transformers import pipeline
3
  from PyPDF2 import PdfReader
4
  import docx
 
5
 
6
  # Page setup
7
  st.set_page_config(layout="wide")
8
  st.title("πŸ“„ AI Content Analyzer")
9
- st.markdown("Upload PDF/Word files to detect AI-generated text")
 
 
 
 
10
 
11
- # Load AI detection model
12
  @st.cache_resource
13
  def load_model():
14
  return pipeline("text-classification", model="roberta-base-openai-detector")
15
 
16
  detector = load_model()
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  # File uploader
19
- uploaded_file = st.file_uploader("Upload file (PDF or Word)", type=["pdf", "docx", "txt"])
20
 
21
  if uploaded_file:
22
  # Extract text
23
  text = ""
24
  if uploaded_file.name.endswith(".pdf"):
25
  reader = PdfReader(uploaded_file)
26
- text = "".join([page.extract_text() or "" for page in reader.pages])
27
  elif uploaded_file.name.endswith(".docx"):
28
  doc = docx.Document(uploaded_file)
29
- text = "\n".join([para.text for para in doc.paragraphs])
30
  else:
31
  text = uploaded_file.read().decode("utf-8")
32
 
33
- # Analyze on button click
34
- if st.button("Analyze Content"):
35
- if len(text) < 5000:
36
- st.warning("Not enough text to analyze!")
 
 
 
 
37
  else:
38
- result = detector(text[:5000]) # First 5000 chars for speed
39
- ai_prob = result[0]['score'] * 100 if result[0]['label'] == 'FAKE' else 100 - (result[0]['score'] * 100)
 
 
 
 
40
 
41
- # Display results
42
- st.subheader("Analysis Results")
 
43
 
44
- # AI Probability Meter
45
- st.metric("AI Content Probability", f"{ai_prob:.2f}%")
46
- st.progress(int(ai_prob))
 
 
47
 
48
- # File details
49
- st.markdown(f"""
50
- - **File:** `{uploaded_file.name}`
51
- - **Text Length:** {len(text)} characters
52
- - **AI Probability:** {ai_prob:.2f}%
53
- """)
54
-
55
- # Text preview
56
- with st.expander("View extracted text"):
57
- st.text(text[:1000] + "...") # First 1000 chars
 
2
  from transformers import pipeline
3
  from PyPDF2 import PdfReader
4
  import docx
5
+ import plotly.graph_objects as go
6
 
7
# --- Page setup ----------------------------------------------------------
st.set_page_config(layout="wide")
# Fix: title emoji was mojibake ("πŸ“„", UTF-8 bytes decoded as Latin-1);
# restore the intended page icon.
st.title("📄 AI Content Analyzer")
st.markdown("Upload PDF/Word files (Max 1000 words analysis)")

# --- Constants -----------------------------------------------------------
MAX_WORDS = 1000          # number of words actually fed to the detector
WORD_LIMIT_WARNING = 1500 # size above which a truncation notice is shown
 
 
16
@st.cache_resource
def load_model():
    """Build and cache the AI-text classification pipeline.

    Cached with ``st.cache_resource`` so the model weights are loaded
    only once per server process, not on every Streamlit rerun.
    """
    return pipeline(
        "text-classification",
        model="roberta-base-openai-detector",
    )

# One shared detector instance for the whole app session.
detector = load_model()
21
 
22
def count_words(text):
    """Return the number of whitespace-separated words in *text*."""
    # str.split() with no argument collapses runs of whitespace and
    # ignores leading/trailing blanks, so "" counts as zero words.
    return sum(1 for _ in text.split())
24
+
25
def create_gauge(score):
    """Render a Plotly gauge for *score* (an AI-probability percentage).

    Draws directly into the Streamlit page at full container width;
    returns nothing.
    """
    gauge_spec = {
        'axis': {'range': [None, 100], 'tickwidth': 1},
        'bar': {'color': "darkblue"},
        'steps': [
            {'range': [0, 50], 'color': 'green'},
            {'range': [50, 75], 'color': 'yellow'},
            {'range': [75, 100], 'color': 'red'},
        ],
    }
    indicator = go.Indicator(
        mode="gauge+number",
        value=score,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "AI Probability", 'font': {'size': 20}},
        gauge=gauge_spec,
    )
    st.plotly_chart(go.Figure(indicator), use_container_width=True)
40
+
41
# --- File upload & analysis ----------------------------------------------

def _extract_text(uploaded):
    """Return the plain text of an uploaded PDF, DOCX, or text file."""
    name = uploaded.name.lower()  # also accept .PDF / .DOCX
    if name.endswith(".pdf"):
        reader = PdfReader(uploaded)
        # extract_text() may return None for image-only pages; treat as "".
        return " ".join(page.extract_text() or "" for page in reader.pages)
    if name.endswith(".docx"):
        doc = docx.Document(uploaded)
        return " ".join(para.text for para in doc.paragraphs)
    # Plain-text fallback: replace undecodable bytes instead of crashing
    # on non-UTF-8 uploads.
    return uploaded.read().decode("utf-8", errors="replace")


uploaded_file = st.file_uploader("Choose file", type=["pdf", "docx", "txt"])

if uploaded_file:
    text = _extract_text(uploaded_file)
    word_count = count_words(text)

    # Bug fix: warn whenever truncation will actually happen. The old
    # threshold (WORD_LIMIT_WARNING = 1500) silently truncated files in
    # the 1001-1500 word range without telling the user.
    if word_count > MAX_WORDS:
        st.warning(f"⚠️ File has {word_count} words (Analyzing first {MAX_WORDS} words only)")

    if st.button("Analyze"):
        if word_count < 50:
            st.error("❌ Not enough text (min 50 words required)")
        else:
            # Only the first MAX_WORDS words are scored, for speed.
            processed_text = " ".join(text.split()[:MAX_WORDS])

            # truncation=True keeps the tokenized input within the model's
            # maximum sequence length instead of raising on long documents.
            result = detector(processed_text, truncation=True)
            label = result[0]['label']
            score = result[0]['score']
            # Case-insensitive label check: the hub detector reports
            # mixed-case labels ("Fake"/"Real"), so a strict == 'FAKE'
            # comparison never matched and inverted the probability.
            ai_prob = score * 100 if label.upper() == 'FAKE' else 100 - score * 100

            # --- Results ---------------------------------------------------
            st.subheader("Results")
            create_gauge(ai_prob)

            col1, col2 = st.columns(2)
            with col1:
                st.metric("Words Analyzed", f"{min(word_count, MAX_WORDS)}/{word_count}")
            with col2:
                st.metric("AI Probability", f"{ai_prob:.1f}%")

            with st.expander("View analysis details"):
                st.write(f"**File:** {uploaded_file.name}")
                st.write(f"**Model:** roberta-base-openai-detector")
                st.text_area("Sample text", processed_text[:500] + "...", height=150)