Kavinda2000003 committed on
Commit fb25f09 · verified · 1 Parent(s): 39465f4

Update app.py

Files changed (1)
  1. app.py  +23 -23
app.py CHANGED
@@ -1,21 +1,24 @@
  import streamlit as st
- from transformers import pipeline
+ from transformers import pipeline, AutoTokenizer
  from PyPDF2 import PdfReader
  import docx
  import plotly.graph_objects as go

- # Page setup
+ # Page configuration
  st.set_page_config(layout="wide")
  st.title("📄 AI Content Analyzer")
- st.markdown("Upload PDF/Word files (Max 1000 words analysis)")
+ st.markdown("Upload PDF/Word files to detect AI-generated content")

  # Constants
- MAX_WORDS = 1000 # Word limit
- WORD_LIMIT_WARNING = 1500 # Warning threshold
+ MAX_WORDS = 1000 # Maximum words to analyze
+ WARNING_THRESHOLD = 1200 # Warning threshold for large files

+ # Load AI detection model
  @st.cache_resource
  def load_model():
-     return pipeline("text-classification", model="roberta-base-openai-detector")
+     model_name = "roberta-base-openai-detector"
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     return pipeline("text-classification", model=model_name, tokenizer=tokenizer)

  detector = load_model()

@@ -27,7 +30,7 @@ def create_gauge(score):
          mode = "gauge+number",
          value = score,
          domain = {'x': [0, 1], 'y': [0, 1]},
-         title = {'text': "AI Probability", 'font': {'size': 20}},
+         title = {'text': "AI Content Probability", 'font': {'size': 20}},
          gauge = {
              'axis': {'range': [None, 100], 'tickwidth': 1},
              'bar': {'color': "darkblue"},
@@ -39,7 +42,7 @@ def create_gauge(score):
      st.plotly_chart(fig, use_container_width=True)

  # File uploader
- uploaded_file = st.file_uploader("Choose file", type=["pdf", "docx", "txt"])
+ uploaded_file = st.file_uploader("Upload file (PDF or Word)", type=["pdf", "docx"])

  if uploaded_file:
      # Extract text
@@ -47,30 +50,29 @@ if uploaded_file:
      if uploaded_file.name.endswith(".pdf"):
          reader = PdfReader(uploaded_file)
          text = " ".join([page.extract_text() or "" for page in reader.pages])
-     elif uploaded_file.name.endswith(".docx"):
+     else:
          doc = docx.Document(uploaded_file)
          text = " ".join([para.text for para in doc.paragraphs])
-     else:
-         text = uploaded_file.read().decode("utf-8")

      word_count = count_words(text)

-     if word_count > WORD_LIMIT_WARNING:
-         st.warning(f"⚠️ File has {word_count} words (Analyzing first {MAX_WORDS} words only)")
-
-     if st.button("Analyze"):
+     # Word limit warning
+     if word_count > WARNING_THRESHOLD:
+         st.warning(f"⚠️ File contains {word_count} words (Analyzing first {MAX_WORDS} words only)")
+
+     if st.button("Analyze Content"):
          if word_count < 50:
-             st.error("❌ Not enough text (min 50 words required)")
+             st.error("❌ Insufficient text for analysis (minimum 50 words required)")
          else:
              # Process first 1000 words
              processed_text = " ".join(text.split()[:MAX_WORDS])

-             # Analysis
+             # Perform analysis
              result = detector(processed_text)
              ai_prob = result[0]['score']*100 if result[0]['label']=='FAKE' else 100-result[0]['score']*100

-             # Results
-             st.subheader("Results")
+             # Display results
+             st.subheader("Analysis Results")
              create_gauge(ai_prob)

              col1, col2 = st.columns(2)
@@ -79,7 +81,5 @@ if uploaded_file:
              with col2:
                  st.metric("AI Probability", f"{ai_prob:.1f}%")

-             with st.expander("View analysis details"):
-                 st.write(f"**File:** {uploaded_file.name}")
-                 st.write(f"**Model:** roberta-base-openai-detector")
-                 st.text_area("Sample text", processed_text[:500]+"...", height=150)
+             with st.expander("View Text Sample"):
+                 st.text(processed_text[:1000] + ("..." if word_count>1000 else ""))
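The create_gauge hunks expose only a slice of the Plotly indicator, so here is a minimal sketch of how the whole function plausibly reads after this commit. The 'steps' colour bands are an assumption for illustration; they sit outside the diff context and may differ in the actual file.

    import plotly.graph_objects as go
    import streamlit as st

    def create_gauge(score):
        # Gauge-style indicator, matching the fragments shown in the diff.
        fig = go.Figure(go.Indicator(
            mode="gauge+number",
            value=score,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': "AI Content Probability", 'font': {'size': 20}},
            gauge={
                'axis': {'range': [None, 100], 'tickwidth': 1},
                'bar': {'color': "darkblue"},
                # Assumed styling; not part of the visible hunks.
                'steps': [
                    {'range': [0, 50], 'color': "lightgreen"},
                    {'range': [50, 100], 'color': "salmon"},
                ],
            },
        ))
        st.plotly_chart(fig, use_container_width=True)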
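Both versions call count_words(text), whose definition lies in an unchanged part of app.py and is therefore not shown. A hedged sketch of such a helper, assuming the same whitespace splitting used for processed_text:

    def count_words(text: str) -> int:
        # Whitespace-delimited count, consistent with text.split()[:MAX_WORDS] above.
        return len(text.split())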
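The ai_prob expression assumes the text-classification pipeline returns a one-element list of {'label', 'score'} dicts, which is the standard transformers output shape; the label strings below are taken from the code's own comparison and the numbers are made up for illustration:

    # Hypothetical output for a document the detector flags as AI-generated
    result = [{'label': 'FAKE', 'score': 0.75}]
    ai_prob = result[0]['score']*100 if result[0]['label']=='FAKE' else 100-result[0]['score']*100
    print(ai_prob)  # 75.0

    # Hypothetical output for a document it considers human-written
    result = [{'label': 'REAL', 'score': 0.875}]
    ai_prob = result[0]['score']*100 if result[0]['label']=='FAKE' else 100-result[0]['score']*100
    print(ai_prob)  # 12.5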