Rohanharsh163 committed · verified
Commit a3295e7 · 1 Parent(s): 2fae4b8

Update src/streamlit_app.py

Files changed (1)
1. src/streamlit_app.py +9 -44
src/streamlit_app.py CHANGED
@@ -1,10 +1,7 @@
 import streamlit as st
 from PIL import Image
 import requests
-from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from io import BytesIO
-import torch
-import torchvision.transforms as T
 
 st.set_page_config(page_title="WikiExplorer AR", layout="centered")
 st.title("📷 WikiExplorer AR (Streamlit Edition)")
@@ -23,47 +20,16 @@ lang = st.selectbox(
 
 lang_code = lang[1]
 
-# --- Load Hugging Face OCR model ---
-@st.cache_resource
-def load_trocr():
-    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
-    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
-    return processor, model
-
-processor, model = load_trocr()
-
-# --- Camera input (main source of place name) ---
-st.markdown("**📸 Capture a place name from signage, poster, or board:**")
-img_file_buffer = st.camera_input("Take a picture")
-
-# --- Optional text input if OCR fails ---
-place_name = st.text_input("📝 Or manually enter the place name (optional)")
-
-# --- OCR from captured image ---
-def run_trocr_ocr(image_data):
-    image = Image.open(image_data).convert("RGB")
-    transform = T.Compose([
-        T.Resize((384, 384)),
-        T.ToTensor()
-    ])
-    pixel_values = transform(image).unsqueeze(0)
-    generated_ids = model.generate(pixel_values)
-    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return text.strip()
+# --- Place name input ---
+st.markdown("**📍 Enter a place or person name to learn more:**")
+place_name = st.text_input("🏛️ For example: Charminar, Taj Mahal, Shah Jahan")
+
+# --- Camera input (optional) ---
+img_file_buffer = st.camera_input("📸 Take a picture (optional)")
 
 if img_file_buffer is not None:
     st.markdown("### 📷 Captured Image")
     st.image(img_file_buffer, caption="Uploaded via camera", use_column_width=True)
-    try:
-        with st.spinner("🧠 Running OCR..."):
-            ocr_text = run_trocr_ocr(BytesIO(img_file_buffer.getvalue()))
-        if ocr_text:
-            place_name = ocr_text
-            st.success(f"🧠 OCR detected: **{place_name}**")
-        else:
-            st.warning("OCR ran but could not extract any meaningful text.")
-    except Exception as e:
-        st.error(f"OCR failed: {e}")
 
 # --- Translation helpers ---
 def translate_text(text, target_lang):
@@ -159,9 +125,8 @@ if place_name.strip():
 # --- Footer ---
 st.markdown("""
 ---
-- 📸 Take a picture to auto-detect monument/place using Hugging Face OCR.
-- ✍️ Optional manual input if OCR fails.
+- 📸 Capture photo optionally — main input is typed place name.
 - 🌐 Wikipedia multilingual summary with fallback + sentence-level translation.
 - 🖼️ Commons image gallery integration.
-- ✅ Works in Hugging Face Spaces with Streamlit + Transformers.
+- ✅ Works in Hugging Face Spaces with Streamlit.
 """)
 