Rohanharsh163 committed on
Commit 2fae4b8 · verified · 1 Parent(s): 91b376c

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +42 -31
src/streamlit_app.py CHANGED
@@ -1,8 +1,10 @@
 import streamlit as st
 from PIL import Image
 import requests
-import easyocr
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from io import BytesIO
+import torch
+import torchvision.transforms as T
 
 st.set_page_config(page_title="WikiExplorer AR", layout="centered")
 st.title("📷 WikiExplorer AR (Streamlit Edition)")
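
Note on the import swap above: replacing easyocr with TrOCR means the Space now needs transformers, torch, and torchvision at runtime, and easyocr can be dropped. Assuming the Space installs dependencies from a requirements.txt (that file is not part of this commit), it would need roughly:

    streamlit
    Pillow
    requests
    transformers
    torch
    torchvision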
@@ -21,37 +23,47 @@ lang = st.selectbox(
 
 lang_code = lang[1]
 
-# --- Load OCR Model ---
+# --- Load Hugging Face OCR model ---
 @st.cache_resource
-def load_ocr_model():
-    return easyocr.Reader(['en'])  # You can add 'hi', 'te', 'ta' for multilingual OCR
+def load_trocr():
+    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
+    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
+    return processor, model
+
+processor, model = load_trocr()
+
+# --- Camera input (main source of place name) ---
+st.markdown("**📸 Capture a place name from signage, poster, or board:**")
+img_file_buffer = st.camera_input("Take a picture")
+
+# --- Optional text input if OCR fails ---
+place_name = st.text_input("📍 Or manually enter the place name (optional)")
+
+# --- OCR from captured image ---
+def run_trocr_ocr(image_data):
+    image = Image.open(image_data).convert("RGB")
+    transform = T.Compose([
+        T.Resize((384, 384)),
+        T.ToTensor()
+    ])
+    pixel_values = transform(image).unsqueeze(0)
+    generated_ids = model.generate(pixel_values)
+    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return text.strip()
 
-ocr_reader = load_ocr_model()
-
-# --- Place name input (optional if image is provided) ---
-st.markdown("**📍 Enter a place or person name to learn more (or capture it):**")
-place_name = st.text_input("🏛️ For example: Charminar, Taj Mahal, Shah Jahan")
-
-# --- Camera input ---
-img_file_buffer = st.camera_input("📸 Take a picture (optional)")
-
-# --- OCR from camera image ---
 if img_file_buffer is not None:
     st.markdown("### 📷 Captured Image")
     st.image(img_file_buffer, caption="Uploaded via camera", use_column_width=True)
-
-    image_bytes = BytesIO(img_file_buffer.getvalue())
-    result = ocr_reader.readtext(image_bytes)
-
-    if result:
-        detected_texts = [item[1] for item in result if item[1].strip()]
-        if detected_texts:
-            place_name = detected_texts[0]  # Top detected phrase
-            st.success(f"🧠 OCR detected: **{place_name}**")
-        else:
-            st.warning("OCR ran but could not extract any meaningful text.")
-    else:
-        st.warning("Could not detect text in the image.")
+    try:
+        with st.spinner("🧠 Running OCR..."):
+            ocr_text = run_trocr_ocr(BytesIO(img_file_buffer.getvalue()))
+        if ocr_text:
+            place_name = ocr_text
+            st.success(f"🧠 OCR detected: **{place_name}**")
+        else:
+            st.warning("OCR ran but could not extract any meaningful text.")
+    except Exception as e:
+        st.error(f"OCR failed: {e}")
 
 # --- Translation helpers ---
 def translate_text(text, target_lang):
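
A caveat on run_trocr_ocr in the hunk above: TrOCR checkpoints expect inputs preprocessed by TrOCRProcessor, which both resizes and normalizes pixel values, while the manual T.Resize + T.ToTensor pipeline skips the normalization step and can degrade recognition. A minimal sketch of a processor-driven variant, reusing the processor and model loaded above and leaving behavior otherwise unchanged:

    def run_trocr_ocr(image_data):
        # TrOCRProcessor handles resizing and normalization itself,
        # so no hand-rolled torchvision transform is needed.
        image = Image.open(image_data).convert("RGB")
        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        return processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()

Two further design notes: microsoft/trocr-base-handwritten is trained for handwriting, so the microsoft/trocr-base-printed checkpoint may suit printed signage better, and TrOCR decodes a single line of English text per image, so a multi-line board will yield only one line.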
@@ -147,10 +159,9 @@ if place_name.strip():
 # --- Footer ---
 st.markdown("""
 ---
-- 📌 Supports text search and camera input.
-- 🧠 OCR auto-detects place name from image.
+- 📸 Take a picture to auto-detect monument/place using Hugging Face OCR.
+- ✍️ Optional manual input if OCR fails.
 - 🌐 Wikipedia multilingual summary with fallback + sentence-level translation.
 - 🖼️ Commons image gallery integration.
-- ✅ Ready for Hugging Face deployment.
-- 🛠️ Streamlit only — no backend needed.
+- ✅ Works in Hugging Face Spaces with Streamlit + Transformers.
 """)
 