Baskar2005 committed on
Commit
8a035b2
·
verified ·
1 Parent(s): 9a1f016

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -0
app.py CHANGED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that fetches a page through the Bright Data request API.

The page HTML is requested via ``https://api.brightdata.com/request`` using
the configured zone, converted to readable text with ``html2text``, and shown
in the UI together with a download button.
"""

import os

import html2text
import requests
import streamlit as st

# The API key is read from the environment; it is not entered in the UI.
api_key = os.getenv("APIKEY_BRIGHTDATA")
st.title("ByPass Capcha & Text Extractor")

zone = "web_unlocker1"
url = st.text_input("Target URL", value="https://2captcha.com/demo/recaptcha-v2")

# NOTE: the request below asks for English via the Accept-Language header.
# Depending on the site, a language hint may instead have to go in the URL
# itself (e.g. "?hl=en", "?lang=en" or "/en/").

if st.button("Extract Text"):
    # Treat a whitespace-only input the same as an empty one.
    target_url = url.strip()

    if not api_key:
        # Without this check the request would be sent as "Bearer None" and
        # only fail later with an opaque HTTP error.
        st.error(
            "Bright Data API key is missing: "
            "set the APIKEY_BRIGHTDATA environment variable."
        )
    elif not target_url:
        st.warning("Please enter a target URL.")
    else:
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "Accept-Language": "en-US,en;q=0.9",
        }
        payload = {
            "zone": zone,
            "url": target_url,
            "format": "raw",
        }
        with st.spinner("Fetching page..."):
            try:
                response = requests.post(
                    "https://api.brightdata.com/request",
                    json=payload,
                    headers=headers,
                    timeout=60,
                )
                response.raise_for_status()
            except requests.RequestException as e:
                # Covers connection failures, timeouts and non-2xx statuses.
                st.error(f"Error: {e}")
            else:
                # Convert HTML to readable text.
                text = html2text.html2text(response.text)
                st.subheader("Extracted Text")
                st.text_area("Result", text, height=400)
                st.download_button(
                    "Download as .txt", text, file_name="extracted.txt"
                )