File size: 1,480 Bytes
8a035b2
 
 
 
 
 
 
 
 
 
1a579a9
8a035b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import streamlit as st
import requests
import html2text
import os 

# Streamlit page: fetch a URL through the Bright Data request API and show
# the returned HTML converted to plain text, with a download option.

# The API key is read from the environment so it never appears in the UI.
api_key = os.getenv('APIKEY_BRIGHTDATA')
st.title("ByPass Capcha & Text Extractor")

# Bright Data zone name sent with every request.
zone = "web_unlocker1"
url = st.text_input("Target URL", value="https://in.indeed.com/cmp/Ey/reviews")

if st.button("Extract Text"):
    # Pasted URLs often carry stray whitespace; a whitespace-only value
    # must be treated as "no URL" rather than sent to the API.
    target_url = url.strip()

    if not api_key:
        # Without this check the request would go out as "Bearer None" and
        # fail with an opaque HTTP error.
        st.error(
            "Bright Data API key is not configured. "
            "Set the APIKEY_BRIGHTDATA environment variable."
        )
    elif not target_url:
        st.warning("Please enter a target URL.")
    else:
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "Accept-Language": "en-US,en;q=0.9",
        }
        data = {
            "zone": zone,
            "url": target_url,
            "format": "raw",
        }
        with st.spinner("Fetching page..."):
            try:
                # Only the network call is guarded: connection failures,
                # timeouts and non-2xx statuses (via raise_for_status) all
                # derive from requests.RequestException.
                response = requests.post(
                    "https://api.brightdata.com/request",
                    json=data,
                    headers=headers,
                    timeout=60,
                )
                response.raise_for_status()
            except requests.RequestException as e:
                st.error(f"Error: {e}")
            else:
                html = response.text
                # Convert HTML to readable text
                text = html2text.html2text(html)
                st.subheader("Extracted Text")
                st.text_area("Result", text, height=400)
                st.download_button("Download as .txt", text, file_name="extracted.txt")