"""Streamlit app that fetches a page via the Bright Data request API.

The user supplies a target URL; the app posts it to
``https://api.brightdata.com/request`` using the configured zone, converts
the returned HTML to plain text with ``html2text``, and offers the text for
viewing and download.

The API key is read from the ``APIKEY_BRIGHTDATA`` environment variable.
"""

import os

import html2text
import requests
import streamlit as st

# The key comes from the environment rather than from a UI field, so it is
# never typed into (or rendered by) the page.
api_key = os.getenv("APIKEY_BRIGHTDATA")

st.title("ByPass Capcha & Text Extractor")

zone = "web_unlocker1"
url = st.text_input("Target URL", value="https://in.indeed.com/cmp/Ey/reviews")
# NOTE: some sites may need a language hint appended to the URL
# (e.g. "?hl=en", "?lang=en" or "/en/"); none is applied here.

if st.button("Extract Text"):
    # Strip so that a whitespace-only entry is treated as empty.
    target_url = url.strip()

    if not api_key:
        # Without this check the request would be sent with the literal
        # header "Bearer None" and fail with an opaque authorization error.
        st.error(
            "Bright Data API key not found. "
            "Set the APIKEY_BRIGHTDATA environment variable."
        )
    elif not target_url:
        st.warning("Please enter a target URL.")
    else:
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "Accept-Language": "en-US,en;q=0.9",
        }
        data = {
            "zone": zone,
            "url": target_url,
            "format": "raw",
        }

        with st.spinner("Fetching page..."):
            try:
                response = requests.post(
                    "https://api.brightdata.com/request",
                    json=data,
                    headers=headers,
                    timeout=60,
                )
                # Turn HTTP 4xx/5xx responses into exceptions so they are
                # reported below instead of being rendered as page text.
                response.raise_for_status()
                html = response.text

                # Convert HTML to readable text
                text = html2text.html2text(html)

                st.subheader("Extracted Text")
                st.text_area("Result", text, height=400)
                st.download_button(
                    "Download as .txt", text, file_name="extracted.txt"
                )
            except Exception as e:
                # Top-level UI boundary: report any failure (network, HTTP
                # status, conversion) to the user rather than crashing.
                st.error(f"Error: {e}")