mobenta committed on
Commit
729aae2
·
verified ·
1 Parent(s): 73ba21d

Delete app (7).py

Browse files
Files changed (1) hide show
  1. app (7).py +0 -65
app (7).py DELETED
@@ -1,65 +0,0 @@
1
-
2
- import streamlit as st
3
- import os
4
- from scraper import fetch_html_selenium, format_data, save_raw_data, save_formatted_data
5
- from pagination_detector import detect_pagination_elements
6
- from assets import PRICING
7
- import google.generativeai as genai
8
-
9
- # Access API keys from Hugging Face Secrets
10
- openai_api_key = os.getenv('OPENAI_API_KEY')
11
- google_api_key = os.getenv('GOOGLE_API_KEY')
12
- groq_api_key = os.getenv('GROQ_API_KEY')
13
-
14
- # Check if the keys are available
15
- if not openai_api_key or not google_api_key or not groq_api_key:
16
- st.error("API keys are missing! Please add them as secrets in Hugging Face.")
17
-
18
- # Initialize Streamlit app
19
- st.set_page_config(page_title="Universal Web Scraper", page_icon="🦑")
20
- st.title("Universal Web Scraper 🦑")
21
-
22
- # Initialize session state variables if they don't exist
23
- if 'results' not in st.session_state:
24
- st.session_state['results'] = None
25
- if 'perform_scrape' not in st.session_state:
26
- st.session_state['perform_scrape'] = False
27
-
28
- # Sidebar components
29
- st.sidebar.title("Web Scraper Settings")
30
- model_selection = st.sidebar.selectbox("Select Model", options=list(PRICING.keys()), index=0)
31
- url_input = st.sidebar.text_input("Enter URL(s) separated by whitespace")
32
-
33
- # Add toggle to show/hide tags field
34
- show_tags = st.sidebar.checkbox("Enable Scraping", value=False)
35
-
36
- # Conditionally show tags input based on the toggle
37
- tags = []
38
- if show_tags:
39
- tags = st.sidebar.text_input("Enter Fields to Extract (comma-separated)").split(",")
40
-
41
- st.sidebar.markdown("---")
42
- # Add pagination toggle and input
43
- use_pagination = st.sidebar.checkbox("Enable Pagination", value=False)
44
- pagination_details = None
45
- if use_pagination:
46
- pagination_details = st.sidebar.text_input("Enter Pagination Details (optional)", help="Describe how to navigate through pages")
47
-
48
- st.sidebar.markdown("---")
49
-
50
- # Define the scraping function
51
- def perform_scrape():
52
- raw_html = fetch_html_selenium(url_input)
53
- markdown = format_data(raw_html)
54
- save_raw_data(markdown, "scraped_data")
55
-
56
- if use_pagination:
57
- pagination_data, _, _ = detect_pagination_elements(url_input, pagination_details, model_selection, markdown)
58
- return pagination_data
59
-
60
- return markdown
61
-
62
- if st.sidebar.button("Scrape"):
63
- with st.spinner("Scraping data..."):
64
- result = perform_scrape()
65
- st.write(result)