File size: 10,373 Bytes
4c3fe29
 
f2b00a1
35ae779
3819331
80fef4e
31d5b37
 
35ae779
15fdb32
35ae779
31d5b37
d74e8cc
 
80fef4e
 
 
006e72f
80fef4e
006e72f
f2b00a1
7f2bf6a
f2b00a1
7f2bf6a
f2b00a1
 
 
 
 
 
31d5b37
7f2bf6a
f2b00a1
 
 
 
 
 
 
3819331
006e72f
f2b00a1
7f2bf6a
 
 
 
 
f2b00a1
7f2bf6a
f2b00a1
 
 
 
7f2bf6a
 
f2b00a1
 
 
 
31d5b37
f2b00a1
80fef4e
f2b00a1
006e72f
7f2bf6a
80fef4e
 
7f2bf6a
80fef4e
7f2bf6a
 
80fef4e
7f2bf6a
 
 
 
 
 
3819331
80fef4e
f2b00a1
35ae779
7f2bf6a
80fef4e
 
 
7f2bf6a
 
80fef4e
 
7f2bf6a
 
80fef4e
35ae779
15fdb32
7f2bf6a
80fef4e
7f2bf6a
 
 
35ae779
80fef4e
 
7f2bf6a
80fef4e
35ae779
 
 
7f2bf6a
 
f2b00a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31d5b37
7f2bf6a
80fef4e
 
3819331
 
80fef4e
15fdb32
31d5b37
3819331
15fdb32
3819331
31d5b37
006e72f
15fdb32
80fef4e
31d5b37
d74e8cc
80fef4e
 
 
 
 
 
 
 
 
d74e8cc
 
f2b00a1
d74e8cc
 
 
 
 
 
 
f2b00a1
d74e8cc
 
f2b00a1
 
 
 
 
 
 
d74e8cc
 
 
 
 
 
 
 
 
3819331
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import os
# Run the Playwright CLI installer at import time, before any browser launch is attempted.
# NOTE(review): the exit status returned by os.system is discarded, so a failed
# install is not detected here and would only surface when a browser is launched.
os.system("playwright install")
# app.py (Final, Working Async Version with All Bugs Fixed)

import gradio as gr
from playwright.async_api import async_playwright, Error as PlaywrightError
from bs4 import BeautifulSoup
import urllib.parse
import os
from itertools import cycle
import uuid

# --- 1. GLOBAL RESOURCES & STATE ---
# Process-wide resources. They are left unset at import time and populated by
# master_handler on the first request, to be compatible with Spaces.
P = None  # Playwright handle returned by async_playwright().start()
BROWSER = None  # Firefox browser launched from P
REVOLVER = None  # CredentialRevolver built from the PROXY_LIST environment variable
LIVE_CONTEXTS = {}  # { tab_id: { "context": PlaywrightContext, "page": PlaywrightPage } }
APP_STARTED = False  # set to True once the lazy start-up has completed

# --- 2. PLAIN DATA STATE CLASSES (Copyable) ---
class TabState:
    """Snapshot of a single tab: its id, location, parsed content and proxy label.

    Only plain Python values are stored, so an instance can be copied freely.
    """

    def __init__(self, tab_id, proxy_used="Direct Connection"):
        self.id = tab_id
        # Placeholder location and content shown until a navigation fills them in.
        self.url, self.title = "about:blank", "New Tab"
        self.parsed_text = "Welcome! Navigate to a URL or search to get started."
        self.links = list()
        # Human-readable description of the route this tab's traffic takes.
        self.proxy_used = proxy_used

class BrowserState:
    """Plain container for the open tabs and the id of the selected one."""

    def __init__(self):
        self.tabs = []
        self.active_tab_id = None

    def get_active_tab(self):
        """Return the tab whose ``id`` equals ``active_tab_id``, or None.

        None is returned when no id is selected (any falsy value) and also
        when none of the entries in ``tabs`` carries the selected id.
        """
        if self.active_tab_id:
            for candidate in self.tabs:
                if candidate.id == self.active_tab_id:
                    return candidate
        return None

class CredentialRevolver:
    """Manages a rotating list of proxies from an environment variable.

    The proxy string holds one proxy per line in the form
    ``[scheme://][user:password@]host[:port]``. Blank lines and lines that
    cannot be parsed are ignored. Each parsed proxy is a dict with the keys
    ``server``, ``username`` and ``password`` (the latter two are None when
    the line carries no credentials).
    """

    def __init__(self, proxy_string: str):
        self.proxies = self._parse_proxies(proxy_string)
        # itertools.cycle over an empty list would raise StopIteration on the
        # first next(); keep None so get_next() can report "no proxy" instead.
        self.proxy_cycler = cycle(self.proxies) if self.proxies else None

    def _parse_proxies(self, proxy_string: str):
        """Parse the multi-line proxy string into a list of proxy dicts.

        Args:
            proxy_string: Newline-separated proxy definitions; may be empty
                or None.

        Returns:
            A list of ``{"server", "username", "password"}`` dicts, in input
            order, containing only the lines that yielded a host name.
        """
        proxies = []
        for raw_line in (proxy_string or "").splitlines():
            line = raw_line.strip()
            if not line:
                continue
            # urlparse only recognises the authority part after "//". Lines
            # that already carry a scheme are parsed as-is so the scheme is
            # kept; bare "host:port" lines get the "//" prefix.
            target = line if "://" in line else f"//{line}"
            try:
                parsed = urllib.parse.urlparse(target)
                host = parsed.hostname
                port = parsed.port  # raises ValueError for a non-numeric port
            except ValueError:
                continue
            if not host:
                continue
            if ":" in host:
                # IPv6 literal: urlparse strips the brackets, restore them.
                host = f"[{host}]"
            server = f"{parsed.scheme or 'http'}://{host}"
            if port is not None:
                server += f":{port}"
            proxies.append({
                "server": server,
                "username": parsed.username,
                "password": parsed.password,
            })
        return proxies

    def get_next(self):
        """Return the next proxy dict in rotation, or None if none are configured."""
        return next(self.proxy_cycler) if self.proxy_cycler else None

    def count(self):
        """Return the number of usable proxies that were parsed."""
        return len(self.proxies)

# --- 3. CORE ASYNC LOGIC ---
async def _fetch_and_update_tab_state(tab_state: TabState, url: str):
    """Navigate the tab's live page to ``url`` and refresh ``tab_state`` from it.

    The page is loaded with Playwright and its HTML is parsed with
    BeautifulSoup. On success ``tab_state`` receives the final URL, the page
    title, the visible text (with script/style/nav/footer elements removed)
    and the list of http(s) links found in the remaining markup. On a
    Playwright error the tab is switched to an error view instead.

    Args:
        tab_state: The tab to update; its ``id`` must be a key of LIVE_CONTEXTS.
        url: The address to navigate to.

    Returns:
        A multi-line, human-readable log of what happened.

    Raises:
        KeyError: If no live page is registered for ``tab_state.id``.
    """
    log = f"▶️ Navigating to {url}..."
    live_page = LIVE_CONTEXTS[tab_state.id]["page"]
    try:
        await live_page.goto(url, wait_until='domcontentloaded', timeout=30000)
        # Read the URL back from the page: redirects may have changed it.
        tab_state.url = live_page.url
        tab_state.title = await live_page.title() or "No Title"
        log += f"\n✅ Arrived at: {tab_state.url}"

        html_content = await live_page.content()
        soup = BeautifulSoup(html_content, 'lxml')
        for unwanted in soup(["script", "style", "nav", "footer"]):
            unwanted.extract()
        tab_state.parsed_text = soup.get_text(separator='\n', strip=True)

        links = []
        for anchor in soup.find_all('a', href=True):
            href = anchor['href'].strip()
            # Pure in-page fragments do not lead anywhere new.
            if not href or href.startswith('#'):
                continue
            # Resolve first, filter second: relative hrefs become absolute
            # here, while mailto:/javascript:/tel: targets keep their scheme
            # and are rejected by the check below.
            absolute = urllib.parse.urljoin(tab_state.url, href)
            if urllib.parse.urlparse(absolute).scheme not in ('http', 'https'):
                continue
            links.append({
                'text': anchor.get_text(strip=True) or "[No Link Text]",
                'url': absolute,
            })
        tab_state.links = links
        log += f"\n🔗 Found {len(tab_state.links)} links."
    except PlaywrightError as e:
        error_message = str(e)
        tab_state.title = "Error"
        tab_state.url = url
        tab_state.parsed_text = f"❌ Failed to load page.\n\nError: {error_message}"
        tab_state.links = []
        log += f"\n❌ {error_message}"
    return log

async def handle_action(browser_state: "BrowserState", action: str, value=None):
    """Apply one user action to ``browser_state`` and report what happened.

    Supported actions:
        * ``"new_tab"``: open a new browser context (using the next proxy
          from REVOLVER, if any), make it the active tab and load a start page.
        * ``"go"``: navigate the active tab. ``value`` is used as the URL
          when it has both a scheme and a host, otherwise it is sent to
          DuckDuckGo as a search query.
        * ``"click"``: follow the link at index ``value`` in the active
          tab's link list.
        * ``"close_tab"``: close the active tab unless it is the last one.
        * ``"switch_tab"``: make the tab whose id is ``value`` active.

    Unknown actions, and actions that need an active tab when there is none,
    leave the state untouched and produce an empty log.

    Args:
        browser_state: The state object to modify in place.
        action: Name of the action to perform.
        value: Action-specific argument (URL/query, link index, or tab id).

    Returns:
        A ``(browser_state, log)`` tuple; ``log`` is a human-readable status.
    """
    log = ""
    active_tab_state = browser_state.get_active_tab()

    if action == "new_tab":
        tab_id = str(uuid.uuid4())
        proxy_config = REVOLVER.get_next()
        context = await BROWSER.new_context(proxy=proxy_config)
        page = await context.new_page()
        LIVE_CONTEXTS[tab_id] = {"context": context, "page": page}
        new_tab = TabState(tab_id, proxy_used=proxy_config['server'] if proxy_config else "Direct")
        browser_state.tabs.append(new_tab)
        browser_state.active_tab_id = tab_id
        log = await _fetch_and_update_tab_state(new_tab, "https://www.whatsmyip.org/")

    elif action == "go" and active_tab_state:
        query = value.strip() if isinstance(value, str) else ""
        if not query:
            # Nothing to navigate to; avoid an empty search or a TypeError on None.
            log = "Please enter a URL or search term."
        else:
            parsed = urllib.parse.urlparse(query)
            if parsed.scheme and parsed.netloc:
                url = query
            else:
                url = f"https://duckduckgo.com/html/?q={urllib.parse.quote_plus(query)}"
            log = await _fetch_and_update_tab_state(active_tab_state, url)

    elif action == "click" and active_tab_state and value is not None:
        # Only the conversion is guarded, so that a failure during the
        # navigation itself is not misreported as a bad link number.
        try:
            link_index = int(value)
        except (TypeError, ValueError, OverflowError):
            link_index = None
        if link_index is None:
            log = "Please enter a valid number to click."
        elif 0 <= link_index < len(active_tab_state.links):
            log = await _fetch_and_update_tab_state(active_tab_state, active_tab_state.links[link_index]['url'])
        else:
            log = "Invalid link number."

    elif action == "close_tab" and active_tab_state:
        if len(browser_state.tabs) > 1:
            tab_to_close_id = browser_state.active_tab_id
            tab_index = browser_state.tabs.index(active_tab_state)
            browser_state.tabs.pop(tab_index)
            # Activate the left neighbour, or the new first tab if the
            # closed one was the first.
            new_index = max(tab_index - 1, 0)
            browser_state.active_tab_id = browser_state.tabs[new_index].id
            # Tolerate a missing entry so the state stays consistent even if
            # the live context was already gone.
            resources = LIVE_CONTEXTS.pop(tab_to_close_id, None)
            if resources is not None:
                await resources['context'].close()
            log = "💣 Tab closed."
        else:
            log = "Cannot close the last tab."

    elif action == "switch_tab" and value is not None:
        # Refuse ids that match no open tab; otherwise the state would point
        # at a tab that does not exist.
        if any(tab.id == value for tab in browser_state.tabs):
            browser_state.active_tab_id = value
            log = "Switched to tab."
        else:
            log = "Tab not found."

    return browser_state, log

# **BUG FIX 1: NameError Fix**
# This function is now defined before it is called by the master_handler.
def update_ui_components(browser_state: BrowserState):
    """Build the Gradio component updates that reflect ``browser_state``.

    This is a plain synchronous function. The returned dict is keyed by the
    module-level components ``page_content``, ``url_textbox``,
    ``links_display`` and ``tab_selector``.
    """
    current = browser_state.get_active_tab()
    if not current:
        # No tab selected: blank out every panel.
        return {
            page_content: gr.Markdown("No active tabs."),
            url_textbox: "",
            links_display: "",
            tab_selector: gr.Radio(choices=[]),
        }

    # One (label, value) pair per open tab for the radio selector.
    tab_choices = []
    for position, tab in enumerate(browser_state.tabs):
        label = f"Tab {position}: {tab.title[:25]}... (via {tab.proxy_used})"
        tab_choices.append((label, tab.id))

    # Numbered markdown list of at most the first 25 links.
    if current.links:
        numbered = [
            f"{number}. [{entry['text'][:80]}]({entry['url']})"
            for number, entry in enumerate(current.links[:25])
        ]
        link_section = '\n'.join(numbered)
    else:
        link_section = "_No links found._"
    links_md = "### 🔗 Links on Page\n" + link_section

    page_md = f"# {current.title}\n**URL:** {current.url}\n\n---\n\n{current.parsed_text[:2000]}..."
    return {
        page_content: gr.Markdown(page_md),
        url_textbox: gr.Textbox(value=current.url),
        links_display: gr.Markdown(links_md),
        tab_selector: gr.Radio(choices=tab_choices, value=current.id, label="Active Tabs"),
    }

# --- 4. GRADIO UI AND EVENT HANDLING ---
with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
    # State object passed into and returned from every event handler.
    browser_state = gr.State(BrowserState())
    gr.Markdown("# 🛰️ Real Browser Demo (Final Working Version)")

    with gr.Row():
        # Left column: address bar, page text and status log.
        with gr.Column(scale=3):
            url_textbox = gr.Textbox(label="URL or Search Term", interactive=True)
            go_btn = gr.Button("Go", variant="primary")
            with gr.Accordion("Page Content (Text Only)", open=True):
                page_content = gr.Markdown("Loading...")
            log_display = gr.Textbox(label="Status Log", interactive=False)
        # Right column: tab management and link navigation.
        with gr.Column(scale=1):
            with gr.Row():
                new_tab_btn = gr.Button("➕ New Tab")
                close_tab_btn = gr.Button("❌ Close Tab")
            tab_selector = gr.Radio(choices=[], label="Active Tabs", interactive=True)
            with gr.Accordion("Clickable Links", open=True):
                links_display = gr.Markdown("...")
                with gr.Row():
                    click_num_box = gr.Number(label="Link #", scale=1, minimum=0, step=1)
                    click_btn = gr.Button("Click Link", scale=2)

    all_outputs = [page_content, url_textbox, links_display, tab_selector, log_display]

    async def master_handler(current_state, action, value=None):
        """Run one user action and return a value for every output component.

        On the first call this also starts Playwright, launches headless
        Firefox and loads the proxy list from the PROXY_LIST environment
        variable.

        Returns:
            A 6-tuple ordered like ``[browser_state, *all_outputs]``: the new
            state, page content, URL textbox, links panel, tab selector and
            the status log text.
        """
        global APP_STARTED, P, BROWSER, REVOLVER
        if not APP_STARTED:
            print("🚀 First request received, starting up Playwright...")
            P = await async_playwright().start()
            BROWSER = await P.firefox.launch(headless=True)
            proxy_list_str = os.getenv("PROXY_LIST", "")
            REVOLVER = CredentialRevolver(proxy_list_str)
            print(f"✅ Playwright started. {REVOLVER.count()} proxies loaded.")
            APP_STARTED = True

        new_state, log = await handle_action(current_state, action, value)
        ui_updates = update_ui_components(new_state)

        # One value per output component, in the same order as the outputs
        # lists used in the event wiring below.
        return (
            new_state,
            ui_updates[page_content],
            ui_updates[url_textbox],
            ui_updates[links_display],
            ui_updates[tab_selector],
            log,
        )

    # Event handlers. These must be real coroutine functions: Gradio decides
    # whether to await a handler by inspecting the function itself, and a
    # lambda that merely returns master_handler(...) is an ordinary function
    # whose coroutine result would be handed back without being awaited.
    async def on_load(s):
        return await master_handler(s, "new_tab", None)

    async def on_go_click(s, v):
        return await master_handler(s, "go", v)

    async def on_click_link(s, v):
        return await master_handler(s, "click", v)

    async def on_new_tab(s):
        return await master_handler(s, "new_tab", None)

    async def on_close_tab(s):
        return await master_handler(s, "close_tab", None)

    async def on_switch_tab(s, v):
        return await master_handler(s, "switch_tab", v)

    # Wire each UI event to its handler; every handler updates the state
    # plus all visible components.
    demo.load(on_load, inputs=[browser_state], outputs=[browser_state, *all_outputs])
    go_btn.click(on_go_click, [browser_state, url_textbox], [browser_state, *all_outputs], show_progress="full")
    url_textbox.submit(on_go_click, [browser_state, url_textbox], [browser_state, *all_outputs], show_progress="full")
    click_btn.click(on_click_link, [browser_state, click_num_box], [browser_state, *all_outputs], show_progress="full")
    new_tab_btn.click(on_new_tab, [browser_state], [browser_state, *all_outputs], show_progress="full")
    close_tab_btn.click(on_close_tab, [browser_state], [browser_state, *all_outputs])
    tab_selector.input(on_switch_tab, [browser_state, tab_selector], [browser_state, *all_outputs])

demo.launch()