unclecode committed on
Commit
ae95bfb
·
1 Parent(s): ed4cc12

Initial commit

Browse files
Files changed (2) hide show
  1. Dockerfile +58 -0
  2. app.py +29 -0
Dockerfile ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9-slim

# Install Playwright-managed browsers into a fixed, shared directory.
# By default they go to ~/.cache/ms-playwright of the installing user (root at
# build time), where the runtime user (UID 1000 with HOME=/app) cannot find them.
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright

# 1) Install dependencies for Dev Mode + Playwright
#    NOTE: libappindicator1 was dropped from this list. It is no longer
#    packaged in current Debian releases, so requesting it makes apt-get fail;
#    `playwright install --with-deps` below pulls in whatever the browsers need.
RUN apt-get update && \
    apt-get install -y \
      bash \
      curl \
      wget \
      procps \
      git \
      git-lfs \
      libnss3 \
      libatk1.0-0 \
      libatk-bridge2.0-0 \
      libx11-6 \
      libx11-xcb1 \
      libxcomposite1 \
      libxcursor1 \
      libxdamage1 \
      libxext6 \
      libxfixes3 \
      libxi6 \
      libxrandr2 \
      libxrender1 \
      libxss1 \
      libxtst6 \
      libsecret-1-0 \
      fonts-ipafont-gothic && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# 2) Install Python dependencies
#    Done before copying the source so these layers stay cached when only the
#    application code changes. --no-cache-dir keeps pip's cache out of the image.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir gradio playwright && \
    pip install --no-cache-dir -U crawl4ai

# 3) Install Playwright browser(s)
#    `chrome` is kept from the original setup; `chromium` is added because
#    crawl4ai presumably launches Playwright's bundled Chromium by default
#    (TODO confirm against the installed crawl4ai version).
#    The chmod makes the browser directory readable by the non-root runtime user.
RUN playwright install --with-deps chrome chromium && \
    chmod -R a+rX "$PLAYWRIGHT_BROWSERS_PATH"

# 4) Copy code into /app
COPY . /app

# 5) Make /app owned by user 1000 (Dev Mode requirement)
RUN chown -R 1000 /app

# 6) Ensure the HOME variable points to /app (so Git writes to /app/.gitconfig)
ENV HOME=/app

# 7) Switch to user 1000
USER 1000

# 8) Expose port for Gradio
EXPOSE 7860

# 9) Start your Gradio app
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import asyncio
3
+ from crawl4ai import AsyncWebCrawler
4
+ from crawl4ai import CrawlerRunConfig, CacheMode
5
+ from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
6
+
7
def crawl_url_to_markdown(url):
    """Crawl a single URL with Crawl4AI and return the page as raw Markdown.

    This is a synchronous wrapper (suitable as a Gradio callback) around the
    asynchronous crawler; each call runs its own event loop via
    ``asyncio.run``.

    Args:
        url: Address of the page to crawl. Surrounding whitespace is ignored.

    Returns:
        The raw Markdown text produced for the page, or an empty string when
        the crawler produced no Markdown.

    Raises:
        ValueError: If ``url`` is empty or only whitespace.
        RuntimeError: If the crawler reports that the crawl did not succeed.
    """
    target = (url or "").strip()
    if not target:
        # Fail fast instead of starting a browser for a request that cannot work.
        raise ValueError("Please enter a URL to crawl.")

    async def _crawl(u):
        config = CrawlerRunConfig(
            cache_mode=CacheMode.BYPASS,
            markdown_generator=DefaultMarkdownGenerator(),
        )
        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun(u, config=config)

        # A failed crawl carries no Markdown; report the crawler's own message
        # rather than failing later with an AttributeError on None.
        if not getattr(result, "success", True):
            reason = getattr(result, "error_message", None) or "unknown error"
            raise RuntimeError(f"Crawl failed for {u}: {reason}")

        # `markdown_v2` is the attribute this code was written against; newer
        # crawl4ai releases deprecate it in favour of `markdown`. getattr with
        # a default also absorbs an AttributeError raised by the accessor.
        markdown = getattr(result, "markdown_v2", None)
        if markdown is None:
            markdown = getattr(result, "markdown", None)
        if markdown is None:
            return ""

        raw = getattr(markdown, "raw_markdown", None)
        # Some versions expose `markdown` as a plain string.
        return raw if raw is not None else str(markdown)

    return asyncio.run(_crawl(target))
18
+
19
# Single-textbox UI: the entered URL is passed to crawl_url_to_markdown and
# the returned Markdown is displayed as plain text.
demo = gr.Interface(
    crawl_url_to_markdown,
    "text",
    "text",
    title="Crawl4AI to Markdown",
    description="Enter a URL to crawl, returns page as Markdown",
)


if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (the port the Dockerfile exposes)
    # so the app is reachable from outside the container on Hugging Face Spaces.
    demo.launch(server_port=7860, server_name="0.0.0.0")