Synced repo using 'sync_with_huggingface' Github Action
Files changed:
- Dockerfile (+2 -0)
- scrape_chroma.py (+2 -1)
Dockerfile
CHANGED
@@ -18,6 +18,8 @@ RUN pip install --no-cache-dir -r requirements.txt
 COPY . .

 RUN chmod +x /app/entrypoint.sh

+RUN mkdir -p /app/data && chmod -R 777 /app/data
+
 EXPOSE 8501
scrape_chroma.py
CHANGED
@@ -43,7 +43,8 @@ def scrape_and_store():
     print(f"Fetched {len(all_titles_sources)} unique titles.")

     # Save to CSV
-    save_titles_to_csv(all_titles_sources, filename="pib_titles.csv")
+    save_titles_to_csv(all_titles_sources, filename="data/pib_titles.csv")
+
     # Prepare for ChromaDB
     documents = [title for title, source in all_titles_sources]