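"""Zephyr Chat & Scrape — a Streamlit app that pairs a Zephyr-7B chatbot
(via the transformers text-generation pipeline) with ScrapeGraphAI web
scraping. Scraped results are kept in session state and prepended to the
chat prompt as extra context."""
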
import streamlit as st
from transformers import pipeline
from scrapegraphai.graphs import SmartScraperGraph
import torch
# Page config
st.set_page_config(
    page_title="Zephyr Chat & Scrape",
    page_icon="🤖",
    layout="wide"
)
# Initialize session state
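# (st.session_state persists these values across the top-to-bottom reruns
# that Streamlit triggers on every user interaction)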
if "messages" not in st.session_state:
st.session_state.messages = []
if "scrape_results" not in st.session_state:
st.session_state.scrape_results = None
# Load Zephyr model
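# @st.cache_resource builds the pipeline once per process and reuses it on
# subsequent reruns instead of reloading the 7B model every time.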
@st.cache_resource
def load_model():
    return pipeline(
        "text-generation",
        model="HuggingFaceH4/zephyr-7b-beta",
        torch_dtype=torch.float16,
        device_map="auto",
    )
# Initialize the model
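# zephyr-7b-beta is roughly 14 GB of weights in float16; device_map="auto"
# lets accelerate place (and, if necessary, offload) layers across the
# available devices.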
model = load_model()
# Sidebar for web scraping
with st.sidebar:
st.title("Web Scraping")
url = st.text_input("Enter URL to scrape")
scrape_prompt = st.text_input("What information do you want to extract?")
if st.button("Scrape"):
try:
# Configure scraper
graph_config = {
"llm": {
"model": "HuggingFaceH4/zephyr-7b-beta",
"temperature": 0.7,
},
"verbose": True
}
# Create scraper instance
scraper = SmartScraperGraph(
prompt=scrape_prompt,
source=url,
config=graph_config
)
# Run scraping
st.session_state.scrape_results = scraper.run()
st.success("Scraping completed!")
except Exception as e:
st.error(f"Error during scraping: {str(e)}")
# Main chat interface
st.title("Zephyr Chatbot πŸ€–")
# Display scraped results if available
if st.session_state.scrape_results:
    st.subheader("Scraped Information")
    st.json(st.session_state.scrape_results)
# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# Chat input
if prompt := st.chat_input("What's on your mind?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            # Include scraped content in context if available
            context = ""
            if st.session_state.scrape_results:
                context = f"Scraped information: {str(st.session_state.scrape_results)}\n"
            full_prompt = f"{context}User: {prompt}\nAssistant:"
            response = model(
                full_prompt,
                max_new_tokens=512,  # cap generated tokens; max_length would also count the (possibly long) prompt
                do_sample=True,      # sampling must be enabled for temperature/top_p to take effect
                temperature=0.7,
                top_p=0.95,
                repetition_penalty=1.15
            )[0]["generated_text"]

            # Clean up response to get only the assistant's reply
            response = response.split("Assistant:")[-1].strip()
            st.markdown(response)

    st.session_state.messages.append({"role": "assistant", "content": response})