# Spaces: halimbahae / ScrapeApp
# Runtime error
# File size: 3,187 Bytes
# 47a7cf1

import streamlit as st
from transformers import pipeline
from scrapegraphai.graphs import SmartScraperGraph
import torch

# Page config
PAGE_SETTINGS = {
    "page_title": "Zephyr Chat & Scrape",
    "page_icon": "🤖",
    "layout": "wide",
}
st.set_page_config(**PAGE_SETTINGS)

# Seed session state: each key is created only if it is not already present,
# so values stored by earlier reruns of the script are left untouched.
for state_key, initial_value in (("messages", []), ("scrape_results", None)):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# Load Zephyr model
@st.cache_resource
def load_model(model_id: str = "HuggingFaceH4/zephyr-7b-beta"):
    """Build the text-generation pipeline used by the chat interface.

    Decorated with ``st.cache_resource`` so the (expensive) pipeline is
    constructed once and reused instead of being rebuilt on every rerun.

    Args:
        model_id: Hugging Face Hub identifier of the checkpoint to load.
            Defaults to the Zephyr 7B beta checkpoint.

    Returns:
        A ``transformers`` text-generation pipeline.
    """
    # Half precision is only a safe choice on a GPU: many PyTorch builds lack
    # float16 kernels for CPU ops, so fall back to float32 when no CUDA
    # device is available.
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    return pipeline(
        "text-generation",
        model=model_id,
        torch_dtype=dtype,
        device_map="auto",
    )


# Initialize the model
model = load_model()

# Sidebar for web scraping
with st.sidebar:
    st.title("Web Scraping")
    url = st.text_input("Enter URL to scrape")
    scrape_prompt = st.text_input("What information do you want to extract?")

    if st.button("Scrape"):
        # Both fields are empty strings until the user types something;
        # strip stray whitespace and refuse to launch a scrape that has no
        # target or no question, instead of surfacing an opaque exception.
        target_url = url.strip()
        extraction_prompt = scrape_prompt.strip()

        if not target_url or not extraction_prompt:
            st.warning("Please enter both a URL and what you want to extract.")
        else:
            try:
                # Configure scraper
                # NOTE(review): the "model" value has no provider prefix;
                # confirm that scrapegraphai accepts a bare Hub id here.
                graph_config = {
                    "llm": {
                        "model": "HuggingFaceH4/zephyr-7b-beta",
                        "temperature": 0.7,
                    },
                    "verbose": True,
                }

                # Create scraper instance
                scraper = SmartScraperGraph(
                    prompt=extraction_prompt,
                    source=target_url,
                    config=graph_config,
                )

                # Run scraping and keep the result for the main page and
                # for use as chat context.
                st.session_state.scrape_results = scraper.run()
                st.success("Scraping completed!")

            except Exception as e:
                # UI boundary: report any failure to the user rather than
                # crashing the whole app.
                st.error(f"Error during scraping: {str(e)}")

# Main chat interface
st.title("Zephyr Chatbot 🤖")

# Show the stored scrape output, but only when it is truthy
scraped = st.session_state.scrape_results
if scraped:
    st.subheader("Scraped Information")
    st.json(scraped)

# Replay every stored chat message inside a bubble for its role
for message in st.session_state.messages:
    bubble = st.chat_message(message["role"])
    with bubble:
        st.markdown(message["content"])

# Chat input
if prompt := st.chat_input("What's on your mind?"):
    # Add user message to chat history and echo it immediately
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            # Include scraped content in context if available
            context = ""
            if st.session_state.scrape_results:
                context = f"Scraped information: {str(st.session_state.scrape_results)}\n"

            # Build the turns in the role/content format expected by the
            # tokenizer's chat template; scraped data goes in as a system turn.
            chat_turns = []
            if context:
                chat_turns.append({"role": "system", "content": context})
            chat_turns.append({"role": "user", "content": prompt})

            # Use the model's own chat template rather than an ad-hoc
            # "User:/Assistant:" layout the model was not trained on.
            full_prompt = model.tokenizer.apply_chat_template(
                chat_turns,
                tokenize=False,
                add_generation_prompt=True,
            )

            response = model(
                full_prompt,
                # Budget for the reply only: `max_length` also counts prompt
                # tokens, so a long scraped context could leave no room.
                max_new_tokens=512,
                # temperature/top_p only take effect when sampling is on.
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
                repetition_penalty=1.15,
                # Return just the completion, so no string splitting is needed
                # (splitting on "Assistant:" truncated replies containing it).
                return_full_text=False,
            )[0]["generated_text"].strip()

            st.markdown(response)
            st.session_state.messages.append({"role": "assistant", "content": response})