import streamlit as st from transformers import pipeline from scrapegraphai.graphs import SmartScraperGraph import torch # Page config st.set_page_config( page_title="Zephyr Chat & Scrape", page_icon="🤖", layout="wide" ) # Initialize session state if "messages" not in st.session_state: st.session_state.messages = [] if "scrape_results" not in st.session_state: st.session_state.scrape_results = None # Load Zephyr model @st.cache_resource def load_model(): return pipeline( "text-generation", model="HuggingFaceH4/zephyr-7b-beta", torch_dtype=torch.float16, device_map="auto", ) # Initialize the model model = load_model() # Sidebar for web scraping with st.sidebar: st.title("Web Scraping") url = st.text_input("Enter URL to scrape") scrape_prompt = st.text_input("What information do you want to extract?") if st.button("Scrape"): try: # Configure scraper graph_config = { "llm": { "model": "HuggingFaceH4/zephyr-7b-beta", "temperature": 0.7, }, "verbose": True } # Create scraper instance scraper = SmartScraperGraph( prompt=scrape_prompt, source=url, config=graph_config ) # Run scraping st.session_state.scrape_results = scraper.run() st.success("Scraping completed!") except Exception as e: st.error(f"Error during scraping: {str(e)}") # Main chat interface st.title("Zephyr Chatbot 🤖") # Display scraped results if available if st.session_state.scrape_results: st.subheader("Scraped Information") st.json(st.session_state.scrape_results) # Display chat messages for message in st.session_state.messages: with st.chat_message(message["role"]): st.markdown(message["content"]) # Chat input if prompt := st.chat_input("What's on your mind?"): # Add user message to chat history st.session_state.messages.append({"role": "user", "content": prompt}) with st.chat_message("user"): st.markdown(prompt) # Generate response with st.chat_message("assistant"): with st.spinner("Thinking..."): # Include scraped content in context if available context = "" if st.session_state.scrape_results: context = f"Scraped information: {str(st.session_state.scrape_results)}\n" full_prompt = f"{context}User: {prompt}\nAssistant:" response = model( full_prompt, max_length=1000, temperature=0.7, top_p=0.95, repetition_penalty=1.15 )[0]["generated_text"] # Clean up response to get only the assistant's reply response = response.split("Assistant:")[-1].strip() st.markdown(response) st.session_state.messages.append({"role": "assistant", "content": response})