import os

import requests
import streamlit as st

# Earlier iterations of this app served the model through Gradio instead:
# demo = gr.load("deepseek-ai/DeepSeek-R1-Distill-Llama-8B", src="models")
# demo.launch()

# Hugging Face Inference API URL.
# meta-llama/Meta-Llama-3-8B is too large to be loaded automatically on the
# serverless API (16GB > 10GB; it requires Spaces or Inference Endpoints),
# so a smaller instruct model is used instead.
# API_URL = "https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-3B-Instruct"
HF_TOKEN = os.getenv('hftoken')

# Query the Hugging Face Inference API with the given payload.
def query(payload):
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    response = requests.post(API_URL, headers=headers, json=payload)
    result = response.json()
    print(result)  # Log the raw API response for debugging.
    return result

# Streamlit app
st.title("Llama-3.2-3B-Instruct Chatbot")

# Input text box
user_input = st.text_input("Enter your message:")

if user_input:
    # Query the Hugging Face API with the user input
    payload = {"inputs": user_input}
    output = query(payload)

    # Display the output
    if isinstance(output, list) and len(output) > 0 and 'generated_text' in output[0]:
        st.write("Response:")
        st.write(output[0]['generated_text'])
    else:
        st.write("Error: Unable to generate a response. Please try again.")
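
# A possible hardening step (a sketch, not part of the original app): the
# serverless Inference API typically replies with an error payload such as
# {"error": "...", "estimated_time": ...} while a model is still cold-loading,
# so retrying after a short wait can smooth over those failures.
# query_with_retry, max_retries, and the sleep cap are illustrative
# assumptions, not an existing helper or documented contract.
import time

def query_with_retry(payload, max_retries=3):
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    result = None
    for _ in range(max_retries):
        response = requests.post(API_URL, headers=headers, json=payload)
        result = response.json()
        # While the model is loading, wait roughly as long as the API
        # suggests (capped at 30 seconds) and try again.
        if isinstance(result, dict) and "error" in result:
            time.sleep(min(result.get("estimated_time", 5), 30))
            continue
        return result
    return result  # Last error payload if all retries were exhausted.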