import os

import requests
import streamlit as st
import torch

# Log basic CUDA diagnostics at startup; only ask for a device name when CUDA exists
print(f"Is CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

# Alternative (disabled): serve the model directly through Gradio instead of Streamlit
# import gradio as gr
# demo = gr.load("deepseek-ai/DeepSeek-R1-Distill-Llama-8B", src="models")
# demo.launch()

# Hugging Face Inference API endpoint for the model
API_URL = "https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
HF_TOKEN = os.getenv('llama3token')


# Query the Hugging Face Inference API with a JSON payload
def query(payload):
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()


# Streamlit app
st.title("DeepSeek-R1-Distill-Llama-8B Chatbot")

# Input text box
user_input = st.text_input("Enter your message:")

if user_input:
    # Query the Hugging Face API with the user input
    payload = {"inputs": user_input}
    output = query(payload)

    # A successful text-generation call returns a list of dicts with a
    # 'generated_text' key; failures (e.g. the model still loading) come
    # back as a dict with an 'error' key
    if isinstance(output, list) and len(output) > 0 and 'generated_text' in output[0]:
        st.write("Response:")
        st.write(output[0]['generated_text'])
    elif isinstance(output, dict) and 'error' in output:
        st.write(f"Error from API: {output['error']}")
    else:
        st.write("Error: Unable to generate a response. Please try again.")
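
# A minimal sketch of a parameterized request, assuming the standard
# text-generation options (max_new_tokens, temperature) that the Inference
# API accepts under a "parameters" key in the payload. query_with_params is
# a hypothetical helper for illustration and is not called by the app above.
def query_with_params(prompt, max_new_tokens=256, temperature=0.7):
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature},
    }
    return query(payload)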