Spaces:
Sleeping
Sleeping
import gradio as gr | |
from huggingface_hub import InferenceClient | |
from langchain.vectorstores import FAISS | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.document_loaders import TextLoader | |
# Chat model served through the Hugging Face Inference API. The model id can
# be swapped for any other supported model.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
# Sample knowledge base for Crustdata APIs.
#
# This is a raw string on purpose: the curl examples end their lines with a
# backslash, and in a normal triple-quoted string a backslash followed by a
# newline is a line continuation that Python removes, which would silently
# merge those lines and drop the backslashes from the indexed text.
docs = r"""
# Crustdata Dataset API
## Description
The Crustdata Dataset API provides access to a wide variety of datasets across different domains. It allows users to search, filter, and retrieve datasets based on categories, tags, and other metadata.
## Key Endpoints
### 1. **GET /datasets**
- **Description**: Retrieves a list of available datasets.
- **Parameters**:
  - `category` (optional): Filter datasets by a specific category.
  - `tags` (optional): Filter datasets by tags (comma-separated).
  - `limit` (optional): Maximum number of datasets to return (default: 10).
- **Example Request**:
  ```bash
  curl -X GET "https://api.crustdata.com/datasets?category=finance&tags=economy,stocks&limit=5"
  ```
- **Example Response**:
  ```json
  {
    "datasets": [
      {
        "id": "12345",
        "name": "Global Finance Dataset",
        "category": "finance",
        "tags": ["economy", "stocks"]
      },
      ...
    ]
  }
  ```
### 2. **GET /datasets/{id}**
- **Description**: Retrieves detailed information about a specific dataset.
- **Parameters**:
  - `id` (required): The unique identifier of the dataset.
- **Example Request**:
  ```bash
  curl -X GET "https://api.crustdata.com/datasets/12345"
  ```
- **Example Response**:
  ```json
  {
    "id": "12345",
    "name": "Global Finance Dataset",
    "description": "A comprehensive dataset on global financial markets.",
    "category": "finance",
    "tags": ["economy", "stocks"],
    "source": "World Bank"
  }
  ```
---
# Crustdata Discovery and Enrichment API
## Description
The Crustdata Discovery and Enrichment API allows users to enrich their datasets by adding metadata, geolocation information, and other relevant attributes.
## Key Endpoints
### 1. **POST /enrich**
- **Description**: Enriches input data with additional metadata based on the specified enrichment type.
- **Parameters**:
  - `input_data` (required): A list of data entries to be enriched.
  - `enrichment_type` (required): The type of enrichment to apply. Supported types:
    - `geolocation`
    - `demographics`
- **Example Request**:
  ```bash
  curl -X POST "https://api.crustdata.com/enrich" \
    -H "Content-Type: application/json" \
    -d '{
      "input_data": [{"address": "123 Main St, Springfield"}],
      "enrichment_type": "geolocation"
    }'
  ```
- **Example Response**:
  ```json
  {
    "enriched_data": [
      {
        "address": "123 Main St, Springfield",
        "latitude": 37.12345,
        "longitude": -93.12345
      }
    ]
  }
  ```
### 2. **POST /search**
- **Description**: Searches for relevant metadata or datasets based on user-provided criteria.
- **Parameters**:
  - `query` (required): The search term or query string.
  - `filters` (optional): Additional filters to narrow down the search results.
- **Example Request**:
  ```bash
  curl -X POST "https://api.crustdata.com/search" \
    -H "Content-Type: application/json" \
    -d '{
      "query": "energy consumption",
      "filters": {"category": "energy"}
    }'
  ```
- **Example Response**:
  ```json
  {
    "results": [
      {
        "id": "67890",
        "name": "Energy Consumption Dataset",
        "category": "energy",
        "tags": ["consumption", "renewables"]
      }
    ]
  }
  ```
---
# General Notes
- All endpoints require authentication using an API key.
- API requests must include the `Authorization` header:
  ```plaintext
  Authorization: Bearer YOUR_API_KEY
  ```
- Response format: JSON
- Base URL: `https://api.crustdata.com`
"""
# Embedding model used to vectorize both the knowledge-base chunks and, at
# query time, the user's question.
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

# Break the documentation into overlapping chunks so each indexed entry is
# small enough to be dropped into a prompt as context.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
doc_chunks = text_splitter.create_documents([docs])

# In-memory FAISS index over the embedded chunks; queried by retrieve_context.
docsearch = FAISS.from_documents(doc_chunks, embeddings)
def retrieve_context(query, k=2):
    """Return the knowledge-base text most similar to ``query``.

    Args:
        query: The user's question, used as the similarity-search query.
        k: How many chunks to retrieve from the vector store. Defaults to 2,
            the value that was previously hard-coded.

    Returns:
        The ``page_content`` of the retrieved chunks joined with newlines, in
        the order the vector store returned them.
    """
    results = docsearch.similarity_search(query, k=k)
    return "\n".join(doc.page_content for doc in results)
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream a retrieval-augmented answer from the Hugging Face Inference API.

    Context for ``message`` is looked up with ``retrieve_context`` and sent,
    together with the earlier turns in ``history``, to ``client`` as a chat
    completion request in streaming mode.

    Args:
        message: The user's latest message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; empty
            entries in a pair are skipped.
        system_message: Text for the system role.
        max_tokens: Forwarded to the chat completion call as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Yields:
        The response text accumulated so far, once for every streamed chunk
        that carries text.
    """
    # Retrieve relevant context from the knowledge base.
    context = retrieve_context(message)

    # NOTE(review): system_message is sent both as the system-role message
    # below and again at the top of this prompt; kept as-is to avoid changing
    # what the model sees -- confirm whether the repetition is intended.
    prompt = f"{system_message}\n\nContext:\n{context}\n\nUser: {message}\nAssistant:"

    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": prompt})

    response = ""
    # The loop variable is deliberately not called `message`, so the parameter
    # of that name is not overwritten while streaming.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # A streamed chunk may carry no text (delta.content can be None, e.g.
        # on a closing chunk); concatenating None would raise TypeError.
        if not token:
            continue
        response += token
        yield response
# Extra controls shown with the chat box; their values are passed to
# `respond` after (message, history), in this order.
_additional_inputs = [
    gr.Textbox(
        label="System message",
        value="You are a technical assistant for Crustdata APIs.",
    ),
    gr.Slider(label="Max new tokens", minimum=1, maximum=2048, value=512, step=1),
    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, value=0.7, step=0.1),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
    ),
]

# Chat UI wired to the streaming `respond` generator.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=_additional_inputs,
    title="Crustdata API Chatbot",
    description="Ask any technical questions about Crustdata’s Dataset and Discovery APIs.",
)


if __name__ == "__main__":
    demo.launch(share=True)