# Page header captured with this file (not part of the program):
# wifix199 · "Update app.py" · a1c8ee1 (verified) · 4.97 kB
import os

import gradio as gr
import openai
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
# The key is read from the OPENAI_API_KEYS (plural) environment variable, but
# nothing in this file passes it on explicitly: the embeddings and LLM objects
# are constructed without a key and fall back to the OPENAI_API_KEY variable.
# Mirror the value there so the configured key is actually used, without
# overriding an OPENAI_API_KEY that is already set.
OPENAI_API_KEYS = os.getenv("OPENAI_API_KEYS")
if OPENAI_API_KEYS:
    os.environ.setdefault("OPENAI_API_KEY", OPENAI_API_KEYS)
# Knowledge base for Crustdata APIs: the Markdown text that is chunked,
# embedded and searched to answer questions.
# This is a raw string on purpose: in a normal triple-quoted string a
# backslash at the end of a line is a line continuation, which would silently
# drop the trailing "\" (and the newline) from the multi-line curl examples.
docs = r"""
# Crustdata Dataset API
## Description
The Crustdata Dataset API provides access to a wide variety of datasets across different domains. It allows users to search, filter, and retrieve datasets based on categories, tags, and other metadata.
## Key Endpoints
### 1. **GET /datasets**
- **Description**: Retrieves a list of available datasets.
- **Parameters**:
- `category` (optional): Filter datasets by a specific category.
- `tags` (optional): Filter datasets by tags (comma-separated).
- `limit` (optional): Maximum number of datasets to return (default: 10).
- **Example Request**:
```bash
curl -X GET "https://api.crustdata.com/datasets?category=finance&tags=economy,stocks&limit=5"
```
- **Example Response**:
```json
{
"datasets": [
{
"id": "12345",
"name": "Global Finance Dataset",
"category": "finance",
"tags": ["economy", "stocks"]
},
...
]
}
```
### 2. **GET /datasets/{id}**
- **Description**: Retrieves detailed information about a specific dataset.
- **Parameters**:
- `id` (required): The unique identifier of the dataset.
- **Example Request**:
```bash
curl -X GET "https://api.crustdata.com/datasets/12345"
```
- **Example Response**:
```json
{
"id": "12345",
"name": "Global Finance Dataset",
"description": "A comprehensive dataset on global financial markets.",
"category": "finance",
"tags": ["economy", "stocks"],
"source": "World Bank"
}
```
---
# Crustdata Discovery and Enrichment API
## Description
The Crustdata Discovery and Enrichment API allows users to enrich their datasets by adding metadata, geolocation information, and other relevant attributes.
## Key Endpoints
### 1. **POST /enrich**
- **Description**: Enriches input data with additional metadata based on the specified enrichment type.
- **Parameters**:
- `input_data` (required): A list of data entries to be enriched.
- `enrichment_type` (required): The type of enrichment to apply. Supported types:
- `geolocation`
- `demographics`
- **Example Request**:
```bash
curl -X POST "https://api.crustdata.com/enrich" \
-H "Content-Type: application/json" \
-d '{
"input_data": [{"address": "123 Main St, Springfield"}],
"enrichment_type": "geolocation"
}'
```
- **Example Response**:
```json
{
"enriched_data": [
{
"address": "123 Main St, Springfield",
"latitude": 37.12345,
"longitude": -93.12345
}
]
}
```
### 2. **POST /search**
- **Description**: Searches for relevant metadata or datasets based on user-provided criteria.
- **Parameters**:
- `query` (required): The search term or query string.
- `filters` (optional): Additional filters to narrow down the search results.
- **Example Request**:
```bash
curl -X POST "https://api.crustdata.com/search" \
-H "Content-Type: application/json" \
-d '{
"query": "energy consumption",
"filters": {"category": "energy"}
}'
```
- **Example Response**:
```json
{
"results": [
{
"id": "67890",
"name": "Energy Consumption Dataset",
"category": "energy",
"tags": ["consumption", "renewables"]
}
]
}
```
---
# General Notes
- All endpoints require authentication using an API key.
- API requests must include the `Authorization` header:
```plaintext
Authorization: Bearer YOUR_API_KEY
```
- Response format: JSON
- Base URL: `https://api.crustdata.com`
"""
# Split the documentation into overlapping chunks (1000 characters each,
# 200 shared with the neighbouring chunk) for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
doc_chunks = text_splitter.create_documents([docs])

# Embed the chunks with OpenAI embeddings and index them in a FAISS store.
# This runs at import time, so a valid OpenAI key must already be configured.
embeddings = OpenAIEmbeddings()
docsearch = FAISS.from_documents(doc_chunks, embeddings)

# Create the retrieval QA chain.
# gpt-3.5-turbo is a chat model, so it is driven through ChatOpenAI rather
# than the completion-style OpenAI wrapper.
# With return_source_documents=True the chain has two outputs:
# "result" (the answer text) and "source_documents".
qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model_name="gpt-3.5-turbo"),
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
)
# Function to handle user queries
def answer_question(question):
    """Answer a question about the Crustdata APIs using the retrieval QA chain.

    Args:
        question: The question text entered by the user.

    Returns:
        The answer text produced by ``qa_chain``, or a short prompt asking
        for input when the question is empty or whitespace-only.
    """
    if not question or not question.strip():
        # Nothing to look up; skip the retrieval + LLM round trip.
        return "Please enter a question about the Crustdata APIs."
    # ``qa_chain`` is created with ``return_source_documents=True``, so it has
    # two output keys ("result" and "source_documents"). ``Chain.run`` only
    # supports chains with exactly one output key and raises ValueError here,
    # so call the chain directly and return just the answer text.
    response = qa_chain({"query": question})
    return response["result"]
# Build the web UI: a two-line free-text question box in, plain text out.
_question_box = gr.Textbox(
    lines=2,
    placeholder="Ask a question about Crustdata APIs...",
)
chat_interface = gr.Interface(
    fn=answer_question,
    inputs=_question_box,
    outputs="text",
    title="Crustdata API Chat",
    description="Ask any technical questions about Crustdata’s Dataset and Discovery APIs.",
)

# Start the Gradio app, requesting a public share link.
chat_interface.launch(share=True)