Spaces:

Game4all
/

Graphify

Running

App Files Files Community

Game4all commited on 3 days ago

Commit

51f2dc1

0 Parent(s):

Initial commit

Browse files

Files changed (13) hide show

.gitignore +210 -0
Dockerfile +12 -0
README.md +10 -0
main.py +91 -0
prompts/ner/extract_entities +27 -0
prompts/ner/extract_relations +22 -0
prompts/search/create_search_plan +18 -0
requirements.txt +7 -0
schemas.py +35 -0
static/css/index.css +9 -0
static/index.html +83 -0
static/js/index.js +112 -0
utils.py +37 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,210 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+# Visual Studio Code
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
+#  you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc
+# Cursor
+#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+#  refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+# Streamlit
+.streamlit/secrets.toml

Dockerfile ADDED Viewed

	@@ -0,0 +1,12 @@

+FROM python:3.11-slim
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+EXPOSE 7860
+ENTRYPOINT ["uvicorn", "main:api", "--port", "7860", "--host", "0.0.0.0"]

README.md ADDED Viewed

	@@ -0,0 +1,10 @@

+---
+title: Graphify
+emoji: 🏢
+colorFrom: yellow
+colorTo: red
+sdk: docker
+pinned: false
+license: mit
+short_description: Transform data into knowledge graphs
+---

main.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import os
+import sys
+from litellm import acompletion
+from dotenv import load_dotenv
+from fastapi import FastAPI
+from fastapi.staticfiles import StaticFiles
+from jinja2 import Environment, FileSystemLoader, StrictUndefined, TemplateNotFound
+from schemas import CreateSearchPlanRequest, CreateSearchPlanResponse, ExtractEntitiesRequest, ExtractEntitiesResponse, ExtractedRelationsResponse
+from utils import build_visjs_graph, fmt_prompt
+import logging
# Pull variables from a local .env file (if present) into the process
# environment before any of them are read below.
load_dotenv()

logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s][%(levelname)s][%(filename)s:%(lineno)d]: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# LLM connection settings, all taken from the environment.
LLM_MODEL = os.environ.get('LLM_MODEL', default=None)
LLM_TOKEN = os.environ.get('LLM_TOKEN', default=None)
LLM_BASE_URL = os.environ.get('LLM_BASE_URL', default=None)

# Every completion call passes LLM_MODEL as the model name, so the service is
# unusable without it regardless of whether a token was supplied. The previous
# check only aborted when BOTH values were missing, letting the server start
# with LLM_MODEL=None and fail on each request instead.
if not LLM_MODEL:
    logging.error("No LLM_MODEL was provided.")
    sys.exit(-1)

# A missing token is not fatal (startup was already allowed in that case), but
# it is worth surfacing because authenticated endpoints will reject the calls.
if not LLM_TOKEN:
    logging.warning("No LLM_TOKEN was provided.")

# Prompt templates live under ./prompts; StrictUndefined makes a missing
# template variable raise instead of rendering as an empty string.
prompt_env = Environment(
    loader=FileSystemLoader("prompts"),
    undefined=StrictUndefined,
    enable_async=True,
)

api = FastAPI()
# Retry budget applied uniformly to every LLM call (previously only the
# relation-extraction call retried).
LLM_NUM_RETRIES = 5


async def _complete_as(prompt_id: str, response_model, **prompt_args):
    """Render a prompt, run it through the LLM and parse the structured reply.

    Args:
        prompt_id: Template name relative to the prompts directory.
        response_model: Pydantic model class describing the expected reply; it
            is used for the schema shown in the prompt, as the completion's
            ``response_format`` and to validate the returned JSON.
        **prompt_args: Extra variables made available to the template.

    Returns:
        An instance of ``response_model`` built from the first choice's content.

    Raises:
        pydantic.ValidationError: If the reply is not valid JSON for the model.
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    import json

    prompt = await fmt_prompt(
        prompt_env,
        prompt_id,
        # Serialise the schema to real JSON: interpolating the dict directly
        # renders a Python repr (single quotes, True/None) inside a prompt that
        # asks the model to follow a JSON schema.
        response_format=json.dumps(response_model.model_json_schema()),
        **prompt_args,
    )
    completion = await acompletion(
        LLM_MODEL,
        api_key=LLM_TOKEN,
        base_url=LLM_BASE_URL,
        messages=[{"role": "user", "content": prompt}],
        response_format=response_model,
        num_retries=LLM_NUM_RETRIES,
    )
    return response_model.model_validate_json(
        completion.choices[0].message.content)


@api.post("/extract_entities")
async def extract_entities(body: ExtractEntitiesRequest):
    """Extract entities and their relations from the input text.

    Runs two LLM passes (entities first, then relations between those
    entities) and returns the result as vis.js ``nodes``/``edges`` lists.
    """
    extracted_entities = await _complete_as(
        "ner/extract_entities",
        ExtractEntitiesResponse,
        input_text=body.content,
    )

    # Second pass: relations are only requested for the entities found above.
    relation_model = await _complete_as(
        "ner/extract_relations",
        ExtractedRelationsResponse,
        input_text=body.content,
        entities=extracted_entities.entities,
    )

    return build_visjs_graph(
        extracted_entities.entities, relation_model.relations)


@api.post("/create_search_plan")
async def create_search_plan(body: CreateSearchPlanRequest):
    """Break the user query down into a list of sub-queries to explore."""
    return await _complete_as(
        "search/create_search_plan",
        CreateSearchPlanResponse,
        user_query=body.query,
    )


# Mounted last: routes are matched in registration order, so this catch-all
# static mount at "/" must come after the API routes above.
api.mount("/", StaticFiles(directory="static", html=True), name="static")

prompts/ner/extract_entities ADDED Viewed

	@@ -0,0 +1,27 @@

+<role>You are a useful search assistant. </role>
+<task>
+	Extract all the entities that appear in the following given input text.
+	An entity may refer to:
+	- A person (eg. Marie Curie)
+	- An organisation (eg. Google Inc, United Nations)
+	- A date (eg. July 14, 2025)
+	- A fact
+	- An event (eg. WWII)
+	- An acronym (eg. NASA)
+	- A location (eg. NY City)
+	- A product (eg. ChatGPT, Iphone 14)
+	- A quantity, money or percentage
+	- A technical term or domain concept
+</task>
+<response_format>
+	Reply in JSON with the following response schema:
+	{{response_format}}
+</response_format>
+<input_text>
+	Here is the text:
+	{{input_text}}
+</input_text>

prompts/ner/extract_relations ADDED Viewed

	@@ -0,0 +1,22 @@

+<role>You are a useful search assistant. </role>
+<task>
+	Extract all the relations described in the following text for the following list of entities.
+	Please provide relations for the entities listed below.
+</task>
+<entities>
+	Here is the list of entities:
+	{% for entity in entities -%}
+	- {{entity}}
+	{% endfor %}
+</entities>
+<response_format>
+	Reply in JSON with the following response schema:
+	{{response_format}}
+</response_format>
+<input_text>
+	Here is the text:
+	{{input_text}}
+</input_text>

prompts/search/create_search_plan ADDED Viewed

	@@ -0,0 +1,18 @@

+<role>You are a useful search assistant. </role>
+<task>
+    You are tasked with creating a comprehensive search plan to explore the complex user query given below.
+    Your goal is to break down the query into multiple sub-queries that represent the dimensions that must be explored for
+    a holistic understanding of
+    the initial query.
+</task>
+<response_format>
+    Only provide the sub-queries formulated as questions.
+    Provide the answer as a JSON object using the following response schema:
+    {{response_format}}
+</response_format>
+<topic>
+    The user query is:
+    **{{user_query}}**
+</topic>

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+litellm
+pydantic
+fastapi
+uvicorn
+networkx
+Jinja2
+python-dotenv

schemas.py ADDED Viewed

	@@ -0,0 +1,35 @@

+from pydantic import BaseModel, Field
+# ============================================= Entity + Relations extraction
# Request payload carrying the raw text to analyse.
class ExtractEntitiesRequest(BaseModel):
    # Free-form input text.
    content: str
# Structured reply holding the entities found in a text.
# (Documented with comments rather than a docstring so the generated JSON
# schema stays exactly as it was.)
class ExtractEntitiesResponse(BaseModel):
    entities: list[str] = Field(
        ...,
        description="A list of entities",
    )
# One directed relationship between two entities.
# The field descriptions end up in the JSON schema generated from this model,
# so they are kept short and correctly spelled ("detailled" -> "detailed").
class ExtractedRelation(BaseModel):
    # Source entity of the relationship.
    start: str = Field(..., description="The first entity in the relationship")
    # Target entity of the relationship.
    to: str = Field(..., description="The second entity of the relationship")
    # Short machine-friendly label for the relationship.
    tag: str = Field(
        ...,
        description="A tag describing the relationship",
        examples=["related_to", "born_in", "made", "created"],
    )
    # Longer human-readable explanation of the relationship.
    description: str = Field(
        ...,
        description="A detailed description of the relationship",
    )
# Structured reply holding every extracted relationship.
class ExtractedRelationsResponse(BaseModel):
    # Relationships in the order they were returned.
    relations: list[ExtractedRelation]
+# ========================================================  Create search plan ==================
# Request payload carrying the user query to break down.
class CreateSearchPlanRequest(BaseModel):
    # The query as typed by the user.
    query: str
# Structured reply holding the sub-queries of a search plan.
class CreateSearchPlanResponse(BaseModel):
    sub_queries: list[str] = Field(
        ...,
        description="A list of subqueries formulated as questions",
    )

static/css/index.css ADDED Viewed

	@@ -0,0 +1,9 @@

+.dot-grid {
+  position: relative;
+  width: 100%;
+  height: 100%;
+  background-image: radial-gradient(rgba(128, 128, 128, 0.4) 1px, transparent 1px);
+  background-size: 12px 12px; /* Adjust spacing between dots */
+  background-repeat: repeat;
+  overflow: hidden;
+}

static/index.html ADDED Viewed

	@@ -0,0 +1,83 @@

+<!DOCTYPE html>
+<html lang="en" data-theme="dark">
+<head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Chatbot UI</title>
+    <link href="https://cdn.jsdelivr.net/npm/daisyui@5" rel="stylesheet" type="text/css" />
+    <link href="css/index.css" rel="stylesheet" type="text/css" />
+    <script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/@tailwindcss/browser@4"></script>
+    <script src="js/index.js"></script>
+</head>
+<body class="bg-base-200 p-4 overflow-hidden">
+    <div class="relative h-[95vh] bg-base-100 rounded-2xl p-5 shadow-xl">
+        <!-- tab container-->
+        <div class="absolute -top-2 flex left-1/2 transform -translate-x-1/2 z-10">
+            <div class="tabs tabs-box glass">
+                <button id="chat-tab" class="tab tab-active flex items-center gap-2" onclick="switchTab('chat-tab')">
+                    <svg width="16px" height="16px" viewBox="0 0 24 24" fill="none">
+                        <g stroke-width="0"></g>
+                        <g stroke-linecap="round" stroke-linejoin="round"></g>
+                        <g>
+                            <path d="M8 10.5H16" stroke="#ffffff" stroke-width="1.5" stroke-linecap="round"></path>
+                            <path d="M8 14H13.5" stroke="#ffffff" stroke-width="1.5" stroke-linecap="round"></path>
+                            <path
+                                d="M17 3.33782C15.5291 2.48697 13.8214 2 12 2C6.47715 2 2 6.47715 2 12C2 13.5997 2.37562 15.1116 3.04346 16.4525C3.22094 16.8088 3.28001 17.2161 3.17712 17.6006L2.58151 19.8267C2.32295 20.793 3.20701 21.677 4.17335 21.4185L6.39939 20.8229C6.78393 20.72 7.19121 20.7791 7.54753 20.9565C8.88837 21.6244 10.4003 22 12 22C17.5228 22 22 17.5228 22 12C22 10.1786 21.513 8.47087 20.6622 7"
+                                stroke="#ffffff" stroke-width="1.5" stroke-linecap="round"></path>
+                        </g>
+                    </svg>
+                    Chat
+                </button>
+                <button id="explore-tab" class="tab flex items-center gap-2" onclick="switchTab('explore-tab')">
+                    <svg fill="#ffffff" width="16px" height="16px" viewBox="0 -64 640 640" stroke="#ffffff">
+                        <g stroke-width="0"></g>
+                        <g stroke-linecap="round" stroke-linejoin="round"></g>
+                        <g>
+                            <path
+                                d="M384 320H256c-17.67 0-32 14.33-32 32v128c0 17.67 14.33 32 32 32h128c17.67 0 32-14.33 32-32V352c0-17.67-14.33-32-32-32zM192 32c0-17.67-14.33-32-32-32H32C14.33 0 0 14.33 0 32v128c0 17.67 14.33 32 32 32h95.72l73.16 128.04C211.98 300.98 232.4 288 256 288h.28L192 175.51V128h224V64H192V32zM608 0H480c-17.67 0-32 14.33-32 32v128c0 17.67 14.33 32 32 32h128c17.67 0 32-14.33 32-32V32c0-17.67-14.33-32-32-32z">
+                            </path>
+                        </g>
+                    </svg>
+                    Explore
+                </button>
+            </div>
+        </div>
+        <!-- Chat UI Container -->
+        <div id="chat-tab-contents" class="relative w-full h-full">
+            <!-- <div class=" flex flex-col overflow-hidden"> -->
+            <div id="chat-messages-container" class="w-full h-full space-y-4 overflow-y-auto flex-1 pr-2 pb-20">
+                <!-- Message Bubble (Bot) -->
+                <div class="chat chat-start">
+                    <div class="chat-bubble chat-bubble-secondary">Hello! Give me some data and I'll turn it into a nice KG graph for you</div>
+                </div>
+            </div>
+            <!-- Input Area -->
+            <div class="absolute bottom-0 left-0 right-0 p-4 rounded-xl shadow-xl bg-gray-700 text-base-content">
+                <div class="w-full">
+                    <div class="flex gap-2 items-end">
+                        <textarea id="chat-input"
+                            class="w-full bg-base-800 text-base-content border max-h-[25vh] overflow-y-auto resize-y p-2 rounded-md"
+                            placeholder="Type your message..." rows="1"></textarea>
+                        <button id="chat-send-input" class="btn btn-sm btn-primary rounded-full">Send</button>
+                    </div>
+                </div>
+            </div>
+            <!-- </div> -->
+        </div>
+        <div class="relative w-full h-full dot-grid overflow-hidden hidden" id="explore-tab-contents">
+            <div id="grapha" class="w-full h-full "></div>
+        </div>
+    </div>
+</body>
+</html>

static/js/index.js ADDED Viewed

	@@ -0,0 +1,112 @@

// Maps each tab button id to the id of the panel it controls.
const TABS = {
    "chat-tab": "chat-tab-contents",
    "explore-tab": "explore-tab-contents"
};

/**
 * Show the panel belonging to `newTab` and mark its button as active.
 * Every known tab is reset first; an unknown id therefore leaves all
 * tabs inactive and all panels hidden.
 */
function switchTab(newTab) {
    // Reset: deactivate every button and hide every panel.
    for (const [buttonId, panelId] of Object.entries(TABS)) {
        const button = document.getElementById(buttonId);
        if (button) {
            button.classList.remove("tab-active");
        }
        const panel = document.getElementById(panelId);
        if (panel) {
            panel.classList.add("hidden");
        }
    }

    if (!(newTab in TABS)) {
        return;
    }

    const activeButton = document.getElementById(newTab);
    if (activeButton) {
        activeButton.classList.add("tab-active");
    }
    const activePanel = document.getElementById(TABS[newTab]);
    if (activePanel) {
        activePanel.classList.remove("hidden");
    }
}
/**
 * Append a chat bubble to the message list and scroll it into view.
 * Messages from "user" are right-aligned with the primary colour; any other
 * role (including "bot") is left-aligned with the secondary colour.
 */
function addMessage(message, role = "user") {
    const fromUser = role === "user";

    const bubble = document.createElement("div");
    bubble.classList.add("chat-bubble");
    bubble.classList.add(fromUser ? "chat-bubble-primary" : "chat-bubble-secondary");
    // textContent (not innerHTML) so the message is never parsed as markup.
    bubble.textContent = message;

    const row = document.createElement("div");
    row.classList.add("chat");
    row.classList.add(fromUser ? "chat-end" : "chat-start");
    row.appendChild(bubble);

    const container = document.getElementById("chat-messages-container");
    container.appendChild(row);
    container.scrollTop = container.scrollHeight;
}
/**
 * Backslash-escape backslashes, double quotes, newlines, carriage returns
 * and tabs in `str`, returning the escaped copy.
 */
function escapeString(str) {
    const replacements = {
        '\\': '\\\\',
        '"': '\\"',
        '\n': '\\n',
        '\r': '\\r',
        '\t': '\\t'
    };
    // One pass over the string; each special character is swapped for its
    // escaped form, which matches escaping backslashes first and the rest after.
    return str.replace(/[\\"\n\r\t]/g, (ch) => replacements[ch]);
}
/**
 * Send `text` to the backend, render the returned knowledge graph in the
 * explore tab and report the outcome in the chat.
 *
 * The text is sent as-is: JSON.stringify already escapes quotes, backslashes
 * and control characters, so escaping it beforehand would make the server
 * receive literal "\n" / "\"" sequences instead of the original characters.
 */
async function extractEntities(text) {
    try {
        const response = await fetch("/extract_entities", {
            method: "POST",
            headers: new Headers({
                "Content-Type": "application/json"
            }),
            body: JSON.stringify({
                content: text
            })
        });

        // fetch only rejects on network failure; surface HTTP errors too so an
        // error payload is never handed to the graph renderer.
        if (!response.ok) {
            throw new Error(`/extract_entities responded with HTTP ${response.status}`);
        }

        const graphData = await response.json();
        console.log(graphData);

        const options = {};
        new vis.Network(document.getElementById('grapha'), graphData, options);

        addMessage("I've created a knowledge graph based on what you've provided me. Check the explore tab !", "bot");
    } catch (e) {
        console.error("Error while trying to extract entities into a KG", e);
        // Tell the user as well; previously a failure was only visible in the console.
        addMessage("Sorry, I couldn't build a knowledge graph from that. Please try again.", "bot");
    }
}
// Wire up the send button once the DOM is available.
document.addEventListener('DOMContentLoaded', () => {
    document.getElementById('chat-send-input').addEventListener('click', () => {
        const textbox = document.getElementById('chat-input');
        const text = textbox.value;

        // Ignore blank input: it would add an empty bubble and trigger two
        // LLM calls on the server for nothing.
        if (text.trim() === "") {
            return;
        }

        addMessage(text, "user");
        extractEntities(text);
        textbox.value = "";
    });
});

utils.py ADDED Viewed

	@@ -0,0 +1,37 @@

+from typing import Dict, List
+from jinja2 import Environment
+from schemas import ExtractedRelation
def build_visjs_graph(entities: List[str], relations: List["ExtractedRelation"]) -> Dict[str, List[Dict]]:
    """Build the node and edge lists consumed by vis.js.

    Args:
        entities: Entity labels; duplicates are collapsed, keeping the first
            occurrence's position.
        relations: Relations whose ``start``/``to`` name entities and whose
            ``tag``/``description`` become the edge label and tooltip.

    Returns:
        ``{"nodes": [...], "edges": [...]}``. Node ids are the 0-based position
        of each distinct entity. Relations that mention an entity missing from
        ``entities`` are dropped.
    """
    # dict.fromkeys de-duplicates while preserving insertion order. A plain
    # set() does not keep order, which made node ids and node ordering vary
    # from one run to the next.
    unique_entities = list(dict.fromkeys(entities))
    entity_to_id = {entity: idx for idx, entity in enumerate(unique_entities)}

    nodes = [
        {"id": idx, "label": entity, "title": entity}
        for entity, idx in entity_to_id.items()
    ]

    edges = []
    for rel in relations:
        start_id = entity_to_id.get(rel.start)
        end_id = entity_to_id.get(rel.to)
        # Skip relations that refer to an entity we have no node for.
        if start_id is None or end_id is None:
            continue
        edges.append({
            "from": start_id,
            "to": end_id,
            "label": rel.tag,
            "title": rel.description,
            "arrows": "to",
        })

    return {"nodes": nodes, "edges": edges}
async def fmt_prompt(env: Environment, prompt_id: str, **args):
    """Render the template named ``prompt_id`` from ``env``.

    The keyword arguments are handed to the template as its variables and the
    rendered text is returned.
    """
    template = env.get_template(prompt_id)
    rendered = await template.render_async(args)
    return rendered