# Hugging Face Space: dwb2023/parsimony
# Status: Sleeping
# File size: 5,283 Bytes

import os
import gradio as gr
from typing import Union, Dict, List, Optional
import pandas as pd
from smolagents import CodeAgent, HfApiModel, tool, GradioUI
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from openinference.instrumentation.smolagents import SmolagentsInstrumentor
from openinference.semconv.resource import ResourceAttributes
from opentelemetry.sdk.resources import Resource

# Resource attributes attached to the tracer provider; carries the Phoenix
# project name under which traces are grouped.
resource = Resource(attributes={
    ResourceAttributes.PROJECT_NAME: 'hf-parsimony'
})

# Credentials come from the environment (either may be None if unset).
PHOENIX_API_KEY = os.getenv("PHOENIX_API_KEY")
HF_TOKEN = os.getenv("HF_TOKEN")
api_key = f"api_key={PHOENIX_API_KEY}"
if PHOENIX_API_KEY:
    # Only publish the auth header when a key is actually configured;
    # otherwise the literal string "api_key=None" would be sent as the header.
    os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = api_key
    os.environ["PHOENIX_CLIENT_HEADERS"] = api_key
os.environ["PHOENIX_COLLECTOR_ENDPOINT"] = "https://app.phoenix.arize.com"

endpoint = "https://app.phoenix.arize.com/v1/traces"
# Pass the resource explicitly: without it the project-name attribute defined
# above is never associated with the exported spans.
trace_provider = TracerProvider(resource=resource)
trace_provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(endpoint)))
# Instrument smolagents so agent runs are traced through this provider.
SmolagentsInstrumentor().instrument(tracer_provider=trace_provider)

# Example questions, kept as a flat tuple of plain strings.
_EXAMPLE_QUESTIONS = (
    "Validate whether FANCA interacts with PALB2 in homologous recombination repair using BioGRID and PubMed evidence.",
    "Show the evolution of TP53-MDM2 interaction evidence from 2018-2023, highlighting key supporting papers.",
    "Compare BRCA2 interaction networks between human and mouse homologs, marking conserved relationships.",
    "Identify synthetic lethal partners for BRCA1-deficient cancers with confidence > 0.9 and clinical trial associations.",
    "Find novel VHL interactors proposed in 2023 PubMed articles not yet in BioGRID.",
    "Visualize the ATM interaction network with nodes sized by betweenness centrality and colored by validation source.",
    "Explain the Fanconi Anemia pathway and show its core components with experimental validation status.",
    "Correlate TP53BP1 protein interactions with mRNA co-expression patterns in TCGA breast cancer data.",
    "Identify high-betweenness nodes in the KRAS interaction network with druggable protein products.",
    "List all interactions with conflicting evidence between BioGRID and STRING, sorted by confidence delta.",
)

# Each question is wrapped in its own single-element list, giving a list of
# one-item rows (consumers read the text via row[0]).
examples = [[question] for question in _EXAMPLE_QUESTIONS]

class GradioUIWithExamples(GradioUI):
    """A ``GradioUI`` variant with a custom Blocks layout and clickable example prompts."""

    def __init__(self, agent, examples=None, **kwargs):
        """Initialise the base UI and remember the example prompts.

        Args:
            agent: The agent forwarded to ``GradioUI.__init__``; its ``run``
                method is used as the submit handler in ``build_interface``.
            examples: Optional sequence of rows whose first element is the
                example text. When falsy, no example section is rendered.
            **kwargs: Forwarded unchanged to ``GradioUI.__init__``.
        """
        super().__init__(agent, **kwargs)
        self.examples = examples

    def build_interface(self):
        """Build and return the ``gr.Blocks`` application.

        The layout contains a title, a description, an accordion of feature
        bullets, a question textbox, a read-only results textbox, a submit
        button and, when examples were supplied, one button per example.

        Returns:
            The constructed ``gr.Blocks`` object (not yet launched).
        """
        with gr.Blocks() as demo:
            # Title Section
            gr.Markdown("## From Answers to Insight - Architecting Evolvable Agentic Systems")

            # Description Section
            gr.Markdown(
                """
                **🔬 Architectural Blueprint**: *Production systems require intentional design - combining smolagents' efficiency with Phoenix observability, powered by ontology-driven prompting patterns.*
                """
            )

            # Core Innovation Vectors Section
            with gr.Accordion("Core Innovation Vectors", open=True):
                gr.Markdown(
                    """
                    - ✅ **Competency Question Engine**: Structured prompting using domain vocabulary
                    - ✅ **Telemetry-First Core**: Phoenix spans capturing full evidence chains, establishing OpenTelemetry metrics pipeline for performance benchmarking
                    - ✅ **Domain Foundation**: Smolagents + biomedical vocabulary layer
                    - 🛠️ **Human-in-the-Loop Evolution**: Gradio UI with human feedback capture
                    - 🛠️ **Benchmarking Rigor**: Validation scorecards and audit framework
                    - 🛠️ **Domain Insight Engine**: Knowledge graph grounding and multimodal evidence synthesis
                    """
                )

            input_box = gr.Textbox(
                label="Your Question",
                placeholder="e.g., 'Find novel VHL interactors proposed in 2023 PubMed articles not yet in BioGRID.'"
            )
            output_box = gr.Textbox(
                label="Analysis Results",
                placeholder="Response will appear here...",
                interactive=False,
            )
            submit_button = gr.Button("Submit")

            # The textbox content is passed as the single argument to agent.run
            # and whatever it returns is shown in the results box.
            # NOTE(review): self.agent is presumably set by GradioUI.__init__ - confirm.
            submit_button.click(
                self.agent.run,
                inputs=input_box,
                outputs=output_box,
            )

            if self.examples:
                gr.Markdown("### Examples")
                for example in self.examples:
                    # Bind the text as a default argument so each button keeps
                    # its own example instead of the loop's last value.
                    gr.Button(example[0]).click(
                        lambda x=example[0]: x,
                        inputs=[],
                        outputs=input_box,
                    )
        return demo

    def launch(self, **kwargs):
        """Build the interface and launch it.

        Args:
            **kwargs: Optional keyword arguments forwarded to the
                ``launch`` method of the built ``gr.Blocks`` object
                (for example ``share`` or ``server_name``). Calling with no
                arguments behaves exactly as before.
        """
        demo = self.build_interface()
        demo.launch(**kwargs)

# Model used by the agent; authenticated with the HF_TOKEN read from the environment.
model = HfApiModel(model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', token=HF_TOKEN)

# Code-writing agent with no custom tools, only the library's base tools.
agent = CodeAgent(
    tools=[],
    model=model,
    # NOTE(review): these modules become importable by agent-generated code;
    # entries such as "requests" are broad - confirm each one is intended.
    # (The duplicate "json" entry was removed.)
    additional_authorized_imports=[
        "gradio",
        "pandas",
        "numpy",
        "datasets",
        "duckdb",
        "json",
        "streamlit",
        "requests",
    ],
    add_base_tools=True,
)

# Build the UI around the agent and start serving it at import/run time.
interface = GradioUIWithExamples(agent, examples=examples)
interface.launch()