# NOTE: page-capture residue -- the hosting "Spaces" status banner read "Runtime error" (shown twice) when this file was captured.
import json
import re
from typing import Dict, List

import gradio as gr
import pandas as pd
from fastapi import FastAPI
from pydantic import BaseModel

from src.core import *
from src.ressources.main_css import *
# FastAPI application object. The Gradio Blocks UI defined later in this file
# is mounted onto it under "/gradio" via gr.mount_gradio_app.
app = FastAPI(
    title="Insight Finder",
    description="Find relevant technologies from a problem",
)
# Request model with a single free-text `problem` field; it is the parameter
# type of the `process` coroutine in this file.
class InputProblem(BaseModel):
    problem: str
# Request model wrapping a str -> str mapping of constraints; it is the
# parameter type of the `process_constraints` coroutine in this file.
# NOTE(review): keys look like constraint titles and values like descriptions
# (that is how format_constraints_html renders such a mapping) -- confirm.
class InputConstraints(BaseModel):
    constraints: Dict[str, str]
# This schema defines the structure for a single technology object.
# The string fields use the same keys that the HTML formatters in this file
# read from technology dicts (title, purpose, key_components, advantages,
# limitations).
class Technology(BaseModel):
    """Represents a single technology entry with its details."""

    title: str
    purpose: str
    key_components: str
    advantages: str
    limitations: str
    id: int
# `content` / `uris` are the same keys format_prior_art_html reads from a
# prior-art dict. This model is not referenced elsewhere in the visible part
# of this file.
class OutputPriorArt(BaseModel):
    """Represents the search of prior art using the technology combinations"""

    content: str
    uris: List  # element type is not constrained here
# Request model for a prior-art search driven by constraints: a list of
# Technology objects plus a str -> str constraints mapping. Parameter type of
# the `prior_art_constraints` coroutine in this file.
class InputPriorArtConstraints(BaseModel):
    technologies: List[Technology]
    constraints: Dict[str, str]
# Request model for a prior-art search driven by a problem statement: a list
# of Technology objects plus the free-text `problem`. Parameter type of the
# `prior_art_problems` coroutine in this file.
class InputPriorArtProblem(BaseModel):
    technologies: List[Technology]
    problem: str
# This schema defines the root structure of the JSON: an object whose single
# `technologies` key holds a list of Technology entries.
class TechnologyData(BaseModel):
    """Represents the top-level object containing a list of technologies."""

    technologies: List[Technology]
async def process(data: InputProblem):
    """Find technologies for a problem statement.

    Forwards the request model, together with ``global_tech`` and
    ``global_tech_embeddings`` (not defined in this file; expected to come
    from the star imports), to ``process_input`` in ``"problem"`` mode.

    Returns:
        A dict ``{"technologies": <result of process_input>}``.
    """
    # NOTE(review): no route decorator is visible on this coroutine in this
    # copy of the file -- confirm how it is registered on `app`.
    # NOTE(review): the whole model is passed here, whereas
    # process_constraints passes the unwrapped dict; confirm that
    # process_input expects the model object in "problem" mode.
    result = process_input(data, global_tech, global_tech_embeddings, "problem")
    return {"technologies": result}
async def process_constraints(constraints: InputConstraints):
    """Find technologies for an explicit set of constraints.

    Unwraps the ``constraints`` mapping from the request model and forwards
    it, with ``global_tech`` and ``global_tech_embeddings`` (not defined in
    this file; expected to come from the star imports), to ``process_input``
    in ``"constraints"`` mode.

    Returns:
        A dict ``{"technologies": <result of process_input>}``.
    """
    result = process_input(
        constraints.constraints, global_tech, global_tech_embeddings, "constraints"
    )
    return {"technologies": result}
async def prior_art_constraints(data: InputPriorArtConstraints):
    """Search prior art for a set of technologies against constraints.

    Calls ``process_prior_art`` with the request's technologies and
    constraints in ``"constraints"`` mode, using the ``"pydantic"`` input
    flavour, and returns its result unchanged.
    """
    prior_art = process_prior_art(
        data.technologies, data.constraints, "constraints", "pydantic"
    )
    # Echo the raw result to stdout (kept from the original for debugging).
    print(prior_art)
    return prior_art
async def prior_art_problems(data: InputPriorArtProblem):
    """Search prior art for a set of technologies against a problem statement.

    Calls ``process_prior_art`` with the request's technologies and problem
    text in ``"problem"`` mode, using the ``"pydantic"`` input flavour, and
    returns its result unchanged.
    """
    # InputPriorArtProblem declares the field as `problem` (singular); the
    # previous `data.problems` lookup raised AttributeError on every call.
    prior_art = process_prior_art(data.technologies, data.problem, "problem", "pydantic")
    return prior_art
def make_json_serializable(data):
    """Recursively convert scalar wrappers inside nested containers to floats.

    Dicts, lists and tuples are rebuilt with every contained value converted
    (dict keys are left untouched). Any other object exposing an ``item``
    attribute is replaced by ``float(obj.item())``; everything else is
    returned as is.

    Args:
        data: Arbitrarily nested structure of dicts, lists, tuples and leaves.

    Returns:
        A structure of the same shape with ``item``-bearing leaves as floats.
    """
    # Containers are checked first so that only non-container leaves reach
    # the `item` probe below.
    if isinstance(data, dict):
        return {key: make_json_serializable(value) for key, value in data.items()}
    if isinstance(data, list):
        return [make_json_serializable(element) for element in data]
    if isinstance(data, tuple):
        return tuple(make_json_serializable(element) for element in data)
    if hasattr(data, "item"):
        # NOTE(review): presumably targets NumPy/tensor scalars; every such
        # value becomes a float, integers included.
        return float(data.item())
    return data
def format_constraints_html(constraints: dict) -> str:
    """Render a constraints mapping as an HTML fragment.

    Args:
        constraints: Mapping whose keys are rendered as constraint titles and
            whose values are rendered as the matching descriptions.

    Returns:
        An ``<h1>`` heading followed by a ``constraints-container`` div with
        one ``constraint-item`` div per entry, in mapping order.
    """
    # NOTE(review): titles and descriptions are interpolated without HTML
    # escaping; confirm the upstream text can be trusted as markup.
    items = [
        "\n<div class='constraint-item'>\n"
        f"<p><span class='constraint-title'>{title}:</span> "
        f"<span class='constraint-description'>{description}</span></p>\n"
        "</div>\n"
        for title, description in constraints.items()
    ]
    html_content = "<div class='constraints-container'>" + "".join(items) + "</div>"
    return "<h1>Retrieved Constraints</h1>" + html_content
def format_best_combinations_html(combinations_data: list) -> str:
    """Render per-problem technology combinations as HTML cards.

    Args:
        combinations_data: List of dicts. Each may carry a ``"problem"`` dict
            (its ``"title"`` is used as the card heading, falling back to
            ``"Problem <n>"``) and a ``"technologies"`` list whose entries are
            indexable, with the technology dict at position 0.

    Returns:
        An ``<h1>`` heading followed by one ``problem-card`` div per
        combination, each holding a ``technology-card`` per technology dict.
    """
    # NOTE(review): values are interpolated without HTML escaping.
    parts = ["<div class='combinations-outer-container'>"]
    for index, combination in enumerate(combinations_data):
        problem_title = combination.get("problem", {}).get("title", f"Problem {index + 1}")
        parts.append(
            "\n<div class='problem-card'>\n"
            f"<h3 class='problem-card-title'>{problem_title}</h3>\n"
            "<div class='technologies-inner-container'>\n"
        )
        for tech_info_score in combination.get("technologies", []):
            # Only the first element is rendered; the remaining elements of
            # the entry (presumably a score, going by the name) are ignored.
            tech_info = tech_info_score[0]
            if not isinstance(tech_info, dict):
                continue
            parts.append(
                "\n<div class='technology-card'>\n"
                f"<h4 class='tech-card-title'>{tech_info.get('title', 'N/A')}</h4>\n"
                f"<p><strong>Purpose:</strong> {tech_info.get('purpose', 'N/A')}</p>\n"
                f"<p><strong>Components:</strong> {tech_info.get('key_components', 'N/A')}</p>\n"
                f"<p><strong>Advantages:</strong> {tech_info.get('advantages', 'N/A')}</p>\n"
                f"<p><strong>Limitations:</strong> {tech_info.get('limitations', 'N/A')}</p>\n"
                "</div>\n"
            )
        # Close the inner technologies container and the problem card.
        parts.append("\n</div>\n</div>\n")
    parts.append("</div>")
    return "<h1>The 5 Best Technology Combinations per constraint</h1>" + "".join(parts)
def format_final_technologies_html(technologies_list: list) -> str:
    """Render the final list of selected technologies as HTML cards.

    Args:
        technologies_list: List of technology dicts; non-dict entries are
            skipped. Missing keys are rendered as ``N/A``.

    Returns:
        An ``<h1>`` heading followed by a ``final-tech-container`` div with
        one ``final-tech-card`` per technology dict.
    """
    # NOTE(review): values are interpolated without HTML escaping.
    cards = [
        "\n<div class='final-tech-card'>\n"
        f"<h4 class='final-tech-title'>{tech_info.get('title', 'N/A')}</h4>\n"
        f"<p><strong>Purpose:</strong> {tech_info.get('purpose', 'N/A')}</p>\n"
        f"<p><strong>Components:</strong> {tech_info.get('key_components', 'N/A')}</p>\n"
        f"<p><strong>Advantages:</strong> {tech_info.get('advantages', 'N/A')}</p>\n"
        f"<p><strong>Limitations:</strong> {tech_info.get('limitations', 'N/A')}</p>\n"
        "</div>\n"
        for tech_info in technologies_list
        if isinstance(tech_info, dict)
    ]
    html_content = "<div class='final-tech-container'>" + "".join(cards) + "</div>"
    return "<h1>The best technologies combinations </h1>" + html_content
# Text that separates the free-form summary from the per-document listing.
_PRIOR_ART_DOCUMENTS_MARKER = "Here are the documents found and the technologies used within them:\n\n"

# A document title is "<number>. " followed by a bold span. Bold markers are
# rewritten to <strong> before the documents are split, so the pattern has to
# accept the converted form as well as the raw ``**...**`` form.
_PRIOR_ART_DOC_TITLE_RE = re.compile(r'(\d+\.\s*(?:\*\*.*?\*\*|<strong>.*?</strong>))')


def _prior_art_summary_html(summary_text: str) -> str:
    """Render the text preceding the documents marker as the summary div."""
    pieces = [" <div class='prior-art-summary'>\n"]
    for line in summary_text.strip().split('\n'):
        stripped = line.strip()
        if stripped.startswith('*'):
            # Bullet line: emphasise the part before the first colon, if any.
            parts = line.split(':', 1)
            if len(parts) > 1:
                pieces.append(
                    f" <p class='summary-bullet'><strong>{parts[0].replace('*', '').strip()}:</strong> {parts[1].strip()}</p>\n"
                )
            else:
                pieces.append(f" <p class='summary-bullet'>{line.replace('*', '').strip()}</p>\n")
        elif stripped:
            pieces.append(f" <p>{stripped}</p>\n")
    pieces.append(" </div>\n")
    return "".join(pieces)


def _prior_art_document_card_html(title_line: str, content_block: str) -> str:
    """Render one document (title line plus its text block) as a card."""
    lines = [l.strip() for l in content_block.split('\n') if l.strip()]

    # Remember the last line carrying each section label (-1 when absent).
    desc_start_idx = -1
    tech_start_idx = -1
    for idx, line in enumerate(lines):
        if line.startswith("Description:"):
            desc_start_idx = idx
        elif line.startswith("Technologies Used:"):
            tech_start_idx = idx

    doc_description = ""
    if desc_start_idx != -1:
        # The description runs up to the technologies label, or to the end.
        desc_end_idx = tech_start_idx if tech_start_idx != -1 else len(lines)
        doc_description = " ".join(lines[desc_start_idx:desc_end_idx]).replace("Description:", "").strip()

    tech_used_section = []
    if tech_start_idx != -1:
        tech_used_section = [l.replace('*', '').strip() for l in lines[tech_start_idx:] if l.startswith('*')]

    pieces = [
        "<div class='prior-art-document-card'>\n"
        f"<h4 class='document-title'>{title_line}</h4>\n"
        f"<p class='document-description'><strong>Description:</strong> {doc_description}</p>\n"
    ]
    if tech_used_section:
        pieces.append(" <div class='document-technologies'>\n")
        pieces.append(" <h5>Technologies Used:</h5>\n <ul>\n")
        for tech_item in tech_used_section:
            if not tech_item.strip():
                continue
            tech_parts = tech_item.split(':', 1)
            if len(tech_parts) > 1:
                pieces.append(f" <li><strong>{tech_parts[0].strip()}:</strong> {tech_parts[1].strip()}</li>\n")
            else:
                pieces.append(f" <li>{tech_item.strip()}</li>\n")
        pieces.append(" </ul>\n </div>\n")
    pieces.append(" </div>\n")
    return "".join(pieces)


def _prior_art_documents_html(documents_text: str) -> str:
    """Render the text following the documents marker as a list of cards."""
    # Splitting on a capturing group yields [preamble, title, body, title, ...].
    entries = _PRIOR_ART_DOC_TITLE_RE.split(documents_text.strip())
    pieces = [" <div class='prior-art-documents'>\n"]
    for title_line, content_block in zip(entries[1::2], entries[2::2]):
        pieces.append(_prior_art_document_card_html(title_line.strip(), content_block.strip()))
    pieces.append(" </div>\n")
    return "".join(pieces)


def _prior_art_uris_html(uris) -> str:
    """Render the trailing numbered list of referenced URIs ('' when empty)."""
    if not uris:
        return ""
    pieces = [
        " <div class='grouped-uris-section'>\n",
        " <hr class='disruptive-line'>\n",
        " <h3>Referenced Documents (URIs):</h3>\n",
        " <ul>\n",
    ]
    for idx, uri in enumerate(uris):
        pieces.append(
            f" <li>{idx + 1}. <a href='{uri}' target='_blank' class='prior-art-grouped-link'>Document {idx + 1} Link</a></li>\n"
        )
    pieces.append(" </ul>\n </div>\n")
    return "".join(pieces)


def format_prior_art_html(prior_art_data: dict) -> str:
    """Convert a prior-art result into an HTML fragment.

    The ``content`` text is treated as light markdown: ``**bold**`` becomes
    ``<strong>`` and ``[n](http...)`` becomes an inline link. Text before the
    documents marker is rendered as a summary; text after it is split into
    numbered document cards with a description and a technologies list.

    Args:
        prior_art_data: Dict with a ``"content"`` string and an optional
            ``"uris"`` list. A falsy value or a dict without ``"content"``
            yields a "No prior art data available." placeholder.

    Returns:
        A ``prior-art-container`` div holding the summary, the document
        cards (when the marker is present) and the grouped URI list (when
        any URIs are given).
    """
    if not prior_art_data or 'content' not in prior_art_data:
        return "<div class='prior-art-container'><p>No prior art data available.</p></div>"

    content = prior_art_data['content']
    uris = prior_art_data.get('uris', [])

    # NOTE(review): content and URIs are inserted into the markup without
    # HTML escaping; confirm the upstream text can be trusted.
    # 1. Convert **text** to <strong>text</strong>
    processed_content = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', content)
    # 2. Convert [x](uri) to clickable links: group 1 is the number, group 2 the URI.
    processed_content = re.sub(
        r'\[(\d+)\]\((https?:\/\/[^\s\)]+)\)',
        r'<a href="\2" target="_blank" class="prior-art-inline-link">\1</a>',
        processed_content,
    )

    # str.split always returns at least one element, so the summary part is
    # always present; the documents part only exists when the marker is found.
    sections = processed_content.split(_PRIOR_ART_DOCUMENTS_MARKER)
    summary_html = _prior_art_summary_html(sections[0])
    documents_html = _prior_art_documents_html(sections[1]) if len(sections) > 1 else ""
    grouped_uris_html = _prior_art_uris_html(uris)

    return f"<div class='prior-art-container'>\n{summary_html}{documents_html}{grouped_uris_html}</div>"
def gradio_prior_art(best_technologies, constraints):
    """Gradio callback: run the prior-art search and return it as HTML.

    Args:
        best_technologies: Value of the hidden ``stock_technologies`` JSON
            component (``process_input_gradio`` stores
            ``{"technologies": [...]}`` there).
        constraints: Value of the hidden ``stock_constraints`` JSON component.

    Returns:
        The HTML produced by ``format_prior_art_html`` for the result of
        ``process_prior_art`` (called in ``"constraints"`` mode with the
        ``"dict"`` input flavour).
    """
    prior_art = process_prior_art(best_technologies, constraints, "constraints", "dict")
    html_prior_art = format_prior_art_html(prior_art)
    # Echo the generated markup to stdout (kept from the original for debugging).
    print(html_prior_art)
    return html_prior_art
def process_input_gradio(problem_description: str):
    """Run the technology-selection pipeline step by step for the Gradio UI.

    Relies on helpers and on ``global_tech`` / ``global_tech_embeddings``
    that are not defined in this file (expected to come from the star
    imports).

    Args:
        problem_description: Free-text problem entered by the user.

    Returns:
        A 7-tuple matching the outputs wired to the "Process Problem" button:
        the generated prompt, constraints HTML, best-combinations HTML, a
        comma-separated string of the selected technology ids, final
        technologies HTML, ``{"technologies": best_technologies}`` and the
        raw constraints.
    """
    # Step 1: Set Prompt
    prompt = set_prompt(problem_description)

    # Step 2: Retrieve Constraints
    constraints = retrieve_constraints(prompt)

    # Step 3: Stem Constraints (a copy is also handed to save_dataframe under
    # the name "constraints_stemmed.xlsx")
    constraints_stemmed = stem(constraints, "constraints")
    save_dataframe(
        pd.DataFrame({"stemmed_constraints": constraints_stemmed}),
        "constraints_stemmed.xlsx",
    )
    print(constraints_stemmed)

    # Step 4: Global Tech -- nothing to do here; `global_tech` is used as is.

    # Step 5: Get Contrastive Similarities
    result_similarities, matrix = get_contrastive_similarities(
        constraints_stemmed, global_tech, global_tech_embeddings
    )
    save_to_pickle(result_similarities)

    # Step 6: Find Best List Combinations
    best_combinations = find_best_list_combinations(constraints_stemmed, global_tech, matrix)

    # Step 7: Select Technologies
    best_technologies_id = select_technologies(best_combinations)

    # Step 8: Get Technologies by ID
    best_technologies = get_technologies_by_id(best_technologies_id, global_tech)

    # Format outputs for Gradio
    constraints_html = format_constraints_html(constraints)
    best_combinations_html = format_best_combinations_html(best_combinations)
    final_technologies_html = format_final_technologies_html(best_technologies)

    return (
        prompt,
        constraints_html,  # Output HTML for constraints
        best_combinations_html,  # Output HTML for best combinations
        ", ".join(map(str, best_technologies_id)),  # Still a simple text list
        final_technologies_html,  # Output HTML for final technologies
        {"technologies": best_technologies},  # kept in a hidden JSON component for the prior-art step
        constraints,  # kept in a hidden JSON component for the prior-art step
    )
# Return a gr.update object to change the value and visibility in one step | |
# return gr.update(value=html_prior_art, visible=True) | |
# --- Gradio Interface Setup --- | |
# Components are created here, outside the Blocks context, and placed into
# the layout below with `.render()`.
input_problem = gr.Textbox(
    label="Enter Problem Description",
    placeholder="e.g., Develop a secure and scalable e-commerce platform with real-time analytics.",
)
output_prompt = gr.Textbox(label="1. Generated Prompt", interactive=False)
output_constraints = gr.HTML(label="2. Retrieved Constraints")
output_best_combinations = gr.HTML(label="7. Best Technology Combinations Found")
output_selected_ids = gr.Textbox(label="8. Selected Technology IDs", interactive=False)
output_final_technologies = gr.HTML(label="9. Final Best Technologies")
# No `visible=False` is passed, so this component is shown from the start.
output_prior_art = gr.HTML(label="10. Prior Art Analysis")
# Hidden JSON holders that carry the selected technologies and the
# constraints from the processing step over to the prior-art step.
stock_technologies = gr.JSON(visible=False)
stock_constraints = gr.JSON(visible=False)
# Page layout and event wiring for the Gradio UI.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# whose indentation was lost; confirm which components belong to which
# Row/Column before relying on the exact layout.
with gr.Blocks(
    theme=gr.themes.Soft(),
    css=custom_css,
) as gradio_app_blocks:
    gr.Markdown("# Insight Finder: Step-by-Step Technology Selection")
    gr.Markdown("## Enter a problem description to see how relevant technologies are identified through various processing steps.")

    with gr.Row():
        with gr.Column(scale=2):
            input_problem.render()
        with gr.Column(scale=1):
            gr.Markdown("Click to start the analysis:")
            process_button = gr.Button("Process Problem", elem_id="process_button")

    gr.Markdown("---")
    gr.Markdown("### Processing Steps & Results:")

    with gr.Row():
        with gr.Column():
            output_prompt.render()
            output_constraints.render()
        with gr.Column():
            output_selected_ids.render()
            output_best_combinations.render()
            output_final_technologies.render()

    gr.Markdown("---")
    gr.Markdown("### Prior Art Analysis")
    prior_art_button = gr.Button("Find Prior Art", elem_id="prior_art_button")
    output_prior_art.render()
    stock_technologies.render()
    stock_constraints.render()

    # The output order must match the tuple returned by process_input_gradio.
    process_button.click(
        fn=process_input_gradio,
        inputs=input_problem,
        outputs=[
            output_prompt,
            output_constraints,
            output_best_combinations,
            output_selected_ids,
            output_final_technologies,
            stock_technologies,
            stock_constraints,
        ],
    )

    # The prior-art step reads the values stashed in the hidden JSON holders.
    prior_art_button.click(
        fn=gradio_prior_art,
        inputs=[stock_technologies, stock_constraints],
        outputs=output_prior_art,
    )

# Serve the Gradio UI from the FastAPI app under /gradio.
gr.mount_gradio_app(app, gradio_app_blocks, path="/gradio")
#if __name__ == "__main__": | |
# gradio_app_blocks.launch() |