Spaces:
Runtime error
Runtime error
File size: 5,875 Bytes
39838a2 53a648d 39838a2 53a648d 39838a2 6d79ec9 39838a2 6d79ec9 39838a2 6d79ec9 53a648d 6d79ec9 53a648d 6d79ec9 53a648d a619283 53a648d a619283 39838a2 53a648d 39838a2 a619283 53a648d 39838a2 a619283 53a648d e34d0c9 53a648d 39838a2 6d79ec9 39838a2 6d79ec9 53a648d 6d79ec9 39838a2 53a648d 39838a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import gradio as gr
from llama_cpp import Llama
import logging
import os
# Logging: expose a module-level logger and configure the root logger at INFO
# so the progress messages emitted around model loading are shown.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# System prompt embedded verbatim in every request built by get_boolean_query().
# The example lines use a "→" arrow to show input → expected output; the arrow
# had been corrupted into the stray character "β" by an encoding round-trip.
SYSTEM_INSTRUCTION = """Convert natural language queries into boolean search queries by following these rules:
1. FIRST: Remove all meta-terms from this list (they should NEVER appear in output):
- articles, papers, research, studies
- examining, investigating, analyzing
- findings, documents, literature
- publications, journals, reviews
Example: "Research examining X" → just "X"
2. SECOND: Remove generic implied terms that don't add search value:
- Remove words like "practices," "techniques," "methods," "approaches," "strategies"
- Remove words like "impacts," "effects," "influences," "role," "applications"
- For example: "sustainable agriculture practices" → "sustainable agriculture"
- For example: "teaching methodologies" → "teaching"
- For example: "leadership styles" → "leadership"
3. THEN: Format the remaining terms:
CRITICAL QUOTING RULES:
- Multi-word phrases MUST ALWAYS be in quotes - NO EXCEPTIONS
- Examples of correct quoting:
- Wrong: machine learning AND deep learning
- Right: "machine learning" AND "deep learning"
- Wrong: natural language processing
- Right: "natural language processing"
- Single words must NEVER have quotes (e.g., science, research, learning)
- Use AND to connect required concepts
- Use OR with parentheses for alternatives"""
def load_model():
    """Build and return the Llama model from the boolean-search GGUF file.

    Calls ``Llama.from_pretrained`` with the repository id and filename
    below, logging a message before and after the call.
    """
    source = {
        "repo_id": "Zwounds/boolean-search-model",
        "filename": "boolean.gguf",
    }
    logger.info("Loading model...")
    llm = Llama.from_pretrained(**source)
    logger.info("Model loaded successfully")
    return llm
def extract_response(output: str) -> str:
    """Return *output* without surrounding whitespace.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    # Only whitespace is trimmed here; the text itself is returned unchanged.
    return output.strip() if output else ""
def get_boolean_query(query: str, model=None) -> str:
    """Generate a boolean search query from a natural-language query.

    Args:
        query: The user's natural-language request.
        model: Object exposing ``create_completion`` (in this file, the
            instance returned by ``load_model``). It defaults to ``None``
            for signature compatibility but is required for any non-empty
            query.

    Returns:
        The generated text with surrounding whitespace removed, or an empty
        string when ``query`` is empty or contains only whitespace.

    Raises:
        ValueError: If a non-empty query is supplied without a model.
    """
    # Nothing to convert: skip inference rather than generate from an empty prompt.
    if not query or not query.strip():
        return ""
    # Without this guard the None default fails later with an opaque
    # AttributeError on ``None.create_completion``.
    if model is None:
        raise ValueError("get_boolean_query() requires a loaded model; got model=None")

    # Conversation framed with <|im_start|>/<|im_end|> markers; the prompt ends
    # with an open assistant turn for the model to complete. The literal is kept
    # flush-left because its leading whitespace would become part of the prompt.
    prompt = f"""<|im_start|>system
{SYSTEM_INSTRUCTION}<|im_end|>
<|im_start|>user
{query}<|im_end|>
<|im_start|>assistant
"""
    # Short, non-echoed completion that stops at the end-of-turn marker.
    output = model.create_completion(
        prompt,
        max_tokens=64,
        stop=["<|im_end|>"],
        echo=False,
        temperature=0.0,
    )
    return extract_response(output['choices'][0]['text'])
# Sample queries passed to the interface as examples. Each row is a
# one-element list holding the query text; the comment above each query
# names the rule it is meant to exercise.
examples = [
    [text]
    for text in (
        # Meta-term removal
        "Find research papers examining the long-term effects of meditation on brain structure",
        # Generic implied terms (practices, techniques, methods)
        "Articles about deep learning techniques for natural language processing tasks",
        # Impact/effect terms
        "Studies on the impact of early childhood nutrition on cognitive development",
        # Technology applications
        "Information on virtual reality applications in architectural design and urban planning",
        # OR relationship with parentheses
        "Research on electric vehicles adoption in urban environments or rural communities",
        # Quoting of multi-word concepts only
        "Articles on biodiversity loss in coral reefs and rainforest ecosystems",
        # Strategy/approach terms
        "Studies about different teaching approaches for children with learning disabilities",
        # Complex OR relationships
        "Research examining social media influence on political polarization or public discourse",
        # Implied terms in specific industries
        "Articles about implementation strategies for blockchain in supply chain management or financial services",
        # Qualifiers that don't add search value
        "Research on effective leadership styles in multicultural organizations",
        # Multiple implied terms
        "Studies on the effects of microplastic pollution techniques on marine ecosystem health",
        # Domain-specific implied terms
        "Articles about successful cybersecurity protection methods for critical infrastructure",
        # Generalized vs specific concepts
        "Research papers on quantum computing algorithms for cryptography or optimization problems",
        # Implied terms in outcome descriptions
        "Studies examining the relationship between sleep quality and academic performance outcomes",
        # Complex nesting of concepts
        "Articles about renewable energy integration challenges in developing countries or island nations",
    )
]
# Load the model once at module level; the interface callback defined further
# down closes over this single instance instead of reloading it per request.
logger.info("Initializing model...")
model = load_model()
# Static text shown by the Gradio interface.
title = "Natural Language to Boolean Search"
description = """Convert natural language queries into boolean search expressions. The model will:
1. Remove search-related terms (like 'articles', 'research', etc.)
2. Handle generic implied terms (like 'practices', 'methods')
3. Format concepts using proper boolean syntax:
- Multi-word phrases in quotes
- Single words without quotes
- AND to connect required concepts
- OR with parentheses for alternatives
"""

# Components are created up front (input first, then output) and handed to
# the Interface below.
query_box = gr.Textbox(
    label="Enter your natural language query",
    placeholder="e.g., I'm looking for information about climate change and renewable energy",
)
result_box = gr.Textbox(label="Boolean Search Query")

# The callback forwards the textbox value to get_boolean_query together with
# the module-level model.
demo = gr.Interface(
    fn=lambda x: get_boolean_query(x, model),
    inputs=[query_box],
    outputs=result_box,
    title=title,
    description=description,
    examples=examples,
    theme=gr.themes.Soft(),
)

# Launch the interface only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|