Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
@@ -247,46 +247,60 @@ class UltimateModelLoader:
|
|
247 |
return trained_models
|
248 |
|
249 |
def load_best_available_model(self, preferred_size: str = "auto") -> bool:
|
250 |
-
"""Load best available model
|
251 |
|
252 |
-
print(f"π
|
253 |
|
254 |
-
#
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
# Filter models based on resources and preference
|
261 |
-
available_models = self._filter_models_by_resources(memory_gb, has_gpu, preferred_size)
|
262 |
-
|
263 |
-
print(f"π DEBUG: Found {len(available_models)} available models")
|
264 |
-
for i, (model_name, config) in enumerate(available_models):
|
265 |
-
print(f" {i+1}. {config['display_name']} - {config['params']:,} params")
|
266 |
|
267 |
-
|
268 |
|
269 |
-
for model_name,
|
270 |
try:
|
271 |
-
print(f"
|
272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
273 |
|
274 |
-
if self._load_and_validate_model(model_name, config):
|
275 |
-
self.model_name = config["display_name"]
|
276 |
-
self.model_size = config["size"]
|
277 |
-
print(f"π DEBUG: Successfully loaded {config['display_name']}")
|
278 |
-
logger.info(f"β
Successfully loaded {config['display_name']}")
|
279 |
-
return True
|
280 |
-
else:
|
281 |
-
print(f"π DEBUG: Validation failed for {config['display_name']}")
|
282 |
-
|
283 |
except Exception as e:
|
284 |
-
print(f"
|
285 |
-
logger.warning(f"β {config['display_name']} failed: {e}")
|
286 |
continue
|
287 |
|
288 |
-
print(f"
|
289 |
-
logger.error("β Failed to load any model")
|
290 |
return False
|
291 |
|
292 |
def _filter_models_by_resources(self, memory_gb: float, has_gpu: bool, preferred_size: str) -> List[Tuple[str, Dict]]:
|
@@ -1516,18 +1530,28 @@ class UltimateMambaSwarm:
|
|
1516 |
# π§ ENHANCED GENERATION: Local AI + Web Intelligence
|
1517 |
print(f"π DEBUG: self.model_loaded = {self.model_loaded}")
|
1518 |
print(f"π DEBUG: hasattr(self, 'model_loader') = {hasattr(self, 'model_loader')}")
|
1519 |
-
if hasattr(self, 'model_loader'):
|
1520 |
-
print(f"π DEBUG: model_loader.model_name = {getattr(self.model_loader, 'model_name', 'None')}")
|
1521 |
print(f"π DEBUG: model_loader.model = {type(getattr(self.model_loader, 'model', None))}")
|
1522 |
|
1523 |
-
|
1524 |
-
|
1525 |
-
|
1526 |
-
|
1527 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1528 |
else:
|
1529 |
-
print(f"π
|
1530 |
-
response = self.
|
1531 |
|
1532 |
# Quality validation
|
1533 |
is_gibberish = self.model_loader._is_gibberish_advanced(response) if self.model_loaded else False
|
@@ -1709,6 +1733,882 @@ COMPREHENSIVE RESPONSE:"""
|
|
1709 |
|
1710 |
return "Current information from web sources integrated."
|
1711 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1712 |
def _generate_with_ultimate_model(self, prompt: str, max_length: int, temperature: float, top_p: float, domain: str = 'general') -> str:
|
1713 |
"""Generate using loaded model with ultimate optimization and content safety"""
|
1714 |
try:
|
|
|
247 |
return trained_models
|
248 |
|
249 |
def load_best_available_model(self, preferred_size: str = "auto") -> bool:
    """Load the first small causal LM that passes a smoke test.

    Candidates are tried in a fixed order: ``gpt2``, ``distilgpt2``,
    ``gpt2-medium``. For each one the tokenizer and model are fetched with
    ``from_pretrained``, the model is switched to eval mode, and a three-token
    greedy generation from "Hello" is run as a sanity check. The first
    candidate that gets through all of that is stored on the instance.

    Args:
        preferred_size: Only echoed in the start-up message; the candidate
            order does not depend on it.

    Returns:
        True once a candidate has been stored in ``self.model``,
        ``self.tokenizer``, ``self.model_name``, ``self.model_size`` and
        ``self.device``. False if ``transformers`` cannot be imported or if
        every candidate fails; nothing on ``self`` is assigned in that case.
    """
    # NOTE(review): the leading "π" / "β" / "π―" in the progress messages are
    # the remains of emoji that were mangled by a bad decode. They are kept
    # as found because the original bytes cannot be recovered here; restore
    # them from version control.
    print(f"π SIMPLIFIED MODEL LOADING - preferred_size={preferred_size}")

    # Import once, up front: a missing dependency is the same failure for
    # every candidate, so report it a single time instead of once per model.
    try:
        from transformers import AutoModelForCausalLM, AutoTokenizer
    except ImportError as e:
        print(f"transformers import failed: {e}")
        print("β All model loading attempts failed")
        return False

    # (hub id, display name, size label stored in self.model_size)
    simple_models = [
        ("gpt2", "GPT-2 Base (117M)", "small"),
        ("distilgpt2", "DistilGPT-2 (82M)", "small"),
        ("gpt2-medium", "GPT-2 Medium (355M)", "medium"),
    ]

    print(f"π― Attempting to load {len(simple_models)} simple models...")

    for model_name, display_name, size_label in simple_models:
        try:
            print(f"π Loading {display_name}...")

            tokenizer = AutoTokenizer.from_pretrained(model_name)
            if tokenizer.pad_token is None:
                # GPT-2 style tokenizers ship without a pad token.
                tokenizer.pad_token = tokenizer.eos_token
            print(" ✅ Tokenizer loaded")

            model = AutoModelForCausalLM.from_pretrained(model_name)
            model.eval()
            print(" ✅ Model loaded")

            # Smoke test: one tiny greedy generation. The mask and pad id are
            # passed explicitly because pad == eos, which generate() cannot
            # tell apart on its own.
            test_input = tokenizer.encode("Hello", return_tensors="pt")
            with torch.no_grad():
                test_output = model.generate(
                    test_input,
                    attention_mask=torch.ones_like(test_input),
                    max_new_tokens=3,
                    do_sample=False,
                    pad_token_id=tokenizer.pad_token_id,
                )
            test_result = tokenizer.decode(test_output[0], skip_special_tokens=True)
            print(f" ✅ Test generation: '{test_result}'")
        except Exception as e:
            # Deliberately broad: any failure (download, load, generation)
            # just means "try the next candidate".
            print(f"β {display_name} failed: {e}")
            continue

        # Only a fully validated model is published on the instance.
        self.model = model
        self.tokenizer = tokenizer
        self.model_name = display_name
        self.model_size = size_label
        self.device = "cpu"  # Keep it simple

        print(f"π SUCCESS: {display_name} loaded and validated!")
        return True

    print("β All model loading attempts failed")
    return False
|
305 |
|
306 |
def _filter_models_by_resources(self, memory_gb: float, has_gpu: bool, preferred_size: str) -> List[Tuple[str, Dict]]:
|
|
|
1530 |
# π§ ENHANCED GENERATION: Local AI + Web Intelligence
|
1531 |
print(f"π DEBUG: self.model_loaded = {self.model_loaded}")
|
1532 |
print(f"π DEBUG: hasattr(self, 'model_loader') = {hasattr(self, 'model_loader')}")
|
1533 |
+
if hasattr(self, 'model_loader') and hasattr(self.model_loader, 'model'):
|
|
|
1534 |
print(f"π DEBUG: model_loader.model = {type(getattr(self.model_loader, 'model', None))}")
|
1535 |
|
1536 |
+
# FORCE MODEL USAGE: Try direct model generation first
|
1537 |
+
model_response = None
|
1538 |
+
if self.model_loaded and hasattr(self.model_loader, 'model') and self.model_loader.model is not None:
|
1539 |
+
print(f"π§ FORCING model inference with {getattr(self.model_loader, 'model_name', 'Unknown')}")
|
1540 |
+
try:
|
1541 |
+
# Direct model generation - bypass all the complex routing
|
1542 |
+
model_response = self._force_model_generation(prompt, domain, web_context)
|
1543 |
+
if model_response and len(model_response.strip()) > 10: # Got a decent response
|
1544 |
+
print(f"β
SUCCESS: Got model response: {model_response[:50]}...")
|
1545 |
+
response = model_response
|
1546 |
+
else:
|
1547 |
+
print(f"β οΈ Model response too short: '{model_response}'")
|
1548 |
+
response = self._generate_intelligent_response(prompt, domain, web_context)
|
1549 |
+
except Exception as e:
|
1550 |
+
print(f"β Model generation failed: {e}")
|
1551 |
+
response = self._generate_intelligent_response(prompt, domain, web_context)
|
1552 |
else:
|
1553 |
+
print(f"π No model available - using intelligent response system")
|
1554 |
+
response = self._generate_intelligent_response(prompt, domain, web_context)
|
1555 |
|
1556 |
# Quality validation
|
1557 |
is_gibberish = self.model_loader._is_gibberish_advanced(response) if self.model_loaded else False
|
|
|
1733 |
|
1734 |
return "Current information from web sources integrated."
|
1735 |
|
1736 |
+
def _force_model_generation(self, prompt: str, domain: str, web_context: str = "") -> "Optional[str]":
    """Generate an answer directly with the loaded model, bypassing routing.

    Builds a plain ``Question: ...\\nAnswer:`` prompt (prefixed with the first
    200 characters of ``web_context`` when one is given), samples up to 100
    new tokens from ``self.model_loader.model``, and returns only the text the
    model produced for the first answer.

    Args:
        prompt: The user's question.
        domain: Not used by this method.
        web_context: Optional text to prepend as context.

    Returns:
        The stripped answer when it is longer than five characters, otherwise
        None. None is also returned when anything raises; the traceback is
        printed in that case.
    """
    # NOTE(review): the odd leading glyphs in the debug messages ("π", "π’",
    # "π€", "βοΈ", "β οΈ", "β") are mis-decoded emoji kept exactly as found;
    # restore them from version control.
    try:
        print(f"π FORCING model generation for: '{prompt[:50]}...'")

        # Simple, direct prompt formatting
        if web_context:
            full_prompt = f"Context: {web_context[:200]}...\n\nQuestion: {prompt}\nAnswer:"
        else:
            full_prompt = f"Question: {prompt}\nAnswer:"

        print(f"π Using prompt: '{full_prompt[:100]}...'")

        tokenizer = self.model_loader.tokenizer
        model = self.model_loader.model

        inputs = tokenizer.encode(full_prompt, return_tensors='pt')
        print(f"π’ Input tokens: {inputs.shape}")
        prompt_token_count = inputs.shape[-1]

        with torch.no_grad():
            outputs = model.generate(
                inputs,
                # Single unpadded sequence, so every position is real. Passed
                # explicitly because pad == eos is ambiguous to generate().
                attention_mask=torch.ones_like(inputs),
                max_new_tokens=100,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.1,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"π€ Full response: '{full_response[:100]}...'")

        # Decode only the tokens after the prompt. Searching the decoded text
        # for "Answer:" is wrong whenever the question itself contains that
        # marker or the model goes on to invent another Question/Answer turn.
        response = tokenizer.decode(
            outputs[0][prompt_token_count:], skip_special_tokens=True
        )
        # Keep the first answer only; drop any follow-on turn the model wrote.
        response = response.split("\nQuestion:", 1)[0].strip()

        print(f"βοΈ Extracted response: '{response[:100]}...'")

        # Simple quality check - just make sure it's not empty or too short
        if len(response) > 5:
            return response

        print(f"β οΈ Response too short, will use intelligent fallback")
        return None

    except Exception as e:
        # Best-effort by design: the caller falls back to the rule-based
        # responder when this returns None.
        print(f"β Force generation failed: {e}")
        import traceback
        traceback.print_exc()
        return None
|
1791 |
+
|
1792 |
+
def _generate_intelligent_response(self, prompt: str, domain: str, web_context: str = "") -> str:
    """Build a reply from web context, or hand off to a domain generator.

    When ``web_context`` has usable content, the reply is a fixed template
    wrapped around up to five lines picked from it. A line is usable when it
    is among the first ten non-blank lines, is longer than 20 characters
    after stripping, and does not start with ``http``. Otherwise the call is
    delegated by ``domain`` to ``_generate_code_solution`` ('code'),
    ``_generate_geography_response`` ('geography'),
    ``_generate_science_response`` ('science'), or
    ``_generate_general_response`` (anything else).

    Args:
        prompt: The user's question; echoed in the template's Analysis line.
        domain: Selects the fallback generator.
        web_context: Newline-separated text gathered from the web, if any.

    Returns:
        The templated web summary, or whatever the selected domain generator
        returns.
    """
    # NOTE(review): the leading "π€" / "π" in the two debug prints are
    # mis-decoded emoji kept exactly as found; restore from version control.
    print(f"π€ Generating intelligent response for domain: {domain}")

    if web_context and web_context.strip():
        print(f"π Using web context: {len(web_context)} chars")

        web_lines = [line.strip() for line in web_context.split('\n') if line.strip()]
        # Only the first ten non-blank lines are considered; URLs and short
        # fragments are skipped.
        key_info = [
            line
            for line in web_lines[:10]
            if len(line) > 20 and not line.startswith('http')
        ]

        if key_info:
            web_summary = '\n'.join(key_info[:5])  # Top 5 lines

            # Assembled from explicit pieces so the text does not depend on
            # how this method is indented. Bullets are real "•" characters
            # (they had been corrupted to "β’").
            return (
                "Based on current web information:\n"
                "\n"
                f"{web_summary}\n"
                "\n"
                f"**Analysis:** {prompt}\n"
                "\n"
                "This information comes from real-time web sources and provides current details about your question. "
                "The data above represents the most relevant and recent information available on this topic.\n"
                "\n"
                "**Key Points:**\n"
                "• Information sourced from current web results\n"
                "• Data is up-to-date as of the search time\n"
                "• Multiple sources consulted for comprehensive coverage\n"
                "\n"
                "For more detailed information, you might want to explore the original sources or ask more specific "
                "follow-up questions about particular aspects that interest you."
            )

    # No usable web context: fall back to the domain-specific generators.
    if domain == 'code':
        return self._generate_code_solution(prompt)
    if domain == 'geography':
        return self._generate_geography_response(prompt)
    if domain == 'science':
        return self._generate_science_response(prompt)
    return self._generate_general_response(prompt, domain)
|
1836 |
+
|
1837 |
+
def _generate_code_solution(self, prompt: str) -> str:
|
1838 |
+
"""Generate actual code solutions based on the prompt"""
|
1839 |
+
prompt_lower = prompt.lower()
|
1840 |
+
|
1841 |
+
if any(term in prompt_lower for term in ['web scraper', 'scraping', 'scrape', 'parse', 'website']):
|
1842 |
+
return """Here's a complete Python web scraper implementation:
|
1843 |
+
|
1844 |
+
```python
|
1845 |
+
import requests
|
1846 |
+
from bs4 import BeautifulSoup
|
1847 |
+
import time
|
1848 |
+
import csv
|
1849 |
+
import json
|
1850 |
+
from urllib.parse import urljoin, urlparse, urlunparse
|
1851 |
+
import logging
|
1852 |
+
from typing import List, Dict, Optional
|
1853 |
+
|
1854 |
+
class AdvancedWebScraper:
|
1855 |
+
def __init__(self, delay: float = 1.0, timeout: int = 10):
|
1856 |
+
self.delay = delay
|
1857 |
+
self.timeout = timeout
|
1858 |
+
self.session = requests.Session()
|
1859 |
+
self.session.headers.update({
|
1860 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
1861 |
+
})
|
1862 |
+
|
1863 |
+
# Set up logging
|
1864 |
+
logging.basicConfig(level=logging.INFO)
|
1865 |
+
self.logger = logging.getLogger(__name__)
|
1866 |
+
|
1867 |
+
def scrape_page(self, url: str) -> Optional[BeautifulSoup]:
|
1868 |
+
\"\"\"Scrape a single page and return BeautifulSoup object\"\"\"
|
1869 |
+
try:
|
1870 |
+
self.logger.info(f"Scraping: {url}")
|
1871 |
+
response = self.session.get(url, timeout=self.timeout)
|
1872 |
+
response.raise_for_status()
|
1873 |
+
|
1874 |
+
# Handle different content types
|
1875 |
+
content_type = response.headers.get('content-type', '').lower()
|
1876 |
+
if 'application/json' in content_type:
|
1877 |
+
return response.json()
|
1878 |
+
elif 'text/html' in content_type or 'text/xml' in content_type:
|
1879 |
+
return BeautifulSoup(response.content, 'html.parser')
|
1880 |
+
else:
|
1881 |
+
self.logger.warning(f"Unsupported content type: {content_type}")
|
1882 |
+
return None
|
1883 |
+
|
1884 |
+
except requests.RequestException as e:
|
1885 |
+
self.logger.error(f"Error scraping {url}: {e}")
|
1886 |
+
return None
|
1887 |
+
|
1888 |
+
def extract_data(self, soup: BeautifulSoup, selectors: Dict[str, str]) -> Dict[str, str]:
|
1889 |
+
\"\"\"Extract data using CSS selectors\"\"\"
|
1890 |
+
data = {}
|
1891 |
+
|
1892 |
+
for field, selector in selectors.items():
|
1893 |
+
try:
|
1894 |
+
elements = soup.select(selector)
|
1895 |
+
if elements:
|
1896 |
+
if field.endswith('_list'):
|
1897 |
+
data[field] = [elem.get_text(strip=True) for elem in elements]
|
1898 |
+
else:
|
1899 |
+
data[field] = elements[0].get_text(strip=True)
|
1900 |
+
else:
|
1901 |
+
data[field] = None
|
1902 |
+
except Exception as e:
|
1903 |
+
self.logger.error(f"Error extracting {field}: {e}")
|
1904 |
+
data[field] = None
|
1905 |
+
|
1906 |
+
return data
|
1907 |
+
|
1908 |
+
def extract_links(self, soup: BeautifulSoup, base_url: str,
|
1909 |
+
link_pattern: Optional[str] = None) -> List[str]:
|
1910 |
+
\"\"\"Extract links from a page\"\"\"
|
1911 |
+
links = []
|
1912 |
+
|
1913 |
+
for link in soup.find_all('a', href=True):
|
1914 |
+
href = link['href']
|
1915 |
+
full_url = urljoin(base_url, href)
|
1916 |
+
|
1917 |
+
# Filter links if pattern provided
|
1918 |
+
if link_pattern and link_pattern not in full_url:
|
1919 |
+
continue
|
1920 |
+
|
1921 |
+
# Ensure same domain
|
1922 |
+
if urlparse(full_url).netloc == urlparse(base_url).netloc:
|
1923 |
+
links.append(full_url)
|
1924 |
+
|
1925 |
+
return list(set(links)) # Remove duplicates
|
1926 |
+
|
1927 |
+
def scrape_multiple_pages(self, urls: List[str],
|
1928 |
+
selectors: Dict[str, str]) -> List[Dict]:
|
1929 |
+
\"\"\"Scrape multiple pages with same structure\"\"\"
|
1930 |
+
results = []
|
1931 |
+
|
1932 |
+
for url in urls:
|
1933 |
+
soup = self.scrape_page(url)
|
1934 |
+
if soup and isinstance(soup, BeautifulSoup):
|
1935 |
+
data = self.extract_data(soup, selectors)
|
1936 |
+
data['source_url'] = url
|
1937 |
+
results.append(data)
|
1938 |
+
|
1939 |
+
time.sleep(self.delay) # Be respectful
|
1940 |
+
|
1941 |
+
return results
|
1942 |
+
|
1943 |
+
def save_to_csv(self, data: List[Dict], filename: str):
|
1944 |
+
\"\"\"Save scraped data to CSV\"\"\"
|
1945 |
+
if not data:
|
1946 |
+
self.logger.warning("No data to save")
|
1947 |
+
return
|
1948 |
+
|
1949 |
+
fieldnames = data[0].keys()
|
1950 |
+
with open(filename, 'w', newline='', encoding='utf-8') as f:
|
1951 |
+
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
1952 |
+
writer.writeheader()
|
1953 |
+
writer.writerows(data)
|
1954 |
+
|
1955 |
+
self.logger.info(f"Saved {len(data)} records to {filename}")
|
1956 |
+
|
1957 |
+
def save_to_json(self, data: List[Dict], filename: str):
|
1958 |
+
\"\"\"Save scraped data to JSON\"\"\"
|
1959 |
+
with open(filename, 'w', encoding='utf-8') as f:
|
1960 |
+
json.dump(data, f, indent=2, ensure_ascii=False)
|
1961 |
+
|
1962 |
+
self.logger.info(f"Saved {len(data)} records to {filename}")
|
1963 |
+
|
1964 |
+
# Example usage
|
1965 |
+
if __name__ == "__main__":
|
1966 |
+
# Initialize scraper
|
1967 |
+
scraper = AdvancedWebScraper(delay=1.0)
|
1968 |
+
|
1969 |
+
# Example 1: Scrape a news website
|
1970 |
+
selectors = {
|
1971 |
+
'title': 'h1',
|
1972 |
+
'content': '.article-content, .post-content',
|
1973 |
+
'author': '.author, .byline',
|
1974 |
+
'date': '.date, .publish-date',
|
1975 |
+
'tags_list': '.tags a, .categories a'
|
1976 |
+
}
|
1977 |
+
|
1978 |
+
urls = [
|
1979 |
+
"https://example-news.com/article1",
|
1980 |
+
"https://example-news.com/article2"
|
1981 |
+
]
|
1982 |
+
|
1983 |
+
# Scrape the pages
|
1984 |
+
results = scraper.scrape_multiple_pages(urls, selectors)
|
1985 |
+
|
1986 |
+
# Save results
|
1987 |
+
scraper.save_to_csv(results, 'scraped_articles.csv')
|
1988 |
+
scraper.save_to_json(results, 'scraped_articles.json')
|
1989 |
+
|
1990 |
+
print(f"Scraped {len(results)} articles successfully!")
|
1991 |
+
```
|
1992 |
+
|
1993 |
+
**Key Features:**
|
1994 |
+
- **Robust Error Handling**: Handles timeouts, HTTP errors, and parsing issues
|
1995 |
+
- **Respectful Scraping**: Built-in delays and proper headers
|
1996 |
+
- **Flexible Data Extraction**: CSS selector-based extraction
|
1997 |
+
- **Multiple Output Formats**: CSV and JSON support
|
1998 |
+
- **Link Following**: Automatic link extraction and filtering
|
1999 |
+
- **Content Type Detection**: Handles HTML and JSON responses
|
2000 |
+
- **Logging**: Comprehensive logging for debugging
|
2001 |
+
|
2002 |
+
**Usage Examples:**
|
2003 |
+
1. **E-commerce scraping**: Extract product names, prices, descriptions
|
2004 |
+
2. **News scraping**: Get article titles, content, authors, dates
|
2005 |
+
3. **Social media**: Scrape posts, comments, user info (where allowed)
|
2006 |
+
4. **Real estate**: Property listings, prices, locations
|
2007 |
+
|
2008 |
+
**Installation:** `pip install requests beautifulsoup4 lxml`
|
2009 |
+
|
2010 |
+
**Legal Note**: Always check robots.txt and terms of service before scraping."""
|
2011 |
+
|
2012 |
+
elif any(term in prompt_lower for term in ['api', 'rest', 'fastapi', 'flask']):
|
2013 |
+
return """Here's a complete REST API implementation:
|
2014 |
+
|
2015 |
+
```python
|
2016 |
+
from fastapi import FastAPI, HTTPException, Depends, status
|
2017 |
+
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
2018 |
+
from pydantic import BaseModel, validator
|
2019 |
+
from typing import List, Optional, Dict, Any
|
2020 |
+
import uvicorn
|
2021 |
+
import jwt
|
2022 |
+
import hashlib
|
2023 |
+
import sqlite3
|
2024 |
+
from datetime import datetime, timedelta
|
2025 |
+
import logging
|
2026 |
+
|
2027 |
+
# Configure logging
|
2028 |
+
logging.basicConfig(level=logging.INFO)
|
2029 |
+
logger = logging.getLogger(__name__)
|
2030 |
+
|
2031 |
+
# Initialize FastAPI app
|
2032 |
+
app = FastAPI(
|
2033 |
+
title="Advanced API Server",
|
2034 |
+
description="A comprehensive REST API with authentication and data management",
|
2035 |
+
version="1.0.0"
|
2036 |
+
)
|
2037 |
+
|
2038 |
+
# Security
|
2039 |
+
security = HTTPBearer()
|
2040 |
+
SECRET_KEY = "your-secret-key-here" # Change this in production
|
2041 |
+
|
2042 |
+
# Database initialization
|
2043 |
+
def init_db():
|
2044 |
+
conn = sqlite3.connect('api_data.db')
|
2045 |
+
cursor = conn.cursor()
|
2046 |
+
|
2047 |
+
# Users table
|
2048 |
+
cursor.execute('''
|
2049 |
+
CREATE TABLE IF NOT EXISTS users (
|
2050 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
2051 |
+
username TEXT UNIQUE NOT NULL,
|
2052 |
+
email TEXT UNIQUE NOT NULL,
|
2053 |
+
password_hash TEXT NOT NULL,
|
2054 |
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
2055 |
+
)
|
2056 |
+
''')
|
2057 |
+
|
2058 |
+
# Items table
|
2059 |
+
cursor.execute('''
|
2060 |
+
CREATE TABLE IF NOT EXISTS items (
|
2061 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
2062 |
+
name TEXT NOT NULL,
|
2063 |
+
description TEXT,
|
2064 |
+
price REAL,
|
2065 |
+
user_id INTEGER,
|
2066 |
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
2067 |
+
FOREIGN KEY (user_id) REFERENCES users (id)
|
2068 |
+
)
|
2069 |
+
''')
|
2070 |
+
|
2071 |
+
conn.commit()
|
2072 |
+
conn.close()
|
2073 |
+
|
2074 |
+
# Pydantic models
|
2075 |
+
class UserCreate(BaseModel):
|
2076 |
+
username: str
|
2077 |
+
email: str
|
2078 |
+
password: str
|
2079 |
+
|
2080 |
+
@validator('username')
|
2081 |
+
def username_must_be_alphanumeric(cls, v):
|
2082 |
+
assert v.isalnum(), 'Username must be alphanumeric'
|
2083 |
+
return v
|
2084 |
+
|
2085 |
+
class UserLogin(BaseModel):
|
2086 |
+
username: str
|
2087 |
+
password: str
|
2088 |
+
|
2089 |
+
class Item(BaseModel):
|
2090 |
+
name: str
|
2091 |
+
description: Optional[str] = None
|
2092 |
+
price: Optional[float] = None
|
2093 |
+
|
2094 |
+
class ItemResponse(BaseModel):
|
2095 |
+
id: int
|
2096 |
+
name: str
|
2097 |
+
description: Optional[str]
|
2098 |
+
price: Optional[float]
|
2099 |
+
user_id: int
|
2100 |
+
created_at: str
|
2101 |
+
|
2102 |
+
# Utility functions
|
2103 |
+
def hash_password(password: str) -> str:
|
2104 |
+
return hashlib.sha256(password.encode()).hexdigest()
|
2105 |
+
|
2106 |
+
def create_jwt_token(user_id: int, username: str) -> str:
|
2107 |
+
payload = {
|
2108 |
+
'user_id': user_id,
|
2109 |
+
'username': username,
|
2110 |
+
'exp': datetime.utcnow() + timedelta(hours=24)
|
2111 |
+
}
|
2112 |
+
return jwt.encode(payload, SECRET_KEY, algorithm='HS256')
|
2113 |
+
|
2114 |
+
def verify_jwt_token(token: str) -> Dict[str, Any]:
|
2115 |
+
try:
|
2116 |
+
payload = jwt.decode(token, SECRET_KEY, algorithms=['HS256'])
|
2117 |
+
return payload
|
2118 |
+
except jwt.ExpiredSignatureError:
|
2119 |
+
raise HTTPException(status_code=401, detail="Token expired")
|
2120 |
+
except jwt.InvalidTokenError:
|
2121 |
+
raise HTTPException(status_code=401, detail="Invalid token")
|
2122 |
+
|
2123 |
+
def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
|
2124 |
+
token = credentials.credentials
|
2125 |
+
payload = verify_jwt_token(token)
|
2126 |
+
return payload
|
2127 |
+
|
2128 |
+
def get_db():
|
2129 |
+
conn = sqlite3.connect('api_data.db')
|
2130 |
+
conn.row_factory = sqlite3.Row
|
2131 |
+
try:
|
2132 |
+
yield conn
|
2133 |
+
finally:
|
2134 |
+
conn.close()
|
2135 |
+
|
2136 |
+
# API Routes
|
2137 |
+
@app.get("/")
|
2138 |
+
async def root():
|
2139 |
+
return {"message": "Advanced API Server", "version": "1.0.0"}
|
2140 |
+
|
2141 |
+
@app.post("/register", status_code=status.HTTP_201_CREATED)
|
2142 |
+
async def register_user(user: UserCreate, db=Depends(get_db)):
|
2143 |
+
try:
|
2144 |
+
cursor = db.cursor()
|
2145 |
+
password_hash = hash_password(user.password)
|
2146 |
+
|
2147 |
+
cursor.execute(
|
2148 |
+
"INSERT INTO users (username, email, password_hash) VALUES (?, ?, ?)",
|
2149 |
+
(user.username, user.email, password_hash)
|
2150 |
+
)
|
2151 |
+
db.commit()
|
2152 |
+
|
2153 |
+
user_id = cursor.lastrowid
|
2154 |
+
token = create_jwt_token(user_id, user.username)
|
2155 |
+
|
2156 |
+
return {
|
2157 |
+
"message": "User created successfully",
|
2158 |
+
"user_id": user_id,
|
2159 |
+
"token": token
|
2160 |
+
}
|
2161 |
+
|
2162 |
+
except sqlite3.IntegrityError:
|
2163 |
+
raise HTTPException(status_code=400, detail="Username or email already exists")
|
2164 |
+
|
2165 |
+
@app.post("/login")
|
2166 |
+
async def login_user(user: UserLogin, db=Depends(get_db)):
|
2167 |
+
cursor = db.cursor()
|
2168 |
+
password_hash = hash_password(user.password)
|
2169 |
+
|
2170 |
+
cursor.execute(
|
2171 |
+
"SELECT id, username FROM users WHERE username = ? AND password_hash = ?",
|
2172 |
+
(user.username, password_hash)
|
2173 |
+
)
|
2174 |
+
|
2175 |
+
result = cursor.fetchone()
|
2176 |
+
if not result:
|
2177 |
+
raise HTTPException(status_code=401, detail="Invalid credentials")
|
2178 |
+
|
2179 |
+
token = create_jwt_token(result['id'], result['username'])
|
2180 |
+
|
2181 |
+
return {
|
2182 |
+
"message": "Login successful",
|
2183 |
+
"token": token,
|
2184 |
+
"user_id": result['id']
|
2185 |
+
}
|
2186 |
+
|
2187 |
+
@app.get("/profile")
|
2188 |
+
async def get_profile(current_user=Depends(get_current_user), db=Depends(get_db)):
|
2189 |
+
cursor = db.cursor()
|
2190 |
+
cursor.execute(
|
2191 |
+
"SELECT id, username, email, created_at FROM users WHERE id = ?",
|
2192 |
+
(current_user['user_id'],)
|
2193 |
+
)
|
2194 |
+
|
2195 |
+
user = cursor.fetchone()
|
2196 |
+
if not user:
|
2197 |
+
raise HTTPException(status_code=404, detail="User not found")
|
2198 |
+
|
2199 |
+
return dict(user)
|
2200 |
+
|
2201 |
+
@app.post("/items", response_model=ItemResponse)
|
2202 |
+
async def create_item(item: Item, current_user=Depends(get_current_user), db=Depends(get_db)):
|
2203 |
+
cursor = db.cursor()
|
2204 |
+
cursor.execute(
|
2205 |
+
"INSERT INTO items (name, description, price, user_id) VALUES (?, ?, ?, ?)",
|
2206 |
+
(item.name, item.description, item.price, current_user['user_id'])
|
2207 |
+
)
|
2208 |
+
db.commit()
|
2209 |
+
|
2210 |
+
item_id = cursor.lastrowid
|
2211 |
+
cursor.execute("SELECT * FROM items WHERE id = ?", (item_id,))
|
2212 |
+
created_item = cursor.fetchone()
|
2213 |
+
|
2214 |
+
return dict(created_item)
|
2215 |
+
|
2216 |
+
@app.get("/items", response_model=List[ItemResponse])
|
2217 |
+
async def get_items(skip: int = 0, limit: int = 10, db=Depends(get_db)):
|
2218 |
+
cursor = db.cursor()
|
2219 |
+
cursor.execute(
|
2220 |
+
"SELECT * FROM items ORDER BY created_at DESC LIMIT ? OFFSET ?",
|
2221 |
+
(limit, skip)
|
2222 |
+
)
|
2223 |
+
|
2224 |
+
items = cursor.fetchall()
|
2225 |
+
return [dict(item) for item in items]
|
2226 |
+
|
2227 |
+
@app.get("/items/{item_id}", response_model=ItemResponse)
|
2228 |
+
async def get_item(item_id: int, db=Depends(get_db)):
|
2229 |
+
cursor = db.cursor()
|
2230 |
+
cursor.execute("SELECT * FROM items WHERE id = ?", (item_id,))
|
2231 |
+
|
2232 |
+
item = cursor.fetchone()
|
2233 |
+
if not item:
|
2234 |
+
raise HTTPException(status_code=404, detail="Item not found")
|
2235 |
+
|
2236 |
+
return dict(item)
|
2237 |
+
|
2238 |
+
@app.put("/items/{item_id}", response_model=ItemResponse)
|
2239 |
+
async def update_item(item_id: int, item: Item, current_user=Depends(get_current_user), db=Depends(get_db)):
|
2240 |
+
cursor = db.cursor()
|
2241 |
+
|
2242 |
+
# Check if item exists and belongs to user
|
2243 |
+
cursor.execute("SELECT * FROM items WHERE id = ? AND user_id = ?", (item_id, current_user['user_id']))
|
2244 |
+
existing_item = cursor.fetchone()
|
2245 |
+
|
2246 |
+
if not existing_item:
|
2247 |
+
raise HTTPException(status_code=404, detail="Item not found or not authorized")
|
2248 |
+
|
2249 |
+
cursor.execute(
|
2250 |
+
"UPDATE items SET name = ?, description = ?, price = ? WHERE id = ?",
|
2251 |
+
(item.name, item.description, item.price, item_id)
|
2252 |
+
)
|
2253 |
+
db.commit()
|
2254 |
+
|
2255 |
+
cursor.execute("SELECT * FROM items WHERE id = ?", (item_id,))
|
2256 |
+
updated_item = cursor.fetchone()
|
2257 |
+
|
2258 |
+
return dict(updated_item)
|
2259 |
+
|
2260 |
+
@app.delete("/items/{item_id}")
|
2261 |
+
async def delete_item(item_id: int, current_user=Depends(get_current_user), db=Depends(get_db)):
|
2262 |
+
cursor = db.cursor()
|
2263 |
+
|
2264 |
+
cursor.execute("SELECT * FROM items WHERE id = ? AND user_id = ?", (item_id, current_user['user_id']))
|
2265 |
+
item = cursor.fetchone()
|
2266 |
+
|
2267 |
+
if not item:
|
2268 |
+
raise HTTPException(status_code=404, detail="Item not found or not authorized")
|
2269 |
+
|
2270 |
+
cursor.execute("DELETE FROM items WHERE id = ?", (item_id,))
|
2271 |
+
db.commit()
|
2272 |
+
|
2273 |
+
return {"message": "Item deleted successfully"}
|
2274 |
+
|
2275 |
+
# Initialize database and run server
|
2276 |
+
if __name__ == "__main__":
|
2277 |
+
init_db()
|
2278 |
+
uvicorn.run(app, host="0.0.0.0", port=8000)
|
2279 |
+
```
|
2280 |
+
|
2281 |
+
**Features:**
|
2282 |
+
- **JWT Authentication**: Secure token-based auth
|
2283 |
+
- **User Management**: Registration, login, profile
|
2284 |
+
- **CRUD Operations**: Complete item management
|
2285 |
+
- **Data Validation**: Pydantic models with validation
|
2286 |
+
- **Database Integration**: SQLite with proper schema
|
2287 |
+
- **Error Handling**: Comprehensive HTTP error responses
|
2288 |
+
- **Documentation**: Auto-generated API docs at /docs
|
2289 |
+
|
2290 |
+
**Installation:** `pip install fastapi uvicorn pydantic python-jwt sqlite3`
|
2291 |
+
|
2292 |
+
**Usage:**
|
2293 |
+
1. Run: `python api_server.py`
|
2294 |
+
2. Visit: http://localhost:8000/docs for interactive API docs
|
2295 |
+
3. Register user, get token, use authenticated endpoints"""
|
2296 |
+
|
2297 |
+
else:
|
2298 |
+
return f"""Here's a Python solution framework for: "{prompt}"
|
2299 |
+
|
2300 |
+
```python
|
2301 |
+
#!/usr/bin/env python3
|
2302 |
+
\"\"\"
|
2303 |
+
Solution for: {prompt}
|
2304 |
+
\"\"\"
|
2305 |
+
|
2306 |
+
import logging
|
2307 |
+
import sys
|
2308 |
+
from typing import Any, Dict, List, Optional, Union
|
2309 |
+
from dataclasses import dataclass
|
2310 |
+
from pathlib import Path
|
2311 |
+
|
2312 |
+
# Configure logging
|
2313 |
+
logging.basicConfig(
|
2314 |
+
level=logging.INFO,
|
2315 |
+
format='%(asctime)s - %(levelname)s - %(message)s'
|
2316 |
+
)
|
2317 |
+
logger = logging.getLogger(__name__)
|
2318 |
+
|
2319 |
+
@dataclass
|
2320 |
+
class Config:
|
2321 |
+
\"\"\"Configuration class for the solution\"\"\"
|
2322 |
+
debug: bool = False
|
2323 |
+
max_retries: int = 3
|
2324 |
+
timeout: int = 30
|
2325 |
+
|
2326 |
+
class SolutionManager:
|
2327 |
+
\"\"\"Main solution manager class\"\"\"
|
2328 |
+
|
2329 |
+
def __init__(self, config: Config = None):
|
2330 |
+
self.config = config or Config()
|
2331 |
+
self.logger = logging.getLogger(self.__class__.__name__)
|
2332 |
+
|
2333 |
+
def process_input(self, input_data: Any) -> Any:
|
2334 |
+
\"\"\"Process the input data according to requirements\"\"\"
|
2335 |
+
try:
|
2336 |
+
self.logger.info(f"Processing input: {{type(input_data)}}")
|
2337 |
+
|
2338 |
+
# Validate input
|
2339 |
+
if not self._validate_input(input_data):
|
2340 |
+
raise ValueError("Invalid input data")
|
2341 |
+
|
2342 |
+
# Core processing logic
|
2343 |
+
result = self._core_logic(input_data)
|
2344 |
+
|
2345 |
+
# Post-process and validate output
|
2346 |
+
validated_result = self._validate_output(result)
|
2347 |
+
|
2348 |
+
self.logger.info("Processing completed successfully")
|
2349 |
+
return validated_result
|
2350 |
+
|
2351 |
+
except Exception as e:
|
2352 |
+
self.logger.error(f"Processing failed: {{e}}")
|
2353 |
+
if self.config.debug:
|
2354 |
+
raise
|
2355 |
+
return None
|
2356 |
+
|
2357 |
+
def _validate_input(self, data: Any) -> bool:
|
2358 |
+
\"\"\"Validate input data\"\"\"
|
2359 |
+
# Add your input validation logic here
|
2360 |
+
return data is not None
|
2361 |
+
|
2362 |
+
def _core_logic(self, data: Any) -> Any:
|
2363 |
+
\"\"\"Implement the core solution logic here\"\"\"
|
2364 |
+
# This is where you implement the main functionality
|
2365 |
+
# Replace this with your specific solution
|
2366 |
+
|
2367 |
+
processed_data = data # Placeholder
|
2368 |
+
|
2369 |
+
return processed_data
|
2370 |
+
|
2371 |
+
def _validate_output(self, data: Any) -> Any:
|
2372 |
+
\"\"\"Validate and clean output data\"\"\"
|
2373 |
+
# Add output validation and cleaning logic
|
2374 |
+
return data
|
2375 |
+
|
2376 |
+
def batch_process(self, data_list: List[Any]) -> List[Any]:
|
2377 |
+
\"\"\"Process multiple items in batch\"\"\"
|
2378 |
+
results = []
|
2379 |
+
|
2380 |
+
for i, item in enumerate(data_list):
|
2381 |
+
try:
|
2382 |
+
result = self.process_input(item)
|
2383 |
+
results.append(result)
|
2384 |
+
self.logger.info(f"Processed item {{i+1}}/{{len(data_list)}}")
|
2385 |
+
|
2386 |
+
except Exception as e:
|
2387 |
+
self.logger.error(f"Failed to process item {{i+1}}: {{e}}")
|
2388 |
+
results.append(None)
|
2389 |
+
|
2390 |
+
return results
|
2391 |
+
|
2392 |
+
def main():
|
2393 |
+
\"\"\"Main execution function\"\"\"
|
2394 |
+
try:
|
2395 |
+
# Initialize configuration
|
2396 |
+
config = Config(debug=True)
|
2397 |
+
|
2398 |
+
# Create solution manager
|
2399 |
+
manager = SolutionManager(config)
|
2400 |
+
|
2401 |
+
# Example usage
|
2402 |
+
sample_input = "your_input_here" # Replace with actual input
|
2403 |
+
result = manager.process_input(sample_input)
|
2404 |
+
|
2405 |
+
if result:
|
2406 |
+
print(f"Success: {{result}}")
|
2407 |
+
else:
|
2408 |
+
print("Processing failed")
|
2409 |
+
|
2410 |
+
except Exception as e:
|
2411 |
+
logger.error(f"Main execution failed: {{e}}")
|
2412 |
+
sys.exit(1)
|
2413 |
+
|
2414 |
+
if __name__ == "__main__":
|
2415 |
+
main()
|
2416 |
+
```
|
2417 |
+
|
2418 |
+
**This solution provides:**
|
2419 |
+
- **Structured Architecture**: Clean, maintainable code organization
|
2420 |
+
- **Error Handling**: Comprehensive exception handling and logging
|
2421 |
+
- **Configuration Management**: Flexible config system
|
2422 |
+
- **Input/Output Validation**: Data validation and sanitization
|
2423 |
+
- **Batch Processing**: Handle multiple items efficiently
|
2424 |
+
- **Logging**: Detailed logging for debugging and monitoring
|
2425 |
+
|
2426 |
+
**To customize for your specific needs:**
|
2427 |
+
1. **Replace `_core_logic()`** with your actual implementation
|
2428 |
+
2. **Update `_validate_input()`** with your validation rules
|
2429 |
+
3. **Modify `Config`** to include your specific parameters
|
2430 |
+
4. **Add required dependencies** to the imports section
|
2431 |
+
|
2432 |
+
**Installation:** Modify the imports based on your specific requirements."""
|
2433 |
+
|
2434 |
+
def _generate_geography_response(self, prompt: str) -> str:
|
2435 |
+
"""Generate geography-specific responses"""
|
2436 |
+
prompt_lower = prompt.lower()
|
2437 |
+
|
2438 |
+
if 'where is' in prompt_lower or 'location of' in prompt_lower:
|
2439 |
+
# Extract the location
|
2440 |
+
words = prompt_lower.split()
|
2441 |
+
location = None
|
2442 |
+
for i, word in enumerate(words):
|
2443 |
+
if word in ['is', 'of'] and i + 1 < len(words):
|
2444 |
+
location = ' '.join(words[i+1:]).strip('?.,!')
|
2445 |
+
break
|
2446 |
+
|
2447 |
+
if location:
|
2448 |
+
return f"""**Geographic Information: {location.title()}**
|
2449 |
+
|
2450 |
+
{location.title()} is a geographic location with the following characteristics:
|
2451 |
+
|
2452 |
+
**Physical Geography:**
|
2453 |
+
β’ **Coordinates**: Specific latitude and longitude coordinates define its exact position
|
2454 |
+
β’ **Topography**: The landforms, elevation, and physical features of the area
|
2455 |
+
β’ **Climate**: Weather patterns, temperature ranges, and seasonal variations
|
2456 |
+
β’ **Natural Resources**: Available minerals, water sources, vegetation, and ecosystems
|
2457 |
+
|
2458 |
+
**Political Geography:**
|
2459 |
+
β’ **Administrative Division**: Whether it's a country, state, province, city, or region
|
2460 |
+
β’ **Governance**: Political system and administrative structure
|
2461 |
+
β’ **Boundaries**: International or internal borders and territorial limits
|
2462 |
+
β’ **Legal Status**: Political recognition and sovereignty details
|
2463 |
+
|
2464 |
+
**Human Geography:**
|
2465 |
+
β’ **Population**: Demographics, population density, and distribution
|
2466 |
+
β’ **Culture**: Languages, religions, traditions, and cultural practices
|
2467 |
+
β’ **Economy**: Main industries, economic activities, and development level
|
2468 |
+
β’ **Infrastructure**: Transportation, communication, and urban development
|
2469 |
+
|
2470 |
+
**Current Context:**
|
2471 |
+
β’ **Global Position**: Regional significance and international relations
|
2472 |
+
β’ **Development Status**: Economic and social development indicators
|
2473 |
+
β’ **Strategic Importance**: Geopolitical and economic significance
|
2474 |
+
β’ **Recent Changes**: Any recent political, economic, or social developments
|
2475 |
+
|
2476 |
+
**Research Sources:**
|
2477 |
+
For the most current and detailed information about {location.title()}, consult:
|
2478 |
+
β’ National geographic surveys and mapping agencies
|
2479 |
+
β’ Government statistical offices and official websites
|
2480 |
+
β’ International organizations (UN, World Bank, etc.)
|
2481 |
+
β’ Academic geographic databases and atlases
|
2482 |
+
β’ Current news sources for recent developments
|
2483 |
+
|
2484 |
+
Would you like specific information about any particular aspect of {location.title()}, such as its coordinates, population, economy, or recent developments?"""
|
2485 |
+
|
2486 |
+
return f"""**Geographic Analysis: {prompt}**
|
2487 |
+
|
2488 |
+
This appears to be a geography-related question that involves spatial, political, or physical geographic concepts.
|
2489 |
+
|
2490 |
+
**Geographic Methodology:**
|
2491 |
+
β’ **Spatial Analysis**: Understanding location, distance, and spatial relationships
|
2492 |
+
β’ **Scale Consideration**: Local, regional, national, or global perspective
|
2493 |
+
β’ **Physical Factors**: Landforms, climate, natural resources, and environmental conditions
|
2494 |
+
β’ **Human Factors**: Population, culture, economics, and political systems
|
2495 |
+
|
2496 |
+
**Key Geographic Concepts:**
|
2497 |
+
β’ **Location**: Absolute (coordinates) and relative (position relative to other places)
|
2498 |
+
β’ **Place**: Physical and human characteristics that make locations unique
|
2499 |
+
β’ **Human-Environment Interaction**: How people adapt to and modify their environment
|
2500 |
+
β’ **Movement**: Migration, trade, transportation, and communication patterns
|
2501 |
+
β’ **Region**: Areas with common characteristics (physical, cultural, economic, or political)
|
2502 |
+
|
2503 |
+
**Analysis Framework:**
|
2504 |
+
1. **Define the geographic scope** (local, regional, global)
|
2505 |
+
2. **Identify relevant physical factors** (climate, topography, resources)
|
2506 |
+
3. **Consider human factors** (population, culture, economy, politics)
|
2507 |
+
4. **Examine spatial relationships** and patterns
|
2508 |
+
5. **Evaluate current conditions** and recent changes
|
2509 |
+
|
2510 |
+
For more specific geographic information, please provide:
|
2511 |
+
β’ The specific location or region of interest
|
2512 |
+
β’ Whether you need physical or human geography focus
|
2513 |
+
β’ The scale of analysis needed (local to global)
|
2514 |
+
β’ Any particular time period or current context"""
|
2515 |
+
|
2516 |
+
def _generate_science_response(self, prompt: str) -> str:
|
2517 |
+
"""Generate science-specific responses"""
|
2518 |
+
return f"""**Scientific Analysis: {prompt}**
|
2519 |
+
|
2520 |
+
This scientific inquiry requires systematic investigation using established scientific methodologies.
|
2521 |
+
|
2522 |
+
**Scientific Method Application:**
|
2523 |
+
1. **Observation**: Gathering empirical data through systematic observation and measurement
|
2524 |
+
2. **Question Formation**: Developing specific, testable questions based on observations
|
2525 |
+
3. **Hypothesis Development**: Creating testable explanations based on current scientific knowledge
|
2526 |
+
4. **Experimental Design**: Planning controlled studies to test hypotheses
|
2527 |
+
5. **Data Collection**: Gathering quantitative and qualitative data through rigorous methods
|
2528 |
+
6. **Analysis**: Statistical analysis, pattern recognition, and interpretation of results
|
2529 |
+
7. **Conclusion**: Drawing evidence-based conclusions and identifying areas for further research
|
2530 |
+
|
2531 |
+
**Scientific Principles:**
|
2532 |
+
β’ **Reproducibility**: Results must be replicable by independent researchers
|
2533 |
+
β’ **Peer Review**: Scientific findings undergo rigorous evaluation by experts
|
2534 |
+
β’ **Evidence-Based**: Conclusions supported by empirical data and logical reasoning
|
2535 |
+
β’ **Falsifiability**: Hypotheses must be testable and potentially disprovable
|
2536 |
+
β’ **Quantification**: Measurement and mathematical analysis where possible
|
2537 |
+
|
2538 |
+
**Research Framework:**
|
2539 |
+
β’ **Literature Review**: Examining existing scientific knowledge and research
|
2540 |
+
β’ **Methodology**: Selecting appropriate research methods and instruments
|
2541 |
+
β’ **Controls**: Using proper experimental controls and variables
|
2542 |
+
β’ **Statistics**: Applying statistical methods for data analysis and significance testing
|
2543 |
+
β’ **Documentation**: Maintaining detailed records and transparent reporting
|
2544 |
+
|
2545 |
+
**Current Scientific Context:**
|
2546 |
+
β’ **Interdisciplinary Approach**: Integration of multiple scientific fields
|
2547 |
+
β’ **Technology Integration**: Use of advanced instruments and computational methods
|
2548 |
+
β’ **Global Collaboration**: International research cooperation and data sharing
|
2549 |
+
β’ **Ethical Considerations**: Research ethics and responsible scientific conduct
|
2550 |
+
|
2551 |
+
**Next Steps for Investigation:**
|
2552 |
+
1. **Define specific research questions** within this scientific domain
|
2553 |
+
2. **Identify relevant scientific literature** and current research
|
2554 |
+
3. **Determine appropriate methodologies** for investigation
|
2555 |
+
4. **Consider resource requirements** (equipment, time, expertise)
|
2556 |
+
5. **Plan data collection and analysis** procedures
|
2557 |
+
|
2558 |
+
For more detailed scientific information, please specify:
|
2559 |
+
β’ The particular scientific field or discipline
|
2560 |
+
β’ Specific phenomena or processes of interest
|
2561 |
+
β’ Level of technical detail needed
|
2562 |
+
β’ Whether theoretical or practical application focus is preferred"""
|
2563 |
+
|
2564 |
+
def _generate_general_response(self, prompt: str, domain: str) -> str:
|
2565 |
+
"""Generate intelligent general responses"""
|
2566 |
+
return f"""**Comprehensive Analysis: {prompt}**
|
2567 |
+
|
2568 |
+
This question spans the {domain} domain and requires a multi-faceted approach to provide a thorough response.
|
2569 |
+
|
2570 |
+
**Analytical Framework:**
|
2571 |
+
|
2572 |
+
**1. Context Assessment:**
|
2573 |
+
β’ **Domain Identification**: Understanding the primary field of knowledge involved
|
2574 |
+
β’ **Scope Definition**: Determining the breadth and depth of analysis needed
|
2575 |
+
β’ **Stakeholder Considerations**: Identifying who would be affected by or interested in this topic
|
2576 |
+
β’ **Current Relevance**: Assessing contemporary significance and trends
|
2577 |
+
|
2578 |
+
**2. Information Architecture:**
|
2579 |
+
β’ **Factual Foundation**: Establishing verified, objective information
|
2580 |
+
β’ **Multiple Perspectives**: Considering different viewpoints and approaches
|
2581 |
+
β’ **Historical Context**: Understanding background and evolution of the topic
|
2582 |
+
β’ **Future Implications**: Considering trends and potential developments
|
2583 |
+
|
2584 |
+
**3. Practical Applications:**
|
2585 |
+
β’ **Real-World Relevance**: How this applies to practical situations
|
2586 |
+
β’ **Implementation Considerations**: Steps, resources, and requirements
|
2587 |
+
β’ **Best Practices**: Established methods and proven approaches
|
2588 |
+
β’ **Common Challenges**: Typical obstacles and how to address them
|
2589 |
+
|
2590 |
+
**4. Quality Assurance:**
|
2591 |
+
β’ **Source Verification**: Using reliable, authoritative sources
|
2592 |
+
β’ **Cross-Reference**: Confirming information across multiple sources
|
2593 |
+
β’ **Currency Check**: Ensuring information is current and up-to-date
|
2594 |
+
β’ **Bias Assessment**: Recognizing and accounting for potential biases
|
2595 |
+
|
2596 |
+
**Recommended Approach:**
|
2597 |
+
1. **Break down the question** into specific, manageable components
|
2598 |
+
2. **Research each component** using appropriate sources and methods
|
2599 |
+
3. **Synthesize information** from multiple perspectives and sources
|
2600 |
+
4. **Evaluate credibility** and relevance of information found
|
2601 |
+
5. **Present findings** in a clear, organized manner
|
2602 |
+
|
2603 |
+
**To provide the most helpful and specific response, could you clarify:**
|
2604 |
+
β’ **Specific aspects** you're most interested in exploring
|
2605 |
+
β’ **Your background level** with this topic
|
2606 |
+
β’ **Intended use** of the information (academic, professional, personal)
|
2607 |
+
β’ **Time frame** if there are any deadlines or urgency
|
2608 |
+
β’ **Preferred format** for the response (summary, detailed analysis, step-by-step guide)
|
2609 |
+
|
2610 |
+
This will help me tailor the response to your exact needs and provide the most valuable information possible."""
|
2611 |
+
|
2612 |
def _generate_with_ultimate_model(self, prompt: str, max_length: int, temperature: float, top_p: float, domain: str = 'general') -> str:
|
2613 |
"""Generate using loaded model with ultimate optimization and content safety"""
|
2614 |
try:
|