Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| from uuid import uuid4 | |
| from datetime import datetime | |
| # Define the path for the golden dataset | |
| GOLDEN_DATASET_DIR = os.path.join(os.path.dirname(__file__), "test_data") | |
| os.makedirs(GOLDEN_DATASET_DIR, exist_ok=True) | |
| def create_test_case(query, agent_type="believer", context=None): | |
| """Helper function to create a test case with standard structure.""" | |
| return { | |
| "id": str(uuid4()), | |
| "input": { | |
| "query": query, | |
| "agent_type": agent_type, | |
| "context": context | |
| }, | |
| "expected_output": { | |
| "debate_structure": { | |
| "min_turns": 2, | |
| "max_turns": 4, | |
| "required_agents": ["extractor", "believer", "skeptic", "supervisor"] | |
| }, | |
| "transcript_requirements": { | |
| "required_fields": ["id", "podcastScript", "topic"], | |
| "topic_match": True, | |
| "min_script_length": 500 | |
| }, | |
| "podcast_requirements": { | |
| "required_fields": ["content", "audio_file", "title", "description", "category"], | |
| "audio_format": "mp3", | |
| "min_file_size": 1000 | |
| } | |
| } | |
| } | |
| def generate_test_cases(): | |
| """Generate 50 diverse test cases.""" | |
| test_cases = [] | |
| # Technology Topics (10 cases) | |
| tech_topics = [ | |
| ("Are electric vehicles better for the environment?", "believer"), | |
| ("Should artificial intelligence be regulated?", "skeptic"), | |
| ("Is blockchain technology revolutionizing finance?", "believer"), | |
| ("Are smart homes making us too dependent on technology?", "skeptic"), | |
| ("Should social media platforms be responsible for content moderation?", "believer"), | |
| ("Is 5G technology safe for public health?", "skeptic"), | |
| ("Will quantum computing make current encryption obsolete?", "believer"), | |
| ("Should facial recognition be used in public spaces?", "skeptic"), | |
| ("Are autonomous vehicles ready for widespread adoption?", "believer"), | |
| ("Does virtual reality have practical applications beyond gaming?", "skeptic") | |
| ] | |
| # Society and Culture (10 cases) | |
| society_topics = [ | |
| ("Is remote work the future of employment?", "believer"), | |
| ("Should universal basic income be implemented globally?", "skeptic"), | |
| ("Are social media platforms harming mental health?", "believer"), | |
| ("Should voting be mandatory?", "skeptic"), | |
| ("Is cancel culture beneficial for society?", "believer"), | |
| ("Should there be limits on free speech online?", "skeptic"), | |
| ("Are gender quotas effective in achieving equality?", "believer"), | |
| ("Should religious education be part of public schools?", "skeptic"), | |
| ("Is multiculturalism strengthening or weakening societies?", "believer"), | |
| ("Should citizenship be available for purchase?", "skeptic") | |
| ] | |
| # Environment and Sustainability (10 cases) | |
| environment_topics = [ | |
| ("Can renewable energy completely replace fossil fuels?", "believer"), | |
| ("Should nuclear power be part of climate change solution?", "skeptic"), | |
| ("Is carbon pricing effective in reducing emissions?", "believer"), | |
| ("Should single-use plastics be completely banned?", "skeptic"), | |
| ("Are vertical farms the future of agriculture?", "believer"), | |
| ("Should meat consumption be regulated for environmental reasons?", "skeptic"), | |
| ("Is geoengineering a viable solution to climate change?", "believer"), | |
| ("Should private companies be allowed to exploit space resources?", "skeptic"), | |
| ("Are carbon offsets an effective environmental solution?", "believer"), | |
| ("Should environmental protection override economic growth?", "skeptic") | |
| ] | |
| # Health and Wellness (10 cases) | |
| health_topics = [ | |
| ("Should healthcare be completely free?", "believer"), | |
| ("Is telemedicine as effective as traditional healthcare?", "skeptic"), | |
| ("Should vaccines be mandatory?", "believer"), | |
| ("Is genetic engineering of humans ethical?", "skeptic"), | |
| ("Should alternative medicine be covered by insurance?", "believer"), | |
| ("Is human enhancement technology ethical?", "skeptic"), | |
| ("Should organ donation be opt-out rather than opt-in?", "believer"), | |
| ("Are fitness trackers improving public health?", "skeptic"), | |
| ("Should sugar be regulated like tobacco?", "believer"), | |
| ("Is meditation effective as mental health treatment?", "skeptic") | |
| ] | |
| # Education and Career (10 cases) | |
| education_topics = [ | |
| ("Should college education be free?", "believer"), | |
| ("Is standardized testing effective?", "skeptic"), | |
| ("Should coding be mandatory in schools?", "believer"), | |
| ("Are traditional degrees becoming obsolete?", "skeptic"), | |
| ("Should student debt be forgiven?", "believer"), | |
| ("Is homeschooling as effective as traditional schooling?", "skeptic"), | |
| ("Should arts education be mandatory?", "believer"), | |
| ("Are gap years beneficial for students?", "skeptic"), | |
| ("Should schools teach financial literacy?", "believer"), | |
| ("Is year-round schooling better for learning?", "skeptic") | |
| ] | |
| # Add all topics to test cases | |
| for topics in [tech_topics, society_topics, environment_topics, health_topics, education_topics]: | |
| for query, agent_type in topics: | |
| test_cases.append(create_test_case(query, agent_type)) | |
| return test_cases | |
| def generate_golden_dataset(): | |
| """Generate a golden dataset for testing the podcast debate system.""" | |
| # Get test cases | |
| test_cases = generate_test_cases() | |
| # Create sample transcripts | |
| sample_transcripts = [ | |
| { | |
| "id": str(uuid4()), | |
| "podcastScript": """**Podcast Script: Electric Vehicles and the Environment** | |
| Host: Welcome to our debate on the environmental impact of electric vehicles... | |
| Skeptic: While EVs reduce direct emissions, we must consider the environmental cost of battery production... | |
| Believer: The long-term benefits of EVs in reducing carbon emissions far outweigh the initial production impact...""", | |
| "topic": "Are electric vehicles better for the environment?" | |
| }, | |
| { | |
| "id": str(uuid4()), | |
| "podcastScript": """**Podcast Script: AI Regulation Debate** | |
| Host: Today we're exploring the complex topic of AI regulation... | |
| Skeptic: Without proper oversight, AI development could lead to serious societal risks... | |
| Believer: Smart regulation can help us harness AI's benefits while minimizing potential harm...""", | |
| "topic": "Should artificial intelligence be regulated?" | |
| } | |
| ] | |
| # Create the golden dataset structure | |
| golden_dataset = { | |
| "metadata": { | |
| "created_at": datetime.now().isoformat(), | |
| "version": "1.0", | |
| "description": "Golden dataset for testing the podcast debate system", | |
| "total_test_cases": len(test_cases), | |
| "categories": [ | |
| "Technology", | |
| "Society and Culture", | |
| "Environment and Sustainability", | |
| "Health and Wellness", | |
| "Education and Career" | |
| ] | |
| }, | |
| "test_cases": test_cases, | |
| "sample_transcripts": sample_transcripts, | |
| "validation_rules": { | |
| "debate": { | |
| "required_agents": ["extractor", "believer", "skeptic", "supervisor"], | |
| "min_debate_turns": 2, | |
| "max_debate_turns": 4 | |
| }, | |
| "transcript": { | |
| "required_fields": ["id", "podcastScript", "topic"], | |
| "min_script_length": 500 | |
| }, | |
| "podcast": { | |
| "required_fields": ["content", "audio_file", "title", "description", "category"], | |
| "supported_audio_formats": ["mp3"], | |
| "min_file_size": 1000 | |
| } | |
| } | |
| } | |
| # Save the golden dataset | |
| output_file = os.path.join(GOLDEN_DATASET_DIR, "golden_dataset.json") | |
| with open(output_file, "w") as f: | |
| json.dump(golden_dataset, f, indent=2) | |
| print(f"Golden dataset generated successfully at: {output_file}") | |
| return golden_dataset | |
| def validate_test_case(test_case, actual_output): | |
| """Validate a test case against actual output.""" | |
| validation_results = { | |
| "test_case_id": test_case["id"], | |
| "query": test_case["input"]["query"], | |
| "validations": [] | |
| } | |
| # Validate debate structure | |
| expected_structure = test_case["expected_output"]["debate_structure"] | |
| debate_history = actual_output.get("debate_history", []) | |
| validation_results["validations"].append({ | |
| "check": "debate_turns", | |
| "passed": expected_structure["min_turns"] <= len(debate_history) <= expected_structure["max_turns"], | |
| "details": f"Expected {expected_structure['min_turns']}-{expected_structure['max_turns']} turns, got {len(debate_history)}" | |
| }) | |
| # Validate transcript | |
| transcript_reqs = test_case["expected_output"]["transcript_requirements"] | |
| if "transcript" in actual_output: | |
| transcript = actual_output["transcript"] | |
| validation_results["validations"].append({ | |
| "check": "transcript_fields", | |
| "passed": all(field in transcript for field in transcript_reqs["required_fields"]), | |
| "details": "Transcript field validation" | |
| }) | |
| # Validate podcast output | |
| podcast_reqs = test_case["expected_output"]["podcast_requirements"] | |
| if "final_podcast" in actual_output: | |
| podcast = actual_output["final_podcast"] | |
| validation_results["validations"].append({ | |
| "check": "podcast_fields", | |
| "passed": all(field in podcast for field in podcast_reqs["required_fields"]), | |
| "details": "Podcast field validation" | |
| }) | |
| return validation_results | |
| if __name__ == "__main__": | |
| # Generate the golden dataset | |
| dataset = generate_golden_dataset() | |
| print("\nGolden Dataset Summary:") | |
| print(f"Number of test cases: {len(dataset['test_cases'])}") | |
| print(f"Number of sample transcripts: {len(dataset['sample_transcripts'])}") | |
| print(f"Categories covered: {dataset['metadata']['categories']}") | |
| print(f"Validation rules defined: {list(dataset['validation_rules'].keys())}") |