# Page-header residue from the hosting site: "Spaces: Sleeping" (not part of the program).
import csv
import json
import os
from io import StringIO

import gradio as gr
import pandas as pd
from smolagents import CodeAgent, HfApiModel
# Module-level agent: no extra tools, and `json` is added to the imports that
# the agent's generated code is authorized to use.
agent = CodeAgent(
    model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
    tools=[],
    additional_authorized_imports=["json"],
)
def parse_text_content(content):
    """Convert loosely structured text into two-column CSV text.

    Each non-blank line becomes one ``id,description`` record:

    * ``"<id> <rest>"``  -> id is the first space-delimited token, the
      description is the rest with any ``(edited)`` marker removed.
    * ``"<left>!<right>"`` (no space in the line) -> split on the first ``!``.
    * anything else -> the whole line is the id and the description is empty.

    Fields are written with the ``csv`` module so that commas or double
    quotes inside a value are quoted instead of corrupting the column layout.

    Args:
        content: Raw text; lines are separated by ``\\n``.

    Returns:
        CSV text starting with the header ``id,description``, rows joined by
        ``\\n`` and without a trailing newline.
    """
    buffer = StringIO()
    writer = csv.writer(buffer, lineterminator="\n")
    writer.writerow(["id", "description"])

    for raw_line in content.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        if " " in line:
            # After strip() an interior space guarantees at least two tokens.
            identifier, description = line.split(" ", 1)
            row = [identifier, description.replace("(edited)", "").strip()]
        elif "!" in line:
            # Only the first '!' acts as the separator.
            row = line.split("!", 1)
        else:
            row = [line, ""]
        writer.writerow(row)

    # The writer terminates every row; drop the final terminator so the result
    # has the same shape as a plain newline-joined list of rows.
    return buffer.getvalue().rstrip("\n")
def process_file(file_path):
    """Read one text file and turn it into a DataFrame.

    Args:
        file_path: Path of a UTF-8 text file (``str``, ``bytes`` or
            ``os.PathLike``), or an upload object that exposes the path as
            its ``name`` attribute.

    Returns:
        A 3-tuple ``(success, df, text)``:

        * on success ``(True, DataFrame, csv_text)``;
        * on any failure ``(False, empty DataFrame, error message)``.

        The function never raises; errors are reported through the tuple.
    """
    try:
        # Accept both plain paths and wrapper objects carrying `.name`.
        if isinstance(file_path, (str, bytes, os.PathLike)):
            path = file_path
        else:
            path = file_path.name

        with open(path, "r", encoding="utf-8") as handle:
            content = handle.read()

        csv_data = parse_text_content(content)
        # keep_default_na=False keeps empty descriptions as "" instead of NaN.
        df = pd.read_csv(StringIO(csv_data), keep_default_na=False)
    except Exception as exc:
        # Per-file boundary: callers rely on a status tuple, not an exception.
        return False, pd.DataFrame(), str(exc)
    return True, df, csv_data
def handle_upload(files):
    """Process every uploaded file and analyze the combined content.

    Args:
        files: Iterable of uploaded files, each either a filesystem path or
            an object exposing the path as ``name``. ``None`` (nothing
            uploaded) is treated as an empty list.

    Returns:
        ``(combined_df, analysis)`` where ``combined_df`` concatenates the
        DataFrames of all successfully processed files (empty DataFrame if
        there are none) and ``analysis`` is the result of
        ``analyze_content`` on the labelled CSV text of those files.
    """
    frames = []
    sections = []

    for file in files or []:
        # Resolve the path once so it serves both as input and as the label.
        if isinstance(file, (str, bytes, os.PathLike)):
            path = file
        else:
            path = file.name

        success, df, content = process_file(path)
        if not success:
            # On failure `content` is the error message; the file is skipped.
            continue

        frames.append(df)
        sections.append(f"\n\n--- {path} ---\n{content}")

    combined_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # NOTE(review): analyze_content is not defined in the visible part of this
    # file; confirm it exists elsewhere, otherwise this raises NameError.
    analysis = analyze_content("".join(sections))
    return combined_df, analysis
# UI definition: upload widget and trigger button on the first row, the two
# result views on the second row.
# NOTE(review): the nesting below is reconstructed; the original indentation
# was lost, so confirm that the button belongs inside the first row.
with gr.Blocks() as demo:
    gr.Markdown("# Advanced Text Analyzer")

    with gr.Row():
        file_input = gr.File(
            label="Upload Documents",
            file_types=[".txt"],
            file_count="multiple",
        )
        submit_btn = gr.Button("Analyze")

    with gr.Row():
        data_output = gr.Dataframe(label="Structured Data")
        json_output = gr.JSON(label="Content Analysis")

    # Clicking the button sends the uploaded files to handle_upload and shows
    # its two return values in the table and JSON views.
    submit_btn.click(
        handle_upload,
        inputs=file_input,
        outputs=[data_output, json_output],
    )

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()