Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,15 +25,20 @@ def generate_file_tree(paths):
|
|
| 25 |
def get_all_files(owner, repo, path="", is_hf=False):
|
| 26 |
"""Recursively fetch all files from a repository."""
|
| 27 |
if is_hf:
|
| 28 |
-
|
|
|
|
| 29 |
else:
|
| 30 |
api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')
|
| 31 |
|
| 32 |
try:
|
| 33 |
-
response = requests.get(api_url)
|
| 34 |
response.raise_for_status()
|
| 35 |
items = response.json()
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
files = []
|
| 38 |
for item in items:
|
| 39 |
if item['type'] == 'file':
|
|
@@ -50,9 +55,15 @@ def get_repo_contents(url):
|
|
| 50 |
if "huggingface.co" in url:
|
| 51 |
parts = url.rstrip('/').split('/')
|
| 52 |
owner, repo = parts[-2], parts[-1]
|
| 53 |
-
files
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
return owner, repo, files, True
|
| 57 |
else: # Assume GitHub URL
|
| 58 |
parts = url.rstrip('/').split('/')
|
|
@@ -71,7 +82,7 @@ def process_file_content(file_info, owner, repo, is_hf=False):
|
|
| 71 |
|
| 72 |
try:
|
| 73 |
if is_hf:
|
| 74 |
-
file_url = f"https://huggingface.co/spaces/{owner}/{repo}/
|
| 75 |
else:
|
| 76 |
file_url = f"{GITHUB_API}{owner}/{repo}/contents/{file_path}"
|
| 77 |
|
|
@@ -200,31 +211,36 @@ def index():
|
|
| 200 |
|
| 201 |
@app.route('/process', methods=['POST'])
|
| 202 |
def process():
|
|
|
|
|
|
|
|
|
|
| 203 |
if 'files[]' in request.files:
|
| 204 |
files = request.files.getlist('files[]')
|
| 205 |
if not files:
|
| 206 |
-
|
|
|
|
| 207 |
|
| 208 |
markdown_content = create_markdown_document(files=files)
|
| 209 |
-
|
| 210 |
-
|
|
|
|
| 211 |
else:
|
| 212 |
repo_url = request.json.get('repo_url')
|
| 213 |
if not repo_url:
|
| 214 |
-
|
|
|
|
| 215 |
|
| 216 |
markdown_content = create_markdown_document(repo_url)
|
| 217 |
-
|
| 218 |
-
owner, repo, _, is_hf = get_repo_contents(repo_url)
|
| 219 |
if not owner:
|
| 220 |
-
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
|
| 223 |
-
return jsonify(
|
| 224 |
-
'markdown': markdown_content,
|
| 225 |
-
'html': html_content,
|
| 226 |
-
'filename': filename
|
| 227 |
-
})
|
| 228 |
|
| 229 |
@app.route('/download', methods=['POST'])
|
| 230 |
def download():
|
|
|
|
| 25 |
def get_all_files(owner, repo, path="", is_hf=False):
|
| 26 |
"""Recursively fetch all files from a repository."""
|
| 27 |
if is_hf:
|
| 28 |
+
# Attempt to fetch file list from Hugging Face Space (publicly accessible files)
|
| 29 |
+
api_url = f"https://huggingface.co/spaces/{owner}/{repo}/tree/main/{path}".rstrip('/')
|
| 30 |
else:
|
| 31 |
api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')
|
| 32 |
|
| 33 |
try:
|
| 34 |
+
response = requests.get(api_url, headers={"Accept": "application/json"})
|
| 35 |
response.raise_for_status()
|
| 36 |
items = response.json()
|
| 37 |
|
| 38 |
+
# Hugging Face might not return JSON in the same format; adjust if HTML is returned
|
| 39 |
+
if isinstance(items, str): # If response isn’t JSON, it’s likely HTML
|
| 40 |
+
return None # Fallback to error handling
|
| 41 |
+
|
| 42 |
files = []
|
| 43 |
for item in items:
|
| 44 |
if item['type'] == 'file':
|
|
|
|
| 55 |
if "huggingface.co" in url:
|
| 56 |
parts = url.rstrip('/').split('/')
|
| 57 |
owner, repo = parts[-2], parts[-1]
|
| 58 |
+
# Fallback approach: manually fetch known files or use a simpler file list
|
| 59 |
+
# For now, assume a flat structure and fetch known files directly
|
| 60 |
+
# This is a workaround until a proper API token or endpoint is confirmed
|
| 61 |
+
known_files = [
|
| 62 |
+
{'path': 'app.py', 'type': 'file'},
|
| 63 |
+
{'path': 'README.md', 'type': 'file'}
|
| 64 |
+
# Add more known paths or implement HTML scraping if needed
|
| 65 |
+
]
|
| 66 |
+
files = get_all_files(owner, repo, "", True) or known_files
|
| 67 |
return owner, repo, files, True
|
| 68 |
else: # Assume GitHub URL
|
| 69 |
parts = url.rstrip('/').split('/')
|
|
|
|
| 82 |
|
| 83 |
try:
|
| 84 |
if is_hf:
|
| 85 |
+
file_url = f"https://huggingface.co/spaces/{owner}/{repo}/raw/main/{file_path}"
|
| 86 |
else:
|
| 87 |
file_url = f"{GITHUB_API}{owner}/{repo}/contents/{file_path}"
|
| 88 |
|
|
|
|
| 211 |
|
| 212 |
@app.route('/process', methods=['POST'])
def process():
    """Build a Markdown/HTML summary from uploaded files or a repository URL.

    The request is handled in one of two modes:

    * Multipart upload: parts named ``files[]`` are passed to
      ``create_markdown_document(files=...)``.
    * JSON body: the ``repo_url`` field is passed to
      ``create_markdown_document(repo_url)``.

    Returns:
        A JSON response that always carries the keys ``markdown``, ``html``,
        ``filename`` and ``error``. On failure ``error`` holds a message and
        the HTTP status is 400; on success ``error`` is ``None``.
    """
    # Ensure consistent response structure
    response_data = {'markdown': '', 'html': '', 'filename': '', 'error': None}

    if 'files[]' in request.files:
        # A file input submitted without a selection still arrives as one part
        # with an empty filename, so the raw list is never empty here. Drop
        # those placeholder parts so the "no files" guard can actually fire.
        files = [f for f in request.files.getlist('files[]') if f.filename]
        if not files:
            response_data['error'] = 'No files uploaded'
            return jsonify(response_data), 400

        markdown_content = create_markdown_document(files=files)
        response_data['markdown'] = markdown_content
        response_data['html'] = markdown.markdown(markdown_content)
        response_data['filename'] = "uploaded_files_summary.md"
    else:
        # silent=True yields None instead of raising when the body is missing,
        # malformed, or not sent as JSON; the isinstance check also covers a
        # JSON body that is not an object. Both cases then fall through to the
        # uniform 400 payload below rather than an unstructured error page.
        payload = request.get_json(silent=True)
        repo_url = payload.get('repo_url') if isinstance(payload, dict) else None
        if not repo_url:
            response_data['error'] = 'Please provide a repository URL or upload files'
            return jsonify(response_data), 400

        markdown_content = create_markdown_document(repo_url)
        # Only owner/repo are needed here (validity check and download name).
        owner, repo, _, _ = get_repo_contents(repo_url)
        if not owner:
            # NOTE(review): presumably create_markdown_document returns the
            # error text when the repository cannot be read -- confirm.
            response_data['error'] = markdown_content  # Error message from get_repo_contents
            return jsonify(response_data), 400

        response_data['markdown'] = markdown_content
        response_data['html'] = markdown.markdown(markdown_content)
        response_data['filename'] = f"{owner}_{repo}_summary.md"

    return jsonify(response_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
@app.route('/download', methods=['POST'])
|
| 246 |
def download():
|