Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -80,14 +80,24 @@ def get_hf_files(repo, name, path=""):
|
|
| 80 |
processed_files.extend(get_hf_files(repo, name, dir_part))
|
| 81 |
continue
|
| 82 |
|
| 83 |
-
# Fetch raw file content
|
| 84 |
raw_url = f"https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}"
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
continue
|
| 92 |
|
| 93 |
# Process file
|
|
@@ -147,9 +157,11 @@ def process_file_content(file_info, owner, repo, is_hf=False):
|
|
| 147 |
response = requests.get(file_url, timeout=10)
|
| 148 |
response.raise_for_status()
|
| 149 |
|
| 150 |
-
# Ensure we get raw content, not HTML
|
| 151 |
if response.headers.get('Content-Type', '').startswith('text/html'):
|
| 152 |
raise Exception(f"Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
|
|
|
|
|
|
|
| 153 |
|
| 154 |
content_raw = response.content
|
| 155 |
size = len(content_raw)
|
|
|
|
| 80 |
processed_files.extend(get_hf_files(repo, name, dir_part))
|
| 81 |
continue
|
| 82 |
|
| 83 |
+
# Fetch raw file content (plain unauthenticated GET; no token is sent)
|
| 84 |
raw_url = f"https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}"
|
| 85 |
+
try:
|
| 86 |
+
response = requests.get(raw_url, timeout=10)
|
| 87 |
+
response.raise_for_status()
|
| 88 |
+
|
| 89 |
+
# Ensure we get raw content, not HTML
|
| 90 |
+
if response.headers.get('Content-Type', '').startswith('text/html'):
|
| 91 |
+
print(f"Warning: Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
|
| 92 |
+
continue
|
| 93 |
+
|
| 94 |
+
# Accept only text/* or application/octet-stream responses; skip anything else (e.g. JSON)
|
| 95 |
+
if not response.headers.get('Content-Type', '').startswith(('text/plain', 'application/octet-stream', 'text/')):
|
| 96 |
+
print(f"Unexpected content type for {file_path}: {response.headers.get('Content-Type', '')}")
|
| 97 |
+
continue
|
| 98 |
+
|
| 99 |
+
except requests.exceptions.RequestException as e:
|
| 100 |
+
print(f"Error downloading {file_path} from {raw_url}: {str(e)}")
|
| 101 |
continue
|
| 102 |
|
| 103 |
# Process file
|
|
|
|
| 157 |
response = requests.get(file_url, timeout=10)
|
| 158 |
response.raise_for_status()
|
| 159 |
|
| 160 |
+
# Ensure we get raw content, not HTML or JSON
|
| 161 |
if response.headers.get('Content-Type', '').startswith('text/html'):
|
| 162 |
raise Exception(f"Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
|
| 163 |
+
if response.headers.get('Content-Type', '').startswith('application/json'):
|
| 164 |
+
raise Exception(f"Received JSON instead of raw content for {file_path}: {response.text[:100]}...")
|
| 165 |
|
| 166 |
content_raw = response.content
|
| 167 |
size = len(content_raw)
|