Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,22 +4,28 @@ import gradio as gr
|
|
| 4 |
from magika import Magika
|
| 5 |
from huggingface_hub import login
|
| 6 |
|
| 7 |
-
# Get the HF token from environment variables
|
| 8 |
hf_token = os.getenv("HF_TOKEN")
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
raise ValueError("HF_TOKEN environment variable is not set")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
SUPPORTED_FILE_TYPES = ["txt", "python", "markdown", "yaml", "json", "csv", "tsv", "xml", "html"]
|
| 15 |
|
| 16 |
def validate_url(url):
|
| 17 |
return url.startswith('https://')
|
| 18 |
|
| 19 |
-
def clone_repo(url, repo_dir,
|
| 20 |
env = os.environ.copy()
|
| 21 |
env['GIT_LFS_SKIP_SMUDGE'] = '1'
|
| 22 |
-
|
|
|
|
| 23 |
result = subprocess.run(["git", "clone", token_url, repo_dir], env=env, capture_output=True, text=True)
|
| 24 |
if result.returncode != 0:
|
| 25 |
return False, result.stderr
|
|
@@ -54,7 +60,7 @@ def validate_file_types(directory):
|
|
| 54 |
file_types[file_path] = f"Error: {str(e)}"
|
| 55 |
return file_types
|
| 56 |
|
| 57 |
-
def extract_repo_content(url,
|
| 58 |
if not validate_url(url):
|
| 59 |
return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": "Invalid URL"}]
|
| 60 |
|
|
@@ -62,7 +68,7 @@ def extract_repo_content(url, token):
|
|
| 62 |
if os.path.exists(repo_dir):
|
| 63 |
subprocess.run(["rm", "-rf", repo_dir])
|
| 64 |
|
| 65 |
-
success, error = clone_repo(url, repo_dir,
|
| 66 |
if not success:
|
| 67 |
return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": f"Failed to clone repository: {error}"}]
|
| 68 |
|
|
@@ -101,7 +107,7 @@ def format_output(extracted_content, repo_url):
|
|
| 101 |
return formatted_output
|
| 102 |
|
| 103 |
def extract_and_display(url):
|
| 104 |
-
extracted_content = extract_repo_content(url, hf_token)
|
| 105 |
formatted_output = format_output(extracted_content, url)
|
| 106 |
return formatted_output
|
| 107 |
|
|
|
|
| 4 |
from magika import Magika
|
| 5 |
from huggingface_hub import login
|
| 6 |
|
| 7 |
+
# Get the HF token and space author name from environment variables
|
| 8 |
hf_token = os.getenv("HF_TOKEN")
|
| 9 |
+
hf_user = os.getenv("SPACE_AUTHOR_NAME")
|
| 10 |
+
|
| 11 |
+
if not hf_token:
|
| 12 |
raise ValueError("HF_TOKEN environment variable is not set")
|
| 13 |
+
if not hf_user:
|
| 14 |
+
raise ValueError("SPACE_AUTHOR_NAME environment variable is not set")
|
| 15 |
+
|
| 16 |
+
# Perform login using the token
|
| 17 |
+
login(token=hf_token, add_to_git_credential=True)
|
| 18 |
|
| 19 |
SUPPORTED_FILE_TYPES = ["txt", "python", "markdown", "yaml", "json", "csv", "tsv", "xml", "html"]
|
| 20 |
|
| 21 |
def validate_url(url):
    """Return True when *url* is a string beginning with ``https://``.

    Non-string input such as ``None`` is reported as invalid (``False``)
    rather than raising ``AttributeError`` from the ``startswith`` call.
    """
    # Guard the type first so callers always get a boolean back.
    return isinstance(url, str) and url.startswith('https://')
|
| 23 |
|
| 24 |
+
def clone_repo(url, repo_dir, hf_token, hf_user):
|
| 25 |
env = os.environ.copy()
|
| 26 |
env['GIT_LFS_SKIP_SMUDGE'] = '1'
|
| 27 |
+
# Construct the Git URL with the token and author name for authentication
|
| 28 |
+
token_url = url.replace('https://', f'https://{hf_user}:{hf_token}@')
|
| 29 |
result = subprocess.run(["git", "clone", token_url, repo_dir], env=env, capture_output=True, text=True)
|
| 30 |
if result.returncode != 0:
|
| 31 |
return False, result.stderr
|
|
|
|
| 60 |
file_types[file_path] = f"Error: {str(e)}"
|
| 61 |
return file_types
|
| 62 |
|
| 63 |
+
def extract_repo_content(url, hf_token, hf_user):
|
| 64 |
if not validate_url(url):
|
| 65 |
return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": "Invalid URL"}]
|
| 66 |
|
|
|
|
| 68 |
if os.path.exists(repo_dir):
|
| 69 |
subprocess.run(["rm", "-rf", repo_dir])
|
| 70 |
|
| 71 |
+
success, error = clone_repo(url, repo_dir, hf_token, hf_user)
|
| 72 |
if not success:
|
| 73 |
return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": f"Failed to clone repository: {error}"}]
|
| 74 |
|
|
|
|
| 107 |
return formatted_output
|
| 108 |
|
| 109 |
def extract_and_display(url):
    """Extract the repository at *url* and return its formatted output.

    Passes the module-level ``hf_token`` and ``hf_user`` values to
    ``extract_repo_content`` and hands the result, together with *url*,
    to ``format_output``.
    """
    return format_output(extract_repo_content(url, hf_token, hf_user), url)
|
| 113 |
|