Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import sys | |
| import requests | |
| import zipfile | |
| import io | |
| import ast | |
def is_file_type(file_path, file_extension):
    """Tell whether ``file_path`` ends with ``file_extension``.

    ``file_extension`` is handed straight to ``str.endswith``, so it should
    include the leading dot (for example ``".py"``).
    """
    has_extension = file_path.endswith(file_extension)
    return has_extension
def is_likely_useful_file(file_path, lang="python"):
    """Decide whether a repository path is worth including in the output.

    A path is rejected when any of the following holds:

    * one of its ``/``-separated segments starts with a dot;
    * the lower-cased path contains the substring ``"test"``;
    * it lives in one of the skipped directories (docs, examples, tests, ...);
    * it contains the name of a known utility/config/workflow file.

    ``lang`` ("python" or "go") adds language-specific entries to the
    skip lists; any other value uses only the common entries.
    """
    skipped_dirs = ["docs", "examples", "tests", "test", "scripts", "utils", "benchmarks"]
    skipped_names = []
    skipped_fragments = [".github", ".gitignore", "LICENSE"]

    if lang == "python":
        skipped_dirs += ["__pycache__"]
        skipped_names += ["hubconf.py", "setup.py"]
        skipped_fragments += ["stale.py", "gen-card-", "write_model_card"]
    elif lang == "go":
        skipped_dirs += ["vendor"]
        skipped_names += ["go.mod", "go.sum", "Makefile"]

    in_hidden_location = any(segment.startswith(".") for segment in file_path.split("/"))
    mentions_test = "test" in file_path.lower()
    in_skipped_dir = any(
        f"/{dir_name}/" in file_path or file_path.startswith(dir_name + "/")
        for dir_name in skipped_dirs
    )
    # Both name lists are matched as plain substrings of the full path.
    has_skipped_fragment = any(
        fragment in file_path for fragment in skipped_names + skipped_fragments
    )

    return not (in_hidden_location or mentions_test or in_skipped_dir or has_skipped_fragment)
def is_test_file(file_content, lang):
    """Determine if the file content suggests it is a test file.

    For Python sources the content is parsed and every ``import`` /
    ``from ... import`` statement is inspected; the file counts as a test
    file when the top-level package of any imported module is ``unittest``
    or ``pytest`` (so ``import unittest.mock`` and
    ``from pytest.foo import bar`` are detected as well).

    Args:
        file_content: Source text of the file.
        lang: Language identifier. Only ``"python"`` is inspected; any
            other value returns ``False``.

    Returns:
        ``True`` if a test-framework import is found, ``False`` otherwise,
        including when the source cannot be parsed.
    """
    if lang != "python":
        return False

    test_packages = {"unittest", "pytest"}

    try:
        module = ast.parse(file_content)
    except (SyntaxError, ValueError):
        # Unparseable source (ValueError covers embedded null bytes on
        # older Python versions): best effort, treat as "not a test file".
        return False

    for node in ast.walk(module):
        if isinstance(node, ast.Import):
            imported = [alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom) and node.module:
            # node.module is None for "from . import x".
            imported = [node.module]
        else:
            continue
        # Compare only the top-level package so submodule imports match.
        if any(name.split(".")[0] in test_packages for name in imported):
            return True
    return False
def has_sufficient_content(file_content, min_line_count=10):
    """Report whether the text has at least ``min_line_count`` real lines.

    A line counts when, after stripping surrounding whitespace, it is
    non-empty and does not begin with ``#`` or ``//``.
    """
    comment_markers = ("#", "//")
    substantive = 0
    for raw_line in file_content.split("\n"):
        text = raw_line.strip()
        if text and not text.startswith(comment_markers):
            substantive += 1
    return substantive >= min_line_count
def remove_comments_and_docstrings(source):
    """Remove comments and docstrings from the Python source code.

    The source is parsed to an AST and unparsed again, which drops ``#``
    comments (they are not part of the AST). In addition, every statement
    that is nothing but a string literal (module, class and function
    docstrings as well as stray strings used as comments) is deleted.

    Args:
        source: Python source text.

    Returns:
        The regenerated source text. Formatting follows ``ast.unparse``,
        not the original layout.

    Raises:
        SyntaxError: If ``source`` is not valid Python.
    """
    tree = ast.parse(source)
    for node in ast.walk(tree):
        # Statement lists live in these fields; on expression nodes such as
        # Lambda/IfExp the same names hold a single node, hence the list check.
        for field in ("body", "orelse", "finalbody"):
            statements = getattr(node, field, None)
            if not isinstance(statements, list):
                continue
            kept = [
                stmt
                for stmt in statements
                if not (
                    isinstance(stmt, ast.Expr)
                    and isinstance(stmt.value, ast.Constant)
                    and isinstance(stmt.value.value, str)
                )
            ]
            # A compound statement needs a non-empty body to stay valid
            # (e.g. a function that consisted only of its docstring).
            if statements and not kept and field == "body" and not isinstance(node, ast.Module):
                kept = [ast.Pass()]
            setattr(node, field, kept)
    return ast.unparse(tree)
def download_repo(repo_url, branch_or_tag="master"):
    """Download and process files from a GitHub repository.

    The repository archive is fetched as a zip from
    ``{repo_url}/archive/refs/heads/{branch_or_tag}.zip`` and, if that does
    not return HTTP 200, from the matching ``refs/tags`` URL. Every ``.py``
    member that passes ``is_likely_useful_file`` and is not flagged by
    ``is_test_file`` is appended to the result, preceded by a
    ``# File: <path>`` header line.

    Args:
        repo_url: Base URL of the repository. Surrounding whitespace, a
            trailing ``/`` and a trailing ``.git`` are ignored.
        branch_or_tag: Name of the branch or tag to download.

    Returns:
        One string containing all selected files, each followed by a blank
        line.

    Raises:
        RuntimeError: If neither archive URL answers with HTTP 200. (An
            exception is raised instead of exiting so that a failed download
            does not terminate the hosting process.)
    """
    lang = "python"

    base_url = repo_url.strip().rstrip("/")
    if base_url.endswith(".git"):
        base_url = base_url[: -len(".git")]

    # Try the name as a branch first, then as a tag.
    response = None
    for ref_kind in ("heads", "tags"):
        download_url = f"{base_url}/archive/refs/{ref_kind}/{branch_or_tag}.zip"
        print(download_url)
        response = requests.get(download_url, timeout=60)
        if response.status_code == 200:
            break
    else:
        message = f"Failed to download the repository. Status code: {response.status_code}"
        print(message)
        raise RuntimeError(message)

    sections = []
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
        member_names = zip_file.namelist()
        print(member_names)
        for file_path in member_names:
            # Skip directories, non-language files, less likely useful files, hidden directories, and test files
            if file_path.endswith("/") or not is_file_type(file_path, ".py") or not is_likely_useful_file(file_path, lang):
                print("Dir or non-lang or useless:", file_path)
                continue
            # Undecodable bytes are replaced so one badly encoded file
            # cannot abort the whole run.
            file_content = zip_file.read(file_path).decode("utf-8", errors="replace")
            # Skip test files based on content
            if is_test_file(file_content, lang):
                print("Test file:", file_path)
                continue
            print("Appending", file_path)
            header = f"// File: {file_path}\n" if lang == "go" else f"# File: {file_path}\n"
            sections.append(f"{header}{file_content}\n\n")
    return "".join(sections)
def download_and_process(repo_url, branch_or_tag="master"):
    """Callback used by the interface: return the flattened repository text.

    Simply forwards both arguments to ``download_repo`` and returns its
    result unchanged.
    """
    return download_repo(repo_url, branch_or_tag)
# Input widgets, listed in the positional order of download_and_process's
# parameters (repository URL first, then branch/tag).
_repo_url_box = gr.components.Textbox(
    label="GitHub Repository URL",
    value="https://github.com/cognitivecomputations/github2file",
)
_branch_box = gr.components.Textbox(label="Branch or Tag", value="master")

# Editable code viewer that displays the string returned by the callback.
_output_view = gr.components.Code(
    label="Output File",
    language="python",
    interactive=True,
)

iface = gr.Interface(
    fn=download_and_process,
    inputs=[_repo_url_box, _branch_box],
    outputs=_output_view,
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()