Spaces:

westlake-repl
/

Demo_ProTrek_650M

Running

App Files Files Community

Demo_ProTrek_650M / utils /downloader.py

LTEnjoy

Upload 21 files

52da96f verified about 1 year ago

raw

history blame

5.93 kB

	import os
	import subprocess

	from tqdm import tqdm

	from utils.mpr import MultipleProcessRunner


	class Downloader(MultipleProcessRunner):
	    """
	    Download one file per identifier by filling a URL template and calling ``wget``.

	    ``base_url`` and ``save_path`` are ``str.format`` templates; each is formatted with one
	    identifier from ``self.data`` to obtain the remote URL and the local destination path.
	    """

	    def __init__(self, base_url, save_path, overwrite=False, skip_error_info=False, **kwargs):
	        """

	        Args:
	            base_url: Uniform Resource Locator template of the remote file, formatted with one identifier
	            save_path: Local path template of the saved file, formatted with the same identifier
	            overwrite: Whether to download again files that already exist locally
	            skip_error_info: Whether to suppress the message printed when a download fails
	            **kwargs: Forwarded to the parent constructor (presumably it sets ``self.data``, which is
	                read right after the call -- confirm against MultipleProcessRunner)
	        """
	        super().__init__(**kwargs)

	        self.base_url = base_url
	        self.save_path = save_path
	        self.overwrite = overwrite
	        self.skip_error_info = skip_error_info

	        if not overwrite:
	            # Drop identifiers whose destination file already exists so they are not fetched again
	            self.data = [uniprot for uniprot in tqdm(self.data, desc="Filtering out existing files...")
	                         if not os.path.exists(self.save_path.format(uniprot))]

	    def _aggregate(self, final_path: str, sub_paths):
	        """
	        Do nothing: every download is written directly to its own destination file.
	        """
	        pass

	    def _target_static(self, process_id, data, sub_path, *args):
	        """
	        Download every identifier in ``data`` and report progress after each one.

	        Args:
	            process_id: Index of the worker, used only in the progress message
	            data: Identifiers this worker has to download
	            sub_path: Unused here
	            *args: Unused here

	        Raises:
	            FileNotFoundError: If the ``wget`` executable is not installed
	        """
	        for i, uniprot in enumerate(data):
	            url = self.base_url.format(uniprot)
	            save_path = self.save_path.format(uniprot)

	            # Run wget with an argument list instead of a shell string: identifiers and paths are
	            # passed verbatim, so spaces or shell metacharacters in them cannot alter the command.
	            cmd = ["wget", "-q", "-o", os.devnull, url, "-O", save_path]
	            if subprocess.run(cmd, check=False).returncode != 0:
	                # "wget -O" may leave an empty or partial file behind when the download fails
	                try:
	                    os.remove(save_path)
	                except FileNotFoundError:
	                    # Nothing was written, so there is nothing to clean up
	                    pass

	                if not self.skip_error_info:
	                    print(f"Error: {url} cannot be downloaded!")

	            self.terminal_progress_bar(process_id, i + 1, len(data), f"Process{process_id} Downloading files...")

	    def run(self):
	        """
	        Run this function to download files
	        """
	        super().run()

	    def __len__(self):
	        """
	        Return the number of identifiers that are still to be downloaded.
	        """
	        return len(self.data)

	    @staticmethod
	    def clear_empty_files(path):
	        """
	        Clear empty files in a specific directory.

	        Args:
	            path: Directory whose direct entries are checked

	        Returns:
	            Number of removed files
	        """
	        cnt = 0
	        for file in tqdm(os.listdir(path), desc="Clearing empty files..."):
	            file_path = os.path.join(path, file)
	            if os.path.getsize(file_path) == 0:
	                os.remove(file_path)
	                cnt += 1

	        print(f"Removed {cnt} empty files")
	        return cnt


	class AlphaDBDownloader(Downloader):
	    """
	    Downloader specialised for files of the AlphaFold2 database
	    """
	    def __init__(self, uniprot_ids, type: str, save_dir: str, **kwargs):
	        """

	        Args:
	            uniprot_ids: Uniprot ids
	            type: Which type of files to download. Must be one of ['pdb', 'mmcif', 'plddt', "pae"]
	            save_dir: Saving directory
	            **kwargs: Forwarded unchanged to the parent constructor
	        """

	        # For every supported type: (remote URL template, local file name template).
	        # NOTE: 'plddt' and 'pae' share the "{}.json" file name, so both map to the same path
	        # when they are saved into the same directory.
	        templates = {
	            "pdb": ("https://alphafold.ebi.ac.uk/files/AF-{}-F1-model_v4.pdb", "{}.pdb"),
	            "mmcif": ("https://alphafold.ebi.ac.uk/files/AF-{}-F1-model_v4.cif", "{}.cif"),
	            "plddt": ("https://alphafold.ebi.ac.uk/files/AF-{}-F1-confidence_v4.json", "{}.json"),
	            "pae": ("https://alphafold.ebi.ac.uk/files/AF-{}-F1-predicted_aligned_error_v4.json", "{}.json"),
	        }

	        # An unsupported type raises KeyError here, before anything else happens
	        remote_template, file_template = templates[type]
	        local_template = os.path.join(save_dir, file_template)

	        super().__init__(data=uniprot_ids, base_url=remote_template, save_path=local_template, **kwargs)


	class PDBDownloader(Downloader):
	    """
	    Downloader specialised for files of the PDB
	    """
	    def __init__(self, pdb_ids, type: str, save_dir: str, **kwargs):
	        """

	        Args:
	            pdb_ids: PDB ids
	            type: Which type of files to download. Must be one of ['pdb', 'mmcif']
	            save_dir: Saving directory
	            **kwargs: Forwarded unchanged to the parent constructor
	        """

	        # For every supported type: (remote URL template, local file name template)
	        templates = {
	            "pdb": ("https://files.rcsb.org/download/{}.pdb", "{}.pdb"),
	            "mmcif": ("https://files.rcsb.org/download/{}.cif", "{}.cif"),
	        }

	        # An unsupported type raises KeyError here, before anything else happens
	        remote_template, file_template = templates[type]
	        local_template = os.path.join(save_dir, file_template)

	        super().__init__(data=pdb_ids, base_url=remote_template, save_path=local_template, **kwargs)


	class CATHDownloader(Downloader):
	    def __init__(self, cath_ids, save_dir, **kwargs):
	        """
	        Set up the download of PDB files from CATH (release v4_3_0 of the REST API)
	        Args:
	            cath_ids: CATH ids
	            save_dir: Saving directory
	            **kwargs: Forwarded unchanged to the parent constructor
	        """

	        super().__init__(
	            data=cath_ids,
	            base_url="http://www.cathdb.info/version/v4_3_0/api/rest/id/{}.pdb",
	            # Each id is stored as "<id>.pdb" inside save_dir
	            save_path=os.path.join(save_dir, "{}.pdb"),
	            **kwargs,
	        )


	def download_pdb(pdb_id: str, format: str, save_path: str):
	    """
	    Download pdb file from PDB

	    If the download fails, the file left at ``save_path`` is removed and an error message is printed.

	    Args:
	        pdb_id: PDB id
	        format: File format, must be one of ['pdb', 'cif']
	        save_path: Saving path

	    Raises:
	        FileNotFoundError: If the ``wget`` executable is not installed
	    """

	    url = f"https://files.rcsb.org/download/{pdb_id}.{format}"

	    # Run wget with an argument list instead of a shell string: the id and the path are passed
	    # verbatim, so spaces or shell metacharacters in them cannot alter the command.
	    cmd = ["wget", "-q", "-o", os.devnull, url, "-O", save_path]
	    if subprocess.run(cmd, check=False).returncode != 0:
	        # "wget -O" may leave an empty or partial file behind when the download fails
	        try:
	            os.remove(save_path)
	        except FileNotFoundError:
	            # Nothing was written, so there is nothing to clean up
	            pass

	        print(f"Error: {url} cannot be downloaded!")


	def download_af2(uniprot_id: str, format: str, save_path: str):
	    """
	    Download files from AlphaFold2 database

	    If the download fails, the file left at ``save_path`` is removed and an error message is printed.

	    Args:
	        uniprot_id: Uniprot id
	        format: File format, must be one of ['pdb', 'cif', 'plddt', 'pae']
	        save_path: Saving path

	    Raises:
	        FileNotFoundError: If the ``wget`` executable is not installed
	    """

	    # Remote file name suffix per format, using the same file names as the URL table of
	    # AlphaDBDownloader: 'plddt' and 'pae' are JSON files, not "model_v4.<format>" files.
	    suffixes = {
	        "pdb": "model_v4.pdb",
	        "cif": "model_v4.cif",
	        "plddt": "confidence_v4.json",
	        "pae": "predicted_aligned_error_v4.json",
	    }
	    # Any other value keeps the previous "model_v4.<format>" pattern so existing callers are unaffected
	    suffix = suffixes.get(format, f"model_v4.{format}")
	    url = f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_id}-F1-{suffix}"

	    # Run wget with an argument list instead of a shell string: the id and the path are passed
	    # verbatim, so spaces or shell metacharacters in them cannot alter the command.
	    cmd = ["wget", "-q", "-o", os.devnull, url, "-O", save_path]
	    if subprocess.run(cmd, check=False).returncode != 0:
	        # "wget -O" may leave an empty or partial file behind when the download fails
	        try:
	            os.remove(save_path)
	        except FileNotFoundError:
	            # Nothing was written, so there is nothing to clean up
	            pass

	        print(f"Error: {url} cannot be downloaded!")