Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	File size: 3,105 Bytes
			
			| c2a02c6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 | import tarfile, glob, os
from biopandas.pdb import PandasPdb
import argparse
import numpy as np
# Command-line interface: --file_name (alias -file_name) names the tar archive,
# relative to input_files/, that create_file() unpacks.
parser = argparse.ArgumentParser(description='ASCARIS')
parser.add_argument('-file_name', '--file_name',
                    help='Enter the file tar file name to untar',
                    default=1)
# NOTE(review): the default is the integer 1, so without an argument
# create_file() globs for 'input_files/1' -- confirm this is intended.
# Arguments are parsed at import time, so importing this module reads sys.argv.
args = parser.parse_args()
# Archive name (or glob pattern) interpolated into the path in create_file().
alphafold = args.file_name
# Three-letter residue codes and the one-letter symbols they are written as.
_RESIDUE_SYMBOLS = {
    "ALA": "A",
    "ARG": "R",
    "VAL": "V",
    "GLU": "E",
    "PRO": "P",
    "LEU": "L",
    "GLY": "G",
    "ASN": "N",
    "SER": "S",
    "GLN": "Q",
    "THR": "T",
    "MET": "M",
    "LYS": "K",
    "ASP": "D",
    "ILE": "I",
    "PHE": "F",
    "TRP": "W",
    "TYR": "Y",
    "HIS": "H",
    "CYS": "C",
    "UNK": "X",
    "ASX": "O",
}


def threeToOne(variant):
    """Translate a three-letter residue code into its one-letter symbol.

    The lookup is case-sensitive. Any value that is not one of the known
    codes is handed back unchanged.
    """
    return _RESIDUE_SYMBOLS.get(variant, variant)
# Unzip AlphaFold structures
def create_file():
    """Extract the AlphaFold archive(s) and write a per-model summary table.

    Every archive matching ``input_files/{alphafold}`` is unpacked into
    ``input_files/alphafold_structures/``. Each extracted file whose name
    contains ``pdb`` is then read with PandasPdb, reduced to its CA atoms,
    and written as one tab-separated row (``uniprotID``, ``chain``,
    ``sequence``, ``model_num``) of ``input_files/alphafold_summary.txt``.

    Raises:
        IndexError: if a structure file name has fewer than three
            '-'-separated fields, or a structure contains no CA atoms.
    """
    structures_dir = 'input_files/alphafold_structures/'
    os.makedirs(structures_dir, exist_ok=True)
    for archive in glob.glob(f'input_files/{alphafold}'):
        with tarfile.open(archive) as tar:
            # NOTE(review): extractall() trusts the member paths stored in the
            # archive; only run this on archives from a trusted source, or
            # pass an extraction filter on Python versions that support it.
            tar.extractall(structures_dir)

    columns = ['alt_loc', 'residue_name', 'residue_number', 'atom_name',
               'insertion', 'chain_id']
    # Create summary file; the context manager guarantees the rows are flushed
    # and the handle is closed even if a structure fails to parse.
    with open('input_files/alphafold_summary.txt', 'w') as summary:
        summary.write('uniprotID\tchain\tsequence\tmodel_num\n')
        for path in glob.glob('input_files/alphafold_structures/*pdb*'):
            atoms = PandasPdb().read_pdb(path).df['ATOM']
            # Copy the filtered rows so the column assignments below act on an
            # independent frame rather than a view of the parsed data.
            ca_atoms = atoms.loc[atoms.atom_name == 'CA', columns].copy()
            ca_atoms['residue_name'] = ca_atoms['residue_name'].apply(threeToOne)
            # np.nan instead of np.NaN: the capitalised alias was removed in
            # NumPy 2.0.
            ca_atoms['alt_loc'] = ca_atoms['alt_loc'].replace({'': np.nan})
            ca_atoms = ca_atoms.drop_duplicates(['residue_name', 'residue_number'])
            sequence = ''.join(ca_atoms.residue_name.to_list())

            # Split only the file name, so a '-' in a directory name cannot
            # shift the field positions.
            name_fields = os.path.basename(path).split('-')
            model_no = name_fields[2].strip()[1:]  # drop the leading letter
            up_name = name_fields[1].strip()
            # First chain in file order; deterministic, unlike indexing a set.
            chain_id = ca_atoms.chain_id.iloc[0]

            summary.write('\t'.join((up_name, chain_id, sequence, model_no)))
            summary.write('\n')
if __name__ == '__main__':
    # Build the structures directory and summary file when run as a script.
    create_file()
