Source code for frustratometer.pfam.pfam

import urllib.request
from pathlib import Path
import logging
import gzip
from ..utils import create_directory
import glob
from typing import Union


#Download whole database
[docs] def download_database(path: Union[Path,str], name: str ='PFAM_current', url: Union[Path,str] ="https://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.full.uniprot.gz")->Path: """ Downloads and creates a pfam database in the Database folder Parameters ---------- path : str Adress of pfam database name: str Name of the new database folder Returns ------- alignments_path: Path Path of the local database of alignments """ #Make sure that the path exists path=Path(path) assert path.exists(), "The path doesn't exist" assert path.is_dir(), "The path is not a directory" # Create database directory data_path = create_directory(path / name) alignments_path = create_directory(data_path / 'Alignments') # Get file name file_name = url.split('/')[-1] # Download pfam alignments logging.debug(f"Downloading {url} to {data_path}/{file_name}") urllib.request.urlretrieve(f"{url}", f"{data_path}/{file_name}") # Split PFAM alignments with gzip.open(f'{data_path}/{file_name}') as in_file: new_lines = '' acc = 'Unknown' for line in in_file: line = line.decode('utf-8') if line.strip() == '//': if len(new_lines) > 0: with open(f'{alignments_path}/{acc}.sto', 'w+') as out_file: logging.debug(f'{alignments_path}/{acc}.sto') out_file.write(new_lines) new_lines = '' acc = 'Unknown' continue new_lines += line l = line.strip().split() if len(l) == 3 and l[0] == "#=GF" and l[1] == "AC": acc = l[2] if len(new_lines) > 0: with open(f'{alignments_path}/{acc}.sto', 'w+') as out_file: logging.debug(f'{alignments_path}/{acc}.sto') out_file.write(new_lines) return alignments_path
# Get a single alignment
[docs] def get_alignment(pfamid: str, database_path: Union[Path,str])->Path: """ Retrieves a pfam family alignment from local database Parameters ---------- pfamid : str ID of PFAM family. ex: PF00001 database_path: str Address of local database Returns ------- alignment_file : Path location of alignment """ path = Path(database_path) assert path.exists(), "The path doesn't exist" assert path.is_dir(), "The path is not a directory" files=glob.glob(str(path/f'{pfamid}')+'.*.sto') print(str(path/f'{pfamid}')+'.*.sto') if len(files)==0: raise(IOError,'File not found') if len(files)>1: raise(IOError, 'Multiple files found') return Path(files[0])
[docs] def download_aligment(pfamid: str, output_file: Union[Path,str], alignment_type: str ='full')->bytes: """ Retrieves a pfam family alignment from interpro Parameters ---------- pfamid : str, ID of PFAM family. ex: PF00001 output_file: Path location of the output file. Default: Temporary file alignment_type: str, alignment type to retrieve. Options: full, seed, uniprot Returns ------- output_file : Path Output file (in stockholm format) """ url = f'https://www.ebi.ac.uk/interpro/api/entry/pfam/{pfamid}/?annotation=alignment:{alignment_type}' #url = f'https://www.ebi.ac.uk/interpro/wwwapi//entry/pfam/{pfamid}/?annotation=alignment:{alignment_type}&download' logging.debug(f'Downloading {url} to {output_file}') zipped_alignment = urllib.request.urlopen(url).read() alignment = gzip.decompress(zipped_alignment) output_file=Path(output_file) output_file.write_bytes(alignment) return output_file