Source code for frustratometer.classes.DCA

"""Provide the primary functions."""
from typing import Union
import numpy as np
from pathlib import Path
from typing import Union


#Import other modules
from .. import pdb
from .. import filter
from .. import dca
from .. import map
from .. import align
from .. import frustration
from .. import pfam
from .Frustratometer import Frustratometer

__all__=['DCA']
##################
# PFAM functions #
##################


# Class wrapper
[docs] class DCA(Frustratometer): """ The DCA class contains many class methods that can be used, depending on whether they have already calculated the DCA couplings and fields parameters. If the user already has calculated these parameters, the "from_potts_model_file" or "from_pottsmodel" methods can be used. Otherwise, the user can locally generate these parameters using the pyDCA package. In this case, the user can try using the "from_distance_matrix," "from_pfam_alignment," or "from_hmmer_alignment" methods. """ # @classmethod # def from_distance_matrix(cls, # potts_model: dict, # distance_matrix : np.array, # sequence: str =None, # sequence_cutoff: Union[float, None] = None, # distance_cutoff: Union[float, None] = None)->object: # """ # Generate DCA object from previously generated distance matrix. # Parameters # ---------- # pdb_structure : object # Structure object generated by Structure class # sequence_cutoff : float # Sequence seperation cutoff; the couplings terms of contacts that are separated by more than this cutoff will be ignored. # distance_cutoff : float # Distance seperation cutoff; the couplings terms of contacts that are separated by more than this cutoff will be ignored. # Returns # ------- # DCA object # """ # self = cls() # # Set initialization variables # self._potts_model = potts_model # self._potts_model_file = None # self._pdb_file = None # self._chain = None # self._sequence_cutoff = sequence_cutoff # self._distance_cutoff = distance_cutoff # self._distance_matrix_method = None # self._sequence = sequence # # Compute fast properties # self.distance_matrix = distance_matrix # self.aa_freq = frustration.compute_aa_freq(self.sequence) # self.contact_freq = frustration.compute_contact_freq(self.sequence) # self.mask = frustration.compute_mask(self.distance_matrix, self.distance_cutoff, self.sequence_cutoff) # # Initialize slow properties # self._native_energy = None # self._decoy_fluctuation = {} # return self
[docs] @classmethod def from_potts_model_file(cls,pdb_structure: object, potts_model_file: Union[Path,str] = None, reformat_potts_model: bool = False, sequence_cutoff: Union[float, None] = None, distance_cutoff: Union[float, None] = None)->object: """ Generate DCA object from previously generated potts model file. Parameters ---------- pdb_structure : object Structure object generated by Structure class potts_model_file : Path or str File path of potts model file reformat_potts_model : bool If True, the fields matrix will be transposed, while the couplings matrix will be reformatted into a (NxNx21x21) matrix. This reformatting is necessary for some potts model files generated by the mfDCA Matlab algorithm. Default is False. sequence_cutoff : float Sequence seperation cutoff; the couplings terms of contacts that are separated by more than this cutoff will be ignored. distance_cutoff : float Distance seperation cutoff; the couplings terms of contacts that are separated by more than this cutoff will be ignored. Returns ------- DCA object """ self = cls() # Set initialization variables self.structure=pdb_structure.structure self._chain=pdb_structure.chain self._sequence=pdb_structure.sequence self._pdb_file=pdb_structure.pdb_file self._potts_model_file=potts_model_file self._sequence_cutoff=sequence_cutoff self._distance_cutoff=distance_cutoff self._distance_matrix_method=None self.reformat_potts_model=reformat_potts_model self.init_index_shift=pdb_structure.init_index_shift self.full_to_aligned_index_dict=pdb_structure.full_to_aligned_index_dict self.filtered_aligned_sequence=pdb_structure.filtered_aligned_sequence self.aligned_sequence=pdb_structure.aligned_sequence self.mapped_distance_matrix=pdb_structure.mapped_distance_matrix self.distance_matrix=self.mapped_distance_matrix if self.distance_cutoff==None: example_matrix=np.ones((len(self.filtered_aligned_sequence),len(self.filtered_aligned_sequence))) self.mask = frustration.compute_mask(example_matrix, self.distance_cutoff, self.sequence_cutoff) else: self.mask = frustration.compute_mask(self.mapped_distance_matrix, self.distance_cutoff, self.sequence_cutoff) self.minimally_frustrated_threshold=1 # Compute fast properties self._potts_model = dca.matlab.load_potts_model(self.potts_model_file) if self.reformat_potts_model: self.potts_model["h"]=self.potts_model["h"].T self.potts_model["J"]= self.potts_model["familycouplings"].reshape(int(len(self.filtered_aligned_sequence)),21,int(len(self.filtered_aligned_sequence)),21).transpose(0,2,1,3) if self.filtered_aligned_sequence is not None: self.aa_freq = frustration.compute_aa_freq(self.sequence) self.contact_freq = frustration.compute_contact_freq(self.sequence) else: self.aa_freq = None self.contact_freq = None # Initialize slow properties self._native_energy = None self._decoy_fluctuation = {} return self
[docs] @classmethod def from_pottsmodel(cls,pdb_structure : object, potts_model: dict, reformat_potts_model: bool = False, sequence_cutoff: Union[float, None] = None, distance_cutoff: Union[float, None] = None)->object: """ Generate DCA object from previously generated potts model. Parameters ---------- pdb_structure : object Structure object generated by Structure class potts_model : dict Dictionary of potts model file, containing fields and couplings keys. reformat_potts_model : bool If True, the fields matrix will be transposed, while the couplings matrix will be reformatted into a (NxNx21x21) matrix. This reformatting is necessary for some potts model files generated by the mfDCA Matlab algorithm. Default is False. sequence_cutoff : float Sequence seperation cutoff; the couplings terms of contacts that are separated by more than this cutoff will be ignored. distance_cutoff : float Distance seperation cutoff; the couplings terms of contacts that are separated by more than this cutoff will be ignored. Returns ------- DCA object """ self = cls() # Set initialization variables self.structure=pdb_structure.structure self._chain=pdb_structure.chain self._sequence=pdb_structure.sequence self._pdb_file=pdb_structure.pdb_file self._potts_model=potts_model self._sequence_cutoff=sequence_cutoff self._distance_cutoff=distance_cutoff self._distance_matrix_method=None self.reformat_potts_model=reformat_potts_model self.init_index_shift=pdb_structure.init_index_shift self.full_to_aligned_index_dict=pdb_structure.full_to_aligned_index_dict self.filtered_aligned_sequence=pdb_structure.filtered_aligned_sequence self.aligned_sequence=pdb_structure.aligned_sequence self.mapped_distance_matrix=pdb_structure.mapped_distance_matrix self.distance_matrix=self.mapped_distance_matrix if self.distance_cutoff==None: example_matrix=np.ones((len(self.filtered_aligned_sequence),len(self.filtered_aligned_sequence))) self.mask = frustration.compute_mask(example_matrix, self.distance_cutoff, self.sequence_cutoff) else: self.mask = frustration.compute_mask(self.mapped_distance_matrix, self.distance_cutoff, self.sequence_cutoff) self.minimally_frustrated_threshold=1 # Compute fast properties if self.reformat_potts_model: self.potts_model["h"]=self.potts_model["h"].T self.potts_model["J"]= self.potts_model["familycouplings"].reshape(int(len(self.filtered_aligned_sequence)),21,int(len(self.filtered_aligned_sequence)),21).transpose(0,2,1,3) if self.filtered_aligned_sequence is not None: self.aa_freq = frustration.compute_aa_freq(self.sequence) self.contact_freq = frustration.compute_contact_freq(self.sequence) else: self.aa_freq = None self.contact_freq = None # Initialize slow properties self._native_energy = None self._decoy_fluctuation = {} return self
[docs] @classmethod def from_pfam_alignment(cls,pdb_structure : object, alignment_output_file_name: Union[Path,str], filtered_alignment_output_file_name: Union[Path,str], PFAM_ID: str = None, DCA_format : str = "plmDCA", sequence_cutoff: Union[float, None] = None, distance_cutoff: Union[float, None] = None)->object: """ Generate DCA object from a locally downloaded PFAM alignment file that will be used to generate a Potts Model file. Parameters ---------- pdb_structure : object Structure object generated by Structure class alignment_output_file_name : Path or str File name of generated alignment file. The file will be in stockholm format. filtered_alignment_output_file_name : Path or str File name of generated filtered alignment file. The file will be in Fasta format. PFAM_ID : str PFAM ID associated with structure object DCA_format : str Options are "plmDCA" and "mfDCA" sequence_cutoff : float Sequence seperation cutoff; the couplings terms of contacts that are separated by more than this cutoff will be ignored. distance_cutoff : float Distance seperation cutoff; the couplings terms of contacts that are separated by more than this cutoff will be ignored. Returns ------- DCA object """ self = cls() self.structure=pdb_structure.structure self._chain=pdb_structure.chain self._sequence=pdb_structure.sequence self._pdb_file=pdb_structure.pdb_file self._sequence_cutoff=sequence_cutoff self._distance_cutoff=distance_cutoff self._distance_matrix_method=None self.alignment_output_file_name = alignment_output_file_name self.filtered_alignment_output_file_name = filtered_alignment_output_file_name self.DCA_format=DCA_format self.mapped_distance_matrix=pdb_structure.mapped_distance_matrix self.distance_matrix=self.mapped_distance_matrix self.mask = frustration.compute_mask(self.mapped_distance_matrix, self.distance_cutoff, self.sequence_cutoff) self.minimally_frustrated_threshold=1 if PFAM_ID==None: self.PFAM_ID=map.get_pfamID(self.pdb_file,self.chain) else: self.PFAM_ID=PFAM_ID self.alignment_file=pfam.download_aligment(self.PFAM_ID,self.alignment_output_file_name) self.filtered_alignment_file=filter.filter_alignment(self.alignment_output_file_name,self.filtered_alignment_output_file_name) if self.DCA_format=="plmDCA": self.potts_model=dca.pydca.plmdca(str(self.filtered_alignment_file)) else: self.potts_model=dca.pydca.mfdca(str(self.filtered_alignment_file)) self.aa_freq = None self.contact_freq = None # Initialize slow properties self._native_energy = None self._decoy_fluctuation = {} return self
[docs] @classmethod def from_hmmer_alignment(cls,pdb_structure : object, alignment_output_file_name: Union[Path,str], filtered_alignment_output_file_name: Union[Path,str], query_sequence_database_file : Union[Path,str], DCA_format : str = "plmDCA", sequence_cutoff: Union[float, None] = None, distance_cutoff: Union[float, None] = None)->object: """ Generate DCA object from a locally generated jackhmmer alignment file that will be used to generate a Potts Model file. The protein sequence of the structure object will be used as the query sequence by the Jackhmmer algorithm. Parameters ---------- pdb_structure : object Structure object generated by Structure class alignment_output_file_name : Path or str File name of generated alignment file. The file will be in stockholm format. filtered_alignment_output_file_name : Path or str File name of generated filtered alignment file. The file will be in Fasta format. query_sequence_database_file : Path or str File name of sequence database. DCA_format : str Options are "plmDCA" and "mfDCA" sequence_cutoff : float Sequence seperation cutoff; the couplings terms of contacts that are separated by more than this cutoff will be ignored. distance_cutoff : float Distance seperation cutoff; the couplings terms of contacts that are separated by more than this cutoff will be ignored. Returns ------- DCA object """ self = cls() self.structure=pdb_structure.structure self._chain=pdb_structure.chain self._sequence=pdb_structure.sequence self._pdb_file=pdb_structure.pdb_file self._sequence_cutoff=sequence_cutoff self._distance_cutoff=distance_cutoff self._distance_matrix_method=None self.alignment_output_file_name = alignment_output_file_name self.filtered_alignment_output_file_name = filtered_alignment_output_file_name self.query_sequence_database_file=query_sequence_database_file self.DCA_format=DCA_format self.mapped_distance_matrix=pdb_structure.mapped_distance_matrix self.distance_matrix=self.mapped_distance_matrix self.mask = frustration.compute_mask(self.mapped_distance_matrix, self.distance_cutoff, self.sequence_cutoff) self.minimally_frustrated_threshold=1 self.alignment_file=align.jackhmmer(self.sequence,self.alignment_output_file_name,self.query_sequence_database_file) self.filtered_alignment_file=filter.filter_alignment(self.alignment_output_file_name,self.filtered_alignment_output_file_name) if self.DCA_format=="plmDCA": self.potts_model=dca.pydca.plmdca(str(self.filtered_alignment_file)) else: self.potts_model=dca.pydca.mfdca(str(self.filtered_alignment_file)) self.aa_freq = None self.contact_freq = None # Initialize slow properties self._native_energy = None self._decoy_fluctuation = {} return self
# @classmethod # def from_alignment(cls, # alignment: str, # pdb_file: str, # chain: str, # sequence_cutoff: Union[float, None] = None, # distance_cutoff: Union[float, None] = None, # distance_matrix_method='minimum')->object: # # Compute dca # potts_model = dca.pydca.plmdca(alignment) # return cls.from_pottsmodel(potts_model, # pdb_file=pdb_file, # chain=chain, # sequence_cutoff=sequence_cutoff, # distance_cutoff=distance_cutoff, # distance_matrix_method=distance_matrix_method) @property def sequence(self): return self._sequence # Set a new sequence in case someone needs to calculate the energy of a diferent sequence with the same structure @sequence.setter def sequence(self, value): assert len(value) == len(self._sequence) self._sequence = value @property def pdb_file(self): return str(self._pdb_file) # @pdb_file.setter # def pdb_file(self, value): # self._pdb_file = Path(value) @property def pdb_name(self, value): """ Returns PDBid from pdb name """ assert self._pdb_file.exists() return self._pdb_file.stem @property def chain(self): return self._chain # @chain.setter # def chain(self, value): # self._chain = value @property def pfamID(self, value): """ Returns pfamID from pdb name """ return self._pfamID @property def alignment_type(self, value): return self._alignment_type # @alignment_type.setter # def alignment_type(self, value): # self._alignment_type = value @property def alignment_sequence_database(self, value): return self._alignment_sequence_database # @alignment_sequence_database.setter # def alignment_sequence_database(self, value): # self._alignment_sequence_database = value @property def download_all_alignment_files(self, value): return self._download_all_alignment_files # @download_all_alignment_files.setter # def download_all_alignment_files(self, value): # self._download_all_alignment_files = value @property def alignment_files_directory(self, value): return self._alignment_files_directory # @alignment_files_directory.setter # def alignment_files_directory(self, value): # self._alignment_files_directory = value @property def alignment_output_file(self, value): return self._alignment_output_file # @alignment_output_file.setter # def alignment_output_file(self, value): # self._alignment_output_file = value @property def sequence_cutoff(self): return self._sequence_cutoff @sequence_cutoff.setter def sequence_cutoff(self, value): self.mask = frustration.compute_mask(self.distance_matrix, self.distance_cutoff, self.sequence_cutoff) self._sequence_cutoff = value self._native_energy = None self._decoy_fluctuation = {} @property def distance_cutoff(self): return self._distance_cutoff @distance_cutoff.setter def distance_cutoff(self, value): self.mask = frustration.compute_mask(self.distance_matrix, self.distance_cutoff, self.sequence_cutoff) self._distance_cutoff = value self._native_energy = None self._decoy_fluctuation = {} @property def distance_matrix_method(self): return self._distance_matrix_method @distance_matrix_method.setter def distance_matrix_method(self, value): self.distance_matrix = pdb.get_distance_matrix(self._pdb_file, self._chain, value) self.mask = frustration.compute_mask(self.distance_matrix, self.distance_cutoff, self.sequence_cutoff) self._distance_matrix_method = value self._native_energy = None self._decoy_fluctuation = {} @property def potts_model_file(self): return self._potts_model_file @potts_model_file.setter def potts_model_file(self, value): if value == None: print("Generating PDB alignment using Jackhmmer") align.create_alignment_jackhmmer(self.sequence, self.pdb_name, output_file="dcaf_{}_alignment.sto".format(self.pdb_name)) filter.convert_and_filter_alignment(self.pdb_name) dca.matlab.compute_plm(self.pdb_name) raise ValueError("Need to generate potts model") else: self.potts_model = dca.matlab.load_potts_model(value) self._potts_model_file = value self._native_energy = None self._decoy_fluctuation = {} @property def potts_model(self): return self._potts_model @potts_model.setter def potts_model(self, value): self._potts_model = value self._potts_model_file = None self._native_energy = None self._decoy_fluctuation = {}