Source code for frustratometer.dca.pydca

import numpy as np
import pydca
from typing import Union
from pathlib import Path

[docs] def plmdca(filtered_alignment_file : str, sequence_type : str='protein', seqid : float =0.8, lambda_h : float=1.0, lambda_J : float=20.0, num_threads : int =10, max_iterations : int=500, verbose : bool=False, msa_file_format : str='fasta')-> dict: plmdca_inst = pydca.plmdca.PlmDCA(msa_file=filtered_alignment_file, biomolecule=sequence_type, seqid = seqid, lambda_h=lambda_h, lambda_J = lambda_J, max_iterations = max_iterations, num_threads = num_threads, verbose = verbose, msa_file_format=msa_file_format) potts_model = plmdca_inst.get_potts_model() # Move gaps to the beginning qq = [20, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] potts_model['h'] = potts_model['h'][:,qq] potts_model['J'] = potts_model['J'][:,:,qq,:][:,:,:,qq] potts_model['L'] = plmdca_inst.num_sequences return potts_model
[docs] def mfdca(filtered_alignment_file : str)->dict: mfdca_inst = pydca.meanfield_dca.MeanFieldDCA(filtered_alignment_file,'protein') N = mfdca_inst.sequences_len q = mfdca_inst.num_site_states reg_fi = mfdca_inst.get_reg_single_site_freqs() reg_fij = mfdca_inst.get_reg_pair_site_freqs() corr_mat = mfdca_inst.construct_corr_mat(reg_fi, reg_fij) couplings = mfdca_inst.compute_couplings(corr_mat) fields = mfdca_inst.compute_fields(couplings) couplings=couplings.reshape(N,q-1,N,q-1) fields = np.array([a for a in fields.values()]) fields = np.concatenate([np.zeros([N,1]),fields],axis=1) couplings = np.concatenate([np.zeros([N,1,N,q-1]),couplings],axis=1) couplings = np.concatenate([np.zeros([N,q,N,1]),couplings],axis=3) couplings = couplings.transpose(0,2,1,3) potts_model = {'h':fields,'J':couplings,'N':N,'q':q} return potts_model
__all__ = ['plmdca', 'mfdca']