Rurociągowy bioinformatyka strukturalna
#!/bin/bash
echo "1] Performing Blast Search..."
blastp -query target.fa -db $uniprot -out blast_uni.out
echo "OK!"
echo
echo "2] Obtaining HMM profile..."
hmmscan -o pfam.out --tblout profile.out $pfam target.fa
family=$(cat profile.out | awk 'NR==4' | awk '{print $1}')
echo $family
hmmfetch $pfam $family > domain.hmm
echo "OK!"
echo
echo "3] Searching sequences with known structure using HMM profile..."
hmmsearch domain.hmm $pdb > pdb_hmm.out
structures=$(grep -m 1 -A 6 "E-value" pdb_hmm.out | tail -n +3 | awk '{print $9}')
echo $structures
echo "OK!"
echo
echo "4] Align obtained sequences with the target using HMM profile..."
cat target.fa > structures.fa
for i in $structures
do
id=$(echo $i | cut -d '_' -f1)
if [ -f "$id.pdb" ]; then
break
else
wget https://files.rcsb.org/download/$id.pdb &> /dev/null
$splitchain -i $id.pdb -o $id
id2=$(echo $i | tr -d _)
cat $id2.fa >> structures.fa
fi
done
hmmalign domain.hmm structures.fa > hmm_alignment.aln
cp hmm_alignment.aln hmm_alignment.sto
perl /shared/PERL/aconvertMod2.pl -in h -out c <hmm_alignment.sto>hmm_alignment.clu
echo "OK!"
Dante Aviñó