Spaces:

HUBioDataLab
/

ASCARIS

Sleeping

App Files Community

fatmacankara committed on Aug 25, 2023

Commit

b9d0e9c

1 Parent(s): 298b080

Update code/add_3Dalignment.py

Browse files

Files changed (1) hide show

code/add_3Dalignment.py +68 -6

code/add_3Dalignment.py CHANGED Viewed

@@ -11,6 +11,35 @@ import gzip
 from pathlib import Path
 from Bio.Align import substitution_matrices
 aligner = Align.PairwiseAligner()
 def distance(x1, y1, z1, x2, y2, z2):
     d = math.sqrt(math.pow(x2 - x1, 2) +
@@ -186,7 +215,40 @@ def get_coords(annot, alignments, coords, resnums_for_sasa, mode):
 def get_alignments_3D(identifier, model_num, pdb_path, pdbSequence, source, chain, pdbID, mode, path_3D_alignment,file_format = 'gzip'):
     if mode == 1:
         atomSequence = ''
         coords = []
         resnums_for_sasa = []
@@ -206,7 +268,7 @@ def get_alignments_3D(identifier, model_num, pdb_path, pdbSequence, source, chai
                         atomSequence += threeToOne(line[17:20].strip())
                         coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
                         resnums_for_sasa.append(line[22:26].strip())
         #f = open(Path(path_3D_alignment / f'{identifier}_{pdbID}_{str(chain)}_alignment.txt'),"w")
         aligner.mode = 'local'
@@ -249,15 +311,15 @@ def get_alignments_3D(identifier, model_num, pdb_path, pdbSequence, source, chai
                             atomSequence += threeToOne(line[17:20].strip())
                             coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
                             resnums_for_sasa.append(line[22:26].strip())
-            f = open(Path(path_3D_alignment / f'{identifier}_{str(model_num)}_3Dalignment.txt'),"w")
             aligner.mode = 'local'
             aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
             aligner.open_gap_score = -11
             aligner.extend_gap_score = -1
             alignments = aligner.align(pdbSequence, atomSequence)
             alignments = (list(alignments))
-            for alignment in alignments:
-                f.write(str(alignment))
-                f.write('\n')
-                f.write('\n')
             return alignments, coords, resnums_for_sasa

 from pathlib import Path
 from Bio.Align import substitution_matrices
 aligner = Align.PairwiseAligner()
+import requests
+from Bio.PDB import PDBParser, PPBuilder
+from io import StringIO
+from Bio.PDB.Polypeptide import *
def convert_non_standard_amino_acids(sequence):
    """
    Convert non-standard or ambiguous amino acid codes to their closest relatives.

    Each one-letter code in ``sequence`` is looked up in a fixed table; codes
    that are not in the table pass through unchanged. Matching is
    case-sensitive: only the upper-case codes listed below are rewritten.

    Args:
        sequence: Amino acid sequence as a string of one-letter codes. Any
            other iterable of strings is joined into a single string first.

    Returns:
        str: The sequence with B->D, Z->E, X->A, U->C, J->L and O->K applied
        and every '*' removed. The result is shorter than the input when the
        input contains '*'.
    """
    # str.maketrans/str.translate rewrites the whole sequence in one pass
    # instead of doing a dict lookup per residue in a Python-level loop.
    translation_table = str.maketrans({
        'B': 'D',  # Aspartic Acid (D) is used for B (Asx)
        'Z': 'E',  # Glutamic Acid (E) is used for Z (Glx)
        'X': 'A',  # Alanine (A) is the placeholder for unknown/ambiguous
        'U': 'C',  # Cysteine (C) is used for Selenocysteine (U)
        'J': 'L',  # Leucine (L) is used for J (Leu/Ile)
        'O': 'K',  # Lysine (K) is used for O (Pyrrolysine)
        '*': None,  # stop symbol: mapped to None so it is deleted
    })
    # Non-str iterables (e.g. a list of residues) are joined first so they
    # keep working and never hit an unrelated ``translate`` method.
    if not isinstance(sequence, str):
        sequence = ''.join(sequence)
    return sequence.translate(translation_table)
 def distance(x1, y1, z1, x2, y2, z2):
     d = math.sqrt(math.pow(x2 - x1, 2) +
 def get_alignments_3D(identifier, model_num, pdb_path, pdbSequence, source, chain, pdbID, mode, path_3D_alignment,file_format = 'gzip'):
+    st.write('I am here get alignments 3D')
+    uniprotSequence = convert_non_standard_amino_acids(uniprotSequence)
+    pdbSequence = convert_non_standard_amino_acids(pdbSequence)
     if mode == 1:
+        if source != 'modbase':
+            # Step 1: Fetch the PDB file
+            pdb_url = f"https://files.rcsb.org/download/{pdbID}.pdb"
+            response = requests.get(pdb_url)
+            response.raise_for_status()  # Check for a successful response
+            # Step 2: Parse the PDB file from memory
+            atoms = [i for i in response.text.split('\n') if i.startswith('ATOM')]
+            atoms = [i.split() for i in atoms]
+            atoms = [i for i in atoms if (i[2] == 'CA' and i[4]  == chain)]
+            atomSequence = ''.join([three_to_one(i[3]) for i in atoms])
+            coords = [[i[6] ,i[7] ,i[8]] for i in atoms]
+            resnums_for_sasa = [i[5] for i in atoms]
+        else:
+            pdb_url = f"https://files.rcsb.org/download/{pdb_code}.pdb"
+            response = requests.get(pdbID)
+            response.raise_for_status()  # Check for a successful response
+            # Step 2: Parse the PDB file from memory
+            atoms = [i for i in response.text.split('\n') if i.startswith('ATOM')]
+            atoms = [i.split() for i in atoms]
+            atoms = [i for i in atoms if i[2] == 'CA']
+            atomSequence = ''.join([three_to_one(i[3]) for i in atoms])
+            coords = [[i[6] ,i[7] ,i[8]] for i in atoms]
+            resnums_for_sasa = [i[5] for i in atoms]
+        """
         atomSequence = ''
         coords = []
         resnums_for_sasa = []
                         atomSequence += threeToOne(line[17:20].strip())
                         coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
                         resnums_for_sasa.append(line[22:26].strip())
+        """
         #f = open(Path(path_3D_alignment / f'{identifier}_{pdbID}_{str(chain)}_alignment.txt'),"w")
         aligner.mode = 'local'
                             atomSequence += threeToOne(line[17:20].strip())
                             coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
                             resnums_for_sasa.append(line[22:26].strip())
+            #f = open(Path(path_3D_alignment / f'{identifier}_{str(model_num)}_3Dalignment.txt'),"w")
             aligner.mode = 'local'
             aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
             aligner.open_gap_score = -11
             aligner.extend_gap_score = -1
             alignments = aligner.align(pdbSequence, atomSequence)
             alignments = (list(alignments))
+            #for alignment in alignments:
+            #    f.write(str(alignment))
+            #    f.write('\n')
+            #    f.write('\n')
             return alignments, coords, resnums_for_sasa