Spaces:
Sleeping
Sleeping
Commit
·
cd60b33
1
Parent(s):
8336e50
Update code/alphafold_featureVector.py
Browse files
code/alphafold_featureVector.py
CHANGED
|
@@ -607,11 +607,8 @@ def alphafold(input_set, mode, impute):
|
|
| 607 |
pdbSequence, Path(path_to_output_files / 'alignment_files'))
|
| 608 |
|
| 609 |
pdb_alignStatus = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[0]
|
| 610 |
-
st.write('alignment_list---')
|
| 611 |
info_per_model[mod]['pdb_alignStatus'] = pdb_alignStatus
|
| 612 |
-
|
| 613 |
-
st.write(uniprot_matched.at[i, 'pos'])
|
| 614 |
-
st.write(pdb_alignStatus)
|
| 615 |
mutationPositionOnPDB = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[1]
|
| 616 |
info_per_model[mod]['mutationPositionOnPDB'] = mutationPositionOnPDB
|
| 617 |
startGap = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[2]
|
|
@@ -629,7 +626,6 @@ def alphafold(input_set, mode, impute):
|
|
| 629 |
info_per_model[mod][annot] = annotation_pos_on_pdb_
|
| 630 |
|
| 631 |
|
| 632 |
-
st.write('Downloading the model from ASCARIS dataset.')
|
| 633 |
pdb_path = hf_hub_download(repo_id="HuBioDataLab/AlphafoldStructures", filename=f"AF-{uniprotID}-F{mod}-model_v4.pdb.gz",repo_type = 'dataset')
|
| 634 |
|
| 635 |
|
|
@@ -637,7 +633,6 @@ def alphafold(input_set, mode, impute):
|
|
| 637 |
# file_content = f.read()
|
| 638 |
# st.write(file_content)
|
| 639 |
|
| 640 |
-
st.write('Download complete.')
|
| 641 |
|
| 642 |
|
| 643 |
#st.write(get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan', 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
|
|
@@ -648,14 +643,11 @@ def alphafold(input_set, mode, impute):
|
|
| 648 |
|
| 649 |
|
| 650 |
pdbSequence = convert_non_standard_amino_acids(pdbSequence)
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
st.write('Hello I am in 3Dalignment')
|
| 654 |
|
| 655 |
atomSequence = ''
|
| 656 |
coords = []
|
| 657 |
resnums_for_sasa = []
|
| 658 |
-
st.write('Hello I am in 3Dalignment GZIP')
|
| 659 |
with gzip.open(pdb_path, mode='rb') as f:
|
| 660 |
|
| 661 |
for line in f:
|
|
@@ -668,7 +660,6 @@ def alphafold(input_set, mode, impute):
|
|
| 668 |
atomSequence += threeToOne(line[17:20].strip())
|
| 669 |
coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
|
| 670 |
resnums_for_sasa.append(line[22:26].strip())
|
| 671 |
-
st.write('o-complee')
|
| 672 |
#f = open(Path(path_3D_alignment / f'{identifier}_{str(model_num)}_3Dalignment.txt'),"w")
|
| 673 |
aligner.mode = 'local'
|
| 674 |
aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
|
|
@@ -681,7 +672,6 @@ def alphafold(input_set, mode, impute):
|
|
| 681 |
# 'gzip') != None:
|
| 682 |
|
| 683 |
if alignments != None:
|
| 684 |
-
st.write('I am here')
|
| 685 |
#alignments, coords, resnums_for_sasa = get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan',
|
| 686 |
# 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
|
| 687 |
# 'gzip')
|
|
@@ -693,10 +683,8 @@ def alphafold(input_set, mode, impute):
|
|
| 693 |
#fullID = f'AF-{uniprotID}-F{mod}-model_v4.pdb.gz'
|
| 694 |
#st.write(fullID)
|
| 695 |
run_freesasa(pdb_path, Path(path_to_output_files / f'freesasa_files/AF-{uniprotID}-F{mod}.txt'), include_hetatms=True,outdir=None, force_rerun=False)
|
| 696 |
-
st.write('Calculated')
|
| 697 |
#calculate_freesasa(uniprotID, mod, existing_free_sasa, alphafold_path, path_to_output_files)
|
| 698 |
if (mutationPositionOnPDB != 'nan'):
|
| 699 |
-
st.write('Here1')
|
| 700 |
if (int(mutationPositionOnPDB) <= 1400):
|
| 701 |
try:
|
| 702 |
coordMut = get_coords(mutationPositionOnPDB, alignments, coords, resnums_for_sasa, mode)[0]
|
|
@@ -707,12 +695,7 @@ def alphafold(input_set, mode, impute):
|
|
| 707 |
coordMut = np.NaN
|
| 708 |
|
| 709 |
sasa_pos = get_coords(mutationPositionOnPDB, alignments, coords, resnums_for_sasa, mode)[2]
|
| 710 |
-
st.write('sasa_pos', sasa_pos)
|
| 711 |
-
st.write('existing_free_sasa', existing_free_sasa)
|
| 712 |
-
st.write('existing_free_sasa', existing_free_sasa)
|
| 713 |
|
| 714 |
-
|
| 715 |
-
st.write('all', list(Path(path_to_output_files / 'freesasa_files').glob("*")))
|
| 716 |
|
| 717 |
|
| 718 |
if sasa_pos != np.NaN:
|
|
@@ -720,22 +703,20 @@ def alphafold(input_set, mode, impute):
|
|
| 720 |
for filename in list(Path(path_to_output_files / 'freesasa_files').glob("*"))[1:]:
|
| 721 |
|
| 722 |
try:
|
| 723 |
-
st.write(list(filter(None, str(filename).split('.')))[0].split('/')[-1].split('-')[1].upper())
|
| 724 |
fname = list(filter(None, str(filename).split('.')))[0].split('/')[-1].split('-')[1].upper()
|
| 725 |
except IndexError:
|
| 726 |
|
| 727 |
st.write('IndexError')
|
| 728 |
fname = ''
|
| 729 |
-
st.write(uniprotID , fname, uniprotID == fname)
|
| 730 |
if uniprotID == fname:
|
| 731 |
files = open(filename, 'r')
|
| 732 |
file = files.readlines()
|
| 733 |
for k in file:
|
| 734 |
-
|
| 735 |
if str(k.strip()[10:13].strip()) == str(sasa_pos):
|
| 736 |
st.write('WHY ')
|
| 737 |
st.write(str(k[4:7].strip()))
|
| 738 |
st.write('WHY 2')
|
|
|
|
| 739 |
sy.write('kk',k[4:7])
|
| 740 |
residue = str(k[4:7].strip())
|
| 741 |
|
|
|
|
| 607 |
pdbSequence, Path(path_to_output_files / 'alignment_files'))
|
| 608 |
|
| 609 |
pdb_alignStatus = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[0]
|
|
|
|
| 610 |
info_per_model[mod]['pdb_alignStatus'] = pdb_alignStatus
|
| 611 |
+
|
|
|
|
|
|
|
| 612 |
mutationPositionOnPDB = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[1]
|
| 613 |
info_per_model[mod]['mutationPositionOnPDB'] = mutationPositionOnPDB
|
| 614 |
startGap = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[2]
|
|
|
|
| 626 |
info_per_model[mod][annot] = annotation_pos_on_pdb_
|
| 627 |
|
| 628 |
|
|
|
|
| 629 |
pdb_path = hf_hub_download(repo_id="HuBioDataLab/AlphafoldStructures", filename=f"AF-{uniprotID}-F{mod}-model_v4.pdb.gz",repo_type = 'dataset')
|
| 630 |
|
| 631 |
|
|
|
|
| 633 |
# file_content = f.read()
|
| 634 |
# st.write(file_content)
|
| 635 |
|
|
|
|
| 636 |
|
| 637 |
|
| 638 |
#st.write(get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan', 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
|
|
|
|
| 643 |
|
| 644 |
|
| 645 |
pdbSequence = convert_non_standard_amino_acids(pdbSequence)
|
| 646 |
+
|
|
|
|
|
|
|
| 647 |
|
| 648 |
atomSequence = ''
|
| 649 |
coords = []
|
| 650 |
resnums_for_sasa = []
|
|
|
|
| 651 |
with gzip.open(pdb_path, mode='rb') as f:
|
| 652 |
|
| 653 |
for line in f:
|
|
|
|
| 660 |
atomSequence += threeToOne(line[17:20].strip())
|
| 661 |
coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
|
| 662 |
resnums_for_sasa.append(line[22:26].strip())
|
|
|
|
| 663 |
#f = open(Path(path_3D_alignment / f'{identifier}_{str(model_num)}_3Dalignment.txt'),"w")
|
| 664 |
aligner.mode = 'local'
|
| 665 |
aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
|
|
|
|
| 672 |
# 'gzip') != None:
|
| 673 |
|
| 674 |
if alignments != None:
|
|
|
|
| 675 |
#alignments, coords, resnums_for_sasa = get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan',
|
| 676 |
# 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
|
| 677 |
# 'gzip')
|
|
|
|
| 683 |
#fullID = f'AF-{uniprotID}-F{mod}-model_v4.pdb.gz'
|
| 684 |
#st.write(fullID)
|
| 685 |
run_freesasa(pdb_path, Path(path_to_output_files / f'freesasa_files/AF-{uniprotID}-F{mod}.txt'), include_hetatms=True,outdir=None, force_rerun=False)
|
|
|
|
| 686 |
#calculate_freesasa(uniprotID, mod, existing_free_sasa, alphafold_path, path_to_output_files)
|
| 687 |
if (mutationPositionOnPDB != 'nan'):
|
|
|
|
| 688 |
if (int(mutationPositionOnPDB) <= 1400):
|
| 689 |
try:
|
| 690 |
coordMut = get_coords(mutationPositionOnPDB, alignments, coords, resnums_for_sasa, mode)[0]
|
|
|
|
| 695 |
coordMut = np.NaN
|
| 696 |
|
| 697 |
sasa_pos = get_coords(mutationPositionOnPDB, alignments, coords, resnums_for_sasa, mode)[2]
|
|
|
|
|
|
|
|
|
|
| 698 |
|
|
|
|
|
|
|
| 699 |
|
| 700 |
|
| 701 |
if sasa_pos != np.NaN:
|
|
|
|
| 703 |
for filename in list(Path(path_to_output_files / 'freesasa_files').glob("*"))[1:]:
|
| 704 |
|
| 705 |
try:
|
|
|
|
| 706 |
fname = list(filter(None, str(filename).split('.')))[0].split('/')[-1].split('-')[1].upper()
|
| 707 |
except IndexError:
|
| 708 |
|
| 709 |
st.write('IndexError')
|
| 710 |
fname = ''
|
|
|
|
| 711 |
if uniprotID == fname:
|
| 712 |
files = open(filename, 'r')
|
| 713 |
file = files.readlines()
|
| 714 |
for k in file:
|
|
|
|
| 715 |
if str(k.strip()[10:13].strip()) == str(sasa_pos):
|
| 716 |
st.write('WHY ')
|
| 717 |
st.write(str(k[4:7].strip()))
|
| 718 |
st.write('WHY 2')
|
| 719 |
+
st.write('kk', k)
|
| 720 |
sy.write('kk',k[4:7])
|
| 721 |
residue = str(k[4:7].strip())
|
| 722 |
|