Spaces:
Sleeping
Sleeping
Commit
·
d6533ab
1
Parent(s):
49b5fcf
Update code/alphafold_featureVector.py
Browse files
- code/alphafold_featureVector.py +11 -14
code/alphafold_featureVector.py
CHANGED
|
@@ -117,32 +117,29 @@ def alphafold(input_set, mode, impute):
|
|
| 117 |
data = clean_data(input_set)
|
| 118 |
|
| 119 |
if len(data) == 0:
|
| 120 |
-
|
| 121 |
else:
|
| 122 |
data = add_uniprot_sequence(data)
|
| 123 |
-
st.write(data)
|
| 124 |
match = data[(data.wt_sequence_match == 'm')]
|
| 125 |
-
st.write(match)
|
| 126 |
org_len = len(match)
|
| 127 |
iso = data[(data.wt_sequence_match == 'i')]
|
| 128 |
noMatch = data[(data.wt_sequence_match != 'm') & (data.wt_sequence_match != 'i')]
|
| 129 |
-
st.write(noMatch)
|
| 130 |
if len(noMatch) == len(data) :
|
| 131 |
-
|
| 132 |
-
|
| 133 |
elif len(noMatch) > 0:
|
| 134 |
-
|
| 135 |
f'>> {len(noMatch)} of {len(data)} datapoints has not been mapped to any sequence. These datapoints are omitted.')
|
| 136 |
-
|
| 137 |
if len(iso) > 0:
|
| 138 |
-
|
| 139 |
-
|
| 140 |
if len(match) == 0:
|
| 141 |
-
|
| 142 |
-
|
| 143 |
else:
|
| 144 |
-
|
| 145 |
-
|
| 146 |
if (len(iso) != 0) | (len(noMatch) != 0):
|
| 147 |
print('Omitted datapoints are:', noMatch.datapoint.to_list() + iso.datapoint.to_list())
|
| 148 |
data = match[['uniprotID', 'wt', 'pos', 'mut', 'datapoint']]
|
|
|
|
| 117 |
data = clean_data(input_set)
|
| 118 |
|
| 119 |
if len(data) == 0:
|
| 120 |
+
st.write('Feature vectore generation terminated. Please enter a query or check your input format.')
|
| 121 |
else:
|
| 122 |
data = add_uniprot_sequence(data)
|
|
|
|
| 123 |
match = data[(data.wt_sequence_match == 'm')]
|
|
|
|
| 124 |
org_len = len(match)
|
| 125 |
iso = data[(data.wt_sequence_match == 'i')]
|
| 126 |
noMatch = data[(data.wt_sequence_match != 'm') & (data.wt_sequence_match != 'i')]
|
|
|
|
| 127 |
if len(noMatch) == len(data) :
|
| 128 |
+
st.write('>> Aminoacid at the position could not be mapped to canonical or isoform sequence. Please check the input amino acid.')
|
| 129 |
+
st.write('\n')
|
| 130 |
elif len(noMatch) > 0:
|
| 131 |
+
st.write(
|
| 132 |
f'>> {len(noMatch)} of {len(data)} datapoints has not been mapped to any sequence. These datapoints are omitted.')
|
| 133 |
+
st.write('\n')
|
| 134 |
if len(iso) > 0:
|
| 135 |
+
st.write(f'>> {len(iso)} of {len(data)} datapoints has been mapped to isoform sequences. These datapoints are omitted.')
|
| 136 |
+
st.write('\n')
|
| 137 |
if len(match) == 0:
|
| 138 |
+
st.write('>> Feature generation terminated due to failed mapping of input amino acid to UniProt sequence.')
|
| 139 |
+
st.write('\n')
|
| 140 |
else:
|
| 141 |
+
st.write(f'>> {len(match)} of {len(data)} datapoints has been mapped to canonical sequences. Proceeding with these datapoins.')
|
| 142 |
+
st.write('\n')
|
| 143 |
if (len(iso) != 0) | (len(noMatch) != 0):
|
| 144 |
print('Omitted datapoints are:', noMatch.datapoint.to_list() + iso.datapoint.to_list())
|
| 145 |
data = match[['uniprotID', 'wt', 'pos', 'mut', 'datapoint']]
|