|
|
import logging
import os
from typing import List

import gradio as gr
import pandas as pd
from datasets import load_dataset, Dataset
|
|
|
|
|
# Hub dataset repository that load_df reads from and push_df writes to.
repo = "hugging-science/m-boltz-submissions"

# Short key used inside this module -> config name passed to the Hub dataset calls.
CONFIGS = {
    "antibody": "Antibody–Antigen",
    "ligand": "Allosteric–Orthosteric",
    "final": "final",
}
|
|
|
|
|
|
|
|
# Ordered column names of each submission table, keyed like CONFIGS.
COLUMNS = {
    # Counts reported for the antibody–antigen challenge.
    "antibody": ["group_name", "successful", "high", "medium", "acceptable"],
    # RMSD figures and the under-2A count reported for the ligand challenge.
    "ligand": [
        "group_name",
        "rmsd_top1_all",
        "rmsd_top1_allosteric",
        "rmsd_top1_orthosteric",
        "rmsd_top5_all",
        "structures_under_2a",
    ],
    # Fields collected by the final-submission form.
    "final": [
        "group_name",
        "repository_url",
        "commit_sha",
        "challenge_type",
        "description_link",
    ],
}
|
|
|
|
|
|
|
|
|
|
|
def _hf_token() -> str: |
|
|
token = os.getenv("HF_TOKEN") |
|
|
if not token: |
|
|
raise RuntimeError( |
|
|
"Missing HF_TOKEN. Add a write-enabled token in your Space secrets." |
|
|
) |
|
|
return token |
|
|
|
|
|
|
|
|
def _empty_df(columns: List[str]) -> pd.DataFrame: |
|
|
return pd.DataFrame(columns=columns) |
|
|
|
|
|
|
|
|
def load_df(config: str, columns: List[str]) -> pd.DataFrame:
    """Load the 'train' split from a Hub dataset into a pandas DataFrame.

    Args:
        config: Dataset config name passed to ``load_dataset``.
        columns: Column names, in order, that the returned frame exposes.

    Returns:
        The loaded data restricted to ``columns``; any requested column that
        is absent from the loaded data is added and filled with ``pd.NA``.
        If anything in the load fails (for example because the dataset doesn't
        exist yet, or the token is missing), an empty DataFrame with the
        expected columns is returned instead.
    """
    try:
        ds = load_dataset(repo, config, split="train", token=_hf_token())
        df = ds.to_pandas()

        # Guarantee every expected column exists before selecting/reordering.
        for c in columns:
            if c not in df.columns:
                df[c] = pd.NA
        return df[columns]
    except Exception:
        # Best-effort fallback is kept, but the cause is logged so a missing
        # token or a network failure is distinguishable from a dataset that
        # simply has not been created yet.
        logging.getLogger(__name__).warning(
            "Could not load config %r from %r; returning an empty table.",
            config,
            repo,
            exc_info=True,
        )
        return _empty_df(columns)
|
|
|
|
|
|
|
|
def push_df(config: str, df: pd.DataFrame) -> None:
    """Upload ``df`` to the Hub as the 'train' split of ``config``, replacing what is there.

    The frame's index is discarded before conversion, so only its columns are
    stored. If the repo doesn't exist, this will create it under your
    account/org.

    Args:
        config: Dataset config name to write to.
        df: Complete table to store.
    """
    frame = df.reset_index(drop=True)
    dataset = Dataset.from_pandas(frame, preserve_index=False)
    dataset.push_to_hub(repo, config_name=config, token=_hf_token())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def submit_antibody(group_name, successful, high, medium, acceptable):
    """Store one group's antibody–antigen result and return the reloaded table.

    Any existing row with the same group name is replaced, so each group has
    at most one row.

    Args:
        group_name: Name identifying the submitting group; surrounding
            whitespace is ignored.
        successful: Number of successful predictions (``None`` counts as 0).
        high: Number of "high" predictions (``None`` counts as 0).
        medium: Number of "medium" predictions (``None`` counts as 0).
        acceptable: Number of "acceptable" predictions (``None`` counts as 0).

    Returns:
        The submissions table as re-read from the Hub after the push.

    Raises:
        gr.Error: If ``group_name`` is missing or blank.
    """
    # A blank name can never be matched for replacement, so every such submit
    # would append another anonymous row; trimming also keeps "Team" and
    # "Team " from becoming two separate entries.
    name = (group_name or "").strip()
    if not name:
        raise gr.Error("Please enter a group name before submitting.")

    config = CONFIGS["antibody"]
    cols = COLUMNS["antibody"]
    df = load_df(config, cols)
    row = {
        "group_name": name,
        "successful": int(successful) if successful is not None else 0,
        "high": int(high) if high is not None else 0,
        "medium": int(medium) if medium is not None else 0,
        "acceptable": int(acceptable) if acceptable is not None else 0,
    }

    # Drop the group's previous row so the new one replaces it.
    if not df.empty:
        df = df[df["group_name"] != name]
    df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)

    # NOTE(review): load_df returns an empty table when loading fails, so a
    # failed read followed by a successful push stores only this row.
    push_df(config, df)

    return load_df(config, cols)
|
|
|
|
|
|
|
|
def refresh_antibody():
    """Reload the antibody–antigen submissions table via ``load_df``."""
    key = "antibody"
    return load_df(CONFIGS[key], COLUMNS[key])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def submit_ligand(group_name, rmsd_top1_all, rmsd_top1_allosteric, rmsd_top1_orthosteric, rmsd_top5_all, structures_under_2a):
    """Store one group's ligand-binding result and return the reloaded table.

    Any existing row with the same group name is replaced, so each group has
    at most one row.

    Args:
        group_name: Name identifying the submitting group; surrounding
            whitespace is ignored.
        rmsd_top1_all: Top-1 RMSD over all structures (``None`` is stored as-is).
        rmsd_top1_allosteric: Top-1 RMSD, allosteric (``None`` is stored as-is).
        rmsd_top1_orthosteric: Top-1 RMSD, orthosteric (``None`` is stored as-is).
        rmsd_top5_all: Top-5 RMSD over all structures (``None`` is stored as-is).
        structures_under_2a: Count of structures with RMSD < 2A (``None``
            counts as 0).

    Returns:
        The submissions table as re-read from the Hub after the push.

    Raises:
        gr.Error: If ``group_name`` is missing or blank.
    """
    # A blank name can never be matched for replacement, so every such submit
    # would append another anonymous row; trimming also keeps "Team" and
    # "Team " from becoming two separate entries.
    name = (group_name or "").strip()
    if not name:
        raise gr.Error("Please enter a group name before submitting.")

    config = CONFIGS["ligand"]
    cols = COLUMNS["ligand"]
    df = load_df(config, cols)
    row = {
        "group_name": name,
        "rmsd_top1_all": float(rmsd_top1_all) if rmsd_top1_all is not None else None,
        "rmsd_top1_allosteric": float(rmsd_top1_allosteric) if rmsd_top1_allosteric is not None else None,
        "rmsd_top1_orthosteric": float(rmsd_top1_orthosteric) if rmsd_top1_orthosteric is not None else None,
        "rmsd_top5_all": float(rmsd_top5_all) if rmsd_top5_all is not None else None,
        "structures_under_2a": int(structures_under_2a) if structures_under_2a is not None else 0,
    }

    # Drop the group's previous row so the new one replaces it.
    if not df.empty:
        df = df[df["group_name"] != name]
    df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)

    # NOTE(review): load_df returns an empty table when loading fails, so a
    # failed read followed by a successful push stores only this row.
    push_df(config, df)
    return load_df(config, cols)
|
|
|
|
|
|
|
|
def refresh_ligand():
    """Reload the ligand-binding submissions table via ``load_df``."""
    key = "ligand"
    return load_df(CONFIGS[key], COLUMNS[key])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def submit_final(group_name, repository_url, commit_sha, challenge_type, description_link):
    """Store one group's final submission and return the reloaded table.

    Any existing row with the same group name is replaced, so each group has
    at most one row.

    Args:
        group_name: Name identifying the submitting group; surrounding
            whitespace is ignored.
        repository_url: Repository URL (falsy values are stored as "").
        commit_sha: Commit SHA (falsy values are stored as "").
        challenge_type: Selected challenge type (falsy values are stored as "").
        description_link: Link to the description (falsy values are stored as "").

    Returns:
        The final-submissions table as re-read from the Hub after the push.

    Raises:
        gr.Error: If ``group_name`` is missing or blank.
    """
    # A blank name can never be matched for replacement, so every such submit
    # would append another anonymous row; trimming also keeps "Team" and
    # "Team " from becoming two separate entries.
    name = (group_name or "").strip()
    if not name:
        raise gr.Error("Please enter a group name before submitting.")

    config = CONFIGS["final"]
    cols = COLUMNS["final"]
    df = load_df(config, cols)
    row = {
        "group_name": name,
        "repository_url": repository_url or "",
        "commit_sha": commit_sha or "",
        "challenge_type": challenge_type or "",
        "description_link": description_link or "",
    }

    # Drop the group's previous row so the new one replaces it.
    if not df.empty:
        df = df[df["group_name"] != name]
    df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)

    # NOTE(review): load_df returns an empty table when loading fails, so a
    # failed read followed by a successful push stores only this row.
    push_df(config, df)
    return load_df(config, cols)
|
|
|
|
|
|
|
|
def refresh_final():
    """Reload the final-submissions table via ``load_df``."""
    key = "final"
    return load_df(CONFIGS[key], COLUMNS[key])
|
|
|
|
|
|
|
|
|
|
|
# UI definition: an intro text followed by one tab per challenge and a tab for
# final submissions. Each tab has an input form, Submit / Refresh buttons and a
# read-only table. The initial table contents are loaded once, while this
# module-level block executes.
with gr.Blocks(title="Binding Challenges") as app:
    gr.Markdown("""# M-Boltz Hackathon\n
        Welcome to the M-Boltz Hack! This is the submission portal for the two binding challenges: Antibody–Antigen Binding and Allosteric–Orthosteric Ligand Binding.\n
        Please use the respective tabs to submit your results and view the leaderboard for each challenge. Once you are ready to make your final submission, please use the 'Final Submission' tab. If you have any questions or issues, don't hesitate to open a Discussion in the Community tab.\n
        """)

    # --- Antibody–antigen challenge ---------------------------------------
    with gr.Tab("Antibody–Antigen Binding Challenge"):
        gr.Markdown("""
            # Antibody-Antigen Binding Challenge
            The goal of this challenge is to improve Boltz-2 accuracy for predicting the correct poses of a VHH binding to an antigen.\n
            Accuracy will be measured through the Capri-Q docking assessment classification scores and the final winner will be determined based on the number of successful top-1 predictions on our *internal* test set. However, you are encouraged to submit results on the training set during the hack to see where you stack up.\n
            A prediction is deemed successful if the Capri-Q classification is either "high", "medium", or "acceptable".
            If multiple entries reach the same number of successful predictions, ties are broken by looking at the number of predictions with "High" classification, then with "Medium" classification and finally with "Acceptable" classification.
            If there is still a tie then, we will look at the mean RMSD across all successful predictions.
            """)
        aa_group = gr.Textbox(label="Group Name", placeholder="Your group name")
        with gr.Row():
            aa_successful = gr.Number(label="#Successful", value=0, precision=0)
            aa_high = gr.Number(label="#High", value=0, precision=0)
            aa_medium = gr.Number(label="#Medium", value=0, precision=0)
            aa_acceptable = gr.Number(label="#Acceptable", value=0, precision=0)
        with gr.Row():
            aa_submit = gr.Button("Submit")
            aa_refresh = gr.Button("Refresh table")
        aa_df = gr.Dataframe(
            value=load_df(CONFIGS["antibody"], COLUMNS["antibody"]),
            label="Submissions (Antibody–Antigen)",
            interactive=False,
            wrap=True,
        )
        # Input order must match submit_antibody's parameter order.
        aa_submit.click(
            submit_antibody,
            inputs=[aa_group, aa_successful, aa_high, aa_medium, aa_acceptable],
            outputs=aa_df,
        )
        aa_refresh.click(refresh_antibody, outputs=aa_df)

    # --- Ligand-binding challenge -----------------------------------------
    with gr.Tab("Allosteric–Orthosteric Ligand Binding Challenge"):
        gr.Markdown("""
            # Allosteric-Orthosteric Ligand Binding Challenge
            The goal of this challenge is to improve Boltz-2 accuracy for predicting the binding poses of either allosteric or orthosteric ligands.\n
            The winner will be determined by accuracy measured on our *internal* test set by calculating the RMSD between the top-1 prediction and the experimental pose. However, submit your intermediate results here to see where you stack up!
            """)
        li_group = gr.Textbox(label="Group Name", placeholder="Your group name")
        with gr.Row():
            li_rmsd_top1_all = gr.Number(label="RMSD top-1 (all structures)")
            li_rmsd_top1_allosteric = gr.Number(label="RMSD top-1 (allosteric)")
            li_rmsd_top1_orthosteric = gr.Number(label="RMSD top-1 (orthosteric)")
        with gr.Row():
            li_rmsd_top5_all = gr.Number(label="RMSD top-5 (all structures)")
            li_structures_under_2a = gr.Number(label="#structures with RMSD < 2A", value=0, precision=0)
        with gr.Row():
            li_submit = gr.Button("Submit")
            li_refresh = gr.Button("Refresh table")
        li_df = gr.Dataframe(
            value=load_df(CONFIGS["ligand"], COLUMNS["ligand"]),
            label="Submissions (Ligand Binding)",
            interactive=False,
            wrap=True,
        )
        # Input order must match submit_ligand's parameter order.
        li_submit.click(
            submit_ligand,
            inputs=[li_group, li_rmsd_top1_all, li_rmsd_top1_allosteric, li_rmsd_top1_orthosteric, li_rmsd_top5_all, li_structures_under_2a],
            outputs=li_df,
        )
        li_refresh.click(refresh_ligand, outputs=li_df)

    # --- Final submission --------------------------------------------------
    with gr.Tab("Final Submission"):
        fs_group = gr.Textbox(label="Group Name", placeholder="Your group name")
        fs_repo_url = gr.Textbox(label="Repository URL")
        fs_commit_sha = gr.Textbox(label="Commit SHA")
        fs_challenge_type = gr.Radio(
            ["allosteric-orthosteric binding", "antibody-antigen binding"],
            label="Challenge Type"
        )
        fs_description_link = gr.Textbox(label="Link to Markdown Description", placeholder="Link to a markdown page in your repo")
        with gr.Row():
            fs_submit = gr.Button("Submit")
            fs_refresh = gr.Button("Refresh table")
        fs_df = gr.Dataframe(
            value=load_df(CONFIGS["final"], COLUMNS["final"]),
            label="Final submissions",
            interactive=False,
            wrap=True,
        )
        # Input order must match submit_final's parameter order.
        fs_submit.click(
            submit_final,
            inputs=[fs_group, fs_repo_url, fs_commit_sha, fs_challenge_type, fs_description_link],
            outputs=fs_df,
        )
        fs_refresh.click(refresh_final, outputs=fs_df)


# Start the app only when this file is run as a script.
if __name__ == "__main__":
    app.launch()