Ekjaer
/

MLstructureMining

Tabular Classification

pair-distribution-function

Model card Files Files and versions

Ekjaer committed on Mar 3, 2024

Commit

750ff86

·

verified ·

1 Parent(s): 9345112

Update README.md

Files changed (1) hide show

README.md +42 -0

README.md CHANGED Viewed

@@ -45,7 +45,45 @@ Use the code below to get started with the model.
 import xgboost as xgb
 import pandas as pd
 N_CPU = 8  # Number of CPUs used
 # Load model
 bst = xgb.Booster({'nthread': N_CPU})
@@ -60,6 +98,10 @@ labels = pd.read_csv("labels.csv", index_col=0)
 # Do inference
 pred = bst.predict(data_xgb)
 ```

 import xgboost as xgb
 import pandas as pd
+def show_best(pred: np.ndarray,
+              best_list: np.ndarray,
+              df_stru_catalog: pd.DataFrame,
+              num_show: int) -> None:
+    """
+    Display the best predictions based on the model output.
+    Parameters
+    ----------
+    pred : np.ndarray
+        Predictions from the model.
+    best_list : np.ndarray
+        List of best predictions.
+    df_stru_catalog : pd.DataFrame
+        The structure catalog associated with the model.
+    num_show : int
+        Number of top predictions to show.
+    Returns
+    -------
+    None
+    """
+    for count, idx in enumerate(reversed(best_list[-num_show:])):
+        print(f"\n{count}) Probability: {pred[idx]*100:3.1f}%")
+        compo = clean_string(df_stru_catalog.iloc[idx]["composition"])
+        sgs = clean_string(df_stru_catalog.iloc[idx]["space_group_symmetry"])
+        print(f'    COD-IDs: {df_stru_catalog.iloc[idx]["Label"].rsplit(".",1)[0]}, composition: {compo[0]}, space group: {sgs[0]}')
+        if not pd.isna(df_stru_catalog.at[idx, "Similar"]):
+            similar_files = extract_filenames(df_stru_catalog.at[idx, "Similar"])
+            compo = clean_string(df_stru_catalog.iloc[idx]["composition"])
+            sgs = clean_string(df_stru_catalog.iloc[idx]["space_group_symmetry"])
+            for jdx in range(len(similar_files)):
+                print(f'    COD-IDs: {similar_files[jdx]}, composition: {compo[jdx]}, space group: {sgs[jdx]}')
 N_CPU = 8  # Number of CPUs used
+NUM_SHOW = 5  # Show the X best predictions
 # Load model
 bst = xgb.Booster({'nthread': N_CPU})
 # Do inference
 pred = bst.predict(data_xgb)
+# Show the NUM_SHOW most probable predictions
+best_list = np.argsort(pred)
+show_best(pred[0], best_list[0], df_stru_catalog, NUM_SHOW)
 ```