#!/usr/bin/env bash
#
# Writes a timestamped preprocessing config under configs/ and then runs
# teddy/data_processing/preprocessing/preprocess.py on the sample data with
# that config.
#
# All paths below are relative, so run this from the directory that contains
# data/, configs/ and teddy/ (presumably the repository root).
#
# Note on Poetry: `poetry shell` must not be called from a script. It starts an
# interactive subshell, so the remaining commands would only run after that
# shell exits -- and then outside the Poetry environment. The Python call goes
# through `poetry run` instead (see run_python).

# Abort on the first failing command or unset variable so that preprocess.py
# is never launched with a missing or half-written config file.
set -eu

# Runs Python with the given arguments.
# Uses `poetry run python` when Poetry is installed and the current directory
# holds a pyproject.toml; otherwise falls back to whichever `python` is on PATH
# (e.g. an environment that was activated before this script was started).
# Arguments: passed through to python unchanged.
# Returns:   the exit status of the python process.
run_python() {
  if command -v poetry >/dev/null 2>&1 && [ -f pyproject.toml ]; then
    poetry run python "$@"
  else
    python "$@"
  fi
}

# Timestamp string (e.g., 20230404123056) that makes each config file unique.
TS=$(date '+%Y%m%d%H%M%S')
CONFIG_FILE="configs/preprocessing_config_${TS}.json"

# The redirect below fails if configs/ does not exist yet.
mkdir -p -- "${CONFIG_FILE%/*}"

# 1) Generate a JSON config file on the fly.
# The delimiter is quoted: the JSON is literal and must never be subject to
# shell expansion.
cat <<'EOF' > "$CONFIG_FILE"
{
"load_dir": "data",
"save_dir": "data/processed",
"min_gene_counts": null,
"remove_assays": [],
"max_mitochondrial_prop": null,
"remove_cell_types": [],
"hvg_method": null,
"normalized_total": null,
"median_dict": "teddy/data_processing/utils/medians/data/teddy_gene_medians.json",
"log1p": false,
"compute_medians": false,
"median_column": "index",
"reference_id_only": false
}
EOF

# 2) Call preprocess.py, explicitly passing data_path, metadata_path, and
#    config_path.
run_python teddy/data_processing/preprocessing/preprocess.py \
  --data_path data/sample_data.h5ad \
  --metadata_path data/sample_data_metadata.json \
  --config_path "$CONFIG_FILE"