diff --git a/model.safetensors b/model.safetensors index 81357d66171f67ecbb3eca96c0f1ac33caffece9..88bc3d9e89cb4f170691aadaa3e0c732b5ac145e 100644 --- a/model.safetensors +++ b/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:526abd51fabadb3de567d0815ed209d5fa39e63259b23259d890655210d93882 +oid sha256:a1c0c21b3fde6eae3f5b0efab95e70962e03a706b8b735830938126492a75a97 size 3554214752 diff --git a/reward_data/all_rewards.csv b/reward_data/all_rewards.csv index 4df7385dd0291065939a232ad3c3d2308560eb05..47893dce04745bbcdb13880ec7a6150dfc4b0c09 100644 --- a/reward_data/all_rewards.csv +++ b/reward_data/all_rewards.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43c4b9eba74a847869fcf0cecbd263bb7249a038a139abb0e0a73e8c6d1251a0 -size 165166389 +oid sha256:7975414281b33335edf63f902814e9f8652d26def3823295373a47439f917c43 +size 180446461 diff --git a/reward_plots/advantage_plot_step_250.png b/reward_plots/advantage_plot_step_250.png new file mode 100644 index 0000000000000000000000000000000000000000..db31556f4d905fe3f5a5b1a91899a89d32fefff5 Binary files /dev/null and b/reward_plots/advantage_plot_step_250.png differ diff --git a/reward_plots/advantage_plot_step_260.png b/reward_plots/advantage_plot_step_260.png new file mode 100644 index 0000000000000000000000000000000000000000..af6aa6d6ee6d744d344e8ab7e342633e33549a31 Binary files /dev/null and b/reward_plots/advantage_plot_step_260.png differ diff --git a/reward_plots/advantage_plot_step_270.png b/reward_plots/advantage_plot_step_270.png new file mode 100644 index 0000000000000000000000000000000000000000..d533f55ecc79e1d187c4a5655c72a2239c80fda1 Binary files /dev/null and b/reward_plots/advantage_plot_step_270.png differ diff --git a/reward_plots/advantage_plot_step_280.png b/reward_plots/advantage_plot_step_280.png new file mode 100644 index 0000000000000000000000000000000000000000..3d482a4d619d2a113dfdc7537a4f8108262f6aef Binary files /dev/null and b/reward_plots/advantage_plot_step_280.png differ diff --git a/reward_plots/advantage_plot_step_290.png b/reward_plots/advantage_plot_step_290.png new file mode 100644 index 0000000000000000000000000000000000000000..f38fdec816a434828ba8b0eaa1f9f1b107fca4cd Binary files /dev/null and b/reward_plots/advantage_plot_step_290.png differ diff --git a/reward_plots/advantage_plot_step_300.png b/reward_plots/advantage_plot_step_300.png new file mode 100644 index 0000000000000000000000000000000000000000..b9433d6deeb2317382e6ee4000a8e58083e06b78 Binary files /dev/null and b/reward_plots/advantage_plot_step_300.png differ diff --git a/reward_plots/advantage_plot_step_310.png b/reward_plots/advantage_plot_step_310.png new file mode 100644 index 0000000000000000000000000000000000000000..63ce9e07ef744382e1aa036b8e26f0fa9f1bbd1b Binary files /dev/null and b/reward_plots/advantage_plot_step_310.png differ diff --git a/reward_plots/advantage_plot_step_320.png b/reward_plots/advantage_plot_step_320.png new file mode 100644 index 0000000000000000000000000000000000000000..f40c752ebd6a6c0c89885d88f1c42e3ca6a058ae Binary files /dev/null and b/reward_plots/advantage_plot_step_320.png differ diff --git a/reward_plots/advantage_plot_step_330.png b/reward_plots/advantage_plot_step_330.png new file mode 100644 index 0000000000000000000000000000000000000000..2ac52bae97f67efa1c29fafda11a562ec0644199 Binary files /dev/null and b/reward_plots/advantage_plot_step_330.png differ diff --git a/reward_plots/advantage_plot_step_340.png b/reward_plots/advantage_plot_step_340.png new file mode 100644 index 0000000000000000000000000000000000000000..1bbffc53f6311cc037383c3bbab8be5658b31c5c Binary files /dev/null and b/reward_plots/advantage_plot_step_340.png differ diff --git a/reward_plots/advantage_plot_step_350.png b/reward_plots/advantage_plot_step_350.png new file mode 100644 index 0000000000000000000000000000000000000000..483f90fa7eee1edb9bc79fa91b1b3c61f9e51add Binary files /dev/null and b/reward_plots/advantage_plot_step_350.png differ diff --git a/reward_plots/advantage_plot_step_360.png b/reward_plots/advantage_plot_step_360.png new file mode 100644 index 0000000000000000000000000000000000000000..87576a883960442b51b6f6fcf52c89150e6becae Binary files /dev/null and b/reward_plots/advantage_plot_step_360.png differ diff --git a/reward_plots/advantage_plot_step_370.png b/reward_plots/advantage_plot_step_370.png new file mode 100644 index 0000000000000000000000000000000000000000..4e9de9f4c35de17ede0aed8716c458c1d28a68c7 Binary files /dev/null and b/reward_plots/advantage_plot_step_370.png differ diff --git a/reward_plots/advantage_plot_step_380.png b/reward_plots/advantage_plot_step_380.png new file mode 100644 index 0000000000000000000000000000000000000000..d4461e8925cc33934cabe353008c84640dc5c836 Binary files /dev/null and b/reward_plots/advantage_plot_step_380.png differ diff --git a/reward_plots/advantage_plot_step_390.png b/reward_plots/advantage_plot_step_390.png new file mode 100644 index 0000000000000000000000000000000000000000..d45742669f28affe6c5f6fd0dae62842b5b0bf24 Binary files /dev/null and b/reward_plots/advantage_plot_step_390.png differ diff --git a/reward_plots/advantage_plot_step_400.png b/reward_plots/advantage_plot_step_400.png new file mode 100644 index 0000000000000000000000000000000000000000..148cbb6eeb4f6b7a6c87bfc11cf2b306021e65ea Binary files /dev/null and b/reward_plots/advantage_plot_step_400.png differ diff --git a/reward_plots/advantage_plot_step_410.png b/reward_plots/advantage_plot_step_410.png new file mode 100644 index 0000000000000000000000000000000000000000..a82c7ce33ba6e48b311435f22fba1e554b897b57 Binary files /dev/null and b/reward_plots/advantage_plot_step_410.png differ diff --git a/reward_plots/advantage_plot_step_420.png b/reward_plots/advantage_plot_step_420.png new file mode 100644 index 0000000000000000000000000000000000000000..c69b5e95fb351b9c4578b2824bfd258d15eb3045 Binary files /dev/null and b/reward_plots/advantage_plot_step_420.png differ diff --git a/reward_plots/advantage_plot_step_430.png b/reward_plots/advantage_plot_step_430.png new file mode 100644 index 0000000000000000000000000000000000000000..be67cf322dba93e30bfeafa5ddcb69cc46e227b1 Binary files /dev/null and b/reward_plots/advantage_plot_step_430.png differ diff --git a/reward_plots/advantage_plot_step_440.png b/reward_plots/advantage_plot_step_440.png new file mode 100644 index 0000000000000000000000000000000000000000..ad5e3a595567fc27a27b9283f4bafe65c6a24d67 Binary files /dev/null and b/reward_plots/advantage_plot_step_440.png differ diff --git a/reward_plots/advantage_plot_step_450.png b/reward_plots/advantage_plot_step_450.png new file mode 100644 index 0000000000000000000000000000000000000000..3b140d65e2a04c1af7ae00fe38d9a1e5a6adfbda Binary files /dev/null and b/reward_plots/advantage_plot_step_450.png differ diff --git a/reward_plots/advantage_plot_step_460.png b/reward_plots/advantage_plot_step_460.png new file mode 100644 index 0000000000000000000000000000000000000000..0d7a6531c29d21a08915688d5dc8320367545ca0 Binary files /dev/null and b/reward_plots/advantage_plot_step_460.png differ diff --git a/reward_plots/advantage_plot_step_470.png b/reward_plots/advantage_plot_step_470.png new file mode 100644 index 0000000000000000000000000000000000000000..70211c464d2164ab225f89243ea458301a628675 Binary files /dev/null and b/reward_plots/advantage_plot_step_470.png differ diff --git a/reward_plots/advantage_plot_step_480.png b/reward_plots/advantage_plot_step_480.png new file mode 100644 index 0000000000000000000000000000000000000000..a15aaa4a4d2caf1d1e80a10c50f8e5101a871532 Binary files /dev/null and b/reward_plots/advantage_plot_step_480.png differ diff --git a/reward_plots/advantage_plot_step_490.png b/reward_plots/advantage_plot_step_490.png new file mode 100644 index 0000000000000000000000000000000000000000..4f79ce8da6aef92cbec17aabb88876c4e3d75c98 Binary files /dev/null and b/reward_plots/advantage_plot_step_490.png differ diff --git a/reward_plots/reward_comparison_step_250.png b/reward_plots/reward_comparison_step_250.png new file mode 100644 index 0000000000000000000000000000000000000000..cd3a75923e517150e46bd9834aa5e66c40f349b8 Binary files /dev/null and b/reward_plots/reward_comparison_step_250.png differ diff --git a/reward_plots/reward_comparison_step_260.png b/reward_plots/reward_comparison_step_260.png new file mode 100644 index 0000000000000000000000000000000000000000..ed9815ebce2fd45b14fca0dc1688b18636eb68a3 Binary files /dev/null and b/reward_plots/reward_comparison_step_260.png differ diff --git a/reward_plots/reward_comparison_step_270.png b/reward_plots/reward_comparison_step_270.png new file mode 100644 index 0000000000000000000000000000000000000000..8938aa66caae9df17dead5eded0bd71f0679362a Binary files /dev/null and b/reward_plots/reward_comparison_step_270.png differ diff --git a/reward_plots/reward_comparison_step_280.png b/reward_plots/reward_comparison_step_280.png new file mode 100644 index 0000000000000000000000000000000000000000..63a2fe562236f7c5a6d72e4c9a8854aa785f8d5f Binary files /dev/null and b/reward_plots/reward_comparison_step_280.png differ diff --git a/reward_plots/reward_comparison_step_290.png b/reward_plots/reward_comparison_step_290.png new file mode 100644 index 0000000000000000000000000000000000000000..80e9eb15b5bcd2af9cec9c687192940dba29960d Binary files /dev/null and b/reward_plots/reward_comparison_step_290.png differ diff --git a/reward_plots/reward_comparison_step_300.png b/reward_plots/reward_comparison_step_300.png new file mode 100644 index 0000000000000000000000000000000000000000..e44acfc68965e93f67c0a08ea96ca28351d93122 Binary files /dev/null and b/reward_plots/reward_comparison_step_300.png differ diff --git a/reward_plots/reward_comparison_step_310.png b/reward_plots/reward_comparison_step_310.png new file mode 100644 index 0000000000000000000000000000000000000000..48c40d421b3461362e430b01f670023b8bab7427 Binary files /dev/null and b/reward_plots/reward_comparison_step_310.png differ diff --git a/reward_plots/reward_comparison_step_320.png b/reward_plots/reward_comparison_step_320.png new file mode 100644 index 0000000000000000000000000000000000000000..41cb60849b8bd6f52965d3b77babfa315a631d9d Binary files /dev/null and b/reward_plots/reward_comparison_step_320.png differ diff --git a/reward_plots/reward_comparison_step_330.png b/reward_plots/reward_comparison_step_330.png new file mode 100644 index 0000000000000000000000000000000000000000..e0efe85b6bab3a1ed6f996788b18e696f0b87e24 Binary files /dev/null and b/reward_plots/reward_comparison_step_330.png differ diff --git a/reward_plots/reward_comparison_step_340.png b/reward_plots/reward_comparison_step_340.png new file mode 100644 index 0000000000000000000000000000000000000000..5212cf3c1751ef94cfb54c84808e52350734b7f1 Binary files /dev/null and b/reward_plots/reward_comparison_step_340.png differ diff --git a/reward_plots/reward_comparison_step_350.png b/reward_plots/reward_comparison_step_350.png new file mode 100644 index 0000000000000000000000000000000000000000..80345693db8b5617a1546f6b4b9c7165c4a6db46 Binary files /dev/null and b/reward_plots/reward_comparison_step_350.png differ diff --git a/reward_plots/reward_comparison_step_360.png b/reward_plots/reward_comparison_step_360.png new file mode 100644 index 0000000000000000000000000000000000000000..1f06be43e409bc1d345d34f4c74a0b007a335b9a Binary files /dev/null and b/reward_plots/reward_comparison_step_360.png differ diff --git a/reward_plots/reward_comparison_step_370.png b/reward_plots/reward_comparison_step_370.png new file mode 100644 index 0000000000000000000000000000000000000000..b19a30c70129ac4e160980ac3ec8c3df60464604 Binary files /dev/null and b/reward_plots/reward_comparison_step_370.png differ diff --git a/reward_plots/reward_comparison_step_380.png b/reward_plots/reward_comparison_step_380.png new file mode 100644 index 0000000000000000000000000000000000000000..e9285137a33e5c5253a06837bb622252ccdcd884 Binary files /dev/null and b/reward_plots/reward_comparison_step_380.png differ diff --git a/reward_plots/reward_comparison_step_390.png b/reward_plots/reward_comparison_step_390.png new file mode 100644 index 0000000000000000000000000000000000000000..4370303911a715a30ee92a9c52e8ff289ab0aa78 Binary files /dev/null and b/reward_plots/reward_comparison_step_390.png differ diff --git a/reward_plots/reward_comparison_step_400.png b/reward_plots/reward_comparison_step_400.png new file mode 100644 index 0000000000000000000000000000000000000000..ed2215371b18020bfd0760caf6ad23d31d46e29d Binary files /dev/null and b/reward_plots/reward_comparison_step_400.png differ diff --git a/reward_plots/reward_comparison_step_410.png b/reward_plots/reward_comparison_step_410.png new file mode 100644 index 0000000000000000000000000000000000000000..cf444748231cce1b9bf6d9ffcaa7be386a8b7899 Binary files /dev/null and b/reward_plots/reward_comparison_step_410.png differ diff --git a/reward_plots/reward_comparison_step_420.png b/reward_plots/reward_comparison_step_420.png new file mode 100644 index 0000000000000000000000000000000000000000..71f10fdd9f51715730a6c644b6dbae997d74b813 Binary files /dev/null and b/reward_plots/reward_comparison_step_420.png differ diff --git a/reward_plots/reward_comparison_step_430.png b/reward_plots/reward_comparison_step_430.png new file mode 100644 index 0000000000000000000000000000000000000000..0a0499610ea946638f2d54dc50b729593153f7b2 Binary files /dev/null and b/reward_plots/reward_comparison_step_430.png differ diff --git a/reward_plots/reward_comparison_step_440.png b/reward_plots/reward_comparison_step_440.png new file mode 100644 index 0000000000000000000000000000000000000000..07e3d6bbe3518eff0d58f53f60d4f670cffad4dd Binary files /dev/null and b/reward_plots/reward_comparison_step_440.png differ diff --git a/reward_plots/reward_comparison_step_450.png b/reward_plots/reward_comparison_step_450.png new file mode 100644 index 0000000000000000000000000000000000000000..1ed3dc6c35f1c2c59c06d3bbcceba4cc473b9d37 Binary files /dev/null and b/reward_plots/reward_comparison_step_450.png differ diff --git a/reward_plots/reward_comparison_step_460.png b/reward_plots/reward_comparison_step_460.png new file mode 100644 index 0000000000000000000000000000000000000000..be6d00391a7339bfc955cd03cfda4412dfab59e7 Binary files /dev/null and b/reward_plots/reward_comparison_step_460.png differ diff --git a/reward_plots/reward_comparison_step_470.png b/reward_plots/reward_comparison_step_470.png new file mode 100644 index 0000000000000000000000000000000000000000..0820f8d9abf0ce50f319ff1dffdf15295153561e Binary files /dev/null and b/reward_plots/reward_comparison_step_470.png differ diff --git a/reward_plots/reward_comparison_step_480.png b/reward_plots/reward_comparison_step_480.png new file mode 100644 index 0000000000000000000000000000000000000000..04c9bc06af86145fce6797a2db8960c4298427be Binary files /dev/null and b/reward_plots/reward_comparison_step_480.png differ diff --git a/reward_plots/reward_comparison_step_490.png b/reward_plots/reward_comparison_step_490.png new file mode 100644 index 0000000000000000000000000000000000000000..c5a4d9d15409aa5c7d2232ef30a6d0d07996311a Binary files /dev/null and b/reward_plots/reward_comparison_step_490.png differ