diff --git a/model.safetensors b/model.safetensors index 3a18933d74baffa54797e9e26b3a70ef11aba782..515cf81b610b349f59557b2207056285283722ce 100644 --- a/model.safetensors +++ b/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:322077610c9bdb9d414492ab3e5c04da436e67c5a0c925c9296c6b44b093c759 +oid sha256:1d98ba19bca22aae235340178a672a43e55842ee59324f8afe927c23c8a32d93 size 3554214752 diff --git a/reward_data/all_rewards.csv b/reward_data/all_rewards.csv index ebcc6759f32c94d29c8b0cd47c52757f4f65578b..b1ff184eb641656fd5947c1625ab494418cac6f7 100644 --- a/reward_data/all_rewards.csv +++ b/reward_data/all_rewards.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:13b12e988c7f82da5a3582da570201393d2f5b9a7b51816f8070b90efbf012d4 -size 128894049 +oid sha256:1ca629b352c1b6aa6c766eeb8cd6b723abdda8286b28f52a243e972169e6c8f0 +size 179745457 diff --git a/reward_plots/advantage_plot_step_150.png b/reward_plots/advantage_plot_step_150.png new file mode 100644 index 0000000000000000000000000000000000000000..a7dd963c15c3e3e606b2e3557b6b97b5de738e96 Binary files /dev/null and b/reward_plots/advantage_plot_step_150.png differ diff --git a/reward_plots/advantage_plot_step_160.png b/reward_plots/advantage_plot_step_160.png new file mode 100644 index 0000000000000000000000000000000000000000..7d82e08861816f035f1241c55dc74a62500b8103 Binary files /dev/null and b/reward_plots/advantage_plot_step_160.png differ diff --git a/reward_plots/advantage_plot_step_170.png b/reward_plots/advantage_plot_step_170.png new file mode 100644 index 0000000000000000000000000000000000000000..b45e893b2ad701faf97db8e76e31dc2d45c53e67 Binary files /dev/null and b/reward_plots/advantage_plot_step_170.png differ diff --git a/reward_plots/advantage_plot_step_180.png b/reward_plots/advantage_plot_step_180.png new file mode 100644 index 0000000000000000000000000000000000000000..f1c2916a0282a1be38c943fbcae7c0365cbd2689 Binary files /dev/null and b/reward_plots/advantage_plot_step_180.png differ diff --git a/reward_plots/advantage_plot_step_190.png b/reward_plots/advantage_plot_step_190.png new file mode 100644 index 0000000000000000000000000000000000000000..a3ee8dc4e75371fa53edf6d65e158a83319ea974 Binary files /dev/null and b/reward_plots/advantage_plot_step_190.png differ diff --git a/reward_plots/advantage_plot_step_200.png b/reward_plots/advantage_plot_step_200.png new file mode 100644 index 0000000000000000000000000000000000000000..14ab08122c016b76a21b4265db34cb40b976951e Binary files /dev/null and b/reward_plots/advantage_plot_step_200.png differ diff --git a/reward_plots/advantage_plot_step_210.png b/reward_plots/advantage_plot_step_210.png new file mode 100644 index 0000000000000000000000000000000000000000..98a69c19f5e71b81b5d1e004050fa25758f3d84a Binary files /dev/null and b/reward_plots/advantage_plot_step_210.png differ diff --git a/reward_plots/advantage_plot_step_220.png b/reward_plots/advantage_plot_step_220.png new file mode 100644 index 0000000000000000000000000000000000000000..ce1b6c4092fd16a8227d096967ffcdc79ec6ee9e Binary files /dev/null and b/reward_plots/advantage_plot_step_220.png differ diff --git a/reward_plots/advantage_plot_step_230.png b/reward_plots/advantage_plot_step_230.png new file mode 100644 index 0000000000000000000000000000000000000000..6b23995978b7f188d743f5cadd75896d84d6179e Binary files /dev/null and b/reward_plots/advantage_plot_step_230.png differ diff --git a/reward_plots/advantage_plot_step_240.png b/reward_plots/advantage_plot_step_240.png new file mode 100644 index 0000000000000000000000000000000000000000..6bbdd7c900d634f03a3466e59d7206562831dea0 Binary files /dev/null and b/reward_plots/advantage_plot_step_240.png differ diff --git a/reward_plots/advantage_plot_step_250.png b/reward_plots/advantage_plot_step_250.png new file mode 100644 index 0000000000000000000000000000000000000000..52d1213cc36b00bb87d01a898e3b1e9e3775c495 Binary files /dev/null and b/reward_plots/advantage_plot_step_250.png differ diff --git a/reward_plots/advantage_plot_step_260.png b/reward_plots/advantage_plot_step_260.png new file mode 100644 index 0000000000000000000000000000000000000000..f967d3b70161fc3f914d947dae9be854b74328c4 Binary files /dev/null and b/reward_plots/advantage_plot_step_260.png differ diff --git a/reward_plots/advantage_plot_step_270.png b/reward_plots/advantage_plot_step_270.png new file mode 100644 index 0000000000000000000000000000000000000000..563640d81c557ba06af72436847f6031f84a205d Binary files /dev/null and b/reward_plots/advantage_plot_step_270.png differ diff --git a/reward_plots/advantage_plot_step_280.png b/reward_plots/advantage_plot_step_280.png new file mode 100644 index 0000000000000000000000000000000000000000..ac7bfd901f9cdee9d7bfdc9f7b623df14e08fcc5 Binary files /dev/null and b/reward_plots/advantage_plot_step_280.png differ diff --git a/reward_plots/advantage_plot_step_290.png b/reward_plots/advantage_plot_step_290.png new file mode 100644 index 0000000000000000000000000000000000000000..34f1b0082bb150eb7ac58f1e54241aeb56000a58 Binary files /dev/null and b/reward_plots/advantage_plot_step_290.png differ diff --git a/reward_plots/advantage_plot_step_300.png b/reward_plots/advantage_plot_step_300.png new file mode 100644 index 0000000000000000000000000000000000000000..563c083415f13acf503fa4c9acdc21f6fffcd662 Binary files /dev/null and b/reward_plots/advantage_plot_step_300.png differ diff --git a/reward_plots/advantage_plot_step_310.png b/reward_plots/advantage_plot_step_310.png new file mode 100644 index 0000000000000000000000000000000000000000..e42fcc61f89a0e761c404cf30a6a3b444dda595d Binary files /dev/null and b/reward_plots/advantage_plot_step_310.png differ diff --git a/reward_plots/advantage_plot_step_320.png b/reward_plots/advantage_plot_step_320.png new file mode 100644 index 0000000000000000000000000000000000000000..13f17b51993f4decd5de6c7aaace49f233721e2c Binary files /dev/null and b/reward_plots/advantage_plot_step_320.png differ diff --git a/reward_plots/advantage_plot_step_330.png b/reward_plots/advantage_plot_step_330.png new file mode 100644 index 0000000000000000000000000000000000000000..831981ab877a6f60a60bf046655f0b699049091e Binary files /dev/null and b/reward_plots/advantage_plot_step_330.png differ diff --git a/reward_plots/advantage_plot_step_340.png b/reward_plots/advantage_plot_step_340.png new file mode 100644 index 0000000000000000000000000000000000000000..878f307f24b85526ea286c0fa3a7b9767775abf8 Binary files /dev/null and b/reward_plots/advantage_plot_step_340.png differ diff --git a/reward_plots/advantage_plot_step_350.png b/reward_plots/advantage_plot_step_350.png new file mode 100644 index 0000000000000000000000000000000000000000..f269f2001bc058bb9fc1d456e6f02c700eda080e Binary files /dev/null and b/reward_plots/advantage_plot_step_350.png differ diff --git a/reward_plots/advantage_plot_step_360.png b/reward_plots/advantage_plot_step_360.png new file mode 100644 index 0000000000000000000000000000000000000000..34c4a4b92f25764824e4b32787c7e34c92c4cba0 Binary files /dev/null and b/reward_plots/advantage_plot_step_360.png differ diff --git a/reward_plots/advantage_plot_step_370.png b/reward_plots/advantage_plot_step_370.png new file mode 100644 index 0000000000000000000000000000000000000000..679ef3f6075bb99fffd73b998342b5256793bae3 Binary files /dev/null and b/reward_plots/advantage_plot_step_370.png differ diff --git a/reward_plots/advantage_plot_step_380.png b/reward_plots/advantage_plot_step_380.png new file mode 100644 index 0000000000000000000000000000000000000000..de5fc6aa1514cfe1d50dc9fbb2571c6da43f633d Binary files /dev/null and b/reward_plots/advantage_plot_step_380.png differ diff --git a/reward_plots/advantage_plot_step_390.png b/reward_plots/advantage_plot_step_390.png new file mode 100644 index 0000000000000000000000000000000000000000..a429eaf032af8064a510ca7a5dcaa840b09c263a Binary files /dev/null and b/reward_plots/advantage_plot_step_390.png differ diff --git a/reward_plots/advantage_plot_step_400.png b/reward_plots/advantage_plot_step_400.png new file mode 100644 index 0000000000000000000000000000000000000000..5eafbca8698d63ea7196401302b10305eb6a7207 Binary files /dev/null and b/reward_plots/advantage_plot_step_400.png differ diff --git a/reward_plots/advantage_plot_step_410.png b/reward_plots/advantage_plot_step_410.png new file mode 100644 index 0000000000000000000000000000000000000000..fca0fb62d51fe0484812cce6bb898156bf220630 Binary files /dev/null and b/reward_plots/advantage_plot_step_410.png differ diff --git a/reward_plots/advantage_plot_step_420.png b/reward_plots/advantage_plot_step_420.png new file mode 100644 index 0000000000000000000000000000000000000000..75583a37331b4f8238e1c0378da1edf98ecb6e9d Binary files /dev/null and b/reward_plots/advantage_plot_step_420.png differ diff --git a/reward_plots/advantage_plot_step_430.png b/reward_plots/advantage_plot_step_430.png new file mode 100644 index 0000000000000000000000000000000000000000..4ed7f723bd115367ec84c2cd83b2e90f0517a783 Binary files /dev/null and b/reward_plots/advantage_plot_step_430.png differ diff --git a/reward_plots/advantage_plot_step_440.png b/reward_plots/advantage_plot_step_440.png new file mode 100644 index 0000000000000000000000000000000000000000..7e9dabb515dddf269d51aa67706b61b3a4d37c40 Binary files /dev/null and b/reward_plots/advantage_plot_step_440.png differ diff --git a/reward_plots/reward_comparison_step_150.png b/reward_plots/reward_comparison_step_150.png new file mode 100644 index 0000000000000000000000000000000000000000..1bb23b78f6df28758a39ed2ce67a9cbb7bd03de9 Binary files /dev/null and b/reward_plots/reward_comparison_step_150.png differ diff --git a/reward_plots/reward_comparison_step_160.png b/reward_plots/reward_comparison_step_160.png new file mode 100644 index 0000000000000000000000000000000000000000..86186a49bcdd7405622d26c726294de5abcc20ce Binary files /dev/null and b/reward_plots/reward_comparison_step_160.png differ diff --git a/reward_plots/reward_comparison_step_170.png b/reward_plots/reward_comparison_step_170.png new file mode 100644 index 0000000000000000000000000000000000000000..b6f00ac017125be07f8715828fd7c4e3d4df3223 Binary files /dev/null and b/reward_plots/reward_comparison_step_170.png differ diff --git a/reward_plots/reward_comparison_step_180.png b/reward_plots/reward_comparison_step_180.png new file mode 100644 index 0000000000000000000000000000000000000000..cd7839c275345225da7e5cf116f17ba1a8639a23 Binary files /dev/null and b/reward_plots/reward_comparison_step_180.png differ diff --git a/reward_plots/reward_comparison_step_190.png b/reward_plots/reward_comparison_step_190.png new file mode 100644 index 0000000000000000000000000000000000000000..cd25563a84ed9500fb3173feb8ebd598d70c02e1 Binary files /dev/null and b/reward_plots/reward_comparison_step_190.png differ diff --git a/reward_plots/reward_comparison_step_200.png b/reward_plots/reward_comparison_step_200.png new file mode 100644 index 0000000000000000000000000000000000000000..1502152adcc36d3dd2d077b943180bc5772b4ea1 Binary files /dev/null and b/reward_plots/reward_comparison_step_200.png differ diff --git a/reward_plots/reward_comparison_step_210.png b/reward_plots/reward_comparison_step_210.png new file mode 100644 index 0000000000000000000000000000000000000000..1c61f3d0c59e6595970ae413ac22ed7f1f7b6e25 Binary files /dev/null and b/reward_plots/reward_comparison_step_210.png differ diff --git a/reward_plots/reward_comparison_step_220.png b/reward_plots/reward_comparison_step_220.png new file mode 100644 index 0000000000000000000000000000000000000000..44b97ec987ea6a4978bfe11b1b43d0d6d36c2751 Binary files /dev/null and b/reward_plots/reward_comparison_step_220.png differ diff --git a/reward_plots/reward_comparison_step_230.png b/reward_plots/reward_comparison_step_230.png new file mode 100644 index 0000000000000000000000000000000000000000..4af82847ea067090851957d17038e4d90485362d Binary files /dev/null and b/reward_plots/reward_comparison_step_230.png differ diff --git a/reward_plots/reward_comparison_step_240.png b/reward_plots/reward_comparison_step_240.png new file mode 100644 index 0000000000000000000000000000000000000000..b9ee13a3c25492ad28a7ccc4327d0a5b8c657bfa Binary files /dev/null and b/reward_plots/reward_comparison_step_240.png differ diff --git a/reward_plots/reward_comparison_step_250.png b/reward_plots/reward_comparison_step_250.png new file mode 100644 index 0000000000000000000000000000000000000000..2ea54cd3058fb7eb02e72a757caaae0d10726e77 Binary files /dev/null and b/reward_plots/reward_comparison_step_250.png differ diff --git a/reward_plots/reward_comparison_step_260.png b/reward_plots/reward_comparison_step_260.png new file mode 100644 index 0000000000000000000000000000000000000000..01508f1ff0cdcb5aceec16f2bfdc7efffbdaceef Binary files /dev/null and b/reward_plots/reward_comparison_step_260.png differ diff --git a/reward_plots/reward_comparison_step_270.png b/reward_plots/reward_comparison_step_270.png new file mode 100644 index 0000000000000000000000000000000000000000..ba6edcd7fafefc4dc56d5bf3f9f574925d7d3027 Binary files /dev/null and b/reward_plots/reward_comparison_step_270.png differ diff --git a/reward_plots/reward_comparison_step_280.png b/reward_plots/reward_comparison_step_280.png new file mode 100644 index 0000000000000000000000000000000000000000..cf618a4709520013166c1521f9f3c535e63c7b42 Binary files /dev/null and b/reward_plots/reward_comparison_step_280.png differ diff --git a/reward_plots/reward_comparison_step_290.png b/reward_plots/reward_comparison_step_290.png new file mode 100644 index 0000000000000000000000000000000000000000..e95a80d2fb378339975a6fe1463566c65c087b81 Binary files /dev/null and b/reward_plots/reward_comparison_step_290.png differ diff --git a/reward_plots/reward_comparison_step_300.png b/reward_plots/reward_comparison_step_300.png new file mode 100644 index 0000000000000000000000000000000000000000..d41be99ae551f06da61ee90ed27424321f974887 Binary files /dev/null and b/reward_plots/reward_comparison_step_300.png differ diff --git a/reward_plots/reward_comparison_step_310.png b/reward_plots/reward_comparison_step_310.png new file mode 100644 index 0000000000000000000000000000000000000000..4510f3e900065e1b00ce85e901e8f1790078a40d Binary files /dev/null and b/reward_plots/reward_comparison_step_310.png differ diff --git a/reward_plots/reward_comparison_step_320.png b/reward_plots/reward_comparison_step_320.png new file mode 100644 index 0000000000000000000000000000000000000000..4c8b8ae30b753f8b34ca090e4911a60044fd6e65 Binary files /dev/null and b/reward_plots/reward_comparison_step_320.png differ diff --git a/reward_plots/reward_comparison_step_330.png b/reward_plots/reward_comparison_step_330.png new file mode 100644 index 0000000000000000000000000000000000000000..c49644cbc13e911e40dea717f5a5fc0b1c6f1420 Binary files /dev/null and b/reward_plots/reward_comparison_step_330.png differ diff --git a/reward_plots/reward_comparison_step_340.png b/reward_plots/reward_comparison_step_340.png new file mode 100644 index 0000000000000000000000000000000000000000..a35926630ae813eaf77751215337036b1b416d3e Binary files /dev/null and b/reward_plots/reward_comparison_step_340.png differ diff --git a/reward_plots/reward_comparison_step_350.png b/reward_plots/reward_comparison_step_350.png new file mode 100644 index 0000000000000000000000000000000000000000..b5564a8715514927437c69ae19ae5a38f1770221 Binary files /dev/null and b/reward_plots/reward_comparison_step_350.png differ diff --git a/reward_plots/reward_comparison_step_360.png b/reward_plots/reward_comparison_step_360.png new file mode 100644 index 0000000000000000000000000000000000000000..39ffe10831836f0a9e7f796ae6c1348384096e7f Binary files /dev/null and b/reward_plots/reward_comparison_step_360.png differ diff --git a/reward_plots/reward_comparison_step_370.png b/reward_plots/reward_comparison_step_370.png new file mode 100644 index 0000000000000000000000000000000000000000..f40ece8af10cd21bde89e762e035d08d0ffb9095 Binary files /dev/null and b/reward_plots/reward_comparison_step_370.png differ diff --git a/reward_plots/reward_comparison_step_380.png b/reward_plots/reward_comparison_step_380.png new file mode 100644 index 0000000000000000000000000000000000000000..9e4dee98150f8f9b04b091ee384aea7a51e9cf54 Binary files /dev/null and b/reward_plots/reward_comparison_step_380.png differ diff --git a/reward_plots/reward_comparison_step_390.png b/reward_plots/reward_comparison_step_390.png new file mode 100644 index 0000000000000000000000000000000000000000..f2c5d09d76228ee1c1581cc6436ff2aab8a4a3da Binary files /dev/null and b/reward_plots/reward_comparison_step_390.png differ diff --git a/reward_plots/reward_comparison_step_400.png b/reward_plots/reward_comparison_step_400.png new file mode 100644 index 0000000000000000000000000000000000000000..a4b2f4ad76bb71ab09df318c0e07622d9e86421b Binary files /dev/null and b/reward_plots/reward_comparison_step_400.png differ diff --git a/reward_plots/reward_comparison_step_410.png b/reward_plots/reward_comparison_step_410.png new file mode 100644 index 0000000000000000000000000000000000000000..533e0a2880ae7b4127bbb150e59f7fcfe0eff53f Binary files /dev/null and b/reward_plots/reward_comparison_step_410.png differ diff --git a/reward_plots/reward_comparison_step_420.png b/reward_plots/reward_comparison_step_420.png new file mode 100644 index 0000000000000000000000000000000000000000..5075226185ac9c6c6eedda81401dd30213a6a0a9 Binary files /dev/null and b/reward_plots/reward_comparison_step_420.png differ diff --git a/reward_plots/reward_comparison_step_430.png b/reward_plots/reward_comparison_step_430.png new file mode 100644 index 0000000000000000000000000000000000000000..f2ca780197be42b0d161858ebf3f839245668828 Binary files /dev/null and b/reward_plots/reward_comparison_step_430.png differ diff --git a/reward_plots/reward_comparison_step_440.png b/reward_plots/reward_comparison_step_440.png new file mode 100644 index 0000000000000000000000000000000000000000..1d7cee681efd83461f974ca224e7b365f84cf215 Binary files /dev/null and b/reward_plots/reward_comparison_step_440.png differ diff --git a/training_args.bin b/training_args.bin index 0b341e0b25eaf6d579a9d0b3460ff5df282ef0e1..4d077a4eba7d8239acc787e77118c82e85cbbfd7 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dcb2a258189d1878bab2a6f8439a361c5ba40cf054f483b72bd325d5106bacd9 +oid sha256:dca18296ef537dfcf0d816ab3cb71eced74ff761ea9e1e4dd5464ba4f50f2626 size 8504