| { | |
| "RelicEnv": { | |
| "qwen2.5-3b-instruct": 0.18, | |
| "qwen2.5-7b-instruct": 0.396, | |
| "qwen2.5-14b-instruct": 0.8, | |
| "qwen2.5-32b-instruct": 0.8560000000000001, | |
| "qwen2.5-72b-instruct": 0.892, | |
| "llama-3.1-8b-instruct": 0.21600000000000003, | |
| "llama-3.1-70b-instruct": 0.6639999999999999, | |
| "llama-3.2-3b-instruct": 0.164, | |
| "llama-3.3-70b-instruct": 0.836, | |
| "mistral-large-instruct-2411": 0.8560000000000001, | |
| "gemma-2-27b-it": 0.544, | |
| "gemma-2-9b-it": 0.36400000000000005, | |
| "deepseek-v3": 0.9359999999999999, | |
| "deepseek-r1": 0.916, | |
| "qwq-32b": 0.9560000000000001, | |
| "Average": 0.6384 | |
| }, | |
| "HerbEnv": { | |
| "qwen2.5-3b-instruct": 0.184, | |
| "qwen2.5-7b-instruct": 0.304, | |
| "qwen2.5-14b-instruct": 0.784, | |
| "qwen2.5-32b-instruct": 0.8400000000000001, | |
| "qwen2.5-72b-instruct": 0.8039999999999999, | |
| "llama-3.1-8b-instruct": 0.30000000000000004, | |
| "llama-3.1-70b-instruct": 0.568, | |
| "llama-3.2-3b-instruct": 0.128, | |
| "llama-3.3-70b-instruct": 0.612, | |
| "mistral-large-instruct-2411": 0.76, | |
| "gemma-2-27b-it": 0.504, | |
| "gemma-2-9b-it": 0.18000000000000002, | |
| "deepseek-v3": 0.968, | |
| "deepseek-r1": 0.9359999999999999, | |
| "qwq-32b": 0.924, | |
| "Average": 0.5863999999999999 | |
| }, | |
| "TransdimensionalEnv": { | |
| "qwen2.5-3b-instruct": 0.156, | |
| "qwen2.5-7b-instruct": 0.38400000000000006, | |
| "qwen2.5-14b-instruct": 0.836, | |
| "qwen2.5-32b-instruct": 0.876, | |
| "qwen2.5-72b-instruct": 0.836, | |
| "llama-3.1-8b-instruct": 0.44399999999999995, | |
| "llama-3.1-70b-instruct": 0.828, | |
| "llama-3.2-3b-instruct": 0.172, | |
| "llama-3.3-70b-instruct": 0.86, | |
| "mistral-large-instruct-2411": 0.86, | |
| "gemma-2-27b-it": 0.5599999999999999, | |
| "gemma-2-9b-it": 0.516, | |
| "deepseek-v3": 0.968, | |
| "deepseek-r1": 0.9359999999999999, | |
| "qwq-32b": 0.968, | |
| "Average": 0.6799999999999999 | |
| }, | |
| "SorcererEnv": { | |
| "qwen2.5-3b-instruct": 0.16, | |
| "qwen2.5-7b-instruct": 0.32400000000000007, | |
| "qwen2.5-14b-instruct": 0.8039999999999999, | |
| "qwen2.5-32b-instruct": 0.8240000000000001, | |
| "qwen2.5-72b-instruct": 0.8320000000000001, | |
| "llama-3.1-8b-instruct": 0.276, | |
| "llama-3.1-70b-instruct": 0.6639999999999999, | |
| "llama-3.2-3b-instruct": 0.196, | |
| "llama-3.3-70b-instruct": 0.7360000000000001, | |
| "mistral-large-instruct-2411": 0.8, | |
| "gemma-2-27b-it": 0.5640000000000001, | |
| "gemma-2-9b-it": 0.28800000000000003, | |
| "deepseek-v3": 0.8640000000000001, | |
| "deepseek-r1": 0.8240000000000001, | |
| "qwq-32b": 0.8400000000000001, | |
| "Average": 0.5997333333333333 | |
| }, | |
| "QuantumEnv": { | |
| "qwen2.5-3b-instruct": 0.196, | |
| "qwen2.5-7b-instruct": 0.532, | |
| "qwen2.5-14b-instruct": 0.8720000000000001, | |
| "qwen2.5-32b-instruct": 0.9039999999999999, | |
| "qwen2.5-72b-instruct": 0.916, | |
| "llama-3.1-8b-instruct": 0.45600000000000007, | |
| "llama-3.1-70b-instruct": 0.7999999999999999, | |
| "llama-3.2-3b-instruct": 0.168, | |
| "llama-3.3-70b-instruct": 0.8480000000000001, | |
| "mistral-large-instruct-2411": 0.8720000000000001, | |
| "gemma-2-27b-it": 0.744, | |
| "gemma-2-9b-it": 0.544, | |
| "deepseek-v3": 0.884, | |
| "deepseek-r1": 0.8640000000000001, | |
| "qwq-32b": 0.868, | |
| "Average": 0.6978666666666666 | |
| }, | |
| "AstronomyEnv": { | |
| "qwen2.5-3b-instruct": 0.172, | |
| "qwen2.5-7b-instruct": 0.42800000000000005, | |
| "qwen2.5-14b-instruct": 0.716, | |
| "qwen2.5-32b-instruct": 0.676, | |
| "qwen2.5-72b-instruct": 0.748, | |
| "llama-3.1-8b-instruct": 0.336, | |
| "llama-3.1-70b-instruct": 0.692, | |
| "llama-3.2-3b-instruct": 0.176, | |
| "llama-3.3-70b-instruct": 0.6519999999999999, | |
| "mistral-large-instruct-2411": 0.7999999999999999, | |
| "gemma-2-27b-it": 0.508, | |
| "gemma-2-9b-it": 0.372, | |
| "deepseek-v3": 0.748, | |
| "deepseek-r1": 0.8200000000000001, | |
| "qwq-32b": 0.852, | |
| "Average": 0.5797333333333333 | |
| }, | |
| "MusicGenresEnv": { | |
| "qwen2.5-3b-instruct": 0.22000000000000003, | |
| "qwen2.5-7b-instruct": 0.42000000000000004, | |
| "qwen2.5-14b-instruct": 0.72, | |
| "qwen2.5-32b-instruct": 0.716, | |
| "qwen2.5-72b-instruct": 0.696, | |
| "llama-3.1-8b-instruct": 0.35200000000000004, | |
| "llama-3.1-70b-instruct": 0.6280000000000001, | |
| "llama-3.2-3b-instruct": 0.136, | |
| "llama-3.3-70b-instruct": 0.592, | |
| "mistral-large-instruct-2411": 0.732, | |
| "gemma-2-27b-it": 0.44800000000000006, | |
| "gemma-2-9b-it": 0.332, | |
| "deepseek-v3": 0.748, | |
| "deepseek-r1": 0.792, | |
| "qwq-32b": 0.876, | |
| "Average": 0.5605333333333334 | |
| }, | |
| "CloudEnv": { | |
| "qwen2.5-3b-instruct": 0.21199999999999997, | |
| "qwen2.5-7b-instruct": 0.42000000000000004, | |
| "qwen2.5-14b-instruct": 0.76, | |
| "qwen2.5-32b-instruct": 0.656, | |
| "qwen2.5-72b-instruct": 0.712, | |
| "llama-3.1-8b-instruct": 0.42000000000000004, | |
| "llama-3.1-70b-instruct": 0.664, | |
| "llama-3.2-3b-instruct": 0.22800000000000004, | |
| "llama-3.3-70b-instruct": 0.696, | |
| "mistral-large-instruct-2411": 0.8360000000000001, | |
| "gemma-2-27b-it": 0.6, | |
| "gemma-2-9b-it": 0.4, | |
| "deepseek-v3": 0.8200000000000001, | |
| "deepseek-r1": 0.908, | |
| "qwq-32b": 0.9120000000000001, | |
| "Average": 0.6162666666666667 | |
| }, | |
| "CuisineEnv": { | |
| "qwen2.5-3b-instruct": 0.21600000000000003, | |
| "qwen2.5-7b-instruct": 0.316, | |
| "qwen2.5-14b-instruct": 0.6960000000000001, | |
| "qwen2.5-32b-instruct": 0.664, | |
| "qwen2.5-72b-instruct": 0.656, | |
| "llama-3.1-8b-instruct": 0.22799999999999998, | |
| "llama-3.1-70b-instruct": 0.476, | |
| "llama-3.2-3b-instruct": 0.152, | |
| "llama-3.3-70b-instruct": 0.44400000000000006, | |
| "mistral-large-instruct-2411": 0.644, | |
| "gemma-2-27b-it": 0.27599999999999997, | |
| "gemma-2-9b-it": 0.156, | |
| "deepseek-v3": 0.8400000000000001, | |
| "deepseek-r1": 0.7959999999999999, | |
| "qwq-32b": 0.8800000000000001, | |
| "Average": 0.49599999999999994 | |
| }, | |
| "PlantEnv": { | |
| "qwen2.5-3b-instruct": 0.168, | |
| "qwen2.5-7b-instruct": 0.236, | |
| "qwen2.5-14b-instruct": 0.34, | |
| "qwen2.5-32b-instruct": 0.22000000000000003, | |
| "qwen2.5-72b-instruct": 0.22799999999999998, | |
| "llama-3.1-8b-instruct": 0.148, | |
| "llama-3.1-70b-instruct": 0.16, | |
| "llama-3.2-3b-instruct": 0.084, | |
| "llama-3.3-70b-instruct": 0.07599999999999998, | |
| "mistral-large-instruct-2411": 0.264, | |
| "gemma-2-27b-it": 0.14400000000000002, | |
| "gemma-2-9b-it": 0.092, | |
| "deepseek-v3": 0.512, | |
| "deepseek-r1": 0.5, | |
| "qwq-32b": 0.548, | |
| "Average": 0.24800000000000003 | |
| }, | |
| "HistoricalEnv": { | |
| "qwen2.5-3b-instruct": 0.24, | |
| "qwen2.5-7b-instruct": 0.368, | |
| "qwen2.5-14b-instruct": 0.5800000000000001, | |
| "qwen2.5-32b-instruct": 0.476, | |
| "qwen2.5-72b-instruct": 0.512, | |
| "llama-3.1-8b-instruct": 0.332, | |
| "llama-3.1-70b-instruct": 0.616, | |
| "llama-3.2-3b-instruct": 0.2, | |
| "llama-3.3-70b-instruct": 0.652, | |
| "mistral-large-instruct-2411": 0.6880000000000001, | |
| "gemma-2-27b-it": 0.5, | |
| "gemma-2-9b-it": 0.376, | |
| "deepseek-v3": 0.748, | |
| "deepseek-r1": 0.828, | |
| "qwq-32b": 0.884, | |
| "Average": 0.5333333333333334 | |
| }, | |
| "GadgetEnv": { | |
| "qwen2.5-3b-instruct": 0.124, | |
| "qwen2.5-7b-instruct": 0.312, | |
| "qwen2.5-14b-instruct": 0.852, | |
| "qwen2.5-32b-instruct": 0.8640000000000001, | |
| "qwen2.5-72b-instruct": 0.892, | |
| "llama-3.1-8b-instruct": 0.284, | |
| "llama-3.1-70b-instruct": 0.692, | |
| "llama-3.2-3b-instruct": 0.11200000000000002, | |
| "llama-3.3-70b-instruct": 0.7360000000000001, | |
| "mistral-large-instruct-2411": 0.884, | |
| "gemma-2-27b-it": 0.32799999999999996, | |
| "gemma-2-9b-it": 0.184, | |
| "deepseek-v3": 0.9640000000000001, | |
| "deepseek-r1": 0.932, | |
| "qwq-32b": 0.932, | |
| "Average": 0.6061333333333334 | |
| }, | |
| "TimeTravelEnv": { | |
| "qwen2.5-3b-instruct": 0.128, | |
| "qwen2.5-7b-instruct": 0.292, | |
| "qwen2.5-14b-instruct": 0.808, | |
| "qwen2.5-32b-instruct": 0.828, | |
| "qwen2.5-72b-instruct": 0.8039999999999999, | |
| "llama-3.1-8b-instruct": 0.376, | |
| "llama-3.1-70b-instruct": 0.684, | |
| "llama-3.2-3b-instruct": 0.124, | |
| "llama-3.3-70b-instruct": 0.716, | |
| "mistral-large-instruct-2411": 0.884, | |
| "gemma-2-27b-it": 0.32799999999999996, | |
| "gemma-2-9b-it": 0.21600000000000003, | |
| "deepseek-v3": 0.9399999999999998, | |
| "deepseek-r1": 0.932, | |
| "qwq-32b": 0.924, | |
| "Average": 0.5989333333333333 | |
| }, | |
| "PollutionEnv": { | |
| "qwen2.5-3b-instruct": 0.136, | |
| "qwen2.5-7b-instruct": 0.328, | |
| "qwen2.5-14b-instruct": 0.792, | |
| "qwen2.5-32b-instruct": 0.7120000000000001, | |
| "qwen2.5-72b-instruct": 0.704, | |
| "llama-3.1-8b-instruct": 0.316, | |
| "llama-3.1-70b-instruct": 0.664, | |
| "llama-3.2-3b-instruct": 0.124, | |
| "llama-3.3-70b-instruct": 0.6960000000000001, | |
| "mistral-large-instruct-2411": 0.784, | |
| "gemma-2-27b-it": 0.336, | |
| "gemma-2-9b-it": 0.252, | |
| "deepseek-v3": 0.8640000000000001, | |
| "deepseek-r1": 0.8560000000000001, | |
| "qwq-32b": 0.852, | |
| "Average": 0.5610666666666666 | |
| }, | |
| "DemographicEnv": { | |
| "qwen2.5-3b-instruct": 0.072, | |
| "qwen2.5-7b-instruct": 0.42800000000000005, | |
| "qwen2.5-14b-instruct": 0.68, | |
| "qwen2.5-32b-instruct": 0.7799999999999999, | |
| "qwen2.5-72b-instruct": 0.7719999999999999, | |
| "llama-3.1-8b-instruct": 0.272, | |
| "llama-3.1-70b-instruct": 0.6239999999999999, | |
| "llama-3.2-3b-instruct": 0.176, | |
| "llama-3.3-70b-instruct": 0.748, | |
| "mistral-large-instruct-2411": 0.8200000000000001, | |
| "gemma-2-27b-it": 0.356, | |
| "gemma-2-9b-it": 0.156, | |
| "deepseek-v3": 0.8960000000000001, | |
| "deepseek-r1": 0.876, | |
| "qwq-32b": 0.8960000000000001, | |
| "Average": 0.5701333333333333 | |
| }, | |
| "GeneticEnv": { | |
| "qwen2.5-3b-instruct": 0.084, | |
| "qwen2.5-7b-instruct": 0.392, | |
| "qwen2.5-14b-instruct": 0.884, | |
| "qwen2.5-32b-instruct": 0.9279999999999999, | |
| "qwen2.5-72b-instruct": 0.9400000000000001, | |
| "llama-3.1-8b-instruct": 0.45999999999999996, | |
| "llama-3.1-70b-instruct": 0.9, | |
| "llama-3.2-3b-instruct": 0.192, | |
| "llama-3.3-70b-instruct": 0.916, | |
| "mistral-large-instruct-2411": 0.9040000000000001, | |
| "gemma-2-27b-it": 0.776, | |
| "gemma-2-9b-it": 0.548, | |
| "deepseek-v3": 0.984, | |
| "deepseek-r1": 0.952, | |
| "qwq-32b": 0.932, | |
| "Average": 0.7194666666666667 | |
| }, | |
| "CraftsmanEnv": { | |
| "qwen2.5-3b-instruct": 0.14400000000000002, | |
| "qwen2.5-7b-instruct": 0.256, | |
| "qwen2.5-14b-instruct": 0.624, | |
| "qwen2.5-32b-instruct": 0.736, | |
| "qwen2.5-72b-instruct": 0.664, | |
| "llama-3.1-8b-instruct": 0.22000000000000003, | |
| "llama-3.1-70b-instruct": 0.524, | |
| "llama-3.2-3b-instruct": 0.10800000000000001, | |
| "llama-3.3-70b-instruct": 0.41600000000000004, | |
| "mistral-large-instruct-2411": 0.7080000000000001, | |
| "gemma-2-27b-it": 0.324, | |
| "gemma-2-9b-it": 0.096, | |
| "deepseek-v3": 0.9, | |
| "deepseek-r1": 0.7879999999999999, | |
| "qwq-32b": 0.8160000000000001, | |
| "Average": 0.4882666666666667 | |
| }, | |
| "StarConstellationEnv": { | |
| "qwen2.5-3b-instruct": 0.1, | |
| "qwen2.5-7b-instruct": 0.332, | |
| "qwen2.5-14b-instruct": 0.5960000000000001, | |
| "qwen2.5-32b-instruct": 0.572, | |
| "qwen2.5-72b-instruct": 0.5840000000000001, | |
| "llama-3.1-8b-instruct": 0.376, | |
| "llama-3.1-70b-instruct": 0.4640000000000001, | |
| "llama-3.2-3b-instruct": 0.136, | |
| "llama-3.3-70b-instruct": 0.41200000000000003, | |
| "mistral-large-instruct-2411": 0.6120000000000001, | |
| "gemma-2-27b-it": 0.472, | |
| "gemma-2-9b-it": 0.22799999999999998, | |
| "deepseek-v3": 0.744, | |
| "deepseek-r1": 0.748, | |
| "qwq-32b": 0.736, | |
| "Average": 0.47413333333333335 | |
| }, | |
| "MythicalCreatureEnv": { | |
| "qwen2.5-3b-instruct": 0.2, | |
| "qwen2.5-7b-instruct": 0.324, | |
| "qwen2.5-14b-instruct": 0.632, | |
| "qwen2.5-32b-instruct": 0.712, | |
| "qwen2.5-72b-instruct": 0.668, | |
| "llama-3.1-8b-instruct": 0.31200000000000006, | |
| "llama-3.1-70b-instruct": 0.62, | |
| "llama-3.2-3b-instruct": 0.11200000000000002, | |
| "llama-3.3-70b-instruct": 0.648, | |
| "mistral-large-instruct-2411": 0.7480000000000001, | |
| "gemma-2-27b-it": 0.42799999999999994, | |
| "gemma-2-9b-it": 0.268, | |
| "deepseek-v3": 0.8400000000000001, | |
| "deepseek-r1": 0.8400000000000001, | |
| "qwq-32b": 0.852, | |
| "Average": 0.5469333333333333 | |
| }, | |
| "ArtStyleEnv": { | |
| "qwen2.5-3b-instruct": 0.136, | |
| "qwen2.5-7b-instruct": 0.332, | |
| "qwen2.5-14b-instruct": 0.78, | |
| "qwen2.5-32b-instruct": 0.8320000000000001, | |
| "qwen2.5-72b-instruct": 0.748, | |
| "llama-3.1-8b-instruct": 0.356, | |
| "llama-3.1-70b-instruct": 0.616, | |
| "llama-3.2-3b-instruct": 0.17200000000000001, | |
| "llama-3.3-70b-instruct": 0.6199999999999999, | |
| "mistral-large-instruct-2411": 0.828, | |
| "gemma-2-27b-it": 0.43200000000000005, | |
| "gemma-2-9b-it": 0.256, | |
| "deepseek-v3": 0.876, | |
| "deepseek-r1": 0.8200000000000001, | |
| "qwq-32b": 0.868, | |
| "Average": 0.5781333333333335 | |
| }, | |
| "CookingEnv": { | |
| "qwen2.5-3b-instruct": 0.13999999999999999, | |
| "qwen2.5-7b-instruct": 0.44799999999999995, | |
| "qwen2.5-14b-instruct": 0.76, | |
| "qwen2.5-32b-instruct": 0.7440000000000001, | |
| "qwen2.5-72b-instruct": 0.7, | |
| "llama-3.1-8b-instruct": 0.364, | |
| "llama-3.1-70b-instruct": 0.6839999999999999, | |
| "llama-3.2-3b-instruct": 0.156, | |
| "llama-3.3-70b-instruct": 0.656, | |
| "mistral-large-instruct-2411": 0.74, | |
| "gemma-2-27b-it": 0.48, | |
| "gemma-2-9b-it": 0.364, | |
| "deepseek-v3": 0.8640000000000001, | |
| "deepseek-r1": 0.812, | |
| "qwq-32b": 0.9, | |
| "Average": 0.5874666666666666 | |
| }, | |
| "HistoricalBattleEnv": { | |
| "qwen2.5-3b-instruct": 0.256, | |
| "qwen2.5-7b-instruct": 0.292, | |
| "qwen2.5-14b-instruct": 0.45999999999999996, | |
| "qwen2.5-32b-instruct": 0.476, | |
| "qwen2.5-72b-instruct": 0.42400000000000004, | |
| "llama-3.1-8b-instruct": 0.28400000000000003, | |
| "llama-3.1-70b-instruct": 0.492, | |
| "llama-3.2-3b-instruct": 0.148, | |
| "llama-3.3-70b-instruct": 0.62, | |
| "mistral-large-instruct-2411": 0.608, | |
| "gemma-2-27b-it": 0.388, | |
| "gemma-2-9b-it": 0.34, | |
| "deepseek-v3": 0.724, | |
| "deepseek-r1": 0.788, | |
| "qwq-32b": 0.8560000000000001, | |
| "Average": 0.47706666666666664 | |
| }, | |
| "FungalEnv": { | |
| "qwen2.5-3b-instruct": 0.15999999999999998, | |
| "qwen2.5-7b-instruct": 0.46399999999999997, | |
| "qwen2.5-14b-instruct": 0.664, | |
| "qwen2.5-32b-instruct": 0.728, | |
| "qwen2.5-72b-instruct": 0.6839999999999999, | |
| "llama-3.1-8b-instruct": 0.41600000000000004, | |
| "llama-3.1-70b-instruct": 0.5840000000000001, | |
| "llama-3.2-3b-instruct": 0.14, | |
| "llama-3.3-70b-instruct": 0.644, | |
| "mistral-large-instruct-2411": 0.7440000000000001, | |
| "gemma-2-27b-it": 0.536, | |
| "gemma-2-9b-it": 0.184, | |
| "deepseek-v3": 0.844, | |
| "deepseek-r1": 0.764, | |
| "qwq-32b": 0.7879999999999999, | |
| "Average": 0.5562666666666666 | |
| }, | |
| "CryptographyEnv": { | |
| "qwen2.5-3b-instruct": 0.24000000000000005, | |
| "qwen2.5-7b-instruct": 0.23199999999999998, | |
| "qwen2.5-14b-instruct": 0.508, | |
| "qwen2.5-32b-instruct": 0.5760000000000001, | |
| "qwen2.5-72b-instruct": 0.528, | |
| "llama-3.1-8b-instruct": 0.29600000000000004, | |
| "llama-3.1-70b-instruct": 0.524, | |
| "llama-3.2-3b-instruct": 0.11600000000000002, | |
| "llama-3.3-70b-instruct": 0.512, | |
| "mistral-large-instruct-2411": 0.6799999999999999, | |
| "gemma-2-27b-it": 0.328, | |
| "gemma-2-9b-it": 0.192, | |
| "deepseek-v3": 0.784, | |
| "deepseek-r1": 0.74, | |
| "qwq-32b": 0.8480000000000001, | |
| "Average": 0.4736 | |
| }, | |
| "StorageEnv": { | |
| "qwen2.5-3b-instruct": 0.22800000000000004, | |
| "qwen2.5-7b-instruct": 0.44000000000000006, | |
| "qwen2.5-14b-instruct": 0.852, | |
| "qwen2.5-32b-instruct": 0.884, | |
| "qwen2.5-72b-instruct": 0.8119999999999999, | |
| "llama-3.1-8b-instruct": 0.34800000000000003, | |
| "llama-3.1-70b-instruct": 0.724, | |
| "llama-3.2-3b-instruct": 0.21600000000000003, | |
| "llama-3.3-70b-instruct": 0.796, | |
| "mistral-large-instruct-2411": 0.8880000000000001, | |
| "gemma-2-27b-it": 0.596, | |
| "gemma-2-9b-it": 0.392, | |
| "deepseek-v3": 0.9640000000000001, | |
| "deepseek-r1": 0.9119999999999999, | |
| "qwq-32b": 0.944, | |
| "Average": 0.6663999999999999 | |
| }, | |
| "RoverEnv": { | |
| "qwen2.5-3b-instruct": 0.14400000000000002, | |
| "qwen2.5-7b-instruct": 0.236, | |
| "qwen2.5-14b-instruct": 0.8480000000000001, | |
| "qwen2.5-32b-instruct": 0.8360000000000001, | |
| "qwen2.5-72b-instruct": 0.796, | |
| "llama-3.1-8b-instruct": 0.28400000000000003, | |
| "llama-3.1-70b-instruct": 0.612, | |
| "llama-3.2-3b-instruct": 0.148, | |
| "llama-3.3-70b-instruct": 0.724, | |
| "mistral-large-instruct-2411": 0.828, | |
| "gemma-2-27b-it": 0.4600000000000001, | |
| "gemma-2-9b-it": 0.072, | |
| "deepseek-v3": 0.9200000000000002, | |
| "deepseek-r1": 0.9, | |
| "qwq-32b": 0.8720000000000001, | |
| "Average": 0.5786666666666668 | |
| }, | |
| "FashionEnv": { | |
| "qwen2.5-3b-instruct": 0.17200000000000001, | |
| "qwen2.5-7b-instruct": 0.304, | |
| "qwen2.5-14b-instruct": 0.8240000000000001, | |
| "qwen2.5-32b-instruct": 0.808, | |
| "qwen2.5-72b-instruct": 0.768, | |
| "llama-3.1-8b-instruct": 0.32, | |
| "llama-3.1-70b-instruct": 0.6, | |
| "llama-3.2-3b-instruct": 0.16399999999999998, | |
| "llama-3.3-70b-instruct": 0.6160000000000001, | |
| "mistral-large-instruct-2411": 0.756, | |
| "gemma-2-27b-it": 0.524, | |
| "gemma-2-9b-it": 0.292, | |
| "deepseek-v3": 0.86, | |
| "deepseek-r1": 0.756, | |
| "qwq-32b": 0.86, | |
| "Average": 0.5749333333333334 | |
| }, | |
| "LicenseEnv": { | |
| "qwen2.5-3b-instruct": 0.196, | |
| "qwen2.5-7b-instruct": 0.29200000000000004, | |
| "qwen2.5-14b-instruct": 0.556, | |
| "qwen2.5-32b-instruct": 0.44000000000000006, | |
| "qwen2.5-72b-instruct": 0.484, | |
| "llama-3.1-8b-instruct": 0.26, | |
| "llama-3.1-70b-instruct": 0.496, | |
| "llama-3.2-3b-instruct": 0.072, | |
| "llama-3.3-70b-instruct": 0.45999999999999996, | |
| "mistral-large-instruct-2411": 0.504, | |
| "gemma-2-27b-it": 0.37600000000000006, | |
| "gemma-2-9b-it": 0.296, | |
| "deepseek-v3": 0.556, | |
| "deepseek-r1": 0.52, | |
| "qwq-32b": 0.5800000000000001, | |
| "Average": 0.4058666666666667 | |
| }, | |
| "VirusClassificationEnv": { | |
| "qwen2.5-3b-instruct": 0.22000000000000003, | |
| "qwen2.5-7b-instruct": 0.28, | |
| "qwen2.5-14b-instruct": 0.384, | |
| "qwen2.5-32b-instruct": 0.38, | |
| "qwen2.5-72b-instruct": 0.42800000000000005, | |
| "llama-3.1-8b-instruct": 0.256, | |
| "llama-3.1-70b-instruct": 0.332, | |
| "llama-3.2-3b-instruct": 0.156, | |
| "llama-3.3-70b-instruct": 0.396, | |
| "mistral-large-instruct-2411": 0.532, | |
| "gemma-2-27b-it": 0.34, | |
| "gemma-2-9b-it": 0.31200000000000006, | |
| "deepseek-v3": 0.536, | |
| "deepseek-r1": 0.64, | |
| "qwq-32b": 0.684, | |
| "Average": 0.3917333333333333 | |
| }, | |
| "TestingEnv": { | |
| "qwen2.5-3b-instruct": 0.19200000000000003, | |
| "qwen2.5-7b-instruct": 0.22000000000000003, | |
| "qwen2.5-14b-instruct": 0.608, | |
| "qwen2.5-32b-instruct": 0.648, | |
| "qwen2.5-72b-instruct": 0.708, | |
| "llama-3.1-8b-instruct": 0.332, | |
| "llama-3.1-70b-instruct": 0.68, | |
| "llama-3.2-3b-instruct": 0.17200000000000001, | |
| "llama-3.3-70b-instruct": 0.7040000000000001, | |
| "mistral-large-instruct-2411": 0.764, | |
| "gemma-2-27b-it": 0.22799999999999998, | |
| "gemma-2-9b-it": 0.26, | |
| "deepseek-v3": 0.8880000000000001, | |
| "deepseek-r1": 0.764, | |
| "qwq-32b": 0.7999999999999999, | |
| "Average": 0.5312 | |
| }, | |
| "NarrativeDetectEnv": { | |
| "qwen2.5-3b-instruct": 0.148, | |
| "qwen2.5-7b-instruct": 0.30000000000000004, | |
| "qwen2.5-14b-instruct": 0.552, | |
| "qwen2.5-32b-instruct": 0.8440000000000001, | |
| "qwen2.5-72b-instruct": 0.76, | |
| "llama-3.1-8b-instruct": 0.28800000000000003, | |
| "llama-3.1-70b-instruct": 0.6279999999999999, | |
| "llama-3.2-3b-instruct": 0.10400000000000001, | |
| "llama-3.3-70b-instruct": 0.704, | |
| "mistral-large-instruct-2411": 0.7919999999999999, | |
| "gemma-2-27b-it": 0.328, | |
| "gemma-2-9b-it": 0.192, | |
| "deepseek-v3": 0.8560000000000001, | |
| "deepseek-r1": 0.748, | |
| "qwq-32b": 0.784, | |
| "Average": 0.5352 | |
| }, | |
| "RenewableEnergyEnv": { | |
| "qwen2.5-3b-instruct": 0.184, | |
| "qwen2.5-7b-instruct": 0.44399999999999995, | |
| "qwen2.5-14b-instruct": 0.648, | |
| "qwen2.5-32b-instruct": 0.932, | |
| "qwen2.5-72b-instruct": 0.8880000000000001, | |
| "llama-3.1-8b-instruct": 0.396, | |
| "llama-3.1-70b-instruct": 0.812, | |
| "llama-3.2-3b-instruct": 0.2, | |
| "llama-3.3-70b-instruct": 0.8240000000000001, | |
| "mistral-large-instruct-2411": 0.8560000000000001, | |
| "gemma-2-27b-it": 0.348, | |
| "gemma-2-9b-it": 0.188, | |
| "deepseek-v3": 0.96, | |
| "deepseek-r1": 0.9800000000000001, | |
| "qwq-32b": 0.9800000000000001, | |
| "Average": 0.6426666666666667 | |
| }, | |
| "CelestialEnv": { | |
| "qwen2.5-3b-instruct": 0.20400000000000001, | |
| "qwen2.5-7b-instruct": 0.252, | |
| "qwen2.5-14b-instruct": 0.728, | |
| "qwen2.5-32b-instruct": 0.792, | |
| "qwen2.5-72b-instruct": 0.7239999999999999, | |
| "llama-3.1-8b-instruct": 0.256, | |
| "llama-3.1-70b-instruct": 0.6920000000000001, | |
| "llama-3.2-3b-instruct": 0.192, | |
| "llama-3.3-70b-instruct": 0.744, | |
| "mistral-large-instruct-2411": 0.82, | |
| "gemma-2-27b-it": 0.528, | |
| "gemma-2-9b-it": 0.344, | |
| "deepseek-v3": 0.8480000000000001, | |
| "deepseek-r1": 0.8360000000000001, | |
| "qwq-32b": 0.8879999999999999, | |
| "Average": 0.5898666666666668 | |
| }, | |
| "SpiceEnv": { | |
| "qwen2.5-3b-instruct": 0.21199999999999997, | |
| "qwen2.5-7b-instruct": 0.332, | |
| "qwen2.5-14b-instruct": 0.672, | |
| "qwen2.5-32b-instruct": 0.476, | |
| "qwen2.5-72b-instruct": 0.5880000000000001, | |
| "llama-3.1-8b-instruct": 0.32799999999999996, | |
| "llama-3.1-70b-instruct": 0.40800000000000003, | |
| "llama-3.2-3b-instruct": 0.22000000000000003, | |
| "llama-3.3-70b-instruct": 0.336, | |
| "mistral-large-instruct-2411": 0.5800000000000001, | |
| "gemma-2-27b-it": 0.28400000000000003, | |
| "gemma-2-9b-it": 0.172, | |
| "deepseek-v3": 0.908, | |
| "deepseek-r1": 0.7679999999999999, | |
| "qwq-32b": 0.8720000000000001, | |
| "Average": 0.47706666666666664 | |
| }, | |
| "WildlifeEnv": { | |
| "qwen2.5-3b-instruct": 0.21600000000000003, | |
| "qwen2.5-7b-instruct": 0.352, | |
| "qwen2.5-14b-instruct": 0.644, | |
| "qwen2.5-32b-instruct": 0.592, | |
| "qwen2.5-72b-instruct": 0.616, | |
| "llama-3.1-8b-instruct": 0.316, | |
| "llama-3.1-70b-instruct": 0.544, | |
| "llama-3.2-3b-instruct": 0.23199999999999998, | |
| "llama-3.3-70b-instruct": 0.616, | |
| "mistral-large-instruct-2411": 0.628, | |
| "gemma-2-27b-it": 0.45199999999999996, | |
| "gemma-2-9b-it": 0.344, | |
| "deepseek-v3": 0.736, | |
| "deepseek-r1": 0.6040000000000001, | |
| "qwq-32b": 0.716, | |
| "Average": 0.5072 | |
| }, | |
| "VehicleEnv": { | |
| "qwen2.5-3b-instruct": 0.172, | |
| "qwen2.5-7b-instruct": 0.308, | |
| "qwen2.5-14b-instruct": 0.54, | |
| "qwen2.5-32b-instruct": 0.776, | |
| "qwen2.5-72b-instruct": 0.78, | |
| "llama-3.1-8b-instruct": 0.248, | |
| "llama-3.1-70b-instruct": 0.62, | |
| "llama-3.2-3b-instruct": 0.152, | |
| "llama-3.3-70b-instruct": 0.6960000000000001, | |
| "mistral-large-instruct-2411": 0.8800000000000001, | |
| "gemma-2-27b-it": 0.44799999999999995, | |
| "gemma-2-9b-it": 0.248, | |
| "deepseek-v3": 0.9199999999999999, | |
| "deepseek-r1": 0.9199999999999999, | |
| "qwq-32b": 0.916, | |
| "Average": 0.5749333333333333 | |
| }, | |
| "BeverageEnv": { | |
| "qwen2.5-3b-instruct": 0.128, | |
| "qwen2.5-7b-instruct": 0.296, | |
| "qwen2.5-14b-instruct": 0.792, | |
| "qwen2.5-32b-instruct": 0.6880000000000001, | |
| "qwen2.5-72b-instruct": 0.724, | |
| "llama-3.1-8b-instruct": 0.41200000000000003, | |
| "llama-3.1-70b-instruct": 0.6199999999999999, | |
| "llama-3.2-3b-instruct": 0.16399999999999998, | |
| "llama-3.3-70b-instruct": 0.5800000000000001, | |
| "mistral-large-instruct-2411": 0.748, | |
| "gemma-2-27b-it": 0.40800000000000003, | |
| "gemma-2-9b-it": 0.296, | |
| "deepseek-v3": 0.8800000000000001, | |
| "deepseek-r1": 0.7520000000000001, | |
| "qwq-32b": 0.844, | |
| "Average": 0.5554666666666667 | |
| }, | |
| "ControlEnv": { | |
| "qwen2.5-3b-instruct": 0.12800000000000003, | |
| "qwen2.5-7b-instruct": 0.364, | |
| "qwen2.5-14b-instruct": 0.68, | |
| "qwen2.5-32b-instruct": 0.8320000000000001, | |
| "qwen2.5-72b-instruct": 0.8400000000000001, | |
| "llama-3.1-8b-instruct": 0.364, | |
| "llama-3.1-70b-instruct": 0.656, | |
| "llama-3.2-3b-instruct": 0.15599999999999997, | |
| "llama-3.3-70b-instruct": 0.6320000000000001, | |
| "mistral-large-instruct-2411": 0.784, | |
| "gemma-2-27b-it": 0.4640000000000001, | |
| "gemma-2-9b-it": 0.18, | |
| "deepseek-v3": 0.9119999999999999, | |
| "deepseek-r1": 0.9119999999999999, | |
| "qwq-32b": 0.932, | |
| "Average": 0.5890666666666665 | |
| }, | |
| "CurrencyEnv": { | |
| "qwen2.5-3b-instruct": 0.252, | |
| "qwen2.5-7b-instruct": 0.392, | |
| "qwen2.5-14b-instruct": 0.8560000000000001, | |
| "qwen2.5-32b-instruct": 0.884, | |
| "qwen2.5-72b-instruct": 0.836, | |
| "llama-3.1-8b-instruct": 0.476, | |
| "llama-3.1-70b-instruct": 0.7520000000000001, | |
| "llama-3.2-3b-instruct": 0.22400000000000003, | |
| "llama-3.3-70b-instruct": 0.7000000000000001, | |
| "mistral-large-instruct-2411": 0.8960000000000001, | |
| "gemma-2-27b-it": 0.68, | |
| "gemma-2-9b-it": 0.196, | |
| "deepseek-v3": 0.9800000000000001, | |
| "deepseek-r1": 0.932, | |
| "qwq-32b": 0.9640000000000001, | |
| "Average": 0.668 | |
| }, | |
| "MarketingEnv": { | |
| "qwen2.5-3b-instruct": 0.12, | |
| "qwen2.5-7b-instruct": 0.34400000000000003, | |
| "qwen2.5-14b-instruct": 0.524, | |
| "qwen2.5-32b-instruct": 0.7479999999999999, | |
| "qwen2.5-72b-instruct": 0.732, | |
| "llama-3.1-8b-instruct": 0.30800000000000005, | |
| "llama-3.1-70b-instruct": 0.7040000000000001, | |
| "llama-3.2-3b-instruct": 0.14400000000000002, | |
| "llama-3.3-70b-instruct": 0.7639999999999999, | |
| "mistral-large-instruct-2411": 0.7600000000000001, | |
| "gemma-2-27b-it": 0.32400000000000007, | |
| "gemma-2-9b-it": 0.184, | |
| "deepseek-v3": 0.812, | |
| "deepseek-r1": 0.7959999999999999, | |
| "qwq-32b": 0.8320000000000001, | |
| "Average": 0.5397333333333333 | |
| }, | |
| "BotanicalEnv": { | |
| "qwen2.5-3b-instruct": 0.18800000000000003, | |
| "qwen2.5-7b-instruct": 0.316, | |
| "qwen2.5-14b-instruct": 0.9119999999999999, | |
| "qwen2.5-32b-instruct": 0.884, | |
| "qwen2.5-72b-instruct": 0.9039999999999999, | |
| "llama-3.1-8b-instruct": 0.4119999999999999, | |
| "llama-3.1-70b-instruct": 0.836, | |
| "llama-3.2-3b-instruct": 0.23600000000000004, | |
| "llama-3.3-70b-instruct": 0.8480000000000001, | |
| "mistral-large-instruct-2411": 0.8640000000000001, | |
| "gemma-2-27b-it": 0.604, | |
| "gemma-2-9b-it": 0.264, | |
| "deepseek-v3": 0.9040000000000001, | |
| "deepseek-r1": 0.9399999999999998, | |
| "qwq-32b": 0.968, | |
| "Average": 0.672 | |
| }, | |
| "CircusActEnv": { | |
| "qwen2.5-3b-instruct": 0.17200000000000001, | |
| "qwen2.5-7b-instruct": 0.32399999999999995, | |
| "qwen2.5-14b-instruct": 0.64, | |
| "qwen2.5-32b-instruct": 0.712, | |
| "qwen2.5-72b-instruct": 0.768, | |
| "llama-3.1-8b-instruct": 0.276, | |
| "llama-3.1-70b-instruct": 0.648, | |
| "llama-3.2-3b-instruct": 0.176, | |
| "llama-3.3-70b-instruct": 0.62, | |
| "mistral-large-instruct-2411": 0.748, | |
| "gemma-2-27b-it": 0.384, | |
| "gemma-2-9b-it": 0.29600000000000004, | |
| "deepseek-v3": 0.8640000000000001, | |
| "deepseek-r1": 0.82, | |
| "qwq-32b": 0.8720000000000001, | |
| "Average": 0.5546666666666668 | |
| }, | |
| "AudioDialectEnv": { | |
| "qwen2.5-3b-instruct": 0.128, | |
| "qwen2.5-7b-instruct": 0.312, | |
| "qwen2.5-14b-instruct": 0.5800000000000001, | |
| "qwen2.5-32b-instruct": 0.6, | |
| "qwen2.5-72b-instruct": 0.528, | |
| "llama-3.1-8b-instruct": 0.21600000000000003, | |
| "llama-3.1-70b-instruct": 0.4, | |
| "llama-3.2-3b-instruct": 0.132, | |
| "llama-3.3-70b-instruct": 0.32399999999999995, | |
| "mistral-large-instruct-2411": 0.68, | |
| "gemma-2-27b-it": 0.28, | |
| "gemma-2-9b-it": 0.11600000000000002, | |
| "deepseek-v3": 0.7520000000000001, | |
| "deepseek-r1": 0.7919999999999999, | |
| "qwq-32b": 0.8119999999999999, | |
| "Average": 0.4434666666666666 | |
| }, | |
| "LeadershipEnv": { | |
| "qwen2.5-3b-instruct": 0.164, | |
| "qwen2.5-7b-instruct": 0.372, | |
| "qwen2.5-14b-instruct": 0.7, | |
| "qwen2.5-32b-instruct": 0.732, | |
| "qwen2.5-72b-instruct": 0.7639999999999999, | |
| "llama-3.1-8b-instruct": 0.364, | |
| "llama-3.1-70b-instruct": 0.708, | |
| "llama-3.2-3b-instruct": 0.128, | |
| "llama-3.3-70b-instruct": 0.6920000000000001, | |
| "mistral-large-instruct-2411": 0.728, | |
| "gemma-2-27b-it": 0.46799999999999997, | |
| "gemma-2-9b-it": 0.20400000000000001, | |
| "deepseek-v3": 0.8200000000000001, | |
| "deepseek-r1": 0.748, | |
| "qwq-32b": 0.828, | |
| "Average": 0.5613333333333334 | |
| }, | |
| "TransportEnv": { | |
| "qwen2.5-3b-instruct": 0.196, | |
| "qwen2.5-7b-instruct": 0.372, | |
| "qwen2.5-14b-instruct": 0.716, | |
| "qwen2.5-32b-instruct": 0.732, | |
| "qwen2.5-72b-instruct": 0.8, | |
| "llama-3.1-8b-instruct": 0.316, | |
| "llama-3.1-70b-instruct": 0.648, | |
| "llama-3.2-3b-instruct": 0.15200000000000002, | |
| "llama-3.3-70b-instruct": 0.6000000000000001, | |
| "mistral-large-instruct-2411": 0.7879999999999999, | |
| "gemma-2-27b-it": 0.44399999999999995, | |
| "gemma-2-9b-it": 0.364, | |
| "deepseek-v3": 0.8640000000000001, | |
| "deepseek-r1": 0.8240000000000001, | |
| "qwq-32b": 0.9199999999999999, | |
| "Average": 0.5824 | |
| }, | |
| "EcologicalEnv": { | |
| "qwen2.5-3b-instruct": 0.152, | |
| "qwen2.5-7b-instruct": 0.45600000000000007, | |
| "qwen2.5-14b-instruct": 0.748, | |
| "qwen2.5-32b-instruct": 0.82, | |
| "qwen2.5-72b-instruct": 0.792, | |
| "llama-3.1-8b-instruct": 0.42000000000000004, | |
| "llama-3.1-70b-instruct": 0.692, | |
| "llama-3.2-3b-instruct": 0.21600000000000003, | |
| "llama-3.3-70b-instruct": 0.64, | |
| "mistral-large-instruct-2411": 0.772, | |
| "gemma-2-27b-it": 0.5680000000000001, | |
| "gemma-2-9b-it": 0.46799999999999997, | |
| "deepseek-v3": 0.868, | |
| "deepseek-r1": 0.8720000000000001, | |
| "qwq-32b": 0.8879999999999999, | |
| "Average": 0.6248 | |
| }, | |
| "MythicEnv": { | |
| "qwen2.5-3b-instruct": 0.132, | |
| "qwen2.5-7b-instruct": 0.36, | |
| "qwen2.5-14b-instruct": 0.744, | |
| "qwen2.5-32b-instruct": 0.74, | |
| "qwen2.5-72b-instruct": 0.672, | |
| "llama-3.1-8b-instruct": 0.236, | |
| "llama-3.1-70b-instruct": 0.596, | |
| "llama-3.2-3b-instruct": 0.12, | |
| "llama-3.3-70b-instruct": 0.576, | |
| "mistral-large-instruct-2411": 0.6960000000000001, | |
| "gemma-2-27b-it": 0.45599999999999996, | |
| "gemma-2-9b-it": 0.136, | |
| "deepseek-v3": 0.8960000000000001, | |
| "deepseek-r1": 0.8720000000000001, | |
| "qwq-32b": 0.8400000000000001, | |
| "Average": 0.5381333333333332 | |
| }, | |
| "EnzymeEnv": { | |
| "qwen2.5-3b-instruct": 0.252, | |
| "qwen2.5-7b-instruct": 0.43200000000000005, | |
| "qwen2.5-14b-instruct": 0.636, | |
| "qwen2.5-32b-instruct": 0.676, | |
| "qwen2.5-72b-instruct": 0.676, | |
| "llama-3.1-8b-instruct": 0.316, | |
| "llama-3.1-70b-instruct": 0.552, | |
| "llama-3.2-3b-instruct": 0.192, | |
| "llama-3.3-70b-instruct": 0.5640000000000001, | |
| "mistral-large-instruct-2411": 0.732, | |
| "gemma-2-27b-it": 0.43600000000000005, | |
| "gemma-2-9b-it": 0.264, | |
| "deepseek-v3": 0.8400000000000001, | |
| "deepseek-r1": 0.76, | |
| "qwq-32b": 0.804, | |
| "Average": 0.5421333333333334 | |
| }, | |
| "OSKernelEnv": { | |
| "qwen2.5-3b-instruct": 0.192, | |
| "qwen2.5-7b-instruct": 0.28400000000000003, | |
| "qwen2.5-14b-instruct": 0.8119999999999999, | |
| "qwen2.5-32b-instruct": 0.784, | |
| "qwen2.5-72b-instruct": 0.788, | |
| "llama-3.1-8b-instruct": 0.316, | |
| "llama-3.1-70b-instruct": 0.6920000000000001, | |
| "llama-3.2-3b-instruct": 0.128, | |
| "llama-3.3-70b-instruct": 0.74, | |
| "mistral-large-instruct-2411": 0.8559999999999999, | |
| "gemma-2-27b-it": 0.46399999999999997, | |
| "gemma-2-9b-it": 0.2, | |
| "deepseek-v3": 0.9480000000000001, | |
| "deepseek-r1": 0.96, | |
| "qwq-32b": 0.984, | |
| "Average": 0.6098666666666668 | |
| }, | |
| "MineralClassificationEnv": { | |
| "qwen2.5-3b-instruct": 0.11600000000000002, | |
| "qwen2.5-7b-instruct": 0.248, | |
| "qwen2.5-14b-instruct": 0.8320000000000001, | |
| "qwen2.5-32b-instruct": 0.9040000000000001, | |
| "qwen2.5-72b-instruct": 0.884, | |
| "llama-3.1-8b-instruct": 0.384, | |
| "llama-3.1-70b-instruct": 0.8240000000000001, | |
| "llama-3.2-3b-instruct": 0.14800000000000002, | |
| "llama-3.3-70b-instruct": 0.8960000000000001, | |
| "mistral-large-instruct-2411": 0.908, | |
| "gemma-2-27b-it": 0.508, | |
| "gemma-2-9b-it": 0.268, | |
| "deepseek-v3": 0.984, | |
| "deepseek-r1": 0.9199999999999999, | |
| "qwq-32b": 0.9640000000000001, | |
| "Average": 0.6525333333333333 | |
| }, | |
| "EconomicEnv": { | |
| "qwen2.5-3b-instruct": 0.136, | |
| "qwen2.5-7b-instruct": 0.24, | |
| "qwen2.5-14b-instruct": 0.8560000000000001, | |
| "qwen2.5-32b-instruct": 0.9199999999999999, | |
| "qwen2.5-72b-instruct": 0.8960000000000001, | |
| "llama-3.1-8b-instruct": 0.43600000000000005, | |
| "llama-3.1-70b-instruct": 0.808, | |
| "llama-3.2-3b-instruct": 0.152, | |
| "llama-3.3-70b-instruct": 0.8240000000000001, | |
| "mistral-large-instruct-2411": 0.924, | |
| "gemma-2-27b-it": 0.45199999999999996, | |
| "gemma-2-9b-it": 0.36, | |
| "deepseek-v3": 0.9559999999999998, | |
| "deepseek-r1": 0.9359999999999999, | |
| "qwq-32b": 0.9719999999999999, | |
| "Average": 0.6578666666666667 | |
| }, | |
| "DetectiveEnv": { | |
| "qwen2.5-3b-instruct": 0.168, | |
| "qwen2.5-7b-instruct": 0.38, | |
| "qwen2.5-14b-instruct": 0.836, | |
| "qwen2.5-32b-instruct": 0.884, | |
| "qwen2.5-72b-instruct": 0.8480000000000001, | |
| "llama-3.1-8b-instruct": 0.34800000000000003, | |
| "llama-3.1-70b-instruct": 0.74, | |
| "llama-3.2-3b-instruct": 0.248, | |
| "llama-3.3-70b-instruct": 0.792, | |
| "mistral-large-instruct-2411": 0.8960000000000001, | |
| "gemma-2-27b-it": 0.512, | |
| "gemma-2-9b-it": 0.33199999999999996, | |
| "deepseek-v3": 0.976, | |
| "deepseek-r1": 0.9640000000000001, | |
| "qwq-32b": 0.984, | |
| "Average": 0.6605333333333333 | |
| }, | |
| "ChessEnv": { | |
| "qwen2.5-3b-instruct": 0.184, | |
| "qwen2.5-7b-instruct": 0.27999999999999997, | |
| "qwen2.5-14b-instruct": 0.592, | |
| "qwen2.5-32b-instruct": 0.616, | |
| "qwen2.5-72b-instruct": 0.5720000000000001, | |
| "llama-3.1-8b-instruct": 0.188, | |
| "llama-3.1-70b-instruct": 0.6639999999999999, | |
| "llama-3.2-3b-instruct": 0.084, | |
| "llama-3.3-70b-instruct": 0.6280000000000001, | |
| "mistral-large-instruct-2411": 0.744, | |
| "gemma-2-27b-it": 0.30000000000000004, | |
| "gemma-2-9b-it": 0.096, | |
| "deepseek-v3": 0.696, | |
| "deepseek-r1": 0.6519999999999999, | |
| "qwq-32b": 0.664, | |
| "Average": 0.4639999999999999 | |
| }, | |
| "MythicalEnv": { | |
| "qwen2.5-3b-instruct": 0.2, | |
| "qwen2.5-7b-instruct": 0.336, | |
| "qwen2.5-14b-instruct": 0.8039999999999999, | |
| "qwen2.5-32b-instruct": 0.712, | |
| "qwen2.5-72b-instruct": 0.632, | |
| "llama-3.1-8b-instruct": 0.356, | |
| "llama-3.1-70b-instruct": 0.54, | |
| "llama-3.2-3b-instruct": 0.16, | |
| "llama-3.3-70b-instruct": 0.556, | |
| "mistral-large-instruct-2411": 0.728, | |
| "gemma-2-27b-it": 0.54, | |
| "gemma-2-9b-it": 0.404, | |
| "deepseek-v3": 0.9279999999999999, | |
| "deepseek-r1": 0.8959999999999999, | |
| "qwq-32b": 0.876, | |
| "Average": 0.5778666666666666 | |
| }, | |
| "ChemicalCompoundsEnv": { | |
| "qwen2.5-3b-instruct": 0.18, | |
| "qwen2.5-7b-instruct": 0.252, | |
| "qwen2.5-14b-instruct": 0.40800000000000003, | |
| "qwen2.5-32b-instruct": 0.30000000000000004, | |
| "qwen2.5-72b-instruct": 0.28400000000000003, | |
| "llama-3.1-8b-instruct": 0.148, | |
| "llama-3.1-70b-instruct": 0.28, | |
| "llama-3.2-3b-instruct": 0.14, | |
| "llama-3.3-70b-instruct": 0.18000000000000002, | |
| "mistral-large-instruct-2411": 0.43200000000000005, | |
| "gemma-2-27b-it": 0.23200000000000004, | |
| "gemma-2-9b-it": 0.13599999999999998, | |
| "deepseek-v3": 0.46799999999999997, | |
| "deepseek-r1": 0.624, | |
| "qwq-32b": 0.752, | |
| "Average": 0.32106666666666667 | |
| }, | |
| "ArchitecturalEnv": { | |
| "qwen2.5-3b-instruct": 0.20400000000000001, | |
| "qwen2.5-7b-instruct": 0.316, | |
| "qwen2.5-14b-instruct": 0.72, | |
| "qwen2.5-32b-instruct": 0.66, | |
| "qwen2.5-72b-instruct": 0.7120000000000001, | |
| "llama-3.1-8b-instruct": 0.256, | |
| "llama-3.1-70b-instruct": 0.556, | |
| "llama-3.2-3b-instruct": 0.132, | |
| "llama-3.3-70b-instruct": 0.508, | |
| "mistral-large-instruct-2411": 0.724, | |
| "gemma-2-27b-it": 0.488, | |
| "gemma-2-9b-it": 0.236, | |
| "deepseek-v3": 0.82, | |
| "deepseek-r1": 0.744, | |
| "qwq-32b": 0.8240000000000001, | |
| "Average": 0.5266666666666666 | |
| }, | |
| "ComputationEnv": { | |
| "qwen2.5-3b-instruct": 0.152, | |
| "qwen2.5-7b-instruct": 0.248, | |
| "qwen2.5-14b-instruct": 0.76, | |
| "qwen2.5-32b-instruct": 0.884, | |
| "qwen2.5-72b-instruct": 0.8560000000000001, | |
| "llama-3.1-8b-instruct": 0.32799999999999996, | |
| "llama-3.1-70b-instruct": 0.788, | |
| "llama-3.2-3b-instruct": 0.13999999999999999, | |
| "llama-3.3-70b-instruct": 0.8560000000000001, | |
| "mistral-large-instruct-2411": 0.828, | |
| "gemma-2-27b-it": 0.45199999999999996, | |
| "gemma-2-9b-it": 0.252, | |
| "deepseek-v3": 0.96, | |
| "deepseek-r1": 0.9399999999999998, | |
| "qwq-32b": 0.908, | |
| "Average": 0.6234666666666667 | |
| }, | |
| "MachinePartEnv": { | |
| "qwen2.5-3b-instruct": 0.14, | |
| "qwen2.5-7b-instruct": 0.32, | |
| "qwen2.5-14b-instruct": 0.8240000000000001, | |
| "qwen2.5-32b-instruct": 0.8800000000000001, | |
| "qwen2.5-72b-instruct": 0.828, | |
| "llama-3.1-8b-instruct": 0.376, | |
| "llama-3.1-70b-instruct": 0.8200000000000001, | |
| "llama-3.2-3b-instruct": 0.168, | |
| "llama-3.3-70b-instruct": 0.8960000000000001, | |
| "mistral-large-instruct-2411": 0.876, | |
| "gemma-2-27b-it": 0.508, | |
| "gemma-2-9b-it": 0.268, | |
| "deepseek-v3": 0.9719999999999999, | |
| "deepseek-r1": 0.952, | |
| "qwq-32b": 0.916, | |
| "Average": 0.6496 | |
| }, | |
| "LiteraryEnv": { | |
| "qwen2.5-3b-instruct": 0.10400000000000001, | |
| "qwen2.5-7b-instruct": 0.328, | |
| "qwen2.5-14b-instruct": 0.8800000000000001, | |
| "qwen2.5-32b-instruct": 0.9279999999999999, | |
| "qwen2.5-72b-instruct": 0.9, | |
| "llama-3.1-8b-instruct": 0.336, | |
| "llama-3.1-70b-instruct": 0.664, | |
| "llama-3.2-3b-instruct": 0.13999999999999999, | |
| "llama-3.3-70b-instruct": 0.664, | |
| "mistral-large-instruct-2411": 0.884, | |
| "gemma-2-27b-it": 0.44399999999999995, | |
| "gemma-2-9b-it": 0.13999999999999999, | |
| "deepseek-v3": 0.984, | |
| "deepseek-r1": 0.9119999999999999, | |
| "qwq-32b": 0.968, | |
| "Average": 0.6184 | |
| }, | |
| "MarineEnv": { | |
| "qwen2.5-3b-instruct": 0.144, | |
| "qwen2.5-7b-instruct": 0.384, | |
| "qwen2.5-14b-instruct": 0.8720000000000001, | |
| "qwen2.5-32b-instruct": 0.844, | |
| "qwen2.5-72b-instruct": 0.8320000000000001, | |
| "llama-3.1-8b-instruct": 0.308, | |
| "llama-3.1-70b-instruct": 0.636, | |
| "llama-3.2-3b-instruct": 0.12000000000000002, | |
| "llama-3.3-70b-instruct": 0.704, | |
| "mistral-large-instruct-2411": 0.7879999999999999, | |
| "gemma-2-27b-it": 0.484, | |
| "gemma-2-9b-it": 0.23199999999999998, | |
| "deepseek-v3": 0.884, | |
| "deepseek-r1": 0.9, | |
| "qwq-32b": 0.8880000000000001, | |
| "Average": 0.6013333333333334 | |
| }, | |
| "PhilosophyEnv": { | |
| "qwen2.5-3b-instruct": 0.144, | |
| "qwen2.5-7b-instruct": 0.3, | |
| "qwen2.5-14b-instruct": 0.7280000000000001, | |
| "qwen2.5-32b-instruct": 0.82, | |
| "qwen2.5-72b-instruct": 0.8719999999999999, | |
| "llama-3.1-8b-instruct": 0.32799999999999996, | |
| "llama-3.1-70b-instruct": 0.764, | |
| "llama-3.2-3b-instruct": 0.036000000000000004, | |
| "llama-3.3-70b-instruct": 0.796, | |
| "mistral-large-instruct-2411": 0.7879999999999999, | |
| "gemma-2-27b-it": 0.372, | |
| "gemma-2-9b-it": 0.28, | |
| "deepseek-v3": 0.844, | |
| "deepseek-r1": 0.78, | |
| "qwq-32b": 0.8320000000000001, | |
| "Average": 0.5789333333333334 | |
| }, | |
| "ArchaeologicalEnv": { | |
| "qwen2.5-3b-instruct": 0.18, | |
| "qwen2.5-7b-instruct": 0.38, | |
| "qwen2.5-14b-instruct": 0.58, | |
| "qwen2.5-32b-instruct": 0.608, | |
| "qwen2.5-72b-instruct": 0.5640000000000001, | |
| "llama-3.1-8b-instruct": 0.26, | |
| "llama-3.1-70b-instruct": 0.608, | |
| "llama-3.2-3b-instruct": 0.192, | |
| "llama-3.3-70b-instruct": 0.548, | |
| "mistral-large-instruct-2411": 0.64, | |
| "gemma-2-27b-it": 0.476, | |
| "gemma-2-9b-it": 0.30000000000000004, | |
| "deepseek-v3": 0.916, | |
| "deepseek-r1": 0.7040000000000001, | |
| "qwq-32b": 0.7559999999999999, | |
| "Average": 0.5141333333333333 | |
| }, | |
| "GemstoneEnv": { | |
| "qwen2.5-3b-instruct": 0.192, | |
| "qwen2.5-7b-instruct": 0.264, | |
| "qwen2.5-14b-instruct": 0.492, | |
| "qwen2.5-32b-instruct": 0.45599999999999996, | |
| "qwen2.5-72b-instruct": 0.44000000000000006, | |
| "llama-3.1-8b-instruct": 0.192, | |
| "llama-3.1-70b-instruct": 0.40800000000000003, | |
| "llama-3.2-3b-instruct": 0.15200000000000002, | |
| "llama-3.3-70b-instruct": 0.45599999999999996, | |
| "mistral-large-instruct-2411": 0.528, | |
| "gemma-2-27b-it": 0.33999999999999997, | |
| "gemma-2-9b-it": 0.256, | |
| "deepseek-v3": 0.5680000000000001, | |
| "deepseek-r1": 0.5680000000000001, | |
| "qwq-32b": 0.636, | |
| "Average": 0.3965333333333333 | |
| }, | |
| "MicrobiologyEnv": { | |
| "qwen2.5-3b-instruct": 0.14400000000000002, | |
| "qwen2.5-7b-instruct": 0.38400000000000006, | |
| "qwen2.5-14b-instruct": 0.752, | |
| "qwen2.5-32b-instruct": 0.7, | |
| "qwen2.5-72b-instruct": 0.844, | |
| "llama-3.1-8b-instruct": 0.316, | |
| "llama-3.1-70b-instruct": 0.512, | |
| "llama-3.2-3b-instruct": 0.12000000000000002, | |
| "llama-3.3-70b-instruct": 0.496, | |
| "mistral-large-instruct-2411": 0.764, | |
| "gemma-2-27b-it": 0.504, | |
| "gemma-2-9b-it": 0.172, | |
| "deepseek-v3": 0.9279999999999999, | |
| "deepseek-r1": 0.952, | |
| "qwq-32b": 0.932, | |
| "Average": 0.568 | |
| }, | |
| "SciFiEnv": { | |
| "qwen2.5-3b-instruct": 0.192, | |
| "qwen2.5-7b-instruct": 0.384, | |
| "qwen2.5-14b-instruct": 0.7879999999999999, | |
| "qwen2.5-32b-instruct": 0.776, | |
| "qwen2.5-72b-instruct": 0.7879999999999999, | |
| "llama-3.1-8b-instruct": 0.35200000000000004, | |
| "llama-3.1-70b-instruct": 0.664, | |
| "llama-3.2-3b-instruct": 0.164, | |
| "llama-3.3-70b-instruct": 0.588, | |
| "mistral-large-instruct-2411": 0.736, | |
| "gemma-2-27b-it": 0.52, | |
| "gemma-2-9b-it": 0.33599999999999997, | |
| "deepseek-v3": 0.9279999999999999, | |
| "deepseek-r1": 0.9199999999999999, | |
| "qwq-32b": 0.9, | |
| "Average": 0.6023999999999999 | |
| }, | |
| "HormoneEnv": { | |
| "qwen2.5-3b-instruct": 0.152, | |
| "qwen2.5-7b-instruct": 0.40800000000000003, | |
| "qwen2.5-14b-instruct": 0.7999999999999999, | |
| "qwen2.5-32b-instruct": 0.784, | |
| "qwen2.5-72b-instruct": 0.764, | |
| "llama-3.1-8b-instruct": 0.336, | |
| "llama-3.1-70b-instruct": 0.76, | |
| "llama-3.2-3b-instruct": 0.184, | |
| "llama-3.3-70b-instruct": 0.8480000000000001, | |
| "mistral-large-instruct-2411": 0.8, | |
| "gemma-2-27b-it": 0.524, | |
| "gemma-2-9b-it": 0.312, | |
| "deepseek-v3": 0.9480000000000001, | |
| "deepseek-r1": 0.944, | |
| "qwq-32b": 0.852, | |
| "Average": 0.6277333333333334 | |
| }, | |
| "SculptorEnv": { | |
| "qwen2.5-3b-instruct": 0.23200000000000004, | |
| "qwen2.5-7b-instruct": 0.4159999999999999, | |
| "qwen2.5-14b-instruct": 0.7079999999999999, | |
| "qwen2.5-32b-instruct": 0.636, | |
| "qwen2.5-72b-instruct": 0.6, | |
| "llama-3.1-8b-instruct": 0.22799999999999998, | |
| "llama-3.1-70b-instruct": 0.484, | |
| "llama-3.2-3b-instruct": 0.188, | |
| "llama-3.3-70b-instruct": 0.532, | |
| "mistral-large-instruct-2411": 0.684, | |
| "gemma-2-27b-it": 0.30000000000000004, | |
| "gemma-2-9b-it": 0.156, | |
| "deepseek-v3": 0.788, | |
| "deepseek-r1": 0.7479999999999999, | |
| "qwq-32b": 0.8119999999999999, | |
| "Average": 0.5008 | |
| }, | |
| "NeuroEnv": { | |
| "qwen2.5-3b-instruct": 0.10800000000000001, | |
| "qwen2.5-7b-instruct": 0.24400000000000005, | |
| "qwen2.5-14b-instruct": 0.8960000000000001, | |
| "qwen2.5-32b-instruct": 0.892, | |
| "qwen2.5-72b-instruct": 0.8879999999999999, | |
| "llama-3.1-8b-instruct": 0.512, | |
| "llama-3.1-70b-instruct": 0.8880000000000001, | |
| "llama-3.2-3b-instruct": 0.20400000000000001, | |
| "llama-3.3-70b-instruct": 0.9279999999999999, | |
| "mistral-large-instruct-2411": 0.8880000000000001, | |
| "gemma-2-27b-it": 0.72, | |
| "gemma-2-9b-it": 0.42800000000000005, | |
| "deepseek-v3": 0.952, | |
| "deepseek-r1": 0.932, | |
| "qwq-32b": 0.852, | |
| "Average": 0.6888000000000001 | |
| }, | |
| "OceanEnv": { | |
| "qwen2.5-3b-instruct": 0.2, | |
| "qwen2.5-7b-instruct": 0.45999999999999996, | |
| "qwen2.5-14b-instruct": 0.6160000000000001, | |
| "qwen2.5-32b-instruct": 0.6000000000000001, | |
| "qwen2.5-72b-instruct": 0.62, | |
| "llama-3.1-8b-instruct": 0.36400000000000005, | |
| "llama-3.1-70b-instruct": 0.5680000000000001, | |
| "llama-3.2-3b-instruct": 0.156, | |
| "llama-3.3-70b-instruct": 0.476, | |
| "mistral-large-instruct-2411": 0.656, | |
| "gemma-2-27b-it": 0.43200000000000005, | |
| "gemma-2-9b-it": 0.248, | |
| "deepseek-v3": 0.852, | |
| "deepseek-r1": 0.836, | |
| "qwq-32b": 0.8240000000000001, | |
| "Average": 0.5272000000000001 | |
| }, | |
| "MineralEnv": { | |
| "qwen2.5-3b-instruct": 0.14400000000000002, | |
| "qwen2.5-7b-instruct": 0.38, | |
| "qwen2.5-14b-instruct": 0.768, | |
| "qwen2.5-32b-instruct": 0.6960000000000001, | |
| "qwen2.5-72b-instruct": 0.684, | |
| "llama-3.1-8b-instruct": 0.29600000000000004, | |
| "llama-3.1-70b-instruct": 0.556, | |
| "llama-3.2-3b-instruct": 0.16, | |
| "llama-3.3-70b-instruct": 0.56, | |
| "mistral-large-instruct-2411": 0.66, | |
| "gemma-2-27b-it": 0.384, | |
| "gemma-2-9b-it": 0.17200000000000001, | |
| "deepseek-v3": 0.8480000000000001, | |
| "deepseek-r1": 0.82, | |
| "qwq-32b": 0.8720000000000001, | |
| "Average": 0.5333333333333333 | |
| }, | |
| "FishEnv": { | |
| "qwen2.5-3b-instruct": 0.188, | |
| "qwen2.5-7b-instruct": 0.38, | |
| "qwen2.5-14b-instruct": 0.732, | |
| "qwen2.5-32b-instruct": 0.668, | |
| "qwen2.5-72b-instruct": 0.7200000000000001, | |
| "llama-3.1-8b-instruct": 0.392, | |
| "llama-3.1-70b-instruct": 0.624, | |
| "llama-3.2-3b-instruct": 0.13599999999999998, | |
| "llama-3.3-70b-instruct": 0.616, | |
| "mistral-large-instruct-2411": 0.736, | |
| "gemma-2-27b-it": 0.508, | |
| "gemma-2-9b-it": 0.268, | |
| "deepseek-v3": 0.86, | |
| "deepseek-r1": 0.868, | |
| "qwq-32b": 0.924, | |
| "Average": 0.5746666666666667 | |
| }, | |
| "MartialArtsEnv": { | |
| "qwen2.5-3b-instruct": 0.184, | |
| "qwen2.5-7b-instruct": 0.43200000000000005, | |
| "qwen2.5-14b-instruct": 0.672, | |
| "qwen2.5-32b-instruct": 0.5640000000000001, | |
| "qwen2.5-72b-instruct": 0.56, | |
| "llama-3.1-8b-instruct": 0.276, | |
| "llama-3.1-70b-instruct": 0.54, | |
| "llama-3.2-3b-instruct": 0.2, | |
| "llama-3.3-70b-instruct": 0.52, | |
| "mistral-large-instruct-2411": 0.568, | |
| "gemma-2-27b-it": 0.4, | |
| "gemma-2-9b-it": 0.22400000000000003, | |
| "deepseek-v3": 0.784, | |
| "deepseek-r1": 0.716, | |
| "qwq-32b": 0.752, | |
| "Average": 0.4928 | |
| }, | |
| "RocketFuelEnv": { | |
| "qwen2.5-3b-instruct": 0.22800000000000004, | |
| "qwen2.5-7b-instruct": 0.41600000000000004, | |
| "qwen2.5-14b-instruct": 0.852, | |
| "qwen2.5-32b-instruct": 0.7879999999999999, | |
| "qwen2.5-72b-instruct": 0.8160000000000001, | |
| "llama-3.1-8b-instruct": 0.36, | |
| "llama-3.1-70b-instruct": 0.6799999999999999, | |
| "llama-3.2-3b-instruct": 0.184, | |
| "llama-3.3-70b-instruct": 0.7239999999999999, | |
| "mistral-large-instruct-2411": 0.828, | |
| "gemma-2-27b-it": 0.6279999999999999, | |
| "gemma-2-9b-it": 0.248, | |
| "deepseek-v3": 0.916, | |
| "deepseek-r1": 0.8960000000000001, | |
| "qwq-32b": 0.9040000000000001, | |
| "Average": 0.6312000000000001 | |
| }, | |
| "MLEnv": { | |
| "qwen2.5-3b-instruct": 0.088, | |
| "qwen2.5-7b-instruct": 0.392, | |
| "qwen2.5-14b-instruct": 0.6, | |
| "qwen2.5-32b-instruct": 0.748, | |
| "qwen2.5-72b-instruct": 0.792, | |
| "llama-3.1-8b-instruct": 0.304, | |
| "llama-3.1-70b-instruct": 0.672, | |
| "llama-3.2-3b-instruct": 0.10799999999999998, | |
| "llama-3.3-70b-instruct": 0.5960000000000001, | |
| "mistral-large-instruct-2411": 0.7639999999999999, | |
| "gemma-2-27b-it": 0.264, | |
| "gemma-2-9b-it": 0.156, | |
| "deepseek-v3": 0.808, | |
| "deepseek-r1": 0.652, | |
| "qwq-32b": 0.772, | |
| "Average": 0.5144 | |
| }, | |
| "PoliticalManifestoEnv": { | |
| "qwen2.5-3b-instruct": 0.184, | |
| "qwen2.5-7b-instruct": 0.312, | |
| "qwen2.5-14b-instruct": 0.76, | |
| "qwen2.5-32b-instruct": 0.852, | |
| "qwen2.5-72b-instruct": 0.7839999999999999, | |
| "llama-3.1-8b-instruct": 0.42400000000000004, | |
| "llama-3.1-70b-instruct": 0.62, | |
| "llama-3.2-3b-instruct": 0.128, | |
| "llama-3.3-70b-instruct": 0.692, | |
| "mistral-large-instruct-2411": 0.796, | |
| "gemma-2-27b-it": 0.45200000000000007, | |
| "gemma-2-9b-it": 0.152, | |
| "deepseek-v3": 0.86, | |
| "deepseek-r1": 0.792, | |
| "qwq-32b": 0.8800000000000001, | |
| "Average": 0.5792 | |
| }, | |
| "CoffeeEnv": { | |
| "qwen2.5-3b-instruct": 0.20400000000000001, | |
| "qwen2.5-7b-instruct": 0.38, | |
| "qwen2.5-14b-instruct": 0.7799999999999999, | |
| "qwen2.5-32b-instruct": 0.8039999999999999, | |
| "qwen2.5-72b-instruct": 0.764, | |
| "llama-3.1-8b-instruct": 0.31599999999999995, | |
| "llama-3.1-70b-instruct": 0.552, | |
| "llama-3.2-3b-instruct": 0.17200000000000001, | |
| "llama-3.3-70b-instruct": 0.6599999999999999, | |
| "mistral-large-instruct-2411": 0.828, | |
| "gemma-2-27b-it": 0.592, | |
| "gemma-2-9b-it": 0.364, | |
| "deepseek-v3": 0.9120000000000001, | |
| "deepseek-r1": 0.9279999999999999, | |
| "qwq-32b": 0.9359999999999999, | |
| "Average": 0.6128 | |
| }, | |
| "MotifAnalysisEnv": { | |
| "qwen2.5-3b-instruct": 0.096, | |
| "qwen2.5-7b-instruct": 0.332, | |
| "qwen2.5-14b-instruct": 0.5680000000000001, | |
| "qwen2.5-32b-instruct": 0.496, | |
| "qwen2.5-72b-instruct": 0.5920000000000001, | |
| "llama-3.1-8b-instruct": 0.244, | |
| "llama-3.1-70b-instruct": 0.36000000000000004, | |
| "llama-3.2-3b-instruct": 0.13999999999999999, | |
| "llama-3.3-70b-instruct": 0.22400000000000003, | |
| "mistral-large-instruct-2411": 0.46399999999999997, | |
| "gemma-2-27b-it": 0.18, | |
| "gemma-2-9b-it": 0.128, | |
| "deepseek-v3": 0.752, | |
| "deepseek-r1": 0.8240000000000001, | |
| "qwq-32b": 0.8640000000000001, | |
| "Average": 0.4176 | |
| }, | |
| "NutritionEnv": { | |
| "qwen2.5-3b-instruct": 0.132, | |
| "qwen2.5-7b-instruct": 0.22000000000000003, | |
| "qwen2.5-14b-instruct": 0.7920000000000001, | |
| "qwen2.5-32b-instruct": 0.8400000000000001, | |
| "qwen2.5-72b-instruct": 0.876, | |
| "llama-3.1-8b-instruct": 0.264, | |
| "llama-3.1-70b-instruct": 0.64, | |
| "llama-3.2-3b-instruct": 0.128, | |
| "llama-3.3-70b-instruct": 0.7040000000000001, | |
| "mistral-large-instruct-2411": 0.8320000000000001, | |
| "gemma-2-27b-it": 0.38, | |
| "gemma-2-9b-it": 0.20800000000000002, | |
| "deepseek-v3": 0.944, | |
| "deepseek-r1": 0.944, | |
| "qwq-32b": 0.9120000000000001, | |
| "Average": 0.5877333333333333 | |
| }, | |
| "MalwareEnv": { | |
| "qwen2.5-3b-instruct": 0.16, | |
| "qwen2.5-7b-instruct": 0.316, | |
| "qwen2.5-14b-instruct": 0.728, | |
| "qwen2.5-32b-instruct": 0.756, | |
| "qwen2.5-72b-instruct": 0.7200000000000001, | |
| "llama-3.1-8b-instruct": 0.268, | |
| "llama-3.1-70b-instruct": 0.5840000000000001, | |
| "llama-3.2-3b-instruct": 0.10800000000000001, | |
| "llama-3.3-70b-instruct": 0.548, | |
| "mistral-large-instruct-2411": 0.752, | |
| "gemma-2-27b-it": 0.252, | |
| "gemma-2-9b-it": 0.12, | |
| "deepseek-v3": 0.916, | |
| "deepseek-r1": 0.9, | |
| "qwq-32b": 0.916, | |
| "Average": 0.5362666666666667 | |
| }, | |
| "GeologicalEnv": { | |
| "qwen2.5-3b-instruct": 0.132, | |
| "qwen2.5-7b-instruct": 0.336, | |
| "qwen2.5-14b-instruct": 0.7639999999999999, | |
| "qwen2.5-32b-instruct": 0.748, | |
| "qwen2.5-72b-instruct": 0.676, | |
| "llama-3.1-8b-instruct": 0.28800000000000003, | |
| "llama-3.1-70b-instruct": 0.552, | |
| "llama-3.2-3b-instruct": 0.13999999999999999, | |
| "llama-3.3-70b-instruct": 0.508, | |
| "mistral-large-instruct-2411": 0.812, | |
| "gemma-2-27b-it": 0.41600000000000004, | |
| "gemma-2-9b-it": 0.164, | |
| "deepseek-v3": 0.9119999999999999, | |
| "deepseek-r1": 0.8480000000000001, | |
| "qwq-32b": 0.8880000000000001, | |
| "Average": 0.5456000000000001 | |
| }, | |
| "TheatricalEnv": { | |
| "qwen2.5-3b-instruct": 0.14400000000000002, | |
| "qwen2.5-7b-instruct": 0.42400000000000004, | |
| "qwen2.5-14b-instruct": 0.676, | |
| "qwen2.5-32b-instruct": 0.78, | |
| "qwen2.5-72b-instruct": 0.808, | |
| "llama-3.1-8b-instruct": 0.41200000000000003, | |
| "llama-3.1-70b-instruct": 0.7959999999999999, | |
| "llama-3.2-3b-instruct": 0.1, | |
| "llama-3.3-70b-instruct": 0.768, | |
| "mistral-large-instruct-2411": 0.844, | |
| "gemma-2-27b-it": 0.528, | |
| "gemma-2-9b-it": 0.28, | |
| "deepseek-v3": 0.884, | |
| "deepseek-r1": 0.8240000000000001, | |
| "qwq-32b": 0.908, | |
| "Average": 0.6117333333333335 | |
| }, | |
| "PrintingTechniqueEnv": { | |
| "qwen2.5-3b-instruct": 0.144, | |
| "qwen2.5-7b-instruct": 0.252, | |
| "qwen2.5-14b-instruct": 0.736, | |
| "qwen2.5-32b-instruct": 0.7200000000000001, | |
| "qwen2.5-72b-instruct": 0.776, | |
| "llama-3.1-8b-instruct": 0.4, | |
| "llama-3.1-70b-instruct": 0.54, | |
| "llama-3.2-3b-instruct": 0.16, | |
| "llama-3.3-70b-instruct": 0.548, | |
| "mistral-large-instruct-2411": 0.7040000000000001, | |
| "gemma-2-27b-it": 0.44000000000000006, | |
| "gemma-2-9b-it": 0.192, | |
| "deepseek-v3": 0.916, | |
| "deepseek-r1": 0.852, | |
| "qwq-32b": 0.9279999999999999, | |
| "Average": 0.5538666666666666 | |
| }, | |
| "StellarEnv": { | |
| "qwen2.5-3b-instruct": 0.132, | |
| "qwen2.5-7b-instruct": 0.388, | |
| "qwen2.5-14b-instruct": 0.6759999999999999, | |
| "qwen2.5-32b-instruct": 0.724, | |
| "qwen2.5-72b-instruct": 0.6960000000000001, | |
| "llama-3.1-8b-instruct": 0.30000000000000004, | |
| "llama-3.1-70b-instruct": 0.6040000000000001, | |
| "llama-3.2-3b-instruct": 0.16, | |
| "llama-3.3-70b-instruct": 0.6240000000000001, | |
| "mistral-large-instruct-2411": 0.732, | |
| "gemma-2-27b-it": 0.364, | |
| "gemma-2-9b-it": 0.23199999999999998, | |
| "deepseek-v3": 0.82, | |
| "deepseek-r1": 0.648, | |
| "qwq-32b": 0.776, | |
| "Average": 0.5250666666666667 | |
| }, | |
| "SoilEnv": { | |
| "qwen2.5-3b-instruct": 0.172, | |
| "qwen2.5-7b-instruct": 0.48, | |
| "qwen2.5-14b-instruct": 0.8320000000000001, | |
| "qwen2.5-32b-instruct": 0.788, | |
| "qwen2.5-72b-instruct": 0.8240000000000001, | |
| "llama-3.1-8b-instruct": 0.42400000000000004, | |
| "llama-3.1-70b-instruct": 0.64, | |
| "llama-3.2-3b-instruct": 0.22799999999999998, | |
| "llama-3.3-70b-instruct": 0.664, | |
| "mistral-large-instruct-2411": 0.76, | |
| "gemma-2-27b-it": 0.628, | |
| "gemma-2-9b-it": 0.44000000000000006, | |
| "deepseek-v3": 0.884, | |
| "deepseek-r1": 0.8039999999999999, | |
| "qwq-32b": 0.8480000000000001, | |
| "Average": 0.6277333333333334 | |
| }, | |
| "SoftwareEnv": { | |
| "qwen2.5-3b-instruct": 0.14800000000000002, | |
| "qwen2.5-7b-instruct": 0.40800000000000003, | |
| "qwen2.5-14b-instruct": 0.744, | |
| "qwen2.5-32b-instruct": 0.86, | |
| "qwen2.5-72b-instruct": 0.8400000000000001, | |
| "llama-3.1-8b-instruct": 0.4159999999999999, | |
| "llama-3.1-70b-instruct": 0.72, | |
| "llama-3.2-3b-instruct": 0.16799999999999998, | |
| "llama-3.3-70b-instruct": 0.784, | |
| "mistral-large-instruct-2411": 0.804, | |
| "gemma-2-27b-it": 0.528, | |
| "gemma-2-9b-it": 0.308, | |
| "deepseek-v3": 0.836, | |
| "deepseek-r1": 0.8360000000000001, | |
| "qwq-32b": 0.8800000000000001, | |
| "Average": 0.6186666666666667 | |
| }, | |
| "CarIdentificationEnv": { | |
| "qwen2.5-3b-instruct": 0.272, | |
| "qwen2.5-7b-instruct": 0.4, | |
| "qwen2.5-14b-instruct": 0.9120000000000001, | |
| "qwen2.5-32b-instruct": 0.916, | |
| "qwen2.5-72b-instruct": 0.9359999999999999, | |
| "llama-3.1-8b-instruct": 0.544, | |
| "llama-3.1-70b-instruct": 0.8400000000000001, | |
| "llama-3.2-3b-instruct": 0.124, | |
| "llama-3.3-70b-instruct": 0.852, | |
| "mistral-large-instruct-2411": 0.9119999999999999, | |
| "gemma-2-27b-it": 0.672, | |
| "gemma-2-9b-it": 0.376, | |
| "deepseek-v3": 0.992, | |
| "deepseek-r1": 0.952, | |
| "qwq-32b": 0.9879999999999999, | |
| "Average": 0.7125333333333334 | |
| }, | |
| "PharmaceuticalEnv": { | |
| "qwen2.5-3b-instruct": 0.156, | |
| "qwen2.5-7b-instruct": 0.32, | |
| "qwen2.5-14b-instruct": 0.7600000000000001, | |
| "qwen2.5-32b-instruct": 0.752, | |
| "qwen2.5-72b-instruct": 0.7559999999999999, | |
| "llama-3.1-8b-instruct": 0.28400000000000003, | |
| "llama-3.1-70b-instruct": 0.508, | |
| "llama-3.2-3b-instruct": 0.148, | |
| "llama-3.3-70b-instruct": 0.472, | |
| "mistral-large-instruct-2411": 0.756, | |
| "gemma-2-27b-it": 0.336, | |
| "gemma-2-9b-it": 0.128, | |
| "deepseek-v3": 0.8800000000000001, | |
| "deepseek-r1": 0.8640000000000001, | |
| "qwq-32b": 0.8, | |
| "Average": 0.528 | |
| }, | |
| "NetworkEnv": { | |
| "qwen2.5-3b-instruct": 0.184, | |
| "qwen2.5-7b-instruct": 0.36, | |
| "qwen2.5-14b-instruct": 0.66, | |
| "qwen2.5-32b-instruct": 0.716, | |
| "qwen2.5-72b-instruct": 0.716, | |
| "llama-3.1-8b-instruct": 0.43199999999999994, | |
| "llama-3.1-70b-instruct": 0.68, | |
| "llama-3.2-3b-instruct": 0.14400000000000002, | |
| "llama-3.3-70b-instruct": 0.7040000000000001, | |
| "mistral-large-instruct-2411": 0.78, | |
| "gemma-2-27b-it": 0.492, | |
| "gemma-2-9b-it": 0.392, | |
| "deepseek-v3": 0.8400000000000001, | |
| "deepseek-r1": 0.736, | |
| "qwq-32b": 0.828, | |
| "Average": 0.5776 | |
| }, | |
| "BirdNestEnv": { | |
| "qwen2.5-3b-instruct": 0.148, | |
| "qwen2.5-7b-instruct": 0.21200000000000002, | |
| "qwen2.5-14b-instruct": 0.48, | |
| "qwen2.5-32b-instruct": 0.33999999999999997, | |
| "qwen2.5-72b-instruct": 0.42400000000000004, | |
| "llama-3.1-8b-instruct": 0.16799999999999998, | |
| "llama-3.1-70b-instruct": 0.22400000000000003, | |
| "llama-3.2-3b-instruct": 0.084, | |
| "llama-3.3-70b-instruct": 0.20800000000000002, | |
| "mistral-large-instruct-2411": 0.492, | |
| "gemma-2-27b-it": 0.176, | |
| "gemma-2-9b-it": 0.128, | |
| "deepseek-v3": 0.764, | |
| "deepseek-r1": 0.756, | |
| "qwq-32b": 0.8119999999999999, | |
| "Average": 0.36106666666666676 | |
| }, | |
| "EnergyEnv": { | |
| "qwen2.5-3b-instruct": 0.15999999999999998, | |
| "qwen2.5-7b-instruct": 0.42000000000000004, | |
| "qwen2.5-14b-instruct": 0.7999999999999999, | |
| "qwen2.5-32b-instruct": 0.7, | |
| "qwen2.5-72b-instruct": 0.5880000000000001, | |
| "llama-3.1-8b-instruct": 0.29600000000000004, | |
| "llama-3.1-70b-instruct": 0.46799999999999997, | |
| "llama-3.2-3b-instruct": 0.18, | |
| "llama-3.3-70b-instruct": 0.396, | |
| "mistral-large-instruct-2411": 0.78, | |
| "gemma-2-27b-it": 0.35200000000000004, | |
| "gemma-2-9b-it": 0.196, | |
| "deepseek-v3": 0.916, | |
| "deepseek-r1": 0.8720000000000001, | |
| "qwq-32b": 0.8880000000000001, | |
| "Average": 0.5341333333333333 | |
| }, | |
| "LanguageEnv": { | |
| "qwen2.5-3b-instruct": 0.196, | |
| "qwen2.5-7b-instruct": 0.304, | |
| "qwen2.5-14b-instruct": 0.388, | |
| "qwen2.5-32b-instruct": 0.512, | |
| "qwen2.5-72b-instruct": 0.5599999999999999, | |
| "llama-3.1-8b-instruct": 0.23200000000000004, | |
| "llama-3.1-70b-instruct": 0.40800000000000003, | |
| "llama-3.2-3b-instruct": 0.144, | |
| "llama-3.3-70b-instruct": 0.336, | |
| "mistral-large-instruct-2411": 0.536, | |
| "gemma-2-27b-it": 0.20800000000000002, | |
| "gemma-2-9b-it": 0.172, | |
| "deepseek-v3": 0.724, | |
| "deepseek-r1": 0.716, | |
| "qwq-32b": 0.8119999999999999, | |
| "Average": 0.41653333333333337 | |
| }, | |
| "AlgorithmEnv": { | |
| "qwen2.5-3b-instruct": 0.1, | |
| "qwen2.5-7b-instruct": 0.28400000000000003, | |
| "qwen2.5-14b-instruct": 0.688, | |
| "qwen2.5-32b-instruct": 0.6960000000000001, | |
| "qwen2.5-72b-instruct": 0.66, | |
| "llama-3.1-8b-instruct": 0.35200000000000004, | |
| "llama-3.1-70b-instruct": 0.512, | |
| "llama-3.2-3b-instruct": 0.22399999999999998, | |
| "llama-3.3-70b-instruct": 0.484, | |
| "mistral-large-instruct-2411": 0.788, | |
| "gemma-2-27b-it": 0.268, | |
| "gemma-2-9b-it": 0.164, | |
| "deepseek-v3": 0.792, | |
| "deepseek-r1": 0.724, | |
| "qwq-32b": 0.812, | |
| "Average": 0.5032 | |
| }, | |
| "MathematicalEnv": { | |
| "qwen2.5-3b-instruct": 0.048, | |
| "qwen2.5-7b-instruct": 0.42800000000000005, | |
| "qwen2.5-14b-instruct": 0.7000000000000001, | |
| "qwen2.5-32b-instruct": 0.8119999999999999, | |
| "qwen2.5-72b-instruct": 0.792, | |
| "llama-3.1-8b-instruct": 0.316, | |
| "llama-3.1-70b-instruct": 0.8, | |
| "llama-3.2-3b-instruct": 0.12800000000000003, | |
| "llama-3.3-70b-instruct": 0.8400000000000001, | |
| "mistral-large-instruct-2411": 0.884, | |
| "gemma-2-27b-it": 0.268, | |
| "gemma-2-9b-it": 0.068, | |
| "deepseek-v3": 0.9119999999999999, | |
| "deepseek-r1": 0.876, | |
| "qwq-32b": 0.8160000000000001, | |
| "Average": 0.5792 | |
| }, | |
| "MusicalEnv": { | |
| "qwen2.5-3b-instruct": 0.04, | |
| "qwen2.5-7b-instruct": 0.336, | |
| "qwen2.5-14b-instruct": 0.8039999999999999, | |
| "qwen2.5-32b-instruct": 0.8560000000000001, | |
| "qwen2.5-72b-instruct": 0.8400000000000001, | |
| "llama-3.1-8b-instruct": 0.34400000000000003, | |
| "llama-3.1-70b-instruct": 0.68, | |
| "llama-3.2-3b-instruct": 0.088, | |
| "llama-3.3-70b-instruct": 0.8240000000000001, | |
| "mistral-large-instruct-2411": 0.884, | |
| "gemma-2-27b-it": 0.28, | |
| "gemma-2-9b-it": 0.11599999999999999, | |
| "deepseek-v3": 0.9480000000000001, | |
| "deepseek-r1": 0.892, | |
| "qwq-32b": 0.9039999999999999, | |
| "Average": 0.5890666666666668 | |
| }, | |
| "InventorEnv": { | |
| "qwen2.5-3b-instruct": 0.14800000000000002, | |
| "qwen2.5-7b-instruct": 0.43200000000000005, | |
| "qwen2.5-14b-instruct": 0.776, | |
| "qwen2.5-32b-instruct": 0.7999999999999999, | |
| "qwen2.5-72b-instruct": 0.772, | |
| "llama-3.1-8b-instruct": 0.4, | |
| "llama-3.1-70b-instruct": 0.7, | |
| "llama-3.2-3b-instruct": 0.188, | |
| "llama-3.3-70b-instruct": 0.616, | |
| "mistral-large-instruct-2411": 0.8039999999999999, | |
| "gemma-2-27b-it": 0.552, | |
| "gemma-2-9b-it": 0.364, | |
| "deepseek-v3": 0.9399999999999998, | |
| "deepseek-r1": 0.908, | |
| "qwq-32b": 0.9, | |
| "Average": 0.62 | |
| }, | |
| "MedicalEnv": { | |
| "qwen2.5-3b-instruct": 0.22000000000000003, | |
| "qwen2.5-7b-instruct": 0.544, | |
| "qwen2.5-14b-instruct": 0.8320000000000001, | |
| "qwen2.5-32b-instruct": 0.8800000000000001, | |
| "qwen2.5-72b-instruct": 0.8960000000000001, | |
| "llama-3.1-8b-instruct": 0.52, | |
| "llama-3.1-70b-instruct": 0.82, | |
| "llama-3.2-3b-instruct": 0.23200000000000004, | |
| "llama-3.3-70b-instruct": 0.8960000000000001, | |
| "mistral-large-instruct-2411": 0.8960000000000001, | |
| "gemma-2-27b-it": 0.692, | |
| "gemma-2-9b-it": 0.5760000000000001, | |
| "deepseek-v3": 0.9039999999999999, | |
| "deepseek-r1": 0.9359999999999999, | |
| "qwq-32b": 0.9199999999999999, | |
| "Average": 0.7175999999999999 | |
| }, | |
| "MusicEnv": { | |
| "qwen2.5-3b-instruct": 0.184, | |
| "qwen2.5-7b-instruct": 0.26, | |
| "qwen2.5-14b-instruct": 0.656, | |
| "qwen2.5-32b-instruct": 0.664, | |
| "qwen2.5-72b-instruct": 0.7559999999999999, | |
| "llama-3.1-8b-instruct": 0.356, | |
| "llama-3.1-70b-instruct": 0.596, | |
| "llama-3.2-3b-instruct": 0.10800000000000001, | |
| "llama-3.3-70b-instruct": 0.596, | |
| "mistral-large-instruct-2411": 0.6639999999999999, | |
| "gemma-2-27b-it": 0.45600000000000007, | |
| "gemma-2-9b-it": 0.28400000000000003, | |
| "deepseek-v3": 0.8119999999999999, | |
| "deepseek-r1": 0.868, | |
| "qwq-32b": 0.868, | |
| "Average": 0.5418666666666667 | |
| }, | |
| "FantasyEnv": { | |
| "qwen2.5-3b-instruct": 0.148, | |
| "qwen2.5-7b-instruct": 0.32, | |
| "qwen2.5-14b-instruct": 0.74, | |
| "qwen2.5-32b-instruct": 0.7879999999999999, | |
| "qwen2.5-72b-instruct": 0.5720000000000001, | |
| "llama-3.1-8b-instruct": 0.40800000000000003, | |
| "llama-3.1-70b-instruct": 0.676, | |
| "llama-3.2-3b-instruct": 0.152, | |
| "llama-3.3-70b-instruct": 0.704, | |
| "mistral-large-instruct-2411": 0.8240000000000001, | |
| "gemma-2-27b-it": 0.524, | |
| "gemma-2-9b-it": 0.324, | |
| "deepseek-v3": 0.9199999999999999, | |
| "deepseek-r1": 0.9719999999999999, | |
| "qwq-32b": 0.9719999999999999, | |
| "Average": 0.6029333333333332 | |
| }, | |
| "EducationEnv": { | |
| "qwen2.5-3b-instruct": 0.10400000000000001, | |
| "qwen2.5-7b-instruct": 0.268, | |
| "qwen2.5-14b-instruct": 0.828, | |
| "qwen2.5-32b-instruct": 0.9039999999999999, | |
| "qwen2.5-72b-instruct": 0.8480000000000001, | |
| "llama-3.1-8b-instruct": 0.5680000000000001, | |
| "llama-3.1-70b-instruct": 0.768, | |
| "llama-3.2-3b-instruct": 0.192, | |
| "llama-3.3-70b-instruct": 0.9039999999999999, | |
| "mistral-large-instruct-2411": 0.876, | |
| "gemma-2-27b-it": 0.624, | |
| "gemma-2-9b-it": 0.45999999999999996, | |
| "deepseek-v3": 0.9480000000000001, | |
| "deepseek-r1": 0.9, | |
| "qwq-32b": 0.9359999999999999, | |
| "Average": 0.6752 | |
| }, | |
| "ChemicalEnv": { | |
| "qwen2.5-3b-instruct": 0.264, | |
| "qwen2.5-7b-instruct": 0.44000000000000006, | |
| "qwen2.5-14b-instruct": 0.724, | |
| "qwen2.5-32b-instruct": 0.7040000000000001, | |
| "qwen2.5-72b-instruct": 0.72, | |
| "llama-3.1-8b-instruct": 0.36, | |
| "llama-3.1-70b-instruct": 0.62, | |
| "llama-3.2-3b-instruct": 0.16399999999999998, | |
| "llama-3.3-70b-instruct": 0.45999999999999996, | |
| "mistral-large-instruct-2411": 0.68, | |
| "gemma-2-27b-it": 0.44399999999999995, | |
| "gemma-2-9b-it": 0.316, | |
| "deepseek-v3": 0.8799999999999999, | |
| "deepseek-r1": 0.6799999999999999, | |
| "qwq-32b": 0.8200000000000001, | |
| "Average": 0.5517333333333333 | |
| }, | |
| "Average": { | |
| "qwen2.5-3b-instruct": 0.1655841584158416, | |
| "qwen2.5-7b-instruct": 0.34736633663366323, | |
| "qwen2.5-14b-instruct": 0.7148514851485149, | |
| "qwen2.5-32b-instruct": 0.7330693069306928, | |
| "qwen2.5-72b-instruct": 0.7272079207920793, | |
| "llama-3.1-8b-instruct": 0.3334653465346535, | |
| "llama-3.1-70b-instruct": 0.6271287128712871, | |
| "llama-3.2-3b-instruct": 0.15599999999999997, | |
| "llama-3.3-70b-instruct": 0.6372277227722771, | |
| "mistral-large-instruct-2411": 0.7573861386138615, | |
| "gemma-2-27b-it": 0.44522772277227735, | |
| "gemma-2-9b-it": 0.264, | |
| "deepseek-v3": 0.8605148514851484, | |
| "deepseek-r1": 0.8304554455445546, | |
| "qwq-32b": 0.8630891089108911 | |
| } | |
| } |