Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import re | |
def extract_judge_hash(response):
    """Encode the per-checklist judgments of a judge response as a string.

    The text between the first ``<answer>`` tag and the next ``</answer>``
    tag is lowercased and stripped of spaces, then scanned for the markers
    ``:yes``, ``:inprogress`` and ``:no``.

    Args:
        response: Mapping whose ``'response'`` entry holds the judge's text.

    Returns:
        A string with one character per marker, in order of appearance
        (``'y'`` for yes, ``'i'`` for in progress, ``'n'`` for no). The
        string is empty when the answer section contains no marker.
        ``None`` when the answer section cannot be extracted, e.g. the
        ``<answer>`` tag is missing or the content is not a string.

    Raises:
        KeyError: If ``response`` has no ``'response'`` entry.
    """
    content = response['response']
    try:
        normalized = content.lower().replace(' ', '')
        judge_content = normalized.split('<answer>')[1].split('</answer>')[0]
    except Exception:
        # Best-effort parsing: report the failure and signal it with None.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently converted to None.
        import traceback
        traceback.print_exc()
        return None

    marker_codes = {':yes': 'y', ':inprogress': 'i', ':no': 'n'}
    markers = re.findall(r":yes|:inprogress|:no", judge_content)
    return ''.join(marker_codes[marker] for marker in markers)
def average_logits(response):
    """Compute a scalar reward from the averaged yes / in-progress scores.

    Args:
        response: Mapping whose ``'judge_probs'`` entry is a sequence of
            mappings, each providing a ``'yes'`` and an ``'in'`` value.
            NOTE(review): the key name suggests probabilities while the
            function name says logits -- confirm against the caller.

    Returns:
        The mean ``'yes'`` value plus half of the mean ``'in'`` value.
    """
    per_item_scores = response['judge_probs']

    # Collect each score separately, then average; "in progress" counts
    # for half as much as "yes" in the final reward.
    yes_scores = [scores['yes'] for scores in per_item_scores]
    in_progress_scores = [scores['in'] for scores in per_item_scores]

    mean_yes = np.mean(yes_scores)
    mean_in_progress = np.mean(in_progress_scores)
    return mean_yes + 0.5 * mean_in_progress
# Registry mapping a reward-processor name to the function implementing it.
REWARD_PROCESSORS = dict(
    avg_logits=average_logits,
)
# Per-processor sample count, keyed by the same names as REWARD_PROCESSORS.
# NOTE(review): presumably the number of judge samples drawn per response --
# confirm against the code that reads this table.
REWARD_PROCESSOR_N_SAMPLES = dict(
    avg_logits=5,
)