Spaces:

ZhiyuanZeng
/

RLVE_Gym

Running

App Files Files Community

ZhiyuanZeng committed on 21 days ago

Commit

c4bedee

1 Parent(s): 59b6e0f

misc

Browse files

Files changed (1) hide show

server/RLVE_Gym_environment.py +24 -14

server/RLVE_Gym_environment.py CHANGED Viewed

@@ -33,7 +33,15 @@ class RlveGymEnvironment(Environment):
         answer_markers: Optional[Tuple[str, str]] = None,
         initial_seed: int = None,
     ):
-        """Initialize the RLVE_Gym environment."""
         if environment_identifier is not None :
             self.environment_identifier = environment_identifier
@@ -69,10 +77,11 @@ class RlveGymEnvironment(Environment):
         Reset the environment.
         Returns:
-            problem_input: The generated problem input string (or None if generation failed)
-            verifier_result: None
-            success: Boolean indicating whether the reset was successful
-            message: The result of the reset
         """
         if (self.environment_identifier not in identifier2environment) or (
             self.environment_identifier not in identifier2controller
@@ -152,13 +161,15 @@ class RlveGymEnvironment(Environment):
         Execute a step in the environment by verifying the model output.
         Args:
-            action: RlveGymAction containing the output to verify
         Returns:
-            problem_input: The problem input string from the current state
-            verifier_result: Result of the verification containing accuracy and other metrics
-            success: Boolean indicating whether the step (verification) was successful
-            message: The result of the step
         """
         if self.problem is None:
             return RlveGymObservation(
@@ -197,9 +208,8 @@ class RlveGymEnvironment(Environment):
         Get the current environment state.
         Returns:
-            seed: The current random seed value for problem generation
-            problem_input: The generated problem input string (or None if generation failed)
-            num_samples: Number of samples taken so far
-            sum_accuracy: Sum of accuracies from verifications so far
         """
         return self._state

         answer_markers: Optional[Tuple[str, str]] = None,
         initial_seed: int = None,
     ):
+        """
+        Initialize the RLVE_Gym environment.
+        Args:
+            environment_identifier (str): The environment's identifier. Check server/Gym/environments/__init__.py for detailed usage.
+            difficulty (int): The difficulty of generated problems.
+            answer_markers (Optional[Tuple[str, str]]): How the environment extracts the final answer from a model output.
+            initial_seed (int): The initial seed to use when generating the first problem. Whenever reset() is called, the seed will be incremented by 1.
+        """
         if environment_identifier is not None :
             self.environment_identifier = environment_identifier
         Reset the environment.
         Returns:
+            problem_input (Optional[str]): The input of the problem; if it is None, it means that the problem generation has not been run or has failed.
+            verifier_result (Optional[dict]): Contains reward as the raw reward, accuracy as the 0/1 correctness, and format_score as the 0/1 format correctness; if it is None, it means that the verification has failed.
+            success (bool): Whether the operation succeeded.
+            message (str): The explanation of success.
+            reward (Optional[float]): The value is verifier_result["reward"] when verifier_result is not None (otherwise, reward is also None).
         """
         if (self.environment_identifier not in identifier2environment) or (
             self.environment_identifier not in identifier2controller
         Execute a step in the environment by verifying the model output.
         Args:
+            action (RlveGymAction): Contains a single field:
+                - output (str): The model's output to get verified.
         Returns:
+            problem_input (Optional[str]): The input of the problem; if it is None, it means that the problem generation has not been run or has failed.
+            verifier_result (Optional[dict]): Contains reward as the raw reward, accuracy as the 0/1 correctness, and format_score as the 0/1 format correctness; if it is None, it means that the verification has failed.
+            success (bool): Whether the operation succeeded.
+            message (str): The explanation of success.
+            reward (Optional[float]): The value is verifier_result["reward"] when verifier_result is not None (otherwise, reward is also None).
         """
         if self.problem is None:
             return RlveGymObservation(
         Get the current environment state.
         Returns:
+            seed (int): The seed to use when running reset().
+            problem_input (Optional[str]): The input of the problem; if it is None, it means that the problem generation has not been run or has failed.
+            num_samples (int) and sum_accuracy (int): The statistics of the result of `step(action)` so far for the current problem (the number of outputs sent to the verifier and the number of correct ones).
         """
         return self._state