Spaces:

ZhiyuanZeng
/

RLVE_Gym

Sleeping

File size: 8,522 Bytes

3bf8430

import random
from typing import Optional, List
from ...environment import VerifiableEnvironment


class BlockImage_Environment(VerifiableEnvironment) : # Source: https://www.luogu.com.cn/problem/P1058
    prompt_template = \
r"""You are given a {M} × {N} rectangular grid, where each cell represents a stack of identical cube blocks. Each cube has size 1 × 1 × 1, and no rotation or flipping is allowed — all cubes are placed in the same orientation.
You are given a matrix representing the number of cubes stacked on each cell in the grid (the integer at row i and column j indicates how many cube blocks are stacked on the cell located at row i, column j):
{matrix}

The visual representation of a **single cube** follows this fixed format:

$$
\def\arraystretch{1e-10}
\begin{aligned}
&\verb!  +---+!\\
&\verb! /   /|!\\
&\verb!+---+ |!\quad\textsf{height}\\
&\verb!|   | +!\\
&\verb!|   |/ !\quad\textsf{width}\\
&\verb!+---+  !\\
& \quad\textsf{length}
\end{aligned}
$$

Each `+` represents a corner, `-` spans the cube’s length, `/` shows depth (width), and `|` shows height. Empty space in the final drawing should be represented using `.`.

The 3D isometric projection follows specific stacking rules:

- **Two cubes side by side (left/right):**
$$
\def\arraystretch{1e-10}
\begin{aligned}
\verb!..+---+---+!\\
\verb!./   /   /|!\\
\verb!+---+---+ |!\\
\verb!|   |   | +!\\
\verb!|   |   |/.!\\
\verb!+---+---+..!\\
\end{aligned}
$$

- **Two cubes stacked vertically (top/bottom):**
$$
\def\arraystretch{1e-10}
\begin{aligned}
\verb!..+---+!\\
\verb!./   /|!\\
\verb!+---+ |!\\
\verb!|   | +!\\
\verb!|   |/|!\\
\verb!+---+ |!\\
\verb!|   | +!\\
\verb!|   |/.!\\
\verb!+---+..!\\
\end{aligned}
$$

- **Two cubes front/back (depth):**
$$
\def\arraystretch{1e-10}
\begin{aligned}
\verb!....+---+!\\
\verb!.../   /|!\\
\verb!..+---+ |!\\
\verb!./   /| +!\\
\verb!+---+ |/.!\\
\verb!|   | +..!\\
\verb!|   |/...!\\
\verb!+---+....!\\
\end{aligned}
$$

The bottom-left corner of the lowest cube in cell ({M}, 1) (bottom row, first column) should align with the bottom-left of the entire drawing.

**Output Format:**
Your final output should be a string matrix of dimensions K × L (i.e., it has K lines separated by line breaks, with each line containing exactly L characters), where K is the number of rows and L is the number of columns **required to draw the 3D structure correctly** according to the rules above.

---

**Example 1**

When the rectangular grid is 1 × 2, and the number of cubes in each cell is as follows:
1 3

The output is (do **NOT** include the backticks or quotes — use the format below exactly):
```
......+---+
...../   /|
....+---+ |
....|   | +
....|   |/|
....+---+ |
..+-|   | +
./  |   |/|
+---+---+ |
|   |   | +
|   |   |/.
+---+---+..
```

---

**Example 2**

When the rectangular grid is 3 × 4, and the number of cubes in each cell is as follows:
2 2 1 2
2 2 1 1
3 2 1 2

The output is (do **NOT** include the backticks or quotes — use the format below exactly):
```
......+---+---+...+---+
..+---+  /   /|../   /|
./   /|-+---+ |.+---+ |
+---+ |/   /| +-|   | +
|   | +---+ |/+---+ |/|
|   |/   /| +/   /|-+ |
+---+---+ |/+---+ |/| +
|   |   | +-|   | + |/.
|   |   |/  |   |/| +..
+---+---+---+---+ |/...
|   |   |   |   | +....
|   |   |   |   |/.....
+---+---+---+---+......
```
"""

    def __init__(self,
                 max_height : int = 5,
                 wrong_format : float = -1.0, invalid_answer : int = -0.5, wrong_size : int = 0.0, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 2.0,
                 **kwargs) :
        """
        Initialize the BlockImage_Environment instance.
        """
        super().__init__(**kwargs)

        self.max_height = max_height

        self.rewards = {
            "wrong_format" : wrong_format,
            "invalid_answer" : invalid_answer,
            "wrong_size" : wrong_size,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }
    
    def _generate(self) -> None :
        assert "MAX_M_N" in self.parameter, "MAX_M_N is required in parameter"
        MAX_M_N = self.parameter["MAX_M_N"]
        assert MAX_M_N >= 1, "MAX_M_N should be greater than or equal to 1"

        M = self.parameter["M"] = random.randint(1, MAX_M_N)
        N = self.parameter["N"] = random.randint(1, MAX_M_N)
        grid = self.parameter["grid"] = [[random.randint(1, self.max_height) for j in range(N)] for i in range(M)]


        max_row = 0
        max_col = 0
        for i in range(M) :
            for j in range(N) :
                a = grid[i][j]
                t = M - i - 1
                cand_col = 2 * t + 4 * j + 6
                if cand_col > max_col :
                    max_col = cand_col
                cand_row = 2 * t + 3 * (a - 1) + 5
                if cand_row > max_row :
                    max_row = cand_row


        height = max_row + 1
        width = max_col + 1
        canvas = [['.' for _ in range(width)] for _ in range(height)]
        template = [
            "..+---+",
            "./   /|",
            "+---+ |",
            "|   | +",
            "|   |/.",
            "+---+.."
        ]


        for i in range(M) :
            for j in range(N) :
                a = grid[i][j]
                t = M - i - 1
                for k in range(a) :
                    x_offset = 2 * t + 4 * j
                    y_offset = 2 * t + 3 * k
                    for r in range(6) :
                        for c in range(7) :
                            ch = template[r][c]
                            if ch != '.' :
                                row_index = y_offset + (5 - r)
                                col_index = x_offset + c
                                canvas[row_index][col_index] = ch

        output_lines = []
        for row in range(height - 1, -1, -1) :
            output_lines.append("".join(canvas[row]))
        self.parameter["reference_answer"] = "\n".join(output_lines)
    
    def _prompt_generate(self) -> str :
        prompt = self.prompt_template
        prompt = prompt.replace("{M}", str(self.parameter["M"]))
        prompt = prompt.replace("{N}", str(self.parameter["N"]))
        prompt = prompt.replace("{matrix}", "\n".join(" ".join(map(str, row)) for row in self.parameter["grid"]))
        return prompt


    def _process(self, answer : Optional[str]) -> Optional[List[str]] :
        if answer is not None :
            answer = answer.strip()
            image = []
            for line in answer.splitlines() :
                line = line.strip()
                if line :
                    image.append(line)
            return image
        else :
            return None
    
    def scorer(self, output : str) -> float :
        processed_result = self.processor(output)
        if processed_result is not None :
            image = processed_result

            if not image :
                return self.rewards["wrong_format"]
            for row in image :
                if len(row) != len(image[0]) :
                    return self.rewards["wrong_format"]
                if not all(ch in ".+-/| " for ch in row) :
                    return self.rewards["invalid_answer"]
            
            gold_image = self.parameter["reference_answer"].split("\n")
            if len(image) != len(gold_image) :
                return self.rewards["wrong_size"]
            if len(image[0]) != len(gold_image[0]) :
                return self.rewards["wrong_size"]
            
            total_correct = 0
            for gold_row, row in zip(gold_image, image) :
                assert len(gold_row) == len(row)
                total_correct += sum(gold_row[i] == row[i] for i in range(len(gold_row)))
            total_cells = len(gold_image) * len(gold_image[0])
            
            if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" :
                return self.rewards["rewarding_weight"] * (((total_correct / total_cells)) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (total_correct == total_cells)
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]