Spaces:
Running
Running
File size: 5,748 Bytes
3bf8430 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
from math import gcd
from functools import lru_cache
import random
from typing import Optional
from ...environment import VerifiableEnvironment
class CirculatingDecimalCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1587
prompt_template = \
r"""Please count how many **distinct pure repeating decimals** (in terms of numeric value) exist in base ${K}$, that can be written as a reduced fraction $\frac{x}{y}$ where $1 \le x \le {N}$ and $1 \le y \le {M}$, with $x$ and $y$ being integers.
A number is called a **pure repeating decimal** if and only if it can be written in the form of $$a.\dot{c_1} c_2 c_3 \dots c_{p - 1} \dot{c_p}$$, where $a$ is an integer, $p \ge 1$, and each $c_i$ ($1 \le i \le p$) is a digit in base ${K}$.
Examples:
- In base 10, $0.454545\ldots = 0.\dot{4}\dot{5}$ is a pure repeating decimal; it can be written as $\frac{5}{11}$ or $\frac{10}{22}$.
- In contrast, $0.166666\ldots = 0.1\dot{6}$ is **not** pure repeating in base 10; it can be written as $\frac{1}{6}$.
Note:
- **Integers are considered pure repeating**, because their decimal part can be represented as a repeating sequence of 0s.
- **Finite decimals with non-zero fractional parts** are **not** considered pure repeating.
**Output Format:** Your final answer should be a single integer — the total number of such distinct pure repeating decimals."""
def __init__(self,
wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
**kwargs) :
"""
Initialize the CirculatingDecimalCounting_Environment instance.
"""
super().__init__(**kwargs)
self.rewards = {
"wrong_format" : wrong_format,
"rewarding_strategy" : rewarding_strategy,
"rewarding_weight" : rewarding_weight,
"rewarding_beta" : rewarding_beta,
}
def _generate(self) -> None :
assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
MAX_N = self.parameter["MAX_N"]
assert MAX_N >= 1, "MAX_N should be greater than or equal to 1"
N = self.parameter["N"] = random.randint(1, MAX_N)
assert "MAX_M" in self.parameter, "MAX_M is required in parameter"
MAX_M = self.parameter["MAX_M"]
assert MAX_M >= 1, "MAX_M should be greater than or equal to 1"
M = self.parameter["M"] = random.randint(1, MAX_M)
assert "MAX_K" in self.parameter, "MAX_K is required in parameter"
MAX_K = self.parameter["MAX_K"]
assert MAX_K >= 2, "MAX_K should be greater than or equal to 2"
K = self.parameter["K"] = random.randint(2, MAX_K)
LIM = min(M, max(K, int(M ** 0.5) + 1))
g = [0] * (K + 1)
for i in range(1, K + 1):
g[i] = g[i - 1] + (1 if gcd(i, K) == 1 else 0)
mu = [0] * (LIM + 1)
is_comp = [False] * (LIM + 1)
f = [0] * (LIM + 1)
primes = []
mu[1] = 1
f[1] = 1
def G(x):
return (x // K) * g[K] + g[x % K]
for i in range(2, LIM + 1):
if not is_comp[i]:
primes.append(i)
mu[i] = -1
for p in primes:
ip = i * p
if ip > LIM:
break
is_comp[ip] = True
if i % p == 0:
mu[ip] = 0
break
else:
mu[ip] = -mu[i]
f[i] = f[i - 1] + mu[i] * (G(i) - G(i - 1))
@lru_cache(None)
def F(x):
if x <= LIM:
return f[x]
res = 1
l = 2
while l <= x:
t = x // l
r = x // t
res -= F(t) * (G(r) - G(l - 1))
l = r + 1
return res
ans = 0
l = 1
up = min(N, M)
while l <= up:
n_div = N // l
m_div = M // l
r = min(N // n_div, M // m_div)
ans += n_div * G(m_div) * (F(r) - F(l - 1))
l = r + 1
assert ans > 0
self.parameter["reference_answer"] = ans
def _prompt_generate(self) -> str :
return self.prompt_template.replace(r"{K}", str(self.parameter["K"])) \
.replace(r"{N}", str(self.parameter["N"])) \
.replace(r"{M}", str(self.parameter["M"]))
def _process(self, answer : Optional[str]) -> Optional[int] :
if answer is not None :
answer = answer.strip()
try :
int_answer = int(answer)
return int_answer
except ValueError :
return None
else :
return None
def scorer(self, output : str) -> float :
processed_result = self.processor(output)
if processed_result is not None :
if processed_result <= 0 :
return self.rewards["wrong_format"]
if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
a, b = self.parameter["reference_answer"], processed_result
return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
elif self.rewards["rewarding_strategy"] == "gold=answer" :
return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
else :
raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
else :
return self.rewards["wrong_format"] |