KOTH - Loaded RPS
Statistician (no longer playing)
import random
import collections

R, P, S = moves = range(3)
move_idx = {"R": R, "P": P, "S": S}
name = "RPS"
beat = (P, S, R)      # beat[x] is the move that beats x
beaten = (S, R, P)    # beaten[x] is the move that x beats

def react(_0, _1, _2, _3, _4, opp_history):
    if not opp_history:
        return random.randrange(0, 3)
    return beat[opp_history[-1]]

def anti_react(_0, _1, _2, _3, _4, opp_history):
    if not opp_history:
        return random.randrange(0, 3)
    return beaten[opp_history[-1]]

def random_max(scores):
    # argmax with a little Gaussian noise for tie-breaking and variety
    scores = [s + random.normalvariate(0, 1) for s in scores]
    return scores.index(max(scores))

def greedy_margin(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    scores = [my_loaded[move] - opp_loaded[beat[move]] for move in moves]
    return random_max(scores)

def anti_greedy(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    scores = [-my_loaded[move] for move in moves]
    return random_max(scores)

def recent_stats(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    opp_history = opp_history[-10:-1]
    counts = collections.Counter(opp_history)
    scores = [(counts[beaten[move]] + 1) * my_loaded[move] -
              (counts[beat[move]] + 1) * opp_loaded[move] for move in moves]
    return random_max(scores)

def statistician(_0, _1, _2, _3, my_history, opp_history):
    m1 = []
    o1 = []
    my_loaded = [0] * 3
    opp_loaded = [0] * 3
    my_points = 0
    opp_points = 0
    strategies = [react, anti_react, greedy_margin, anti_greedy, recent_stats]
    strategy_scores = [0 for _ in strategies]
    # Replay the match so far, scoring how each strategy would have done
    # (with exponential decay so recent rounds matter more).
    for i, (mx, ox) in enumerate(zip(my_history, opp_history)):
        mx = move_idx[mx]
        ox = move_idx[ox]
        for j, strategy in enumerate(strategies):
            strategy_scores[j] *= 0.98
            move = strategy(my_points, opp_points, my_loaded, opp_loaded, m1, o1)
            if move == beat[ox]:
                strategy_scores[j] += my_loaded[move]
            elif move == beaten[ox]:
                strategy_scores[j] -= opp_loaded[ox]
        m1.append(mx)
        o1.append(ox)
        if mx == beat[ox]:
            opp_loaded[ox] += 1
            my_points += my_loaded[mx]
        elif mx == beaten[ox]:
            my_loaded[mx] += 1
            opp_points += opp_loaded[ox]
        else:
            my_loaded[mx] += 0.5
            opp_loaded[ox] += 0.5
    strategy = strategies[random_max(strategy_scores)]
    return name[strategy(my_points, opp_points, my_loaded, opp_loaded, m1, o1)]
Switches between a few simple strategies based on how each would have performed in past rounds.
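For reference, a minimal usage sketch with made-up arguments; the calling convention (points, the two load lists, then the two move histories) is assumed from the signatures above, and statistician only actually reads the histories:

# Hypothetical call: the first four arguments are ignored, and the histories
# are strings (or lists) of "R"/"P"/"S" moves played so far.
print(statistician(0, 0, [0, 0, 0], [0, 0, 0], "RPS", "PPR"))  # -> "R", "P" or "S"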
Statistician 2
import random
import collections
import numpy as np

R, P, S = moves = range(3)
move_idx = {"R": R, "P": P, "S": S}
names = "RPS"
beat = (P, S, R)      # beat[x] is the move that beats x
beaten = (S, R, P)    # beaten[x] is the move that x beats

def react(my_loaded, opp_loaded, my_history, opp_history):
    if not opp_history:
        return [1, 1, 1]  # uniform prediction when there is no history yet
    counts = [0, 0, 0]
    counts[beat[opp_history[-1]]] += 1
    return counts

def random_max(scores):
    scores = [s + random.normalvariate(0, 1) for s in scores]
    return scores.index(max(scores))

def argmax(scores):
    m = max(scores)
    return [s == m for s in scores]

def greedy_margin(my_loaded, opp_loaded, my_history, opp_history):
    scores = [my_loaded[move] - opp_loaded[beat[move]] for move in moves]
    return argmax(scores)

recent_counts = None

def best_move(counts, my_loaded, opp_loaded):
    scores = [(counts[beaten[move]] + 0.5) * my_loaded[move] -
              (counts[beat[move]] + 0.5) * opp_loaded[move] for move in moves]
    return argmax(scores)

def recent_stats(my_loaded, opp_loaded, my_history, opp_history):
    # Sliding window over the opponent's last 10 moves.
    if len(opp_history) >= 10:
        recent_counts[opp_history[-10]] -= 1
    recent_counts[opp_history[-1]] += 1
    return best_move(recent_counts, my_loaded, opp_loaded)

order2_counts = None

def order2(my_loaded, opp_loaded, my_history, opp_history):
    # Second-order statistics: what the opponent played after each
    # (my move, their move) pair.
    if len(my_history) >= 2:
        base0 = 9 * my_history[-2] + 3 * opp_history[-2]
        order2_counts[base0 + opp_history[-1]] += 1
    base1 = 9 * my_history[-1] + 3 * opp_history[-1]
    counts = [order2_counts[base1 + move] for move in moves]
    return best_move(counts, my_loaded, opp_loaded)

def nash(my_loaded, opp_loaded, my_history, opp_history):
    third = 1.0 / 3
    p = np.full(3, third)
    q = np.full(3, third)
    u = np.array(my_loaded)
    v = np.array(opp_loaded)
    m0 = np.zeros(3)
    m1 = np.zeros(3)
    lr = 0.2
    for _ in range(10):
        # Payoff gradients for both mixed strategies under the current loads.
        de0 = u * np.roll(q, 1) - np.roll(v * q, 2)
        de1 = v * np.roll(p, 1) - np.roll(u * p, 2)
        m0 = 0.9 * m0 + 0.1 * de0
        m1 = 0.9 * m1 + 0.1 * de1
        p += lr * m0
        q += lr * m1
        p[p < 0] = 0
        q[q < 0] = 0
        tp, tq = np.sum(p), np.sum(q)
        if tp == 0 or tq == 0:
            return np.full(3, third)
        p /= tp
        q /= tq
        lr *= 0.9
    return p

strategies = [react, greedy_margin, recent_stats, order2, nash]
predictions = strategy_scores = mh = oh = None

def statistician2func(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    global strategy_scores, recent_counts, mh, oh, predictions, order2_counts
    if not opp_history:
        strategy_scores = [0 for _ in strategies]
        recent_counts = collections.Counter()
        order2_counts = collections.Counter()
        mh, oh = [], []
        predictions = None
        return random.choice(names)
    my_move = move_idx[my_history[-1]]
    opp_move = move_idx[opp_history[-1]]
    if predictions is not None:
        # Credit each strategy by how its last prediction would have scored.
        for j, p in enumerate(predictions):
            good = beat[opp_move]
            bad = beaten[opp_move]
            strategy_scores[j] += (my_loaded[good] * p[good] - opp_loaded[opp_move] * p[bad]) / sum(p)
    mh.append(my_move)
    oh.append(opp_move)
    predictions = [strategy(my_loaded, opp_loaded, mh, oh) for strategy in strategies]
    strategy = random_max(strategy_scores)
    p = predictions[strategy]
    # Sample a move in proportion to the chosen strategy's (possibly
    # unnormalised) prediction weights.
    r = random.random() * sum(p)
    for i, pi in enumerate(p):
        r -= pi
        if r <= 0:
            break
    return names[i]
Nash
import numpy as np
import random

def nashfunc(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    third = 1.0 / 3
    p = np.full(3, third)
    q = np.full(3, third)
    u = np.array(my_loaded)
    v = np.array(opp_loaded)
    m0 = np.zeros(3)
    m1 = np.zeros(3)
    lr = 0.2
    for _ in range(10):
        de0 = u * np.roll(q, 1) - np.roll(v * q, 2)
        de1 = v * np.roll(p, 1) - np.roll(u * p, 2)
        m0 = 0.9 * m0 + 0.1 * de0
        m1 = 0.9 * m1 + 0.1 * de1
        p += lr * m0
        q += lr * m1
        p[p < 0] = 0
        q[q < 0] = 0
        tp, tq = np.sum(p), np.sum(q)
        if tp == 0 or tq == 0:
            return random.choice("RPS")
        p /= tp
        q /= tq
        lr *= 0.9
    r = random.random()
    for i, pi in enumerate(p):
        r -= pi
        if r <= 0:
            break
    return "RPS"[i]
Computes an approximate Nash equilibrium by gradient descent.
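For intuition, here is a small sketch of the expected-payoff term that the gradient step uses, plus a hypothetical call (the loads and histories below are made up; only the calling convention of nashfunc above is assumed):

# With R, P, S = 0, 1, 2, playing move i wins my_loaded[i] points if the
# opponent plays the move that i beats, and loses opp_loaded[i+1] points if
# they play the move that beats i. That is what de0 computes via np.roll:
#   de0[i] = u[i] * q[i - 1] - v[i + 1] * q[i + 1]   (indices mod 3)
# Hypothetical call: asymmetric loads generally shift p away from plain 1/3s.
print(nashfunc(0, 0, [2, 1, 1], [1, 1, 3], "RP", "PS"))  # -> "R", "P" or "S"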
Weigher
I lost track of my reasoning while experimenting with the code, but the basic idea is to estimate the opponent's move probability from their last 3 moves using recency weights, and then multiply each estimate by another weight that depends on the loads. I thought I could somehow use my_loaded too, but I couldn't decide how, so I left it out.
def weigher(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    idx = {"R": 0, "P": 1, "S": 2}
    sc = [0, 0, 0]
    for i, m in enumerate(reversed(opp_history[-3:])):
        sc[idx[m]] += (1 / (1 + i))
    for i in range(3):
        sc[i] *= (opp_loaded[i] ** 2)
    return "PSR"[sc.index(max(sc))]
Satan
Probably will be disqualified, because it's kind of cheating and it makes some assumptions about the testing function (it has to have the opponent's function in a variable on its stack frame), but it doesn't technically break any current rules: it doesn't redefine or rewrite anything. It simply uses black magic to execute the opponent's function and see which move they did/will make. It cannot deal with randomness, but deterministic bots have no chance of defeating Satan.
def satan(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    import inspect, types
    f = inspect.currentframe()
    s = f.f_code.co_name
    try:
        # Look in the caller's local variables for the opponent's function,
        # run it with the arguments swapped, and play the counter to its move.
        for v in f.f_back.f_locals.values():
            if isinstance(v, types.FunctionType) and v.__name__ != s:
                try:
                    return "PSR"[{"R": 0, "P": 1, "S": 2}[
                        v(opp_points, my_points, opp_loaded, my_loaded, opp_history, my_history)]]
                except:
                    continue
    finally:
        del f
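A minimal demonstration of the assumption Satan relies on, using a made-up deterministic opponent and a made-up caller that keeps the opponent's function in a local variable (neither is part of the actual controller):

def rock_bot(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    return "R"  # hypothetical deterministic opponent

def referee():
    opponent = rock_bot  # Satan finds this through the caller's frame locals
    return satan(0, 0, [1, 1, 1], [1, 1, 1], "", "")

print(referee())  # -> "P", which beats rock_bot's "R"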
Fitter
This Bot improves Pattern and fuses it with Economist (Pattern and Economist will no longer participate).
The improvement over Pattern is that the Bot now looks for two kinds of patterns: the opponent reacting to their own last play, and the opponent reacting to my last play. It then evaluates both predictions and uses the one that fits best.
From that pattern the Bot now has a probability for R, P and S. Taking that into account, together with the expected value of each play (as Economist did), the Bot plays the one that gives the most value.
import random
import numpy as np

def fitterfunc(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    t = len(opp_history)
    RPS = ["R", "P", "S"]
    if t <= 2:
        return RPS[t]
    elif t == 3:
        return random.choice(RPS)

    def n(c): return RPS.index(c)

    # Transition counts: what the opponent played after my last move,
    # and what they played after their own last move.
    total_me = np.zeros(shape=(3, 3))
    total_opp = np.zeros(shape=(3, 3))
    p_me = np.array([[1/3] * 3] * 3)
    p_opp = np.array([[1/3] * 3] * 3)
    for i in range(1, t):
        total_me[n(my_history[i-1]), n(opp_history[i])] += 1
        total_opp[n(opp_history[i-1]), n(opp_history[i])] += 1
    for i in range(3):
        if np.sum(total_me[i, :]) != 0:
            p_me[i, :] = total_me[i, :] / np.sum(total_me[i, :])
        if np.sum(total_opp[i, :]) != 0:
            p_opp[i, :] = total_opp[i, :] / np.sum(total_opp[i, :])

    # Use whichever model fitted the observed history with less error.
    error_me = 0
    error_opp = 0
    for i in range(1, t):
        diff = 1 - p_me[n(my_history[i-1]), n(opp_history[i])]
        error_me += diff * diff
        diff = 1 - p_opp[n(opp_history[i-1]), n(opp_history[i])]
        error_opp += diff * diff
    if error_me < error_opp:
        p = p_me[n(my_history[-1]), :]
    else:
        p = p_opp[n(opp_history[-1]), :]

    # From here on I weight the values; I'm not 100% sure it's the best idea,
    # so I leave the alternative below in case I feel like changing it.
    value = [(p[2] * my_loaded[0] - p[1] * opp_loaded[1], "R"),
             (p[0] * my_loaded[1] - p[2] * opp_loaded[2], "P"),
             (p[1] * my_loaded[2] - p[0] * opp_loaded[0], "S")]
    value.sort()
    if value[-1][0] > value[-2][0]:
        return value[-1][1]
    elif value[-1][0] > value[-3][0]:
        return random.choice([value[-1][1], value[-2][1]])
    else:
        return random.choice(RPS)
    # idx = p.tolist().index(max(p))
    # return ["P", "S", "R"][idx]
Here are the two old bots.
Pattern (no longer playing)
The Pattern tries to find patterns in its opponent. It looks at what the opponent has played after previous occurrences of their latest move (giving more weight to more recent plays). From that, it guesses what the opponent will play next, and plays the counter to that.
import random
import numpy as np

def patternfunc(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    if len(opp_history) == 0:
        return random.choice(["R", "P", "S"])
    elif len(opp_history) == 1:
        if opp_history[-1] == "R":
            return "P"
        elif opp_history[-1] == "P":
            return "S"
        elif opp_history[-1] == "S":
            return "R"
    p = np.array([1/3] * 3)
    c = opp_history[-1]
    # Weight what the opponent played after earlier occurrences of c,
    # decaying older observations.
    for i in range(1, len(opp_history)):
        c0 = opp_history[i-1]
        c1 = opp_history[i]
        if c0 == c:
            p *= .9
            if c1 == "R":
                p[0] += .1
            elif c1 == "P":
                p[1] += .1
            elif c1 == "S":
                p[2] += .1
    idx = p.tolist().index(max(p))
    return ["P", "S", "R"][idx]
Economist (no longer playing)
The Economist does the following: it estimates the probability of each play by the opponent from what they played over the last 9 turns. From that, it computes the expected benefit of each play and goes with the one that has the best expected value.
import random

def economistfunc(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    if len(opp_history) == 0:
        return random.choice(["R", "P", "S"])
    if len(opp_history) > 9:
        opp_history = opp_history[-10:-1]
    p = [opp_history.count("R"), opp_history.count("P"), opp_history.count("S")]
    value = [(p[2] * my_loaded[0] - p[1] * opp_loaded[1], "R"),
             (p[0] * my_loaded[1] - p[2] * opp_loaded[2], "P"),
             (p[1] * my_loaded[2] - p[0] * opp_loaded[0], "S")]
    value.sort()
    if value[-1][0] > value[-2][0]:
        return value[-1][1]
    elif value[-1][0] > value[-3][0]:
        return random.choice([value[-1][1], value[-2][1]])
    else:
        return random.choice(["R", "P", "S"])
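A quick made-up example of the expected-value calculation:

# Hypothetical state: in the counted window the opponent played R 5 times,
# P 3 times and S once, with my_loaded == [1, 2, 1] and opp_loaded == [1, 1, 4]:
#   value(R) = 1*1 - 3*1 = -2,  value(P) = 5*2 - 1*4 = 6,  value(S) = 3*1 - 5*1 = -2
# so the Economist plays "P".
print(economistfunc(0, 0, [1, 2, 1], [1, 1, 4], "R" * 10, "SRRRRRPPPS"))  # -> "P"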