Cracking Python’s Random Module: Exploiting Linearity in the Mersenne Twister

6 minute read

Published: February 09, 2025

Python’s random isn’t random enough. Solving linear equations to predict Mersenne Twister outputs with partial state knowledge by exploiting the linearity in the Mersenne Twister PRNG. The challenge is taken from Cyber Jawara 2024 Finals and actually consist of two different challenge. The solution to this challenge was inspired by this writeup.

Baby MT

import random
import os
from libnum import s2n, n2s
from hashlib import sha256

n = 32
block_size = 624

FLAG = open('flag.txt').read()

def main():
    print("Is it true that MT19337 is named so because it has 19937 bits of state?")
    print("Well, let's test that out with one less shall we?")
    print()
    
    mask = input("Enter mask (624 * 32 bits): ")
    assert(len(mask) == block_size * n)
    assert(all(c in "01" for c in mask))
    assert(mask.count("1") == 19937 - 1)
    
    print()
    print("Alright, let's do 12 rounds of this")
    
    for _ in range(12):
        random.seed(os.urandom(24))
        for _ in range(block_size): random.getrandbits(n)
        arr = []
        for i in range(block_size):
            cur = random.getrandbits(n)
            cur = cur & int(mask[i*n:(i+1)*n], 2)
            arr.append(cur)
        print(f"Data dump incoming!: {arr}")
        print()
        
        for _ in range(block_size): random.getrandbits(n)
        print("Did you get that?")
        sum = 0
        for _ in range(block_size): sum += random.getrandbits(n)
        hash = sha256(n2s(sum)).hexdigest()
        
        print(f"Here's something to help you: {hash[:8]}")
        print("Now, what's the total hash?")
        s = input(">>> ").strip()
        if s != hash:
            print("Skill issue la!")
            exit(1)
        print("Wow, do it again I wasn't looking!")

    print(f"Good job! Here's your prize: {FLAG}")

if __name__ == '__main__':
    try:
        main()
    except:
        exit(1)

Key Points:

Requires 19,936-bit mask submission (19,937-1 bits set)
Provides 12 rounds challenges hash prediction of future random output

Tempered MT

import random
import os
from libnum import s2n, n2s
from hashlib import sha256

seed = s2n(os.urandom(12))
random.seed(seed)
n = 32
block_size = 624

# FLAG = open('flag.txt').read()
FLAG = "CJ2024{fake_flag}"

def main():
    print("They say the Mersenne Twister is predictable with 624 * 32 bits")
    print("Well, I'm giving you a lot more, so this should be easy right?")
    print()
    
    leak_mask = int(input("Enter leak mask: ").strip(), 16)
    if leak_mask.bit_count() > 8:
        print("Skill issue")
        exit(0)
    
    for _ in range(n):
        arr = []    
        for _ in range(block_size): arr.append(random.getrandbits(32) & leak_mask)
        print(arr)
    
    s = b""
    for _ in range(block_size): s += sha256(n2s(random.getrandbits(32))).digest()
    target = sha256(s).hexdigest()
    
    print("Okay, now let's compare notes!")
    s = input(">>> ").strip()
    if s == target:
        print("Gratz!!, here's your prize:")
        print(FLAG)
        exit(0)
    else:
        print("Meh, good luck next time!")


if __name__ == '__main__':
    try:
        main()
    except Exception as e:
        exit(1)

Key Points:

Accepts 8-bit leak mask specification
Provides 32 consecutive state leaks
Requires final SHA256 hash prediction of accumulated outputs

Solution

Both challenges exploit the fundamental linearity of MT19937’s state transitions. The Mersenne Twister’s complete linearity in GF(2) enables full state recovery through simple solving linear system. The detail of how mersenne build can be read here. You not need to actually understand the whole mersenne twister to solve this challenge. But here is the point that you need to know. First, the random output generated by mersenne twister can be decided from initial state that consist of $624 \times 32 - 31 = 19937$ bits. Second, the mersenne twister is a linear transformation given initial state as input. So, for given $k$-bit output. We get this equation

\[\mathbf{M} \cdot \mathbf{s} = \mathbf{r}\]

Where:

$\mathbf{M} \in \mathbb{F}_2^{k \times 19937}$: Known transformation matrix
$\mathbf{s} \in \mathbb{F}_2^{19937}$: Unknown initial state vector
$\mathbf{r} \in \mathbb{F}_2^k$: Observed output bits

Let standard basis of $\mathbb{F}_2^{19937}$ be $\mathbf{e}_1, \mathbf{e}_2, \ldots, \mathbf{e}_{19937}$. More explicitly

\[\mathbf{e}_i = (0, 0, \ldots, 1, \ldots, 0)\]

Where the $1$ is at the $i$-th position. Then, we can write $\mathbf{M}$ is

\[\mathbf{M} = \left[ \begin{array}{c|c|c|c} \mathbf{M}\mathbf{e}_1 & \mathbf{M}\mathbf{e}_2 & \cdots & \mathbf{M}\mathbf{e}_{19937} \end{array} \right]\]

Also, to actually solving the system you need $\mathbf{M}$ to have full rank that is $19937$. Here is the full script to solve both challenge.

Baby MT

import random
from tqdm import trange

RNG = random.Random()

def construct_a_row(RNG):
    row = []
    for i in range(624):
        RNG.getrandbits(32)

    for i in range(624):
        out = RNG.getrandbits(32)
        for _ in range(32):
            row += [out & 1]
            out >>= 1
    return row

L = []
for i in trange(19968):
    state = [0]*624
    temp = "0"*i + "1"*1 + "0"*(19968-1-i)
    for j in range(624):
        state[j] = int(temp[32*j:32*j+32],2)
    RNG.setstate((3,tuple(state+[624]),None))
    L.append(construct_a_row(RNG))

L = Matrix(GF(2),L)

M = L.T

# chosing row that make M full rank
M = M[list(range(19936)) + [19937], [0]+list(range(32,19968))]

# we must have full rank here so we can get inverse of matrix
assert M.rank() == 19937

# precompute the inverse
M_inv = M.inverse()

from pwn import process
from libnum import n2s
from hashlib import sha256

io = process(["python3", "chall.py"])

mask = "1" * (623 * 32) + "0" * 32
io.sendlineafter(b"Enter mask (624 * 32 bits): ", mask.encode())


for _ in range(12):
    print(f"Round {_}")
    io.recvuntil(b"Data dump incoming!: ")
    arr = eval(io.recvline().strip())
    io.recvuntil(b"help you: ")
    prefix_hash = io.recvline().strip().decode()

    known = []
    for num in arr[:-1]:
        for _ in range(32):
            known.append(num & 1)
            num >>= 1

    # we brute force one additional bit
    for b in range(2):
        test_known = known + [b]
        R = vector(GF(2), test_known)
        res = (M_inv * R).list()
        res = [res[0]] + [0]*31 + res[1:]
        init = "".join(list(map(str, res)))
        state = []
        for i in range(624):
            state.append(int(init[32*i:32*i+32],2))
        RNG = random.Random()
        RNG.setstate((3,tuple(state+[624]),None))

        for _ in range(3):
            [RNG.getrandbits(32) for _ in range(624)]

        hash_value = sha256(n2s(sum([RNG.getrandbits(32) for _ in range(624)]))).hexdigest()
        if hash_value.startswith(prefix_hash):
            io.sendlineafter(b">>> ", str(hash_value).encode())
            break

io.recvline()
print(io.recvline())

Tempered MT

import random
from tqdm import trange

RNG = random.Random()

def construct_a_row(RNG):
    row = []

    # only need 4 time iteration to get the full rank
    for i in range(4):
        for j in range(624):
            out = RNG.getrandbits(32)
            for _ in range(8):
                row += [out & 1]
                out >>= 4
    return row

L = []
for i in trange(19968):
    state = [0]*624
    temp = "0"*i + "1"*1 + "0"*(19968-1-i)
    for j in range(624):
        state[j] = int(temp[32*j:32*j+32],2)
    RNG.setstate((3,tuple(state+[624]),None))
    L.append(construct_a_row(RNG))

L = Matrix(GF(2),L)

M = L.T

M = M[:, [0]+list(range(32,19968))]

# we must have full rank here so we can get inverse of matrix
assert M.rank() == 19937

from pwn import process
from libnum import n2s
from hashlib import sha256

io = process(["python3", "chall.py"])

mask = "11111111"
io.sendlineafter(b"Enter leak mask: ", mask.encode())


known = []
arrs = []
for _ in range(4):
    arr = eval(io.recvline().strip())
    arrs.append(arr)
    for num in arr:
        for _ in range(8):
            known.append(num & 1)
            num >>= 4

R = vector(GF(2), known)
res = (M.solve_right(R)).list()
res = [res[0]] + [0]*31 + res[1:]
init = "".join(list(map(str, res)))
state = []
for i in range(624):
    state.append(int(init[32*i:32*i+32],2))
RNG = random.Random()
RNG.setstate((3,tuple(state+[624]),None))

for _ in range(32):
    [RNG.getrandbits(32) for _ in range(624)]

s = b""
for _ in range(624): s += sha256(n2s(RNG.getrandbits(32))).digest()
target = sha256(s).hexdigest()
io.sendlineafter(b">>> ", target.encode())

io.recvline()
print(io.recvline())

Share on

Bluesky Facebook LinkedIn X (formerly Twitter)

Mohammad Ferry Husnil Arif

Cracking Python’s Random Module: Exploiting Linearity in the Mersenne Twister

Baby MT

Tempered MT

Solution

Share on

You May Also Enjoy

CIMPA Research School on Arithmetic in Action: A Deep Dive into Lattice-Based Cryptography

TCP1P CTF 2024

Quotient Polynomial Ring and Isomorphism

My IMC 2024 Solution