#!/usr/bin/env python3 # make_binpack_halfka_fixed.py import os, sys, struct import numpy as np import chess # ----- HalfKA constants (matches NNUE HalfKA: 49,216 features) ----- NUM_SQ = 64 NUM_PT = 12 # 6 piece types × 2 colors, excluding kings in activation map below # Per-king bucket: 768 piece-square activations + 1 "activity" feature PER_K_BUCKET = NUM_PT * NUM_SQ - 2 * NUM_SQ + 1 # 12*64 - 2*64 + 1 = 768+1 FEAT_N = NUM_SQ * PER_K_BUCKET # 64 * (768 + 1) = 49,216 MAGIC = b'BINPACK\x00' def orient(is_white_pov: bool, sq: int) -> int: # same orientation trick used in HalfKP: flip ranks for black POV return (63 * (not is_white_pov)) ^ sq def piece_to_idx(p: chess.Piece, pov_white: bool) -> int: # 0..11 = (type-1)*2 + (color != pov) return (p.piece_type - 1) * 2 + (p.color != pov_white) def encode_halfka_dense(board: chess.Board) -> tuple[np.ndarray, int, int]: """ Return: dense feature vector (uint8) length 49,216, psqt_index (int32), layer_stack_index (int32) Layout: for king square K (POV side-to-move), a bucket of 769 floats: [0] = "activity" scalar (piece_count, excluding kings) [1..768] = piece-square one-hots for 12 piece kinds × 64 squares, BUT we skip king kinds (so effectively 10 kinds × 64 = 640), keeping the overall size at 768 by leaving the king kinds never set. """ pov_white = board.turn ksq = board.king(pov_white) if ksq is None: # skip illegal positions return None, None, None base = ksq * PER_K_BUCKET x = np.zeros(FEAT_N, dtype=np.uint8) piece_count = 0 for sq, p in board.piece_map().items(): if p.piece_type == chess.KING: continue piece_count += 1 idx = piece_to_idx(p, pov_white) # 0..11 if p.piece_type == chess.KING: continue # safety; we already skipped kings # map all 12 kinds but kings will never be set, preserving 768 slots rel_sq = orient(pov_white, sq) slot = 1 + idx * NUM_SQ + rel_sq # [1..768] # guard: stay in this king's bucket if 1 <= slot < PER_K_BUCKET: x[base + slot] = 1 # activity feature = piece count (int) x[base + 0] = piece_count if piece_count < 255 else 254 # bucket index by material to match training_data_loader.cpp idea: # psqt_index = (pieces - 1) // 4 (clamped 0..7), use same for layer_stack_index psqt_idx = max(0, min(7, (piece_count - 1) // 4)) ls_idx = psqt_idx return x, psqt_idx, ls_idx def parse_line(line: str): # Expect: " " parts = line.strip().split() if len(parts) < 7: return None fen = " ".join(parts[:6]) try: cp = float(parts[-1]) except: return None try: b = chess.Board(fen) except Exception: return None # Convert centipawns (side-to-move POV) -> pawns float32 return b, np.float32(cp / 100.0) def convert(in_path: str, out_path: str): # first pass: count valid cnt = 0 with open(in_path, "r") as f: for ln in f: parsed = parse_line(ln) if not parsed: continue b, _ = parsed feats, psqt_idx, ls_idx = encode_halfka_dense(b) if feats is None: continue cnt += 1 if cnt == 0: print("No valid positions.") return with open(out_path, "wb") as out, open(in_path, "r") as f: # Header: magic, feats(int32), count(int64) out.write(MAGIC) out.write(struct.pack(" ") sys.exit(1) inp, outp = sys.argv[1], sys.argv[2] if not os.path.exists(inp): print(f"Input missing: {inp}") sys.exit(1) convert(inp, outp)