commit f5696d41c11f1abc4d4c13cbefef5614e338afb9 Author: alexiondev <1363939+alexiondev@users.noreply.github.com> Date: Fri May 8 09:22:50 2026 -0400 First commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f65519e --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +build/** diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..46a5d9a --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,119 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Commands + +All commands must be run from the project root using the local venv: + +```bash +# End-to-end identification (the main entry point) +.venv/bin/python identify.py + +# Train the model +.venv/bin/python -m src.models.train --epochs 50 --batch_size 64 --lr 1e-4 + +# Run inference only (no registry lookup) +.venv/bin/python src/models/inference.py + +# Generate/regenerate the fixed validation set (seed=42, 1000 samples, white bg) +.venv/bin/python -m src.data.generate_val_set + +# Generate/regenerate the fixed augmented test set (seed=99, 500 samples) +.venv/bin/python -m src.data.generate_aug_test_set + +# Generate a single sample image for visual inspection +.venv/bin/python src/data/high_fidelity_generator.py + +# Lint +.venv/bin/ruff check src/ + +# Type check +.venv/bin/mypy src/ +``` + +The package is installed in editable mode (`pip install -e .`); imports use `src.*` paths. + +**Important:** DataLoader workers use `multiprocessing`, so training must be invoked as a module (`python -m src.models.train`), not as a script piped via stdin — Python cannot resolve the worker `main_path` in that case. + +## Architecture + +The pipeline has five stages: + +``` +Image → Detector → Inference → Resolver → Registry + (cropped) (logits) (PIDs) (SQLite) +``` + +### 1. Detection (`src/utils/detector.py`) + +`SpindaDetector.detect_and_crop()` returns a **128×128 BGR** image, or `None`. 
+ +Two-tier strategy, tried in order: +- **Tier 1 (screenshots/sprites):** HSV-filter red pixels → find individual spot blobs → cluster to 4 spots → derive crop from cluster centroid + `_SPOT_CROP_RATIO=5.5` (= 128 / 24.5 px span) with a `_SPOT_CENTER_OFFSET=0.056` downward shift so the spot centroid lands at 44.4 % from the top of the crop (matching the training canvas). +- **Tier 2 (real photos, spots merged):** Find the full Spinda body blob; score = `circularity + 0.2·log(area/min_area) − 0.1·|aspect − 1.12|`; crop the face (top 43/58 of body height) using blob width as scale reference. + +### 2. Model (`src/models/regression_model.py`) + +ResNet-18 backbone with the final FC replaced by `Linear(512, 8·16)`. Forward pass returns **(B, 8, 16)** — treating each of the 8 coordinates as a 16-class classification problem. Trained with `CrossEntropyLoss` on `view(-1, 16)` vs `view(-1)` targets; predictions use `argmax(dim=2)`. + +### 3. Training (`src/models/train.py`) + +- `SpindaDataset` (200 k virtual samples/epoch): generates a fresh random 32-bit PID per `__getitem__`, renders the sprite with a random background colour, then applies the full augmentation pipeline. +- `SpindaEvalDataset`: loads pre-generated images from disk (post-augmentation, pre-normalisation) and applies only the normalise step. Used for both `data/val/` (clean, seed=42) and `data/aug_test/` (augmented, seed=99). +- `_worker_init_fn` re-seeds Python `random` and NumPy per worker so forked workers generate distinct PIDs. +- Early stopping: patience = 10 epochs on clean-val exact-match rate. +- Best model checkpoint: `models/best_spinda_model.pth`. + +### 4. 
PID Encoding (domain invariant — must not be changed) + +The 8 model outputs map directly to hex nibbles of the 32-bit PID via the **ProfessorRex** convention: + +| Coord index | Nibble | Spot | Notes | +|-------------|--------|------|-------| +| 0 (TL_x) | `pid[-1]` | TL | no pixel offset | +| 1 (TL_y) | `pid[-2]` | TL | | +| 2 (TR_x) | `pid[-3]` | TR | +24 px | +| 3 (TR_y) | `pid[-4]` | TR | +1 px | +| 4 (BL_x) | `pid[3]` | BL | +6 px | +| 5 (BL_y) | `pid[2]` | BL | +18 px | +| 6 (BR_x) | `pid[1]` | BR | +18 px | +| 7 (BR_y) | `pid[0]` | BR | +19 px | + +`SpindaResolver.coordinates_to_pid()` reconstructs each byte as `(Y << 4) | X`; BDSP reverses the byte order. + +### 5. Registry (`src/registry/database.py`) + +SQLite at `data/spinda_registry.db`. Schema: `(fingerprint TEXT, pid_hex TEXT, UNIQUE)` with an index on `fingerprint`. `SpindaRegistry.add_entry()` is idempotent (ignores `IntegrityError`). + +## Data layout + +``` +data/ + val/ # 1000 fixed clean sprites, white bg (seed=42) — stable benchmark + metadata.json + sample_NNNN.png + aug_test/ # 500 fixed augmented images (seed=99) — domain-adaptation tracker + metadata.json + sample_NNNN.png + spinda_registry.db + +assets/ # Sprite assets used by the renderer + Spinda_Base_Top.png # 52×43 face layer + Spinda_Head.png # colourisation source for spots + Spot_{TL,TR,BL,BR}.png + +models/ + best_spinda_model.pth +``` + +`metadata.json` format: `[{"img_path": "...", "pid_hex": "...", "target": [int×8]}, ...]` + +## Key invariants + +- **Visual collisions:** ~1.3 % of fingerprints are shared by multiple PIDs (many-to-one mapping). `SpindaRegistry` stores `(fingerprint, pid_hex)` pairs with a unique constraint so `lookup_by_fingerprint` can return *all* matching PIDs — this is intentional, not a bug. + +- The **validation set** uses white backgrounds (no augmentation baked in) to give a stable epoch-comparable baseline. Do not add augmentation to `generate_val_set.py`. 
+- The **augmented test set** is pre-generated and fixed. Regenerating it changes the baseline; do so intentionally. +- The crop output size is always **128×128** regardless of tier. The model transform chain also resizes to 128×128, so the inference path is robust to re-size. +- `generate_high_fidelity_spinda()` always takes `bg_color` as a `(R, G, B)` tuple in PIL order (not BGR). diff --git a/correct_spinda.png b/correct_spinda.png new file mode 100644 index 0000000..912deef Binary files /dev/null and b/correct_spinda.png differ diff --git a/design doc.md b/design doc.md new file mode 100644 index 0000000..ff9ed8f --- /dev/null +++ b/design doc.md @@ -0,0 +1,69 @@ +# Spinda Coordinate Regression & Global Registry (SCRGR) + +**Date Created:** 2026-05-07 10:49:15 + +**Tags:** #MachineLearning #ComputerVision #Python #Pokemon #Spinda #Regression + +## The Problem + +There are $2^{32}$ (over 4.2 billion) Spinda variations, but identifying a specific pattern from a user-submitted photo or screenshot is currently a manual, error-prone process. Because a 32-bit PID determines the exact coordinates of four facial spots on a discrete $16 \times 16$ grid, a system is needed to automatically extract these coordinates and map them to their corresponding game data without requiring a massive, unsearchable database of raw images. + +## Context + +Spinda's visual appearance is deterministic. The PID is split into four bytes, each providing the $(x, y)$ coordinates for one of the four spots. + +- **Current State:** Existing tools can generate a pattern from a PID, but the inverse (Pattern → PID) is difficult due to "visual collisions" (multiple PIDs resulting in identical spot placements) and the noise inherent in real-world photography (glare, blur, and distortion). 
+ +- **Technical Shift:** While initial discussions considered abstract image fingerprinting, the realization that the "identity" of a Spinda is mathematically defined by 8 discrete integers ($4 \text{ spots} \times 2 \text{ coordinates}$) allows for a more precise Regression-based approach. + + +## Design + +### Summary + +The proposed solution uses a **Coordinate Regression Model** to translate pixels into an 8-dimensional vector of spatial coordinates. This vector is rounded to the nearest integers to match the game's internal $16 \times 16$ grid, providing a "Visual Fingerprint" that can be instantly looked up in an $O(1)$ hash map to identify associated PIDs. + +### Detailed Design + +#### 1. Synthetic Data Generation & Augmentation + +To facilitate a "smooth" training experience in Python, we will build a generator using libraries like `OpenCV` or `PIL`: + +- **Perfect Sprites:** Generate 2D Spinda faces with known ground-truth coordinates. + +- **Augmentation Pipeline:** Apply "Domain Randomization" to simulate real-world conditions: + + - **Spatial Transforms:** Slight rotations and tilts to mimic handheld photography. + + - **Sensor Noise:** Add Gaussian noise and Moiré patterns to simulate digital camera sensors. + + - **Grid Jitter:** Ensure the model learns the center-of-mass for a spot even if it is partially obscured. + + +#### 2. ML Architecture: Coordinate Regression + +Instead of a classification model, we will implement a **Regression CNN** (e.g., a modified ResNet or MobileNet backbone): + +- **Input:** A standardized $128 \times 128$ crop of the Spinda face. + +- **Output Layer:** A dense layer with 8 neurons using a linear activation function, representing $[\hat{x}_1, \hat{y}_1, \hat{x}_2, \hat{y}_2, \hat{x}_3, \hat{y}_3, \hat{x}_4, \hat{y}_4]$. + +- **Loss Function:** Mean Squared Error (MSE) to minimize the distance between predicted and actual grid coordinates. + + +#### 3. 
def _remove_if_exists(path: str) -> None:
    """Delete *path*, tolerating the case where it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Deliberate best-effort: cleanup must never mask the real outcome.
        pass


def identify_spinda(image_path: str) -> None:
    """Run the end-to-end identification pipeline on one image and print the results.

    Stages: detect and crop the Spinda face, predict the grid coordinates and
    visual fingerprint, resolve the fingerprint to candidate PIDs, render a
    verification image, and look the fingerprint up in the registry.

    Side effects: writes ``detected_spinda_crop.png`` and
    ``prediction_verify.png`` to the current working directory, and creates
    then deletes ``temp_cropped_spinda.png``.

    Args:
        image_path: Path of the photo or screenshot to analyse.

    Returns:
        None. All results and errors are reported on stdout; a missing file,
        a failed detection or an inference error ends the run early.
    """
    if not os.path.exists(image_path):
        print(f"Error: File {image_path} not found.")
        return

    print(f"--- Identifying Spinda in {image_path} ---")

    # 1. Detect and Crop Spinda
    detector = SpindaDetector()
    cropped_img = detector.detect_and_crop(image_path)

    if cropped_img is None:
        print("Error: Could not detect Spinda in the image.")
        return

    # Save cropped image for debug/visual check
    cv2.imwrite("detected_spinda_crop.png", cropped_img)
    print("Detected Spinda saved to detected_spinda_crop.png")

    # The inference model reads from disk, so the crop goes through a temp file.
    temp_cropped_path = "temp_cropped_spinda.png"
    cv2.imwrite(temp_cropped_path, cropped_img)

    # 2. Inference (Model Prediction) using the cropped image
    try:
        inf = SpindaInference(model_path="models/best_spinda_model.pth")
        coords, fingerprint = inf.predict(temp_cropped_path)
    except Exception as e:
        # Top-level CLI boundary: report the failure and stop. Cleanup is left
        # to ``finally`` alone; removing the file here as well made the second
        # removal raise FileNotFoundError and replace this message with a
        # traceback.
        print(f"Error during inference: {e}")
        return
    finally:
        _remove_if_exists(temp_cropped_path)

    print(f"Visual Fingerprint: {fingerprint}")
    print(f"Predicted Grid Coordinates: {coords}")

    # 3. Resolution (Mathematical PIDs)
    resolved = SpindaResolver.resolve_fingerprint(fingerprint)
    print("\nPossible PIDs:")
    print(f"  Standard (Gen 3-8, HOME): 0x{resolved['standard']}")
    print(f"  BDSP (Big-Endian Flip):   0x{resolved['bdsp']}")

    # 4. Visual Verification
    print("\nGenerating visual verification image...")
    verify_img = generate_high_fidelity_spinda(int(resolved['standard'], 16))
    # NOTE(review): cv2.imwrite expects BGR channel order - confirm what the
    # generator returns, otherwise red and blue are swapped in the saved file.
    cv2.imwrite("prediction_verify.png", verify_img)
    print("Verification image saved to: prediction_verify.png")

    # 5. Registry Lookup
    reg = SpindaRegistry()
    matches = reg.lookup_by_fingerprint(fingerprint)

    if matches:
        print("\nMatches found in Global Registry:")
        for pid in matches:
            print(f"  - Registered PID: 0x{pid}")
    else:
        print("\nNo matching entries in Global Registry.")

    print("\nNote: Accuracy depends on model training progress.")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # The original message ended after "identify.py " with no argument name.
        print("Usage: python identify.py IMAGE_PATH")
    else:
        identify_spinda(sys.argv[1])