Source code for raiiaf.chunks.metadata

"""Metadata chunk utilities for RAIIAF.

Provides validation against a JSON schema, compression/decompression helpers for the
metadata chunk (META), and manifest construction helpers.
"""

import jsonschema
from ..core.constants import JSON_SCHEMA
import json
from ..core.exceptions import raiiafMetadataError
import struct
import zstandard as zstd
from typing import Optional
from datetime import datetime, UTC


[docs] class raiiafMetadata: """Operations for RAIIAF metadata (META) chunks.""" def __init__(self): pass
[docs] def metadata_validator(self, manifest) -> bool: """Validate a metadata manifest using the JSON Schema. Args: manifest (dict): Metadata manifest. Returns: bool: True if it is valid. Raises: raiiafMetadataError: If the metadata is invalid. """ json_schema = JSON_SCHEMA schema = json.loads(json_schema) try: jsonschema.validate(instance=manifest, schema=schema) return True except Exception as e: raise raiiafMetadataError(f"Invalid metadata: {e}")
[docs] def metadata_compressor(self, manifest): """Compress a manifest into a META chunk. Args: manifest (dict): Manifest dictionary to serialize and compress. Returns: bytes: Compressed META chunk bytes. """ json_bytes = json.dumps(manifest, indent=2).encode("utf-8") chunk_type = b"META" chunk_flags = b"0000" chunk_size = len(json_bytes) header = struct.pack("<4s 4s I", chunk_type, chunk_flags, chunk_size) compressed = zstd.ZstdCompressor().compress(header + json_bytes) return compressed
[docs] def metadata_parser(self, compressed_chunk: bytes) -> dict: """Parse and decompress a META chunk. Args: compressed_chunk (bytes): Compressed metadata chunk. Returns: dict: Parsed metadata manifest. """ decompressor = zstd.ZstdDecompressor() chunk = decompressor.decompress(compressed_chunk) chunk_type, chunk_flags, chunk_size = struct.unpack("<4s 4s I", chunk[:12]) json_bytes = chunk[12 : 12 + chunk_size] manifest = json.loads(json_bytes.decode("utf-8")) return manifest
[docs] def build_manifest( self, version_major: int, version_minor: int, model_name: str, model_version: str, prompt: str, tags: list, chunk_records: list, generation_settings: Optional[dict] = None, hardware_info: Optional[dict] = None, ): """Build a manifest dictionary from inputs and chunk records. chunk_records must be a list of dicts, with each having:: { "type": str, "flags": str, "offset": int, "compressed_size": int, "uncompressed_size": int, "hash": str, "extra": dict } Args: version_major (int): Major version number. version_minor (int): Minor version number. model_name (str): Name of the model. model_version (str): Version of the model. prompt (str): Prompt used for generation. tags (list): List of tags. chunk_records (list): List of chunk record dictionaries. generation_settings (Optional[dict]): Generation settings. Defaults to predefined structure if not provided. hardware_info (Optional[dict]): Hardware information. Defaults to predefined structure if not provided. Returns: dict: Manifest dictionary. """ manifest = { "raiiaf_metadata": { "file_info": { "magic": "raiiaf", "version_major": version_major, "version_minor": version_minor, "file_size": 0, "chunk_count": len(chunk_records), }, "model_info": { "model_name": model_name, "version": model_version, "date": datetime.now(UTC).isoformat(), "prompt": prompt, "tags": tags, "generation_settings": generation_settings or { "seed": 0, "steps": 0, "sampler": "", "cfg_scale": 0.0, "scheduler": "", "eta": 0.0, "guidance": "", "precision": "fp16", "deterministic": True, }, "hardware_info": hardware_info or { "machine_name": "", "os": "", "cpu": "", "cpu_cores": 0, "gpu": [], "ram_gb": 0.0, "framework": "", "compute_lib": "", }, }, "chunks": [], } } # build chunk list with indexes for idx, rec in enumerate(chunk_records): # Use a robust mapping for compressed_size to handle different record schemas compressed_size = rec.get( "compressed_size", rec.get("compressed_size_header", rec.get("len_header", 0) + rec.get("len_data", 0)), ) manifest["raiiaf_metadata"]["chunks"].append( { "index": idx, "type": rec["type"], "flags": rec["flags"], "offset": rec["offset"], "compressed_size": compressed_size, "uncompressed_size": rec["uncompressed_size"], "hash": rec["hash"], "extra": rec.get("extra", {}), "compressed": rec.get("compressed", True), # Provide split points for compressed LATN chunks when available "len_header": rec.get("len_header"), "len_data": rec.get("len_data"), } ) return manifest