[
  {
    "arxiv_id": "arXiv:2604.03263",
    "title": "LPC-SM: Local Predictive Coding and Sparse Memory for Long-Context Language Modeling",
    "authors": [
      "Keqin Xie"
    ],
    "tags": [
      "cs.CL",
      "cs.AI",
      "cs.GL",
      "cs.NE"
    ],
    "date": "April 2026",
    "abstract": "Most current long-context language models still rely on attention to handle both local interaction and long-range state, which leaves relatively little room to test alternative decompositions of sequence modeling. We propose LPC-SM, a hybrid autoregressive architecture that separates local attention, persistent memory, predictive correction, and run-time control within the same block, and we use Orthogonal Novelty Transport (ONT) to govern slow-memory writes. We evaluate a 158M-parameter model...",
    "url": "https://arxiv.org/abs/2604.03263"
  },
  {
    "arxiv_id": "arXiv:2604.00886",
    "title": "PixelPrune: Pixel-Level Adaptive Visual Token Reduction via Predictive Coding",
    "authors": [
      "Nan Wang",
      "Zhiwei Jin",
      "Chen Chen",
      "Haonan Lu"
    ],
    "tags": [
      "cs.CV",
      "cs.AI",
      "cs.CL"
    ],
    "date": "April 2026",
    "abstract": "Document understanding and GUI interaction are among the highest-value applications of Vision-Language Models (VLMs), yet they impose exceptionally heavy computational burden: fine-grained text and small UI elements demand high-resolution inputs that produce tens of thousands of visual tokens. We observe that this cost is largely wasteful -- across document and GUI benchmarks, only 22-71% of image patches are pixel-unique...",
    "url": "https://arxiv.org/abs/2604.00886"
  },
  {
    "arxiv_id": "arXiv:2603.29090",
    "title": "HCLSM: Hierarchical Causal Latent State Machines for Object-Centric World Modeling",
    "authors": [
      "Jaber Jaber",
      "Osama Jaber"
    ],
    "tags": [
      "cs.LG",
      "cs.CV",
      "cs.RO"
    ],
    "date": "March 2026",
    "abstract": "World models that predict future states from video remain limited by flat latent representations that entangle objects, ignore causal structure, and collapse temporal dynamics into a single scale. We present HCLSM, a world model architecture that operates on three interconnected principles: object-centric decomposition via slot attention with spatial broadcast decoding, hierarchical temporal dynamics through a three-level engine combining selective state space models for continuous physics, sparse...",
    "url": "https://arxiv.org/abs/2603.29090"
  },
  {
    "arxiv_id": "arXiv:2603.28744",
    "title": "Stop Probing, Start Coding: Why Linear Probes and Sparse Autoencoders Fail at Compositional Generalisation",
    "authors": [
      "Vit\u00f3ria Barin Pacela",
      "Shruti Joshi",
      "Isabela Camacho",
      "Simon Lacoste-Julien",
      "David Klindt"
    ],
    "tags": [
      "cs.LG"
    ],
    "date": "March 2026",
    "abstract": "The linear representation hypothesis states that neural network activations encode high-level concepts as linear mixtures. However, under superposition, this encoding is a projection from a higher-dimensional concept space into a lower-dimensional activation space, and a linear decision boundary in the concept space need not remain linear after projection. In this setting, classical sparse coding methods with per-sample iterative inference leverage compressed sensing guarantees to recover latent...",
    "url": "https://arxiv.org/abs/2603.28744"
  },
  {
    "arxiv_id": "arXiv:2603.09972",
    "title": "From Data Statistics to Feature Geometry: How Correlations Shape Superposition",
    "authors": [
      "Lucas Prieto",
      "Edward Stevinson",
      "Melih Barsbey",
      "Tolga Birdal",
      "Pedro A. M. Mediano"
    ],
    "tags": [
      "cs.LG",
      "cs.AI",
      "cs.CV"
    ],
    "date": "March 2026",
    "abstract": "A central idea in mechanistic interpretability is that neural networks represent more features than they have dimensions, arranging them in superposition to form an over-complete basis. This framing has been influential, motivating dictionary learning approaches such as sparse autoencoders. However, superposition has mostly been studied in idealized settings where features are sparse and uncorrelated. In these settings, superposition is typically understood as introducing interference that must...",
    "url": "https://arxiv.org/abs/2603.09972"
  },
  {
    "arxiv_id": "arXiv:2603.04146",
    "title": "LISTA-Transformer Model Based on Sparse Coding and Attention Mechanism and Its Application in Fault Diagnosis",
    "authors": [
      "Shuang Liu",
      "Lina Zhao",
      "Tian Wang",
      "Huaqing Wang"
    ],
    "tags": [
      "cs.CV"
    ],
    "date": "March 2026",
    "abstract": "Driven by the continuous development of models such as Multi-Layer Perceptron, Convolutional Neural Network (CNN), and Transformer, deep learning has made breakthrough progress in fields such as computer vision and natural language processing, and has been successfully applied in practical scenarios such as image classification and industrial fault diagnosis. However, existing models still have certain limitations in local feature modeling and global dependency capture. Specifically, CNN is limited...",
    "url": "https://arxiv.org/abs/2603.04146"
  },
  {
    "arxiv_id": "arXiv:2603.20927",
    "title": "Active Inference for Physical AI Agents -- An Engineering Perspective",
    "authors": [
      "Bert de Vries"
    ],
    "tags": [
      "stat.ML",
      "cs.LG"
    ],
    "date": "March 2026",
    "abstract": "Physical AI agents, such as robots and other embodied systems operating under tight and fluctuating resource constraints, remain far less capable than biological agents in open-ended real-world environments. This paper argues that Active Inference (AIF), grounded in the Free Energy Principle, offers a principled foundation for closing that gap. We develop this argument from first principles, following a chain from probability theory through Bayesian machine learning and variational inference to...",
    "url": "https://arxiv.org/abs/2603.20927"
  },
  {
    "arxiv_id": "arXiv:2603.06831",
    "title": "Learning-Based Robust Control: Unifying Exploration and Distributional Robustness for Reliable Robotics via Free Energy",
    "authors": [
      "Hozefa Jesawada",
      "Giovanni Russo",
      "Abdalla Swikir",
      "Fares Abu-Dakka"
    ],
    "tags": [
      "cs.RO",
      "math.OC"
    ],
    "date": "March 2026",
    "abstract": "A key challenge towards reliable robotic control is devising computational models that can both learn policies and guarantee robustness when deployed in the field. Inspired by the free energy principle in computational neuroscience, to address these challenges, we propose a model for policy computation that jointly learns environment dynamics and rewards, while ensuring robustness to epistemic uncertainties. Expounding a distributionally robust free energy principle...",
    "url": "https://arxiv.org/abs/2603.06831"
  },
  {
    "arxiv_id": "arXiv:2602.02356",
    "title": "NAB: Neural Adaptive Binning for Sparse-View CT reconstruction",
    "authors": [
      "Wangduo Xie",
      "Matthew B. Blaschko"
    ],
    "tags": [
      "cs.CV",
      "cs.LG"
    ],
    "date": "February 2026",
    "abstract": "Computed Tomography (CT) plays a vital role in inspecting the internal structures of industrial objects. Furthermore, achieving high-quality CT reconstruction from sparse views is essential for reducing production costs. While classic implicit neural networks have shown promising results for sparse reconstruction, they are unable to leverage shape priors of objects. Motivated by the observation that numerous industrial objects exhibit rectangular structures, we propose a novel Neural Adaptive Bi...",
    "url": "https://arxiv.org/abs/2602.02356"
  },
  {
    "arxiv_id": "arXiv:2512.21129",
    "title": "Active inference and artificial reasoning",
    "authors": [
      "Karl Friston",
      "Lancelot Da Costa",
      "Alexander Tschantz",
      "Conor Heins",
      "Christopher Buckley",
      "Tim Verbelen",
      "Thomas Parr"
    ],
    "tags": [
      "q-bio.NC",
      "physics.data-an",
      "stat.ML"
    ],
    "date": "December 2025",
    "abstract": "This technical note considers the sampling of outcomes that provide the greatest amount of information about the structure of underlying world models. This generalisation furnishes a principled approach to structure learning under a plausible set of generative models or hypotheses. In active inference, policies - i.e., combinations of actions - are selected based on their expected free energy, which comprises expected information gain and value...",
    "url": "https://arxiv.org/abs/2512.21129"
  },
  {
    "arxiv_id": "arXiv:2512.10984",
    "title": "Developmental Symmetry-Loss: A Free-Energy Perspective on Brain-Inspired Invariance Learning",
    "authors": [
      "Arif D\u00f6nmez"
    ],
    "tags": [
      "q-bio.NC",
      "cs.AI",
      "cs.LG",
      "nlin.AO"
    ],
    "date": "December 2025",
    "abstract": "We propose Symmetry-Loss, a brain-inspired algorithmic principle that enforces invariance and equivariance through a differentiable constraint derived from environmental symmetries. The framework models learning as the iterative refinement of an effective symmetry group, paralleling developmental processes in which cortical representations align with the world's structure. By minimizing structural surprise, i.e. deviations from symmetry consistency, Symmetry-Loss operationalizes a Free-Energy...",
    "url": "https://arxiv.org/abs/2512.10984"
  },
  {
    "arxiv_id": "arXiv:2510.25053",
    "title": "Scalable predictive processing framework for multitask caregiving robots",
    "authors": [
      "Hayato Idei",
      "Tamon Miyake",
      "Tetsuya Ogata",
      "Yuichi Yamashita"
    ],
    "tags": [
      "cs.RO",
      "cs.AI",
      "cs.LG",
      "q-bio.NC"
    ],
    "date": "October 2025",
    "abstract": "The rapid aging of societies is intensifying demand for autonomous care robots; however, most existing systems are task-specific and rely on handcrafted preprocessing, limiting their ability to generalize across diverse scenarios. A prevailing theory in cognitive neuroscience proposes that the human brain operates through hierarchical predictive processing, which underlies flexible cognition and behavior by integrating multimodal sensory signals. Inspired by this principle, we introduce a hierar...",
    "url": "https://arxiv.org/abs/2510.25053"
  },
  {
    "arxiv_id": "arXiv:2510.23258",
    "title": "Deep Active Inference with Diffusion Policy and Multiple Timescale World Model for Real-World Exploration and Navigation",
    "authors": [
      "Riko Yokozawa",
      "Kentaro Fujii",
      "Yuta Nomura",
      "Shingo Murata"
    ],
    "tags": [
      "cs.RO",
      "cs.AI",
      "cs.LG"
    ],
    "date": "October 2025",
    "abstract": "Autonomous robotic navigation in real-world environments requires exploration to acquire environmental information as well as goal-directed navigation in order to reach specified targets. Active inference (AIF) based on the free-energy principle provides a unified framework for these behaviors by minimizing the expected free energy (EFE), thereby combining epistemic and extrinsic values. To realize this practically, we propose a deep AIF framework that integrates a diffusion policy as the policy...",
    "url": "https://arxiv.org/abs/2510.23258"
  },
  {
    "arxiv_id": "arXiv:2503.13223",
    "title": "Distributionally Robust Free Energy Principle for Decision-Making",
    "authors": [
      "Allahkaram Shafiei",
      "Hozefa Jesawada",
      "Karl Friston",
      "Giovanni Russo"
    ],
    "tags": [
      "cs.AI",
      "eess.SY",
      "math.OC"
    ],
    "date": "March 2025",
    "abstract": "Despite their groundbreaking performance, autonomous agents can misbehave when training and environmental conditions become inconsistent, with minor mismatches leading to undesirable behaviors or even catastrophic failures. Robustness towards these training-environment ambiguities is a core requirement for intelligent agents and its fulfillment is a long-standing challenge towards their real-world deployments. Here, we introduce a Distributionally Robust Free Energy model (DR-FREE) that instills...",
    "url": "https://arxiv.org/abs/2503.13223"
  },
  {
    "arxiv_id": "arXiv:2502.12654",
    "title": "Free Energy and Network Structure: Breaking Scale-Free Behaviour Through Information Processing Constraints",
    "authors": [
      "Peter R Williams",
      "Zhan Chen"
    ],
    "tags": [
      "cs.SI",
      "physics.soc-ph"
    ],
    "date": "February 2025",
    "abstract": "In this paper we show how The Free Energy Principle (FEP) can provide an explanation for why real-world networks deviate from scale-free behaviour, and how these characteristic deviations can emerge from constraints on information processing. We propose a minimal FEP model for node behaviour reveals three distinct regimes: when detection noise dominates, agents seek better information, reducing isolated agents compared to expectations from classical preferential attachment...",
    "url": "https://arxiv.org/abs/2502.12654"
  }
]