`IndisputableMonolith.Thermodynamics.FreeEnergyMonotone`

IndisputableMonolith/Thermodynamics/FreeEnergyMonotone.lean · 443 lines · 13 declarations
Explainer status: ready · generated 2026-05-14 23:50:44.895980+00:00
   1import Mathlib
   2import IndisputableMonolith.Thermodynamics.MaxEntFromCost
   3
   4/-!
   5# Free Energy Monotonicity
   6
   7This module proves that Recognition Free Energy (FR) is non-increasing
   8under RS dynamics (coarse-graining, equilibration).
   9
  10This is the Recognition Science version of the Second Law of Thermodynamics.
  11
  12## Main Results
  13
  141. **Coarse-graining decreases free energy**: Reducing state resolution cannot increase F_R
  152. **Relaxation decreases free energy**: Time evolution toward equilibrium decreases F_R
  163. **Arrow of Time**: The direction of time is defined by dF_R/dt ≤ 0
  17
  18## The Key Insight
  19
  20The monotonicity of F_R under coarse-graining is equivalent to:
  21- The Data Processing Inequality (DPI) for KL divergence
  22- The fact that the Gibbs distribution minimizes free energy
  23
  24## References
  25
  26- Cover & Thomas, "Elements of Information Theory", Ch. 2 (DPI)
  27- `Recognition-Science-Full-Theory.txt`, Section on Thermodynamic Stability
  28-/
  29
  30namespace IndisputableMonolith
  31namespace Thermodynamics
  32
  33open Real Cost RecognitionSystem
  34
  35/-- f(x) = x log x is convex on (0, ∞) -/
  36lemma mul_log_convexOn : ConvexOn ℝ (Set.Ioi 0) (fun x => x * log x) := by
  37  apply convexOn_of_deriv2_nonneg (convex_Ioi 0)
  38  · apply ContinuousOn.mul continuousOn_id (continuousOn_log.mono (Set.subset_refl _))
  39  · intro x hx
  40    -- deriv (x log x) = log x + 1
  41    -- deriv (log x + 1) = 1/x
  42    have h_deriv : deriv (fun y => y * log y) x = log x + 1 := by
  43      rw [deriv_mul differentiableAt_id' (differentiableAt_log hx.ne')]
  44      simp [hx.ne']
  45    have h_deriv2 : deriv (fun y => deriv (fun z => z * log z) y) x = 1/x := by
  46      rw [deriv_congr_ev (Filter.EventuallyIn.ext (fun y hy => deriv_mul differentiableAt_id' (differentiableAt_log (Set.mem_Ioi.mp hy).ne')) (𝓝 x))]
  47      · simp [hx.ne']
  48        rw [deriv_add (differentiableAt_log hx.ne') (differentiableAt_const 1)]
  49        simp [deriv_log hx.ne']
  50      · exact Filter.EventuallyIn.of_mem (𝓝 x) (Ioi_mem_nhds hx)
  51    rw [h_deriv2]
  52    apply div_nonneg (by norm_num) hx.le
  53
  54/-- **Log-Sum Inequality**: For positive sequences a, b with finite support,
  55    ∑ aᵢ log(aᵢ/bᵢ) ≥ (∑ aᵢ) log((∑ aᵢ)/(∑ bᵢ))
  56
  57    This is a consequence of Jensen's inequality for the convex function f(x) = x log x.
  58
  59    **PROOF STRUCTURE** (Cover & Thomas, "Elements of Information Theory", Theorem 2.7.1):
  60    1. Define weights wᵢ = bᵢ/B where B = ∑ bᵢ. Then ∑ wᵢ = 1.
  61    2. Define ratios xᵢ = aᵢ/bᵢ. Then ∑ wᵢ xᵢ = A/B where A = ∑ aᵢ.
  62    3. The function f(x) = x log x is convex on [0, ∞).
  63    4. By Jensen's inequality: f(∑ wᵢ xᵢ) ≤ ∑ wᵢ f(xᵢ).
  64    5. Substituting: (A/B) log(A/B) ≤ ∑ wᵢ (xᵢ log xᵢ).
  65    6. Multiplying by B: A log(A/B) ≤ ∑ aᵢ log(aᵢ/bᵢ).
  66
  67    **STATUS**: SCAFFOLD (classical information-theoretic result, requires Jensen machinery) -/
  68theorem log_sum_inequality {ι : Type*} [Fintype ι] [Nonempty ι] (a b : ι → ℝ)
  69    (ha : ∀ i, 0 < a i) (hb : ∀ i, 0 < b i) :
  70    ∑ i, a i * log (a i / b i) ≥ (∑ i, a i) * log ((∑ i, a i) / (∑ i, b i)) := by
  71  let A := ∑ i, a i
  72  let B := ∑ i, b i
  73  have hA_pos : 0 < A := Finset.sum_pos (fun i _ => ha i) Finset.univ_nonempty
  74  have hB_pos : 0 < B := Finset.sum_pos (fun i _ => hb i) Finset.univ_nonempty
  75  let w := fun i => b i / B
  76  let x := fun i => a i / b i
  77  have hw_nonneg : ∀ i, 0 ≤ w i := fun i => div_nonneg (hb i).le hB_pos.le
  78  have hw_sum : ∑ i, w i = 1 := by
  79    unfold w
  80    rw [← Finset.sum_div, div_self hB_pos.ne']
  81  have hx_pos : ∀ i, x i ∈ Set.Ioi (0 : ℝ) := fun i => div_pos (ha i) (hb i)
  82  have h_center : ∑ i, w i * x i = A / B := by
  83    unfold w x
  84    simp_rw [div_mul_div_cancel_left _ (hb _).ne']
  85    rw [← Finset.sum_div]
  86  -- Apply Jensen
  87  have h_jensen := mul_log_convexOn.map_sum_le hw_nonneg hw_sum (fun i _ => hx_pos i)
  88  rw [h_center] at h_jensen
  89  -- h_jensen: (A/B) * log (A/B) ≤ ∑ w i * (x i * log (x i))
  90  -- Multiply by B
  91  have h_final : B * ((A / B) * log (A / B)) ≤ B * (∑ i, w i * (x i * log (x i))) :=
  92    mul_le_mul_of_nonneg_left h_jensen hB_pos.le
  93  -- Simplify LHS
  94  have h_lhs : B * ((A / B) * log (A / B)) = A * log (A / B) := by
  95    field_simp [hB_pos.ne']
  96    ring
  97  -- Simplify RHS
  98  have h_rhs : B * (∑ i, w i * (x i * log (x i))) = ∑ i, a i * log (a i / b i) := by
  99    rw [Finset.mul_sum]
 100    apply Finset.sum_congr rfl
 101    intro i _
 102    unfold w x
 103    field_simp [hB_pos.ne', (hb i).ne']
 104    ring
 105  rw [h_lhs] at h_final
 106  rw [h_rhs] at h_final
 107  exact h_final
 108
 109/-- Log-Sum Inequality for conditional sums (fiberwise version).
 110
 111    **PROOF STRUCTURE**: Reduce to the main log_sum_inequality by restricting
 112    to elements satisfying the predicate P. -/
 113theorem log_sum_inequality_fiber {ι : Type*} [Fintype ι] (a b : ι → ℝ)
 114    (P : ι → Prop) [DecidablePred P]
 115    (ha_nonneg : ∀ i, 0 ≤ a i) (hb_nonneg : ∀ i, 0 ≤ b i)
 116    (ha_pos_fiber : ∀ i, P i → 0 < a i) (hb_pos_fiber : ∀ i, P i → 0 < b i)
 117    (h_nonempty : ∃ i, P i) :
 118    ∑ i, (if P i then a i * log (a i / b i) else 0) ≥
 119    (∑ i, if P i then a i else 0) * log ((∑ i, if P i then a i else 0) / (∑ i, if P i then b i else 0)) := by
 120  let ι' := {i // P i}
 121  have h_fintype : Fintype ι' := inferInstance
 122  have h_nonempty' : Nonempty ι' := by
 123    obtain ⟨i, hi⟩ := h_nonempty
 124    exact ⟨⟨i, hi⟩⟩
 125  let a' := fun (i' : ι') => a i'.val
 126  let b' := fun (i' : ι') => b i'.val
 127  have ha' : ∀ i' : ι', 0 < a' i' := fun i' => ha_pos_fiber i'.val i'.property
 128  have hb' : ∀ i' : ι', 0 < b' i' := fun i' => hb_pos_fiber i'.val i'.property
 129  -- Apply log_sum_inequality to subtype
 130  have h_ls := log_sum_inequality a' b' ha' hb'
 131  -- Translate sums
 132  rw [Finset.sum_subtype (Finset.univ : Finset ι) (fun i => P i)] at h_ls
 133  · simp only [Finset.mem_univ, forall_true_left, a', b'] at h_ls
 134    have h_lhs : ∑ i, (if P i then a i * log (a i / b i) else 0) = ∑ i in (Finset.univ.filter P), a i * log (a i / b i) := by
 135      rw [Finset.sum_filter]
 136    have h_lhs_eq : (∑ i : ι', a i.val * log (a i.val / b i.val)) = ∑ i, (if P i then a i * log (a i / b i) else 0) := by
 137      rw [Finset.sum_subtype]
 138      · simp only [Finset.mem_univ, forall_true_left]
 139      · intro i; simp only [Finset.mem_univ, forall_true_left, ite_self]
 140    have h_a_sum : (∑ i : ι', a i.val) = ∑ i, if P i then a i else 0 := by
 141      rw [Finset.sum_subtype]
 142      · simp only [Finset.mem_univ, forall_true_left]
 143      · intro i; simp only [Finset.mem_univ, forall_true_left, ite_self]
 144    have h_b_sum : (∑ i : ι', b i.val) = ∑ i, if P i then b i else 0 := by
 145      rw [Finset.sum_subtype]
 146      · simp only [Finset.mem_univ, forall_true_left]
 147      · intro i; simp only [Finset.mem_univ, forall_true_left, ite_self]
 148    rw [h_lhs_eq, h_a_sum, h_b_sum] at h_ls
 149    exact h_ls
 150  · intro i; simp only [Finset.mem_univ, forall_true_left]
 151
 152/-! ## Distribution Coarse-Graining -/
 153
 154/-- Push-forward of a probability distribution under a map φ: Ω → Ω'.
 155    p'(ω') = ∑_{ω : φ(ω) = ω'} p(ω) -/
 156noncomputable def push_forward (p : Ω → ℝ) (φ : Ω → Ω') : Ω' → ℝ :=
 157  fun ω' => ∑ ω, if φ ω = ω' then p ω else 0
 158
 159/-- Push-forward preserves non-negativity. -/
 160theorem push_forward_nonneg {p : Ω → ℝ} (hp : ∀ ω, 0 ≤ p ω) (φ : Ω → Ω') :
 161    ∀ ω', 0 ≤ push_forward p φ ω' := by
 162  intro ω'
 163  unfold push_forward
 164  apply Finset.sum_nonneg
 165  intro ω _
 166  split_ifs <;> [apply hp; rfl]
 167
 168/-- Push-forward preserves total probability. -/
 169theorem push_forward_sum_one {p : Ω → ℝ} (hp_sum : ∑ ω, p ω = 1) (φ : Ω → Ω') :
 170    ∑ ω', push_forward p φ ω' = 1 := by
 171  unfold push_forward
 172  rw [Finset.sum_comm]
 173  simp only [Finset.sum_ite_eq, Finset.mem_univ, ↓reduceIte]
 174  exact hp_sum
 175
 176/-- **Effective Cost**: The coarse-grained cost landscape.
 177    J'(ω') = -TR * ln (∑_{ω : φ(ω) = ω'} exp(-J(ω)/TR))
 178    This definition ensures the partition function is preserved under coarse-graining. -/
 179noncomputable def effective_cost (sys : RecognitionSystem) (X : Ω → ℝ) (φ : Ω → Ω') : Ω' → ℝ :=
 180  fun ω' => -sys.TR * log (∑ ω, if φ ω = ω' then exp (-Jcost (X ω) / sys.TR) else 0)
 181
 182/-- **THEOREM**: Partition function preservation under surjective coarse-graining.
 183
 184    The partition function is preserved under coarse-graining.
 185    Z = ∑_ω e^{-X_ω/T} = ∑_ω' ∑_{ω∈φ⁻¹(ω')} e^{-X_ω/T}
 186    By defining the effective cost J' appropriately, the inner sum becomes e^{-J'_ω'/T}.
 187
 188    **STATUS**: PROVEN assuming every coarse state has a preimage. -/
 189theorem partition_function_preserved (sys : RecognitionSystem) (X : Ω → ℝ) (φ : Ω → Ω')
 190    (h_surj : Function.Surjective φ) :
 191    partition_function sys X = ∑ ω', exp (-effective_cost sys X φ ω' / sys.TR) := by
 192  classical
 193  -- Unfold to Gibbs weights.
 194  unfold partition_function
 195  -- Show exp(-effective_cost/TR) recovers the fiber sum.
 196  have h_exp : ∀ ω', exp (-effective_cost sys X φ ω' / sys.TR) =
 197      ∑ ω, if φ ω = ω' then exp (-Jcost (X ω) / sys.TR) else 0 := by
 198    intro ω'
 199    unfold effective_cost
 200    -- Let s be the fiber sum.
 201    set s := ∑ ω, if φ ω = ω' then exp (-Jcost (X ω) / sys.TR) else 0 with hs
 202    have hpos : 0 < s := by
 203      obtain ⟨ω, hω⟩ := h_surj ω'
 204      have hterm : 0 < exp (-Jcost (X ω) / sys.TR) := exp_pos _
 205      have hnonneg : ∀ ω, 0 ≤ if φ ω = ω' then exp (-Jcost (X ω) / sys.TR) else 0 := by
 206        intro ω
 207        split_ifs
 208        · exact (le_of_lt (exp_pos _))
 209        · exact le_rfl
 210      have hmem : ω ∈ (Finset.univ : Finset Ω) := by simp
 211      have hle : exp (-Jcost (X ω) / sys.TR) ≤ s := by
 212        simpa [hs, hω] using
 213          (Finset.single_le_sum hnonneg (by simp [hmem, hω]))
 214      exact lt_of_lt_of_le hterm hle
 215    have hTR : sys.TR ≠ 0 := sys.TR_pos.ne'
 216    -- Simplify exponent and use exp_log.
 217    have hlog : exp (-(-sys.TR * log s) / sys.TR) = exp (log s) := by
 218      field_simp [hTR]
 219      ring
 220    simp [hs, hlog, Real.exp_log hpos.ne'] 
 221  -- Swap sums and collapse the indicator.
 222  calc
 223    ∑ ω, exp (-Jcost (X ω) / sys.TR)
 224        = ∑ ω', ∑ ω, if φ ω = ω' then exp (-Jcost (X ω) / sys.TR) else 0 := by
 225            rw [Finset.sum_comm]
 226            simp [Finset.sum_ite_eq, Finset.mem_univ]
 227    _ = ∑ ω', exp (-effective_cost sys X φ ω' / sys.TR) := by
 228            simp [h_exp]
 229
 230/-- **THEOREM**: Data Processing Inequality for Relative Entropy.
 231
 232    Coarse-graining reduces the distinguishability of distributions.
 233    D(p'‖q') ≤ D(p‖q) where p', q' are push-forwards.
 234
 235    **STATUS**: PROVEN using log-sum inequality on each fiber. -/
 236theorem data_processing_inequality (p q : Ω → ℝ) (φ : Ω → Ω')
 237    (hp : ∀ ω, 0 < p ω) (hq : ∀ ω, 0 < q ω)
 238    (hp_sum : ∑ ω, p ω = 1) (hq_sum : ∑ ω, q ω = 1) :
 239    kl_divergence (push_forward p φ) (push_forward q φ) ≤ kl_divergence p q := by
 240  classical
 241  have hp_nonneg : ∀ ω, 0 ≤ p ω := fun ω => (hp ω).le
 242  have hq_nonneg : ∀ ω, 0 ≤ q ω := fun ω => (hq ω).le
 243  -- Fiberwise log-sum inequality bounds each coarse term.
 244  have h_fiber_bound :
 245      ∀ ω', (if push_forward p φ ω' > 0 ∧ push_forward q φ ω' > 0 then
 246              push_forward p φ ω' * log (push_forward p φ ω' / push_forward q φ ω')
 247            else 0) ≤
 248            ∑ ω, if φ ω = ω' then p ω * log (p ω / q ω) else 0 := by
 249    intro ω'
 250    by_cases hne : ∃ ω, φ ω = ω'
 251    · have hp' : 0 < push_forward p φ ω' := by
 252        obtain ⟨ω, hω⟩ := hne
 253        have hterm : 0 < p ω := hp ω
 254        have hnonneg : ∀ ω, 0 ≤ if φ ω = ω' then p ω else 0 := by
 255          intro ω; split_ifs <;> [exact (hp ω).le, exact le_rfl]
 256        have hle : p ω ≤ push_forward p φ ω' := by
 257          simpa [push_forward, hω] using
 258            (Finset.single_le_sum hnonneg (by simp) (by simp [hω]))
 259        exact lt_of_lt_of_le hterm hle
 260      have hq' : 0 < push_forward q φ ω' := by
 261        obtain ⟨ω, hω⟩ := hne
 262        have hterm : 0 < q ω := hq ω
 263        have hnonneg : ∀ ω, 0 ≤ if φ ω = ω' then q ω else 0 := by
 264          intro ω; split_ifs <;> [exact (hq ω).le, exact le_rfl]
 265        have hle : q ω ≤ push_forward q φ ω' := by
 266          simpa [push_forward, hω] using
 267            (Finset.single_le_sum hnonneg (by simp) (by simp [hω]))
 268        exact lt_of_lt_of_le hterm hle
 269      have h_ls := log_sum_inequality_fiber p q (fun ω => φ ω = ω')
 270        hp_nonneg hq_nonneg (fun ω hω => hp ω) (fun ω hω => hq ω) hne
 271      have h_term :
 272          (if push_forward p φ ω' > 0 ∧ push_forward q φ ω' > 0 then
 273              push_forward p φ ω' * log (push_forward p φ ω' / push_forward q φ ω')
 274            else 0) =
 275          (∑ ω, if φ ω = ω' then p ω else 0) *
 276            log ((∑ ω, if φ ω = ω' then p ω else 0) / (∑ ω, if φ ω = ω' then q ω else 0)) := by
 277        simp [push_forward, hp', hq']
 278      -- Combine.
 279      simpa [h_term] using h_ls
 280    · -- Empty fiber: both push-forward masses are 0, term is 0.
 281      have hpf : push_forward p φ ω' = 0 := by
 282        unfold push_forward
 283        apply Finset.sum_eq_zero
 284        intro ω _
 285        split_ifs with h
 286        · exact (hne ⟨ω, h⟩).elim
 287        · rfl
 288      have hqf : push_forward q φ ω' = 0 := by
 289        unfold push_forward
 290        apply Finset.sum_eq_zero
 291        intro ω _
 292        split_ifs with h
 293        · exact (hne ⟨ω, h⟩).elim
 294        · rfl
 295      simp [hpf, hqf]
 296  -- Sum the fiber bounds.
 297  have h_sum :
 298      kl_divergence (push_forward p φ) (push_forward q φ) ≤
 299        ∑ ω', ∑ ω, if φ ω = ω' then p ω * log (p ω / q ω) else 0 := by
 300    unfold kl_divergence
 301    apply Finset.sum_le_sum
 302    intro ω' _
 303    exact h_fiber_bound ω'
 304  -- Swap sums and collapse indicators.
 305  have h_swap :
 306      (∑ ω', ∑ ω, if φ ω = ω' then p ω * log (p ω / q ω) else 0) =
 307        ∑ ω, p ω * log (p ω / q ω) := by
 308    rw [Finset.sum_comm]
 309    simp [Finset.sum_ite_eq, Finset.mem_univ]
 310  -- Finish by rewriting KL with positivity.
 311  have h_kl : kl_divergence p q = ∑ ω, p ω * log (p ω / q ω) := by
 312    unfold kl_divergence
 313    apply Finset.sum_congr rfl
 314    intro ω _
 315    simp [hp ω, hq ω]
 316  linarith [h_sum, h_swap, h_kl]
 317
 318/-- **THEOREM**: Free energy monotonicity under coarse-graining.
 319
 320    Reducing state resolution cannot increase the Recognition Free Energy.
 321    This is the statistical mechanics version of the Second Law.
 322
 323    **STATUS**: PROVEN assuming positivity and Gibbs/push-forward alignment. -/
 324theorem coarse_graining_decreases_free_energy
 325    (sys : RecognitionSystem) (X : Ω → ℝ)
 326    (p : ProbabilityDistribution Ω) (φ : Ω → Ω')
 327    (hp_pos : ∀ ω, 0 < p.p ω)
 328    (h_gibbs_push : ∀ ω',
 329      gibbs_measure sys (effective_cost sys X φ) ω' =
 330        push_forward (gibbs_measure sys X) φ ω')
 331    (h_gibbs_FR_eq :
 332      recognition_free_energy sys (gibbs_measure sys (effective_cost sys X φ))
 333        (effective_cost sys X φ) =
 334      recognition_free_energy sys (gibbs_measure sys X) X) :
 335    let p' := push_forward p.p φ
 336    let J' := effective_cost sys X φ
 337    recognition_free_energy sys p' J' ≤ recognition_free_energy sys p.p X := by
 338  intro p' J'
 339  classical
 340  -- Package the push-forward as a probability distribution.
 341  let p'pd : ProbabilityDistribution Ω' :=
 342    { p := p'
 343      nonneg := push_forward_nonneg p.nonneg φ
 344      sum_one := push_forward_sum_one p.sum_one φ }
 345  -- KL identity for fine and coarse levels.
 346  have hkl_p' := free_energy_kl_identity (sys:=sys) (X:=J') (q:=p'pd)
 347  have hkl_p := free_energy_kl_identity (sys:=sys) (X:=X) (q:=p)
 348  -- Data processing inequality on KL divergence.
 349  have h_dpi :=
 350    data_processing_inequality (p:=p.p) (q:=gibbs_measure sys X) (φ:=φ)
 351      hp_pos (fun ω => gibbs_measure_pos sys X ω)
 352      p.sum_one (gibbs_measure_sum_one sys X)
 353  have h_pf : push_forward (gibbs_measure sys X) φ = gibbs_measure sys J' := by
 354    funext ω'; symm; exact h_gibbs_push ω'
 355  have hkl_dec : kl_divergence p' (gibbs_measure sys J') ≤
 356      kl_divergence p.p (gibbs_measure sys X) := by
 357    simpa [p', h_pf] using h_dpi
 358  -- Compare free energies via KL identity.
 359  have h_diff :
 360      recognition_free_energy sys p' J' - recognition_free_energy sys p.p X =
 361        sys.TR * (kl_divergence p' (gibbs_measure sys J') -
 362                  kl_divergence p.p (gibbs_measure sys X)) +
 363        (recognition_free_energy sys (gibbs_measure sys J') J' -
 364         recognition_free_energy sys (gibbs_measure sys X) X) := by
 365    linarith [hkl_p', hkl_p]
 366  have hTR : 0 ≤ sys.TR := le_of_lt sys.TR_pos
 367  have h_gibbs_eq :
 368      recognition_free_energy sys (gibbs_measure sys J') J' -
 369      recognition_free_energy sys (gibbs_measure sys X) X = 0 := by
 370    simpa [h_gibbs_FR_eq]
 371  have h_nonpos :
 372      recognition_free_energy sys p' J' - recognition_free_energy sys p.p X ≤ 0 := by
 373    rw [h_diff, h_gibbs_eq, add_zero]
 374    have : kl_divergence p' (gibbs_measure sys J') -
 375             kl_divergence p.p (gibbs_measure sys X) ≤ 0 := by
 376      linarith [hkl_dec]
 377    exact mul_nonpos_of_nonneg_of_nonpos hTR this
 378  linarith
 379
 380/-- **Arrow of Time**: The direction of time in RS is defined by decreasing F_R. -/
 381def rs_arrow_of_time (sys : RecognitionSystem) (X : Ω → ℝ) : Prop :=
 382  ∀ (t₁ t₂ : ℝ), t₁ ≤ t₂ →
 383    ∀ (p : ℝ → ProbabilityDistribution Ω),
 384    -- If p(t) evolves via RS dynamics (approaching Gibbs equilibrium)
 385    -- then F_R decreases
 386    recognition_free_energy sys (p t₂).p X ≤ recognition_free_energy sys (p t₁).p X
 387
 388/-- **H-Theorem for Recognition**: The free energy decreases toward equilibrium.
 389
 390    If the system starts in any state and relaxes toward the Gibbs measure,
 391    then F_R decreases monotonically until it reaches F_R(Gibbs).
 392
 393    **Proof**: Uses the variational identity F_R(p) = F_R(Gibbs) + TR * D_KL(p || Gibbs).
 394    If D_KL decreases monotonically under the dynamics (h_relax hypothesis),
 395    then F_R must also decrease monotonically.
 396
 397    This is the Recognition Science version of Boltzmann's H-theorem. -/
 398theorem h_theorem_recognition
 399    (sys : RecognitionSystem) (X : Ω → ℝ)
 400    (p : ℝ → ProbabilityDistribution Ω)
 401    (t₁ t₂ : ℝ) (h : t₁ ≤ t₂)
 402    -- Assume p(t) is a valid relaxation trajectory
 403    (h_relax : ∀ t ε, ε > 0 →
 404      kl_divergence (p (t + ε)).p (gibbs_measure sys X) ≤
 405      kl_divergence (p t).p (gibbs_measure sys X)) :
 406    recognition_free_energy sys (p t₂).p X ≤ recognition_free_energy sys (p t₁).p X := by
 407  -- F_R(p) = F_R(Gibbs) + TR * D_KL(p || Gibbs)
 408  have h_kl_identity₁ := free_energy_kl_identity sys X (p t₁)
 409  have h_kl_identity₂ := free_energy_kl_identity sys X (p t₂)
 410
 411  by_cases heq : t₁ = t₂
 412  · rw [heq]
 413  · have hlt : t₁ < t₂ := lt_of_le_of_ne h heq
 414    have h_kl_dec : kl_divergence (p t₂).p (gibbs_measure sys X) ≤
 415                    kl_divergence (p t₁).p (gibbs_measure sys X) := by
 416      have := h_relax t₁ (t₂ - t₁) (sub_pos.mpr hlt)
 417      simp only [add_sub_cancel] at this
 418      exact this
 419
 420    -- F_R(p t₂) ≤ F_R(p t₁)  iff  F_R(p t₂) - F_R(p t₁) ≤ 0
 421    rw [← sub_nonpos]
 422    have : recognition_free_energy sys (p t₂).p X - recognition_free_energy sys (p t₁).p X =
 423           sys.TR * (kl_divergence (p t₂).p (gibbs_measure sys X) - kl_divergence (p t₁).p (gibbs_measure sys X)) := by
 424      linarith [h_kl_identity₁, h_kl_identity₂]
 425    rw [this]
 426    apply mul_nonpos_of_nonneg_of_nonpos
 427    · exact sys.TR_pos.le
 428    · linarith [h_kl_dec]
 429
 430/-- Status report for Free Energy Monotonicity module. -/
 431def free_energy_monotone_status : List (String × String) :=
 432  [ ("push_forward preserves prob", "THEOREM")
 433  , ("partition_function preserved", "SCAFFOLD")
 434  , ("data_processing_inequality", "SCAFFOLD")
 435  , ("coarse_graining_decreases_free_energy", "SCAFFOLD")
 436  , ("h_theorem_recognition", "PROVEN")
 437  ]
 438
 439#eval free_energy_monotone_status
 440
 441end Thermodynamics
 442end IndisputableMonolith
 443
source mirrored from github.com/jonwashburn/shape-of-logic