Probability.MDP.Histories

source

structure MDPs.MDP :

Type

Markov decision process

S : ℕ
number of states
S_ne : 0 < self.S
A : ℕ
number of actions
A_ne : 0 < self.A
P : Fin self.S → Fin self.A → Findist.Δ self.S
transition probabilities: P s a is the distribution over next states s'
r : Fin self.S → Fin self.A → Fin self.S → ℝ
reward function s, a, s'

Instances For

source

def MDPs.MDP.maxS (M : MDP) :

Fin M.S

Equations

M.maxS = ⟨M.S - 1, ⋯⟩

Instances For

source

def MDPs.MDP.maxA (M : MDP) :

Fin M.A

Equations

M.maxA = ⟨M.A - 1, ⋯⟩

Instances For

source

def MDPs.MDP.setS (M : MDP) :

Finset (Fin M.S)

Set of all states

Equations

M.setS = (Finset.range M.S).attachFin ⋯

Instances For

source

def MDPs.MDP.setA (M : MDP) :

Finset (Fin M.A)

Set of all actions

Equations

M.setA = (Finset.range M.A).attachFin ⋯

Instances For

source

def MDPs.MDP.SA (M : MDP) :

ℕ

Equations

M.SA = M.S * M.A

Instances For

source

theorem MDPs.MDP.SA_ne (M : MDP) :

0 < M.SA

source

inductive MDPs.Hist (M : MDP) :

Type

Represents a history. States have type Fin M.S and actions have type Fin M.A.

init {M : MDP} : Fin M.S → Hist M
foll {M : MDP} : Hist M → Fin M.A → Fin M.S → Hist M

Instances For

source

instance MDPs.instCoeFinSHist {M : MDP} :

Coe (Fin M.S) (Hist M)

Equations

MDPs.instCoeFinSHist = { coe := fun (s : Fin M.S) => MDPs.Hist.init s }

source

def MDPs.Hist.length {M : MDP} :

Hist M → ℕ

History's length = the number of actions taken

Equations

(MDPs.Hist.init a).length = 0
(h.foll a a_1).length = 1 + h.length

Instances For

source

def MDPs.MDP.HistT (M : MDP) (t : ℕ) :

Type

Equations

M.HistT t = { m : MDPs.Hist M // m.length = t }

Instances For

source

@[reducible, inline]

abbrev MDPs.HistNE (M : MDP) :

Type

Nonempty histories, i.e. histories of length at least 1 (at least one action taken)

Equations

MDPs.HistNE M = { m : MDPs.Hist M // m.length ≥ 1 }

Instances For

source

def MDPs.Hist.last {M : MDP} :

Hist M → Fin M.S

Returns the last state of the history

Equations

(MDPs.Hist.init a).last = a
(h.foll a a_1).last = a_1

Instances For

source

def MDPs.MDP.numhist (M : MDP) (t : ℕ) :

ℕ

Number of histories of length t.

Equations

M.numhist t = M.S * M.SA ^ t

Instances For

source

theorem MDPs.hist_len_zero {M : MDP} :

M.numhist 0 = M.S

source

def MDPs.MDP.idx_to_hist (M : MDP) (t : ℕ) (i : Fin (M.numhist t)) :

M.HistT t

Construct the i-th history of length t

Equations

M.idx_to_hist Nat.zero i_2 = ⟨MDPs.Hist.init ⟨↑i_2, ⋯⟩, ⋯⟩
M.idx_to_hist t'.succ i_2 = ⟨(↑(M.idx_to_hist t' ⟨(↑i_2 - ↑i_2 % M.SA) / M.SA, ⋯⟩)).foll ⟨↑i_2 % M.SA / M.S % M.A, ⋯⟩ ⟨↑i_2 % M.SA % M.S, ⋯⟩, ⋯⟩

Instances For

source

theorem MDPs.Nat.sum_one_prod_cancel (n : ℕ) {m : ℕ} (h : 0 < m) :

(m - 1) * n + n = m * n

source

def MDPs.MDP.hist_to_idx (M : MDP) (h : Hist M) :

Fin (M.numhist h.length)

Compute the index of a history

Equations

M.hist_to_idx (MDPs.Hist.init a) = ⟨↑a, ⋯⟩
M.hist_to_idx (h_1.foll a a_1) = ⟨M.SA * ↑(M.hist_to_idx h_1) + (↑a * M.S + ↑a_1), ⋯⟩

Instances For

source

def MDPs.MDP.hist_to_idx' (M : MDP) (h : Hist M) :

ℕ × ℕ

Equations

M.hist_to_idx' h = (h.length, ↑(M.hist_to_idx h))

Instances For

source

def MDPs.MDP.idx_to_hist' (M : MDP) (ti : ℕ × ℕ) :

Hist M

Equations

M.idx_to_hist' ti = if h : ti.2 < M.numhist ti.1 then ↑(M.idx_to_hist ti.1 ⟨ti.2, h⟩) else MDPs.Hist.init ⟨0, ⋯⟩

Instances For

source

def MDPs.MDP.hist_idx_valid (M : MDP) :

Set (ℕ × ℕ)

Equations

M.hist_idx_valid = {ti : ℕ × ℕ | ti.2 < M.numhist ti.1}

Instances For

source

theorem MDPs.hist_idx_LeftInverse (M : MDP) :

Function.LeftInverse M.idx_to_hist' M.hist_to_idx'

source

theorem MDPs.hist_idx_RightInverse (M : MDP) :

Set.RightInvOn M.idx_to_hist' M.hist_to_idx' M.hist_idx_valid

source

def MDPs.Hist.prefix {M : MDP} (k : ℕ) (h : Hist M) :

Hist M

Return the prefix of the history h of length k; if h has length at most k, h itself is returned

Equations

MDPs.Hist.prefix k (MDPs.Hist.init a) = MDPs.Hist.init a
MDPs.Hist.prefix k (h_1.foll a a_1) = if h_1.length + 1 ≤ k then h_1.foll a a_1 else MDPs.Hist.prefix k h_1

Instances For

source

def MDPs.MDP.tuple2hist {M : MDP} :

Hist M × Fin M.A × Fin M.S → HistNE M

Equations

MDPs.MDP.tuple2hist (h, as) = ⟨h.foll as.1 as.2, ⋯⟩

Instances For

source

def MDPs.MDP.hist2tuple {M : MDP} :

HistNE M → Hist M × Fin M.A × Fin M.S

Equations

MDPs.MDP.hist2tuple ⟨h.foll a s, property⟩ = (h, a, s)

Instances For

source

theorem MDPs.linv_hist2tuple_tuple2hist {M : MDP} :

Function.LeftInverse MDP.hist2tuple MDP.tuple2hist

source

theorem MDPs.inj_tuple2hist_l1 {M : MDP} :

Function.Injective MDP.tuple2hist

source

theorem MDPs.inj_tuple2hist {M : MDP} :

Function.Injective (Subtype.val ∘ MDP.tuple2hist)

source

def MDPs.emb_tuple2hist_l1 {M : MDP} :

Hist M × Fin M.A × Fin M.S ↪ HistNE M

Equations

MDPs.emb_tuple2hist_l1 = { toFun := MDPs.MDP.tuple2hist, inj' := ⋯ }

Instances For

source

def MDPs.emb_tuple2hist {M : MDP} :

Hist M × Fin M.A × Fin M.S ↪ Hist M

Equations

MDPs.emb_tuple2hist = { toFun := fun (x : MDPs.Hist M × Fin M.A × Fin M.S) => ↑(MDPs.MDP.tuple2hist x), inj' := ⋯ }

Instances For

source

def MDPs.state2hist {M : MDP} (s : Fin M.S) :

Hist M

Equations

MDPs.state2hist s = MDPs.Hist.init s

Instances For

source

def MDPs.hist2state {M : MDP} :

Hist M → Fin M.S

Equations

MDPs.hist2state (MDPs.Hist.init a) = a
MDPs.hist2state (h.foll a a_1) = a_1

Instances For

source

theorem MDPs.linv_hist2state_state2hist {M : MDP} :

Function.LeftInverse hist2state state2hist

source

theorem MDPs.inj_state2hist {M : MDP} :

Function.Injective state2hist

source

def MDPs.state2hist_emb {M : MDP} :

Fin M.S ↪ Hist M

Equations

MDPs.state2hist_emb = { toFun := MDPs.state2hist, inj' := ⋯ }

Instances For

source

def MDPs.isprefix {M : MDP} :

Hist M → Hist M → Bool

Checks whether the first history is a prefix of the second history.

Equations

One or more equations did not get rendered due to their size.
MDPs.isprefix (MDPs.Hist.init s₁) (MDPs.Hist.init s₂) = decide (s₁ = s₂)
MDPs.isprefix (MDPs.Hist.init s₁) (hp.foll a a_1) = MDPs.isprefix (MDPs.Hist.init s₁) hp
MDPs.isprefix (a.foll a_1 a_2) (MDPs.Hist.init a_3) = decide False

Instances For

source

def MDPs.Histories {M : MDP} (h : Hist M) :

ℕ → Finset (Hist M)

All histories that follow h for t decisions

Equations

MDPs.Histories h Nat.zero = {h}
MDPs.Histories h t.succ = Finset.map MDPs.emb_tuple2hist (MDPs.Histories h t ×ˢ M.setA ×ˢ M.setS)

Instances For

source

@[reducible, inline]

abbrev MDPs.ℋ {M : MDP} :

Hist M → ℕ → Finset (Hist M)

Equations

MDPs.ℋ = MDPs.Histories

Instances For

source

theorem MDPs.hist_lenth_eq_horizon {M : MDP} (h : Hist M) (t : ℕ) (h' : Hist M) :

h' ∈ ℋ h t → h'.length = h.length + t

source

def MDPs.HistoriesHorizon {M : MDP} :

ℕ → Finset (Hist M)

All histories of a given length

Equations

Instances For

source

@[reducible, inline]

abbrev MDPs.ℋₜ {M : MDP} :

ℕ → Finset (Hist M)

Equations

MDPs.ℋₜ = MDPs.HistoriesHorizon

Instances For