Probability.MDP.Histories

source

structure MDPs.MDP :

Type

Markov decision process

S : ℕ
number of states (individual states are elements of Fin S)
S_ne : 0 < self.S
A : ℕ
number of actions (individual actions are elements of Fin A)
A_ne : 0 < self.A
P : Fin self.S → Fin self.A → Findist.Δ self.S
transition probabilities: for each state s and action a, the probabilities of the next state s'
r : Fin self.S → Fin self.A → Fin self.S → ℝ
reward function, indexed by state s, action a, and next state s'

Instances For

source

def MDPs.MDP.maxS (M : MDP) :

Fin M.S

Equations

M.maxS = ⟨M.S - 1, ⋯⟩

Instances For

source

def MDPs.MDP.maxA (M : MDP) :

Fin M.A

Equations

M.maxA = ⟨M.A - 1, ⋯⟩

Instances For

source

@[reducible, inline]

abbrev MDPs.MDP.St (M : MDP) :

Type

Equations

M.St = Fin M.S

Instances For

source

@[reducible, inline]

abbrev MDPs.MDP.At (M : MDP) :

Type

Equations

M.At = Fin M.A

Instances For

source

def MDPs.MDP.setS (M : MDP) :

Finset M.St

Set of all states

Equations

M.setS = Fintype.elems

Instances For

source

def MDPs.MDP.setA (M : MDP) :

Finset M.At

Set of all actions

Equations

M.setA = Fintype.elems

Instances For

source

theorem MDPs.MDP.inS (M : MDP) (s : M.St) :

s ∈ M.setS

source

theorem MDPs.MDP.inA (M : MDP) (a : M.At) :

a ∈ M.setA

source

def MDPs.MDP.SA (M : MDP) :

ℕ

Equations

M.SA = M.S * M.A

Instances For

source

theorem MDPs.MDP.SA_ne (M : MDP) :

0 < M.SA

source

inductive MDPs.Hist (M : MDP) :

Type

Represents a history. States have type Fin M.S and actions have type Fin M.A.

init {M : MDP} : Fin M.S → Hist M
foll {M : MDP} : Hist M → Fin M.A → Fin M.S → Hist M

Instances For

source

instance MDPs.instCoeFinSHist {M : MDP} :

Coe (Fin M.S) (Hist M)

Equations

MDPs.instCoeFinSHist = { coe := fun (s : Fin M.S) => MDPs.Hist.init s }

source

def MDPs.Hist.length {M : MDP} :

Hist M → ℕ

History's length = the number of actions taken

Equations

(MDPs.Hist.init a).length = 0
(h.foll a a_1).length = 1 + h.length

Instances For

source

def MDPs.MDP.HistT (M : MDP) (t : ℕ) :

Type

Equations

M.HistT t = { h : MDPs.Hist M // h.length = t }

Instances For

source

@[reducible, inline]

abbrev MDPs.HistNE (M : MDP) :

Type

Nonempty histories

Equations

MDPs.HistNE M = { m : MDPs.Hist M // m.length ≥ 1 }

Instances For

source

def MDPs.Hist.last {M : MDP} :

Hist M → Fin M.S

Returns the last state of the history

Equations

(MDPs.Hist.init a).last = a
(h.foll a a_1).last = a_1

Instances For

source

def MDPs.MDP.numhist (M : MDP) (t : ℕ) :

ℕ

Number of histories of length t.

Equations

M.numhist t = M.S * M.SA ^ t

Instances For

source

theorem MDPs.hist_len_zero {M : MDP} :

M.numhist 0 = M.S

source

def MDPs.MDP.idx_to_hist (M : MDP) (t : ℕ) (i : Fin (M.numhist t)) :

M.HistT t

Constructs the i-th history of length t.

Equations

M.idx_to_hist Nat.zero i_2 = ⟨MDPs.Hist.init ⟨↑i_2, ⋯⟩, ⋯⟩
M.idx_to_hist t'.succ i_2 = ⟨(↑(M.idx_to_hist t' ⟨(↑i_2 - ↑i_2 % M.SA) / M.SA, ⋯⟩)).foll ⟨↑i_2 % M.SA / M.S % M.A, ⋯⟩ ⟨↑i_2 % M.SA % M.S, ⋯⟩, ⋯⟩

Instances For

source

theorem MDPs.Nat.sum_one_prod_cancel (n : ℕ) {m : ℕ} (h : 0 < m) :

(m - 1) * n + n = m * n

source

def MDPs.MDP.hist_to_idx (M : MDP) (h : Hist M) :

Fin (M.numhist h.length)

Compute the index of a history

Equations

M.hist_to_idx (MDPs.Hist.init a) = ⟨↑a, ⋯⟩
M.hist_to_idx (h_1.foll a a_1) = ⟨M.SA * ↑(M.hist_to_idx h_1) + (↑a * M.S + ↑a_1), ⋯⟩

Instances For

source

def MDPs.MDP.hist_to_idx' (M : MDP) (t : ℕ) (h : M.HistT t) :

Fin (M.numhist t)

A variant of hist_to_idx that takes a history of fixed length t; more convenient for constructing inverses.

Equations

M.hist_to_idx' t h = ⋯ ▸ M.hist_to_idx ↑h

Instances For

source

def MDPs.MDP.idx_to_hist' (M : MDP) (t : ℕ) (i : Fin (M.numhist t)) :

M.HistT t

A variant of idx_to_hist; more convenient for constructing inverses.

Equations

M.idx_to_hist' t i = M.idx_to_hist t i

Instances For

source

def MDPs.MDP.hist_idx_valid (M : MDP) :

Set (ℕ × ℕ)

Equations

M.hist_idx_valid = {ti : ℕ × ℕ | ti.2 < M.numhist ti.1}

Instances For

source

theorem MDPs.state_of_hist_len0 (M : MDP) (h : M.HistT 0) :

∃ (s : Fin M.S), ↑h = Hist.init s

source

theorem MDPs.state_of_hist_len_t (M : MDP) (t : ℕ) (h : M.HistT t.succ) :

∃ (h' : Hist M) (a : Fin M.A) (s : Fin M.S), ↑h = h'.foll a s

source

theorem MDPs.hist_idx_LeftInverse (t : ℕ) (M : MDP) :

Function.LeftInverse (M.idx_to_hist' t) (M.hist_to_idx' t)

source

theorem MDPs.hist_idx_RightInverse (M : MDP) (t : ℕ) :

Function.RightInverse (M.idx_to_hist' t) (M.hist_to_idx' t)

source

def MDPs.Hist.prefix {M : MDP} (k : ℕ) (h : Hist M) :

Hist M

Returns the prefix of the history h of length k. If h has length at most k, h itself is returned.

Equations

MDPs.Hist.prefix k (MDPs.Hist.init a) = MDPs.Hist.init a
MDPs.Hist.prefix k (h_1.foll a a_1) = if h_1.length + 1 ≤ k then h_1.foll a a_1 else MDPs.Hist.prefix k h_1

Instances For

source

def MDPs.MDP.tuple2hist {M : MDP} :

Hist M × Fin M.A × Fin M.S → HistNE M

Equations

MDPs.MDP.tuple2hist (h, as) = ⟨h.foll as.1 as.2, ⋯⟩

Instances For

source

def MDPs.MDP.hist2tuple {M : MDP} :

HistNE M → Hist M × Fin M.A × Fin M.S

Equations

MDPs.MDP.hist2tuple ⟨h.foll a s, property⟩ = (h, a, s)

Instances For

source

theorem MDPs.linv_hist2tuple_tuple2hist {M : MDP} :

Function.LeftInverse MDP.hist2tuple MDP.tuple2hist

source

theorem MDPs.inj_tuple2hist_l1 {M : MDP} :

Function.Injective MDP.tuple2hist

source

theorem MDPs.inj_tuple2hist {M : MDP} :

Function.Injective (Subtype.val ∘ MDP.tuple2hist)

source

def MDPs.emb_tuple2hist_l1 {M : MDP} :

Hist M × Fin M.A × Fin M.S ↪ HistNE M

Equations

MDPs.emb_tuple2hist_l1 = { toFun := MDPs.MDP.tuple2hist, inj' := ⋯ }

Instances For

source

def MDPs.emb_tuple2hist {M : MDP} :

Hist M × Fin M.A × Fin M.S ↪ Hist M

Equations

MDPs.emb_tuple2hist = { toFun := fun (x : MDPs.Hist M × Fin M.A × Fin M.S) => ↑(MDPs.MDP.tuple2hist x), inj' := ⋯ }

Instances For

source

def MDPs.MDP.state2hist (M : MDP) (s : Fin M.S) :

Hist M

Equations

M.state2hist s = MDPs.Hist.init s

Instances For

source

def MDPs.MDP.hist2state (M : MDP) :

Hist M → Fin M.S

Equations

M.hist2state (MDPs.Hist.init a) = a
M.hist2state (h.foll a a_1) = a_1

Instances For

source

theorem MDPs.linv_hist2state_state2hist {M : MDP} :

Function.LeftInverse M.hist2state M.state2hist

source

theorem MDPs.inj_state2hist {M : MDP} :

Function.Injective M.state2hist

source

def MDPs.state2hist_emb {M : MDP} :

Fin M.S ↪ Hist M

Equations

MDPs.state2hist_emb = { toFun := M.state2hist, inj' := ⋯ }

Instances For

source

def MDPs.isprefix {M : MDP} :

Hist M → Hist M → Bool

Checks whether the first history is a prefix of the second history.

Equations

One or more equations did not get rendered due to their size.
MDPs.isprefix (MDPs.Hist.init s₁) (MDPs.Hist.init s₂) = decide (s₁ = s₂)
MDPs.isprefix (MDPs.Hist.init s₁) (hp.foll a a_1) = MDPs.isprefix (MDPs.Hist.init s₁) hp
MDPs.isprefix (a.foll a_1 a_2) (MDPs.Hist.init a_3) = decide False

Instances For

source

def MDPs.Histories {M : MDP} (h : Hist M) :

ℕ → Finset (Hist M)

All histories that extend h by exactly t further decisions (action–state pairs); for t = 0 this is just {h}.

Equations

MDPs.Histories h Nat.zero = {h}
MDPs.Histories h t.succ = Finset.map MDPs.emb_tuple2hist (MDPs.Histories h t ×ˢ M.setA ×ˢ M.setS)

Instances For

source

@[reducible, inline]

abbrev MDPs.ℋ {M : MDP} :

Hist M → ℕ → Finset (Hist M)

Equations

MDPs.ℋ = MDPs.Histories

Instances For

source

theorem MDPs.hist_lenth_eq_horizon {M : MDP} (h : Hist M) (t : ℕ) (h' : Hist M) :

h' ∈ ℋ h t → h'.length = h.length + t

source

@[simp]

theorem MDPs.hist_foll_nonempty {M : MDP} (h : Hist M) (a : M.At) (s : M.St) :

(h.foll a s).length > 0

source

theorem MDPs.hist_foll_len {M : MDP} (h : Hist M) (a : M.At) (s : M.St) :

(h.foll a s).length = h.length + 1

source

def MDPs.MDP.HistoriesHorizon (M : MDP) (t : ℕ) :

Finset (Hist M)

All histories of a given length

Equations

M.HistoriesHorizon Nat.zero = Finset.map MDPs.state2hist_emb M.setS
M.HistoriesHorizon t_1.succ = Finset.map MDPs.emb_tuple2hist (M.HistoriesHorizon t_1 ×ˢ M.setA ×ˢ M.setS)

Instances For

source

theorem MDPs.hist_horiz_complete {M : MDP} (t : ℕ) (h : M.HistT t) :

↑h ∈ M.HistoriesHorizon t

source

theorem MDPs.hist_horiz_exact {M : MDP} (t : ℕ) (h : Hist M) (hh : h ∈ M.HistoriesHorizon t) :

h.length = t

Shows that there are no extra histories in the finset

source

def MDPs.MDP.HistoriesHorizonT (M : MDP) (t : ℕ) :

Finset (M.HistT t)

Equations

M.HistoriesHorizonT t = Finset.map { toFun := fun (hh : { h : MDPs.Hist M // h ∈ M.HistoriesHorizon t }) => ⟨↑hh, ⋯⟩, inj' := ⋯ } (M.HistoriesHorizon t).attach

Instances For

source

theorem MDPs.hist_horiz_complete_t {M : MDP} (t : ℕ) (h : M.HistT t) :

h ∈ M.HistoriesHorizonT t

source

instance MDPs.instFintypeHistT (M : MDP) (t : ℕ) :

Fintype (M.HistT t)

Equations

MDPs.instFintypeHistT M t = { elems := M.HistoriesHorizonT t, complete := ⋯ }

source

@[reducible, inline]

abbrev MDPs.ℋₜ {M : MDP} :

ℕ → Finset (Hist M)

Equations

MDPs.ℋₜ = M.HistoriesHorizon

Instances For