Learned Scheduling of LDPC Decoders Based on Multi-armed Bandits
Salman Habib, Allison Beemer, and Jörg Kliewer
Learned Scheduling of LDPC Decoders Based on Multi-armed Bandits - - PowerPoint PPT Presentation
Learned Scheduling of LDPC Decoders Based on Multi-armed Bandits. Salman Habib, Allison Beemer, and Jörg Kliewer. The Center for Wireless Information Processing, New Jersey Institute of Technology. June 2020, IEEE International Symposium on
1 / 16
1 / 16
[Jeremy Zhang: Reinforcement Learning — Multi-Arm Bandit Implementation]
2 / 16
[Jeremy Zhang: Reinforcement Learning — Multi-Arm Bandit Implementation]
2 / 16
3 / 16
3 / 16
CNs VNs iteration 1 iteration 2
4 / 16
CNs VNs iteration 1 iteration 2
4 / 16
CNs VNs iteration 1 iteration 2
4 / 16
CNs VNs iteration 1 iteration 2
4 / 16
5 / 16
5 / 16
5 / 16
CNs VNs
6 / 16
CNs VNs
6 / 16
CNs VNs
6 / 16
CNs VNs
6 / 16
7 / 16
7 / 16
7 / 16
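Input: $L$, $H$
Output: reconstructed codeword
1  Initialization:
2    $\ell \leftarrow 0$
3    $m_{c \to v} \leftarrow 0$  // for all CN to VN messages
4    $m_{v_i \to c} \leftarrow L_i$  // for all VN to CN messages
5    $\hat{L}_\ell \leftarrow L$
6    $\hat{S}_\ell \leftarrow H \hat{L}_\ell$
7    foreach $a \in [[m]]$ do
8      $s_\ell^{(a)} \leftarrow g_M(\hat{s}_\ell^{(a)})$  // M-level quantization
9    end
// decoding starts
10 if stopping condition not satisfied or $\ell < \ell_{\max}$ then
11   $s \leftarrow$ index of $S_\ell$
12   update CN $a$ according to an optimum scheduling policy
13   foreach $v_k \in \mathcal{N}(a)$ do
14     compute and propagate $m_{a \to v_k}$
15     foreach $c_j \in \mathcal{N}(v_k) \setminus a$ do
16       compute and propagate $m_{v_k \to c_j}$
17     end
18     $\hat{L}_\ell^{(k)} \leftarrow \sum_{c \in \mathcal{N}(v_k)} m_{c \to v_k} + L_k$  // update LLR of $v_k$
19   end
20   foreach CN $j$ that is a neighbor of $v_k \in \mathcal{N}(a)$ do
21     $\hat{s}_\ell^{(j)} \leftarrow \sum_{v_i \in \mathcal{N}(j)} \hat{L}_\ell^{(i)}$
22     $s_\ell^{(j)} \leftarrow g_M(\hat{s}_\ell^{(j)})$  // update syndrome $S_\ell$
23   end
24   $\ell \leftarrow \ell + 1$  // update iteration
25 end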
8 / 16
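Input: $L$, $H$
Output: reconstructed codeword
1  Initialization:
2    $\ell \leftarrow 0$
3    $m_{c \to v} \leftarrow 0$  // for all CN to VN messages
4    $m_{v_i \to c} \leftarrow L_i$  // for all VN to CN messages
5    $\hat{L}_\ell \leftarrow L$
6    $\hat{S}_\ell \leftarrow H \hat{L}_\ell$
7    foreach $a \in [[m]]$ do
8      $s_\ell^{(a)} \leftarrow g_M(\hat{s}_\ell^{(a)})$  // M-level quantization
9    end
// decoding starts
10 if stopping condition not satisfied or $\ell < \ell_{\max}$ then
11   $s \leftarrow$ index of $S_\ell$
12   update CN $a$ according to an optimum scheduling policy
13   foreach $v_k \in \mathcal{N}(a)$ do
14     compute and propagate $m_{a \to v_k}$
15     foreach $c_j \in \mathcal{N}(v_k) \setminus a$ do
16       compute and propagate $m_{v_k \to c_j}$
17     end
18     $\hat{L}_\ell^{(k)} \leftarrow \sum_{c \in \mathcal{N}(v_k)} m_{c \to v_k} + L_k$  // update LLR of $v_k$
19   end
20   foreach CN $j$ that is a neighbor of $v_k \in \mathcal{N}(a)$ do
21     $\hat{s}_\ell^{(j)} \leftarrow \sum_{v_i \in \mathcal{N}(j)} \hat{L}_\ell^{(i)}$
22     $s_\ell^{(j)} \leftarrow g_M(\hat{s}_\ell^{(j)})$  // update syndrome $S_\ell$
23   end
24   $\ell \leftarrow \ell + 1$  // update iteration
25 end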
8 / 16
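Input: $L$, $H$
Output: reconstructed codeword
1  Initialization:
2    $\ell \leftarrow 0$
3    $m_{c \to v} \leftarrow 0$  // for all CN to VN messages
4    $m_{v_i \to c} \leftarrow L_i$  // for all VN to CN messages
5    $\hat{L}_\ell \leftarrow L$
6    $\hat{S}_\ell \leftarrow H \hat{L}_\ell$
7    foreach $a \in [[m]]$ do
8      $s_\ell^{(a)} \leftarrow g_M(\hat{s}_\ell^{(a)})$  // M-level quantization
9    end
// decoding starts
10 if stopping condition not satisfied or $\ell < \ell_{\max}$ then
11   $s \leftarrow$ index of $S_\ell$
12   update CN $a$ according to an optimum scheduling policy
13   foreach $v_k \in \mathcal{N}(a)$ do
14     compute and propagate $m_{a \to v_k}$
15     foreach $c_j \in \mathcal{N}(v_k) \setminus a$ do
16       compute and propagate $m_{v_k \to c_j}$
17     end
18     $\hat{L}_\ell^{(k)} \leftarrow \sum_{c \in \mathcal{N}(v_k)} m_{c \to v_k} + L_k$  // update LLR of $v_k$
19   end
20   foreach CN $j$ that is a neighbor of $v_k \in \mathcal{N}(a)$ do
21     $\hat{s}_\ell^{(j)} \leftarrow \sum_{v_i \in \mathcal{N}(j)} \hat{L}_\ell^{(i)}$
22     $s_\ell^{(j)} \leftarrow g_M(\hat{s}_\ell^{(j)})$  // update syndrome $S_\ell$
23   end
24   $\ell \leftarrow \ell + 1$  // update iteration
25 end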
8 / 16
9 / 16
10 / 16
10 / 16
10 / 16
CNs VNs
clusters
11 / 16
CNs VNs
clusters
11 / 16
CNs VNs
clusters
11 / 16
CNs VNs
clusters
$\arg\max_{a_u,\; u \in \{0, \ldots, \lceil m/z \rceil - 1\}} Q_\ell(s_u, a_u)$
11 / 16
CNs VNs
clusters
$\arg\max_{a_u,\; u \in \{0, \ldots, \lceil m/z \rceil - 1\}} Q_\ell(s_u, a_u)$
11 / 16
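Input: $\mathcal{L}$, $H$
Output: Estimated $Q_{\ell_{\max}}(s_u, a_u)$ for all $u$
1  Initialization: $Q_0(s_u, a_u) \leftarrow 0$ for all $s_u$, $a_u$ and $u$
2  for each $L \in \mathcal{L}$ do
3    $\ell \leftarrow 0$
4    $\hat{L}_\ell \leftarrow L$
5    $\hat{S}_\ell \leftarrow H \hat{L}_\ell$
6    foreach $a \in [[m]]$ do
7      $s_\ell^{(a)} \leftarrow g_M(\hat{s}_\ell^{(a)})$  // M-level quantization
8    end
9    while $\ell < \ell_{\max}$ do
10     schedule CN $a_u$ according to $\epsilon$-greedy approach
11     $S_\ell^{(u,z)} \leftarrow s_\ell^{(uz)}, \ldots, s_\ell^{(uz+z-1)}$
12     $s_u \leftarrow$ index of $S_\ell^{(u,z)}$
13     foreach $v_i \in \mathcal{N}(a_u)$ do
14       compute and propagate $m_{a_u \to v_i}$
15       foreach $c_j \in \mathcal{N}(v_i) \setminus a_u$ do
16         compute and propagate $m_{v_i \to c_j}$
17       end
18       $\hat{L}_\ell^{(i)} \leftarrow \sum_{c \in \mathcal{N}(v_i)} m_{c \to v_i} + L_i$  // update LLR
19     end
20     foreach CN $j$ that is a neighbor of $v_k \in \mathcal{N}(a_u)$ do
21       $\hat{s}_\ell^{(j)} \leftarrow \sum_{v_i \in \mathcal{N}(j)} \hat{L}_\ell^{(i)}$
22       $s_\ell^{(j)} \leftarrow g_M(\hat{s}_\ell^{(j)})$  // update syndrome $S_\ell$
23     end
24     $s'_u \leftarrow$ index of updated $S_\ell^{(u,z)}$
25     $R_\ell(s_u, a_u, s'_u) \leftarrow$ highest residual of CN $a_u$
26     compute $Q_{\ell+1}(s_u, a_u)$
27     $\ell \leftarrow \ell + 1$  // update iteration
28   end
29 end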
12 / 16
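Input: $\mathcal{L}$, $H$
Output: Estimated $Q_{\ell_{\max}}(s_u, a_u)$ for all $u$
1  Initialization: $Q_0(s_u, a_u) \leftarrow 0$ for all $s_u$, $a_u$ and $u$
2  for each $L \in \mathcal{L}$ do
3    $\ell \leftarrow 0$
4    $\hat{L}_\ell \leftarrow L$
5    $\hat{S}_\ell \leftarrow H \hat{L}_\ell$
6    foreach $a \in [[m]]$ do
7      $s_\ell^{(a)} \leftarrow g_M(\hat{s}_\ell^{(a)})$  // M-level quantization
8    end
9    while $\ell < \ell_{\max}$ do
10     schedule CN $a_u$ according to $\epsilon$-greedy approach
11     $S_\ell^{(u,z)} \leftarrow s_\ell^{(uz)}, \ldots, s_\ell^{(uz+z-1)}$
12     $s_u \leftarrow$ index of $S_\ell^{(u,z)}$
13     foreach $v_i \in \mathcal{N}(a_u)$ do
14       compute and propagate $m_{a_u \to v_i}$
15       foreach $c_j \in \mathcal{N}(v_i) \setminus a_u$ do
16         compute and propagate $m_{v_i \to c_j}$
17       end
18       $\hat{L}_\ell^{(i)} \leftarrow \sum_{c \in \mathcal{N}(v_i)} m_{c \to v_i} + L_i$  // update LLR
19     end
20     foreach CN $j$ that is a neighbor of $v_k \in \mathcal{N}(a_u)$ do
21       $\hat{s}_\ell^{(j)} \leftarrow \sum_{v_i \in \mathcal{N}(j)} \hat{L}_\ell^{(i)}$
22       $s_\ell^{(j)} \leftarrow g_M(\hat{s}_\ell^{(j)})$  // update syndrome $S_\ell$
23     end
24     $s'_u \leftarrow$ index of updated $S_\ell^{(u,z)}$
25     $R_\ell(s_u, a_u, s'_u) \leftarrow$ highest residual of CN $a_u$
26     compute $Q_{\ell+1}(s_u, a_u)$
27     $\ell \leftarrow \ell + 1$  // update iteration
28   end
29 end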
12 / 16
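// decoding starts
10 if stopping condition not satisfied or $\ell < \ell_{\max}$ then
11   $s \leftarrow$ index of $S_\ell$
12   update CN $a$ according to an optimum scheduling policy
13   foreach $v_k \in \mathcal{N}(a)$ do
14     compute and propagate $m_{a \to v_k}$
15     foreach $c_j \in \mathcal{N}(v_k) \setminus a$ do
16       compute and propagate $m_{v_k \to c_j}$
17     end
18     $\hat{L}_\ell^{(k)} \leftarrow \sum_{c \in \mathcal{N}(v_k)} m_{c \to v_k} + L_k$  // update LLR of $v_k$
19   end
20   foreach CN $j$ that is a neighbor of $v_k \in \mathcal{N}(a)$ do
21     $\hat{s}_\ell^{(j)} \leftarrow \sum_{v_i \in \mathcal{N}(j)} \hat{L}_\ell^{(i)}$
22     $s_\ell^{(j)} \leftarrow g_M(\hat{s}_\ell^{(j)})$  // update syndrome $S_\ell$
23   end
24   $\ell \leftarrow \ell + 1$  // update iteration
25 end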
13 / 16
13 / 16
13 / 16
13 / 16
0.5 1 1.5 2
$10^{-2}$ $10^{-1}$
14 / 16
0.5 1 1.5 2
$10^{-2}$ $10^{-1}$
14 / 16
15 / 16
15 / 16
15 / 16
16 / 16
16 / 16
16 / 16
16 / 16