国内最全IT社区平台 联系我们 | 收藏本站
华晨云阿里云优惠2
您当前位置:首页 > php开源 > php教程 > 让你真正理解HMM(Hidden Markov Model)的算法演示程序

让你真正理解HMM(Hidden Markov Model)的算法演示程序

来源:程序员人生   发布时间:2014-12-17 07:57:56 阅读次数:3025次

HMM, 隐Markov模型, 在人脸, 步态, 语音辨认等领域有着广泛的用处.

通过以Javascript语言演示其使用方法, 读者可方便地理解其计算进程(其实,其实不难).

理论就不讲授了,直接看计算进程:

?<html> <head> <meta charset="UTF⑻"/> <meta author="alaclp@qq.com"/> <meta published="2014⑴1⑵8"/> <meta licence="public"/> <meta about="Hidden markov model"/> <title>让你真正理解隐Markov模型的计算示例</title> </head> <body> <h1>HMM Demo</h1> <hr/> <div id="info"> <h2>[0] HMM模型参数</h2> <b>状态S: {H, L}</b><br/> <b>初始状态I: {0.5, 0.5}</b><br/> <b>状态转移矩阵A:</b><br/> <table border="1px"> <tr> <td>a_ij</td><td>H</td><td>L</td> </tr> <tr> <td>H</td><td>0.5</td><td>0.5</td> </tr> <tr> <td>L</td><td>0.4</td><td>0.6</td> </tr> </table> <b>混淆矩阵B:</b><br/> <table border="1px"> <tr> <td>b_ik</td><td>A</td><td>C</td><td>T</td><td>G</td> </tr> <tr> <td>H</td><td>0.2</td><td>0.3</td><td>0.3</td><td>0.2</td> </tr> <tr> <td>L</td><td>0.3</td><td>0.2</td><td>0.2</td><td>0.3</td> </tr> </table> <br/> <h2>[1] 评估问题:使用上述模型,采取<font color='red'>Forward算法</font>计算产生GGCA观测结果的几率</h2> <h3>结果矩阵:</h3> <table border="1px"> <tr> <td>几率</td><td>初始</td> <td><div id="O1">G</div></td> <td><div id="O2">G</div></td> <td><div id="O3">C</div></td> <td><div id="O4">A</div></td> </tr> <tr> <td><div id="S0">H</div></td> <td>0.5</td> <td><div id="11"></div></td> <td><div id="12"></div></td> <td><div id="13"></div></td> <td><div id="14"></div></td> </tr> <tr> <td><div id="S1">L</div></td> <td>0.5</td> <td><div id="21"></div></td> <td><div id="22"></div></td> <td><div id="23"></div></td> <td><div id="24"></div></td> </tr> </table> <div id="prob"> </div> <br/> <div id="output"> <b>计算进程:</b><br/> </div> <hr/> <h2>[2] 解码问题: 给定观测序列GGCACTGAA,问产生该序列几率最大的状态路径是甚么?</h2> <b>把HMM模型换算为以2为底的对数值,则有</b><br/> <b>状态S: {H, L}</b><br/> <b>初始状态I: {⑴, ⑴}</b><br/> <b>状态转移矩阵A:</b><br/> <table border="1px"> <tr> <td>a_ij</td><td>H</td><td>L</td> </tr> <tr> <td>H</td><td>⑴</td><td>⑴</td> </tr> <tr> <td>L</td><td>⑴.322</td><td>-0.737</td> </tr> </table> <b>混淆矩阵B:</b><br/> <table border="1px"> <tr> <td>b_ik</td><td>A</td><td>C</td><td>T</td><td>G</td> </tr> <tr> <td>H</td><td>⑵.322</td><td>⑴.737</td><td>⑴.737</td><td>⑵.322</td> </tr> <tr> <td>L</td><td>⑴.737</td><td>⑵.322</td><td>⑵.322</td><td>⑴.737</td> </tr> </table> <h3>结果矩阵:</h3> <table border="1px"> <tr> <td>几率</td><td>初始</td> <td><div id="D1">G</div></td> <td><div id="D2">G</div></td> <td><div id="D3">C</div></td> <td><div id="D4">A</div></td> <td><div id="D5">C</div></td> <td><div id="D6">T</div></td> <td><div id="D7">G</div></td> <td><div id="D8">A</div></td> <td><div id="D9">A</div></td> </tr> <tr> <td><div id="S0">H</div></td> <td>⑴.0</td> <td><div id="V11"></div></td> <td><div id="V12"></div></td> <td><div id="V13"></div></td> <td><div id="V14"></div></td> <td><div id="V15"></div></td> <td><div id="V16"></div></td> <td><div id="V17"></div></td> <td><div id="V18"></div></td> <td><div id="V19"></div></td> </tr> <tr> <td><div id="S1">L</div></td> <td>⑴.0</td> <td><div id="V21"></div></td> <td><div id="V22"></div></td> <td><div id="V23"></div></td> <td><div id="V24"></div></td> <td><div id="V25"></div></td> <td><div id="V26"></div></td> <td><div id="V27"></div></td> <td><div id="V28"></div></td> <td><div id="V29"></div></td> </tr> </table> <div id="result1"></div> 计算进程:<br/> <div id="output1"><div> <script type="text/javascript"> //状态集合 var S = ['H', 'L']; //观测集合 var O = ['A', 'C', 'T', 'G']; //初始状态产生H和L的几率矩阵 var imat = [0.5, 0.5]; //不同状态间转换的几率矩阵---状态转换矩阵 var amat = {'HH': 0.5, 'HL':0.5, 'LH':0.4, 'LL': 0.6}; //由状态产生特定观测的几率矩阵---混淆矩阵 var bmat = {'HA': 0.2, 'HC': 0.3, 'HG': 0.3, 'HT': 0.2, 'LA': 0.3, 'LC': 0.2, 'LG': 0.2, 'LT': 0.3}; //测试用的观测结果 var dest = 'GGCA'; //设置几率计算矩阵 var pmat = new Array(S.length); for(var i = 0; i < S.length; i++) { pmat[i] = new Array(); for(var j = 0; j < dest.length; j++) pmat[i].push(0); } var out = document.getElementById("output"); var tmp; //计算评估问题算法---前向算法 //分为两个部份计算 //计算结果矩阵pmat第1列---由初始状态矩阵及混淆矩阵所决定 for(var i = 0; i < S.length; i++) { pmat[i][0] = imat[i] * bmat[S[i] + dest[0]]; document.getElementById(String(i + 1) + "1").innerHTML = pmat[i][0]; } //计算后续列的几率---由前1状态几率 * 状态间转换几率amat * 状态到观测几率矩阵bmat所决定 for(var i = 1; i < dest.length; i++) { for(var rowA = 0; rowA < S.length; rowA++) { //输出 out.innerHTML += "<b>" + String(rowA + 1) + "行" + String(i + 1) + "列</b><br/>"; for(var rowB = 0; rowB < S.length; rowB++) { if (rowA == rowB) { tmp = pmat[rowA][i⑴] * amat[S[rowA] + S[rowA]] * bmat[S[rowA] + dest[i]]; pmat[rowA][i] += tmp; //输出 out.innerHTML += S[rowA] + S[rowB] + dest[i] + ": " + String(pmat[rowA][i⑴]) + "*" + String(amat[S[rowA] + S[rowA]]) + "*" + String(bmat[S[rowA] + dest[i]]) + "=" + String(tmp) + "<br/>"; } else { tmp = pmat[rowB][i⑴] * amat[S[rowB] + S[rowA]] * bmat[S[rowA] + dest[i]]; pmat[rowA][i] += tmp; //输出 out.innerHTML += S[rowB] + S[rowA] + dest[i] + ": " + String(pmat[rowB][i⑴]) + "*" + String(amat[S[rowB] + S[rowA]]) + "*" + String(bmat[S[rowB] + dest[i]]) + "=" + String(tmp) + "<br/>"; } } //输出 out.innerHTML += "和为: <b>" + String(pmat[rowA][i]) + "</b><br/>"; document.getElementById(String(rowA + 1) + String(i + 1)).innerHTML = pmat[rowA][i]; } } //输出状态S产生观测序列的几率 var sm = 0; for(var i = 0; i < S.length; i++) sm += pmat[i][dest.length⑴]; document.getElementById("prob").innerHTML = "评估结果: HMM(S(H, L), O(A, T, C, G), imat, amat, bmat)产生观测结果" + dest + "的几率为: <font color='red'>" + String(sm) + "</font><br/>"; //------------------------ //获得最好路径问题---解码问题 //测试用的观测结果 var dest = 'GGCACTGAA'; //把数值矩阵转换为对数 for(var i = 0; i < S.length; i++) imat[i] = Math.log(imat[i]) / Math.LN2; for(var i = 0; i < S.length; i++) for(var j = 0; j < S.length; j++) { amat[S[i] + S[j]] = Math.log(amat[S[i] + S[j]]) / Math.LN2; } for(var i = 0; i < S.length; i++) for(var j = 0; j < O.length; j++) { bmat[S[i] + O[j]] = Math.log(bmat[S[i] + O[j]]) / Math.LN2; console.log( S[i] + "->" + O[j] + "=" + bmat[S[i] + O[j]] ); } //初始化几率计算矩阵pmat var pmat = new Array(S.length); for(var i = 0; i < S.length; i++) { pmat[i] = new Array(); for(var j = 0; j < dest.length; j++) pmat[i].push(0); } var out = document.getElementById("output1"); //计算解码问题算法---Viterbi算法 //分为两个部份计算 //计算结果矩阵pmat第1列---由初始状态矩阵及混淆矩阵所决定 var link = new Array(); var maxval = ⑴e15, maxid = ⑴; for(var i = 0; i < S.length; i++) { pmat[i][0] = imat[i] + bmat[S[i] + dest[0]]; if (pmat[i][0] > maxval) { maxval = pmat[i][0]; maxid = i; } document.getElementById("V" + String(i + 1) + "1").innerHTML = pmat[i][0]; } //存储第1个最大几率点 link.push(S[maxid]); //计算后续列的几率---由前1状态最大几率 * 前1状态到其他状态的转换几率amat * 当前状态对观测值的产生几率 //记录当前可能产生观测结果的最大几率 for(var i = 1; i < dest.length; i++) { var thisO = dest[i]; //计算由上次状态lastS动身,产生状态转换到S[rowA]产生观测值dest[i]的最大几率 for(var rowA = 0; rowA < S.length; rowA++) { var thisS = S[rowA]; var maxval = ⑴e15; //由thisS产生thisO的几率 var pp = bmat[thisS + thisO]; for(var rowB = 0; rowB < S.length; rowB++) { var lastS = S[rowB]; //由lastS到thisS的转移几率 var tp = amat[lastS + thisS]; //上次的历史几率 var lp = pmat[rowB][i⑴]; //总几率 var totalP = pp + tp + lp; if (totalP > maxval) { maxval = totalP; document.getElementById("V" + String(rowA + 1) + String(i + 1)).innerHTML = String(totalP); } out.innerHTML += "O" + String(i + 1) + ": " + lastS + "->" + thisS + "= " + String(pp) + "(" + thisS + "->" + thisO + ") +" + String(tp) + "(T: " + lastS + thisS + ") +" + String(lp) + "(" + lastS + ", " + String(i⑴) + ") ==>" + String(totalP) + "<br/>"; } pmat[rowA][i] = maxval; } if (pmat[0][i] > pmat[1][i]) link.push(S[0]); else link.push(S[1]); out.innerHTML += "最好: " + link[link.length - 1] + "<br/>"; } var out = document.getElementById("result1"); out.innerHTML = "最好状态序列:"; for(var i = 0; i < link.length; i++) out.innerHTML += link[i]; </script> </body> </html>

计算结果以下,方便大家检验:

HMM Demo


[0] HMM模型参数

状态S: {H, L}
初始状态I: {0.5, 0.5}
状态转移矩阵A:

a_ij H L
H 0.5 0.5
L 0.4 0.6
混淆矩阵B:
b_ik A C T G
H 0.2 0.3 0.3 0.2
L 0.3 0.2 0.2 0.3

[1] 评估问题:使用上述模型,采取Forward算法计算产生GGCA观测结果的几率

结果矩阵:

几率 初始
G
G
C
A
H
0.5
0.15
0.0345
0.008415
0.0013767000000000002
L
0.5
0.1
0.027
0.00669
0.00246645
评估结果: HMM(S(H, L), O(A, T, C, G), imat, amat, bmat)产生观测结果GGCA的几率为: 0.00384315

计算进程:
1行2列

HHG: 0.15*0.5*0.3=0.0225
LHG: 0.1*0.4*0.2=0.012000000000000002
和为: 0.0345
2行2列

HLG: 0.15*0.5*0.3=0.015
LLG: 0.1*0.6*0.2=0.012
和为: 0.027
1行3列

HHC: 0.0345*0.5*0.3=0.005175
LHC: 0.027*0.4*0.2=0.0032400000000000003
和为: 0.008415
2行3列

HLC: 0.0345*0.5*0.3=0.0034500000000000004
LLC: 0.027*0.6*0.2=0.00324
和为: 0.00669
1行4列

HHA: 0.008415*0.5*0.2=0.0008415000000000001
LHA: 0.00669*0.4*0.3=0.0005352
和为: 0.0013767000000000002
2行4列

HLA: 0.008415*0.5*0.2=0.0012622500000000001
LLA: 0.00669*0.6*0.3=0.0012041999999999997
和为: 0.00246645

[2] 解码问题: 给定观测序列GGCACTGAA,问产生该序列几率最大的状态路径是甚么?

把HMM模型换算为以2为底的对数值,则有
状态S: {H, L}
初始状态I: {⑴, ⑴}
状态转移矩阵A:

a_ij H L
H
L ⑴.322 -0.737
混淆矩阵B:
b_ik A C T G
H ⑵.322 ⑴.737 ⑴.737 ⑵.322
L ⑴.737 ⑵.322 ⑵.322 ⑴.737

结果矩阵:

几率 初始
G
G
C
A
C
T
G
A
A
H
⑴.0
⑵.7369655941662066
⑸.473931188332413
⑻.210896782498619
⑴1.53282487738598
⑴4.006756065718395
⑴7.328684160605757
⑴9.539580943104376
⑵2.861509037991738
⑵5.65736832121151
L
⑴.0
⑶.321928094887362
⑹.058893689053569
⑻.795859283219775
⑴0.947862376664826
⑴4.006756065718395
⑴6.480687254050807
⑴9.539580943104376
⑵2.013512131436787
⑵4.4874433197692
最好状态序列:HHHLLLLLL
计算进程:
O2: H->H= ⑴.7369655941662063(H->G) +⑴(T: HH) +⑵.7369655941662066(H, 0) ==>⑸.473931188332413
O2: L->H= ⑴.7369655941662063(H->G) +⑴.3219280948873622(T: LH) +⑶.321928094887362(L, 0) ==>⑹.380821783940931
O2: H->L= ⑵.321928094887362(L->G) +⑴(T: HL) +⑵.7369655941662066(H, 0) ==>⑹.058893689053569
O2: L->L= ⑵.321928094887362(L->G) +-0.7369655941662062(T: LL) +⑶.321928094887362(L, 0) ==>⑹.380821783940931
最好: H
O3: H->H= ⑴.7369655941662063(H->C) +⑴(T: HH) +⑸.473931188332413(H, 1) ==>⑻.210896782498619
O3: L->H= ⑴.7369655941662063(H->C) +⑴.3219280948873622(T: LH) +⑹.058893689053569(L, 1) ==>⑼.117787378107138
O3: H->L= ⑵.321928094887362(L->C) +⑴(T: HL) +⑸.473931188332413(H, 1) ==>⑻.795859283219775
O3: L->L= ⑵.321928094887362(L->C) +-0.7369655941662062(T: LL) +⑹.058893689053569(L, 1) ==>⑼.117787378107138
最好: H
O4: H->H= ⑵.321928094887362(H->A) +⑴(T: HH) +⑻.210896782498619(H, 2) ==>⑴1.53282487738598
O4: L->H= ⑵.321928094887362(H->A) +⑴.3219280948873622(T: LH) +⑻.795859283219775(L, 2) ==>⑴2.4397154729945
O4: H->L= ⑴.7369655941662063(L->A) +⑴(T: HL) +⑻.210896782498619(H, 2) ==>⑴0.947862376664826
O4: L->L= ⑴.7369655941662063(L->A) +-0.7369655941662062(T: LL) +⑻.795859283219775(L, 2) ==>⑴1.269790471552188
最好: L
O5: H->H= ⑴.7369655941662063(H->C) +⑴(T: HH) +⑴1.53282487738598(H, 3) ==>⑴4.269790471552188
O5: L->H= ⑴.7369655941662063(H->C) +⑴.3219280948873622(T: LH) +⑴0.947862376664826(L, 3) ==>⑴4.006756065718395
O5: H->L= ⑵.321928094887362(L->C) +⑴(T: HL) +⑴1.53282487738598(H, 3) ==>⑴4.854752972273342
O5: L->L= ⑵.321928094887362(L->C) +-0.7369655941662062(T: LL) +⑴0.947862376664826(L, 3) ==>⑴4.006756065718395
最好: L
O6: H->H= ⑵.321928094887362(H->T) +⑴(T: HH) +⑴4.006756065718395(H, 4) ==>⑴7.328684160605757
O6: L->H= ⑵.321928094887362(H->T) +⑴.3219280948873622(T: LH) +⑴4.006756065718395(L, 4) ==>⑴7.65061225549312
O6: H->L= ⑴.7369655941662063(L->T) +⑴(T: HL) +⑴4.006756065718395(H, 4) ==>⑴6.743721659884603
O6: L->L= ⑴.7369655941662063(L->T) +-0.7369655941662062(T: LL) +⑴4.006756065718395(L, 4) ==>⑴6.480687254050807
最好: L
O7: H->H= ⑴.7369655941662063(H->G) +⑴(T: HH) +⑴7.328684160605757(H, 5) ==>⑵0.065649754771965
O7: L->H= ⑴.7369655941662063(H->G) +⑴.3219280948873622(T: LH) +⑴6.480687254050807(L, 5) ==>⑴9.539580943104376
O7: H->L= ⑵.321928094887362(L->G) +⑴(T: HL) +⑴7.328684160605757(H, 5) ==>⑵0.65061225549312
O7: L->L= ⑵.321928094887362(L->G) +-0.7369655941662062(T: LL) +⑴6.480687254050807(L, 5) ==>⑴9.539580943104376
最好: L
O8: H->H= ⑵.321928094887362(H->A) +⑴(T: HH) +⑴9.539580943104376(H, 6) ==>⑵2.861509037991738
O8: L->H= ⑵.321928094887362(H->A) +⑴.3219280948873622(T: LH) +⑴9.539580943104376(L, 6) ==>⑵3.1834371328791
O8: H->L= ⑴.7369655941662063(L->A) +⑴(T: HL) +⑴9.539580943104376(H, 6) ==>⑵2.276546537270583
O8: L->L= ⑴.7369655941662063(L->A) +-0.7369655941662062(T: LL) +⑴9.539580943104376(L, 6) ==>⑵2.013512131436787
最好: L
O9: H->H= ⑵.321928094887362(H->A) +⑴(T: HH) +⑵2.861509037991738(H, 7) ==>⑵6.1834371328791
O9: L->H= ⑵.321928094887362(H->A) +⑴.3219280948873622(T: LH) +⑵2.013512131436787(L, 7) ==>⑵5.65736832121151
O9: H->L= ⑴.7369655941662063(L->A) +⑴(T: HL) +⑵2.861509037991738(H, 7) ==>⑵5.598474632157945
O9: L->L= ⑴.7369655941662063(L->A) +-0.7369655941662062(T: LL) +⑵2.013512131436787(L, 7) ==>⑵4.4874433197692
最好: L

生活不易,码农辛苦
如果您觉得本网站对您的学习有所帮助,可以手机扫描二维码进行捐赠
程序员人生
------分隔线----------------------------
分享到:
------分隔线----------------------------
关闭
程序员人生