// Builds an LSTM layer and allocates all of its working buffers.
//
// Parameters:
//   input_dimension      size of each input vector fed to the layer
//   output_dimension     number of cells; every gate and the CEC state hold
//                        one value per cell
//   max_batch_size       largest number of sequences processed together
//   max_sequence_length  largest number of time steps per sequence
//   use_bias             when false, all bias and bias-momentum pointers are
//                        set to nullptr (the other member functions test
//                        these pointers before using them)
//
// Three groups of storage are obtained from FastMalloc:
//   * per-time-step activations and error signals, each holding
//     output_dimension * max_batch_size * max_sequence_length values,
//   * input, recurrent and peephole weights, each with a momentum array of
//     the same size,
//   * optional biases with their momentum arrays.
// Every "*_t_" member is a cursor into the buffer of the same name. It starts
// at the first time step and is moved by the other member functions.
LSTM::LSTM(const int input_dimension,
           const int output_dimension,
           const int max_batch_size,
           const int max_sequence_length,
           const bool use_bias)
    : Function(input_dimension,
               output_dimension,
               max_batch_size,
               max_sequence_length),
      sigmoid_(),
      tanh_() {
  // One value for every cell, batch entry and time step.
  const int sequence_buffer_size =
      output_dimension * max_batch_size * max_sequence_length;

  // Forward-pass activations, each paired with its time-step cursor.
  b_t_ = b_ = FastMalloc(sequence_buffer_size);  // cell outputs
  cec_b_t_ = cec_b_ = FastMalloc(sequence_buffer_size);  // CEC states
  cec_input_b_t_ = cec_input_b_ =
      FastMalloc(sequence_buffer_size);  // cell inputs
  input_gate_b_t_ = input_gate_b_ = FastMalloc(sequence_buffer_size);
  forget_gate_b_t_ = forget_gate_b_ = FastMalloc(sequence_buffer_size);
  output_gate_b_t_ = output_gate_b_ = FastMalloc(sequence_buffer_size);

  // Backward-pass error signals, again with one cursor per buffer.
  cec_epsilon_t_ = cec_epsilon_ =
      FastMalloc(sequence_buffer_size);  // error arriving at the CEC
  delta_t_ = delta_ = FastMalloc(sequence_buffer_size);
  input_gate_delta_t_ = input_gate_delta_ = FastMalloc(sequence_buffer_size);
  forget_gate_delta_t_ = forget_gate_delta_ =
      FastMalloc(sequence_buffer_size);
  output_gate_delta_t_ = output_gate_delta_ =
      FastMalloc(sequence_buffer_size);

  // Connections from the preceding layer to the cell input and to each gate,
  // followed by the momentum arrays of identical size.
  const int input_weight_count = input_dimension * output_dimension;
  weights_ = FastMalloc(input_weight_count);
  input_gate_weights_ = FastMalloc(input_weight_count);
  forget_gate_weights_ = FastMalloc(input_weight_count);
  output_gate_weights_ = FastMalloc(input_weight_count);
  momentum_weights_ = FastMalloc(input_weight_count);
  momentum_input_gate_weights_ = FastMalloc(input_weight_count);
  momentum_forget_gate_weights_ = FastMalloc(input_weight_count);
  momentum_output_gate_weights_ = FastMalloc(input_weight_count);

  // Recurrent connections: this layer's output at the previous time step
  // feeding the cell input and the gates at the current time step.
  const int recurrent_weight_count = output_dimension * output_dimension;
  recurrent_weights_ = FastMalloc(recurrent_weight_count);
  input_gate_recurrent_weights_ = FastMalloc(recurrent_weight_count);
  forget_gate_recurrent_weights_ = FastMalloc(recurrent_weight_count);
  output_gate_recurrent_weights_ = FastMalloc(recurrent_weight_count);
  momentum_recurrent_weights_ = FastMalloc(recurrent_weight_count);
  momentum_input_gate_recurrent_weights_ = FastMalloc(recurrent_weight_count);
  momentum_forget_gate_recurrent_weights_ =
      FastMalloc(recurrent_weight_count);
  momentum_output_gate_recurrent_weights_ =
      FastMalloc(recurrent_weight_count);

  // Peephole connections from each CEC to its three gates: one weight per
  // cell and gate. So a gate sees the layer input, the previous layer output
  // and a CEC state, while the cell input sees only the first two.
  input_gate_peephole_weights_ = FastMalloc(output_dimension);
  forget_gate_peephole_weights_ = FastMalloc(output_dimension);
  output_gate_peephole_weights_ = FastMalloc(output_dimension);
  momentum_input_gate_peephole_weights_ = FastMalloc(output_dimension);
  momentum_forget_gate_peephole_weights_ = FastMalloc(output_dimension);
  momentum_output_gate_peephole_weights_ = FastMalloc(output_dimension);

  // Biases are optional; a null pointer means "no bias".
  if (use_bias) {
    bias_ = FastMalloc(output_dimension);
    input_gate_bias_ = FastMalloc(output_dimension);
    forget_gate_bias_ = FastMalloc(output_dimension);
    output_gate_bias_ = FastMalloc(output_dimension);
    momentum_bias_ = FastMalloc(output_dimension);
    momentum_input_gate_bias_ = FastMalloc(output_dimension);
    momentum_forget_gate_bias_ = FastMalloc(output_dimension);
    momentum_output_gate_bias_ = FastMalloc(output_dimension);
  } else {
    bias_ = nullptr;
    input_gate_bias_ = nullptr;
    forget_gate_bias_ = nullptr;
    output_gate_bias_ = nullptr;
    momentum_bias_ = nullptr;
    momentum_input_gate_bias_ = nullptr;
    momentum_forget_gate_bias_ = nullptr;
    momentum_output_gate_bias_ = nullptr;
  }
}
// Forward pass for a single time step.
//
// slice  batch slice being processed; slice.size() is used as the batch size.
// x      activations of the preceding layer for this step.
//
// Returns a pointer to this step's cell outputs (inside b_). Before
// returning, every forward cursor (b_t_, cec_b_t_, ...) is advanced by
// GetOffset(), so the next call handles the following time step.
//
// With peephole connections the order of evaluation matters:
//   1. input gate and forget gate (they look at the previous CEC state),
//   2. cell input and the new CEC state,
//   3. output gate (it looks at the new CEC state),
//   4. cell output.
const Real *LSTM::Evaluate(const Slice &slice, const Real x[]) {
  // The cursor still sits on the buffer start only at the first time step.
  const bool first_step = (b_t_ == b_);

  // Activations of the previous step exist only after the first step. At the
  // first step nullptr is passed instead of stepping the cursors back past
  // the start of their buffers; EvaluateSubUnit reads these pointers only
  // when the matching recurrent / peephole weights are non-null.
  // Assumes GetOffset() is a side-effect-free accessor.
  const Real *const previous_b = first_step ? nullptr : b_t_ - GetOffset();
  const Real *const previous_cec =
      first_step ? nullptr : cec_b_t_ - GetOffset();

  // Step 1: the two gates do not depend on each other, so they are placed in
  // separate OpenMP sections. Recurrent and peephole inputs are switched off
  // at the first step because there is no previous output or CEC state.
#pragma omp parallel sections
  {
#pragma omp section
    {
      // Input gate.
      EvaluateSubUnit(slice.size(),
                      input_gate_weights_,
                      input_gate_bias_,
                      first_step ? nullptr : input_gate_recurrent_weights_,
                      first_step ? nullptr : input_gate_peephole_weights_,
                      x,
                      previous_b,
                      previous_cec,
                      input_gate_b_t_,
                      &sigmoid_);
    }
#pragma omp section
    {
      // Forget gate.
      EvaluateSubUnit(slice.size(),
                      forget_gate_weights_,
                      forget_gate_bias_,
                      first_step ? nullptr : forget_gate_recurrent_weights_,
                      first_step ? nullptr : forget_gate_peephole_weights_,
                      x,
                      previous_b,
                      previous_cec,
                      forget_gate_b_t_,
                      &sigmoid_);
    }
  }

  // Step 2a: cell input, fed by the layer input and (after the first step)
  // by this layer's previous output. No peephole term here.
  EvaluateSubUnit(slice.size(),
                  weights_,
                  bias_,
                  first_step ? nullptr : recurrent_weights_,
                  nullptr,
                  x,
                  previous_b,
                  nullptr,
                  cec_input_b_t_,
                  &tanh_);

  const int size = slice.size() * output_dimension();

  // Step 2b: new CEC state. The CEC is linear, so its input is also its
  // output.
  //   cec_b_t_ <= input_gate_b_t_ * cec_input_b_t_
  FastMultiply(input_gate_b_t_, size, cec_input_b_t_, cec_b_t_);
  if (!first_step) {
    // cec_b_t_ <= cec_b_t_ + forget_gate_b_t_ * cec_b_(t-1)
    FastMultiplyAdd(forget_gate_b_t_, size, previous_cec, cec_b_t_);
  }

  // Step 3: output gate. Its peephole looks at the CEC state just computed,
  // so the peephole weights are used at the first step as well.
  EvaluateSubUnit(slice.size(),
                  output_gate_weights_,
                  output_gate_bias_,
                  first_step ? nullptr : output_gate_recurrent_weights_,
                  output_gate_peephole_weights_,
                  x,
                  previous_b,
                  cec_b_t_,
                  output_gate_b_t_,
                  &sigmoid_);

  // Step 4: cell output = output gate * tanh(CEC state), built in b_t_.
  FastCopy(cec_b_t_, size, b_t_);
  tanh_.Evaluate(output_dimension(), slice.size(), b_t_);
  FastMultiply(b_t_, size, output_gate_b_t_, b_t_);

  // Hand back this step's outputs, then move all cursors to the next step.
  const Real *result = b_t_;
  b_t_ += GetOffset();
  cec_input_b_t_ += GetOffset();
  cec_b_t_ += GetOffset();
  input_gate_b_t_ += GetOffset();
  forget_gate_b_t_ += GetOffset();
  output_gate_b_t_ += GetOffset();
  return result;
}
//该函数是计算lstm层的输出
void LSTM::EvaluateSubUnit(const int batch_size,
const Real weights[],
const Real bias[],
const Real recurrent_weights[],
const Real peephole_weights[],
const Real x[],
const Real recurrent_b_t[],
const Real cec_b_t[],
Real b_t[],
ActivationFunction *activation_function) {
//存在偏置,复制过去,在下次计算时就相当于把偏置加上去了
if (bias) {
for (int i = 0; i < batch_size; ++i)
FastCopy(bias, output_dimension(), b_t + i * output_dimension());
}
//b_t <= b_t + weights * x
//这里计算层的输入
FastMatrixMatrixMultiply(1.0,
weights,
false,
output_dimension(),
input_dimension(),
x,
false,
batch_size,
b_t);
//非起始时刻
//b_t <= b_t + recurrent_weights * recurrent_b_t
//这部份层的输入来自上1时刻层的输出乘以recurrent_weights
if (recurrent_weights) {
FastMatrixMatrixMultiply(1.0,
recurrent_weights,
false,
output_dimension(),
output_dimension(),
recurrent_b_t,
false,
batch_size,
b_t);
}
//非起始时刻
if (peephole_weights) {
#pragma omp parallel for
for (int i = 0; i < batch_size; ++i) {
//b_t <= b_t + peephole_weights * cec_b_t
//这里gate的输入来自于cec的部份
FastMultiplyAdd(peephole_weights,
output_dimension(),
cec_b_t + i * output_dimension(),
b_t + i * output_dimension());
}
}
//上面计算的b_t_都是输入,下面这步后经过了相应激活函数,变成了输出
activation_function->Evaluate(output_dimension(), batch_size, b_t);
}
// Backward pass for one time step: computes the error signals of the cell
// input and of the three gates.
//
// slice  batch slice; slice.size() is used as the batch size.
// f      the following function; its AddDelta() accumulates the error it
//        sends back into delta_t_.
//
// The forward cursors (b_t_, cec_b_t_, ...) are first stepped back by
// GetOffset(), so successive calls walk backwards through time. The delta
// cursors are advanced by AddDelta(), so "delta cursor - GetOffset()" refers
// to the step handled just before this one, i.e. time t+1. The formulas in
// the comments follow the original annotations; the Fast* helpers and the
// activation classes are defined elsewhere.
void LSTM::ComputeDelta(const Slice &slice, FunctionPointer f) {
// Step the forward cursors back to the time step being processed.
b_t_ -= GetOffset();
cec_input_b_t_ -= GetOffset();
cec_b_t_ -= GetOffset();
input_gate_b_t_ -= GetOffset();
forget_gate_b_t_ -= GetOffset();
output_gate_b_t_ -= GetOffset();
// cell outputs
// Collect the error arriving from the following function in delta_t_.
f->AddDelta(slice, delta_t_);
// The delta cursor has left the buffer start, so this is not the first
// step handled in the backward pass and a step t+1 exists.
if (delta_t_ != delta_) {
// delta_t_ <= delta_t_ + recurrent_weights_^T * delta_(t+1)
// Error of the cell input at t+1 flowing back through the recurrent
// connection.
FastMatrixMatrixMultiply(1.0,
recurrent_weights_,
true,
output_dimension(),
output_dimension(),
delta_t_ - GetOffset(),
false,
slice.size(),
delta_t_);
// delta_t_ <= delta_t_ + input_gate_recurrent_weights_^T * input_gate_delta_(t+1)
// Error of the input gate at t+1 flowing back to the cell outputs at t.
FastMatrixMatrixMultiply(1.0,
input_gate_recurrent_weights_,
true,
output_dimension(),
output_dimension(),
input_gate_delta_t_ - GetOffset(),
false,
slice.size(),
delta_t_);
// delta_t_ <= delta_t_ + forget_gate_recurrent_weights_^T * forget_gate_delta_(t+1)
// Error of the forget gate at t+1 flowing back to the cell outputs at t.
FastMatrixMatrixMultiply(1.0,
forget_gate_recurrent_weights_,
true,
output_dimension(),
output_dimension(),
forget_gate_delta_t_ - GetOffset(),
false,
slice.size(),
delta_t_);
// delta_t_ <= delta_t_ + output_gate_recurrent_weights_^T * output_gate_delta_(t+1)
// Error of the output gate at t+1 flowing back to the cell outputs at t.
FastMatrixMatrixMultiply(1.0,
output_gate_recurrent_weights_,
true,
output_dimension(),
output_dimension(),
output_gate_delta_t_ - GetOffset(),
false,
slice.size(),
delta_t_);
}
// delta_t_ now holds the error with respect to the cell outputs: with L the
// objective and b the cell output, this is dL/db.
// output gates, part I
const int size = slice.size() * output_dimension();
// output_gate_delta_t_ is used as scratch space: copy the CEC state into it
// and squash it, giving tanh(cec_b_t_).
FastCopy(cec_b_t_, size, output_gate_delta_t_);
tanh_.Evaluate(output_dimension(), slice.size(), output_gate_delta_t_);
// states, part I
// cec_epsilon_t_ <= output_gate_b_t_ * delta_t_
// Error passing through the output gate towards the CEC.
FastMultiply(output_gate_b_t_, size, delta_t_, cec_epsilon_t_);
// Multiply by the tanh derivative taken at the squashed CEC state. This is
// only the first of several contributions to the CEC error.
tanh_.MultiplyDerivative(output_dimension(),
slice.size(),
output_gate_delta_t_,
cec_epsilon_t_);
// output gates, part II
// output_gate_delta_t_ <= output_gate_delta_t_ * delta_t_
// Error reaching the output gate: tanh(CEC state) times the cell output
// error.
FastMultiply(output_gate_delta_t_,
size,
delta_t_,
output_gate_delta_t_);
// Multiply by the sigmoid derivative to obtain the output gate's error
// signal.
sigmoid_.MultiplyDerivative(output_dimension(),
slice.size(),
output_gate_b_t_,
output_gate_delta_t_);
// states, part II
#pragma omp parallel for
for (int i = 0; i < (int) slice.size(); ++i) {
// cec_epsilon_t_ <= cec_epsilon_t_ + output_gate_peephole_weights_ * output_gate_delta_t_
// Output gate error flowing back to the CEC through the peephole.
FastMultiplyAdd(output_gate_peephole_weights_,
output_dimension(),
output_gate_delta_t_ + i * output_dimension(),
cec_epsilon_t_ + i * output_dimension());
}
// Contributions from time t+1 exist only if a later step was processed.
if (delta_t_ != delta_) {
// cec_epsilon_t_ <= cec_epsilon_t_ + forget_gate_b_(t+1) * cec_epsilon_(t+1)
// CEC error of step t+1 carried back through the forget gate.
FastMultiplyAdd(forget_gate_b_t_ + GetOffset(),
size,
cec_epsilon_t_ - GetOffset(),
cec_epsilon_t_);
#pragma omp parallel for
for (int i = 0; i < (int) slice.size(); ++i) {
// cec_epsilon_t_ <= cec_epsilon_t_ + input_gate_peephole_weights_ * input_gate_delta_(t+1)
// Input gate error of step t+1 arriving through its peephole.
FastMultiplyAdd(input_gate_peephole_weights_,
output_dimension(),
input_gate_delta_t_ - GetOffset() + i * output_dimension(),
cec_epsilon_t_ + i * output_dimension());
// cec_epsilon_t_ <= cec_epsilon_t_ + forget_gate_peephole_weights_ * forget_gate_delta_(t+1)
// Forget gate error of step t+1 arriving through its peephole.
FastMultiplyAdd(
forget_gate_peephole_weights_,
output_dimension(),
forget_gate_delta_t_ - GetOffset() + i * output_dimension(),
cec_epsilon_t_ + i * output_dimension());
}
}
// cells
// delta_t_ <= input_gate_b_t_ * cec_epsilon_t_
// Together with the tanh derivative below this yields the error signal at
// the cell input. It overwrites the cell output error held in delta_t_.
FastMultiply(input_gate_b_t_, size, cec_epsilon_t_, delta_t_);
tanh_.MultiplyDerivative(output_dimension(),
slice.size(),
cec_input_b_t_,
delta_t_);
// delta_t_ now holds the error signal at the cell input.
// The two remaining gate errors write to different buffers and are placed
// in separate OpenMP sections.
#pragma omp parallel sections
{
#pragma omp section
{
// forget gates
// Skipped at the first time step, where no previous CEC state exists.
if (b_t_ != b_) {
// forget_gate_delta_t_ <= cec_b_(t-1) * cec_epsilon_t_
// Error reaching the forget gate.
FastMultiply(cec_b_t_ - GetOffset(),
size,
cec_epsilon_t_,
forget_gate_delta_t_);
// Multiply by the sigmoid derivative to obtain the forget gate's error
// signal.
sigmoid_.MultiplyDerivative(output_dimension(),
slice.size(),
forget_gate_b_t_,
forget_gate_delta_t_);
}
}
#pragma omp section
{
// input gates
// input_gate_delta_t_ <= cec_epsilon_t_ * cec_input_b_t_
// Error reaching the input gate.
FastMultiply(cec_epsilon_t_,
size,
cec_input_b_t_,
input_gate_delta_t_);
// Multiply by the sigmoid derivative to obtain the input gate's error
// signal.
sigmoid_.MultiplyDerivative(output_dimension(),
slice.size(),
input_gate_b_t_,
input_gate_delta_t_);
}
}
}
//计算流向输入层的误差
void LSTM::AddDelta(const Slice &slice, Real delta_t[]) {
//delta_t <= delta_t + weights_ * delta_t_
//这里cell输入处的误差信号,流向输入层
FastMatrixMatrixMultiply(1.0,
weights_,
true,
input_dimension(),
output_dimension(),
delta_t_,
false,
slice.size(),
delta_t);
//delta_t <= input_gate_delta_t_ * input_gate_weights_ + delta_t
//input gate的误差信号流向输入层部份
FastMatrixMatrixMultiply(1.0,
input_gate_weights_,
true,
input_dimension(),
output_dimension(),
input_gate_delta_t_,
false,
slice.size(),
delta_t);
//delta_t <= forget_gate_delta_t_ * forget_gate_weights_ + delta_t
//forget gate的误差信号流向输入层部份
FastMatrixMatrixMultiply(1.0,
forget_gate_weights_,
true,
input_dimension(),
output_dimension(),
forget_gate_delta_t_,
false,
slice.size(),
delta_t);
//delta_t <= output_gate_delta_t_ * output_gate_weights_ + delta_t
//output gate的误差信号流向输入层部份
FastMatrixMatrixMultiply(1.0,
output_gate_weights_,
true,
input_dimension(),
output_dimension(),
output_gate_delta_t_,
false,
slice.size(),
delta_t);
//t+1时刻 -> t时刻
cec_epsilon_t_ += GetOffset();
delta_t_ += GetOffset();
input_gate_delta_t_ += GetOffset();
forget_gate_delta_t_ += GetOffset();
output_gate_delta_t_ += GetOffset();
}
// Accumulates the weight changes of one time step into the momentum_*
// arrays.
//
// slice          batch slice; slice.size() is used as the batch size.
// learning_rate  step size; every contribution is scaled by -learning_rate.
// x              activations of the preceding layer for this step.
//
// Returns a pointer to this step's cell outputs (b_t_ before it is
// advanced).
//
// The delta cursors are stepped back by GetOffset() on entry and the forward
// cursors are advanced on exit, so successive calls walk from the first time
// step towards the last. Only the momentum arrays are written here; the
// weights and biases themselves are not modified in this function. The
// formulas in the comments follow the original annotations.
const Real *LSTM::UpdateWeights(const Slice &slice,
const Real learning_rate,
const Real x[]) {
const int size = slice.size() * output_dimension();
// Step the delta cursors back to the error signals of the current step.
cec_epsilon_t_ -= GetOffset();
delta_t_ -= GetOffset();
input_gate_delta_t_ -= GetOffset();
forget_gate_delta_t_ -= GetOffset();
output_gate_delta_t_ -= GetOffset();
// First group: biases, input weights and recurrent weights. Every section
// writes to a different momentum array.
#pragma omp parallel sections
{
#pragma omp section
{
if (bias_) {
for (size_t i = 0; i < slice.size(); ++i) {
// momentum_bias_ <= -learning_rate * delta_t_ + momentum_bias_
// Accumulates the change of the cell input bias over the batch rows.
FastMultiplyByConstantAdd(-learning_rate,
delta_t_ + i * output_dimension(),
output_dimension(),
momentum_bias_);
}
}
}
#pragma omp section
{
if (input_gate_bias_) {
// momentum_input_gate_bias_ <= -learning_rate * input_gate_delta_t_ + momentum_input_gate_bias_
// Accumulates the change of the input gate bias.
for (size_t i = 0; i < slice.size(); ++i) {
FastMultiplyByConstantAdd(-learning_rate,
input_gate_delta_t_ + i * output_dimension(),
output_dimension(),
momentum_input_gate_bias_);
}
}
}
#pragma omp section
{
// momentum_forget_gate_bias_ <= -learning_rate * forget_gate_delta_t_ + momentum_forget_gate_bias_
// Accumulates the change of the forget gate bias.
if (forget_gate_bias_) {
for (size_t i = 0; i < slice.size(); ++i) {
FastMultiplyByConstantAdd(-learning_rate,
forget_gate_delta_t_ + i * output_dimension(),
output_dimension(),
momentum_forget_gate_bias_);
}
}
}
#pragma omp section
{
// momentum_output_gate_bias_ <= -learning_rate * output_gate_delta_t_ + momentum_output_gate_bias_
// Accumulates the change of the output gate bias.
if (output_gate_bias_) {
for (size_t i = 0; i < slice.size(); ++i) {
FastMultiplyByConstantAdd(-learning_rate,
output_gate_delta_t_ + i * output_dimension(),
output_dimension(),
momentum_output_gate_bias_);
}
}
}
// The sections above only accumulate the bias changes; the biases
// themselves are left untouched.
#pragma omp section
{
// momentum_weights_ <= -learning_rate * delta_t_ * x + momentum_weights_
// Change of the weights from the preceding layer to the cell input.
FastMatrixMatrixMultiply(-learning_rate,
delta_t_,
false,
output_dimension(),
slice.size(),
x,
true,
input_dimension(),
momentum_weights_);
}
#pragma omp section
{
// momentum_input_gate_weights_ <= -learning_rate * input_gate_delta_t_ * x + momentum_input_gate_weights_
// Change of the weights from the preceding layer to the input gate.
FastMatrixMatrixMultiply(-learning_rate,
input_gate_delta_t_,
false,
output_dimension(),
slice.size(),
x,
true,
input_dimension(),
momentum_input_gate_weights_);
}
#pragma omp section
{
// momentum_forget_gate_weights_ <= -learning_rate * forget_gate_delta_t_ * x + momentum_forget_gate_weights_
// Change of the weights from the preceding layer to the forget gate.
FastMatrixMatrixMultiply(-learning_rate,
forget_gate_delta_t_,
false,
output_dimension(),
slice.size(),
x,
true,
input_dimension(),
momentum_forget_gate_weights_);
}
#pragma omp section
{
// momentum_output_gate_weights_ <= -learning_rate * output_gate_delta_t_ * x + momentum_output_gate_weights_
// Change of the weights from the preceding layer to the output gate.
FastMatrixMatrixMultiply(-learning_rate,
output_gate_delta_t_,
false,
output_dimension(),
slice.size(),
x,
true,
input_dimension(),
momentum_output_gate_weights_);
}
#pragma omp section
{
// momentum_recurrent_weights_ <= -learning_rate * delta_t_ * b_(t-1) + momentum_recurrent_weights_
// Change of the recurrent weights into the cell input. Skipped at the
// first time step, where no previous output exists.
if (b_t_ != b_) {
FastMatrixMatrixMultiply(-learning_rate,
delta_t_,
false,
output_dimension(),
slice.size(),
b_t_ - GetOffset(),
true,
output_dimension(),
momentum_recurrent_weights_);
}
}
#pragma omp section
{
// momentum_input_gate_recurrent_weights_ <= -learning_rate * input_gate_delta_t_ * b_(t-1) + momentum_input_gate_recurrent_weights_
// Change of the recurrent weights into the input gate (not at the first
// time step).
if (b_t_ != b_) {
FastMatrixMatrixMultiply(-learning_rate,
input_gate_delta_t_,
false,
output_dimension(),
slice.size(),
b_t_ - GetOffset(),
true,
output_dimension(),
momentum_input_gate_recurrent_weights_);
}
}
#pragma omp section
{
// momentum_forget_gate_recurrent_weights_ <= -learning_rate * forget_gate_delta_t_ * b_(t-1) + momentum_forget_gate_recurrent_weights_
// Change of the recurrent weights into the forget gate (not at the first
// time step).
if (b_t_ != b_) {
FastMatrixMatrixMultiply(-learning_rate,
forget_gate_delta_t_,
false,
output_dimension(),
slice.size(),
b_t_ - GetOffset(),
true,
output_dimension(),
momentum_forget_gate_recurrent_weights_);
}
}
#pragma omp section
{
// momentum_output_gate_recurrent_weights_ <= -learning_rate * output_gate_delta_t_ * b_(t-1) + momentum_output_gate_recurrent_weights_
// Change of the recurrent weights into the output gate (not at the first
// time step).
if (b_t_ != b_) {
FastMatrixMatrixMultiply(-learning_rate,
output_gate_delta_t_,
false,
output_dimension(),
slice.size(),
b_t_ - GetOffset(),
true,
output_dimension(),
momentum_output_gate_recurrent_weights_);
}
}
// Summary of this group: 1. bias changes, 2. changes of the weights from
// the preceding layer, 3. changes of the recurrent weights (t-1 -> t).
}
// Second group: peephole weights. These sections scale the gate deltas in
// place, so they run only after the first group has finished reading them.
#pragma omp parallel sections
{
#pragma omp section
{
// The input gate peephole reads the previous CEC state, so it is skipped
// at the first time step.
if (b_t_ != b_) {
// destroys ..._gate_delta_t_, but this will not be used later anyway
// input_gate_delta_t_ <= -learning_rate * input_gate_delta_t_
FastMultiplyByConstant(input_gate_delta_t_,
size,
-learning_rate,
input_gate_delta_t_);
for (size_t i = 0; i < slice.size(); ++i) {
// momentum_input_gate_peephole_weights_ <= momentum_input_gate_peephole_weights_ + input_gate_delta_t_ * cec_b_(t-1)
// Change of the peephole weights between the CEC and the input gate.
FastMultiplyAdd(input_gate_delta_t_ + i * output_dimension(),
output_dimension(),
cec_b_t_ - GetOffset() + i * output_dimension(),
momentum_input_gate_peephole_weights_);
}
}
}
#pragma omp section
{
// The forget gate peephole also reads the previous CEC state.
if (b_t_ != b_) {
// forget_gate_delta_t_ <= -learning_rate * forget_gate_delta_t_
FastMultiplyByConstant(forget_gate_delta_t_,
size,
-learning_rate,
forget_gate_delta_t_);
// momentum_forget_gate_peephole_weights_ <= momentum_forget_gate_peephole_weights_ + forget_gate_delta_t_ * cec_b_(t-1)
// Change of the peephole weights between the CEC and the forget gate.
for (size_t i = 0; i < slice.size(); ++i) {
FastMultiplyAdd(forget_gate_delta_t_ + i * output_dimension(),
output_dimension(),
cec_b_t_ - GetOffset() + i * output_dimension(),
momentum_forget_gate_peephole_weights_);
}
}
}
#pragma omp section
{
// output_gate_delta_t_ <= -learning_rate * output_gate_delta_t_
FastMultiplyByConstant(output_gate_delta_t_,
size,
-learning_rate,
output_gate_delta_t_);
// momentum_output_gate_peephole_weights_ <= momentum_output_gate_peephole_weights_ + output_gate_delta_t_ * cec_b_t_
// Change of the peephole weights between the CEC and the output gate. The
// output gate reads the CEC state of the current step, so no first-step
// guard is needed.
for (size_t i = 0; i < slice.size(); ++i) {
FastMultiplyAdd(output_gate_delta_t_ + i * output_dimension(),
output_dimension(),
cec_b_t_ + i * output_dimension(),
momentum_output_gate_peephole_weights_);
}
}
}
// Remember this step's cell outputs for the caller.
const Real *result = b_t_;
// let b_t_ point to next time step
b_t_ += GetOffset();
cec_input_b_t_ += GetOffset();
cec_b_t_ += GetOffset();
input_gate_b_t_ += GetOffset();
forget_gate_b_t_ += GetOffset();
output_gate_b_t_ += GetOffset();
return result;
}