AdamOptimizer.cpp
#include "AdamOptimizer.hpp"
#include <cmath>
AdamOptimizer::AdamOptimizer(std::vector<double>& params, double learning_rate, std::tuple<double, double> beta, double eps, double weight_decay, bool amsgrad)
    : params(params), learning_rate(learning_rate), beta1(std::get<0>(beta)), beta2(std::get<1>(beta)), eps(eps), weight_decay(weight_decay), amsgrad(amsgrad), t(0) {
    m = std::vector<double>(params.size(), 0.0);
    v = std::vector<double>(params.size(), 0.0);
    if (amsgrad) {
        v_hat.resize(params.size(), 0.0); // running maximum of the second moment estimate
    }
}
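The constructor above initializes members declared in AdamOptimizer.hpp, which is not shown on this page. Below is a minimal sketch of a declaration consistent with this implementation; member and method names are inferred from the code in this file, so treat the exact layout (and the choice of int for the step counter) as assumptions rather than the repository's actual header.

#pragma once
#include <vector>
#include <tuple>

class AdamOptimizer {
public:
    AdamOptimizer(std::vector<double>& params, double learning_rate,
                  std::tuple<double, double> beta, double eps,
                  double weight_decay, bool amsgrad);
    void step(const std::vector<double>& grads);

private:
    std::vector<double>& params;   // parameters updated in place
    double learning_rate;
    double beta1, beta2;           // exponential decay rates for the moment estimates
    double eps;
    double weight_decay;
    bool amsgrad;
    int t;                         // time step counter
    std::vector<double> m, v;      // first and second moment estimates
    std::vector<double> v_hat;     // running maximum of v (used only when amsgrad is true)
};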
void AdamOptimizer::step(const std::vector<double>& grads) {
    t++;
    for (size_t i = 0; i < params.size(); i++) {
        // Apply L2-style weight decay by adding it to the gradient
        double grad = grads[i] + weight_decay * params[i];
        // Update biased first moment estimate
        m[i] = beta1 * m[i] + (1 - beta1) * grad;
        // Update biased second raw moment estimate (uses the squared gradient)
        v[i] = beta2 * v[i] + (1 - beta2) * grad * grad;
        // Compute bias-corrected first and second moment estimates
        double m_hat = m[i] / (1 - std::pow(beta1, t));
        double v_hat_value = v[i] / (1 - std::pow(beta2, t));
        if (amsgrad) {
            // Keep the element-wise maximum of v seen so far and use it
            // (without bias correction), as in the original AMSGrad formulation
            if (v_hat[i] < v[i]) {
                v_hat[i] = v[i];
            }
            v_hat_value = v_hat[i];
        }
        // Update parameters
        params[i] -= learning_rate * m_hat / (std::sqrt(v_hat_value) + eps);
    }
}
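A small usage sketch, not part of the repository: it minimizes f(x) = x^2 for a few parameters, computing the gradient 2x by hand and passing it to step(). The learning rate, beta values, and iteration count are illustrative choices, not values taken from the project.

#include "AdamOptimizer.hpp"
#include <cstdio>
#include <tuple>
#include <vector>

int main() {
    std::vector<double> params = {5.0, -3.0, 2.0};
    // lr = 0.1, betas = (0.9, 0.999), eps = 1e-8, no weight decay, AMSGrad enabled
    AdamOptimizer opt(params, 0.1, std::make_tuple(0.9, 0.999), 1e-8, 0.0, true);

    for (int iter = 0; iter < 200; iter++) {
        // Gradient of f(x) = x^2 is 2x for each parameter
        std::vector<double> grads(params.size());
        for (size_t i = 0; i < params.size(); i++) {
            grads[i] = 2.0 * params[i];
        }
        opt.step(grads);
    }

    for (double p : params) {
        std::printf("%f\n", p);  // each parameter should now be close to 0
    }
    return 0;
}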