How do I get the slope of a linear regression line using C++?
Here is a C++11 implementation:
#include <algorithm>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <vector>
// Returns the slope of the least-squares regression line of y on x.
//
// The slope is computed from mean-centred data,
//     sum((x_i - mean_x) * (y_i - mean_y)) / sum((x_i - mean_x)^2),
// which is algebraically identical to
//     (n * sum(xy) - sum(x) * sum(y)) / (n * sum(x^2) - sum(x)^2)
// but avoids subtracting two huge, nearly equal sums, so it stays accurate
// when the x values carry a large common offset.
//
// Preconditions (not checked here):
//   * y has at least as many elements as x; only the first x.size() pairs
//     are used, and a shorter y is indexed out of bounds.
//   * x is non-empty and not constant; otherwise the division below is by
//     zero and the result is NaN or +/-infinity.
double slope(const std::vector<double>& x, const std::vector<double>& y) {
    const auto n = x.size();
    const double mean_x = std::accumulate(x.begin(), x.end(), 0.0) / n;
    const double mean_y = std::accumulate(y.begin(), y.end(), 0.0) / n;

    double s_xy = 0.0;  // sum of products of deviations
    double s_xx = 0.0;  // sum of squared x deviations
    for (std::vector<double>::size_type i = 0; i < n; ++i) {
        const double dx = x[i] - mean_x;
        s_xy += dx * (y[i] - mean_y);
        s_xx += dx * dx;
    }
    return s_xy / s_xx;
}
// Demo driver: fits the sample points and prints the resulting slope.
int main() {
    const std::vector<double> xs{6, 5, 11, 7, 5, 4, 4};
    const std::vector<double> ys{2, 3, 9, 1, 8, 7, 5};

    const double fitted = slope(xs, ys);
    std::cout << fitted << '\n';  // prints 0.305556
}
You can add a check for the mathematical requirements (x.size() == y.size() and x is not constant) or, like the code above, assume that the caller will take care of that.
Why not just write some simple code like this (certainly not the best solution, just an example based on the help article):
// Returns the slope of the least-squares regression line of y on x,
// computed as sum((x_i - avgX) * (y_i - avgY)) / sum((x_i - avgX)^2).
//
// Throws std::invalid_argument (derived from std::exception) when
//   * x and y have different sizes,
//   * the input is empty, or
//   * all x values are equal, so the slope is undefined.
double slope(const std::vector<double>& x, const std::vector<double>& y) {
    if (x.size() != y.size()) {
        throw std::invalid_argument("slope: x and y must have the same size");
    }
    if (x.empty()) {
        throw std::invalid_argument("slope: input must not be empty");
    }

    const std::size_t n = x.size();
    // The initial value must be 0.0 (not 0) so that accumulate sums in double.
    const double avgX = std::accumulate(x.begin(), x.end(), 0.0) / n;
    const double avgY = std::accumulate(y.begin(), y.end(), 0.0) / n;

    double numerator = 0.0;
    double denominator = 0.0;
    for (std::size_t i = 0; i < n; ++i) {
        const double dx = x[i] - avgX;
        numerator += dx * (y[i] - avgY);
        denominator += dx * dx;
    }

    // A zero sum of squared deviations means x is constant (vertical line).
    if (denominator == 0.0) {
        throw std::invalid_argument("slope: x values must not all be equal");
    }
    return numerator / denominator;
}
Note that the third argument of the accumulate function must be 0.0 rather than 0; otherwise the compiler will deduce its type as int
and there is a good chance that the result of the accumulate calls will be wrong (it is actually wrong with MSVC2010 and mingw-w64 when passing 0 as the third parameter).
The following is a templatized function I use for linear regression (fitting). It takes a std::vector holding the data:
// Fits a least-squares line y = slope * x + intercept to `data`, using the
// element index (0, 1, 2, ...) as the x coordinate of each sample.
//
// Returns a two-element vector: {slope, intercept}.
//
// All arithmetic is done in T. The element count is converted to T once so
// that, for signed integral T, the expressions are not silently promoted to
// unsigned arithmetic (which would corrupt negative slopes).
//
// With fewer than two samples the denominator is zero: the result is NaN for
// floating-point T, and the division is undefined for integral T.
template <typename T>
std::vector<T> GetLinearFit(const std::vector<T>& data)
{
    typedef typename std::vector<T>::size_type size_type;

    const size_type count = data.size();
    const T n = static_cast<T>(count);

    T xSum = 0, ySum = 0, xxSum = 0, xySum = 0;
    for (size_type i = 0; i < count; ++i)
    {
        const T x = static_cast<T>(i);  // implicit x coordinate
        xSum += x;
        ySum += data[i];
        xxSum += x * x;
        xySum += x * data[i];
    }

    const T slope = (n * xySum - xSum * ySum) / (n * xxSum - xSum * xSum);
    const T intercept = (ySum - slope * xSum) / n;
    return std::vector<T>{slope, intercept};
}
The function returns a vector with the first element being the slope, and the second element being the intercept of your linear regression.
Example to use it:
// Sample series; the x coordinates are implicitly the indices 0..4.
std::vector<double> myData{1, 3, 4, 2, 5};

// Fit once, then unpack the result: element 0 is the slope, element 1 the intercept.
std::vector<double> linearReg = GetLinearFit(myData);
double slope = linearReg[0];
double intercept = linearReg[1];
Notice that the function presumes you have a series of numbers for your x-axis (which is what I needed). You may change that in the function if you wish.