C/machine_learning/adaline_learning.c at master · sozelfist/C

History

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

/**

* \file

* \brief [Adaptive Linear Neuron

* (ADALINE)](https://en.wikipedia.org/wiki/ADALINE) implementation

* \details

* <img

* src="https://upload.wikimedia.org/wikipedia/commons/b/be/Adaline_flow_chart.gif"

* width="200px">

* [source](https://commons.wikimedia.org/wiki/File:Adaline_flow_chart.gif)

* ADALINE is one of the first and simplest single-layer artificial neural

* networks. The algorithm essentially implements a linear function

* \f[ f\left(x_0,x_1,x_2,\ldots\right) =

* \sum_j x_jw_j+\theta

* \f]

* where \f$x_j\f$ are the input features of a sample, \f$w_j\f$ are the

* coefficients of the linear function and \f$\theta\f$ is a constant. If we

* know the \f$w_j\f$, then for any given set of features, \f$y\f$ can be

* computed. Computing the \f$w_j\f$ is a supervised learning algorithm wherein

* a set of features and their corresponding outputs are given and weights are

* computed using stochastic gradient descent method.

* \author [Krishna Vedala](https://github.com/kvedala)

*/

#include <assert.h>

#include <limits.h>

#include <math.h>

#include <stdbool.h>

#include <stdio.h>

#include <stdlib.h>

#include <time.h>

/**

* @addtogroup machine_learning Machine learning algorithms

* @{

* @addtogroup adaline Adaline learning algorithm

* @{

*/

/** Maximum number of training passes over the sample set that adaline_fit()
 * performs before it stops, whether or not the model has converged */

#define MAX_ADALINE_ITER 500 // INT_MAX

/**
 * Parameters of a single ADALINE model.
 *
 * The weight array holds one coefficient per input feature followed by one
 * trailing entry used as the constant bias term.
 */
struct adaline
{
    /** learning rate of the algorithm */
    double eta;

    /** weights of the neural network (dynamically allocated) */
    double *weights;

    /** number of entries in `weights`, bias term included */
    int num_weights;
};

/** convergence accuracy \f$=1\times10^{-5}\f$: adaline_fit() keeps iterating
 * only while the average absolute correction per sample exceeds this value */

#define ADALINE_ACCURACY 1e-5

/**
 * Create an ADALINE model for a given number of input features.
 *
 * One weight more than `num_features` is allocated; the extra (last) weight
 * is the constant bias term. Every weight is initialised to 1.
 *
 * The process is terminated with `EXIT_FAILURE` when `eta` is not strictly
 * between 0 and 1 (NaN included) or when `num_features` is negative or too
 * large to add the bias weight. If the weight array cannot be allocated, the
 * error is reported with `perror` and the returned model has
 * `weights == NULL`; callers must check for that before using the model.
 *
 * \param[in] num_features number of features present (must be >= 0)
 * \param[in] eta learning rate, strictly between 0 and 1
 * \returns new adaline model; release it with delete_adaline()
 */
struct adaline new_adaline(const int num_features, const double eta)
{
    // written as a negated conjunction so that a NaN learning rate is rejected
    if (!(eta > 0.0 && eta < 1.0))
    {
        fprintf(stderr, "learning rate should be > 0 and < 1\n");
        exit(EXIT_FAILURE);
    }

    // a negative count is meaningless and INT_MAX would overflow the +1 below
    if (num_features < 0 || num_features == INT_MAX)
    {
        fprintf(stderr, "number of features should be >= 0 and < INT_MAX\n");
        exit(EXIT_FAILURE);
    }

    // additional weight is for the constant bias term
    int num_weights = num_features + 1;

    struct adaline ada;
    ada.eta = eta;
    ada.num_weights = num_weights;
    ada.weights = (double *)malloc((size_t)num_weights * sizeof(*ada.weights));
    if (!ada.weights)
    {
        perror("Unable to allocate memory for weights");
        return ada;
    }

    // deterministic start: every weight (bias included) begins at 1
    for (int i = 0; i < num_weights; i++)
    {
        ada.weights[i] = 1.0;
    }

    return ada;
}

/**
 * Release the dynamically allocated weights of a model.
 *
 * After the call the model holds no weights (`weights == NULL`,
 * `num_weights == 0`), so calling this function again on the same model is
 * harmless. A `NULL` model pointer is ignored.
 *
 * \param[in,out] ada model from which the memory is to be freed.
 */
void delete_adaline(struct adaline *ada)
{
    if (ada == NULL)
    {
        return;
    }

    free(ada->weights);
    ada->weights = NULL;  // prevent double free / use after free
    ada->num_weights = 0;
}

/**
 * Threshold (sign-like) activation modelled on the [Heaviside step
 * function](https://en.wikipedia.org/wiki/Heaviside_step_function).
 *
 * @param x activation function input
 * @returns `1` when `x` is strictly positive, `-1` for every other input
 * (zero and negative values)
 */
int adaline_activation(double x)
{
    if (x > 0)
    {
        return 1;
    }
    return -1;
}

/**
 * Format the weights of the model as `<w0, w1, ..., wn>`.
 *
 * The text is written into a single static buffer, so the returned pointer
 * stays valid after the call but its contents are overwritten by the next
 * call. If the formatted weights do not fit in the buffer the list is cut
 * short; the result is still NUL terminated and still ends with `>`.
 * A `NULL` model, or a model without weights, yields `<>`.
 *
 * @param ada model for which the values to print
 * @returns pointer to a NUL terminated string of formatted weights
 */
char *adaline_get_weights_str(const struct adaline *ada)
{
    static char out[100];  // static so the value is persistent
    const size_t cap = sizeof(out);
    size_t used = 0;

    out[used++] = '<';

    if (ada != NULL && ada->weights != NULL)
    {
        // Invariant: used <= cap - 2, so the closing '>' and the terminating
        // NUL always fit after the loop.
        for (int i = 0; i < ada->num_weights && used < cap - 2; i++)
        {
            const char *sep = (i < ada->num_weights - 1) ? ", " : "";
            // keep one byte in reserve for the closing '>'
            const size_t room = cap - used - 1;

            // append at the current end instead of passing `out` as both
            // destination and "%s" argument, which is undefined behaviour
            int n = snprintf(out + used, room, "%.4g%s", ada->weights[i], sep);
            if (n < 0)
            {
                break;  // encoding error: keep what has been written so far
            }
            if ((size_t)n >= room)
            {
                used += room - 1;  // output was truncated to the free space
                break;
            }
            used += (size_t)n;
        }
    }

    out[used++] = '>';
    out[used] = '\0';
    return out;
}

/**
 * Evaluate the model for one feature vector.
 *
 * The neuron output is the bias (the last weight) plus the sum of every
 * feature multiplied by its weight; the prediction is that output passed
 * through adaline_activation().
 *
 * \param[in] ada adaline model to predict
 * \param[in] x input vector holding `num_weights - 1` features
 * \param[out] out optional argument to return neuron output before applying
 * activation function (`NULL` to ignore)
 * \returns model prediction output
 */
int adaline_predict(struct adaline *ada, const double *x, double *out)
{
    const double *w = ada->weights;
    const int n_inputs = ada->num_weights - 1;  // index of the bias weight

    double neuron_output = w[n_inputs];  // start from the bias value
    int k = 0;
    while (k < n_inputs)
    {
        neuron_output += x[k] * w[k];
        k++;
    }

    if (out != NULL)
    {
        *out = neuron_output;
    }

    // quantizer: apply ADALINE threshold function
    return adaline_activation(neuron_output);
}

/**
 * Perform one supervised learning step on a single feature vector.
 *
 * The model's current prediction for `x` is compared with the known output
 * `y`; the difference scaled by the learning rate is the correction factor.
 * Every feature weight is moved by the correction factor times its feature
 * value, and the bias weight is moved by the correction factor itself.
 *
 * \param[in] ada adaline model to fit
 * \param[in] x feature vector
 * \param[in] y known output value
 * \returns correction factor
 */
double adaline_fit_sample(struct adaline *ada, const double *x, const int y)
{
    const int bias_index = ada->num_weights - 1;

    /* prediction with the weights as they are before this update */
    const int predicted = adaline_predict(ada, x, NULL);
    const double step = ada->eta * (y - predicted);

    double *w = ada->weights;
    for (int k = 0; k < bias_index; ++k)
    {
        w[k] += step * x[k];
    }
    w[bias_index] += step;  // the bias behaves like a weight on a constant 1

    return step;
}

/**
 * Update the weights of the model using supervised learning for an array of
 * vectors.
 *
 * Each pass calls adaline_fit_sample() once per sample and averages the
 * absolute correction factors. Passes repeat until that average is no larger
 * than `ADALINE_ACCURACY` or `MAX_ADALINE_ITER` passes have been made. The
 * weights and the average error are printed after every pass, followed by a
 * final line saying whether the model converged.
 *
 * When `N` is not positive there is nothing to train on: an error is printed
 * to `stderr` and the model is left unchanged.
 *
 * \param[in] ada adaline model to train
 * \param[in] X array of feature vector
 * \param[in] y known output value for each feature vector
 * \param[in] N number of training samples
 */
void adaline_fit(struct adaline *ada, double **X, const int *y, const int N)
{
    if (N <= 0)
    {
        // averaging over zero samples would yield NaN and a bogus
        // "converged" report
        fprintf(stderr, "adaline_fit: at least one training sample needed\n");
        return;
    }

    double avg_pred_error = 1.0;
    int iter;

    for (iter = 0;
         (iter < MAX_ADALINE_ITER) && (avg_pred_error > ADALINE_ACCURACY);
         iter++)
    {
        avg_pred_error = 0.0;

        // perform fit for each sample
        for (int i = 0; i < N; i++)
        {
            double err = adaline_fit_sample(ada, X[i], y[i]);
            avg_pred_error += fabs(err);
        }
        avg_pred_error /= N;

        // progress report after every pass
        printf("\tIter %3d: Training weights: %s\tAvg error: %.4f\n", iter,
               adaline_get_weights_str(ada), avg_pred_error);
    }

    // decide on the error itself: a model that converges on the very last
    // allowed pass has still converged
    if (avg_pred_error <= ADALINE_ACCURACY)
    {
        printf("Converged after %d iterations.\n", iter);
    }
    else
    {
        printf("Did not converge after %d iterations.\n", iter);
    }
}

/** @}

* @}

*/

/**
 * test function to predict points in a 2D coordinate system above the line
 * \f$x=y\f$ as +1 and others as -1.
 * Note that each point is defined by 2 values or 2 features.
 *
 * The training set is a fixed table of 10 points, so no dynamic memory is
 * needed besides the model itself. The function returns early when the model
 * could not be allocated.
 *
 * \param[in] eta learning rate used to build the model
 */
void test1(double eta)
{
    struct adaline ada = new_adaline(2, eta);  // 2 features
    if (ada.weights == NULL)  // allocation failure was already reported
    {
        return;
    }

    enum { N = 10 };  // number of sample points

    double saved_X[N][2] = {{0, 1},   {1, -2}, {2, 3},  {3, -1},
                            {4, 1},   {6, -5}, {-7, -3}, {-8, 5},
                            {-9, 2},  {-10, -15}};
    const int Y[N] = {1,  -1, 1, -1, -1,
                      -1, 1,  1, 1,  -1};  // corresponding y-values

    // adaline_fit() expects an array of row pointers
    double *X[N];
    for (int i = 0; i < N; i++)
    {
        X[i] = saved_X[i];
    }

    printf("------- Test 1 -------\n");
    printf("Model before fit: %s\n", adaline_get_weights_str(&ada));

    adaline_fit(&ada, X, Y, N);
    printf("Model after fit: %s\n", adaline_get_weights_str(&ada));

    double test_x[] = {5, -3};
    int pred = adaline_predict(&ada, test_x, NULL);
    printf("Predict for x=(5,-3): % d\n", pred);
    assert(pred == -1);
    printf(" ...passed\n");

    double test_x2[] = {5, 8};
    pred = adaline_predict(&ada, test_x2, NULL);
    printf("Predict for x=(5, 8): % d\n", pred);
    assert(pred == 1);
    printf(" ...passed\n");

    delete_adaline(&ada);
}

/**
 * test function to predict points in a 2D coordinate system above the line
 * \f$x+3y=-1\f$ as +1 and others as -1.
 * Note that each point is defined by 2 values or 2 features.
 * The function will create random sample points for training and test purposes.
 *
 * All training rows live in one contiguous buffer. If the model or any buffer
 * cannot be allocated, the function cleans up and returns without running the
 * test.
 *
 * \param[in] eta learning rate used to build the model
 */
void test2(double eta)
{
    struct adaline ada = new_adaline(2, eta);  // 2 features
    if (ada.weights == NULL)  // allocation failure was already reported
    {
        return;
    }

    const int N = 50;  // number of sample points

    double *rows = (double *)malloc((size_t)N * 2 * sizeof(*rows));
    double **X = (double **)malloc((size_t)N * sizeof(*X));
    int *Y = (int *)malloc((size_t)N * sizeof(*Y));  // corresponding y-values
    if (rows == NULL || X == NULL || Y == NULL)
    {
        perror("Unable to allocate training data");
        free(rows);
        free(X);
        free(Y);
        delete_adaline(&ada);
        return;
    }

    // generate sample points in the interval
    // [-range2/100 , (range2-1)/100]
    int range = 500;          // sample points full-range
    int range2 = range >> 1;  // sample points half-range
    for (int i = 0; i < N; i++)
    {
        double x0 = ((rand() % range) - range2) / 100.f;
        double x1 = ((rand() % range) - range2) / 100.f;

        X[i] = rows + 2 * i;  // row i of the contiguous buffer
        X[i][0] = x0;
        X[i][1] = x1;
        Y[i] = (x0 + 3. * x1) > -1 ? 1 : -1;
    }

    printf("------- Test 2 -------\n");
    printf("Model before fit: %s\n", adaline_get_weights_str(&ada));

    adaline_fit(&ada, X, Y, N);
    printf("Model after fit: %s\n", adaline_get_weights_str(&ada));

    int N_test_cases = 5;
    double test_x[2];
    for (int i = 0; i < N_test_cases; i++)
    {
        double x0 = ((rand() % range) - range2) / 100.f;
        double x1 = ((rand() % range) - range2) / 100.f;

        test_x[0] = x0;
        test_x[1] = x1;
        int pred = adaline_predict(&ada, test_x, NULL);
        printf("Predict for x=(% 3.2f,% 3.2f): % d\n", x0, x1, pred);

        int expected_val = (x0 + 3. * x1) > -1 ? 1 : -1;
        assert(pred == expected_val);
        printf(" ...passed\n");
    }

    free(rows);
    free(X);
    free(Y);
    delete_adaline(&ada);
}

/**
 * test function to predict points in a 3D coordinate system lying within the
 * sphere of radius 1 and centre at origin as +1 and others as -1. Note that
 * each point is defined by 3 values but we use 6 features: the three
 * coordinates followed by their squares. The function will create random
 * sample points for training and test purposes.
 * The sphere centred at origin and radius 1 is defined as:
 * \f$x^2+y^2+z^2=r^2=1\f$ and if the \f$r^2\le1\f$, point is labelled as
 * lying within the sphere else, outside.
 *
 * All training rows live in one contiguous buffer. If the model or any buffer
 * cannot be allocated, the function cleans up and returns without running the
 * test.
 *
 * \param[in] eta learning rate used to build the model
 */
void test3(double eta)
{
    enum { NUM_FEATURES = 6 };

    struct adaline ada = new_adaline(NUM_FEATURES, eta);  // 6 features
    if (ada.weights == NULL)  // allocation failure was already reported
    {
        return;
    }

    const int N = 50;  // number of sample points

    double *rows = (double *)malloc((size_t)N * NUM_FEATURES * sizeof(*rows));
    double **X = (double **)malloc((size_t)N * sizeof(*X));
    int *Y = (int *)malloc((size_t)N * sizeof(*Y));  // corresponding y-values
    if (rows == NULL || X == NULL || Y == NULL)
    {
        perror("Unable to allocate training data");
        free(rows);
        free(X);
        free(Y);
        delete_adaline(&ada);
        return;
    }

    // generate sample points in the interval
    // [-range2/100 , (range2-1)/100]
    int range = 200;          // sample points full-range
    int range2 = range >> 1;  // sample points half-range
    for (int i = 0; i < N; i++)
    {
        double x0 = ((rand() % range) - range2) / 100.f;
        double x1 = ((rand() % range) - range2) / 100.f;
        double x2 = ((rand() % range) - range2) / 100.f;

        X[i] = rows + NUM_FEATURES * i;  // row i of the contiguous buffer
        X[i][0] = x0;
        X[i][1] = x1;
        X[i][2] = x2;
        X[i][3] = x0 * x0;
        X[i][4] = x1 * x1;
        X[i][5] = x2 * x2;
        Y[i] = (x0 * x0 + x1 * x1 + x2 * x2) <= 1 ? 1 : -1;
    }

    printf("------- Test 3 -------\n");
    printf("Model before fit: %s\n", adaline_get_weights_str(&ada));

    adaline_fit(&ada, X, Y, N);
    printf("Model after fit: %s\n", adaline_get_weights_str(&ada));

    int N_test_cases = 5;
    double test_x[NUM_FEATURES];
    for (int i = 0; i < N_test_cases; i++)
    {
        double x0 = ((rand() % range) - range2) / 100.f;
        double x1 = ((rand() % range) - range2) / 100.f;
        double x2 = ((rand() % range) - range2) / 100.f;

        test_x[0] = x0;
        test_x[1] = x1;
        test_x[2] = x2;
        test_x[3] = x0 * x0;
        test_x[4] = x1 * x1;
        test_x[5] = x2 * x2;
        int pred = adaline_predict(&ada, test_x, NULL);
        // report all three coordinates of the tested point
        printf("Predict for x=(% 3.2f,% 3.2f,% 3.2f): % d\n", x0, x1, x2,
               pred);

        int expected_val = (x0 * x0 + x1 * x1 + x2 * x2) <= 1 ? 1 : -1;
        assert(pred == expected_val);
        printf(" ...passed\n");
    }

    free(rows);
    free(X);
    free(Y);
    delete_adaline(&ada);
}

/**
 * Main function.
 *
 * Seeds the random number generator, then runs the three tests in order,
 * waiting for a key press between them. The learning rate defaults to 0.1 and
 * may be overridden by passing it as the only command-line argument; an
 * argument that is not a complete number is rejected with an error message.
 *
 * \param[in] argc number of command-line arguments
 * \param[in] argv command-line arguments; `argv[1]` is the optional learning
 * rate
 * \returns 0 on success, `EXIT_FAILURE` when the learning rate argument
 * cannot be parsed
 */
int main(int argc, char **argv)
{
    srand((unsigned int)time(NULL));  // initialize random number generator

    double eta = 0.1;  // default value of eta
    if (argc == 2)     // read eta value from commandline argument if present
    {
        char *end = NULL;
        // strtod keeps full double precision, unlike strtof
        eta = strtod(argv[1], &end);
        if (end == argv[1] || *end != '\0')
        {
            fprintf(stderr, "Invalid learning rate: '%s'\n", argv[1]);
            return EXIT_FAILURE;
        }
    }

    test1(eta);

    printf("Press ENTER to continue...\n");
    getchar();

    test2(eta);

    printf("Press ENTER to continue...\n");
    getchar();

    test3(eta);

    return 0;
}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

adaline_learning.c

adaline_learning.c

Files

adaline_learning.c

Latest commit

History

adaline_learning.c

File metadata and controls