
Initial commit.

commit 411f66a254 by jeanne (branch: main, 7 months ago)
24 changed files:

CMakeLists.txt                                          |   6
LICENSE                                                 |   2
src/bin/CMakeLists.txt                                  |   3
src/bin/mnist/CMakeLists.txt                            |  11
src/bin/mnist/src/main.c                                | 473
src/lib/CMakeLists.txt                                  |  37
src/lib/include/neuralnet/matrix.h                      | 111
src/lib/include/neuralnet/neuralnet.h                   |  64
src/lib/include/neuralnet/train.h                       |  42
src/lib/include/neuralnet/types.h                       |   3
src/lib/src/activation.h                                |  21
src/lib/src/matrix.c                                    | 298
src/lib/src/neuralnet.c                                 | 228
src/lib/src/neuralnet_impl.h                            |  36
src/lib/src/train.c                                     | 346
src/lib/test/matrix_test.c                              | 350
src/lib/test/neuralnet_test.c                           |  92
src/lib/test/test.h                                     | 185
src/lib/test/test_main.c                                |   3
src/lib/test/test_util.h                                |  22
src/lib/test/train_linear_perceptron_non_origin_test.c  |  67
src/lib/test/train_linear_perceptron_test.c             |  62
src/lib/test/train_sigmoid_test.c                       |  66
src/lib/test/train_xor_test.c                           |  66

CMakeLists.txt | 6

@@ -0,0 +1,6 @@
cmake_minimum_required(VERSION 3.0)
project(neuralnet)
add_subdirectory(src/lib)
add_subdirectory(src/bin)

LICENSE | 2

@@ -1,7 +1,7 @@
GNU AFFERO GENERAL PUBLIC LICENSE
Version 3, 19 November 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Copyright (C) 2022 Marc Sunet <https://shellblade.net/>
Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.

src/bin/CMakeLists.txt | 3

@@ -0,0 +1,3 @@
cmake_minimum_required(VERSION 3.0)
add_subdirectory(mnist)

src/bin/mnist/CMakeLists.txt | 11

@@ -0,0 +1,11 @@
cmake_minimum_required(VERSION 3.0)
add_executable(mnist
src/main.c)
target_link_libraries(mnist PRIVATE
neuralnet
bsd
z)
target_compile_options(mnist PRIVATE -Wall -Wextra)

src/bin/mnist/src/main.c | 473

@@ -0,0 +1,473 @@
#include <neuralnet/matrix.h>
#include <neuralnet/neuralnet.h>
#include <neuralnet/train.h>
#include <zlib.h>
#include <assert.h>
#include <bsd/string.h>
#include <linux/limits.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
static const int TRAIN_ITERATIONS = 100;
static const int32_t IMAGE_FILE_MAGIC = 0x00000803;
static const int32_t LABEL_FILE_MAGIC = 0x00000801;
// Inputs of 0 cancel weights during training. This value is used to rescale the
// input pixels from [0,255] to [PIXEL_LOWER_BOUND, 1.0].
static const double PIXEL_LOWER_BOUND = 0.01;
// Scale the outputs to (0,1) since the sigmoid cannot produce 0 or 1.
static const double LABEL_LOWER_BOUND = 0.01;
static const double LABEL_UPPER_BOUND = 0.99;
// Epsilon used to compare R values.
static const double EPS = 1e-10;
#define min(a,b) ((a) < (b) ? (a) : (b))
typedef struct ImageSet {
nnMatrix images; // Images flattened into row vectors of the matrix.
nnMatrix labels; // One-hot-encoded labels.
int count; // Number of images and labels.
int rows; // Rows in an image.
int cols; // Columns in an image.
} ImageSet;
static void usage(const char* argv0) {
fprintf(stderr, "Usage: %s <path to mnist files directory> [num images]\n", argv0);
fprintf(stderr, "\n");
fprintf(stderr, " Use -1 for [num images] to use all the images in the data set\n");
}
static bool R_eq(R a, R b) {
return fabs(a-b) <= EPS;
}
static void PrintImage(const nnMatrix* images, int rows, int cols, int image_index) {
assert(images);
assert((0 <= image_index) && (image_index < images->rows));
// Top line.
for (int j = 0; j < cols/2; ++j) {
printf(" -");
}
printf("\n");
// Image.
const R* value = nnMatrixRow(images, image_index);
for (int i = 0; i < rows; ++i) {
printf("|");
for (int j = 0; j < cols; ++j) {
if (*value > 0.8) {
printf("#");
} else if (*value > 0.5) {
printf("*");
}
else if (*value > PIXEL_LOWER_BOUND) {
printf(":");
} else if (*value == 0.0) {
// Values should not be exactly 0, otherwise they cancel out weights
// during training.
printf("X");
} else {
printf(" ");
}
value++;
}
printf("|\n");
}
// Bottom line.
for (int j = 0; j < cols/2; ++j) {
printf(" -");
}
printf("\n");
}
static void PrintLabel(const nnMatrix* labels, int label_index) {
assert(labels);
assert((0 <= label_index) && (label_index < labels->rows));
// Compute the label from the one-hot encoding.
const R* value = nnMatrixRow(labels, label_index);
int label = -1;
for (int i = 0; i < 10; ++i) {
if (R_eq(*value++, LABEL_UPPER_BOUND)) {
label = i;
break;
}
}
assert((0 <= label) && (label <= 9));
printf("Label: %d ( ", label);
value = nnMatrixRow(labels, label_index);
for (int i = 0; i < 10; ++i) {
printf("%.3f ", *value++);
}
printf(")\n");
}
static R lerp(R a, R b, R t) {
return a + t*(b-a);
}
/// Rescales a pixel from [0,255] to [PIXEL_LOWER_BOUND, 1.0].
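/// For example, pixel 0 maps to PIXEL_LOWER_BOUND and pixel 255 maps to 1.0.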
static R FormatPixel(uint8_t pixel) {
const R value = (R)(pixel) / 255.0 * (1.0 - PIXEL_LOWER_BOUND) + PIXEL_LOWER_BOUND;
assert(value >= PIXEL_LOWER_BOUND);
assert(value <= 1.0);
return value;
}
/// Rescales a one-hot-encoded label value to (0,1).
static R FormatLabel(R label) {
const R value = lerp(LABEL_LOWER_BOUND, LABEL_UPPER_BOUND, label);
assert(value > 0.0);
assert(value < 1.0);
return value;
}
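// MNIST files store their header fields as big-endian 32-bit integers, so
// they must be byte-swapped on little-endian machines.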
static int32_t ReverseEndian32(int32_t x) {
const int32_t x0 = x & 0xff;
const int32_t x1 = (x >> 8) & 0xff;
const int32_t x2 = (x >> 16) & 0xff;
const int32_t x3 = (x >> 24) & 0xff;
return (x0 << 24) | (x1 << 16) | (x2 << 8) | x3;
}
static void ImageToMatrix(
const uint8_t* pixels, int num_pixels, int row, nnMatrix* images) {
assert(pixels);
assert(images);
for (int i = 0; i < num_pixels; ++i) {
const R pixel = FormatPixel(pixels[i]);
nnMatrixSet(images, row, i, pixel);
}
}
static bool ReadImages(gzFile images_file, int max_num_images, ImageSet* image_set) {
assert(images_file != Z_NULL);
assert(image_set);
bool success = false;
uint8_t* pixels = 0;
int32_t magic, total_images, rows, cols;
if ( (gzread(images_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) ||
(gzread(images_file, (char*)&total_images, sizeof(int32_t)) != sizeof(int32_t)) ||
(gzread(images_file, (char*)&rows, sizeof(int32_t)) != sizeof(int32_t)) ||
(gzread(images_file, (char*)&cols, sizeof(int32_t)) != sizeof(int32_t)) ) {
fprintf(stderr, "Failed to read header\n");
goto cleanup;
}
magic = ReverseEndian32(magic);
total_images = ReverseEndian32(total_images);
rows = ReverseEndian32(rows);
cols = ReverseEndian32(cols);
if (magic != IMAGE_FILE_MAGIC) {
fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n",
magic, IMAGE_FILE_MAGIC);
goto cleanup;
}
printf("Magic: %.8x\nTotal images: %d\nRows: %d\nCols: %d\n",
magic, total_images, rows, cols);
total_images = max_num_images >= 0 ? min(total_images, max_num_images) : total_images;
// Images are flattened into single row vectors.
const int num_pixels = rows * cols;
image_set->images = nnMatrixMake(total_images, num_pixels);
image_set->count = total_images;
image_set->rows = rows;
image_set->cols = cols;
pixels = calloc(1, num_pixels);
if (!pixels) {
fprintf(stderr, "Failed to allocate image buffer\n");
goto cleanup;
}
for (int i = 0; i < total_images; ++i) {
const int bytes_read = gzread(images_file, pixels, num_pixels);
if (bytes_read < num_pixels) {
fprintf(stderr, "Failed to read image %d\n", i);
goto cleanup;
}
ImageToMatrix(pixels, num_pixels, i, &image_set->images);
}
success = true;
cleanup:
if (pixels) {
free(pixels);
}
if (!success) {
nnMatrixDel(&image_set->images);
}
return success;
}
static void OneHotEncode(const uint8_t* labels_bytes, int num_labels, nnMatrix* labels) {
assert(labels_bytes);
assert(labels);
assert(labels->rows == num_labels);
assert(labels->cols == 10);
static const R one_hot[10][10] = {
{ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
};
R* value = labels->values;
for (int i = 0; i < num_labels; ++i) {
const uint8_t label = labels_bytes[i];
const R* one_hot_value = one_hot[label];
for (int j = 0; j < 10; ++j) {
*value++ = FormatLabel(*one_hot_value++);
}
}
}
static int OneHotDecode(const nnMatrix* label_matrix) {
assert(label_matrix);
assert(label_matrix->rows == 1);
assert(label_matrix->cols == 10);
R max_value = 0;
int pos_max = 0;
for (int i = 0; i < 10; ++i) {
const R value = nnMatrixAt(label_matrix, 0, i);
if (value > max_value) {
max_value = value;
pos_max = i;
}
}
assert(pos_max >= 0);
assert(pos_max <= 9);
return pos_max;
}
static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_set) {
assert(labels_file != Z_NULL);
assert(image_set != 0);
bool success = false;
uint8_t* labels = 0;
int32_t magic, total_labels;
if ( (gzread(labels_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) ||
(gzread(labels_file, (char*)&total_labels, sizeof(int32_t)) != sizeof(int32_t)) ) {
fprintf(stderr, "Failed to read header\n");
goto cleanup;
}
magic = ReverseEndian32(magic);
total_labels = ReverseEndian32(total_labels);
if (magic != LABEL_FILE_MAGIC) {
fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n",
magic, LABEL_FILE_MAGIC);
goto cleanup;
}
printf("Magic: %.8x\nTotal labels: %d\n", magic, total_labels);
total_labels = max_num_labels >= 0 ? min(total_labels, max_num_labels) : total_labels;
assert(image_set->count == total_labels);
// One-hot encoding of labels, 10 values (digits) per label.
image_set->labels = nnMatrixMake(total_labels, 10);
labels = calloc(total_labels, sizeof(uint8_t));
if (!labels) {
fprintf(stderr, "Failed to allocate labels buffer\n");
goto cleanup;
}
if (gzread(labels_file, labels, total_labels * sizeof(uint8_t)) != total_labels) {
fprintf(stderr, "Failed to read labels\n");
goto cleanup;
}
OneHotEncode(labels, total_labels, &image_set->labels);
success = true;
cleanup:
if (labels) {
free(labels);
}
if (!success) {
nnMatrixDel(&image_set->labels);
}
return success;
}
int main(int argc, const char** argv) {
if (argc < 2) {
usage(argv[0]);
return 1;
}
bool success = false;
gzFile train_images_file = Z_NULL;
gzFile train_labels_file = Z_NULL;
gzFile test_images_file = Z_NULL;
gzFile test_labels_file = Z_NULL;
ImageSet train_set = { 0 };
ImageSet test_set = { 0 };
nnNeuralNetwork* net = 0;
nnQueryObject* query = 0;
const char* mnist_files_dir = argv[1];
const int max_num_images = argc > 2 ? atoi(argv[2]) : -1;
char train_labels_path[PATH_MAX];
char train_images_path[PATH_MAX];
char test_labels_path[PATH_MAX];
char test_images_path[PATH_MAX];
strlcpy(train_labels_path, mnist_files_dir, PATH_MAX);
strlcpy(train_images_path, mnist_files_dir, PATH_MAX);
strlcpy(test_labels_path, mnist_files_dir, PATH_MAX);
strlcpy(test_images_path, mnist_files_dir, PATH_MAX);
strlcat(train_labels_path, "/train-labels-idx1-ubyte.gz", PATH_MAX);
strlcat(train_images_path, "/train-images-idx3-ubyte.gz", PATH_MAX);
strlcat(test_labels_path, "/t10k-labels-idx1-ubyte.gz", PATH_MAX);
strlcat(test_images_path, "/t10k-images-idx3-ubyte.gz", PATH_MAX);
train_images_file = gzopen(train_images_path, "r");
if (train_images_file == Z_NULL) {
fprintf(stderr, "Failed to open file: %s\n", train_images_path);
goto cleanup;
}
train_labels_file = gzopen(train_labels_path, "r");
if (train_labels_file == Z_NULL) {
fprintf(stderr, "Failed to open file: %s\n", train_labels_path);
goto cleanup;
}
test_images_file = gzopen(test_images_path, "r");
if (test_images_file == Z_NULL) {
fprintf(stderr, "Failed to open file: %s\n", test_images_path);
goto cleanup;
}
test_labels_file = gzopen(test_labels_path, "r");
if (test_labels_file == Z_NULL) {
fprintf(stderr, "Failed to open file: %s\n", test_labels_path);
goto cleanup;
}
if (!ReadImages(train_images_file, max_num_images, &train_set)) {
goto cleanup;
}
if (!ReadLabels(train_labels_file, max_num_images, &train_set)) {
goto cleanup;
}
if (!ReadImages(test_images_file, max_num_images, &test_set)) {
goto cleanup;
}
if (!ReadLabels(test_labels_file, max_num_images, &test_set)) {
goto cleanup;
}
printf("\nTraining image/label pair examples:\n");
for (int i = 0; i < min(3, train_set.images.rows); ++i) {
PrintImage(&train_set.images, train_set.rows, train_set.cols, i);
PrintLabel(&train_set.labels, i);
printf("\n");
}
// Network definition.
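// A 2-layer network: a hidden layer of 100 units and an output layer of 10
// units (one per digit). For standard 28x28 MNIST images, image_size_pixels
// is 784.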
const int image_size_pixels = train_set.rows * train_set.cols;
const int num_layers = 2;
const int layer_sizes[3] = { image_size_pixels, 100, 10 };
const nnActivation layer_activations[2] = { nnSigmoid, nnSigmoid };
if (!(net = nnMakeNet(num_layers, layer_sizes, layer_activations))) {
fprintf(stderr, "Failed to create neural network\n");
goto cleanup;
}
// Train.
printf("Training with up to %d images from the data set\n\n", max_num_images);
const nnTrainingParams training_params = {
.learning_rate = 0.1,
.max_iterations = TRAIN_ITERATIONS,
.seed = 0,
.weight_init = nnWeightInitNormal,
.debug = true,
};
nnTrain(net, &train_set.images, &train_set.labels, &training_params);
// Test.
int hits = 0;
query = nnMakeQueryObject(net, /*num_inputs=*/1);
for (int i = 0; i < test_set.count; ++i) {
const nnMatrix test_image = nnMatrixBorrowRows(&test_set.images, i, 1);
const nnMatrix test_label = nnMatrixBorrowRows(&test_set.labels, i, 1);
nnQuery(net, query, &test_image);
const int test_label_expected = OneHotDecode(&test_label);
const int test_label_actual = OneHotDecode(nnNetOutputs(query));
if (test_label_actual == test_label_expected) {
++hits;
}
}
const R hit_ratio = (R)hits / (R)test_set.count;
printf("Test images: %d\n", test_set.count);
printf("Hits: %d/%d (%.3f%%)\n", hits, test_set.count, hit_ratio*100);
success = true;
cleanup:
if (query) {
nnDeleteQueryObject(&query);
}
if (net) {
nnDeleteNet(&net);
}
nnMatrixDel(&train_set.images);
nnMatrixDel(&train_set.labels);
nnMatrixDel(&test_set.images);
nnMatrixDel(&test_set.labels);
if (train_images_file != Z_NULL) {
gzclose(train_images_file);
}
if (train_labels_file != Z_NULL) {
gzclose(train_labels_file);
}
if (test_images_file != Z_NULL) {
gzclose(test_images_file);
}
if (test_labels_file != Z_NULL) {
gzclose(test_labels_file);
}
return success ? 0 : 1;
}

src/lib/CMakeLists.txt | 37

@@ -0,0 +1,37 @@
cmake_minimum_required(VERSION 3.0)
# Library
add_library(neuralnet
src/matrix.c
src/neuralnet.c
src/train.c)
target_include_directories(neuralnet PUBLIC
include)
target_link_libraries(neuralnet PRIVATE
m # System math library (-lm).
random)
target_compile_options(neuralnet PRIVATE -Wall -Wextra)
# Test
add_executable(neuralnet-test
test/matrix_test.c
test/neuralnet_test.c
test/test_main.c
test/train_linear_perceptron_test.c
test/train_linear_perceptron_non_origin_test.c
test/train_sigmoid_test.c
test/train_xor_test.c)
target_link_libraries(neuralnet-test PRIVATE
neuralnet)
# So that we can include header files from the private implementation.
target_include_directories(neuralnet-test PRIVATE
src)
target_compile_options(neuralnet-test PRIVATE -DUNIT_TEST -Wall -Wextra)

src/lib/include/neuralnet/matrix.h | 111

@@ -0,0 +1,111 @@
#pragma once
#include <neuralnet/types.h>
#include <assert.h>
/// NxM matrix.
typedef struct nnMatrix {
int rows;
int cols;
R* values;
} nnMatrix;
/// Construct a matrix.
nnMatrix nnMatrixMake(int rows, int cols);
/// Delete a matrix and free its internal memory.
void nnMatrixDel(nnMatrix*);
/// Move a matrix.
///
/// |in| is left as an empty matrix after the move.
/// |out| receives the contents that |in| held before the move.
void nnMatrixMove(nnMatrix* in, nnMatrix* out);
/// Deep-copy a matrix.
void nnMatrixCopy(const nnMatrix* in, nnMatrix* out);
/// Write the matrix values into an array in a row-major fashion.
void nnMatrixToArray(const nnMatrix* in, R* out);
/// Write the given row of a matrix into an array.
void nnMatrixRowToArray(const nnMatrix* in, int row, R* out);
/// Copy a column from a source to a target matrix.
void nnMatrixCopyCol(const nnMatrix* in, nnMatrix* out, int col_in, int col_out);
/// Mutable borrow of a matrix.
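///
/// The borrowed matrix aliases |in|'s memory; do not call nnMatrixDel() on it.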
nnMatrix nnMatrixBorrow(nnMatrix* in);
/// Mutable borrow of a subrange of rows of a matrix.
nnMatrix nnMatrixBorrowRows(nnMatrix* in, int row_start, int num_rows);
/// Initialize the matrix from an array of values.
///
/// The array must hold values in a row-major fashion.
void nnMatrixInit(nnMatrix*, const R* values);
/// Initialize all matrix values to a given constant.
void nnMatrixInitConstant(nnMatrix*, R value);
/// Multiply two matrices.
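///
/// If |left| is NxM and |right| is MxP, then |out| must be NxP. |out| must not
/// alias |left| or |right|.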
void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
/// Matrix multiply-add.
///
/// out = left + (right * scale)
void nnMatrixMulAdd(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out);
/// Matrix multiply-subtract.
///
/// out = left - (right * scale)
void nnMatrixMulSub(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out);
/// Hadamard product of two matrices.
void nnMatrixMulPairs(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
/// Add two matrices.
void nnMatrixAdd(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
/// Subtract two matrices.
void nnMatrixSub(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
/// Adds a row vector to all rows of the matrix.
void nnMatrixAddRow(const nnMatrix* matrix, const nnMatrix* row, nnMatrix* out);
/// Scale a matrix.
void nnMatrixScale(nnMatrix*, R scale);
/// Transpose a matrix.
///
/// |in| must be different from |out|.
void nnMatrixTranspose(const nnMatrix* in, nnMatrix* out);
/// Threshold the values of a matrix using a greater-than operator.
///
/// out[x,y] = 1 if in[x,y] > threshold else 0
void nnMatrixGt(const nnMatrix* in, R threshold, nnMatrix* out);
/// Return the matrix value at the given row and column.
static inline R nnMatrixAt(const nnMatrix* matrix, int row, int col) {
assert(matrix);
return matrix->values[row * matrix->cols + col];
}
/// Set the matrix value at the given row and column.
static inline void nnMatrixSet(nnMatrix* matrix, int row, int col, R value) {
assert(matrix);
matrix->values[row * matrix->cols + col] = value;
}
/// Return a pointer to the given row in the matrix.
static inline const R* nnMatrixRow(const nnMatrix* matrix, int row) {
assert(matrix);
return &matrix->values[row * matrix->cols];
}
/// Return a mutable pointer to the given row in the matrix.
static inline R* nnMatrixRow_mut(nnMatrix* matrix, int row) {
assert(matrix);
return &matrix->values[row * matrix->cols];
}

src/lib/include/neuralnet/neuralnet.h | 64

@@ -0,0 +1,64 @@
#pragma once
#include <neuralnet/types.h>
typedef struct nnMatrix nnMatrix;
typedef struct nnNeuralNetwork nnNeuralNetwork;
typedef struct nnQueryObject nnQueryObject;
/// Neuron activation.
typedef enum nnActivation {
nnIdentity,
nnSigmoid,
nnRelu,
} nnActivation;
/// Create a network.
nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActivation* activations);
/// Delete the network and free its internal memory.
void nnDeleteNet(nnNeuralNetwork**);
/// Set the network's weights.
void nnSetWeights(nnNeuralNetwork*, const R* weights);
/// Set the network's biases.
void nnSetBiases(nnNeuralNetwork*, const R* biases);
/// Query the network.
///
/// |input| is a matrix of inputs, one row per input and as many columns as the
/// input's dimension.
///
/// The query object's output matrix (see nnQueryOutputs()) is a matrix of
/// outputs, one row per output and as many columns as the output's dimension.
void nnQuery(const nnNeuralNetwork*, nnQueryObject*, const nnMatrix* input);
/// Query the network, array version.
void nnQueryArray(const nnNeuralNetwork*, nnQueryObject*, const R* input, R* output);
/// Create a query object.
///
/// The query object holds all the internal memory required to query a network.
/// Query objects allocate all memory up front so that network queries can run
/// without additional memory allocation.
nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork*, int num_inputs);
/// Delete the query object and free its internal memory.
void nnDeleteQueryObject(nnQueryObject**);
/// Return the outputs of the query.
const nnMatrix* nnNetOutputs(const nnQueryObject*);
/// Return the network's input size.
int nnNetInputSize(const nnNeuralNetwork*);
/// Return the network's output size.
int nnNetOutputSize(const nnNeuralNetwork*);
/// Return the layer's input size.
int nnLayerInputSize(const nnMatrix* weights);
/// Return the layer's output size.
int nnLayerOutputSize(const nnMatrix* weights);
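/// Example (sketch; error checking omitted): construct, query, and tear down
/// a small network.
///
///   const int sizes[] = { 2, 3, 1 };
///   const nnActivation acts[] = { nnSigmoid, nnSigmoid };
///   nnNeuralNetwork* net = nnMakeNet(/*num_layers=*/2, sizes, acts);
///   nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1);
///   nnQuery(net, query, &input); // input: 1 x nnNetInputSize(net) matrix.
///   const nnMatrix* outputs = nnNetOutputs(query);
///   nnDeleteQueryObject(&query);
///   nnDeleteNet(&net);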

src/lib/include/neuralnet/train.h | 42

@@ -0,0 +1,42 @@
#pragma once
#include <neuralnet/neuralnet.h>
#include <stdbool.h>
#include <stdint.h>
typedef struct nnMatrix nnMatrix;
/// Weight initialization strategy.
///
/// Note that regardless of strategy, a layer's weights are scaled by the
/// layer's size. This is to avoid saturation when, e.g., using a sigmoid
/// activation with many inputs. Thus, a (0,1) initialization is really
/// (0,scale), for example.
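/// E.g., for a layer with 100 inputs, the (0,1) strategy actually samples
/// from (0, 1/100).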
typedef enum nnWeightInitStrategy {
nnWeightInit01, // (0,1) range.
nnWeightInit11, // (-1,+1) range.
nnWeightInitNormal, // Normal distribution.
} nnWeightInitStrategy;
/// Network training parameters.
typedef struct nnTrainingParams {
R learning_rate;
int max_iterations;
uint64_t seed;
nnWeightInitStrategy weight_init;
bool debug;
} nnTrainingParams;
/// Train the network.
///
/// |inputs| is a matrix of inputs, one row per input and as many columns as
/// the input's dimension.
///
/// |targets| is a matrix of targets, one row per target and as many columns as
/// the target's dimension.
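///
/// Example (sketch, mirroring the mnist binary's usage):
///
///   const nnTrainingParams params = {
///     .learning_rate = 0.1,
///     .max_iterations = 100,
///     .seed = 0,
///     .weight_init = nnWeightInitNormal,
///     .debug = false,
///   };
///   nnTrain(net, &inputs, &targets, &params);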
void nnTrain(
nnNeuralNetwork*,
const nnMatrix* inputs,
const nnMatrix* targets,
const nnTrainingParams*);

src/lib/include/neuralnet/types.h | 3

@@ -0,0 +1,3 @@
#pragma once
typedef double R;

src/lib/src/activation.h | 21

@@ -0,0 +1,21 @@
#pragma once
#include <neuralnet/types.h>
#include <math.h>
static inline R sigmoid(R x) {
return 1. / (1. + exp(-x));
}
static inline R relu(R x) {
return fmax(0, x);
}
#define NN_MAP_ARRAY(f, in, out, size) \
for (int i = 0; i < (size); ++i) { \
(out)[i] = f((in)[i]); \
}
#define sigmoid_array(in, out, size) NN_MAP_ARRAY(sigmoid, in, out, size)
#define relu_array(in, out, size) NN_MAP_ARRAY(relu, in, out, size)

src/lib/src/matrix.c | 298

@@ -0,0 +1,298 @@
#include <neuralnet/matrix.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
nnMatrix nnMatrixMake(int rows, int cols) {
R* values = calloc(rows * cols, sizeof(R));
assert(values != 0);
return (nnMatrix) {
.rows = rows,
.cols = cols,
.values = values,
};
}
void nnMatrixDel(nnMatrix* matrix) {
assert(matrix != 0);
if (matrix->values != 0) {
free(matrix->values);
matrix->values = 0;
matrix->rows = 0;
matrix->cols = 0;
}
}
void nnMatrixMove(nnMatrix* in, nnMatrix* out) {
assert(in);
assert(out);
out->rows = in->rows;
out->cols = in->cols;
out->values = in->values;
in->rows = 0;
in->cols = 0;
in->values = 0;
}
void nnMatrixCopy(const nnMatrix* in, nnMatrix* out) {
assert(in);
assert(out);
assert(in->rows == out->rows);
assert(in->cols == out->cols);
const R* in_value = in->values;
R* out_value = out->values;
for (int i = 0; i < in->rows * in->cols; ++i) {
*out_value++ = *in_value++;
}
}
void nnMatrixToArray(const nnMatrix* in, R* out) {
assert(in);
assert(out);
const R* values = in->values;
for (int i = 0; i < in->rows * in->cols; ++i) {
*out++ = *values++;
}
}
void nnMatrixRowToArray(const nnMatrix* in, int row, R* out) {
assert(in);
assert(out);
const R* values = in->values + row * in->cols;
for (int i = 0; i < in->cols; ++i) {
*out++ = *values++;
}
}
void nnMatrixCopyCol(const nnMatrix* in, nnMatrix* out, int col_in, int col_out) {
assert(in);
assert(out);
assert(in->rows == out->rows);
assert(col_in < in->cols);
assert(col_out < out->cols);
for (int row = 0; row < in->rows; ++row) {
nnMatrixSet(out, row, col_out, nnMatrixAt(in, row, col_in));
}
}
nnMatrix nnMatrixBorrow(nnMatrix* in) {
assert(in);
nnMatrix out;
out.rows = in->rows;
out.cols = in->cols;
out.values = in->values;
return out;
}
nnMatrix nnMatrixBorrowRows(nnMatrix* in, int row_start, int num_rows) {
assert(in);
assert(row_start < in->rows);
assert(row_start + num_rows <= in->rows);
nnMatrix out;
out.rows = num_rows;
out.cols = in->cols;
out.values = nnMatrixRow_mut(in, row_start);
return out;
}
void nnMatrixInit(nnMatrix* matrix, const R* values) {
assert(matrix);
assert(values);
memcpy(matrix->values, values, matrix->rows * matrix->cols * sizeof(R));
}
void nnMatrixInitConstant(nnMatrix* matrix, R value) {
assert(matrix);
for (int i = 0; i < matrix->rows * matrix->cols; ++i) {
matrix->values[i] = value;
}
}
void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
assert(left != 0);
assert(right != 0);
assert(out != 0);
assert(out != left);
assert(out != right);
assert(left->cols == right->rows);
assert(out->rows == left->rows);
assert(out->cols == right->cols);
R* out_value = out->values;
for (int i = 0; i < left->rows; ++i) {
const R* left_row = &left->values[i * left->cols];
for (int j = 0; j < right->cols; ++j) {
const R* right_col = &right->values[j];
*out_value = 0;
// Vector dot product.
for (int k = 0; k < left->cols; ++k) {
*out_value += left_row[k] * right_col[0];
right_col += right->cols; // Next row in the column.
}
out_value++;
}
}
}
void nnMatrixMulAdd(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) {
assert(left);
assert(right);
assert(out);
assert(left->rows == right->rows);
assert(left->cols == right->cols);
assert(left->rows == out->rows);
assert(left->cols == out->cols);
const R* left_value = left->values;
const R* right_value = right->values;
R* out_value = out->values;
for (int i = 0; i < left->rows * left->cols; ++i) {
*out_value++ = *left_value++ + *right_value++ * scale;
}
}
void nnMatrixMulSub(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) {
assert(left);
assert(right);
assert(out);
assert(left->rows == right->rows);
assert(left->cols == right->cols);
assert(left->rows == out->rows);
assert(left->cols == out->cols);
const R* left_value = left->values;
const R* right_value = right->values;
R* out_value = out->values;
for (int i = 0; i < left->rows * left->cols; ++i) {
*out_value++ = *left_value++ - *right_value++ * scale;
}
}
void nnMatrixMulPairs(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
assert(left != 0);
assert(right != 0);
assert(out != 0);
assert(left->rows == right->rows);
assert(left->cols == right->cols);
assert(left->rows == out->rows);
assert(left->cols == out->cols);
const R* left_value = left->values;
const R* right_value = right->values;
R* out_value = out->values;
for (int i = 0; i < left->rows * left->cols; ++i) {
*out_value++ = *left_value++ * *right_value++;
}
}
void nnMatrixAdd(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
assert(left);
assert(right);
assert(out);
assert(left->rows == right->rows);
assert(left->cols == right->cols);
assert(left->rows == out->rows);
assert(left->cols == out->cols);
const R* left_value = left->values;
const R* right_value = right->values;
R* out_value = out->values;
for (int i = 0; i < left->rows * left->cols; ++i) {
*out_value++ = *left_value++ + *right_value++;
}
}
void nnMatrixSub(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
assert(left);
assert(right);
assert(out);
assert(left->rows == right->rows);
assert(left->cols == right->cols);
assert(left->rows == out->rows);
assert(left->cols == out->cols);
const R* left_value = left->values;
const R* right_value = right->values;
R* out_value = out->values;
for (int i = 0; i < left->rows * left->cols; ++i) {
*out_value++ = *left_value++ - *right_value++;
}
}
void nnMatrixAddRow(const nnMatrix* matrix, const nnMatrix* row, nnMatrix* out) {
assert(matrix);
assert(row);
assert(out);
assert(row->rows == 1);
assert(matrix->cols == row->cols);
assert(matrix->rows == out->rows);
assert(matrix->cols == out->cols);
const R* matrix_value = matrix->values;
R* out_value = out->values;
for (int i = 0; i < matrix->rows; ++i) {
const R* row_value = row->values;
for (int j = 0; j < row->cols; ++j) {
*out_value++ = *matrix_value++ + *row_value++;
}
}
}
void nnMatrixScale(nnMatrix* matrix, R scale) {
assert(matrix);
R* value = matrix->values;
for (int i = 0; i < matrix->rows * matrix->cols; ++i) {
*value++ *= scale;
}
}
void nnMatrixTranspose(const nnMatrix* in, nnMatrix* out) {
assert(in);
assert(out);
assert(in != out);
assert(in->rows == out->cols);
assert(in->cols == out->rows);
for (int i = 0; i < in->rows; ++i) {
for (int j = 0; j < in->cols; ++j) {
nnMatrixSet(out, j, i, nnMatrixAt(in, i, j));
}
}
}
void nnMatrixGt(const nnMatrix* in, R threshold, nnMatrix* out) {
assert(in);
assert(out);
assert(in->rows == out->rows);
assert(in->cols == out->cols);
const R* in_value = in->values;
R* out_value = out->values;
for (int i = 0; i < in->rows * in->cols; ++i) {
*out_value++ = (*in_value++) > threshold ? 1 : 0;
}
}

src/lib/src/neuralnet.c | 228

@@ -0,0 +1,228 @@
#include <neuralnet/neuralnet.h>
#include <neuralnet/matrix.h>
#include "activation.h"
#include "neuralnet_impl.h"
#include <assert.h>
#include <stdlib.h>
nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActivation* activations) {
assert(num_layers > 0);
assert(layer_sizes);
assert(activations);
nnNeuralNetwork* net = calloc(1, sizeof(nnNeuralNetwork));
if (net == 0) {
return 0;
}
net->num_layers = num_layers;
net->weights = calloc(num_layers, sizeof(nnMatrix));
net->biases = calloc(num_layers, sizeof(nnMatrix));
net->activations = calloc(num_layers, sizeof(nnActivation));
if ( (net->weights == 0) || (net->biases == 0) || (net->activations == 0) ) {
nnDeleteNet(&net);
return 0;
}
for (int l = 0; l < num_layers; ++l) {
// layer_sizes = { input layer size, first hidden layer size, ...}
const int layer_input_size = layer_sizes[l];
const int layer_output_size = layer_sizes[l+1];
// We store the transpose of the weight matrix as written in textbooks.
// Our vectors are row vectors and the matrices row-major.
const int rows = layer_input_size;
const int cols = layer_output_size;
net->weights[l] = nnMatrixMake(rows, cols);
net->biases[l] = nnMatrixMake(1, cols);
net->activations[l] = activations[l];
}
return net;
}
void nnDeleteNet(nnNeuralNetwork** net) {
if ( (!net) || (!(*net)) ) {
return;
}
if ((*net)->weights != 0) {
for (int l = 0; l < (*net)->num_layers; ++l) {
nnMatrixDel(&(*net)->weights[l]);
}
free((*net)->weights);
(*net)->weights = 0;
}
if ((*net)->biases != 0) {
for (int l = 0; l < (*net)->num_layers; ++l) {
nnMatrixDel(&(*net)->biases[l]);
}
free((*net)->biases);
(*net)->biases = 0;
}
if ((*net)->activations) {
free((*net)->activations);
(*net)->activations = 0;
}
free(*net);
*net = 0;
}
void nnSetWeights(nnNeuralNetwork* net, const R* weights) {
assert(net);
assert(weights);
for (int l = 0; l < net->num_layers; ++l) {
nnMatrix* layer_weights = &net->weights[l];
R* layer_values = layer_weights->values;
for (int j = 0; j < layer_weights->rows * layer_weights->cols; ++j) {
*layer_values++ = *weights++;
}
}
}
void nnSetBiases(nnNeuralNetwork* net, const R* biases) {
assert(net);
assert(biases);
for (int l = 0; l < net->num_layers; ++l) {
nnMatrix* layer_biases = &net->biases[l];
R* layer_values = layer_biases->values;
for (int j = 0; j < layer_biases->rows * layer_biases->cols; ++j) {
*layer_values++ = *biases++;
}
}
}
void nnQuery(const nnNeuralNetwork* net, nnQueryObject* query, const nnMatrix* input) {
assert(net);
assert(query);
assert(input);
assert(net->num_layers == query->num_layers);
assert(input->rows <= query->network_outputs->rows);
assert(input->cols == nnNetInputSize(net));
for (int i = 0; i < input->rows; ++i) {
// Not mutating the input, but we need the cast to borrow.
nnMatrix input_vector = nnMatrixBorrowRows((nnMatrix*)input, i, 1);
for (int l = 0; l < net->num_layers; ++l) {
const nnMatrix* layer_weights = &net->weights[l];
const nnMatrix* layer_biases = &net->biases[l];
// Y^T = (W*X)^T = X^T*W^T
//
// TODO: If we had a row-row matrix multiplication, we could compute:
// Y^T = W ** X^T
// The row-row multiplication could be more cache-friendly. We just need
// to store W as is, without transposing.
// We could also rewrite the original Mul function to go row x row,
// decomposing the multiplication. Preserving the original meaning of Mul
// makes everything clearer.
nnMatrix output_vector = nnMatrixBorrowRows(&query->layer_outputs[l], i, 1);
nnMatrixMul(&input_vector, layer_weights, &output_vector);
nnMatrixAddRow(&output_vector, layer_biases, &output_vector);
switch (net->activations[l]) {
case nnIdentity:
break; // Nothing to do for the identity function.
case nnSigmoid:
sigmoid_array(output_vector.values, output_vector.values, output_vector.cols);
break;
case nnRelu:
relu_array(output_vector.values, output_vector.values, output_vector.cols);
break;
default:
assert(0);
}
input_vector = output_vector; // Borrow.
}
}
}
void nnQueryArray(const nnNeuralNetwork* net, nnQueryObject* query, const R* input, R* output) {
assert(net);
assert(query);
assert(input);
assert(output);
assert(net->num_layers > 0);
// The input is a row vector with as many columns as the network's input size.
nnMatrix input_vector = nnMatrixMake(1, nnNetInputSize(net));
nnMatrixInit(&input_vector, input);
nnQuery(net, query, &input_vector);
nnMatrixRowToArray(query->network_outputs, 0, output);
nnMatrixDel(&input_vector);
}
nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) {
assert(net);
assert(num_inputs > 0);
assert(net->num_layers > 0);
nnQueryObject* query = calloc(1, sizeof(nnQueryObject));
if (!query) {
return 0;
}
query->num_layers = net->num_layers;
// Allocate the intermediate layer output matrices.
query->layer_outputs = calloc(net->num_layers, sizeof(nnMatrix));
if (!query->layer_outputs) {
free(query);
return 0;
}
for (int l = 0; l < net->num_layers; ++l) {
const nnMatrix* layer_weights = &net->weights[l];
const int layer_output_size = nnLayerOutputSize(layer_weights);
query->layer_outputs[l] = nnMatrixMake(num_inputs, layer_output_size);
}
query->network_outputs = &query->layer_outputs[net->num_layers - 1];
return query;
}
void nnDeleteQueryObject(nnQueryObject** query) {
if ( (!query) || (!(*query)) ) {
return;
}
if ((*query)->layer_outputs != 0) {
for (int l = 0; l < (*query)->num_layers; ++l) {
nnMatrixDel(&(*query)->layer_outputs[l]);
}
}
free((*query)->layer_outputs);
free(*query);
*query = 0;
}
const nnMatrix* nnNetOutputs(const nnQueryObject* query) {
assert(query);
return query->network_outputs;
}
int nnNetInputSize(const nnNeuralNetwork* net) {
assert(net);
assert(net->num_layers > 0);
return net->weights[0].rows;
}
int nnNetOutputSize(const nnNeuralNetwork* net) {
assert(net);
assert(net->num_layers > 0);
return net->weights[net->num_layers - 1].cols;
}
int nnLayerInputSize(const nnMatrix* weights) {
assert(weights);
return weights->rows;
}
int nnLayerOutputSize(const nnMatrix* weights) {
assert(weights);
return weights->cols;
}

src/lib/src/neuralnet_impl.h | 36

@@ -0,0 +1,36 @@
#pragma once
#include <neuralnet/matrix.h>
#include <neuralnet/neuralnet.h> // For nnActivation.
/// Neural network object.
///
/// We store the transposes of the weight matrices so that we can do forward
/// passes with a minimal amount of work. That is, if in paper we write:
///
/// [w11 w21]
/// [w12 w22]
///
/// then the weight matrix in memory is stored as the following array:
///
/// w11 w12 w21 w22
typedef struct nnNeuralNetwork {
int num_layers; // Number of non-input layers (hidden + output).
nnMatrix* weights; // One matrix per non-input layer.
nnMatrix* biases; // One vector per non-input layer.
nnActivation* activations; // One per non-input layer.
} nnNeuralNetwork;
/// A query object that holds all the memory necessary to query a network.
///
/// |layer_outputs| is an array of matrices of intermediate layer outputs. There
/// is one matrix per intermediate layer. Each matrix holds the layer's output,
/// with one row per input, and as many columns as the layer's output size (the
/// output vector is transposed.)
///
/// |network_outputs| points to the last output matrix in |layer_outputs| for
/// convenience.
typedef struct nnQueryObject {
int num_layers;
nnMatrix* layer_outputs; // Output matrices, one output per layer.
nnMatrix* network_outputs; // Points to the last output matrix.
} nnQueryObject;

src/lib/src/train.c | 346

@@ -0,0 +1,346 @@
#include <neuralnet/train.h>
#include <neuralnet/matrix.h>
#include "neuralnet_impl.h"
#include <random/mt19937-64.h>
#include <random/normal.h>
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#define LOGD printf
// If debug mode is requested, print progress every time training advances
// another PROGRESS_THRESHOLD percent of the total iterations.
static const int PROGRESS_THRESHOLD = 5; // %
/// Computes the total MSE from the output error matrix.
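/// MSE = (1/N) * sum_i(e_i^2), where N = rows * cols of the error matrix.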
R ComputeMSE(const nnMatrix* errors) {
R sum_sq = 0;
const int N = errors->rows * errors->cols;
const R* value = errors->values;
for (int i = 0; i < N; ++i) {
sum_sq += *value * *value;
value++;
}
return sum_sq / (R)N;
}
/// Holds the bits required to compute a sigmoid gradient.
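///
/// (The sigmoid derivative is s(x) * (1 - s(x)); a vector of ones lets the
/// (1 - s) term be evaluated with plain matrix operations.)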
typedef struct nnSigmoidGradientElements {
nnMatrix ones; // A vector of just ones, same size as the layer.
} nnSigmoidGradientElements;
/// Holds the various elements required to compute gradients. These depend on
/// which activation function is used, so they will potentially be different
/// for each layer. A data type is defined for these because we allocate all
/// the required memory up front before entering the training loop.
typedef struct nnGradientElements {
nnActivation type;
// Gradient vector, same size as the layer.
// This will contain the gradient expression except for the output value of
// the previous layer.
nnMatrix gradient;
union {
nnSigmoidGradientElements sigmoid;
};
} nnGradientElements;
// Initialize the network's weights randomly and set their biases to 0.
void nnInitNet(nnNeuralNetwork* net, uint64_t seed, const nnWeightInitStrategy strategy) {
assert(net);
mt19937_64 rng = mt19937_64_make();
mt19937_64_init(&rng, seed);
for (int l = 0; l < net->num_layers; ++l) {
nnMatrix* weights = &net->weights[l];
nnMatrix* biases = &net->biases[l];
const R layer_size = (R)nnLayerInputSize(weights);
const R scale = 1. / layer_size;
const R stdev = 1. / sqrt((R)layer_size);
const R sigma = stdev * stdev;
R* value = weights->values;
for (int k = 0; k < weights->rows * weights->cols; ++k) {
switch (strategy) {
case nnWeightInit01: {
const R x01 = mt19937_64_gen_real3(&rng); // (0, +1) interval.
*value++ = scale * x01;
break;
}
case nnWeightInit11: {
const R x11 = mt19937_64_gen_real4(&rng); // (-1, +1) interval.
*value++ = scale * x11;
break;
}
case nnWeightInitNormal: {
// Using initialization with a normal distribution of standard
// deviation 1 / sqrt(num_layer_weights) to prevent saturation when
// multiplying inputs.
const R u01 = mt19937_64_gen_real3(&rng); // (0, +1) interval.
const R v01 = mt19937_64_gen_real3(&rng); // (0, +1) interval.
R z0, z1;
normal2(u01, v01, &z0, &z1);
z0 = normal_transform(z0, /*mu=*/0, sigma);
z1 = normal_transform(z1, /*mu=*/0, sigma);
*value++ = z0;
// Write the second normal sample only if there is room left in the matrix.
if (k < weights->rows * weights->cols - 1) {
*value++ = z1;
++k;
}
break;
}
default:
assert(false);
}
}
// Initialize biases.
// 0 is used so that functions originally go through the origin.
value = biases->values;
for (int k = 0; k < biases->rows * biases->cols; ++k, ++value) {
*value = 0;
}
}
}
// |inputs| has one row vector per sample.
// |targets| has one row vector per sample.
//
// For now, each iteration trains with one sample (row) at a time.
void nnTrain(
nnNeuralNetwork* net,
const nnMatrix* inputs,
const nnMatrix* targets,
const nnTrainingParams* params) {
assert(net);
assert(inputs);
assert(targets);
assert(params);
assert(nnNetOutputSize(net) == targets->cols);
assert(net->num_layers > 0);
// Allocate error vectors to hold the backpropagated error values.
// For now, these are one row vector per layer, meaning that we will train
// with one sample at a time.
nnMatrix* errors = calloc(net->num_layers, sizeof(nnMatrix));
// Allocate the weight transpose matrices up front for backpropagation.
nnMatrix* weights_T = calloc(net->num_layers, sizeof(nnMatrix));
// Allocate the weight delta matrices.
nnMatrix* weight_deltas = calloc(net->num_layers, sizeof(nnMatrix));
// Allocate the data structures required to compute gradients.
// This depends on each layer's activation type.
nnGradientElements* gradient_elems = calloc(net->num_layers, sizeof(nnGradientElements));
// Allocate the output transpose vectors for weight delta calculation.
// This is one column vector per layer.
nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix));
assert(errors != 0);
assert(weights_T != 0);
assert(weight_deltas != 0);
assert(gradient_elems);
assert(outputs_T);
for (int l = 0; l < net->num_layers; ++l) {
const nnMatrix* layer_weights = &net->weights[l];
const int layer_output_size = net->weights[l].cols;
const nnActivation activation = net->activations[l];
errors[l] = nnMatrixMake(1, layer_weights->cols);
weights_T[l] = nnMatrixMake(layer_weights->cols, layer_weights->rows);
nnMatrixTranspose(layer_weights, &weights_T[l]);
weight_deltas[l] = nnMatrixMake(layer_weights->rows, layer_weights->cols);
outputs_T[l] = nnMatrixMake(layer_output_size, 1);
// Allocate the gradient elements and vectors for weight delta calculation.
nnGradientElements* elems = &gradient_elems[l];
elems->type = activation;
switch (activation) {
case nnIdentity:
break; // Gradient vector will be borrowed, no need to allocate.
case nnSigmoid:
elems->gradient = nnMatrixMake(1, layer_output_size);
// Allocate the 1s vectors.
elems->sigmoid.ones = nnMatrixMake(1, layer_output_size);