CUDA
C/C++    Fortran   

examples/machine_learning/neuralnetwork.cpp

#include <arrayfire.h>
#include <stdio.h>
#include "ppm_utils.h"

using namespace af;

// Layer sizes of the network.
// num_input_nodes starts at 0 and is assigned by neural_network_demo() before
// the weights are created; forward_propogate() divides by it, so it must be
// set to a non-zero value before the network is evaluated.
int num_input_nodes  = 0; // one input node per image pixel
int num_hidden_nodes = 256;
int num_output_nodes = 2; // one score per class; the demo's targets are the columns (1,0) and (0,1)

// Number of passes made over the whole sample set (one train() call per pass).
int training_iterations = 200;
// Scale factor applied to both weight gradients in backward_propogate().
double learning_rate = 0.9;

// Input-to-hidden weights, created with shape (num_hidden_nodes, num_input_nodes).
array input_weights;
// Hidden-to-output weights, created with shape (num_output_nodes, num_hidden_nodes).
array output_weights;

//Logistic activation 1 / (1 + e^-x), applied element-wise.
//Uses exp() rather than a truncated literal for e so the activation is the
//true logistic function, whose derivative y * (1 - y) is what
//backward_propogate() relies on.
static array logistic_activation(const array& values) {
    return 1 / (1 + exp(-values));
}

//Run one sample through the network and return the output node values.
//  input  - sample to evaluate; it is multiplied by input_weights, so it must
//           be a column vector with num_input_nodes elements
//  hidden - [out] receives the hidden layer activations; only assigned after
//           the whole forward pass has been computed
//Each layer's weighted sum is divided by the number of nodes feeding it
//(num_input_nodes, then num_hidden_nodes) before the activation is applied.
array forward_propogate(const array& input, array& hidden) {
    //Hidden layer: scaled weighted sum of the inputs, then activation
    array hiddenNodesValues = logistic_activation(matmul(input_weights, input) / num_input_nodes);

    //Output layer: scaled weighted sum of the hidden activations, then activation
    array outputNodesValues = logistic_activation(matmul(output_weights, hiddenNodesValues) / num_hidden_nodes);

    hidden = hiddenNodesValues;

    return outputNodesValues;
}

//Compute the weight corrections for a single training example.
//  input       - the sample that was fed to forward_propogate() (column vector)
//  hidden      - hidden layer activations produced by forward_propogate()
//  result      - output node values produced by forward_propogate()
//  expectation - desired output node values for this sample
//  gradient_input_weights  - [out] correction with the same layout as input_weights
//  gradient_output_weights - [out] correction with the same layout as output_weights
//The error is taken as (expectation - result), so the caller applies the
//corrections by adding them to the weights. learning_rate is already folded
//into both outputs. The global weights are only read here, never modified.
void backward_propogate(const array& input, const array& hidden, const array& result, const array& expectation, array& gradient_input_weights, array& gradient_output_weights) {
    //Determine error for each layer. The error is not pushed back past the
    //hidden layer: no weights sit in front of the input nodes, so an
    //input-layer error would never be used.
    array output_error = expectation - result;
    //NOTE(review): the hidden error is derived from the raw output error,
    //without the output activation derivative - confirm this is intended.
    array hidden_nodes_error = matmul(output_weights.T(), output_error);

    //Derivative of the logistic activation, expressed through its output: y * (1 - y)
    array deriv_hidden = hidden * (1 - hidden);
    array deriv_output = result * (1 - result);

    //Correct weights using gradient. The per-node column vectors are tiled
    //across the columns and the transposed previous-layer values are tiled
    //down the rows, so the element-wise product forms one entry per weight.
    gradient_input_weights = learning_rate
                             * tile(hidden_nodes_error, 1, num_input_nodes)
                             * tile(deriv_hidden, 1, num_input_nodes)
                             * tile(input.T(), num_hidden_nodes);
    gradient_output_weights = learning_rate
                              * tile(output_error, 1, num_hidden_nodes)
                              * tile(deriv_output, 1, num_hidden_nodes)
                              * tile(hidden.T(), num_output_nodes);
}

//Make one pass over the training set, updating the global weights after
//every sample.
//  samples      - one training sample per row
//  expectations - one column of desired output values per sample, in the
//                 same order as the rows of samples
void train(array samples, array expectations) {
    unsigned int row = 0;
    while (row < samples.dims()[0]) {
        //The network works on column vectors, so transpose the sample row
        array sample_column = samples(row, span).T();
        array target_column = expectations.col(row);

        //Forward pass; the hidden activations are kept for the backward pass
        array hidden_activations;
        array network_output = forward_propogate(sample_column, hidden_activations);

        //Backward pass fills in one correction per weight matrix
        array delta_input_weights;
        array delta_output_weights;
        backward_propogate(sample_column, hidden_activations, network_output, target_column, delta_input_weights, delta_output_weights);

        //Apply the corrections right away so the next sample sees them
        input_weights += delta_input_weights;
        output_weights += delta_output_weights;

        ++row;
    }
}

//Run the complete demo: load six training images, train the network for
//training_iterations passes, then classify two modified test images and
//report both output scores for each.
//  console - when true, only text is printed; when false, the two test images
//            are also shown in a figure and the function waits for [enter]
//ArrayFire errors are not handled here; they propagate to the caller.
void neural_network_demo(bool console) {
    //Load samples and expected results
    num_input_nodes = 128 * 128; // image pixels (presumably the images are 128x128 - confirm)

    //One flattened image per row, alternating AMD / Intel
    array samples = join(0,
                         flat(load_gray_ppm("images/AMD1.ppm")).T(),
                         flat(load_gray_ppm("images/Intel1.ppm")).T(),
                         flat(load_gray_ppm("images/AMD2.ppm")).T());
    samples = join(0,
                   samples,
                   flat(load_gray_ppm("images/Intel2.ppm")).T(),
                   flat(load_gray_ppm("images/AMD3.ppm")).T(),
                   flat(load_gray_ppm("images/Intel3.ppm")).T());
    //NOTE: Grayscale images should be 0-1 for this

    //Target outputs, one column per sample: the 2x2 pattern is repeated three
    //times across the columns to cover the six alternating samples
    float expect[] = {1, 0, 0, 1};
    array expectations(2, 2, expect);
    expectations = tile(expectations, 1, 3);

    //Initialize neural network to something
    input_weights = randu(num_hidden_nodes, num_input_nodes);
    output_weights = randu(num_output_nodes, num_hidden_nodes);

    printf("Training %d times\n", training_iterations);
    timer::start();

    //train many times
    for (int i = 0; i < training_iterations; i++) {
        printf(" iteration %d \n", i);
        train(samples, expectations);
    }

    printf("Done Training\n");

    //The element count of one column of samples is the number of rows,
    //i.e. the number of training samples
    printf("\nTook %g seconds per sample\n\n", timer::stop() / training_iterations / samples.col(0).elements());

    //Load comparison images and test
    array AMDtest = load_gray_ppm("images/ModifiedAMD.ppm");
    array Inteltest = load_gray_ppm("images/ModifiedIntel.ppm");

    //The hidden activations are not needed when only classifying
    array garbage;
    array AMDresult = forward_propogate(flat(AMDtest) , garbage);
    array Intelresult = forward_propogate(flat(Inteltest) , garbage);

    //Output node 0 is the AMD score, output node 1 the Intel score
    float AMDresultAMD = AMDresult(0).scalar<float>();
    float AMDresultIntel = AMDresult(1).scalar<float>();
    float IntelresultAMD = Intelresult(0).scalar<float>();
    float IntelresultIntel = Intelresult(1).scalar<float>();

    printf("AMD Image: %f AMD, %f Intel\nIntel Image: %f AMD, %f Intel\n", AMDresultAMD, AMDresultIntel, IntelresultAMD, IntelresultIntel);

    if (!console) {
        char AMDTitle[50];
        char IntelTitle[50];

        //Bounded formatting: a title can never be written past its buffer
        snprintf(AMDTitle, sizeof(AMDTitle), "%f %s %f %s", AMDresultAMD, "AMD", AMDresultIntel, "Intel");
        snprintf(IntelTitle, sizeof(IntelTitle), "%f %s %f %s", IntelresultAMD, "AMD", IntelresultIntel, "Intel");

        fig("color","gray");
        fig("sub",2,1,1); image(AMDtest);       fig("title",AMDTitle);
        fig("sub",2,1,2); image(Inteltest);     fig("title",IntelTitle);

        printf("Press [enter]...\n");
        getchar();
    }
}

int main(int argc, char** argv) {
    bool console = false;
    if (argc > 2 || (argc == 2 && argv[1][0] != '-')) {
        printf("usage: %s [-]\n", argv[0]);
        return -1;
    } else if (argc == 2 && argv[1][0] == '-') {
        console = true;
    }

    try {
        printf("** ArrayFire Neural Network Demo **\n\n");
        neural_network_demo(console);
    } catch (af::exception& e) {
        fprintf(stderr, "%s\n", e.what());
    }

#ifdef WIN32
    if (!(argc == 2 && argv[1][0] == '-')) {
        printf("Hit [enter]...");
        getchar();
    }
#endif

    return 0;
}