Tensorflow bindings

Basic XOR example

The following example demonstrates the Tensorflow bindings using the XOR function as an example. The input features and the desired output labels are provided using placeholder values. The network is trained using gradient descent. Finally, the trained network's predictions for the input features are displayed.

; Load modules for arrays, the Tensorflow bindings, and formatted output.
(use-modules (aiscm core) (aiscm tensorflow) (ice-9 format))

; XOR truth table: four 2-bit input rows and the corresponding 1-bit outputs.
(define features (arr <float> (0 0) (0 1) (1 0) (1 1)))
(define labels (arr <float> (0) (1) (1) (0)))

; Create a Tensorflow session for running graph operations.
(define s (make-session))

; Placeholders for a batch of inputs (n x 2) and desired outputs (n x 1);
; -1 in the shape stands for an arbitrary batch size.
(define x (tf-placeholder #:dtype <float> #:shape '(-1 2) #:name "x"))
(define y (tf-placeholder #:dtype <float> #:shape '(-1 1) #:name "y"))
; Hidden layer: 2x2 weight matrix with truncated-normal random initialisation
; and a zero bias vector; activations use the hyperbolic tangent.
(define m1 (tf-variable #:dtype <float> #:shape '(2 2) #:name "m1"))
(run s '() (tf-assign m1 (tf-truncated-normal (tf-shape m1) #:dtype <float>)))
(define b1 (tf-variable #:dtype <float> #:shape '(2) #:name "b1"))
(run s '() (tf-assign b1 (fill <float> '(2) 0.0)))
(define h1 (tf-tanh (tf-add (tf-mat-mul x m1) b1)))

; Output layer: 2x1 weight matrix, random initialisation, zero bias;
; the sigmoid activation yields an output in (0, 1).
(define m2 (tf-variable #:dtype <float> #:shape '(2 1) #:name "m2"))
(run s '() (tf-assign m2 (tf-truncated-normal (tf-shape m2) #:dtype <float>)))
(define b2 (tf-variable #:dtype <float> #:shape '(1) #:name "b2"))
(run s '() (tf-assign b2 (arr <float> 0)))
(define ys (tf-sigmoid (tf-add (tf-mat-mul h1 m2) b2) #:name "ys"))

; Cross-entropy cost: -mean(y * log(ys) + (1 - y) * log(1 - ys)),
; averaged over both axes (batch and output dimension).
(define one (tf-cast 1 #:DstT <float>))
(define cost (tf-neg (tf-mean (tf-add (tf-mul y (tf-log ys)) (tf-mul (tf-sub one y) (tf-log (tf-sub one ys)))) (arr <int> 0 1))))

; Parameters to optimise and the symbolic gradients of the cost
; with respect to each of them.
(define vars (list m1 b1 m2 b2))
(define gradients (tf-add-gradient cost vars))

; One gradient-descent step per parameter: v <- v - alpha * dC/dv,
; with learning rate alpha = 1.0.
(define alpha (tf-cast 1.0 #:DstT <float>))
(define step (map (lambda (v g) (tf-assign v (tf-sub v (tf-mul g alpha)))) vars gradients))

; Train for 250 iterations on the full batch, then print the network output
; for all four input rows.
(for-each (lambda _ (run s (list (cons x features) (cons y labels)) step)) (iota 250))
(format #t "~a~&" (run s (list (cons x features)) ys))

MNIST example

The MNIST dataset is a benchmark dataset for handwritten digit recognition.

mnist.jpg

The following example trains a convolutional neural network which achieves an error rate below 3%.

(use-modules (oop goops)
             (ice-9 binary-ports)
             (ice-9 format)
             (srfi srfi-1)
             (rnrs bytevectors)
             (system foreign)
             (aiscm core)
             (aiscm xorg)
             (aiscm util)
             (aiscm tensorflow))

(define (read-images file-name)
  "Read an MNIST IDX image file and return an n x h x w array of unsigned bytes."
  (let* [(f     (open-file file-name "rb"))
         ; Reader for one big-endian 32-bit unsigned integer from the header.
         (u32   (lambda () (bytevector-u32-ref (get-bytevector-n f 4) 0 (endianness big))))
         (magic (u32))
         (n     (u32))
         (h     (u32))
         (w     (u32))]
    ; Close the port on the error path too (the original implementation
    ; never closed the file handle at all).
    (if (not (eqv? magic 2051)) (begin (close-port f) (error "Images file has wrong magic number")))
    (let [(bv (get-bytevector-n f (* n h w)))]
      (close-port f)
      (make (multiarray <ubyte> 3) #:memory (bytevector->pointer bv) #:shape (list n h w)))))

(define (read-labels file-name)
  "Read an MNIST IDX label file and return a one-dimensional array of bytes."
  (let* [(f     (open-file file-name "rb"))
         (magic (bytevector-u32-ref (get-bytevector-n f 4) 0 (endianness big)))
         (n     (bytevector-u32-ref (get-bytevector-n f 4) 0 (endianness big)))]
    ; Close the port on the error path too (the original implementation
    ; never closed the file handle at all).
    (if (not (eqv? magic 2049)) (begin (close-port f) (error "Label file has wrong magic number")))
    (let [(bv (get-bytevector-n f n))]
      (close-port f)
      (make (multiarray <ubyte> 1) #:memory (bytevector->pointer bv) #:shape (list n)))))

; Load MNIST data set available at http://yann.lecun.com/exdb/mnist/
(define images (read-images "train-images-idx3-ubyte"))
(define labels (read-labels "train-labels-idx1-ubyte"))
; Number of training samples (first dimension of the image array).
(define n (car (shape images)))

; Create Tensorflow session
(define s (make-session))

; Define placeholders for features (images) and labels
; (-1 in the shape stands for an arbitrary batch size).
(define x (tf-placeholder #:dtype <ubyte> #:shape '(-1 28 28)))
(define y (tf-placeholder #:dtype <ubyte> #:shape '(-1)))

; Determine parameters for feature scaling:
; mean and standard deviation over all pixels of the training set.
(define (sqr x) (* x x))
(define count (apply * (shape images)))
(define average (exact->inexact (/ (sum images) count)))
(define stddev (sqrt (/ (sum (sqr (- images average))) count)))

; Scale features to zero mean and unit standard deviation and reshape to a
; 4D array (batch, height, width, channels) for the convolutional layers.
(define r1 (tf-reshape (tf-mul (/ 1 stddev) (tf-sub (tf-cast x #:DstT <double>) average)) (arr <int> -1 28 28 1)))
; First convolutional layer with max-pooling and ReLU activation function.
; 3x3 kernels, 1 input channel, 4 output channels; VALID padding shrinks
; 28x28 -> 26x26, then 2x2 max-pooling halves it to 13x13.
(define k1 (tf-variable #:dtype <double> #:shape '(3 3 1 4)))
(define b1 (tf-variable #:dtype <double> #:shape '(4)))
(define c1 (tf-add (tf-conv2d r1 k1 #:strides '(1 1 1 1) #:padding 'VALID) b1))
(define p1 (tf-relu (tf-max-pool c1 #:strides '(1 2 2 1) #:ksize '(1 2 2 1) #:padding 'VALID)))

; Second convolutional layer with max-pooling and ReLU activation function.
; 3x3 kernels, 4 input channels, 16 output channels; 13x13 -> 11x11 -> 5x5.
(define k2 (tf-variable #:dtype <double> #:shape '(3 3 4 16)))
(define b2 (tf-variable #:dtype <double> #:shape '(16)))
(define c2 (tf-add (tf-conv2d p1 k2 #:strides '(1 1 1 1) #:padding 'VALID) b2))
(define p2 (tf-relu (tf-max-pool c2 #:strides '(1 2 2 1) #:ksize '(1 2 2 1) #:padding 'VALID)))

; Reshape to 2D array
; Flatten the 5x5x16 feature maps to vectors of length d = 400.
(define d (* 5 5 16))
(define r2 (tf-reshape p2 (to-array <int> (list -1 d))))

; First fully connected layer with bias units and ReLU activation function.
; Maps the 400-dimensional feature vector to 40 hidden units.
(define m3 (tf-variable #:dtype <double> #:shape (list d 40)))
(define b3 (tf-variable #:dtype <double> #:shape '(40)))
(define l3 (tf-relu (tf-add (tf-mat-mul r2 m3) b3)))

; Second fully connected layer with bias units and softmax activation function.
; Produces one probability per digit class (10 outputs).
(define m4 (tf-variable #:dtype <double> #:shape '(40 10)))
(define b4 (tf-variable #:dtype <double> #:shape '(10)))
(define l (tf-softmax (tf-add (tf-mat-mul l3 m4) b4)))

; Classification result of neural network.
; tf-arg-max over axis 1 selects the index of the most probable digit.
(define prediction (tf-arg-max l 1 #:name "prediction"))

; Random initialization of network parameters
; Weights use He-style scaling sqrt(2 / fan-in) on truncated-normal noise;
; all biases start at zero.
(define initializers
  (list (tf-assign k1 (tf-mul (sqrt (/ 2 (* 3 3))) (tf-truncated-normal (arr <int> 3 3 1 4) #:dtype <double>)))
        (tf-assign b1 (fill <double> '(4) 0.0))
        (tf-assign k2 (tf-mul (sqrt (/ 2 (* 3 3 4))) (tf-truncated-normal (arr <int> 3 3 4 16) #:dtype <double>)))
        (tf-assign b2 (fill <double> '(16) 0.0))
        (tf-assign m3 (tf-mul (sqrt (/ 2 d)) (tf-truncated-normal (to-array <int> (list d 40)) #:dtype <double>)))
        (tf-assign b3 (fill <double> '(40) 0.0))
        ; NOTE(review): (/ 2 (sqrt 40)) deviates from the (sqrt (/ 2 fan-in))
        ; pattern used for the other weights — possibly intentional; verify.
        (tf-assign m4 (tf-mul (/ 2 (sqrt 40)) (tf-truncated-normal (arr <int> 40 10) #:dtype <double>)))
        (tf-assign b4 (fill <double> '(10) 0.0))))
(run s '() initializers)

; List of all network parameters to be optimised by gradient descent.
; Note: the original list omitted the convolution biases b1 and b2, which
; were initialised above but then never updated during training; they are
; included here so all layers train, matching the XOR example.
(define vars (list k1 b1 k2 b2 m3 b3 m4 b4))

; Logistic loss function
; One-hot encode the digit labels so they match the softmax output shape.
(define yh (tf-one-hot y 10 1.0 0.0))
; Clamp the argument away from zero to avoid log(0) = -inf.
(define (safe-log x) (tf-log (tf-maximum x 1e-10)))
(define (invert x) (tf-sub 1.0 x))
; Cross-entropy: -mean(yh * log(l) + (1 - yh) * log(1 - l)) over batch and classes.
(define loss (tf-neg (tf-mean (tf-add (tf-mul yh (safe-log l)) (tf-mul (invert yh) (safe-log (invert l)))) (arr <int> 0 1))))

; Regularization term
; Penalty on the squared magnitudes of the fully connected weight matrices.
(define regularization (tf-add (tf-mean (tf-square (tf-abs m3)) (arr <int> 0 1)) (tf-mean (tf-square (tf-abs m4)) (arr <int> 0 1))))

; Overall cost
; la weighs the regularization term against the data loss.
(define la 0.02)
(define cost (tf-add loss (tf-mul la regularization)))

; Implement gradient descent step
; For each parameter v with gradient g: v <- v - alpha * g.
(define gradients (tf-add-gradient cost vars))
(define alpha 0.4)
(define step (map (lambda (v g) (tf-assign v (tf-sub v (tf-mul g alpha)))) vars gradients))

; Perform gradient descent
; j is an exponential moving average of the mini-batch cost, used only for
; progress reporting.
(define j 0.0)
; Three epochs over the training set in mini-batches of 50 samples.
(for-each
  (lambda (epoch)
    (for-each
      (lambda (i)
        ; Take the mini-batch of 50 samples starting at index i, evaluate the
        ; cost on it, then apply one gradient-descent step.
        (let* [(range (cons i (+ i 50)))
               (batch (list (cons x (unroll (get images range))) (cons y (get labels range))))
               (js    (run s batch cost))]
          (set! j (+ (* 0.99 j) (* 0.01 js)))
          (format #t "\repoch ~2d, ~5d/~5d: ~6,4f" epoch i n j)
          (run s batch step)))
      (iota (/ n 50) 0 50)))
  (iota 3))
(format #t "~&")

; Load MNIST test data.
(define test-images (read-images "t10k-images-idx3-ubyte"))
(define test-labels (read-labels "t10k-labels-idx1-ubyte"))
(define n-test (car (shape test-images)))

; Display cost function result for (part of) training and test data
; (only the first 10000 training samples, matching the test set size).
(define j (run s (list (cons x (unroll (get images '(0 . 10000)))) (cons y (get labels '(0 . 10000)))) loss))
(define jt (run s (list (cons x test-images) (cons y test-labels)) loss))
(format #t "train: ~6,4f; test: ~6,4f~&" j jt)

; Determine error rate
; Count the test samples where the predicted digit equals the label.
(define predicted (run s (list (cons x test-images)) prediction))
(define n-correct (sum (where (eq predicted test-labels) 1 0)))
(format #t "error rate: ~6,4f~&" (- 1.0 (/ n-correct n-test)))

; Display individual results
; Cycle through the test images in an OpenGL window, printing the network's
; predicted digit for each displayed image.
(define time (clock))
(define i -1)
(show
  (lambda (dsp)
    (set! i (1+ i))
    (let* [(image (get test-images (modulo i 10000)))
           (pred (get (run s (list (cons x image)) prediction) 0))]
      ; NOTE(review): presumably paces playback against the wall clock so each
      ; image is shown for a fixed interval — confirm against (aiscm util).
      (synchronise image (- i (elapsed time)) (event-loop dsp))
      (format #t "~a~&" pred)
      image))
  #:width 280
  #:io IO-OPENGL)

AIscm documentation generated by Pandoc 2023-02-14