python - Simple Neural Network with MNIST stuck at 17% missclasification error -


hello im putting code inspection because ive been playing neural network implementation in python few weeks , cant seem reach misclasification error below 17% times 16%. ve been trying different learning rate values, different hidden neurons number , still not lot of mprovement. im aware implementation basic traditional neural network, expecting better results according other implementations ive seen in internet. hope of interest guys, cool if point me new ideas problem in code, or maybe think best can traditional implementation , should add new, cool too.

in case, here code, hope readable enough, tried simple possible, since way understand how neural networks work.

edit:perhaps question not clear, like, if of interest guys, me find details in current implementation improve misclasification error below 17% because apparently best implementation can do. thankful advise or idea, im interested in topic, im beginner , great have smart ideas can me improve implementation.

file: mnist_dataset.py - extract mnist data

import numpy np struct import unpack  train_input_file = open("dataset/train-images-idx3-ubyte", "rb") train_output_file = open("/dataset/train-labels-idx1-ubyte", "rb") test_input_file = open("dataset/t10k-images-idx3-ubyte", "rb") test_output_file = open("dataset/t10k-labels-idx1-ubyte", "rb")   def readdata(f,labels = false,scale = 1):     header = hex(unpack('>l',np.fromfile(f,dtype=np.int32,count=1)[0])[0]) num = int(unpack('>l',np.fromfile(f,dtype=np.int32,count=1)[0])[0]) col = 1 row = 1  if labels == false:     row = int(unpack('>l',np.fromfile(f,dtype=np.int32,count=1)[0])[0])     col = int(unpack('>l',np.fromfile(f,dtype=np.int32,count=1)[0])[0])      data = np.zeros((int(num/scale),col*row))     in range(0,int(num/scale),1):     data[i] = np.fromfile(f,dtype=np.ubyte,count=col*row) return data   def getmnistdata():     def norm(v):         return v/255  train_input = readdata(train_input_file, scale=1)/255.0 train_out = readdata(train_output_file, true,scale=1) test_input = readdata(test_input_file)/255.0 test_out = readdata(test_output_file, true)  print "train input: " + str(train_input.shape) print "train output: " + str(train_out.shape) print "test input: " + str(test_input.shape) print "test output: " + str(test_out.shape)  train_input_file.close() train_output_file.close() test_input_file.close() test_output_file.close() return (train_input,train_out,test_input,test_out) 

file: nn.py - neural network implementation

import mnist_dataset import numpy np import random import matplotlib.pyplot plt   def encode_data_10(v):     e = (0.0) * np.ones((1, 10), dtype=float)     e[:, int(v)] = 1.0     return e.tolist()  def encode_data_1(v):     n = -1.0 + ((0.2)*v)     return n  x_train, y_train, x_test, y_test =  mnist_dataset.getmnistdata()  learning_rate = 1.0 iter = 3000 sample_size = 30 num_hidden_neurons = 500 num_output_neurons = 10  if num_output_neurons > 1:     y_train = np.matrix(np.array(map(encode_data_10,y_train)))     y_test = np.matrix(np.array(map(encode_data_10,y_test))) else:     y_train = np.matrix(map(encode_data_1,y_train))     y_test = np.matrix(map(encode_data_1,y_test))   def getsample(sample_size,x,y):     r = random.sample(xrange(1, len(y), 1), sample_size)     x_r = np.zeros((sample_size,x.shape[1]))     y_r = np.zeros((sample_size,y.shape[1]))     i,n in enumerate(r):         x_r[i] = x[n]         y_r[i] = y[n]     return (x_r,y_r)  inputvector, targetvector = getsample(sample_size, x_train, y_train)     hiddenweights = np.mat(np.random.random((num_hidden_neurons, x_train.shape[1]))) print "w0 shape: " + str(hiddenweights.shape) outputweights = np.mat(np.random.random((num_output_neurons,num_hidden_neurons))) print "w1 shape: " + str(outputweights.shape)   def act_func_l1(a):     return (1.0/(1 + np.exp(-a)))  def der_act_func_l1(a):     return act_func_l1(a)*(1.0 - act_func_l1(a))   def feedforward(l0):     global hiddenweights     global outputweights      z1 = l0 * hiddenweights.t      layer1 = np.matrix(act_func_l1(np.asarray(z1)))      z2 = layer1 * outputweights.t      layer2 = act_func_l1(np.asarray(z2))      return (layer1,layer2)  def miss(x,y):     layer1, layer2 = feedforward(x)      def c(n):         if n > 0.5:             return 1.0         else:             return 0.0      layer2 = map(lambda v: map(c, v), layer2)      def cc(t):         return np.abs(cmp(np.array(y[t[0]]).tolist()[0], np.array(t[1]).tolist()))     return (np.sum(map(cc, enumerate(layer2))))  miss_x = np.zeros((iter, 1)) j in xrange(iter):      hiddenactualinput = inputvector * hiddenweights.t      hiddenoutputvector = np.matrix(act_func_l1(np.asarray(hiddenactualinput)))      outputactualinput = hiddenoutputvector * outputweights.t      outputvector = act_func_l1(np.asarray(outputactualinput))       layer2_error2 = np.square(outputvector - targetvector)     print "error: " + str(np.mean(np.abs(layer2_error2)))       m = miss(x_test,y_test)     miss_x[j] = m     print str(j) + " - misses (%): " + str(m)     if m <= 2000:         learning_rate = 0.05       outputdelta = np.mat(der_act_func_l1(np.asarray(outputvector))*np.asarray(outputvector - targetvector))     hiddendelta =  np.mat(der_act_func_l1(np.asarray(hiddenoutputvector)) * np.asarray((outputdelta*outputweights)))      hiddenweights = np.mat(hiddenweights.t - (learning_rate*np.asarray(inputvector.t*hiddendelta))).t     outputweights = np.mat(outputweights.t - (learning_rate*np.asarray(hiddenoutputvector.t*outputdelta))).t      inputvector, targetvector = getsample(sample_size, x_train, y_train)  plt.plot(xrange(iter), miss_x, label = 'miss rate(%)') plt.legend(loc='upper right') plt.show() 


Comments