code/artificial_intelligence/src/convolutional_neural_network/cnn.ipynb
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Load the training and test tables from CSV files in the working directory.
training_set = pd.read_csv('train.csv')
test_set = pd.read_csv('test.csv')
training_set.head()
test_set.head()

# Column 0 of the training table is the target; the 0:1 slice keeps it
# two-dimensional, i.e. shape (n_samples, 1).
y_train = training_set.iloc[:, 0:1].values

# The remaining training columns (and every test column) are viewed as
# 28x28 single-channel images and divided by 255.
# NOTE(review): presumably the raw values are 8-bit pixel intensities, which
# would put the result in [0, 1] -- confirm against the CSV contents.
x_train = training_set.iloc[:, 1:].values.reshape(-1, 28, 28, 1) / 255.
x_test = test_set.values.reshape(-1, 28, 28, 1) / 255.
from keras.utils import np_utils
# One-hot encode the targets into 10 columns, one per class.
y_train = np_utils.to_categorical(y_train, 10)
y_train
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
# Convolutional classifier for 28x28 single-channel images with 10 classes.
classifier = Sequential()

# Stage 1: one 5x5 convolution (32 filters), 2x2 max-pooling, 25% dropout.
classifier.add(Conv2D(32, (5, 5), padding='same', activation='relu',
                      input_shape=(28, 28, 1)))
classifier.add(MaxPool2D(pool_size=(2, 2)))
classifier.add(Dropout(0.25))

# Stage 2: two 3x3 convolutions (64 filters each), 2x2 max-pooling with
# stride 2, 25% dropout.
classifier.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
classifier.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
classifier.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
classifier.add(Dropout(0.25))

# Classification head: flatten, 256-unit dense layer, 50% dropout.
classifier.add(Flatten())
classifier.add(Dense(units=256, activation='relu'))
classifier.add(Dropout(0.5))

# Softmax makes the ten outputs a single probability distribution, which is
# what categorical cross-entropy against one-hot targets expects. A sigmoid
# here would score every class independently instead.
classifier.add(Dense(units=10, activation='softmax'))

classifier.compile(optimizer='rmsprop',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])
from keras.preprocessing.image import ImageDataGenerator
# Augmentation applied to training images: random rotation (range 10), zoom
# (range 0.1) and horizontal/vertical shifts (range 0.1). Flips and every
# centering / std-normalisation / whitening option are explicitly disabled.
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=False,
    vertical_flip=False,
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)
# NOTE(review): with all feature-wise options off this fit is presumably not
# needed; it is kept so the generator is set up exactly as before.
datagen.fit(x_train)

# Train for 30 epochs on augmented batches of 128 samples; one epoch is as
# many full batches as fit into the training set.
track = classifier.fit_generator(
    datagen.flow(x_train, y_train, batch_size=128),
    steps_per_epoch=x_train.shape[0] // 128,
    epochs=30,
)
# Plot the per-epoch training loss and accuracy recorded in `track`.
track.history.keys()

# Depending on the Keras release, the 'accuracy' metric is logged under either
# 'acc' or 'accuracy'; use whichever key is present instead of hard-coding one.
acc_key = 'acc' if 'acc' in track.history else 'accuracy'

plt.subplot(2, 1, 1)
plt.plot(track.history['loss'])
plt.title("Model Loss")

plt.subplot(2, 1, 2)
plt.plot(track.history[acc_key])
plt.title("Model Accuracy")
# Per-class scores for every test image (one row per image).
predict = classifier.predict(x_test)
predict

# Reduce each row to the index of its highest score, i.e. the predicted class.
predict = predict.argmax(axis=1)
predict
# Submission table: a single 'Label' column holding the predicted class,
# indexed by a 1-based 'ImageId' (1 .. number of predictions).
submit = pd.DataFrame(
    {'Label': predict},
    index=pd.RangeIndex(1, len(predict) + 1, name='ImageId'),
)
submit.head()

# The index is written as the first CSV column, giving 'ImageId,Label' rows.
submit.to_csv('digit_submit.csv')
submit.shape
# Display the first n test images side by side as a quick visual check.
n = 9
plt.figure(figsize=(20, 8))
for i in range(n):
    # Subplot positions are 1-based, hence i + 1.
    plt.subplot(1, n, i + 1)
    # Drop the trailing channel axis so imshow receives a 2-D 28x28 array.
    plt.imshow(x_test[i].reshape(28, 28))
plt.show()