{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 机器学习与社会科学应用"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第七章 神经网络"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第二节 卷积神经网络"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<font face=\"宋体\" >郭峰    \n",
    "    教授、博士生导师  \n",
    "上海财经大学公共经济与管理学院  \n",
    "上海财经大学数实融合与智能治理实验室    \n",
    "    邮箱：guofengsfi@163.com</font> "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<font face=\"宋体\" >本节目录：  \n",
    "2.1.导入模块    \n",
    "2.2.加载数据  \n",
    "2.3.预处理数据  \n",
    "2.4.定义模型结构  \n",
    "2.5.编译  \n",
    "2.6.训练  \n",
    "2.7.评估模型</font> "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.1.导入模块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.models import Sequential  #from keras.models import Sequential\n",
    "#接下来，我们导入各种层(各种形状各异积木)\n",
    "from keras.layers import Conv2D, MaxPool2D\n",
    "from keras.layers import Dense, Flatten\n",
    "\n",
    "#最后，我们导入to_categorical函数，以便之后对数据进行转换\n",
    "#from keras.utils import to_categorical\n",
    "\n",
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.2.加载数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#MNIST是一个非常有名的手写数字数据集，我们可以使用Keras轻松加载它\n",
    "from keras.datasets import mnist\n",
    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
    "print(x_train.shape)# (60000, 28, 28) 60000个样本，它们都是28像素x28像素的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#看一下这些手写数字长什么样\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "plt.imshow(x_train[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.3.预处理数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#使用Keras是必须显式声明输入图像深度的尺寸。例如，具有所有3个RGB通道的全色图像的深度为3。\n",
    "#我们的MNIST图像的深度为1，但我们必须明确声明。\n",
    "#也就是说，我们希望将数据集从形状(n,rows,cols)转换为(n,rows,cols,channels)。\n",
    "img_x, img_y = 28, 28\n",
    "x_train = x_train.reshape(x_train.shape[0], img_x, img_y, 1)\n",
    "x_test = x_test.reshape(x_test.shape[0], img_x, img_y, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#除此之外，我们将数据标准化一下：\n",
    "x_train = x_train.astype('float32')\n",
    "x_test = x_test.astype('float32')\n",
    "x_train /= 255\n",
    "x_test /= 255"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#将标记值(y_train, y_test)转换为One-Hot Encode的形式\n",
    "y_train = tf.keras.utils.to_categorical(y_train, 10)\n",
    "y_test = tf.keras.utils.to_categorical(y_test, 10)\n",
    "print(y_train.shape) # (60000, 10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.4.定义模型结构"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = Sequential()\n",
    "model.add(Conv2D(32, kernel_size=(5,5), activation='relu', input_shape=(img_x, img_y, 1))) #一层卷积层，包含了32个卷积核，大小为5*5\n",
    "model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))  #一个最大池化层，池化大小为2*2\n",
    "model.add(Conv2D(64, kernel_size=(5,5), activation='relu'))  #添加一个卷积层，包含64个卷积和，每个卷积核仍未5*5\n",
    "model.add(MaxPool2D(pool_size=(2,2), strides=(2,2))) #一个最大池化层，池化大小为2*2\n",
    "model.add(Flatten())  #压平层\n",
    "model.add(Dense(1000, activation='relu'))   #来一个全连接层\n",
    "model.add(Dense(10, activation='softmax'))  #最后为分类层"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#另一个定义\n",
    "# build the neural net 建模型(卷积—relu-卷积-relu-池化-relu-卷积-relu-池化-全连接)\n",
    "#model = Sequential()\n",
    "#model.add(Conv2D(32, kernel_size=(3, 3),\n",
    "#                 activation='relu',\n",
    "#                 input_shape=input_shape)) # 32个过滤器，过滤器大小是3×3，32×26×26\n",
    "#model.add(Conv2D(64, (3, 3), activation='relu')) #64×24×24\n",
    "#model.add(MaxPooling2D(pool_size=(2, 2)))# 向下取样\n",
    "#model.add(Dropout(0.25))\n",
    "#model.add(Flatten()) #降维：将64×12×12降为1维（即把他们相乘起来）#\n",
    "#model.add(Dense(128, activation='relu'))\n",
    "#model.add(Dropout(0.5))\n",
    "#model.add(Dense(num_classes, activation='softmax')) #全连接2层\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.5.编译"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#现在，只需要编译模型，就可以开始训练了。当编译模型时，我们声明了损失函数和优化器（SGD，Adam等）。\n",
    "model.compile(optimizer='adam',\n",
    "              loss='categorical_crossentropy',\n",
    "              metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.6.训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.fit(x_train, y_train, batch_size=128, epochs=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.7.评估模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "score = model.evaluate(x_test, y_test)\n",
    "print('acc', score[1]) # acc 0.9926"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
