一以贯之搭建神经网络的过程

在神经网络的识别和预测过程中,本质上都是在寻找一个函数的对应关系,即 $y = f(x)$。为了找到这个函数关系,我们需要做大量的训练,具体的过程可以总结为下面几个步骤:

  1. 获得随机矩阵 w1 和 b1,经过一个激活函数,得到隐藏层。

  2. 获得随机矩阵 w2 和 b2,计算出对应的预测值 y。

  3. 利用预测值 y 与样本标签 y_ 的差距来定义损失函数,常用的方式有均方误差和交叉熵等。

  4. 利用指数衰减学习率,计算出学习率

  5. 用滑动平均计算输出的参数的平均值

  6. 利用梯度下降的方法,减少损失函数的差距

  7. 用with结构初始化所有的参数

  8. 利用for循环喂入数据,反复训练模型

    利用这八个步骤拟合出一个圆的函数,代码和结果如下:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    # coding:utf-8
    import os
    import tensorflow as tf
    import numpy as np
    import matplotlib.pyplot as plt

    # Hyper-parameters shared by the rest of the script.
    xLine = 300                 # number of generated sample points
    aLine = 10                  # number of units in the hidden layer
    BATCHSIZE = 10              # samples fed to the network per training step
    regularizer = 0.001         # coefficient handed to the L2 regularizer
    BASE_LEARN_RATE = 0.001     # starting value of the learning-rate schedule
    LEARNING_RATE_DECAY = 0.99  # decay factor of the learning-rate schedule
    MOVING_VERAGE_DECAY = 0.99  # decay for the (currently disabled) moving average

    # 1. Draw the 2-D sample points from a seeded generator so runs are repeatable.
    rdm = np.random.RandomState(2)
    X = rdm.randn(xLine, 2)
    print("1.生成X:", X)

    # 2. Label each point: 1 when x1^2 + x2^2 < 2, otherwise 0.
    inside_circle = X[:, 0] * X[:, 0] + X[:, 1] * X[:, 1] < 2
    Y = inside_circle.astype(int).tolist()
    print("2.生成Y:", Y)

    # One single-element colour list per sample; squeezed again when plotting.
    Y_c = [['red'] if label else ['blue'] for label in Y]
    print("3. 生成Y_c:", Y_c)

    # Normalise both arrays to explicit 2-D shapes: X is (n, 2), Y is (n, 1).
    X = np.array(X).reshape(-1, 2)
    Y = np.array(Y).reshape(-1, 1)
    print("4. 生成X:", X)
    print("5. 生成Y:", Y)
    # Optional preview of the raw samples:
    # plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
    # plt.show()

    # 3. TODO: decide how the generated data set should be persisted as samples
    #    (open question carried over from the original notes).

    # 4. Placeholders for a batch of 2-D inputs and their single-column labels.
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))
    # Training-step counter; non-trainable so it is never treated as a weight.
    global_step = tf.Variable(0, trainable=False)

    # 5. Hidden-layer parameters: w1 has shape [2, aLine], b1 has shape [aLine]
    #    and is broadcast over the batch dimension.
    w1 = tf.Variable(tf.random_normal(shape=([2, aLine])))
    tf.add_to_collection("losses", tf.contrib.layers.l2_regularizer(regularizer)(w1))
    b1 = tf.Variable(tf.constant(0.01, shape=[aLine]))

    # 6. Hidden activations: [batch, 2] x [2, aLine] + [aLine] -> [batch, aLine].
    a = tf.nn.relu(tf.matmul(x, w1) + b1)

    # 7. Output-layer parameters and the prediction y of shape [batch, 1].
    w2 = tf.Variable(tf.random_normal(shape=([aLine, 1])))
    tf.add_to_collection("losses", tf.contrib.layers.l2_regularizer(regularizer)(w2))
    b2 = tf.Variable(tf.random_normal(shape=([1])))
    y = tf.matmul(a, w2) + b2

    # 8. Loss. `loss` stays the plain mean squared error because the training
    #    loop reports it; `loss_total` adds the L2 penalties registered in the
    #    "losses" collection so that the regularization actually takes effect.
    loss = tf.reduce_mean(tf.square(y - y_))
    loss_total = loss + tf.add_n(tf.get_collection("losses"))

    # Exponentially decaying learning-rate schedule driven by global_step.
    # NOTE(review): the optimizer below still uses the constant base rate, as
    # the original did. With a 0.99 decay every BATCHSIZE steps the scheduled
    # rate would shrink to practically zero well before the 40000 training
    # steps finish -- choose decay_steps deliberately before wiring it in.
    learning_rate = tf.train.exponential_decay(
        BASE_LEARN_RATE,
        global_step,
        BATCHSIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    # 9. Gradient descent on the regularized loss. Passing global_step makes the
    #    optimizer increment the counter once per training step.
    train_step = tf.train.GradientDescentOptimizer(BASE_LEARN_RATE).minimize(
        loss_total, global_step=global_step)

    # Moving average of the trainable variables (left disabled, as in the original):
    # ema = tf.train.ExponentialMovingAverage(MOVING_VERAGE_DECAY, global_step)
    # ema_op = ema.apply(tf.trainable_variables())
    # with tf.control_dependencies([train_step, ema_op]):
    #     train_op = tf.no_op(name="train")

    # Saver used by the training loop to write checkpoints.
    saver = tf.train.Saver()

    # 10. Train the network, checkpoint it periodically, then plot the learned
    #     decision boundary on top of the samples.

    # Make sure the checkpoint directory exists before the first save; the
    # script never created it, so saving could fail on a fresh checkout.
    if not os.path.isdir("./model/"):
        os.makedirs("./model/")

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        STEPS = 40000
        for i in range(STEPS):
            # Walk through the data set in consecutive slices of BATCHSIZE,
            # wrapping around once the end is reached.
            start = (i * BATCHSIZE) % xLine
            end = start + BATCHSIZE

            # print(Y[start:end])
            sess.run(train_step, feed_dict={
                x: X[start:end],
                y_: Y[start:end]
            })

            if i % 2000 == 0:
                # learning_rate_val = sess.run(learning_rate)
                # print("learning_rate_val:", learning_rate_val)
                # Report the loss over the complete data set and checkpoint.
                loss_mse_v = sess.run(loss, feed_dict={x: X, y_: Y})
                print("After %d training steps,loss on all data is %s" % (i, loss_mse_v))
                saver.save(sess, os.path.join("./model/", "model.ckpt"))
                # print(global_step)

        # Evaluate the trained network on a grid covering [-3, 3) x [-3, 3).
        # NOTE(review): the two axes use different steps (0.1 and .01); kept as
        # in the original -- confirm whether that asymmetry is intended.
        xx, yy = np.mgrid[-3:3:0.1, -3:3:.01]

        # One (x1, x2) row per grid node.
        grid = np.c_[xx.ravel(), yy.ravel()]

        probs = sess.run(y, feed_dict={x: grid})
        probs = probs.reshape(xx.shape)

        print("w1:", sess.run(w1))
        print("b1:", sess.run(b1))
        print("w2:", sess.run(w2))
        print("b2:", sess.run(b2))

    # Samples coloured by label plus the contour where the prediction equals 0.5.
    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
    plt.contour(xx, yy, probs, levels=[.5])
    plt.show()


一以贯之搭建神经网络的过程