Understanding LSTMs:
http://colah.github.io/posts/2015-08-Understanding-LSTMs/
Using an LSTM to recognize MNIST handwritten digits, reaching 98.4% test accuracy. Each 28×28 image is fed to the network as a sequence of 28 rows, one 28-pixel row per time step.
# -*- coding:utf-8 -*-
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.examples.tutorials.mnist import input_data
# First load the data and take a look at its shape
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
print("load mnist finished!")
LEARNING_RATE = 1e-3
INPUT_SIZE = 28     # pixels per row = input dimension of each time step
TIMESTEP_SIZE = 28  # rows per image = number of time steps
HIDDEN_SIZE = 256   # hidden units in each LSTM layer
LAYER_NUMBER = 2    # stacked LSTM layers
CLASS_NUMBER = 10   # digits 0-9
ITER_NUMBER = 2000  # training steps
# batch_size is a placeholder because train and test use different batch
# sizes in the same graph (zero_state below needs it at run time)
batch_size = tf.placeholder(tf.int32)
keep_prob = tf.placeholder(tf.float32)
xs = tf.placeholder(tf.float32, [None, 784])
image_xs = tf.reshape(xs, [-1, TIMESTEP_SIZE, INPUT_SIZE])  # -1 means this dimension is inferred from the others
ys = tf.placeholder(tf.float32, [None, CLASS_NUMBER])
def get_drop():
    # Step 2: define one LSTM cell; only hidden_size needs to be given, the
    # input dimension of X is matched automatically
    lstm_cell = rnn.BasicLSTMCell(num_units=HIDDEN_SIZE, forget_bias=1.0, state_is_tuple=True)
    # Step 3: add a dropout layer; usually only output_keep_prob is set
    drop = rnn.DropoutWrapper(cell=lstm_cell, input_keep_prob=1.0, output_keep_prob=keep_prob)
    return drop
# Step 4: stack the layers with MultiRNNCell. get_drop() is called once per
# layer on purpose: each layer must be a separate cell object, otherwise the
# layers would share variables (TF >= 1.2 raises an error for a reused cell).
multi_lstm_cell = rnn.MultiRNNCell([get_drop() for _ in range(LAYER_NUMBER)], state_is_tuple=True)
# Step 5: initialize the state with zeros
state = multi_lstm_cell.zero_state(batch_size, dtype=tf.float32)
# Method one: let tf.nn.dynamic_rnn run the time loop.
# With time_major=False, outputs.shape = [batch_size, timestep_size, hidden_size],
# so we can take h_state = outputs[:, -1, :] as the final output.
# state is a tuple of layer_num LSTMStateTuple(c, h) pairs, each of shape
# [batch_size, hidden_size], so equivalently h_state = state[-1][1] (the h of
# the top layer). Either way the final output has shape [batch_size, hidden_size].
# outputs, state = tf.nn.dynamic_rnn(multi_lstm_cell, inputs=image_xs, initial_state=state, time_major=False)
# h_state = outputs[:, -1, :]  # or h_state = state[-1][1]
# *************** To better understand how the LSTM works, we implement the unrolling of step 6 ourselves ***************
# The docs show that every RNNCell provides a __call__() method (see the
# appendix of the original post), which we can use to step the LSTM through
# time manually.
# Step 6, method two: unroll the computation over the time steps
outputs = list()
with tf.variable_scope('RNN'):
    for timestep in range(TIMESTEP_SIZE):
        if timestep > 0:
            # reuse the same LSTM weights at every time step after the first
            tf.get_variable_scope().reuse_variables()
        (cell_output, state) = multi_lstm_cell(image_xs[:, timestep, :], state)
        outputs.append(cell_output)
h_state = outputs[-1]
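As a quick sanity check (a sketch relying only on TensorFlow's static shape inference), h_state should now have the [batch_size, hidden_size] shape described above:
print(h_state.get_shape())  # (?, 256): the batch dimension is unknown because batch_size is a placeholder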
# The LSTM output h_state above is a [batch_size, hidden_size] tensor; for
# classification we still need a softmax layer on top, so we first define its
# weight matrix and bias. (They must be tf.Variable, not tf.placeholder,
# or they could not be trained.)
weights = tf.Variable(tf.truncated_normal([HIDDEN_SIZE, CLASS_NUMBER], stddev=0.1), dtype=tf.float32)
biases = tf.Variable(tf.constant(0.1, shape=[CLASS_NUMBER]), dtype=tf.float32)
predict_ys = tf.nn.softmax(tf.matmul(h_state, weights) + biases)
# Loss and evaluation. Note that reduce_mean here averages over both the batch
# and the class dimensions, a constant factor (1/CLASS_NUMBER) off the usual
# mean cross-entropy; Adam's adaptive step size largely absorbs that factor.
cross_entropy = -tf.reduce_mean(ys * tf.log(predict_ys))
train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(predict_ys,1), tf.argmax(ys,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
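Side note: feeding a softmax output into tf.log yields log(0) = -inf whenever a predicted probability underflows to zero. A numerically safer variant, shown commented out here as a sketch (it assumes you keep the pre-softmax logits around), uses TensorFlow's fused op:
# logits = tf.matmul(h_state, weights) + biases  # pre-softmax activations
# stable_cross_entropy = tf.reduce_mean(
#     tf.nn.softmax_cross_entropy_with_logits(labels=ys, logits=logits))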
init_op = tf.global_variables_initializer()
# Let GPU memory usage grow on demand
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(init_op)
# Start training and testing
[test_xs, test_ys] = [mnist.test.images, mnist.test.labels]
for i in range(ITER_NUMBER):
    [train_xs, train_ys] = mnist.train.next_batch(128)
    sess.run(train_op, feed_dict={xs: train_xs, ys: train_ys, keep_prob: 0.5, batch_size: 128})
    if (i + 1) % 50 == 0:
        train_accuracy = sess.run(accuracy, feed_dict={
            xs: train_xs, ys: train_ys, keep_prob: 1.0, batch_size: 128})
        test_accuracy = sess.run(accuracy, feed_dict={
            xs: test_xs, ys: test_ys, keep_prob: 1.0, batch_size: mnist.test.images.shape[0]})
        # mnist.train.epochs_completed counts the epochs finished so far
        print("Iter%d, step %d, train-test %g-%g" % (mnist.train.epochs_completed, (i + 1), train_accuracy, test_accuracy))
Output:
Iter0, step 50, train-test 0.828125-0.7162
Iter0, step 100, train-test 0.859375-0.8559
Iter0, step 150, train-test 0.90625-0.8824
Iter0, step 200, train-test 0.953125-0.924
Iter0, step 250, train-test 0.976562-0.9277
Iter0, step 300, train-test 0.945312-0.9409
Iter0, step 350, train-test 0.96875-0.945
Iter0, step 400, train-test 0.96875-0.9606
Iter1, step 450, train-test 0.96875-0.9478
Iter1, step 500, train-test 0.984375-0.9668
Iter1, step 550, train-test 0.984375-0.9595
Iter1, step 600, train-test 0.960938-0.969
Iter1, step 650, train-test 0.984375-0.9665
Iter1, step 700, train-test 1-0.9682
Iter1, step 750, train-test 0.976562-0.9642
Iter1, step 800, train-test 0.984375-0.9736
Iter1, step 850, train-test 0.96875-0.9679
Iter2, step 900, train-test 0.945312-0.9705
Iter2, step 950, train-test 0.992188-0.9744
Iter2, step 1000, train-test 0.96875-0.9776
Iter2, step 1050, train-test 0.976562-0.9772
Iter2, step 1100, train-test 0.992188-0.9804
Iter2, step 1150, train-test 0.992188-0.9766
Iter2, step 1200, train-test 1-0.9786
Iter2, step 1250, train-test 0.960938-0.9789
Iter3, step 1300, train-test 0.96875-0.9831
Iter3, step 1350, train-test 0.992188-0.9754
Iter3, step 1400, train-test 0.984375-0.9813
Iter3, step 1450, train-test 0.984375-0.9806
Iter3, step 1500, train-test 1-0.9824
Iter3, step 1550, train-test 1-0.9794
Iter3, step 1600, train-test 0.984375-0.9819
Iter3, step 1650, train-test 1-0.9843
Iter3, step 1700, train-test 0.992188-0.9805
Iter4, step 1750, train-test 0.96875-0.9819
Iter4, step 1800, train-test 1-0.9792
Iter4, step 1850, train-test 1-0.9805
Iter4, step 1900, train-test 0.984375-0.9831
Iter4, step 1950, train-test 1-0.9805
Iter4, step 2000, train-test 0.992188-0.9844
References:
http://blog.csdn.net/jerr__y/article/details/61195257
http://blog.csdn.net/u014595019/article/details/52759104
LSTM multi-class text classification: http://blog.csdn.net/u010223750/article/details/53334313?locationNum=7&fps=1