Implementing seq-lstm

Take the sentence ['Everybody', 'read', 'that', 'book'].
Each word is treated as a sequence of characters and run through a character-level LSTM; keeping only the final output gives a (1, 50) vector per word, and stacking the four words gives (4, 50).
The sentence itself is encoded as the word indices [5, 6, 7, 8],
which the embedding layer turns into (4, 100). The two are concatenated into (4, 150) before the word-level LSTM, as sketched below.
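
Here is a minimal shape check of that idea (a standalone sketch, not part of the program below; it reuses the same layer sizes but uses plain tensors from a recent PyTorch, so no Variable wrapper is needed):

import torch
from torch import nn

char_lstm = nn.LSTM(10, 50, batch_first=True)    # char_dim=10, char_hidden=50
word_embedding = nn.Embedding(9, 100)             # 9 words in the vocabulary, n_dim=100

# one word, e.g. 'read': 4 letters, each embedded to 10 dims
letters = torch.randn(1, 4, 10)                   # (batch=1, num_letters, 10)
_, (h_n, c_n) = char_lstm(letters)
print(h_n.size())                                 # (1, 1, 50): the word's final char-level state

# stacking the final states of the 4 words gives (4, 50);
# the word indices [5, 6, 7, 8] give (4, 100) after the embedding layer
char_feature = torch.randn(4, 50)                 # stand-in for the 4 stacked char-LSTM outputs
word_feature = word_embedding(torch.LongTensor([5, 6, 7, 8]))
print(torch.cat((word_feature, char_feature), 1).size())   # (4, 150), fed to the word-level LSTM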

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 26 10:23:09 2017

@author: bynn
"""

import torch
import torch.nn.functional as F
from torch import nn, optim
from torch.autograd import Variable

training_data = [("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
                 ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])]

#word_to_idx {'Everybody': 5,'The': 0,'apple': 4,'ate': 2,'book': 8,'dog': 1,'read': 6,'that': 7,'the': 3}
#tag_to_idx {'DET': 0, 'NN': 1, 'V': 2}
word_to_idx = {}
tag_to_idx = {}
for context, tag in training_data:
    for word in context:
        if word not in word_to_idx:
            word_to_idx[word] = len(word_to_idx)
    for label in tag:
        if label not in tag_to_idx:
            tag_to_idx[label] = len(tag_to_idx)

alphabet = 'abcdefghijklmnopqrstuvwxyz'
#character_to_idx {'a': 0,'b': 1,'c': 2,'d': 3,....}
character_to_idx = {}
for i in range(len(alphabet)):
    character_to_idx[alphabet[i]] = i


class CharLSTM(nn.Module):
    def __init__(self, n_char, char_dim, char_hidden):  # called with 26, 10, 50
        super(CharLSTM, self).__init__()
        self.char_embedding = nn.Embedding(n_char, char_dim)  # (26, 10)
        self.char_lstm = nn.LSTM(char_dim, char_hidden, batch_first=True)  # (10, 50)

    def forward(self, x):  # input: (batch=1, num_letters), character indices
        x = self.char_embedding(x)  # (1, num_letters, 10)
        # h is the tuple (h_n, c_n)
        _, h = self.char_lstm(x)
        return h[0]  # h_n: (1, 1, 50), the final hidden state of the word

'''
For the sentence ['Everybody', 'read', 'that', 'book']:
each word is treated as a character sequence, and keeping only the final
char-LSTM output gives (1, 50); stacking the four words gives (4, 50).
The sentence itself is input as the word indices [5, 6, 7, 8],
which become (4, 100) after the embedding layer.
'''
class LSTMTagger(nn.Module):
    # called with 9, 26, 10, 100, 50, 128, 3
    def __init__(self, n_word, n_char, char_dim, n_dim, char_hidden, n_hidden, n_tag):
        super(LSTMTagger, self).__init__()
        self.word_embedding = nn.Embedding(n_word, n_dim)  # (9, 100)
        self.char_lstm = CharLSTM(n_char, char_dim, char_hidden)  # (26, 10, 50)
        self.lstm = nn.LSTM(n_dim + char_hidden, n_hidden, batch_first=True)  # (150, 128)
        self.linear1 = nn.Linear(n_hidden, n_tag)

    def forward(self, x, word):  # e.g. x = [5, 6, 7, 8], word = ['Everybody', 'read', 'that', 'book']
        char = torch.FloatTensor()
        for each in word:
            char_list = []
            for letter in each:  # read one word, letter by letter
                char_list.append(character_to_idx[letter.lower()])
            char_list = torch.LongTensor(char_list)  # (num_letters,) character indices
            char_list = char_list.unsqueeze(0)  # (1, num_letters)
            if torch.cuda.is_available():
                tempchar = self.char_lstm(Variable(char_list).cuda())  # each word gives (1, 1, 50)
            else:
                tempchar = self.char_lstm(Variable(char_list))
            tempchar = tempchar.squeeze(0)  # (1, 50), the word's final char-level output
            char = torch.cat((char, tempchar.cpu().data), 0)  # stack (1, 50) rows until char is (4, 50)
        # char = char.squeeze(1)  # left commented out; unclear what it was for
        if torch.cuda.is_available():
            char = char.cuda()
        char = Variable(char)  # wrap char back into a Variable
        x = self.word_embedding(x)  # input size (4), output (4, 100)
        x = torch.cat((x, char), 1)  # (4, 150): concatenation, not addition
        x = x.unsqueeze(0)  # (1, 4, 150)
        x, _ = self.lstm(x)  # (1, 4, 128)
        x = x.squeeze(0)  # (4, 128)
        x = self.linear1(x)  # (4, 3)
        y = F.log_softmax(x, dim=1)  # (4, 3) log-probabilities over the 3 tags
        return y


model = LSTMTagger(
    len(word_to_idx), len(character_to_idx), 10, 100, 50, 128, len(tag_to_idx))

if torch.cuda.is_available():
    model = model.cuda()
# the model already outputs log_softmax, so use NLLLoss
# (CrossEntropyLoss would apply log_softmax a second time)
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-2)


def make_sequence(x, dic):
    idx = [dic[i] for i in x]
    idx = Variable(torch.LongTensor(idx))
    return idx

#word_to_idx {'Everybody': 5,'The': 0,'apple': 4,'ate': 2,'book': 8,'dog': 1,'read': 6,'that': 7,'the': 3}
#tag_to_idx {'DET': 0, 'NN': 1, 'V': 2}
#character_to_idx {'a': 0,'b': 1,'c': 2,'d': 3,....}
for epoch in range(300):
    print('*' * 10)
    print('epoch {}'.format(epoch + 1))
    running_loss = 0
    for data in training_data:
        word, tag = data  # word is ['Everybody', 'read', 'that', 'book'], tag is ["NN", "V", "DET", "NN"]
        word_list = make_sequence(word, word_to_idx)  # [5, 6, 7, 8], size (4)
        tag = make_sequence(tag, tag_to_idx)  # [1, 2, 0, 1], size (4)
        if torch.cuda.is_available():
            word_list = word_list.cuda()
            tag = tag.cuda()
        # forward
        out = model(word_list, word)  # word_list = [5, 6, 7, 8], word = ['Everybody', 'read', 'that', 'book']
        loss = criterion(out, tag)  # out is (4, 3) log-probabilities, tag is (4) class indices (not one-hot)
        running_loss += loss.data[0]
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('Loss: {}'.format(running_loss / len(training_data)))
print()

input = make_sequence("Everybody ate the apple".split(), word_to_idx)
if torch.cuda.is_available():
    input = input.cuda()

out = model(input, "Everybody ate the apple".split())
print(out)
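
The final print shows a (4, 3) matrix of log-probabilities, one row per word of "Everybody ate the apple". To read it as tags, one can take the argmax of each row and map it back through tag_to_idx; a small follow-up sketch (idx_to_tag is a helper introduced here, not part of the original code):

# invert tag_to_idx so predicted column indices map back to tag names
idx_to_tag = {v: k for k, v in tag_to_idx.items()}
_, pred = torch.max(out, 1)    # argmax over the 3 tag scores for each word
pred = pred.cpu().data         # back to a plain LongTensor
print([idx_to_tag[int(i)] for i in pred])  # predicted tag for each of the 4 words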