Implementing seq-lstm

Take the sentence ['Everybody', 'read', 'that', 'book'].
Each word is treated as a sequence of characters and run through a character-level LSTM; keeping only the final output gives a (1, 50) vector per word, and stacking the four words gives (4, 50).
The sentence itself is encoded as the word indices [5, 6, 7, 8],
which the embedding layer turns into (4, 100). The two are concatenated into (4, 150) before the word-level LSTM, as sketched below.
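
Here is a minimal shape check of that idea (a standalone sketch, not part of the program below; it reuses the same layer sizes but uses plain tensors from a recent PyTorch, so no Variable wrapper is needed):

import torch
from torch import nn

char_lstm = nn.LSTM(10, 50, batch_first=True)    # char_dim=10, char_hidden=50
word_embedding = nn.Embedding(9, 100)             # 9 words in the vocabulary, n_dim=100

# one word, e.g. 'read': 4 letters, each embedded to 10 dims
letters = torch.randn(1, 4, 10)                   # (batch=1, num_letters, 10)
_, (h_n, c_n) = char_lstm(letters)
print(h_n.size())                                 # (1, 1, 50): the word's final char-level state

# stacking the final states of the 4 words gives (4, 50);
# the word indices [5, 6, 7, 8] give (4, 100) after the embedding layer
char_feature = torch.randn(4, 50)                 # stand-in for the 4 stacked char-LSTM outputs
word_feature = word_embedding(torch.LongTensor([5, 6, 7, 8]))
print(torch.cat((word_feature, char_feature), 1).size())   # (4, 150), fed to the word-level LSTM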

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 26 10:23:09 2017

@author: bynn
"""

import torch
import torch.nn.functional as F
from torch import nn, optim
from torch.autograd import Variable

training_data = [("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
                 ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])]

#word_to_idx {'Everybody': 5,'The': 0,'apple': 4,'ate': 2,'book': 8,'dog': 1,'read': 6,'that': 7,'the': 3}
#tag_to_idx {'DET': 0, 'NN': 1, 'V': 2}
word_to_idx = {}
tag_to_idx = {}
for context, tag in training_data:
    for word in context:
        if word not in word_to_idx:
            word_to_idx[word] = len(word_to_idx)
    for label in tag:
        if label not in tag_to_idx:
            tag_to_idx[label] = len(tag_to_idx)

alphabet = 'abcdefghijklmnopqrstuvwxyz'
#character_to_idx {'a': 0,'b': 1,'c': 2,'d': 3,....}
character_to_idx = {}
for i in range(len(alphabet)):
    character_to_idx[alphabet[i]] = i


class CharLSTM(nn.Module):
    def __init__(self, n_char, char_dim, char_hidden):  # called with 26, 10, 50
        super(CharLSTM, self).__init__()
        self.char_embedding = nn.Embedding(n_char, char_dim)  # (26, 10)
        self.char_lstm = nn.LSTM(char_dim, char_hidden, batch_first=True)  # (10, 50)

    def forward(self, x):  # input: (batch=1, num_letters), character indices
        x = self.char_embedding(x)  # (1, num_letters, 10)
        # h is the tuple (h_n, c_n)
        _, h = self.char_lstm(x)
        return h[0]  # h_n: (1, 1, 50), the final hidden state of the word

'''
For the sentence ['Everybody', 'read', 'that', 'book']:
each word is treated as a character sequence, and keeping only the final
char-LSTM output gives (1, 50); stacking the four words gives (4, 50).
The sentence itself is input as the word indices [5, 6, 7, 8],
which become (4, 100) after the embedding layer.
'''
class LSTMTagger(nn.Module):
    # called with 9, 26, 10, 100, 50, 128, 3
    def __init__(self, n_word, n_char, char_dim, n_dim, char_hidden, n_hidden, n_tag):
        super(LSTMTagger, self).__init__()
        self.word_embedding = nn.Embedding(n_word, n_dim)  # (9, 100)
        self.char_lstm = CharLSTM(n_char, char_dim, char_hidden)  # (26, 10, 50)
        self.lstm = nn.LSTM(n_dim + char_hidden, n_hidden, batch_first=True)  # (150, 128)
        self.linear1 = nn.Linear(n_hidden, n_tag)

    def forward(self, x, word):  # e.g. x = [5, 6, 7, 8], word = ['Everybody', 'read', 'that', 'book']
        char = torch.FloatTensor()
        for each in word:
            char_list = []
            for letter in each:  # read one word, letter by letter
                char_list.append(character_to_idx[letter.lower()])
            char_list = torch.LongTensor(char_list)  # (num_letters,) character indices
            char_list = char_list.unsqueeze(0)  # (1, num_letters)
            if torch.cuda.is_available():
                tempchar = self.char_lstm(Variable(char_list).cuda())  # each word gives (1, 1, 50)
            else:
                tempchar = self.char_lstm(Variable(char_list))
            tempchar = tempchar.squeeze(0)  # (1, 50), the word's final char-level output
            char = torch.cat((char, tempchar.cpu().data), 0)  # stack (1, 50) rows until char is (4, 50)
        # char = char.squeeze(1)  # left commented out; unclear what it was for
        if torch.cuda.is_available():
            char = char.cuda()
        char = Variable(char)  # wrap char back into a Variable
        x = self.word_embedding(x)  # input size (4), output (4, 100)
        x = torch.cat((x, char), 1)  # (4, 150): concatenation, not addition
        x = x.unsqueeze(0)  # (1, 4, 150)
        x, _ = self.lstm(x)  # (1, 4, 128)
        x = x.squeeze(0)  # (4, 128)
        x = self.linear1(x)  # (4, 3)
        y = F.log_softmax(x, dim=1)  # (4, 3) log-probabilities over the 3 tags
        return y


model = LSTMTagger(
    len(word_to_idx), len(character_to_idx), 10, 100, 50, 128, len(tag_to_idx))

if torch.cuda.is_available():
    model = model.cuda()
# the model already outputs log_softmax, so use NLLLoss
# (CrossEntropyLoss would apply log_softmax a second time)
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-2)


def make_sequence(x, dic):
    idx = [dic[i] for i in x]
    idx = Variable(torch.LongTensor(idx))
    return idx

#word_to_idx {'Everybody': 5,'The': 0,'apple': 4,'ate': 2,'book': 8,'dog': 1,'read': 6,'that': 7,'the': 3}
#tag_to_idx {'DET': 0, 'NN': 1, 'V': 2}
#character_to_idx {'a': 0,'b': 1,'c': 2,'d': 3,....}
for epoch in range(300):
    print('*' * 10)
    print('epoch {}'.format(epoch + 1))
    running_loss = 0
    for data in training_data:
        word, tag = data  # word is ['Everybody', 'read', 'that', 'book'], tag is ["NN", "V", "DET", "NN"]
        word_list = make_sequence(word, word_to_idx)  # [5, 6, 7, 8], size (4)
        tag = make_sequence(tag, tag_to_idx)  # [1, 2, 0, 1], size (4)
        if torch.cuda.is_available():
            word_list = word_list.cuda()
            tag = tag.cuda()
        # forward
        out = model(word_list, word)  # word_list = [5, 6, 7, 8], word = ['Everybody', 'read', 'that', 'book']
        loss = criterion(out, tag)  # out is (4, 3) log-probabilities, tag is (4) class indices (not one-hot)
        running_loss += loss.data[0]
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('Loss: {}'.format(running_loss / len(training_data)))
print()

input = make_sequence("Everybody ate the apple".split(), word_to_idx)
if torch.cuda.is_available():
    input = input.cuda()

out = model(input, "Everybody ate the apple".split())
print(out)
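
The final print shows a (4, 3) matrix of log-probabilities, one row per word of "Everybody ate the apple". To read it as tags, one can take the argmax of each row and map it back through tag_to_idx; a small follow-up sketch (idx_to_tag is a helper introduced here, not part of the original code):

# invert tag_to_idx so predicted column indices map back to tag names
idx_to_tag = {v: k for k, v in tag_to_idx.items()}
_, pred = torch.max(out, 1)    # argmax over the 3 tag scores for each word
pred = pred.cpu().data         # back to a plain LongTensor
print([idx_to_tag[int(i)] for i in pred])  # predicted tag for each of the 4 words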