Python系列:NLP系列二:命名实体识别(NER)、用深度学习实现命名实体识别(NER)
import torch
import torch.nn as nn
from torch.nn import functional as F
class NERModel(nn.Module):
    """Bidirectional-LSTM tagger for named entity recognition.

    The model looks up frozen, externally supplied word embeddings, runs them
    through a bidirectional LSTM and projects every timestep onto the tag
    space with a linear layer.

    Args:
        vocab: Mapping from token to embedding row index. It is accepted for
            interface compatibility; the embedding table size is taken from
            ``embeddings`` itself.
        embeddings: 2-D matrix (nested list, array or tensor) with one row per
            vocabulary index and one column per embedding dimension.
        tag_to_idx: Mapping from tag name to tag index; its length determines
            the number of output scores per token.
        hidden_dim: Hidden size of each LSTM direction (default 64).

    Raises:
        ValueError: If ``embeddings`` is not a 2-D matrix.
    """

    def __init__(self, vocab, embeddings, tag_to_idx, hidden_dim=64):
        super().__init__()
        # Copy the data so the frozen weight never aliases the caller's buffer.
        weights = torch.as_tensor(embeddings, dtype=torch.float).detach().clone()
        if weights.dim() != 2:
            raise ValueError(
                "embeddings must be a 2-D matrix of shape "
                "(num_embeddings, embedding_dim)"
            )
        embedding_dim = weights.size(1)
        # freeze=True keeps the supplied vectors fixed during training.
        self.embedding = nn.Embedding.from_pretrained(weights, freeze=True)
        # The LSTM input size must follow the embedding matrix rather than a
        # hard-coded constant, otherwise any matrix that is not 64 columns
        # wide fails inside the LSTM.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True)
        # Forward and backward hidden states are concatenated per timestep.
        self.hidden2tag = nn.Linear(2 * hidden_dim, len(tag_to_idx))

    def forward(self, input_seq):
        """Return per-token log-probabilities over the tags.

        Args:
            input_seq: 1-D integer tensor of token indices for one sentence.

        Returns:
            Tensor of shape ``(len(input_seq), len(tag_to_idx))`` whose rows
            are log-softmax scores.
        """
        seq_len = len(input_seq)
        if seq_len == 0:
            # nn.LSTM rejects zero-length sequences; return an empty score
            # matrix instead of crashing.
            return self.embedding.weight.new_zeros(
                (0, self.hidden2tag.out_features)
            )
        embeds = self.embedding(input_seq)
        # nn.LSTM expects (seq_len, batch, features); use a batch of one.
        lstm_out, _ = self.lstm(embeds.view(seq_len, 1, -1))
        # Keep one row per token so each timestep gets its own tag scores.
        tag_space = self.hidden2tag(lstm_out.view(seq_len, -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores
# Example usage
# Index 0 is reserved for padding so that every vocabulary index addresses an
# existing row of the embedding matrix (rows are numbered from 0).
vocab = {'<pad>': 0, 'hello': 1, 'world': 2}
# Toy embedding matrix with one row per vocabulary entry; real embeddings
# should be loaded from a file.
embeddings = [[0, 0], [1, 2], [3, 4]]
# Tag indices start at 0 so each index matches one of the len(tag_to_idx)
# output columns; 'O' marks tokens that are not part of any entity.
tag_to_idx = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4}
model = NERModel(vocab, embeddings, tag_to_idx)
input_seq = torch.tensor([1, 2])  # toy input: indices of 'hello' and 'world'
output = model(input_seq)
print(output)
这段代码定义了一个基于PyTorch的简单命名实体识别模型,它由三部分组成:加载外部词向量并冻结其权重的嵌入层、双向长短期记忆网络(BiLSTM),以及把LSTM输出映射到标签空间的全连接层,最后通过log_softmax输出各标签的对数概率。示例用法展示了如何实例化模型并对输入序列进行处理。
评论已关闭