import torch
import torch.nn as nn
import json


def attention(Q, K, V):
    # Scaled dot-product attention: softmax(Q K^T / sqrt(d)) V
    d = K.shape[-1]
    QK = Q @ K.transpose(-2, -1)
    QK_d = QK / (d ** 0.5)
    weights = torch.softmax(QK_d, dim=-1)
    outputs = weights @ V
    return outputs


class Attention(nn.Module):
    def __init__(self, emb_dim, n_heads):
        super(Attention, self).__init__()
        self.emb_dim = emb_dim
        self.n_heads = n_heads

    def forward(self, X):
        batch_size, seq_len, emb_dim = X.size()  # (batch_size, seq_len, emb_dim)
        n_heads = self.n_heads
        emb_dim_per_head = emb_dim // n_heads
        assert emb_dim == self.emb_dim
        assert emb_dim_per_head * n_heads == emb_dim
        # Split the embedding dimension into heads: (batch_size, n_heads, seq_len, emb_dim_per_head)
        X = X.reshape(batch_size, seq_len, n_heads, emb_dim_per_head).transpose(1, 2)
        output = attention(X, X, X)  # (batch_size, n_heads, seq_len, emb_dim_per_head)
        output = output.transpose(1, 2)  # (batch_size, seq_len, n_heads, emb_dim_per_head)
        output = output.contiguous().view(batch_size, seq_len, emb_dim)  # (batch_size, seq_len, emb_dim)
        return output


class ClassifierAttention(nn.Module):
    def __init__(self, vocab_size, emb_dim, padding_idx, hidden_size, n_layers,
                 attention_heads, hidden_layer_units, dropout):
        super(ClassifierAttention, self).__init__()
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=emb_dim,
            padding_idx=padding_idx,
        )
        self.rnn_1 = nn.LSTM(
            emb_dim,
            hidden_size,
            n_layers,
            bidirectional=False,
            batch_first=True,
        )
        self.attention = Attention(hidden_size, attention_heads)
        self.rnn_2 = nn.LSTM(
            hidden_size,
            hidden_size,
            n_layers,
            bidirectional=False,
            batch_first=True,
        )
        self.dropout = nn.Dropout(dropout)
        # Feed-forward classification head: hidden_size -> hidden_layer_units -> 1 output unit
        hidden_layer_units = [hidden_size, *hidden_layer_units]
        self.hidden_layers = nn.ModuleList([])
        for in_unit, out_unit in zip(hidden_layer_units[:-1], hidden_layer_units[1:]):
            self.hidden_layers.append(nn.Linear(in_unit, out_unit))
            self.hidden_layers.append(nn.ReLU())
            self.hidden_layers.append(self.dropout)
        self.hidden_layers.append(nn.Linear(hidden_layer_units[-1], 1))
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # x: (batch_size, seq_len)
        out = self.embedding(x)  # (batch_size, seq_len, emb_dim)
        out, (hidden_state, cell_state) = self.rnn_1(out)  # out: (batch_size, seq_len, hidden_size)
        out = self.attention(out)  # (batch_size, seq_len, hidden_size)
        out = self.dropout(out)
        out, (hidden_state, cell_state) = self.rnn_2(out)
        # out:          (batch_size, seq_len, hidden_size)
        # hidden_state: (n_layers * n_directions, batch_size, hidden_size)
        # cell_state:   (n_layers * n_directions, batch_size, hidden_size)
        out = hidden_state[-1]  # final hidden state of the last layer: (batch_size, hidden_size)
        out = self.dropout(out)
        for layer in self.hidden_layers:
            out = layer(out)
        out = self.sigmoid(out)  # (batch_size, 1)
        out = out.squeeze(-1)  # (batch_size,)
        return out


def get_model(model_path, params_path):
    # params_path is expected to hold the constructor arguments as a JSON array,
    # in the positional order of ClassifierAttention.__init__.
    with open(params_path, 'r') as f:
        params = json.load(f)
    model = ClassifierAttention(*params)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    return model
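

# A minimal smoke test, assuming made-up hyperparameters (the vocab size, embedding
# size, head count, etc. below are illustrative placeholders, not values shipped with
# this Space). It only runs when the file is executed directly and checks that a
# forward pass yields one probability per input sequence.
if __name__ == "__main__":
    torch.manual_seed(0)
    model = ClassifierAttention(
        vocab_size=1000,
        emb_dim=64,
        padding_idx=0,
        hidden_size=64,
        n_layers=1,
        attention_heads=4,
        hidden_layer_units=[32],
        dropout=0.1,
    )
    model.eval()
    dummy_batch = torch.randint(1, 1000, (2, 16))  # (batch_size=2, seq_len=16) token ids
    with torch.no_grad():
        probs = model(dummy_batch)
    print(probs.shape)  # expected: torch.Size([2])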