nirdizati_light.explanation.wrappers.lrp_wrapper
import numpy as np
from numpy import newaxis as na

from nirdizati_light.predictive_model.common import get_tensor
from nirdizati_light.predictive_model.predictive_model import drop_columns


def lrp_explain(CONF, predictive_model, full_test_df, encoder):
    test_df = drop_columns(full_test_df)
    test_tensor = get_tensor(CONF, test_df)

    explainer = _init_explainer(predictive_model.model, test_tensor[0])
    importances = _get_explanation(explainer, full_test_df, encoder)

    return importances


def _init_explainer(model, tensor):
    return LSTM_bidi(model, tensor)  # load trained LSTM model


def _get_explanation(explainer, target_df, encoder):
    trace_ids = target_df['trace_id'].to_list()
    targets_label_enc = np.argmax(np.array(target_df['label'].to_list()), axis=1)
    df = drop_columns(target_df)
    prefix_names = df.columns
    encoder.decode(df)
    prefixes = df.to_numpy().tolist()

    importances = {}
    for trace_id, prefix_words, label in zip(trace_ids, prefixes, targets_label_enc):
        Rx, Rx_rev, R_rest = explainer.lrp(prefix_words, label)
        words_importances = np.sum(Rx + Rx_rev, axis=1)  # one relevance score per prefix column
        importances[str(trace_id)] = []
        for i_word, word in enumerate(prefix_words):
            importances[str(trace_id)].append([prefix_names[i_word], word, words_importances[i_word]])

    return importances


"""
@author: Leila Arras
@maintainer: Leila Arras
@date: 21.06.2017
@version: 1.0+
@copyright: Copyright (c) 2017, Leila Arras, Gregoire Montavon, Klaus-Robert Mueller, Wojciech Samek
@license: see LICENSE file in repository root
"""


class LSTM_bidi:

    def __init__(self, model, input_encoded):
        """
        Load the trained model and extract its weights.
        """

        # self.args = CONF

        # input (i.e., one-hot encoded activity + one-hot encoded data attributes)
        self.E = input_encoded

        # model weights
        self.model = model

        """
        Assumptions:
        - the input biases bxh_left and bxh_right are not stored by Keras
        - the bias of the output layer is also set to 0
        """

        # LSTM left (forward) encoder
        self.Wxh_Left = model.layers[1].get_weights()[0].T  # shape 4d*e // kernel of the left LSTM layer // d = units
        # self.bxh_Left = model["bxh_Left"]  # shape 4d; not stored by Keras
        self.Whh_Left = model.layers[1].get_weights()[1].T  # shape 4d*d // recurrent kernel of the left LSTM layer
        self.bhh_Left = model.layers[1].get_weights()[2].T  # shape 4d   // biases of the left LSTM layer

        # LSTM right (backward) encoder
        self.Wxh_Right = model.layers[1].get_weights()[3].T  # shape 4d*e // kernel of the right LSTM layer
        # self.bxh_Right = model["bxh_Right"]  # not stored by Keras
        self.Whh_Right = model.layers[1].get_weights()[4].T  # shape 4d*d // recurrent kernel of the right LSTM layer
        self.bhh_Right = model.layers[1].get_weights()[5].T  # shape 4d   // biases of the right LSTM layer

        # linear output layer
        # Note: Keras does not provide two separate output weight matrices for the bi-LSTM cell;
        # therefore, the single matrix is split into two equal parts
        # (the hard-coded 0:100 / 100:200 split assumes d = 100 units per direction)
        self.Why_Left = model.layers[2].get_weights()[0].T   # shape C*2d
        self.Why_Left = self.Why_Left[:, 0:100]              # shape C*d
        self.Why_Right = model.layers[2].get_weights()[0].T  # shape C*2d
        self.Why_Right = self.Why_Right[:, 100:200]          # shape C*d

    def set_input(self, w, delete_pos=None):
        """
        Build the numerical input sequence x/x_rev from the word indices w (+ initialize hidden layers h, c).
        Optionally delete words at positions delete_pos.
        """
        T = len(w)  # prefix length
        d = int(self.Wxh_Left.shape[0] / 4)  # hidden layer dimension
        e = self.E.shape[1]  # one-hot dimension; previously, e = self.args.dim
        x = self.E  # encoded input

        if delete_pos is not None:
            x[delete_pos, :] = np.zeros((len(delete_pos), e))

        self.w = w
        self.x = x
        self.x_rev = x[::-1, :].copy()

        self.h_Left = np.zeros((T + 1, d))
        self.c_Left = np.zeros((T + 1, d))
        self.h_Right = np.zeros((T + 1, d))
        self.c_Right = np.zeros((T + 1, d))

    def forward(self):
        """
        Standard forward pass.
        Compute the hidden layer values (assuming the input x/x_rev was previously set via set_input).
        """
        T = len(self.w)
        d = int(self.Wxh_Left.shape[0] / 4)
        # gate indices (assuming the gate ordering in the LSTM weights is i, g, f, o):
        idx = np.hstack((np.arange(0, d), np.arange(2 * d, 4 * d))).astype(int)  # indices of gates i, f, o together
        idx_i, idx_g, idx_f, idx_o = (np.arange(0, d), np.arange(d, 2 * d),
                                      np.arange(2 * d, 3 * d), np.arange(3 * d, 4 * d))  # gates i, g, f, o separately

        # initialize
        self.gates_xh_Left = np.zeros((T, 4 * d))
        self.gates_hh_Left = np.zeros((T, 4 * d))
        self.gates_pre_Left = np.zeros((T, 4 * d))  # gates pre-activation
        self.gates_Left = np.zeros((T, 4 * d))      # gates activation

        self.gates_xh_Right = np.zeros((T, 4 * d))
        self.gates_hh_Right = np.zeros((T, 4 * d))
        self.gates_pre_Right = np.zeros((T, 4 * d))
        self.gates_Right = np.zeros((T, 4 * d))

        for t in range(T):
            # left (forward) direction
            self.gates_xh_Left[t] = np.dot(self.Wxh_Left, self.x[t])
            self.gates_hh_Left[t] = np.dot(self.Whh_Left, self.h_Left[t - 1])
            self.gates_pre_Left[t] = self.gates_xh_Left[t] + self.gates_hh_Left[t] + self.bhh_Left  # + self.bxh_Left
            self.gates_Left[t, idx] = 1.0 / (1.0 + np.exp(-self.gates_pre_Left[t, idx]))
            self.gates_Left[t, idx_g] = np.tanh(self.gates_pre_Left[t, idx_g])
            self.c_Left[t] = self.gates_Left[t, idx_f] * self.c_Left[t - 1] + self.gates_Left[t, idx_i] * self.gates_Left[t, idx_g]
            self.h_Left[t] = self.gates_Left[t, idx_o] * np.tanh(self.c_Left[t])

            # right (backward) direction, on the reversed input
            self.gates_xh_Right[t] = np.dot(self.Wxh_Right, self.x_rev[t])
            self.gates_hh_Right[t] = np.dot(self.Whh_Right, self.h_Right[t - 1])
            self.gates_pre_Right[t] = self.gates_xh_Right[t] + self.gates_hh_Right[t] + self.bhh_Right  # + self.bxh_Right
            self.gates_Right[t, idx] = 1.0 / (1.0 + np.exp(-self.gates_pre_Right[t, idx]))
            self.gates_Right[t, idx_g] = np.tanh(self.gates_pre_Right[t, idx_g])
            self.c_Right[t] = self.gates_Right[t, idx_f] * self.c_Right[t - 1] + self.gates_Right[t, idx_i] * self.gates_Right[t, idx_g]
            self.h_Right[t] = self.gates_Right[t, idx_o] * np.tanh(self.c_Right[t])

        self.y_Left = np.dot(self.Why_Left, self.h_Left[T - 1])
        self.y_Right = np.dot(self.Why_Right, self.h_Right[T - 1])
        self.s = self.y_Left + self.y_Right

        return self.s.copy()  # prediction scores

    def lrp(self, w, LRP_class, eps=0.001, bias_factor=0.0):
        """
        Perform Layer-wise Relevance Propagation (LRP): a forward pass followed by the LRP backward pass.
        Compute the hidden layer relevances for the target class LRP_class
        (according to the papers:
         - https://doi.org/10.1371/journal.pone.0130140
         - https://doi.org/10.18653/v1/W17-5221)
        :param w: words (decoded events/attribute values) of the prefix
        :param LRP_class: index of the target activity/class
        :param eps: LRP stabilizer (small positive number)
        :param bias_factor: LRP bias redistribution factor
        :return: relevance scores
        """

        # forward pass
        self.set_input(w)
        self.forward()

        T = len(self.w)
        d = int(self.Wxh_Left.shape[0] / 4)
        e = self.E.shape[1]  # previously, e = self.args.dim
        C = self.Why_Left.shape[0]  # number of classes
        idx = np.hstack((np.arange(0, d), np.arange(2 * d, 4 * d))).astype(int)  # indices of gates i, f, o together
        idx_i, idx_g, idx_f, idx_o = (np.arange(0, d), np.arange(d, 2 * d),
                                      np.arange(2 * d, 3 * d), np.arange(3 * d, 4 * d))  # gates i, g, f, o separately

        # initialize
        Rx = np.zeros(self.x.shape)
        Rx_rev = np.zeros(self.x.shape)

        Rh_Left = np.zeros((T + 1, d))
        Rc_Left = np.zeros((T + 1, d))
        Rg_Left = np.zeros((T, d))  # gate g only
        Rh_Right = np.zeros((T + 1, d))
        Rc_Right = np.zeros((T + 1, d))
        Rg_Right = np.zeros((T, d))  # gate g only

        Rout_mask = np.zeros((C))
        Rout_mask[LRP_class] = 1.0

        # format reminder: lrp_linear(hin, w, b, hout, Rout, bias_nb_units, eps, bias_factor)
        Rh_Left[T - 1] = lrp_linear(self.h_Left[T - 1], self.Why_Left.T, np.zeros((C)), self.s, self.s * Rout_mask,
                                    2 * d, eps, bias_factor, debug=False)
        Rh_Right[T - 1] = lrp_linear(self.h_Right[T - 1], self.Why_Right.T, np.zeros((C)), self.s, self.s * Rout_mask,
                                     2 * d, eps, bias_factor, debug=False)

        for t in reversed(range(T)):
            Rc_Left[t] += Rh_Left[t]
            Rc_Left[t - 1] = lrp_linear(self.gates_Left[t, idx_f] * self.c_Left[t - 1], np.identity(d), np.zeros((d)),
                                        self.c_Left[t], Rc_Left[t], 2 * d, eps, bias_factor, debug=False)
            Rg_Left[t] = lrp_linear(self.gates_Left[t, idx_i] * self.gates_Left[t, idx_g], np.identity(d),
                                    np.zeros((d)), self.c_Left[t], Rc_Left[t], 2 * d, eps, bias_factor, debug=False)
            Rx[t] = lrp_linear(self.x[t], self.Wxh_Left[idx_g].T, self.bhh_Left[idx_g],  # self.bxh_Left[idx_g] +
                               self.gates_pre_Left[t, idx_g], Rg_Left[t], d + e, eps, bias_factor, debug=False)
            Rh_Left[t - 1] = lrp_linear(self.h_Left[t - 1], self.Whh_Left[idx_g].T,
                                        self.bhh_Left[idx_g], self.gates_pre_Left[t, idx_g],  # self.bxh_Left[idx_g] +
                                        Rg_Left[t], d + e, eps, bias_factor, debug=False)

            Rc_Right[t] += Rh_Right[t]
            Rc_Right[t - 1] = lrp_linear(self.gates_Right[t, idx_f] * self.c_Right[t - 1], np.identity(d),
                                         np.zeros((d)), self.c_Right[t], Rc_Right[t], 2 * d, eps, bias_factor,
                                         debug=False)
            Rg_Right[t] = lrp_linear(self.gates_Right[t, idx_i] * self.gates_Right[t, idx_g], np.identity(d),
                                     np.zeros((d)), self.c_Right[t], Rc_Right[t], 2 * d, eps, bias_factor, debug=False)
            Rx_rev[t] = lrp_linear(self.x_rev[t], self.Wxh_Right[idx_g].T,
                                   self.bhh_Right[idx_g], self.gates_pre_Right[t, idx_g],  # self.bxh_Right[idx_g] +
                                   Rg_Right[t], d + e, eps, bias_factor, debug=False)
            Rh_Right[t - 1] = lrp_linear(self.h_Right[t - 1], self.Whh_Right[idx_g].T,
                                         self.bhh_Right[idx_g], self.gates_pre_Right[t, idx_g],  # self.bxh_Right[idx_g] +
                                         Rg_Right[t], d + e, eps, bias_factor, debug=False)

        return Rx, Rx_rev[::-1, :], Rh_Left[-1].sum() + Rc_Left[-1].sum() + Rh_Right[-1].sum() + Rc_Right[-1].sum()


def lrp_linear(hin, w, b, hout, Rout, bias_nb_units, eps, bias_factor=0.0, debug=False):
    """
    LRP for a linear layer with input dim D and output dim M.
    Args:
    - hin: forward pass input, of shape (D,)
    - w: connection weights, of shape (D, M)
    - b: biases, of shape (M,)
    - hout: forward pass output, of shape (M,) (unequal to np.dot(w.T, hin) + b if there is more than one incoming layer!)
    - Rout: relevance at layer output, of shape (M,)
    - bias_nb_units: total number of connected lower-layer units (onto which the bias/stabilizer contribution is redistributed for the sanity check)
    - eps: stabilizer (small positive number)
    - bias_factor: set to 1.0 to check global relevance conservation, otherwise use 0.0 to ignore the bias/stabilizer redistribution (recommended)
    Returns:
    - Rin: relevance at layer input, of shape (D,)
    """
    sign_out = np.where(hout[na, :] >= 0, 1., -1.)  # shape (1, M)

    numer = (w * hin[:, na]) + (bias_factor * (b[na, :] * 1. + eps * sign_out * 1.) / bias_nb_units)  # shape (D, M)
    # Note: here we multiply bias_factor with both the bias b and the stabilizer eps, since
    # the term (b[na, :] * 1. + eps * sign_out * 1.) / bias_nb_units in the numerator is only useful for the sanity check
    # (in the initial paper version we used (bias_factor * b[na, :] * 1. + eps * sign_out * 1.) / bias_nb_units instead)

    denom = hout[na, :] + (eps * sign_out * 1.)  # shape (1, M)

    message = (numer / denom) * Rout[na, :]  # shape (D, M)

    Rin = message.sum(axis=1)  # shape (D,)

    if debug:
        print("local diff: ", Rout.sum() - Rin.sum())
    # Note:
    # - local layer relevance conservation if bias_factor == 1.0 and bias_nb_units == D (i.e. when there is only one incoming layer)
    # - global network relevance conservation if bias_factor == 1.0 and bias_nb_units is set to the total number of lower-layer connections
    #   -> can be used as a sanity check

    return Rin
def lrp_explain(CONF, predictive_model, full_test_df, encoder):
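A minimal usage sketch (hedged). CONF, predictive_model, full_test_df and encoder are assumed to be the objects produced by the surrounding nirdizati_light pipeline (run configuration, trained LSTM predictive model, encoded test DataFrame with 'trace_id' and one-hot 'label' columns, and the fitted encoder); they are not constructed here, and the values shown in the comment are purely illustrative.

from nirdizati_light.explanation.wrappers.lrp_wrapper import lrp_explain

importances = lrp_explain(CONF, predictive_model, full_test_df, encoder)

# The result maps each trace id to one [column_name, decoded_value, relevance] triple per
# prefix column, e.g. {'176': [['prefix_1', 'Create Fine', 0.42], ...]} (hypothetical values).
for trace_id, rows in importances.items():
    for column_name, value, relevance in rows:
        print(trace_id, column_name, value, float(relevance))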
class LSTM_bidi:
LSTM_bidi(model, input_encoded)
Load the trained model and extract its weights.
Assumptions (illustrated in the sketch below):
- the input biases bxh_left and bxh_right are not stored by Keras
- the bias of the output layer is also set to 0
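A hedged sketch of the weight layout these assumptions rely on. It assumes `model` is a trained Keras network whose layers[1] is a Bidirectional LSTM (so get_weights() returns kernel, recurrent kernel and bias for the forward direction followed by the same three for the backward direction) and whose layers[2] is the Dense output layer; the 0:100 / 100:200 split in the constructor additionally assumes 100 units per direction.

# Hedged sketch; `model` is assumed to be the trained Keras bi-LSTM predictive model.
fw_kernel, fw_recurrent, fw_bias, bw_kernel, bw_recurrent, bw_bias = model.layers[1].get_weights()

d = fw_recurrent.shape[0]                 # LSTM units per direction (the constructor hard-codes d = 100)
e = fw_kernel.shape[0]                    # size of the one-hot encoded input
assert fw_kernel.shape == (e, 4 * d)      # transposed to (4d, e) and stored as Wxh_Left
assert fw_recurrent.shape == (d, 4 * d)   # transposed to (4d, d) and stored as Whh_Left
assert fw_bias.shape == (4 * d,)          # stored as bhh_Left (no separate input bias bxh in Keras)

out_kernel, out_bias = model.layers[2].get_weights()
assert out_kernel.shape[0] == 2 * d       # both directions concatenated; split into Why_Left / Why_Right
# out_bias exists in Keras but is treated as 0 by the wrapper (second assumption above)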
def set_input(self, w, delete_pos=None):
Build the numerical input sequence x/x_rev from the word indices w (+ initialize hidden layers h, c). Optionally delete words at positions delete_pos.
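A hedged sketch of the delete_pos option, e.g. for a simple input-deletion check of the explanations. `explainer` and `prefix_words` are assumed to be prepared as in lrp_explain/_get_explanation; note that the deletion zeroes rows of the stored encoding self.E in place, so run the unmodified pass first.

# Hedged sketch; `explainer` and `prefix_words` are assumed to exist (see lrp_explain above).
explainer.set_input(prefix_words)                        # full prefix
scores_full = explainer.forward()

explainer.set_input(prefix_words, delete_pos=[0, 2])     # zero out ("delete") the encoded events at positions 0 and 2
scores_deleted = explainer.forward()

print(scores_full - scores_deleted)                      # change in the prediction scores caused by the deletion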
def forward(self):
Standard forward pass. Compute the hidden layer values (assuming the input x/x_rev was previously set via set_input).
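For reference, each iteration of the loop in forward() implements the standard LSTM cell update (written here for the left/forward direction; the right/backward direction is identical on the reversed input), with the gate blocks selected by idx_i, idx_g, idx_f, idx_o:

z_t = W_{xh} x_t + W_{hh} h_{t-1} + b_{hh}
i_t = \sigma(z_t^{(i)}), \quad f_t = \sigma(z_t^{(f)}), \quad o_t = \sigma(z_t^{(o)}), \quad g_t = \tanh(z_t^{(g)})
c_t = f_t \odot c_{t-1} + i_t \odot g_t, \qquad h_t = o_t \odot \tanh(c_t)

The prediction scores combine the last hidden states of both directions: s = W_{hy}^{L} h^{L}_{T-1} + W_{hy}^{R} h^{R}_{T-1}.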
def lrp(self, w, LRP_class, eps=0.001, bias_factor=0.0):
Perform Layer-wise Relevance Propagation (LRP): a forward pass followed by the LRP backward pass. Compute the hidden layer relevances for the target class LRP_class (following the papers https://doi.org/10.1371/journal.pone.0130140 and https://doi.org/10.18653/v1/W17-5221).
Parameters
- w: words (decoded events/attribute values) of the prefix
- LRP_class: index of the target activity/class
- eps: LRP stabilizer (small positive number)
- bias_factor: LRP bias redistribution factor (0.0 ignores the bias/stabilizer redistribution)
Returns
relevance scores: Rx and Rx_rev (input relevances from the forward and backward direction, with Rx_rev re-reversed to input order), plus the residual relevance assigned to the initial hidden and cell states (see the usage sketch below)
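A hedged usage sketch of the return values, mirroring what _get_explanation does. `explainer`, `prefix_words` and `label` are assumed to be prepared as in lrp_explain; they are not constructed here.

import numpy as np

Rx, Rx_rev, R_rest = explainer.lrp(prefix_words, label)

event_importances = np.sum(Rx + Rx_rev, axis=1)   # shape (T,): one relevance score per prefix column
# R_rest is the relevance left in the initial hidden/cell states of both directions; with
# bias_factor=1.0, Rx.sum() + Rx_rev.sum() + R_rest gives an approximate conservation check
# (it should roughly match the prediction score of the target class).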
def lrp_linear(hin, w, b, hout, Rout, bias_nb_units, eps, bias_factor=0.0, debug=False):
LRP for a linear layer with input dim D and output dim M.
Args:
- hin: forward pass input, of shape (D,)
- w: connection weights, of shape (D, M)
- b: biases, of shape (M,)
- hout: forward pass output, of shape (M,) (unequal to np.dot(w.T,hin)+b if more than one incoming layer!)
- Rout: relevance at layer output, of shape (M,)
- bias_nb_units: total number of connected lower-layer units (onto which the bias/stabilizer contribution is redistributed for sanity check)
- eps: stabilizer (small positive number)
- bias_factor: set to 1.0 to check global relevance conservation (see the sanity-check sketch below), otherwise use 0.0 to ignore bias/stabilizer redistribution (recommended)
Returns:
- Rin: relevance at layer input, of shape (D,)
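A minimal, self-contained sanity-check sketch with hypothetical shapes and random data: for a single incoming layer, with bias_factor=1.0 and bias_nb_units equal to the input dimension D, the propagated relevance is conserved, i.e. Rin.sum() matches Rout.sum() up to numerical precision.

import numpy as np
from nirdizati_light.explanation.wrappers.lrp_wrapper import lrp_linear

D, M = 4, 3                        # hypothetical layer sizes
rng = np.random.default_rng(0)
hin = rng.normal(size=D)           # forward-pass input
w = rng.normal(size=(D, M))        # connection weights
b = rng.normal(size=M)             # biases
hout = w.T @ hin + b               # forward-pass output of this single incoming layer
Rout = rng.normal(size=M)          # relevance arriving at the layer output

Rin = lrp_linear(hin, w, b, hout, Rout, bias_nb_units=D, eps=0.001, bias_factor=1.0, debug=True)
print(np.isclose(Rin.sum(), Rout.sum()))   # True: local relevance conservation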