nirdizati_light.explanation.wrappers.lrp_wrapper

import numpy as np
from numpy import newaxis as na

from nirdizati_light.predictive_model.common import get_tensor
from nirdizati_light.predictive_model.predictive_model import drop_columns


def lrp_explain(CONF, predictive_model, full_test_df, encoder):
    test_df = drop_columns(full_test_df)
    test_tensor = get_tensor(CONF, test_df)

    explainer = _init_explainer(predictive_model.model, test_tensor[0])
    importances = _get_explanation(explainer, full_test_df, encoder)

    return importances


def _init_explainer(model, tensor):
    return LSTM_bidi(model, tensor)  # load trained LSTM model


def _get_explanation(explainer, target_df, encoder):
    trace_ids = target_df['trace_id'].to_list()
    targets_label_enc = np.argmax(np.array(target_df['label'].to_list()), axis=1)
    df = drop_columns(target_df)
    prefix_names = df.columns
    encoder.decode(df)
    prefixes = df.to_numpy().tolist()

    importances = {}
    for trace_id, prefix_words, label in zip(trace_ids, prefixes, targets_label_enc):
        Rx, Rx_rev, R_rest = explainer.lrp(prefix_words, label)
        words_importances = np.sum(Rx + Rx_rev, axis=1)
        importances[str(trace_id)] = []
        for i_word, word in enumerate(prefix_words):
            importances[str(trace_id)].append([prefix_names[i_word], word, words_importances[i_word]])

    return importances


"""
@author: Leila Arras
@maintainer: Leila Arras
@date: 21.06.2017
@version: 1.0+
@copyright: Copyright (c) 2017, Leila Arras, Gregoire Montavon, Klaus-Robert Mueller, Wojciech Samek
@license: see LICENSE file in repository root
"""
class LSTM_bidi:

    def __init__(self, model, input_encoded):
        """
        Store the trained Keras bi-LSTM model and the encoded input.
        """

        # self.args = CONF

        # input (i.e., one-hot encoded activity + one-hot encoded data attributes)
        self.E = input_encoded

        # model weights
        self.model = model

        """
        Assumptions:
        - the input biases bxh_left and bxh_right are not stored separately by Keras
        - the bias of the output layer is also set to 0
        """

        # LSTM left encoder
        self.Wxh_Left = model.layers[1].get_weights()[0].T  # shape 4d*e // kernel of the left LSTM layer // d = neurons
        # self.bxh_Left = model["bxh_Left"]  # shape 4d; not in Keras
        self.Whh_Left = model.layers[1].get_weights()[1].T  # shape 4d*d // recurrent kernel of the left LSTM layer
        self.bhh_Left = model.layers[1].get_weights()[2].T  # shape 4d // biases of the left LSTM layer

        # LSTM right encoder
        self.Wxh_Right = model.layers[1].get_weights()[3].T  # shape 4d*e // kernel of the right LSTM layer
        # self.bxh_Right = model["bxh_Right"]; not in Keras
        self.Whh_Right = model.layers[1].get_weights()[4].T  # shape 4d*d // recurrent kernel of the right LSTM layer
        self.bhh_Right = model.layers[1].get_weights()[5].T  # shape 4d // biases of the right LSTM layer

        # linear output layer. Note: Keras does not provide separate output weight matrices for the two
        # directions of the bi-LSTM cell; therefore, we split the kernel into two equal parts.
        d = int(self.Wxh_Left.shape[0] / 4)  # hidden units per direction
        self.Why_Left = model.layers[2].get_weights()[0].T  # shape C*2d
        self.Why_Left = self.Why_Left[:, 0:d]  # left half, shape C*d
        self.Why_Right = model.layers[2].get_weights()[0].T  # shape C*2d
        self.Why_Right = self.Why_Right[:, d:2 * d]  # right half, shape C*d

    def set_input(self, w, delete_pos=None):
        """
        Build the numerical input sequence x/x_rev from the word indices w (+ initialize hidden layers h, c).
        Optionally delete words at positions delete_pos.
        """
        T = len(w)  # prefix length
        d = int(self.Wxh_Left.shape[0] / 4)  # hidden layer dimensions
        e = self.E.shape[1]  # one-hot dimensions; previously, e = self.args.dim
        x = self.E.copy()  # encoded input (copied so that optional deletions do not modify self.E)

        if delete_pos is not None:
            x[delete_pos, :] = np.zeros((len(delete_pos), e))

        self.w = w
        self.x = x
        self.x_rev = x[::-1, :].copy()

        self.h_Left = np.zeros((T + 1, d))
        self.c_Left = np.zeros((T + 1, d))
        self.h_Right = np.zeros((T + 1, d))
        self.c_Right = np.zeros((T + 1, d))

    def forward(self):
        """
        Standard forward pass.
        Compute the hidden layer values (assuming input x/x_rev was previously set).
        """
        T = len(self.w)
        d = int(self.Wxh_Left.shape[0] / 4)
        # gate indices (assuming the gate ordering in the LSTM weights is i, g, f, o):
        idx = np.hstack((np.arange(0, d), np.arange(2 * d, 4 * d))).astype(int)  # indices of gates i, f, o together
        # indices of gates i, g, f, o separately
        idx_i, idx_g, idx_f, idx_o = np.arange(0, d), np.arange(d, 2 * d), np.arange(2 * d, 3 * d), np.arange(3 * d, 4 * d)

        # initialize
        self.gates_xh_Left = np.zeros((T, 4 * d))
        self.gates_hh_Left = np.zeros((T, 4 * d))
        self.gates_pre_Left = np.zeros((T, 4 * d))  # gates pre-activation
        self.gates_Left = np.zeros((T, 4 * d))  # gates activation

        self.gates_xh_Right = np.zeros((T, 4 * d))
        self.gates_hh_Right = np.zeros((T, 4 * d))
        self.gates_pre_Right = np.zeros((T, 4 * d))
        self.gates_Right = np.zeros((T, 4 * d))

        # note: at t == 0 the index t - 1 == -1 selects the extra all-zero row, i.e. the initial hidden/cell state
        for t in range(T):
            self.gates_xh_Left[t] = np.dot(self.Wxh_Left, self.x[t])
            self.gates_hh_Left[t] = np.dot(self.Whh_Left, self.h_Left[t - 1])
            self.gates_pre_Left[t] = self.gates_xh_Left[t] + self.gates_hh_Left[t] + self.bhh_Left  # + self.bxh_Left
            self.gates_Left[t, idx] = 1.0 / (1.0 + np.exp(- self.gates_pre_Left[t, idx]))
            self.gates_Left[t, idx_g] = np.tanh(self.gates_pre_Left[t, idx_g])
            self.c_Left[t] = self.gates_Left[t, idx_f] * self.c_Left[t - 1] + self.gates_Left[t, idx_i] * self.gates_Left[t, idx_g]
            self.h_Left[t] = self.gates_Left[t, idx_o] * np.tanh(self.c_Left[t])

            self.gates_xh_Right[t] = np.dot(self.Wxh_Right, self.x_rev[t])
            self.gates_hh_Right[t] = np.dot(self.Whh_Right, self.h_Right[t - 1])
            self.gates_pre_Right[t] = self.gates_xh_Right[t] + self.gates_hh_Right[t] + self.bhh_Right  # + self.bxh_Right
            self.gates_Right[t, idx] = 1.0 / (1.0 + np.exp(- self.gates_pre_Right[t, idx]))
            self.gates_Right[t, idx_g] = np.tanh(self.gates_pre_Right[t, idx_g])
            self.c_Right[t] = self.gates_Right[t, idx_f] * self.c_Right[t - 1] + self.gates_Right[t, idx_i] * self.gates_Right[t, idx_g]
            self.h_Right[t] = self.gates_Right[t, idx_o] * np.tanh(self.c_Right[t])

        self.y_Left = np.dot(self.Why_Left, self.h_Left[T - 1])
        self.y_Right = np.dot(self.Why_Right, self.h_Right[T - 1])
        self.s = self.y_Left + self.y_Right

        return self.s.copy()  # prediction scores

    def lrp(self, w, LRP_class, eps=0.001, bias_factor=0.0):
        """
        Perform Layer-wise Relevance Propagation (LRP): a forward pass followed by the LRP backward pass.
        Compute the hidden layer relevances by performing LRP for the target class LRP_class
        (according to the papers:
            - https://doi.org/10.1371/journal.pone.0130140
            - https://doi.org/10.18653/v1/W17-5221)
        :param w: words of the prefix to be explained
        :param LRP_class: index of the target activity (class)
        :param eps: LRP stabilizer (small positive number)
        :param bias_factor: LRP bias redistribution factor (0.0 recommended; 1.0 for the conservation sanity check)
        :return: relevance scores Rx, Rx_rev (reversed back to input order) and the relevance remaining in the initial hidden and cell states
        """

        # forward pass
        self.set_input(w)
        self.forward()

        T = len(self.w)
        d = int(self.Wxh_Left.shape[0] / 4)
        e = self.E.shape[1]  # previously, e = self.args.dim
        C = self.Why_Left.shape[0]  # number of classes
        idx = np.hstack((np.arange(0, d), np.arange(2 * d, 4 * d))).astype(int)  # indices of gates i, f, o together
        # indices of gates i, g, f, o separately
        idx_i, idx_g, idx_f, idx_o = np.arange(0, d), np.arange(d, 2 * d), np.arange(2 * d, 3 * d), np.arange(3 * d, 4 * d)

        # initialize
        Rx = np.zeros(self.x.shape)
        Rx_rev = np.zeros(self.x.shape)

        Rh_Left = np.zeros((T + 1, d))
        Rc_Left = np.zeros((T + 1, d))
        Rg_Left = np.zeros((T, d))  # gate g only
        Rh_Right = np.zeros((T + 1, d))
        Rc_Right = np.zeros((T + 1, d))
        Rg_Right = np.zeros((T, d))  # gate g only

        Rout_mask = np.zeros((C))
        Rout_mask[LRP_class] = 1.0

        # format reminder: lrp_linear(hin, w, b, hout, Rout, bias_nb_units, eps, bias_factor)
        Rh_Left[T - 1] = lrp_linear(self.h_Left[T - 1], self.Why_Left.T, np.zeros((C)), self.s, self.s * Rout_mask,
                                    2 * d, eps, bias_factor, debug=False)
        Rh_Right[T - 1] = lrp_linear(self.h_Right[T - 1], self.Why_Right.T, np.zeros((C)), self.s, self.s * Rout_mask,
                                     2 * d, eps, bias_factor, debug=False)

        for t in reversed(range(T)):
            Rc_Left[t] += Rh_Left[t]
            Rc_Left[t - 1] = lrp_linear(self.gates_Left[t, idx_f] * self.c_Left[t - 1], np.identity(d), np.zeros((d)),
                                        self.c_Left[t], Rc_Left[t], 2 * d, eps, bias_factor, debug=False)
            Rg_Left[t] = lrp_linear(self.gates_Left[t, idx_i] * self.gates_Left[t, idx_g], np.identity(d),
                                    np.zeros((d)), self.c_Left[t], Rc_Left[t], 2 * d, eps, bias_factor, debug=False)
            Rx[t] = lrp_linear(self.x[t], self.Wxh_Left[idx_g].T, self.bhh_Left[idx_g],  # self.bxh_Left[idx_g] +
                               self.gates_pre_Left[t, idx_g], Rg_Left[t], d + e, eps, bias_factor, debug=False)
            Rh_Left[t - 1] = lrp_linear(self.h_Left[t - 1], self.Whh_Left[idx_g].T,
                                        self.bhh_Left[idx_g], self.gates_pre_Left[t, idx_g],  # self.bxh_Left[idx_g] +
                                        Rg_Left[t], d + e, eps, bias_factor, debug=False)

            Rc_Right[t] += Rh_Right[t]
            Rc_Right[t - 1] = lrp_linear(self.gates_Right[t, idx_f] * self.c_Right[t - 1], np.identity(d),
                                         np.zeros((d)), self.c_Right[t], Rc_Right[t], 2 * d, eps, bias_factor,
                                         debug=False)
            Rg_Right[t] = lrp_linear(self.gates_Right[t, idx_i] * self.gates_Right[t, idx_g], np.identity(d),
                                     np.zeros((d)), self.c_Right[t], Rc_Right[t], 2 * d, eps, bias_factor, debug=False)
            Rx_rev[t] = lrp_linear(self.x_rev[t], self.Wxh_Right[idx_g].T,
                                   self.bhh_Right[idx_g], self.gates_pre_Right[t, idx_g],  # self.bxh_Right[idx_g] +
                                   Rg_Right[t], d + e, eps, bias_factor, debug=False)
            Rh_Right[t - 1] = lrp_linear(self.h_Right[t - 1], self.Whh_Right[idx_g].T,
                                         self.bhh_Right[idx_g], self.gates_pre_Right[t, idx_g],  # self.bxh_Right[idx_g] +
                                         Rg_Right[t], d + e, eps, bias_factor, debug=False)

        return Rx, Rx_rev[::-1, :], Rh_Left[-1].sum() + Rc_Left[-1].sum() + Rh_Right[-1].sum() + Rc_Right[-1].sum()


def lrp_linear(hin, w, b, hout, Rout, bias_nb_units, eps, bias_factor=0.0, debug=False):
    """
    LRP for a linear layer with input dim D and output dim M.
    Args:
    - hin:            forward pass input, of shape (D,)
    - w:              connection weights, of shape (D, M)
    - b:              biases, of shape (M,)
    - hout:           forward pass output, of shape (M,) (unequal to np.dot(w.T, hin) + b if there is more than one incoming layer!)
    - Rout:           relevance at layer output, of shape (M,)
    - bias_nb_units:  total number of connected lower-layer units (onto which the bias/stabilizer contribution is redistributed for the sanity check)
    - eps:            stabilizer (small positive number)
    - bias_factor:    set to 1.0 to check global relevance conservation, otherwise use 0.0 to ignore the bias/stabilizer redistribution (recommended)
    Returns:
    - Rin:            relevance at layer input, of shape (D,)
    """
    sign_out = np.where(hout[na, :] >= 0, 1., -1.)  # shape (1, M)

    numer = (w * hin[:, na]) + (bias_factor * (b[na, :] * 1. + eps * sign_out * 1.) / bias_nb_units)  # shape (D, M)
    # Note: here we multiply the bias_factor with both the bias b and the stabilizer eps since in fact
    # using the term (b[na,:]*1. + eps*sign_out*1.) / bias_nb_units in the numerator is only useful for the sanity check
    # (in the initial paper version we were using (bias_factor*b[na,:]*1. + eps*sign_out*1.) / bias_nb_units instead)

    denom = hout[na, :] + (eps * sign_out * 1.)  # shape (1, M)

    message = (numer / denom) * Rout[na, :]  # shape (D, M)

    Rin = message.sum(axis=1)  # shape (D,)

    if debug:
        print("local diff: ", Rout.sum() - Rin.sum())
    # Note: - local layer relevance conservation if bias_factor==1.0 and bias_nb_units==D (i.e. when there is only one
    # incoming layer) - global network relevance conservation if bias_factor==1.0 and bias_nb_units set accordingly
    # to the total number of lower-layer connections -> can be used as a sanity check

    return Rin
def lrp_explain(CONF, predictive_model, full_test_df, encoder):
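lrp_explain is the module's entry point. A minimal usage sketch, assuming CONF, predictive_model (exposing a trained Keras bi-LSTM as .model), full_test_df and encoder have already been produced by the surrounding nirdizati_light pipeline (their construction is outside this module):

from nirdizati_light.explanation.wrappers.lrp_wrapper import lrp_explain

importances = lrp_explain(CONF, predictive_model, full_test_df, encoder)

# importances maps each trace_id to [prefix_column, decoded_value, relevance] triples
for trace_id, rows in importances.items():
    for prefix_column, value, relevance in rows[:3]:
        print(trace_id, prefix_column, value, float(relevance))
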
class LSTM_bidi:
LSTM_bidi(model, input_encoded)

Store the trained Keras bi-LSTM model and the encoded input.

E
model

Assumptions:

  • the input biases bxh_left and bxh_right are not stored separately by Keras
  • the bias of the output layer is also set to 0

A sketch of a Keras layout compatible with these assumptions follows the attribute list below.
Wxh_Left
Whh_Left
bhh_Left
Wxh_Right
Whh_Right
bhh_Right
Why_Left
Why_Right
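
The constructor unpacks the weights under a specific assumed Keras layout: layers[1] is a Bidirectional LSTM whose get_weights() returns the forward kernel, recurrent kernel and bias followed by their backward counterparts, and layers[2] is the linear output layer whose kernel is split into a left and a right half. A minimal, purely illustrative sketch of a compatible model; all sizes and names below are placeholders, not part of this module:

import tensorflow as tf

n_timesteps, n_features, n_classes = 10, 37, 5                               # placeholders
inputs = tf.keras.Input(shape=(n_timesteps, n_features))
hidden = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(100))(inputs)    # becomes model.layers[1]
outputs = tf.keras.layers.Dense(n_classes, use_bias=False)(hidden)           # model.layers[2]; zero output bias, as assumed above
model = tf.keras.Model(inputs, outputs)

# encoded_prefix is one encoded prefix of shape (T, e), e.g. test_tensor[0] in lrp_explain above
explainer = LSTM_bidi(model, encoded_prefix)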
def set_input(self, w, delete_pos=None):

Build the numerical input sequence x/x_rev from the word indices w (+ initialize hidden layers h, c). Optionally delete words at positions delete_pos.
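
A small sketch of the delete_pos option; explainer and prefix_words are assumed to be set up as in _get_explanation above:

explainer.set_input(prefix_words)                   # build x / x_rev and reset h, c to zeros
explainer.set_input(prefix_words, delete_pos=[0])   # same, but with the first event zeroed out
assert np.allclose(explainer.x[0], 0.0)             # the first input row is now all zeros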

def forward(self):

Standard forward pass. Compute the hidden layer values (assuming input x/x_rev was previously set)
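
A hedged sketch of how the returned scores can be used, here for a simple occlusion-style comparison against the LRP relevances computed below (explainer and prefix_words as above):

explainer.set_input(prefix_words)
scores_full = explainer.forward()              # shape (C,): one pre-activation score per class
predicted = int(np.argmax(scores_full))

explainer.set_input(prefix_words, delete_pos=[0])
scores_occluded = explainer.forward()
print("score drop when removing the first event:",
      scores_full[predicted] - scores_occluded[predicted])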

def lrp(self, w, LRP_class, eps=0.001, bias_factor=0.0):

Perform Layer-wise Relevance Propagation (LRP): a forward pass followed by the LRP backward pass. Compute the hidden layer relevances by performing LRP for the target class LRP_class, following the papers:

  • https://doi.org/10.1371/journal.pone.0130140
  • https://doi.org/10.18653/v1/W17-5221

Parameters
  • w: words of the prefix to be explained
  • LRP_class: index of the target activity (class)
  • eps: LRP stabilizer (small positive number)
  • bias_factor: LRP bias redistribution factor; 0.0 is recommended, 1.0 enables the relevance conservation check
Returns

relevance scores Rx, Rx_rev (reversed back to input order) and the relevance remaining in the initial hidden and cell states
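
A sketch mirroring _get_explanation above: relevances for one prefix and its encoded label, plus the conservation sanity check described for lrp_linear below (all non-API names are assumptions):

Rx, Rx_rev, R_rest = explainer.lrp(prefix_words, label)   # label is the target class index
word_importances = np.sum(Rx + Rx_rev, axis=1)            # one relevance value per prefix position

# sanity check: with bias_factor=1.0 the relevance should be (approximately) conserved,
# i.e. sum up to the model score of the target class
Rx_c, Rx_rev_c, R_rest_c = explainer.lrp(prefix_words, label, eps=0.001, bias_factor=1.0)
print(Rx_c.sum() + Rx_rev_c.sum() + R_rest_c, explainer.s[label])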

def lrp_linear(hin, w, b, hout, Rout, bias_nb_units, eps, bias_factor=0.0, debug=False):

LRP for a linear layer with input dim D and output dim M.

Args
  • hin: forward pass input, of shape (D,)
  • w: connection weights, of shape (D, M)
  • b: biases, of shape (M,)
  • hout: forward pass output, of shape (M,) (unequal to np.dot(w.T, hin) + b if there is more than one incoming layer!)
  • Rout: relevance at layer output, of shape (M,)
  • bias_nb_units: total number of connected lower-layer units (onto which the bias/stabilizer contribution is redistributed for the sanity check)
  • eps: stabilizer (small positive number)
  • bias_factor: set to 1.0 to check global relevance conservation, otherwise use 0.0 to ignore the bias/stabilizer redistribution (recommended)
Returns
  • Rin: relevance at layer input, of shape (D,)
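
A tiny worked example (illustrative only): one linear layer with a single incoming layer, so bias_nb_units equals the input dimension D; with bias_factor=1.0 the relevance is conserved.

import numpy as np

hin = np.array([1.0, -2.0, 0.5])                 # D = 3
w = np.array([[0.5, -1.0],
              [0.2,  0.3],
              [-0.4, 0.8]])                      # shape (D, M) with M = 2
b = np.array([0.1, -0.2])
hout = np.dot(w.T, hin) + b                      # forward pass output, shape (M,)
Rout = hout.copy()                               # start the backward pass from the output scores

Rin = lrp_linear(hin, w, b, hout, Rout, bias_nb_units=3, eps=0.001, bias_factor=1.0)
print(Rin.sum(), Rout.sum())                     # conservation: the two sums coincide (up to numerics)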