Sunday, May 29, 2022

Implementing Gradient Explanations for a HuggingFace Text Classification Model

The trick is that an embedding lookup on integer token ids is not differentiable, so the code below re-encodes the tokens as one-hot vectors, multiplies them by the model's embedding matrix to recover the embeddings, and then takes the gradient of the predicted class's logit with respect to those one-hot vectors. The L2 norm of each token's gradient, normalized to [0, 1], serves as that token's saliency score.

import numpy as np
import tensorflow as tf


def get_gradients(text, model, tokenizer):

    def get_correct_span_mask(correct_index, token_size):
        # One-hot mask that picks out the predicted class's logit.
        span_mask = np.zeros((1, token_size))
        span_mask[0, correct_index] = 1
        span_mask = tf.constant(span_mask, dtype="float32")
        return span_mask

    # Word-embedding matrix of the underlying BERT encoder.
    embedding_matrix = model.bert.embeddings.weights[0]
    encoded_tokens = tokenizer(text, return_tensors="tf")
    token_ids = list(encoded_tokens["input_ids"].numpy()[0])
    vocab_size = embedding_matrix.get_shape()[0]

    # One-hot encode the token ids so the tape can differentiate with
    # respect to them (integer ids themselves are not differentiable).
    token_ids_tensor = tf.constant([token_ids], dtype="int32")
    token_ids_tensor_one_hot = tf.one_hot(token_ids_tensor, vocab_size)

    with tf.GradientTape(watch_accessed_variables=False) as tape:
        tape.watch(token_ids_tensor_one_hot)

        # Multiplying the one-hot vectors by the embedding matrix
        # reproduces the embedding lookup as a differentiable op.
        inputs_embeds = tf.matmul(token_ids_tensor_one_hot, embedding_matrix)

        # Feed the embeddings directly instead of input ids.
        pred_scores = model(
            {
                "inputs_embeds": inputs_embeds,
                "attention_mask": encoded_tokens["attention_mask"],
            }
        ).logits
        max_class = tf.argmax(pred_scores, axis=1).numpy()[0]

        # Zero out every logit except the predicted class's.
        score_mask = get_correct_span_mask(max_class, pred_scores.shape[1])
        predict_correct_class = tf.reduce_sum(pred_scores * score_mask)

    # Per-token saliency: L2 norm of the gradient over the vocab axis,
    # normalized so the most influential token scores 1.0.
    gradient_non_normalized = tf.norm(
        tape.gradient(predict_correct_class, token_ids_tensor_one_hot), axis=2
    )
    gradient_tensor = (
        gradient_non_normalized /
        tf.reduce_max(gradient_non_normalized)
    )
    gradients = gradient_tensor[0].numpy().tolist()
    token_words = tokenizer.convert_ids_to_tokens(token_ids)

    prediction_label = "political" if max_class == 1 else "general"
    return gradients, token_words, prediction_label
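
A minimal usage sketch, assuming a fine-tuned TF BERT sequence-classification checkpoint; the checkpoint name and example sentence below are placeholders, not from the original post:

from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

# Hypothetical checkpoint name: substitute your own fine-tuned
# two-class (political vs. general) BERT model.
model_name = "my-political-news-classifier"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)

text = "The senate passed the infrastructure bill on Tuesday."
gradients, token_words, prediction_label = get_gradients(text, model, tokenizer)

# Print each wordpiece token next to its normalized saliency score.
print(prediction_label)
for token, score in zip(token_words, gradients):
    print(f"{token:>12s}  {score:.3f}")

Note that get_gradients reaches into model.bert, so the checkpoint must be BERT-based (e.g. TFBertForSequenceClassification); other architectures expose their encoder under a different attribute.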


from Hacker News https://ift.tt/seQYtbG
