Sunday, May 29, 2022

Implementing Gradient Explanations for a HuggingFace Text Classification Model

The trick is that an embedding lookup on integer token ids is not differentiable, so the code below re-encodes the tokens as one-hot vectors, multiplies them by the model's embedding matrix to recover the embeddings, and then takes the gradient of the predicted class's logit with respect to those one-hot vectors. The L2 norm of each token's gradient, normalized to [0, 1], serves as that token's saliency score.

import numpy as np
import tensorflow as tf


def get_gradients(text, model, tokenizer):

    def get_correct_span_mask(correct_index, token_size):
        # One-hot mask that picks out the predicted class's logit.
        span_mask = np.zeros((1, token_size))
        span_mask[0, correct_index] = 1
        span_mask = tf.constant(span_mask, dtype="float32")
        return span_mask

    # Word-embedding matrix of the underlying BERT encoder.
    embedding_matrix = model.bert.embeddings.weights[0]
    encoded_tokens = tokenizer(text, return_tensors="tf")
    token_ids = list(encoded_tokens["input_ids"].numpy()[0])
    vocab_size = embedding_matrix.get_shape()[0]

    # One-hot encode the token ids so the tape can differentiate with
    # respect to them (integer ids themselves are not differentiable).
    token_ids_tensor = tf.constant([token_ids], dtype="int32")
    token_ids_tensor_one_hot = tf.one_hot(token_ids_tensor, vocab_size)

    with tf.GradientTape(watch_accessed_variables=False) as tape:
        tape.watch(token_ids_tensor_one_hot)

        # Multiplying the one-hot vectors by the embedding matrix
        # reproduces the embedding lookup as a differentiable op.
        inputs_embeds = tf.matmul(token_ids_tensor_one_hot, embedding_matrix)

        # Feed the embeddings directly instead of input ids.
        pred_scores = model(
            {
                "inputs_embeds": inputs_embeds,
                "attention_mask": encoded_tokens["attention_mask"],
            }
        ).logits
        max_class = tf.argmax(pred_scores, axis=1).numpy()[0]

        # Zero out every logit except the predicted class's.
        score_mask = get_correct_span_mask(max_class, pred_scores.shape[1])
        predict_correct_class = tf.reduce_sum(pred_scores * score_mask)

    # Per-token saliency: L2 norm of the gradient over the vocab axis,
    # normalized so the most influential token scores 1.0.
    gradient_non_normalized = tf.norm(
        tape.gradient(predict_correct_class, token_ids_tensor_one_hot), axis=2
    )
    gradient_tensor = (
        gradient_non_normalized /
        tf.reduce_max(gradient_non_normalized)
    )
    gradients = gradient_tensor[0].numpy().tolist()
    token_words = tokenizer.convert_ids_to_tokens(token_ids)

    prediction_label = "political" if max_class == 1 else "general"
    return gradients, token_words, prediction_label
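
A minimal usage sketch, assuming a fine-tuned TF BERT sequence-classification checkpoint; the checkpoint name and example sentence below are placeholders, not from the original post:

from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

# Hypothetical checkpoint name: substitute your own fine-tuned
# two-class (political vs. general) BERT model.
model_name = "my-political-news-classifier"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)

text = "The senate passed the infrastructure bill on Tuesday."
gradients, token_words, prediction_label = get_gradients(text, model, tokenizer)

# Print each wordpiece token next to its normalized saliency score.
print(prediction_label)
for token, score in zip(token_words, gradients):
    print(f"{token:>12s}  {score:.3f}")

Note that get_gradients reaches into model.bert, so the checkpoint must be BERT-based (e.g. TFBertForSequenceClassification); other architectures expose their encoder under a different attribute.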


from Hacker News https://ift.tt/seQYtbG
