Spaces:
Sleeping
Sleeping
| # """ | |
| # Author: Amir Hossein Kargaran | |
| # Date: August, 2023 | |
| # Description: This code applies LIME (Local Interpretable Model-Agnostic Explanations) on fasttext language identification. | |
| # MIT License | |
| # Some parts of the code are adapted from: https://gist.github.com/ageitgey/60a8b556a9047a4ca91d6034376e5980 | |
| # """ | |
| import gradio as gr | |
| from io import BytesIO | |
| import base64 | |
| from fasttext.FastText import _FastText | |
| import re | |
| import lime.lime_text | |
| import numpy as np | |
| from pathlib import Path | |
| from huggingface_hub import hf_hub_download | |
# Fetch the pretrained fastText language-identification weights from the
# Hugging Face Hub; the returned value is handed to fastText as the model path.
model_path = hf_hub_download(
    repo_id="facebook/fasttext-language-identification",
    filename="model.bin",
)

# Wrap the downloaded model file in a fastText classifier object.
classifier = _FastText(model_path)
def remove_label_prefix(item):
    """
    Remove the fastText '__label__' prefix from a single label.

    Only a leading '__label__' is stripped. Any later occurrence of that
    marker inside the label is kept, so the rest of the label is never
    altered.

    Args:
        item: Label string, e.g. '__label__eng_Latn'.

    Returns:
        The label without its leading '__label__' marker, or the label
        unchanged when it does not start with the marker.
    """
    prefix = '__label__'
    if item.startswith(prefix):
        return item[len(prefix):]
    return item
def remove_label_prefix_list(input_list):
    """
    Remove the label prefix from a list of labels or a list of label lists.

    Args:
        input_list: Either a flat sequence of label strings, or a sequence
            whose elements are themselves lists/tuples of label strings.
            The shape is decided by looking at the first element.

    Returns:
        A list (or list of lists) of labels with the prefix removed.
        An empty input yields an empty list.
    """
    # len() rather than truthiness so array-like inputs are handled too,
    # and so an empty input never reaches the input_list[0] probe below.
    if len(input_list) == 0:
        return []

    if isinstance(input_list[0], (list, tuple)):
        # Nested case: one inner sequence of labels per row.
        return [
            [remove_label_prefix(item) for item in inner_list]
            for inner_list in input_list
        ]

    # Flat case: a simple sequence of labels.
    return [remove_label_prefix(item) for item in input_list]
# Class names are the classifier's labels with the '__label__' prefix removed,
# kept in sorted order; num_class is how many of them there are.
class_names = np.sort(remove_label_prefix_list(classifier.labels))
num_class = len(class_names)
def tokenize_string(string):
    """
    Tokenize text on runs of whitespace, similar to FastText's word splitting.
    """
    tokens = string.split()
    return tokens
# LIME text explainer configured to tokenize with tokenize_string and to
# report results using the sorted class names.
explainer = lime.lime_text.LimeTextExplainer(
    class_names=class_names,
    split_expression=tokenize_string,
    bow=False,
)
def fasttext_prediction_in_sklearn_format(classifier, texts):
    """
    Converts FastText predictions into Scikit-Learn format predictions.

    Args:
        classifier: Object exposing a fastText-style
            ``predict(texts, k)`` that returns ``(labels, probabilities)``.
        texts: Iterable of strings to classify.

    Returns:
        A NumPy array with one row per text. Within each row the
        probabilities are reordered so that they follow the sorted order of
        that row's (prefix-free) label names.
    """
    # fastText's predict() rejects text containing a newline, so flatten
    # every input to a single line before predicting.
    single_line_texts = [text.replace('\n', ' ') for text in texts]

    labels, probabilities = classifier.predict(single_line_texts, num_class)
    # Remove label prefix
    labels = remove_label_prefix_list(labels)

    # argsort over the label names gives the permutation that puts each
    # row's probabilities in sorted-label order.
    return np.array([
        np.asarray(probs)[np.argsort(np.array(label))]
        for label, probs in zip(labels, probabilities)
    ])
def generate_explanation_html(input_sentence):
    """
    Generates an explanation HTML file using LIME for the input sentence.

    Args:
        input_sentence: Text to classify and explain.

    Returns:
        The filename of the HTML file the explanation was saved to.

    Raises:
        ValueError: If the input is empty or contains only whitespace.
    """
    # With no tokens there is nothing for LIME to perturb; fail early with a
    # clear message instead of an obscure error from inside the explainer.
    if not input_sentence or not input_sentence.split():
        raise ValueError(
            "Input sentence is empty; please enter some text to explain."
        )

    exp = explainer.explain_instance(
        input_sentence,
        classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x),
        top_labels=2,
        num_features=20,
    )

    # NOTE(review): this fixed filename is reused by every call, so each new
    # explanation overwrites the previous one.
    output_html_filename = "explanation.html"
    exp.save_to_file(output_html_filename)
    return output_html_filename
def download_html_file(html_filename):
    """
    Read the given HTML file and return its raw bytes.
    """
    with open(html_filename, mode="rb") as handle:
        return handle.read()
# Gradio UI: a text box for the sentence and a file component offering the
# generated explanation HTML for download. The top-level gr.Textbox / gr.File
# components replace the deprecated gr.inputs / gr.outputs namespaces.
input_sentence = gr.Textbox(label="Input Sentence")  # Change the label if needed
output_explanation = gr.File(label="Download Explanation HTML")

gr.Interface(
    fn=generate_explanation_html,
    inputs=input_sentence,
    outputs=output_explanation,
    allow_flagging='never',
).launch()