kargaranamir HF Staff committed on
Commit
dae968e
·
1 Parent(s): 5253665

Delete app_v0.py

Browse files
Files changed (1) hide show
  1. app_v0.py +0 -113
app_v0.py DELETED
@@ -1,113 +0,0 @@
1
- # """
2
- # Author: Amir Hossein Kargaran
3
- # Date: August, 2023
4
-
5
- # Description: This code applies LIME (Local Interpretable Model-Agnostic Explanations) on fasttext language identification.
6
-
7
- # MIT License
8
-
9
- # Some parts of the code are adapted from here: https://gist.github.com/ageitgey/60a8b556a9047a4ca91d6034376e5980
10
- # """
11
-
12
- import gradio as gr
13
- from io import BytesIO
14
- import base64
15
- from fasttext.FastText import _FastText
16
- import re
17
- import lime.lime_text
18
- import numpy as np
19
- from pathlib import Path
20
- from huggingface_hub import hf_hub_download
21
-
22
# Fetch the fastText language-identification model file from the Hugging Face
# Hub, then build the classifier from the value hf_hub_download returns.
model_path = hf_hub_download(
    filename="model.bin",
    repo_id="facebook/fasttext-language-identification",
)
classifier = _FastText(model_path)
27
-
28
def remove_label_prefix(item):
    """
    Return *item* without a leading ``__label__`` marker.

    Only a marker at the very start of the string is stripped; any later
    occurrence of ``__label__`` inside the label text is left untouched.
    Strings that do not start with the marker are returned unchanged.
    """
    prefix = '__label__'
    if item.startswith(prefix):
        return item[len(prefix):]
    return item
33
-
34
def remove_label_prefix_list(input_list):
    """
    Remove the label prefix from a flat sequence of labels or from a
    sequence of label sequences.

    The nesting is decided by looking at the first element only: if it is a
    list or tuple, every element is treated as an inner sequence of labels.
    An empty input yields an empty list.

    Returns a new list (or list of lists) of cleaned labels.
    """
    # Guard first: indexing [0] on an empty sequence would raise IndexError.
    if len(input_list) == 0:
        return []

    if isinstance(input_list[0], (list, tuple)):
        # Nested case: one inner sequence of labels per input text.
        return [
            [remove_label_prefix(item) for item in inner_list]
            for inner_list in input_list
        ]

    # Flat case: a plain sequence of label strings.
    return [remove_label_prefix(item) for item in input_list]
44
-
45
-
46
# Strip the label prefix from every label the classifier exposes and sort the
# resulting names; num_class is the number of labels.
class_names = np.sort(remove_label_prefix_list(classifier.labels))
num_class = len(class_names)
50
-
51
-
52
def tokenize_string(string):
    """
    Break the string into whitespace-separated words, approximating how
    FastText splits its input.
    """
    words = string.split()
    return words
57
-
58
# LIME text explainer shared by all requests. It tokenizes with
# tokenize_string, reports results against class_names, and is created with
# bow=False (no bag-of-words treatment of the input).
explainer = lime.lime_text.LimeTextExplainer(
    class_names=class_names,
    bow=False,
    split_expression=tokenize_string,
)
63
-
64
def fasttext_prediction_in_sklearn_format(classifier, texts):
    """
    Converts FastText predictions into Scikit-Learn format predictions.

    For every text, the classifier is asked for ``num_class`` labels and the
    returned probabilities are reordered so that column ``i`` corresponds to
    the i-th label in sorted label order.

    Parameters:
        classifier: object exposing ``predict(texts, k)`` that returns a
            ``(labels, probabilities)`` pair with one entry per text.
        texts: sequence of strings to classify.

    Returns:
        A NumPy array with one row of reordered probabilities per text.
    """
    if len(texts) == 0:
        # Nothing to classify; keep the (n_texts, n_classes) layout.
        return np.empty((0, num_class))

    # fastText's predict() raises ValueError for any text containing a
    # newline, so multi-line input is flattened to a single line first.
    single_line_texts = [text.replace("\n", " ") for text in texts]
    labels, probabilities = classifier.predict(single_line_texts, num_class)

    # Remove label prefix
    labels = remove_label_prefix_list(labels)

    # Labels come back ordered by probability; argsort on the label names
    # gives the permutation that puts each row into sorted label order.
    rows = [
        np.asarray(probs)[np.argsort(np.array(label))]
        for label, probs in zip(labels, probabilities)
    ]
    return np.array(rows)
79
-
80
def generate_explanation_html(input_sentence):
    """
    Generates an explanation HTML file using LIME for the input sentence.

    The explanation is requested with ``top_labels=2`` and
    ``num_features=20`` and is saved as ``explanation.html`` inside a fresh
    temporary directory, so simultaneous requests never overwrite each
    other's output.

    Parameters:
        input_sentence: the text to explain.

    Returns:
        The path (str) of the written HTML file.
    """
    import tempfile  # local import: only this function needs it

    exp = explainer.explain_instance(
        input_sentence,
        classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x),
        top_labels=2,
        num_features=20,
    )

    # One directory per call keeps the user-visible file name stable while
    # making the full path unique. The directory is not removed here.
    output_dir = Path(tempfile.mkdtemp(prefix="lime_explanation_"))
    output_html_filename = str(output_dir / "explanation.html")
    exp.save_to_file(output_html_filename)

    return output_html_filename
96
-
97
def download_html_file(html_filename):
    """
    Read the given HTML file in binary mode and return its raw bytes.
    """
    return Path(html_filename).read_bytes()
104
-
105
# Gradio UI: one text box in, one downloadable HTML file out.
# The components are created from the top-level gr namespace; the older
# gr.inputs / gr.outputs namespaces are deprecated.
input_sentence = gr.Textbox(label="Input Sentence")  # Change the label if needed
output_explanation = gr.File(label="Download Explanation HTML")

# Build the interface around generate_explanation_html and start serving it.
gr.Interface(
    fn=generate_explanation_html,
    inputs=input_sentence,
    outputs=output_explanation,
    allow_flagging='never',
).launch()