Spaces:
Running
Running
Commit
·
53fe897
1
Parent(s):
3833563
Unify dataset checking, show header dataset
Browse files- app_text_classification.py +40 -22
- text_classification_ui_helpers.py +59 -21
app_text_classification.py
CHANGED
|
@@ -2,14 +2,16 @@ import uuid
|
|
| 2 |
|
| 3 |
import gradio as gr
|
| 4 |
|
| 5 |
-
from io_utils import
|
| 6 |
-
from text_classification_ui_helpers import (
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
| 13 |
from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD
|
| 14 |
|
| 15 |
MAX_LABELS = 40
|
|
@@ -38,16 +40,19 @@ def get_demo():
|
|
| 38 |
)
|
| 39 |
|
| 40 |
with gr.Row():
|
| 41 |
-
dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False)
|
| 42 |
-
dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False)
|
| 43 |
|
| 44 |
with gr.Row():
|
| 45 |
example_btn = gr.Button(
|
| 46 |
-
"Auto-align Columns & Get Sample Prediction",
|
| 47 |
-
visible=True,
|
| 48 |
variant="primary",
|
| 49 |
-
interactive=False
|
|
|
|
| 50 |
|
|
|
|
|
|
|
| 51 |
with gr.Row():
|
| 52 |
example_input = gr.HTML(visible=False)
|
| 53 |
with gr.Row():
|
|
@@ -103,18 +108,29 @@ def get_demo():
|
|
| 103 |
)
|
| 104 |
|
| 105 |
with gr.Row():
|
| 106 |
-
logs = gr.Textbox(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
dataset_id_input.change(
|
| 109 |
-
|
| 110 |
inputs=[dataset_id_input],
|
| 111 |
-
outputs=[dataset_config_input],
|
| 112 |
)
|
| 113 |
|
| 114 |
dataset_config_input.change(
|
| 115 |
-
|
| 116 |
inputs=[dataset_id_input, dataset_config_input],
|
| 117 |
-
outputs=[dataset_split_input],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
)
|
| 119 |
|
| 120 |
scanners.change(write_scanners, inputs=[scanners, uid_label])
|
|
@@ -155,15 +171,17 @@ def get_demo():
|
|
| 155 |
model_id_input.change,
|
| 156 |
dataset_id_input.change,
|
| 157 |
dataset_config_input.change,
|
| 158 |
-
dataset_split_input.change
|
|
|
|
| 159 |
fn=precheck_model_ds_enable_example_btn,
|
| 160 |
inputs=[
|
| 161 |
model_id_input,
|
| 162 |
dataset_id_input,
|
| 163 |
dataset_config_input,
|
| 164 |
dataset_split_input,
|
| 165 |
-
],
|
| 166 |
-
outputs=[example_btn]
|
|
|
|
| 167 |
|
| 168 |
gr.on(
|
| 169 |
triggers=[
|
|
@@ -222,6 +240,6 @@ def get_demo():
|
|
| 222 |
gr.on(
|
| 223 |
triggers=[label.input for label in column_mappings],
|
| 224 |
fn=enable_run_btn,
|
| 225 |
-
inputs=None,
|
| 226 |
outputs=[run_btn],
|
| 227 |
)
|
|
|
|
| 2 |
|
| 3 |
import gradio as gr
|
| 4 |
|
| 5 |
+
from io_utils import get_logs_file, read_scanners, write_scanners
|
| 6 |
+
from text_classification_ui_helpers import (
|
| 7 |
+
align_columns_and_show_prediction,
|
| 8 |
+
check_dataset,
|
| 9 |
+
deselect_run_inference,
|
| 10 |
+
precheck_model_ds_enable_example_btn,
|
| 11 |
+
select_run_mode,
|
| 12 |
+
try_submit,
|
| 13 |
+
write_column_mapping_to_config,
|
| 14 |
+
)
|
| 15 |
from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD
|
| 16 |
|
| 17 |
MAX_LABELS = 40
|
|
|
|
| 40 |
)
|
| 41 |
|
| 42 |
with gr.Row():
|
| 43 |
+
dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False, allow_custom_value=True)
|
| 44 |
+
dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False, allow_custom_value=True)
|
| 45 |
|
| 46 |
with gr.Row():
|
| 47 |
example_btn = gr.Button(
|
| 48 |
+
"Auto-align Columns & Get Sample Prediction",
|
| 49 |
+
visible=True,
|
| 50 |
variant="primary",
|
| 51 |
+
interactive=False,
|
| 52 |
+
)
|
| 53 |
|
| 54 |
+
with gr.Row():
|
| 55 |
+
first_line_ds = gr.DataFrame(label="Dataset preview", visible=False)
|
| 56 |
with gr.Row():
|
| 57 |
example_input = gr.HTML(visible=False)
|
| 58 |
with gr.Row():
|
|
|
|
| 108 |
)
|
| 109 |
|
| 110 |
with gr.Row():
|
| 111 |
+
logs = gr.Textbox(
|
| 112 |
+
value=get_logs_file,
|
| 113 |
+
label="Giskard Bot Evaluation Log:",
|
| 114 |
+
visible=False,
|
| 115 |
+
every=0.5,
|
| 116 |
+
)
|
| 117 |
|
| 118 |
dataset_id_input.change(
|
| 119 |
+
check_dataset,
|
| 120 |
inputs=[dataset_id_input],
|
| 121 |
+
outputs=[dataset_config_input, dataset_split_input, first_line_ds],
|
| 122 |
)
|
| 123 |
|
| 124 |
dataset_config_input.change(
|
| 125 |
+
check_dataset,
|
| 126 |
inputs=[dataset_id_input, dataset_config_input],
|
| 127 |
+
outputs=[dataset_config_input, dataset_split_input, first_line_ds],
|
| 128 |
+
)
|
| 129 |
+
|
| 130 |
+
dataset_split_input.change(
|
| 131 |
+
check_dataset,
|
| 132 |
+
inputs=[dataset_id_input, dataset_config_input, dataset_split_input],
|
| 133 |
+
outputs=[dataset_config_input, dataset_split_input, first_line_ds],
|
| 134 |
)
|
| 135 |
|
| 136 |
scanners.change(write_scanners, inputs=[scanners, uid_label])
|
|
|
|
| 171 |
model_id_input.change,
|
| 172 |
dataset_id_input.change,
|
| 173 |
dataset_config_input.change,
|
| 174 |
+
dataset_split_input.change,
|
| 175 |
+
],
|
| 176 |
fn=precheck_model_ds_enable_example_btn,
|
| 177 |
inputs=[
|
| 178 |
model_id_input,
|
| 179 |
dataset_id_input,
|
| 180 |
dataset_config_input,
|
| 181 |
dataset_split_input,
|
| 182 |
+
],
|
| 183 |
+
outputs=[example_btn],
|
| 184 |
+
)
|
| 185 |
|
| 186 |
gr.on(
|
| 187 |
triggers=[
|
|
|
|
| 240 |
gr.on(
|
| 241 |
triggers=[label.input for label in column_mappings],
|
| 242 |
fn=enable_run_btn,
|
| 243 |
+
inputs=None, # FIXME
|
| 244 |
outputs=[run_btn],
|
| 245 |
)
|
text_classification_ui_helpers.py
CHANGED
|
@@ -7,15 +7,27 @@ import uuid
|
|
| 7 |
|
| 8 |
import datasets
|
| 9 |
import gradio as gr
|
|
|
|
| 10 |
from transformers.pipelines import TextClassificationPipeline
|
| 11 |
|
| 12 |
-
from io_utils import (
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
MAX_LABELS = 40
|
| 21 |
MAX_FEATURES = 20
|
|
@@ -32,24 +44,50 @@ HF_GSK_HUB_UNLOCK_TOKEN = "GSK_HUB_UNLOCK_TOKEN"
|
|
| 32 |
LEADERBOARD = "giskard-bot/evaluator-leaderboard"
|
| 33 |
|
| 34 |
|
| 35 |
-
|
| 36 |
-
try:
|
| 37 |
-
# write_column_mapping(None, uid) # reset column mapping
|
| 38 |
-
configs = datasets.get_dataset_config_names(dataset_id)
|
| 39 |
-
return gr.Dropdown(configs, value=configs[0], visible=True)
|
| 40 |
-
except Exception:
|
| 41 |
-
# Dataset may not exist
|
| 42 |
-
pass
|
| 43 |
|
| 44 |
|
| 45 |
-
def
|
|
|
|
|
|
|
|
|
|
| 46 |
try:
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
# Dataset may not exist
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
|
| 55 |
def select_run_mode(run_inf):
|
|
|
|
| 7 |
|
| 8 |
import datasets
|
| 9 |
import gradio as gr
|
| 10 |
+
import pandas as pd
|
| 11 |
from transformers.pipelines import TextClassificationPipeline
|
| 12 |
|
| 13 |
+
from io_utils import (
|
| 14 |
+
get_yaml_path,
|
| 15 |
+
read_column_mapping,
|
| 16 |
+
save_job_to_pipe,
|
| 17 |
+
write_column_mapping,
|
| 18 |
+
write_log_to_user_file,
|
| 19 |
+
)
|
| 20 |
+
from text_classification import (
|
| 21 |
+
check_model,
|
| 22 |
+
get_example_prediction,
|
| 23 |
+
get_labels_and_features_from_dataset,
|
| 24 |
+
)
|
| 25 |
+
from wordings import (
|
| 26 |
+
CHECK_CONFIG_OR_SPLIT_RAW,
|
| 27 |
+
CONFIRM_MAPPING_DETAILS_FAIL_RAW,
|
| 28 |
+
MAPPING_STYLED_ERROR_WARNING,
|
| 29 |
+
get_styled_input,
|
| 30 |
+
)
|
| 31 |
|
| 32 |
MAX_LABELS = 40
|
| 33 |
MAX_FEATURES = 20
|
|
|
|
| 44 |
LEADERBOARD = "giskard-bot/evaluator-leaderboard"
|
| 45 |
|
| 46 |
|
| 47 |
+
# Module-level logger. Named after the module (not the file path) so that it
# participates in the standard dotted logger hierarchy and can be configured
# by module name.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
|
| 50 |
+
def check_dataset(dataset_id, dataset_config=None, dataset_split=None):
    """Resolve a dataset's configs and splits and build a small preview.

    Args:
        dataset_id: Dataset identifier handed to the ``datasets`` library.
        dataset_config: Currently selected config. When unset, the first
            config reported for the dataset is selected and the split is
            reset to the first available split.
        dataset_split: Currently selected split. When ``None``, the first
            available split is selected.

    Returns:
        A 3-tuple ``(config_dropdown, split_dropdown, preview_frame)`` of
        Gradio components. The preview frame holds the first five rows of
        the selected split and is visible only when the dataset and split
        could be loaded; otherwise it is an empty, hidden frame. This
        function does not raise on load failures: they are logged and
        reported through the hidden preview.
    """
    configs = ["default"]
    splits = ["default"]
    dataset_dict = None
    logger.info(f"Loading {dataset_id}, {dataset_config}, {dataset_split}")
    try:
        configs = datasets.get_dataset_config_names(dataset_id)
        # Load once and reuse the result for both the split list and the
        # preview instead of loading the dataset a second time below.
        dataset_dict = datasets.load_dataset(
            dataset_id, dataset_config or configs[0]
        )
        splits = list(dataset_dict.keys())
    except Exception as e:
        # Dataset may not exist
        logger.warning(
            f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
        )

    # Guard the `[0]` lookups below against empty listings.
    configs = configs or ["default"]
    splits = splits or ["default"]

    if not dataset_config:
        # No config chosen yet: select the first config and reset the split.
        dataset_config = configs[0]
        dataset_split = splits[0]
    elif dataset_split is None:
        dataset_split = splits[0]

    preview = None
    if dataset_dict is not None:
        try:
            preview = dataset_dict[dataset_split].to_pandas().head(5)
        except Exception as e:
            # E.g. a stale or custom split name that this config lacks; keep
            # the UI responsive instead of propagating the error.
            logger.warning(
                f"Failed to preview split {dataset_split} of dataset {dataset_id}: {e}"
            )

    has_preview = preview is not None
    return (
        gr.Dropdown(configs, value=dataset_config, visible=True),
        gr.Dropdown(splits, value=dataset_split, visible=True),
        gr.DataFrame(
            preview if has_preview else pd.DataFrame(),
            visible=has_preview,
        ),
    )
|
| 91 |
|
| 92 |
|
| 93 |
def select_run_mode(run_inf):
|