Update app.py
Browse files
app.py
CHANGED
|
@@ -161,8 +161,9 @@ def tokenize_text(text, chosen_model, better_tokenization=False):
|
|
| 161 |
|
| 162 |
return gr.HighlightedText(output, color_map)
|
| 163 |
|
| 164 |
-
leaderboard_description = """The
|
| 165 |
-
dataset
|
|
|
|
| 166 |
"""
|
| 167 |
|
| 168 |
with gr.Blocks() as demo:
|
|
|
|
| 161 |
|
| 162 |
return gr.HighlightedText(output, color_map)
|
| 163 |
|
| 164 |
+
leaderboard_description = """The `Total Number of Tokens` in this leaderboard is based on the total number of tokens summed over the Arabic section of the [rasaif-translations](https://huggingface.co/datasets/MohamedRashad/rasaif-translations) dataset.
|
| 165 |
+
This dataset was chosen because it represents Arabic Fusha text in a small and concentrated manner.
|
| 166 |
+
A tokenizer that scores high in this leaderboard will be efficient in parsing Arabic in its different dialects and forms.
|
| 167 |
"""
|
| 168 |
|
| 169 |
with gr.Blocks() as demo:
|