Upload tokenizer.json
Browse files- tokenizer.json +104 -2
tokenizer.json
CHANGED
|
@@ -3486,8 +3486,110 @@
|
|
| 3486 |
"tch": 2348,
|
| 3487 |
"sch": 2349,
|
| 3488 |
"🙊": 2350,
|
| 3489 |
-
"🤭": 2351
|
| 3490 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3491 |
"merges": [
|
| 3492 |
"t h",
|
| 3493 |
"i n",
|
|
|
|
| 3486 |
"tch": 2348,
|
| 3487 |
"sch": 2349,
|
| 3488 |
"🙊": 2350,
|
| 3489 |
+
"🤭": 2351,
|
| 3490 |
+
"€": 2352,
|
| 3491 |
+
"أ": 2353,
|
| 3492 |
+
"إ": 2354,
|
| 3493 |
+
"ئ": 2355,
|
| 3494 |
+
"آ": 2356,
|
| 3495 |
+
"ؤ": 2357,
|
| 3496 |
+
"ﻻ": 2358,
|
| 3497 |
+
"ﺃ": 2359,
|
| 3498 |
+
"ę": 2360,
|
| 3499 |
+
"ą": 2361,
|
| 3500 |
+
"ż": 2362,
|
| 3501 |
+
"ś": 2363,
|
| 3502 |
+
"ć": 2364,
|
| 3503 |
+
"ń": 2365,
|
| 3504 |
+
"ź": 2366,
|
| 3505 |
+
"Ś": 2367,
|
| 3506 |
+
"Ź": 2368,
|
| 3507 |
+
"Ż": 2369,
|
| 3508 |
+
"Ć": 2370,
|
| 3509 |
+
"Š": 2371,
|
| 3510 |
+
"Ő": 2372,
|
| 3511 |
+
"й": 2373,
|
| 3512 |
+
"ё": 2374,
|
| 3513 |
+
"Й": 2375,
|
| 3514 |
+
"Ё": 2376,
|
| 3515 |
+
"が": 2377,
|
| 3516 |
+
"で": 2378,
|
| 3517 |
+
"じ": 2379,
|
| 3518 |
+
"だ": 2380,
|
| 3519 |
+
"ど": 2381,
|
| 3520 |
+
"ば": 2382,
|
| 3521 |
+
"げ": 2383,
|
| 3522 |
+
"ご": 2384,
|
| 3523 |
+
"ぶ": 2385,
|
| 3524 |
+
"ぎ": 2386,
|
| 3525 |
+
",": 2387,
|
| 3526 |
+
"(": 2388,
|
| 3527 |
+
":": 2389,
|
| 3528 |
+
";": 2390,
|
| 3529 |
+
"?": 2391,
|
| 3530 |
+
"!": 2392,
|
| 3531 |
+
"#": 2393,
|
| 3532 |
+
" )": 2394,
|
| 3533 |
+
"ά": 2395,
|
| 3534 |
+
"ό": 2396,
|
| 3535 |
+
"ί": 2397,
|
| 3536 |
+
"έ": 2398,
|
| 3537 |
+
"ή": 2399,
|
| 3538 |
+
"ύ": 2400,
|
| 3539 |
+
"ώ": 2401,
|
| 3540 |
+
"Έ": 2402,
|
| 3541 |
+
"Ό": 2403,
|
| 3542 |
+
"Ή": 2404,
|
| 3543 |
+
"ž": 2405,
|
| 3544 |
+
"š": 2406,
|
| 3545 |
+
"ū": 2407,
|
| 3546 |
+
"ş": 2408,
|
| 3547 |
+
"Ō": 2409,
|
| 3548 |
+
"ī": 2410,
|
| 3549 |
+
"č": 2411,
|
| 3550 |
+
"ř": 2412,
|
| 3551 |
+
"ă": 2413,
|
| 3552 |
+
"이": 2414,
|
| 3553 |
+
"기": 2415,
|
| 3554 |
+
"요": 2416,
|
| 3555 |
+
"에": 2417,
|
| 3556 |
+
"다": 2418,
|
| 3557 |
+
"을": 2419,
|
| 3558 |
+
"은": 2420,
|
| 3559 |
+
"서": 2421,
|
| 3560 |
+
"니": 2422,
|
| 3561 |
+
"어": 2423,
|
| 3562 |
+
"ě": 2424,
|
| 3563 |
+
"ů": 2425,
|
| 3564 |
+
"Č": 2426,
|
| 3565 |
+
"ň": 2427,
|
| 3566 |
+
"ď": 2428,
|
| 3567 |
+
"ť": 2429,
|
| 3568 |
+
"♭": 2430,
|
| 3569 |
+
"ľ": 2431,
|
| 3570 |
+
"ĺ": 2432,
|
| 3571 |
+
"ğ": 2433,
|
| 3572 |
+
"İ": 2434,
|
| 3573 |
+
"Ş": 2435,
|
| 3574 |
+
"ड़": 2436,
|
| 3575 |
+
"ढ़": 2437,
|
| 3576 |
+
"ज़": 2438,
|
| 3577 |
+
"फ़": 2439,
|
| 3578 |
+
"ख़": 2440,
|
| 3579 |
+
"क़": 2441,
|
| 3580 |
+
"ग़": 2442,
|
| 3581 |
+
"Ά": 2443,
|
| 3582 |
+
"ϊ": 2444,
|
| 3583 |
+
"Ί": 2445,
|
| 3584 |
+
"Ύ": 2446,
|
| 3585 |
+
"Ώ": 2447,
|
| 3586 |
+
"ΐ": 2448,
|
| 3587 |
+
"ϋ": 2449,
|
| 3588 |
+
"ũ": 2450,
|
| 3589 |
+
"ụ": 2451,
|
| 3590 |
+
"ọ": 2452,
|
| 3591 |
+
"ạ": 2453
|
| 3592 |
+
},
|
| 3593 |
"merges": [
|
| 3594 |
"t h",
|
| 3595 |
"i n",
|