v5
Browse files- tokenizer-wordlevel.json +329 -1
tokenizer-wordlevel.json
CHANGED
|
@@ -345,7 +345,335 @@
|
|
| 345 |
"Bigl": 196,
|
| 346 |
"dag": 197,
|
| 347 |
"neq": 198,
|
| 348 |
-
"simeq": 199
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
},
|
| 350 |
"unk_token": "[UNK]"
|
| 351 |
}
|
|
|
|
| 345 |
"Bigl": 196,
|
| 346 |
"dag": 197,
|
| 347 |
"neq": 198,
|
| 348 |
+
"simeq": 199,
|
| 349 |
+
"textstyle": 200,
|
| 350 |
+
"circ": 201,
|
| 351 |
+
"bigg": 202,
|
| 352 |
+
"biggl": 203,
|
| 353 |
+
"biggr": 204,
|
| 354 |
+
"oint": 205,
|
| 355 |
+
"longrightarrow": 206,
|
| 356 |
+
"not": 207,
|
| 357 |
+
"boldmath": 208,
|
| 358 |
+
"bigr": 209,
|
| 359 |
+
"ddot": 210,
|
| 360 |
+
"bigl": 211,
|
| 361 |
+
"oplus": 212,
|
| 362 |
+
"put": 213,
|
| 363 |
+
"nonumber": 214,
|
| 364 |
+
"Xi": 215,
|
| 365 |
+
"\\|": 216,
|
| 366 |
+
"le": 217,
|
| 367 |
+
"check": 218,
|
| 368 |
+
"propto": 219,
|
| 369 |
+
"triangle": 220,
|
| 370 |
+
"hline": 221,
|
| 371 |
+
"--": 222,
|
| 372 |
+
"varrho": 223,
|
| 373 |
+
"vdots": 224,
|
| 374 |
+
"ge": 225,
|
| 375 |
+
"imath": 226,
|
| 376 |
+
"Bigg": 227,
|
| 377 |
+
"sp": 228,
|
| 378 |
+
"leftrightarrow": 229,
|
| 379 |
+
"forall": 230,
|
| 380 |
+
"iota": 231,
|
| 381 |
+
"scriptscriptstyle": 232,
|
| 382 |
+
"bot": 233,
|
| 383 |
+
"lbrack": 234,
|
| 384 |
+
"line": 235,
|
| 385 |
+
"parallel": 236,
|
| 386 |
+
"textrm": 237,
|
| 387 |
+
"scriptsize": 238,
|
| 388 |
+
"it": 239,
|
| 389 |
+
"Rightarrow": 240,
|
| 390 |
+
"phantom": 241,
|
| 391 |
+
"mapsto": 242,
|
| 392 |
+
"subset": 243,
|
| 393 |
+
"sf": 244,
|
| 394 |
+
"jmath": 245,
|
| 395 |
+
"binom": 246,
|
| 396 |
+
"Biggr": 247,
|
| 397 |
+
"Biggl": 248,
|
| 398 |
+
"Upsilon": 249,
|
| 399 |
+
"tiny": 250,
|
| 400 |
+
"overrightarrow": 251,
|
| 401 |
+
"wp": 252,
|
| 402 |
+
"scriptstyle": 253,
|
| 403 |
+
"ne": 254,
|
| 404 |
+
"ll": 255,
|
| 405 |
+
"kern": 256,
|
| 406 |
+
"bullet": 257,
|
| 407 |
+
"downarrow": 258,
|
| 408 |
+
"gg": 259,
|
| 409 |
+
"atop": 260,
|
| 410 |
+
"breve": 261,
|
| 411 |
+
"uparrow": 262,
|
| 412 |
+
"cong": 263,
|
| 413 |
+
"vee": 264,
|
| 414 |
+
"bigoplus": 265,
|
| 415 |
+
"Im": 266,
|
| 416 |
+
"small": 267,
|
| 417 |
+
"rbrack": 268,
|
| 418 |
+
"underbrace": 269,
|
| 419 |
+
"makebox": 270,
|
| 420 |
+
"sb": 271,
|
| 421 |
+
"varpi": 272,
|
| 422 |
+
"cap": 273,
|
| 423 |
+
"ddots": 274,
|
| 424 |
+
"mathsf": 275,
|
| 425 |
+
"cup": 276,
|
| 426 |
+
"lbrace": 277,
|
| 427 |
+
"rbrace": 278,
|
| 428 |
+
"slash": 279,
|
| 429 |
+
"upsilon": 280,
|
| 430 |
+
"\\#": 281,
|
| 431 |
+
"Re": 282,
|
| 432 |
+
"Longrightarrow": 283,
|
| 433 |
+
"vspace": 284,
|
| 434 |
+
"acute": 285,
|
| 435 |
+
"mit": 286,
|
| 436 |
+
"rightharpoonup": 287,
|
| 437 |
+
"supset": 288,
|
| 438 |
+
"raisebox": 289,
|
| 439 |
+
"varsigma": 290,
|
| 440 |
+
"Leftrightarrow": 291,
|
| 441 |
+
"noalign": 292,
|
| 442 |
+
"longleftrightarrow": 293,
|
| 443 |
+
"large": 294,
|
| 444 |
+
"circle": 295,
|
| 445 |
+
"bigtriangleup": 296,
|
| 446 |
+
"null": 297,
|
| 447 |
+
"Large": 298,
|
| 448 |
+
"footnotesize": 299,
|
| 449 |
+
"\"": 300,
|
| 450 |
+
"raise": 301,
|
| 451 |
+
"vphantom": 302,
|
| 452 |
+
"leftarrow": 303,
|
| 453 |
+
"protect": 304,
|
| 454 |
+
"Vert": 305,
|
| 455 |
+
"llap": 306,
|
| 456 |
+
"buildrel": 307,
|
| 457 |
+
"Longleftrightarrow": 308,
|
| 458 |
+
"`": 309,
|
| 459 |
+
"enspace": 310,
|
| 460 |
+
"overleftarrow": 311,
|
| 461 |
+
"sl": 312,
|
| 462 |
+
"diamond": 313,
|
| 463 |
+
"hfill": 314,
|
| 464 |
+
"rfloor": 315,
|
| 465 |
+
"ule": 316,
|
| 466 |
+
"bigotimes": 317,
|
| 467 |
+
"doteq": 318,
|
| 468 |
+
"tt": 319,
|
| 469 |
+
"cdotp": 320,
|
| 470 |
+
"textbf": 321,
|
| 471 |
+
"unitlength": 322,
|
| 472 |
+
"emptyset": 323,
|
| 473 |
+
"mm": 324,
|
| 474 |
+
"---": 325,
|
| 475 |
+
"cm": 326,
|
| 476 |
+
"mathop": 327,
|
| 477 |
+
"fbox": 328,
|
| 478 |
+
"ref": 329,
|
| 479 |
+
"aleph": 330,
|
| 480 |
+
"backslash": 331,
|
| 481 |
+
"\\-": 332,
|
| 482 |
+
"label": 333,
|
| 483 |
+
"sharp": 334,
|
| 484 |
+
"longmapsto": 335,
|
| 485 |
+
"overbrace": 336,
|
| 486 |
+
"relax": 337,
|
| 487 |
+
"subseteq": 338,
|
| 488 |
+
"textup": 339,
|
| 489 |
+
"mathit": 340,
|
| 490 |
+
"flat": 341,
|
| 491 |
+
"vskip": 342,
|
| 492 |
+
"bigcup": 343,
|
| 493 |
+
"Object": 344,
|
| 494 |
+
"ni": 345,
|
| 495 |
+
"object": 346,
|
| 496 |
+
"odot": 347,
|
| 497 |
+
"setlength": 348,
|
| 498 |
+
"\\/": 349,
|
| 499 |
+
"colon": 350,
|
| 500 |
+
"strut": 351,
|
| 501 |
+
"thinspace": 352,
|
| 502 |
+
"bigwedge": 353,
|
| 503 |
+
"lfloor": 354,
|
| 504 |
+
"smallskip": 355,
|
| 505 |
+
"pounds": 356,
|
| 506 |
+
"ominus": 357,
|
| 507 |
+
"land": 358,
|
| 508 |
+
"longleftarrow": 359,
|
| 509 |
+
"bmod": 360,
|
| 510 |
+
"\\*": 361,
|
| 511 |
+
"bigtriangledown": 362,
|
| 512 |
+
"medskip": 363,
|
| 513 |
+
"multicolumn": 364,
|
| 514 |
+
"arraystretch": 365,
|
| 515 |
+
"enskip": 366,
|
| 516 |
+
"framebox": 367,
|
| 517 |
+
"hookrightarrow": 368,
|
| 518 |
+
"hrule": 369,
|
| 519 |
+
"parbox": 370,
|
| 520 |
+
"vline": 371,
|
| 521 |
+
"vrule": 372,
|
| 522 |
+
"?": 373,
|
| 523 |
+
"renewcommand": 374,
|
| 524 |
+
"setminus": 375,
|
| 525 |
+
"pt": 376,
|
| 526 |
+
"bigcap": 377,
|
| 527 |
+
"hfil": 378,
|
| 528 |
+
"lower": 379,
|
| 529 |
+
"natural": 380,
|
| 530 |
+
"rlap": 381,
|
| 531 |
+
"diamondsuit": 382,
|
| 532 |
+
"space": 383,
|
| 533 |
+
"textit": 384,
|
| 534 |
+
"vector": 385,
|
| 535 |
+
"ddagger": 386,
|
| 536 |
+
"pmod": 387,
|
| 537 |
+
"texttt": 388,
|
| 538 |
+
"thicklines": 389,
|
| 539 |
+
"top": 390,
|
| 540 |
+
"LARGE": 391,
|
| 541 |
+
"sc": 392,
|
| 542 |
+
"smash": 393,
|
| 543 |
+
"triangleright": 394,
|
| 544 |
+
"Downarrow": 395,
|
| 545 |
+
"\\&": 396,
|
| 546 |
+
"bigcirc": 397,
|
| 547 |
+
"bigm": 398,
|
| 548 |
+
"exists": 399,
|
| 549 |
+
"searrow": 400,
|
| 550 |
+
"surd": 401,
|
| 551 |
+
"vdash": 402,
|
| 552 |
+
"arraycolsep": 403,
|
| 553 |
+
"hphantom": 404,
|
| 554 |
+
"normalsize": 405,
|
| 555 |
+
"oval": 406,
|
| 556 |
+
"special": 407,
|
| 557 |
+
"sqcup": 408,
|
| 558 |
+
"textnormal": 409,
|
| 559 |
+
"14": 410,
|
| 560 |
+
"Huge": 411,
|
| 561 |
+
"\\[": 412,
|
| 562 |
+
"\\]": 413,
|
| 563 |
+
"cite": 414,
|
| 564 |
+
"lefteqn": 415,
|
| 565 |
+
"mathbin": 416,
|
| 566 |
+
"mathrel": 417,
|
| 567 |
+
"mkern": 418,
|
| 568 |
+
"AA": 419,
|
| 569 |
+
"Biggm": 420,
|
| 570 |
+
"\\'": 421,
|
| 571 |
+
"footnote": 422,
|
| 572 |
+
"itshape": 423,
|
| 573 |
+
"lceil": 424,
|
| 574 |
+
"multiput": 425,
|
| 575 |
+
"sqcap": 426,
|
| 576 |
+
"supseteq": 427,
|
| 577 |
+
"textsf": 428,
|
| 578 |
+
"unboldmath": 429,
|
| 579 |
+
"16": 430,
|
| 580 |
+
"@": 431,
|
| 581 |
+
"Bigm": 432,
|
| 582 |
+
"Longleftarrow": 433,
|
| 583 |
+
"\\(": 434,
|
| 584 |
+
"\\)": 435,
|
| 585 |
+
"ae": 436,
|
| 586 |
+
"amalg": 437,
|
| 587 |
+
"asymp": 438,
|
| 588 |
+
"crcr": 439,
|
| 589 |
+
"do": 440,
|
| 590 |
+
"ensuremath": 441,
|
| 591 |
+
"hskip": 442,
|
| 592 |
+
"linethickness": 443,
|
| 593 |
+
"mathclose": 444,
|
| 594 |
+
"mathopen": 445,
|
| 595 |
+
"nulldelimiterspace": 446,
|
| 596 |
+
"ooalign": 447,
|
| 597 |
+
"prec": 448,
|
| 598 |
+
"qbezier": 449,
|
| 599 |
+
"ss": 450,
|
| 600 |
+
"triangleleft": 451,
|
| 601 |
+
"bigskip": 452,
|
| 602 |
+
"bigsqcup": 453,
|
| 603 |
+
"ddag": 454,
|
| 604 |
+
"fboxsep": 455,
|
| 605 |
+
"grave": 456,
|
| 606 |
+
"lgroup": 457,
|
| 607 |
+
"mathord": 458,
|
| 608 |
+
"mathtt": 459,
|
| 609 |
+
"nearrow": 460,
|
| 610 |
+
"notin": 461,
|
| 611 |
+
"oslash": 462,
|
| 612 |
+
"preceq": 463,
|
| 613 |
+
"protectu": 464,
|
| 614 |
+
"rgroup": 465,
|
| 615 |
+
"rightleftharpoons": 466,
|
| 616 |
+
"setcounter": 467,
|
| 617 |
+
"skew": 468,
|
| 618 |
+
"smallint": 469,
|
| 619 |
+
"smile": 470,
|
| 620 |
+
"succ": 471,
|
| 621 |
+
"succeq": 472,
|
| 622 |
+
"swarrow": 473,
|
| 623 |
+
"vcenter": 474,
|
| 624 |
+
"vss": 475,
|
| 625 |
+
"SS": 476,
|
| 626 |
+
"arrowvert": 477,
|
| 627 |
+
"atopwithdelims": 478,
|
| 628 |
+
"cline": 479,
|
| 629 |
+
"em": 480,
|
| 630 |
+
"footnotemark": 481,
|
| 631 |
+
"hss": 482,
|
| 632 |
+
"lq": 483,
|
| 633 |
+
"mathnormal": 484,
|
| 634 |
+
"mathstrut": 485,
|
| 635 |
+
"mathversion": 486,
|
| 636 |
+
"mskip": 487,
|
| 637 |
+
"nolinebreak": 488,
|
| 638 |
+
"ointop": 489,
|
| 639 |
+
"rightarrowfill": 490,
|
| 640 |
+
"symbol": 491,
|
| 641 |
+
"tabcolsep": 492,
|
| 642 |
+
"verb": 493,
|
| 643 |
+
"#": 494,
|
| 644 |
+
"10": 495,
|
| 645 |
+
"20": 496,
|
| 646 |
+
"23": 497,
|
| 647 |
+
"25": 498,
|
| 648 |
+
"\\\"": 499,
|
| 649 |
+
"\\^": 500,
|
| 650 |
+
"biggm": 501,
|
| 651 |
+
"bigvee": 502,
|
| 652 |
+
"brace": 503,
|
| 653 |
+
"brack": 504,
|
| 654 |
+
"coprod": 505,
|
| 655 |
+
"def": 506,
|
| 656 |
+
"dotfill": 507,
|
| 657 |
+
"emph": 508,
|
| 658 |
+
"everymath": 509,
|
| 659 |
+
"expandafter": 510,
|
| 660 |
+
"fill": 511,
|
| 661 |
+
"huge": 512,
|
| 662 |
+
"leavevmode": 513,
|
| 663 |
+
"mathaccent": 514,
|
| 664 |
+
"newcommand": 515,
|
| 665 |
+
"of": 516,
|
| 666 |
+
"overwithdelims": 517,
|
| 667 |
+
"protectE": 518,
|
| 668 |
+
"protectZ": 519,
|
| 669 |
+
"protecte": 520,
|
| 670 |
+
"protectm": 521,
|
| 671 |
+
"rceil": 522,
|
| 672 |
+
"romannumeral": 523,
|
| 673 |
+
"root": 524,
|
| 674 |
+
"scshape": 525,
|
| 675 |
+
"textcircled": 526,
|
| 676 |
+
"uppercase": 527
|
| 677 |
},
|
| 678 |
"unk_token": "[UNK]"
|
| 679 |
}
|