Kajiura Teruno
committed on
Commit
·
e6418f0
1
Parent(s):
511f535
tokenizer
Browse files
- removed.jsonl +0 -0
- vocamaru_log.txt +132 -0
removed.jsonl
ADDED
|
File without changes
|
vocamaru_log.txt
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[パラメータ数] google/mt5-small 300176768
|
| 2 |
+
[新しいモデルの保存先] mt5_emp
|
| 3 |
+
[LOG] 特殊語彙 id=0 type=3 <pad> 0.0
|
| 4 |
+
[LOG] 特殊語彙 id=1 type=3 </s> 0.0
|
| 5 |
+
[LOG] 特殊語彙 id=2 type=2 <unk> 0.0
|
| 6 |
+
[LOG] 特殊語彙 id=250000 type=4 ▁<extra_id_99> 0.0
|
| 7 |
+
[LOG] 特殊語彙 id=250001 type=4 ▁<extra_id_98> 0.0
|
| 8 |
+
[LOG] 特殊語彙 id=250002 type=4 ▁<extra_id_97> 0.0
|
| 9 |
+
[LOG] 特殊語彙 id=250003 type=4 ▁<extra_id_96> 0.0
|
| 10 |
+
[LOG] 特殊語彙 id=250004 type=4 ▁<extra_id_95> 0.0
|
| 11 |
+
[LOG] 特殊語彙 id=250005 type=4 ▁<extra_id_94> 0.0
|
| 12 |
+
[LOG] 特殊語彙 id=250006 type=4 ▁<extra_id_93> 0.0
|
| 13 |
+
[LOG] 特殊語彙 id=250007 type=4 ▁<extra_id_92> 0.0
|
| 14 |
+
[LOG] 特殊語彙 id=250008 type=4 ▁<extra_id_91> 0.0
|
| 15 |
+
[LOG] 特殊語彙 id=250009 type=4 ▁<extra_id_90> 0.0
|
| 16 |
+
[LOG] 特殊語彙 id=250010 type=4 ▁<extra_id_89> 0.0
|
| 17 |
+
[LOG] 特殊語彙 id=250011 type=4 ▁<extra_id_88> 0.0
|
| 18 |
+
[LOG] 特殊語彙 id=250012 type=4 ▁<extra_id_87> 0.0
|
| 19 |
+
[LOG] 特殊語彙 id=250013 type=4 ▁<extra_id_86> 0.0
|
| 20 |
+
[LOG] 特殊語彙 id=250014 type=4 ▁<extra_id_85> 0.0
|
| 21 |
+
[LOG] 特殊語彙 id=250015 type=4 ▁<extra_id_84> 0.0
|
| 22 |
+
[LOG] 特殊語彙 id=250016 type=4 ▁<extra_id_83> 0.0
|
| 23 |
+
[LOG] 特殊語彙 id=250017 type=4 ▁<extra_id_82> 0.0
|
| 24 |
+
[LOG] 特殊語彙 id=250018 type=4 ▁<extra_id_81> 0.0
|
| 25 |
+
[LOG] 特殊語彙 id=250019 type=4 ▁<extra_id_80> 0.0
|
| 26 |
+
[LOG] 特殊語彙 id=250020 type=4 ▁<extra_id_79> 0.0
|
| 27 |
+
[LOG] 特殊語彙 id=250021 type=4 ▁<extra_id_78> 0.0
|
| 28 |
+
[LOG] 特殊語彙 id=250022 type=4 ▁<extra_id_77> 0.0
|
| 29 |
+
[LOG] 特殊語彙 id=250023 type=4 ▁<extra_id_76> 0.0
|
| 30 |
+
[LOG] 特殊語彙 id=250024 type=4 ▁<extra_id_75> 0.0
|
| 31 |
+
[LOG] 特殊語彙 id=250025 type=4 ▁<extra_id_74> 0.0
|
| 32 |
+
[LOG] 特殊語彙 id=250026 type=4 ▁<extra_id_73> 0.0
|
| 33 |
+
[LOG] 特殊語彙 id=250027 type=4 ▁<extra_id_72> 0.0
|
| 34 |
+
[LOG] 特殊語彙 id=250028 type=4 ▁<extra_id_71> 0.0
|
| 35 |
+
[LOG] 特殊語彙 id=250029 type=4 ▁<extra_id_70> 0.0
|
| 36 |
+
[LOG] 特殊語彙 id=250030 type=4 ▁<extra_id_69> 0.0
|
| 37 |
+
[LOG] 特殊語彙 id=250031 type=4 ▁<extra_id_68> 0.0
|
| 38 |
+
[LOG] 特殊語彙 id=250032 type=4 ▁<extra_id_67> 0.0
|
| 39 |
+
[LOG] 特殊語彙 id=250033 type=4 ▁<extra_id_66> 0.0
|
| 40 |
+
[LOG] 特殊語彙 id=250034 type=4 ▁<extra_id_65> 0.0
|
| 41 |
+
[LOG] 特殊語彙 id=250035 type=4 ▁<extra_id_64> 0.0
|
| 42 |
+
[LOG] 特殊語彙 id=250036 type=4 ▁<extra_id_63> 0.0
|
| 43 |
+
[LOG] 特殊語彙 id=250037 type=4 ▁<extra_id_62> 0.0
|
| 44 |
+
[LOG] 特殊語彙 id=250038 type=4 ▁<extra_id_61> 0.0
|
| 45 |
+
[LOG] 特殊語彙 id=250039 type=4 ▁<extra_id_60> 0.0
|
| 46 |
+
[LOG] 特殊語彙 id=250040 type=4 ▁<extra_id_59> 0.0
|
| 47 |
+
[LOG] 特殊語彙 id=250041 type=4 ▁<extra_id_58> 0.0
|
| 48 |
+
[LOG] 特殊語彙 id=250042 type=4 ▁<extra_id_57> 0.0
|
| 49 |
+
[LOG] 特殊語彙 id=250043 type=4 ▁<extra_id_56> 0.0
|
| 50 |
+
[LOG] 特殊語彙 id=250044 type=4 ▁<extra_id_55> 0.0
|
| 51 |
+
[LOG] 特殊語彙 id=250045 type=4 ▁<extra_id_54> 0.0
|
| 52 |
+
[LOG] 特殊語彙 id=250046 type=4 ▁<extra_id_53> 0.0
|
| 53 |
+
[LOG] 特殊語彙 id=250047 type=4 ▁<extra_id_52> 0.0
|
| 54 |
+
[LOG] 特殊語彙 id=250048 type=4 ▁<extra_id_51> 0.0
|
| 55 |
+
[LOG] 特殊語彙 id=250049 type=4 ▁<extra_id_50> 0.0
|
| 56 |
+
[LOG] 特殊語彙 id=250050 type=4 ▁<extra_id_49> 0.0
|
| 57 |
+
[LOG] 特殊語彙 id=250051 type=4 ▁<extra_id_48> 0.0
|
| 58 |
+
[LOG] 特殊語彙 id=250052 type=4 ▁<extra_id_47> 0.0
|
| 59 |
+
[LOG] 特殊語彙 id=250053 type=4 ▁<extra_id_46> 0.0
|
| 60 |
+
[LOG] 特殊語彙 id=250054 type=4 ▁<extra_id_45> 0.0
|
| 61 |
+
[LOG] 特殊語彙 id=250055 type=4 ▁<extra_id_44> 0.0
|
| 62 |
+
[LOG] 特殊語彙 id=250056 type=4 ▁<extra_id_43> 0.0
|
| 63 |
+
[LOG] 特殊語彙 id=250057 type=4 ▁<extra_id_42> 0.0
|
| 64 |
+
[LOG] 特殊語彙 id=250058 type=4 ▁<extra_id_41> 0.0
|
| 65 |
+
[LOG] 特殊語彙 id=250059 type=4 ▁<extra_id_40> 0.0
|
| 66 |
+
[LOG] 特殊語彙 id=250060 type=4 ▁<extra_id_39> 0.0
|
| 67 |
+
[LOG] 特殊語彙 id=250061 type=4 ▁<extra_id_38> 0.0
|
| 68 |
+
[LOG] 特殊語彙 id=250062 type=4 ▁<extra_id_37> 0.0
|
| 69 |
+
[LOG] 特殊語彙 id=250063 type=4 ▁<extra_id_36> 0.0
|
| 70 |
+
[LOG] 特殊語彙 id=250064 type=4 ▁<extra_id_35> 0.0
|
| 71 |
+
[LOG] 特殊語彙 id=250065 type=4 ▁<extra_id_34> 0.0
|
| 72 |
+
[LOG] 特殊語彙 id=250066 type=4 ▁<extra_id_33> 0.0
|
| 73 |
+
[LOG] 特殊語彙 id=250067 type=4 ▁<extra_id_32> 0.0
|
| 74 |
+
[LOG] 特殊語彙 id=250068 type=4 ▁<extra_id_31> 0.0
|
| 75 |
+
[LOG] 特殊語彙 id=250069 type=4 ▁<extra_id_30> 0.0
|
| 76 |
+
[LOG] 特殊語彙 id=250070 type=4 ▁<extra_id_29> 0.0
|
| 77 |
+
[LOG] 特殊語彙 id=250071 type=4 ▁<extra_id_28> 0.0
|
| 78 |
+
[LOG] 特殊語彙 id=250072 type=4 ▁<extra_id_27> 0.0
|
| 79 |
+
[LOG] 特殊語彙 id=250073 type=4 ▁<extra_id_26> 0.0
|
| 80 |
+
[LOG] 特殊語彙 id=250074 type=4 ▁<extra_id_25> 0.0
|
| 81 |
+
[LOG] 特殊語彙 id=250075 type=4 ▁<extra_id_24> 0.0
|
| 82 |
+
[LOG] 特殊語彙 id=250076 type=4 ▁<extra_id_23> 0.0
|
| 83 |
+
[LOG] 特殊語彙 id=250077 type=4 ▁<extra_id_22> 0.0
|
| 84 |
+
[LOG] 特殊語彙 id=250078 type=4 ▁<extra_id_21> 0.0
|
| 85 |
+
[LOG] 特殊語彙 id=250079 type=4 ▁<extra_id_20> 0.0
|
| 86 |
+
[LOG] 特殊語彙 id=250080 type=4 ▁<extra_id_19> 0.0
|
| 87 |
+
[LOG] 特殊語彙 id=250081 type=4 ▁<extra_id_18> 0.0
|
| 88 |
+
[LOG] 特殊語彙 id=250082 type=4 ▁<extra_id_17> 0.0
|
| 89 |
+
[LOG] 特殊語彙 id=250083 type=4 ▁<extra_id_16> 0.0
|
| 90 |
+
[LOG] 特殊語彙 id=250084 type=4 ▁<extra_id_15> 0.0
|
| 91 |
+
[LOG] 特殊語彙 id=250085 type=4 ▁<extra_id_14> 0.0
|
| 92 |
+
[LOG] 特殊語彙 id=250086 type=4 ▁<extra_id_13> 0.0
|
| 93 |
+
[LOG] 特殊語彙 id=250087 type=4 ▁<extra_id_12> 0.0
|
| 94 |
+
[LOG] 特殊語彙 id=250088 type=4 ▁<extra_id_11> 0.0
|
| 95 |
+
[LOG] 特殊語彙 id=250089 type=4 ▁<extra_id_10> 0.0
|
| 96 |
+
[LOG] 特殊語彙 id=250090 type=4 ▁<extra_id_9> 0.0
|
| 97 |
+
[LOG] 特殊語彙 id=250091 type=4 ▁<extra_id_8> 0.0
|
| 98 |
+
[LOG] 特殊語彙 id=250092 type=4 ▁<extra_id_7> 0.0
|
| 99 |
+
[LOG] 特殊語彙 id=250093 type=4 ▁<extra_id_6> 0.0
|
| 100 |
+
[LOG] 特殊語彙 id=250094 type=4 ▁<extra_id_5> 0.0
|
| 101 |
+
[LOG] 特殊語彙 id=250095 type=4 ▁<extra_id_4> 0.0
|
| 102 |
+
[LOG] 特殊語彙 id=250096 type=4 ▁<extra_id_3> 0.0
|
| 103 |
+
[LOG] 特殊語彙 id=250097 type=4 ▁<extra_id_2> 0.0
|
| 104 |
+
[LOG] 特殊語彙 id=250098 type=4 ▁<extra_id_1> 0.0
|
| 105 |
+
[LOG] 特殊語彙 id=250099 type=4 ▁<extra_id_0> 0.0
|
| 106 |
+
[全語彙数] 249741
|
| 107 |
+
[スコア統計] s
|
| 108 |
+
count 249741.000000
|
| 109 |
+
mean -13.801495
|
| 110 |
+
std 1.294186
|
| 111 |
+
min -20.280148
|
| 112 |
+
25% -14.515090
|
| 113 |
+
50% -13.899386
|
| 114 |
+
75% -13.113684
|
| 115 |
+
max -2.320359
|
| 116 |
+
[置き換える語彙数] 0
|
| 117 |
+
[重複語数] 3612 トリム数 0
|
| 118 |
+
全角ゴミ 189
|
| 119 |
+
半角ゴミ 10054
|
| 120 |
+
記号ゴミ 231
|
| 121 |
+
[記号ゴミ] 10474
|
| 122 |
+
数字ゴミ 7052
|
| 123 |
+
[数字重複] 7052
|
| 124 |
+
[消去可能な字句] 21138
|
| 125 |
+
[実際に置き換える語] 0
|
| 126 |
+
mt5_emp PreTrainedTokenizer(name_or_path='mt5_emp', vocab_size=250100, model_max_len=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>'})
|
| 127 |
+
<nl><nl> [1042, 272, 280, 2988, 272, 280, 669, 1]
|
| 128 |
+
<123> <100> <1> [1042, 11352, 669, 1042, 2251, 669, 1042, 153002, 1]
|
| 129 |
+
<extra_id_0><extra_id_99> [250099, 250000, 1]
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
[1]
|