futosane commited on
Commit
3d3face
·
1 Parent(s): 457b294

Update space

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .vscode/settings.json +47 -0
  2. .vscode/tasks.json +28 -0
  3. FinGPT-master.zip +3 -0
  4. FinGPT.ipynb +868 -0
  5. FinNLP/.gitignore +7 -0
  6. FinNLP/.gitmodules +6 -0
  7. FinNLP/LICENSE +21 -0
  8. FinNLP/README.md +363 -0
  9. FinNLP/demo/README.md +15 -0
  10. FinNLP/docs/FinNLP/docs/index.md +128 -0
  11. FinNLP/docs/FinNLP/docs/jupyter/Data_Sources_Company_Announcement.ipynb +783 -0
  12. FinNLP/docs/FinNLP/docs/jupyter/Data_Sources_EarningCalls.ipynb +186 -0
  13. FinNLP/docs/FinNLP/docs/jupyter/Data_Sources_News.ipynb +0 -0
  14. FinNLP/docs/FinNLP/docs/jupyter/Data_Sources_SEC_Filings.ipynb +0 -0
  15. FinNLP/docs/FinNLP/docs/jupyter/Data_Sources_Social_Media.ipynb +2261 -0
  16. FinNLP/docs/FinNLP/docs/zh/index.md +127 -0
  17. FinNLP/docs/FinNLP/mkdocs.yml +44 -0
  18. FinNLP/docs/FinNLP/site/404.html +629 -0
  19. FinNLP/docs/FinNLP/site/assets/images/favicon.png +0 -0
  20. FinNLP/docs/FinNLP/site/assets/javascripts/bundle.51198bba.min.js +0 -0
  21. FinNLP/docs/FinNLP/site/assets/javascripts/bundle.51198bba.min.js.map +0 -0
  22. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.ar.min.js +1 -0
  23. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.da.min.js +18 -0
  24. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.de.min.js +18 -0
  25. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.du.min.js +18 -0
  26. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.es.min.js +18 -0
  27. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.fi.min.js +18 -0
  28. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.fr.min.js +18 -0
  29. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.hi.min.js +1 -0
  30. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.hu.min.js +18 -0
  31. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.it.min.js +18 -0
  32. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.ja.min.js +1 -0
  33. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.jp.min.js +1 -0
  34. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.ko.min.js +1 -0
  35. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.multi.min.js +1 -0
  36. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.nl.min.js +18 -0
  37. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.no.min.js +18 -0
  38. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.pt.min.js +18 -0
  39. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.ro.min.js +18 -0
  40. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.ru.min.js +18 -0
  41. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js +1 -0
  42. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.sv.min.js +18 -0
  43. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.ta.min.js +1 -0
  44. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.th.min.js +1 -0
  45. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.tr.min.js +18 -0
  46. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.vi.min.js +1 -0
  47. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.zh.min.js +1 -0
  48. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/tinyseg.js +206 -0
  49. FinNLP/docs/FinNLP/site/assets/javascripts/lunr/wordcut.js +0 -0
  50. FinNLP/docs/FinNLP/site/assets/javascripts/workers/search.208ed371.min.js +42 -0
.vscode/settings.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "files.associations": {
3
+ "atomic": "cpp",
4
+ "bit": "cpp",
5
+ "cctype": "cpp",
6
+ "clocale": "cpp",
7
+ "cmath": "cpp",
8
+ "compare": "cpp",
9
+ "concepts": "cpp",
10
+ "cstddef": "cpp",
11
+ "cstdint": "cpp",
12
+ "cstdio": "cpp",
13
+ "cstdlib": "cpp",
14
+ "cstring": "cpp",
15
+ "ctime": "cpp",
16
+ "cwchar": "cpp",
17
+ "exception": "cpp",
18
+ "initializer_list": "cpp",
19
+ "ios": "cpp",
20
+ "iosfwd": "cpp",
21
+ "iostream": "cpp",
22
+ "istream": "cpp",
23
+ "iterator": "cpp",
24
+ "limits": "cpp",
25
+ "memory": "cpp",
26
+ "new": "cpp",
27
+ "ostream": "cpp",
28
+ "stdexcept": "cpp",
29
+ "streambuf": "cpp",
30
+ "system_error": "cpp",
31
+ "tuple": "cpp",
32
+ "type_traits": "cpp",
33
+ "typeinfo": "cpp",
34
+ "utility": "cpp",
35
+ "xfacet": "cpp",
36
+ "xiosbase": "cpp",
37
+ "xlocale": "cpp",
38
+ "xlocinfo": "cpp",
39
+ "xlocnum": "cpp",
40
+ "xmemory": "cpp",
41
+ "xstddef": "cpp",
42
+ "xstring": "cpp",
43
+ "xtr1common": "cpp",
44
+ "xutility": "cpp"
45
+ },
46
+ "cmake.ignoreCMakeListsMissing": true
47
+ }
.vscode/tasks.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tasks": [
3
+ {
4
+ "type": "cppbuild",
5
+ "label": "C/C++: g++.exe 生成活动文件",
6
+ "command": "C:\\Program Files\\mingw64\\bin\\g++.exe",
7
+ "args": [
8
+ "-fdiagnostics-color=always",
9
+ "-g",
10
+ "${file}",
11
+ "-o",
12
+ "${fileDirname}\\${fileBasenameNoExtension}.exe"
13
+ ],
14
+ "options": {
15
+ "cwd": "${fileDirname}"
16
+ },
17
+ "problemMatcher": [
18
+ "$gcc"
19
+ ],
20
+ "group": {
21
+ "kind": "build",
22
+ "isDefault": true
23
+ },
24
+ "detail": "调试器生成的任务。"
25
+ }
26
+ ],
27
+ "version": "2.0.0"
28
+ }
FinGPT-master.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5532a3e67103f1e0ef1d2eef7639ab7ec8e6ca6076e6cbc72d37b472507a5205
3
+ size 11524574
FinGPT.ipynb ADDED
@@ -0,0 +1,868 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# FinGPT"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "markdown",
12
+ "metadata": {},
13
+ "source": [
14
+ "## Part 1: Preparing the Data"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "markdown",
19
+ "metadata": {},
20
+ "source": [
21
+ "1.1 Initialize Directories:"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": null,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "import os\n",
31
+ "import shutil\n",
32
+ "\n",
33
+ "jsonl_path = \"data/dataset_new.jsonl\"\n",
34
+ "save_path = 'data/dataset_new'\n",
35
+ "\n",
36
+ "\n",
37
+ "if os.path.exists(jsonl_path):\n",
38
+ " os.remove(jsonl_path)\n",
39
+ "\n",
40
+ "if os.path.exists(save_path):\n",
41
+ " shutil.rmtree(save_path)\n",
42
+ "\n",
43
+ "directory = \"data\"\n",
44
+ "if not os.path.exists(directory):\n",
45
+ " os.makedirs(directory)"
46
+ ]
47
+ },
48
+ {
49
+ "cell_type": "markdown",
50
+ "metadata": {},
51
+ "source": [
52
+ "1.2 Load and Prepare Dataset:"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": null,
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "from datasets import load_dataset\n",
62
+ "import datasets\n",
63
+ "\n",
64
+ "dic = {\n",
65
+ " 0:\"negative\",\n",
66
+ " 1:'positive',\n",
67
+ " 2:'neutral',\n",
68
+ "}\n",
69
+ "\n",
70
+ "tfns = load_dataset('zeroshot/twitter-financial-news-sentiment')\n",
71
+ "tfns = tfns['train']\n",
72
+ "tfns = tfns.to_pandas()\n",
73
+ "tfns['label'] = tfns['label'].apply(lambda x:dic[x])\n",
74
+ "tfns['instruction'] = 'What is the sentiment of this tweet? Please choose an answer from {negative/neutral/positive}.'\n",
75
+ "tfns.columns = ['input', 'output', 'instruction']\n",
76
+ "tfns = datasets.Dataset.from_pandas(tfns)\n",
77
+ "tfns"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "markdown",
82
+ "metadata": {},
83
+ "source": [
84
+ "1.3 Concatenate and Shuffle Dataset"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": null,
90
+ "metadata": {},
91
+ "outputs": [],
92
+ "source": [
93
+ "tmp_dataset = datasets.concatenate_datasets([tfns]*2)\n",
94
+ "train_dataset = tmp_dataset\n",
95
+ "print(tmp_dataset.num_rows)\n",
96
+ "\n",
97
+ "all_dataset = train_dataset.shuffle(seed = 42)\n",
98
+ "all_dataset.shape"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "markdown",
103
+ "metadata": {},
104
+ "source": [
105
+ "## Part 2: Dataset Formatting and Tokenization"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "markdown",
110
+ "metadata": {},
111
+ "source": [
112
+ "2.1 Dataset Formatting:"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": null,
118
+ "metadata": {},
119
+ "outputs": [],
120
+ "source": [
121
+ "import json\n",
122
+ "from tqdm.notebook import tqdm\n",
123
+ "\n",
124
+ "\n",
125
+ "def format_example(example: dict) -> dict:\n",
126
+ " context = f\"Instruction: {example['instruction']}\\n\"\n",
127
+ " if example.get(\"input\"):\n",
128
+ " context += f\"Input: {example['input']}\\n\"\n",
129
+ " context += \"Answer: \"\n",
130
+ " target = example[\"output\"]\n",
131
+ " return {\"context\": context, \"target\": target}\n",
132
+ "\n",
133
+ "\n",
134
+ "data_list = []\n",
135
+ "for item in all_dataset.to_pandas().itertuples():\n",
136
+ " tmp = {}\n",
137
+ " tmp[\"instruction\"] = item.instruction\n",
138
+ " tmp[\"input\"] = item.input\n",
139
+ " tmp[\"output\"] = item.output\n",
140
+ " data_list.append(tmp)\n",
141
+ "\n",
142
+ "\n",
143
+ "# save to a jsonl file\n",
144
+ "with open(\"data/dataset_new.jsonl\", 'w') as f:\n",
145
+ " for example in tqdm(data_list, desc=\"formatting..\"):\n",
146
+ " f.write(json.dumps(format_example(example)) + '\\n')"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "markdown",
151
+ "metadata": {},
152
+ "source": [
153
+ "2.2 Tokenization"
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": null,
159
+ "metadata": {},
160
+ "outputs": [],
161
+ "source": [
162
+ "from transformers import AutoTokenizer, AutoConfig\n",
163
+ "\n",
164
+ "model_name = \"THUDM/chatglm2-6b\"\n",
165
+ "jsonl_path = \"data/dataset_new.jsonl\" # updated path\n",
166
+ "save_path = 'data/dataset_new' # updated path\n",
167
+ "max_seq_length = 512\n",
168
+ "skip_overlength = True\n",
169
+ "\n",
170
+ "# The preprocess function tokenizes the prompt and target, combines them into input IDs,\n",
171
+ "# and then trims or pads the sequence to the maximum sequence length.\n",
172
+ "def preprocess(tokenizer, config, example, max_seq_length):\n",
173
+ " prompt = example[\"context\"]\n",
174
+ " target = example[\"target\"]\n",
175
+ " prompt_ids = tokenizer.encode(prompt, max_length=max_seq_length, truncation=True)\n",
176
+ " target_ids = tokenizer.encode(\n",
177
+ " target,\n",
178
+ " max_length=max_seq_length,\n",
179
+ " truncation=True,\n",
180
+ " add_special_tokens=False)\n",
181
+ " input_ids = prompt_ids + target_ids + [config.eos_token_id]\n",
182
+ " return {\"input_ids\": input_ids, \"seq_len\": len(prompt_ids)}\n",
183
+ "\n",
184
+ "# The read_jsonl function reads each line from the JSONL file, preprocesses it using the preprocess function,\n",
185
+ "# and then yields each preprocessed example.\n",
186
+ "def read_jsonl(path, max_seq_length, skip_overlength=False):\n",
187
+ " tokenizer = AutoTokenizer.from_pretrained(\n",
188
+ " model_name, trust_remote_code=True)\n",
189
+ " config = AutoConfig.from_pretrained(\n",
190
+ " model_name, trust_remote_code=True, device_map='auto')\n",
191
+ " with open(path, \"r\") as f:\n",
192
+ " for line in tqdm(f.readlines()):\n",
193
+ " example = json.loads(line)\n",
194
+ " feature = preprocess(tokenizer, config, example, max_seq_length)\n",
195
+ " if skip_overlength and len(feature[\"input_ids\"]) > max_seq_length:\n",
196
+ " continue\n",
197
+ " feature[\"input_ids\"] = feature[\"input_ids\"][:max_seq_length]\n",
198
+ " yield feature"
199
+ ]
200
+ },
201
+ {
202
+ "cell_type": "markdown",
203
+ "metadata": {},
204
+ "source": [
205
+ "2.3 Save the dataset"
206
+ ]
207
+ },
208
+ {
209
+ "cell_type": "code",
210
+ "execution_count": null,
211
+ "metadata": {},
212
+ "outputs": [],
213
+ "source": [
214
+ "# The script then creates a Hugging Face Dataset object from the generator and saves it to disk.\n",
215
+ "save_path = './data/dataset_new'\n",
216
+ "\n",
217
+ "dataset = datasets.Dataset.from_generator(\n",
218
+ " lambda: read_jsonl(jsonl_path, max_seq_length, skip_overlength)\n",
219
+ " )\n",
220
+ "dataset.save_to_disk(save_path)"
221
+ ]
222
+ },
223
+ {
224
+ "cell_type": "markdown",
225
+ "metadata": {},
226
+ "source": [
227
+ "## Part 3: Setup FinGPT training parameters with LoRA on ChatGlm2–6b"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "markdown",
232
+ "metadata": {},
233
+ "source": [
234
+ "3.1 Training Arguments Setup:"
235
+ ]
236
+ },
237
+ {
238
+ "cell_type": "code",
239
+ "execution_count": 1,
240
+ "metadata": {},
241
+ "outputs": [
242
+ {
243
+ "name": "stderr",
244
+ "output_type": "stream",
245
+ "text": [
246
+ "W0801 20:19:58.973000 23260 site-packages\\torch\\distributed\\elastic\\multiprocessing\\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.\n"
247
+ ]
248
+ }
249
+ ],
250
+ "source": [
251
+ "from typing import List, Dict, Optional\n",
252
+ "import torch\n",
253
+ "from loguru import logger\n",
254
+ "from transformers import (\n",
255
+ " AutoModel,\n",
256
+ " AutoTokenizer,\n",
257
+ " TrainingArguments,\n",
258
+ " Trainer,\n",
259
+ " BitsAndBytesConfig\n",
260
+ ")\n",
261
+ "from peft import (\n",
262
+ " TaskType,\n",
263
+ " LoraConfig,\n",
264
+ " get_peft_model,\n",
265
+ " set_peft_model_state_dict,\n",
266
+ " prepare_model_for_kbit_training,\n",
267
+ " prepare_model_for_int8_training,\n",
268
+ ")\n",
269
+ "from peft.utils import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING\n",
270
+ "\n",
271
+ "training_args = TrainingArguments(\n",
272
+ " output_dir='./finetuned_model', # saved model path\n",
273
+ " # max_steps=10000,\n",
274
+ " num_train_epochs = 2,\n",
275
+ " per_device_train_batch_size=4,\n",
276
+ " gradient_accumulation_steps=8,\n",
277
+ " learning_rate=1e-4,\n",
278
+ " weight_decay=0.01,\n",
279
+ " warmup_steps=10,\n",
280
+ " save_steps=50,\n",
281
+ " fp16=True,\n",
282
+ " # bf16=True,\n",
283
+ " torch_compile = False,\n",
284
+ " load_best_model_at_end = True,\n",
285
+ " evaluation_strategy=\"steps\",\n",
286
+ " remove_unused_columns=False,\n",
287
+ " logging_steps = 50,\n",
288
+ " eval_steps = 50,\n",
289
+ " logging_dir='./logs',\n",
290
+ " report_to=\"tensorboard\",\n",
291
+ " )"
292
+ ]
293
+ },
294
+ {
295
+ "cell_type": "markdown",
296
+ "metadata": {},
297
+ "source": [
298
+ "3.2 Quantization Config Setup:"
299
+ ]
300
+ },
301
+ {
302
+ "cell_type": "code",
303
+ "execution_count": 2,
304
+ "metadata": {},
305
+ "outputs": [],
306
+ "source": [
307
+ "# Quantization\n",
308
+ "q_config = BitsAndBytesConfig(load_in_4bit=True,\n",
309
+ " bnb_4bit_quant_type='nf4',\n",
310
+ " bnb_4bit_use_double_quant=True,\n",
311
+ " bnb_4bit_compute_dtype=torch.float16\n",
312
+ " )"
313
+ ]
314
+ },
315
+ {
316
+ "cell_type": "markdown",
317
+ "metadata": {},
318
+ "source": [
319
+ "3.3 Model Loading & Preparation:"
320
+ ]
321
+ },
322
+ {
323
+ "cell_type": "code",
324
+ "execution_count": 3,
325
+ "metadata": {},
326
+ "outputs": [
327
+ {
328
+ "name": "stderr",
329
+ "output_type": "stream",
330
+ "text": [
331
+ "d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\huggingface_hub\\file_download.py:945: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
332
+ " warnings.warn(\n"
333
+ ]
334
+ },
335
+ {
336
+ "data": {
337
+ "application/vnd.jupyter.widget-view+json": {
338
+ "model_id": "4bc8b4c85e974cfe806fda92d57ad1c3",
339
+ "version_major": 2,
340
+ "version_minor": 0
341
+ },
342
+ "text/plain": [
343
+ "Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]"
344
+ ]
345
+ },
346
+ "metadata": {},
347
+ "output_type": "display_data"
348
+ },
349
+ {
350
+ "name": "stderr",
351
+ "output_type": "stream",
352
+ "text": [
353
+ "d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\huggingface_hub\\file_download.py:945: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
354
+ " warnings.warn(\n",
355
+ "d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\peft\\utils\\other.py:145: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.\n",
356
+ " warnings.warn(\n"
357
+ ]
358
+ }
359
+ ],
360
+ "source": [
361
+ "# Load tokenizer & model\n",
362
+ "# need massive space\n",
363
+ "model_name = \"THUDM/chatglm2-6b\"\n",
364
+ "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
365
+ "model = AutoModel.from_pretrained(\n",
366
+ " model_name,\n",
367
+ " quantization_config=q_config,\n",
368
+ " trust_remote_code=True,\n",
369
+ " device='cuda'\n",
370
+ " )\n",
371
+ "model = prepare_model_for_int8_training(model, use_gradient_checkpointing=True)"
372
+ ]
373
+ },
374
+ {
375
+ "cell_type": "markdown",
376
+ "metadata": {},
377
+ "source": [
378
+ "3.4 LoRA Config & Setup:"
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": 4,
384
+ "metadata": {},
385
+ "outputs": [
386
+ {
387
+ "name": "stdout",
388
+ "output_type": "stream",
389
+ "text": [
390
+ "trainable params: 1949696 || all params: 3390261248 || trainable%: 0.05750872447219737\n",
391
+ "trainable params: 1,949,696 || all params: 6,245,533,696 || trainable%: 0.031217444255383614\n"
392
+ ]
393
+ }
394
+ ],
395
+ "source": [
396
+ "def print_trainable_parameters(model):\n",
397
+ " \"\"\"\n",
398
+ " Prints the number of trainable parameters in the model.\n",
399
+ " \"\"\"\n",
400
+ " trainable_params = 0\n",
401
+ " all_param = 0\n",
402
+ " for _, param in model.named_parameters():\n",
403
+ " all_param += param.numel()\n",
404
+ " if param.requires_grad:\n",
405
+ " trainable_params += param.numel()\n",
406
+ " print(\n",
407
+ " f\"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}\"\n",
408
+ " )\n",
409
+ "\n",
410
+ "\n",
411
+ "# LoRA\n",
412
+ "target_modules = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING['chatglm']\n",
413
+ "lora_config = LoraConfig(\n",
414
+ " task_type=TaskType.CAUSAL_LM,\n",
415
+ " inference_mode=False,\n",
416
+ " r=8,\n",
417
+ " lora_alpha=32,\n",
418
+ " lora_dropout=0.1,\n",
419
+ " target_modules=target_modules,\n",
420
+ " bias='none',\n",
421
+ ")\n",
422
+ "model = get_peft_model(model, lora_config)\n",
423
+ "print_trainable_parameters(model)\n",
424
+ "\n",
425
+ "resume_from_checkpoint = None\n",
426
+ "if resume_from_checkpoint is not None:\n",
427
+ " checkpoint_name = os.path.join(resume_from_checkpoint, 'pytorch_model.bin')\n",
428
+ " if not os.path.exists(checkpoint_name):\n",
429
+ " checkpoint_name = os.path.join(\n",
430
+ " resume_from_checkpoint, 'adapter_model.bin'\n",
431
+ " )\n",
432
+ " resume_from_checkpoint = False\n",
433
+ " if os.path.exists(checkpoint_name):\n",
434
+ " logger.info(f'Restarting from {checkpoint_name}')\n",
435
+ " adapters_weights = torch.load(checkpoint_name)\n",
436
+ " set_peft_model_state_dict(model, adapters_weights)\n",
437
+ " else:\n",
438
+ " logger.info(f'Checkpoint {checkpoint_name} not found')\n",
439
+ "\n",
440
+ "model.print_trainable_parameters()"
441
+ ]
442
+ },
443
+ {
444
+ "cell_type": "markdown",
445
+ "metadata": {},
446
+ "source": [
447
+ "## Part 4: Loading Data and Training FinGPT"
448
+ ]
449
+ },
450
+ {
451
+ "cell_type": "markdown",
452
+ "metadata": {},
453
+ "source": [
454
+ "4.1 Loading Your Data:"
455
+ ]
456
+ },
457
+ {
458
+ "cell_type": "code",
459
+ "execution_count": 5,
460
+ "metadata": {},
461
+ "outputs": [],
462
+ "source": [
463
+ "# load data\n",
464
+ "from datasets import load_from_disk\n",
465
+ "import datasets\n",
466
+ "import os\n",
467
+ "\n",
468
+ "dataset = datasets.load_from_disk(\"./data/dataset_new\")\n",
469
+ "dataset = dataset.train_test_split(0.2, shuffle=True, seed = 42)"
470
+ ]
471
+ },
472
+ {
473
+ "cell_type": "markdown",
474
+ "metadata": {},
475
+ "source": [
476
+ "4.2 Training Configuration and Launch:"
477
+ ]
478
+ },
479
+ {
480
+ "cell_type": "code",
481
+ "execution_count": 6,
482
+ "metadata": {},
483
+ "outputs": [
484
+ {
485
+ "name": "stderr",
486
+ "output_type": "stream",
487
+ "text": [
488
+ "d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\accelerator.py:449: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n",
489
+ " self.scaler = torch.cuda.amp.GradScaler(**kwargs)\n",
490
+ "You are adding a <class 'transformers.integrations.TensorBoardCallback'> to the callbacks of this Trainer, but there is already one. The currentlist of callbacks is\n",
491
+ ":DefaultFlowCallback\n",
492
+ "TensorBoardCallback\n",
493
+ "d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\transformers\\optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
494
+ " warnings.warn(\n"
495
+ ]
496
+ },
497
+ {
498
+ "data": {
499
+ "application/vnd.jupyter.widget-view+json": {
500
+ "model_id": "27521448ffea440ba40770ef24937509",
501
+ "version_major": 2,
502
+ "version_minor": 0
503
+ },
504
+ "text/plain": [
505
+ " 0%| | 0/954 [00:00<?, ?it/s]"
506
+ ]
507
+ },
508
+ "metadata": {},
509
+ "output_type": "display_data"
510
+ },
511
+ {
512
+ "name": "stderr",
513
+ "output_type": "stream",
514
+ "text": [
515
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...\n",
516
+ "d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\_dynamo\\eval_frame.py:1005: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.5 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
517
+ " return fn(*args, **kwargs)\n"
518
+ ]
519
+ },
520
+ {
521
+ "name": "stdout",
522
+ "output_type": "stream",
523
+ "text": [
524
+ "{'loss': 12.8503, 'learning_rate': 9.597457627118645e-05, 'epoch': 0.1}\n"
525
+ ]
526
+ },
527
+ {
528
+ "data": {
529
+ "application/vnd.jupyter.widget-view+json": {
530
+ "model_id": "afe29ea804ea4e178d58a6be2379b268",
531
+ "version_major": 2,
532
+ "version_minor": 0
533
+ },
534
+ "text/plain": [
535
+ " 0%| | 0/478 [00:00<?, ?it/s]"
536
+ ]
537
+ },
538
+ "metadata": {},
539
+ "output_type": "display_data"
540
+ },
541
+ {
542
+ "name": "stdout",
543
+ "output_type": "stream",
544
+ "text": [
545
+ "{'eval_loss': 6.350033760070801, 'eval_runtime': 90.0344, 'eval_samples_per_second': 42.406, 'eval_steps_per_second': 5.309, 'epoch': 0.1}\n"
546
+ ]
547
+ },
548
+ {
549
+ "name": "stderr",
550
+ "output_type": "stream",
551
+ "text": [
552
+ "d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\_dynamo\\eval_frame.py:1005: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.5 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
553
+ " return fn(*args, **kwargs)\n"
554
+ ]
555
+ },
556
+ {
557
+ "ename": "KeyboardInterrupt",
558
+ "evalue": "",
559
+ "output_type": "error",
560
+ "traceback": [
561
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
562
+ "\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)",
563
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 63\u001b[39m\n\u001b[32m 54\u001b[39m writer = SummaryWriter()\n\u001b[32m 55\u001b[39m trainer = ModifiedTrainer(\n\u001b[32m 56\u001b[39m model=model,\n\u001b[32m 57\u001b[39m args=training_args, \u001b[38;5;66;03m# Trainer args\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 61\u001b[39m callbacks=[TensorBoardCallback(writer)],\n\u001b[32m 62\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m63\u001b[39m trainer.train()\n\u001b[32m 64\u001b[39m writer.close()\n\u001b[32m 65\u001b[39m \u001b[38;5;66;03m# save model\u001b[39;00m\n",
564
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\transformers\\trainer.py:1645\u001b[39m, in \u001b[36mTrainer.train\u001b[39m\u001b[34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[39m\n\u001b[32m 1640\u001b[39m \u001b[38;5;28mself\u001b[39m.model_wrapped = \u001b[38;5;28mself\u001b[39m.model\n\u001b[32m 1642\u001b[39m inner_training_loop = find_executable_batch_size(\n\u001b[32m 1643\u001b[39m \u001b[38;5;28mself\u001b[39m._inner_training_loop, \u001b[38;5;28mself\u001b[39m._train_batch_size, args.auto_find_batch_size\n\u001b[32m 1644\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m1645\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m inner_training_loop(\n\u001b[32m 1646\u001b[39m args=args,\n\u001b[32m 1647\u001b[39m resume_from_checkpoint=resume_from_checkpoint,\n\u001b[32m 1648\u001b[39m trial=trial,\n\u001b[32m 1649\u001b[39m ignore_keys_for_eval=ignore_keys_for_eval,\n\u001b[32m 1650\u001b[39m )\n",
565
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\transformers\\trainer.py:1938\u001b[39m, in \u001b[36mTrainer._inner_training_loop\u001b[39m\u001b[34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[39m\n\u001b[32m 1935\u001b[39m \u001b[38;5;28mself\u001b[39m.control = \u001b[38;5;28mself\u001b[39m.callback_handler.on_step_begin(args, \u001b[38;5;28mself\u001b[39m.state, \u001b[38;5;28mself\u001b[39m.control)\n\u001b[32m 1937\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m.accelerator.accumulate(model):\n\u001b[32m-> \u001b[39m\u001b[32m1938\u001b[39m tr_loss_step = \u001b[38;5;28mself\u001b[39m.training_step(model, inputs)\n\u001b[32m 1940\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[32m 1941\u001b[39m args.logging_nan_inf_filter\n\u001b[32m 1942\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[32m 1943\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m (torch.isnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch.isinf(tr_loss_step))\n\u001b[32m 1944\u001b[39m ):\n\u001b[32m 1945\u001b[39m \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[32m 1946\u001b[39m tr_loss += tr_loss / (\u001b[32m1\u001b[39m + \u001b[38;5;28mself\u001b[39m.state.global_step - \u001b[38;5;28mself\u001b[39m._globalstep_last_logged)\n",
566
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\transformers\\trainer.py:2759\u001b[39m, in \u001b[36mTrainer.training_step\u001b[39m\u001b[34m(self, model, inputs)\u001b[39m\n\u001b[32m 2756\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m loss_mb.reduce_mean().detach().to(\u001b[38;5;28mself\u001b[39m.args.device)\n\u001b[32m 2758\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m.compute_loss_context_manager():\n\u001b[32m-> \u001b[39m\u001b[32m2759\u001b[39m loss = \u001b[38;5;28mself\u001b[39m.compute_loss(model, inputs)\n\u001b[32m 2761\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.args.n_gpu > \u001b[32m1\u001b[39m:\n\u001b[32m 2762\u001b[39m loss = loss.mean() \u001b[38;5;66;03m# mean() to average on multi-gpu parallel training\u001b[39;00m\n",
567
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 3\u001b[39m, in \u001b[36mModifiedTrainer.compute_loss\u001b[39m\u001b[34m(self, model, inputs)\u001b[39m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mcompute_loss\u001b[39m(\u001b[38;5;28mself\u001b[39m, model, inputs):\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m model(\n\u001b[32m 4\u001b[39m input_ids=inputs[\u001b[33m\"\u001b[39m\u001b[33minput_ids\u001b[39m\u001b[33m\"\u001b[39m],\n\u001b[32m 5\u001b[39m labels=inputs[\u001b[33m\"\u001b[39m\u001b[33mlabels\u001b[39m\u001b[33m\"\u001b[39m],\n\u001b[32m 6\u001b[39m ).loss\n",
568
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1773\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1772\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1773\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._call_impl(*args, **kwargs)\n",
569
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1784\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1779\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1780\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1781\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1782\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1783\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1784\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(*args, **kwargs)\n\u001b[32m 1786\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1787\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
570
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\utils\\operations.py:687\u001b[39m, in \u001b[36mconvert_outputs_to_fp32.<locals>.forward\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 686\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mforward\u001b[39m(*args, **kwargs):\n\u001b[32m--> \u001b[39m\u001b[32m687\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m model_forward(*args, **kwargs)\n",
571
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\utils\\operations.py:675\u001b[39m, in \u001b[36mConvertOutputsToFp32.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 674\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, *args, **kwargs):\n\u001b[32m--> \u001b[39m\u001b[32m675\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28mself\u001b[39m.model_forward(*args, **kwargs))\n",
572
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\amp\\autocast_mode.py:44\u001b[39m, in \u001b[36mautocast_decorator.<locals>.decorate_autocast\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 41\u001b[39m \u001b[38;5;129m@functools\u001b[39m.wraps(func)\n\u001b[32m 42\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mdecorate_autocast\u001b[39m(*args, **kwargs):\n\u001b[32m 43\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[32m---> \u001b[39m\u001b[32m44\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m func(*args, **kwargs)\n",
573
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\peft\\peft_model.py:1091\u001b[39m, in \u001b[36mPeftModelForCausalLM.forward\u001b[39m\u001b[34m(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)\u001b[39m\n\u001b[32m 1089\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m peft_config.peft_type == PeftType.POLY:\n\u001b[32m 1090\u001b[39m kwargs[\u001b[33m\"\u001b[39m\u001b[33mtask_ids\u001b[39m\u001b[33m\"\u001b[39m] = task_ids\n\u001b[32m-> \u001b[39m\u001b[32m1091\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.base_model(\n\u001b[32m 1092\u001b[39m input_ids=input_ids,\n\u001b[32m 1093\u001b[39m attention_mask=attention_mask,\n\u001b[32m 1094\u001b[39m inputs_embeds=inputs_embeds,\n\u001b[32m 1095\u001b[39m labels=labels,\n\u001b[32m 1096\u001b[39m output_attentions=output_attentions,\n\u001b[32m 1097\u001b[39m output_hidden_states=output_hidden_states,\n\u001b[32m 1098\u001b[39m return_dict=return_dict,\n\u001b[32m 1099\u001b[39m **kwargs,\n\u001b[32m 1100\u001b[39m )\n\u001b[32m 1102\u001b[39m batch_size = _get_batch_size(input_ids, inputs_embeds)\n\u001b[32m 1103\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m attention_mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 1104\u001b[39m \u001b[38;5;66;03m# concat prompt attention mask\u001b[39;00m\n",
574
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1773\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1772\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1773\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._call_impl(*args, **kwargs)\n",
575
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1784\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1779\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1780\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1781\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1782\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1783\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1784\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(*args, **kwargs)\n\u001b[32m 1786\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1787\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
576
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\peft\\tuners\\tuners_utils.py:160\u001b[39m, in \u001b[36mBaseTuner.forward\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 159\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, *args: Any, **kwargs: Any):\n\u001b[32m--> \u001b[39m\u001b[32m160\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.model.forward(*args, **kwargs)\n",
577
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py:165\u001b[39m, in \u001b[36madd_hook_to_module.<locals>.new_forward\u001b[39m\u001b[34m(module, *args, **kwargs)\u001b[39m\n\u001b[32m 163\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 164\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m165\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 166\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m module._hf_hook.post_forward(module, output)\n",
578
+ "\u001b[36mFile \u001b[39m\u001b[32m~/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py:937\u001b[39m, in \u001b[36mChatGLMForConditionalGeneration.forward\u001b[39m\u001b[34m(self, input_ids, position_ids, attention_mask, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, return_last_logit)\u001b[39m\n\u001b[32m 934\u001b[39m use_cache = use_cache \u001b[38;5;28;01mif\u001b[39;00m use_cache \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m.config.use_cache\n\u001b[32m 935\u001b[39m return_dict = return_dict \u001b[38;5;28;01mif\u001b[39;00m return_dict \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m.config.use_return_dict\n\u001b[32m--> \u001b[39m\u001b[32m937\u001b[39m transformer_outputs = \u001b[38;5;28mself\u001b[39m.transformer(\n\u001b[32m 938\u001b[39m input_ids=input_ids,\n\u001b[32m 939\u001b[39m position_ids=position_ids,\n\u001b[32m 940\u001b[39m attention_mask=attention_mask,\n\u001b[32m 941\u001b[39m past_key_values=past_key_values,\n\u001b[32m 942\u001b[39m inputs_embeds=inputs_embeds,\n\u001b[32m 943\u001b[39m use_cache=use_cache,\n\u001b[32m 944\u001b[39m output_hidden_states=output_hidden_states,\n\u001b[32m 945\u001b[39m return_dict=return_dict,\n\u001b[32m 946\u001b[39m )\n\u001b[32m 948\u001b[39m hidden_states = transformer_outputs[\u001b[32m0\u001b[39m]\n\u001b[32m 949\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m return_last_logit:\n",
579
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1773\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1772\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1773\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._call_impl(*args, **kwargs)\n",
580
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1784\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1779\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1780\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1781\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1782\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1783\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1784\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(*args, **kwargs)\n\u001b[32m 1786\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1787\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
581
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py:165\u001b[39m, in \u001b[36madd_hook_to_module.<locals>.new_forward\u001b[39m\u001b[34m(module, *args, **kwargs)\u001b[39m\n\u001b[32m 163\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 164\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m165\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 166\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m module._hf_hook.post_forward(module, output)\n",
582
+ "\u001b[36mFile \u001b[39m\u001b[32m~/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py:830\u001b[39m, in \u001b[36mChatGLMModel.forward\u001b[39m\u001b[34m(self, input_ids, position_ids, attention_mask, full_attention_mask, past_key_values, inputs_embeds, use_cache, output_hidden_states, return_dict)\u001b[39m\n\u001b[32m 827\u001b[39m rotary_pos_emb = rotary_pos_emb.transpose(\u001b[32m0\u001b[39m, \u001b[32m1\u001b[39m).contiguous()\n\u001b[32m 829\u001b[39m \u001b[38;5;66;03m# Run encoder.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m830\u001b[39m hidden_states, presents, all_hidden_states, all_self_attentions = \u001b[38;5;28mself\u001b[39m.encoder(\n\u001b[32m 831\u001b[39m inputs_embeds, full_attention_mask, rotary_pos_emb=rotary_pos_emb,\n\u001b[32m 832\u001b[39m kv_caches=past_key_values, use_cache=use_cache, output_hidden_states=output_hidden_states\n\u001b[32m 833\u001b[39m )\n\u001b[32m 835\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m return_dict:\n\u001b[32m 836\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtuple\u001b[39m(v \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m [hidden_states, presents, all_hidden_states, all_self_attentions] \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m)\n",
583
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1773\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1772\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1773\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._call_impl(*args, **kwargs)\n",
584
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1784\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1779\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1780\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1781\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1782\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1783\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1784\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(*args, **kwargs)\n\u001b[32m 1786\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1787\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
585
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py:165\u001b[39m, in \u001b[36madd_hook_to_module.<locals>.new_forward\u001b[39m\u001b[34m(module, *args, **kwargs)\u001b[39m\n\u001b[32m 163\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 164\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m165\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 166\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m module._hf_hook.post_forward(module, output)\n",
586
+ "\u001b[36mFile \u001b[39m\u001b[32m~/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py:631\u001b[39m, in \u001b[36mGLMTransformer.forward\u001b[39m\u001b[34m(self, hidden_states, attention_mask, rotary_pos_emb, kv_caches, use_cache, output_hidden_states)\u001b[39m\n\u001b[32m 629\u001b[39m layer = \u001b[38;5;28mself\u001b[39m._get_layer(index)\n\u001b[32m 630\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.gradient_checkpointing \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m.training:\n\u001b[32m--> \u001b[39m\u001b[32m631\u001b[39m layer_ret = torch.utils.checkpoint.checkpoint(\n\u001b[32m 632\u001b[39m layer,\n\u001b[32m 633\u001b[39m hidden_states,\n\u001b[32m 634\u001b[39m attention_mask,\n\u001b[32m 635\u001b[39m rotary_pos_emb,\n\u001b[32m 636\u001b[39m kv_caches[index],\n\u001b[32m 637\u001b[39m use_cache\n\u001b[32m 638\u001b[39m )\n\u001b[32m 639\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 640\u001b[39m layer_ret = layer(\n\u001b[32m 641\u001b[39m hidden_states,\n\u001b[32m 642\u001b[39m attention_mask,\n\u001b[32m (...)\u001b[39m\u001b[32m 645\u001b[39m use_cache=use_cache\n\u001b[32m 646\u001b[39m )\n",
587
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\_compile.py:53\u001b[39m, in \u001b[36m_disable_dynamo.<locals>.inner\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 50\u001b[39m disable_fn = torch._dynamo.disable(fn, recursive, wrapping=\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[32m 51\u001b[39m fn.__dynamo_disable = disable_fn \u001b[38;5;66;03m# type: ignore[attr-defined]\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m53\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m disable_fn(*args, **kwargs)\n",
588
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\_dynamo\\eval_frame.py:1005\u001b[39m, in \u001b[36mDisableContext.__call__.<locals>._fn\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 1003\u001b[39m _maybe_set_eval_frame(_callback_from_stance(\u001b[38;5;28mself\u001b[39m.callback))\n\u001b[32m 1004\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1005\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m fn(*args, **kwargs)\n\u001b[32m 1006\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 1007\u001b[39m set_eval_frame(\u001b[38;5;28;01mNone\u001b[39;00m)\n",
589
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\utils\\checkpoint.py:488\u001b[39m, in \u001b[36mcheckpoint\u001b[39m\u001b[34m(function, use_reentrant, context_fn, determinism_check, debug, *args, **kwargs)\u001b[39m\n\u001b[32m 483\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m context_fn \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m noop_context_fn \u001b[38;5;129;01mor\u001b[39;00m debug \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m:\n\u001b[32m 484\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 485\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mPassing `context_fn` or `debug` is only supported when \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 486\u001b[39m \u001b[33m\"\u001b[39m\u001b[33muse_reentrant=False.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 487\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m488\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m CheckpointFunction.apply(function, preserve, *args)\n\u001b[32m 489\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 490\u001b[39m gen = _checkpoint_without_reentrant_generator(\n\u001b[32m 491\u001b[39m function, preserve, context_fn, determinism_check, debug, *args, **kwargs\n\u001b[32m 492\u001b[39m )\n",
590
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\autograd\\function.py:581\u001b[39m, in \u001b[36mFunction.apply\u001b[39m\u001b[34m(cls, *args, **kwargs)\u001b[39m\n\u001b[32m 578\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m torch._C._are_functorch_transforms_active():\n\u001b[32m 579\u001b[39m \u001b[38;5;66;03m# See NOTE: [functorch vjp and autograd interaction]\u001b[39;00m\n\u001b[32m 580\u001b[39m args = _functorch.utils.unwrap_dead_wrappers(args)\n\u001b[32m--> \u001b[39m\u001b[32m581\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m().apply(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 583\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_setup_ctx_defined:\n\u001b[32m 584\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[32m 585\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mIn order to use an autograd.Function with functorch transforms \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 586\u001b[39m \u001b[33m\"\u001b[39m\u001b[33m(vmap, grad, jvp, jacrev, ...), it must override the setup_context \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 587\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mstaticmethod. For more details, please see \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 588\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mhttps://pytorch.org/docs/main/notes/extending.func.html\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 589\u001b[39m )\n",
591
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\utils\\checkpoint.py:262\u001b[39m, in \u001b[36mCheckpointFunction.forward\u001b[39m\u001b[34m(ctx, run_function, preserve_rng_state, *args)\u001b[39m\n\u001b[32m 259\u001b[39m ctx.save_for_backward(*tensor_inputs)\n\u001b[32m 261\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m torch.no_grad():\n\u001b[32m--> \u001b[39m\u001b[32m262\u001b[39m outputs = run_function(*args)\n\u001b[32m 263\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m outputs\n",
592
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1773\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1772\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1773\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._call_impl(*args, **kwargs)\n",
593
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1784\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1779\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1780\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1781\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1782\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1783\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1784\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(*args, **kwargs)\n\u001b[32m 1786\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1787\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
594
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py:165\u001b[39m, in \u001b[36madd_hook_to_module.<locals>.new_forward\u001b[39m\u001b[34m(module, *args, **kwargs)\u001b[39m\n\u001b[32m 163\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 164\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m165\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 166\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m module._hf_hook.post_forward(module, output)\n",
595
+ "\u001b[36mFile \u001b[39m\u001b[32m~/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py:562\u001b[39m, in \u001b[36mGLMBlock.forward\u001b[39m\u001b[34m(self, hidden_states, attention_mask, rotary_pos_emb, kv_cache, use_cache)\u001b[39m\n\u001b[32m 559\u001b[39m layernorm_input = residual + layernorm_input\n\u001b[32m 561\u001b[39m \u001b[38;5;66;03m# Layer norm post the self attention.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m562\u001b[39m layernorm_output = \u001b[38;5;28mself\u001b[39m.post_attention_layernorm(layernorm_input)\n\u001b[32m 564\u001b[39m \u001b[38;5;66;03m# MLP.\u001b[39;00m\n\u001b[32m 565\u001b[39m mlp_output = \u001b[38;5;28mself\u001b[39m.mlp(layernorm_output)\n",
596
+ "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1949\u001b[39m, in \u001b[36mModule.__getattr__\u001b[39m\u001b[34m(self, name)\u001b[39m\n\u001b[32m 1944\u001b[39m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks = OrderedDict()\n\u001b[32m 1946\u001b[39m \u001b[38;5;66;03m# It is crucial that the return type is not annotated as `Any`, otherwise type checking\u001b[39;00m\n\u001b[32m 1947\u001b[39m \u001b[38;5;66;03m# on `torch.nn.Module` and all its subclasses is largely disabled as a result. See:\u001b[39;00m\n\u001b[32m 1948\u001b[39m \u001b[38;5;66;03m# https://github.com/pytorch/pytorch/pull/115074\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1949\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__getattr__\u001b[39m(\u001b[38;5;28mself\u001b[39m, name: \u001b[38;5;28mstr\u001b[39m) -> Union[Tensor, \u001b[33m\"\u001b[39m\u001b[33mModule\u001b[39m\u001b[33m\"\u001b[39m]:\n\u001b[32m 1950\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33m_parameters\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m.\u001b[34m__dict__\u001b[39m:\n\u001b[32m 1951\u001b[39m _parameters = \u001b[38;5;28mself\u001b[39m.\u001b[34m__dict__\u001b[39m[\u001b[33m\"\u001b[39m\u001b[33m_parameters\u001b[39m\u001b[33m\"\u001b[39m]\n",
597
+ "\u001b[31mKeyboardInterrupt\u001b[39m: "
598
+ ]
599
+ }
600
+ ],
601
+ "source": [
602
+ "class ModifiedTrainer(Trainer):\n",
603
+ " def compute_loss(self, model, inputs):\n",
604
+ " return model(\n",
605
+ " input_ids=inputs[\"input_ids\"],\n",
606
+ " labels=inputs[\"labels\"],\n",
607
+ " ).loss\n",
608
+ "\n",
609
+ " def prediction_step(self, model: torch.nn.Module, inputs, prediction_loss_only: bool, ignore_keys = None):\n",
610
+ " with torch.no_grad():\n",
611
+ " res = model(\n",
612
+ " input_ids=inputs[\"input_ids\"].to(model.device),\n",
613
+ " labels=inputs[\"labels\"].to(model.device),\n",
614
+ " ).loss\n",
615
+ " return (res, None, None)\n",
616
+ "\n",
617
+ " def save_model(self, output_dir=None, _internal_call=False):\n",
618
+ " from transformers.trainer import TRAINING_ARGS_NAME\n",
619
+ "\n",
620
+ " os.makedirs(output_dir, exist_ok=True)\n",
621
+ " torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME))\n",
622
+ " saved_params = {\n",
623
+ " k: v.to(\"cpu\") for k, v in self.model.named_parameters() if v.requires_grad\n",
624
+ " }\n",
625
+ " torch.save(saved_params, os.path.join(output_dir, \"adapter_model.bin\"))\n",
626
+ "\n",
627
+ "def data_collator(features: list) -> dict:\n",
628
+ " len_ids = [len(feature[\"input_ids\"]) for feature in features]\n",
629
+ " longest = max(len_ids)\n",
630
+ " input_ids = []\n",
631
+ " labels_list = []\n",
632
+ " for ids_l, feature in sorted(zip(len_ids, features), key=lambda x: -x[0]):\n",
633
+ " ids = feature[\"input_ids\"]\n",
634
+ " seq_len = feature[\"seq_len\"]\n",
635
+ " labels = (\n",
636
+ " [tokenizer.pad_token_id] * (seq_len - 1) + ids[(seq_len - 1) :] + [tokenizer.pad_token_id] * (longest - ids_l)\n",
637
+ " )\n",
638
+ " ids = ids + [tokenizer.pad_token_id] * (longest - ids_l)\n",
639
+ " _ids = torch.LongTensor(ids)\n",
640
+ " labels_list.append(torch.LongTensor(labels))\n",
641
+ " input_ids.append(_ids)\n",
642
+ " input_ids = torch.stack(input_ids)\n",
643
+ " labels = torch.stack(labels_list)\n",
644
+ " return {\n",
645
+ " \"input_ids\": input_ids,\n",
646
+ " \"labels\": labels,\n",
647
+ " }\n",
648
+ "\n",
649
+ "from torch.utils.tensorboard import SummaryWriter\n",
650
+ "from transformers.integrations import TensorBoardCallback\n",
651
+ "\n",
652
+ "# Train\n",
653
+ "# Took about 10 compute units\n",
654
+ "# Took 1 hour to train\n",
655
+ "writer = SummaryWriter()\n",
656
+ "trainer = ModifiedTrainer(\n",
657
+ " model=model,\n",
658
+ " args=training_args, # Trainer args\n",
659
+ " train_dataset=dataset[\"train\"], # Training set\n",
660
+ " eval_dataset=dataset[\"test\"], # Testing set\n",
661
+ " data_collator=data_collator, # Data Collator\n",
662
+ " callbacks=[TensorBoardCallback(writer)],\n",
663
+ ")\n",
664
+ "trainer.train()\n",
665
+ "writer.close()\n",
666
+ "# save model\n",
667
+ "model.save_pretrained(training_args.output_dir)"
668
+ ]
669
+ },
670
+ {
671
+ "cell_type": "markdown",
672
+ "metadata": {},
673
+ "source": [
674
+ "## Part 5: Inference and Benchmarks using FinGPT"
675
+ ]
676
+ },
677
+ {
678
+ "cell_type": "markdown",
679
+ "metadata": {},
680
+ "source": [
681
+ "### 5.1 Load the model"
682
+ ]
683
+ },
684
+ {
685
+ "cell_type": "code",
686
+ "execution_count": 4,
687
+ "metadata": {},
688
+ "outputs": [
689
+ {
690
+ "name": "stdout",
691
+ "output_type": "stream",
692
+ "text": [
693
+ "Path exists.\n"
694
+ ]
695
+ },
696
+ {
697
+ "data": {
698
+ "application/vnd.jupyter.widget-view+json": {
699
+ "model_id": "014c4259e386457ca0892de97c5a8ec3",
700
+ "version_major": 2,
701
+ "version_minor": 0
702
+ },
703
+ "text/plain": [
704
+ "Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]"
705
+ ]
706
+ },
707
+ "metadata": {},
708
+ "output_type": "display_data"
709
+ }
710
+ ],
711
+ "source": [
712
+ "import sys\n",
713
+ "import os\n",
714
+ "\n",
715
+ "current_working_directory = os.getcwd()\n",
716
+ "\n",
717
+ "finnlp_path = os.path.join(current_working_directory, 'FinNLP')\n",
718
+ "sys.path.append(finnlp_path)\n",
719
+ "\n",
720
+ "from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
721
+ "\n",
722
+ "from peft import PeftModel\n",
723
+ "import torch\n",
724
+ "\n",
725
+ "# # Load benchmark datasets from FinNLP\n",
726
+ "# from finnlp.benchmarks.fpb import test_fpb\n",
727
+ "# from finnlp.benchmarks.fiqa import test_fiqa , add_instructions\n",
728
+ "# from finnlp.benchmarks.tfns import test_tfns\n",
729
+ "# from finnlp.benchmarks.nwgi import test_nwgi\n",
730
+ "from fpb import test_fpb\n",
731
+ "from fiqa import test_fiqa, add_instructions\n",
732
+ "from tfns import test_tfns\n",
733
+ "from nwgi import test_nwgi\n",
734
+ "\n",
735
+ "# load model from google drive\n",
736
+ "# from google.colab import drive\n",
737
+ "# drive.mount('/content/drive')\n",
738
+ "\n",
739
+ "\n",
740
+ "# Define the path you want to check\n",
741
+ "path_to_check = \"./finetuned_model_bak\"\n",
742
+ "\n",
743
+ "# Check if the specified path exists\n",
744
+ "if os.path.exists(path_to_check):\n",
745
+ " print(\"Path exists.\")\n",
746
+ "else:\n",
747
+ " print(\"Path does not exist.\")\n",
748
+ "\n",
749
+ "# ## load the chatglm2-6b base model\n",
750
+ "# peft_model = training_args.output_dir\n",
751
+ "\n",
752
+ "# tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)\n",
753
+ "# model = AutoModel.from_pretrained(base_model, trust_remote_code=True, load_in_8bit=True, device_map=\"auto\")\n",
754
+ "\n",
755
+ "# model = PeftModel.from_pretrained(model, peft_model)\n",
756
+ "\n",
757
+ "# model = model.eval()\n",
758
+ "\n",
759
+ "# # load our finetuned model\n",
760
+ "base_model = \"THUDM/chatglm2-6b\"\n",
761
+ "peft_model = \"./finetuned_model_bak\"\n",
762
+ "\n",
763
+ "# Quantization\n",
764
+ "q_config = BitsAndBytesConfig(load_in_4bit=True,\n",
765
+ " bnb_4bit_quant_type='nf4',\n",
766
+ " bnb_4bit_use_double_quant=True,\n",
767
+ " bnb_4bit_compute_dtype=torch.float16\n",
768
+ " )\n",
769
+ "\n",
770
+ "tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)\n",
771
+ "model = AutoModel.from_pretrained(base_model, trust_remote_code=True, quantization_config=q_config, device_map=\"cuda\")\n",
772
+ "\n",
773
+ "model = PeftModel.from_pretrained(model, peft_model)\n",
774
+ "model = model.eval()"
775
+ ]
776
+ },
777
+ {
778
+ "cell_type": "markdown",
779
+ "metadata": {},
780
+ "source": [
781
+ "5.2 Run Benchmarks:"
782
+ ]
783
+ },
784
+ {
785
+ "cell_type": "code",
786
+ "execution_count": 5,
787
+ "metadata": {},
788
+ "outputs": [
789
+ {
790
+ "name": "stdout",
791
+ "output_type": "stream",
792
+ "text": [
793
+ "\n",
794
+ "\n",
795
+ "Prompt example:\n",
796
+ "Instruction: What is the sentiment of this tweet? Please choose an answer from {negative/neutral/positive}.\n",
797
+ "Input: $ALLY - Ally Financial pulls outlook https://t.co/G9Zdi1boy5\n",
798
+ "Answer: \n",
799
+ "\n",
800
+ "\n",
801
+ "Total len: 2388. Batchsize: 8. Total steps: 299\n"
802
+ ]
803
+ },
804
+ {
805
+ "name": "stderr",
806
+ "output_type": "stream",
807
+ "text": [
808
+ "100%|██████████| 299/299 [00:48<00:00, 6.23it/s]"
809
+ ]
810
+ },
811
+ {
812
+ "name": "stdout",
813
+ "output_type": "stream",
814
+ "text": [
815
+ "Acc: 0.8756281407035176. F1 macro: 0.8401912464851741. F1 micro: 0.8756281407035176. F1 weighted (BloombergGPT): 0.8753926635410131. \n"
816
+ ]
817
+ },
818
+ {
819
+ "name": "stderr",
820
+ "output_type": "stream",
821
+ "text": [
822
+ "\n"
823
+ ]
824
+ }
825
+ ],
826
+ "source": [
827
+ "batch_size = 8\n",
828
+ "\n",
829
+ "# TFNS Test Set, len 2388\n",
830
+ "# Available: 84.85 compute units\n",
831
+ "res = test_tfns(model, tokenizer, batch_size = batch_size)\n",
832
+ "# Available: 83.75 compute units\n",
833
+ "# Took about 1 compute unite to inference\n",
834
+ "\n",
835
+ "\n",
836
+ "# FPB, len 1212\n",
837
+ "# res = test_fpb(model, tokenizer, batch_size = batch_size)\n",
838
+ "\n",
839
+ "# FiQA, len 275\n",
840
+ "# res = test_fiqa(model, tokenizer, prompt_fun = add_instructions, batch_size = batch_size)\n",
841
+ "\n",
842
+ "# NWGI, len 4047\n",
843
+ "# res = test_nwgi(model, tokenizer, batch_size = batch_size)"
844
+ ]
845
+ }
846
+ ],
847
+ "metadata": {
848
+ "kernelspec": {
849
+ "display_name": "fingpt-env",
850
+ "language": "python",
851
+ "name": "python3"
852
+ },
853
+ "language_info": {
854
+ "codemirror_mode": {
855
+ "name": "ipython",
856
+ "version": 3
857
+ },
858
+ "file_extension": ".py",
859
+ "mimetype": "text/x-python",
860
+ "name": "python",
861
+ "nbconvert_exporter": "python",
862
+ "pygments_lexer": "ipython3",
863
+ "version": "3.11.13"
864
+ }
865
+ },
866
+ "nbformat": 4,
867
+ "nbformat_minor": 2
868
+ }
FinNLP/.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ /demo/chatgpt-trading/token_.py
2
+ demo/chatgpt-trading/token_.py
3
+ */token_.py
4
+ *token_.py
5
+
6
+ */__pycache__/*
7
+ *__pycache__*
FinNLP/.gitmodules ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [submodule "stocknet-dataset"]
2
+ path = stocknet-dataset
3
+ url = https://github.com/yumoxu/stocknet-dataset.git
4
+ [submodule "Astock"]
5
+ path = Astock
6
+ url = https://github.com/JinanZou/Astock.git
FinNLP/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 AI4Finance Foundation Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
FinNLP/README.md ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ <img align="center" width="30%" alt="image" src="https://github.com/AI4Finance-Foundation/FinGPT/assets/31713746/e0371951-1ce1-488e-aa25-0992dafcc139">
3
+ </div>
4
+
5
+ # FinNLP: Internet-scale Financial Data
6
+
7
+ [![Downloads](https://static.pepy.tech/badge/finnlp)]([https://pepy.tech/project/finnlp](https://pepy.tech/project/finnlp))
8
+ [![Downloads](https://static.pepy.tech/badge/finnlp/week)](https://pepy.tech/project/finnlp)
9
+ [![Python 3.8](https://img.shields.io/badge/python-3.6-blue.svg)](https://www.python.org/downloads/release/python-360/)
10
+ [![PyPI](https://img.shields.io/pypi/v/finnlp.svg)](https://pypi.org/project/finnlp/)
11
+ ![License](https://img.shields.io/github/license/AI4Finance-Foundation/finnlp.svg?color=brightgreen)
12
+
13
+ FinNLP provides a playground for all people interested in LLMs and NLP in Finance. Here we provide full pipelines for LLM training and finetuning in the field of finance.
14
+
15
+ ![Visitors](https://api.visitorbadge.io/api/VisitorHit?user=AI4Finance-Foundation&repo=FinNLP&countColor=%23B17A)
16
+
17
+
18
+ ## Ⅰ. How to Use
19
+
20
+ ### 1. News
21
+
22
+ * US
23
+
24
+ ``` python
25
+ # Finnhub (Yahoo Finance, Reuters, SeekingAlpha, CNBC...)
26
+ from finnlp.data_sources.news.finnhub_date_range import Finnhub_Date_Range
27
+
28
+ start_date = "2023-01-01"
29
+ end_date = "2023-01-03"
30
+ config = {
31
+ "use_proxy": "us_free", # use proxies to prevent ip blocking
32
+ "max_retry": 5,
33
+ "proxy_pages": 5,
34
+ "token": "YOUR_FINNHUB_TOKEN" # Available at https://finnhub.io/dashboard
35
+ }
36
+
37
+ news_downloader = Finnhub_Date_Range(config) # init
38
+ news_downloader.download_date_range_stock(start_date,end_date) # Download headers
39
+ news_downloader.gather_content() # Download contents
40
+ df = news_downloader.dataframe
41
+ selected_columns = ["headline", "content"]
42
+ df[selected_columns].head(10)
43
+
44
+ --------------------
45
+
46
+ # headline content
47
+ # 0 My 26-Stock $349k Portfolio Gets A Nice Petrob... Home\nInvesting Strategy\nPortfolio Strategy\n...
48
+ # 1 Apple’s Market Cap Slides Below $2 Trillion fo... Error
49
+ # 2 US STOCKS-Wall St starts the year with a dip; ... (For a Reuters live blog on U.S., UK and Europ...
50
+ # 3 Buy 4 January Dogs Of The Dow, Watch 4 More Home\nDividends\nDividend Quick Picks\nBuy 4 J...
51
+ # 4 Apple's stock market value falls below $2 tril... Jan 3 (Reuters) - Apple Inc's \n(AAPL.O)\n sto...
52
+ # 5 CORRECTED-UPDATE 1-Apple's stock market value ... Jan 3 (Reuters) - Apple Inc's \n(AAPL.O)\n sto...
53
+ # 6 Apple Stock Falls Amid Report Of Product Order... Apple stock got off to a slow start in 2023 as...
54
+ # 7 US STOCKS-Wall St starts the year with a dip; ... Summary\nCompanies\nTesla shares plunge on Q4 ...
55
+ # 8 More than $1 trillion wiped off value of Apple... apple store\nMore than $1 trillion has been wi...
56
+ # 9 McLean's Iridium inks agreement to put its sat... The company hasn't named its partner, but it's...
57
+ ```
58
+
59
+
60
+
61
+ * China
62
+
63
+ ``` python
64
+ # Sina Finance
65
+ from finnlp.data_sources.news.sina_finance_date_range import Sina_Finance_Date_Range
66
+
67
+ start_date = "2016-01-01"
68
+ end_date = "2016-01-02"
69
+ config = {
70
+ "use_proxy": "china_free", # use proxies to prevent ip blocking
71
+ "max_retry": 5,
72
+ "proxy_pages": 5,
73
+ }
74
+
75
+ news_downloader = Sina_Finance_Date_Range(config) # init
76
+ news_downloader.download_date_range_all(start_date,end_date) # Download headers
77
+ news_downloader.gather_content() # Download contents
78
+ df = news_downloader.dataframe
79
+ selected_columns = ["title", "content"]
80
+ df[selected_columns].head(10)
81
+
82
+ --------------------
83
+
84
+ # title content
85
+ # 0 分析师:伊朗重回国际原油市场无法阻止 新浪美股讯 北京时间1月1日晚CNBC称,加拿大皇家银行(RBC)分析师Helima Cro...
86
+ # 1 FAA:波音767的逃生扶梯存在缺陷 新浪美股讯 北京时间1日晚,美国联邦航空局(FAA)要求航空公司对波音767机型的救生扶梯进...
87
+ # 2 非制造业新订单指数创新高 需求回升力度明显 中新社北京1月1日电 (记者 刘长忠)记者1日从中国物流与采购联合会获悉,在最新发布的201...
88
+ # 3 雷曼兄弟针对大和证券提起索赔诉讼 新浪美股讯 北京时间1日下午共同社称,2008年破产的美国金融巨头雷曼兄弟公司的清算法人日前...
89
+ # 4 国内钢铁PMI有所回升 钢市低迷形势有所改善 新华社上海1月1日专电(记者李荣)据中物联钢铁物流专业委员会1日发布的指数报告,2015年1...
90
+ # 5 马息岭凸显朝鲜旅游体育战略 新浪美股北京时间1日讯 三位单板滑雪手将成为最早拜访马息岭滑雪场的西方专业运动员,他们本月就...
91
+ # 6 五洲船舶破产清算 近十年来首现国有船厂倒闭 (原标题:中国首家国有船厂破产倒闭)\n低迷的中国造船市场,多年来首次出现国有船厂破产清算的...
92
+ # 7 过半城市房价环比上涨 百城住宅均价加速升温 资料图。中新社记者 武俊杰 摄\n中新社北京1月1日电 (记者 庞无忌)中国房地产市场在20...
93
+ # 8 经济学人:巴西病根到底在哪里 新浪美股北京时间1日讯 原本,巴西人是该高高兴兴迎接2016年的。8月间,里约热内卢将举办南...
94
+ # 9 中国首家国有船厂破产倒闭:五洲船舶目前已停工 低迷的中国造船市场,多年来首次出现国有船厂破产清算的一幕。浙江海运集团旗下的五洲船舶修造公司...
95
+
96
+ # Eastmoney 东方财富
97
+ from finnlp.data_sources.news.eastmoney_streaming import Eastmoney_Streaming
98
+
99
+ pages = 3
100
+ stock = "600519"
101
+ config = {
102
+ "use_proxy": "china_free",
103
+ "max_retry": 5,
104
+ "proxy_pages": 5,
105
+ }
106
+
107
+ news_downloader = Eastmoney_Streaming(config)
108
+ news_downloader.download_streaming_stock(stock,pages)
109
+ df = news_downloader.dataframe
110
+ selected_columns = ["title", "create time"]
111
+ df[selected_columns].head(10)
112
+
113
+ --------------------
114
+
115
+ # title create time
116
+ # 0 茅台2022年报的12个小秘密 04-09 19:40
117
+ # 1 东北证券维持贵州茅台买入评级 预计2023年净利润同比 04-09 11:24
118
+ # 2 贵州茅台:融资余额169.34亿元,创近一年新低(04-07 04-08 07:30
119
+ # 3 贵州茅台:融资净买入1248.48万元,融资余额169.79亿 04-07 07:28
120
+ # 4 贵州茅台公益基金会正式成立 04-06 12:29
121
+ # 5 贵州茅台04月04日获沪股通增持19.55万股 04-05 07:48
122
+ # 6 贵州茅台:融资余额169.66亿元,创近一年新低(04-04 04-05 07:30
123
+ # 7 4月4日北向资金最新动向(附十大成交股) 04-04 18:48
124
+ # 8 大宗交易:贵州茅台成交235.9万元,成交价1814.59元( 04-04 17:21
125
+ # 9 第一上海证券维持贵州茅台买入评级 目标价2428.8元 04-04 09:30
126
+ ```
127
+
128
+ ### 2. Social Media
129
+
130
+ * US
131
+
132
+ ``` python
133
+ # Stocktwits
134
+ from finnlp.data_sources.social_media.stocktwits_streaming import Stocktwits_Streaming
135
+
136
+ pages = 3
137
+ stock = "AAPL"
138
+ config = {
139
+ "use_proxy": "us_free",
140
+ "max_retry": 5,
141
+ "proxy_pages": 2,
142
+ }
143
+
144
+ downloader = Stocktwits_Streaming(config)
145
+ downloader.download_date_range_stock(stock, pages)
146
+ selected_columns = ["created_at", "body"]
147
+ downloader.dataframe[selected_columns].head(10)
148
+
149
+ --------------------
150
+
151
+ # created_at body
152
+ # 0 2023-04-07T15:24:22Z NANCY PELOSI JUST BOUGHT 10,000 SHARES OF APPL...
153
+ # 1 2023-04-07T15:17:43Z $AAPL $SPY \n \nhttps://amp.scmp.com/news/chi...
154
+ # 2 2023-04-07T15:17:25Z $AAPL $GOOG $AMZN I took a Trump today. \n\nH...
155
+ # 3 2023-04-07T15:16:54Z $SPY $AAPL will take this baby down, time for ...
156
+ # 4 2023-04-07T15:11:37Z $SPY $3T it ALREADY DID - look at the pre-COV...
157
+ # 5 2023-04-07T15:10:29Z $AAPL $QQQ $STUDY We are on to the next one! A...
158
+ # 6 2023-04-07T15:06:00Z $AAPL was analyzed by 48 analysts. The buy con...
159
+ # 7 2023-04-07T14:54:29Z $AAPL both retiring. \n \nCraig....
160
+ # 8 2023-04-07T14:40:06Z $SPY $QQQ $TSLA $AAPL SPY 500 HAS STARTED🚀😍 BI...
161
+ # 9 2023-04-07T14:38:57Z Nancy 🩵 (Tim) $AAPL
162
+ ```
163
+
164
+ ``` python
165
+ # Reddit Wallstreetbets
166
+ from finnlp.data_sources.social_media.reddit_streaming import Reddit_Streaming
167
+
168
+ pages = 3
169
+ config = {
170
+ "use_proxy": "us_free",
171
+ "max_retry": 5,
172
+ "proxy_pages": 2,
173
+ }
174
+
175
+ downloader = Reddit_Streaming(config)
176
+ downloader.download_streaming_all(pages)
177
+ selected_columns = ["created", "title"]
178
+ downloader.dataframe[selected_columns].head(10)
179
+
180
+ --------------------
181
+
182
+ # created title
183
+ # 0 2023-04-07 15:39:34 Y’all making me feel like spooderman
184
+ # 1 2022-12-21 04:09:42 Do you track your investments in a spreadsheet...
185
+ # 2 2022-12-21 04:09:42 Do you track your investments in a spreadsheet...
186
+ # 3 2023-04-07 15:29:23 Can a Blackberry holder get some help 🥺
187
+ # 4 2023-04-07 14:49:55 The week of CPI and FOMC Minutes… 4-6-23 SPY/ ...
188
+ # 5 2023-04-07 14:19:22 Well let’s hope your job likes you, thanks Jerome
189
+ # 6 2023-04-07 14:06:32 Does anyone else feel an overwhelming sense of...
190
+ # 7 2023-04-07 13:47:59 Watermarked Jesus explains the market being cl...
191
+ # 8 2023-04-07 13:26:23 Jobs report shows 236,000 gain in March. Hot l...
192
+ # 9 2023-04-07 13:07:15 The recession is over! Let's buy more stocks!
193
+ ```
194
+
195
+ * China (Weibo)
196
+
197
+ ``` python
198
+ # Weibo
199
+ from finnlp.data_sources.social_media.weibo_date_range import Weibo_Date_Range
200
+
201
+ start_date = "2016-01-01"
202
+ end_date = "2016-01-02"
203
+ stock = "茅台"
204
+ config = {
205
+ "use_proxy": "china_free",
206
+ "max_retry": 5,
207
+ "proxy_pages": 5,
208
+ "cookies": "Your_Login_Cookies",
209
+ }
210
+
211
+ downloader = Weibo_Date_Range(config)
212
+ downloader.download_date_range_stock(start_date, end_date, stock = stock)
213
+ df = downloader.dataframe
214
+ df = df.drop_duplicates()
215
+ selected_columns = ["date", "content"]
216
+ df[selected_columns].head(10)
217
+
218
+ --------------------
219
+
220
+ # date content
221
+ # 0 2016-01-01 #舆论之锤#唯品会发声明证实销售假茅台-手机腾讯网O网页链接分享来自浏览器!
222
+ # 2 2016-01-01 2016元旦节快乐酒粮网官方新品首发,茅台镇老酒,酱香原浆酒:酒粮网茅台镇白酒酱香老酒纯粮原...
223
+ # 6 2016-01-01 2016元旦节快乐酒粮网官方新品首发,茅台镇老酒,酱香原浆酒:酒粮网茅台镇白酒酱香老酒纯粮原...
224
+ # 17 2016-01-01 开心,今天喝了两斤酒(茅台+扎二)三个人,开心!
225
+ # 18 2016-01-01 一家专卖假货的网站某宝,你该学学了!//【唯品会售假茅台:供货商被刑拘顾客获十倍补偿】O唯品...
226
+ # 19 2016-01-01 一家专卖假货的网站//【唯品会售假茅台:供货商被刑拘顾客获十倍补偿】O唯品会售假茅台:供货商...
227
+ # 20 2016-01-01 前几天说了几点不看好茅台的理由,今年过节喝点茅台支持下,个人口感,茅台比小五好喝,茅台依然是...
228
+ # 21 2016-01-01 老杜酱酒已到货,从明天起正式在甘肃武威开卖。可以不相信我说的话,但一定不要怀疑@杜子建的为人...
229
+ # 22 2016-01-01 【唯品会售假茅台后续:供货商被刑拘顾客获十倍补偿】此前,有网友投诉其在唯品会购买的茅台酒质量...
230
+ # 23 2016-01-01 唯品会卖假茅台,供货商被刑拘,买家获十倍补偿8888元|此前,有网友在网络论坛发贴(唯品会宣...
231
+ ```
232
+
233
+ ### 3. Company Announcement
234
+
235
+ * US
236
+
237
+ ``` python
238
+ # SEC
239
+ from finnlp.data_sources.company_announcement.sec import SEC_Announcement
240
+
241
+ start_date = "2020-01-01"
242
+ end_date = "2020-06-01"
243
+ stock = "AAPL"
244
+ config = {
245
+ "use_proxy": "us_free",
246
+ "max_retry": 5,
247
+ "proxy_pages": 3,
248
+ }
249
+
250
+ downloader = SEC_Announcement(config)
251
+ downloader.download_date_range_stock(start_date, end_date, stock = stock)
252
+ selected_columns = ["file_date", "display_names", "content"]
253
+ downloader.dataframe[selected_columns].head(10)
254
+
255
+ --------------------
256
+
257
+ # file_date display_names content
258
+ # 0 2020-05-12 [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... SEC Form 4 \n FORM 4UNITED STATES SECURITIES...
259
+ # 1 2020-04-30 [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... SEC Form 4 \n FORM 4UNITED STATES SECURITIES...
260
+ # 2 2020-04-17 [O'BRIEN DEIRDRE (CIK 0001767094), Apple Inc.... SEC Form 4 \n FORM 4UNITED STATES SECURITIES...
261
+ # 3 2020-04-17 [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... SEC Form 4 \n FORM 4UNITED STATES SECURITIES...
262
+ # 4 2020-04-09 [Maestri Luca (CIK 0001513362), Apple Inc. (... SEC Form 4 \n FORM 4UNITED STATES SECURITIES...
263
+ # 5 2020-04-03 [WILLIAMS JEFFREY E (CIK 0001496686), Apple I... SEC Form 4 \n FORM 4UNITED STATES SECURITIES...
264
+ # 6 2020-04-03 [Maestri Luca (CIK 0001513362), Apple Inc. (... SEC Form 4 \n FORM 4UNITED STATES SECURITIES...
265
+ # 7 2020-02-28 [WAGNER SUSAN (CIK 0001059235), Apple Inc. (... SEC Form 4 \n FORM 4UNITED STATES SECURITIES...
266
+ # 8 2020-02-28 [LEVINSON ARTHUR D (CIK 0001214128), Apple In... SEC Form 4 \n FORM 4UNITED STATES SECURITIES...
267
+ # 9 2020-02-28 [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... SEC Form 4 \n FORM 4UNITED STATES SECURITIES...
268
+ ```
269
+
270
+ * China
271
+
272
+ ``` python
273
+ # Juchao
274
+ from finnlp.data_sources.company_announcement.juchao import Juchao_Announcement
275
+
276
+ start_date = "2020-01-01"
277
+ end_date = "2020-06-01"
278
+ stock = "000001"
279
+ config = {
280
+ "use_proxy": "china_free",
281
+ "max_retry": 5,
282
+ "proxy_pages": 3,
283
+ }
284
+
285
+ downloader = Juchao_Announcement(config)
286
+ downloader.download_date_range_stock(start_date, end_date, stock = stock, get_content = True, delate_pdf = True)
287
+ selected_columns = ["announcementTime", "shortTitle","Content"]
288
+ downloader.dataframe[selected_columns].head(10)
289
+
290
+ --------------------
291
+
292
+ # announcementTime shortTitle Content
293
+ # 0 2020-05-27 关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告 证券代码: 000001 证券简称:平安银行 ...
294
+ # 1 2020-05-22 2019年年度权益分派实施公告 1 证券代码: 000001 证券简称:平安银行 ...
295
+ # 2 2020-05-20 关于获准发行小微企业贷款专项金融债券的公告 证券代码: 000001 证券简称:平安银行 ...
296
+ # 3 2020-05-16 监事会决议公告 1 证券代码: 000001 证券简称: 平安银行 ...
297
+ # 4 2020-05-15 2019年年度股东大会决议公告 1 证券代码: 000001 证券简称:平安银行 ...
298
+ # 5 2020-05-15 2019年年度股东大会的法律意见书 北京总部 电话 : (86 -10) 8519 -1300 传真 : (86 -10...
299
+ # 6 2020-04-30 中信证券股份有限公司、平安证券股份有限公司关于公司关联交易有关事项的核查意见 1 中信证券股份有限公司 、平安证券股份有限 公司 关于平安银行股份有限公司 关联交易 有...
300
+ # 7 2020-04-30 独立董事独立意见 1 平安银行股份有限公司独立董事独立意见 根据《关于在上市公司建立独立董事制度的指导...
301
+ # 8 2020-04-30 关联交易公告 1 证券代码: 000001 证券简称:平安银行 ...
302
+ # 9 2020-04-21 2020年第一季度报告全文 证券代码: 000001 证券简称:平安银行 ...
303
+ ```
304
+
305
+
306
+ ## Ⅱ. Data Sources
307
+
308
+ ### 1. News
309
+
310
+ | Platform | Data Type | Related Market | Specified Company | Range Type | Limits | Support |
311
+ | :----------------------------------------------------------: | :--------: | :------------: | :----------------------------------------------------------: | :---------------: | :-------------------: | ------------------------------------------------------------ |
312
+ | Yahoo | Financial News | US Stocks | √ | Date Range | N/A | √ |
313
+ | Reuters | General News | US Stocks | × | Date Range | N/A | Soon |
314
+ | Seeking Alpha | Financial News | US Stocks | √ | Streaming | N/A | √ |
315
+ | Sina | Financial News | CN Stocks | × | Date Range | N/A | √ |
316
+ | Eastmoney | Financial News | CN Stocks | √ | Date Range | N/A | √ |
317
+ | Yicai | Financial News | CN Stocks | √ | Date Range | N/A | Soon |
318
+ | CCTV | General News | CN Stocks | × | Date Range | N/A | √ |
319
+ | US Mainstream Media | Financial News | US Stocks | √ | Date Range | Account (Free) | √ |
320
+ | CN Mainstream Media | Financial News | CN Stocks | × | Date Range | Account (¥500/year) | √ |
321
+
322
+ ### 2. Social Media
323
+
324
+ | Platform | Data Type | Related Market | Specified Company | Range Type | Source Type | Limits | Support |
325
+ | :---------------------: | :-------: | :------------: | :---------------: | :--------: | :---------: | :-----: | :-----: |
326
+ | Twitter | Tweets | US Stocks | √ | Date Range | Official | N/A | √ |
327
+ | Twitter | Sentiment | US Stocks | √ | Date Range | Third Party | N/A | √ |
328
+ | StockTwits | Tweets | US Stocks | √ | Latest | Official | N/A | √ |
329
+ | Reddit (wallstreetbets) | Threads | US Stocks | × | Latest | Official | N/A | √ |
330
+ | Reddit | Sentiment | US Stocks | √ | Date Range | Third Party | N/A | √ |
331
+ | Weibo | Tweets | CN Stocks | √ | Date Range | Official | Cookies | √ |
332
+ | Weibo | Tweets | CN Stocks | √ | Latest | Official | N/A | √ |
333
+
334
+ ### 3. Company Announcement
335
+ | Platform | Data Type | Related Market | Specified Company | Range Type | Source Type | Limits | Support |
336
+ | :-----------------------: | :-------: | :------------: | :---------------: | :--------: | :---------: | :----: | :-----: |
337
+ | Juchao (Official Website) | Text | CN Stocks | √ | Date Range | Official | N/A | √ |
338
+ | SEC (Official Website) | Text | US Stocks | √ | Date Range | Official | N/A | √ |
339
+ | Sina | Text | CN Stocks | √ | Latest | Third Party | N/A | √ |
340
+
341
+
342
+ ### 4. Data Sets
343
+ | Data Source | Type | Stocks | Dates | Available |
344
+ | :--------------: | :----: | :----: | :-------: | :--------------: |
345
+ | [AShare](https://github.com/JinanZou/Astock) | News | 3680 | 2018-07-01 to 2021-11-30 | √ |
346
+ | [stocknet-dataset](https://github.com/yumoxu/stocknet-dataset) | Tweets | 87 | 2014-01-02 to 2015-12-30 | √ |
347
+ | [CHRNN](https://github.com/wuhuizhe/CHRNN) | Tweets | 38 | 2017-01-03 to 2017-12-28 | √ |
348
+
349
+ ## Ⅲ. Large Language Models (LLMs)
350
+ * [ChatGPT (GPT 3.5)](https://openai.com/blog/chatgpt)
351
+ * [GPT 4.0](https://openai.com/research/gpt-4)
352
+ * [ChatGLM](https://github.com/THUDM/ChatGLM-6B)
353
+ * [PaLM](https://developers.googleblog.com/2023/03/announcing-palm-api-and-makersuite.html)
354
+ * [LLaMA](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/)
355
+ * [FinBERT](https://github.com/yya518/FinBERT)
356
+ * [Hugging Face](https://huggingface.co/)
357
+
358
+ ## LICENSE
359
+
360
+ MIT License
361
+
362
+ **Disclaimer: We are sharing codes for academic purposes under the MIT education license. Nothing herein is financial advice, and NOT a recommendation to trade real money. Please use common sense and always first consult a professional before trading or investing.**
363
+
FinNLP/demo/README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Demos:
2
+
3
+ ### Ⅰ. ChatGPT Tradings
4
+
5
+ 1. [Trade with ChatGPT](https://github.com/AI4Finance-Foundation/ChatGPT-for-FinTech/tree/master/demo/chatgpt-trading-v1)
6
+ * Using the ChatGPT to give us trading suggestions.
7
+ * On [Ashare (News)](https://github.com/JinanZou/Astock) and A share Market ( `Maotai (贵州茅台 600519)` )
8
+ ![image-20230220011335859](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202302200113884.png)
9
+ 2. [Trade like ChatGPT](https://github.com/AI4Finance-Foundation/ChatGPT-for-FinTech/tree/master/demo/chatgpt-trading-v2)
10
+ * Using ChatGPT's language model, GPT-3, to create a FinRL agent that trades as smartly as ChatGPT
11
+ * On [stocknet-dataset (Tweets)](https://github.com/yumoxu/stocknet-dataset) and US Stocks Market (`AAPL`)
12
+ ![image-20230216004801458](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202302181558796.png)
13
+ ### Ⅱ. Sentiment Classify
14
+
15
+ 1. [Shares News Sentiment Classify.](https://github.com/AI4Finance-Foundation/ChatGPT-for-FinTech/blob/master/demo/shares_news_sentiment_classify.py)
FinNLP/docs/FinNLP/docs/index.md ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LLMs in financial world and Internet-scale Financial Data
2
+
3
+ The demos are shown in [FinGPT](https://github.com/AI4Finance-Foundation/FinGPT) and the data sources and supporting codes are in [FinNLP](https://github.com/AI4Finance-Foundation/FinNLP)
4
+
5
+ 中文版请点击[这里](./zh/index.md)
6
+
7
+ **Disclaimer: We are sharing codes for academic purposes under the MIT education license. Nothing herein is financial advice, and NOT a recommendation to trade real money. Please use common sense and always first consult a professional before trading or investing.**
8
+
9
+ ## Ⅰ. Architecture
10
+
11
+ ![image-20230505200244043](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052002139.png)
12
+
13
+ * The whole project is made up of 4 parts:
14
+
15
+ * The first part is the **Data Source**, Here, we **gather past and streaming data** from the Internet.
16
+
17
+ * Next, we push the data to the **Data Engineering** part where we **clean the data, tokenize the data and do the prompt engineering**
18
+
19
+ * Then, the data is pushed to **LLMs**. Here, we may use LLMs in different kind of ways. We can not only use the collected data to train our own **light-weight fine-tuning models** but we can also use those data and **trained models** or **LLM APIs** to support our applications
20
+ * The last part would be the **application** part, here we can use data and LLMs to make many interesting applications.
21
+
22
+ ## Ⅱ. Data Sources
23
+
24
+ ![image-20230505200446477](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052004539.png)
25
+
26
+ * Due to space limitations, we only show a few of them.
27
+
28
+ ### 1. [News](jupyter/Data_Sources_News.ipynb)
29
+
30
+ | Platform | Data Type | Related Market | Specified Company | Range Type | Source Type | Limits | Docs (1e4) | Support |
31
+ | :----------------------------------------------------------: | :--------: | :------------: | :----------------------------------------------------------: | :---------------: | :--------: | :-------------------: | ------------------------------------------------------------ | ------------------------------------------------------------ |
32
+ | Yahoo | Financial News | US Stocks | √ | Date Range | Official | N/A | 1,500+ | √ |
33
+ | Reuters | Financial News | US Stocks | × | Date Range | Official | N/A | 1,500+ | √ |
34
+ | Sina | Financial News | CN Stocks | × | Date Range | Official | N/A | 2,000+ | √ |
35
+ | Eastmoney | Financial News | CN Stocks | √ | Date Range | Official | N/A | 1,000+ | √ |
36
+ | Yicai | Financial News | CN Stocks | √ | Date Range | Official | N/A | 500+ | Soon |
37
+ | CCTV | Government News | CN Stocks | × | Date Range | Third party | N/A | 4 | √ |
38
+ | US Mainstream | Financial News | US Stocks | √ | Date Range | Third party | Account (Free) | 3,200+ | √ |
39
+ | CN Mainstream | Financial News | CN Stocks | × | Date Range | Third party | ¥500/year | 3000+ | √ |
40
+
41
+ * FinGPT may have **fewer docs** than Bloomberg, but we're on the **same order of magnitude.**
42
+
43
+ ### 2. [Social Media](jupyter/Data_Sources_Social_Media.ipynb)
44
+
45
+ | Platform | Data Type | Related Market | Specified Company | Range Type | Source Type | Limits | Docs (1e4) | Support |
46
+ | :---------------------: | :-------: | :------------: | :---------------: | :--------: | :---------: | :-----: | ---------- | :-----: |
47
+ | Twitter | Tweets | US Stocks | √ | Date Range | Official | N/A | 18,000+ | √ |
48
+ | StockTwits | Tweets | US Stocks | √ | Latest | Official | N/A | 160,000+ | √ |
49
+ | Reddit (wallstreetbets) | Threads | US Stocks | × | Latest | Official | N/A | 9+ | √ |
50
+ | Weibo | Tweets | CN Stocks | √ | Date Range | Official | Cookies | 1,400,000+ | √ |
51
+ | Weibo | Tweets | CN Stocks | √ | Latest | Official | N/A | 1,400,000+ | √ |
52
+
53
+ * In **BloombergGPT**, they **don’t collect social media data**, but we believe that **public opinion is one of the most important factors influencing the stock market.**
54
+
55
+ ### 3. [Company Announcement](jupyter/Data_Sources_Company_Announcement.ipynb)
56
+
57
+ | Platform | Data Type | Related Market | Specified Company | Range Type | Source Type | Limits | Docs (1e4) | Support |
58
+ | :-----------------------: | :-------: | :------------: | :---------------: | :--------: | :---------: | :----: | ---------- | :-----: |
59
+ | Juchao (Official Website) | Text | CN Stocks | √ | Date Range | Official | N/A | 2,790+ | √ |
60
+ | SEC (Official Website) | Text | US Stocks | √ | Date Range | Official | N/A | 1,440+ | √ |
61
+
62
+ * Since we collect data from different stock markets, we have **more filing docs** than Bloomberg GPT.
63
+
64
+ ### 4. Trends
65
+
66
+ | Platform | Data Type | Related Market | Data Source | Specified Company | Range Type | Source Type | Limits |
67
+ | :-------------------------------------------------------: | :-------: | :------------: | :-----------------------------------------------------: | :---------------: | :--------: | :---------: | :----: |
68
+ | [Google Trends](https://trends.google.com/trends/explore) | Index | US Stocks | [Google Trends](./finnlp/data_sources/trends/google.py) | √ | Date Range | Official | N/A |
69
+ | [Baidu Index](https://index.baidu.com/v2/index.html#/) | Index | CN Stocks | Soon | - | - | - | - |
70
+
71
+
72
+ ### 5. Data Sets
73
+ | Data Source | Type | Stocks | Dates | Available |
74
+ | :--------------: | :----: | :----: | :-------: | :--------------: |
75
+ | [AShare](https://github.com/JinanZou/Astock) | News | 3680 | 2018-07-01 to 2021-11-30 | √ |
76
+ | [stocknet-dataset](https://github.com/yumoxu/stocknet-dataset) | Tweets | 87 | 2014-01-02 to 2015-12-30 | √ |
77
+ | [CHRNN](https://github.com/wuhuizhe/CHRNN) | Tweets | 38 | 2017-01-03 to 2017-12-28 | √ |
78
+
79
+ ## Ⅲ. Models
80
+
81
+ ![image-20230505200618504](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052006541.png)
82
+
83
+ * In data-centric NLP, we don’t train the model from the beginning. We only **call APIs** and **do light-weight fine-tunings.**
84
+ * The left part is some LLM APIs that we may use and the middle part is the models that we may use to perform fine-tunings and the right part is some of the **Fine-tuning methods**
85
+
86
+ ### 1. Fine-tuning: Tensor Layers (LoRA)
87
+
88
+ ![image-20230505200944411](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052009480.png)
89
+
90
+ * In FinGPT, we fine-tune a pre-trained LLM using a new financial dataset. **High-quality labeled data** is one of the most **important keys** to many successful LLMs, including ChatGPT.
91
+ * However, those high-quality labeled data are often very **expensive and time-consuming** and we may need help from professional finance experts.
92
+ * If our goal is to use LLMs to analyze financial-related text data and help with quantitative trading, why not **let the market do the labeling** for us?
93
+ * So here, we use the related stock price change percent of each news as the output label, we use the threshold to split the label into three groups **positive, negative, and neutral,** and use them and the **label of the news sentiment**.
94
+ * In correspondence, we also ask the model to select one of positive, negative, and neutral as the output in the **prompt engineer** part so we the make the best use of the pre-trained information
95
+ * By using LoRA we may reduce the trainable parameters **from 6.17B to 3.67M**
96
+ * As the table presents, compared with chatGLM, FinGPT can achieve large improvement on multiple metrics. it may be **inappropriate** to **use our model to quantitative trading directly.** Since most **news titles are neutral**, most of the **original outputs of the LLMs are Neutral**, so LLM **perform poorly in positive and negative labels** and **those** **labels** are what might be **useful in quantitative trading.**
97
+ * However, **after fine-tuning**, we have witnessed **huge improvements in the prediction of** **positive and negative labels.**
98
+ * That’s also **why the model can achieve positive trading results**.
99
+
100
+ ### 2. Fine-tuning: Reinforcement Learning on Stock Prices (RLSP)
101
+
102
+ ![image-20230505201209946](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052012996.png)
103
+
104
+ * In the same way, we may use RL on Stock Prices (RLSP) to replace RL on Human feedback used by ChatGPT.
105
+
106
+ ## Ⅳ. Applications
107
+
108
+ ### 1. Robo Advisor
109
+
110
+ ![image-20230505201913233](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052019296.png)
111
+
112
+ * **ChatGPT can give investment advice just like a pro**.
113
+ * In this example the **raising stock price** of the Apple is **in accordance with** ChatGPT’s **prediction made by the analysis of news**
114
+
115
+ ### 2. Quantitative Trading
116
+
117
+ ![image-20230505201841001](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052018035.png)
118
+
119
+ * We may also use News, Social media tweet or filing to **build sentiment factors**, the right part is the trading results just by the signal of the twitter tweets and ChatGPT, the data is from a data set called [stocknet-dataset](https://link.zhihu.com/?target=https%3A//github.com/yumoxu/stocknet-dataset).
120
+ * As you may see from the picture, the trading signals generated by ChatGPT are **so good** that we may **even achieve good results just by trading according to twitter sentiment factors.**
121
+ * So we may even **achieve better results by combining price factors**.
122
+
123
+ ### 3. Low-code development
124
+
125
+ ![image-20230505202028292](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052020363.png)
126
+
127
+ * We can use the help of LLMs to write codes.
128
+ * The right part shows how we can develop our factors and other codes **quickly and efficiently.**
FinNLP/docs/FinNLP/docs/jupyter/Data_Sources_Company_Announcement.ipynb ADDED
@@ -0,0 +1,783 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import sys\n",
10
+ "sys.path.append(\"../../FinNLP\")"
11
+ ]
12
+ },
13
+ {
14
+ "attachments": {},
15
+ "cell_type": "markdown",
16
+ "metadata": {},
17
+ "source": [
18
+ "### SEC"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 2,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "from finnlp.data_sources.company_announcement.sec import SEC_Announcement"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 3,
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "start_date = \"2020-01-01\"\n",
37
+ "end_date = \"2020-06-01\"\n",
38
+ "stock = \"AAPL\"\n",
39
+ "config = {\n",
40
+ " \"use_proxy\": \"us_free\",\n",
41
+ " \"max_retry\": 5,\n",
42
+ " \"proxy_pages\": 3,\n",
43
+ "}\n"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 4,
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "name": "stderr",
53
+ "output_type": "stream",
54
+ "text": [
55
+ "Checking ips: 100%|██████████| 45/45 [01:42<00:00, 2.28s/it]\n"
56
+ ]
57
+ },
58
+ {
59
+ "name": "stdout",
60
+ "output_type": "stream",
61
+ "text": [
62
+ "Get proxy ips: 45.\n",
63
+ "Usable proxy ips: 44.\n"
64
+ ]
65
+ },
66
+ {
67
+ "name": "stderr",
68
+ "output_type": "stream",
69
+ "text": [
70
+ "Downloading by item...: 100%|██████████| 39/39 [01:39<00:00, 2.54s/it]\n"
71
+ ]
72
+ }
73
+ ],
74
+ "source": [
75
+ "downloader = SEC_Announcement(config)\n",
76
+ "downloader.download_date_range_stock(start_date, end_date, stock = stock)"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 7,
82
+ "metadata": {},
83
+ "outputs": [
84
+ {
85
+ "data": {
86
+ "text/html": [
87
+ "<div>\n",
88
+ "<style scoped>\n",
89
+ " .dataframe tbody tr th:only-of-type {\n",
90
+ " vertical-align: middle;\n",
91
+ " }\n",
92
+ "\n",
93
+ " .dataframe tbody tr th {\n",
94
+ " vertical-align: top;\n",
95
+ " }\n",
96
+ "\n",
97
+ " .dataframe thead th {\n",
98
+ " text-align: right;\n",
99
+ " }\n",
100
+ "</style>\n",
101
+ "<table border=\"1\" class=\"dataframe\">\n",
102
+ " <thead>\n",
103
+ " <tr style=\"text-align: right;\">\n",
104
+ " <th></th>\n",
105
+ " <th>_id</th>\n",
106
+ " <th>ciks</th>\n",
107
+ " <th>period_ending</th>\n",
108
+ " <th>root_form</th>\n",
109
+ " <th>file_num</th>\n",
110
+ " <th>display_names</th>\n",
111
+ " <th>xsl</th>\n",
112
+ " <th>sequence</th>\n",
113
+ " <th>file_date</th>\n",
114
+ " <th>biz_states</th>\n",
115
+ " <th>sics</th>\n",
116
+ " <th>form</th>\n",
117
+ " <th>adsh</th>\n",
118
+ " <th>film_num</th>\n",
119
+ " <th>biz_locations</th>\n",
120
+ " <th>file_type</th>\n",
121
+ " <th>file_description</th>\n",
122
+ " <th>inc_states</th>\n",
123
+ " <th>ite</th>\n",
124
+ " <th>content</th>\n",
125
+ " </tr>\n",
126
+ " </thead>\n",
127
+ " <tbody>\n",
128
+ " <tr>\n",
129
+ " <th>0</th>\n",
130
+ " <td>0000320193-20-000056:wf-form4_158932261319105.xml</td>\n",
131
+ " <td>[0001631982, 0000320193]</td>\n",
132
+ " <td>2020-05-08</td>\n",
133
+ " <td>4</td>\n",
134
+ " <td>[]</td>\n",
135
+ " <td>[KONDO CHRIS (CIK 0001631982), Apple Inc. (A...</td>\n",
136
+ " <td>xslF345X03</td>\n",
137
+ " <td>1</td>\n",
138
+ " <td>2020-05-12</td>\n",
139
+ " <td>[]</td>\n",
140
+ " <td>[3571]</td>\n",
141
+ " <td>4</td>\n",
142
+ " <td>0000320193-20-000056</td>\n",
143
+ " <td>[]</td>\n",
144
+ " <td>[, ]</td>\n",
145
+ " <td>4</td>\n",
146
+ " <td>FORM 4</td>\n",
147
+ " <td>[, CA, ]</td>\n",
148
+ " <td>[]</td>\n",
149
+ " <td>SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...</td>\n",
150
+ " </tr>\n",
151
+ " <tr>\n",
152
+ " <th>1</th>\n",
153
+ " <td>0000320193-20-000054:wf-form4_158829658358801.xml</td>\n",
154
+ " <td>[0001051401, 0000320193]</td>\n",
155
+ " <td>2020-04-28</td>\n",
156
+ " <td>4</td>\n",
157
+ " <td>[001-36743]</td>\n",
158
+ " <td>[JUNG ANDREA (CIK 0001051401), Apple Inc. (A...</td>\n",
159
+ " <td>xslF345X03</td>\n",
160
+ " <td>1</td>\n",
161
+ " <td>2020-04-30</td>\n",
162
+ " <td>[CA]</td>\n",
163
+ " <td>[3571]</td>\n",
164
+ " <td>4</td>\n",
165
+ " <td>0000320193-20-000054</td>\n",
166
+ " <td>[20838087]</td>\n",
167
+ " <td>[, Cupertino, CA]</td>\n",
168
+ " <td>4</td>\n",
169
+ " <td>FORM 4</td>\n",
170
+ " <td>[, CA]</td>\n",
171
+ " <td>[]</td>\n",
172
+ " <td>SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...</td>\n",
173
+ " </tr>\n",
174
+ " </tbody>\n",
175
+ "</table>\n",
176
+ "</div>"
177
+ ],
178
+ "text/plain": [
179
+ " _id \\\n",
180
+ "0 0000320193-20-000056:wf-form4_158932261319105.xml \n",
181
+ "1 0000320193-20-000054:wf-form4_158829658358801.xml \n",
182
+ "\n",
183
+ " ciks period_ending root_form file_num \\\n",
184
+ "0 [0001631982, 0000320193] 2020-05-08 4 [] \n",
185
+ "1 [0001051401, 0000320193] 2020-04-28 4 [001-36743] \n",
186
+ "\n",
187
+ " display_names xsl sequence \\\n",
188
+ "0 [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... xslF345X03 1 \n",
189
+ "1 [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... xslF345X03 1 \n",
190
+ "\n",
191
+ " file_date biz_states sics form adsh film_num \\\n",
192
+ "0 2020-05-12 [] [3571] 4 0000320193-20-000056 [] \n",
193
+ "1 2020-04-30 [CA] [3571] 4 0000320193-20-000054 [20838087] \n",
194
+ "\n",
195
+ " biz_locations file_type file_description inc_states ite \\\n",
196
+ "0 [, ] 4 FORM 4 [, CA, ] [] \n",
197
+ "1 [, Cupertino, CA] 4 FORM 4 [, CA] [] \n",
198
+ "\n",
199
+ " content \n",
200
+ "0 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
201
+ "1 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... "
202
+ ]
203
+ },
204
+ "execution_count": 7,
205
+ "metadata": {},
206
+ "output_type": "execute_result"
207
+ }
208
+ ],
209
+ "source": [
210
+ "df = downloader.dataframe\n",
211
+ "# df = df.drop_duplicates()\n",
212
+ "df.head(2)"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": 8,
218
+ "metadata": {},
219
+ "outputs": [
220
+ {
221
+ "data": {
222
+ "text/plain": [
223
+ "(21, 20)"
224
+ ]
225
+ },
226
+ "execution_count": 8,
227
+ "metadata": {},
228
+ "output_type": "execute_result"
229
+ }
230
+ ],
231
+ "source": [
232
+ "df.shape"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "code",
237
+ "execution_count": 9,
238
+ "metadata": {},
239
+ "outputs": [
240
+ {
241
+ "data": {
242
+ "text/html": [
243
+ "<div>\n",
244
+ "<style scoped>\n",
245
+ " .dataframe tbody tr th:only-of-type {\n",
246
+ " vertical-align: middle;\n",
247
+ " }\n",
248
+ "\n",
249
+ " .dataframe tbody tr th {\n",
250
+ " vertical-align: top;\n",
251
+ " }\n",
252
+ "\n",
253
+ " .dataframe thead th {\n",
254
+ " text-align: right;\n",
255
+ " }\n",
256
+ "</style>\n",
257
+ "<table border=\"1\" class=\"dataframe\">\n",
258
+ " <thead>\n",
259
+ " <tr style=\"text-align: right;\">\n",
260
+ " <th></th>\n",
261
+ " <th>file_date</th>\n",
262
+ " <th>display_names</th>\n",
263
+ " <th>content</th>\n",
264
+ " </tr>\n",
265
+ " </thead>\n",
266
+ " <tbody>\n",
267
+ " <tr>\n",
268
+ " <th>0</th>\n",
269
+ " <td>2020-05-12</td>\n",
270
+ " <td>[KONDO CHRIS (CIK 0001631982), Apple Inc. (A...</td>\n",
271
+ " <td>SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...</td>\n",
272
+ " </tr>\n",
273
+ " <tr>\n",
274
+ " <th>1</th>\n",
275
+ " <td>2020-04-30</td>\n",
276
+ " <td>[JUNG ANDREA (CIK 0001051401), Apple Inc. (A...</td>\n",
277
+ " <td>SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...</td>\n",
278
+ " </tr>\n",
279
+ " <tr>\n",
280
+ " <th>2</th>\n",
281
+ " <td>2020-04-17</td>\n",
282
+ " <td>[O'BRIEN DEIRDRE (CIK 0001767094), Apple Inc....</td>\n",
283
+ " <td>SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...</td>\n",
284
+ " </tr>\n",
285
+ " <tr>\n",
286
+ " <th>3</th>\n",
287
+ " <td>2020-04-17</td>\n",
288
+ " <td>[KONDO CHRIS (CIK 0001631982), Apple Inc. (A...</td>\n",
289
+ " <td>SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...</td>\n",
290
+ " </tr>\n",
291
+ " <tr>\n",
292
+ " <th>4</th>\n",
293
+ " <td>2020-04-09</td>\n",
294
+ " <td>[Maestri Luca (CIK 0001513362), Apple Inc. (...</td>\n",
295
+ " <td>SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...</td>\n",
296
+ " </tr>\n",
297
+ " <tr>\n",
298
+ " <th>5</th>\n",
299
+ " <td>2020-04-03</td>\n",
300
+ " <td>[WILLIAMS JEFFREY E (CIK 0001496686), Apple I...</td>\n",
301
+ " <td>SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...</td>\n",
302
+ " </tr>\n",
303
+ " <tr>\n",
304
+ " <th>6</th>\n",
305
+ " <td>2020-04-03</td>\n",
306
+ " <td>[Maestri Luca (CIK 0001513362), Apple Inc. (...</td>\n",
307
+ " <td>SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...</td>\n",
308
+ " </tr>\n",
309
+ " <tr>\n",
310
+ " <th>7</th>\n",
311
+ " <td>2020-02-28</td>\n",
312
+ " <td>[WAGNER SUSAN (CIK 0001059235), Apple Inc. (...</td>\n",
313
+ " <td>SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...</td>\n",
314
+ " </tr>\n",
315
+ " <tr>\n",
316
+ " <th>8</th>\n",
317
+ " <td>2020-02-28</td>\n",
318
+ " <td>[LEVINSON ARTHUR D (CIK 0001214128), Apple In...</td>\n",
319
+ " <td>SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...</td>\n",
320
+ " </tr>\n",
321
+ " <tr>\n",
322
+ " <th>9</th>\n",
323
+ " <td>2020-02-28</td>\n",
324
+ " <td>[JUNG ANDREA (CIK 0001051401), Apple Inc. (A...</td>\n",
325
+ " <td>SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...</td>\n",
326
+ " </tr>\n",
327
+ " </tbody>\n",
328
+ "</table>\n",
329
+ "</div>"
330
+ ],
331
+ "text/plain": [
332
+ " file_date display_names \\\n",
333
+ "0 2020-05-12 [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... \n",
334
+ "1 2020-04-30 [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... \n",
335
+ "2 2020-04-17 [O'BRIEN DEIRDRE (CIK 0001767094), Apple Inc.... \n",
336
+ "3 2020-04-17 [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... \n",
337
+ "4 2020-04-09 [Maestri Luca (CIK 0001513362), Apple Inc. (... \n",
338
+ "5 2020-04-03 [WILLIAMS JEFFREY E (CIK 0001496686), Apple I... \n",
339
+ "6 2020-04-03 [Maestri Luca (CIK 0001513362), Apple Inc. (... \n",
340
+ "7 2020-02-28 [WAGNER SUSAN (CIK 0001059235), Apple Inc. (... \n",
341
+ "8 2020-02-28 [LEVINSON ARTHUR D (CIK 0001214128), Apple In... \n",
342
+ "9 2020-02-28 [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... \n",
343
+ "\n",
344
+ " content \n",
345
+ "0 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
346
+ "1 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
347
+ "2 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
348
+ "3 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
349
+ "4 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
350
+ "5 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
351
+ "6 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
352
+ "7 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
353
+ "8 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
354
+ "9 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... "
355
+ ]
356
+ },
357
+ "execution_count": 9,
358
+ "metadata": {},
359
+ "output_type": "execute_result"
360
+ }
361
+ ],
362
+ "source": [
363
+ "selected_columns = [\"file_date\", \"display_names\", \"content\"]\n",
364
+ "df[selected_columns].head(10)"
365
+ ]
366
+ },
367
+ {
368
+ "attachments": {},
369
+ "cell_type": "markdown",
370
+ "metadata": {},
371
+ "source": [
372
+ "### Juchao"
373
+ ]
374
+ },
375
+ {
376
+ "cell_type": "code",
377
+ "execution_count": 10,
378
+ "metadata": {},
379
+ "outputs": [],
380
+ "source": [
381
+ "from finnlp.data_sources.company_announcement.juchao import Juchao_Announcement"
382
+ ]
383
+ },
384
+ {
385
+ "cell_type": "code",
386
+ "execution_count": 11,
387
+ "metadata": {},
388
+ "outputs": [],
389
+ "source": [
390
+ "start_date = \"2020-01-01\"\n",
391
+ "end_date = \"2020-06-01\"\n",
392
+ "stock = \"000001\"\n",
393
+ "config = {\n",
394
+ " \"use_proxy\": \"china_free\",\n",
395
+ " \"max_retry\": 5,\n",
396
+ " \"proxy_pages\": 3,\n",
397
+ "}\n"
398
+ ]
399
+ },
400
+ {
401
+ "cell_type": "code",
402
+ "execution_count": 14,
403
+ "metadata": {},
404
+ "outputs": [
405
+ {
406
+ "name": "stderr",
407
+ "output_type": "stream",
408
+ "text": [
409
+ "Gathering free ips by pages...: 100%|██████████| 3/3 [00:05<00:00, 1.86s/it]\n",
410
+ "Checking ips: 100%|██████████| 45/45 [00:48<00:00, 1.09s/it]\n"
411
+ ]
412
+ },
413
+ {
414
+ "name": "stdout",
415
+ "output_type": "stream",
416
+ "text": [
417
+ "获取到的代理ip数量: 45 。Get proxy ips: 45.\n",
418
+ "能用的代理数量: 6。Usable proxy ips: 6.\n"
419
+ ]
420
+ },
421
+ {
422
+ "data": {
423
+ "application/vnd.jupyter.widget-view+json": {
424
+ "model_id": "1bb13261e75147929b30222347ab9cc5",
425
+ "version_major": 2,
426
+ "version_minor": 0
427
+ },
428
+ "text/plain": [
429
+ "Downloading by page...: 0%| | 0/2 [00:00<?, ?it/s]"
430
+ ]
431
+ },
432
+ "metadata": {},
433
+ "output_type": "display_data"
434
+ },
435
+ {
436
+ "data": {
437
+ "application/vnd.jupyter.widget-view+json": {
438
+ "model_id": "011ea7c465ad4e1aaccf09714b8e3e19",
439
+ "version_major": 2,
440
+ "version_minor": 0
441
+ },
442
+ "text/plain": [
443
+ "Getting the text data...: 0%| | 0/42 [00:00<?, ?it/s]"
444
+ ]
445
+ },
446
+ "metadata": {},
447
+ "output_type": "display_data"
448
+ }
449
+ ],
450
+ "source": [
451
+ "downloader = Juchao_Announcement(config)\n",
452
+ "downloader.download_date_range_stock(start_date, end_date, stock = stock, get_content = True, delate_pdf = True)"
453
+ ]
454
+ },
455
+ {
456
+ "cell_type": "code",
457
+ "execution_count": 15,
458
+ "metadata": {},
459
+ "outputs": [
460
+ {
461
+ "data": {
462
+ "text/html": [
463
+ "<div>\n",
464
+ "<style scoped>\n",
465
+ " .dataframe tbody tr th:only-of-type {\n",
466
+ " vertical-align: middle;\n",
467
+ " }\n",
468
+ "\n",
469
+ " .dataframe tbody tr th {\n",
470
+ " vertical-align: top;\n",
471
+ " }\n",
472
+ "\n",
473
+ " .dataframe thead th {\n",
474
+ " text-align: right;\n",
475
+ " }\n",
476
+ "</style>\n",
477
+ "<table border=\"1\" class=\"dataframe\">\n",
478
+ " <thead>\n",
479
+ " <tr style=\"text-align: right;\">\n",
480
+ " <th></th>\n",
481
+ " <th>id</th>\n",
482
+ " <th>secCode</th>\n",
483
+ " <th>secName</th>\n",
484
+ " <th>orgId</th>\n",
485
+ " <th>announcementId</th>\n",
486
+ " <th>announcementTitle</th>\n",
487
+ " <th>announcementTime</th>\n",
488
+ " <th>adjunctUrl</th>\n",
489
+ " <th>adjunctSize</th>\n",
490
+ " <th>adjunctType</th>\n",
491
+ " <th>...</th>\n",
492
+ " <th>important</th>\n",
493
+ " <th>batchNum</th>\n",
494
+ " <th>announcementContent</th>\n",
495
+ " <th>orgName</th>\n",
496
+ " <th>tileSecName</th>\n",
497
+ " <th>shortTitle</th>\n",
498
+ " <th>announcementTypeName</th>\n",
499
+ " <th>secNameList</th>\n",
500
+ " <th>PDF_path</th>\n",
501
+ " <th>Content</th>\n",
502
+ " </tr>\n",
503
+ " </thead>\n",
504
+ " <tbody>\n",
505
+ " <tr>\n",
506
+ " <th>0</th>\n",
507
+ " <td>None</td>\n",
508
+ " <td>000001</td>\n",
509
+ " <td>平安银行</td>\n",
510
+ " <td>gssz0000001</td>\n",
511
+ " <td>1207862647</td>\n",
512
+ " <td>关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告</td>\n",
513
+ " <td>2020-05-27</td>\n",
514
+ " <td>finalpage/2020-05-27/1207862647.PDF</td>\n",
515
+ " <td>148</td>\n",
516
+ " <td>PDF</td>\n",
517
+ " <td>...</td>\n",
518
+ " <td>None</td>\n",
519
+ " <td>None</td>\n",
520
+ " <td></td>\n",
521
+ " <td>None</td>\n",
522
+ " <td>平安银行</td>\n",
523
+ " <td>关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告</td>\n",
524
+ " <td>None</td>\n",
525
+ " <td>None</td>\n",
526
+ " <td>removed</td>\n",
527
+ " <td>证券代码: 000001 证券简称:平安银行 ...</td>\n",
528
+ " </tr>\n",
529
+ " <tr>\n",
530
+ " <th>1</th>\n",
531
+ " <td>None</td>\n",
532
+ " <td>000001</td>\n",
533
+ " <td>平安银行</td>\n",
534
+ " <td>gssz0000001</td>\n",
535
+ " <td>1207843688</td>\n",
536
+ " <td>2019年年度权益分派实施公告</td>\n",
537
+ " <td>2020-05-22</td>\n",
538
+ " <td>finalpage/2020-05-22/1207843688.PDF</td>\n",
539
+ " <td>214</td>\n",
540
+ " <td>PDF</td>\n",
541
+ " <td>...</td>\n",
542
+ " <td>None</td>\n",
543
+ " <td>None</td>\n",
544
+ " <td></td>\n",
545
+ " <td>None</td>\n",
546
+ " <td>平安银行</td>\n",
547
+ " <td>2019年年度权益分派实施公告</td>\n",
548
+ " <td>None</td>\n",
549
+ " <td>None</td>\n",
550
+ " <td>removed</td>\n",
551
+ " <td>1 证券代码: 000001 证券简称:平安银行 ...</td>\n",
552
+ " </tr>\n",
553
+ " </tbody>\n",
554
+ "</table>\n",
555
+ "<p>2 rows × 25 columns</p>\n",
556
+ "</div>"
557
+ ],
558
+ "text/plain": [
559
+ " id secCode secName orgId announcementId \\\n",
560
+ "0 None 000001 平安银行 gssz0000001 1207862647 \n",
561
+ "1 None 000001 平安银行 gssz0000001 1207843688 \n",
562
+ "\n",
563
+ " announcementTitle announcementTime \\\n",
564
+ "0 关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告 2020-05-27 \n",
565
+ "1 2019年年度权益分派实施公告 2020-05-22 \n",
566
+ "\n",
567
+ " adjunctUrl adjunctSize adjunctType ... \\\n",
568
+ "0 finalpage/2020-05-27/1207862647.PDF 148 PDF ... \n",
569
+ "1 finalpage/2020-05-22/1207843688.PDF 214 PDF ... \n",
570
+ "\n",
571
+ " important batchNum announcementContent orgName tileSecName \\\n",
572
+ "0 None None None 平安银行 \n",
573
+ "1 None None None 平安银行 \n",
574
+ "\n",
575
+ " shortTitle announcementTypeName secNameList PDF_path \\\n",
576
+ "0 关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告 None None removed \n",
577
+ "1 2019年年度权益分派实施公告 None None removed \n",
578
+ "\n",
579
+ " Content \n",
580
+ "0 证券代码: 000001 证券简称:平安银行 ... \n",
581
+ "1 1 证券代码: 000001 证券简称:平安银行 ... \n",
582
+ "\n",
583
+ "[2 rows x 25 columns]"
584
+ ]
585
+ },
586
+ "execution_count": 15,
587
+ "metadata": {},
588
+ "output_type": "execute_result"
589
+ }
590
+ ],
591
+ "source": [
592
+ "df = downloader.dataframe\n",
593
+ "df.head(2)"
594
+ ]
595
+ },
596
+ {
597
+ "cell_type": "code",
598
+ "execution_count": 16,
599
+ "metadata": {},
600
+ "outputs": [
601
+ {
602
+ "data": {
603
+ "text/plain": [
604
+ "(42, 25)"
605
+ ]
606
+ },
607
+ "execution_count": 16,
608
+ "metadata": {},
609
+ "output_type": "execute_result"
610
+ }
611
+ ],
612
+ "source": [
613
+ "df.shape"
614
+ ]
615
+ },
616
+ {
617
+ "cell_type": "code",
618
+ "execution_count": 17,
619
+ "metadata": {},
620
+ "outputs": [
621
+ {
622
+ "data": {
623
+ "text/html": [
624
+ "<div>\n",
625
+ "<style scoped>\n",
626
+ " .dataframe tbody tr th:only-of-type {\n",
627
+ " vertical-align: middle;\n",
628
+ " }\n",
629
+ "\n",
630
+ " .dataframe tbody tr th {\n",
631
+ " vertical-align: top;\n",
632
+ " }\n",
633
+ "\n",
634
+ " .dataframe thead th {\n",
635
+ " text-align: right;\n",
636
+ " }\n",
637
+ "</style>\n",
638
+ "<table border=\"1\" class=\"dataframe\">\n",
639
+ " <thead>\n",
640
+ " <tr style=\"text-align: right;\">\n",
641
+ " <th></th>\n",
642
+ " <th>announcementTime</th>\n",
643
+ " <th>shortTitle</th>\n",
644
+ " <th>Content</th>\n",
645
+ " </tr>\n",
646
+ " </thead>\n",
647
+ " <tbody>\n",
648
+ " <tr>\n",
649
+ " <th>0</th>\n",
650
+ " <td>2020-05-27</td>\n",
651
+ " <td>关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告</td>\n",
652
+ " <td>证券代码: 000001 证券简称:平安银行 ...</td>\n",
653
+ " </tr>\n",
654
+ " <tr>\n",
655
+ " <th>1</th>\n",
656
+ " <td>2020-05-22</td>\n",
657
+ " <td>2019年年度权益分派实施公告</td>\n",
658
+ " <td>1 证券代码: 000001 证券简称:平安银行 ...</td>\n",
659
+ " </tr>\n",
660
+ " <tr>\n",
661
+ " <th>2</th>\n",
662
+ " <td>2020-05-20</td>\n",
663
+ " <td>关于获准发行小微企业贷款专项金融债券的公告</td>\n",
664
+ " <td>证券代码: 000001 证券简称:平安银行 ...</td>\n",
665
+ " </tr>\n",
666
+ " <tr>\n",
667
+ " <th>3</th>\n",
668
+ " <td>2020-05-16</td>\n",
669
+ " <td>监事会决议公告</td>\n",
670
+ " <td>1 证券代码: 000001 证券简称: 平安银行 ...</td>\n",
671
+ " </tr>\n",
672
+ " <tr>\n",
673
+ " <th>4</th>\n",
674
+ " <td>2020-05-15</td>\n",
675
+ " <td>2019年年度股东大会决议公告</td>\n",
676
+ " <td>1 证券代码: 000001 证券简称:平安银行 ...</td>\n",
677
+ " </tr>\n",
678
+ " <tr>\n",
679
+ " <th>5</th>\n",
680
+ " <td>2020-05-15</td>\n",
681
+ " <td>2019年年度股东大会的法律意见书</td>\n",
682
+ " <td>北京总部 电话 : (86 -10) 8519 -1300 传真 : (86 -10...</td>\n",
683
+ " </tr>\n",
684
+ " <tr>\n",
685
+ " <th>6</th>\n",
686
+ " <td>2020-04-30</td>\n",
687
+ " <td>中信证券股份有限公司、平安证券股份有限公司关于公司关联交易有关事项的核查意见</td>\n",
688
+ " <td>1 中信证券股份有限公司 、平安证券股份有限 公司 关于平安银行股份有限公司 关联交易 有...</td>\n",
689
+ " </tr>\n",
690
+ " <tr>\n",
691
+ " <th>7</th>\n",
692
+ " <td>2020-04-30</td>\n",
693
+ " <td>独立董事独立意见</td>\n",
694
+ " <td>1 平安银行股份有限公司独立董事独立意见 根据《关于在上市公司建立独立董事制度的指导...</td>\n",
695
+ " </tr>\n",
696
+ " <tr>\n",
697
+ " <th>8</th>\n",
698
+ " <td>2020-04-30</td>\n",
699
+ " <td>关联交易公告</td>\n",
700
+ " <td>1 证券代码: 000001 证券简称:平安银行 ...</td>\n",
701
+ " </tr>\n",
702
+ " <tr>\n",
703
+ " <th>9</th>\n",
704
+ " <td>2020-04-21</td>\n",
705
+ " <td>2020年第一季度报告全文</td>\n",
706
+ " <td>证券代码: 000001 证券简称:平安银行 ...</td>\n",
707
+ " </tr>\n",
708
+ " </tbody>\n",
709
+ "</table>\n",
710
+ "</div>"
711
+ ],
712
+ "text/plain": [
713
+ " announcementTime shortTitle \\\n",
714
+ "0 2020-05-27 关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告 \n",
715
+ "1 2020-05-22 2019年年度权益分派实施公告 \n",
716
+ "2 2020-05-20 关于获准发行小微企业贷款专项金融债券的公告 \n",
717
+ "3 2020-05-16 监事会决议公告 \n",
718
+ "4 2020-05-15 2019年年度股东大会决议公告 \n",
719
+ "5 2020-05-15 2019年年度股东大会的法律意见书 \n",
720
+ "6 2020-04-30 中信证券股份有限公司、平安证券股份有限公司关于公司关联交易有关事项的核查意见 \n",
721
+ "7 2020-04-30 独立董事独立意见 \n",
722
+ "8 2020-04-30 关联交易公告 \n",
723
+ "9 2020-04-21 2020年第一季度报告全文 \n",
724
+ "\n",
725
+ " Content \n",
726
+ "0 证券代码: 000001 证券简称:平安银行 ... \n",
727
+ "1 1 证券代码: 000001 证券简称:平安银行 ... \n",
728
+ "2 证券代码: 000001 证券简称:平安银行 ... \n",
729
+ "3 1 证券代码: 000001 证券简称: 平安银行 ... \n",
730
+ "4 1 证券代码: 000001 证券简称:平安银行 ... \n",
731
+ "5 北京总部 电话 : (86 -10) 8519 -1300 传真 : (86 -10... \n",
732
+ "6 1 中信证券股份有限公司 、平安证券股份有限 公司 关于平安银行股份有限公司 关联交易 有... \n",
733
+ "7 1 平安银行股份有限公司独立董事独立意见 根据《关于在上市公司建立独立董事制度的指导... \n",
734
+ "8 1 证券代码: 000001 证券简称:平安银行 ... \n",
735
+ "9 证券代码: 000001 证券简称:平安银行 ... "
736
+ ]
737
+ },
738
+ "execution_count": 17,
739
+ "metadata": {},
740
+ "output_type": "execute_result"
741
+ }
742
+ ],
743
+ "source": [
744
+ "selected_columns = [\"announcementTime\", \"shortTitle\",\"Content\"]\n",
745
+ "df[selected_columns].head(10)"
746
+ ]
747
+ },
748
+ {
749
+ "cell_type": "code",
750
+ "execution_count": null,
751
+ "metadata": {},
752
+ "outputs": [],
753
+ "source": []
754
+ }
755
+ ],
756
+ "metadata": {
757
+ "kernelspec": {
758
+ "display_name": "finrl",
759
+ "language": "python",
760
+ "name": "python3"
761
+ },
762
+ "language_info": {
763
+ "codemirror_mode": {
764
+ "name": "ipython",
765
+ "version": 3
766
+ },
767
+ "file_extension": ".py",
768
+ "mimetype": "text/x-python",
769
+ "name": "python",
770
+ "nbconvert_exporter": "python",
771
+ "pygments_lexer": "ipython3",
772
+ "version": "3.7.12"
773
+ },
774
+ "orig_nbformat": 4,
775
+ "vscode": {
776
+ "interpreter": {
777
+ "hash": "afd6dc03c9be451573fc2885de79a969af6a24a159f11a3ead741ab7a9ff405f"
778
+ }
779
+ }
780
+ },
781
+ "nbformat": 4,
782
+ "nbformat_minor": 2
783
+ }
FinNLP/docs/FinNLP/docs/jupyter/Data_Sources_EarningCalls.ipynb ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "os.chdir('../../../..')\n",
11
+ "# print(os.getcwd())"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 2,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "from finnlp.data_sources.earning_calls import EarningCallTranscripts"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "markdown",
25
+ "metadata": {},
26
+ "source": [
27
+ "The Earning call transcripts takes in three arguments\n",
28
+ "\n",
29
+ "* Year\n",
30
+ "* Ticker symbol\n",
31
+ "* Quarter name from the list [\"Q1\",\"Q2\",\"Q3\",\"Q4\"]"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 3,
37
+ "metadata": {},
38
+ "outputs": [],
39
+ "source": [
40
+ "loader = EarningCallTranscripts(2023,'AAPL','Q3')\n",
41
+ "docs = loader.load_data()"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": 4,
47
+ "metadata": {},
48
+ "outputs": [
49
+ {
50
+ "name": "stdout",
51
+ "output_type": "stream",
52
+ "text": [
53
+ "{'text': \"Operator: Good day, and welcome to the Apple Q3 Fiscal Year 2023 Earnings Conference Call. Today's call is being recorded. At this time, for opening remarks and introductions, I would like to turn the call over to Saori Casey, Vice President of Finance. Please go ahead.\\nSaori Casey: Thank you. Good afternoon, and thank you for joining us. Speaking first today is Apple's CEO, Tim Cook; and he'll be followed by CFO, Luca Maestri. After that, we'll open the call to questions from analysts. Please note that some of the information you'll hear during our discussion today will consist of forward-looking statements, including, without limitation, those regarding revenue, gross margin, operating expenses, other income and expense, taxes, capital allocation and future business outlook, including the potential impact of macroeconomic conditions on the company's business and the results of operations. These statements involve risks and uncertainties that may cause actual results or trends to differ materially from our forecast. For more information, please refer to the risk factors discussed in Apple's most recently filed annual report on Form 10-K and the Form 8-K filed with the SEC today, along with the associated press release. Apple assumes no obligation to update any forward-looking statements, which speak only as of the date they are made. I'd now like to turn the call over to Tim for introductory remarks.\\nTim Cook: Thank you, Saori. Good afternoon, everyone, and thanks for joining us. Today, Apple is reporting revenue of $81.8 billion for the June quarter, better than our expectations. We continued to see strong results in emerging markets, driven by robust sales of iPhone with June quarter total revenue records in India, Indonesia, Mexico, the Philippines, Poland, Saudi Arabia, Turkey and the UAE. We set June quarter records in a number of other countries as well, including France, the Netherlands and Austria. 
And we set an all-time revenue record in Services driven by more than $1 billion paid subscriptions. We continued to face an uneven macroeconomic environment, including nearly 4 percentage points of foreign exchange headwinds. On a constant currency basis, we grew compared to the prior year's quarter in aggregate and in the majority of markets we track. We continue to manage deliberately and innovate relentlessly, and we are driven by the sense of possibility those efforts inspire. To that end, before I turn to the quarter in more detail, I want to take a moment to acknowledge the unprecedented innovations we were proud to announce at our Worldwide Developers Conference. In addition to extraordinary new Macs and incredible updates to our software platforms, we had the chance to introduce the world to spatial computing. We were so pleased to share the revolutionary Apple Vision Pro with the world, a bold new product unlike anything else created before. Apple Vision Pro is a marvel of engineering, built on decades of innovation only possible at Apple. It is the most advanced personal electronic device ever created, and we've been thrilled by the reaction from press, analysts, developers and content creators who've had the chance to try it. We can't wait to get it into customers' hands early next year. Now let me share more with you on our June quarter results beginning with iPhone. iPhone revenue came in at $39.7 billion for the quarter, down 2% from the year ago quarter's record performance. On a constant currency basis, iPhone revenue grew, and we had a June quarter record for switchers, reflecting the popularity of the iPhone lineup. iPhone 14 customers continue to praise the exceptional battery life and essential health and safety features, while iPhone 14 Plus users are loving the new larger screen size. And with Dynamic Island, Always-On display and the most powerful camera system ever in an iPhone, the iPhone 14 Pro lineup is our best ever. Turning to Mac. 
We recorded $6.8 billion in revenue, down 7% year-over-year. We are proud to have completed the transition of our entire Mac lineup to run exclusively on Apple silicon. We are also excited to have introduced the new 15-inch MacBook Air during the quarter, the world's best 15-inch laptop and one of the best Macs we've ever made. And we launched 2 new powerhouses in computing, Mac Studio with M2 Max and M2 Ultra and Mac Pro with M2 Ultra, which are the most powerful Macs we've ever made. iPad revenue was $5.8 billion for the June quarter, down 20% year-over-year, in part due to a difficult compare because of the timing of the iPad Air launch last year. Customers are loving iPad's versatility and exceptional value. There was a great deal of excitement from creatives when we brought Final Cut Pro and Logic Pro to iPad this spring. And with the back-to-school season in full swing, iPad has the power to help students tackle the toughest assignments. Across Wearables, Home and Accessories, revenue was $8.3 billion, up 2% year-over-year and in line with our expectations. Packed with features to empower users to live a healthier life, Apple Watch and Apple Watch Ultra continue to help people take the next step on their wellness journey. As I mentioned earlier, last quarter, we held our biggest and most exciting WWDC yet. We were thrilled to welcome developers from across the globe to Apple Park, both in person and virtually, and to share some stunning new announcements with the world. In addition to Apple Vision Pro and the new Macs that we introduced, we had the chance to reveal some truly remarkable new innovations to our software platforms. 
From exciting new features like Live Voicemail and StandBy in iOS 17, to new tools for users to work, play and personalize their experience in macOS Sonoma and iPadOS 17, to a fresh design and new workout capabilities in watchOS 10, there's so much coming later this year to empower users to get more out of their devices, and we think they're going to instantly love these new features. It was also an exciting quarter for Services where revenue reached $21.2 billion and saw a sequential acceleration to an 8% year-over-year increase, better than we expected. We set an all-time revenue record for total services and in a number of categories, including video, AppleCare, cloud and payment services. Since we introduced Apple Pay almost a decade ago, customers have been loving how easy it is to make purchases online, in apps and in stores. We're also pleased to see Apple Card build on the success of Apple Pay. Designed with our users' financial health in mind, Apple Card has become one of the most successful credit card programs in the U.S. with award-winning customer satisfaction. And this spring, we introduced a new high-yield savings account for Apple Card customers, which has become incredibly popular, with customers already making more than $10 billion in deposits. Meanwhile, Apple TV+ continues to provide a spectacular showcase of imaginative storytelling. Recently, fans welcomed new series like Hijack and Silo as well as returning fan favorites like Foundation and The Afterparty. In the few years since its launch, Apple TV+ has earned more than 1,500 nominations and 370 wins. That includes the 54 Emmy Award nominations across 13 titles that Apple TV+ received last month. It's also been an exciting time for sports on Apple TV+. Soccer legend Lionel Messi made his debut with Major League Soccer last month, and fans all over the world tuned in with MLS Season Pass. We are excited about our MLS partnership, and we're thrilled to see Messi suiting up with Inter Miami. 
And just in time for summer concert season, Apple Music launched new discovery features celebrating live music, including venue guides in Apple Maps and set lists from tours of major artists. These new features and others join a lineup of updates coming later this year to make Services more powerful, more useful and more fun than ever. Everything we do is in service of our customers, and retail is where we bring the best of Apple. During the quarter, we opened the Apple Store online in Vietnam, and we're excited to connect with more customers there. We also redesigned our first-ever Apple Store located in Tysons Corner, Northern Virginia, with inclusive, innovative and sustainable design enhancements. We opened a beautiful new store beneath our new London headquarters in the historic Battersea Power Station. And the performance of the stores we opened in India this spring exceeded our initial expectations. With every product we create, every feature we develop and every interaction we share with our customers, we lead with the values we stand for. We believe in creating technology that serves all of humanity, which is why accessibility has always been a core value that we embed in everything we do. On Global Accessibility Awareness Day, we unveiled some extraordinary new tools for cognitive, vision, hearing and mobility accessibility that will be available later this year, including Assistive Access, which distills apps to their most essential features, and Personal Voice, which allows users to create a synthesized voice that sounds just like them. Building technology in service of our customers also means protecting their privacy, which we believe is a fundamental human right. That's why we were pleased to announce major updates to Safari Private Browsing, Communication Safety and Lockdown Mode to further safeguard our users. 
And as part of our efforts to build a better world, we announced that we've more than doubled our initial commitment to our Racial Equity and Justice Initiative to more than $200 million. We will continue to do our part to support education, economic empowerment and criminal justice reform work. And while supporting efforts to advance equity and opportunity, we continue to build a culture of belonging at Apple and a workforce that reflects the communities we serve. Through our environmental work, we're making strides in our commitment to leave the world better than we found it. Last month, Apple joined with global nonprofit Acumen in a new effort to improve livelihoods in India through clean energy innovation, and we are as committed as ever to our Apple 2030 goal to be carbon neutral across our entire supply chain and the life cycle of our products. We've long held that education is the great equalizer. With that in mind, we're expanding Apple Learning Coach, a free professional learning program that teaches educators how to get more out of Apple technology in the classroom. Today, we welcome more than 1,900 educators across the U.S. to the program. By the end of the year, we'll offer Apple Learning Coach in 12 more countries. As we're connecting with teachers, we're also celebrating the graduations of students at our app developer academies around the world. From Detroit, to Naples, to Riyadh and more, we're excited to watch these talented developers embark on careers in coding and find ways to make a positive difference in their communities. Apple remains a champion of innovation, a company fueled by boundless creativity, driven by a deep sense of mission and guided by the unshakable belief that a great idea can change the world. Looking ahead, we'll continue to manage for the long term, always pushing the limits of what's possible and always putting the customer at the center of everything we do. 
With that, I'll turn it over to Luca.\\nLuca Maestri: Thank you, Tim, and good afternoon, everyone. Revenue for the June quarter was $81.8 billion, down 1% from last year and better than our expectations despite nearly 4 percentage points of negative impact from foreign exchange. On a constant currency basis, our revenue grew year-over-year in total and in the majority of the markets we track. We set June quarter records in both Europe and Greater China and continue to see strong performance across our emerging markets driven by iPhone. Products revenue was $60.6 billion, down 4% from last year, as we faced FX headwinds and an uneven macroeconomic environment. However, our installed base reached an all-time high across all geographic segments, driven by a June quarter record for iPhone switchers and high new-to rates in Mac, iPad and Watch, coupled with very high levels of customer satisfaction and loyalty. Our Services revenue set an all-time record of $21.2 billion, up 8% year-over-year and grew double digits in constant currency. Our performance was strong around the world as we reach all-time Services revenue records in Americas and Europe and June quarter records in Greater China and rest of Asia Pacific. Company gross margin was 44.5%, a record level for the June quarter and up 20 basis points sequentially, driven by cost savings and favorable mix shift towards Services, partially offset by a seasonal loss of leverage. Products gross margin was 35.4%, down 130 basis points from last quarter due to seasonal loss of leverage and mix, partially offset by favorable costs. Services gross margin was 70.5%, decreasing 50 basis points sequentially. Operating expenses of $13.4 billion were below the low end of the guidance range we provided at the beginning of the quarter and decelerated from the March quarter. We continue to take a deliberate approach in managing our spend with strong focus on innovation and new product development. 
The results of these actions delivered net income of $19.9 billion, diluted earnings per share of $1.26, up 5% versus last year, and very strong operating cash flow of $26.4 billion. Let me now provide more detail for each of our revenue categories. iPhone revenue was $39.7 billion, down 2% year-over-year but grew on a constant currency basis. We set revenue records in several markets around the world, including an all-time record in India and June quarter records in Latin America, the Middle East and Africa, Indonesia, the Philippines, Italy, the Netherlands and the U.K. Our iPhone active installed base grew to a new all-time high, thanks to a June quarter record in switchers. This is a testament to our extremely high levels of customer satisfaction, which 451 Research recently measured at 98% for the iPhone 14 family in the U.S. Mac generated $6.8 billion in revenue, down 7% year-over-year. We continue to invest in our Mac portfolio. And this past quarter, we were pleased to complete the transition to Apple silicon for the entire lineup. This transition has driven both strong upgrade activity and a high number of new customers. In fact, almost half of Mac buyers during the quarter were new to the product. We also saw reported customer satisfaction of 96% for Mac in the U.S. iPad revenue was $5.8 billion, down 20% year-over-year and in line with our expectations. These results were driven by a difficult compare against the full quarter impact of the iPad Air launch in the prior year. At the same time, we continue to attract a large number of new customers to the iPad installed base with over half of the customers who purchased iPads during the quarter being new to the product. And the latest reports from 451 Research indicate customer satisfaction of 96% in the U.S. Wearables, Home and Accessories revenue was $8.3 billion, up 2% year-over-year, with a June quarter record in Greater China and strong performance in several emerging markets. 
We continue to see Apple Watch expand its reach with about 2/3 of customers purchasing an Apple Watch during the quarter being new to the product. And this is combined with very high levels of customer satisfaction, which was recently reported at 98% in the United States. Moving on to Services. We reached a new all-time revenue record of $21.2 billion with year-over-year growth accelerating sequentially to 8% and up double digits in constant currency. In addition to the all-time records Tim mentioned earlier, we also set June quarter records for advertising, App Store and Music. We are very pleased with our performance in Services, which is a direct reflection of our ecosystem's strength. First, our installed base of over 2 billion active devices continues to grow at a nice pace and establishes a solid foundation for the future expansion of our ecosystem. Second, we see increased customer engagement with our services. Both our transacting accounts and paid accounts grew double digits year-over-year, each reaching a new all-time high. Third, our paid subscriptions showed strong growth. This past quarter, we reached an important milestone and passed 1 billion paid subscriptions across the services on our platform, up 150 million during the last 12 months and nearly double the number of paid subscriptions we had only 3 years ago. And finally, we continue to improve the breadth and the quality of our current services. From 20 new games on Apple Arcade, to brand-new content on Apple TV+, to the launch of our high-yield savings account with Apple Card, our customers are loving these enhanced offerings. Turning to the enterprise market. Our customers are leveraging Apple products every day to help improve productivity and attract talent. Blackstone, a global investment management firm, is expanding its Apple footprint from their corporate iPhone fleet to now offering the MacBook Air powered by M2 to all of their corporate employees and portfolio companies. 
Gilead, a leading biopharmaceutical company, has deployed thousands of iPads globally to their sales team. Over the last 6 months, they have also doubled their Mac user base by making MacBook Air available to more employees with a focus on user experience and strong security. Let me now turn to our cash position and capital return program. We ended the quarter with over $166 billion in cash and marketable securities. We repaid $7.5 billion in maturing debt while issuing $5.2 billion of new debt and increasing commercial paper by $2 billion, leaving us with total debt of $109 billion. As a result, net cash was $57 billion at the end of the quarter. During the quarter, we returned over $24 billion to shareholders, including $3.8 billion in dividends and equivalents and $18 billion through open market repurchases of 103 million Apple shares. We continue to believe there is great value in our stock and maintain our target of reaching a net cash neutral position over time. As we move ahead into the September quarter, I'd like to review our outlook, which includes the types of forward-looking information that Saori referred to at the beginning of the call. We expect our September quarter year-over-year revenue performance to be similar to the June quarter, assuming that the macroeconomic outlook doesn't worsen from what we are projecting today for the current quarter. Foreign exchange will continue to be a headwind, and we expect a negative year-over-year revenue impact of over 2 percentage points. We expect iPhone and Services year-over-year performance to accelerate from the June quarter. Also, we expect the revenue for both Mac and iPad to decline by double digits year-over-year due to difficult compares, particularly on the Mac. For both products, we experienced supply disruptions from factory shutdowns in the June quarter a year ago and were able to fulfill significant pent-up demand in the year ago September quarter. 
We expect gross margin to be between 44% and 45%. We expect OpEx to be between $13.5 billion and $13.7 billion. We expect OI&E to be around negative $250 million, excluding any potential impact from the mark-to-market of minority investments, and our tax rate to be around 16%. Finally, today, our Board of Directors has declared a cash dividend of $0.24 per share of common stock payable on August 17, 2023, to shareholders of record as of August 14, 2023. With that, let's open the call to questions.\\nSaori Casey: Thank you, Luca. [Operator Instructions]. Operator, may we have the first question, please?\\nOperator: [Operator Instructions]. We will go ahead and take our first question from Shannon Cross with Credit Suisse.\\nShannon Cross: Tim, you mentioned -- and actually, Luca, too, you mentioned an uneven macro environment during the quarter several times on the call. I'm wondering if you can talk on a geographic basis about some of the trends you're seeing in iPhone. I'm specifically wondering how demand is trending within...\\nLuca Maestri: Sure. Shannon, I'll answer it. I didn't get the end of your question.\\nOperator: I think she has dropped.\\nLuca Maestri: Okay. Well, let me answer the question for the part that I could follow. So on a geographic basis, we've had great performance for iPhone in emerging markets. We set June quarter records in many of the emerging markets. We grew in total double digits. And the performance was strong across the board in emerging markets from China, where our performance improved from minus 3% to plus 8% in the June quarter and we grew double digits in constant currency, to many other areas around the world from India, where, again, we set a June quarter record with very strong performance there, Indonesia, Southeast Asia, in general, Latin America, Middle East. And so it's been really good there. 
We -- also, as you can see from our geographic segments, we had a slight acceleration of performance in the Americas, primarily in the United States, but we declined there because the smartphone market has been in a decline for the last couple of quarters in the United States.\\nShannon Cross: Sorry about that. I'm not sure why I cut off. In terms of gross margin, you were at the high end of the range [Technical Difficulty] and you guided to 45% at the high end, which is, I think, higher than I remember in 20 years of covering you. So how should we think about puts and takes of gross margin? And it seems like there's like a perfect storm of good things. So I just -- maybe if you can talk about how you're thinking about it more holistically.\\nLuca Maestri: I think you remember correctly, Shannon, because the 44.5% for the June quarter is an all-time record for us in June. We were up 20 basis points sequentially. It was driven by cost savings and a mix shift towards Services, which obviously helps company gross margins, partially offset by the seasonal loss of leverage. We have a commodity environment that is favorable to us. Our product mix is quite strong at this point. And so with the exception of foreign exchange, which continues to be a drag, and it was a significant drag on a year-over-year basis, yes, we are in a good position right now. We are in a good position for the June quarter. And as I mentioned, we expect similar level of gross margins for the same reasons, frankly, for the September quarter.\\nOperator: Our next question comes from Wamsi Mohan of Bank of America.\\nWamsi Mohan: Luca, can you just give us a little more color around the guidance? Your overall revenue performance, you called out similar. Obviously, you absorbed a higher FX impact this quarter versus your guide. And you also noted Services acceleration. So just wondering, when you think about that comment on iPhone acceleration, is that on a reported basis? 
Is that constant currency basis? And is there something that's changing in terms of seasonality perhaps for you that is causing not as much step-up in product revenue as typical on a sequential basis? And I have a follow-up.\\nLuca Maestri: Yes. So all our comments are in reported currency, not in constant currency in relation to the outlook. And we said acceleration sequentially for iPhone and for Services. But we're also pointing out -- and this is where I think, Wamsi, you're referring to some seasonality issues. We also said that for Mac and iPad, we expect to decline double digits. And the reason for that is that we have a very difficult compare versus last year. You remember that a year ago, in the June quarter, we had factory shutdowns for both Mac and iPad. And so we were able to fill the pent-up demand from those shutdowns during the September quarter. So an unusual level of activity that we had a year ago. And so now, obviously, the compare is difficult. So we expect both iPad and Mac to be down double digits, which offset the acceleration that I mentioned for iPhone and Services.\\nWamsi Mohan: Okay. And Tim, I was wondering if you could update us on what percent of iPhones are sold on some type of installment basis now versus full upfront payment on a global basis. And maybe some thoughts on if you expect similar promotional activity from carriers, especially in the U.S., that seem to be grappling with a lot of cash flow issues this particular year.\\nLuca Maestri: Wamsi, I'll take it. We've done a really good job over the last few years with affordability programs around the world directly in our direct channel and with our partners around the world. The majority of iPhones, at this point, are sold using some kind of a program, trade-ins, installments, some kind of financing. And that percentage, which again, it's well over 50%, is very similar across developed and emerging markets. 
We want to do more of that because we think it really helps reduce the affordability threshold for our products. And we think it is also one of the reasons why our product mix has been very strong during the last couple of cycles. So we will continue to push on that front.\\nOperator: Our next question is from David Vogt with UBS.\\nDavid Vogt: I just wanted to follow up on 2 points that both you, Tim, and Luca made about growth and maybe commodities. So just to be clear, I know you're talking about an acceleration in iPhone, but the comp is about 2 points easier from FX. So I just want to understand, is that on a like-for-like basis, excluding the currency improvement of about 2 points from the June quarter to the September quarter? And from a commodity perspective, I know last quarter, you talked about buying a lot of inventory at favorable prices, which was an incredibly smart strategy. Where do you sit today? And what's sort of the timing or the duration of that commodity sort of backlog that you have as we think about next quarter and the subsequent quarters? How far does that get you out into the future from this favorable cost dynamic?\\nLuca Maestri: Let me start again. I just want to be clear about the guidance, the outlook guidance that we provided. We're referring entirely to reported numbers. So they take into account the fact that we have a slight improvement in foreign exchange. So when I talk about similar performance, I refer to reported performance in the June quarter and then the reported performance in the September quarter. And again, we expect, on a reported basis, our iPhone performance to accelerate, our Services performance to accelerate, and iPad and Mac to decline double digits. On the commodity front, as I mentioned, the environment is favorable. 
We always make sure that we take advantage of the opportunities that are available in the market, and we will continue to do that going forward.\\nDavid Vogt: Luca, any sense of how long that gives you a run rate today based on what you currently have? Can you give us a sense for at least the short-term tailwind?\\nLuca Maestri: I don't want to speculate past the September quarter because that's the horizon where we provide guidance. And I've said that the guidance for September is 44% to 45%, which you know is historically very high. And so obviously, that reflects a favorable environment for us.\\nOperator: Our next question is from Erik Woodring with Morgan Stanley.\\nErik Woodring: I have 2 as well. Maybe if we just start kind of big picture, Tim or Luca. I was wondering if you could just kind of share some incremental color on how you think the consumer is behaving today versus 90 days ago and maybe how that differs by region. Meaning, are there any signs that consumer is incrementally more willing to spend on things like consumer electronics? Or is there still relative caution in the market? Are there any regions where you're seeing more strength in the consumer? And how sustainable do you think some of that strength or weakness could be based on some of the KPIs you track? And then I have a follow-up.\\nTim Cook: Yes. David, it's Tim. If you sort of step around the world, we did exceptionally well in emerging markets last quarter and even better on a constant currency basis. And so emerging markets were -- was a strength. If you look at China, in China, we went from a negative 3% in Q2 to a plus 8% in Q3. And so in China, we had an acceleration. If you look at the U.S., which is in the -- obviously in the Americas segment, it is the vast majority of what's in there, there was also a slight acceleration sequentially, although the Americas is still declining somewhat year-over-year, as you can see on the data sheet. 
The primary reason for that is that it's a challenging smartphone market in the U.S. currently. And then in Europe, Europe saw a record quarter and -- for the June quarter, a record. And so some really good signs in most places in the world.\\nErik Woodring: Awesome. And then maybe, Luca, a question for you. I think it's been about 3 quarters now where we've seen OpEx either grow below historical seasonality or come in below your expectations. I think this is the first time we've seen R&D grow less than 10% year-over-year since fiscal 2Q 2007. So can you maybe just talk about some of the cost actions you're taking? And as you look forward, what are the indicators that you're really evaluating that would give you greater confidence in perhaps returning back to a more seasonal cadence of OpEx spending? Or is this just a new normal that we should be expecting? That's it for me.\\nLuca Maestri: Obviously, we look at the environment, and we know that this has been an uncertain period for the last few quarters. And so we decided to be deliberate in what we do in terms of controlling our spend, and there's many areas across the company that we're working on and we've been quite effective at slowing down the spend. We slowed down also the hiring within the company in several areas. And we're very pleased with our ability to decelerate some of the expense growth taking into account the overall macro situation. We will continue to manage deliberately. You can see that we continue to grow our R&D costs faster than the rest of the company. SG&A is actually growing at a much slower pace because obviously, our focus continues to be in innovation and product development, and we'll continue to do that.\\nOperator: Our next question is from Michael Ng with Goldman Sachs.\\nMichael Ng: I just have 2 questions as well. 
First, it was encouraging to see the Services outperformance in the quarter, up double digits on an FX-neutral basis, and more Services acceleration next quarter on a reported basis. I was just wondering if you could just talk a little bit more about key underlying drivers for the confidence in the Services acceleration next quarter, understanding that FX helps a little bit. But anything to call out as it relates to things in Apple Search Ads that's helping. You're obviously making a lot of investments in Apple TV+ between MLS and the Canal+ deal. So any thoughts there would be great.\\nLuca Maestri: Yes, Michael, you're correct. I mean clearly, we've seen an improvement in the June quarter, and we expect further improvement in the September quarter. In June, the performance was across the board. Tim and I mentioned we set records really across the board. We had all-time records in cloud, in video, in AppleCare, in payments and June quarter records in App Store, advertising and Music. So we saw improvement in all our Services categories. We think the situation will continue to improve as we go through September. And that's very positive because not only good for the financial results, but obviously, it shows a high level of engagement of our customers in the ecosystem, which is very important for us. And it's really the sum of all the things that I mentioned in my prepared remarks. It goes from the fact that our installed base continues to grow, so we've got a larger pool of customers, to the fact that our customers are more engaged as we have more transacting accounts and paid accounts on the ecosystem. And the subscriptions business is very healthy with growth of 150 million paid subscriptions just in the last 12 months. It's almost double to what we had 3 years ago. And of course, we are providing more and more content to our users. And so the combination of all these things gives us good confidence for September.\\nMichael Ng: Great. 
And just as a related follow-up, it's about the hardware installed base and Services ARPU. I was curious when you talked about the Services strength, you talked about the 2 billion-plus installed base. When you think about the opportunity to increase the Services ARPU, do you really think about it internally on a per-active-iPhone user basis or on a per-device basis? Said differently, I'm just curious where you think about -- whether you think there's an incremental opportunity for those users that have multiple devices. Do you really see a big Services ARPU uplift in that respect?\\nLuca Maestri: Well, we know that customers that own more than one device are typically more engaged in our ecosystem. And so obviously, they tend to also spend more on the Services front. I would say the biggest opportunity is that we know that there's a lot of customers that we have that are very familiar with our ecosystem. They are engaged in the ecosystem. But still today, they're using only the portion of the ecosystem that is free. And so we think that by offering better content and more content over time, we're going to be able to attract more of them as paid customers.\\nOperator: Our next question is from Amit Daryanani with Evercore.\\nAmit Daryanani: I have 2 as well. I guess, Luca, maybe if you can talk about Wearables a bit. The growth over there, I think, in constant currency was fairly impressive at plus 6%. Can you just touch on maybe what's driving that? And then how do we think about the Wearables segment heading into the September quarter? I know you talked about a bunch of other ones, but how do we think about Wearables into September as well?\\nLuca Maestri: Sorry, Amit, I didn't get the -- what are you referring to?\\nAmit Daryanani: Yes. Sorry. I was hoping you could talk a bit about the Wearables segment because the growth over there was fairly impressive. And then how do you think about it into September as well?\\nLuca Maestri: Yes. 
On the Wearables front, we had really good performance in Greater China. And that's, again, very important for us. It was a June quarter record for Greater China. Very important for us because, again, it shows that the engagement with the ecosystem in a market that is so important for us like China continues to grow. It means that there's more and more customers that are owning more than the iPhone. Also, we continue to grow the installed base of the category very quickly because, as I mentioned, 2/3 of every buyer of Apple Watch during the course of the June quarter was new to the product. And so that is all additive to the installed base. So it's just great to see that the AirPods continue to be a great success in the marketplace for us. And so things are moving in the right direction there. It's become a very large business for us in Wearables, Home and Accessories. The last 12 months, we've done $40 billion of business, which is nearly the size of a Fortune 100 company. So it's become very important, and it's allowed us to diversify both our revenues and our earnings.\\nAmit Daryanani: That's really helpful. And then if I could just follow up, the Europe growth, the growth in Europe at up 5% is totally notable as well. I think you have a few emerging markets that you put in Europe as well. But I would love to understand what's happening in Europe and if there's a way to think about sort of Western Europe or developed world versus emerging markets over there.\\nLuca Maestri: Yes. It's been very good, primarily on the emerging market side of Europe. We include India and the Middle East and Central and Eastern Europe into the Europe segment. But as we mentioned at the beginning of the call, we had a number of markets that did very well, like France, like Italy, the Netherlands, Austria. So it was a good quarter for Europe.\\nOperator: Our next question is from Harsh Kumar with Piper Sandler.\\nHarsh Kumar: I have one for Luca and then later on one for Tim. 
So Luca, for some time now, for many quarters, you've had a currency headwind or foreign exchange currency headwind. It's conceivable that as rates start to come down, hopefully next year that the dollar weakens. Could you take us through the mechanism of how that will work on your revenues and for your costs?\\nLuca Maestri: So we tend -- we try to hedge our foreign exchange exposures because we think it's the right approach for the company in terms of minimizing the volatility that necessarily happens from the movements of currencies. We cannot effectively hedge every single exposure around the world because in some cases, it is not possible. In other cases, it is prohibitively expensive. But we tend to cover all the major currency pairs that we have. About 60% of our business is outside the United States. So it's a very, very large and, I would say, very effective hedging program. And so we set up these hedges, and they tend to roll over very regularly. And then we replace them with new hedges at the new spot rate. So the impact that we're going to have on revenue and cost will depend on where the spot rates are at different points in time. And therefore, because of the way the program works, there tends to be a bit of a lag in both directions as the foreign exchange moves over time.\\nHarsh Kumar: Understood. Very helpful. And for Tim, Tim, historically, for the last many years, carriers in at least the U.S., which I think is your largest market for iPhone, have had programs to help folks upgrade, whether they give a cash rebate or you bring in your old phone, something like that. I was curious, as you get into your peak December quarter, if you're aware whether these programs are in place. And the reason why I'm asking is I think earlier, you mentioned that more than 50% of your phones are sold through some kind of program. I assume the number is even higher in the U.S.\\nTim Cook: I don't want to get into revealing specifics in the different carriers. 
But generally speaking, I would think that it would be quite easy to find a promotion on a phone, provided you're hooking up to a service and either switching services, carriers or upgrading your phone at the same carrier. I think both of those cases today that you can find promotions out there, and I would expect that you'd be able to do that in the December time frame as well.\\nOperator: Our next question is from Aaron Rakers with Wells Fargo.\\nAaron Rakers: I have two as well. So first of all, I just want to kind of ask Tim. Strategically, as we think about the Services growth and kind of the content expansion behind that, I'm curious if you could help us maybe appreciate what you've seen from a sporting perspective in terms of the engagement with MLS, the engagement with Major League Baseball, and how strategically you're thinking about expansion in sports as a key driver of Services growth going forward.\\nTim Cook: We're focused on original content, as you know, with TV+. And so we're all about giving great storytellers the venue to tell great stories and hopefully get us all to think a little deeper. And sport is a part of that because sport is the ultimate original story. And for MLS, we're -- we could not be happier with how the partnership is going. It's clearly in the early days, but we are beating our expectation in terms of subscribers, and the fact that Messi went to Inter Miami helped us out there a bit. And so we're very excited about it.\\nAaron Rakers: Yes. And as a quick follow-up, I'm just curious, an update on -- you mentioned in your prepared remarks the continued growth that you've seen in India. I'm curious how we think about that market opportunity looking forward. Is there anything that you see evolving that could accelerate the opportunity for iPhone in that large mobile market?\\nTim Cook: We did hit a June quarter revenue record in India, and we grew strong double digits. We also opened our first 2 retail stores during the quarter. 
And it's -- of course, it's early going currently, but they're currently beating our expectation in terms of how they're doing. We continue to work on building out the channel and putting more investment in our direct-to-consumer offers as well. And so I think if you look at it, it's the second largest smartphone market in the world. And it's -- so we ought to be doing really well there. And where I'm really pleased with our growth there, we're still -- we still have a very, very modest and low share in the smartphone market. And so I think that it's a huge opportunity for us. And we're putting the -- all of our energies in making that occur.\\nOperator: Our next question comes from Sidney Ho with Deutsche Bank.\\nSidney Ho: Your -- I just wanted to ask about the AI side of things. Your strategy on AI seems quite different than many of your peers, at least you don't talk too much about that, how much you invest in it. Maybe you can elaborate a little bit on that. But related to that, how do you see your investment in this area turning into financial performance in the future? Is it mainly through faster upgrade cycle, maybe higher ASP? Or are you thinking about maybe additional services that you can capitalize on that? And then I have a follow-up.\\nTim Cook: If you take a step back, we view AI and machine learning as core fundamental technologies that are integral to virtually every product that we build. And so if you think about WWDC in June, we announced some features that will be coming in iOS 17 this fall, like Personal Voice and Live Voicemail. Previously, we had announced lifesaving features like fall detection and crash detection and ECG. None of these features that I just mentioned and many, many more would be possible without AI and machine learning. And so it's absolutely critical to us. And of course, we've been doing research across a wide range of AI technologies, including generative AI for years. 
We're going to continue investing and innovating and responsibly advancing our products with these technologies with the goal of enriching people's lives. And so that's what it's all about for us. And as you know, we tend to announce things as they come to market, and that's our MO, and I'd like to stick to that.\\nSidney Ho: Okay. That's fair. Maybe as a follow-up is related to -- you talked about WWDC, where you actually introduced Vision Pro there. Clearly, a very big announcement there. How should we think about the revenue ramp related to the Vision Pro? Is there any catalysts that we should be thinking about that will drive an inflection of that product?\\nTim Cook: Yes. There's enormous excitement around the Vision Pro. We're excited internally. Everybody that's been through the demos are blown away, whether you're talking about press or analysts or developers. We are now shipping units to the developer community for them to begin working on their apps. And we're looking forward to shipping early next year. And so we could not be more excited with that. I'm using the product daily. And so we're not going to forecast revenues and so forth on the call today, but we're very excited about it.\\nOperator: We will take our last question from Krish Sankar with TD Cowen.\\nKrish Sankar: I have two of them as well. Number one, on iPhone, Tim, you mentioned about the record number of switchers in the quarter. I'm kind of curious how to think about, given the weak macro and consumer spending, how is the replacement cycle for iPhone? Is it similar, longer, shorter versus prior years? And can you talk a little bit about the demand linearity of iPhone during the June quarter? And then I have a follow-up.\\nTim Cook: Switchers were a very key part of our iPhone results for the quarter. We did set a record. We set a record in Greater China, in particular, and it was at the heart of our results there. 
And we continue to try to convince more and more people to switch because of our -- the experience and the ecosystem and -- that we can offer them. And so I think switching is a huge opportunity for us. In terms of the upgrade cycle and so forth, it's very difficult to estimate real time what is going on with the upgrade cycle. I would say, if you think about the iPhone results year-over-year, you have to think about the SE announcement in the year ago quarter, the iPhone SE announcement in the year ago quarter. And so that provides a bit of a headwind on the comp. But as Luca said, as he talked about how we're viewing Q4, the September quarter, we see iPhone accelerating in Q4.\\nKrish Sankar: Got it. Very helpful, Tim. And then my final question is on your retail stores, you obviously have a very large retail footprint and many of your stores seem to have been open for over a year now. How is the foot traffic there? And how do you think about sales or the retail trends in the June quarter and implications for the back half of this year on a seasonality basis?\\nTim Cook: I'm sorry, are you talking about our retail stores?\\nKrish Sankar: Yes, yes, your retail stores.\\nTim Cook: Yes. The -- if you look at retail, it's a key part of our go-to-market approach, and it will be so key and such a competitive advantage with Vision Pro. It will give us the opportunity to launch a new product and demo to many people in the stores. And so it has many advantages in it. And we continue to roll out more stores. As you know, we just opened 2 in India last quarter. We're -- there's still a lot of countries out there that don't have Apple stores that we would like to go into. And so we continue to see it as a key part of how we go to market and love the experience that we can provide customers there.\\nSaori Casey: A replay of today's call will be available for two weeks on Apple Podcasts, at a webcast of apple.com/investor and via telephone. 
The number for the telephone replay is 866-583-1035. Please enter the confirmation code 2553017, followed by the pound sign. These replays will be available by approximately 5 p.m. Pacific Time today. Members of the press with additional questions can contact Josh Rosenstock at 408-862-1142. Financial analysts can contact me, Saori Casey, with additional questions at 408-974-3123 while Suhasini Chandramouli is on her maternity leave. Thank you again for joining us.\\nOperator: Once again, this does conclude today's conference. We do appreciate your participation.\", 'metadata': {'ticker': 'AAPL', 'quarter': 'Q3', 'date_time': '2023-08-03 21:47:09', 'speakers_list': ['Michael Ng', 'Luca Maestri', 'Saori Casey', 'Harsh Kumar', 'Sidney Ho', 'Aaron Rakers', 'Operator', 'Tim Cook', 'Amit Daryanani', 'Wamsi Mohan', 'Erik Woodring', 'Shannon Cross', 'David Vogt', 'Krish Sankar']}}\n"
54
+ ]
55
+ }
56
+ ],
57
+ "source": [
58
+ "print(docs)"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": 6,
64
+ "metadata": {},
65
+ "outputs": [
66
+ {
67
+ "name": "stdout",
68
+ "output_type": "stream",
69
+ "text": [
70
+ "Operator: Good day, and welcome to the Apple Q3 Fiscal Year 2023 Earnings Conference Call. Today's call is being recorded. At this time, for opening remarks and introductions, I would like to turn the call over to Saori Casey, Vice President of Finance. Please go ahead.\n",
71
+ "Saori Casey: Thank you. Good afternoon, and thank you for joining us. Speaking first today is Apple's CEO, Tim Cook; and he'll be followed by CFO, Luca Maestri. After that, we'll open the call to questions from analysts. Please note that some of the information you'll hear during our discussion today will consist of forward-looking statements, including, without limitation, those regarding revenue, gross margin, operating expenses, other income and expense, taxes, capital allocation and future business outlook, including the potential impact of macroeconomic conditions on the company's business and the results of operations. These statements involve risks and uncertainties that may cause actual results or trends to differ materially from our forecast. For more information, please refer to the risk factors discussed in Apple's most recently filed annual report on Form 10-K and the Form 8-K filed with the SEC today, along with the associated press release. Apple assumes no obligation to update any forward-looking statements, which speak only as of the date they are made. I'd now like to turn the call over to Tim for introductory remarks.\n",
72
+ "Tim Cook: Thank you, Saori. Good afternoon, everyone, and thanks for joining us. Today, Apple is reporting revenue of $81.8 billion for the June quarter, better than our expectations. We continued to see strong results in emerging markets, driven by robust sales of iPhone with June quarter total revenue records in India, Indonesia, Mexico, the Philippines, Poland, Saudi Arabia, Turkey and the UAE. We set June quarter records in a number of other countries as well, including France, the Netherlands and Austria. And we set an all-time revenue record in Services driven by more than $1 billion paid subscriptions. We continued to face an uneven macroeconomic environment, including nearly 4 percentage points of foreign exchange headwinds. On a constant currency basis, we grew compared to the prior year's quarter in aggregate and in the majority of markets we track. We continue to manage deliberately and innovate relentlessly, and we are driven by the sense of possibility those efforts inspire. To that end, before I turn to the quarter in more detail, I want to take a moment to acknowledge the unprecedented innovations we were proud to announce at our Worldwide Developers Conference. In addition to extraordinary new Macs and incredible updates to our software platforms, we had the chance to introduce the world to spatial computing. We were so pleased to share the revolutionary Apple Vision Pro with the world, a bold new product unlike anything else created before. Apple Vision Pro is a marvel of engineering, built on decades of innovation only possible at Apple. It is the most advanced personal electronic device ever created, and we've been thrilled by the reaction from press, analysts, developers and content creators who've had the chance to try it. We can't wait to get it into customers' hands early next year. Now let me share more with you on our June quarter results beginning with iPhone. 
iPhone revenue came in at $39.7 billion for the quarter, down 2% from the year ago quarter's record performance. On a constant currency basis, iPhone revenue grew, and we had a June quarter record for switchers, reflecting the popularity of the iPhone lineup. iPhone 14 customers continue to praise the exceptional battery life and essential health and safety features, while iPhone 14 Plus users are loving the new larger screen size. And with Dynamic Island, Always-On display and the most powerful camera system ever in an iPhone, the iPhone 14 Pro lineup is our best ever. Turning to Mac. We recorded $6.8 billion in revenue, down 7% year-over-year. We are proud to have completed the transition of our entire Mac lineup to run exclusively on Apple silicon. We are also excited to have introduced the new 15-inch MacBook Air during the quarter, the world's best 15-inch laptop and one of the best Macs we've ever made. And we launched 2 new powerhouses in computing, Mac Studio with M2 Max and M2 Ultra and Mac Pro with M2 Ultra, which are the most powerful Macs we've ever made. iPad revenue was $5.8 billion for the June quarter, down 20% year-over-year, in part due to a difficult compare because of the timing of the iPad Air launch last year. Customers are loving iPad's versatility and exceptional value. There was a great deal of excitement from creatives when we brought Final Cut Pro and Logic Pro to iPad this spring. And with the back-to-school season in full swing, iPad has the power to help students tackle the toughest assignments. Across Wearables, Home and Accessories, revenue was $8.3 billion, up 2% year-over-year and in line with our expectations. Packed with features to empower users to live a healthier life, Apple Watch and Apple Watch Ultra continue to help people take the next step on their wellness journey. As I mentioned earlier, last quarter, we held our biggest and most exciting WWDC yet. 
We were thrilled to welcome developers from across the globe to Apple Park, both in person and virtually, and to share some stunning new announcements with the world. In addition to Apple Vision Pro and the new Macs that we introduced, we had the chance to reveal some truly remarkable new innovations to our software platforms. From exciting new features like Live Voicemail and StandBy in iOS 17, to new tools for users to work, play and personalize their experience in macOS Sonoma and iPadOS 17, to a fresh design and new workout capabilities in watchOS 10, there's so much coming later this year to empower users to get more out of their devices, and we think they're going to instantly love these new features. It was also an exciting quarter for Services where revenue reached $21.2 billion and saw a sequential acceleration to an 8% year-over-year increase, better than we expected. We set an all-time revenue record for total services and in a number of categories, including video, AppleCare, cloud and payment services. Since we introduced Apple Pay almost a decade ago, customers have been loving how easy it is to make purchases online, in apps and in stores. We're also pleased to see Apple Card build on the success of Apple Pay. Designed with our users' financial health in mind, Apple Card has become one of the most successful credit card programs in the U.S. with award-winning customer satisfaction. And this spring, we introduced a new high-yield savings account for Apple Card customers, which has become incredibly popular, with customers already making more than $10 billion in deposits. Meanwhile, Apple TV+ continues to provide a spectacular showcase of imaginative storytelling. Recently, fans welcomed new series like Hijack and Silo as well as returning fan favorites like Foundation and The Afterparty. In the few years since its launch, Apple TV+ has earned more than 1,500 nominations and 370 wins. 
That includes the 54 Emmy Award nominations across 13 titles that Apple TV+ received last month. It's also been an exciting time for sports on Apple TV+. Soccer legend Lionel Messi made his debut with Major League Soccer last month, and fans all over the world tuned in with MLS Season Pass. We are excited about our MLS partnership, and we're thrilled to see Messi suiting up with Inter Miami. And just in time for summer concert season, Apple Music launched new discovery features celebrating live music, including venue guides in Apple Maps and set lists from tours of major artists. These new features and others join a lineup of updates coming later this year to make Services more powerful, more useful and more fun than ever. Everything we do is in service of our customers, and retail is where we bring the best of Apple. During the quarter, we opened the Apple Store online in Vietnam, and we're excited to connect with more customers there. We also redesigned our first-ever Apple Store located in Tysons Corner, Northern Virginia, with inclusive, innovative and sustainable design enhancements. We opened a beautiful new store beneath our new London headquarters in the historic Battersea Power Station. And the performance of the stores we opened in India this spring exceeded our initial expectations. With every product we create, every feature we develop and every interaction we share with our customers, we lead with the values we stand for. We believe in creating technology that serves all of humanity, which is why accessibility has always been a core value that we embed in everything we do. On Global Accessibility Awareness Day, we unveiled some extraordinary new tools for cognitive, vision, hearing and mobile accessibility that will be available later this year, including Assistive Access, which distills apps to their most essential features, and Personal Voice, which allows users to create a synthesized voice that sounds just like them. 
Building technology and service of our customers also means protecting their privacy, which we believe is a fundamental human right. That's why we were pleased to announce major updates to Safari Private Browsing, Communication Safety and Lockdown Mode to further safeguard our users. And as part of our efforts to build a better world, we announced that we've more than doubled our initial commitment to our Racial Equity and Justice Initiative to more than $200 million. We will continue to do our part to support education, economic empowerment and criminal justice reform work. And while supporting efforts to advance equity and opportunity, we continue to build a culture of belonging at Apple and a workforce that reflects the communities we serve. Through our environmental work, we're making strides in our commitment to leave the world better than we found it. Last month, Apple joined with global nonprofit Acumen in a new effort to improve livelihoods in India through clean energy innovation, and we are as committed as ever to our Apple 2030 goal to be carbon neutral across our entire supply chain and the life cycle of our products. We've long held that education is the great equalizer. With that in mind, we're expanding Apple Learning Coach, a free professional learning program that teaches educators how to get more out of Apple technology in the classroom. Today, we welcome more than 1,900 educators across the U.S. to the program. By the end of the year, we'll offer Apple Learning Coach in 12 more countries. As we're connecting with teachers, we're also celebrating the graduations of students at our app developer academies around the world. From Detroit, to Naples, to Riyadh and more, we're excited to watch these talented developers embark on careers in coding and find ways to make a positive difference in their communities. 
Apple remains a champion of innovation, a company fueled by boundless creativity, driven by a deep sense of mission and guided by the unshakable belief that a great idea can change the world. Looking ahead, we'll continue to manage for the long term, always pushing the limits of what's possible and always putting the customer at the center of everything we do. With that, I'll turn it over to Luca.\n",
73
+ "Luca Maestri: Thank you, Tim, and good afternoon, everyone. Revenue for the June quarter was $81.8 billion, down 1% from last year and better than our expectations despite nearly 4 percentage points of negative impact from foreign exchange. On a constant currency basis, our revenue grew year-over-year in total and in the majority of the markets we track. We set June quarter records in both Europe and Greater China and continue to see strong performance across our emerging markets driven by iPhone. Products revenue was $60.6 billion, down 4% from last year, as we faced FX headwinds and an uneven macroeconomic environment. However, our installed base reached an all-time high across all geographic segments, driven by a June quarter record for iPhone switchers and high new-to rates in Mac, iPad and Watch, coupled with very high levels of customer satisfaction and loyalty. Our Services revenue set an all-time record of $21.2 billion, up 8% year-over-year and grew double digits in constant currency. Our performance was strong around the world as we reach all-time Services revenue records in Americas and Europe and June quarter records in Greater China and rest of Asia Pacific. Company gross margin was 44.5%, a record level for the June quarter and up 20 basis points sequentially, driven by cost savings and favorable mix shift towards Services, partially offset by a seasonal loss of leverage. Products gross margin was 35.4%, down 130 basis points from last quarter due to seasonal loss of leverage and mix, partially offset by favorable costs. Services gross margin was 70.5%, decreasing 50 basis points sequentially. Operating expenses of $13.4 billion were below the low end of the guidance range we provided at the beginning of the quarter and decelerated from the March quarter. We continue to take a deliberate approach in managing our spend with strong focus on innovation and new product development. 
The results of these actions delivered net income of $19.9 billion, diluted earnings per share of $1.26, up 5% versus last year, and very strong operating cash flow of $26.4 billion. Let me now provide more detail for each of our revenue categories. iPhone revenue was $39.7 billion, down 2% year-over-year but grew on a constant currency basis. We set revenue records in several markets around the world, including an all-time record in India and June quarter records in Latin America, the Middle East and Africa, Indonesia, the Philippines, Italy, the Netherlands and the U.K. Our iPhone active installed base grew to a new all-time high, thanks to a June quarter record in switchers. This is a testament to our extremely high levels of customer satisfaction, which 451 Research recently measured at 98% for the iPhone 14 family in the U.S. Mac generated $6.8 billion in revenue, down 7% year-over-year. We continue to invest in our Mac portfolio. And this past quarter, we were pleased to complete the transition to Apple silicon for the entire lineup. This transition has driven both strong upgrade activity and a high number of new customers. In fact, almost half of Mac buyers during the quarter were new to the product. We also saw reported customer satisfaction of 96% for Mac in the U.S. iPad revenue was $5.8 billion, down 20% year-over-year and in line with our expectations. These results were driven by a difficult compare against the full quarter impact of the iPad Air launch in the prior year. At the same time, we continue to attract a large number of new customers to the iPad installed base with over half of the customers who purchased iPads during the quarter being new to the product. And the latest reports from 451 Research indicate customer satisfaction of 96% in the U.S. Wearables, Home and Accessories revenue was $8.3 billion, up 2% year-over-year, with a June quarter record in Greater China and strong performance in several emerging markets. 
We continue to see Apple Watch expand its reach with about 2/3 of customers purchasing an Apple Watch during the quarter being new to the product. And this is combined with very high levels of customer satisfaction, which was recently reported at 98% in the United States. Moving on to Services. We reached a new all-time revenue record of $21.2 billion with year-over-year growth accelerating sequentially to 8% and up double digits in constant currency. In addition to the all-time records Tim mentioned earlier, we also set June quarter records for advertising, App Store and Music. We are very pleased with our performance in Services, which is a direct reflection of our ecosystem's strength. First, our installed base of over 2 billion active devices continues to grow at a nice pace and establishes a solid foundation for the future expansion of our ecosystem. Second, we see increased customer engagement with our services. Both our transacting accounts and paid accounts grew double digits year-over-year, each reaching a new all-time high. Third, our paid subscriptions showed strong growth. This past quarter, we reached an important milestone and passed 1 billion paid subscriptions across the services on our platform, up 150 million during the last 12 months and nearly double the number of paid subscriptions we had only 3 years ago. And finally, we continue to improve the breadth and the quality of our current services. From 20 new games on Apple Arcade, to brand-new content on Apple TV+, to the launch of our high-yield savings account with Apple Card, our customers are loving these enhanced offerings. Turning to the enterprise market. Our customers are leveraging Apple products every day to help improve productivity and attract talent. Blackstone, a global investment management firm, is expanding its Apple footprint from their corporate iPhone fleet to now offering the MacBook Air powered by M2 to all of their corporate employees and portfolio companies. 
Gilead, a leading biopharmaceutical company, has deployed thousands of iPads globally to their sales team. Over the last 6 months, they have also doubled their Mac user base by making MacBook Air available to more employees with a focus on user experience and strong security. Let me now turn to our cash position and capital return program. We ended the quarter with over $166 billion in cash and marketable securities. We repaid $7.5 billion in maturing debt while issuing $5.2 billion of new debt and increasing commercial paper by $2 billion, leaving us with total debt of $109 billion. As a result, net cash was $57 billion at the end of the quarter. During the quarter, we returned over $24 billion to shareholders, including $3.8 billion in dividends and equivalents and $18 billion through open market repurchases of 103 million Apple shares. We continue to believe there is great value in our stock and maintain our target of reaching a net cash neutral position over time. As we move ahead into the September quarter, I'd like to review our outlook, which includes the types of forward-looking information that Saori referred to at the beginning of the call. We expect our September quarter year-over-year revenue performance to be similar to the June quarter, assuming that the macroeconomic outlook doesn't worsen from what we are projecting today for the current quarter. Foreign exchange will continue to be a headwind, and we expect a negative year-over-year revenue impact of over 2 percentage points. We expect iPhone and Services year-over-year performance to accelerate from the June quarter. Also, we expect the revenue for both Mac and iPad to decline by double digits year-over-year due to difficult compares, particularly on the Mac. For both products, we experienced supply disruptions from factory shutdowns in the June quarter a year ago and were able to fulfill significant pent-up demand in the year ago September quarter. 
We expect gross margin to be between 44% and 45%. We expect OpEx to be between $13.5 billion and $13.7 billion. We expect OI&E to be around negative $250 million, excluding any potential impact from the mark-to-market of minority investments, and our tax rate to be around 16%. Finally, today, our Board of Directors has declared a cash dividend of $0.24 per share of common stock payable on August 17, 2023, to shareholders of record as of August 14, 2023. With that, let's open the call to questions.\n",
74
+ "Saori Casey: Thank you, Luca. [Operator Instructions]. Operator, may we have the first question, please?\n",
75
+ "Operator: [Operator Instructions]. We will go ahead and take our first question from Shannon Cross with Credit Suisse.\n",
76
+ "Shannon Cross: Tim, you mentioned -- and actually, Luca, too, you mentioned an uneven macro environment during the quarter several times on the call. I'm wondering if you can talk on a geographic basis about some of the trends you're seeing in iPhone. I'm specifically wondering how demand is trending within...\n",
77
+ "Luca Maestri: Sure. Shannon, I'll answer it. I didn't get the end of your question.\n",
78
+ "Operator: I think she has dropped.\n",
79
+ "Luca Maestri: Okay. Well, let me answer the question for the part that I could follow. So on a geographic basis, we've had great performance for iPhone in emerging markets. We set June quarter records in many of the emerging markets. We grew in total double digits. And the performance was strong across the board in emerging markets from China, where our performance improved from minus 3% to plus 8% in the June quarter and we grew double digits in constant currency, to many other areas around the world from India, where, again, we set a June quarter record with very strong performance there, Indonesia, Southeast Asia, in general, Latin America, Middle East. And so it's been really good there. We -- also, as you can see from our geographic segments, we had a slight acceleration of performance in the Americas, primarily in the United States, but we declined there because the smartphone market has been in a decline for the last couple of quarters in the United States.\n",
80
+ "Shannon Cross: Sorry about that. I'm not sure why I cut off. In terms of gross margin, you were at the high end of the range [Technical Difficulty] and you guided to 45% at the high end, which is, I think, higher than I remember in 20 years of covering you. So how should we think about puts and takes of gross margin? And it seems like there's like a perfect storm of good things. So I just -- maybe if you can talk about how you're thinking about it more holistically.\n",
81
+ "Luca Maestri: I think you remember correctly, Shannon, because the 44.5% for the June quarter is an all-time record for us in June. We were up 20 basis points sequentially. It was driven by cost savings and a mix shift towards Services, which obviously helps company gross margins, partially offset by the seasonal loss of leverage. We have a commodity environment that is favorable to us. Our product mix is quite strong at this point. And so with the exception of foreign exchange, which continues to be a drag, and it was a significant drag on a year-over-year basis, yes, we are in a good position right now. We are in a good position for the June quarter. And as I mentioned, we expect similar level of gross margins for the same reasons, frankly, for the September quarter.\n",
82
+ "Operator: Our next question comes from Wamsi Mohan of Bank of America.\n",
83
+ "Wamsi Mohan: Luca, can you just give us a little more color around the guidance? Your overall revenue performance, you called out similar. Obviously, you absorbed a higher FX impact this quarter versus your guide. And you also noted Services acceleration. So just wondering, when you think about that comment on iPhone acceleration, is that on a reported basis? Is that constant currency basis? And is there something that's changing in terms of seasonality perhaps for you that is causing not as much step-up in product revenue as typical on a sequential basis? And I have a follow-up.\n",
84
+ "Luca Maestri: Yes. So all our comments are in reported currency, not in constant currency in relation to the outlook. And we said acceleration sequentially for iPhone and for Services. But we're also pointing out -- and this is where I think, Wamsi, you're referring to some seasonality issues. We also said that for Mac and iPad, we expect to decline double digits. And the reason for that is that we have a very difficult compare versus last year. You remember that a year ago, in the June quarter, we had factory shutdowns for both Mac and iPad. And so we were able to fill the pent-up demand from those shutdowns during the September quarter. So an unusual level of activity that we had a year ago. And so now, obviously, the compare is difficult. So we expect both iPad and Mac to be down double digits, which offset the acceleration that I mentioned for iPhone and Services.\n",
85
+ "Wamsi Mohan: Okay. And Tim, I was wondering if you could update us on what percent of iPhones are sold on some type of installment basis now versus full upfront payment on a global basis. And maybe some thoughts on if you expect similar promotional activity from carriers, especially in the U.S., that seem to be grappling with a lot of cash flow issues this particular year.\n",
86
+ "Luca Maestri: Wamsi, I'll take it. We've done a really good job over the last few years with affordability programs around the world directly in our direct channel and with our partners around the world. The majority of iPhones, at this point, are sold using some kind of a program, trade-ins, installments, some kind of financing. And that percentage, which again, it's well over 50%, is very similar across developed and emerging markets. We want to do more of that because we think it really helps reduce the affordability threshold for our products. And we think it is also one of the reasons why our product mix has been very strong during the last couple of cycles. So we will continue to push on that front.\n",
87
+ "Operator: Our next question is from David Vogt with UBS.\n",
88
+ "David Vogt: I just wanted to follow up on 2 points that both you, Tim, and Luca made about growth and maybe commodities. So just to be clear, I know you're talking about an acceleration in iPhone, but the comp is about 2 points easier from FX. So I just want to understand, is that on a like-for-like basis, excluding the currency improvement of about 2 points from the June quarter to the September quarter? And from a commodity perspective, I know last quarter, you talked about buying a lot of inventory at favorable prices, which was an incredibly smart strategy. Where do you sit today? And what's sort of the timing or the duration of that commodity sort of backlog that you have as we think about next quarter and the subsequent quarters? How far does that get you out into the future from this favorable cost dynamic?\n",
89
+ "Luca Maestri: Let me start again. I just want to be clear about the guidance, the outlook guidance that we provided. We're referring entirely to reported numbers. So they take into account the fact that we have a slight improvement in foreign exchange. So when I talk about similar performance, I refer to reported performance in the June quarter and then the reported performance in the September quarter. And again, we expect, on a reported basis, our iPhone performance to accelerate, our Services performance to accelerate, and iPad and Mac to decline double digits. On the commodity front, as I mentioned, the environment is favorable. We always make sure that we take advantage of the opportunities that are available in the market, and we will continue to do that going forward.\n",
90
+ "David Vogt: Luca, any sense of how long that gives you a run rate today based on what you currently have? Can you give us a sense for at least the short-term tailwind?\n",
91
+ "Luca Maestri: I don't want to speculate past the September quarter because that's the horizon where we provide guidance. And I've said that the guidance for September is 44% to 45%, which you know is historically very high. And so obviously, that reflects a favorable environment for us.\n",
92
+ "Operator: Our next question is from Erik Woodring with Morgan Stanley.\n",
93
+ "Erik Woodring: I have 2 as well. Maybe if we just start kind of big picture, Tim or Luca. I was wondering if you could just kind of share some incremental color on how you think the consumer is behaving today versus 90 days ago and maybe how that differs by region. Meaning, are there any signs that consumer is incrementally more willing to spend on things like consumer electronics? Or is there still relative caution in the market? Are there any regions where you're seeing more strength in the consumer? And how sustainable do you think some of that strength or weakness could be based on some of the KPIs you track? And then I have a follow-up.\n",
94
+ "Tim Cook: Yes. David, it's Tim. If you sort of step around the world, we did exceptionally well in emerging markets last quarter and even better on a constant currency basis. And so emerging markets were -- was a strength. If you look at China, in China, we went from a negative 3% in Q2 to a plus 8% in Q3. And so in China, we had an acceleration. If you look at the U.S., which is in the -- obviously in the Americas segment, it is the vast majority of what's in there, there was also a slight acceleration sequentially, although the Americas is still declining somewhat year-over-year, as you can see on the data sheet. The primary reason for that is that it's a challenging smartphone market in the U.S. currently. And then in Europe, Europe saw a record quarter and -- for the June quarter, a record. And so some really good signs in most places in the world.\n",
95
+ "Erik Woodring: Awesome. And then maybe, Luca, a question for you. I think it's been about 3 quarters now where we've seen OpEx either grow below historical seasonality or come in below your expectations. I think this is the first time we've seen R&D grow less than 10% year-over-year since fiscal 2Q 2007. So can you maybe just talk about some of the cost actions you're taking? And as you look forward, what are the indicators that you're really evaluating that would give you greater confidence in perhaps returning back to a more seasonal cadence of OpEx spending? Or is this just a new normal that we should be expecting? That's it for me.\n",
96
+ "Luca Maestri: Obviously, we look at the environment, and we know that this has been an uncertain period for the last few quarters. And so we decided to be deliberate in what we do in terms of controlling our spend, and there's many areas across the company that we're working on and we've been quite effective at slowing down the spend. We slowed down also the hiring within the company in several areas. And we're very pleased with our ability to decelerate some of the expense growth taking into account the overall macro situation. We will continue to manage deliberately. You can see that we continue to grow our R&D costs faster than the rest of the company. SG&A is actually growing at a much slower pace because obviously, our focus continues to be in innovation and product development, and we'll continue to do that.\n",
97
+ "Operator: Our next question is from Michael Ng with Goldman Sachs.\n",
98
+ "Michael Ng: I just have 2 questions as well. First, it was encouraging to see the Services outperformance in the quarter, up double digits on an FX-neutral basis, and more Services acceleration next quarter on a reported basis. I was just wondering if you could just talk a little bit more about key underlying drivers for the confidence in the Services acceleration next quarter, understanding that FX a little bit. But anything to call out as it relates to things in Apple Search Ads that's helping. You're obviously making a lot of investments in Apple TV+ between MLS and the Canal+ deal. So any thoughts there would be great.\n",
99
+ "Luca Maestri: Yes, Michael, you're correct. I mean clearly, we've seen an improvement in the June quarter, and we expect further improvement in the September quarter. In June, the performance was across the board. Tim and I mentioned we set records really across the board. We had all-time records in cloud, in video, in AppleCare, in payments and June quarter records in App Store, advertising and Music. So we saw improvement in all our Services categories. We think the situation will continue to improve as we go through September. And that's very positive because not only good for the financial results, but obviously, it shows a high level of engagement of our customers in the ecosystem, which is very important for us. And it's really the sum of all the things that I mentioned in my prepared remarks. It goes from the fact that our installed base continues to grow, so we've got a larger pool of customers, to the fact that our customers are more engaged as we have more transacting accounts and paid accounts on the ecosystem. And the subscriptions business is very healthy with growth of 150 million paid subscriptions just in the last 12 months. It's almost double to what we had 3 years ago. And of course, we are providing more and more content to our users. And so the combination of all these things gives us good confidence for September.\n",
100
+ "Michael Ng: Great. And just as a related follow-up, it's about the hardware installed base and Services ARPU. I was curious when you talked about the Services strength, you talked about the 2 billion-plus installed base. When you think about the opportunity to increase the Services ARPU, do you really think about it internally on a per-active-iPhone user basis or on a per-device basis? Said differently, I'm just curious where you think about -- whether you think there's an incremental opportunity for those users that have multiple devices. Do you really see a big Services ARPU uplift in that respect?\n",
101
+ "Luca Maestri: Well, we know that customers that own more than one device are typically more engaged in our ecosystem. And so obviously, they tend to also spend more on the Services front. I would say the biggest opportunity is that we know that there's a lot of customers that we have that are very familiar with our ecosystem. They are engaged in the ecosystem. But still today, they're using only the portion of the ecosystem that is free. And so we think that by offering better content and more content over time, we're going to be able to attract more of them as paid customers.\n",
102
+ "Operator: Our next question is from Amit Daryanani with Evercore.\n",
103
+ "Amit Daryanani: I have 2 as well. I guess, Luca, maybe if you can talk about Wearables a bit. The growth over there, I think, in constant currency was fairly impressive at plus 6%. Can you just touch on maybe what's driving that? And then how do we think about the Wearables segment heading into the September quarter? I know you talked about a bunch of other ones, but how do we think about Wearables into September as well?\n",
104
+ "Luca Maestri: Sorry, Amit, I didn't get the -- what are you referring to?\n",
105
+ "Amit Daryanani: Yes. Sorry. I was hoping you could talk a bit about the Wearables segment because the growth over there was fairly impressive. And then how do you think about it into September as well?\n",
106
+ "Luca Maestri: Yes. On the Wearables front, we had really good performance in Greater China. And that's, again, very important for us. It was a June quarter record for Greater China. Very important for us because, again, it shows that the engagement with the ecosystem in a market that is so important for us like China continues to grow. It means that there's more and more customers that are owning more than the iPhone. Also, we continue to grow the installed base of the category very quickly because, as I mentioned, 2/3 of every buyer of Apple Watch during the course of the June quarter was new to the product. And so that is all additive to the installed base. So it's just great to see that the AirPods continue to be a great success in the marketplace for us. And so things are moving in the right direction there. It's become a very large business for us in Wearables, Home and Accessories. The last 12 months, we've done $40 billion of business, which is nearly the size of a Fortune 100 company. So it's become very important, and it's allowed us to diversify both our revenues and our earnings.\n",
107
+ "Amit Daryanani: That's really helpful. And then if I could just follow up, the Europe growth, the growth in Europe at up 5% is totally notable as well. I think you have a few emerging markets that you put in Europe as well. But I would love to understand what's happening in Europe and if there's a way to think about sort of Western Europe or developed world versus emerging markets over there.\n",
108
+ "Luca Maestri: Yes. It's been very good, primarily on the emerging market side of Europe. We include India and the Middle East and Central and Eastern Europe into the Europe segment. But as we mentioned at the beginning of the call, we had a number of markets that did very well, like France, like Italy, the Netherlands, Austria. So it was a good quarter for Europe.\n",
109
+ "Operator: Our next question is from Harsh Kumar with Piper Sandler.\n",
110
+ "Harsh Kumar: I have one for Luca and then later on one for Tim. So Luca, for some time now, for many quarters, you've had a currency headwind or foreign exchange currency headwind. It's conceivable that as rates start to come down, hopefully next year that the dollar weakens. Could you take us through the mechanism of how that will work on your revenues and for your costs?\n",
111
+ "Luca Maestri: So we tend -- we try to hedge our foreign exchange exposures because we think it's the right approach for the company in terms of minimizing the volatility that necessarily happens from the movements of currencies. We cannot effectively hedge every single exposure around the world because in some cases, it is not possible. In other cases, it is prohibitively expensive. But we tend to cover all the major currency payers that we have. About 60% of our business is outside the United States. So it's a very, very large and, I would say, very effective hedging program. And so we set up these hedges, and they tend to roll over very regularly. And then we replace them with new hedges at the new spot rate. So the impact that we're going to have on revenue and cost will depend on where the spot rates are at different points in time. And therefore, because of the way the program works, tends to be a bit of a lag in both directions as the foreign exchange moves over time.\n",
112
+ "Harsh Kumar: Understood. Very helpful. And for Tim, Tim, historically, for the last many years, carriers in at least the U.S., which I think is your largest market for iPhone, have had programs to help folks upgrade, whether they give a cash rebate or you bring in your old phone, something like that. I was curious, as you get into your peak December quarter, if you're aware of these programs are in place. And the reason why I'm asking is I think earlier, you mentioned that more than 50% of your phones are sold through some kind of program. I assume the number is even higher in the U.S.\n",
113
+ "Tim Cook: I don't want to get into revealing specifics in the different carriers. But generally speaking, I would think that it would be quite easy to find a promotion on a phone, provided you're hooking up to a service and either switching services, carriers or upgrading your phone at the same carrier. I think both of those cases today that you can find promotions out there, and I would expect that you'd be able to do that in the December time frame as well.\n",
114
+ "Operator: Our next question is from Aaron Rakers with Wells Fargo.\n",
115
+ "Aaron Rakers: I have two as well. So first of all, I just want to kind of ask Tim. Strategically, as we think about the Services growth and kind of the content expansion behind that, I'm curious if you could help us maybe appreciate what you've seen from a sporting perspective in terms of the engagement with MLS, the engagement with Major League Baseball, and how strategically you're thinking about expansion in sports as a key driver of Services growth going forward.\n",
116
+ "Tim Cook: We're focused on original content, as you know, with TV+. And so we're all about giving great storytellers the venue to tell great stories and hopefully get us all to think a little deeper. And sport is a part of that because sport is the ultimate original story. And for MLS, we're -- we could not be happier with how the partnership is going. It's clearly in the early days, but we are beating our expectation in terms of subscribers, and the fact that Messi went to Inter Miami helped us out there a bit. And so we're very excited about it.\n",
117
+ "Aaron Rakers: Yes. And as a quick follow-up, I'm just curious, an update on -- you mentioned in your prepared remarks the continued growth that you've seen in India. I'm curious how we think about that market opportunity looking forward. Is there anything that you see evolving that could accelerate the opportunity for iPhone in that large mobile market?\n",
118
+ "Tim Cook: We did hit a June quarter revenue record in India, and we grew strong double digits. We also opened our first 2 retail stores during the quarter. And it's -- of course, it's early going currently, but they're currently beating our expectation in terms of how they're doing. We continue to work on building out the channel and putting more investment in our direct-to-consumer offers as well. And so I think if you look at it, it's the second largest smartphone market in the world. And it's -- so we ought to be doing really well there. And where I'm really pleased with our growth there, we're still -- we still have a very, very modest and low share in the smartphone market. And so I think that it's a huge opportunity for us. And we're putting the -- all of our energies in making that occur.\n",
119
+ "Operator: Our next question comes from Sidney Ho with Deutsche Bank.\n",
120
+ "Sidney Ho: Your -- I just wanted to ask about the AI side of things. Your strategy on AI seems quite different than many of your peers, at least you don't talk too much about that, how much you invest in it. Maybe you can elaborate a little bit on that. But related to that, how do you see your investment in this area turning into financial performance in the future? Is it mainly through faster upgrade cycle, maybe higher ASP? Or are you thinking about maybe additional services that you can capitalize on that? And then I have a follow-up.\n",
121
+ "Tim Cook: If you take a step back, we view AI and machine learning as core fundamental technologies that are integral to virtually every product that we build. And so if you think about WWDC in June, we announced some features that will be coming in iOS 17 this fall, like Personal Voice and Live Voicemail. Previously, we had announced lifesaving features like fall detection and crash detection and ECG. None of these features that I just mentioned and many, many more would be possible without AI and machine learning. And so it's absolutely critical to us. And of course, we've been doing research across a wide range of AI technologies, including generative AI for years. We're going to continue investing and innovating and responsibly advancing our products with these technologies with the goal of enriching people's lives. And so that's what it's all about for us. And as you know, we tend to announce things as they come to market, and that's our MO, and I'd like to stick to that.\n",
122
+ "Sidney Ho: Okay. That's fair. Maybe as a follow-up is related to -- you talked about WWDC, where you actually introduced Vision Pro there. Clearly, a very big announcement there. How should we think about the revenue ramp related to the Vision Pro? Is there any catalysts that we should be thinking about that will drive an inflection of that product?\n",
123
+ "Tim Cook: Yes. There's enormous excitement around the Vision Pro. We're excited internally. Everybody that's been through the demos are blown away, whether you're talking about press or analysts or developers. We are now shipping units to the developer community for them to begin working on their apps. And we're looking forward to shipping early next year. And so we could not be more excited with that. I'm using the product daily. And so we're not going to forecast revenues and so forth on the call today, but we're very excited about it.\n",
124
+ "Operator: We will take our last question from Krish Sankar with TD Cowen.\n",
125
+ "Krish Sankar: I have two of them as well. Number one, on iPhone, Tim, you mentioned about the record number of switchers in the quarter. I'm kind of curious how to think about, given the weak macro and consumer spending, how is the replacement cycle for iPhone? Is it similar, longer, shorter versus prior years? And can you talk a little bit about the demand linearity of iPhone during the June quarter? And then I have a follow-up.\n",
126
+ "Tim Cook: Switchers were a very key part of our iPhone results for the quarter. We did set a record. We set a record in Greater China, in particular, and it was at the heart of our results there. And we continue to try to convince more and more people to switch because of our -- the experience and the ecosystem and -- that we can offer them. And so I think switching is a huge opportunity for us. In terms of the upgrade cycle and so forth, it's very difficult to estimate real time what is going on with the upgrade cycle. I would say, if you think about the iPhone results year-over-year, you have to think about the SE announcement in the year ago quarter, the iPhone SE announcement in the year ago quarter. And so that provides a bit of a headwind on the comp. But as Luca said, as he talked about how we're viewing Q4, the September quarter, we see iPhone accelerating in Q4.\n",
127
+ "Krish Sankar: Got it. Very helpful, Tim. And then my final question is on your retail stores, you obviously have a very large retail footprint and many of your stores seem to have been open for over a year now. How is the foot traffic there? And how do you think about sales or the retail trends in the June quarter and implications for the back half of this year on a seasonality basis?\n",
128
+ "Tim Cook: I'm sorry, are you talking about our retail stores?\n",
129
+ "Krish Sankar: Yes, yes, your retail stores.\n",
130
+ "Tim Cook: Yes. The -- if you look at retail, it's a key part of our go-to-market approach, and it will be so key and such a competitive advantage with Vision Pro. It will give us the opportunity to launch a new product and demo to many people in the stores. And so it has many advantages in it. And we continue to roll out more stores. As you know, we just opened 2 in India last quarter. We're -- there's still a lot of countries out there that don't have Apple stores that we would like to go into. And so we continue to see it as a key part of how we go to market and love the experience that we can provide customers there.\n",
131
+ "Saori Casey: A replay of today's call will be available for two weeks on Apple Podcasts, at a webcast of apple.com/investor and via telephone. The number for the telephone replay is 866-583-1035. Please enter the confirmation code 2553017, followed by the pound sign. These replays will be available by approximately 5 p.m. Pacific Time today. Members of the press with additional questions can contact Josh Rosenstock at 408-862-1142. Financial analysts can contact me, Saori Casey, with additional questions at 408-974-3123 while Suhasini Chandramouli is on her maternity leave. Thank you again for joining us.\n",
132
+ "Operator: Once again, this does conclude today's conference. We do appreciate your participation.\n"
133
+ ]
134
+ }
135
+ ],
136
+ "source": [
137
+ "print(docs['text'])"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": 7,
143
+ "metadata": {},
144
+ "outputs": [
145
+ {
146
+ "name": "stdout",
147
+ "output_type": "stream",
148
+ "text": [
149
+ "{'ticker': 'AAPL', 'quarter': 'Q3', 'date_time': '2023-08-03 21:47:09', 'speakers_list': ['Michael Ng', 'Luca Maestri', 'Saori Casey', 'Harsh Kumar', 'Sidney Ho', 'Aaron Rakers', 'Operator', 'Tim Cook', 'Amit Daryanani', 'Wamsi Mohan', 'Erik Woodring', 'Shannon Cross', 'David Vogt', 'Krish Sankar']}\n"
150
+ ]
151
+ }
152
+ ],
153
+ "source": [
154
+ "print(docs['metadata'])"
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "execution_count": null,
160
+ "metadata": {},
161
+ "outputs": [],
162
+ "source": []
163
+ }
164
+ ],
165
+ "metadata": {
166
+ "kernelspec": {
167
+ "display_name": "Python 3",
168
+ "language": "python",
169
+ "name": "python3"
170
+ },
171
+ "language_info": {
172
+ "codemirror_mode": {
173
+ "name": "ipython",
174
+ "version": 3
175
+ },
176
+ "file_extension": ".py",
177
+ "mimetype": "text/x-python",
178
+ "name": "python",
179
+ "nbconvert_exporter": "python",
180
+ "pygments_lexer": "ipython3",
181
+ "version": "3.10.12"
182
+ }
183
+ },
184
+ "nbformat": 4,
185
+ "nbformat_minor": 2
186
+ }
FinNLP/docs/FinNLP/docs/jupyter/Data_Sources_News.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
FinNLP/docs/FinNLP/docs/jupyter/Data_Sources_SEC_Filings.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
FinNLP/docs/FinNLP/docs/jupyter/Data_Sources_Social_Media.ipynb ADDED
@@ -0,0 +1,2261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import sys\n",
10
+ "sys.path.append(\"../../FinNLP\")"
11
+ ]
12
+ },
13
+ {
14
+ "attachments": {},
15
+ "cell_type": "markdown",
16
+ "metadata": {},
17
+ "source": [
18
+ "### Eastmoney"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 2,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "from finnlp.data_sources.social_media.eastmoney_streaming import Eastmoney_Streaming"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 3,
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "pages = 3\n",
37
+ "stock = \"600519\""
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": 4,
43
+ "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stdout",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "Downloading ... 0 1 2 "
50
+ ]
51
+ }
52
+ ],
53
+ "source": [
54
+ "downloader = Eastmoney_Streaming()\n",
55
+ "downloader.download_streaming_stock(stock, pages)"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 5,
61
+ "metadata": {},
62
+ "outputs": [
63
+ {
64
+ "data": {
65
+ "text/plain": [
66
+ "(241, 92)"
67
+ ]
68
+ },
69
+ "execution_count": 5,
70
+ "metadata": {},
71
+ "output_type": "execute_result"
72
+ }
73
+ ],
74
+ "source": [
75
+ "downloader.dataframe.shape"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "code",
80
+ "execution_count": 6,
81
+ "metadata": {},
82
+ "outputs": [
83
+ {
84
+ "data": {
85
+ "text/html": [
86
+ "<div>\n",
87
+ "<style scoped>\n",
88
+ " .dataframe tbody tr th:only-of-type {\n",
89
+ " vertical-align: middle;\n",
90
+ " }\n",
91
+ "\n",
92
+ " .dataframe tbody tr th {\n",
93
+ " vertical-align: top;\n",
94
+ " }\n",
95
+ "\n",
96
+ " .dataframe thead th {\n",
97
+ " text-align: right;\n",
98
+ " }\n",
99
+ "</style>\n",
100
+ "<table border=\"1\" class=\"dataframe\">\n",
101
+ " <thead>\n",
102
+ " <tr style=\"text-align: right;\">\n",
103
+ " <th></th>\n",
104
+ " <th>post_id</th>\n",
105
+ " <th>post_title</th>\n",
106
+ " <th>stockbar_code</th>\n",
107
+ " <th>stockbar_name</th>\n",
108
+ " <th>stockbar_type</th>\n",
109
+ " <th>user_id</th>\n",
110
+ " <th>user_nickname</th>\n",
111
+ " <th>user_extendinfos</th>\n",
112
+ " <th>post_click_count</th>\n",
113
+ " <th>post_forward_count</th>\n",
114
+ " <th>...</th>\n",
115
+ " <th>relate_topic</th>\n",
116
+ " <th>zwpage_flag</th>\n",
117
+ " <th>source_post_comment_count</th>\n",
118
+ " <th>post_atuser</th>\n",
119
+ " <th>reply_list</th>\n",
120
+ " <th>content_type</th>\n",
121
+ " <th>repost_state</th>\n",
122
+ " <th>reptile_state</th>\n",
123
+ " <th>allow_likes_state</th>\n",
124
+ " <th>post_is_hot</th>\n",
125
+ " </tr>\n",
126
+ " </thead>\n",
127
+ " <tbody>\n",
128
+ " <tr>\n",
129
+ " <th>0</th>\n",
130
+ " <td>1324058647</td>\n",
131
+ " <td>贵州茅台:每股派25.911元 6月30日共计派发现金红利325.49亿元</td>\n",
132
+ " <td>600519</td>\n",
133
+ " <td>贵州茅台吧</td>\n",
134
+ " <td>100.0</td>\n",
135
+ " <td>7344113638256342</td>\n",
136
+ " <td>贵州茅台资讯</td>\n",
137
+ " <td>{'user_accreditinfos': None, 'deactive': '0', ...</td>\n",
138
+ " <td>3799</td>\n",
139
+ " <td>14</td>\n",
140
+ " <td>...</td>\n",
141
+ " <td>NaN</td>\n",
142
+ " <td>NaN</td>\n",
143
+ " <td>NaN</td>\n",
144
+ " <td>NaN</td>\n",
145
+ " <td>NaN</td>\n",
146
+ " <td>NaN</td>\n",
147
+ " <td>NaN</td>\n",
148
+ " <td>NaN</td>\n",
149
+ " <td>NaN</td>\n",
150
+ " <td>NaN</td>\n",
151
+ " </tr>\n",
152
+ " </tbody>\n",
153
+ "</table>\n",
154
+ "<p>1 rows × 92 columns</p>\n",
155
+ "</div>"
156
+ ],
157
+ "text/plain": [
158
+ " post_id post_title stockbar_code \\\n",
159
+ "0 1324058647 贵州茅台:每股派25.911元 6月30日共计派发现金红利325.49亿元 600519 \n",
160
+ "\n",
161
+ " stockbar_name stockbar_type user_id user_nickname \\\n",
162
+ "0 贵州茅台吧 100.0 7344113638256342 贵州茅台资讯 \n",
163
+ "\n",
164
+ " user_extendinfos post_click_count \\\n",
165
+ "0 {'user_accreditinfos': None, 'deactive': '0', ... 3799 \n",
166
+ "\n",
167
+ " post_forward_count ... relate_topic zwpage_flag \\\n",
168
+ "0 14 ... NaN NaN \n",
169
+ "\n",
170
+ " source_post_comment_count post_atuser reply_list content_type \\\n",
171
+ "0 NaN NaN NaN NaN \n",
172
+ "\n",
173
+ " repost_state reptile_state allow_likes_state post_is_hot \n",
174
+ "0 NaN NaN NaN NaN \n",
175
+ "\n",
176
+ "[1 rows x 92 columns]"
177
+ ]
178
+ },
179
+ "execution_count": 6,
180
+ "metadata": {},
181
+ "output_type": "execute_result"
182
+ }
183
+ ],
184
+ "source": [
185
+ "downloader.dataframe.head(1)"
186
+ ]
187
+ },
188
+ {
189
+ "cell_type": "code",
190
+ "execution_count": 10,
191
+ "metadata": {},
192
+ "outputs": [
193
+ {
194
+ "data": {
195
+ "text/html": [
196
+ "<div>\n",
197
+ "<style scoped>\n",
198
+ " .dataframe tbody tr th:only-of-type {\n",
199
+ " vertical-align: middle;\n",
200
+ " }\n",
201
+ "\n",
202
+ " .dataframe tbody tr th {\n",
203
+ " vertical-align: top;\n",
204
+ " }\n",
205
+ "\n",
206
+ " .dataframe thead th {\n",
207
+ " text-align: right;\n",
208
+ " }\n",
209
+ "</style>\n",
210
+ "<table border=\"1\" class=\"dataframe\">\n",
211
+ " <thead>\n",
212
+ " <tr style=\"text-align: right;\">\n",
213
+ " <th></th>\n",
214
+ " <th>post_title</th>\n",
215
+ " <th>user_nickname</th>\n",
216
+ " <th>stockbar_name</th>\n",
217
+ " <th>post_click_count</th>\n",
218
+ " <th>post_forward_count</th>\n",
219
+ " <th>post_comment_count</th>\n",
220
+ " <th>post_publish_time</th>\n",
221
+ " <th>post_last_time</th>\n",
222
+ " <th>post_display_time</th>\n",
223
+ " </tr>\n",
224
+ " </thead>\n",
225
+ " <tbody>\n",
226
+ " <tr>\n",
227
+ " <th>0</th>\n",
228
+ " <td>贵州茅台:每股派25.911元 6月30日共计派发现金红利325.49亿元</td>\n",
229
+ " <td>贵州茅台资讯</td>\n",
230
+ " <td>贵州茅台吧</td>\n",
231
+ " <td>3799</td>\n",
232
+ " <td>14</td>\n",
233
+ " <td>15</td>\n",
234
+ " <td>2023-06-25 22:17:50</td>\n",
235
+ " <td>2023-06-26 03:12:47</td>\n",
236
+ " <td>2023-06-25 22:17:50</td>\n",
237
+ " </tr>\n",
238
+ " <tr>\n",
239
+ " <th>1</th>\n",
240
+ " <td>贵州茅台:贵州茅台2022年年度权益分派实施公告</td>\n",
241
+ " <td>贵州茅台资讯</td>\n",
242
+ " <td>贵州茅台吧</td>\n",
243
+ " <td>6423</td>\n",
244
+ " <td>47</td>\n",
245
+ " <td>17</td>\n",
246
+ " <td>2023-06-25 15:32:42</td>\n",
247
+ " <td>2023-06-26 00:57:39</td>\n",
248
+ " <td>2023-06-26 00:00:00</td>\n",
249
+ " </tr>\n",
250
+ " <tr>\n",
251
+ " <th>2</th>\n",
252
+ " <td>将派发现金红利325.49亿元!贵州茅台上市以来累计分红超2000亿元</td>\n",
253
+ " <td>贵州茅台资讯</td>\n",
254
+ " <td>贵州茅台吧</td>\n",
255
+ " <td>460</td>\n",
256
+ " <td>1</td>\n",
257
+ " <td>0</td>\n",
258
+ " <td>2023-06-25 23:49:07</td>\n",
259
+ " <td>2023-06-25 23:49:07</td>\n",
260
+ " <td>2023-06-25 23:49:07</td>\n",
261
+ " </tr>\n",
262
+ " <tr>\n",
263
+ " <th>3</th>\n",
264
+ " <td>茅台冰淇淋悄然卖数亿 年轻市场真被抓住了吗</td>\n",
265
+ " <td>贵州茅台资讯</td>\n",
266
+ " <td>贵州茅台吧</td>\n",
267
+ " <td>2612</td>\n",
268
+ " <td>15</td>\n",
269
+ " <td>11</td>\n",
270
+ " <td>2023-06-24 07:03:53</td>\n",
271
+ " <td>2023-06-25 18:48:21</td>\n",
272
+ " <td>2023-06-24 07:03:53</td>\n",
273
+ " </tr>\n",
274
+ " <tr>\n",
275
+ " <th>4</th>\n",
276
+ " <td>白酒本周跌5.49%原因是什么?下周怎么看?</td>\n",
277
+ " <td>NaN</td>\n",
278
+ " <td>NaN</td>\n",
279
+ " <td>10197</td>\n",
280
+ " <td>4</td>\n",
281
+ " <td>25</td>\n",
282
+ " <td>2023-06-24 12:29:53</td>\n",
283
+ " <td>2023-06-25 23:12:49</td>\n",
284
+ " <td>2023-06-24 12:29:53</td>\n",
285
+ " </tr>\n",
286
+ " <tr>\n",
287
+ " <th>5</th>\n",
288
+ " <td>本周持仓与下周交易计划</td>\n",
289
+ " <td>满仓日记</td>\n",
290
+ " <td>财富号评论吧</td>\n",
291
+ " <td>547</td>\n",
292
+ " <td>2</td>\n",
293
+ " <td>1</td>\n",
294
+ " <td>2023-06-25 20:30:54</td>\n",
295
+ " <td>2023-06-26 03:19:08</td>\n",
296
+ " <td>2023-06-25 20:30:54</td>\n",
297
+ " </tr>\n",
298
+ " <tr>\n",
299
+ " <th>6</th>\n",
300
+ " <td>茅台酒的估值真的是高</td>\n",
301
+ " <td>菩萨小跟班888</td>\n",
302
+ " <td>贵州茅台吧</td>\n",
303
+ " <td>33</td>\n",
304
+ " <td>0</td>\n",
305
+ " <td>0</td>\n",
306
+ " <td>2023-06-26 03:02:14</td>\n",
307
+ " <td>2023-06-26 03:02:14</td>\n",
308
+ " <td>2023-06-26 03:02:14</td>\n",
309
+ " </tr>\n",
310
+ " <tr>\n",
311
+ " <th>7</th>\n",
312
+ " <td>茅台里面的资金估计要出来支持一些中小微企业政策导向[吃瓜]</td>\n",
313
+ " <td>菩萨小跟班888</td>\n",
314
+ " <td>贵州茅台吧</td>\n",
315
+ " <td>24</td>\n",
316
+ " <td>0</td>\n",
317
+ " <td>0</td>\n",
318
+ " <td>2023-06-26 01:50:12</td>\n",
319
+ " <td>2023-06-26 01:50:12</td>\n",
320
+ " <td>2023-06-26 01:50:12</td>\n",
321
+ " </tr>\n",
322
+ " <tr>\n",
323
+ " <th>8</th>\n",
324
+ " <td>每股市值收益率,还没有银行定期利息高呢。(远离泡沫浮云地震带)</td>\n",
325
+ " <td>章鱼帝的智慧</td>\n",
326
+ " <td>贵州茅台吧</td>\n",
327
+ " <td>33</td>\n",
328
+ " <td>0</td>\n",
329
+ " <td>1</td>\n",
330
+ " <td>2023-06-25 22:48:49</td>\n",
331
+ " <td>2023-06-26 01:20:04</td>\n",
332
+ " <td>2023-06-25 22:48:49</td>\n",
333
+ " </tr>\n",
334
+ " <tr>\n",
335
+ " <th>9</th>\n",
336
+ " <td>6月最后的倔强(浪潮信息,昆仑万维,鸿博股份)赛道复苏。</td>\n",
337
+ " <td>夏夏爱美丽</td>\n",
338
+ " <td>财富号评论吧</td>\n",
339
+ " <td>2459</td>\n",
340
+ " <td>0</td>\n",
341
+ " <td>34</td>\n",
342
+ " <td>2023-06-25 22:16:03</td>\n",
343
+ " <td>2023-06-26 00:45:53</td>\n",
344
+ " <td>2023-06-25 22:16:03</td>\n",
345
+ " </tr>\n",
346
+ " </tbody>\n",
347
+ "</table>\n",
348
+ "</div>"
349
+ ],
350
+ "text/plain": [
351
+ " post_title user_nickname stockbar_name \\\n",
352
+ "0 贵州茅台:每股派25.911元 6月30日共计派发现金红利325.49亿元 贵州茅台资讯 贵州茅台吧 \n",
353
+ "1 贵州茅台:贵州茅台2022年年度权益分派实施公告 贵州茅台资讯 贵州茅台吧 \n",
354
+ "2 将派发现金红利325.49亿元!贵州茅台上市以来累计分红超2000亿元 贵州茅台资讯 贵州茅台吧 \n",
355
+ "3 茅台冰淇淋悄然卖数亿 年轻市场真被抓住了吗 贵州茅台资讯 贵州茅台吧 \n",
356
+ "4 白酒本周跌5.49%原因是什么?下周怎么看? NaN NaN \n",
357
+ "5 本周持仓与下周交易计划 满仓日记 财富号评论吧 \n",
358
+ "6 茅台酒的估值真的是高 菩萨小跟班888 贵州茅台吧 \n",
359
+ "7 茅台里面的资金估计要出来支持一些中小微企业政策导向[吃瓜] 菩萨小跟班888 贵州茅台吧 \n",
360
+ "8 每股市值收益率,还没有银行定期利息高呢。(远离泡沫浮云地震带) 章鱼帝的智慧 贵州茅台吧 \n",
361
+ "9 6月最后的倔强(浪潮信息,昆仑万维,鸿博股份)赛道复苏。 夏夏爱美丽 财富号评论吧 \n",
362
+ "\n",
363
+ " post_click_count post_forward_count post_comment_count \\\n",
364
+ "0 3799 14 15 \n",
365
+ "1 6423 47 17 \n",
366
+ "2 460 1 0 \n",
367
+ "3 2612 15 11 \n",
368
+ "4 10197 4 25 \n",
369
+ "5 547 2 1 \n",
370
+ "6 33 0 0 \n",
371
+ "7 24 0 0 \n",
372
+ "8 33 0 1 \n",
373
+ "9 2459 0 34 \n",
374
+ "\n",
375
+ " post_publish_time post_last_time post_display_time \n",
376
+ "0 2023-06-25 22:17:50 2023-06-26 03:12:47 2023-06-25 22:17:50 \n",
377
+ "1 2023-06-25 15:32:42 2023-06-26 00:57:39 2023-06-26 00:00:00 \n",
378
+ "2 2023-06-25 23:49:07 2023-06-25 23:49:07 2023-06-25 23:49:07 \n",
379
+ "3 2023-06-24 07:03:53 2023-06-25 18:48:21 2023-06-24 07:03:53 \n",
380
+ "4 2023-06-24 12:29:53 2023-06-25 23:12:49 2023-06-24 12:29:53 \n",
381
+ "5 2023-06-25 20:30:54 2023-06-26 03:19:08 2023-06-25 20:30:54 \n",
382
+ "6 2023-06-26 03:02:14 2023-06-26 03:02:14 2023-06-26 03:02:14 \n",
383
+ "7 2023-06-26 01:50:12 2023-06-26 01:50:12 2023-06-26 01:50:12 \n",
384
+ "8 2023-06-25 22:48:49 2023-06-26 01:20:04 2023-06-25 22:48:49 \n",
385
+ "9 2023-06-25 22:16:03 2023-06-26 00:45:53 2023-06-25 22:16:03 "
386
+ ]
387
+ },
388
+ "execution_count": 10,
389
+ "metadata": {},
390
+ "output_type": "execute_result"
391
+ }
392
+ ],
393
+ "source": [
394
+ "selected_columns = [\"post_title\",\"user_nickname\", \"stockbar_name\" ,\"post_click_count\", \"post_forward_count\", \"post_comment_count\", \"post_publish_time\", \"post_last_time\", \"post_display_time\"]\n",
395
+ "downloader.dataframe[selected_columns].head(10)"
396
+ ]
397
+ },
398
+ {
399
+ "attachments": {},
400
+ "cell_type": "markdown",
401
+ "metadata": {},
402
+ "source": [
403
+ "### Facebook get cookies"
404
+ ]
405
+ },
406
+ {
407
+ "cell_type": "code",
408
+ "execution_count": null,
409
+ "metadata": {},
410
+ "outputs": [],
411
+ "source": [
412
+ "from selenium import webdriver\n",
413
+ "import json\n",
414
+ "\n",
415
+ "browser = webdriver.ChromiumEdge()\n",
416
+ "browser.get('https://www.facebook.com')\n"
417
+ ]
418
+ },
419
+ {
420
+ "attachments": {},
421
+ "cell_type": "markdown",
422
+ "metadata": {},
423
+ "source": [
424
+ "#### Please login your account in the brower"
425
+ ]
426
+ },
427
+ {
428
+ "cell_type": "code",
429
+ "execution_count": null,
430
+ "metadata": {},
431
+ "outputs": [],
432
+ "source": [
433
+ "cookies = browser.get_cookies() \n",
434
+ "with open(\"cookies.json\", \"w\", encoding=\"utf-8\") as cks:\n",
435
+ " json.dump(cookies, cks)"
436
+ ]
437
+ },
438
+ {
439
+ "attachments": {},
440
+ "cell_type": "markdown",
441
+ "metadata": {},
442
+ "source": [
443
+ "### Facebook"
444
+ ]
445
+ },
446
+ {
447
+ "cell_type": "code",
448
+ "execution_count": 2,
449
+ "metadata": {},
450
+ "outputs": [],
451
+ "source": [
452
+ "from finnlp.data_sources.social_media.facebook_streaming import Facebook_Streaming\n",
453
+ "import json"
454
+ ]
455
+ },
456
+ {
457
+ "cell_type": "code",
458
+ "execution_count": 4,
459
+ "metadata": {},
460
+ "outputs": [],
461
+ "source": [
462
+ "# load cookies\n",
463
+ "with open(\"cookies.json\", \"r\", encoding=\"utf-8\") as cks: \n",
464
+ " cookies = json.load(cks)"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": 5,
470
+ "metadata": {},
471
+ "outputs": [],
472
+ "source": [
473
+ "config = {\n",
474
+ " \"cookies\":cookies, \n",
475
+ " \"headless\": False,\n",
476
+ " \"stealth_path\":\"../../FinNLP/finnlp/data_sources/social_media/stealth.min.js\"\n",
477
+ " }\n",
478
+ "pages = 3\n",
479
+ "stock = \"AAPL\""
480
+ ]
481
+ },
482
+ {
483
+ "cell_type": "code",
484
+ "execution_count": 6,
485
+ "metadata": {},
486
+ "outputs": [
487
+ {
488
+ "name": "stderr",
489
+ "output_type": "stream",
490
+ "text": [
491
+ "100%|██████████| 17/17 [00:57<00:00, 3.37s/it]"
492
+ ]
493
+ },
494
+ {
495
+ "name": "stdout",
496
+ "output_type": "stream",
497
+ "text": [
498
+ "Only support the first page now!\n"
499
+ ]
500
+ },
501
+ {
502
+ "name": "stderr",
503
+ "output_type": "stream",
504
+ "text": [
505
+ "\n"
506
+ ]
507
+ }
508
+ ],
509
+ "source": [
510
+ "downloader = Facebook_Streaming(config)\n",
511
+ "downloader.download_streaming_stock(stock, pages)"
512
+ ]
513
+ },
514
+ {
515
+ "cell_type": "code",
516
+ "execution_count": 7,
517
+ "metadata": {},
518
+ "outputs": [
519
+ {
520
+ "data": {
521
+ "text/html": [
522
+ "<div>\n",
523
+ "<style scoped>\n",
524
+ " .dataframe tbody tr th:only-of-type {\n",
525
+ " vertical-align: middle;\n",
526
+ " }\n",
527
+ "\n",
528
+ " .dataframe tbody tr th {\n",
529
+ " vertical-align: top;\n",
530
+ " }\n",
531
+ "\n",
532
+ " .dataframe thead th {\n",
533
+ " text-align: right;\n",
534
+ " }\n",
535
+ "</style>\n",
536
+ "<table border=\"1\" class=\"dataframe\">\n",
537
+ " <thead>\n",
538
+ " <tr style=\"text-align: right;\">\n",
539
+ " <th></th>\n",
540
+ " <th>content</th>\n",
541
+ " <th>date</th>\n",
542
+ " </tr>\n",
543
+ " </thead>\n",
544
+ " <tbody>\n",
545
+ " <tr>\n",
546
+ " <th>6</th>\n",
547
+ " <td>AAPL (Stock Market)</td>\n",
548
+ " <td>4h󰞋󰙷</td>\n",
549
+ " </tr>\n",
550
+ " <tr>\n",
551
+ " <th>8</th>\n",
552
+ " <td>Day 7\\nIntroduction to Stock Market\\nWhat you ...</td>\n",
553
+ " <td>6h󰞋󰙷</td>\n",
554
+ " </tr>\n",
555
+ " <tr>\n",
556
+ " <th>11</th>\n",
557
+ " <td>US: AAPL new high and breakout from two-year r...</td>\n",
558
+ " <td>1d󰞋󰙷</td>\n",
559
+ " </tr>\n",
560
+ " </tbody>\n",
561
+ "</table>\n",
562
+ "</div>"
563
+ ],
564
+ "text/plain": [
565
+ " content date\n",
566
+ "6 AAPL (Stock Market) 4h󰞋󰙷\n",
567
+ "8 Day 7\\nIntroduction to Stock Market\\nWhat you ... 6h󰞋󰙷\n",
568
+ "11 US: AAPL new high and breakout from two-year r... 1d󰞋󰙷"
569
+ ]
570
+ },
571
+ "execution_count": 7,
572
+ "metadata": {},
573
+ "output_type": "execute_result"
574
+ }
575
+ ],
576
+ "source": [
577
+ "downloader.dataframe"
578
+ ]
579
+ },
580
+ {
581
+ "attachments": {},
582
+ "cell_type": "markdown",
583
+ "metadata": {},
584
+ "source": [
585
+ "### Xueqiu / 雪球"
586
+ ]
587
+ },
588
+ {
589
+ "cell_type": "code",
590
+ "execution_count": 2,
591
+ "metadata": {},
592
+ "outputs": [],
593
+ "source": [
594
+ "from finnlp.data_sources.social_media.xueqiu_streaming import Xueqiu_Streaming"
595
+ ]
596
+ },
597
+ {
598
+ "cell_type": "code",
599
+ "execution_count": 3,
600
+ "metadata": {},
601
+ "outputs": [],
602
+ "source": [
603
+ "pages = 3\n",
604
+ "stock = \"茅台\""
605
+ ]
606
+ },
607
+ {
608
+ "cell_type": "code",
609
+ "execution_count": 4,
610
+ "metadata": {},
611
+ "outputs": [
612
+ {
613
+ "name": "stdout",
614
+ "output_type": "stream",
615
+ "text": [
616
+ "Downloading ... 0 1 2 "
617
+ ]
618
+ }
619
+ ],
620
+ "source": [
621
+ "downloader = Xueqiu_Streaming()\n",
622
+ "downloader.download_streaming_stock(stock, pages)"
623
+ ]
624
+ },
625
+ {
626
+ "cell_type": "code",
627
+ "execution_count": 5,
628
+ "metadata": {},
629
+ "outputs": [
630
+ {
631
+ "data": {
632
+ "text/plain": [
633
+ "(29, 53)"
634
+ ]
635
+ },
636
+ "execution_count": 5,
637
+ "metadata": {},
638
+ "output_type": "execute_result"
639
+ }
640
+ ],
641
+ "source": [
642
+ "downloader.dataframe.shape"
643
+ ]
644
+ },
645
+ {
646
+ "cell_type": "code",
647
+ "execution_count": 6,
648
+ "metadata": {},
649
+ "outputs": [
650
+ {
651
+ "data": {
652
+ "text/html": [
653
+ "<div>\n",
654
+ "<style scoped>\n",
655
+ " .dataframe tbody tr th:only-of-type {\n",
656
+ " vertical-align: middle;\n",
657
+ " }\n",
658
+ "\n",
659
+ " .dataframe tbody tr th {\n",
660
+ " vertical-align: top;\n",
661
+ " }\n",
662
+ "\n",
663
+ " .dataframe thead th {\n",
664
+ " text-align: right;\n",
665
+ " }\n",
666
+ "</style>\n",
667
+ "<table border=\"1\" class=\"dataframe\">\n",
668
+ " <thead>\n",
669
+ " <tr style=\"text-align: right;\">\n",
670
+ " <th></th>\n",
671
+ " <th>blocked</th>\n",
672
+ " <th>blocking</th>\n",
673
+ " <th>canEdit</th>\n",
674
+ " <th>commentId</th>\n",
675
+ " <th>controversial</th>\n",
676
+ " <th>created_at</th>\n",
677
+ " <th>description</th>\n",
678
+ " <th>donate_count</th>\n",
679
+ " <th>donate_snowcoin</th>\n",
680
+ " <th>editable</th>\n",
681
+ " <th>...</th>\n",
682
+ " <th>truncated_by</th>\n",
683
+ " <th>type</th>\n",
684
+ " <th>user</th>\n",
685
+ " <th>user_id</th>\n",
686
+ " <th>view_count</th>\n",
687
+ " <th>firstImg</th>\n",
688
+ " <th>pic_sizes</th>\n",
689
+ " <th>edited_at</th>\n",
690
+ " <th>quote_cards</th>\n",
691
+ " <th>symbol_id</th>\n",
692
+ " </tr>\n",
693
+ " </thead>\n",
694
+ " <tbody>\n",
695
+ " <tr>\n",
696
+ " <th>0</th>\n",
697
+ " <td>False</td>\n",
698
+ " <td>False</td>\n",
699
+ " <td>True</td>\n",
700
+ " <td>0</td>\n",
701
+ " <td>False</td>\n",
702
+ " <td>2023-06-25 12:15:07</td>\n",
703
+ " <td>&lt;a href=\"http://xueqiu.com/S/SZ000860\" target=...</td>\n",
704
+ " <td>0</td>\n",
705
+ " <td>0</td>\n",
706
+ " <td>True</td>\n",
707
+ " <td>...</td>\n",
708
+ " <td>0</td>\n",
709
+ " <td>2</td>\n",
710
+ " <td>{'allow_all_stock': False, 'block_status': 0, ...</td>\n",
711
+ " <td>8364804052</td>\n",
712
+ " <td>471</td>\n",
713
+ " <td>NaN</td>\n",
714
+ " <td>NaN</td>\n",
715
+ " <td>NaN</td>\n",
716
+ " <td>NaN</td>\n",
717
+ " <td>NaN</td>\n",
718
+ " </tr>\n",
719
+ " </tbody>\n",
720
+ "</table>\n",
721
+ "<p>1 rows × 53 columns</p>\n",
722
+ "</div>"
723
+ ],
724
+ "text/plain": [
725
+ " blocked blocking canEdit commentId controversial created_at \\\n",
726
+ "0 False False True 0 False 2023-06-25 12:15:07 \n",
727
+ "\n",
728
+ " description donate_count \\\n",
729
+ "0 <a href=\"http://xueqiu.com/S/SZ000860\" target=... 0 \n",
730
+ "\n",
731
+ " donate_snowcoin editable ... truncated_by type \\\n",
732
+ "0 0 True ... 0 2 \n",
733
+ "\n",
734
+ " user user_id view_count \\\n",
735
+ "0 {'allow_all_stock': False, 'block_status': 0, ... 8364804052 471 \n",
736
+ "\n",
737
+ " firstImg pic_sizes edited_at quote_cards symbol_id \n",
738
+ "0 NaN NaN NaN NaN NaN \n",
739
+ "\n",
740
+ "[1 rows x 53 columns]"
741
+ ]
742
+ },
743
+ "execution_count": 6,
744
+ "metadata": {},
745
+ "output_type": "execute_result"
746
+ }
747
+ ],
748
+ "source": [
749
+ "downloader.dataframe.head(1)"
750
+ ]
751
+ },
752
+ {
753
+ "cell_type": "code",
754
+ "execution_count": 7,
755
+ "metadata": {},
756
+ "outputs": [
757
+ {
758
+ "data": {
759
+ "text/html": [
760
+ "<div>\n",
761
+ "<style scoped>\n",
762
+ " .dataframe tbody tr th:only-of-type {\n",
763
+ " vertical-align: middle;\n",
764
+ " }\n",
765
+ "\n",
766
+ " .dataframe tbody tr th {\n",
767
+ " vertical-align: top;\n",
768
+ " }\n",
769
+ "\n",
770
+ " .dataframe thead th {\n",
771
+ " text-align: right;\n",
772
+ " }\n",
773
+ "</style>\n",
774
+ "<table border=\"1\" class=\"dataframe\">\n",
775
+ " <thead>\n",
776
+ " <tr style=\"text-align: right;\">\n",
777
+ " <th></th>\n",
778
+ " <th>created_at</th>\n",
779
+ " <th>description</th>\n",
780
+ " <th>title</th>\n",
781
+ " <th>text</th>\n",
782
+ " <th>target</th>\n",
783
+ " <th>source</th>\n",
784
+ " <th>user</th>\n",
785
+ " </tr>\n",
786
+ " </thead>\n",
787
+ " <tbody>\n",
788
+ " <tr>\n",
789
+ " <th>0</th>\n",
790
+ " <td>2023-06-25 12:15:07</td>\n",
791
+ " <td>&lt;a href=\"http://xueqiu.com/S/SZ000860\" target=...</td>\n",
792
+ " <td></td>\n",
793
+ " <td>&lt;a href=\"http://xueqiu.com/S/SZ000860\" target=...</td>\n",
794
+ " <td>/8364804052/253976413</td>\n",
795
+ " <td>Android</td>\n",
796
+ " <td>{'allow_all_stock': False, 'block_status': 0, ...</td>\n",
797
+ " </tr>\n",
798
+ " <tr>\n",
799
+ " <th>1</th>\n",
800
+ " <td>2023-06-25 12:14:22</td>\n",
801
+ " <td>&lt;a href=\"http://xueqiu.com/S/SH600519\" target=...</td>\n",
802
+ " <td></td>\n",
803
+ " <td>&lt;p&gt;&lt;a href=\"http://xueqiu.com/S/SH600519\" targ...</td>\n",
804
+ " <td>/4631817224/253976390</td>\n",
805
+ " <td>雪球</td>\n",
806
+ " <td>{'allow_all_stock': False, 'block_status': 0, ...</td>\n",
807
+ " </tr>\n",
808
+ " <tr>\n",
809
+ " <th>2</th>\n",
810
+ " <td>2023-06-25 12:13:01</td>\n",
811
+ " <td>...提高。白酒:五粮液、迎驾贡酒、&lt;span class='highlight'&gt;茅台&lt;/...</td>\n",
812
+ " <td>6.25 赛道和白马的机会</td>\n",
813
+ " <td>&lt;p&gt;这个假期外围的环境不太好,已经是基本共识了。明天开盘大A承压低开也基本是一致预期。这么...</td>\n",
814
+ " <td>/4322952939/253976335</td>\n",
815
+ " <td>雪球</td>\n",
816
+ " <td>{'allow_all_stock': False, 'block_status': 0, ...</td>\n",
817
+ " </tr>\n",
818
+ " <tr>\n",
819
+ " <th>3</th>\n",
820
+ " <td>2023-06-25 11:58:55</td>\n",
821
+ " <td>茅台发生活费了</td>\n",
822
+ " <td></td>\n",
823
+ " <td>茅台发生活费了&lt;br/&gt;&lt;img class=\"ke_img\" src=\"https://x...</td>\n",
824
+ " <td>/4653939718/253975764</td>\n",
825
+ " <td>iPhone</td>\n",
826
+ " <td>{'allow_all_stock': False, 'block_status': 0, ...</td>\n",
827
+ " </tr>\n",
828
+ " <tr>\n",
829
+ " <th>4</th>\n",
830
+ " <td>2023-06-25 11:54:05</td>\n",
831
+ " <td>...业绩及股价,形成正反馈。当年&lt;span class='highlight'&gt;茅台&lt;/s...</td>\n",
832
+ " <td>持仓吹票,共同致富</td>\n",
833
+ " <td>&lt;p&gt;&lt;a href=\"http://xueqiu.com/k?q=%23%E4%BB%A5...</td>\n",
834
+ " <td>/8113901491/253975613</td>\n",
835
+ " <td>Android</td>\n",
836
+ " <td>{'allow_all_stock': False, 'block_status': 0, ...</td>\n",
837
+ " </tr>\n",
838
+ " <tr>\n",
839
+ " <th>5</th>\n",
840
+ " <td>2023-06-25 11:50:11</td>\n",
841
+ " <td>微酒酒业快讯,6月25日,酒业新闻一览-·企业动态·-01&lt;span class='high...</td>\n",
842
+ " <td>6.25:&lt;span class='highlight'&gt;茅&lt;/span&gt;&lt;span cla...</td>\n",
843
+ " <td>&lt;p&gt;&lt;img class=\"ke_img\" src=\"https://xqimg.imed...</td>\n",
844
+ " <td>/3615583399/253975485</td>\n",
845
+ " <td>雪球</td>\n",
846
+ " <td>{'allow_all_stock': False, 'block_status': 0, ...</td>\n",
847
+ " </tr>\n",
848
+ " <tr>\n",
849
+ " <th>6</th>\n",
850
+ " <td>2023-06-25 11:48:42</td>\n",
851
+ " <td>&lt;a href=\"http://xueqiu.com/S/SH603027\" target=...</td>\n",
852
+ " <td></td>\n",
853
+ " <td>&lt;a href=\"http://xueqiu.com/S/SH603027\" target=...</td>\n",
854
+ " <td>/2659542807/253975430</td>\n",
855
+ " <td>iPhone</td>\n",
856
+ " <td>{'allow_all_stock': False, 'block_status': 0, ...</td>\n",
857
+ " </tr>\n",
858
+ " <tr>\n",
859
+ " <th>7</th>\n",
860
+ " <td>2023-06-25 11:45:54</td>\n",
861
+ " <td>段永平说:我不鼓励小散投&lt;a href=\"https://xueqiu.com/S/AAPL...</td>\n",
862
+ " <td></td>\n",
863
+ " <td>段永平说:我不鼓励小散投&lt;a href=\"https://xueqiu.com/S/AAPL...</td>\n",
864
+ " <td>/9456980430/253975338</td>\n",
865
+ " <td>iPhone</td>\n",
866
+ " <td>{'allow_all_stock': False, 'block_status': 0, ...</td>\n",
867
+ " </tr>\n",
868
+ " <tr>\n",
869
+ " <th>8</th>\n",
870
+ " <td>2023-06-25 11:33:01</td>\n",
871
+ " <td>泸州老窖酒传统酿制技艺第二十三代传承人·国窖1573·曾娜大师鉴藏版,端午举杯小酒。&lt;br/...</td>\n",
872
+ " <td></td>\n",
873
+ " <td>泸州老窖酒传统酿制技艺第二十三代传承人·国窖1573·曾娜大师鉴藏版,端午举杯小酒。&lt;br/...</td>\n",
874
+ " <td>/9893982765/253974916</td>\n",
875
+ " <td>Android</td>\n",
876
+ " <td>{'allow_all_stock': False, 'block_status': 0, ...</td>\n",
877
+ " </tr>\n",
878
+ " <tr>\n",
879
+ " <th>9</th>\n",
880
+ " <td>2023-06-25 11:25:44</td>\n",
881
+ " <td>...酒店中,白酒卖得最好的往往不是&lt;span class='highlight'&gt;茅台&lt;/...</td>\n",
882
+ " <td>街头没生意的烟酒店,为什么不会倒闭</td>\n",
883
+ " <td>&lt;p&gt;&lt;img class=\"ke_img\" src=\"https://xqimg.imed...</td>\n",
884
+ " <td>/5497522856/253974630</td>\n",
885
+ " <td>雪球</td>\n",
886
+ " <td>{'allow_all_stock': False, 'block_status': 0, ...</td>\n",
887
+ " </tr>\n",
888
+ " </tbody>\n",
889
+ "</table>\n",
890
+ "</div>"
891
+ ],
892
+ "text/plain": [
893
+ " created_at description \\\n",
894
+ "0 2023-06-25 12:15:07 <a href=\"http://xueqiu.com/S/SZ000860\" target=... \n",
895
+ "1 2023-06-25 12:14:22 <a href=\"http://xueqiu.com/S/SH600519\" target=... \n",
896
+ "2 2023-06-25 12:13:01 ...提高。白酒:五粮液、迎驾贡酒、<span class='highlight'>茅台</... \n",
897
+ "3 2023-06-25 11:58:55 茅台发生活费了 \n",
898
+ "4 2023-06-25 11:54:05 ...业绩及股价,形成正反馈。当年<span class='highlight'>茅台</s... \n",
899
+ "5 2023-06-25 11:50:11 微酒酒业快讯,6月25日,酒业新闻一览-·企业动态·-01<span class='high... \n",
900
+ "6 2023-06-25 11:48:42 <a href=\"http://xueqiu.com/S/SH603027\" target=... \n",
901
+ "7 2023-06-25 11:45:54 段永平说:我不鼓励小散投<a href=\"https://xueqiu.com/S/AAPL... \n",
902
+ "8 2023-06-25 11:33:01 泸州老窖酒传统酿制技艺第二十三代传承人·国窖1573·曾娜大师鉴藏版,端午举杯小酒。<br/... \n",
903
+ "9 2023-06-25 11:25:44 ...酒店中,白酒卖得最好的往往不是<span class='highlight'>茅台</... \n",
904
+ "\n",
905
+ " title \\\n",
906
+ "0 \n",
907
+ "1 \n",
908
+ "2 6.25 赛道和白马的机会 \n",
909
+ "3 \n",
910
+ "4 持仓吹票,共同致富 \n",
911
+ "5 6.25:<span class='highlight'>茅</span><span cla... \n",
912
+ "6 \n",
913
+ "7 \n",
914
+ "8 \n",
915
+ "9 街头没生意的烟酒店,为什么不会倒闭 \n",
916
+ "\n",
917
+ " text target \\\n",
918
+ "0 <a href=\"http://xueqiu.com/S/SZ000860\" target=... /8364804052/253976413 \n",
919
+ "1 <p><a href=\"http://xueqiu.com/S/SH600519\" targ... /4631817224/253976390 \n",
920
+ "2 <p>这个假期外围的环境不太好,已经是基本共识了。明天开盘大A承压低开也基本是一致预期。这么... /4322952939/253976335 \n",
921
+ "3 茅台发生活费了<br/><img class=\"ke_img\" src=\"https://x... /4653939718/253975764 \n",
922
+ "4 <p><a href=\"http://xueqiu.com/k?q=%23%E4%BB%A5... /8113901491/253975613 \n",
923
+ "5 <p><img class=\"ke_img\" src=\"https://xqimg.imed... /3615583399/253975485 \n",
924
+ "6 <a href=\"http://xueqiu.com/S/SH603027\" target=... /2659542807/253975430 \n",
925
+ "7 段永平说:我不鼓励小散投<a href=\"https://xueqiu.com/S/AAPL... /9456980430/253975338 \n",
926
+ "8 泸州老窖酒传统酿制技艺第二十三代传承人·国窖1573·曾娜大师鉴藏版,端午举杯小酒。<br/... /9893982765/253974916 \n",
927
+ "9 <p><img class=\"ke_img\" src=\"https://xqimg.imed... /5497522856/253974630 \n",
928
+ "\n",
929
+ " source user \n",
930
+ "0 Android {'allow_all_stock': False, 'block_status': 0, ... \n",
931
+ "1 雪球 {'allow_all_stock': False, 'block_status': 0, ... \n",
932
+ "2 雪球 {'allow_all_stock': False, 'block_status': 0, ... \n",
933
+ "3 iPhone {'allow_all_stock': False, 'block_status': 0, ... \n",
934
+ "4 Android {'allow_all_stock': False, 'block_status': 0, ... \n",
935
+ "5 雪球 {'allow_all_stock': False, 'block_status': 0, ... \n",
936
+ "6 iPhone {'allow_all_stock': False, 'block_status': 0, ... \n",
937
+ "7 iPhone {'allow_all_stock': False, 'block_status': 0, ... \n",
938
+ "8 Android {'allow_all_stock': False, 'block_status': 0, ... \n",
939
+ "9 雪球 {'allow_all_stock': False, 'block_status': 0, ... "
940
+ ]
941
+ },
942
+ "execution_count": 7,
943
+ "metadata": {},
944
+ "output_type": "execute_result"
945
+ }
946
+ ],
947
+ "source": [
948
+ "selected_columns = [\"created_at\", \"description\", \"title\", \"text\", \"target\", \"source\", \"user\"]\n",
949
+ "downloader.dataframe[selected_columns].head(10)"
950
+ ]
951
+ },
952
+ {
953
+ "attachments": {},
954
+ "cell_type": "markdown",
955
+ "metadata": {},
956
+ "source": [
957
+ "### Stocktwits Streaming"
958
+ ]
959
+ },
960
+ {
961
+ "cell_type": "code",
962
+ "execution_count": 6,
963
+ "metadata": {},
964
+ "outputs": [],
965
+ "source": [
966
+ "from finnlp.data_sources.social_media.stocktwits_streaming import Stocktwits_Streaming"
967
+ ]
968
+ },
969
+ {
970
+ "cell_type": "code",
971
+ "execution_count": 9,
972
+ "metadata": {},
973
+ "outputs": [],
974
+ "source": [
975
+ "pages = 3\n",
976
+ "stock = \"AAPL\"\n",
977
+ "config = {\n",
978
+ " \"use_proxy\": \"us_free\",\n",
979
+ " \"max_retry\": 5,\n",
980
+ " \"proxy_pages\": 2,\n",
981
+ "}"
982
+ ]
983
+ },
984
+ {
985
+ "cell_type": "code",
986
+ "execution_count": 10,
987
+ "metadata": {},
988
+ "outputs": [
989
+ {
990
+ "name": "stderr",
991
+ "output_type": "stream",
992
+ "text": [
993
+ "Checking ips: 100%|██████████| 30/30 [01:07<00:00, 2.24s/it]\n"
994
+ ]
995
+ },
996
+ {
997
+ "name": "stdout",
998
+ "output_type": "stream",
999
+ "text": [
1000
+ "Get proxy ips: 30.\n",
1001
+ "Usable proxy ips: 29.\n"
1002
+ ]
1003
+ },
1004
+ {
1005
+ "name": "stderr",
1006
+ "output_type": "stream",
1007
+ "text": [
1008
+ "100%|██████████| 3/3 [00:05<00:00, 1.68s/it]\n"
1009
+ ]
1010
+ }
1011
+ ],
1012
+ "source": [
1013
+ "downloader = Stocktwits_Streaming(config)\n",
1014
+ "downloader.download_streaming_stock(stock, pages)"
1015
+ ]
1016
+ },
1017
+ {
1018
+ "cell_type": "code",
1019
+ "execution_count": 9,
1020
+ "metadata": {},
1021
+ "outputs": [
1022
+ {
1023
+ "data": {
1024
+ "text/html": [
1025
+ "<div>\n",
1026
+ "<style scoped>\n",
1027
+ " .dataframe tbody tr th:only-of-type {\n",
1028
+ " vertical-align: middle;\n",
1029
+ " }\n",
1030
+ "\n",
1031
+ " .dataframe tbody tr th {\n",
1032
+ " vertical-align: top;\n",
1033
+ " }\n",
1034
+ "\n",
1035
+ " .dataframe thead th {\n",
1036
+ " text-align: right;\n",
1037
+ " }\n",
1038
+ "</style>\n",
1039
+ "<table border=\"1\" class=\"dataframe\">\n",
1040
+ " <thead>\n",
1041
+ " <tr style=\"text-align: right;\">\n",
1042
+ " <th></th>\n",
1043
+ " <th>id</th>\n",
1044
+ " <th>body</th>\n",
1045
+ " <th>created_at</th>\n",
1046
+ " <th>user</th>\n",
1047
+ " <th>source</th>\n",
1048
+ " <th>symbols</th>\n",
1049
+ " <th>prices</th>\n",
1050
+ " <th>mentioned_users</th>\n",
1051
+ " <th>entities</th>\n",
1052
+ " <th>liked_by_self</th>\n",
1053
+ " <th>reshared_by_self</th>\n",
1054
+ " <th>links</th>\n",
1055
+ " <th>reshare_message</th>\n",
1056
+ " <th>conversation</th>\n",
1057
+ " <th>likes</th>\n",
1058
+ " <th>reshares</th>\n",
1059
+ " <th>network</th>\n",
1060
+ " </tr>\n",
1061
+ " </thead>\n",
1062
+ " <tbody>\n",
1063
+ " <tr>\n",
1064
+ " <th>0</th>\n",
1065
+ " <td>522005335</td>\n",
1066
+ " <td>NANCY PELOSI JUST BOUGHT 10,000 SHARES OF APPL...</td>\n",
1067
+ " <td>2023-04-07T15:24:22Z</td>\n",
1068
+ " <td>{'id': 4744627, 'username': 'JavierAyala', 'na...</td>\n",
1069
+ " <td>{'id': 1149, 'title': 'StockTwits for iOS', 'u...</td>\n",
1070
+ " <td>[{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '...</td>\n",
1071
+ " <td>[{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '...</td>\n",
1072
+ " <td>[]</td>\n",
1073
+ " <td>{'sentiment': None}</td>\n",
1074
+ " <td>False</td>\n",
1075
+ " <td>False</td>\n",
1076
+ " <td>NaN</td>\n",
1077
+ " <td>NaN</td>\n",
1078
+ " <td>NaN</td>\n",
1079
+ " <td>NaN</td>\n",
1080
+ " <td>NaN</td>\n",
1081
+ " <td>NaN</td>\n",
1082
+ " </tr>\n",
1083
+ " <tr>\n",
1084
+ " <th>1</th>\n",
1085
+ " <td>522004768</td>\n",
1086
+ " <td>$AAPL $SPY \\n \\nhttps://amp.scmp.com/news/chi...</td>\n",
1087
+ " <td>2023-04-07T15:17:43Z</td>\n",
1088
+ " <td>{'id': 6330207, 'username': 'PlainFacts_2121',...</td>\n",
1089
+ " <td>{'id': 2269, 'title': 'StockTwits Web', 'url':...</td>\n",
1090
+ " <td>[{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '...</td>\n",
1091
+ " <td>[{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '...</td>\n",
1092
+ " <td>[]</td>\n",
1093
+ " <td>{'sentiment': None}</td>\n",
1094
+ " <td>False</td>\n",
1095
+ " <td>False</td>\n",
1096
+ " <td>[{'title': 'China officials who abused health ...</td>\n",
1097
+ " <td>NaN</td>\n",
1098
+ " <td>NaN</td>\n",
1099
+ " <td>NaN</td>\n",
1100
+ " <td>NaN</td>\n",
1101
+ " <td>NaN</td>\n",
1102
+ " </tr>\n",
1103
+ " </tbody>\n",
1104
+ "</table>\n",
1105
+ "</div>"
1106
+ ],
1107
+ "text/plain": [
1108
+ " id body \\\n",
1109
+ "0 522005335 NANCY PELOSI JUST BOUGHT 10,000 SHARES OF APPL... \n",
1110
+ "1 522004768 $AAPL $SPY \\n \\nhttps://amp.scmp.com/news/chi... \n",
1111
+ "\n",
1112
+ " created_at user \\\n",
1113
+ "0 2023-04-07T15:24:22Z {'id': 4744627, 'username': 'JavierAyala', 'na... \n",
1114
+ "1 2023-04-07T15:17:43Z {'id': 6330207, 'username': 'PlainFacts_2121',... \n",
1115
+ "\n",
1116
+ " source \\\n",
1117
+ "0 {'id': 1149, 'title': 'StockTwits for iOS', 'u... \n",
1118
+ "1 {'id': 2269, 'title': 'StockTwits Web', 'url':... \n",
1119
+ "\n",
1120
+ " symbols \\\n",
1121
+ "0 [{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '... \n",
1122
+ "1 [{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '... \n",
1123
+ "\n",
1124
+ " prices mentioned_users \\\n",
1125
+ "0 [{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '... [] \n",
1126
+ "1 [{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '... [] \n",
1127
+ "\n",
1128
+ " entities liked_by_self reshared_by_self \\\n",
1129
+ "0 {'sentiment': None} False False \n",
1130
+ "1 {'sentiment': None} False False \n",
1131
+ "\n",
1132
+ " links reshare_message \\\n",
1133
+ "0 NaN NaN \n",
1134
+ "1 [{'title': 'China officials who abused health ... NaN \n",
1135
+ "\n",
1136
+ " conversation likes reshares network \n",
1137
+ "0 NaN NaN NaN NaN \n",
1138
+ "1 NaN NaN NaN NaN "
1139
+ ]
1140
+ },
1141
+ "execution_count": 9,
1142
+ "metadata": {},
1143
+ "output_type": "execute_result"
1144
+ }
1145
+ ],
1146
+ "source": [
1147
+ "df = downloader.dataframe\n",
1148
+ "df.head(2)"
1149
+ ]
1150
+ },
1151
+ {
1152
+ "cell_type": "code",
1153
+ "execution_count": 10,
1154
+ "metadata": {},
1155
+ "outputs": [
1156
+ {
1157
+ "data": {
1158
+ "text/html": [
1159
+ "<div>\n",
1160
+ "<style scoped>\n",
1161
+ " .dataframe tbody tr th:only-of-type {\n",
1162
+ " vertical-align: middle;\n",
1163
+ " }\n",
1164
+ "\n",
1165
+ " .dataframe tbody tr th {\n",
1166
+ " vertical-align: top;\n",
1167
+ " }\n",
1168
+ "\n",
1169
+ " .dataframe thead th {\n",
1170
+ " text-align: right;\n",
1171
+ " }\n",
1172
+ "</style>\n",
1173
+ "<table border=\"1\" class=\"dataframe\">\n",
1174
+ " <thead>\n",
1175
+ " <tr style=\"text-align: right;\">\n",
1176
+ " <th></th>\n",
1177
+ " <th>created_at</th>\n",
1178
+ " <th>body</th>\n",
1179
+ " </tr>\n",
1180
+ " </thead>\n",
1181
+ " <tbody>\n",
1182
+ " <tr>\n",
1183
+ " <th>0</th>\n",
1184
+ " <td>2023-04-07T15:24:22Z</td>\n",
1185
+ " <td>NANCY PELOSI JUST BOUGHT 10,000 SHARES OF APPL...</td>\n",
1186
+ " </tr>\n",
1187
+ " <tr>\n",
1188
+ " <th>1</th>\n",
1189
+ " <td>2023-04-07T15:17:43Z</td>\n",
1190
+ " <td>$AAPL $SPY \\n \\nhttps://amp.scmp.com/news/chi...</td>\n",
1191
+ " </tr>\n",
1192
+ " <tr>\n",
1193
+ " <th>2</th>\n",
1194
+ " <td>2023-04-07T15:17:25Z</td>\n",
1195
+ " <td>$AAPL $GOOG $AMZN I took a Trump today. \\n\\nH...</td>\n",
1196
+ " </tr>\n",
1197
+ " <tr>\n",
1198
+ " <th>3</th>\n",
1199
+ " <td>2023-04-07T15:16:54Z</td>\n",
1200
+ " <td>$SPY $AAPL will take this baby down, time for ...</td>\n",
1201
+ " </tr>\n",
1202
+ " <tr>\n",
1203
+ " <th>4</th>\n",
1204
+ " <td>2023-04-07T15:11:37Z</td>\n",
1205
+ " <td>$SPY $3T it ALREADY DID - look at the pre-COV...</td>\n",
1206
+ " </tr>\n",
1207
+ " <tr>\n",
1208
+ " <th>5</th>\n",
1209
+ " <td>2023-04-07T15:10:29Z</td>\n",
1210
+ " <td>$AAPL $QQQ $STUDY We are on to the next one! A...</td>\n",
1211
+ " </tr>\n",
1212
+ " <tr>\n",
1213
+ " <th>6</th>\n",
1214
+ " <td>2023-04-07T15:06:00Z</td>\n",
1215
+ " <td>$AAPL was analyzed by 48 analysts. The buy con...</td>\n",
1216
+ " </tr>\n",
1217
+ " <tr>\n",
1218
+ " <th>7</th>\n",
1219
+ " <td>2023-04-07T14:54:29Z</td>\n",
1220
+ " <td>$AAPL both retiring. \\n \\nCraig....</td>\n",
1221
+ " </tr>\n",
1222
+ " <tr>\n",
1223
+ " <th>8</th>\n",
1224
+ " <td>2023-04-07T14:40:06Z</td>\n",
1225
+ " <td>$SPY $QQQ $TSLA $AAPL SPY 500 HAS STARTED🚀😍 BI...</td>\n",
1226
+ " </tr>\n",
1227
+ " <tr>\n",
1228
+ " <th>9</th>\n",
1229
+ " <td>2023-04-07T14:38:57Z</td>\n",
1230
+ " <td>Nancy 🩵 (Tim) $AAPL</td>\n",
1231
+ " </tr>\n",
1232
+ " </tbody>\n",
1233
+ "</table>\n",
1234
+ "</div>"
1235
+ ],
1236
+ "text/plain": [
1237
+ " created_at body\n",
1238
+ "0 2023-04-07T15:24:22Z NANCY PELOSI JUST BOUGHT 10,000 SHARES OF APPL...\n",
1239
+ "1 2023-04-07T15:17:43Z $AAPL $SPY \\n \\nhttps://amp.scmp.com/news/chi...\n",
1240
+ "2 2023-04-07T15:17:25Z $AAPL $GOOG $AMZN I took a Trump today. \\n\\nH...\n",
1241
+ "3 2023-04-07T15:16:54Z $SPY $AAPL will take this baby down, time for ...\n",
1242
+ "4 2023-04-07T15:11:37Z $SPY $3T it ALREADY DID - look at the pre-COV...\n",
1243
+ "5 2023-04-07T15:10:29Z $AAPL $QQQ $STUDY We are on to the next one! A...\n",
1244
+ "6 2023-04-07T15:06:00Z $AAPL was analyzed by 48 analysts. The buy con...\n",
1245
+ "7 2023-04-07T14:54:29Z $AAPL both retiring. \\n \\nCraig....\n",
1246
+ "8 2023-04-07T14:40:06Z $SPY $QQQ $TSLA $AAPL SPY 500 HAS STARTED🚀😍 BI...\n",
1247
+ "9 2023-04-07T14:38:57Z Nancy 🩵 (Tim) $AAPL"
1248
+ ]
1249
+ },
1250
+ "execution_count": 10,
1251
+ "metadata": {},
1252
+ "output_type": "execute_result"
1253
+ }
1254
+ ],
1255
+ "source": [
1256
+ "selected_columns = [\"created_at\", \"body\"]\n",
1257
+ "df[selected_columns].head(10)"
1258
+ ]
1259
+ },
1260
+ {
1261
+ "attachments": {},
1262
+ "cell_type": "markdown",
1263
+ "metadata": {},
1264
+ "source": [
1265
+ "### Reddit Wallstreetbets Streaming"
1266
+ ]
1267
+ },
1268
+ {
1269
+ "cell_type": "code",
1270
+ "execution_count": 2,
1271
+ "metadata": {},
1272
+ "outputs": [],
1273
+ "source": [
1274
+ "from finnlp.data_sources.social_media.reddit_streaming import Reddit_Streaming"
1275
+ ]
1276
+ },
1277
+ {
1278
+ "cell_type": "code",
1279
+ "execution_count": 3,
1280
+ "metadata": {},
1281
+ "outputs": [],
1282
+ "source": [
1283
+ "pages = 3\n",
1284
+ "config = {\n",
1285
+ " # \"use_proxy\": \"us_free\",\n",
1286
+ " \"max_retry\": 5,\n",
1287
+ " \"proxy_pages\": 2,\n",
1288
+ "}"
1289
+ ]
1290
+ },
1291
+ {
1292
+ "cell_type": "code",
1293
+ "execution_count": 17,
1294
+ "metadata": {},
1295
+ "outputs": [
1296
+ {
1297
+ "name": "stderr",
1298
+ "output_type": "stream",
1299
+ "text": [
1300
+ "Downloading by pages...: 100%|██████████| 3/3 [00:08<00:00, 2.83s/it]\n"
1301
+ ]
1302
+ }
1303
+ ],
1304
+ "source": [
1305
+ "downloader = Reddit_Streaming(config)\n",
1306
+ "downloader.download_streaming_all(pages)"
1307
+ ]
1308
+ },
1309
+ {
1310
+ "cell_type": "code",
1311
+ "execution_count": 18,
1312
+ "metadata": {},
1313
+ "outputs": [
1314
+ {
1315
+ "data": {
1316
+ "text/html": [
1317
+ "<div>\n",
1318
+ "<style scoped>\n",
1319
+ " .dataframe tbody tr th:only-of-type {\n",
1320
+ " vertical-align: middle;\n",
1321
+ " }\n",
1322
+ "\n",
1323
+ " .dataframe tbody tr th {\n",
1324
+ " vertical-align: top;\n",
1325
+ " }\n",
1326
+ "\n",
1327
+ " .dataframe thead th {\n",
1328
+ " text-align: right;\n",
1329
+ " }\n",
1330
+ "</style>\n",
1331
+ "<table border=\"1\" class=\"dataframe\">\n",
1332
+ " <thead>\n",
1333
+ " <tr style=\"text-align: right;\">\n",
1334
+ " <th></th>\n",
1335
+ " <th>id</th>\n",
1336
+ " <th>numComments</th>\n",
1337
+ " <th>created</th>\n",
1338
+ " <th>score</th>\n",
1339
+ " <th>distinguishType</th>\n",
1340
+ " <th>isLocked</th>\n",
1341
+ " <th>isStickied</th>\n",
1342
+ " <th>thumbnail</th>\n",
1343
+ " <th>title</th>\n",
1344
+ " <th>author</th>\n",
1345
+ " <th>...</th>\n",
1346
+ " <th>postEventInfo</th>\n",
1347
+ " <th>predictionTournament</th>\n",
1348
+ " <th>reactedFrom</th>\n",
1349
+ " <th>removedBy</th>\n",
1350
+ " <th>removedByCategory</th>\n",
1351
+ " <th>subreddit</th>\n",
1352
+ " <th>suggestedCommentSort</th>\n",
1353
+ " <th>topAwardedType</th>\n",
1354
+ " <th>url</th>\n",
1355
+ " <th>whitelistStatus</th>\n",
1356
+ " </tr>\n",
1357
+ " </thead>\n",
1358
+ " <tbody>\n",
1359
+ " <tr>\n",
1360
+ " <th>0</th>\n",
1361
+ " <td>t3_12epaq0</td>\n",
1362
+ " <td>8</td>\n",
1363
+ " <td>1680881974000</td>\n",
1364
+ " <td>0</td>\n",
1365
+ " <td>None</td>\n",
1366
+ " <td>False</td>\n",
1367
+ " <td>False</td>\n",
1368
+ " <td>{'url': 'https://b.thumbs.redditmedia.com/W8hd...</td>\n",
1369
+ " <td>Y’all making me feel like spooderman</td>\n",
1370
+ " <td>ghostwholags</td>\n",
1371
+ " <td>...</td>\n",
1372
+ " <td>NaN</td>\n",
1373
+ " <td>NaN</td>\n",
1374
+ " <td>NaN</td>\n",
1375
+ " <td>NaN</td>\n",
1376
+ " <td>NaN</td>\n",
1377
+ " <td>NaN</td>\n",
1378
+ " <td>NaN</td>\n",
1379
+ " <td>NaN</td>\n",
1380
+ " <td>NaN</td>\n",
1381
+ " <td>NaN</td>\n",
1382
+ " </tr>\n",
1383
+ " <tr>\n",
1384
+ " <th>1</th>\n",
1385
+ " <td>t3_zr9v10</td>\n",
1386
+ " <td>0</td>\n",
1387
+ " <td>1671595782000</td>\n",
1388
+ " <td>2</td>\n",
1389
+ " <td>None</td>\n",
1390
+ " <td>True</td>\n",
1391
+ " <td>False</td>\n",
1392
+ " <td>{'url': 'https://b.thumbs.redditmedia.com/dJqb...</td>\n",
1393
+ " <td>Do you track your investments in a spreadsheet...</td>\n",
1394
+ " <td>sharesight</td>\n",
1395
+ " <td>...</td>\n",
1396
+ " <td>NaN</td>\n",
1397
+ " <td>NaN</td>\n",
1398
+ " <td>NaN</td>\n",
1399
+ " <td>NaN</td>\n",
1400
+ " <td>NaN</td>\n",
1401
+ " <td>NaN</td>\n",
1402
+ " <td>NaN</td>\n",
1403
+ " <td>NaN</td>\n",
1404
+ " <td>NaN</td>\n",
1405
+ " <td>NaN</td>\n",
1406
+ " </tr>\n",
1407
+ " </tbody>\n",
1408
+ "</table>\n",
1409
+ "<p>2 rows × 100 columns</p>\n",
1410
+ "</div>"
1411
+ ],
1412
+ "text/plain": [
1413
+ " id numComments created score distinguishType isLocked \\\n",
1414
+ "0 t3_12epaq0 8 1680881974000 0 None False \n",
1415
+ "1 t3_zr9v10 0 1671595782000 2 None True \n",
1416
+ "\n",
1417
+ " isStickied thumbnail \\\n",
1418
+ "0 False {'url': 'https://b.thumbs.redditmedia.com/W8hd... \n",
1419
+ "1 False {'url': 'https://b.thumbs.redditmedia.com/dJqb... \n",
1420
+ "\n",
1421
+ " title author ... \\\n",
1422
+ "0 Y’all making me feel like spooderman ghostwholags ... \n",
1423
+ "1 Do you track your investments in a spreadsheet... sharesight ... \n",
1424
+ "\n",
1425
+ " postEventInfo predictionTournament reactedFrom removedBy removedByCategory \\\n",
1426
+ "0 NaN NaN NaN NaN NaN \n",
1427
+ "1 NaN NaN NaN NaN NaN \n",
1428
+ "\n",
1429
+ " subreddit suggestedCommentSort topAwardedType url whitelistStatus \n",
1430
+ "0 NaN NaN NaN NaN NaN \n",
1431
+ "1 NaN NaN NaN NaN NaN \n",
1432
+ "\n",
1433
+ "[2 rows x 100 columns]"
1434
+ ]
1435
+ },
1436
+ "execution_count": 18,
1437
+ "metadata": {},
1438
+ "output_type": "execute_result"
1439
+ }
1440
+ ],
1441
+ "source": [
1442
+ "df = downloader.dataframe\n",
1443
+ "df.head(2)"
1444
+ ]
1445
+ },
1446
+ {
1447
+ "cell_type": "code",
1448
+ "execution_count": 20,
1449
+ "metadata": {},
1450
+ "outputs": [
1451
+ {
1452
+ "data": {
1453
+ "text/html": [
1454
+ "<div>\n",
1455
+ "<style scoped>\n",
1456
+ " .dataframe tbody tr th:only-of-type {\n",
1457
+ " vertical-align: middle;\n",
1458
+ " }\n",
1459
+ "\n",
1460
+ " .dataframe tbody tr th {\n",
1461
+ " vertical-align: top;\n",
1462
+ " }\n",
1463
+ "\n",
1464
+ " .dataframe thead th {\n",
1465
+ " text-align: right;\n",
1466
+ " }\n",
1467
+ "</style>\n",
1468
+ "<table border=\"1\" class=\"dataframe\">\n",
1469
+ " <thead>\n",
1470
+ " <tr style=\"text-align: right;\">\n",
1471
+ " <th></th>\n",
1472
+ " <th>id</th>\n",
1473
+ " <th>numComments</th>\n",
1474
+ " <th>created</th>\n",
1475
+ " <th>score</th>\n",
1476
+ " <th>distinguishType</th>\n",
1477
+ " <th>isLocked</th>\n",
1478
+ " <th>isStickied</th>\n",
1479
+ " <th>thumbnail</th>\n",
1480
+ " <th>title</th>\n",
1481
+ " <th>author</th>\n",
1482
+ " <th>...</th>\n",
1483
+ " <th>postEventInfo</th>\n",
1484
+ " <th>predictionTournament</th>\n",
1485
+ " <th>reactedFrom</th>\n",
1486
+ " <th>removedBy</th>\n",
1487
+ " <th>removedByCategory</th>\n",
1488
+ " <th>subreddit</th>\n",
1489
+ " <th>suggestedCommentSort</th>\n",
1490
+ " <th>topAwardedType</th>\n",
1491
+ " <th>url</th>\n",
1492
+ " <th>whitelistStatus</th>\n",
1493
+ " </tr>\n",
1494
+ " </thead>\n",
1495
+ " <tbody>\n",
1496
+ " <tr>\n",
1497
+ " <th>0</th>\n",
1498
+ " <td>t3_12epaq0</td>\n",
1499
+ " <td>8</td>\n",
1500
+ " <td>2023-04-07 15:39:34</td>\n",
1501
+ " <td>0</td>\n",
1502
+ " <td>None</td>\n",
1503
+ " <td>False</td>\n",
1504
+ " <td>False</td>\n",
1505
+ " <td>{'url': 'https://b.thumbs.redditmedia.com/W8hd...</td>\n",
1506
+ " <td>Y’all making me feel like spooderman</td>\n",
1507
+ " <td>ghostwholags</td>\n",
1508
+ " <td>...</td>\n",
1509
+ " <td>NaN</td>\n",
1510
+ " <td>NaN</td>\n",
1511
+ " <td>NaN</td>\n",
1512
+ " <td>NaN</td>\n",
1513
+ " <td>NaN</td>\n",
1514
+ " <td>NaN</td>\n",
1515
+ " <td>NaN</td>\n",
1516
+ " <td>NaN</td>\n",
1517
+ " <td>NaN</td>\n",
1518
+ " <td>NaN</td>\n",
1519
+ " </tr>\n",
1520
+ " <tr>\n",
1521
+ " <th>1</th>\n",
1522
+ " <td>t3_zr9v10</td>\n",
1523
+ " <td>0</td>\n",
1524
+ " <td>2022-12-21 04:09:42</td>\n",
1525
+ " <td>2</td>\n",
1526
+ " <td>None</td>\n",
1527
+ " <td>True</td>\n",
1528
+ " <td>False</td>\n",
1529
+ " <td>{'url': 'https://b.thumbs.redditmedia.com/dJqb...</td>\n",
1530
+ " <td>Do you track your investments in a spreadsheet...</td>\n",
1531
+ " <td>sharesight</td>\n",
1532
+ " <td>...</td>\n",
1533
+ " <td>NaN</td>\n",
1534
+ " <td>NaN</td>\n",
1535
+ " <td>NaN</td>\n",
1536
+ " <td>NaN</td>\n",
1537
+ " <td>NaN</td>\n",
1538
+ " <td>NaN</td>\n",
1539
+ " <td>NaN</td>\n",
1540
+ " <td>NaN</td>\n",
1541
+ " <td>NaN</td>\n",
1542
+ " <td>NaN</td>\n",
1543
+ " </tr>\n",
1544
+ " </tbody>\n",
1545
+ "</table>\n",
1546
+ "<p>2 rows × 100 columns</p>\n",
1547
+ "</div>"
1548
+ ],
1549
+ "text/plain": [
1550
+ " id numComments created score distinguishType isLocked \\\n",
1551
+ "0 t3_12epaq0 8 2023-04-07 15:39:34 0 None False \n",
1552
+ "1 t3_zr9v10 0 2022-12-21 04:09:42 2 None True \n",
1553
+ "\n",
1554
+ " isStickied thumbnail \\\n",
1555
+ "0 False {'url': 'https://b.thumbs.redditmedia.com/W8hd... \n",
1556
+ "1 False {'url': 'https://b.thumbs.redditmedia.com/dJqb... \n",
1557
+ "\n",
1558
+ " title author ... \\\n",
1559
+ "0 Y’all making me feel like spooderman ghostwholags ... \n",
1560
+ "1 Do you track your investments in a spreadsheet... sharesight ... \n",
1561
+ "\n",
1562
+ " postEventInfo predictionTournament reactedFrom removedBy removedByCategory \\\n",
1563
+ "0 NaN NaN NaN NaN NaN \n",
1564
+ "1 NaN NaN NaN NaN NaN \n",
1565
+ "\n",
1566
+ " subreddit suggestedCommentSort topAwardedType url whitelistStatus \n",
1567
+ "0 NaN NaN NaN NaN NaN \n",
1568
+ "1 NaN NaN NaN NaN NaN \n",
1569
+ "\n",
1570
+ "[2 rows x 100 columns]"
1571
+ ]
1572
+ },
1573
+ "execution_count": 20,
1574
+ "metadata": {},
1575
+ "output_type": "execute_result"
1576
+ }
1577
+ ],
1578
+ "source": [
1579
+ "import pandas as pd\n",
1580
+ "df[\"created\"] = pd.to_datetime(df[\"created\"], unit = \"ms\")\n",
1581
+ "df.head(2)"
1582
+ ]
1583
+ },
1584
+ {
1585
+ "cell_type": "code",
1586
+ "execution_count": 22,
1587
+ "metadata": {},
1588
+ "outputs": [
1589
+ {
1590
+ "data": {
1591
+ "text/html": [
1592
+ "<div>\n",
1593
+ "<style scoped>\n",
1594
+ " .dataframe tbody tr th:only-of-type {\n",
1595
+ " vertical-align: middle;\n",
1596
+ " }\n",
1597
+ "\n",
1598
+ " .dataframe tbody tr th {\n",
1599
+ " vertical-align: top;\n",
1600
+ " }\n",
1601
+ "\n",
1602
+ " .dataframe thead th {\n",
1603
+ " text-align: right;\n",
1604
+ " }\n",
1605
+ "</style>\n",
1606
+ "<table border=\"1\" class=\"dataframe\">\n",
1607
+ " <thead>\n",
1608
+ " <tr style=\"text-align: right;\">\n",
1609
+ " <th></th>\n",
1610
+ " <th>created</th>\n",
1611
+ " <th>title</th>\n",
1612
+ " </tr>\n",
1613
+ " </thead>\n",
1614
+ " <tbody>\n",
1615
+ " <tr>\n",
1616
+ " <th>0</th>\n",
1617
+ " <td>2023-04-07 15:39:34</td>\n",
1618
+ " <td>Y’all making me feel like spooderman</td>\n",
1619
+ " </tr>\n",
1620
+ " <tr>\n",
1621
+ " <th>1</th>\n",
1622
+ " <td>2022-12-21 04:09:42</td>\n",
1623
+ " <td>Do you track your investments in a spreadsheet...</td>\n",
1624
+ " </tr>\n",
1625
+ " <tr>\n",
1626
+ " <th>2</th>\n",
1627
+ " <td>2022-12-21 04:09:42</td>\n",
1628
+ " <td>Do you track your investments in a spreadsheet...</td>\n",
1629
+ " </tr>\n",
1630
+ " <tr>\n",
1631
+ " <th>3</th>\n",
1632
+ " <td>2023-04-07 15:29:23</td>\n",
1633
+ " <td>Can a Blackberry holder get some help 🥺</td>\n",
1634
+ " </tr>\n",
1635
+ " <tr>\n",
1636
+ " <th>4</th>\n",
1637
+ " <td>2023-04-07 14:49:55</td>\n",
1638
+ " <td>The week of CPI and FOMC Minutes… 4-6-23 SPY/ ...</td>\n",
1639
+ " </tr>\n",
1640
+ " <tr>\n",
1641
+ " <th>5</th>\n",
1642
+ " <td>2023-04-07 14:19:22</td>\n",
1643
+ " <td>Well let’s hope your job likes you, thanks Jerome</td>\n",
1644
+ " </tr>\n",
1645
+ " <tr>\n",
1646
+ " <th>6</th>\n",
1647
+ " <td>2023-04-07 14:06:32</td>\n",
1648
+ " <td>Does anyone else feel an overwhelming sense of...</td>\n",
1649
+ " </tr>\n",
1650
+ " <tr>\n",
1651
+ " <th>7</th>\n",
1652
+ " <td>2023-04-07 13:47:59</td>\n",
1653
+ " <td>Watermarked Jesus explains the market being cl...</td>\n",
1654
+ " </tr>\n",
1655
+ " <tr>\n",
1656
+ " <th>8</th>\n",
1657
+ " <td>2023-04-07 13:26:23</td>\n",
1658
+ " <td>Jobs report shows 236,000 gain in March. Hot l...</td>\n",
1659
+ " </tr>\n",
1660
+ " <tr>\n",
1661
+ " <th>9</th>\n",
1662
+ " <td>2023-04-07 13:07:15</td>\n",
1663
+ " <td>The recession is over! Let's buy more stocks!</td>\n",
1664
+ " </tr>\n",
1665
+ " </tbody>\n",
1666
+ "</table>\n",
1667
+ "</div>"
1668
+ ],
1669
+ "text/plain": [
1670
+ " created title\n",
1671
+ "0 2023-04-07 15:39:34 Y’all making me feel like spooderman\n",
1672
+ "1 2022-12-21 04:09:42 Do you track your investments in a spreadsheet...\n",
1673
+ "2 2022-12-21 04:09:42 Do you track your investments in a spreadsheet...\n",
1674
+ "3 2023-04-07 15:29:23 Can a Blackberry holder get some help 🥺\n",
1675
+ "4 2023-04-07 14:49:55 The week of CPI and FOMC Minutes… 4-6-23 SPY/ ...\n",
1676
+ "5 2023-04-07 14:19:22 Well let’s hope your job likes you, thanks Jerome\n",
1677
+ "6 2023-04-07 14:06:32 Does anyone else feel an overwhelming sense of...\n",
1678
+ "7 2023-04-07 13:47:59 Watermarked Jesus explains the market being cl...\n",
1679
+ "8 2023-04-07 13:26:23 Jobs report shows 236,000 gain in March. Hot l...\n",
1680
+ "9 2023-04-07 13:07:15 The recession is over! Let's buy more stocks!"
1681
+ ]
1682
+ },
1683
+ "execution_count": 22,
1684
+ "metadata": {},
1685
+ "output_type": "execute_result"
1686
+ }
1687
+ ],
1688
+ "source": [
1689
+ "selected_columns = [\"created\", \"title\"]\n",
1690
+ "df[selected_columns].head(10)"
1691
+ ]
1692
+ },
1693
+ {
1694
+ "attachments": {},
1695
+ "cell_type": "markdown",
1696
+ "metadata": {},
1697
+ "source": [
1698
+ "### Weibo Date Range"
1699
+ ]
1700
+ },
1701
+ {
1702
+ "cell_type": "code",
1703
+ "execution_count": 23,
1704
+ "metadata": {},
1705
+ "outputs": [],
1706
+ "source": [
1707
+ "from finnlp.data_sources.social_media.weibo_date_range import Weibo_Date_Range"
1708
+ ]
1709
+ },
1710
+ {
1711
+ "cell_type": "code",
1712
+ "execution_count": 24,
1713
+ "metadata": {},
1714
+ "outputs": [],
1715
+ "source": [
1716
+ "start_date = \"2016-01-01\"\n",
1717
+ "end_date = \"2016-01-02\"\n",
1718
+ "stock = \"茅台\"\n",
1719
+ "config = {\n",
1720
+ " \"use_proxy\": \"china_free\",\n",
1721
+ " \"max_retry\": 5,\n",
1722
+ " \"proxy_pages\": 5,\n",
1723
+ " \"cookies\": \"Your_Login_Cookies\",\n",
1724
+ "}\n"
1725
+ ]
1726
+ },
1727
+ {
1728
+ "cell_type": "code",
1729
+ "execution_count": 25,
1730
+ "metadata": {},
1731
+ "outputs": [
1732
+ {
1733
+ "name": "stderr",
1734
+ "output_type": "stream",
1735
+ "text": [
1736
+ "Gathering free ips by pages...: 100%|██████████| 5/5 [00:09<00:00, 1.95s/it]\n",
1737
+ "Checking ips: 100%|██████████| 75/75 [01:23<00:00, 1.11s/it]\n"
1738
+ ]
1739
+ },
1740
+ {
1741
+ "name": "stdout",
1742
+ "output_type": "stream",
1743
+ "text": [
1744
+ "获取到的代理ip数量: 75 。Get proxy ips: 75.\n",
1745
+ "能用的代理数量: 13。Usable proxy ips: 13.\n"
1746
+ ]
1747
+ },
1748
+ {
1749
+ "name": "stderr",
1750
+ "output_type": "stream",
1751
+ "text": [
1752
+ "Downloading by dates...: 100%|██████████| 2/2 [01:03<00:00, 31.56s/it]\n"
1753
+ ]
1754
+ }
1755
+ ],
1756
+ "source": [
1757
+ "downloader = Weibo_Date_Range(config)\n",
1758
+ "downloader.download_date_range_stock(start_date, end_date, stock = stock)"
1759
+ ]
1760
+ },
1761
+ {
1762
+ "cell_type": "code",
1763
+ "execution_count": 31,
1764
+ "metadata": {},
1765
+ "outputs": [
1766
+ {
1767
+ "data": {
1768
+ "text/html": [
1769
+ "<div>\n",
1770
+ "<style scoped>\n",
1771
+ " .dataframe tbody tr th:only-of-type {\n",
1772
+ " vertical-align: middle;\n",
1773
+ " }\n",
1774
+ "\n",
1775
+ " .dataframe tbody tr th {\n",
1776
+ " vertical-align: top;\n",
1777
+ " }\n",
1778
+ "\n",
1779
+ " .dataframe thead th {\n",
1780
+ " text-align: right;\n",
1781
+ " }\n",
1782
+ "</style>\n",
1783
+ "<table border=\"1\" class=\"dataframe\">\n",
1784
+ " <thead>\n",
1785
+ " <tr style=\"text-align: right;\">\n",
1786
+ " <th></th>\n",
1787
+ " <th>date</th>\n",
1788
+ " <th>date_content</th>\n",
1789
+ " <th>source</th>\n",
1790
+ " <th>content</th>\n",
1791
+ " </tr>\n",
1792
+ " </thead>\n",
1793
+ " <tbody>\n",
1794
+ " <tr>\n",
1795
+ " <th>0</th>\n",
1796
+ " <td>2016-01-01</td>\n",
1797
+ " <td>2016年01月01日23:41</td>\n",
1798
+ " <td>Moto X</td>\n",
1799
+ " <td>#舆论之锤#唯品会发声明证实销售假茅台-手机腾讯网O网页链接分享来自浏览器!</td>\n",
1800
+ " </tr>\n",
1801
+ " <tr>\n",
1802
+ " <th>2</th>\n",
1803
+ " <td>2016-01-01</td>\n",
1804
+ " <td>2016年01月01日22:57</td>\n",
1805
+ " <td>新浪博客</td>\n",
1806
+ " <td>2016元旦节快乐酒粮网官方新品首发,茅台镇老酒,酱香原浆酒:酒粮网茅台镇白酒酱香老酒纯粮原...</td>\n",
1807
+ " </tr>\n",
1808
+ " <tr>\n",
1809
+ " <th>6</th>\n",
1810
+ " <td>2016-01-01</td>\n",
1811
+ " <td>2016年01月01日22:56</td>\n",
1812
+ " <td>新浪博客</td>\n",
1813
+ " <td>2016元旦节快乐酒粮网官方新品首发,茅台镇老酒,酱香原浆酒:酒粮网茅台镇白酒酱香老酒纯粮原...</td>\n",
1814
+ " </tr>\n",
1815
+ " <tr>\n",
1816
+ " <th>17</th>\n",
1817
+ " <td>2016-01-01</td>\n",
1818
+ " <td>2016年01月01日22:40</td>\n",
1819
+ " <td>五蕴皆崆Android</td>\n",
1820
+ " <td>开心,今天喝了两斤酒(茅台+扎二)三个人,开心!</td>\n",
1821
+ " </tr>\n",
1822
+ " <tr>\n",
1823
+ " <th>18</th>\n",
1824
+ " <td>2016-01-01</td>\n",
1825
+ " <td>NaN</td>\n",
1826
+ " <td>NaN</td>\n",
1827
+ " <td>一家专卖假货的网站某宝,你该学学了!//【唯品会售假茅台:供货商被刑拘顾客获十倍补偿】O唯品...</td>\n",
1828
+ " </tr>\n",
1829
+ " <tr>\n",
1830
+ " <th>19</th>\n",
1831
+ " <td>2016-01-01</td>\n",
1832
+ " <td>NaN</td>\n",
1833
+ " <td>NaN</td>\n",
1834
+ " <td>一家专卖假货的网站//【唯品会售假茅台:供货商被刑拘顾客获十倍补偿】O唯品会售假茅台:供货商...</td>\n",
1835
+ " </tr>\n",
1836
+ " <tr>\n",
1837
+ " <th>20</th>\n",
1838
+ " <td>2016-01-01</td>\n",
1839
+ " <td>2016年01月01日21:46</td>\n",
1840
+ " <td>360安全浏览器</td>\n",
1841
+ " <td>前几天说了几点不看好茅台的理由,今年过节喝点茅台支持下,个人口感,茅台比小五好喝,茅台依然是...</td>\n",
1842
+ " </tr>\n",
1843
+ " <tr>\n",
1844
+ " <th>21</th>\n",
1845
+ " <td>2016-01-01</td>\n",
1846
+ " <td>2016年01月01日21:44</td>\n",
1847
+ " <td>华为P8</td>\n",
1848
+ " <td>老杜酱酒已到货,从明天起正式在甘肃武威开卖。可以不相信我说的话,但一定不要怀疑@杜子建的为人...</td>\n",
1849
+ " </tr>\n",
1850
+ " <tr>\n",
1851
+ " <th>22</th>\n",
1852
+ " <td>2016-01-01</td>\n",
1853
+ " <td>2016年01月01日21:24</td>\n",
1854
+ " <td>华为Ascend P7</td>\n",
1855
+ " <td>【唯品会售假茅台后续:供货商被刑拘顾客获十倍补偿】此前,有网友投诉其在唯品会购买的茅台酒质量...</td>\n",
1856
+ " </tr>\n",
1857
+ " <tr>\n",
1858
+ " <th>23</th>\n",
1859
+ " <td>2016-01-01</td>\n",
1860
+ " <td>2016年01月01日21:16</td>\n",
1861
+ " <td>实得惠省钱网</td>\n",
1862
+ " <td>唯品会卖假茅台,供货商被刑拘,买家获十倍补偿8888元|此前,有网友在网络论坛发贴(唯品会宣...</td>\n",
1863
+ " </tr>\n",
1864
+ " </tbody>\n",
1865
+ "</table>\n",
1866
+ "</div>"
1867
+ ],
1868
+ "text/plain": [
1869
+ " date date_content source \\\n",
1870
+ "0 2016-01-01 2016年01月01日23:41 Moto X \n",
1871
+ "2 2016-01-01 2016年01月01日22:57 新浪博客 \n",
1872
+ "6 2016-01-01 2016年01月01日22:56 新浪博客 \n",
1873
+ "17 2016-01-01 2016年01月01日22:40 五蕴皆崆Android \n",
1874
+ "18 2016-01-01 NaN NaN \n",
1875
+ "19 2016-01-01 NaN NaN \n",
1876
+ "20 2016-01-01 2016年01月01日21:46 360安全浏览器 \n",
1877
+ "21 2016-01-01 2016年01月01日21:44 华为P8 \n",
1878
+ "22 2016-01-01 2016年01月01日21:24 华为Ascend P7 \n",
1879
+ "23 2016-01-01 2016年01月01日21:16 实得惠省钱网 \n",
1880
+ "\n",
1881
+ " content \n",
1882
+ "0 #舆论之锤#唯品会发声明证实销售假茅台-手机腾讯网O网页链接分享来自浏览器! \n",
1883
+ "2 2016元旦节快乐酒粮网官方新品首发,茅台镇老酒,酱香原浆酒:酒粮网茅台镇白酒酱香老酒纯粮原... \n",
1884
+ "6 2016元旦节快乐酒粮网官方新品首发,茅台镇老酒,酱香原浆酒:酒粮网茅台镇白酒酱香老酒纯粮原... \n",
1885
+ "17 开心,今天喝了两斤酒(茅台+扎二)三个人,开心! \n",
1886
+ "18 一家专卖假货的网站某宝,你该学学了!//【唯品会售假茅台:供货商被刑拘顾客获十倍补偿】O唯品... \n",
1887
+ "19 一家专卖假货的网站//【唯品会售假茅台:供货商被刑拘顾客获十倍补偿】O唯品会售假茅台:供货商... \n",
1888
+ "20 前几天说了几点不看好茅台的理由,今年过节喝点茅台支持下,个人口感,茅台比小五好喝,茅台依然是... \n",
1889
+ "21 老杜酱酒已到货,从明天起正式在甘肃武威开卖。可以不相信我说的话,但一定不要怀疑@杜子建的为人... \n",
1890
+ "22 【唯品会售假茅台后续:供货商被刑拘顾客获十倍补偿】此前,有网友投诉其在唯品会购买的茅台酒质量... \n",
1891
+ "23 唯品会卖假茅台,供货商被刑拘,买家获十倍补偿8888元|此前,有网友在网络论坛发贴(唯品会宣... "
1892
+ ]
1893
+ },
1894
+ "execution_count": 31,
1895
+ "metadata": {},
1896
+ "output_type": "execute_result"
1897
+ }
1898
+ ],
1899
+ "source": [
1900
+ "df = downloader.dataframe\n",
1901
+ "df = df.drop_duplicates()\n",
1902
+ "df.head(10)"
1903
+ ]
1904
+ },
1905
+ {
1906
+ "cell_type": "code",
1907
+ "execution_count": 32,
1908
+ "metadata": {},
1909
+ "outputs": [
1910
+ {
1911
+ "data": {
1912
+ "text/plain": [
1913
+ "(60, 4)"
1914
+ ]
1915
+ },
1916
+ "execution_count": 32,
1917
+ "metadata": {},
1918
+ "output_type": "execute_result"
1919
+ }
1920
+ ],
1921
+ "source": [
1922
+ "df.shape"
1923
+ ]
1924
+ },
1925
+ {
1926
+ "attachments": {},
1927
+ "cell_type": "markdown",
1928
+ "metadata": {},
1929
+ "source": [
1930
+ "### Weibo Streaming"
1931
+ ]
1932
+ },
1933
+ {
1934
+ "cell_type": "code",
1935
+ "execution_count": 4,
1936
+ "metadata": {},
1937
+ "outputs": [],
1938
+ "source": [
1939
+ "from finnlp.data_sources.social_media.weibo_streaming import Weibo_Streaming"
1940
+ ]
1941
+ },
1942
+ {
1943
+ "cell_type": "code",
1944
+ "execution_count": 5,
1945
+ "metadata": {},
1946
+ "outputs": [],
1947
+ "source": [
1948
+ "rounds = 3\n",
1949
+ "stock = \"茅台\"\n",
1950
+ "config = {\n",
1951
+ " \"use_proxy\": \"china_free\",\n",
1952
+ " \"max_retry\": 5,\n",
1953
+ " \"proxy_pages\": 5,\n",
1954
+ " \"cookies\": \"Your_Login_Cookies\",\n",
1955
+ "}\n"
1956
+ ]
1957
+ },
1958
+ {
1959
+ "cell_type": "code",
1960
+ "execution_count": 6,
1961
+ "metadata": {},
1962
+ "outputs": [
1963
+ {
1964
+ "name": "stderr",
1965
+ "output_type": "stream",
1966
+ "text": [
1967
+ "Gathering free ips by pages...: 100%|██████████| 5/5 [00:09<00:00, 1.98s/it]\n",
1968
+ "Checking ips: 100%|██████████| 75/75 [01:26<00:00, 1.15s/it]\n"
1969
+ ]
1970
+ },
1971
+ {
1972
+ "name": "stdout",
1973
+ "output_type": "stream",
1974
+ "text": [
1975
+ "获取到的代理ip数量: 75 。Get proxy ips: 75.\n",
1976
+ "能用的代理数量: 19。Usable proxy ips: 19.\n"
1977
+ ]
1978
+ },
1979
+ {
1980
+ "name": "stderr",
1981
+ "output_type": "stream",
1982
+ "text": [
1983
+ "Processing the text content and downloading the full passage...: 100%|██████████| 9/9 [00:00<00:00, 64.89it/s]\n",
1984
+ "Processing the text content and downloading the full passage...: 100%|██████████| 10/10 [00:09<00:00, 1.07it/s]\n",
1985
+ "Processing the text content and downloading the full passage...: 100%|██████████| 10/10 [00:02<00:00, 4.93it/s]\n",
1986
+ "Downloading by page..: 100%|██████████| 3/3 [00:19<00:00, 6.46s/it]\n"
1987
+ ]
1988
+ }
1989
+ ],
1990
+ "source": [
1991
+ "downloader = Weibo_Streaming(config)\n",
1992
+ "downloader.download_streaming_stock(stock = stock, rounds = rounds)"
1993
+ ]
1994
+ },
1995
+ {
1996
+ "cell_type": "code",
1997
+ "execution_count": 10,
1998
+ "metadata": {},
1999
+ "outputs": [
2000
+ {
2001
+ "data": {
2002
+ "text/html": [
2003
+ "<div>\n",
2004
+ "<style scoped>\n",
2005
+ " .dataframe tbody tr th:only-of-type {\n",
2006
+ " vertical-align: middle;\n",
2007
+ " }\n",
2008
+ "\n",
2009
+ " .dataframe tbody tr th {\n",
2010
+ " vertical-align: top;\n",
2011
+ " }\n",
2012
+ "\n",
2013
+ " .dataframe thead th {\n",
2014
+ " text-align: right;\n",
2015
+ " }\n",
2016
+ "</style>\n",
2017
+ "<table border=\"1\" class=\"dataframe\">\n",
2018
+ " <thead>\n",
2019
+ " <tr style=\"text-align: right;\">\n",
2020
+ " <th></th>\n",
2021
+ " <th>card_type</th>\n",
2022
+ " <th>display_followbtn</th>\n",
2023
+ " <th>mblog</th>\n",
2024
+ " <th>itemid</th>\n",
2025
+ " <th>actionlog</th>\n",
2026
+ " <th>cate_id</th>\n",
2027
+ " <th>display_arrow</th>\n",
2028
+ " <th>show_type</th>\n",
2029
+ " <th>scheme</th>\n",
2030
+ " <th>container_color</th>\n",
2031
+ " <th>container_color_dark</th>\n",
2032
+ " <th>content_short</th>\n",
2033
+ " <th>content</th>\n",
2034
+ " </tr>\n",
2035
+ " </thead>\n",
2036
+ " <tbody>\n",
2037
+ " <tr>\n",
2038
+ " <th>0</th>\n",
2039
+ " <td>9</td>\n",
2040
+ " <td>False</td>\n",
2041
+ " <td>{'attitudes_count': 0, 'can_edit': False, 'com...</td>\n",
2042
+ " <td>seqid:187118896|type:61|t:|pos:1-0-0|q:茅台|srid...</td>\n",
2043
+ " <td>{'act_code': 554, 'ext': 'seqid:187118896|type...</td>\n",
2044
+ " <td>31</td>\n",
2045
+ " <td>0</td>\n",
2046
+ " <td>1</td>\n",
2047
+ " <td>https://m.weibo.cn/status/MAWMprpPp?mblogid=MA...</td>\n",
2048
+ " <td>#EEEEEE</td>\n",
2049
+ " <td>#151515</td>\n",
2050
+ " <td>事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市...</td>\n",
2051
+ " <td>事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市...</td>\n",
2052
+ " </tr>\n",
2053
+ " <tr>\n",
2054
+ " <th>1</th>\n",
2055
+ " <td>9</td>\n",
2056
+ " <td>False</td>\n",
2057
+ " <td>{'attitudes_count': 0, 'can_edit': False, 'com...</td>\n",
2058
+ " <td>seqid:187118896|type:61|t:|pos:1-0-1|q:茅台|srid...</td>\n",
2059
+ " <td>{'act_code': 554, 'ext': 'seqid:187118896|type...</td>\n",
2060
+ " <td>31</td>\n",
2061
+ " <td>0</td>\n",
2062
+ " <td>1</td>\n",
2063
+ " <td>https://m.weibo.cn/status/MAWHVDm0H?mblogid=MA...</td>\n",
2064
+ " <td>#EEEEEE</td>\n",
2065
+ " <td>#151515</td>\n",
2066
+ " <td>茅台茅台成都收4瓶飞天,自提</td>\n",
2067
+ " <td>茅台茅台成都收4瓶飞天,自提</td>\n",
2068
+ " </tr>\n",
2069
+ " </tbody>\n",
2070
+ "</table>\n",
2071
+ "</div>"
2072
+ ],
2073
+ "text/plain": [
2074
+ " card_type display_followbtn \\\n",
2075
+ "0 9 False \n",
2076
+ "1 9 False \n",
2077
+ "\n",
2078
+ " mblog \\\n",
2079
+ "0 {'attitudes_count': 0, 'can_edit': False, 'com... \n",
2080
+ "1 {'attitudes_count': 0, 'can_edit': False, 'com... \n",
2081
+ "\n",
2082
+ " itemid \\\n",
2083
+ "0 seqid:187118896|type:61|t:|pos:1-0-0|q:茅台|srid... \n",
2084
+ "1 seqid:187118896|type:61|t:|pos:1-0-1|q:茅台|srid... \n",
2085
+ "\n",
2086
+ " actionlog cate_id display_arrow \\\n",
2087
+ "0 {'act_code': 554, 'ext': 'seqid:187118896|type... 31 0 \n",
2088
+ "1 {'act_code': 554, 'ext': 'seqid:187118896|type... 31 0 \n",
2089
+ "\n",
2090
+ " show_type scheme \\\n",
2091
+ "0 1 https://m.weibo.cn/status/MAWMprpPp?mblogid=MA... \n",
2092
+ "1 1 https://m.weibo.cn/status/MAWHVDm0H?mblogid=MA... \n",
2093
+ "\n",
2094
+ " container_color container_color_dark \\\n",
2095
+ "0 #EEEEEE #151515 \n",
2096
+ "1 #EEEEEE #151515 \n",
2097
+ "\n",
2098
+ " content_short \\\n",
2099
+ "0 事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市... \n",
2100
+ "1 茅台茅台成都收4瓶飞天,自提 \n",
2101
+ "\n",
2102
+ " content \n",
2103
+ "0 事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市... \n",
2104
+ "1 茅台茅台成都收4瓶飞天,自提 "
2105
+ ]
2106
+ },
2107
+ "execution_count": 10,
2108
+ "metadata": {},
2109
+ "output_type": "execute_result"
2110
+ }
2111
+ ],
2112
+ "source": [
2113
+ "df = downloader.dataframe\n",
2114
+ "df.head(2)"
2115
+ ]
2116
+ },
2117
+ {
2118
+ "cell_type": "code",
2119
+ "execution_count": 11,
2120
+ "metadata": {},
2121
+ "outputs": [
2122
+ {
2123
+ "data": {
2124
+ "text/html": [
2125
+ "<div>\n",
2126
+ "<style scoped>\n",
2127
+ " .dataframe tbody tr th:only-of-type {\n",
2128
+ " vertical-align: middle;\n",
2129
+ " }\n",
2130
+ "\n",
2131
+ " .dataframe tbody tr th {\n",
2132
+ " vertical-align: top;\n",
2133
+ " }\n",
2134
+ "\n",
2135
+ " .dataframe thead th {\n",
2136
+ " text-align: right;\n",
2137
+ " }\n",
2138
+ "</style>\n",
2139
+ "<table border=\"1\" class=\"dataframe\">\n",
2140
+ " <thead>\n",
2141
+ " <tr style=\"text-align: right;\">\n",
2142
+ " <th></th>\n",
2143
+ " <th>content_short</th>\n",
2144
+ " <th>content</th>\n",
2145
+ " </tr>\n",
2146
+ " </thead>\n",
2147
+ " <tbody>\n",
2148
+ " <tr>\n",
2149
+ " <th>0</th>\n",
2150
+ " <td>事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市...</td>\n",
2151
+ " <td>事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市...</td>\n",
2152
+ " </tr>\n",
2153
+ " <tr>\n",
2154
+ " <th>1</th>\n",
2155
+ " <td>茅台茅台成都收4瓶飞天,自提</td>\n",
2156
+ " <td>茅台茅台成都收4瓶飞天,自提</td>\n",
2157
+ " </tr>\n",
2158
+ " <tr>\n",
2159
+ " <th>2</th>\n",
2160
+ " <td>我可太喜欢茅台这个防伪了</td>\n",
2161
+ " <td>我可太喜欢茅台这个防伪了</td>\n",
2162
+ " </tr>\n",
2163
+ " <tr>\n",
2164
+ " <th>3</th>\n",
2165
+ " <td>没想到 4S店的二楼 是卖茅台的吧</td>\n",
2166
+ " <td>没想到 4S店的二楼 是卖茅台的吧</td>\n",
2167
+ " </tr>\n",
2168
+ " <tr>\n",
2169
+ " <th>4</th>\n",
2170
+ " <td>买不起茅台,砸锅卖铁也得买得起茅台冰淇淋 许昌·胖东来时代广场</td>\n",
2171
+ " <td>买不起茅台,砸锅卖铁也得买得起茅台冰淇淋 许昌·胖东来时代广场</td>\n",
2172
+ " </tr>\n",
2173
+ " <tr>\n",
2174
+ " <th>5</th>\n",
2175
+ " <td>xxx给我枇杷xxx给我蜂蜜 xxx偷茅台喝(假的)。我很喜欢自己家的产品,感觉很无害纯天然...</td>\n",
2176
+ " <td>xxx给我枇杷xxx给我蜂蜜 xxx偷茅台喝(假的)。我很喜欢自己家的产品,感觉很无害纯天然...</td>\n",
2177
+ " </tr>\n",
2178
+ " <tr>\n",
2179
+ " <th>6</th>\n",
2180
+ " <td>茅台 奎屯出一只兔茅</td>\n",
2181
+ " <td>茅台 奎屯出一只兔茅</td>\n",
2182
+ " </tr>\n",
2183
+ " <tr>\n",
2184
+ " <th>7</th>\n",
2185
+ " <td>2022胡润酒类品牌榜发布 2022胡润酒类品牌榜发布点评:与我印象中的有点出入。不出茅台和...</td>\n",
2186
+ " <td>2022胡润酒类品牌榜发布 2022胡润酒类品牌榜发布点评:与我印象中的有点出入。不出茅台和...</td>\n",
2187
+ " </tr>\n",
2188
+ " <tr>\n",
2189
+ " <th>8</th>\n",
2190
+ " <td>41岁,很美妙!“爸爸生日快乐,吃个蛋糕🍰”小奶音听着上头。爱人,亲戚,朋友,草莓🍓,茅台+...</td>\n",
2191
+ " <td>41岁,很美妙!“爸爸生日快乐,吃个蛋糕🍰”小奶音听着上头。爱人,亲戚,朋友,草莓🍓,茅台+...</td>\n",
2192
+ " </tr>\n",
2193
+ " <tr>\n",
2194
+ " <th>0</th>\n",
2195
+ " <td>吃到了茅台冰激淋也</td>\n",
2196
+ " <td>吃到了茅台冰激淋也</td>\n",
2197
+ " </tr>\n",
2198
+ " </tbody>\n",
2199
+ "</table>\n",
2200
+ "</div>"
2201
+ ],
2202
+ "text/plain": [
2203
+ " content_short \\\n",
2204
+ "0 事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市... \n",
2205
+ "1 茅台茅台成都收4瓶飞天,自提 \n",
2206
+ "2 我可太喜欢茅台这个防伪了 \n",
2207
+ "3 没想到 4S店的二楼 是卖茅台的吧 \n",
2208
+ "4 买不起茅台,砸锅卖铁也得买得起茅台冰淇淋 许昌·胖东来时代广场 \n",
2209
+ "5 xxx给我枇杷xxx给我蜂蜜 xxx偷茅台喝(假的)。我很喜欢自己家的产品,感觉很无害纯天然... \n",
2210
+ "6 茅台 奎屯出一只兔茅 \n",
2211
+ "7 2022胡润酒类品牌榜发布 2022胡润酒类品牌榜发布点评:与我印象中的有点出入。不出茅台和... \n",
2212
+ "8 41岁,很美妙!“爸爸生日快乐,吃个蛋糕🍰”小奶音听着上头。爱人,亲戚,朋友,草莓🍓,茅台+... \n",
2213
+ "0 吃到了茅台冰激淋也 \n",
2214
+ "\n",
2215
+ " content \n",
2216
+ "0 事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市... \n",
2217
+ "1 茅台茅台成都收4瓶飞天,自提 \n",
2218
+ "2 我可太喜欢茅台这个防伪了 \n",
2219
+ "3 没想到 4S店的二楼 是卖茅台的吧 \n",
2220
+ "4 买不起茅台,砸锅卖铁也得买得起茅台冰淇淋 许昌·胖东来时代广场 \n",
2221
+ "5 xxx给我枇杷xxx给我蜂蜜 xxx偷茅台喝(假的)。我很喜欢自己家的产品,感觉很无害纯天然... \n",
2222
+ "6 茅台 奎屯出一只兔茅 \n",
2223
+ "7 2022胡润酒类品牌榜发布 2022胡润酒类品牌榜发布点评:与我印象中的有点出入。不出茅台和... \n",
2224
+ "8 41岁,很美妙!“爸爸生日快乐,吃个蛋糕🍰”小奶音听着上头。爱人,亲戚,朋友,草莓🍓,茅台+... \n",
2225
+ "0 吃到了茅台冰激淋也 "
2226
+ ]
2227
+ },
2228
+ "execution_count": 11,
2229
+ "metadata": {},
2230
+ "output_type": "execute_result"
2231
+ }
2232
+ ],
2233
+ "source": [
2234
+ "selected_columns = [\"content_short\", \"content\"]\n",
2235
+ "df[selected_columns].head(10)"
2236
+ ]
2237
+ }
2238
+ ],
2239
+ "metadata": {
2240
+ "kernelspec": {
2241
+ "display_name": "finrl",
2242
+ "language": "python",
2243
+ "name": "python3"
2244
+ },
2245
+ "language_info": {
2246
+ "codemirror_mode": {
2247
+ "name": "ipython",
2248
+ "version": 3
2249
+ },
2250
+ "file_extension": ".py",
2251
+ "mimetype": "text/x-python",
2252
+ "name": "python",
2253
+ "nbconvert_exporter": "python",
2254
+ "pygments_lexer": "ipython3",
2255
+ "version": "3.7.12"
2256
+ },
2257
+ "orig_nbformat": 4
2258
+ },
2259
+ "nbformat": 4,
2260
+ "nbformat_minor": 2
2261
+ }
FinNLP/docs/FinNLP/docs/zh/index.md ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 互联网金融数据
2
+
3
+ 演示内容请参见[FinGPT](https://github.com/AI4Finance-Foundation/ChatGPT-for-FinTech)
4
+
5
+ **免责声明:我们根据MIT教育许可证的规定共享代码以供学术研究之用。此处不构成任何金融建议,亦非交易真实资金的推荐。在交易或投资之前请使用常识并首先咨询专业人士。**
6
+
7
+ ## Ⅰ. 架构
8
+
9
+ ![image-20230505200244043](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052002139.png)
10
+
11
+ * 整个项目由4个部分组成:
12
+
13
+ * 第一部分是**数据源**,在这里,我们从互联网上收集历史和流媒体数据。
14
+
15
+ * 接下来,我们将数据推送到**数据工程**部分,在这里我们会对数据进行清洗,标记化处理和提示工程。
16
+
17
+ * 然后,数据被推送到**大语言模型(LLMs)**。在这里,我们可以以不同的方式使用LLMs。我们不仅可以使用收集到的数据来训练我们自己的**轻量级微调模型**,还可以使用这些数据和**训练好的模型**或**LLM API**来支持我们的应用程序。
18
+
19
+ * 最后一部分将是**应用程序**部分,我们可以使用数据和LLMs来制作许多有趣的应用程序。
20
+
21
+ ## Ⅱ. 数据源
22
+
23
+ ![image-20230505200446477](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052004539.png)
24
+
25
+ * 由于空间限制,我们只展示了其中一部分。
26
+
27
+ ### 1. [新闻](jupyter/Data_Sources_News.ipynb)
28
+
29
+ | 平台 | 数据类型 | 相关市场 | 指定公司 | 时间范围 | 数据源类型 | 限制条件 | 文档数量(万) | 支持情况 |
30
+ | :----------------------------------------------------------: | :--------: | :------------: | :----------------------------------------------------------: | :---------------: | :--------: | :-------------------: | ------------------------------------------------------------ | ------------------------------------------------------------ |
31
+ | 雅虎 | 金融新闻 | 美国股票 | √ | 时间范围 | 官方 | N/A | 1,500+ | √ |
32
+ | 路透社 | 金融新闻 | 美国股票 | × | 时间范围 | 官方 | N/A | 1,500+ | √ |
33
+ | 新浪 | 金融新闻 | 中国股票 | × | 时间范围 | 官方 | N/A | 2,000+ | √ |
34
+ | 东方财富 | 金融新闻 | 中国股票 | √ | 时间范围 | 官方 | N/A | 1,000+ | √ |
35
+ | 第一财经 | 金融新闻 | 中国股票 | √ | 时间范围 | 官方 | N/A | 500+ | 即将 |
36
+ | 央视 | 政府新闻 | 中国股票 | × | 时间范围 | 第三方 | N/A | 4 | √ |
37
+ | 美国主流媒体 | 金融新闻 | 美国股票 | √ | 时间范围 | 第三方 | 账户 (免费) | 3,200+ | √ |
38
+ | 中国主流媒体 | 金融新闻 | 中国股票 | × | 时间范围 | 第三方 | ¥500/年 | 3000+ | √ |
39
+
40
+ * FinGPT可能比Bloomberg的文档数目更少,但我们在同一个数量级上。
41
+
42
+ ### 2. [社交媒体](jupyter/Data_Sources_Social_Media.iypnb)
43
+
44
+ | 平台 | 数据类型 | 相关市场 | 指定公司 | 范围类型 | 来源类型 | 限制 | 文档 (1e4) | 支持 |
45
+ | :---------------------: | :------: | :------: | :------: | :------: | :------: | :-----: | ---------- | :--: |
46
+ | Twitter | 推文 | 美国股票 | √ | 时间范围 | 官方 | N/A | 18,000+ | √ |
47
+ | StockTwits | 推文 | 美国股票 | √ | 最新 | 官方 | N/A | 160,000+ | √ |
48
+ | Reddit (wallstreetbets) | 帖子 | 美国股票 | × | 最新 | 官方 | N/A | 9+ | √ |
49
+ | 微博 | 推文 | 中国股票 | √ | 时间范围 | 官方 | Cookies | 1,400,000+ | √ |
50
+ | 微博 | 推文 | 中国股票 | √ | 最新 | 官方 | N/A | 1,400,000+ | √ |
51
+
52
+ * 在 **BloomberGPT** 中,他们**不收集社交媒体数据**,但我们认为**公众舆论是干扰股票市场的最重要因素之一**。
53
+
54
+ ### 3. [公司公告](jupyter/Data_Sources_Company_Announcement.ipynb)
55
+
56
+ | 平台 | 数据类型 | 相关市场 | 指定公司 | 范围类型 | 数据来源 | 限制 | 文档数 (1e4) | 支持情况 |
57
+ | :---------------: | :------: | :------: | :------: | :------: | :------: | :--: | ------------ | :------: |
58
+ | 巨潮网 (官方) | 文本 | 中国股票 | √ | 时间范围 | 官方 | N/A | 2,790+ | √ |
59
+ | 美国证监会 (官方) | 文本 | 美国股票 | √ | 时间范围 | 官方 | N/A | 1,440+ | √ |
60
+
61
+ * 由于我们从不同的股票市场收集数据,因此我们比Bloomberg GPT有更多的申报文档。
62
+
63
+ ### 4. 趋势
64
+
65
+ | 平台 | 数据类型 | 相关市场 | 数据源 | 指定公司 | 范围类型 | 源类型 | 限制 |
66
+ | :--------------------------------------------------: | :------: | :------: | :-----------------------------------------------------: | :------: | :------: | :----: | :--: |
67
+ | [谷歌趋势](https://trends.google.com/trends/explore) | 指数 | 美国股票 | [Google Trends](./finnlp/data_sources/trends/google.py) | √ | 日期范围 | 官方 | N/A |
68
+ | [百度指数](https://index.baidu.com/v2/index.html#/) | 指数 | 中国股票 | 即将推出 | - | - | - | - |
69
+
70
+
71
+ ### 5. 数据集
72
+ | 数据源 | 类型 | 股票 | 日期 | 可用性 |
73
+ | :----------------------------------------------------------: | :--: | :--: | :----------------------: | :----: |
74
+ | [AShare](https://github.com/JinanZou/Astock) | 新闻 | 3680 | 2018-07-01 到 2021-11-30 | √ |
75
+ | [stocknet-dataset](https://github.com/yumoxu/stocknet-dataset) | 推文 | 87 | 2014-01-02 到 2015-12-30 | √ |
76
+ | [CHRNN](https://github.com/wuhuizhe/CHRNN) | 推文 | 38 | 2017-01-03 到 2017-12-28 | √ |
77
+
78
+ ## Ⅲ. 模型
79
+
80
+ ![image-20230505200618504](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052006541.png)
81
+
82
+ * 在数据中心的自然语言处理领域,我们不需要从头开始训练模型。我们只需要调用API和进行轻量级的微调。
83
+ * 左边是一些可能会用到的LLM APIs,中间是我们可能用来进行微调的模型,右边是一些微调方法。
84
+
85
+ ### 1. 微调:Tensor Layers (LoRA)
86
+
87
+ ![image-20230505200944411](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052009480.png)
88
+
89
+ * 在FinGPT中,我们使用新的金融数据集对预训练的LLM进行微调。高质量的标记数据是许多成功的LLM(包括ChatGPT)的最重要的关键之一。
90
+ * 然而,这些高质量的标记数据通常非常昂贵和耗时,并且我们可能需要金融专家的帮助。
91
+ * 如果我们的目标是使用LLM分析与金融相关的文本数据并帮助量化交易,为什么不让市场为我们做标记呢?
92
+ * 因此,在这里,我们使用每个新闻相关的股票价格变化百分比作为输出标签,我们使用阈值将标签分成三组(积极的,消极的和中立的),并使用它们和新闻情感的标签。
93
+ * 相应地,在提示工程师部分,我们还要求模型选择其中一个正面的,负面的和中性的作为输出,以便我们充分利用预训练信息。
94
+ * 通过使用LoRA,我们可以将可训练参数减少从6.17B到3.67M。
95
+ * 如表格所示,与chatGLM相比,FinGPT可以在多个指标上实现大幅改善。然而,直接将我们的模型用于量化交易可能是不合适的。由于大多数新闻标题都是中性的,LLMs的大多数原始输出都是中性的,因此LLMs在积极和消极的标签上表现不佳,而这些标签可能对于量化交易是有用的。
96
+ * 然而,在微调之后,我们已经见证了在预测积极和消极标签方面的巨大改进。
97
+ * 这也是为什么该模型可以实现积极的交易结果的原因。
98
+
99
+ ### 2. 微调:强化学习在股价上的应用 (RLSP)
100
+
101
+ ![image-20230505201209946](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052012996.png)
102
+
103
+ * 同样地,我们可以使用股价上的强化学习(RLSP)来替换ChatGPT中使用的人类反馈上的强化学习。
104
+
105
+ ## Ⅳ. 应用
106
+
107
+ ### 1. 智能投顾
108
+
109
+ ![image-20230505201913233](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052019296.png)
110
+
111
+ * **ChatGPT可以像专业人士一样进行投资建议。**
112
+ * 在这个例子中,苹果的**股价上涨**与ChatGPT分析新闻的**预测相符**。
113
+
114
+ ### 2. 量化交易
115
+
116
+ ![image-20230505201841001](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052018035.png)
117
+
118
+ * 我们还可以使用新闻、社交媒体推文或者公司公告来**构建情感因子**,右侧的部分是由Twitter推文和ChatGPT信号产生的交易结果,数据来自于一个称为[stocknet-dataset](https://link.zhihu.com/?target=https%3A//github.com/yumoxu/stocknet-dataset)的数据集。
119
+ * 正如您从图片中所看到的,由ChatGPT生成的交易信号**非常出色**,我们甚至可以**仅通过根据Twitter情感因子交易而获得良好的结果**。
120
+ * 因此,我们可以通过**结合价格因素**来获得更好的结果。
121
+
122
+ ### 3. 低代码开发
123
+
124
+ ![image-20230505202028292](https://cdn.jsdelivr.net/gh/oliverwang15/imgbed@main/img/202305052020363.png)
125
+
126
+ * 我们可以使用LLMs的帮助来编写代码。
127
+ * 右侧显示了我们如何**快速高效地**开发我们的因子和其他代码。
FinNLP/docs/FinNLP/mkdocs.yml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ site_name: FinGPT & FinNLP
2
+ site_author: Oliver Wang, Xiao-yang Liu
3
+
4
+ nav:
5
+ - Hello World:
6
+ - About the project: 'index.md'
7
+
8
+ - FinGPT Models:
9
+ - FinGPT-v1: 'https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT-v1'
10
+ - FinGPT-v2: 'https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT-v2'
11
+ - FinGPT-v3: 'https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT-v3'
12
+
13
+ - Robo Advisor:
14
+ - chatgpt-robo-advisor-v1: 'https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/chatgpt-robo-advisor-v1'
15
+ - chatgpt-robo-advisor-v2: 'https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/chatgpt-robo-advisor-v2'
16
+
17
+ - Quantitative Trading:
18
+ - chatgpt-trading-v1: 'https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/chatgpt-trading-v1'
19
+ - chatgpt-trading-v2: 'https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/chatgpt-trading-v2'
20
+
21
+ - Low code development:
22
+ - chatgpt-low-code-development-v1: 'https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/chatgpt-low-code-development-v1'
23
+ - chatgpt-low-code-development-v2: 'https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/chatgpt-low-code-development-v2'
24
+
25
+ - Data Sources:
26
+ - News: jupyter/Data_Sources_News.ipynb
27
+ - Social Media: jupyter/Data_Sources_Social_Media.ipynb
28
+ - Company Announcement: jupyter/Data_Sources_Company_Announcement.ipynb
29
+
30
+ theme:
31
+ name: material
32
+
33
+ plugins:
34
+ - mkdocs-jupyter:
35
+ execute: false
36
+
37
+ extra:
38
+ alternate:
39
+ - name: English
40
+ link: /
41
+ lang: en
42
+ - name: 中文
43
+ link: /zh/
44
+ lang: zh
FinNLP/docs/FinNLP/site/404.html ADDED
@@ -0,0 +1,629 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <!doctype html>
3
+ <html lang="en" class="no-js">
4
+ <head>
5
+
6
+ <meta charset="utf-8">
7
+ <meta name="viewport" content="width=device-width,initial-scale=1">
8
+
9
+
10
+ <meta name="author" content="Oliver Wang, Xiao-yang Liu">
11
+
12
+
13
+
14
+
15
+ <link rel="icon" href="/assets/images/favicon.png">
16
+ <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-9.1.6">
17
+
18
+
19
+
20
+ <title>FinGPT & FinNLP</title>
21
+
22
+
23
+
24
+ <link rel="stylesheet" href="/assets/stylesheets/main.ded33207.min.css">
25
+
26
+
27
+ <link rel="stylesheet" href="/assets/stylesheets/palette.a0c5b2b5.min.css">
28
+
29
+
30
+
31
+
32
+
33
+
34
+
35
+
36
+
37
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
38
+ <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
39
+ <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
40
+
41
+
42
+
43
+ <script>__md_scope=new URL("/",location),__md_hash=e=>[...e].reduce((e,_)=>(e<<5)-e+_.charCodeAt(0),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
44
+
45
+
46
+
47
+
48
+
49
+
50
+ </head>
51
+
52
+
53
+
54
+
55
+
56
+
57
+
58
+ <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="" data-md-color-accent="">
59
+
60
+
61
+
62
+ <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
63
+ <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
64
+ <label class="md-overlay" for="__drawer"></label>
65
+ <div data-md-component="skip">
66
+
67
+ </div>
68
+ <div data-md-component="announce">
69
+
70
+ </div>
71
+
72
+
73
+
74
+
75
+
76
+
77
+ <header class="md-header md-header--shadow" data-md-component="header">
78
+ <nav class="md-header__inner md-grid" aria-label="Header">
79
+ <a href="/." title="FinGPT &amp; FinNLP" class="md-header__button md-logo" aria-label="FinGPT & FinNLP" data-md-component="logo">
80
+
81
+
82
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54Z"/></svg>
83
+
84
+ </a>
85
+ <label class="md-header__button md-icon" for="__drawer">
86
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
87
+ </label>
88
+ <div class="md-header__title" data-md-component="header-title">
89
+ <div class="md-header__ellipsis">
90
+ <div class="md-header__topic">
91
+ <span class="md-ellipsis">
92
+ FinGPT & FinNLP
93
+ </span>
94
+ </div>
95
+ <div class="md-header__topic" data-md-component="header-topic">
96
+ <span class="md-ellipsis">
97
+
98
+
99
+
100
+ </span>
101
+ </div>
102
+ </div>
103
+ </div>
104
+
105
+
106
+ <div class="md-header__option">
107
+ <div class="md-select">
108
+
109
+ <button class="md-header__button md-icon" aria-label="Select language">
110
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12.87 15.07-2.54-2.51.03-.03A17.52 17.52 0 0 0 14.07 6H17V4h-7V2H8v2H1v2h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11.76-2.04M18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2l-4.5-12m-2.62 7 1.62-4.33L19.12 17h-3.24Z"/></svg>
111
+ </button>
112
+ <div class="md-select__inner">
113
+ <ul class="md-select__list">
114
+
115
+ <li class="md-select__item">
116
+ <a href="/" hreflang="en" class="md-select__link">
117
+ English
118
+ </a>
119
+ </li>
120
+
121
+ <li class="md-select__item">
122
+ <a href="/zh/" hreflang="zh" class="md-select__link">
123
+ 中文
124
+ </a>
125
+ </li>
126
+
127
+ </ul>
128
+ </div>
129
+ </div>
130
+ </div>
131
+
132
+
133
+
134
+ </nav>
135
+
136
+ </header>
137
+
138
+ <div class="md-container" data-md-component="container">
139
+
140
+
141
+
142
+
143
+
144
+
145
+ <main class="md-main" data-md-component="main">
146
+ <div class="md-main__inner md-grid">
147
+
148
+
149
+
150
+ <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
151
+ <div class="md-sidebar__scrollwrap">
152
+ <div class="md-sidebar__inner">
153
+
154
+
155
+
156
+ <nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
157
+ <label class="md-nav__title" for="__drawer">
158
+ <a href="/." title="FinGPT &amp; FinNLP" class="md-nav__button md-logo" aria-label="FinGPT & FinNLP" data-md-component="logo">
159
+
160
+
161
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54Z"/></svg>
162
+
163
+ </a>
164
+ FinGPT & FinNLP
165
+ </label>
166
+
167
+ <ul class="md-nav__list" data-md-scrollfix>
168
+
169
+
170
+
171
+
172
+
173
+
174
+
175
+
176
+
177
+ <li class="md-nav__item md-nav__item--nested">
178
+
179
+
180
+
181
+
182
+ <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1" >
183
+
184
+
185
+
186
+ <label class="md-nav__link" for="__nav_1" id="__nav_1_label" tabindex="0">
187
+ Hello World
188
+ <span class="md-nav__icon md-icon"></span>
189
+ </label>
190
+
191
+ <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
192
+ <label class="md-nav__title" for="__nav_1">
193
+ <span class="md-nav__icon md-icon"></span>
194
+ Hello World
195
+ </label>
196
+ <ul class="md-nav__list" data-md-scrollfix>
197
+
198
+
199
+
200
+
201
+
202
+
203
+ <li class="md-nav__item">
204
+ <a href="/." class="md-nav__link">
205
+ About the project
206
+ </a>
207
+ </li>
208
+
209
+
210
+
211
+
212
+ </ul>
213
+ </nav>
214
+ </li>
215
+
216
+
217
+
218
+
219
+
220
+
221
+
222
+
223
+
224
+
225
+
226
+ <li class="md-nav__item md-nav__item--nested">
227
+
228
+
229
+
230
+
231
+ <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
232
+
233
+
234
+
235
+ <label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
236
+ FinGPT Models
237
+ <span class="md-nav__icon md-icon"></span>
238
+ </label>
239
+
240
+ <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
241
+ <label class="md-nav__title" for="__nav_2">
242
+ <span class="md-nav__icon md-icon"></span>
243
+ FinGPT Models
244
+ </label>
245
+ <ul class="md-nav__list" data-md-scrollfix>
246
+
247
+
248
+
249
+
250
+
251
+
252
+ <li class="md-nav__item">
253
+ <a href="https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT-v1" class="md-nav__link">
254
+ FinGPT-v1
255
+ </a>
256
+ </li>
257
+
258
+
259
+
260
+
261
+
262
+
263
+
264
+
265
+
266
+ <li class="md-nav__item">
267
+ <a href="https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT-v2" class="md-nav__link">
268
+ FinGPT-v2
269
+ </a>
270
+ </li>
271
+
272
+
273
+
274
+
275
+
276
+
277
+
278
+
279
+
280
+ <li class="md-nav__item">
281
+ <a href="https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT-v3" class="md-nav__link">
282
+ FinGPT-v3
283
+ </a>
284
+ </li>
285
+
286
+
287
+
288
+
289
+ </ul>
290
+ </nav>
291
+ </li>
292
+
293
+
294
+
295
+
296
+
297
+
298
+
299
+
300
+
301
+
302
+
303
+ <li class="md-nav__item md-nav__item--nested">
304
+
305
+
306
+
307
+
308
+ <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
309
+
310
+
311
+
312
+ <label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
313
+ Robo Advisor
314
+ <span class="md-nav__icon md-icon"></span>
315
+ </label>
316
+
317
+ <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
318
+ <label class="md-nav__title" for="__nav_3">
319
+ <span class="md-nav__icon md-icon"></span>
320
+ Robo Advisor
321
+ </label>
322
+ <ul class="md-nav__list" data-md-scrollfix>
323
+
324
+
325
+
326
+
327
+
328
+
329
+ <li class="md-nav__item">
330
+ <a href="https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/chatgpt-robo-advisor-v1" class="md-nav__link">
331
+ chatgpt-robo-advisor-v1
332
+ </a>
333
+ </li>
334
+
335
+
336
+
337
+
338
+
339
+
340
+
341
+
342
+
343
+ <li class="md-nav__item">
344
+ <a href="https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/chatgpt-robo-advisor-v2" class="md-nav__link">
345
+ chatgpt-robo-advisor-v2
346
+ </a>
347
+ </li>
348
+
349
+
350
+
351
+
352
+ </ul>
353
+ </nav>
354
+ </li>
355
+
356
+
357
+
358
+
359
+
360
+
361
+
362
+
363
+
364
+
365
+
366
+ <li class="md-nav__item md-nav__item--nested">
367
+
368
+
369
+
370
+
371
+ <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
372
+
373
+
374
+
375
+ <label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
376
+ Quantitative Trading
377
+ <span class="md-nav__icon md-icon"></span>
378
+ </label>
379
+
380
+ <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
381
+ <label class="md-nav__title" for="__nav_4">
382
+ <span class="md-nav__icon md-icon"></span>
383
+ Quantitative Trading
384
+ </label>
385
+ <ul class="md-nav__list" data-md-scrollfix>
386
+
387
+
388
+
389
+
390
+
391
+
392
+ <li class="md-nav__item">
393
+ <a href="https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/chatgpt-trading-v1" class="md-nav__link">
394
+ chatgpt-trading-v1
395
+ </a>
396
+ </li>
397
+
398
+
399
+
400
+
401
+
402
+
403
+
404
+
405
+
406
+ <li class="md-nav__item">
407
+ <a href="https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/chatgpt-trading-v2" class="md-nav__link">
408
+ chatgpt-trading-v2
409
+ </a>
410
+ </li>
411
+
412
+
413
+
414
+
415
+ </ul>
416
+ </nav>
417
+ </li>
418
+
419
+
420
+
421
+
422
+
423
+
424
+
425
+
426
+
427
+
428
+
429
+ <li class="md-nav__item md-nav__item--nested">
430
+
431
+
432
+
433
+
434
+ <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
435
+
436
+
437
+
438
+ <label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
439
+ Low code development
440
+ <span class="md-nav__icon md-icon"></span>
441
+ </label>
442
+
443
+ <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
444
+ <label class="md-nav__title" for="__nav_5">
445
+ <span class="md-nav__icon md-icon"></span>
446
+ Low code development
447
+ </label>
448
+ <ul class="md-nav__list" data-md-scrollfix>
449
+
450
+
451
+
452
+
453
+
454
+
455
+ <li class="md-nav__item">
456
+ <a href="https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/chatgpt-low-code-development-v1" class="md-nav__link">
457
+ chatgpt-low-code-development-v1
458
+ </a>
459
+ </li>
460
+
461
+
462
+
463
+
464
+
465
+
466
+
467
+
468
+
469
+ <li class="md-nav__item">
470
+ <a href="https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/chatgpt-low-code-development-v2" class="md-nav__link">
471
+ chatgpt-low-code-development-v2
472
+ </a>
473
+ </li>
474
+
475
+
476
+
477
+
478
+ </ul>
479
+ </nav>
480
+ </li>
481
+
482
+
483
+
484
+
485
+
486
+
487
+
488
+
489
+
490
+
491
+
492
+ <li class="md-nav__item md-nav__item--nested">
493
+
494
+
495
+
496
+
497
+ <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" >
498
+
499
+
500
+
501
+ <label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
502
+ Data Sources
503
+ <span class="md-nav__icon md-icon"></span>
504
+ </label>
505
+
506
+ <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="false">
507
+ <label class="md-nav__title" for="__nav_6">
508
+ <span class="md-nav__icon md-icon"></span>
509
+ Data Sources
510
+ </label>
511
+ <ul class="md-nav__list" data-md-scrollfix>
512
+
513
+
514
+
515
+
516
+
517
+
518
+ <li class="md-nav__item">
519
+ <a href="/jupyter/Data_Sources_News/" class="md-nav__link">
520
+ News
521
+ </a>
522
+ </li>
523
+
524
+
525
+
526
+
527
+
528
+
529
+
530
+
531
+
532
+ <li class="md-nav__item">
533
+ <a href="/jupyter/Data_Sources_Social_Media/" class="md-nav__link">
534
+ Social Media
535
+ </a>
536
+ </li>
537
+
538
+
539
+
540
+
541
+
542
+
543
+
544
+
545
+
546
+ <li class="md-nav__item">
547
+ <a href="/jupyter/Data_Sources_Company_Announcement/" class="md-nav__link">
548
+ Company Announcement
549
+ </a>
550
+ </li>
551
+
552
+
553
+
554
+
555
+ </ul>
556
+ </nav>
557
+ </li>
558
+
559
+
560
+
561
+ </ul>
562
+ </nav>
563
+ </div>
564
+ </div>
565
+ </div>
566
+
567
+
568
+
569
+ <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
570
+ <div class="md-sidebar__scrollwrap">
571
+ <div class="md-sidebar__inner">
572
+
573
+
574
+ <nav class="md-nav md-nav--secondary" aria-label="Table of contents">
575
+
576
+
577
+
578
+
579
+ </nav>
580
+ </div>
581
+ </div>
582
+ </div>
583
+
584
+
585
+
586
+ <div class="md-content" data-md-component="content">
587
+ <article class="md-content__inner md-typeset">
588
+
589
+ <h1>404 - Not found</h1>
590
+
591
+ </article>
592
+ </div>
593
+
594
+
595
+ </div>
596
+
597
+ </main>
598
+
599
+ <footer class="md-footer">
600
+
601
+ <div class="md-footer-meta md-typeset">
602
+ <div class="md-footer-meta__inner md-grid">
603
+ <div class="md-copyright">
604
+
605
+
606
+ Made with
607
+ <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
608
+ Material for MkDocs
609
+ </a>
610
+
611
+ </div>
612
+
613
+ </div>
614
+ </div>
615
+ </footer>
616
+
617
+ </div>
618
+ <div class="md-dialog" data-md-component="dialog">
619
+ <div class="md-dialog__inner md-typeset"></div>
620
+ </div>
621
+
622
+ <script id="__config" type="application/json">{"base": "/", "features": [], "search": "/assets/javascripts/workers/search.208ed371.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
623
+
624
+
625
+ <script src="/assets/javascripts/bundle.51198bba.min.js"></script>
626
+
627
+
628
+ </body>
629
+ </html>
FinNLP/docs/FinNLP/site/assets/images/favicon.png ADDED
FinNLP/docs/FinNLP/site/assets/javascripts/bundle.51198bba.min.js ADDED
The diff for this file is too large to render. See raw diff
 
FinNLP/docs/FinNLP/site/assets/javascripts/bundle.51198bba.min.js.map ADDED
The diff for this file is too large to render. See raw diff
 
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.ar.min.js ADDED
@@ -0,0 +1 @@
 
 
1
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.ar=function(){this.pipeline.reset(),this.pipeline.add(e.ar.trimmer,e.ar.stopWordFilter,e.ar.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.ar.stemmer))},e.ar.wordCharacters="ء-ٛٱـ",e.ar.trimmer=e.trimmerSupport.generateTrimmer(e.ar.wordCharacters),e.Pipeline.registerFunction(e.ar.trimmer,"trimmer-ar"),e.ar.stemmer=function(){var e=this;return e.result=!1,e.preRemoved=!1,e.sufRemoved=!1,e.pre={pre1:"ف ك ب و س ل ن ا ي ت",pre2:"ال لل",pre3:"بال وال فال تال كال ولل",pre4:"فبال كبال وبال وكال"},e.suf={suf1:"ه ك ت ن ا ي",suf2:"نك نه ها وك يا اه ون ين تن تم نا وا ان كم كن ني نن ما هم هن تك ته ات يه",suf3:"تين كهم نيه نهم ونه وها يهم ونا ونك وني وهم تكم تنا تها تني تهم كما كها ناه نكم هنا تان يها",suf4:"كموه ناها ونني ونهم تكما تموه تكاه كماه ناكم ناهم نيها 
وننا"},e.patterns=JSON.parse('{"pt43":[{"pt":[{"c":"ا","l":1}]},{"pt":[{"c":"ا,ت,ن,ي","l":0}],"mPt":[{"c":"ف","l":0,"m":1},{"c":"ع","l":1,"m":2},{"c":"ل","l":2,"m":3}]},{"pt":[{"c":"و","l":2}],"mPt":[{"c":"ف","l":0,"m":0},{"c":"ع","l":1,"m":1},{"c":"ل","l":2,"m":3}]},{"pt":[{"c":"ا","l":2}]},{"pt":[{"c":"ي","l":2}],"mPt":[{"c":"ف","l":0,"m":0},{"c":"ع","l":1,"m":1},{"c":"ا","l":2},{"c":"ل","l":3,"m":3}]},{"pt":[{"c":"م","l":0}]}],"pt53":[{"pt":[{"c":"ت","l":0},{"c":"ا","l":2}]},{"pt":[{"c":"ا,ن,ت,ي","l":0},{"c":"ت","l":2}],"mPt":[{"c":"ا","l":0},{"c":"ف","l":1,"m":1},{"c":"ت","l":2},{"c":"ع","l":3,"m":3},{"c":"ا","l":4},{"c":"ل","l":5,"m":4}]},{"pt":[{"c":"ا","l":0},{"c":"ا","l":2}],"mPt":[{"c":"ا","l":0},{"c":"ف","l":1,"m":1},{"c":"ع","l":2,"m":3},{"c":"ل","l":3,"m":4},{"c":"ا","l":4},{"c":"ل","l":5,"m":4}]},{"pt":[{"c":"ا","l":0},{"c":"ا","l":3}],"mPt":[{"c":"ف","l":0,"m":1},{"c":"ع","l":1,"m":2},{"c":"ل","l":2,"m":4}]},{"pt":[{"c":"ا","l":3},{"c":"ن","l":4}]},{"pt":[{"c":"ت","l":0},{"c":"ي","l":3}]},{"pt":[{"c":"م","l":0},{"c":"و","l":3}]},{"pt":[{"c":"ا","l":1},{"c":"و","l":3}]},{"pt":[{"c":"و","l":1},{"c":"ا","l":2}]},{"pt":[{"c":"م","l":0},{"c":"ا","l":3}]},{"pt":[{"c":"م","l":0},{"c":"ي","l":3}]},{"pt":[{"c":"ا","l":2},{"c":"ن","l":3}]},{"pt":[{"c":"م","l":0},{"c":"ن","l":1}],"mPt":[{"c":"ا","l":0},{"c":"ن","l":1},{"c":"ف","l":2,"m":2},{"c":"ع","l":3,"m":3},{"c":"ا","l":4},{"c":"ل","l":5,"m":4}]},{"pt":[{"c":"م","l":0},{"c":"ت","l":2}],"mPt":[{"c":"ا","l":0},{"c":"ف","l":1,"m":1},{"c":"ت","l":2},{"c":"ع","l":3,"m":3},{"c":"ا","l":4},{"c":"ل","l":5,"m":4}]},{"pt":[{"c":"م","l":0},{"c":"ا","l":2}]},{"pt":[{"c":"م","l":1},{"c":"ا","l":3}]},{"pt":[{"c":"ي,ت,ا,ن","l":0},{"c":"ت","l":1}],"mPt":[{"c":"ف","l":0,"m":2},{"c":"ع","l":1,"m":3},{"c":"ا","l":2},{"c":"ل","l":3,"m":4}]},{"pt":[{"c":"ت,ي,ا,ن","l":0},{"c":"ت","l":2}],"mPt":[{"c":"ا","l":0},{"c":"ف","l":1,"m":1},{"c":"ت","l":2},{"c":"ع","l":3,"m":3},{"c":"ا","l":4},{"c":"ل","l":5,"m":4}]},{"pt":[{"c":"ا","l":2}
,{"c":"ي","l":3}]},{"pt":[{"c":"ا,ي,ت,ن","l":0},{"c":"ن","l":1}],"mPt":[{"c":"ا","l":0},{"c":"ن","l":1},{"c":"ف","l":2,"m":2},{"c":"ع","l":3,"m":3},{"c":"ا","l":4},{"c":"ل","l":5,"m":4}]},{"pt":[{"c":"ا","l":3},{"c":"ء","l":4}]}],"pt63":[{"pt":[{"c":"ا","l":0},{"c":"ت","l":2},{"c":"ا","l":4}]},{"pt":[{"c":"ا,ت,ن,ي","l":0},{"c":"س","l":1},{"c":"ت","l":2}],"mPt":[{"c":"ا","l":0},{"c":"س","l":1},{"c":"ت","l":2},{"c":"ف","l":3,"m":3},{"c":"ع","l":4,"m":4},{"c":"ا","l":5},{"c":"ل","l":6,"m":5}]},{"pt":[{"c":"ا,ن,ت,ي","l":0},{"c":"و","l":3}]},{"pt":[{"c":"م","l":0},{"c":"س","l":1},{"c":"ت","l":2}],"mPt":[{"c":"ا","l":0},{"c":"س","l":1},{"c":"ت","l":2},{"c":"ف","l":3,"m":3},{"c":"ع","l":4,"m":4},{"c":"ا","l":5},{"c":"ل","l":6,"m":5}]},{"pt":[{"c":"ي","l":1},{"c":"ي","l":3},{"c":"ا","l":4},{"c":"ء","l":5}]},{"pt":[{"c":"ا","l":0},{"c":"ن","l":1},{"c":"ا","l":4}]}],"pt54":[{"pt":[{"c":"ت","l":0}]},{"pt":[{"c":"ا,ي,ت,ن","l":0}],"mPt":[{"c":"ا","l":0},{"c":"ف","l":1,"m":1},{"c":"ع","l":2,"m":2},{"c":"ل","l":3,"m":3},{"c":"ر","l":4,"m":4},{"c":"ا","l":5},{"c":"ر","l":6,"m":4}]},{"pt":[{"c":"م","l":0}],"mPt":[{"c":"ا","l":0},{"c":"ف","l":1,"m":1},{"c":"ع","l":2,"m":2},{"c":"ل","l":3,"m":3},{"c":"ر","l":4,"m":4},{"c":"ا","l":5},{"c":"ر","l":6,"m":4}]},{"pt":[{"c":"ا","l":2}]},{"pt":[{"c":"ا","l":0},{"c":"ن","l":2}]}],"pt64":[{"pt":[{"c":"ا","l":0},{"c":"ا","l":4}]},{"pt":[{"c":"م","l":0},{"c":"ت","l":1}]}],"pt73":[{"pt":[{"c":"ا","l":0},{"c":"س","l":1},{"c":"ت","l":2},{"c":"ا","l":5}]}],"pt75":[{"pt":[{"c":"ا","l":0},{"c":"ا","l":5}]}]}'),e.execArray=["cleanWord","removeDiacritics","cleanAlef","removeStopWords","normalizeHamzaAndAlef","removeStartWaw","removePre432","removeEndTaa","wordCheck"],e.stem=function(){var r=0;for(e.result=!1,e.preRemoved=!1,e.sufRemoved=!1;r<e.execArray.length&&1!=e.result;)e.result=e[e.execArray[r]](),r++},e.setCurrent=function(r){e.word=r},e.getCurrent=function(){return e.word},e.cleanWord=function(){var r=new RegExp("[^ء-ٛٱـ]");return 
e.word=e.word.replace(new RegExp("ـ","g"),""),!!r.test("")},e.removeDiacritics=function(){new RegExp("[ً-ٛ]");return e.word=e.word.replace(/[\u064b-\u065b]/gi,""),!1},e.cleanAlef=function(){var r=new RegExp("[آأإٱى]");return e.word=e.word.replace(r,"ا"),!1},e.removeStopWords=function(){if("، اض امين اه اها اي ا اب اجل اجمع اخ اخذ اصبح اضحى اقبل اقل اكثر الا ام اما امامك امامك امسى اما ان انا انت انتم انتما انتن انت انشا انى او اوشك اولئك اولئكم اولاء اولالك اوه اي ايا اين اينما اي ان اي اف اذ اذا اذا اذما اذن الى اليكم اليكما اليكن اليك اليك الا اما ان انما اي اياك اياكم اياكما اياكن ايانا اياه اياها اياهم اياهما اياهن اياي ايه ان ا ابتدا اثر اجل احد اخرى اخلولق اذا اربعة ارتد استحال اطار اعادة اعلنت اف اكثر اكد الالاء الالى الا الاخيرة الان الاول الاولى التى التي الثاني الثانية الذاتي الذى الذي الذين السابق الف اللائي اللاتي اللتان اللتيا اللتين اللذان اللذين اللواتي الماضي المقبل الوقت الى اليوم اما امام امس ان انبرى انقلب انه انها او اول اي ايار ايام ايضا ب بات باسم بان بخ برس بسبب بس بشكل بضع بطان بعد بعض بك بكم بكما بكن بل بلى بما بماذا بمن بن بنا به بها بي بيد بين بس بله بئس تان تانك تبدل تجاه تحول تلقاء تلك تلكم تلكما تم تينك تين ته تي ثلاثة ثم ثم ثمة ثم جعل جلل جميع جير حار حاشا حاليا حاي حتى حرى حسب حم حوالى حول حيث حيثما حين حي حبذا حتى حذار خلا خلال دون دونك ذا ذات ذاك ذانك ذان ذلك ذلكم ذلكما ذلكن ذو ذوا ذواتا ذواتي ذيت ذينك ذين ذه ذي راح رجع رويدك ريث رب زيارة سبحان سرعان سنة سنوات سوف سوى ساء ساءما شبه شخصا شرع شتان صار صباح صفر صه صه ضد ضمن طاق طالما طفق طق ظل عاد عام عاما عامة عدا عدة عدد عدم عسى عشر عشرة علق على عليك عليه عليها عل عن عند عندما عوض عين عدس عما غدا غير ف فان فلان فو فى في فيم فيما فيه فيها قال قام قبل قد قط قلما قوة كانما كاين كاي كاين كاد كان كانت كذا كذلك كرب كل كلا كلاهما كلتا كلم كليكما كليهما كلما كلا كم كما كي كيت كيف كيفما كان كخ لئن لا لات لاسيما لدن لدى لعمر لقاء لك لكم لكما لكن لكنما لكي لكيلا للامم لم لما لما لن لنا له لها لو لوكالة لولا لوما لي لست لست لستم لستما لستن لست لسن لعل لكن ليت ليس ليسا ليستا ليست ليسوا لسنا ما 
ماانفك مابرح مادام ماذا مازال مافتئ مايو متى مثل مذ مساء مع معاذ مقابل مكانكم مكانكما مكانكن مكانك مليار مليون مما ممن من منذ منها مه مهما من من نحن نحو نعم نفس نفسه نهاية نخ نعما نعم ها هاؤم هاك هاهنا هب هذا هذه هكذا هل هلم هلا هم هما هن هنا هناك هنالك هو هي هيا هيت هيا هؤلاء هاتان هاتين هاته هاتي هج هذا هذان هذين هذه هذي هيهات و وا واحد واضاف واضافت واكد وان واها واوضح وراءك وفي وقال وقالت وقد وقف وكان وكانت ولا ولم ومن وهو وهي ويكان وي وشكان يكون يمكن يوم ايان".split(" ").indexOf(e.word)>=0)return!0},e.normalizeHamzaAndAlef=function(){return e.word=e.word.replace("ؤ","ء"),e.word=e.word.replace("ئ","ء"),e.word=e.word.replace(/([\u0627])\1+/gi,"ا"),!1},e.removeEndTaa=function(){return!(e.word.length>2)||(e.word=e.word.replace(/[\u0627]$/,""),e.word=e.word.replace("ة",""),!1)},e.removeStartWaw=function(){return e.word.length>3&&"و"==e.word[0]&&"و"==e.word[1]&&(e.word=e.word.slice(1)),!1},e.removePre432=function(){var r=e.word;if(e.word.length>=7){var t=new RegExp("^("+e.pre.pre4.split(" ").join("|")+")");e.word=e.word.replace(t,"")}if(e.word==r&&e.word.length>=6){var c=new RegExp("^("+e.pre.pre3.split(" ").join("|")+")");e.word=e.word.replace(c,"")}if(e.word==r&&e.word.length>=5){var l=new RegExp("^("+e.pre.pre2.split(" ").join("|")+")");e.word=e.word.replace(l,"")}return r!=e.word&&(e.preRemoved=!0),!1},e.patternCheck=function(r){for(var t=0;t<r.length;t++){for(var c=!0,l=0;l<r[t].pt.length;l++){var n=r[t].pt[l].c.split(","),o=!1;if(n.forEach(function(c){e.word[r[t].pt[l].l]==c&&(o=!0)}),!o){c=!1;break}}if(1==c){if(r[t].mPt){for(var p=[],m=0;m<r[t].mPt.length;m++)null!=r[t].mPt[m].m?p[r[t].mPt[m].l]=e.word[r[t].mPt[m].m]:p[r[t].mPt[m].l]=r[t].mPt[m].c;e.word=p.join("")}e.result=!0;break}}},e.removePre1=function(){var r=e.word;if(0==e.preRemoved&&e.word.length>3){var t=new RegExp("^("+e.pre.pre1.split(" ").join("|")+")");e.word=e.word.replace(t,"")}return r!=e.word&&(e.preRemoved=!0),!1},e.removeSuf1=function(){var r=e.word;if(0==e.sufRemoved&&e.word.length>3){var 
t=new RegExp("("+e.suf.suf1.split(" ").join("|")+")$");e.word=e.word.replace(t,"")}return r!=e.word&&(e.sufRemoved=!0),!1},e.removeSuf432=function(){var r=e.word;if(e.word.length>=6){var t=new RegExp("("+e.suf.suf4.split(" ").join("|")+")$");e.word=e.word.replace(t,"")}if(e.word==r&&e.word.length>=5){var c=new RegExp("("+e.suf.suf3.split(" ").join("|")+")$");e.word=e.word.replace(c,"")}if(e.word==r&&e.word.length>=4){var l=new RegExp("("+e.suf.suf2.split(" ").join("|")+")$");e.word=e.word.replace(l,"")}return r!=e.word&&(e.sufRemoved=!0),!1},e.wordCheck=function(){for(var r=(e.word,[e.removeSuf432,e.removeSuf1,e.removePre1]),t=0,c=!1;e.word.length>=7&&!e.result&&t<r.length;)7!=e.word.length||c?(r[t](),t++,c=!1):(e.checkPattern73(),c=!0);var l=[e.checkPattern63,e.removeSuf432,e.removeSuf1,e.removePre1,e.checkPattern64];for(t=0;6==e.word.length&&!e.result&&t<l.length;)l[t](),t++;var n=[e.checkPattern53,e.removeSuf432,e.removeSuf1,e.removePre1,e.checkPattern54];for(t=0;5==e.word.length&&!e.result&&t<n.length;)n[t](),t++;var o=[e.checkPattern43,e.removeSuf1,e.removePre1,e.removeSuf432];for(t=0;4==e.word.length&&!e.result&&t<o.length;)o[t](),t++;return!0},e.checkPattern43=function(){e.patternCheck(e.patterns.pt43)},e.checkPattern53=function(){e.patternCheck(e.patterns.pt53)},e.checkPattern54=function(){e.patternCheck(e.patterns.pt54)},e.checkPattern63=function(){e.patternCheck(e.patterns.pt63)},e.checkPattern64=function(){e.patternCheck(e.patterns.pt64)},e.checkPattern73=function(){e.patternCheck(e.patterns.pt73)},function(r){return"function"==typeof r.update?r.update(function(r){return e.setCurrent(r),e.stem(),e.getCurrent()}):(e.setCurrent(r),e.stem(),e.getCurrent())}}(),e.Pipeline.registerFunction(e.ar.stemmer,"stemmer-ar"),e.ar.stopWordFilter=e.generateStopWordFilter("، اض امين اه اها اي ا اب اجل اجمع اخ اخذ اصبح اضحى اقبل اقل اكثر الا ام اما امامك امامك امسى اما ان انا انت انتم انتما انتن انت انشا انى او اوشك اولئك اولئكم اولاء اولالك اوه اي ايا اين اينما اي ان اي 
اف اذ اذا اذا اذما اذن الى اليكم اليكما اليكن اليك اليك الا اما ان انما اي اياك اياكم اياكما اياكن ايانا اياه اياها اياهم اياهما اياهن اياي ايه ان ا ابتدا اثر اجل احد اخرى اخلولق اذا اربعة ارتد استحال اطار اعادة اعلنت اف اكثر اكد الالاء الالى الا الاخيرة الان الاول الاولى التى التي الثاني الثانية الذاتي الذى الذي الذين السابق الف اللائي اللاتي اللتان اللتيا اللتين اللذان اللذين اللواتي الماضي المقبل الوقت الى اليوم اما امام امس ان انبرى انقلب انه انها او اول اي ايار ايام ايضا ب بات باسم بان بخ برس بسبب بس بشكل بضع بطان بعد بعض بك بكم بكما بكن بل بلى بما بماذا بمن بن بنا به بها بي بيد بين بس بله بئس تان تانك تبدل تجاه تحول تلقاء تلك تلكم تلكما تم تينك تين ته تي ثلاثة ثم ثم ثمة ثم جعل جلل جميع جير حار حاشا حاليا حاي حتى حرى حسب حم حوالى حول حيث حيثما حين حي حبذا حتى حذار خلا خلال دون دونك ذا ذات ذاك ذانك ذان ذلك ذلكم ذلكما ذلكن ذو ذوا ذواتا ذواتي ذيت ذينك ذين ذه ذي راح رجع رويدك ريث رب زيارة سبحان سرعان سنة سنوات سوف سوى ساء ساءما شبه شخصا شرع شتان صار صباح صفر صه صه ضد ضمن طاق طالما طفق طق ظل عاد عام عاما عامة عدا عدة عدد عدم عسى عش�� عشرة علق على عليك عليه عليها عل عن عند عندما عوض عين عدس عما غدا غير ف فان فلان فو فى في فيم فيما فيه فيها قال قام قبل قد قط قلما قوة كانما كاين كاي كاين كاد كان كانت كذا كذلك كرب كل كلا كلاهما كلتا كلم كليكما كليهما كلما كلا كم كما كي كيت كيف كيفما كان كخ لئن لا لات لاسيما لدن لدى لعمر لقاء لك لكم لكما لكن لكنما لكي لكيلا للامم لم لما لما لن لنا له لها لو لوكالة لولا لوما لي لست لست لستم لستما لستن لست لسن لعل لكن ليت ليس ليسا ليستا ليست ليسوا لسنا ما ماانفك مابرح مادام ماذا مازال مافتئ مايو متى مثل مذ مساء مع معاذ مقابل مكانكم مكانكما مكانكن مكانك مليار مليون مما ممن من منذ منها مه مهما من من نحن نحو نعم نفس نفسه نهاية نخ نعما نعم ها هاؤم هاك هاهنا هب هذا هذه هكذا هل هلم هلا هم هما هن هنا هناك هنالك هو هي هيا هيت هيا هؤلاء هاتان هاتين هاته هاتي هج هذا هذان هذين هذه هذي هيهات وا واحد واضاف واضافت واكد وان واها واوضح وراءك وفي وقال وقالت وقد وقف وكان وكانت ولا ولم ومن وهو وهي ويكان وي وشكان يكون يمكن يوم ايان".split(" 
")),e.Pipeline.registerFunction(e.ar.stopWordFilter,"stopWordFilter-ar")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.da.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `Danish` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.da=function(){this.pipeline.reset(),this.pipeline.add(e.da.trimmer,e.da.stopWordFilter,e.da.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.da.stemmer))},e.da.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",e.da.trimmer=e.trimmerSupport.generateTrimmer(e.da.wordCharacters),e.Pipeline.registerFunction(e.da.trimmer,"trimmer-da"),e.da.stemmer=function(){var r=e.stemmerSupport.Among,i=e.stemmerSupport.SnowballProgram,n=new function(){function e(){var e,r=f.cursor+3;if(d=f.limit,0<=r&&r<=f.limit){for(a=r;;){if(e=f.cursor,f.in_grouping(w,97,248)){f.cursor=e;break}if(f.cursor=e,e>=f.limit)return;f.cursor++}for(;!f.out_grouping(w,97,248);){if(f.cursor>=f.limit)return;f.cursor++}d=f.cursor,d<a&&(d=a)}}function n(){var e,r;if(f.cursor>=d&&(r=f.limit_backward,f.limit_backward=d,f.ket=f.cursor,e=f.find_among_b(c,32),f.limit_backward=r,e))switch(f.bra=f.cursor,e){case 1:f.slice_del();break;case 2:f.in_grouping_b(p,97,229)&&f.slice_del()}}function t(){var e,r=f.limit-f.cursor;f.cursor>=d&&(e=f.limit_backward,f.limit_backward=d,f.ket=f.cursor,f.find_among_b(l,4)?(f.bra=f.cursor,f.limit_backward=e,f.cursor=f.limit-r,f.cursor>f.limit_backward&&(f.cursor--,f.bra=f.cursor,f.slice_del())):f.limit_backward=e)}function s(){var e,r,i,n=f.limit-f.cursor;if(f.ket=f.cursor,f.eq_s_b(2,"st")&&(f.bra=f.cursor,f.eq_s_b(2,"ig")&&f.slice_del()),f.cursor=f.limit-n,f.cursor>=d&&(r=f.limit_backward,f.limit_backward=d,f.ket=f.cursor,e=f.find_among_b(m,5),f.limit_backward=r,e))switch(f.bra=f.cursor,e){case 
1:f.slice_del(),i=f.limit-f.cursor,t(),f.cursor=f.limit-i;break;case 2:f.slice_from("løs")}}function o(){var e;f.cursor>=d&&(e=f.limit_backward,f.limit_backward=d,f.ket=f.cursor,f.out_grouping_b(w,97,248)?(f.bra=f.cursor,u=f.slice_to(u),f.limit_backward=e,f.eq_v_b(u)&&f.slice_del()):f.limit_backward=e)}var a,d,u,c=[new r("hed",-1,1),new r("ethed",0,1),new r("ered",-1,1),new r("e",-1,1),new r("erede",3,1),new r("ende",3,1),new r("erende",5,1),new r("ene",3,1),new r("erne",3,1),new r("ere",3,1),new r("en",-1,1),new r("heden",10,1),new r("eren",10,1),new r("er",-1,1),new r("heder",13,1),new r("erer",13,1),new r("s",-1,2),new r("heds",16,1),new r("es",16,1),new r("endes",18,1),new r("erendes",19,1),new r("enes",18,1),new r("ernes",18,1),new r("eres",18,1),new r("ens",16,1),new r("hedens",24,1),new r("erens",24,1),new r("ers",16,1),new r("ets",16,1),new r("erets",28,1),new r("et",-1,1),new r("eret",30,1)],l=[new r("gd",-1,-1),new r("dt",-1,-1),new r("gt",-1,-1),new r("kt",-1,-1)],m=[new r("ig",-1,1),new r("lig",0,1),new r("elig",1,1),new r("els",-1,1),new r("løst",-1,2)],w=[17,65,16,1,0,0,0,0,0,0,0,0,0,0,0,0,48,0,128],p=[239,254,42,3,0,0,0,0,0,0,0,0,0,0,0,0,16],f=new i;this.setCurrent=function(e){f.setCurrent(e)},this.getCurrent=function(){return f.getCurrent()},this.stem=function(){var r=f.cursor;return e(),f.limit_backward=r,f.cursor=f.limit,n(),f.cursor=f.limit,t(),f.cursor=f.limit,s(),f.cursor=f.limit,o(),!0}};return function(e){return"function"==typeof e.update?e.update(function(e){return n.setCurrent(e),n.stem(),n.getCurrent()}):(n.setCurrent(e),n.stem(),n.getCurrent())}}(),e.Pipeline.registerFunction(e.da.stemmer,"stemmer-da"),e.da.stopWordFilter=e.generateStopWordFilter("ad af alle alt anden at blev blive bliver da de dem den denne der deres det dette dig din disse dog du efter eller en end er et for fra ham han hans har havde have hende hendes her hos hun hvad hvis hvor i ikke ind jeg jer jo kunne man mange med meget men mig min mine mit mod ned noget nogle nu 
når og også om op os over på selv sig sin sine sit skal skulle som sådan thi til ud under var vi vil ville vor være været".split(" ")),e.Pipeline.registerFunction(e.da.stopWordFilter,"stopWordFilter-da")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.de.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `German` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.de=function(){this.pipeline.reset(),this.pipeline.add(e.de.trimmer,e.de.stopWordFilter,e.de.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.de.stemmer))},e.de.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",e.de.trimmer=e.trimmerSupport.generateTrimmer(e.de.wordCharacters),e.Pipeline.registerFunction(e.de.trimmer,"trimmer-de"),e.de.stemmer=function(){var r=e.stemmerSupport.Among,n=e.stemmerSupport.SnowballProgram,i=new function(){function e(e,r,n){return!(!v.eq_s(1,e)||(v.ket=v.cursor,!v.in_grouping(p,97,252)))&&(v.slice_from(r),v.cursor=n,!0)}function i(){for(var r,n,i,s,t=v.cursor;;)if(r=v.cursor,v.bra=r,v.eq_s(1,"ß"))v.ket=v.cursor,v.slice_from("ss");else{if(r>=v.limit)break;v.cursor=r+1}for(v.cursor=t;;)for(n=v.cursor;;){if(i=v.cursor,v.in_grouping(p,97,252)){if(s=v.cursor,v.bra=s,e("u","U",i))break;if(v.cursor=s,e("y","Y",i))break}if(i>=v.limit)return void(v.cursor=n);v.cursor=i+1}}function s(){for(;!v.in_grouping(p,97,252);){if(v.cursor>=v.limit)return!0;v.cursor++}for(;!v.out_grouping(p,97,252);){if(v.cursor>=v.limit)return!0;v.cursor++}return!1}function t(){m=v.limit,l=m;var e=v.cursor+3;0<=e&&e<=v.limit&&(d=e,s()||(m=v.cursor,m<d&&(m=d),s()||(l=v.cursor)))}function o(){for(var e,r;;){if(r=v.cursor,v.bra=r,!(e=v.find_among(h,6)))return;switch(v.ket=v.cursor,e){case 1:v.slice_from("y");break;case 2:case 5:v.slice_from("u");break;case 3:v.slice_from("a");break;case 4:v.slice_from("o");break;case 6:if(v.cursor>=v.limit)return;v.cursor++}}}function c(){return 
m<=v.cursor}function u(){return l<=v.cursor}function a(){var e,r,n,i,s=v.limit-v.cursor;if(v.ket=v.cursor,(e=v.find_among_b(w,7))&&(v.bra=v.cursor,c()))switch(e){case 1:v.slice_del();break;case 2:v.slice_del(),v.ket=v.cursor,v.eq_s_b(1,"s")&&(v.bra=v.cursor,v.eq_s_b(3,"nis")&&v.slice_del());break;case 3:v.in_grouping_b(g,98,116)&&v.slice_del()}if(v.cursor=v.limit-s,v.ket=v.cursor,(e=v.find_among_b(f,4))&&(v.bra=v.cursor,c()))switch(e){case 1:v.slice_del();break;case 2:if(v.in_grouping_b(k,98,116)){var t=v.cursor-3;v.limit_backward<=t&&t<=v.limit&&(v.cursor=t,v.slice_del())}}if(v.cursor=v.limit-s,v.ket=v.cursor,(e=v.find_among_b(_,8))&&(v.bra=v.cursor,u()))switch(e){case 1:v.slice_del(),v.ket=v.cursor,v.eq_s_b(2,"ig")&&(v.bra=v.cursor,r=v.limit-v.cursor,v.eq_s_b(1,"e")||(v.cursor=v.limit-r,u()&&v.slice_del()));break;case 2:n=v.limit-v.cursor,v.eq_s_b(1,"e")||(v.cursor=v.limit-n,v.slice_del());break;case 3:if(v.slice_del(),v.ket=v.cursor,i=v.limit-v.cursor,!v.eq_s_b(2,"er")&&(v.cursor=v.limit-i,!v.eq_s_b(2,"en")))break;v.bra=v.cursor,c()&&v.slice_del();break;case 4:v.slice_del(),v.ket=v.cursor,e=v.find_among_b(b,2),e&&(v.bra=v.cursor,u()&&1==e&&v.slice_del())}}var d,l,m,h=[new r("",-1,6),new r("U",0,2),new r("Y",0,1),new r("ä",0,3),new r("ö",0,4),new r("ü",0,5)],w=[new r("e",-1,2),new r("em",-1,1),new r("en",-1,2),new r("ern",-1,1),new r("er",-1,1),new r("s",-1,3),new r("es",5,2)],f=[new r("en",-1,1),new r("er",-1,1),new r("st",-1,2),new r("est",2,1)],b=[new r("ig",-1,1),new r("lich",-1,1)],_=[new r("end",-1,1),new r("ig",-1,2),new r("ung",-1,1),new r("lich",-1,3),new r("isch",-1,2),new r("ik",-1,2),new r("heit",-1,3),new r("keit",-1,4)],p=[17,65,16,1,0,0,0,0,0,0,0,0,0,0,0,0,8,0,32,8],g=[117,30,5],k=[117,30,4],v=new n;this.setCurrent=function(e){v.setCurrent(e)},this.getCurrent=function(){return v.getCurrent()},this.stem=function(){var e=v.cursor;return i(),v.cursor=e,t(),v.limit_backward=e,v.cursor=v.limit,a(),v.cursor=v.limit_backward,o(),!0}};return 
function(e){return"function"==typeof e.update?e.update(function(e){return i.setCurrent(e),i.stem(),i.getCurrent()}):(i.setCurrent(e),i.stem(),i.getCurrent())}}(),e.Pipeline.registerFunction(e.de.stemmer,"stemmer-de"),e.de.stopWordFilter=e.generateStopWordFilter("aber alle allem allen aller alles als also am an ander andere anderem anderen anderer anderes anderm andern anderr anders auch auf aus bei bin bis bist da damit dann das dasselbe dazu daß dein deine deinem deinen deiner deines dem demselben den denn denselben der derer derselbe derselben des desselben dessen dich die dies diese dieselbe dieselben diesem diesen dieser dieses dir doch dort du durch ein eine einem einen einer eines einig einige einigem einigen einiger einiges einmal er es etwas euch euer eure eurem euren eurer eures für gegen gewesen hab habe haben hat hatte hatten hier hin hinter ich ihm ihn ihnen ihr ihre ihrem ihren ihrer ihres im in indem ins ist jede jedem jeden jeder jedes jene jenem jenen jener jenes jetzt kann kein keine keinem keinen keiner keines können könnte machen man manche manchem manchen mancher manches mein meine meinem meinen meiner meines mich mir mit muss musste nach nicht nichts noch nun nur ob oder ohne sehr sein seine seinem seinen seiner seines selbst sich sie sind so solche solchem solchen solcher solches soll sollte sondern sonst um und uns unse unsem unsen unser unses unter viel vom von vor war waren warst was weg weil weiter welche welchem welchen welcher welches wenn werde werden wie wieder will wir wird wirst wo wollen wollte während würde würden zu zum zur zwar zwischen über".split(" ")),e.Pipeline.registerFunction(e.de.stopWordFilter,"stopWordFilter-de")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.du.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `Dutch` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");console.warn('[Lunr Languages] Please use the "nl" instead of the "du". The "nl" code is the standard code for Dutch language, and "du" will be removed in the next major versions.'),e.du=function(){this.pipeline.reset(),this.pipeline.add(e.du.trimmer,e.du.stopWordFilter,e.du.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.du.stemmer))},e.du.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",e.du.trimmer=e.trimmerSupport.generateTrimmer(e.du.wordCharacters),e.Pipeline.registerFunction(e.du.trimmer,"trimmer-du"),e.du.stemmer=function(){var r=e.stemmerSupport.Among,i=e.stemmerSupport.SnowballProgram,n=new function(){function e(){for(var e,r,i,o=C.cursor;;){if(C.bra=C.cursor,e=C.find_among(b,11))switch(C.ket=C.cursor,e){case 1:C.slice_from("a");continue;case 2:C.slice_from("e");continue;case 3:C.slice_from("i");continue;case 4:C.slice_from("o");continue;case 5:C.slice_from("u");continue;case 6:if(C.cursor>=C.limit)break;C.cursor++;continue}break}for(C.cursor=o,C.bra=o,C.eq_s(1,"y")?(C.ket=C.cursor,C.slice_from("Y")):C.cursor=o;;)if(r=C.cursor,C.in_grouping(q,97,232)){if(i=C.cursor,C.bra=i,C.eq_s(1,"i"))C.ket=C.cursor,C.in_grouping(q,97,232)&&(C.slice_from("I"),C.cursor=r);else if(C.cursor=i,C.eq_s(1,"y"))C.ket=C.cursor,C.slice_from("Y"),C.cursor=r;else if(n(r))break}else if(n(r))break}function n(e){return C.cursor=e,e>=C.limit||(C.cursor++,!1)}function o(){_=C.limit,f=_,t()||(_=C.cursor,_<3&&(_=3),t()||(f=C.cursor))}function 
t(){for(;!C.in_grouping(q,97,232);){if(C.cursor>=C.limit)return!0;C.cursor++}for(;!C.out_grouping(q,97,232);){if(C.cursor>=C.limit)return!0;C.cursor++}return!1}function s(){for(var e;;)if(C.bra=C.cursor,e=C.find_among(p,3))switch(C.ket=C.cursor,e){case 1:C.slice_from("y");break;case 2:C.slice_from("i");break;case 3:if(C.cursor>=C.limit)return;C.cursor++}}function u(){return _<=C.cursor}function c(){return f<=C.cursor}function a(){var e=C.limit-C.cursor;C.find_among_b(g,3)&&(C.cursor=C.limit-e,C.ket=C.cursor,C.cursor>C.limit_backward&&(C.cursor--,C.bra=C.cursor,C.slice_del()))}function l(){var e;w=!1,C.ket=C.cursor,C.eq_s_b(1,"e")&&(C.bra=C.cursor,u()&&(e=C.limit-C.cursor,C.out_grouping_b(q,97,232)&&(C.cursor=C.limit-e,C.slice_del(),w=!0,a())))}function m(){var e;u()&&(e=C.limit-C.cursor,C.out_grouping_b(q,97,232)&&(C.cursor=C.limit-e,C.eq_s_b(3,"gem")||(C.cursor=C.limit-e,C.slice_del(),a())))}function d(){var e,r,i,n,o,t,s=C.limit-C.cursor;if(C.ket=C.cursor,e=C.find_among_b(h,5))switch(C.bra=C.cursor,e){case 1:u()&&C.slice_from("heid");break;case 2:m();break;case 3:u()&&C.out_grouping_b(z,97,232)&&C.slice_del()}if(C.cursor=C.limit-s,l(),C.cursor=C.limit-s,C.ket=C.cursor,C.eq_s_b(4,"heid")&&(C.bra=C.cursor,c()&&(r=C.limit-C.cursor,C.eq_s_b(1,"c")||(C.cursor=C.limit-r,C.slice_del(),C.ket=C.cursor,C.eq_s_b(2,"en")&&(C.bra=C.cursor,m())))),C.cursor=C.limit-s,C.ket=C.cursor,e=C.find_among_b(k,6))switch(C.bra=C.cursor,e){case 1:if(c()){if(C.slice_del(),i=C.limit-C.cursor,C.ket=C.cursor,C.eq_s_b(2,"ig")&&(C.bra=C.cursor,c()&&(n=C.limit-C.cursor,!C.eq_s_b(1,"e")))){C.cursor=C.limit-n,C.slice_del();break}C.cursor=C.limit-i,a()}break;case 2:c()&&(o=C.limit-C.cursor,C.eq_s_b(1,"e")||(C.cursor=C.limit-o,C.slice_del()));break;case 3:c()&&(C.slice_del(),l());break;case 4:c()&&C.slice_del();break;case 
5:c()&&w&&C.slice_del()}C.cursor=C.limit-s,C.out_grouping_b(j,73,232)&&(t=C.limit-C.cursor,C.find_among_b(v,4)&&C.out_grouping_b(q,97,232)&&(C.cursor=C.limit-t,C.ket=C.cursor,C.cursor>C.limit_backward&&(C.cursor--,C.bra=C.cursor,C.slice_del())))}var f,_,w,b=[new r("",-1,6),new r("á",0,1),new r("ä",0,1),new r("é",0,2),new r("ë",0,2),new r("í",0,3),new r("ï",0,3),new r("ó",0,4),new r("ö",0,4),new r("ú",0,5),new r("ü",0,5)],p=[new r("",-1,3),new r("I",0,2),new r("Y",0,1)],g=[new r("dd",-1,-1),new r("kk",-1,-1),new r("tt",-1,-1)],h=[new r("ene",-1,2),new r("se",-1,3),new r("en",-1,2),new r("heden",2,1),new r("s",-1,3)],k=[new r("end",-1,1),new r("ig",-1,2),new r("ing",-1,1),new r("lijk",-1,3),new r("baar",-1,4),new r("bar",-1,5)],v=[new r("aa",-1,-1),new r("ee",-1,-1),new r("oo",-1,-1),new r("uu",-1,-1)],q=[17,65,16,1,0,0,0,0,0,0,0,0,0,0,0,0,128],j=[1,0,0,17,65,16,1,0,0,0,0,0,0,0,0,0,0,0,0,128],z=[17,67,16,1,0,0,0,0,0,0,0,0,0,0,0,0,128],C=new i;this.setCurrent=function(e){C.setCurrent(e)},this.getCurrent=function(){return C.getCurrent()},this.stem=function(){var r=C.cursor;return e(),C.cursor=r,o(),C.limit_backward=r,C.cursor=C.limit,d(),C.cursor=C.limit_backward,s(),!0}};return function(e){return"function"==typeof e.update?e.update(function(e){return n.setCurrent(e),n.stem(),n.getCurrent()}):(n.setCurrent(e),n.stem(),n.getCurrent())}}(),e.Pipeline.registerFunction(e.du.stemmer,"stemmer-du"),e.du.stopWordFilter=e.generateStopWordFilter(" aan al alles als altijd andere ben bij daar dan dat de der deze die dit doch doen door dus een eens en er ge geen geweest haar had heb hebben heeft hem het hier hij hoe hun iemand iets ik in is ja je kan kon kunnen maar me meer men met mij mijn moet na naar niet niets nog nu of om omdat onder ons ook op over reeds te tegen toch toen tot u uit uw van veel voor want waren was wat werd wezen wie wil worden wordt zal ze zelf zich zij zijn zo zonder zou".split(" ")),e.Pipeline.registerFunction(e.du.stopWordFilter,"stopWordFilter-du")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.es.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `Spanish` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(e,s){"function"==typeof define&&define.amd?define(s):"object"==typeof exports?module.exports=s():s()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.es=function(){this.pipeline.reset(),this.pipeline.add(e.es.trimmer,e.es.stopWordFilter,e.es.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.es.stemmer))},e.es.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",e.es.trimmer=e.trimmerSupport.generateTrimmer(e.es.wordCharacters),e.Pipeline.registerFunction(e.es.trimmer,"trimmer-es"),e.es.stemmer=function(){var s=e.stemmerSupport.Among,r=e.stemmerSupport.SnowballProgram,n=new function(){function e(){if(A.out_grouping(x,97,252)){for(;!A.in_grouping(x,97,252);){if(A.cursor>=A.limit)return!0;A.cursor++}return!1}return!0}function n(){if(A.in_grouping(x,97,252)){var s=A.cursor;if(e()){if(A.cursor=s,!A.in_grouping(x,97,252))return!0;for(;!A.out_grouping(x,97,252);){if(A.cursor>=A.limit)return!0;A.cursor++}}return!1}return!0}function i(){var s,r=A.cursor;if(n()){if(A.cursor=r,!A.out_grouping(x,97,252))return;if(s=A.cursor,e()){if(A.cursor=s,!A.in_grouping(x,97,252)||A.cursor>=A.limit)return;A.cursor++}}g=A.cursor}function a(){for(;!A.in_grouping(x,97,252);){if(A.cursor>=A.limit)return!1;A.cursor++}for(;!A.out_grouping(x,97,252);){if(A.cursor>=A.limit)return!1;A.cursor++}return!0}function t(){var e=A.cursor;g=A.limit,p=g,v=g,i(),A.cursor=e,a()&&(p=A.cursor,a()&&(v=A.cursor))}function o(){for(var e;;){if(A.bra=A.cursor,e=A.find_among(k,6))switch(A.ket=A.cursor,e){case 1:A.slice_from("a");continue;case 2:A.slice_from("e");continue;case 3:A.slice_from("i");continue;case 4:A.slice_from("o");continue;case 5:A.slice_from("u");continue;case 
6:if(A.cursor>=A.limit)break;A.cursor++;continue}break}}function u(){return g<=A.cursor}function w(){return p<=A.cursor}function c(){return v<=A.cursor}function m(){var e;if(A.ket=A.cursor,A.find_among_b(y,13)&&(A.bra=A.cursor,(e=A.find_among_b(q,11))&&u()))switch(e){case 1:A.bra=A.cursor,A.slice_from("iendo");break;case 2:A.bra=A.cursor,A.slice_from("ando");break;case 3:A.bra=A.cursor,A.slice_from("ar");break;case 4:A.bra=A.cursor,A.slice_from("er");break;case 5:A.bra=A.cursor,A.slice_from("ir");break;case 6:A.slice_del();break;case 7:A.eq_s_b(1,"u")&&A.slice_del()}}function l(e,s){if(!c())return!0;A.slice_del(),A.ket=A.cursor;var r=A.find_among_b(e,s);return r&&(A.bra=A.cursor,1==r&&c()&&A.slice_del()),!1}function d(e){return!c()||(A.slice_del(),A.ket=A.cursor,A.eq_s_b(2,e)&&(A.bra=A.cursor,c()&&A.slice_del()),!1)}function b(){var e;if(A.ket=A.cursor,e=A.find_among_b(S,46)){switch(A.bra=A.cursor,e){case 1:if(!c())return!1;A.slice_del();break;case 2:if(d("ic"))return!1;break;case 3:if(!c())return!1;A.slice_from("log");break;case 4:if(!c())return!1;A.slice_from("u");break;case 5:if(!c())return!1;A.slice_from("ente");break;case 6:if(!w())return!1;A.slice_del(),A.ket=A.cursor,e=A.find_among_b(C,4),e&&(A.bra=A.cursor,c()&&(A.slice_del(),1==e&&(A.ket=A.cursor,A.eq_s_b(2,"at")&&(A.bra=A.cursor,c()&&A.slice_del()))));break;case 7:if(l(P,3))return!1;break;case 8:if(l(F,3))return!1;break;case 9:if(d("at"))return!1}return!0}return!1}function f(){var e,s;if(A.cursor>=g&&(s=A.limit_backward,A.limit_backward=g,A.ket=A.cursor,e=A.find_among_b(W,12),A.limit_backward=s,e)){if(A.bra=A.cursor,1==e){if(!A.eq_s_b(1,"u"))return!1;A.slice_del()}return!0}return!1}function _(){var e,s,r,n;if(A.cursor>=g&&(s=A.limit_backward,A.limit_backward=g,A.ket=A.cursor,e=A.find_among_b(L,96),A.limit_backward=s,e))switch(A.bra=A.cursor,e){case 1:r=A.limit-A.cursor,A.eq_s_b(1,"u")?(n=A.limit-A.cursor,A.eq_s_b(1,"g")?A.cursor=A.limit-n:A.cursor=A.limit-r):A.cursor=A.limit-r,A.bra=A.cursor;case 
2:A.slice_del()}}function h(){var e,s;if(A.ket=A.cursor,e=A.find_among_b(z,8))switch(A.bra=A.cursor,e){case 1:u()&&A.slice_del();break;case 2:u()&&(A.slice_del(),A.ket=A.cursor,A.eq_s_b(1,"u")&&(A.bra=A.cursor,s=A.limit-A.cursor,A.eq_s_b(1,"g")&&(A.cursor=A.limit-s,u()&&A.slice_del())))}}var v,p,g,k=[new s("",-1,6),new s("á",0,1),new s("é",0,2),new s("í",0,3),new s("ó",0,4),new s("ú",0,5)],y=[new s("la",-1,-1),new s("sela",0,-1),new s("le",-1,-1),new s("me",-1,-1),new s("se",-1,-1),new s("lo",-1,-1),new s("selo",5,-1),new s("las",-1,-1),new s("selas",7,-1),new s("les",-1,-1),new s("los",-1,-1),new s("selos",10,-1),new s("nos",-1,-1)],q=[new s("ando",-1,6),new s("iendo",-1,6),new s("yendo",-1,7),new s("ándo",-1,2),new s("iéndo",-1,1),new s("ar",-1,6),new s("er",-1,6),new s("ir",-1,6),new s("ár",-1,3),new s("ér",-1,4),new s("ír",-1,5)],C=[new s("ic",-1,-1),new s("ad",-1,-1),new s("os",-1,-1),new s("iv",-1,1)],P=[new s("able",-1,1),new s("ible",-1,1),new s("ante",-1,1)],F=[new s("ic",-1,1),new s("abil",-1,1),new s("iv",-1,1)],S=[new s("ica",-1,1),new s("ancia",-1,2),new s("encia",-1,5),new s("adora",-1,2),new s("osa",-1,1),new s("ista",-1,1),new s("iva",-1,9),new s("anza",-1,1),new s("logía",-1,3),new s("idad",-1,8),new s("able",-1,1),new s("ible",-1,1),new s("ante",-1,2),new s("mente",-1,7),new s("amente",13,6),new s("ación",-1,2),new s("ución",-1,4),new s("ico",-1,1),new s("ismo",-1,1),new s("oso",-1,1),new s("amiento",-1,1),new s("imiento",-1,1),new s("ivo",-1,9),new s("ador",-1,2),new s("icas",-1,1),new s("ancias",-1,2),new s("encias",-1,5),new s("adoras",-1,2),new s("osas",-1,1),new s("istas",-1,1),new s("ivas",-1,9),new s("anzas",-1,1),new s("logías",-1,3),new s("idades",-1,8),new s("ables",-1,1),new s("ibles",-1,1),new s("aciones",-1,2),new s("uciones",-1,4),new s("adores",-1,2),new s("antes",-1,2),new s("icos",-1,1),new s("ismos",-1,1),new s("osos",-1,1),new s("amientos",-1,1),new s("imientos",-1,1),new s("ivos",-1,9)],W=[new s("ya",-1,1),new s("ye",-1,1),new 
s("yan",-1,1),new s("yen",-1,1),new s("yeron",-1,1),new s("yendo",-1,1),new s("yo",-1,1),new s("yas",-1,1),new s("yes",-1,1),new s("yais",-1,1),new s("yamos",-1,1),new s("yó",-1,1)],L=[new s("aba",-1,2),new s("ada",-1,2),new s("ida",-1,2),new s("ara",-1,2),new s("iera",-1,2),new s("ía",-1,2),new s("aría",5,2),new s("ería",5,2),new s("iría",5,2),new s("ad",-1,2),new s("ed",-1,2),new s("id",-1,2),new s("ase",-1,2),new s("iese",-1,2),new s("aste",-1,2),new s("iste",-1,2),new s("an",-1,2),new s("aban",16,2),new s("aran",16,2),new s("ieran",16,2),new s("ían",16,2),new s("arían",20,2),new s("erían",20,2),new s("irían",20,2),new s("en",-1,1),new s("asen",24,2),new s("iesen",24,2),new s("aron",-1,2),new s("ieron",-1,2),new s("arán",-1,2),new s("erán",-1,2),new s("irán",-1,2),new s("ado",-1,2),new s("ido",-1,2),new s("ando",-1,2),new s("iendo",-1,2),new s("ar",-1,2),new s("er",-1,2),new s("ir",-1,2),new s("as",-1,2),new s("abas",39,2),new s("adas",39,2),new s("idas",39,2),new s("aras",39,2),new s("ieras",39,2),new s("ías",39,2),new s("arías",45,2),new s("erías",45,2),new s("irías",45,2),new s("es",-1,1),new s("ases",49,2),new s("ieses",49,2),new s("abais",-1,2),new s("arais",-1,2),new s("ierais",-1,2),new s("íais",-1,2),new s("aríais",55,2),new s("eríais",55,2),new s("iríais",55,2),new s("aseis",-1,2),new s("ieseis",-1,2),new s("asteis",-1,2),new s("isteis",-1,2),new s("áis",-1,2),new s("éis",-1,1),new s("aréis",64,2),new s("eréis",64,2),new s("iréis",64,2),new s("ados",-1,2),new s("idos",-1,2),new s("amos",-1,2),new s("ábamos",70,2),new s("áramos",70,2),new s("iéramos",70,2),new s("íamos",70,2),new s("aríamos",74,2),new s("eríamos",74,2),new s("iríamos",74,2),new s("emos",-1,1),new s("aremos",78,2),new s("eremos",78,2),new s("iremos",78,2),new s("ásemos",78,2),new s("iésemos",78,2),new s("imos",-1,2),new s("arás",-1,2),new s("erás",-1,2),new s("irás",-1,2),new s("ís",-1,2),new s("ará",-1,2),new s("erá",-1,2),new s("irá",-1,2),new s("aré",-1,2),new s("eré",-1,2),new 
s("iré",-1,2),new s("ió",-1,2)],z=[new s("a",-1,1),new s("e",-1,2),new s("o",-1,1),new s("os",-1,1),new s("á",-1,1),new s("é",-1,2),new s("í",-1,1),new s("ó",-1,1)],x=[17,65,16,0,0,0,0,0,0,0,0,0,0,0,0,0,1,17,4,10],A=new r;this.setCurrent=function(e){A.setCurrent(e)},this.getCurrent=function(){return A.getCurrent()},this.stem=function(){var e=A.cursor;return t(),A.limit_backward=e,A.cursor=A.limit,m(),A.cursor=A.limit,b()||(A.cursor=A.limit,f()||(A.cursor=A.limit,_())),A.cursor=A.limit,h(),A.cursor=A.limit_backward,o(),!0}};return function(e){return"function"==typeof e.update?e.update(function(e){return n.setCurrent(e),n.stem(),n.getCurrent()}):(n.setCurrent(e),n.stem(),n.getCurrent())}}(),e.Pipeline.registerFunction(e.es.stemmer,"stemmer-es"),e.es.stopWordFilter=e.generateStopWordFilter("a al algo algunas algunos ante antes como con contra cual cuando de del desde donde durante e el ella ellas ellos en entre era erais eran eras eres es esa esas ese eso esos esta estaba estabais estaban estabas estad estada estadas estado estados estamos estando estar estaremos estará estarán estarás estaré estaréis estaría estaríais estaríamos estarían estarías estas este estemos esto estos estoy estuve estuviera estuvierais estuvieran estuvieras estuvieron estuviese estuvieseis estuviesen estuvieses estuvimos estuviste estuvisteis estuviéramos estuviésemos estuvo está estábamos estáis están estás esté estéis estén estés fue fuera fuerais fueran fueras fueron fuese fueseis fuesen fueses fui fuimos fuiste fuisteis fuéramos fuésemos ha habida habidas habido habidos habiendo habremos habrá habrán habrás habré habréis habría habríais habríamos habrían habrías habéis había habíais habíamos habían habías han has hasta hay haya hayamos hayan hayas hayáis he hemos hube hubiera hubierais hubieran hubieras hubieron hubiese hubieseis hubiesen hubieses hubimos hubiste hubisteis hubiéramos hubiésemos hubo la las le les lo los me mi mis mucho muchos muy más mí mía mías mío míos nada ni no nos 
nosotras nosotros nuestra nuestras nuestro nuestros o os otra otras otro otros para pero poco por porque que quien quienes qué se sea seamos sean seas seremos será serán serás seré seréis sería seríais seríamos serían serías seáis sido siendo sin sobre sois somos son soy su sus suya suyas suyo suyos sí también tanto te tendremos tendrá tendrán tendrás tendré tendréis tendría tendríais tendríamos tendrían tendrías tened tenemos tenga tengamos tengan tengas tengo tengáis tenida tenidas tenido tenidos teniendo tenéis tenía teníais teníamos tenían tenías ti tiene tienen tienes todo todos tu tus tuve tuviera tuvierais tuvieran tuvieras tuvieron tuviese tuvieseis tuviesen tuvieses tuvimos tuviste tuvisteis tuviéramos tuviésemos tuvo tuya tuyas tuyo tuyos tú un una uno unos vosotras vosotros vuestra vuestras vuestro vuestros y ya yo él éramos".split(" ")),e.Pipeline.registerFunction(e.es.stopWordFilter,"stopWordFilter-es")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.fi.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `Finnish` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(i,e){"function"==typeof define&&define.amd?define(e):"object"==typeof exports?module.exports=e():e()(i.lunr)}(this,function(){return function(i){if(void 0===i)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===i.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");i.fi=function(){this.pipeline.reset(),this.pipeline.add(i.fi.trimmer,i.fi.stopWordFilter,i.fi.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(i.fi.stemmer))},i.fi.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",i.fi.trimmer=i.trimmerSupport.generateTrimmer(i.fi.wordCharacters),i.Pipeline.registerFunction(i.fi.trimmer,"trimmer-fi"),i.fi.stemmer=function(){var e=i.stemmerSupport.Among,r=i.stemmerSupport.SnowballProgram,n=new function(){function i(){f=A.limit,d=f,n()||(f=A.cursor,n()||(d=A.cursor))}function n(){for(var i;;){if(i=A.cursor,A.in_grouping(W,97,246))break;if(A.cursor=i,i>=A.limit)return!0;A.cursor++}for(A.cursor=i;!A.out_grouping(W,97,246);){if(A.cursor>=A.limit)return!0;A.cursor++}return!1}function t(){return d<=A.cursor}function s(){var i,e;if(A.cursor>=f)if(e=A.limit_backward,A.limit_backward=f,A.ket=A.cursor,i=A.find_among_b(h,10)){switch(A.bra=A.cursor,A.limit_backward=e,i){case 1:if(!A.in_grouping_b(x,97,246))return;break;case 2:if(!t())return}A.slice_del()}else A.limit_backward=e}function o(){var i,e,r;if(A.cursor>=f)if(e=A.limit_backward,A.limit_backward=f,A.ket=A.cursor,i=A.find_among_b(v,9))switch(A.bra=A.cursor,A.limit_backward=e,i){case 1:r=A.limit-A.cursor,A.eq_s_b(1,"k")||(A.cursor=A.limit-r,A.slice_del());break;case 2:A.slice_del(),A.ket=A.cursor,A.eq_s_b(3,"kse")&&(A.bra=A.cursor,A.slice_from("ksi"));break;case 3:A.slice_del();break;case 4:A.find_among_b(p,6)&&A.slice_del();break;case 5:A.find_among_b(g,6)&&A.slice_del();break;case 
6:A.find_among_b(j,2)&&A.slice_del()}else A.limit_backward=e}function l(){return A.find_among_b(q,7)}function a(){return A.eq_s_b(1,"i")&&A.in_grouping_b(L,97,246)}function u(){var i,e,r;if(A.cursor>=f)if(e=A.limit_backward,A.limit_backward=f,A.ket=A.cursor,i=A.find_among_b(C,30)){switch(A.bra=A.cursor,A.limit_backward=e,i){case 1:if(!A.eq_s_b(1,"a"))return;break;case 2:case 9:if(!A.eq_s_b(1,"e"))return;break;case 3:if(!A.eq_s_b(1,"i"))return;break;case 4:if(!A.eq_s_b(1,"o"))return;break;case 5:if(!A.eq_s_b(1,"ä"))return;break;case 6:if(!A.eq_s_b(1,"ö"))return;break;case 7:if(r=A.limit-A.cursor,!l()&&(A.cursor=A.limit-r,!A.eq_s_b(2,"ie"))){A.cursor=A.limit-r;break}if(A.cursor=A.limit-r,A.cursor<=A.limit_backward){A.cursor=A.limit-r;break}A.cursor--,A.bra=A.cursor;break;case 8:if(!A.in_grouping_b(W,97,246)||!A.out_grouping_b(W,97,246))return}A.slice_del(),k=!0}else A.limit_backward=e}function c(){var i,e,r;if(A.cursor>=d)if(e=A.limit_backward,A.limit_backward=d,A.ket=A.cursor,i=A.find_among_b(P,14)){if(A.bra=A.cursor,A.limit_backward=e,1==i){if(r=A.limit-A.cursor,A.eq_s_b(2,"po"))return;A.cursor=A.limit-r}A.slice_del()}else A.limit_backward=e}function m(){var i;A.cursor>=f&&(i=A.limit_backward,A.limit_backward=f,A.ket=A.cursor,A.find_among_b(F,2)?(A.bra=A.cursor,A.limit_backward=i,A.slice_del()):A.limit_backward=i)}function w(){var i,e,r,n,t,s;if(A.cursor>=f){if(e=A.limit_backward,A.limit_backward=f,A.ket=A.cursor,A.eq_s_b(1,"t")&&(A.bra=A.cursor,r=A.limit-A.cursor,A.in_grouping_b(W,97,246)&&(A.cursor=A.limit-r,A.slice_del(),A.limit_backward=e,n=A.limit-A.cursor,A.cursor>=d&&(A.cursor=d,t=A.limit_backward,A.limit_backward=A.cursor,A.cursor=A.limit-n,A.ket=A.cursor,i=A.find_among_b(S,2))))){if(A.bra=A.cursor,A.limit_backward=t,1==i){if(s=A.limit-A.cursor,A.eq_s_b(2,"po"))return;A.cursor=A.limit-s}return void A.slice_del()}A.limit_backward=e}}function _(){var 
i,e,r,n;if(A.cursor>=f){for(i=A.limit_backward,A.limit_backward=f,e=A.limit-A.cursor,l()&&(A.cursor=A.limit-e,A.ket=A.cursor,A.cursor>A.limit_backward&&(A.cursor--,A.bra=A.cursor,A.slice_del())),A.cursor=A.limit-e,A.ket=A.cursor,A.in_grouping_b(y,97,228)&&(A.bra=A.cursor,A.out_grouping_b(W,97,246)&&A.slice_del()),A.cursor=A.limit-e,A.ket=A.cursor,A.eq_s_b(1,"j")&&(A.bra=A.cursor,r=A.limit-A.cursor,A.eq_s_b(1,"o")?A.slice_del():(A.cursor=A.limit-r,A.eq_s_b(1,"u")&&A.slice_del())),A.cursor=A.limit-e,A.ket=A.cursor,A.eq_s_b(1,"o")&&(A.bra=A.cursor,A.eq_s_b(1,"j")&&A.slice_del()),A.cursor=A.limit-e,A.limit_backward=i;;){if(n=A.limit-A.cursor,A.out_grouping_b(W,97,246)){A.cursor=A.limit-n;break}if(A.cursor=A.limit-n,A.cursor<=A.limit_backward)return;A.cursor--}A.ket=A.cursor,A.cursor>A.limit_backward&&(A.cursor--,A.bra=A.cursor,b=A.slice_to(),A.eq_v_b(b)&&A.slice_del())}}var k,b,d,f,h=[new e("pa",-1,1),new e("sti",-1,2),new e("kaan",-1,1),new e("han",-1,1),new e("kin",-1,1),new e("hän",-1,1),new e("kään",-1,1),new e("ko",-1,1),new e("pä",-1,1),new e("kö",-1,1)],p=[new e("lla",-1,-1),new e("na",-1,-1),new e("ssa",-1,-1),new e("ta",-1,-1),new e("lta",3,-1),new e("sta",3,-1)],g=[new e("llä",-1,-1),new e("nä",-1,-1),new e("ssä",-1,-1),new e("tä",-1,-1),new e("ltä",3,-1),new e("stä",3,-1)],j=[new e("lle",-1,-1),new e("ine",-1,-1)],v=[new e("nsa",-1,3),new e("mme",-1,3),new e("nne",-1,3),new e("ni",-1,2),new e("si",-1,1),new e("an",-1,4),new e("en",-1,6),new e("än",-1,5),new e("nsä",-1,3)],q=[new e("aa",-1,-1),new e("ee",-1,-1),new e("ii",-1,-1),new e("oo",-1,-1),new e("uu",-1,-1),new e("ää",-1,-1),new e("öö",-1,-1)],C=[new e("a",-1,8),new e("lla",0,-1),new e("na",0,-1),new e("ssa",0,-1),new e("ta",0,-1),new e("lta",4,-1),new e("sta",4,-1),new e("tta",4,9),new e("lle",-1,-1),new e("ine",-1,-1),new e("ksi",-1,-1),new e("n",-1,7),new e("han",11,1),new e("den",11,-1,a),new e("seen",11,-1,l),new e("hen",11,2),new e("tten",11,-1,a),new e("hin",11,3),new e("siin",11,-1,a),new 
e("hon",11,4),new e("hän",11,5),new e("hön",11,6),new e("ä",-1,8),new e("llä",22,-1),new e("nä",22,-1),new e("ssä",22,-1),new e("tä",22,-1),new e("ltä",26,-1),new e("stä",26,-1),new e("ttä",26,9)],P=[new e("eja",-1,-1),new e("mma",-1,1),new e("imma",1,-1),new e("mpa",-1,1),new e("impa",3,-1),new e("mmi",-1,1),new e("immi",5,-1),new e("mpi",-1,1),new e("impi",7,-1),new e("ejä",-1,-1),new e("mmä",-1,1),new e("immä",10,-1),new e("mpä",-1,1),new e("impä",12,-1)],F=[new e("i",-1,-1),new e("j",-1,-1)],S=[new e("mma",-1,1),new e("imma",0,-1)],y=[17,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8],W=[17,65,16,1,0,0,0,0,0,0,0,0,0,0,0,0,8,0,32],L=[17,65,16,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,32],x=[17,97,24,1,0,0,0,0,0,0,0,0,0,0,0,0,8,0,32],A=new r;this.setCurrent=function(i){A.setCurrent(i)},this.getCurrent=function(){return A.getCurrent()},this.stem=function(){var e=A.cursor;return i(),k=!1,A.limit_backward=e,A.cursor=A.limit,s(),A.cursor=A.limit,o(),A.cursor=A.limit,u(),A.cursor=A.limit,c(),A.cursor=A.limit,k?(m(),A.cursor=A.limit):(A.cursor=A.limit,w(),A.cursor=A.limit),_(),!0}};return function(i){return"function"==typeof i.update?i.update(function(i){return n.setCurrent(i),n.stem(),n.getCurrent()}):(n.setCurrent(i),n.stem(),n.getCurrent())}}(),i.Pipeline.registerFunction(i.fi.stemmer,"stemmer-fi"),i.fi.stopWordFilter=i.generateStopWordFilter("ei eivät emme en et ette että he heidän heidät heihin heille heillä heiltä heissä heistä heitä hän häneen hänelle hänellä häneltä hänen hänessä hänestä hänet häntä itse ja johon joiden joihin joiksi joilla joille joilta joina joissa joista joita joka joksi jolla jolle jolta jona jonka jos jossa josta jota jotka kanssa keiden keihin keiksi keille keillä keiltä keinä keissä keistä keitä keneen keneksi kenelle kenellä keneltä kenen kenenä kenessä kenestä kenet ketkä ketkä ketä koska kuin kuka kun me meidän meidät meihin meille meillä meiltä meissä meistä meitä mihin miksi mikä mille millä miltä minkä minkä minua minulla minulle minulta minun minussa 
minusta minut minuun minä minä missä mistä mitkä mitä mukaan mutta ne niiden niihin niiksi niille niillä niiltä niin niin niinä niissä niistä niitä noiden noihin noiksi noilla noille noilta noin noina noissa noista noita nuo nyt näiden näihin näiksi näille näillä näiltä näinä näissä näistä näitä nämä ole olemme olen olet olette oli olimme olin olisi olisimme olisin olisit olisitte olisivat olit olitte olivat olla olleet ollut on ovat poikki se sekä sen siihen siinä siitä siksi sille sillä sillä siltä sinua sinulla sinulle sinulta sinun sinussa sinusta sinut sinuun sinä sinä sitä tai te teidän teidät teihin teille teillä teiltä teissä teistä teitä tuo tuohon tuoksi tuolla tuolle tuolta tuon tuona tuossa tuosta tuota tähän täksi tälle tällä tältä tämä tämän tänä tässä tästä tätä vaan vai vaikka yli".split(" ")),i.Pipeline.registerFunction(i.fi.stopWordFilter,"stopWordFilter-fi")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.fr.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `French` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.fr=function(){this.pipeline.reset(),this.pipeline.add(e.fr.trimmer,e.fr.stopWordFilter,e.fr.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.fr.stemmer))},e.fr.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",e.fr.trimmer=e.trimmerSupport.generateTrimmer(e.fr.wordCharacters),e.Pipeline.registerFunction(e.fr.trimmer,"trimmer-fr"),e.fr.stemmer=function(){var r=e.stemmerSupport.Among,s=e.stemmerSupport.SnowballProgram,i=new function(){function e(e,r,s){return!(!W.eq_s(1,e)||(W.ket=W.cursor,!W.in_grouping(F,97,251)))&&(W.slice_from(r),W.cursor=s,!0)}function i(e,r,s){return!!W.eq_s(1,e)&&(W.ket=W.cursor,W.slice_from(r),W.cursor=s,!0)}function n(){for(var r,s;;){if(r=W.cursor,W.in_grouping(F,97,251)){if(W.bra=W.cursor,s=W.cursor,e("u","U",r))continue;if(W.cursor=s,e("i","I",r))continue;if(W.cursor=s,i("y","Y",r))continue}if(W.cursor=r,W.bra=r,!e("y","Y",r)){if(W.cursor=r,W.eq_s(1,"q")&&(W.bra=W.cursor,i("u","U",r)))continue;if(W.cursor=r,r>=W.limit)return;W.cursor++}}}function t(){for(;!W.in_grouping(F,97,251);){if(W.cursor>=W.limit)return!0;W.cursor++}for(;!W.out_grouping(F,97,251);){if(W.cursor>=W.limit)return!0;W.cursor++}return!1}function u(){var e=W.cursor;if(q=W.limit,g=q,p=q,W.in_grouping(F,97,251)&&W.in_grouping(F,97,251)&&W.cursor<W.limit)W.cursor++;else if(W.cursor=e,!W.find_among(v,3)){W.cursor=e;do{if(W.cursor>=W.limit){W.cursor=q;break}W.cursor++}while(!W.in_grouping(F,97,251))}q=W.cursor,W.cursor=e,t()||(g=W.cursor,t()||(p=W.cursor))}function o(){for(var 
e,r;;){if(r=W.cursor,W.bra=r,!(e=W.find_among(h,4)))break;switch(W.ket=W.cursor,e){case 1:W.slice_from("i");break;case 2:W.slice_from("u");break;case 3:W.slice_from("y");break;case 4:if(W.cursor>=W.limit)return;W.cursor++}}}function c(){return q<=W.cursor}function a(){return g<=W.cursor}function l(){return p<=W.cursor}function w(){var e,r;if(W.ket=W.cursor,e=W.find_among_b(C,43)){switch(W.bra=W.cursor,e){case 1:if(!l())return!1;W.slice_del();break;case 2:if(!l())return!1;W.slice_del(),W.ket=W.cursor,W.eq_s_b(2,"ic")&&(W.bra=W.cursor,l()?W.slice_del():W.slice_from("iqU"));break;case 3:if(!l())return!1;W.slice_from("log");break;case 4:if(!l())return!1;W.slice_from("u");break;case 5:if(!l())return!1;W.slice_from("ent");break;case 6:if(!c())return!1;if(W.slice_del(),W.ket=W.cursor,e=W.find_among_b(z,6))switch(W.bra=W.cursor,e){case 1:l()&&(W.slice_del(),W.ket=W.cursor,W.eq_s_b(2,"at")&&(W.bra=W.cursor,l()&&W.slice_del()));break;case 2:l()?W.slice_del():a()&&W.slice_from("eux");break;case 3:l()&&W.slice_del();break;case 4:c()&&W.slice_from("i")}break;case 7:if(!l())return!1;if(W.slice_del(),W.ket=W.cursor,e=W.find_among_b(y,3))switch(W.bra=W.cursor,e){case 1:l()?W.slice_del():W.slice_from("abl");break;case 2:l()?W.slice_del():W.slice_from("iqU");break;case 3:l()&&W.slice_del()}break;case 8:if(!l())return!1;if(W.slice_del(),W.ket=W.cursor,W.eq_s_b(2,"at")&&(W.bra=W.cursor,l()&&(W.slice_del(),W.ket=W.cursor,W.eq_s_b(2,"ic")))){W.bra=W.cursor,l()?W.slice_del():W.slice_from("iqU");break}break;case 9:W.slice_from("eau");break;case 10:if(!a())return!1;W.slice_from("al");break;case 11:if(l())W.slice_del();else{if(!a())return!1;W.slice_from("eux")}break;case 12:if(!a()||!W.out_grouping_b(F,97,251))return!1;W.slice_del();break;case 13:return c()&&W.slice_from("ant"),!1;case 14:return c()&&W.slice_from("ent"),!1;case 15:return r=W.limit-W.cursor,W.in_grouping_b(F,97,251)&&c()&&(W.cursor=W.limit-r,W.slice_del()),!1}return!0}return!1}function f(){var 
e,r;if(W.cursor<q)return!1;if(r=W.limit_backward,W.limit_backward=q,W.ket=W.cursor,!(e=W.find_among_b(x,35)))return W.limit_backward=r,!1;if(W.bra=W.cursor,1==e){if(!W.out_grouping_b(F,97,251))return W.limit_backward=r,!1;W.slice_del()}return W.limit_backward=r,!0}function m(){var e,r,s;if(W.cursor<q)return!1;if(r=W.limit_backward,W.limit_backward=q,W.ket=W.cursor,!(e=W.find_among_b(I,38)))return W.limit_backward=r,!1;switch(W.bra=W.cursor,e){case 1:if(!l())return W.limit_backward=r,!1;W.slice_del();break;case 2:W.slice_del();break;case 3:W.slice_del(),s=W.limit-W.cursor,W.ket=W.cursor,W.eq_s_b(1,"e")?(W.bra=W.cursor,W.slice_del()):W.cursor=W.limit-s}return W.limit_backward=r,!0}function _(){var e,r,s,i,n=W.limit-W.cursor;if(W.ket=W.cursor,W.eq_s_b(1,"s")?(W.bra=W.cursor,r=W.limit-W.cursor,W.out_grouping_b(S,97,232)?(W.cursor=W.limit-r,W.slice_del()):W.cursor=W.limit-n):W.cursor=W.limit-n,W.cursor>=q){if(s=W.limit_backward,W.limit_backward=q,W.ket=W.cursor,e=W.find_among_b(P,7))switch(W.bra=W.cursor,e){case 1:if(l()){if(i=W.limit-W.cursor,!W.eq_s_b(1,"s")&&(W.cursor=W.limit-i,!W.eq_s_b(1,"t")))break;W.slice_del()}break;case 2:W.slice_from("i");break;case 3:W.slice_del();break;case 4:W.eq_s_b(2,"gu")&&W.slice_del()}W.limit_backward=s}}function b(){var e=W.limit-W.cursor;W.find_among_b(U,5)&&(W.cursor=W.limit-e,W.ket=W.cursor,W.cursor>W.limit_backward&&(W.cursor--,W.bra=W.cursor,W.slice_del()))}function d(){for(var e,r=1;W.out_grouping_b(F,97,251);)r--;if(r<=0){if(W.ket=W.cursor,e=W.limit-W.cursor,!W.eq_s_b(1,"é")&&(W.cursor=W.limit-e,!W.eq_s_b(1,"è")))return;W.bra=W.cursor,W.slice_from("e")}}function k(){if(!w()&&(W.cursor=W.limit,!f()&&(W.cursor=W.limit,!m())))return W.cursor=W.limit,void _();W.cursor=W.limit,W.ket=W.cursor,W.eq_s_b(1,"Y")?(W.bra=W.cursor,W.slice_from("i")):(W.cursor=W.limit,W.eq_s_b(1,"ç")&&(W.bra=W.cursor,W.slice_from("c")))}var p,g,q,v=[new r("col",-1,-1),new r("par",-1,-1),new r("tap",-1,-1)],h=[new r("",-1,4),new r("I",0,1),new r("U",0,2),new 
r("Y",0,3)],z=[new r("iqU",-1,3),new r("abl",-1,3),new r("Ièr",-1,4),new r("ièr",-1,4),new r("eus",-1,2),new r("iv",-1,1)],y=[new r("ic",-1,2),new r("abil",-1,1),new r("iv",-1,3)],C=[new r("iqUe",-1,1),new r("atrice",-1,2),new r("ance",-1,1),new r("ence",-1,5),new r("logie",-1,3),new r("able",-1,1),new r("isme",-1,1),new r("euse",-1,11),new r("iste",-1,1),new r("ive",-1,8),new r("if",-1,8),new r("usion",-1,4),new r("ation",-1,2),new r("ution",-1,4),new r("ateur",-1,2),new r("iqUes",-1,1),new r("atrices",-1,2),new r("ances",-1,1),new r("ences",-1,5),new r("logies",-1,3),new r("ables",-1,1),new r("ismes",-1,1),new r("euses",-1,11),new r("istes",-1,1),new r("ives",-1,8),new r("ifs",-1,8),new r("usions",-1,4),new r("ations",-1,2),new r("utions",-1,4),new r("ateurs",-1,2),new r("ments",-1,15),new r("ements",30,6),new r("issements",31,12),new r("ités",-1,7),new r("ment",-1,15),new r("ement",34,6),new r("issement",35,12),new r("amment",34,13),new r("emment",34,14),new r("aux",-1,10),new r("eaux",39,9),new r("eux",-1,1),new r("ité",-1,7)],x=[new r("ira",-1,1),new r("ie",-1,1),new r("isse",-1,1),new r("issante",-1,1),new r("i",-1,1),new r("irai",4,1),new r("ir",-1,1),new r("iras",-1,1),new r("ies",-1,1),new r("îmes",-1,1),new r("isses",-1,1),new r("issantes",-1,1),new r("îtes",-1,1),new r("is",-1,1),new r("irais",13,1),new r("issais",13,1),new r("irions",-1,1),new r("issions",-1,1),new r("irons",-1,1),new r("issons",-1,1),new r("issants",-1,1),new r("it",-1,1),new r("irait",21,1),new r("issait",21,1),new r("issant",-1,1),new r("iraIent",-1,1),new r("issaIent",-1,1),new r("irent",-1,1),new r("issent",-1,1),new r("iront",-1,1),new r("ît",-1,1),new r("iriez",-1,1),new r("issiez",-1,1),new r("irez",-1,1),new r("issez",-1,1)],I=[new r("a",-1,3),new r("era",0,2),new r("asse",-1,3),new r("ante",-1,3),new r("ée",-1,2),new r("ai",-1,3),new r("erai",5,2),new r("er",-1,2),new r("as",-1,3),new r("eras",8,2),new r("âmes",-1,3),new r("asses",-1,3),new r("antes",-1,3),new 
r("âtes",-1,3),new r("ées",-1,2),new r("ais",-1,3),new r("erais",15,2),new r("ions",-1,1),new r("erions",17,2),new r("assions",17,3),new r("erons",-1,2),new r("ants",-1,3),new r("és",-1,2),new r("ait",-1,3),new r("erait",23,2),new r("ant",-1,3),new r("aIent",-1,3),new r("eraIent",26,2),new r("èrent",-1,2),new r("assent",-1,3),new r("eront",-1,2),new r("ât",-1,3),new r("ez",-1,2),new r("iez",32,2),new r("eriez",33,2),new r("assiez",33,3),new r("erez",32,2),new r("é",-1,2)],P=[new r("e",-1,3),new r("Ière",0,2),new r("ière",0,2),new r("ion",-1,1),new r("Ier",-1,2),new r("ier",-1,2),new r("ë",-1,4)],U=[new r("ell",-1,-1),new r("eill",-1,-1),new r("enn",-1,-1),new r("onn",-1,-1),new r("ett",-1,-1)],F=[17,65,16,1,0,0,0,0,0,0,0,0,0,0,0,128,130,103,8,5],S=[1,65,20,0,0,0,0,0,0,0,0,0,0,0,0,0,128],W=new s;this.setCurrent=function(e){W.setCurrent(e)},this.getCurrent=function(){return W.getCurrent()},this.stem=function(){var e=W.cursor;return n(),W.cursor=e,u(),W.limit_backward=e,W.cursor=W.limit,k(),W.cursor=W.limit,b(),W.cursor=W.limit,d(),W.cursor=W.limit_backward,o(),!0}};return function(e){return"function"==typeof e.update?e.update(function(e){return i.setCurrent(e),i.stem(),i.getCurrent()}):(i.setCurrent(e),i.stem(),i.getCurrent())}}(),e.Pipeline.registerFunction(e.fr.stemmer,"stemmer-fr"),e.fr.stopWordFilter=e.generateStopWordFilter("ai aie aient aies ait as au aura aurai auraient aurais aurait auras aurez auriez aurions aurons auront aux avaient avais avait avec avez aviez avions avons ayant ayez ayons c ce ceci celà ces cet cette d dans de des du elle en es est et eu eue eues eurent eus eusse eussent eusses eussiez eussions eut eux eûmes eût eûtes furent fus fusse fussent fusses fussiez fussions fut fûmes fût fûtes ici il ils j je l la le les leur leurs lui m ma mais me mes moi mon même n ne nos notre nous on ont ou par pas pour qu que quel quelle quelles quels qui s sa sans se sera serai seraient serais serait seras serez seriez serions serons seront ses soi soient 
sois soit sommes son sont soyez soyons suis sur t ta te tes toi ton tu un une vos votre vous y à étaient étais était étant étiez étions été étée étées étés êtes".split(" ")),e.Pipeline.registerFunction(e.fr.stopWordFilter,"stopWordFilter-fr")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.hi.min.js ADDED
@@ -0,0 +1 @@
 
 
1
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.hi=function(){this.pipeline.reset(),this.pipeline.add(e.hi.trimmer,e.hi.stopWordFilter,e.hi.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.hi.stemmer))},e.hi.wordCharacters="ऀ-ःऄ-एऐ-टठ-यर-िी-ॏॐ-य़ॠ-९॰-ॿa-zA-Za-zA-Z0-90-9",e.hi.trimmer=e.trimmerSupport.generateTrimmer(e.hi.wordCharacters),e.Pipeline.registerFunction(e.hi.trimmer,"trimmer-hi"),e.hi.stopWordFilter=e.generateStopWordFilter("अत अपना अपनी अपने अभी अंदर आदि आप इत्यादि इन इनका इन्हीं इन्हें इन्हों इस इसका इसकी इसके इसमें इसी इसे उन उनका उनकी उनके उनको उन्हीं उन्हें उन्हों उस उसके उसी उसे एक एवं एस ऐसे और कई कर करता करते करना करने करें कहते कहा का काफ़ी कि कितना किन्हें किन्हों किया किर किस किसी किसे की कुछ कुल के को कोई कौन कौनसा गया घर जब जहाँ जा जितना जिन जिन्हें जिन्हों जिस जिसे जीधर जैसा जैसे जो तक तब तरह तिन तिन्हें तिन्हों तिस तिसे तो था थी थे दबारा दिया दुसरा दूसरे दो द्वारा न नके नहीं ना निहायत नीचे ने पर पहले पूरा पे फिर बनी बही बहुत बाद बाला बिलकुल भी भीतर मगर मानो मे में यदि यह यहाँ यही या यिह ये रखें रहा रहे ऱ्वासा लिए लिये लेकिन व वग़ैरह वर्ग वह वहाँ वहीं वाले वुह वे वो सकता सकते सबसे सभी साथ साबुत साभ सारा से सो संग ही हुआ हुई हुए है हैं हो होता होती होते होना होने".split(" ")),e.hi.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}();var r=e.wordcut;r.init(),e.hi.tokenizer=function(i){if(!arguments.length||null==i||void 0==i)return[];if(Array.isArray(i))return i.map(function(r){return isLunr2?new e.Token(r.toLowerCase()):r.toLowerCase()});var 
t=i.toString().toLowerCase().replace(/^\s+/,"");return r.cut(t).split("|")},e.Pipeline.registerFunction(e.hi.stemmer,"stemmer-hi"),e.Pipeline.registerFunction(e.hi.stopWordFilter,"stopWordFilter-hi")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.hu.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `Hungarian` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(e,n){"function"==typeof define&&define.amd?define(n):"object"==typeof exports?module.exports=n():n()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.hu=function(){this.pipeline.reset(),this.pipeline.add(e.hu.trimmer,e.hu.stopWordFilter,e.hu.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.hu.stemmer))},e.hu.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",e.hu.trimmer=e.trimmerSupport.generateTrimmer(e.hu.wordCharacters),e.Pipeline.registerFunction(e.hu.trimmer,"trimmer-hu"),e.hu.stemmer=function(){var n=e.stemmerSupport.Among,r=e.stemmerSupport.SnowballProgram,i=new function(){function e(){var e,n=L.cursor;if(d=L.limit,L.in_grouping(W,97,252))for(;;){if(e=L.cursor,L.out_grouping(W,97,252))return L.cursor=e,L.find_among(g,8)||(L.cursor=e,e<L.limit&&L.cursor++),void(d=L.cursor);if(L.cursor=e,e>=L.limit)return void(d=e);L.cursor++}if(L.cursor=n,L.out_grouping(W,97,252)){for(;!L.in_grouping(W,97,252);){if(L.cursor>=L.limit)return;L.cursor++}d=L.cursor}}function i(){return d<=L.cursor}function a(){var e;if(L.ket=L.cursor,(e=L.find_among_b(h,2))&&(L.bra=L.cursor,i()))switch(e){case 1:L.slice_from("a");break;case 2:L.slice_from("e")}}function t(){var e=L.limit-L.cursor;return!!L.find_among_b(p,23)&&(L.cursor=L.limit-e,!0)}function s(){if(L.cursor>L.limit_backward){L.cursor--,L.ket=L.cursor;var e=L.cursor-1;L.limit_backward<=e&&e<=L.limit&&(L.cursor=e,L.bra=e,L.slice_del())}}function c(){var e;if(L.ket=L.cursor,(e=L.find_among_b(_,2))&&(L.bra=L.cursor,i())){if((1==e||2==e)&&!t())return;L.slice_del(),s()}}function o(){L.ket=L.cursor,L.find_among_b(v,44)&&(L.bra=L.cursor,i()&&(L.slice_del(),a()))}function w(){var 
e;if(L.ket=L.cursor,(e=L.find_among_b(z,3))&&(L.bra=L.cursor,i()))switch(e){case 1:L.slice_from("e");break;case 2:case 3:L.slice_from("a")}}function l(){var e;if(L.ket=L.cursor,(e=L.find_among_b(y,6))&&(L.bra=L.cursor,i()))switch(e){case 1:case 2:L.slice_del();break;case 3:L.slice_from("a");break;case 4:L.slice_from("e")}}function u(){var e;if(L.ket=L.cursor,(e=L.find_among_b(j,2))&&(L.bra=L.cursor,i())){if((1==e||2==e)&&!t())return;L.slice_del(),s()}}function m(){var e;if(L.ket=L.cursor,(e=L.find_among_b(C,7))&&(L.bra=L.cursor,i()))switch(e){case 1:L.slice_from("a");break;case 2:L.slice_from("e");break;case 3:case 4:case 5:case 6:case 7:L.slice_del()}}function k(){var e;if(L.ket=L.cursor,(e=L.find_among_b(P,12))&&(L.bra=L.cursor,i()))switch(e){case 1:case 4:case 7:case 9:L.slice_del();break;case 2:case 5:case 8:L.slice_from("e");break;case 3:case 6:L.slice_from("a")}}function f(){var e;if(L.ket=L.cursor,(e=L.find_among_b(F,31))&&(L.bra=L.cursor,i()))switch(e){case 1:case 4:case 7:case 8:case 9:case 12:case 13:case 16:case 17:case 18:L.slice_del();break;case 2:case 5:case 10:case 14:case 19:L.slice_from("a");break;case 3:case 6:case 11:case 15:case 20:L.slice_from("e")}}function b(){var e;if(L.ket=L.cursor,(e=L.find_among_b(S,42))&&(L.bra=L.cursor,i()))switch(e){case 1:case 4:case 5:case 6:case 9:case 10:case 11:case 14:case 15:case 16:case 17:case 20:case 21:case 24:case 25:case 26:case 29:L.slice_del();break;case 2:case 7:case 12:case 18:case 22:case 27:L.slice_from("a");break;case 3:case 8:case 13:case 19:case 23:case 28:L.slice_from("e")}}var d,g=[new n("cs",-1,-1),new n("dzs",-1,-1),new n("gy",-1,-1),new n("ly",-1,-1),new n("ny",-1,-1),new n("sz",-1,-1),new n("ty",-1,-1),new n("zs",-1,-1)],h=[new n("á",-1,1),new n("é",-1,2)],p=[new n("bb",-1,-1),new n("cc",-1,-1),new n("dd",-1,-1),new n("ff",-1,-1),new n("gg",-1,-1),new n("jj",-1,-1),new n("kk",-1,-1),new n("ll",-1,-1),new n("mm",-1,-1),new n("nn",-1,-1),new n("pp",-1,-1),new n("rr",-1,-1),new 
n("ccs",-1,-1),new n("ss",-1,-1),new n("zzs",-1,-1),new n("tt",-1,-1),new n("vv",-1,-1),new n("ggy",-1,-1),new n("lly",-1,-1),new n("nny",-1,-1),new n("tty",-1,-1),new n("ssz",-1,-1),new n("zz",-1,-1)],_=[new n("al",-1,1),new n("el",-1,2)],v=[new n("ba",-1,-1),new n("ra",-1,-1),new n("be",-1,-1),new n("re",-1,-1),new n("ig",-1,-1),new n("nak",-1,-1),new n("nek",-1,-1),new n("val",-1,-1),new n("vel",-1,-1),new n("ul",-1,-1),new n("nál",-1,-1),new n("nél",-1,-1),new n("ból",-1,-1),new n("ról",-1,-1),new n("tól",-1,-1),new n("bõl",-1,-1),new n("rõl",-1,-1),new n("tõl",-1,-1),new n("ül",-1,-1),new n("n",-1,-1),new n("an",19,-1),new n("ban",20,-1),new n("en",19,-1),new n("ben",22,-1),new n("képpen",22,-1),new n("on",19,-1),new n("ön",19,-1),new n("képp",-1,-1),new n("kor",-1,-1),new n("t",-1,-1),new n("at",29,-1),new n("et",29,-1),new n("ként",29,-1),new n("anként",32,-1),new n("enként",32,-1),new n("onként",32,-1),new n("ot",29,-1),new n("ért",29,-1),new n("öt",29,-1),new n("hez",-1,-1),new n("hoz",-1,-1),new n("höz",-1,-1),new n("vá",-1,-1),new n("vé",-1,-1)],z=[new n("án",-1,2),new n("én",-1,1),new n("ánként",-1,3)],y=[new n("stul",-1,2),new n("astul",0,1),new n("ástul",0,3),new n("stül",-1,2),new n("estül",3,1),new n("éstül",3,4)],j=[new n("á",-1,1),new n("é",-1,2)],C=[new n("k",-1,7),new n("ak",0,4),new n("ek",0,6),new n("ok",0,5),new n("ák",0,1),new n("ék",0,2),new n("ök",0,3)],P=[new n("éi",-1,7),new n("áéi",0,6),new n("ééi",0,5),new n("é",-1,9),new n("ké",3,4),new n("aké",4,1),new n("eké",4,1),new n("oké",4,1),new n("áké",4,3),new n("éké",4,2),new n("öké",4,1),new n("éé",3,8)],F=[new n("a",-1,18),new n("ja",0,17),new n("d",-1,16),new n("ad",2,13),new n("ed",2,13),new n("od",2,13),new n("ád",2,14),new n("éd",2,15),new n("öd",2,13),new n("e",-1,18),new n("je",9,17),new n("nk",-1,4),new n("unk",11,1),new n("ánk",11,2),new n("énk",11,3),new n("ünk",11,1),new n("uk",-1,8),new n("juk",16,7),new n("ájuk",17,5),new n("ük",-1,8),new n("jük",19,7),new n("éjük",20,6),new 
n("m",-1,12),new n("am",22,9),new n("em",22,9),new n("om",22,9),new n("ám",22,10),new n("ém",22,11),new n("o",-1,18),new n("á",-1,19),new n("é",-1,20)],S=[new n("id",-1,10),new n("aid",0,9),new n("jaid",1,6),new n("eid",0,9),new n("jeid",3,6),new n("áid",0,7),new n("éid",0,8),new n("i",-1,15),new n("ai",7,14),new n("jai",8,11),new n("ei",7,14),new n("jei",10,11),new n("ái",7,12),new n("éi",7,13),new n("itek",-1,24),new n("eitek",14,21),new n("jeitek",15,20),new n("éitek",14,23),new n("ik",-1,29),new n("aik",18,26),new n("jaik",19,25),new n("eik",18,26),new n("jeik",21,25),new n("áik",18,27),new n("éik",18,28),new n("ink",-1,20),new n("aink",25,17),new n("jaink",26,16),new n("eink",25,17),new n("jeink",28,16),new n("áink",25,18),new n("éink",25,19),new n("aitok",-1,21),new n("jaitok",32,20),new n("áitok",-1,22),new n("im",-1,5),new n("aim",35,4),new n("jaim",36,1),new n("eim",35,4),new n("jeim",38,1),new n("áim",35,2),new n("éim",35,3)],W=[17,65,16,0,0,0,0,0,0,0,0,0,0,0,0,0,1,17,52,14],L=new r;this.setCurrent=function(e){L.setCurrent(e)},this.getCurrent=function(){return L.getCurrent()},this.stem=function(){var n=L.cursor;return e(),L.limit_backward=n,L.cursor=L.limit,c(),L.cursor=L.limit,o(),L.cursor=L.limit,w(),L.cursor=L.limit,l(),L.cursor=L.limit,u(),L.cursor=L.limit,k(),L.cursor=L.limit,f(),L.cursor=L.limit,b(),L.cursor=L.limit,m(),!0}};return function(e){return"function"==typeof e.update?e.update(function(e){return i.setCurrent(e),i.stem(),i.getCurrent()}):(i.setCurrent(e),i.stem(),i.getCurrent())}}(),e.Pipeline.registerFunction(e.hu.stemmer,"stemmer-hu"),e.hu.stopWordFilter=e.generateStopWordFilter("a abban ahhoz ahogy ahol aki akik akkor alatt amely amelyek amelyekben amelyeket amelyet amelynek ami amikor amit amolyan amíg annak arra arról az azok azon azonban azt aztán azután azzal azért be belül benne bár cikk cikkek cikkeket csak de e ebben eddig egy egyes egyetlen egyik egyre egyéb egész ehhez ekkor el ellen elsõ elég elõ elõször elõtt emilyen ennek erre 
ez ezek ezen ezt ezzel ezért fel felé hanem hiszen hogy hogyan igen ill ill. illetve ilyen ilyenkor ismét ison itt jobban jó jól kell kellett keressünk keresztül ki kívül között közül legalább legyen lehet lehetett lenne lenni lesz lett maga magát majd majd meg mellett mely melyek mert mi mikor milyen minden mindenki mindent mindig mint mintha mit mivel miért most már más másik még míg nagy nagyobb nagyon ne nekem neki nem nincs néha néhány nélkül olyan ott pedig persze rá s saját sem semmi sok sokat sokkal szemben szerint szinte számára talán tehát teljes tovább továbbá több ugyanis utolsó után utána vagy vagyis vagyok valaki valami valamint való van vannak vele vissza viszont volna volt voltak voltam voltunk által általában át én éppen és így õ õk õket össze úgy új újabb újra".split(" ")),e.Pipeline.registerFunction(e.hu.stopWordFilter,"stopWordFilter-hu")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.it.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `Italian` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.it=function(){this.pipeline.reset(),this.pipeline.add(e.it.trimmer,e.it.stopWordFilter,e.it.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.it.stemmer))},e.it.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",e.it.trimmer=e.trimmerSupport.generateTrimmer(e.it.wordCharacters),e.Pipeline.registerFunction(e.it.trimmer,"trimmer-it"),e.it.stemmer=function(){var r=e.stemmerSupport.Among,n=e.stemmerSupport.SnowballProgram,i=new function(){function e(e,r,n){return!(!x.eq_s(1,e)||(x.ket=x.cursor,!x.in_grouping(L,97,249)))&&(x.slice_from(r),x.cursor=n,!0)}function i(){for(var r,n,i,o,t=x.cursor;;){if(x.bra=x.cursor,r=x.find_among(h,7))switch(x.ket=x.cursor,r){case 1:x.slice_from("à");continue;case 2:x.slice_from("è");continue;case 3:x.slice_from("ì");continue;case 4:x.slice_from("ò");continue;case 5:x.slice_from("ù");continue;case 6:x.slice_from("qU");continue;case 7:if(x.cursor>=x.limit)break;x.cursor++;continue}break}for(x.cursor=t;;)for(n=x.cursor;;){if(i=x.cursor,x.in_grouping(L,97,249)){if(x.bra=x.cursor,o=x.cursor,e("u","U",i))break;if(x.cursor=o,e("i","I",i))break}if(x.cursor=i,x.cursor>=x.limit)return void(x.cursor=n);x.cursor++}}function o(e){if(x.cursor=e,!x.in_grouping(L,97,249))return!1;for(;!x.out_grouping(L,97,249);){if(x.cursor>=x.limit)return!1;x.cursor++}return!0}function t(){if(x.in_grouping(L,97,249)){var e=x.cursor;if(x.out_grouping(L,97,249)){for(;!x.in_grouping(L,97,249);){if(x.cursor>=x.limit)return o(e);x.cursor++}return!0}return o(e)}return!1}function s(){var 
e,r=x.cursor;if(!t()){if(x.cursor=r,!x.out_grouping(L,97,249))return;if(e=x.cursor,x.out_grouping(L,97,249)){for(;!x.in_grouping(L,97,249);){if(x.cursor>=x.limit)return x.cursor=e,void(x.in_grouping(L,97,249)&&x.cursor<x.limit&&x.cursor++);x.cursor++}return void(k=x.cursor)}if(x.cursor=e,!x.in_grouping(L,97,249)||x.cursor>=x.limit)return;x.cursor++}k=x.cursor}function a(){for(;!x.in_grouping(L,97,249);){if(x.cursor>=x.limit)return!1;x.cursor++}for(;!x.out_grouping(L,97,249);){if(x.cursor>=x.limit)return!1;x.cursor++}return!0}function u(){var e=x.cursor;k=x.limit,p=k,g=k,s(),x.cursor=e,a()&&(p=x.cursor,a()&&(g=x.cursor))}function c(){for(var e;;){if(x.bra=x.cursor,!(e=x.find_among(q,3)))break;switch(x.ket=x.cursor,e){case 1:x.slice_from("i");break;case 2:x.slice_from("u");break;case 3:if(x.cursor>=x.limit)return;x.cursor++}}}function w(){return k<=x.cursor}function l(){return p<=x.cursor}function m(){return g<=x.cursor}function f(){var e;if(x.ket=x.cursor,x.find_among_b(C,37)&&(x.bra=x.cursor,(e=x.find_among_b(z,5))&&w()))switch(e){case 1:x.slice_del();break;case 2:x.slice_from("e")}}function v(){var e;if(x.ket=x.cursor,!(e=x.find_among_b(S,51)))return!1;switch(x.bra=x.cursor,e){case 1:if(!m())return!1;x.slice_del();break;case 2:if(!m())return!1;x.slice_del(),x.ket=x.cursor,x.eq_s_b(2,"ic")&&(x.bra=x.cursor,m()&&x.slice_del());break;case 3:if(!m())return!1;x.slice_from("log");break;case 4:if(!m())return!1;x.slice_from("u");break;case 5:if(!m())return!1;x.slice_from("ente");break;case 6:if(!w())return!1;x.slice_del();break;case 7:if(!l())return!1;x.slice_del(),x.ket=x.cursor,e=x.find_among_b(P,4),e&&(x.bra=x.cursor,m()&&(x.slice_del(),1==e&&(x.ket=x.cursor,x.eq_s_b(2,"at")&&(x.bra=x.cursor,m()&&x.slice_del()))));break;case 8:if(!m())return!1;x.slice_del(),x.ket=x.cursor,e=x.find_among_b(F,3),e&&(x.bra=x.cursor,1==e&&m()&&x.slice_del());break;case 
9:if(!m())return!1;x.slice_del(),x.ket=x.cursor,x.eq_s_b(2,"at")&&(x.bra=x.cursor,m()&&(x.slice_del(),x.ket=x.cursor,x.eq_s_b(2,"ic")&&(x.bra=x.cursor,m()&&x.slice_del())))}return!0}function b(){var e,r;x.cursor>=k&&(r=x.limit_backward,x.limit_backward=k,x.ket=x.cursor,e=x.find_among_b(W,87),e&&(x.bra=x.cursor,1==e&&x.slice_del()),x.limit_backward=r)}function d(){var e=x.limit-x.cursor;if(x.ket=x.cursor,x.in_grouping_b(y,97,242)&&(x.bra=x.cursor,w()&&(x.slice_del(),x.ket=x.cursor,x.eq_s_b(1,"i")&&(x.bra=x.cursor,w()))))return void x.slice_del();x.cursor=x.limit-e}function _(){d(),x.ket=x.cursor,x.eq_s_b(1,"h")&&(x.bra=x.cursor,x.in_grouping_b(U,99,103)&&w()&&x.slice_del())}var g,p,k,h=[new r("",-1,7),new r("qu",0,6),new r("á",0,1),new r("é",0,2),new r("í",0,3),new r("ó",0,4),new r("ú",0,5)],q=[new r("",-1,3),new r("I",0,1),new r("U",0,2)],C=[new r("la",-1,-1),new r("cela",0,-1),new r("gliela",0,-1),new r("mela",0,-1),new r("tela",0,-1),new r("vela",0,-1),new r("le",-1,-1),new r("cele",6,-1),new r("gliele",6,-1),new r("mele",6,-1),new r("tele",6,-1),new r("vele",6,-1),new r("ne",-1,-1),new r("cene",12,-1),new r("gliene",12,-1),new r("mene",12,-1),new r("sene",12,-1),new r("tene",12,-1),new r("vene",12,-1),new r("ci",-1,-1),new r("li",-1,-1),new r("celi",20,-1),new r("glieli",20,-1),new r("meli",20,-1),new r("teli",20,-1),new r("veli",20,-1),new r("gli",20,-1),new r("mi",-1,-1),new r("si",-1,-1),new r("ti",-1,-1),new r("vi",-1,-1),new r("lo",-1,-1),new r("celo",31,-1),new r("glielo",31,-1),new r("melo",31,-1),new r("telo",31,-1),new r("velo",31,-1)],z=[new r("ando",-1,1),new r("endo",-1,1),new r("ar",-1,2),new r("er",-1,2),new r("ir",-1,2)],P=[new r("ic",-1,-1),new r("abil",-1,-1),new r("os",-1,-1),new r("iv",-1,1)],F=[new r("ic",-1,1),new r("abil",-1,1),new r("iv",-1,1)],S=[new r("ica",-1,1),new r("logia",-1,3),new r("osa",-1,1),new r("ista",-1,1),new r("iva",-1,9),new r("anza",-1,1),new r("enza",-1,5),new r("ice",-1,1),new r("atrice",7,1),new r("iche",-1,1),new 
r("logie",-1,3),new r("abile",-1,1),new r("ibile",-1,1),new r("usione",-1,4),new r("azione",-1,2),new r("uzione",-1,4),new r("atore",-1,2),new r("ose",-1,1),new r("ante",-1,1),new r("mente",-1,1),new r("amente",19,7),new r("iste",-1,1),new r("ive",-1,9),new r("anze",-1,1),new r("enze",-1,5),new r("ici",-1,1),new r("atrici",25,1),new r("ichi",-1,1),new r("abili",-1,1),new r("ibili",-1,1),new r("ismi",-1,1),new r("usioni",-1,4),new r("azioni",-1,2),new r("uzioni",-1,4),new r("atori",-1,2),new r("osi",-1,1),new r("anti",-1,1),new r("amenti",-1,6),new r("imenti",-1,6),new r("isti",-1,1),new r("ivi",-1,9),new r("ico",-1,1),new r("ismo",-1,1),new r("oso",-1,1),new r("amento",-1,6),new r("imento",-1,6),new r("ivo",-1,9),new r("ità",-1,8),new r("istà",-1,1),new r("istè",-1,1),new r("istì",-1,1)],W=[new r("isca",-1,1),new r("enda",-1,1),new r("ata",-1,1),new r("ita",-1,1),new r("uta",-1,1),new r("ava",-1,1),new r("eva",-1,1),new r("iva",-1,1),new r("erebbe",-1,1),new r("irebbe",-1,1),new r("isce",-1,1),new r("ende",-1,1),new r("are",-1,1),new r("ere",-1,1),new r("ire",-1,1),new r("asse",-1,1),new r("ate",-1,1),new r("avate",16,1),new r("evate",16,1),new r("ivate",16,1),new r("ete",-1,1),new r("erete",20,1),new r("irete",20,1),new r("ite",-1,1),new r("ereste",-1,1),new r("ireste",-1,1),new r("ute",-1,1),new r("erai",-1,1),new r("irai",-1,1),new r("isci",-1,1),new r("endi",-1,1),new r("erei",-1,1),new r("irei",-1,1),new r("assi",-1,1),new r("ati",-1,1),new r("iti",-1,1),new r("eresti",-1,1),new r("iresti",-1,1),new r("uti",-1,1),new r("avi",-1,1),new r("evi",-1,1),new r("ivi",-1,1),new r("isco",-1,1),new r("ando",-1,1),new r("endo",-1,1),new r("Yamo",-1,1),new r("iamo",-1,1),new r("avamo",-1,1),new r("evamo",-1,1),new r("ivamo",-1,1),new r("eremo",-1,1),new r("iremo",-1,1),new r("assimo",-1,1),new r("ammo",-1,1),new r("emmo",-1,1),new r("eremmo",54,1),new r("iremmo",54,1),new r("immo",-1,1),new r("ano",-1,1),new r("iscano",58,1),new r("avano",58,1),new r("evano",58,1),new 
r("ivano",58,1),new r("eranno",-1,1),new r("iranno",-1,1),new r("ono",-1,1),new r("iscono",65,1),new r("arono",65,1),new r("erono",65,1),new r("irono",65,1),new r("erebbero",-1,1),new r("irebbero",-1,1),new r("assero",-1,1),new r("essero",-1,1),new r("issero",-1,1),new r("ato",-1,1),new r("ito",-1,1),new r("uto",-1,1),new r("avo",-1,1),new r("evo",-1,1),new r("ivo",-1,1),new r("ar",-1,1),new r("ir",-1,1),new r("erà",-1,1),new r("irà",-1,1),new r("erò",-1,1),new r("irò",-1,1)],L=[17,65,16,0,0,0,0,0,0,0,0,0,0,0,0,128,128,8,2,1],y=[17,65,0,0,0,0,0,0,0,0,0,0,0,0,0,128,128,8,2],U=[17],x=new n;this.setCurrent=function(e){x.setCurrent(e)},this.getCurrent=function(){return x.getCurrent()},this.stem=function(){var e=x.cursor;return i(),x.cursor=e,u(),x.limit_backward=e,x.cursor=x.limit,f(),x.cursor=x.limit,v()||(x.cursor=x.limit,b()),x.cursor=x.limit,_(),x.cursor=x.limit_backward,c(),!0}};return function(e){return"function"==typeof e.update?e.update(function(e){return i.setCurrent(e),i.stem(),i.getCurrent()}):(i.setCurrent(e),i.stem(),i.getCurrent())}}(),e.Pipeline.registerFunction(e.it.stemmer,"stemmer-it"),e.it.stopWordFilter=e.generateStopWordFilter("a abbia abbiamo abbiano abbiate ad agl agli ai al all alla alle allo anche avemmo avendo avesse avessero avessi avessimo aveste avesti avete aveva avevamo avevano avevate avevi avevo avrai avranno avrebbe avrebbero avrei avremmo avremo avreste avresti avrete avrà avrò avuta avute avuti avuto c che chi ci coi col come con contro cui da dagl dagli dai dal dall dalla dalle dallo degl degli dei del dell della delle dello di dov dove e ebbe ebbero ebbi ed era erano eravamo eravate eri ero essendo faccia facciamo facciano facciate faccio facemmo facendo facesse facessero facessi facessimo faceste facesti faceva facevamo facevano facevate facevi facevo fai fanno farai faranno farebbe farebbero farei faremmo faremo fareste faresti farete farà farò fece fecero feci fosse fossero fossi fossimo foste fosti fu fui fummo furono gli ha 
hai hanno ho i il in io l la le lei li lo loro lui ma mi mia mie miei mio ne negl negli nei nel nell nella nelle nello noi non nostra nostre nostri nostro o per perché più quale quanta quante quanti quanto quella quelle quelli quello questa queste questi questo sarai saranno sarebbe sarebbero sarei saremmo saremo sareste saresti sarete sarà sarò se sei si sia siamo siano siate siete sono sta stai stando stanno starai staranno starebbe starebbero starei staremmo staremo stareste staresti starete starà starò stava stavamo stavano stavate stavi stavo stemmo stesse stessero stessi stessimo steste stesti stette stettero stetti stia stiamo stiano stiate sto su sua sue sugl sugli sui sul sull sulla sulle sullo suo suoi ti tra tu tua tue tuo tuoi tutti tutto un una uno vi voi vostra vostre vostri vostro è".split(" ")),e.Pipeline.registerFunction(e.it.stopWordFilter,"stopWordFilter-it")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.ja.min.js ADDED
@@ -0,0 +1 @@
 
 
1
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");var r="2"==e.version[0];e.ja=function(){this.pipeline.reset(),this.pipeline.add(e.ja.trimmer,e.ja.stopWordFilter,e.ja.stemmer),r?this.tokenizer=e.ja.tokenizer:(e.tokenizer&&(e.tokenizer=e.ja.tokenizer),this.tokenizerFn&&(this.tokenizerFn=e.ja.tokenizer))};var t=new e.TinySegmenter;e.ja.tokenizer=function(i){var n,o,s,p,a,u,m,l,c,f;if(!arguments.length||null==i||void 0==i)return[];if(Array.isArray(i))return i.map(function(t){return r?new e.Token(t.toLowerCase()):t.toLowerCase()});for(o=i.toString().toLowerCase().replace(/^\s+/,""),n=o.length-1;n>=0;n--)if(/\S/.test(o.charAt(n))){o=o.substring(0,n+1);break}for(a=[],s=o.length,c=0,l=0;c<=s;c++)if(u=o.charAt(c),m=c-l,u.match(/\s/)||c==s){if(m>0)for(p=t.segment(o.slice(l,c)).filter(function(e){return!!e}),f=l,n=0;n<p.length;n++)r?a.push(new e.Token(p[n],{position:[f,p[n].length],index:a.length})):a.push(p[n]),f+=p[n].length;l=c+1}return a},e.ja.stemmer=function(){return function(e){return e}}(),e.Pipeline.registerFunction(e.ja.stemmer,"stemmer-ja"),e.ja.wordCharacters="一二三四五六七八九十百千万億兆一-龠々〆ヵヶぁ-んァ-ヴーア-ン゙a-zA-Za-zA-Z0-90-9",e.ja.trimmer=e.trimmerSupport.generateTrimmer(e.ja.wordCharacters),e.Pipeline.registerFunction(e.ja.trimmer,"trimmer-ja"),e.ja.stopWordFilter=e.generateStopWordFilter("これ それ あれ この その あの ここ そこ あそこ こちら どこ だれ なに なん 何 私 貴方 貴方方 我々 私達 あの人 あのかた 彼女 彼 です あります おります います は が の に を で え から まで より も どの と し それで しかし".split(" 
")),e.Pipeline.registerFunction(e.ja.stopWordFilter,"stopWordFilter-ja"),e.jp=e.ja,e.Pipeline.registerFunction(e.jp.stemmer,"stemmer-jp"),e.Pipeline.registerFunction(e.jp.trimmer,"trimmer-jp"),e.Pipeline.registerFunction(e.jp.stopWordFilter,"stopWordFilter-jp")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.jp.min.js ADDED
@@ -0,0 +1 @@
 
 
1
+ module.exports=require("./lunr.ja");
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.ko.min.js ADDED
@@ -0,0 +1 @@
 
 
1
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.ko=function(){this.pipeline.reset(),this.pipeline.add(e.ko.trimmer,e.ko.stopWordFilter)},e.ko.wordCharacters="[A-Za-z가-힯a]",e.ko.trimmer=e.trimmerSupport.generateTrimmer(e.ko.wordCharacters),e.Pipeline.registerFunction(e.ko.trimmer,"trimmer-ko"),e.ko.stopWordFilter=e.generateStopWordFilter("아 휴 아이구 아이쿠 아이고 어 나 우리 저희 따라 의해 을 를 에 의 가 으로 로 에게 뿐이다 의거하여 근거하여 입각하여 기준으로 예하면 예를 들면 예를 들자면 저 소인 소생 저희 지말고 하지마 하지마라 다른 물론 또한 그리고 비길수 없다 해서는 안된다 뿐만 아니라 만이 아니다 만은 아니다 막론하고 관계없이 그치지 않다 그러나 그런데 하지만 든간에 논하지 않다 따지지 않다 설사 비록 더라도 아니면 만 못하다 하는 편이 낫다 불문하고 향하여 향해서 향하다 쪽으로 틈타 이용하여 타다 오르다 제외하고 이 외에 이 밖에 하여야 비로소 한다면 몰라도 외에도 이곳 여기 부터 기점으로 따라서 할 생각이다 하려고하다 이리하여 그리하여 그렇게 함으로써 하지만 일때 할때 앞에서 중에서 보는데서 으로써 로써 까지 해야한다 일것이다 반드시 할줄알다 할수있다 할수있어 임에 틀림없다 한다면 등 등등 제 겨우 단지 다만 할뿐 딩동 댕그 대해서 대하여 대하면 훨씬 얼마나 얼마만큼 얼마큼 남짓 여 얼마간 약간 다소 좀 조금 다수 몇 얼마 지만 하물며 또한 그러나 그렇지만 하지만 이외에도 대해 말하자면 뿐이다 다음에 반대로 반대로 말하자면 이와 반대로 바꾸어서 말하면 바꾸어서 한다면 만약 그렇지않으면 까악 툭 딱 삐걱거리다 보드득 비걱거리다 꽈당 응당 해야한다 에 가서 각 각각 여러분 각종 각자 제각기 하도록하다 와 과 그러므로 그래서 고로 한 까닭에 하기 때문에 거니와 이지만 대하여 관하여 관한 과연 실로 아니나다를가 생각한대로 진짜로 한적이있다 하곤하였다 하 하하 허허 아하 거바 와 오 왜 어째서 무엇때문에 어찌 하겠는가 무슨 어디 어느곳 더군다나 하물며 더욱이는 어느때 언제 야 이봐 어이 여보시오 흐흐 흥 휴 헉헉 헐떡헐떡 영차 여차 어기여차 끙끙 아야 앗 아야 콸콸 졸졸 좍좍 뚝뚝 주룩주룩 솨 우르르 그래도 또 그리고 바꾸어말하면 바꾸어말하자면 혹은 혹시 답다 및 그에 따르는 때가 되어 즉 지든지 설령 가령 하더라도 할지라도 일지라도 지든지 몇 거의 하마터면 인젠 이젠 된바에야 된이상 만큼\t어찌됏든 그위에 게다가 점에서 보아 비추어 보아 고려하면 하게될것이다 일것이다 비교적 좀 보다더 비하면 시키다 하게하다 할만하다 의해서 연이서 이어서 잇따라 뒤따라 뒤이어 결국 의지하여 기대여 통하여 자마자 더욱더 불구하고 얼마든지 마음대로 주저하지 않고 곧 즉시 바로 당장 하자마자 밖에 안된다 하면된다 그래 그렇지 요컨대 다시 말하자면 바꿔 말하면 즉 구체적으로 말하자면 시작하여 시초에 이상 허 헉 허걱 바와같이 해도좋다 해도된다 게다가 더구나 하물며 와르르 팍 퍽 펄렁 동안 이래 하고있었다 이었다 에서 
로부터 까지 예하면 했어요 해요 함께 같이 더불어 마저 마저도 양자 모두 습니다 가까스로 하려고하다 즈음하여 다른 다른 방면으로 해봐요 습니까 했어요 말할것도 없고 무릎쓰고 개의치않고 하는것만 못하다 하는것이 낫다 매 매번 들 모 어느것 어느 로써 갖고말하자면 어디 어느쪽 어느것 어느해 어느 년도 라 해도 언젠가 어떤것 어느것 저기 저쪽 저것 그때 그럼 그러면 요만한걸 그래 그때 저것만큼 그저 이르기까지 할 줄 안다 할 힘이 있다 너 너희 당신 어찌 설마 차라리 할지언정 할지라도 할망정 할지언정 구토하다 게우다 토하다 메쓰겁다 옆사람 퉤 쳇 의거하여 근거하여 의해 따라 힘입어 그 다음 버금 두번째로 기타 첫번째로 나머지는 그중에서 견지에서 형식으로 쓰여 입장에서 위해서 단지 의해되다 하도록시키다 뿐만아니라 반대로 전후 전자 앞의것 잠시 잠깐 하면서 그렇지만 다음에 그러한즉 그런즉 남들 아무거나 어찌하든지 같다 비슷하다 예컨대 이럴정도로 어떻게 만약 만일 위에서 서술한바와같��� 인 듯하다 하지 않는다면 만약에 무엇 무슨 어느 어떤 아래윗 조차 한데 그럼에도 불구하고 여전히 심지어 까지도 조차도 하지 않도록 않기 위하여 때 시각 무렵 시간 동안 어때 어떠한 하여금 네 예 우선 누구 누가 알겠는가 아무도 줄은모른다 줄은 몰랏다 하는 김에 겸사겸사 하는바 그런 까닭에 한 이유는 그러니 그러니까 때문에 그 너희 그들 너희들 타인 것 것들 너 위하여 공동으로 동시에 하기 위하여 어찌하여 무엇때문에 붕붕 윙윙 나 우리 엉엉 휘익 윙윙 오호 아하 어쨋든 만 못하다\t하기보다는 차라리 하는 편이 낫다 흐흐 놀라다 상대적으로 말하자면 마치 아니라면 쉿 그렇지 않으면 그렇지 않다면 안 그러면 아니었다면 하든지 아니면 이라면 좋아 알았어 하는것도 그만이다 어쩔수 없다 하나 일 일반적으로 일단 한켠으로는 오자마자 이렇게되면 이와같다면 전부 한마디 한항목 근거로 하기에 아울러 하지 않도록 않기 위해서 이르기까지 이 되다 로 인하여 까닭으로 이유만으로 이로 인하여 그래서 이 때문에 그러므로 그런 까닭에 알 수 있다 결론을 낼 수 있다 으로 인하여 있다 어떤것 관계가 있다 관련이 있다 연관되다 어떤것들 에 대해 이리하여 그리하여 여부 하기보다는 하느니 하면 할수록 운운 이러이러하다 하구나 하도다 다시말하면 다음으로 에 있다 에 달려 있다 우리 우리들 오히려 하기는한데 어떻게 어떻해 어찌됏어 어때 어째서 본대로 자 이 이쪽 여기 이것 이번 이렇게말하자면 이런 이러한 이와 같은 요만큼 요만한 것 얼마 안 되는 것 이만큼 이 정도의 이렇게 많은 것 이와 같다 이때 이렇구나 것과 같이 끼익 삐걱 따위 와 같은 사람들 부류의 사람들 왜냐하면 중의하나 오직 오로지 에 한하다 하기만 하면 도착하다 까지 미치다 도달하다 정도에 이르다 할 지경이다 결과에 이르다 관해서는 여러분 하고 있다 한 후 혼자 자기 자기집 자신 우에 종합한것과같이 총적으로 보면 총적으로 말하면 총적으로 대로 하다 으로서 참 그만이다 할 따름이다 쿵 탕탕 쾅쾅 둥둥 봐 봐라 아이야 아니 와아 응 아이 참나 년 월 일 령 영 일 이 삼 사 오 육 륙 칠 팔 구 이천육 이천칠 이천팔 이천구 하나 둘 셋 넷 다섯 여섯 일곱 여덟 아홉 령 영".split(" ")),e.Pipeline.registerFunction(e.ko.stopWordFilter,"stopWordFilter-ko"),e.ko.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}(),e.Pipeline.registerFunction(e.ko.stemmer,"stemmer-ko")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.multi.min.js ADDED
@@ -0,0 +1 @@
 
 
1
+ !function(e,t){"function"==typeof define&&define.amd?define(t):"object"==typeof exports?module.exports=t():t()(e.lunr)}(this,function(){return function(e){e.multiLanguage=function(){for(var t=Array.prototype.slice.call(arguments),i=t.join("-"),r="",n=[],s=[],p=0;p<t.length;++p)"en"==t[p]?(r+="\\w",n.unshift(e.stopWordFilter),n.push(e.stemmer),s.push(e.stemmer)):(r+=e[t[p]].wordCharacters,e[t[p]].stopWordFilter&&n.unshift(e[t[p]].stopWordFilter),e[t[p]].stemmer&&(n.push(e[t[p]].stemmer),s.push(e[t[p]].stemmer)));var o=e.trimmerSupport.generateTrimmer(r);return e.Pipeline.registerFunction(o,"lunr-multi-trimmer-"+i),n.unshift(o),function(){this.pipeline.reset(),this.pipeline.add.apply(this.pipeline,n),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add.apply(this.searchPipeline,s))}}}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.nl.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `Dutch` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(r,e){"function"==typeof define&&define.amd?define(e):"object"==typeof exports?module.exports=e():e()(r.lunr)}(this,function(){return function(r){if(void 0===r)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===r.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");r.nl=function(){this.pipeline.reset(),this.pipeline.add(r.nl.trimmer,r.nl.stopWordFilter,r.nl.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(r.nl.stemmer))},r.nl.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",r.nl.trimmer=r.trimmerSupport.generateTrimmer(r.nl.wordCharacters),r.Pipeline.registerFunction(r.nl.trimmer,"trimmer-nl"),r.nl.stemmer=function(){var e=r.stemmerSupport.Among,i=r.stemmerSupport.SnowballProgram,n=new function(){function r(){for(var r,e,i,o=C.cursor;;){if(C.bra=C.cursor,r=C.find_among(b,11))switch(C.ket=C.cursor,r){case 1:C.slice_from("a");continue;case 2:C.slice_from("e");continue;case 3:C.slice_from("i");continue;case 4:C.slice_from("o");continue;case 5:C.slice_from("u");continue;case 6:if(C.cursor>=C.limit)break;C.cursor++;continue}break}for(C.cursor=o,C.bra=o,C.eq_s(1,"y")?(C.ket=C.cursor,C.slice_from("Y")):C.cursor=o;;)if(e=C.cursor,C.in_grouping(q,97,232)){if(i=C.cursor,C.bra=i,C.eq_s(1,"i"))C.ket=C.cursor,C.in_grouping(q,97,232)&&(C.slice_from("I"),C.cursor=e);else if(C.cursor=i,C.eq_s(1,"y"))C.ket=C.cursor,C.slice_from("Y"),C.cursor=e;else if(n(e))break}else if(n(e))break}function n(r){return C.cursor=r,r>=C.limit||(C.cursor++,!1)}function o(){_=C.limit,d=_,t()||(_=C.cursor,_<3&&(_=3),t()||(d=C.cursor))}function t(){for(;!C.in_grouping(q,97,232);){if(C.cursor>=C.limit)return!0;C.cursor++}for(;!C.out_grouping(q,97,232);){if(C.cursor>=C.limit)return!0;C.cursor++}return!1}function s(){for(var 
r;;)if(C.bra=C.cursor,r=C.find_among(p,3))switch(C.ket=C.cursor,r){case 1:C.slice_from("y");break;case 2:C.slice_from("i");break;case 3:if(C.cursor>=C.limit)return;C.cursor++}}function u(){return _<=C.cursor}function c(){return d<=C.cursor}function a(){var r=C.limit-C.cursor;C.find_among_b(g,3)&&(C.cursor=C.limit-r,C.ket=C.cursor,C.cursor>C.limit_backward&&(C.cursor--,C.bra=C.cursor,C.slice_del()))}function l(){var r;w=!1,C.ket=C.cursor,C.eq_s_b(1,"e")&&(C.bra=C.cursor,u()&&(r=C.limit-C.cursor,C.out_grouping_b(q,97,232)&&(C.cursor=C.limit-r,C.slice_del(),w=!0,a())))}function m(){var r;u()&&(r=C.limit-C.cursor,C.out_grouping_b(q,97,232)&&(C.cursor=C.limit-r,C.eq_s_b(3,"gem")||(C.cursor=C.limit-r,C.slice_del(),a())))}function f(){var r,e,i,n,o,t,s=C.limit-C.cursor;if(C.ket=C.cursor,r=C.find_among_b(h,5))switch(C.bra=C.cursor,r){case 1:u()&&C.slice_from("heid");break;case 2:m();break;case 3:u()&&C.out_grouping_b(j,97,232)&&C.slice_del()}if(C.cursor=C.limit-s,l(),C.cursor=C.limit-s,C.ket=C.cursor,C.eq_s_b(4,"heid")&&(C.bra=C.cursor,c()&&(e=C.limit-C.cursor,C.eq_s_b(1,"c")||(C.cursor=C.limit-e,C.slice_del(),C.ket=C.cursor,C.eq_s_b(2,"en")&&(C.bra=C.cursor,m())))),C.cursor=C.limit-s,C.ket=C.cursor,r=C.find_among_b(k,6))switch(C.bra=C.cursor,r){case 1:if(c()){if(C.slice_del(),i=C.limit-C.cursor,C.ket=C.cursor,C.eq_s_b(2,"ig")&&(C.bra=C.cursor,c()&&(n=C.limit-C.cursor,!C.eq_s_b(1,"e")))){C.cursor=C.limit-n,C.slice_del();break}C.cursor=C.limit-i,a()}break;case 2:c()&&(o=C.limit-C.cursor,C.eq_s_b(1,"e")||(C.cursor=C.limit-o,C.slice_del()));break;case 3:c()&&(C.slice_del(),l());break;case 4:c()&&C.slice_del();break;case 5:c()&&w&&C.slice_del()}C.cursor=C.limit-s,C.out_grouping_b(z,73,232)&&(t=C.limit-C.cursor,C.find_among_b(v,4)&&C.out_grouping_b(q,97,232)&&(C.cursor=C.limit-t,C.ket=C.cursor,C.cursor>C.limit_backward&&(C.cursor--,C.bra=C.cursor,C.slice_del())))}var d,_,w,b=[new e("",-1,6),new e("á",0,1),new e("ä",0,1),new e("é",0,2),new e("ë",0,2),new e("í",0,3),new 
e("ï",0,3),new e("ó",0,4),new e("ö",0,4),new e("ú",0,5),new e("ü",0,5)],p=[new e("",-1,3),new e("I",0,2),new e("Y",0,1)],g=[new e("dd",-1,-1),new e("kk",-1,-1),new e("tt",-1,-1)],h=[new e("ene",-1,2),new e("se",-1,3),new e("en",-1,2),new e("heden",2,1),new e("s",-1,3)],k=[new e("end",-1,1),new e("ig",-1,2),new e("ing",-1,1),new e("lijk",-1,3),new e("baar",-1,4),new e("bar",-1,5)],v=[new e("aa",-1,-1),new e("ee",-1,-1),new e("oo",-1,-1),new e("uu",-1,-1)],q=[17,65,16,1,0,0,0,0,0,0,0,0,0,0,0,0,128],z=[1,0,0,17,65,16,1,0,0,0,0,0,0,0,0,0,0,0,0,128],j=[17,67,16,1,0,0,0,0,0,0,0,0,0,0,0,0,128],C=new i;this.setCurrent=function(r){C.setCurrent(r)},this.getCurrent=function(){return C.getCurrent()},this.stem=function(){var e=C.cursor;return r(),C.cursor=e,o(),C.limit_backward=e,C.cursor=C.limit,f(),C.cursor=C.limit_backward,s(),!0}};return function(r){return"function"==typeof r.update?r.update(function(r){return n.setCurrent(r),n.stem(),n.getCurrent()}):(n.setCurrent(r),n.stem(),n.getCurrent())}}(),r.Pipeline.registerFunction(r.nl.stemmer,"stemmer-nl"),r.nl.stopWordFilter=r.generateStopWordFilter(" aan al alles als altijd andere ben bij daar dan dat de der deze die dit doch doen door dus een eens en er ge geen geweest haar had heb hebben heeft hem het hier hij hoe hun iemand iets ik in is ja je kan kon kunnen maar me meer men met mij mijn moet na naar niet niets nog nu of om omdat onder ons ook op over reeds te tegen toch toen tot u uit uw van veel voor want waren was wat werd wezen wie wil worden wordt zal ze zelf zich zij zijn zo zonder zou".split(" ")),r.Pipeline.registerFunction(r.nl.stopWordFilter,"stopWordFilter-nl")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.no.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `Norwegian` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.no=function(){this.pipeline.reset(),this.pipeline.add(e.no.trimmer,e.no.stopWordFilter,e.no.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.no.stemmer))},e.no.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",e.no.trimmer=e.trimmerSupport.generateTrimmer(e.no.wordCharacters),e.Pipeline.registerFunction(e.no.trimmer,"trimmer-no"),e.no.stemmer=function(){var r=e.stemmerSupport.Among,n=e.stemmerSupport.SnowballProgram,i=new function(){function e(){var e,r=w.cursor+3;if(a=w.limit,0<=r||r<=w.limit){for(s=r;;){if(e=w.cursor,w.in_grouping(d,97,248)){w.cursor=e;break}if(e>=w.limit)return;w.cursor=e+1}for(;!w.out_grouping(d,97,248);){if(w.cursor>=w.limit)return;w.cursor++}a=w.cursor,a<s&&(a=s)}}function i(){var e,r,n;if(w.cursor>=a&&(r=w.limit_backward,w.limit_backward=a,w.ket=w.cursor,e=w.find_among_b(m,29),w.limit_backward=r,e))switch(w.bra=w.cursor,e){case 1:w.slice_del();break;case 2:n=w.limit-w.cursor,w.in_grouping_b(c,98,122)?w.slice_del():(w.cursor=w.limit-n,w.eq_s_b(1,"k")&&w.out_grouping_b(d,97,248)&&w.slice_del());break;case 3:w.slice_from("er")}}function t(){var e,r=w.limit-w.cursor;w.cursor>=a&&(e=w.limit_backward,w.limit_backward=a,w.ket=w.cursor,w.find_among_b(u,2)?(w.bra=w.cursor,w.limit_backward=e,w.cursor=w.limit-r,w.cursor>w.limit_backward&&(w.cursor--,w.bra=w.cursor,w.slice_del())):w.limit_backward=e)}function o(){var 
e,r;w.cursor>=a&&(r=w.limit_backward,w.limit_backward=a,w.ket=w.cursor,e=w.find_among_b(l,11),e?(w.bra=w.cursor,w.limit_backward=r,1==e&&w.slice_del()):w.limit_backward=r)}var s,a,m=[new r("a",-1,1),new r("e",-1,1),new r("ede",1,1),new r("ande",1,1),new r("ende",1,1),new r("ane",1,1),new r("ene",1,1),new r("hetene",6,1),new r("erte",1,3),new r("en",-1,1),new r("heten",9,1),new r("ar",-1,1),new r("er",-1,1),new r("heter",12,1),new r("s",-1,2),new r("as",14,1),new r("es",14,1),new r("edes",16,1),new r("endes",16,1),new r("enes",16,1),new r("hetenes",19,1),new r("ens",14,1),new r("hetens",21,1),new r("ers",14,1),new r("ets",14,1),new r("et",-1,1),new r("het",25,1),new r("ert",-1,3),new r("ast",-1,1)],u=[new r("dt",-1,-1),new r("vt",-1,-1)],l=[new r("leg",-1,1),new r("eleg",0,1),new r("ig",-1,1),new r("eig",2,1),new r("lig",2,1),new r("elig",4,1),new r("els",-1,1),new r("lov",-1,1),new r("elov",7,1),new r("slov",7,1),new r("hetslov",9,1)],d=[17,65,16,1,0,0,0,0,0,0,0,0,0,0,0,0,48,0,128],c=[119,125,149,1],w=new n;this.setCurrent=function(e){w.setCurrent(e)},this.getCurrent=function(){return w.getCurrent()},this.stem=function(){var r=w.cursor;return e(),w.limit_backward=r,w.cursor=w.limit,i(),w.cursor=w.limit,t(),w.cursor=w.limit,o(),!0}};return function(e){return"function"==typeof e.update?e.update(function(e){return i.setCurrent(e),i.stem(),i.getCurrent()}):(i.setCurrent(e),i.stem(),i.getCurrent())}}(),e.Pipeline.registerFunction(e.no.stemmer,"stemmer-no"),e.no.stopWordFilter=e.generateStopWordFilter("alle at av bare begge ble blei bli blir blitt både båe da de deg dei deim deira deires dem den denne der dere deres det dette di din disse ditt du dykk dykkar då eg ein eit eitt eller elles en enn er et ett etter for fordi fra før ha hadde han hans har hennar henne hennes her hjå ho hoe honom hoss hossen hun hva hvem hver hvilke hvilken hvis hvor hvordan hvorfor i ikke ikkje ikkje ingen ingi inkje inn inni ja jeg kan kom korleis korso kun kunne kva kvar kvarhelst kven kvi 
kvifor man mange me med medan meg meget mellom men mi min mine mitt mot mykje ned no noe noen noka noko nokon nokor nokre nå når og også om opp oss over på samme seg selv si si sia sidan siden sin sine sitt sjøl skal skulle slik so som som somme somt så sånn til um upp ut uten var vart varte ved vere verte vi vil ville vore vors vort vår være være vært å".split(" ")),e.Pipeline.registerFunction(e.no.stopWordFilter,"stopWordFilter-no")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.pt.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `Portuguese` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.pt=function(){this.pipeline.reset(),this.pipeline.add(e.pt.trimmer,e.pt.stopWordFilter,e.pt.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.pt.stemmer))},e.pt.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",e.pt.trimmer=e.trimmerSupport.generateTrimmer(e.pt.wordCharacters),e.Pipeline.registerFunction(e.pt.trimmer,"trimmer-pt"),e.pt.stemmer=function(){var r=e.stemmerSupport.Among,s=e.stemmerSupport.SnowballProgram,n=new function(){function e(){for(var e;;){if(z.bra=z.cursor,e=z.find_among(k,3))switch(z.ket=z.cursor,e){case 1:z.slice_from("a~");continue;case 2:z.slice_from("o~");continue;case 3:if(z.cursor>=z.limit)break;z.cursor++;continue}break}}function n(){if(z.out_grouping(y,97,250)){for(;!z.in_grouping(y,97,250);){if(z.cursor>=z.limit)return!0;z.cursor++}return!1}return!0}function i(){if(z.in_grouping(y,97,250))for(;!z.out_grouping(y,97,250);){if(z.cursor>=z.limit)return!1;z.cursor++}return g=z.cursor,!0}function o(){var e,r,s=z.cursor;if(z.in_grouping(y,97,250))if(e=z.cursor,n()){if(z.cursor=e,i())return}else g=z.cursor;if(z.cursor=s,z.out_grouping(y,97,250)){if(r=z.cursor,n()){if(z.cursor=r,!z.in_grouping(y,97,250)||z.cursor>=z.limit)return;z.cursor++}g=z.cursor}}function t(){for(;!z.in_grouping(y,97,250);){if(z.cursor>=z.limit)return!1;z.cursor++}for(;!z.out_grouping(y,97,250);){if(z.cursor>=z.limit)return!1;z.cursor++}return!0}function a(){var e=z.cursor;g=z.limit,b=g,h=g,o(),z.cursor=e,t()&&(b=z.cursor,t()&&(h=z.cursor))}function u(){for(var 
e;;){if(z.bra=z.cursor,e=z.find_among(q,3))switch(z.ket=z.cursor,e){case 1:z.slice_from("ã");continue;case 2:z.slice_from("õ");continue;case 3:if(z.cursor>=z.limit)break;z.cursor++;continue}break}}function w(){return g<=z.cursor}function m(){return b<=z.cursor}function c(){return h<=z.cursor}function l(){var e;if(z.ket=z.cursor,!(e=z.find_among_b(F,45)))return!1;switch(z.bra=z.cursor,e){case 1:if(!c())return!1;z.slice_del();break;case 2:if(!c())return!1;z.slice_from("log");break;case 3:if(!c())return!1;z.slice_from("u");break;case 4:if(!c())return!1;z.slice_from("ente");break;case 5:if(!m())return!1;z.slice_del(),z.ket=z.cursor,e=z.find_among_b(j,4),e&&(z.bra=z.cursor,c()&&(z.slice_del(),1==e&&(z.ket=z.cursor,z.eq_s_b(2,"at")&&(z.bra=z.cursor,c()&&z.slice_del()))));break;case 6:if(!c())return!1;z.slice_del(),z.ket=z.cursor,e=z.find_among_b(C,3),e&&(z.bra=z.cursor,1==e&&c()&&z.slice_del());break;case 7:if(!c())return!1;z.slice_del(),z.ket=z.cursor,e=z.find_among_b(P,3),e&&(z.bra=z.cursor,1==e&&c()&&z.slice_del());break;case 8:if(!c())return!1;z.slice_del(),z.ket=z.cursor,z.eq_s_b(2,"at")&&(z.bra=z.cursor,c()&&z.slice_del());break;case 9:if(!w()||!z.eq_s_b(1,"e"))return!1;z.slice_from("ir")}return!0}function f(){var e,r;if(z.cursor>=g){if(r=z.limit_backward,z.limit_backward=g,z.ket=z.cursor,e=z.find_among_b(S,120))return z.bra=z.cursor,1==e&&z.slice_del(),z.limit_backward=r,!0;z.limit_backward=r}return!1}function d(){var e;z.ket=z.cursor,(e=z.find_among_b(W,7))&&(z.bra=z.cursor,1==e&&w()&&z.slice_del())}function v(e,r){if(z.eq_s_b(1,e)){z.bra=z.cursor;var s=z.limit-z.cursor;if(z.eq_s_b(1,r))return z.cursor=z.limit-s,w()&&z.slice_del(),!1}return!0}function p(){var e;if(z.ket=z.cursor,e=z.find_among_b(L,4))switch(z.bra=z.cursor,e){case 1:w()&&(z.slice_del(),z.ket=z.cursor,z.limit-z.cursor,v("u","g")&&v("i","c"));break;case 2:z.slice_from("c")}}function _(){if(!l()&&(z.cursor=z.limit,!f()))return z.cursor=z.limit,void 
d();z.cursor=z.limit,z.ket=z.cursor,z.eq_s_b(1,"i")&&(z.bra=z.cursor,z.eq_s_b(1,"c")&&(z.cursor=z.limit,w()&&z.slice_del()))}var h,b,g,k=[new r("",-1,3),new r("ã",0,1),new r("õ",0,2)],q=[new r("",-1,3),new r("a~",0,1),new r("o~",0,2)],j=[new r("ic",-1,-1),new r("ad",-1,-1),new r("os",-1,-1),new r("iv",-1,1)],C=[new r("ante",-1,1),new r("avel",-1,1),new r("ível",-1,1)],P=[new r("ic",-1,1),new r("abil",-1,1),new r("iv",-1,1)],F=[new r("ica",-1,1),new r("ância",-1,1),new r("ência",-1,4),new r("ira",-1,9),new r("adora",-1,1),new r("osa",-1,1),new r("ista",-1,1),new r("iva",-1,8),new r("eza",-1,1),new r("logía",-1,2),new r("idade",-1,7),new r("ante",-1,1),new r("mente",-1,6),new r("amente",12,5),new r("ável",-1,1),new r("ível",-1,1),new r("ución",-1,3),new r("ico",-1,1),new r("ismo",-1,1),new r("oso",-1,1),new r("amento",-1,1),new r("imento",-1,1),new r("ivo",-1,8),new r("aça~o",-1,1),new r("ador",-1,1),new r("icas",-1,1),new r("ências",-1,4),new r("iras",-1,9),new r("adoras",-1,1),new r("osas",-1,1),new r("istas",-1,1),new r("ivas",-1,8),new r("ezas",-1,1),new r("logías",-1,2),new r("idades",-1,7),new r("uciones",-1,3),new r("adores",-1,1),new r("antes",-1,1),new r("aço~es",-1,1),new r("icos",-1,1),new r("ismos",-1,1),new r("osos",-1,1),new r("amentos",-1,1),new r("imentos",-1,1),new r("ivos",-1,8)],S=[new r("ada",-1,1),new r("ida",-1,1),new r("ia",-1,1),new r("aria",2,1),new r("eria",2,1),new r("iria",2,1),new r("ara",-1,1),new r("era",-1,1),new r("ira",-1,1),new r("ava",-1,1),new r("asse",-1,1),new r("esse",-1,1),new r("isse",-1,1),new r("aste",-1,1),new r("este",-1,1),new r("iste",-1,1),new r("ei",-1,1),new r("arei",16,1),new r("erei",16,1),new r("irei",16,1),new r("am",-1,1),new r("iam",20,1),new r("ariam",21,1),new r("eriam",21,1),new r("iriam",21,1),new r("aram",20,1),new r("eram",20,1),new r("iram",20,1),new r("avam",20,1),new r("em",-1,1),new r("arem",29,1),new r("erem",29,1),new r("irem",29,1),new r("assem",29,1),new r("essem",29,1),new r("issem",29,1),new 
r("ado",-1,1),new r("ido",-1,1),new r("ando",-1,1),new r("endo",-1,1),new r("indo",-1,1),new r("ara~o",-1,1),new r("era~o",-1,1),new r("ira~o",-1,1),new r("ar",-1,1),new r("er",-1,1),new r("ir",-1,1),new r("as",-1,1),new r("adas",47,1),new r("idas",47,1),new r("ias",47,1),new r("arias",50,1),new r("erias",50,1),new r("irias",50,1),new r("aras",47,1),new r("eras",47,1),new r("iras",47,1),new r("avas",47,1),new r("es",-1,1),new r("ardes",58,1),new r("erdes",58,1),new r("irdes",58,1),new r("ares",58,1),new r("eres",58,1),new r("ires",58,1),new r("asses",58,1),new r("esses",58,1),new r("isses",58,1),new r("astes",58,1),new r("estes",58,1),new r("istes",58,1),new r("is",-1,1),new r("ais",71,1),new r("eis",71,1),new r("areis",73,1),new r("ereis",73,1),new r("ireis",73,1),new r("áreis",73,1),new r("éreis",73,1),new r("íreis",73,1),new r("ásseis",73,1),new r("ésseis",73,1),new r("ísseis",73,1),new r("áveis",73,1),new r("íeis",73,1),new r("aríeis",84,1),new r("eríeis",84,1),new r("iríeis",84,1),new r("ados",-1,1),new r("idos",-1,1),new r("amos",-1,1),new r("áramos",90,1),new r("éramos",90,1),new r("íramos",90,1),new r("ávamos",90,1),new r("íamos",90,1),new r("aríamos",95,1),new r("eríamos",95,1),new r("iríamos",95,1),new r("emos",-1,1),new r("aremos",99,1),new r("eremos",99,1),new r("iremos",99,1),new r("ássemos",99,1),new r("êssemos",99,1),new r("íssemos",99,1),new r("imos",-1,1),new r("armos",-1,1),new r("ermos",-1,1),new r("irmos",-1,1),new r("ámos",-1,1),new r("arás",-1,1),new r("erás",-1,1),new r("irás",-1,1),new r("eu",-1,1),new r("iu",-1,1),new r("ou",-1,1),new r("ará",-1,1),new r("erá",-1,1),new r("irá",-1,1)],W=[new r("a",-1,1),new r("i",-1,1),new r("o",-1,1),new r("os",-1,1),new r("á",-1,1),new r("í",-1,1),new r("ó",-1,1)],L=[new r("e",-1,1),new r("ç",-1,2),new r("é",-1,1),new r("ê",-1,1)],y=[17,65,16,0,0,0,0,0,0,0,0,0,0,0,0,0,3,19,12,2],z=new s;this.setCurrent=function(e){z.setCurrent(e)},this.getCurrent=function(){return z.getCurrent()},this.stem=function(){var 
r=z.cursor;return e(),z.cursor=r,a(),z.limit_backward=r,z.cursor=z.limit,_(),z.cursor=z.limit,p(),z.cursor=z.limit_backward,u(),!0}};return function(e){return"function"==typeof e.update?e.update(function(e){return n.setCurrent(e),n.stem(),n.getCurrent()}):(n.setCurrent(e),n.stem(),n.getCurrent())}}(),e.Pipeline.registerFunction(e.pt.stemmer,"stemmer-pt"),e.pt.stopWordFilter=e.generateStopWordFilter("a ao aos aquela aquelas aquele aqueles aquilo as até com como da das de dela delas dele deles depois do dos e ela elas ele eles em entre era eram essa essas esse esses esta estamos estas estava estavam este esteja estejam estejamos estes esteve estive estivemos estiver estivera estiveram estiverem estivermos estivesse estivessem estivéramos estivéssemos estou está estávamos estão eu foi fomos for fora foram forem formos fosse fossem fui fôramos fôssemos haja hajam hajamos havemos hei houve houvemos houver houvera houveram houverei houverem houveremos houveria houveriam houvermos houverá houverão houveríamos houvesse houvessem houvéramos houvéssemos há hão isso isto já lhe lhes mais mas me mesmo meu meus minha minhas muito na nas nem no nos nossa nossas nosso nossos num numa não nós o os ou para pela pelas pelo pelos por qual quando que quem se seja sejam sejamos sem serei seremos seria seriam será serão seríamos seu seus somos sou sua suas são só também te tem temos tenha tenham tenhamos tenho terei teremos teria teriam terá terão teríamos teu teus teve tinha tinham tive tivemos tiver tivera tiveram tiverem tivermos tivesse tivessem tivéramos tivéssemos tu tua tuas tém tínhamos um uma você vocês vos à às éramos".split(" ")),e.Pipeline.registerFunction(e.pt.stopWordFilter,"stopWordFilter-pt")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.ro.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `Romanian` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(e,i){"function"==typeof define&&define.amd?define(i):"object"==typeof exports?module.exports=i():i()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.ro=function(){this.pipeline.reset(),this.pipeline.add(e.ro.trimmer,e.ro.stopWordFilter,e.ro.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.ro.stemmer))},e.ro.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",e.ro.trimmer=e.trimmerSupport.generateTrimmer(e.ro.wordCharacters),e.Pipeline.registerFunction(e.ro.trimmer,"trimmer-ro"),e.ro.stemmer=function(){var i=e.stemmerSupport.Among,r=e.stemmerSupport.SnowballProgram,n=new function(){function e(e,i){L.eq_s(1,e)&&(L.ket=L.cursor,L.in_grouping(W,97,259)&&L.slice_from(i))}function n(){for(var i,r;;){if(i=L.cursor,L.in_grouping(W,97,259)&&(r=L.cursor,L.bra=r,e("u","U"),L.cursor=r,e("i","I")),L.cursor=i,L.cursor>=L.limit)break;L.cursor++}}function t(){if(L.out_grouping(W,97,259)){for(;!L.in_grouping(W,97,259);){if(L.cursor>=L.limit)return!0;L.cursor++}return!1}return!0}function a(){if(L.in_grouping(W,97,259))for(;!L.out_grouping(W,97,259);){if(L.cursor>=L.limit)return!0;L.cursor++}return!1}function o(){var e,i,r=L.cursor;if(L.in_grouping(W,97,259)){if(e=L.cursor,!t())return void(h=L.cursor);if(L.cursor=e,!a())return void(h=L.cursor)}L.cursor=r,L.out_grouping(W,97,259)&&(i=L.cursor,t()&&(L.cursor=i,L.in_grouping(W,97,259)&&L.cursor<L.limit&&L.cursor++),h=L.cursor)}function u(){for(;!L.in_grouping(W,97,259);){if(L.cursor>=L.limit)return!1;L.cursor++}for(;!L.out_grouping(W,97,259);){if(L.cursor>=L.limit)return!1;L.cursor++}return!0}function c(){var 
e=L.cursor;h=L.limit,k=h,g=h,o(),L.cursor=e,u()&&(k=L.cursor,u()&&(g=L.cursor))}function s(){for(var e;;){if(L.bra=L.cursor,e=L.find_among(z,3))switch(L.ket=L.cursor,e){case 1:L.slice_from("i");continue;case 2:L.slice_from("u");continue;case 3:if(L.cursor>=L.limit)break;L.cursor++;continue}break}}function w(){return h<=L.cursor}function m(){return k<=L.cursor}function l(){return g<=L.cursor}function f(){var e,i;if(L.ket=L.cursor,(e=L.find_among_b(C,16))&&(L.bra=L.cursor,m()))switch(e){case 1:L.slice_del();break;case 2:L.slice_from("a");break;case 3:L.slice_from("e");break;case 4:L.slice_from("i");break;case 5:i=L.limit-L.cursor,L.eq_s_b(2,"ab")||(L.cursor=L.limit-i,L.slice_from("i"));break;case 6:L.slice_from("at");break;case 7:L.slice_from("aţi")}}function p(){var e,i=L.limit-L.cursor;if(L.ket=L.cursor,(e=L.find_among_b(P,46))&&(L.bra=L.cursor,m())){switch(e){case 1:L.slice_from("abil");break;case 2:L.slice_from("ibil");break;case 3:L.slice_from("iv");break;case 4:L.slice_from("ic");break;case 5:L.slice_from("at");break;case 6:L.slice_from("it")}return _=!0,L.cursor=L.limit-i,!0}return!1}function d(){var e,i;for(_=!1;;)if(i=L.limit-L.cursor,!p()){L.cursor=L.limit-i;break}if(L.ket=L.cursor,(e=L.find_among_b(F,62))&&(L.bra=L.cursor,l())){switch(e){case 1:L.slice_del();break;case 2:L.eq_s_b(1,"ţ")&&(L.bra=L.cursor,L.slice_from("t"));break;case 3:L.slice_from("ist")}_=!0}}function b(){var e,i,r;if(L.cursor>=h){if(i=L.limit_backward,L.limit_backward=h,L.ket=L.cursor,e=L.find_among_b(q,94))switch(L.bra=L.cursor,e){case 1:if(r=L.limit-L.cursor,!L.out_grouping_b(W,97,259)&&(L.cursor=L.limit-r,!L.eq_s_b(1,"u")))break;case 2:L.slice_del()}L.limit_backward=i}}function v(){var e;L.ket=L.cursor,(e=L.find_among_b(S,5))&&(L.bra=L.cursor,w()&&1==e&&L.slice_del())}var _,g,k,h,z=[new i("",-1,3),new i("I",0,1),new i("U",0,2)],C=[new i("ea",-1,3),new i("aţia",-1,7),new i("aua",-1,2),new i("iua",-1,4),new i("aţie",-1,7),new i("ele",-1,3),new i("ile",-1,5),new i("iile",6,4),new 
i("iei",-1,4),new i("atei",-1,6),new i("ii",-1,4),new i("ului",-1,1),new i("ul",-1,1),new i("elor",-1,3),new i("ilor",-1,4),new i("iilor",14,4)],P=[new i("icala",-1,4),new i("iciva",-1,4),new i("ativa",-1,5),new i("itiva",-1,6),new i("icale",-1,4),new i("aţiune",-1,5),new i("iţiune",-1,6),new i("atoare",-1,5),new i("itoare",-1,6),new i("ătoare",-1,5),new i("icitate",-1,4),new i("abilitate",-1,1),new i("ibilitate",-1,2),new i("ivitate",-1,3),new i("icive",-1,4),new i("ative",-1,5),new i("itive",-1,6),new i("icali",-1,4),new i("atori",-1,5),new i("icatori",18,4),new i("itori",-1,6),new i("ători",-1,5),new i("icitati",-1,4),new i("abilitati",-1,1),new i("ivitati",-1,3),new i("icivi",-1,4),new i("ativi",-1,5),new i("itivi",-1,6),new i("icităi",-1,4),new i("abilităi",-1,1),new i("ivităi",-1,3),new i("icităţi",-1,4),new i("abilităţi",-1,1),new i("ivităţi",-1,3),new i("ical",-1,4),new i("ator",-1,5),new i("icator",35,4),new i("itor",-1,6),new i("ător",-1,5),new i("iciv",-1,4),new i("ativ",-1,5),new i("itiv",-1,6),new i("icală",-1,4),new i("icivă",-1,4),new i("ativă",-1,5),new i("itivă",-1,6)],F=[new i("ica",-1,1),new i("abila",-1,1),new i("ibila",-1,1),new i("oasa",-1,1),new i("ata",-1,1),new i("ita",-1,1),new i("anta",-1,1),new i("ista",-1,3),new i("uta",-1,1),new i("iva",-1,1),new i("ic",-1,1),new i("ice",-1,1),new i("abile",-1,1),new i("ibile",-1,1),new i("isme",-1,3),new i("iune",-1,2),new i("oase",-1,1),new i("ate",-1,1),new i("itate",17,1),new i("ite",-1,1),new i("ante",-1,1),new i("iste",-1,3),new i("ute",-1,1),new i("ive",-1,1),new i("ici",-1,1),new i("abili",-1,1),new i("ibili",-1,1),new i("iuni",-1,2),new i("atori",-1,1),new i("osi",-1,1),new i("ati",-1,1),new i("itati",30,1),new i("iti",-1,1),new i("anti",-1,1),new i("isti",-1,3),new i("uti",-1,1),new i("işti",-1,3),new i("ivi",-1,1),new i("ităi",-1,1),new i("oşi",-1,1),new i("ităţi",-1,1),new i("abil",-1,1),new i("ibil",-1,1),new i("ism",-1,3),new i("ator",-1,1),new i("os",-1,1),new i("at",-1,1),new 
i("it",-1,1),new i("ant",-1,1),new i("ist",-1,3),new i("ut",-1,1),new i("iv",-1,1),new i("ică",-1,1),new i("abilă",-1,1),new i("ibilă",-1,1),new i("oasă",-1,1),new i("ată",-1,1),new i("ită",-1,1),new i("antă",-1,1),new i("istă",-1,3),new i("ută",-1,1),new i("ivă",-1,1)],q=[new i("ea",-1,1),new i("ia",-1,1),new i("esc",-1,1),new i("ăsc",-1,1),new i("ind",-1,1),new i("ând",-1,1),new i("are",-1,1),new i("ere",-1,1),new i("ire",-1,1),new i("âre",-1,1),new i("se",-1,2),new i("ase",10,1),new i("sese",10,2),new i("ise",10,1),new i("use",10,1),new i("âse",10,1),new i("eşte",-1,1),new i("ăşte",-1,1),new i("eze",-1,1),new i("ai",-1,1),new i("eai",19,1),new i("iai",19,1),new i("sei",-1,2),new i("eşti",-1,1),new i("ăşti",-1,1),new i("ui",-1,1),new i("ezi",-1,1),new i("âi",-1,1),new i("aşi",-1,1),new i("seşi",-1,2),new i("aseşi",29,1),new i("seseşi",29,2),new i("iseşi",29,1),new i("useşi",29,1),new i("âseşi",29,1),new i("işi",-1,1),new i("uşi",-1,1),new i("âşi",-1,1),new i("aţi",-1,2),new i("eaţi",38,1),new i("iaţi",38,1),new i("eţi",-1,2),new i("iţi",-1,2),new i("âţi",-1,2),new i("arăţi",-1,1),new i("serăţi",-1,2),new i("aserăţi",45,1),new i("seserăţi",45,2),new i("iserăţi",45,1),new i("userăţi",45,1),new i("âserăţi",45,1),new i("irăţi",-1,1),new i("urăţi",-1,1),new i("ârăţi",-1,1),new i("am",-1,1),new i("eam",54,1),new i("iam",54,1),new i("em",-1,2),new i("asem",57,1),new i("sesem",57,2),new i("isem",57,1),new i("usem",57,1),new i("âsem",57,1),new i("im",-1,2),new i("âm",-1,2),new i("ăm",-1,2),new i("arăm",65,1),new i("serăm",65,2),new i("aserăm",67,1),new i("seserăm",67,2),new i("iserăm",67,1),new i("userăm",67,1),new i("âserăm",67,1),new i("irăm",65,1),new i("urăm",65,1),new i("ârăm",65,1),new i("au",-1,1),new i("eau",76,1),new i("iau",76,1),new i("indu",-1,1),new i("ându",-1,1),new i("ez",-1,1),new i("ească",-1,1),new i("ară",-1,1),new i("seră",-1,2),new i("aseră",84,1),new i("seseră",84,2),new i("iseră",84,1),new i("useră",84,1),new i("âseră",84,1),new i("iră",-1,1),new 
i("ură",-1,1),new i("âră",-1,1),new i("ează",-1,1)],S=[new i("a",-1,1),new i("e",-1,1),new i("ie",1,1),new i("i",-1,1),new i("ă",-1,1)],W=[17,65,16,0,0,0,0,0,0,0,0,0,0,0,0,0,2,32,0,0,4],L=new r;this.setCurrent=function(e){L.setCurrent(e)},this.getCurrent=function(){return L.getCurrent()},this.stem=function(){var e=L.cursor;return n(),L.cursor=e,c(),L.limit_backward=e,L.cursor=L.limit,f(),L.cursor=L.limit,d(),L.cursor=L.limit,_||(L.cursor=L.limit,b(),L.cursor=L.limit),v(),L.cursor=L.limit_backward,s(),!0}};return function(e){return"function"==typeof e.update?e.update(function(e){return n.setCurrent(e),n.stem(),n.getCurrent()}):(n.setCurrent(e),n.stem(),n.getCurrent())}}(),e.Pipeline.registerFunction(e.ro.stemmer,"stemmer-ro"),e.ro.stopWordFilter=e.generateStopWordFilter("acea aceasta această aceea acei aceia acel acela acele acelea acest acesta aceste acestea aceşti aceştia acolo acord acum ai aia aibă aici al ale alea altceva altcineva am ar are asemenea asta astea astăzi asupra au avea avem aveţi azi aş aşadar aţi bine bucur bună ca care caut ce cel ceva chiar cinci cine cineva contra cu cum cumva curând curînd când cât câte câtva câţi cînd cît cîte cîtva cîţi că căci cărei căror cărui către da dacă dar datorită dată dau de deci deja deoarece departe deşi din dinaintea dintr- dintre doi doilea două drept după dă ea ei el ele eram este eu eşti face fata fi fie fiecare fii fim fiu fiţi frumos fără graţie halbă iar ieri la le li lor lui lângă lîngă mai mea mei mele mereu meu mi mie mine mult multă mulţi mulţumesc mâine mîine mă ne nevoie nici nicăieri nimeni nimeri nimic nişte noastre noastră noi noroc nostru nouă noştri nu opt ori oricare orice oricine oricum oricând oricât oricînd oricît oriunde patra patru patrulea pe pentru peste pic poate pot prea prima primul prin puţin puţina puţină până pînă rog sa sale sau se spate spre sub sunt suntem sunteţi sută sînt sîntem sînteţi să săi său ta tale te timp tine toate toată tot totuşi toţi trei treia treilea tu tăi tău 
un una unde undeva unei uneia unele uneori unii unor unora unu unui unuia unul vi voastre voastră voi vostru vouă voştri vreme vreo vreun vă zece zero zi zice îi îl îmi împotriva în înainte înaintea încotro încât încît între întrucât întrucît îţi ăla ălea ăsta ăstea ăştia şapte şase şi ştiu ţi ţie".split(" ")),e.Pipeline.registerFunction(e.ro.stopWordFilter,"stopWordFilter-ro")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.ru.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `Russian` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(e,n){"function"==typeof define&&define.amd?define(n):"object"==typeof exports?module.exports=n():n()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.ru=function(){this.pipeline.reset(),this.pipeline.add(e.ru.trimmer,e.ru.stopWordFilter,e.ru.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.ru.stemmer))},e.ru.wordCharacters="Ѐ-҄҇-ԯᴫᵸⷠ-ⷿꙀ-ꚟ︮︯",e.ru.trimmer=e.trimmerSupport.generateTrimmer(e.ru.wordCharacters),e.Pipeline.registerFunction(e.ru.trimmer,"trimmer-ru"),e.ru.stemmer=function(){var n=e.stemmerSupport.Among,r=e.stemmerSupport.SnowballProgram,t=new function(){function e(){for(;!W.in_grouping(S,1072,1103);){if(W.cursor>=W.limit)return!1;W.cursor++}return!0}function t(){for(;!W.out_grouping(S,1072,1103);){if(W.cursor>=W.limit)return!1;W.cursor++}return!0}function w(){b=W.limit,_=b,e()&&(b=W.cursor,t()&&e()&&t()&&(_=W.cursor))}function i(){return _<=W.cursor}function u(e,n){var r,t;if(W.ket=W.cursor,r=W.find_among_b(e,n)){switch(W.bra=W.cursor,r){case 1:if(t=W.limit-W.cursor,!W.eq_s_b(1,"а")&&(W.cursor=W.limit-t,!W.eq_s_b(1,"я")))return!1;case 2:W.slice_del()}return!0}return!1}function o(){return u(h,9)}function s(e,n){var r;return W.ket=W.cursor,!!(r=W.find_among_b(e,n))&&(W.bra=W.cursor,1==r&&W.slice_del(),!0)}function c(){return s(g,26)}function m(){return!!c()&&(u(C,8),!0)}function f(){return s(k,2)}function l(){return u(P,46)}function a(){s(v,36)}function p(){var e;W.ket=W.cursor,(e=W.find_among_b(F,2))&&(W.bra=W.cursor,i()&&1==e&&W.slice_del())}function d(){var e;if(W.ket=W.cursor,e=W.find_among_b(q,4))switch(W.bra=W.cursor,e){case 1:if(W.slice_del(),W.ket=W.cursor,!W.eq_s_b(1,"н"))break;W.bra=W.cursor;case 2:if(!W.eq_s_b(1,"н"))break;case 3:W.slice_del()}}var 
_,b,h=[new n("в",-1,1),new n("ив",0,2),new n("ыв",0,2),new n("вши",-1,1),new n("ивши",3,2),new n("ывши",3,2),new n("вшись",-1,1),new n("ившись",6,2),new n("ывшись",6,2)],g=[new n("ее",-1,1),new n("ие",-1,1),new n("ое",-1,1),new n("ые",-1,1),new n("ими",-1,1),new n("ыми",-1,1),new n("ей",-1,1),new n("ий",-1,1),new n("ой",-1,1),new n("ый",-1,1),new n("ем",-1,1),new n("им",-1,1),new n("ом",-1,1),new n("ым",-1,1),new n("его",-1,1),new n("ого",-1,1),new n("ему",-1,1),new n("ому",-1,1),new n("их",-1,1),new n("ых",-1,1),new n("ею",-1,1),new n("ою",-1,1),new n("ую",-1,1),new n("юю",-1,1),new n("ая",-1,1),new n("яя",-1,1)],C=[new n("ем",-1,1),new n("нн",-1,1),new n("вш",-1,1),new n("ивш",2,2),new n("ывш",2,2),new n("щ",-1,1),new n("ющ",5,1),new n("ующ",6,2)],k=[new n("сь",-1,1),new n("ся",-1,1)],P=[new n("ла",-1,1),new n("ила",0,2),new n("ыла",0,2),new n("на",-1,1),new n("ена",3,2),new n("ете",-1,1),new n("ите",-1,2),new n("йте",-1,1),new n("ейте",7,2),new n("уйте",7,2),new n("ли",-1,1),new n("или",10,2),new n("ыли",10,2),new n("й",-1,1),new n("ей",13,2),new n("уй",13,2),new n("л",-1,1),new n("ил",16,2),new n("ыл",16,2),new n("ем",-1,1),new n("им",-1,2),new n("ым",-1,2),new n("н",-1,1),new n("ен",22,2),new n("ло",-1,1),new n("ило",24,2),new n("ыло",24,2),new n("но",-1,1),new n("ено",27,2),new n("нно",27,1),new n("ет",-1,1),new n("ует",30,2),new n("ит",-1,2),new n("ыт",-1,2),new n("ют",-1,1),new n("уют",34,2),new n("ят",-1,2),new n("ны",-1,1),new n("ены",37,2),new n("ть",-1,1),new n("ить",39,2),new n("ыть",39,2),new n("ешь",-1,1),new n("ишь",-1,2),new n("ю",-1,2),new n("ую",44,2)],v=[new n("а",-1,1),new n("ев",-1,1),new n("ов",-1,1),new n("е",-1,1),new n("ие",3,1),new n("ье",3,1),new n("и",-1,1),new n("еи",6,1),new n("ии",6,1),new n("ами",6,1),new n("ями",6,1),new n("иями",10,1),new n("й",-1,1),new n("ей",12,1),new n("ией",13,1),new n("ий",12,1),new n("ой",12,1),new n("ам",-1,1),new n("ем",-1,1),new n("ием",18,1),new n("ом",-1,1),new n("ям",-1,1),new n("иям",21,1),new 
n("о",-1,1),new n("у",-1,1),new n("ах",-1,1),new n("ях",-1,1),new n("иях",26,1),new n("ы",-1,1),new n("ь",-1,1),new n("ю",-1,1),new n("ию",30,1),new n("ью",30,1),new n("я",-1,1),new n("ия",33,1),new n("ья",33,1)],F=[new n("ост",-1,1),new n("ость",-1,1)],q=[new n("ейше",-1,1),new n("н",-1,2),new n("ейш",-1,1),new n("ь",-1,3)],S=[33,65,8,232],W=new r;this.setCurrent=function(e){W.setCurrent(e)},this.getCurrent=function(){return W.getCurrent()},this.stem=function(){return w(),W.cursor=W.limit,!(W.cursor<b)&&(W.limit_backward=b,o()||(W.cursor=W.limit,f()||(W.cursor=W.limit),m()||(W.cursor=W.limit,l()||(W.cursor=W.limit,a()))),W.cursor=W.limit,W.ket=W.cursor,W.eq_s_b(1,"и")?(W.bra=W.cursor,W.slice_del()):W.cursor=W.limit,p(),W.cursor=W.limit,d(),!0)}};return function(e){return"function"==typeof e.update?e.update(function(e){return t.setCurrent(e),t.stem(),t.getCurrent()}):(t.setCurrent(e),t.stem(),t.getCurrent())}}(),e.Pipeline.registerFunction(e.ru.stemmer,"stemmer-ru"),e.ru.stopWordFilter=e.generateStopWordFilter("алло без близко более больше будем будет будете будешь будто буду будут будь бы бывает бывь был была были было быть в важная важное важные важный вам вами вас ваш ваша ваше ваши вверх вдали вдруг ведь везде весь вниз внизу во вокруг вон восемнадцатый восемнадцать восемь восьмой вот впрочем времени время все всегда всего всем всеми всему всех всею всю всюду вся всё второй вы г где говорил говорит год года году да давно даже далеко дальше даром два двадцатый двадцать две двенадцатый двенадцать двух девятнадцатый девятнадцать девятый девять действительно дел день десятый десять для до довольно долго должно другая другие других друго другое другой е его ее ей ему если есть еще ещё ею её ж же жизнь за занят занята занято заняты затем зато зачем здесь значит и из или им именно иметь ими имя иногда их к каждая каждое каждые каждый кажется как какая какой кем когда кого ком кому конечно которая которого которой которые который которых кроме кругом кто куда лет ли 
лишь лучше люди м мало между меля менее меньше меня миллионов мимо мира мне много многочисленная многочисленное многочисленные многочисленный мной мною мог могут мож может можно можхо мои мой мор мочь моя моё мы на наверху над надо назад наиболее наконец нам нами нас начала наш наша наше наши не него недавно недалеко нее ней нельзя нем немного нему непрерывно нередко несколько нет нею неё ни нибудь ниже низко никогда никуда ними них ничего но ну нужно нх о об оба обычно один одиннадцатый одиннадцать однажды однако одного одной около он она они оно опять особенно от отовсюду отсюда очень первый перед по под пожалуйста позже пока пор пора после посреди потом потому почему почти прекрасно при про просто против процентов пятнадцатый пятнадцать пятый пять раз разве рано раньше рядом с сам сама сами самим самими самих само самого самой самом самому саму свое своего своей свои своих свою сеаой себе себя сегодня седьмой сейчас семнадцатый семнадцать семь сих сказал сказала сказать сколько слишком сначала снова со собой собою совсем спасибо стал суть т та так такая также такие такое такой там твой твоя твоё те тебе тебя тем теми теперь тех то тобой тобою тогда того тоже только том тому тот тою третий три тринадцатый тринадцать ту туда тут ты тысяч у уж уже уметь хорошо хотеть хоть хотя хочешь часто чаще чего человек чем чему через четвертый четыре четырнадцатый четырнадцать что чтоб чтобы чуть шестнадцатый шестнадцать шестой шесть эта эти этим этими этих это этого этой это�� этому этот эту я \ufeffа".split(" ")),e.Pipeline.registerFunction(e.ru.stopWordFilter,"stopWordFilter-ru")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js ADDED
@@ -0,0 +1 @@
 
 
1
+ !function(r,t){"function"==typeof define&&define.amd?define(t):"object"==typeof exports?module.exports=t():t()(r.lunr)}(this,function(){return function(r){r.stemmerSupport={Among:function(r,t,i,s){if(this.toCharArray=function(r){for(var t=r.length,i=new Array(t),s=0;s<t;s++)i[s]=r.charCodeAt(s);return i},!r&&""!=r||!t&&0!=t||!i)throw"Bad Among initialisation: s:"+r+", substring_i: "+t+", result: "+i;this.s_size=r.length,this.s=this.toCharArray(r),this.substring_i=t,this.result=i,this.method=s},SnowballProgram:function(){var r;return{bra:0,ket:0,limit:0,cursor:0,limit_backward:0,setCurrent:function(t){r=t,this.cursor=0,this.limit=t.length,this.limit_backward=0,this.bra=this.cursor,this.ket=this.limit},getCurrent:function(){var t=r;return r=null,t},in_grouping:function(t,i,s){if(this.cursor<this.limit){var e=r.charCodeAt(this.cursor);if(e<=s&&e>=i&&(e-=i,t[e>>3]&1<<(7&e)))return this.cursor++,!0}return!1},in_grouping_b:function(t,i,s){if(this.cursor>this.limit_backward){var e=r.charCodeAt(this.cursor-1);if(e<=s&&e>=i&&(e-=i,t[e>>3]&1<<(7&e)))return this.cursor--,!0}return!1},out_grouping:function(t,i,s){if(this.cursor<this.limit){var e=r.charCodeAt(this.cursor);if(e>s||e<i)return this.cursor++,!0;if(e-=i,!(t[e>>3]&1<<(7&e)))return this.cursor++,!0}return!1},out_grouping_b:function(t,i,s){if(this.cursor>this.limit_backward){var e=r.charCodeAt(this.cursor-1);if(e>s||e<i)return this.cursor--,!0;if(e-=i,!(t[e>>3]&1<<(7&e)))return this.cursor--,!0}return!1},eq_s:function(t,i){if(this.limit-this.cursor<t)return!1;for(var s=0;s<t;s++)if(r.charCodeAt(this.cursor+s)!=i.charCodeAt(s))return!1;return this.cursor+=t,!0},eq_s_b:function(t,i){if(this.cursor-this.limit_backward<t)return!1;for(var s=0;s<t;s++)if(r.charCodeAt(this.cursor-t+s)!=i.charCodeAt(s))return!1;return this.cursor-=t,!0},find_among:function(t,i){for(var s=0,e=i,n=this.cursor,u=this.limit,o=0,h=0,c=!1;;){for(var 
a=s+(e-s>>1),f=0,l=o<h?o:h,_=t[a],m=l;m<_.s_size;m++){if(n+l==u){f=-1;break}if(f=r.charCodeAt(n+l)-_.s[m])break;l++}if(f<0?(e=a,h=l):(s=a,o=l),e-s<=1){if(s>0||e==s||c)break;c=!0}}for(;;){var _=t[s];if(o>=_.s_size){if(this.cursor=n+_.s_size,!_.method)return _.result;var b=_.method();if(this.cursor=n+_.s_size,b)return _.result}if((s=_.substring_i)<0)return 0}},find_among_b:function(t,i){for(var s=0,e=i,n=this.cursor,u=this.limit_backward,o=0,h=0,c=!1;;){for(var a=s+(e-s>>1),f=0,l=o<h?o:h,_=t[a],m=_.s_size-1-l;m>=0;m--){if(n-l==u){f=-1;break}if(f=r.charCodeAt(n-1-l)-_.s[m])break;l++}if(f<0?(e=a,h=l):(s=a,o=l),e-s<=1){if(s>0||e==s||c)break;c=!0}}for(;;){var _=t[s];if(o>=_.s_size){if(this.cursor=n-_.s_size,!_.method)return _.result;var b=_.method();if(this.cursor=n-_.s_size,b)return _.result}if((s=_.substring_i)<0)return 0}},replace_s:function(t,i,s){var e=s.length-(i-t),n=r.substring(0,t),u=r.substring(i);return r=n+s+u,this.limit+=e,this.cursor>=i?this.cursor+=e:this.cursor>t&&(this.cursor=t),e},slice_check:function(){if(this.bra<0||this.bra>this.ket||this.ket>this.limit||this.limit>r.length)throw"faulty slice operation"},slice_from:function(r){this.slice_check(),this.replace_s(this.bra,this.ket,r)},slice_del:function(){this.slice_from("")},insert:function(r,t,i){var s=this.replace_s(r,t,i);r<=this.bra&&(this.bra+=s),r<=this.ket&&(this.ket+=s)},slice_to:function(){return this.slice_check(),r.substring(this.bra,this.ket)},eq_v_b:function(r){return this.eq_s_b(r.length,r)}}}},r.trimmerSupport={generateTrimmer:function(r){var t=new RegExp("^[^"+r+"]+"),i=new RegExp("[^"+r+"]+$");return function(r){return"function"==typeof r.update?r.update(function(r){return r.replace(t,"").replace(i,"")}):r.replace(t,"").replace(i,"")}}}}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.sv.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `Swedish` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.sv=function(){this.pipeline.reset(),this.pipeline.add(e.sv.trimmer,e.sv.stopWordFilter,e.sv.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.sv.stemmer))},e.sv.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",e.sv.trimmer=e.trimmerSupport.generateTrimmer(e.sv.wordCharacters),e.Pipeline.registerFunction(e.sv.trimmer,"trimmer-sv"),e.sv.stemmer=function(){var r=e.stemmerSupport.Among,n=e.stemmerSupport.SnowballProgram,t=new function(){function e(){var e,r=w.cursor+3;if(o=w.limit,0<=r||r<=w.limit){for(a=r;;){if(e=w.cursor,w.in_grouping(l,97,246)){w.cursor=e;break}if(w.cursor=e,w.cursor>=w.limit)return;w.cursor++}for(;!w.out_grouping(l,97,246);){if(w.cursor>=w.limit)return;w.cursor++}o=w.cursor,o<a&&(o=a)}}function t(){var e,r=w.limit_backward;if(w.cursor>=o&&(w.limit_backward=o,w.cursor=w.limit,w.ket=w.cursor,e=w.find_among_b(u,37),w.limit_backward=r,e))switch(w.bra=w.cursor,e){case 1:w.slice_del();break;case 2:w.in_grouping_b(d,98,121)&&w.slice_del()}}function i(){var e=w.limit_backward;w.cursor>=o&&(w.limit_backward=o,w.cursor=w.limit,w.find_among_b(c,7)&&(w.cursor=w.limit,w.ket=w.cursor,w.cursor>w.limit_backward&&(w.bra=--w.cursor,w.slice_del())),w.limit_backward=e)}function s(){var e,r;if(w.cursor>=o){if(r=w.limit_backward,w.limit_backward=o,w.cursor=w.limit,w.ket=w.cursor,e=w.find_among_b(m,5))switch(w.bra=w.cursor,e){case 1:w.slice_del();break;case 2:w.slice_from("lös");break;case 3:w.slice_from("full")}w.limit_backward=r}}var a,o,u=[new r("a",-1,1),new r("arna",0,1),new 
r("erna",0,1),new r("heterna",2,1),new r("orna",0,1),new r("ad",-1,1),new r("e",-1,1),new r("ade",6,1),new r("ande",6,1),new r("arne",6,1),new r("are",6,1),new r("aste",6,1),new r("en",-1,1),new r("anden",12,1),new r("aren",12,1),new r("heten",12,1),new r("ern",-1,1),new r("ar",-1,1),new r("er",-1,1),new r("heter",18,1),new r("or",-1,1),new r("s",-1,2),new r("as",21,1),new r("arnas",22,1),new r("ernas",22,1),new r("ornas",22,1),new r("es",21,1),new r("ades",26,1),new r("andes",26,1),new r("ens",21,1),new r("arens",29,1),new r("hetens",29,1),new r("erns",21,1),new r("at",-1,1),new r("andet",-1,1),new r("het",-1,1),new r("ast",-1,1)],c=[new r("dd",-1,-1),new r("gd",-1,-1),new r("nn",-1,-1),new r("dt",-1,-1),new r("gt",-1,-1),new r("kt",-1,-1),new r("tt",-1,-1)],m=[new r("ig",-1,1),new r("lig",0,1),new r("els",-1,1),new r("fullt",-1,3),new r("löst",-1,2)],l=[17,65,16,1,0,0,0,0,0,0,0,0,0,0,0,0,24,0,32],d=[119,127,149],w=new n;this.setCurrent=function(e){w.setCurrent(e)},this.getCurrent=function(){return w.getCurrent()},this.stem=function(){var r=w.cursor;return e(),w.limit_backward=r,w.cursor=w.limit,t(),w.cursor=w.limit,i(),w.cursor=w.limit,s(),!0}};return function(e){return"function"==typeof e.update?e.update(function(e){return t.setCurrent(e),t.stem(),t.getCurrent()}):(t.setCurrent(e),t.stem(),t.getCurrent())}}(),e.Pipeline.registerFunction(e.sv.stemmer,"stemmer-sv"),e.sv.stopWordFilter=e.generateStopWordFilter("alla allt att av blev bli blir blivit de dem den denna deras dess dessa det detta dig din dina ditt du där då efter ej eller en er era ert ett från för ha hade han hans har henne hennes hon honom hur här i icke ingen inom inte jag ju kan kunde man med mellan men mig min mina mitt mot mycket ni nu när någon något några och om oss på samma sedan sig sin sina sitta själv skulle som så sådan sådana sådant till under upp ut utan vad var vara varför varit varje vars vart vem vi vid vilka vilkas vilken vilket vår våra vårt än är åt över".split(" 
")),e.Pipeline.registerFunction(e.sv.stopWordFilter,"stopWordFilter-sv")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.ta.min.js ADDED
@@ -0,0 +1 @@
 
 
1
+ !function(e,t){"function"==typeof define&&define.amd?define(t):"object"==typeof exports?module.exports=t():t()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.ta=function(){this.pipeline.reset(),this.pipeline.add(e.ta.trimmer,e.ta.stopWordFilter,e.ta.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.ta.stemmer))},e.ta.wordCharacters="஀-உஊ-ஏஐ-ஙச-ட஠-னப-யர-ஹ஺-ிீ-௉ொ-௏ௐ-௙௚-௟௠-௩௪-௯௰-௹௺-௿a-zA-Za-zA-Z0-90-9",e.ta.trimmer=e.trimmerSupport.generateTrimmer(e.ta.wordCharacters),e.Pipeline.registerFunction(e.ta.trimmer,"trimmer-ta"),e.ta.stopWordFilter=e.generateStopWordFilter("அங்கு அங்கே அது அதை அந்த அவர் அவர்கள் அவள் அவன் அவை ஆக ஆகவே ஆகையால் ஆதலால் ஆதலினால் ஆனாலும் ஆனால் இங்கு இங்கே இது இதை இந்த இப்படி இவர் இவர்கள் இவள் இவன் இவை இவ்வளவு உனக்கு உனது உன் உன்னால் எங்கு எங்கே எது எதை எந்த எப்படி எவர் எவர்கள் எவள் எவன் எவை எவ்வளவு எனக்கு எனது எனவே என் என்ன என்னால் ஏது ஏன் தனது தன்னால் தானே தான் நாங்கள் நாம் நான் நீ நீங்கள்".split(" ")),e.ta.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}();var t=e.wordcut;t.init(),e.ta.tokenizer=function(r){if(!arguments.length||null==r||void 0==r)return[];if(Array.isArray(r))return r.map(function(t){return isLunr2?new e.Token(t.toLowerCase()):t.toLowerCase()});var i=r.toString().toLowerCase().replace(/^\s+/,"");return t.cut(i).split("|")},e.Pipeline.registerFunction(e.ta.stemmer,"stemmer-ta"),e.Pipeline.registerFunction(e.ta.stopWordFilter,"stopWordFilter-ta")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.th.min.js ADDED
@@ -0,0 +1 @@
 
 
1
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");var r="2"==e.version[0];e.th=function(){this.pipeline.reset(),this.pipeline.add(e.th.trimmer),r?this.tokenizer=e.th.tokenizer:(e.tokenizer&&(e.tokenizer=e.th.tokenizer),this.tokenizerFn&&(this.tokenizerFn=e.th.tokenizer))},e.th.wordCharacters="[฀-๿]",e.th.trimmer=e.trimmerSupport.generateTrimmer(e.th.wordCharacters),e.Pipeline.registerFunction(e.th.trimmer,"trimmer-th");var t=e.wordcut;t.init(),e.th.tokenizer=function(i){if(!arguments.length||null==i||void 0==i)return[];if(Array.isArray(i))return i.map(function(t){return r?new e.Token(t):t});var n=i.toString().replace(/^\s+/,"");return t.cut(n).split("|")}}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.tr.min.js ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ * Lunr languages, `Turkish` language
3
+ * https://github.com/MihaiValentin/lunr-languages
4
+ *
5
+ * Copyright 2014, Mihai Valentin
6
+ * http://www.mozilla.org/MPL/
7
+ */
8
+ /*!
9
+ * based on
10
+ * Snowball JavaScript Library v0.3
11
+ * http://code.google.com/p/urim/
12
+ * http://snowball.tartarus.org/
13
+ *
14
+ * Copyright 2010, Oleg Mazko
15
+ * http://www.mozilla.org/MPL/
16
+ */
17
+
18
+ !function(r,i){"function"==typeof define&&define.amd?define(i):"object"==typeof exports?module.exports=i():i()(r.lunr)}(this,function(){return function(r){if(void 0===r)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===r.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");r.tr=function(){this.pipeline.reset(),this.pipeline.add(r.tr.trimmer,r.tr.stopWordFilter,r.tr.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(r.tr.stemmer))},r.tr.wordCharacters="A-Za-zªºÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿⁱⁿₐ-ₜKÅℲⅎⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꞭꞰ-ꞷꟷ-ꟿꬰ-ꭚꭜ-ꭤff-stA-Za-z",r.tr.trimmer=r.trimmerSupport.generateTrimmer(r.tr.wordCharacters),r.Pipeline.registerFunction(r.tr.trimmer,"trimmer-tr"),r.tr.stemmer=function(){var i=r.stemmerSupport.Among,e=r.stemmerSupport.SnowballProgram,n=new function(){function r(r,i,e){for(;;){var n=Dr.limit-Dr.cursor;if(Dr.in_grouping_b(r,i,e)){Dr.cursor=Dr.limit-n;break}if(Dr.cursor=Dr.limit-n,Dr.cursor<=Dr.limit_backward)return!1;Dr.cursor--}return!0}function n(){var i,e;i=Dr.limit-Dr.cursor,r(Wr,97,305);for(var n=0;n<Br.length;n++){e=Dr.limit-Dr.cursor;var t=Br[n];if(Dr.eq_s_b(1,t[0])&&r(t[1],t[2],t[3]))return Dr.cursor=Dr.limit-i,!0;Dr.cursor=Dr.limit-e}return Dr.cursor=Dr.limit-e,!(!Dr.eq_s_b(1,"ü")||!r(Zr,246,252))&&(Dr.cursor=Dr.limit-i,!0)}function t(r,i){var e,n=Dr.limit-Dr.cursor;return r()&&(Dr.cursor=Dr.limit-n,Dr.cursor>Dr.limit_backward&&(Dr.cursor--,e=Dr.limit-Dr.cursor,i()))?(Dr.cursor=Dr.limit-e,!0):(Dr.cursor=Dr.limit-n,r()?(Dr.cursor=Dr.limit-n,!1):(Dr.cursor=Dr.limit-n,!(Dr.cursor<=Dr.limit_backward)&&(Dr.cursor--,!!i()&&(Dr.cursor=Dr.limit-n,!0))))}function u(r){return t(r,function(){return Dr.in_grouping_b(Wr,97,305)})}function o(){return u(function(){return Dr.eq_s_b(1,"n")})}function s(){return u(function(){return Dr.eq_s_b(1,"s")})}function c(){return u(function(){return 
Dr.eq_s_b(1,"y")})}function l(){return t(function(){return Dr.in_grouping_b(Lr,105,305)},function(){return Dr.out_grouping_b(Wr,97,305)})}function a(){return Dr.find_among_b(ur,10)&&l()}function m(){return n()&&Dr.in_grouping_b(Lr,105,305)&&s()}function d(){return Dr.find_among_b(or,2)}function f(){return n()&&Dr.in_grouping_b(Lr,105,305)&&c()}function b(){return n()&&Dr.find_among_b(sr,4)}function w(){return n()&&Dr.find_among_b(cr,4)&&o()}function _(){return n()&&Dr.find_among_b(lr,2)&&c()}function k(){return n()&&Dr.find_among_b(ar,2)}function p(){return n()&&Dr.find_among_b(mr,4)}function g(){return n()&&Dr.find_among_b(dr,2)}function y(){return n()&&Dr.find_among_b(fr,4)}function z(){return n()&&Dr.find_among_b(br,2)}function v(){return n()&&Dr.find_among_b(wr,2)&&c()}function h(){return Dr.eq_s_b(2,"ki")}function q(){return n()&&Dr.find_among_b(_r,2)&&o()}function C(){return n()&&Dr.find_among_b(kr,4)&&c()}function P(){return n()&&Dr.find_among_b(pr,4)}function F(){return n()&&Dr.find_among_b(gr,4)&&c()}function S(){return Dr.find_among_b(yr,4)}function W(){return n()&&Dr.find_among_b(zr,2)}function L(){return n()&&Dr.find_among_b(vr,4)}function x(){return n()&&Dr.find_among_b(hr,8)}function A(){return Dr.find_among_b(qr,2)}function E(){return n()&&Dr.find_among_b(Cr,32)&&c()}function j(){return Dr.find_among_b(Pr,8)&&c()}function T(){return n()&&Dr.find_among_b(Fr,4)&&c()}function Z(){return Dr.eq_s_b(3,"ken")&&c()}function B(){var r=Dr.limit-Dr.cursor;return!(T()||(Dr.cursor=Dr.limit-r,E()||(Dr.cursor=Dr.limit-r,j()||(Dr.cursor=Dr.limit-r,Z()))))}function D(){if(A()){var r=Dr.limit-Dr.cursor;if(S()||(Dr.cursor=Dr.limit-r,W()||(Dr.cursor=Dr.limit-r,C()||(Dr.cursor=Dr.limit-r,P()||(Dr.cursor=Dr.limit-r,F()||(Dr.cursor=Dr.limit-r))))),T())return!1}return!0}function G(){if(W()){Dr.bra=Dr.cursor,Dr.slice_del();var r=Dr.limit-Dr.cursor;return 
Dr.ket=Dr.cursor,x()||(Dr.cursor=Dr.limit-r,E()||(Dr.cursor=Dr.limit-r,j()||(Dr.cursor=Dr.limit-r,T()||(Dr.cursor=Dr.limit-r)))),nr=!1,!1}return!0}function H(){if(!L())return!0;var r=Dr.limit-Dr.cursor;return!E()&&(Dr.cursor=Dr.limit-r,!j())}function I(){var r,i=Dr.limit-Dr.cursor;return!(S()||(Dr.cursor=Dr.limit-i,F()||(Dr.cursor=Dr.limit-i,P()||(Dr.cursor=Dr.limit-i,C()))))||(Dr.bra=Dr.cursor,Dr.slice_del(),r=Dr.limit-Dr.cursor,Dr.ket=Dr.cursor,T()||(Dr.cursor=Dr.limit-r),!1)}function J(){var r,i=Dr.limit-Dr.cursor;if(Dr.ket=Dr.cursor,nr=!0,B()&&(Dr.cursor=Dr.limit-i,D()&&(Dr.cursor=Dr.limit-i,G()&&(Dr.cursor=Dr.limit-i,H()&&(Dr.cursor=Dr.limit-i,I()))))){if(Dr.cursor=Dr.limit-i,!x())return;Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,r=Dr.limit-Dr.cursor,S()||(Dr.cursor=Dr.limit-r,W()||(Dr.cursor=Dr.limit-r,C()||(Dr.cursor=Dr.limit-r,P()||(Dr.cursor=Dr.limit-r,F()||(Dr.cursor=Dr.limit-r))))),T()||(Dr.cursor=Dr.limit-r)}Dr.bra=Dr.cursor,Dr.slice_del()}function K(){var r,i,e,n;if(Dr.ket=Dr.cursor,h()){if(r=Dr.limit-Dr.cursor,p())return Dr.bra=Dr.cursor,Dr.slice_del(),i=Dr.limit-Dr.cursor,Dr.ket=Dr.cursor,W()?(Dr.bra=Dr.cursor,Dr.slice_del(),K()):(Dr.cursor=Dr.limit-i,a()&&(Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,W()&&(Dr.bra=Dr.cursor,Dr.slice_del(),K()))),!0;if(Dr.cursor=Dr.limit-r,w()){if(Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,e=Dr.limit-Dr.cursor,d())Dr.bra=Dr.cursor,Dr.slice_del();else{if(Dr.cursor=Dr.limit-e,Dr.ket=Dr.cursor,!a()&&(Dr.cursor=Dr.limit-e,!m()&&(Dr.cursor=Dr.limit-e,!K())))return!0;Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,W()&&(Dr.bra=Dr.cursor,Dr.slice_del(),K())}return!0}if(Dr.cursor=Dr.limit-r,g()){if(n=Dr.limit-Dr.cursor,d())Dr.bra=Dr.cursor,Dr.slice_del();else if(Dr.cursor=Dr.limit-n,m())Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,W()&&(Dr.bra=Dr.cursor,Dr.slice_del(),K());else if(Dr.cursor=Dr.limit-n,!K())return!1;return!0}}return!1}function 
M(r){if(Dr.ket=Dr.cursor,!g()&&(Dr.cursor=Dr.limit-r,!k()))return!1;var i=Dr.limit-Dr.cursor;if(d())Dr.bra=Dr.cursor,Dr.slice_del();else if(Dr.cursor=Dr.limit-i,m())Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,W()&&(Dr.bra=Dr.cursor,Dr.slice_del(),K());else if(Dr.cursor=Dr.limit-i,!K())return!1;return!0}function N(r){if(Dr.ket=Dr.cursor,!z()&&(Dr.cursor=Dr.limit-r,!b()))return!1;var i=Dr.limit-Dr.cursor;return!(!m()&&(Dr.cursor=Dr.limit-i,!d()))&&(Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,W()&&(Dr.bra=Dr.cursor,Dr.slice_del(),K()),!0)}function O(){var r,i=Dr.limit-Dr.cursor;return Dr.ket=Dr.cursor,!(!w()&&(Dr.cursor=Dr.limit-i,!v()))&&(Dr.bra=Dr.cursor,Dr.slice_del(),r=Dr.limit-Dr.cursor,Dr.ket=Dr.cursor,!(!W()||(Dr.bra=Dr.cursor,Dr.slice_del(),!K()))||(Dr.cursor=Dr.limit-r,Dr.ket=Dr.cursor,!(a()||(Dr.cursor=Dr.limit-r,m()||(Dr.cursor=Dr.limit-r,K())))||(Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,W()&&(Dr.bra=Dr.cursor,Dr.slice_del(),K()),!0)))}function Q(){var r,i,e=Dr.limit-Dr.cursor;if(Dr.ket=Dr.cursor,!p()&&(Dr.cursor=Dr.limit-e,!f()&&(Dr.cursor=Dr.limit-e,!_())))return!1;if(Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,r=Dr.limit-Dr.cursor,a())Dr.bra=Dr.cursor,Dr.slice_del(),i=Dr.limit-Dr.cursor,Dr.ket=Dr.cursor,W()||(Dr.cursor=Dr.limit-i);else if(Dr.cursor=Dr.limit-r,!W())return!0;return Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,K(),!0}function R(){var r,i,e=Dr.limit-Dr.cursor;if(Dr.ket=Dr.cursor,W())return Dr.bra=Dr.cursor,Dr.slice_del(),void K();if(Dr.cursor=Dr.limit-e,Dr.ket=Dr.cursor,q())if(Dr.bra=Dr.cursor,Dr.slice_del(),r=Dr.limit-Dr.cursor,Dr.ket=Dr.cursor,d())Dr.bra=Dr.cursor,Dr.slice_del();else{if(Dr.cursor=Dr.limit-r,Dr.ket=Dr.cursor,!a()&&(Dr.cursor=Dr.limit-r,!m())){if(Dr.cursor=Dr.limit-r,Dr.ket=Dr.cursor,!W())return;if(Dr.bra=Dr.cursor,Dr.slice_del(),!K())return}Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,W()&&(Dr.bra=Dr.cursor,Dr.slice_del(),K())}else 
if(Dr.cursor=Dr.limit-e,!M(e)&&(Dr.cursor=Dr.limit-e,!N(e))){if(Dr.cursor=Dr.limit-e,Dr.ket=Dr.cursor,y())return Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,i=Dr.limit-Dr.cursor,void(a()?(Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,W()&&(Dr.bra=Dr.cursor,Dr.slice_del(),K())):(Dr.cursor=Dr.limit-i,W()?(Dr.bra=Dr.cursor,Dr.slice_del(),K()):(Dr.cursor=Dr.limit-i,K())));if(Dr.cursor=Dr.limit-e,!O()){if(Dr.cursor=Dr.limit-e,d())return Dr.bra=Dr.cursor,void Dr.slice_del();Dr.cursor=Dr.limit-e,K()||(Dr.cursor=Dr.limit-e,Q()||(Dr.cursor=Dr.limit-e,Dr.ket=Dr.cursor,(a()||(Dr.cursor=Dr.limit-e,m()))&&(Dr.bra=Dr.cursor,Dr.slice_del(),Dr.ket=Dr.cursor,W()&&(Dr.bra=Dr.cursor,Dr.slice_del(),K()))))}}}function U(){var r;if(Dr.ket=Dr.cursor,r=Dr.find_among_b(Sr,4))switch(Dr.bra=Dr.cursor,r){case 1:Dr.slice_from("p");break;case 2:Dr.slice_from("ç");break;case 3:Dr.slice_from("t");break;case 4:Dr.slice_from("k")}}function V(){for(;;){var r=Dr.limit-Dr.cursor;if(Dr.in_grouping_b(Wr,97,305)){Dr.cursor=Dr.limit-r;break}if(Dr.cursor=Dr.limit-r,Dr.cursor<=Dr.limit_backward)return!1;Dr.cursor--}return!0}function X(r,i,e){if(Dr.cursor=Dr.limit-r,V()){var n=Dr.limit-Dr.cursor;if(!Dr.eq_s_b(1,i)&&(Dr.cursor=Dr.limit-n,!Dr.eq_s_b(1,e)))return!0;Dr.cursor=Dr.limit-r;var t=Dr.cursor;return Dr.insert(Dr.cursor,Dr.cursor,e),Dr.cursor=t,!1}return!0}function Y(){var r=Dr.limit-Dr.cursor;(Dr.eq_s_b(1,"d")||(Dr.cursor=Dr.limit-r,Dr.eq_s_b(1,"g")))&&X(r,"a","ı")&&X(r,"e","i")&&X(r,"o","u")&&X(r,"ö","ü")}function $(){for(var r,i=Dr.cursor,e=2;;){for(r=Dr.cursor;!Dr.in_grouping(Wr,97,305);){if(Dr.cursor>=Dr.limit)return Dr.cursor=r,!(e>0)&&(Dr.cursor=i,!0);Dr.cursor++}e--}}function rr(r,i,e){for(;!Dr.eq_s(i,e);){if(Dr.cursor>=Dr.limit)return!0;Dr.cursor++}return(tr=i)!=Dr.limit||(Dr.cursor=r,!1)}function ir(){var r=Dr.cursor;return!rr(r,2,"ad")||(Dr.cursor=r,!rr(r,5,"soyad"))}function er(){var r=Dr.cursor;return!ir()&&(Dr.limit_backward=r,Dr.cursor=Dr.limit,Y(),Dr.cursor=Dr.limit,U(),!0)}var 
nr,tr,ur=[new i("m",-1,-1),new i("n",-1,-1),new i("miz",-1,-1),new i("niz",-1,-1),new i("muz",-1,-1),new i("nuz",-1,-1),new i("müz",-1,-1),new i("nüz",-1,-1),new i("mız",-1,-1),new i("nız",-1,-1)],or=[new i("leri",-1,-1),new i("ları",-1,-1)],sr=[new i("ni",-1,-1),new i("nu",-1,-1),new i("nü",-1,-1),new i("nı",-1,-1)],cr=[new i("in",-1,-1),new i("un",-1,-1),new i("ün",-1,-1),new i("ın",-1,-1)],lr=[new i("a",-1,-1),new i("e",-1,-1)],ar=[new i("na",-1,-1),new i("ne",-1,-1)],mr=[new i("da",-1,-1),new i("ta",-1,-1),new i("de",-1,-1),new i("te",-1,-1)],dr=[new i("nda",-1,-1),new i("nde",-1,-1)],fr=[new i("dan",-1,-1),new i("tan",-1,-1),new i("den",-1,-1),new i("ten",-1,-1)],br=[new i("ndan",-1,-1),new i("nden",-1,-1)],wr=[new i("la",-1,-1),new i("le",-1,-1)],_r=[new i("ca",-1,-1),new i("ce",-1,-1)],kr=[new i("im",-1,-1),new i("um",-1,-1),new i("üm",-1,-1),new i("ım",-1,-1)],pr=[new i("sin",-1,-1),new i("sun",-1,-1),new i("sün",-1,-1),new i("sın",-1,-1)],gr=[new i("iz",-1,-1),new i("uz",-1,-1),new i("üz",-1,-1),new i("ız",-1,-1)],yr=[new i("siniz",-1,-1),new i("sunuz",-1,-1),new i("sünüz",-1,-1),new i("sınız",-1,-1)],zr=[new i("lar",-1,-1),new i("ler",-1,-1)],vr=[new i("niz",-1,-1),new i("nuz",-1,-1),new i("nüz",-1,-1),new i("nız",-1,-1)],hr=[new i("dir",-1,-1),new i("tir",-1,-1),new i("dur",-1,-1),new i("tur",-1,-1),new i("dür",-1,-1),new i("tür",-1,-1),new i("dır",-1,-1),new i("tır",-1,-1)],qr=[new i("casına",-1,-1),new i("cesine",-1,-1)],Cr=[new i("di",-1,-1),new i("ti",-1,-1),new i("dik",-1,-1),new i("tik",-1,-1),new i("duk",-1,-1),new i("tuk",-1,-1),new i("dük",-1,-1),new i("tük",-1,-1),new i("dık",-1,-1),new i("tık",-1,-1),new i("dim",-1,-1),new i("tim",-1,-1),new i("dum",-1,-1),new i("tum",-1,-1),new i("düm",-1,-1),new i("tüm",-1,-1),new i("dım",-1,-1),new i("tım",-1,-1),new i("din",-1,-1),new i("tin",-1,-1),new i("dun",-1,-1),new i("tun",-1,-1),new i("dün",-1,-1),new i("tün",-1,-1),new i("dın",-1,-1),new i("tın",-1,-1),new i("du",-1,-1),new i("tu",-1,-1),new 
i("dü",-1,-1),new i("tü",-1,-1),new i("dı",-1,-1),new i("tı",-1,-1)],Pr=[new i("sa",-1,-1),new i("se",-1,-1),new i("sak",-1,-1),new i("sek",-1,-1),new i("sam",-1,-1),new i("sem",-1,-1),new i("san",-1,-1),new i("sen",-1,-1)],Fr=[new i("miş",-1,-1),new i("muş",-1,-1),new i("müş",-1,-1),new i("mış",-1,-1)],Sr=[new i("b",-1,1),new i("c",-1,2),new i("d",-1,3),new i("ğ",-1,4)],Wr=[17,65,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,8,0,0,0,0,0,0,1],Lr=[1,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,1],xr=[1,64,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],Ar=[17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,130],Er=[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],jr=[17],Tr=[65],Zr=[65],Br=[["a",xr,97,305],["e",Ar,101,252],["ı",Er,97,305],["i",jr,101,105],["o",Tr,111,117],["ö",Zr,246,252],["u",Tr,111,117]],Dr=new e;this.setCurrent=function(r){Dr.setCurrent(r)},this.getCurrent=function(){return Dr.getCurrent()},this.stem=function(){return!!($()&&(Dr.limit_backward=Dr.cursor,Dr.cursor=Dr.limit,J(),Dr.cursor=Dr.limit,nr&&(R(),Dr.cursor=Dr.limit_backward,er())))}};return function(r){return"function"==typeof r.update?r.update(function(r){return n.setCurrent(r),n.stem(),n.getCurrent()}):(n.setCurrent(r),n.stem(),n.getCurrent())}}(),r.Pipeline.registerFunction(r.tr.stemmer,"stemmer-tr"),r.tr.stopWordFilter=r.generateStopWordFilter("acaba altmış altı ama ancak arada aslında ayrıca bana bazı belki ben benden beni benim beri beş bile bin bir biri birkaç birkez birçok birşey birşeyi biz bizden bize bizi bizim bu buna bunda bundan bunlar bunları bunların bunu bunun burada böyle böylece da daha dahi de defa değil diye diğer doksan dokuz dolayı dolayısıyla dört edecek eden ederek edilecek ediliyor edilmesi ediyor elli en etmesi etti ettiği ettiğini eğer gibi göre halen hangi hatta hem henüz hep hepsi her herhangi herkesin hiç hiçbir iki ile ilgili ise itibaren itibariyle için işte kadar karşın katrilyon kendi kendilerine kendini kendisi kendisine kendisini kez ki kim kimden kime kimi kimse 
kırk milyar milyon mu mü mı nasıl ne neden nedenle nerde nerede nereye niye niçin o olan olarak oldu olduklarını olduğu olduğunu olmadı olmadığı olmak olması olmayan olmaz olsa olsun olup olur olursa oluyor on ona ondan onlar onlardan onları onların onu onun otuz oysa pek rağmen sadece sanki sekiz seksen sen senden seni senin siz sizden sizi sizin tarafından trilyon tüm var vardı ve veya ya yani yapacak yapmak yaptı yaptıkları yaptığı yaptığını yapılan yapılması yapıyor yedi yerine yetmiş yine yirmi yoksa yüz zaten çok çünkü öyle üzere üç şey şeyden şeyi şeyler şu şuna şunda şundan şunları şunu şöyle".split(" ")),r.Pipeline.registerFunction(r.tr.stopWordFilter,"stopWordFilter-tr")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.vi.min.js ADDED
@@ -0,0 +1 @@
 
 
1
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.vi=function(){this.pipeline.reset(),this.pipeline.add(e.vi.stopWordFilter,e.vi.trimmer)},e.vi.wordCharacters="[A-Za-ẓ̀͐́͑̉̃̓ÂâÊêÔôĂ-ăĐ-đƠ-ơƯ-ư]",e.vi.trimmer=e.trimmerSupport.generateTrimmer(e.vi.wordCharacters),e.Pipeline.registerFunction(e.vi.trimmer,"trimmer-vi"),e.vi.stopWordFilter=e.generateStopWordFilter("là cái nhưng mà".split(" "))}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/min/lunr.zh.min.js ADDED
@@ -0,0 +1 @@
 
 
1
+ !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r(require("@node-rs/jieba")):r()(e.lunr)}(this,function(e){return function(r,t){if(void 0===r)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===r.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");var i="2"==r.version[0];r.zh=function(){this.pipeline.reset(),this.pipeline.add(r.zh.trimmer,r.zh.stopWordFilter,r.zh.stemmer),i?this.tokenizer=r.zh.tokenizer:(r.tokenizer&&(r.tokenizer=r.zh.tokenizer),this.tokenizerFn&&(this.tokenizerFn=r.zh.tokenizer))},r.zh.tokenizer=function(n){if(!arguments.length||null==n||void 0==n)return[];if(Array.isArray(n))return n.map(function(e){return i?new r.Token(e.toLowerCase()):e.toLowerCase()});t&&e.load(t);var o=n.toString().trim().toLowerCase(),s=[];e.cut(o,!0).forEach(function(e){s=s.concat(e.split(" "))}),s=s.filter(function(e){return!!e});var u=0;return s.map(function(e,t){if(i){var n=o.indexOf(e,u),s={};return s.position=[n,e.length],s.index=t,u=n,new r.Token(e,s)}return e})},r.zh.wordCharacters="\\w一-龥",r.zh.trimmer=r.trimmerSupport.generateTrimmer(r.zh.wordCharacters),r.Pipeline.registerFunction(r.zh.trimmer,"trimmer-zh"),r.zh.stemmer=function(){return function(e){return e}}(),r.Pipeline.registerFunction(r.zh.stemmer,"stemmer-zh"),r.zh.stopWordFilter=r.generateStopWordFilter("的 一 不 在 人 有 是 为 以 于 上 他 而 后 之 来 及 了 因 下 可 到 由 这 与 也 此 但 并 个 其 已 无 小 我 们 起 最 再 今 去 好 只 又 或 很 亦 某 把 那 你 乃 它 吧 被 比 别 趁 当 从 到 得 打 凡 儿 尔 该 各 给 跟 和 何 还 即 几 既 看 据 距 靠 啦 了 另 么 每 们 嘛 拿 哪 那 您 凭 且 却 让 仍 啥 如 若 使 谁 虽 随 同 所 她 哇 嗡 往 哪 些 向 沿 哟 用 于 咱 则 怎 曾 至 致 着 诸 自".split(" ")),r.Pipeline.registerFunction(r.zh.stopWordFilter,"stopWordFilter-zh")}});
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/tinyseg.js ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * export the module via AMD, CommonJS or as a browser global
3
+ * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
4
+ */
5
+ ;(function (root, factory) {
6
+ if (typeof define === 'function' && define.amd) {
7
+ // AMD. Register as an anonymous module.
8
+ define(factory)
9
+ } else if (typeof exports === 'object') {
10
+ /**
11
+ * Node. Does not work with strict CommonJS, but
12
+ * only CommonJS-like environments that support module.exports,
13
+ * like Node.
14
+ */
15
+ module.exports = factory()
16
+ } else {
17
+ // Browser globals (root is window)
18
+ factory()(root.lunr);
19
+ }
20
+ }(this, function () {
21
+ /**
22
+ * Just return a value to define the module export.
23
+ * This example returns an object, but the module
24
+ * can return a function as the exported value.
25
+ */
26
+
27
+ return function(lunr) {
28
+ // TinySegmenter 0.1 -- Super compact Japanese tokenizer in Javascript
29
+ // (c) 2008 Taku Kudo <[email protected]>
30
+ // TinySegmenter is freely distributable under the terms of a new BSD licence.
31
+ // For details, see http://chasen.org/~taku/software/TinySegmenter/LICENCE.txt
32
+
33
+ function TinySegmenter() {
34
+ var patterns = {
35
+ "[一二三四五六七八九十百千万億兆]":"M",
36
+ "[一-龠々〆ヵヶ]":"H",
37
+ "[ぁ-ん]":"I",
38
+ "[ァ-ヴーア-ン゙ー]":"K",
39
+ "[a-zA-Za-zA-Z]":"A",
40
+ "[0-90-9]":"N"
41
+ }
42
+ this.chartype_ = [];
43
+ for (var i in patterns) {
44
+ var regexp = new RegExp(i);
45
+ this.chartype_.push([regexp, patterns[i]]);
46
+ }
47
+
48
+ this.BIAS__ = -332
49
+ this.BC1__ = {"HH":6,"II":2461,"KH":406,"OH":-1378};
50
+ this.BC2__ = {"AA":-3267,"AI":2744,"AN":-878,"HH":-4070,"HM":-1711,"HN":4012,"HO":3761,"IA":1327,"IH":-1184,"II":-1332,"IK":1721,"IO":5492,"KI":3831,"KK":-8741,"MH":-3132,"MK":3334,"OO":-2920};
51
+ this.BC3__ = {"HH":996,"HI":626,"HK":-721,"HN":-1307,"HO":-836,"IH":-301,"KK":2762,"MK":1079,"MM":4034,"OA":-1652,"OH":266};
52
+ this.BP1__ = {"BB":295,"OB":304,"OO":-125,"UB":352};
53
+ this.BP2__ = {"BO":60,"OO":-1762};
54
+ this.BQ1__ = {"BHH":1150,"BHM":1521,"BII":-1158,"BIM":886,"BMH":1208,"BNH":449,"BOH":-91,"BOO":-2597,"OHI":451,"OIH":-296,"OKA":1851,"OKH":-1020,"OKK":904,"OOO":2965};
55
+ this.BQ2__ = {"BHH":118,"BHI":-1159,"BHM":466,"BIH":-919,"BKK":-1720,"BKO":864,"OHH":-1139,"OHM":-181,"OIH":153,"UHI":-1146};
56
+ this.BQ3__ = {"BHH":-792,"BHI":2664,"BII":-299,"BKI":419,"BMH":937,"BMM":8335,"BNN":998,"BOH":775,"OHH":2174,"OHM":439,"OII":280,"OKH":1798,"OKI":-793,"OKO":-2242,"OMH":-2402,"OOO":11699};
57
+ this.BQ4__ = {"BHH":-3895,"BIH":3761,"BII":-4654,"BIK":1348,"BKK":-1806,"BMI":-3385,"BOO":-12396,"OAH":926,"OHH":266,"OHK":-2036,"ONN":-973};
58
+ this.BW1__ = {",と":660,",同":727,"B1あ":1404,"B1同":542,"、と":660,"、同":727,"」と":1682,"あっ":1505,"いう":1743,"いっ":-2055,"いる":672,"うし":-4817,"うん":665,"から":3472,"がら":600,"こう":-790,"こと":2083,"こん":-1262,"さら":-4143,"さん":4573,"した":2641,"して":1104,"すで":-3399,"そこ":1977,"それ":-871,"たち":1122,"ため":601,"った":3463,"つい":-802,"てい":805,"てき":1249,"でき":1127,"です":3445,"では":844,"とい":-4915,"とみ":1922,"どこ":3887,"ない":5713,"なっ":3015,"など":7379,"なん":-1113,"にし":2468,"には":1498,"にも":1671,"に対":-912,"の一":-501,"の中":741,"ませ":2448,"まで":1711,"まま":2600,"まる":-2155,"やむ":-1947,"よっ":-2565,"れた":2369,"れで":-913,"をし":1860,"を見":731,"亡く":-1886,"京都":2558,"取り":-2784,"大き":-2604,"大阪":1497,"平方":-2314,"引き":-1336,"日本":-195,"本当":-2423,"毎日":-2113,"目指":-724,"B1あ":1404,"B1同":542,"」と":1682};
59
+ this.BW2__ = {"..":-11822,"11":-669,"――":-5730,"−−":-13175,"いう":-1609,"うか":2490,"かし":-1350,"かも":-602,"から":-7194,"かれ":4612,"がい":853,"がら":-3198,"きた":1941,"くな":-1597,"こと":-8392,"この":-4193,"させ":4533,"され":13168,"さん":-3977,"しい":-1819,"しか":-545,"した":5078,"して":972,"しな":939,"その":-3744,"たい":-1253,"たた":-662,"ただ":-3857,"たち":-786,"たと":1224,"たは":-939,"った":4589,"って":1647,"っと":-2094,"てい":6144,"てき":3640,"てく":2551,"ては":-3110,"ても":-3065,"でい":2666,"でき":-1528,"でし":-3828,"です":-4761,"でも":-4203,"とい":1890,"とこ":-1746,"とと":-2279,"との":720,"とみ":5168,"とも":-3941,"ない":-2488,"なが":-1313,"など":-6509,"なの":2614,"なん":3099,"にお":-1615,"にし":2748,"にな":2454,"によ":-7236,"に対":-14943,"に従":-4688,"に関":-11388,"のか":2093,"ので":-7059,"のに":-6041,"のの":-6125,"はい":1073,"はが":-1033,"はず":-2532,"ばれ":1813,"まし":-1316,"まで":-6621,"まれ":5409,"めて":-3153,"もい":2230,"もの":-10713,"��か":-944,"らし":-1611,"らに":-1897,"りし":651,"りま":1620,"れた":4270,"れて":849,"れば":4114,"ろう":6067,"われ":7901,"を通":-11877,"んだ":728,"んな":-4115,"一人":602,"一方":-1375,"一日":970,"一部":-1051,"上が":-4479,"会社":-1116,"出て":2163,"分の":-7758,"同党":970,"同日":-913,"大阪":-2471,"委員":-1250,"少な":-1050,"年度":-8669,"年間":-1626,"府県":-2363,"手権":-1982,"新聞":-4066,"日新":-722,"日本":-7068,"日米":3372,"曜日":-601,"朝鮮":-2355,"本人":-2697,"東京":-1543,"然と":-1384,"社会":-1276,"立て":-990,"第に":-1612,"米国":-4268,"11":-669};
60
+ this.BW3__ = {"あた":-2194,"あり":719,"ある":3846,"い.":-1185,"い。":-1185,"いい":5308,"いえ":2079,"いく":3029,"いた":2056,"いっ":1883,"いる":5600,"いわ":1527,"うち":1117,"うと":4798,"えと":1454,"か.":2857,"か。":2857,"かけ":-743,"かっ":-4098,"かに":-669,"から":6520,"かり":-2670,"が,":1816,"が、":1816,"がき":-4855,"がけ":-1127,"がっ":-913,"がら":-4977,"がり":-2064,"きた":1645,"けど":1374,"こと":7397,"この":1542,"ころ":-2757,"さい":-714,"さを":976,"し,":1557,"し、":1557,"しい":-3714,"した":3562,"して":1449,"しな":2608,"しま":1200,"す.":-1310,"す。":-1310,"する":6521,"ず,":3426,"ず、":3426,"ずに":841,"そう":428,"た.":8875,"た。":8875,"たい":-594,"たの":812,"たり":-1183,"たる":-853,"だ.":4098,"だ。":4098,"だっ":1004,"った":-4748,"って":300,"てい":6240,"てお":855,"ても":302,"です":1437,"でに":-1482,"では":2295,"とう":-1387,"とし":2266,"との":541,"とも":-3543,"どう":4664,"ない":1796,"なく":-903,"など":2135,"に,":-1021,"に、":-1021,"にし":1771,"にな":1906,"には":2644,"の,":-724,"の、":-724,"の子":-1000,"は,":1337,"は、":1337,"べき":2181,"まし":1113,"ます":6943,"まっ":-1549,"まで":6154,"まれ":-793,"らし":1479,"られ":6820,"るる":3818,"れ,":854,"れ、":854,"れた":1850,"れて":1375,"れば":-3246,"れる":1091,"われ":-605,"んだ":606,"んで":798,"カ月":990,"会議":860,"入り":1232,"大会":2217,"始め":1681,"市":965,"新聞":-5055,"日,":974,"日、":974,"社会":2024,"カ月":990};
61
+ this.TC1__ = {"AAA":1093,"HHH":1029,"HHM":580,"HII":998,"HOH":-390,"HOM":-331,"IHI":1169,"IOH":-142,"IOI":-1015,"IOM":467,"MMH":187,"OOI":-1832};
62
+ this.TC2__ = {"HHO":2088,"HII":-1023,"HMM":-1154,"IHI":-1965,"KKH":703,"OII":-2649};
63
+ this.TC3__ = {"AAA":-294,"HHH":346,"HHI":-341,"HII":-1088,"HIK":731,"HOH":-1486,"IHH":128,"IHI":-3041,"IHO":-1935,"IIH":-825,"IIM":-1035,"IOI":-542,"KHH":-1216,"KKA":491,"KKH":-1217,"KOK":-1009,"MHH":-2694,"MHM":-457,"MHO":123,"MMH":-471,"NNH":-1689,"NNO":662,"OHO":-3393};
64
+ this.TC4__ = {"HHH":-203,"HHI":1344,"HHK":365,"HHM":-122,"HHN":182,"HHO":669,"HIH":804,"HII":679,"HOH":446,"IHH":695,"IHO":-2324,"IIH":321,"III":1497,"IIO":656,"IOO":54,"KAK":4845,"KKA":3386,"KKK":3065,"MHH":-405,"MHI":201,"MMH":-241,"MMM":661,"MOM":841};
65
+ this.TQ1__ = {"BHHH":-227,"BHHI":316,"BHIH":-132,"BIHH":60,"BIII":1595,"BNHH":-744,"BOHH":225,"BOOO":-908,"OAKK":482,"OHHH":281,"OHIH":249,"OIHI":200,"OIIH":-68};
66
+ this.TQ2__ = {"BIHH":-1401,"BIII":-1033,"BKAK":-543,"BOOO":-5591};
67
+ this.TQ3__ = {"BHHH":478,"BHHM":-1073,"BHIH":222,"BHII":-504,"BIIH":-116,"BIII":-105,"BMHI":-863,"BMHM":-464,"BOMH":620,"OHHH":346,"OHHI":1729,"OHII":997,"OHMH":481,"OIHH":623,"OIIH":1344,"OKAK":2792,"OKHH":587,"OKKA":679,"OOHH":110,"OOII":-685};
68
+ this.TQ4__ = {"BHHH":-721,"BHHM":-3604,"BHII":-966,"BIIH":-607,"BIII":-2181,"OAAA":-2763,"OAKK":180,"OHHH":-294,"OHHI":2446,"OHHO":480,"OHIH":-1573,"OIHH":1935,"OIHI":-493,"OIIH":626,"OIII":-4007,"OKAK":-8156};
69
+ this.TW1__ = {"につい":-4681,"東京都":2026};
70
+ this.TW2__ = {"ある程":-2049,"いった":-1256,"ころが":-2434,"しょう":3873,"その後":-4430,"だって":-1049,"ていた":1833,"として":-4657,"ともに":-4517,"もので":1882,"一気に":-792,"初めて":-1512,"同時に":-8097,"大きな":-1255,"対して":-2721,"社会党":-3216};
71
+ this.TW3__ = {"いただ":-1734,"してい":1314,"として":-4314,"につい":-5483,"にとっ":-5989,"に当た":-6247,"ので,":-727,"ので、":-727,"のもの":-600,"れから":-3752,"十二月":-2287};
72
+ this.TW4__ = {"いう.":8576,"いう。":8576,"からな":-2348,"してい":2958,"たが,":1516,"たが、":1516,"ている":1538,"という":1349,"ました":5543,"ません":1097,"ようと":-4258,"よると":5865};
73
+ this.UC1__ = {"A":484,"K":93,"M":645,"O":-505};
74
+ this.UC2__ = {"A":819,"H":1059,"I":409,"M":3987,"N":5775,"O":646};
75
+ this.UC3__ = {"A":-1370,"I":2311};
76
+ this.UC4__ = {"A":-2643,"H":1809,"I":-1032,"K":-3450,"M":3565,"N":3876,"O":6646};
77
+ this.UC5__ = {"H":313,"I":-1238,"K":-799,"M":539,"O":-831};
78
+ this.UC6__ = {"H":-506,"I":-253,"K":87,"M":247,"O":-387};
79
+ this.UP1__ = {"O":-214};
80
+ this.UP2__ = {"B":69,"O":935};
81
+ this.UP3__ = {"B":189};
82
+ this.UQ1__ = {"BH":21,"BI":-12,"BK":-99,"BN":142,"BO":-56,"OH":-95,"OI":477,"OK":410,"OO":-2422};
83
+ this.UQ2__ = {"BH":216,"BI":113,"OK":1759};
84
+ this.UQ3__ = {"BA":-479,"BH":42,"BI":1913,"BK":-7198,"BM":3160,"BN":6427,"BO":14761,"OI":-827,"ON":-3212};
85
+ this.UW1__ = {",":156,"、":156,"「":-463,"あ":-941,"う":-127,"が":-553,"き":121,"こ":505,"で":-201,"と":-547,"ど":-123,"に":-789,"の":-185,"は":-847,"も":-466,"や":-470,"よ":182,"ら":-292,"り":208,"れ":169,"を":-446,"ん":-137,"・":-135,"主":-402,"京":-268,"区":-912,"午":871,"国":-460,"大":561,"委":729,"市":-411,"日":-141,"理":361,"生":-408,"県":-386,"都":-718,"「":-463,"・":-135};
86
+ this.UW2__ = {",":-829,"、":-829,"〇":892,"「":-645,"」":3145,"あ":-538,"い":505,"う":134,"お":-502,"か":1454,"が":-856,"く":-412,"こ":1141,"さ":878,"ざ":540,"し":1529,"す":-675,"せ":300,"そ":-1011,"た":188,"だ":1837,"つ":-949,"て":-291,"で":-268,"と":-981,"ど":1273,"な":1063,"に":-1764,"の":130,"は":-409,"ひ":-1273,"べ":1261,"ま":600,"も":-1263,"や":-402,"よ":1639,"り":-579,"る":-694,"れ":571,"を":-2516,"ん":2095,"ア":-587,"カ":306,"キ":568,"ッ":831,"三":-758,"不":-2150,"世":-302,"中":-968,"主":-861,"事":492,"人":-123,"会":978,"保":362,"入":548,"初":-3025,"副":-1566,"北":-3414,"区":-422,"大":-1769,"天":-865,"太":-483,"子":-1519,"学":760,"実":1023,"小":-2009,"市":-813,"年":-1060,"強":1067,"手":-1519,"揺":-1033,"政":1522,"文":-1355,"新":-1682,"日":-1815,"明":-1462,"最":-630,"朝":-1843,"本":-1650,"東":-931,"果":-665,"次":-2378,"民":-180,"気":-1740,"理":752,"発":529,"目":-1584,"相":-242,"県":-1165,"立":-763,"第":810,"米":509,"自":-1353,"行":838,"西":-744,"見":-3874,"調":1010,"議":1198,"込":3041,"開":1758,"間":-1257,"「":-645,"」":3145,"ッ":831,"ア":-587,"カ":306,"キ":568};
87
+ this.UW3__ = {",":4889,"1":-800,"−":-1723,"、":4889,"々":-2311,"〇":5827,"」":2670,"〓":-3573,"あ":-2696,"い":1006,"う":2342,"え":1983,"お":-4864,"か":-1163,"が":3271,"く":1004,"け":388,"げ":401,"こ":-3552,"ご":-3116,"さ":-1058,"し":-395,"す":584,"せ":3685,"そ":-5228,"た":842,"ち":-521,"っ":-1444,"つ":-1081,"て":6167,"で":2318,"と":1691,"ど":-899,"な":-2788,"に":2745,"の":4056,"は":4555,"ひ":-2171,"ふ":-1798,"へ":1199,"ほ":-5516,"ま":-4384,"み":-120,"め":1205,"も":2323,"や":-788,"よ":-202,"ら":727,"り":649,"る":5905,"れ":2773,"わ":-1207,"を":6620,"ん":-518,"ア":551,"グ":1319,"ス":874,"ッ":-1350,"ト":521,"ム":1109,"ル":1591,"ロ":2201,"ン":278,"・":-3794,"一":-1619,"下":-1759,"世":-2087,"両":3815,"中":653,"主":-758,"予":-1193,"二":974,"人":2742,"今":792,"他":1889,"以":-1368,"低":811,"何":4265,"作":-361,"保":-2439,"元":4858,"党":3593,"全":1574,"公":-3030,"六":755,"共":-1880,"円":5807,"再":3095,"分":457,"初":2475,"別":1129,"前":2286,"副":4437,"力":365,"動":-949,"務":-1872,"化":1327,"北":-1038,"区":4646,"千":-2309,"午":-783,"協":-1006,"口":483,"右":1233,"各":3588,"合":-241,"同":3906,"和":-837,"員":4513,"国":642,"型":1389,"場":1219,"外":-241,"妻":2016,"学":-1356,"安":-423,"実":-1008,"家":1078,"小":-513,"少":-3102,"州":1155,"市":3197,"平":-1804,"年":2416,"広":-1030,"府":1605,"度":1452,"建":-2352,"当":-3885,"得":1905,"思":-1291,"性":1822,"戸":-488,"指":-3973,"政":-2013,"教":-1479,"数":3222,"文":-1489,"新":1764,"日":2099,"旧":5792,"昨":-661,"時":-1248,"曜":-951,"最":-937,"月":4125,"期":360,"李":3094,"村":364,"東":-805,"核":5156,"森":2438,"業":484,"氏":2613,"民":-1694,"決":-1073,"法":1868,"海":-495,"無":979,"物":461,"特":-3850,"生":-273,"用":914,"町":1215,"的":7313,"直":-1835,"省":792,"県":6293,"知":-1528,"私":4231,"税":401,"立":-960,"第":1201,"米":7767,"系":3066,"約":3663,"級":1384,"統":-4229,"総":1163,"線":1255,"者":6457,"能":725,"自":-2869,"英":785,"見":1044,"調":-562,"財":-733,"費":1777,"車":1835,"軍":1375,"込":-1504,"通":-1136,"選":-681,"郎":1026,"郡":4404,"部":1200,"金":2163,"長":421,"開":-1432,"間":1302,"関":-1282,"雨":2009,"電":-1045,"非":2066,"駅":1620,"1":-800,"」":2670,"・":-3794,"ッ":-1350,"ア":551,"グ":1319,"ス":874,"ト":521,"ム":1109,"ル":1591,"ロ":2201,"ン":278};
88
+ this.UW4__ = {",":3930,".":3508,"―":-4841,"、":3930,"。":3508,"〇":4999,"「":1895,"」":3798,"〓":-5156,"あ":4752,"い":-3435,"う":-640,"え":-2514,"お":2405,"か":530,"が":6006,"き":-4482,"ぎ":-3821,"く":-3788,"け":-4376,"げ":-4734,"こ":2255,"ご":1979,"さ":2864,"し":-843,"じ":-2506,"す":-731,"ず":1251,"せ":181,"そ":4091,"た":5034,"だ":5408,"ち":-3654,"っ":-5882,"つ":-1659,"て":3994,"で":7410,"と":4547,"な":5433,"に":6499,"ぬ":1853,"ね":1413,"の":7396,"は":8578,"ば":1940,"ひ":4249,"び":-4134,"ふ":1345,"へ":6665,"べ":-744,"ほ":1464,"ま":1051,"み":-2082,"む":-882,"め":-5046,"も":4169,"ゃ":-2666,"や":2795,"ょ":-1544,"よ":3351,"ら":-2922,"り":-9726,"る":-14896,"れ":-2613,"ろ":-4570,"わ":-1783,"を":13150,"ん":-2352,"カ":2145,"コ":1789,"セ":1287,"ッ":-724,"ト":-403,"メ":-1635,"ラ":-881,"リ":-541,"ル":-856,"ン":-3637,"・":-4371,"ー":-11870,"一":-2069,"中":2210,"予":782,"事":-190,"井":-1768,"人":1036,"以":544,"会":950,"体":-1286,"作":530,"側":4292,"先":601,"党":-2006,"共":-1212,"内":584,"円":788,"初":1347,"前":1623,"副":3879,"力":-302,"動":-740,"務":-2715,"化":776,"区":4517,"協":1013,"参":1555,"合":-1834,"和":-681,"員":-910,"器":-851,"回":1500,"国":-619,"園":-1200,"地":866,"場":-1410,"塁":-2094,"士":-1413,"多":1067,"大":571,"子":-4802,"学":-1397,"定":-1057,"寺":-809,"小":1910,"屋":-1328,"山":-1500,"島":-2056,"川":-2667,"市":2771,"年":374,"庁":-4556,"後":456,"性":553,"感":916,"所":-1566,"支":856,"改":787,"政":2182,"教":704,"文":522,"方":-856,"日":1798,"時":1829,"最":845,"月":-9066,"木":-485,"来":-442,"校":-360,"業":-1043,"氏":5388,"民":-2716,"気":-910,"沢":-939,"済":-543,"物":-735,"率":672,"球":-1267,"生":-1286,"産":-1101,"田":-2900,"町":1826,"的":2586,"目":922,"省":-3485,"県":2997,"空":-867,"立":-2112,"第":788,"米":2937,"系":786,"約":2171,"経":1146,"統":-1169,"総":940,"線":-994,"署":749,"者":2145,"能":-730,"般":-852,"行":-792,"規":792,"警":-1184,"議":-244,"谷":-1000,"賞":730,"車":-1481,"軍":1158,"輪":-1433,"込":-3370,"近":929,"道":-1291,"選":2596,"郎":-4866,"都":1192,"野":-1100,"銀":-2213,"長":357,"間":-2344,"院":-2297,"際":-2604,"電":-878,"領":-1659,"題":-792,"館":-1984,"首":1749,"高":2120,"「":1895,"」":3798,"・":-4371,"ッ":-724,"ー":-11870,"カ":2145,"コ":1789,"セ":1287,"ト":-403,"
メ":-1635,"ラ":-881,"リ":-541,"ル":-856,"ン":-3637};
89
+ this.UW5__ = {",":465,".":-299,"1":-514,"E2":-32768,"]":-2762,"、":465,"。":-299,"「":363,"あ":1655,"い":331,"う":-503,"え":1199,"お":527,"か":647,"が":-421,"き":1624,"ぎ":1971,"く":312,"げ":-983,"さ":-1537,"し":-1371,"す":-852,"だ":-1186,"ち":1093,"っ":52,"つ":921,"て":-18,"で":-850,"と":-127,"ど":1682,"な":-787,"に":-1224,"の":-635,"は":-578,"べ":1001,"み":502,"め":865,"ゃ":3350,"ょ":854,"り":-208,"る":429,"れ":504,"わ":419,"を":-1264,"ん":327,"イ":241,"ル":451,"ン":-343,"中":-871,"京":722,"会":-1153,"党":-654,"務":3519,"区":-901,"告":848,"員":2104,"大":-1296,"学":-548,"定":1785,"嵐":-1304,"市":-2991,"席":921,"年":1763,"思":872,"所":-814,"挙":1618,"新":-1682,"日":218,"月":-4353,"査":932,"格":1356,"機":-1508,"氏":-1347,"田":240,"町":-3912,"的":-3149,"相":1319,"省":-1052,"県":-4003,"研":-997,"社":-278,"空":-813,"統":1955,"者":-2233,"表":663,"語":-1073,"議":1219,"選":-1018,"郎":-368,"長":786,"間":1191,"題":2368,"館":-689,"1":-514,"E2":-32768,"「":363,"イ":241,"ル":451,"ン":-343};
90
+ this.UW6__ = {",":227,".":808,"1":-270,"E1":306,"、":227,"。":808,"あ":-307,"う":189,"か":241,"が":-73,"く":-121,"こ":-200,"じ":1782,"す":383,"た":-428,"っ":573,"て":-1014,"で":101,"と":-105,"な":-253,"に":-149,"の":-417,"は":-236,"も":-206,"り":187,"る":-135,"を":195,"ル":-673,"ン":-496,"一":-277,"中":201,"件":-800,"会":624,"前":302,"区":1792,"員":-1212,"委":798,"学":-960,"市":887,"広":-695,"後":535,"業":-697,"相":753,"社":-507,"福":974,"空":-822,"者":1811,"連":463,"郎":1082,"1":-270,"E1":306,"ル":-673,"ン":-496};
91
+
92
+ return this;
93
+ }
94
/**
 * Maps a single character to its character-class label ("H", "I", "K",
 * etc.) by testing it against the ordered regex table built in the
 * constructor (`this.chartype_`, an array of [regex, label] pairs).
 * @param {string} str - A one-character string.
 * @returns {string} The label of the first matching class, or "O" (other)
 *   when no pattern matches.
 */
TinySegmenter.prototype.ctype_ = function(str) {
  // Fixed: the original used `for...in` on an array, which iterates string
  // keys and any enumerable inherited properties; a plain indexed loop is
  // the correct idiom and preserves the first-match-wins order.
  for (var i = 0; i < this.chartype_.length; ++i) {
    if (str.match(this.chartype_[i][0])) {
      return this.chartype_[i][1];
    }
  }
  return "O";
}
102
+
103
/**
 * Coerces a missing feature-table lookup to a neutral score: returns the
 * given weight when it is truthy, otherwise 0. (Undefined dictionary hits
 * become 0 so they add nothing to the running score.)
 * @param {number|undefined} v - Feature weight looked up from a model table.
 * @returns {number} `v` when truthy, else 0.
 */
TinySegmenter.prototype.ts_ = function(v) {
  return v ? v : 0;
}
107
+
108
/**
 * Segments a Japanese text string into word tokens using the TinySegmenter
 * scoring model (the character/word n-gram weight tables loaded in the
 * constructor, e.g. UW*, BW*, TC*, UQ*).
 *
 * For each character position a score is accumulated from unigram/bigram/
 * trigram features over the surrounding words (w1..w6), character types
 * (c1..c6) and the previous boundary decisions (p1..p3); a positive score
 * marks a word boundary.
 *
 * @param {string} input - Text to segment.
 * @returns {string[]} Array of word tokens; empty array for null/undefined/empty input.
 */
TinySegmenter.prototype.segment = function(input) {
  // Loose equality kept deliberately: `== null` also matches undefined,
  // and `== ""` mirrors the original coercing empty-string check.
  if (input == null || input == "") {
    return [];
  }
  var result = [];
  // Sentinel markers pad both ends so the 3-character context windows
  // (w1..w6 / c1..c6) are always defined.
  var seg = ["B3", "B2", "B1"];
  var ctype = ["O", "O", "O"];
  var o = input.split("");
  // Fixed: the loop index was previously assigned without a declaration
  // (`for (i = 0; ...)`); it only worked via hoisting of the later `var i`
  // and would throw in strict mode without it.
  for (var i = 0; i < o.length; ++i) {
    seg.push(o[i]);
    ctype.push(this.ctype_(o[i]));
  }
  seg.push("E1");
  seg.push("E2");
  seg.push("E3");
  ctype.push("O");
  ctype.push("O");
  ctype.push("O");
  var word = seg[3];
  // p1..p3 track the last three boundary decisions; "U" = unknown (start).
  var p1 = "U";
  var p2 = "U";
  var p3 = "U";
  for (i = 4; i < seg.length - 3; ++i) {
    var score = this.BIAS__;
    var w1 = seg[i - 3];
    var w2 = seg[i - 2];
    var w3 = seg[i - 1];
    var w4 = seg[i];
    var w5 = seg[i + 1];
    var w6 = seg[i + 2];
    var c1 = ctype[i - 3];
    var c2 = ctype[i - 2];
    var c3 = ctype[i - 1];
    var c4 = ctype[i];
    var c5 = ctype[i + 1];
    var c6 = ctype[i + 2];
    // Previous-decision features.
    score += this.ts_(this.UP1__[p1]);
    score += this.ts_(this.UP2__[p2]);
    score += this.ts_(this.UP3__[p3]);
    score += this.ts_(this.BP1__[p1 + p2]);
    score += this.ts_(this.BP2__[p2 + p3]);
    // Word unigram features over the 6-character window.
    score += this.ts_(this.UW1__[w1]);
    score += this.ts_(this.UW2__[w2]);
    score += this.ts_(this.UW3__[w3]);
    score += this.ts_(this.UW4__[w4]);
    score += this.ts_(this.UW5__[w5]);
    score += this.ts_(this.UW6__[w6]);
    // Word bigram / trigram features.
    score += this.ts_(this.BW1__[w2 + w3]);
    score += this.ts_(this.BW2__[w3 + w4]);
    score += this.ts_(this.BW3__[w4 + w5]);
    score += this.ts_(this.TW1__[w1 + w2 + w3]);
    score += this.ts_(this.TW2__[w2 + w3 + w4]);
    score += this.ts_(this.TW3__[w3 + w4 + w5]);
    score += this.ts_(this.TW4__[w4 + w5 + w6]);
    // Character-type unigram / bigram / trigram features.
    score += this.ts_(this.UC1__[c1]);
    score += this.ts_(this.UC2__[c2]);
    score += this.ts_(this.UC3__[c3]);
    score += this.ts_(this.UC4__[c4]);
    score += this.ts_(this.UC5__[c5]);
    score += this.ts_(this.UC6__[c6]);
    score += this.ts_(this.BC1__[c2 + c3]);
    score += this.ts_(this.BC2__[c3 + c4]);
    score += this.ts_(this.BC3__[c4 + c5]);
    score += this.ts_(this.TC1__[c1 + c2 + c3]);
    score += this.ts_(this.TC2__[c2 + c3 + c4]);
    score += this.ts_(this.TC3__[c3 + c4 + c5]);
    score += this.ts_(this.TC4__[c4 + c5 + c6]);
    // score += this.ts_(this.TC5__[c4 + c5 + c6]);
    // Combined decision/character-type features.
    score += this.ts_(this.UQ1__[p1 + c1]);
    score += this.ts_(this.UQ2__[p2 + c2]);
    score += this.ts_(this.UQ3__[p3 + c3]);
    score += this.ts_(this.BQ1__[p2 + c2 + c3]);
    score += this.ts_(this.BQ2__[p2 + c3 + c4]);
    score += this.ts_(this.BQ3__[p3 + c2 + c3]);
    score += this.ts_(this.BQ4__[p3 + c3 + c4]);
    score += this.ts_(this.TQ1__[p2 + c1 + c2 + c3]);
    score += this.ts_(this.TQ2__[p2 + c2 + c3 + c4]);
    score += this.ts_(this.TQ3__[p3 + c1 + c2 + c3]);
    score += this.ts_(this.TQ4__[p3 + c2 + c3 + c4]);
    var p = "O";
    // A positive score marks a boundary: flush the current word.
    if (score > 0) {
      result.push(word);
      word = "";
      p = "B";
    }
    p1 = p2;
    p2 = p3;
    p3 = p;
    word += seg[i];
  }
  result.push(word);

  return result;
}
202
+
203
+ lunr.TinySegmenter = TinySegmenter;
204
+ };
205
+
206
+ }));
FinNLP/docs/FinNLP/site/assets/javascripts/lunr/wordcut.js ADDED
The diff for this file is too large to render. See raw diff
 
FinNLP/docs/FinNLP/site/assets/javascripts/workers/search.208ed371.min.js ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use strict";(()=>{var me=Object.create;var U=Object.defineProperty,xe=Object.defineProperties,ve=Object.getOwnPropertyDescriptor,Se=Object.getOwnPropertyDescriptors,Te=Object.getOwnPropertyNames,Y=Object.getOwnPropertySymbols,Qe=Object.getPrototypeOf,X=Object.prototype.hasOwnProperty,Ee=Object.prototype.propertyIsEnumerable;var Z=Math.pow,J=(t,e,r)=>e in t?U(t,e,{enumerable:!0,configurable:!0,writable:!0,value:r}):t[e]=r,A=(t,e)=>{for(var r in e||(e={}))X.call(e,r)&&J(t,r,e[r]);if(Y)for(var r of Y(e))Ee.call(e,r)&&J(t,r,e[r]);return t},q=(t,e)=>xe(t,Se(e));var be=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports);var Le=(t,e,r,n)=>{if(e&&typeof e=="object"||typeof e=="function")for(let i of Te(e))!X.call(t,i)&&i!==r&&U(t,i,{get:()=>e[i],enumerable:!(n=ve(e,i))||n.enumerable});return t};var we=(t,e,r)=>(r=t!=null?me(Qe(t)):{},Le(e||!t||!t.__esModule?U(r,"default",{value:t,enumerable:!0}):r,t));var B=(t,e,r)=>new Promise((n,i)=>{var s=c=>{try{a(r.next(c))}catch(u){i(u)}},o=c=>{try{a(r.throw(c))}catch(u){i(u)}},a=c=>c.done?n(c.value):Promise.resolve(c.value).then(s,o);a((r=r.apply(t,e)).next())});var te=be((K,ee)=>{/**
2
+ * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 2.3.9
3
+ * Copyright (C) 2020 Oliver Nightingale
4
+ * @license MIT
5
+ */(function(){var t=function(e){var r=new t.Builder;return r.pipeline.add(t.trimmer,t.stopWordFilter,t.stemmer),r.searchPipeline.add(t.stemmer),e.call(r,r),r.build()};t.version="2.3.9";/*!
6
+ * lunr.utils
7
+ * Copyright (C) 2020 Oliver Nightingale
8
+ */t.utils={},t.utils.warn=function(e){return function(r){e.console&&console.warn&&console.warn(r)}}(this),t.utils.asString=function(e){return e==null?"":e.toString()},t.utils.clone=function(e){if(e==null)return e;for(var r=Object.create(null),n=Object.keys(e),i=0;i<n.length;i++){var s=n[i],o=e[s];if(Array.isArray(o)){r[s]=o.slice();continue}if(typeof o=="string"||typeof o=="number"||typeof o=="boolean"){r[s]=o;continue}throw new TypeError("clone is not deep and does not support nested objects")}return r},t.FieldRef=function(e,r,n){this.docRef=e,this.fieldName=r,this._stringValue=n},t.FieldRef.joiner="/",t.FieldRef.fromString=function(e){var r=e.indexOf(t.FieldRef.joiner);if(r===-1)throw"malformed field ref string";var n=e.slice(0,r),i=e.slice(r+1);return new t.FieldRef(i,n,e)},t.FieldRef.prototype.toString=function(){return this._stringValue==null&&(this._stringValue=this.fieldName+t.FieldRef.joiner+this.docRef),this._stringValue};/*!
9
+ * lunr.Set
10
+ * Copyright (C) 2020 Oliver Nightingale
11
+ */t.Set=function(e){if(this.elements=Object.create(null),e){this.length=e.length;for(var r=0;r<this.length;r++)this.elements[e[r]]=!0}else this.length=0},t.Set.complete={intersect:function(e){return e},union:function(){return this},contains:function(){return!0}},t.Set.empty={intersect:function(){return this},union:function(e){return e},contains:function(){return!1}},t.Set.prototype.contains=function(e){return!!this.elements[e]},t.Set.prototype.intersect=function(e){var r,n,i,s=[];if(e===t.Set.complete)return this;if(e===t.Set.empty)return e;this.length<e.length?(r=this,n=e):(r=e,n=this),i=Object.keys(r.elements);for(var o=0;o<i.length;o++){var a=i[o];a in n.elements&&s.push(a)}return new t.Set(s)},t.Set.prototype.union=function(e){return e===t.Set.complete?t.Set.complete:e===t.Set.empty?this:new t.Set(Object.keys(this.elements).concat(Object.keys(e.elements)))},t.idf=function(e,r){var n=0;for(var i in e)i!="_index"&&(n+=Object.keys(e[i]).length);var s=(r-n+.5)/(n+.5);return Math.log(1+Math.abs(s))},t.Token=function(e,r){this.str=e||"",this.metadata=r||{}},t.Token.prototype.toString=function(){return this.str},t.Token.prototype.update=function(e){return this.str=e(this.str,this.metadata),this},t.Token.prototype.clone=function(e){return e=e||function(r){return r},new t.Token(e(this.str,this.metadata),this.metadata)};/*!
12
+ * lunr.tokenizer
13
+ * Copyright (C) 2020 Oliver Nightingale
14
+ */t.tokenizer=function(e,r){if(e==null||e==null)return[];if(Array.isArray(e))return e.map(function(g){return new t.Token(t.utils.asString(g).toLowerCase(),t.utils.clone(r))});for(var n=e.toString().toLowerCase(),i=n.length,s=[],o=0,a=0;o<=i;o++){var c=n.charAt(o),u=o-a;if(c.match(t.tokenizer.separator)||o==i){if(u>0){var l=t.utils.clone(r)||{};l.position=[a,u],l.index=s.length,s.push(new t.Token(n.slice(a,o),l))}a=o+1}}return s},t.tokenizer.separator=/[\s\-]+/;/*!
15
+ * lunr.Pipeline
16
+ * Copyright (C) 2020 Oliver Nightingale
17
+ */t.Pipeline=function(){this._stack=[]},t.Pipeline.registeredFunctions=Object.create(null),t.Pipeline.registerFunction=function(e,r){r in this.registeredFunctions&&t.utils.warn("Overwriting existing registered function: "+r),e.label=r,t.Pipeline.registeredFunctions[e.label]=e},t.Pipeline.warnIfFunctionNotRegistered=function(e){var r=e.label&&e.label in this.registeredFunctions;r||t.utils.warn(`Function is not registered with pipeline. This may cause problems when serialising the index.
18
+ `,e)},t.Pipeline.load=function(e){var r=new t.Pipeline;return e.forEach(function(n){var i=t.Pipeline.registeredFunctions[n];if(i)r.add(i);else throw new Error("Cannot load unregistered function: "+n)}),r},t.Pipeline.prototype.add=function(){var e=Array.prototype.slice.call(arguments);e.forEach(function(r){t.Pipeline.warnIfFunctionNotRegistered(r),this._stack.push(r)},this)},t.Pipeline.prototype.after=function(e,r){t.Pipeline.warnIfFunctionNotRegistered(r);var n=this._stack.indexOf(e);if(n==-1)throw new Error("Cannot find existingFn");n=n+1,this._stack.splice(n,0,r)},t.Pipeline.prototype.before=function(e,r){t.Pipeline.warnIfFunctionNotRegistered(r);var n=this._stack.indexOf(e);if(n==-1)throw new Error("Cannot find existingFn");this._stack.splice(n,0,r)},t.Pipeline.prototype.remove=function(e){var r=this._stack.indexOf(e);r!=-1&&this._stack.splice(r,1)},t.Pipeline.prototype.run=function(e){for(var r=this._stack.length,n=0;n<r;n++){for(var i=this._stack[n],s=[],o=0;o<e.length;o++){var a=i(e[o],o,e);if(!(a==null||a===""))if(Array.isArray(a))for(var c=0;c<a.length;c++)s.push(a[c]);else s.push(a)}e=s}return e},t.Pipeline.prototype.runString=function(e,r){var n=new t.Token(e,r);return this.run([n]).map(function(i){return i.toString()})},t.Pipeline.prototype.reset=function(){this._stack=[]},t.Pipeline.prototype.toJSON=function(){return this._stack.map(function(e){return t.Pipeline.warnIfFunctionNotRegistered(e),e.label})};/*!
19
+ * lunr.Vector
20
+ * Copyright (C) 2020 Oliver Nightingale
21
+ */t.Vector=function(e){this._magnitude=0,this.elements=e||[]},t.Vector.prototype.positionForIndex=function(e){if(this.elements.length==0)return 0;for(var r=0,n=this.elements.length/2,i=n-r,s=Math.floor(i/2),o=this.elements[s*2];i>1&&(o<e&&(r=s),o>e&&(n=s),o!=e);)i=n-r,s=r+Math.floor(i/2),o=this.elements[s*2];if(o==e||o>e)return s*2;if(o<e)return(s+1)*2},t.Vector.prototype.insert=function(e,r){this.upsert(e,r,function(){throw"duplicate index"})},t.Vector.prototype.upsert=function(e,r,n){this._magnitude=0;var i=this.positionForIndex(e);this.elements[i]==e?this.elements[i+1]=n(this.elements[i+1],r):this.elements.splice(i,0,e,r)},t.Vector.prototype.magnitude=function(){if(this._magnitude)return this._magnitude;for(var e=0,r=this.elements.length,n=1;n<r;n+=2){var i=this.elements[n];e+=i*i}return this._magnitude=Math.sqrt(e)},t.Vector.prototype.dot=function(e){for(var r=0,n=this.elements,i=e.elements,s=n.length,o=i.length,a=0,c=0,u=0,l=0;u<s&&l<o;)a=n[u],c=i[l],a<c?u+=2:a>c?l+=2:a==c&&(r+=n[u+1]*i[l+1],u+=2,l+=2);return r},t.Vector.prototype.similarity=function(e){return this.dot(e)/this.magnitude()||0},t.Vector.prototype.toArray=function(){for(var e=new Array(this.elements.length/2),r=1,n=0;r<this.elements.length;r+=2,n++)e[n]=this.elements[r];return e},t.Vector.prototype.toJSON=function(){return this.elements};/*!
22
+ * lunr.stemmer
23
+ * Copyright (C) 2020 Oliver Nightingale
24
+ * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
25
+ */t.stemmer=function(){var e={ational:"ate",tional:"tion",enci:"ence",anci:"ance",izer:"ize",bli:"ble",alli:"al",entli:"ent",eli:"e",ousli:"ous",ization:"ize",ation:"ate",ator:"ate",alism:"al",iveness:"ive",fulness:"ful",ousness:"ous",aliti:"al",iviti:"ive",biliti:"ble",logi:"log"},r={icate:"ic",ative:"",alize:"al",iciti:"ic",ical:"ic",ful:"",ness:""},n="[^aeiou]",i="[aeiouy]",s=n+"[^aeiouy]*",o=i+"[aeiou]*",a="^("+s+")?"+o+s,c="^("+s+")?"+o+s+"("+o+")?$",u="^("+s+")?"+o+s+o+s,l="^("+s+")?"+i,g=new RegExp(a),f=new RegExp(u),v=new RegExp(c),m=new RegExp(l),x=/^(.+?)(ss|i)es$/,d=/^(.+?)([^s])s$/,y=/^(.+?)eed$/,b=/^(.+?)(ed|ing)$/,E=/.$/,w=/(at|bl|iz)$/,R=new RegExp("([^aeiouylsz])\\1$"),j=new RegExp("^"+s+i+"[^aeiouwxy]$"),_=/^(.+?[^aeiou])y$/,D=/^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/,N=/^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/,C=/^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/,V=/^(.+?)(s|t)(ion)$/,P=/^(.+?)e$/,z=/ll$/,$=new RegExp("^"+s+i+"[^aeiouwxy]$"),M=function(h){var S,k,L,p,T,O,F;if(h.length<3)return h;if(L=h.substr(0,1),L=="y"&&(h=L.toUpperCase()+h.substr(1)),p=x,T=d,p.test(h)?h=h.replace(p,"$1$2"):T.test(h)&&(h=h.replace(T,"$1$2")),p=y,T=b,p.test(h)){var Q=p.exec(h);p=g,p.test(Q[1])&&(p=E,h=h.replace(p,""))}else if(T.test(h)){var Q=T.exec(h);S=Q[1],T=m,T.test(S)&&(h=S,T=w,O=R,F=j,T.test(h)?h=h+"e":O.test(h)?(p=E,h=h.replace(p,"")):F.test(h)&&(h=h+"e"))}if(p=_,p.test(h)){var Q=p.exec(h);S=Q[1],h=S+"i"}if(p=D,p.test(h)){var Q=p.exec(h);S=Q[1],k=Q[2],p=g,p.test(S)&&(h=S+e[k])}if(p=N,p.test(h)){var Q=p.exec(h);S=Q[1],k=Q[2],p=g,p.test(S)&&(h=S+r[k])}if(p=C,T=V,p.test(h)){var Q=p.exec(h);S=Q[1],p=f,p.test(S)&&(h=S)}else if(T.test(h)){var Q=T.exec(h);S=Q[1]+Q[2],T=f,T.test(S)&&(h=S)}if(p=P,p.test(h)){var Q=p.exec(h);S=Q[1],p=f,T=v,O=$,(p.test(S)||T.test(S)&&!O.test(S))&&(h=S)}return 
p=z,T=f,p.test(h)&&T.test(h)&&(p=E,h=h.replace(p,"")),L=="y"&&(h=L.toLowerCase()+h.substr(1)),h};return function(I){return I.update(M)}}(),t.Pipeline.registerFunction(t.stemmer,"stemmer");/*!
26
+ * lunr.stopWordFilter
27
+ * Copyright (C) 2020 Oliver Nightingale
28
+ */t.generateStopWordFilter=function(e){var r=e.reduce(function(n,i){return n[i]=i,n},{});return function(n){if(n&&r[n.toString()]!==n.toString())return n}},t.stopWordFilter=t.generateStopWordFilter(["a","able","about","across","after","all","almost","also","am","among","an","and","any","are","as","at","be","because","been","but","by","can","cannot","could","dear","did","do","does","either","else","ever","every","for","from","get","got","had","has","have","he","her","hers","him","his","how","however","i","if","in","into","is","it","its","just","least","let","like","likely","may","me","might","most","must","my","neither","no","nor","not","of","off","often","on","only","or","other","our","own","rather","said","say","says","she","should","since","so","some","than","that","the","their","them","then","there","these","they","this","tis","to","too","twas","us","wants","was","we","were","what","when","where","which","while","who","whom","why","will","with","would","yet","you","your"]),t.Pipeline.registerFunction(t.stopWordFilter,"stopWordFilter");/*!
29
+ * lunr.trimmer
30
+ * Copyright (C) 2020 Oliver Nightingale
31
+ */t.trimmer=function(e){return e.update(function(r){return r.replace(/^\W+/,"").replace(/\W+$/,"")})},t.Pipeline.registerFunction(t.trimmer,"trimmer");/*!
32
+ * lunr.TokenSet
33
+ * Copyright (C) 2020 Oliver Nightingale
34
+ */t.TokenSet=function(){this.final=!1,this.edges={},this.id=t.TokenSet._nextId,t.TokenSet._nextId+=1},t.TokenSet._nextId=1,t.TokenSet.fromArray=function(e){for(var r=new t.TokenSet.Builder,n=0,i=e.length;n<i;n++)r.insert(e[n]);return r.finish(),r.root},t.TokenSet.fromClause=function(e){return"editDistance"in e?t.TokenSet.fromFuzzyString(e.term,e.editDistance):t.TokenSet.fromString(e.term)},t.TokenSet.fromFuzzyString=function(e,r){for(var n=new t.TokenSet,i=[{node:n,editsRemaining:r,str:e}];i.length;){var s=i.pop();if(s.str.length>0){var o=s.str.charAt(0),a;o in s.node.edges?a=s.node.edges[o]:(a=new t.TokenSet,s.node.edges[o]=a),s.str.length==1&&(a.final=!0),i.push({node:a,editsRemaining:s.editsRemaining,str:s.str.slice(1)})}if(s.editsRemaining!=0){if("*"in s.node.edges)var c=s.node.edges["*"];else{var c=new t.TokenSet;s.node.edges["*"]=c}if(s.str.length==0&&(c.final=!0),i.push({node:c,editsRemaining:s.editsRemaining-1,str:s.str}),s.str.length>1&&i.push({node:s.node,editsRemaining:s.editsRemaining-1,str:s.str.slice(1)}),s.str.length==1&&(s.node.final=!0),s.str.length>=1){if("*"in s.node.edges)var u=s.node.edges["*"];else{var u=new t.TokenSet;s.node.edges["*"]=u}s.str.length==1&&(u.final=!0),i.push({node:u,editsRemaining:s.editsRemaining-1,str:s.str.slice(1)})}if(s.str.length>1){var l=s.str.charAt(0),g=s.str.charAt(1),f;g in s.node.edges?f=s.node.edges[g]:(f=new t.TokenSet,s.node.edges[g]=f),s.str.length==1&&(f.final=!0),i.push({node:f,editsRemaining:s.editsRemaining-1,str:l+s.str.slice(2)})}}}return n},t.TokenSet.fromString=function(e){for(var r=new t.TokenSet,n=r,i=0,s=e.length;i<s;i++){var o=e[i],a=i==s-1;if(o=="*")r.edges[o]=r,r.final=a;else{var c=new t.TokenSet;c.final=a,r.edges[o]=c,r=c}}return n},t.TokenSet.prototype.toArray=function(){for(var e=[],r=[{prefix:"",node:this}];r.length;){var n=r.pop(),i=Object.keys(n.node.edges),s=i.length;n.node.final&&(n.prefix.charAt(0),e.push(n.prefix));for(var o=0;o<s;o++){var 
a=i[o];r.push({prefix:n.prefix.concat(a),node:n.node.edges[a]})}}return e},t.TokenSet.prototype.toString=function(){if(this._str)return this._str;for(var e=this.final?"1":"0",r=Object.keys(this.edges).sort(),n=r.length,i=0;i<n;i++){var s=r[i],o=this.edges[s];e=e+s+o.id}return e},t.TokenSet.prototype.intersect=function(e){for(var r=new t.TokenSet,n=void 0,i=[{qNode:e,output:r,node:this}];i.length;){n=i.pop();for(var s=Object.keys(n.qNode.edges),o=s.length,a=Object.keys(n.node.edges),c=a.length,u=0;u<o;u++)for(var l=s[u],g=0;g<c;g++){var f=a[g];if(f==l||l=="*"){var v=n.node.edges[f],m=n.qNode.edges[l],x=v.final&&m.final,d=void 0;f in n.output.edges?(d=n.output.edges[f],d.final=d.final||x):(d=new t.TokenSet,d.final=x,n.output.edges[f]=d),i.push({qNode:m,output:d,node:v})}}}return r},t.TokenSet.Builder=function(){this.previousWord="",this.root=new t.TokenSet,this.uncheckedNodes=[],this.minimizedNodes={}},t.TokenSet.Builder.prototype.insert=function(e){var r,n=0;if(e<this.previousWord)throw new Error("Out of order word insertion");for(var i=0;i<e.length&&i<this.previousWord.length&&e[i]==this.previousWord[i];i++)n++;this.minimize(n),this.uncheckedNodes.length==0?r=this.root:r=this.uncheckedNodes[this.uncheckedNodes.length-1].child;for(var i=n;i<e.length;i++){var s=new t.TokenSet,o=e[i];r.edges[o]=s,this.uncheckedNodes.push({parent:r,char:o,child:s}),r=s}r.final=!0,this.previousWord=e},t.TokenSet.Builder.prototype.finish=function(){this.minimize(0)},t.TokenSet.Builder.prototype.minimize=function(e){for(var r=this.uncheckedNodes.length-1;r>=e;r--){var n=this.uncheckedNodes[r],i=n.child.toString();i in this.minimizedNodes?n.parent.edges[n.char]=this.minimizedNodes[i]:(n.child._str=i,this.minimizedNodes[i]=n.child),this.uncheckedNodes.pop()}};/*!
35
+ * lunr.Index
36
+ * Copyright (C) 2020 Oliver Nightingale
37
+ */t.Index=function(e){this.invertedIndex=e.invertedIndex,this.fieldVectors=e.fieldVectors,this.tokenSet=e.tokenSet,this.fields=e.fields,this.pipeline=e.pipeline},t.Index.prototype.search=function(e){return this.query(function(r){var n=new t.QueryParser(e,r);n.parse()})},t.Index.prototype.query=function(e){for(var r=new t.Query(this.fields),n=Object.create(null),i=Object.create(null),s=Object.create(null),o=Object.create(null),a=Object.create(null),c=0;c<this.fields.length;c++)i[this.fields[c]]=new t.Vector;e.call(r,r);for(var c=0;c<r.clauses.length;c++){var u=r.clauses[c],l=null,g=t.Set.empty;u.usePipeline?l=this.pipeline.runString(u.term,{fields:u.fields}):l=[u.term];for(var f=0;f<l.length;f++){var v=l[f];u.term=v;var m=t.TokenSet.fromClause(u),x=this.tokenSet.intersect(m).toArray();if(x.length===0&&u.presence===t.Query.presence.REQUIRED){for(var d=0;d<u.fields.length;d++){var y=u.fields[d];o[y]=t.Set.empty}break}for(var b=0;b<x.length;b++)for(var E=x[b],w=this.invertedIndex[E],R=w._index,d=0;d<u.fields.length;d++){var y=u.fields[d],j=w[y],_=Object.keys(j),D=E+"/"+y,N=new t.Set(_);if(u.presence==t.Query.presence.REQUIRED&&(g=g.union(N),o[y]===void 0&&(o[y]=t.Set.complete)),u.presence==t.Query.presence.PROHIBITED){a[y]===void 0&&(a[y]=t.Set.empty),a[y]=a[y].union(N);continue}if(i[y].upsert(R,u.boost,function(ge,ye){return ge+ye}),!s[D]){for(var C=0;C<_.length;C++){var V=_[C],P=new t.FieldRef(V,y),z=j[V],$;($=n[P])===void 0?n[P]=new t.MatchData(E,y,z):$.add(E,y,z)}s[D]=!0}}}if(u.presence===t.Query.presence.REQUIRED)for(var d=0;d<u.fields.length;d++){var y=u.fields[d];o[y]=o[y].intersect(g)}}for(var M=t.Set.complete,I=t.Set.empty,c=0;c<this.fields.length;c++){var y=this.fields[c];o[y]&&(M=M.intersect(o[y])),a[y]&&(I=I.union(a[y]))}var h=Object.keys(n),S=[],k=Object.create(null);if(r.isNegated()){h=Object.keys(this.fieldVectors);for(var c=0;c<h.length;c++){var P=h[c],L=t.FieldRef.fromString(P);n[P]=new t.MatchData}}for(var c=0;c<h.length;c++){var 
L=t.FieldRef.fromString(h[c]),p=L.docRef;if(M.contains(p)&&!I.contains(p)){var T=this.fieldVectors[L],O=i[L.fieldName].similarity(T),F;if((F=k[p])!==void 0)F.score+=O,F.matchData.combine(n[L]);else{var Q={ref:p,score:O,matchData:n[L]};k[p]=Q,S.push(Q)}}}return S.sort(function(de,pe){return pe.score-de.score})},t.Index.prototype.toJSON=function(){var e=Object.keys(this.invertedIndex).sort().map(function(n){return[n,this.invertedIndex[n]]},this),r=Object.keys(this.fieldVectors).map(function(n){return[n,this.fieldVectors[n].toJSON()]},this);return{version:t.version,fields:this.fields,fieldVectors:r,invertedIndex:e,pipeline:this.pipeline.toJSON()}},t.Index.load=function(e){var r={},n={},i=e.fieldVectors,s=Object.create(null),o=e.invertedIndex,a=new t.TokenSet.Builder,c=t.Pipeline.load(e.pipeline);e.version!=t.version&&t.utils.warn("Version mismatch when loading serialised index. Current version of lunr '"+t.version+"' does not match serialized index '"+e.version+"'");for(var u=0;u<i.length;u++){var l=i[u],g=l[0],f=l[1];n[g]=new t.Vector(f)}for(var u=0;u<o.length;u++){var l=o[u],v=l[0],m=l[1];a.insert(v),s[v]=m}return a.finish(),r.fields=e.fields,r.fieldVectors=n,r.invertedIndex=s,r.tokenSet=a.root,r.pipeline=c,new t.Index(r)};/*!
38
+ * lunr.Builder
39
+ * Copyright (C) 2020 Oliver Nightingale
40
+ */t.Builder=function(){this._ref="id",this._fields=Object.create(null),this._documents=Object.create(null),this.invertedIndex=Object.create(null),this.fieldTermFrequencies={},this.fieldLengths={},this.tokenizer=t.tokenizer,this.pipeline=new t.Pipeline,this.searchPipeline=new t.Pipeline,this.documentCount=0,this._b=.75,this._k1=1.2,this.termIndex=0,this.metadataWhitelist=[]},t.Builder.prototype.ref=function(e){this._ref=e},t.Builder.prototype.field=function(e,r){if(/\//.test(e))throw new RangeError("Field '"+e+"' contains illegal character '/'");this._fields[e]=r||{}},t.Builder.prototype.b=function(e){e<0?this._b=0:e>1?this._b=1:this._b=e},t.Builder.prototype.k1=function(e){this._k1=e},t.Builder.prototype.add=function(e,r){var n=e[this._ref],i=Object.keys(this._fields);this._documents[n]=r||{},this.documentCount+=1;for(var s=0;s<i.length;s++){var o=i[s],a=this._fields[o].extractor,c=a?a(e):e[o],u=this.tokenizer(c,{fields:[o]}),l=this.pipeline.run(u),g=new t.FieldRef(n,o),f=Object.create(null);this.fieldTermFrequencies[g]=f,this.fieldLengths[g]=0,this.fieldLengths[g]+=l.length;for(var v=0;v<l.length;v++){var m=l[v];if(f[m]==null&&(f[m]=0),f[m]+=1,this.invertedIndex[m]==null){var x=Object.create(null);x._index=this.termIndex,this.termIndex+=1;for(var d=0;d<i.length;d++)x[i[d]]=Object.create(null);this.invertedIndex[m]=x}this.invertedIndex[m][o][n]==null&&(this.invertedIndex[m][o][n]=Object.create(null));for(var y=0;y<this.metadataWhitelist.length;y++){var b=this.metadataWhitelist[y],E=m.metadata[b];this.invertedIndex[m][o][n][b]==null&&(this.invertedIndex[m][o][n][b]=[]),this.invertedIndex[m][o][n][b].push(E)}}}},t.Builder.prototype.calculateAverageFieldLengths=function(){for(var e=Object.keys(this.fieldLengths),r=e.length,n={},i={},s=0;s<r;s++){var o=t.FieldRef.fromString(e[s]),a=o.fieldName;i[a]||(i[a]=0),i[a]+=1,n[a]||(n[a]=0),n[a]+=this.fieldLengths[o]}for(var c=Object.keys(this._fields),s=0;s<c.length;s++){var 
u=c[s];n[u]=n[u]/i[u]}this.averageFieldLength=n},t.Builder.prototype.createFieldVectors=function(){for(var e={},r=Object.keys(this.fieldTermFrequencies),n=r.length,i=Object.create(null),s=0;s<n;s++){for(var o=t.FieldRef.fromString(r[s]),a=o.fieldName,c=this.fieldLengths[o],u=new t.Vector,l=this.fieldTermFrequencies[o],g=Object.keys(l),f=g.length,v=this._fields[a].boost||1,m=this._documents[o.docRef].boost||1,x=0;x<f;x++){var d=g[x],y=l[d],b=this.invertedIndex[d]._index,E,w,R;i[d]===void 0?(E=t.idf(this.invertedIndex[d],this.documentCount),i[d]=E):E=i[d],w=E*((this._k1+1)*y)/(this._k1*(1-this._b+this._b*(c/this.averageFieldLength[a]))+y),w*=v,w*=m,R=Math.round(w*1e3)/1e3,u.insert(b,R)}e[o]=u}this.fieldVectors=e},t.Builder.prototype.createTokenSet=function(){this.tokenSet=t.TokenSet.fromArray(Object.keys(this.invertedIndex).sort())},t.Builder.prototype.build=function(){return this.calculateAverageFieldLengths(),this.createFieldVectors(),this.createTokenSet(),new t.Index({invertedIndex:this.invertedIndex,fieldVectors:this.fieldVectors,tokenSet:this.tokenSet,fields:Object.keys(this._fields),pipeline:this.searchPipeline})},t.Builder.prototype.use=function(e){var r=Array.prototype.slice.call(arguments,1);r.unshift(this),e.apply(this,r)},t.MatchData=function(e,r,n){for(var i=Object.create(null),s=Object.keys(n||{}),o=0;o<s.length;o++){var a=s[o];i[a]=n[a].slice()}this.metadata=Object.create(null),e!==void 0&&(this.metadata[e]=Object.create(null),this.metadata[e][r]=i)},t.MatchData.prototype.combine=function(e){for(var r=Object.keys(e.metadata),n=0;n<r.length;n++){var i=r[n],s=Object.keys(e.metadata[i]);this.metadata[i]==null&&(this.metadata[i]=Object.create(null));for(var o=0;o<s.length;o++){var a=s[o],c=Object.keys(e.metadata[i][a]);this.metadata[i][a]==null&&(this.metadata[i][a]=Object.create(null));for(var u=0;u<c.length;u++){var 
l=c[u];this.metadata[i][a][l]==null?this.metadata[i][a][l]=e.metadata[i][a][l]:this.metadata[i][a][l]=this.metadata[i][a][l].concat(e.metadata[i][a][l])}}}},t.MatchData.prototype.add=function(e,r,n){if(!(e in this.metadata)){this.metadata[e]=Object.create(null),this.metadata[e][r]=n;return}if(!(r in this.metadata[e])){this.metadata[e][r]=n;return}for(var i=Object.keys(n),s=0;s<i.length;s++){var o=i[s];o in this.metadata[e][r]?this.metadata[e][r][o]=this.metadata[e][r][o].concat(n[o]):this.metadata[e][r][o]=n[o]}},t.Query=function(e){this.clauses=[],this.allFields=e},t.Query.wildcard=new String("*"),t.Query.wildcard.NONE=0,t.Query.wildcard.LEADING=1,t.Query.wildcard.TRAILING=2,t.Query.presence={OPTIONAL:1,REQUIRED:2,PROHIBITED:3},t.Query.prototype.clause=function(e){return"fields"in e||(e.fields=this.allFields),"boost"in e||(e.boost=1),"usePipeline"in e||(e.usePipeline=!0),"wildcard"in e||(e.wildcard=t.Query.wildcard.NONE),e.wildcard&t.Query.wildcard.LEADING&&e.term.charAt(0)!=t.Query.wildcard&&(e.term="*"+e.term),e.wildcard&t.Query.wildcard.TRAILING&&e.term.slice(-1)!=t.Query.wildcard&&(e.term=""+e.term+"*"),"presence"in e||(e.presence=t.Query.presence.OPTIONAL),this.clauses.push(e),this},t.Query.prototype.isNegated=function(){for(var e=0;e<this.clauses.length;e++)if(this.clauses[e].presence!=t.Query.presence.PROHIBITED)return!1;return!0},t.Query.prototype.term=function(e,r){if(Array.isArray(e))return e.forEach(function(i){this.term(i,t.utils.clone(r))},this),this;var n=r||{};return n.term=e.toString(),this.clause(n),this},t.QueryParseError=function(e,r,n){this.name="QueryParseError",this.message=e,this.start=r,this.end=n},t.QueryParseError.prototype=new Error,t.QueryLexer=function(e){this.lexemes=[],this.str=e,this.length=e.length,this.pos=0,this.start=0,this.escapeCharPositions=[]},t.QueryLexer.prototype.run=function(){for(var e=t.QueryLexer.lexText;e;)e=e(this)},t.QueryLexer.prototype.sliceString=function(){for(var 
e=[],r=this.start,n=this.pos,i=0;i<this.escapeCharPositions.length;i++)n=this.escapeCharPositions[i],e.push(this.str.slice(r,n)),r=n+1;return e.push(this.str.slice(r,this.pos)),this.escapeCharPositions.length=0,e.join("")},t.QueryLexer.prototype.emit=function(e){this.lexemes.push({type:e,str:this.sliceString(),start:this.start,end:this.pos}),this.start=this.pos},t.QueryLexer.prototype.escapeCharacter=function(){this.escapeCharPositions.push(this.pos-1),this.pos+=1},t.QueryLexer.prototype.next=function(){if(this.pos>=this.length)return t.QueryLexer.EOS;var e=this.str.charAt(this.pos);return this.pos+=1,e},t.QueryLexer.prototype.width=function(){return this.pos-this.start},t.QueryLexer.prototype.ignore=function(){this.start==this.pos&&(this.pos+=1),this.start=this.pos},t.QueryLexer.prototype.backup=function(){this.pos-=1},t.QueryLexer.prototype.acceptDigitRun=function(){var e,r;do e=this.next(),r=e.charCodeAt(0);while(r>47&&r<58);e!=t.QueryLexer.EOS&&this.backup()},t.QueryLexer.prototype.more=function(){return this.pos<this.length},t.QueryLexer.EOS="EOS",t.QueryLexer.FIELD="FIELD",t.QueryLexer.TERM="TERM",t.QueryLexer.EDIT_DISTANCE="EDIT_DISTANCE",t.QueryLexer.BOOST="BOOST",t.QueryLexer.PRESENCE="PRESENCE",t.QueryLexer.lexField=function(e){return e.backup(),e.emit(t.QueryLexer.FIELD),e.ignore(),t.QueryLexer.lexText},t.QueryLexer.lexTerm=function(e){if(e.width()>1&&(e.backup(),e.emit(t.QueryLexer.TERM)),e.ignore(),e.more())return t.QueryLexer.lexText},t.QueryLexer.lexEditDistance=function(e){return e.ignore(),e.acceptDigitRun(),e.emit(t.QueryLexer.EDIT_DISTANCE),t.QueryLexer.lexText},t.QueryLexer.lexBoost=function(e){return e.ignore(),e.acceptDigitRun(),e.emit(t.QueryLexer.BOOST),t.QueryLexer.lexText},t.QueryLexer.lexEOS=function(e){e.width()>0&&e.emit(t.QueryLexer.TERM)},t.QueryLexer.termSeparator=t.tokenizer.separator,t.QueryLexer.lexText=function(e){for(;;){var r=e.next();if(r==t.QueryLexer.EOS)return 
t.QueryLexer.lexEOS;if(r.charCodeAt(0)==92){e.escapeCharacter();continue}if(r==":")return t.QueryLexer.lexField;if(r=="~")return e.backup(),e.width()>0&&e.emit(t.QueryLexer.TERM),t.QueryLexer.lexEditDistance;if(r=="^")return e.backup(),e.width()>0&&e.emit(t.QueryLexer.TERM),t.QueryLexer.lexBoost;if(r=="+"&&e.width()===1||r=="-"&&e.width()===1)return e.emit(t.QueryLexer.PRESENCE),t.QueryLexer.lexText;if(r.match(t.QueryLexer.termSeparator))return t.QueryLexer.lexTerm}},t.QueryParser=function(e,r){this.lexer=new t.QueryLexer(e),this.query=r,this.currentClause={},this.lexemeIdx=0},t.QueryParser.prototype.parse=function(){this.lexer.run(),this.lexemes=this.lexer.lexemes;for(var e=t.QueryParser.parseClause;e;)e=e(this);return this.query},t.QueryParser.prototype.peekLexeme=function(){return this.lexemes[this.lexemeIdx]},t.QueryParser.prototype.consumeLexeme=function(){var e=this.peekLexeme();return this.lexemeIdx+=1,e},t.QueryParser.prototype.nextClause=function(){var e=this.currentClause;this.query.clause(e),this.currentClause={}},t.QueryParser.parseClause=function(e){var r=e.peekLexeme();if(r!=null)switch(r.type){case t.QueryLexer.PRESENCE:return t.QueryParser.parsePresence;case t.QueryLexer.FIELD:return t.QueryParser.parseField;case t.QueryLexer.TERM:return t.QueryParser.parseTerm;default:var n="expected either a field or a term, found "+r.type;throw r.str.length>=1&&(n+=" with value '"+r.str+"'"),new t.QueryParseError(n,r.start,r.end)}},t.QueryParser.parsePresence=function(e){var r=e.consumeLexeme();if(r!=null){switch(r.str){case"-":e.currentClause.presence=t.Query.presence.PROHIBITED;break;case"+":e.currentClause.presence=t.Query.presence.REQUIRED;break;default:var n="unrecognised presence operator'"+r.str+"'";throw new t.QueryParseError(n,r.start,r.end)}var i=e.peekLexeme();if(i==null){var n="expecting term or field, found nothing";throw new t.QueryParseError(n,r.start,r.end)}switch(i.type){case t.QueryLexer.FIELD:return t.QueryParser.parseField;case 
t.QueryLexer.TERM:return t.QueryParser.parseTerm;default:var n="expecting term or field, found '"+i.type+"'";throw new t.QueryParseError(n,i.start,i.end)}}},t.QueryParser.parseField=function(e){var r=e.consumeLexeme();if(r!=null){if(e.query.allFields.indexOf(r.str)==-1){var n=e.query.allFields.map(function(o){return"'"+o+"'"}).join(", "),i="unrecognised field '"+r.str+"', possible fields: "+n;throw new t.QueryParseError(i,r.start,r.end)}e.currentClause.fields=[r.str];var s=e.peekLexeme();if(s==null){var i="expecting term, found nothing";throw new t.QueryParseError(i,r.start,r.end)}switch(s.type){case t.QueryLexer.TERM:return t.QueryParser.parseTerm;default:var i="expecting term, found '"+s.type+"'";throw new t.QueryParseError(i,s.start,s.end)}}},t.QueryParser.parseTerm=function(e){var r=e.consumeLexeme();if(r!=null){e.currentClause.term=r.str.toLowerCase(),r.str.indexOf("*")!=-1&&(e.currentClause.usePipeline=!1);var n=e.peekLexeme();if(n==null){e.nextClause();return}switch(n.type){case t.QueryLexer.TERM:return e.nextClause(),t.QueryParser.parseTerm;case t.QueryLexer.FIELD:return e.nextClause(),t.QueryParser.parseField;case t.QueryLexer.EDIT_DISTANCE:return t.QueryParser.parseEditDistance;case t.QueryLexer.BOOST:return t.QueryParser.parseBoost;case t.QueryLexer.PRESENCE:return e.nextClause(),t.QueryParser.parsePresence;default:var i="Unexpected lexeme type '"+n.type+"'";throw new t.QueryParseError(i,n.start,n.end)}}},t.QueryParser.parseEditDistance=function(e){var r=e.consumeLexeme();if(r!=null){var n=parseInt(r.str,10);if(isNaN(n)){var i="edit distance must be numeric";throw new t.QueryParseError(i,r.start,r.end)}e.currentClause.editDistance=n;var s=e.peekLexeme();if(s==null){e.nextClause();return}switch(s.type){case t.QueryLexer.TERM:return e.nextClause(),t.QueryParser.parseTerm;case t.QueryLexer.FIELD:return e.nextClause(),t.QueryParser.parseField;case t.QueryLexer.EDIT_DISTANCE:return t.QueryParser.parseEditDistance;case t.QueryLexer.BOOST:return 
t.QueryParser.parseBoost;case t.QueryLexer.PRESENCE:return e.nextClause(),t.QueryParser.parsePresence;default:var i="Unexpected lexeme type '"+s.type+"'";throw new t.QueryParseError(i,s.start,s.end)}}},t.QueryParser.parseBoost=function(e){var r=e.consumeLexeme();if(r!=null){var n=parseInt(r.str,10);if(isNaN(n)){var i="boost must be numeric";throw new t.QueryParseError(i,r.start,r.end)}e.currentClause.boost=n;var s=e.peekLexeme();if(s==null){e.nextClause();return}switch(s.type){case t.QueryLexer.TERM:return e.nextClause(),t.QueryParser.parseTerm;case t.QueryLexer.FIELD:return e.nextClause(),t.QueryParser.parseField;case t.QueryLexer.EDIT_DISTANCE:return t.QueryParser.parseEditDistance;case t.QueryLexer.BOOST:return t.QueryParser.parseBoost;case t.QueryLexer.PRESENCE:return e.nextClause(),t.QueryParser.parsePresence;default:var i="Unexpected lexeme type '"+s.type+"'";throw new t.QueryParseError(i,s.start,s.end)}}},function(e,r){typeof define=="function"&&define.amd?define(r):typeof K=="object"?ee.exports=r():e.lunr=r()}(this,function(){return t})})()});var fe=we(te());function re(t,e=document){let r=Pe(t,e);if(typeof r=="undefined")throw new ReferenceError(`Missing element: expected "${t}" to be present`);return r}function Pe(t,e=document){return e.querySelector(t)||void 0}Object.entries||(Object.entries=function(t){let e=[];for(let r of Object.keys(t))e.push([r,t[r]]);return e});Object.values||(Object.values=function(t){let e=[];for(let r of Object.keys(t))e.push(t[r]);return e});typeof Element!="undefined"&&(Element.prototype.scrollTo||(Element.prototype.scrollTo=function(t,e){typeof t=="object"?(this.scrollLeft=t.left,this.scrollTop=t.top):(this.scrollLeft=t,this.scrollTop=e)}),Element.prototype.replaceWith||(Element.prototype.replaceWith=function(...t){let e=this.parentNode;if(e){t.length===0&&e.removeChild(this);for(let r=t.length-1;r>=0;r--){let n=t[r];typeof 
n=="string"?n=document.createTextNode(n):n.parentNode&&n.parentNode.removeChild(n),r?e.insertBefore(this.previousSibling,n):e.replaceChild(n,this)}}}));function ne(t){let e=new Map;for(let r of t){let[n]=r.location.split("#"),i=e.get(n);typeof i=="undefined"?e.set(n,r):(e.set(r.location,r),r.parent=i)}return e}function W(t,e,r){var s;e=new RegExp(e,"g");let n,i=0;do{n=e.exec(t);let o=(s=n==null?void 0:n.index)!=null?s:t.length;if(i<o&&r(i,o),n){let[a]=n;i=n.index+a.length,a.length===0&&(e.lastIndex=n.index+1)}}while(n)}function ie(t,e){let r=0,n=0,i=0;for(let s=0;i<t.length;i++)t.charAt(i)==="<"&&i>n?e(r,1,n,n=i):t.charAt(i)===">"&&(t.charAt(n+1)==="/"?--s===0&&e(r++,2,n,i+1):t.charAt(i-1)!=="/"&&s++===0&&e(r,0,n,i+1),n=i+1);i>n&&e(r,1,n,i)}function se(t,e,r){return G([t],e,r).pop()}function G(t,e,r){let n=[0];for(let i=1;i<e.length;i++){let s=e[i-1],o=e[i],a=s[s.length-1]>>>2&1023,c=o[0]>>>12;n.push(+(a>c)+n[n.length-1])}return t.map((i,s)=>{let o=new Map;for(let c of r.sort((u,l)=>u-l)){let u=c&1048575,l=c>>>20;if(n[l]!==s)continue;let g=o.get(l);typeof g=="undefined"&&o.set(l,g=[]),g.push(u)}if(o.size===0)return i;let a=[];for(let[c,u]of o){let l=e[c],g=l[0]>>>12,f=l[l.length-1]>>>12,v=l[l.length-1]>>>2&1023,m=i.slice(g,f+v);for(let x of u.sort((d,y)=>y-d)){let d=(l[x]>>>12)-g,y=(l[x]>>>2&1023)+d;m=[m.slice(0,d),"<mark>",m.slice(d,y),"</mark>",m.slice(y)].join("")}if(a.push(m)===2)break}return a.join("")})}function oe(t){let e=[];if(typeof t=="undefined")return e;let r=Array.isArray(t)?t:[t];for(let n=0;n<r.length;n++){let i=lunr.tokenizer.table,s=i.length;ie(r[n],(o,a,c,u)=>{var l;switch(i[l=o+=s]||(i[l]=[]),a){case 0:case 2:i[o].push(c<<12|u-c<<2|a);break;case 1:let g=r[n].slice(c,u);W(g,lunr.tokenizer.separator,(f,v)=>{if(typeof lunr.segmenter!="undefined"){let m=g.slice(f,v);if(/^[MHIK]$/.test(lunr.segmenter.ctype_(m))){let x=lunr.segmenter.segment(m);for(let d=0,y=0;d<x.length;d++)i[o]||(i[o]=[]),i[o].push(c+f+y<<12|x[d].length<<2|a),e.push(new 
lunr.Token(x[d].toLowerCase(),{position:o<<20|i[o].length-1})),y+=x[d].length;return}}i[o].push(c+f<<12|v-f<<2|a),e.push(new lunr.Token(g.slice(f,v).toLowerCase(),{position:o<<20|i[o].length-1}))})}})}return e}function ae(t,e=r=>r){return t.trim().split(/"([^"]+)"/g).map((r,n)=>n&1?r.replace(/^\b|^(?![^\x00-\x7F]|$)|\s+/g," +"):r).join("").replace(/"|(?:^|\s+)[*+\-:^~]+(?=\s+|$)/g,"").split(/\s+/g).flatMap(e).map(r=>/([~^]$)/.test(r)?`${r}1`:r).map(r=>/(^[+-]|[~^]\d+$)/.test(r)?r:`${r}*`).join(" ")}function ue(t){return ae(t,e=>{let r=[],n=new lunr.QueryLexer(e);n.run();for(let{type:i,str:s,start:o,end:a}of n.lexemes)switch(i){case"FIELD":["title","text","tags"].includes(s)||(e=[e.slice(0,a)," ",e.slice(a+1)].join(""));break;case"TERM":W(s,lunr.tokenizer.separator,(...c)=>{r.push([e.slice(0,o),s.slice(...c),e.slice(a)].join(""))})}return r})}function ce(t){let e=new lunr.Query(["title","text","tags"]);new lunr.QueryParser(t,e).parse();for(let n of e.clauses)n.usePipeline=!0,n.term.startsWith("*")&&(n.wildcard=lunr.Query.wildcard.LEADING,n.term=n.term.slice(1)),n.term.endsWith("*")&&(n.wildcard=lunr.Query.wildcard.TRAILING,n.term=n.term.slice(0,-1));return e.clauses}function le(t,e){var i;let r=new Set(t),n={};for(let s=0;s<e.length;s++)for(let o of r)e[s].startsWith(o.term)&&(n[o.term]=!0,r.delete(o));for(let s of r)(i=lunr.stopWordFilter)!=null&&i.call(lunr,s.term)&&(n[s.term]=!1);return n}function ke(t){return e=>r=>{if(typeof r[e]=="undefined")return;let n=[r.location,e].join(":");return t.set(n,lunr.tokenizer.table=[]),r[e]}}function Oe(t,e){let[r,n]=[new Set(t),new Set(e)];return[...new Set([...r].filter(i=>!n.has(i)))]}var H=class{constructor({config:e,docs:r,options:n}){let i=ke(this.table=new 
Map);this.map=ne(r),this.options=n,this.index=lunr(function(){this.metadataWhitelist=["position"],this.b(0),e.lang.length===1&&e.lang[0]!=="en"?this.use(lunr[e.lang[0]]):e.lang.length>1&&this.use(lunr.multiLanguage(...e.lang)),this.tokenizer=oe,lunr.tokenizer.separator=new RegExp(e.separator),lunr.segmenter="TinySegmenter"in lunr?new lunr.TinySegmenter:void 0;let s=Oe(["trimmer","stopWordFilter","stemmer"],e.pipeline);for(let o of e.lang.map(a=>a==="en"?lunr:lunr[a]))for(let a of s)this.pipeline.remove(o[a]),this.searchPipeline.remove(o[a]);this.ref("location"),this.field("title",{boost:1e3,extractor:i("title")}),this.field("text",{boost:1,extractor:i("text")}),this.field("tags",{boost:1e6,extractor:i("tags")});for(let o of r)this.add(o,{boost:o.boost})})}search(e){if(e=ue(e),!e)return{items:[]};let r=ce(e).filter(s=>s.presence!==lunr.Query.presence.PROHIBITED),n=this.index.search(e).reduce((s,{ref:o,score:a,matchData:c})=>{let u=this.map.get(o);if(typeof u!="undefined"){u=A({},u),u.tags&&(u.tags=[...u.tags]);let l=le(r,Object.keys(c.metadata));for(let f of this.index.fields){if(typeof u[f]=="undefined")continue;let v=[];for(let d of Object.values(c.metadata))typeof d[f]!="undefined"&&v.push(...d[f].position);if(!v.length)continue;let m=this.table.get([u.location,f].join(":")),x=Array.isArray(u[f])?G:se;u[f]=x(u[f],m,v)}let g=+!u.parent+Object.values(l).filter(f=>f).length/Object.keys(l).length;s.push(q(A({},u),{score:a*(1+Z(g,2)),terms:l}))}return s},[]).sort((s,o)=>o.score-s.score).reduce((s,o)=>{let a=this.map.get(o.location);if(typeof a!="undefined"){let c=a.parent?a.parent.location:a.location;s.set(c,[...s.get(c)||[],o])}return s},new Map);for(let[s,o]of n)if(!o.find(a=>a.location===s)){let a=this.map.get(s);o.push(q(A({},a),{score:0,terms:{}}))}let i;if(this.options.suggest){let s=this.index.query(o=>{for(let a of 
r)o.term(a.term,{fields:["title"],presence:lunr.Query.presence.REQUIRED,wildcard:lunr.Query.wildcard.TRAILING})});i=s.length?Object.keys(s[0].matchData.metadata):[]}return A({items:[...n.values()]},typeof i!="undefined"&&{suggest:i})}};var he;function Re(t){return B(this,null,function*(){let e="../lunr";if(typeof parent!="undefined"&&"IFrameWorker"in parent){let n=re("script[src]"),[i]=n.src.split("/worker");e=e.replace("..",i)}let r=[];for(let n of t.lang){switch(n){case"ja":r.push(`${e}/tinyseg.js`);break;case"hi":case"th":r.push(`${e}/wordcut.js`);break}n!=="en"&&r.push(`${e}/min/lunr.${n}.min.js`)}t.lang.length>1&&r.push(`${e}/min/lunr.multi.min.js`),r.length&&(yield importScripts(`${e}/min/lunr.stemmer.support.min.js`,...r))})}function Ie(t){return B(this,null,function*(){switch(t.type){case 0:return yield Re(t.data.config),he=new H(t.data),{type:1};case 2:let e=t.data;try{return{type:3,data:he.search(e)}}catch(r){return console.warn(`Invalid query: ${e} \u2013 see https://bit.ly/2s3ChXG`),console.warn(r),{type:3,data:{items:[]}}}default:throw new TypeError("Invalid message type")}})}self.lunr=fe.default;addEventListener("message",t=>B(void 0,null,function*(){postMessage(yield Ie(t.data))}));})();
41
+ //# sourceMappingURL=search.208ed371.min.js.map
42
+