Commit · da332f1
Parent(s):
Duplicate from LightChen2333/OpenSLU
Co-authored-by: Qiguang Chen <[email protected]>
This view is limited to 50 files because it contains too many changes. See raw diff.
- .gitattributes +34 -0
- .gitignore +136 -0
- README.md +14 -0
- __init__.py +1 -0
- accelerate/config-old.yaml +16 -0
- accelerate/config.yaml +22 -0
- app.py +63 -0
- common/__init__.py +1 -0
- common/config.py +192 -0
- common/global_pool.py +26 -0
- common/loader.py +332 -0
- common/logger.py +237 -0
- common/metric.py +346 -0
- common/model_manager.py +419 -0
- common/saver.py +80 -0
- common/tokenizer.py +323 -0
- common/utils.py +499 -0
- config/README.md +348 -0
- config/app.yaml +6 -0
- config/decoder/interaction/stack-propagation.yaml +1 -0
- config/examples/README.md +38 -0
- config/examples/from_pretrained.yaml +53 -0
- config/examples/from_pretrained_multi.yaml +55 -0
- config/examples/normal.yaml +70 -0
- config/examples/reload_to_train.yaml +71 -0
- config/reproduction/atis/bi-model.yaml +106 -0
- config/reproduction/atis/dca-net.yaml +88 -0
- config/reproduction/atis/deberta.yaml +67 -0
- config/reproduction/atis/electra.yaml +67 -0
- config/reproduction/atis/joint-bert.yaml +70 -0
- config/reproduction/atis/roberta.yaml +70 -0
- config/reproduction/atis/slot-gated.yaml +87 -0
- config/reproduction/atis/stack-propagation.yaml +109 -0
- config/reproduction/mix-atis/agif.yaml +133 -0
- config/reproduction/mix-atis/gl-gin.yaml +128 -0
- config/reproduction/mix-atis/vanilla.yaml +95 -0
- config/reproduction/mix-snips/agif.yaml +131 -0
- config/reproduction/mix-snips/gl-gin.yaml +131 -0
- config/reproduction/mix-snips/vanilla.yaml +95 -0
- config/reproduction/snips/bi-model.yaml +104 -0
- config/reproduction/snips/dca_net.yaml +88 -0
- config/reproduction/snips/deberta.yaml +70 -0
- config/reproduction/snips/electra.yaml +69 -0
- config/reproduction/snips/joint-bert.yaml +75 -0
- config/reproduction/snips/roberta.yaml +70 -0
- config/reproduction/snips/slot-gated.yaml +87 -0
- config/reproduction/snips/stack-propagation.yaml +105 -0
- config/visual.yaml +6 -0
- model/__init__.py +3 -0
- model/decoder/__init__.py +5 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,136 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+.idea/
+wandb/*
+save/*
+!save/.gitkeep
+logs/*
+!logs/.gitkeep
+test
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+.vscode/
README.md
ADDED
@@ -0,0 +1,14 @@
+---
+license: mit
+title: OpenSLU
+sdk: gradio
+sdk_version: 3.18.0
+app_file: app.py
+emoji: 🚀
+colorFrom: blue
+colorTo: purple
+pinned: false
+tags:
+- making-demos
+duplicated_from: LightChen2333/OpenSLU
+---
__init__.py
ADDED
@@ -0,0 +1 @@
+
accelerate/config-old.yaml
ADDED
@@ -0,0 +1,16 @@
+compute_environment: LOCAL_MACHINE
+deepspeed_config: {}
+distributed_type: MULTI_GPU
+downcast_bf16: 'no'
+fsdp_config: {}
+gpu_ids: all
+machine_rank: 0
+main_process_ip: null
+main_process_port: 9001
+main_training_function: main
+mixed_precision: 'no'
+num_machines: 0
+num_processes: 2
+rdzv_backend: static
+same_network: true
+use_cpu: false
accelerate/config.yaml
ADDED
@@ -0,0 +1,22 @@
+command_file: null
+commands: null
+compute_environment: LOCAL_MACHINE
+deepspeed_config: {}
+distributed_type: 'NO'
+downcast_bf16: 'no'
+dynamo_backend: 'NO'
+fsdp_config: {}
+gpu_ids: all
+machine_rank: 0
+main_process_ip: null
+main_process_port: null
+main_training_function: main
+megatron_lm_config: {}
+mixed_precision: 'no'
+num_machines: 1
+num_processes: 2
+rdzv_backend: static
+same_network: true
+tpu_name: null
+tpu_zone: null
+use_cpu: false
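
Both YAML files above are standard Hugging Face Accelerate launcher configurations (config-old.yaml for a multi-GPU setup, config.yaml for a single-machine run). As a sketch, assuming some training entry point script exists in the repo (a hypothetical run.py here), they would be consumed with the real Accelerate CLI flag `--config_file`, e.g. `accelerate launch --config_file accelerate/config.yaml run.py`.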
app.py
ADDED
@@ -0,0 +1,63 @@
+'''
+Author: Qiguang Chen
+LastEditors: Qiguang Chen
+Date: 2023-02-07 15:42:32
+LastEditTime: 2023-02-19 21:04:03
+Description:
+
+'''
+import argparse
+import gradio as gr
+
+from common.config import Config
+from common.model_manager import ModelManager
+from common.utils import str2bool
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--config_path', '-cp', type=str, default="config/examples/from_pretrained.yaml")
+parser.add_argument('--push_to_public', '-p', type=str2bool, nargs='?',
+                    const=True, default=False,
+                    help="Push to public network.")
+args = parser.parse_args()
+config = Config.load_from_yaml(args.config_path)
+config.base["train"] = False
+config.base["test"] = False
+
+model_manager = ModelManager(config)
+model_manager.init_model()
+
+
+def text_analysis(text):
+    print(text)
+    data = model_manager.predict(text)
+    html = """<link href="https://cdn.staticfile.org/twitter-bootstrap/5.1.1/css/bootstrap.min.css" rel="stylesheet">
+    <script src="https://cdn.staticfile.org/twitter-bootstrap/5.1.1/js/bootstrap.bundle.min.js"></script>"""
+    html += """<div style="background: white; padding: 16px;"><b>Intent:</b>"""
+
+    for intent in data["intent"]:
+        html += """<button type="button" class="btn btn-white">
+        <span class="badge text-dark btn-light">""" + intent + """</span> </button>"""
+    html += """<br /> <b>Slot:</b>"""
+    for t, slot in zip(data["text"], data["slot"]):
+        html += """<button type="button" class="btn btn-white">""" + t + """<span class="badge text-dark" style="background-color: rgb(255, 255, 255);
+        color: rgb(62 62 62);
+        box-shadow: 2px 2px 7px 1px rgba(210, 210, 210, 0.42);">""" + slot + \
+        """</span>
+        </button>"""
+    html += "</div>"
+    return html
+
+
+demo = gr.Interface(
+    text_analysis,
+    gr.Textbox(placeholder="Enter sentence here..."),
+    ["html"],
+    examples=[
+        ["i would like to find a flight from charlotte to las vegas that makes a stop in st louis"],
+    ],
+)
+if args.push_to_public:
+    demo.launch(share=True)
+else:
+    demo.launch()
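
For orientation, here is a minimal sketch of driving the same prediction path without the Gradio UI, using only the calls app.py itself makes; the {"text", "slot", "intent"} shape of the predict() result is inferred from the HTML-building loop above, not from a documented API.

from common.config import Config
from common.model_manager import ModelManager

config = Config.load_from_yaml("config/examples/from_pretrained.yaml")
config.base["train"] = False  # same inference-only flags app.py sets
config.base["test"] = False

model_manager = ModelManager(config)
model_manager.init_model()

# predict() is consumed above as a dict with "text", "slot" and "intent" keys.
data = model_manager.predict("show me flights from charlotte to las vegas")
for token, slot in zip(data["text"], data["slot"]):
    print(token, slot)
print("intents:", data["intent"])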
common/__init__.py
ADDED
@@ -0,0 +1 @@
+
common/config.py
ADDED
@@ -0,0 +1,192 @@
+'''
+Author: Qiguang Chen
+Date: 2023-01-11 10:39:26
+LastEditors: Qiguang Chen
+LastEditTime: 2023-02-15 17:58:53
+Description: Configuration class to manage all processes in OpenSLU, like model construction, learning processing and so on.
+
+'''
+import re
+
+from ruamel import yaml
+import datetime
+
+class Config(dict):
+    def __init__(self, *args, **kwargs):
+        """ init with dict as args
+        """
+        dict.__init__(self, *args, **kwargs)
+        self.__dict__ = self
+        self.start_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
+        if not self.model.get("_from_pretrained_"):
+            self.__autowired()
+
+    @staticmethod
+    def load_from_yaml(file_path: str) -> "Config":
+        """load config file from path
+
+        Args:
+            file_path (str): yaml configuration file path.
+
+        Returns:
+            Config: config object.
+        """
+        with open(file_path) as stream:
+            try:
+                return Config(yaml.safe_load(stream))
+            except yaml.YAMLError as exc:
+                print(exc)
+
+    @staticmethod
+    def load_from_args(args) -> "Config":
+        """ load args to replace item values in the config file assigned with '--config_path' or '--model'
+
+        Args:
+            args (Any): command line args.
+
+        Returns:
+            Config: config object with command line overrides applied.
+        """
+        if args.model is not None and args.dataset is not None:
+            args.config_path = f"config/reproduction/{args.dataset}/{args.model}.yaml"
+        config = Config.load_from_yaml(args.config_path)
+        if args.dataset is not None:
+            config.__update_dataset(args.dataset)
+        if args.device is not None:
+            config["base"]["device"] = args.device
+        if args.learning_rate is not None:
+            config["optimizer"]["lr"] = args.learning_rate
+        if args.epoch_num is not None:
+            config["base"]["epoch_num"] = args.epoch_num
+        return config
+
+    def autoload_template(self):
+        """ search '{*}' templates to execute as python code; supports replacing a variable with any configuration item
+        """
+        self.__autoload_template(self.__dict__)
+
+    def __get_autoload_value(self, matched):
+        keys = matched.group()[1:-1].split(".")
+        temp = self.__dict__
+        for k in keys:
+            temp = temp[k]
+        return str(temp)
+
+    def __autoload_template(self, config: dict):
+        for k in config:
+            if isinstance(config, dict):
+                sub_config = config[k]
+            elif isinstance(config, list):
+                sub_config = k
+            else:
+                continue
+            if isinstance(sub_config, dict) or isinstance(sub_config, list):
+                self.__autoload_template(sub_config)
+            if isinstance(sub_config, str) and "{" in sub_config and "}" in sub_config:
+                res = re.sub(r'{.*?}', self.__get_autoload_value, config[k])
+                res_dict = {"res": None}
+                exec("res=" + res, res_dict)
+                config[k] = res_dict["res"]
+
+    def __update_dataset(self, dataset_name):
+        if dataset_name is not None and isinstance(dataset_name, str):
+            self.__dict__["dataset"]["dataset_name"] = dataset_name
+
+    def get_model_config(self):
+        return self.__dict__["model"]
+
+    def __autowired(self):
+        # Set encoder
+        encoder_config = self.__dict__["model"]["encoder"]
+        encoder_type = encoder_config["_model_target_"].split(".")[-1]
+
+        def get_output_dim(encoder_config):
+            encoder_type = encoder_config["_model_target_"].split(".")[-1]
+            if (encoder_type == "AutoEncoder" and encoder_config["encoder_name"] in ["lstm", "self-attention-lstm",
+                                                                                     "bi-encoder"]) or encoder_type == "NoPretrainedEncoder":
+                output_dim = 0
+                if encoder_config.get("lstm"):
+                    output_dim += encoder_config["lstm"]["output_dim"]
+                if encoder_config.get("attention"):
+                    output_dim += encoder_config["attention"]["output_dim"]
+                return output_dim
+            else:
+                return encoder_config["output_dim"]
+
+        if encoder_type == "BiEncoder":
+            output_dim = get_output_dim(encoder_config["intent_encoder"]) + \
+                         get_output_dim(encoder_config["slot_encoder"])
+        else:
+            output_dim = get_output_dim(encoder_config)
+        self.__dict__["model"]["encoder"]["output_dim"] = output_dim
+
+        # Set interaction
+        if "interaction" in self.__dict__["model"]["decoder"] and self.__dict__["model"]["decoder"]["interaction"].get(
+                "input_dim") is None:
+            self.__dict__["model"]["decoder"]["interaction"]["input_dim"] = output_dim
+            interaction_type = self.__dict__["model"]["decoder"]["interaction"]["_model_target_"].split(".")[-1]
+            if not ((encoder_type == "AutoEncoder" and encoder_config[
+                "encoder_name"] == "self-attention-lstm") or encoder_type == "SelfAttentionLSTMEncoder") and interaction_type != "BiModelWithoutDecoderInteraction":
+                output_dim = self.__dict__["model"]["decoder"]["interaction"]["output_dim"]
+
+        # Set classifier
+        if "slot_classifier" in self.__dict__["model"]["decoder"]:
+            if self.__dict__["model"]["decoder"]["slot_classifier"].get("input_dim") is None:
+                self.__dict__["model"]["decoder"]["slot_classifier"]["input_dim"] = output_dim
+            self.__dict__["model"]["decoder"]["slot_classifier"]["use_slot"] = True
+        if "intent_classifier" in self.__dict__["model"]["decoder"]:
+            if self.__dict__["model"]["decoder"]["intent_classifier"].get("input_dim") is None:
+                self.__dict__["model"]["decoder"]["intent_classifier"]["input_dim"] = output_dim
+            self.__dict__["model"]["decoder"]["intent_classifier"]["use_intent"] = True
+
+    def get_intent_label_num(self):
+        """ get the number of intent labels.
+        """
+        classifier_conf = self.__dict__["model"]["decoder"]["intent_classifier"]
+        return classifier_conf["intent_label_num"] if "intent_label_num" in classifier_conf else 0
+
+    def get_slot_label_num(self):
+        """ get the number of slot labels.
+        """
+        classifier_conf = self.__dict__["model"]["decoder"]["slot_classifier"]
+        return classifier_conf["slot_label_num"] if "slot_label_num" in classifier_conf else 0
+
+    def set_intent_label_num(self, intent_label_num):
+        """ set the number of intent labels.
+
+        Args:
+            intent_label_num (int): the number of intent labels
+        """
+        self.__dict__["base"]["intent_label_num"] = intent_label_num
+        self.__dict__["model"]["decoder"]["intent_classifier"]["intent_label_num"] = intent_label_num
+        if "interaction" in self.__dict__["model"]["decoder"]:
+
+            self.__dict__["model"]["decoder"]["interaction"]["intent_label_num"] = intent_label_num
+            if self.__dict__["model"]["decoder"]["interaction"]["_model_target_"].split(".")[
+                -1] == "StackInteraction":
+                self.__dict__["model"]["decoder"]["slot_classifier"]["input_dim"] += intent_label_num
+
+
+    def set_slot_label_num(self, slot_label_num: int) -> None:
+        """set the number of slot labels
+
+        Args:
+            slot_label_num (int): the number of slot labels
+        """
+        self.__dict__["base"]["slot_label_num"] = slot_label_num
+        self.__dict__["model"]["decoder"]["slot_classifier"]["slot_label_num"] = slot_label_num
+        if "interaction" in self.__dict__["model"]["decoder"]:
+            self.__dict__["model"]["decoder"]["interaction"]["slot_label_num"] = slot_label_num
+
+    def set_vocab_size(self, vocab_size):
+        """set the size of the vocabulary for a non-pretrained tokenizer
+
+        Args:
+            vocab_size (int): the vocabulary size
+        """
+        encoder_type = self.__dict__["model"]["encoder"]["_model_target_"].split(".")[-1]
+        encoder_name = self.__dict__["model"]["encoder"].get("encoder_name")
+        if encoder_type == "BiEncoder" or (encoder_type == "AutoEncoder" and encoder_name == "bi-encoder"):
+            self.__dict__["model"]["encoder"]["intent_encoder"]["embedding"]["vocab_size"] = vocab_size
+            self.__dict__["model"]["encoder"]["slot_encoder"]["embedding"]["vocab_size"] = vocab_size
+        elif self.__dict__["model"]["encoder"].get("embedding"):
+            self.__dict__["model"]["encoder"]["embedding"]["vocab_size"] = vocab_size
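
To make the `{*}` templating of autoload_template concrete, here is a small sketch with made-up values; the `_from_pretrained_` flag is set only so this toy config skips `__autowired()`, which expects a full encoder section.

from common.config import Config

config = Config({
    "base": {"hidden_dim": 256},
    "model": {
        "_from_pretrained_": True,  # skip __autowired() for this toy config
        "encoder": {"output_dim": "{base.hidden_dim} * 2"},
    },
})
# "{base.hidden_dim}" is substituted to "256", then "256 * 2" is run
# through exec(), so output_dim becomes the integer 512.
config.autoload_template()
print(config["model"]["encoder"]["output_dim"])  # 512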
common/global_pool.py
ADDED
@@ -0,0 +1,26 @@
+'''
+Author: Qiguang Chen
+LastEditors: Qiguang Chen
+Date: 2023-02-12 14:35:37
+LastEditTime: 2023-02-12 14:37:40
+Description:
+
+'''
+def _init():
+    global _global_dict
+    _global_dict = {}
+
+
+def set_value(key, value):
+    # set global value in object pool
+    _global_dict[key] = value
+
+
+def get_value(key):
+    # get global value from object pool
+    try:
+        return _global_dict[key]
+    except KeyError:
+        print('Failed to read ' + key + '\r\n')
+
+
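
A quick usage sketch of this object pool (names from the module above; the stored values are made up):

import common.global_pool as global_pool

global_pool._init()                       # create the module-level dict
global_pool.set_value("logger", "demo")   # stash any shared object
print(global_pool.get_value("logger"))    # -> demo
global_pool.get_value("missing")          # prints the failure message, returns None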
common/loader.py
ADDED
@@ -0,0 +1,332 @@
+'''
+Author: Qiguang Chen
+Date: 2023-01-11 10:39:26
+LastEditors: Qiguang Chen
+LastEditTime: 2023-02-19 15:39:48
+Description: all classes for loading data.
+
+'''
+import os
+import torch
+import json
+from datasets import load_dataset, Dataset
+from torch.utils.data import DataLoader
+
+from common.utils import InputData
+
+ABS_PATH = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../")
+
+class DataFactory(object):
+    def __init__(self, tokenizer, use_multi_intent=False, to_lower_case=True):
+        """create a data factory around a tokenizer
+
+        Args:
+            tokenizer (Tokenizer): tokenizer used to encode the text.
+            use_multi_intent (bool, optional): whether intents are '#'-joined multi-intent labels. Defaults to False.
+        """
+        self.tokenizer = tokenizer
+        self.slot_label_list = []
+        self.intent_label_list = []
+        self.use_multi = use_multi_intent
+        self.to_lower_case = to_lower_case
+        self.slot_label_dict = None
+        self.intent_label_dict = None
+
+    def __is_supported_datasets(self, dataset_name: str) -> bool:
+        return dataset_name.lower() in ["atis", "snips", "mix-atis", "mix-snips"]
+
+    def load_dataset(self, dataset_config, split="train"):
+        dataset_name = None
+        if split not in dataset_config:
+            dataset_name = dataset_config.get("dataset_name")
+        elif self.__is_supported_datasets(dataset_config[split]):
+            dataset_name = dataset_config[split].lower()
+        if dataset_name is not None:
+            return load_dataset("LightChen2333/OpenSLU", dataset_name, split=split)
+        else:
+            data_file = dataset_config[split]
+            data_dict = {"text": [], "slot": [], "intent": []}
+            with open(data_file, encoding="utf-8") as f:
+                for line in f:
+                    row = json.loads(line)
+                    data_dict["text"].append(row["text"])
+                    data_dict["slot"].append(row["slot"])
+                    data_dict["intent"].append(row["intent"])
+            return Dataset.from_dict(data_dict)
+
+    def update_label_names(self, dataset):
+        for intent_labels in dataset["intent"]:
+            if self.use_multi:
+                intent_label = intent_labels.split("#")
+            else:
+                intent_label = [intent_labels]
+            for x in intent_label:
+                if x not in self.intent_label_list:
+                    self.intent_label_list.append(x)
+        for slot_label in dataset["slot"]:
+            for x in slot_label:
+                if x not in self.slot_label_list:
+                    self.slot_label_list.append(x)
+        self.intent_label_dict = {key: index for index,
+                                  key in enumerate(self.intent_label_list)}
+        self.slot_label_dict = {key: index for index,
+                                key in enumerate(self.slot_label_list)}
+
+    def update_vocabulary(self, dataset):
+        if self.tokenizer.name_or_path in ["word_tokenizer"]:
+            for data in dataset:
+                self.tokenizer.add_instance(data["text"])
+
+    @staticmethod
+    def fast_align_data(text, padding_side="right"):
+        for i in range(len(text.input_ids)):
+            desired_output = []
+            for word_id in text.word_ids(i):
+                if word_id is not None:
+                    start, end = text.word_to_tokens(
+                        i, word_id, sequence_index=0 if padding_side == "right" else 1)
+                    if start == end - 1:
+                        tokens = [start]
+                    else:
+                        tokens = [start, end - 1]
+                    if len(desired_output) == 0 or desired_output[-1] != tokens:
+                        desired_output.append(tokens)
+            yield desired_output
+
+    def fast_align(self,
+                   batch,
+                   ignore_index=-100,
+                   device="cuda",
+                   config=None,
+                   enable_label=True,
+                   label2tensor=True):
+        if self.to_lower_case:
+            input_list = [[t.lower() for t in x["text"]] for x in batch]
+        else:
+            input_list = [x["text"] for x in batch]
+        text = self.tokenizer(input_list,
+                              return_tensors="pt",
+                              padding=True,
+                              is_split_into_words=True,
+                              truncation=True,
+                              **config).to(device)
+        if enable_label:
+            if label2tensor:
+                slot_mask = torch.ones_like(text.input_ids) * ignore_index
+                for i, offsets in enumerate(
+                        DataFactory.fast_align_data(text, padding_side=self.tokenizer.padding_side)):
+                    num = 0
+                    assert len(offsets) == len(batch[i]["text"])
+                    assert len(offsets) == len(batch[i]["slot"])
+                    for off in offsets:
+                        slot_mask[i][off[0]] = self.slot_label_dict[batch[i]["slot"][num]]
+                        num += 1
+                slot = slot_mask.clone()
+                attention_id = 0 if self.tokenizer.padding_side == "right" else 1
+                for i, slot_batch in enumerate(slot):
+                    for j, x in enumerate(slot_batch):
+                        if x == ignore_index and text.attention_mask[i][j] == attention_id and (text.input_ids[i][
+                            j] not in self.tokenizer.all_special_ids or text.input_ids[i][j] == self.tokenizer.unk_token_id):
+                            slot[i][j] = slot[i][j - 1]
+                slot = slot.to(device)
+                if not self.use_multi:
+                    intent = torch.tensor(
+                        [self.intent_label_dict[x["intent"]] for x in batch]).to(device)
+                else:
+                    one_hot = torch.zeros(
+                        (len(batch), len(self.intent_label_list)), dtype=torch.float)
+                    for index, b in enumerate(batch):
+                        for x in b["intent"].split("#"):
+                            one_hot[index][self.intent_label_dict[x]] = 1.
+                    intent = one_hot.to(device)
+            else:
+                slot_mask = None
+                slot = [['#' for _ in range(text.input_ids.shape[1])]
+                        for _ in range(text.input_ids.shape[0])]
+                for i, offsets in enumerate(DataFactory.fast_align_data(text)):
+                    num = 0
+                    for off in offsets:
+                        slot[i][off[0]] = batch[i]["slot"][num]
+                        num += 1
+                if not self.use_multi:
+                    intent = [x["intent"] for x in batch]
+                else:
+                    intent = [
+                        [x for x in b["intent"].split("#")] for b in batch]
+            return InputData((text, slot, intent))
+        else:
+            return InputData((text, None, None))
+
+    def general_align_data(self, split_text_list, raw_text_list, encoded_text):
+        for i in range(len(split_text_list)):
+            desired_output = []
+            jdx = 0
+            offset = encoded_text.offset_mapping[i].tolist()
+            split_texts = split_text_list[i]
+            raw_text = raw_text_list[i]
+            last = 0
+            temp_offset = []
+            for off in offset:
+                s, e = off
+                if len(temp_offset) > 0 and (e != 0 and last == s):
+                    len_1 = off[1] - off[0]
+                    len_2 = temp_offset[-1][1] - temp_offset[-1][0]
+                    if len_1 > len_2:
+                        temp_offset.pop(-1)
+                        temp_offset.append([0, 0])
+                        temp_offset.append(off)
+                    continue
+                temp_offset.append(off)
+                last = s
+            offset = temp_offset
+            for split_text in split_texts:
+                while jdx < len(offset) and offset[jdx][0] == 0 and offset[jdx][1] == 0:
+                    jdx += 1
+                if jdx == len(offset):
+                    continue
+                start_, end_ = offset[jdx]
+                tokens = None
+                if split_text == raw_text[start_:end_].strip():
+                    tokens = [jdx]
+                else:
+                    # Compute "xxx" -> "xx" "#x"
+                    temp_jdx = jdx
+                    last_str = raw_text[start_:end_].strip()
+                    while last_str != split_text and temp_jdx < len(offset) - 1:
+                        temp_jdx += 1
+                        last_str += raw_text[offset[temp_jdx][0]:offset[temp_jdx][1]].strip()
+
+                    if temp_jdx == jdx:
+                        raise ValueError("Illegal Input data")
+                    elif last_str == split_text:
+                        tokens = [jdx, temp_jdx]
+                        jdx = temp_jdx
+                    else:
+                        jdx -= 1
+                jdx += 1
+                if tokens is not None:
+                    desired_output.append(tokens)
+            yield desired_output
+
+    def general_align(self,
+                      batch,
+                      ignore_index=-100,
+                      device="cuda",
+                      config=None,
+                      enable_label=True,
+                      label2tensor=True,
+                      locale="en-US"):
+        if self.to_lower_case:
+            raw_data = [" ".join(x["text"]).lower() if locale not in ['ja-JP', 'zh-CN', 'zh-TW'] else "".join(x["text"]) for x in
+                        batch]
+            input_list = [[t.lower() for t in x["text"]] for x in batch]
+        else:
+            input_list = [x["text"] for x in batch]
+            raw_data = [" ".join(x["text"]) if locale not in ['ja-JP', 'zh-CN', 'zh-TW'] else "".join(x["text"]) for x in
+                        batch]
+        text = self.tokenizer(raw_data,
+                              return_tensors="pt",
+                              padding=True,
+                              truncation=True,
+                              return_offsets_mapping=True,
+                              **config).to(device)
+        if enable_label:
+            if label2tensor:
+                slot_mask = torch.ones_like(text.input_ids) * ignore_index
+                for i, offsets in enumerate(
+                        self.general_align_data(input_list, raw_data, encoded_text=text)):
+                    num = 0
+                    # if len(offsets) != len(batch[i]["text"]) or len(offsets) != len(batch[i]["slot"]):
+                    #     if
+                    for off in offsets:
+                        slot_mask[i][off[0]] = self.slot_label_dict[batch[i]["slot"][num]]
+                        num += 1
+                # slot = slot_mask.clone()
+                # attention_id = 0 if self.tokenizer.padding_side == "right" else 1
+                # for i, slot_batch in enumerate(slot):
+                #     for j, x in enumerate(slot_batch):
+                #         if x == ignore_index and text.attention_mask[i][j] == attention_id and text.input_ids[i][
+                #             j] not in self.tokenizer.all_special_ids:
+                #             slot[i][j] = slot[i][j - 1]
+                slot = slot_mask.to(device)
+                if not self.use_multi:
+                    intent = torch.tensor(
+                        [self.intent_label_dict[x["intent"]] for x in batch]).to(device)
+                else:
+                    one_hot = torch.zeros(
+                        (len(batch), len(self.intent_label_list)), dtype=torch.float)
+                    for index, b in enumerate(batch):
+                        for x in b["intent"].split("#"):
+                            one_hot[index][self.intent_label_dict[x]] = 1.
+                    intent = one_hot.to(device)
+            else:
+                slot_mask = None
+                slot = [['#' for _ in range(text.input_ids.shape[1])]
+                        for _ in range(text.input_ids.shape[0])]
+                for i, offsets in enumerate(self.general_align_data(input_list, raw_data, encoded_text=text)):
+                    num = 0
+                    for off in offsets:
+                        slot[i][off[0]] = batch[i]["slot"][num]
+                        num += 1
+                if not self.use_multi:
+                    intent = [x["intent"] for x in batch]
+                else:
+                    intent = [
+                        [x for x in b["intent"].split("#")] for b in batch]
+            return InputData((text, slot, intent))
+        else:
+            return InputData((text, None, None))
+
+    def batch_fn(self,
+                 batch,
+                 ignore_index=-100,
+                 device="cuda",
+                 config=None,
+                 align_mode="fast",
+                 enable_label=True,
+                 label2tensor=True):
+        if align_mode == "fast":
+            # try:
+            return self.fast_align(batch,
+                                   ignore_index=ignore_index,
+                                   device=device,
+                                   config=config,
+                                   enable_label=enable_label,
+                                   label2tensor=label2tensor)
+            # except:
+            #     return self.general_align(batch,
+            #                               ignore_index=ignore_index,
+            #                               device=device,
+            #                               config=config,
+            #                               enable_label=enable_label,
+            #                               label2tensor=label2tensor)
+        else:
+            return self.general_align(batch,
+                                      ignore_index=ignore_index,
+                                      device=device,
+                                      config=config,
+                                      enable_label=enable_label,
+                                      label2tensor=label2tensor)
+
+    def get_data_loader(self,
+                        dataset,
+                        batch_size,
+                        shuffle=False,
+                        device="cuda",
+                        enable_label=True,
+                        align_mode="fast",
+                        label2tensor=True, **config):
+        data_loader = DataLoader(dataset,
+                                 shuffle=shuffle,
+                                 batch_size=batch_size,
+                                 collate_fn=lambda x: self.batch_fn(x,
+                                                                    device=device,
+                                                                    config=config,
+                                                                    enable_label=enable_label,
+                                                                    align_mode=align_mode,
+                                                                    label2tensor=label2tensor))
+        return data_loader
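
As a sketch of how DataFactory ties together, assuming a Hugging Face fast tokenizer (whose word_ids()/word_to_tokens() methods fast_align_data relies on) and the "atis" config of the LightChen2333/OpenSLU dataset referenced in load_dataset above:

from transformers import AutoTokenizer
from common.loader import DataFactory

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
data_factory = DataFactory(tokenizer, use_multi_intent=False)

# Build label vocabularies from the training split, then batch it.
dataset = data_factory.load_dataset({"dataset_name": "atis"}, split="train")
data_factory.update_label_names(dataset)
loader = data_factory.get_data_loader(dataset, batch_size=16, shuffle=True,
                                      device="cpu")  # extra kwargs go to the tokenizer
for input_data in loader:  # each batch is an InputData of (text, slot, intent)
    break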
common/logger.py
ADDED
@@ -0,0 +1,237 @@
+'''
+Author: Qiguang Chen
+Date: 2023-01-11 10:39:26
+LastEditors: Qiguang Chen
+LastEditTime: 2023-02-19 22:05:49
+Description: log manager
+
+'''
+import datetime
+import json
+import os
+import time
+from common.config import Config
+import logging
+import colorlog
+
+def mkdirs(dir_names):
+    for dir_name in dir_names:
+        if not os.path.exists(dir_name):
+            os.mkdir(dir_name)
+
+
+
+class Logger():
+    """ logs information via [wandb, fitlog, local file]
+    """
+    def __init__(self,
+                 logger_type: str,
+                 logger_name: str,
+                 logging_level="INFO",
+                 start_time='',
+                 accelerator=None):
+        """ create logger
+
+        Args:
+            logger_type (str): supported types = ["wandb", "fitlog", "local"]
+            logger_name (str): logger name, meaning the project name in wandb and the logging file name
+            logging_level (str, optional): logging level. Defaults to "INFO".
+            start_time (str, optional): start time string. Defaults to ''.
+        """
+        self.logger_type = logger_type
+        times = time.localtime()
+        self.output_dir = "logs/" + logger_name + "/" + start_time
+        self.accelerator = accelerator
+        self.logger_name = logger_name
+        if accelerator is not None:
+            from accelerate.logging import get_logger
+            self.logging = get_logger(logger_name)
+        else:
+            if self.logger_type == "wandb":
+                import wandb
+                self.logger = wandb
+                mkdirs(["logs", "logs/" + logger_name, self.output_dir])
+                self.logger.init(project=logger_name)
+            elif self.logger_type == "fitlog":
+                import fitlog
+                self.logger = fitlog
+                mkdirs(["logs", "logs/" + logger_name, self.output_dir])
+                self.logger.set_log_dir("logs/" + logger_name)
+            else:
+                mkdirs(["logs", "logs/" + logger_name, self.output_dir])
+                self.config_file = os.path.join(self.output_dir, "config.jsonl")
+                with open(self.config_file, "w", encoding="utf8") as f:
+                    print(f"Config will be written to {self.config_file}")
+
+                self.loss_file = os.path.join(self.output_dir, "loss.jsonl")
+                with open(self.loss_file, "w", encoding="utf8") as f:
+                    print(f"Loss Result will be written to {self.loss_file}")
+
+                self.metric_file = os.path.join(self.output_dir, "metric.jsonl")
+                with open(self.metric_file, "w", encoding="utf8") as f:
+                    print(f"Metric Result will be written to {self.metric_file}")
+
+                self.other_log_file = os.path.join(self.output_dir, "other_log.jsonl")
+                with open(self.other_log_file, "w", encoding="utf8") as f:
+                    print(f"Other Log Result will be written to {self.other_log_file}")
+
+            LOGGING_LEVEL_MAP = {
+                "CRITICAL": logging.CRITICAL,
+                "FATAL": logging.FATAL,
+                "ERROR": logging.ERROR,
+                "WARNING": logging.WARNING,
+                "WARN": logging.WARN,
+                "INFO": logging.INFO,
+                "DEBUG": logging.DEBUG,
+                "NOTSET": logging.NOTSET,
+            }
+            # logging.basicConfig(format='[%(levelname)s - %(asctime)s]\t%(message)s', datefmt='%m/%d/%Y %I:%M:%S %p',
+            #                     filename=os.path.join(self.output_dir, "log.log"), level=LOGGING_LEVEL_MAP[logging_level])
+
+            # logger = logging.getLogger()
+            # KZT = logging.StreamHandler()
+            # KZT.setLevel(logging.DEBUG)
+            # logger.addHandler(KZT)
+
+            self.logging = self._get_logging_logger(logging_level)
+
+    def _get_logging_logger(self, level="INFO"):
+        log_colors_config = {
+            'DEBUG': 'cyan',
+            'INFO': 'blue',
+            'WARNING': 'yellow',
+            'ERROR': 'red',
+            'CRITICAL': 'red,bg_white',
+        }
+
+        logger = logging.getLogger()
+        logger.setLevel(level)
+
+        log_path = os.path.join(self.output_dir, "log.log")
+
+        if not logger.handlers:
+            sh = logging.StreamHandler()
+            fh = logging.FileHandler(filename=log_path, mode='a', encoding="utf-8")
+            fmt = logging.Formatter(
+                fmt='[%(levelname)s - %(asctime)s]\t%(message)s',
+                datefmt='%m/%d/%Y %I:%M:%S %p')
+
+            sh_fmt = colorlog.ColoredFormatter(
+                fmt='%(log_color)s[%(levelname)s - %(asctime)s]\t%(message)s',
+                datefmt='%m/%d/%Y %I:%M:%S %p',
+                log_colors=log_colors_config)
+            sh.setFormatter(fmt=sh_fmt)
+            fh.setFormatter(fmt=fmt)
+            logger.addHandler(sh)
+            logger.addHandler(fh)
+        return logger
+
+    def set_config(self, config: Config):
+        """save config
+
+        Args:
+            config (Config): configuration object to save
+        """
+        if self.accelerator is not None:
+            self.accelerator.init_trackers(self.logger_name, config=config)
+        elif self.logger_type == "wandb":
+            self.logger.config.update(config)
+        elif self.logger_type == "fitlog":
+            self.logger.add_hyper(config)
+        else:
+            with open(self.config_file, "a", encoding="utf8") as f:
+                f.write(json.dumps(config) + "\n")
+
+    def log(self, data, step=0):
+        """log data and step
+
+        Args:
+            data (Any): data to log
+            step (int, optional): step num. Defaults to 0.
+        """
+        if self.accelerator is not None:
+            self.accelerator.log(data, step=step)
+        elif self.logger_type == "wandb":
+            self.logger.log(data, step=step)
+        elif self.logger_type == "fitlog":
+            self.logger.add_other({"data": data, "step": step})
+        else:
+            with open(self.other_log_file, "a", encoding="utf8") as f:
+                f.write(json.dumps({"data": data, "step": step}) + "\n")
+
+    def log_metric(self, metric, metric_split="dev", step=0):
+        """log metric
+
+        Args:
+            metric (Any): metric
+            metric_split (str, optional): dataset split. Defaults to 'dev'.
+            step (int, optional): step num. Defaults to 0.
+        """
+        if self.accelerator is not None:
+            self.accelerator.log({metric_split: metric}, step=step)
+        elif self.logger_type == "wandb":
+            self.logger.log({metric_split: metric}, step=step)
+        elif self.logger_type == "fitlog":
+            self.logger.add_metric({metric_split: metric}, step=step)
+        else:
+            with open(self.metric_file, "a", encoding="utf8") as f:
+                f.write(json.dumps({metric_split: metric, "step": step}) + "\n")
+
+    def log_loss(self, loss, loss_name="Loss", step=0):
+        """log loss
+
+        Args:
+            loss (Any): loss
+            loss_name (str, optional): loss description. Defaults to 'Loss'.
+            step (int, optional): step num. Defaults to 0.
+        """
+        if self.accelerator is not None:
+            self.accelerator.log({loss_name: loss}, step=step)
+        elif self.logger_type == "wandb":
+            self.logger.log({loss_name: loss}, step=step)
+        elif self.logger_type == "fitlog":
+            self.logger.add_loss(loss, name=loss_name, step=step)
+        else:
+            with open(self.loss_file, "a", encoding="utf8") as f:
+                f.write(json.dumps({loss_name: loss, "step": step}) + "\n")
+
+    def finish(self):
+        """finish logging
+        """
+        if self.logger_type == "fitlog":
+            self.logger.finish()
+
+    def info(self, message: str):
+        """ Log a message with severity 'INFO' to local file / console.
+
+        Args:
+            message (str): message to log
+        """
+        self.logging.info(message)
+
+    def warning(self, message):
+        """ Log a message with severity 'WARNING' to local file / console.
+
+        Args:
+            message (str): message to log
+        """
+        self.logging.warning(message)
+
+    def error(self, message):
+        """ Log a message with severity 'ERROR' to local file / console.
+
+        Args:
+            message (str): message to log
+        """
+        self.logging.error(message)
+
+    def debug(self, message):
+        """ Log a message with severity 'DEBUG' to local file / console.
+
+        Args:
+            message (str): message to log
+        """
+        self.logging.debug(message)
+
+    def critical(self, message):
+        self.logging.critical(message)
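
A minimal local-file usage sketch of the Logger above (the project name, step and values are made up):

from common.logger import Logger

logger = Logger(logger_type="local", logger_name="demo-run",
                start_time="20230219000000")
logger.set_config({"lr": 1e-3})                   # appended to config.jsonl
logger.log_loss(0.42, loss_name="Loss", step=1)   # appended to loss.jsonl
logger.log_metric({"intent_acc": 0.97}, metric_split="dev", step=1)
logger.info("epoch finished")                     # console + logs/demo-run/.../log.log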
common/metric.py
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'''
|
| 2 |
+
Author: Qiguang Chen
|
| 3 |
+
Date: 2023-01-11 10:39:26
|
| 4 |
+
LastEditors: Qiguang Chen
|
| 5 |
+
LastEditTime: 2023-02-17 19:39:22
|
| 6 |
+
Description: Metric calculation class
|
| 7 |
+
|
| 8 |
+
'''
|
| 9 |
+
from collections import Counter
|
| 10 |
+
from typing import List, Dict
|
| 11 |
+
|
| 12 |
+
import numpy as np
|
| 13 |
+
from sklearn.metrics import f1_score

from common.utils import InputData, OutputData


class Evaluator(object):
    """Evaluation metric functions library class

    supported metrics:
    - slot_f1
    - intent_acc
    - exactly_match_accuracy
    - intent_f1 (default: "macro_intent_f1")
    - macro_intent_f1
    - micro_intent_f1
    """

    @staticmethod
    def exactly_match_accuracy(pred_slot: List[List[str or int]],
                               real_slot: List[List[str or int]],
                               pred_intent: List[List[str or int] or str or int],
                               real_intent: List[List[str or int] or str or int]) -> float:
        """Compute the accuracy based on the whole prediction of a given sentence, including slot and intent.
        (both support str or int indices as the representation of slots and intents)

        Args:
            pred_slot (List[List[str or int]]): predicted slot sequence list.
            real_slot (List[List[str or int]]): golden slot sequence list.
            pred_intent (List[List[str or int] or str or int]): predicted intent list / predicted multi-intent list.
            real_intent (List[List[str or int] or str or int]): golden intent list / golden multi-intent list.

        Returns:
            float: exact match accuracy score
        """
        total_count, correct_count = 0.0, 0.0
        for p_slot, r_slot, p_intent, r_intent in zip(pred_slot, real_slot, pred_intent, real_intent):
            if isinstance(p_intent, list):
                p_intent, r_intent = set(p_intent), set(r_intent)
            if p_slot == r_slot and p_intent == r_intent:
                correct_count += 1.0
            total_count += 1.0

        return 1.0 * correct_count / total_count

    @staticmethod
    def intent_accuracy(pred_list: List, real_list: List) -> float:
        """Get intent accuracy measured by predictions and ground truths. Supports both multi-intent and single-intent.

        Args:
            pred_list (List): predicted intent list
            real_list (List): golden intent list

        Returns:
            float: intent accuracy score
        """
        total_count, correct_count = 0.0, 0.0
        for p_intent, r_intent in zip(pred_list, real_list):
            if isinstance(p_intent, list):
                p_intent, r_intent = set(p_intent), set(r_intent)
            if p_intent == r_intent:
                correct_count += 1.0
            total_count += 1.0

        return 1.0 * correct_count / total_count

    @staticmethod
    def intent_f1(pred_list: List[List[int]], real_list: List[List[int]], num_intent: int, average='macro') -> float:
        """Get intent F1 score measured by predictions and ground truths.
        (Only multi-intent input is supported for now, but you can wrap single intents as [[intent1], [intent2], ...] to compute a single-intent F1.)

        Args:
            pred_list (List[List[int]]): predicted multi-intent list.
            real_list (List[List[int]]): golden multi-intent list.
            num_intent (int): total number of intent labels.
            average (str): supports "micro" and "macro".

        Returns:
            float: intent f1 score
        """
        return f1_score(Evaluator.__instance2onehot(num_intent, real_list),
                        Evaluator.__instance2onehot(num_intent, pred_list),
                        average=average,
                        zero_division=0)
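
    # A quick sketch of how these metrics behave on toy inputs (values assumed
    # for illustration, not taken from any dataset):
    #   >>> pred = [["play_music"], ["get_weather", "set_alarm"]]
    #   >>> real = [["play_music"], ["set_alarm", "get_weather"]]
    #   >>> Evaluator.intent_accuracy(pred, real)
    #   1.0
    # List-valued intents are compared as sets, so intent order does not matter.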

    @staticmethod
    def __multilabel2one_hot(labels, nums):
        res = [0.] * nums
        if len(labels) == 0:
            return res
        if isinstance(labels[0], list):
            for label in labels[0]:
                res[label] = 1.
            return res
        for label in labels:
            res[label] = 1.
        return res

    @staticmethod
    def __instance2onehot(num_intent, data):
        res = []
        for intents in data:
            res.append(Evaluator.__multilabel2one_hot(intents, num_intent))
        return np.array(res)

    @staticmethod
    def __startOfChunk(prevTag, tag, prevTagType, tagType, chunkStart=False):
        if prevTag == 'B' and tag == 'B':
            chunkStart = True
        if prevTag == 'I' and tag == 'B':
            chunkStart = True
        if prevTag == 'O' and tag == 'B':
            chunkStart = True
        if prevTag == 'O' and tag == 'I':
            chunkStart = True

        if prevTag == 'E' and tag == 'E':
            chunkStart = True
        if prevTag == 'E' and tag == 'I':
            chunkStart = True
        if prevTag == 'O' and tag == 'E':
            chunkStart = True

        if tag != 'O' and tag != '.' and prevTagType != tagType:
            chunkStart = True
        return chunkStart

    @staticmethod
    def __endOfChunk(prevTag, tag, prevTagType, tagType, chunkEnd=False):
        if prevTag == 'B' and tag == 'B':
            chunkEnd = True
        if prevTag == 'B' and tag == 'O':
            chunkEnd = True
        if prevTag == 'I' and tag == 'B':
            chunkEnd = True
        if prevTag == 'I' and tag == 'O':
            chunkEnd = True

        if prevTag == 'E' and tag == 'E':
            chunkEnd = True
        if prevTag == 'E' and tag == 'I':
            chunkEnd = True
        if prevTag == 'E' and tag == 'O':
            chunkEnd = True

        if prevTag != 'O' and prevTag != '.' and prevTagType != tagType:
            chunkEnd = True
        return chunkEnd

    @staticmethod
    def __splitTagType(tag):
        s = tag.split('-')
        if len(s) > 2 or len(s) == 0:
            raise ValueError('tag format wrong. it must be like "B-xxx" or "O"')
        if len(s) == 1:
            tag = s[0]
            tagType = ""
        else:
            tag = s[0]
            tagType = s[1]
        return tag, tagType

    @staticmethod
    def computeF1Score(correct_slots: List[List[str]], pred_slots: List[List[str]]) -> float:
        """Compute the chunk-level slot F1 score; adapted from conlleval.pl.

        Args:
            correct_slots (List[List[str]]): golden slot string list
            pred_slots (List[List[str]]): predicted slot string list

        Returns:
            float: slot f1 score
        """
        correctChunk = {}
        correctChunkCnt = 0.0
        foundCorrect = {}
        foundCorrectCnt = 0.0
        foundPred = {}
        foundPredCnt = 0.0
        correctTags = 0.0
        tokenCount = 0.0
        for correct_slot, pred_slot in zip(correct_slots, pred_slots):
            inCorrect = False
            lastCorrectTag = 'O'
            lastCorrectType = ''
            lastPredTag = 'O'
            lastPredType = ''
            for c, p in zip(correct_slot, pred_slot):
                c = str(c)
                p = str(p)
                correctTag, correctType = Evaluator.__splitTagType(c)
                predTag, predType = Evaluator.__splitTagType(p)

                if inCorrect:
                    if Evaluator.__endOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) and \
                            Evaluator.__endOfChunk(lastPredTag, predTag, lastPredType, predType) and \
                            (lastCorrectType == lastPredType):
                        inCorrect = False
                        correctChunkCnt += 1.0
                        if lastCorrectType in correctChunk:
                            correctChunk[lastCorrectType] += 1.0
                        else:
                            correctChunk[lastCorrectType] = 1.0
                    elif Evaluator.__endOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) != \
                            Evaluator.__endOfChunk(lastPredTag, predTag, lastPredType, predType) or \
                            (correctType != predType):
                        inCorrect = False

                if Evaluator.__startOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) and \
                        Evaluator.__startOfChunk(lastPredTag, predTag, lastPredType, predType) and \
                        (correctType == predType):
                    inCorrect = True

                if Evaluator.__startOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType):
                    foundCorrectCnt += 1
                    if correctType in foundCorrect:
                        foundCorrect[correctType] += 1.0
                    else:
                        foundCorrect[correctType] = 1.0

                if Evaluator.__startOfChunk(lastPredTag, predTag, lastPredType, predType):
                    foundPredCnt += 1.0
                    if predType in foundPred:
                        foundPred[predType] += 1.0
                    else:
                        foundPred[predType] = 1.0

                if correctTag == predTag and correctType == predType:
                    correctTags += 1.0

                tokenCount += 1.0

                lastCorrectTag = correctTag
                lastCorrectType = correctType
                lastPredTag = predTag
                lastPredType = predType

            if inCorrect:
                correctChunkCnt += 1.0
                if lastCorrectType in correctChunk:
                    correctChunk[lastCorrectType] += 1.0
                else:
                    correctChunk[lastCorrectType] = 1.0

        if foundPredCnt > 0:
            precision = 1.0 * correctChunkCnt / foundPredCnt
        else:
            precision = 0

        if foundCorrectCnt > 0:
            recall = 1.0 * correctChunkCnt / foundCorrectCnt
        else:
            recall = 0

        if (precision + recall) > 0:
            f1 = (2.0 * precision * recall) / (precision + recall)
        else:
            f1 = 0

        return f1
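
    # Toy illustration (assumed BIO sequences): chunk-level F1 only rewards
    # exact span-and-type matches, not per-token agreement:
    #   >>> Evaluator.computeF1Score([["B-loc", "I-loc", "O"]], [["B-loc", "O", "O"]])
    #   0.0
    # The predicted chunk covers a different span than the golden one, so both
    # precision and recall are zero even though two of three tags match.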

    @staticmethod
    def max_freq_predict(sample):
        """Max frequency prediction.
        """
        predict = []
        for items in sample:
            predict.append(Counter(items).most_common(1)[0][0])
        return predict

    @staticmethod
    def __token_map(indexes, token_label_map):
        return [[token_label_map[idx] if idx in token_label_map else -1 for idx in index] for index in indexes]

    @staticmethod
    def compute_all_metric(inps: InputData,
                           output: OutputData,
                           intent_label_map: dict = None,
                           metric_list: List = None) -> Dict:
        """Automatically compute all metrics mentioned in 'metric_list'.

        Args:
            inps (InputData): input golden slot and intent labels
            output (OutputData): output predicted slot and intent labels
            intent_label_map (dict, Optional): dict like {"intent1": 0, "intent2": 1, ...}, which maps intent strings to indexes
            metric_list (List): supported metrics are ["slot_f1", "intent_acc", "intent_f1", "macro_intent_f1", "micro_intent_f1", "EMA"]

        Returns:
            Dict: all metrics mentioned in 'metric_list', like {'EMA': 0.7, ...}


        Example:
            if computing slot metrics:

                inps.slot = [["slot1", "slot2", ...], ...]; output.slot_ids = [["slot1", "slot2", ...], ...];

            if computing intent metrics:

                [Multi Intent] inps.intent = [["intent1", "intent2", ...], ...]; output.intent_ids = [["intent1", "intent2", ...], ...]

                [Single Intent] inps.intent = ["intent1", ...]; output.intent_ids = ["intent1", ...]
        """
        if not metric_list:
            metric_list = ["slot_f1", "intent_acc", "EMA"]
        res_dict = {}
        use_slot = output.slot_ids is not None and len(output.slot_ids) > 0
        use_intent = output.intent_ids is not None and len(
            output.intent_ids) > 0
        if use_slot and "slot_f1" in metric_list:
            res_dict["slot_f1"] = Evaluator.computeF1Score(
                output.slot_ids, inps.slot)
        if use_intent and "intent_acc" in metric_list:
            res_dict["intent_acc"] = Evaluator.intent_accuracy(
                output.intent_ids, inps.intent)
        if use_intent and isinstance(output.intent_ids[0], list):
            if "intent_f1" in metric_list:
                res_dict["intent_f1"] = Evaluator.intent_f1(Evaluator.__token_map(output.intent_ids, intent_label_map),
                                                            Evaluator.__token_map(
                                                                inps.intent, intent_label_map),
                                                            len(intent_label_map.keys()))
            elif "macro_intent_f1" in metric_list:
                res_dict["macro_intent_f1"] = Evaluator.intent_f1(Evaluator.__token_map(output.intent_ids, intent_label_map),
                                                                  Evaluator.__token_map(inps.intent, intent_label_map),
                                                                  len(intent_label_map.keys()), average="macro")
            if "micro_intent_f1" in metric_list:
                res_dict["micro_intent_f1"] = Evaluator.intent_f1(Evaluator.__token_map(output.intent_ids, intent_label_map),
                                                                  Evaluator.__token_map(inps.intent, intent_label_map),
                                                                  len(intent_label_map.keys()), average="micro")

        if use_slot and use_intent and "EMA" in metric_list:
            res_dict["EMA"] = Evaluator.exactly_match_accuracy(output.slot_ids, inps.slot, output.intent_ids,
                                                               inps.intent)
        return res_dict
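
# A minimal sketch of the aggregate entry point, with InputData/OutputData
# fields filled by hand (hypothetical single-intent values):
#   >>> inps = InputData()
#   >>> inps.slot = [["B-loc", "O"]]
#   >>> inps.intent = ["get_weather"]
#   >>> outs = OutputData(intent_ids=["get_weather"], slot_ids=[["B-loc", "O"]])
#   >>> Evaluator.compute_all_metric(inps, outs)
#   {'slot_f1': 1.0, 'intent_acc': 1.0, 'EMA': 1.0}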
common/model_manager.py
ADDED
@@ -0,0 +1,419 @@
'''
Author: Qiguang Chen
Date: 2023-01-11 10:39:26
LastEditors: Qiguang Chen
LastEditTime: 2023-02-19 18:50:11
Description: manage the whole process of model training and prediction.

'''
import math
import os
import queue
import random

import numpy as np
import torch
from tqdm import tqdm


from common import utils
from common.loader import DataFactory
from common.logger import Logger
from common.metric import Evaluator
from common.saver import Saver
from common.tokenizer import get_tokenizer, get_tokenizer_class, load_embedding
from common.utils import InputData, instantiate
from common.utils import OutputData
from common.config import Config
import dill
from common import global_pool
from tools.load_from_hugging_face import PreTrainedTokenizerForSLU, PretrainedModelForSLU
# from tools.hugging_face_parser import load_model, load_tokenizer


class ModelManager(object):
    def __init__(self, config: Config):
        """create model manager by config

        Args:
            config (Config): configuration to manage the whole process in OpenSLU
        """
        # init config
        global_pool._init()
        self.config = config
        self.__set_seed(self.config.base.get("seed"))
        self.device = self.config.base.get("device")
        self.load_dir = self.config.model_manager.get("load_dir")
        if self.config.get("logger") and self.config["logger"].get("logger_type"):
            logger_type = self.config["logger"].get("logger_type")
        else:
            logger_type = "wandb"
        # enable accelerator
        if "accelerator" in self.config and self.config["accelerator"].get("use_accelerator"):
            from accelerate import Accelerator
            self.accelerator = Accelerator(log_with=logger_type)
        else:
            self.accelerator = None
        self.tokenizer = None
        self.saver = Saver(self.config.model_manager, start_time=self.config.start_time)
        if self.config.base.get("train"):
            self.model = None
            self.optimizer = None
            self.total_step = None
            self.lr_scheduler = None
            self.init_step = 0
            self.best_metric = 0
        self.logger = Logger(logger_type=logger_type,
                             logger_name=self.config.base["name"],
                             start_time=self.config.start_time,
                             accelerator=self.accelerator)
        global_pool.set_value("logger", self.logger)

    def init_model(self):
        """init model, optimizer and lr_scheduler according to the config
        """
        self.prepared = False
        if self.load_dir is not None:
            self.load()
            self.config.set_vocab_size(self.tokenizer.vocab_size)
            self.init_data()
            if self.config.base.get("train") and self.config.model_manager.get("load_train_state"):
                train_state = torch.load(os.path.join(
                    self.load_dir, "train_state.pkl"), pickle_module=dill)
                self.optimizer = instantiate(
                    self.config["optimizer"])(self.model.parameters())
                self.lr_scheduler = instantiate(self.config["scheduler"])(
                    optimizer=self.optimizer,
                    num_training_steps=self.total_step
                )
                self.optimizer.load_state_dict(train_state["optimizer"])
                self.optimizer.zero_grad()
                self.lr_scheduler.load_state_dict(train_state["lr_scheduler"])
                self.init_step = train_state["step"]
                self.best_metric = train_state["best_metric"]
        elif self.config.model.get("_from_pretrained_") and self.config.tokenizer.get("_from_pretrained_"):
            self.from_pretrained()
            self.config.set_vocab_size(self.tokenizer.vocab_size)
            self.init_data()
        else:
            self.tokenizer = get_tokenizer(
                self.config.tokenizer.get("_tokenizer_name_"))
            self.init_data()
            self.model = instantiate(self.config.model)
            self.model.to(self.device)
            if self.config.base.get("train"):
                self.optimizer = instantiate(
                    self.config["optimizer"])(self.model.parameters())
                self.lr_scheduler = instantiate(self.config["scheduler"])(
                    optimizer=self.optimizer,
                    num_training_steps=self.total_step
                )

    def init_data(self):
        self.data_factory = DataFactory(tokenizer=self.tokenizer,
                                        use_multi_intent=self.config.base.get("multi_intent"),
                                        to_lower_case=self.config.tokenizer.get("_to_lower_case_"))
        batch_size = self.config.base["batch_size"]
        # init tokenizer config and dataloaders
        tokenizer_config = {key: self.config.tokenizer[key]
                            for key in self.config.tokenizer if key[0] != "_" and key[-1] != "_"}

        if self.config.base.get("train"):
            # init dataloader & load data
            train_dataset = self.data_factory.load_dataset(self.config.dataset, split="train")

            # update label and vocabulary (ONLY SUPPORTED FOR "word_tokenizer")
            self.data_factory.update_label_names(train_dataset)
            self.data_factory.update_vocabulary(train_dataset)

            self.train_dataloader = self.data_factory.get_data_loader(train_dataset,
                                                                      batch_size,
                                                                      shuffle=True,
                                                                      device=self.device,
                                                                      enable_label=True,
                                                                      align_mode=self.config.tokenizer.get(
                                                                          "_align_mode_"),
                                                                      label2tensor=True,
                                                                      **tokenizer_config)
            self.total_step = int(self.config.base.get("epoch_num")) * len(self.train_dataloader)
            dev_dataset = self.data_factory.load_dataset(self.config.dataset, split="validation")
            self.dev_dataloader = self.data_factory.get_data_loader(dev_dataset,
                                                                    batch_size,
                                                                    shuffle=False,
                                                                    device=self.device,
                                                                    enable_label=True,
                                                                    align_mode=self.config.tokenizer.get(
                                                                        "_align_mode_"),
                                                                    label2tensor=False,
                                                                    **tokenizer_config)
            self.data_factory.update_vocabulary(dev_dataset)
            self.intent_list = None
            self.intent_dict = None
            self.slot_list = None
            self.slot_dict = None
            # add intent label num and slot label num to config
            if self.config.model["decoder"].get("intent_classifier") and int(self.config.get_intent_label_num()) == 0:
                self.intent_list = self.data_factory.intent_label_list
                self.intent_dict = self.data_factory.intent_label_dict
                self.config.set_intent_label_num(len(self.intent_list))
            if self.config.model["decoder"].get("slot_classifier") and int(self.config.get_slot_label_num()) == 0:
                self.slot_list = self.data_factory.slot_label_list
                self.slot_dict = self.data_factory.slot_label_dict
                self.config.set_slot_label_num(len(self.slot_list))

            # autoload embedding for non-pretrained encoder
            if self.config["model"]["encoder"].get("embedding") and self.config["model"]["encoder"]["embedding"].get(
                    "load_embedding_name"):
                self.config["model"]["encoder"]["embedding"]["embedding_matrix"] = load_embedding(
                    self.tokenizer,
                    self.config["model"]["encoder"]["embedding"].get("load_embedding_name"))
            # fill template in config
            self.config.autoload_template()
            # save config
            self.logger.set_config(self.config)
            self.saver.save_tokenizer(self.tokenizer)
            self.saver.save_label(self.intent_list, self.slot_list)
            self.config.set_vocab_size(self.tokenizer.vocab_size)

        if self.config.base.get("test"):
            self.test_dataset = self.data_factory.load_dataset(self.config.dataset, split="test")
            self.test_dataloader = self.data_factory.get_data_loader(self.test_dataset,
                                                                     batch_size,
                                                                     shuffle=False,
                                                                     device=self.device,
                                                                     enable_label=True,
                                                                     align_mode=self.config.tokenizer.get(
                                                                         "_align_mode_"),
                                                                     label2tensor=False,
                                                                     **tokenizer_config)

    def eval(self, step: int, best_metric: float) -> float:
        """Evaluate the model on the dev set.

        Args:
            step (int): the step the model has been trained for
            best_metric (float): last best metric value, used to judge whether to test or save the model

        Returns:
            float: updated best metric value
        """
        # TODO: save dev
        _, res = self.__evaluate(self.model, self.dev_dataloader, mode="dev")
        self.logger.log_metric(res, metric_split="dev", step=step)
        if res[self.config.evaluator.get("best_key")] > best_metric:
            best_metric = res[self.config.evaluator.get("best_key")]
            train_state = {
                "step": step,
                "best_metric": best_metric,
                "optimizer": self.optimizer.state_dict(),
                "lr_scheduler": self.lr_scheduler.state_dict()
            }
            self.saver.save_model(self.model, train_state, self.accelerator)
            if self.config.base.get("test"):
                outputs, test_res = self.__evaluate(self.model, self.test_dataloader, mode="test")
                self.saver.save_output(outputs, self.test_dataset)
                self.logger.log_metric(test_res, metric_split="test", step=step)
        return best_metric

    def train(self) -> float:
        """Train the model.

        Returns:
            float: updated best metric value
        """
        self.model.train()
        if self.accelerator is not None:
            self.total_step = math.ceil(self.total_step / self.accelerator.num_processes)
        if self.optimizer is None:
            self.optimizer = instantiate(self.config["optimizer"])(self.model.parameters())
        if self.lr_scheduler is None:
            self.lr_scheduler = instantiate(self.config["scheduler"])(
                optimizer=self.optimizer,
                num_training_steps=self.total_step
            )
        if not self.prepared and self.accelerator is not None:
            self.model, self.optimizer, self.train_dataloader, self.lr_scheduler = self.accelerator.prepare(
                self.model, self.optimizer, self.train_dataloader, self.lr_scheduler)
        step = self.init_step
        progress_bar = tqdm(range(self.total_step))
        progress_bar.update(self.init_step)
        self.optimizer.zero_grad()
        for _ in range(int(self.config.base.get("epoch_num"))):
            for data in self.train_dataloader:
                if step == 0:
                    self.logger.info(data.get_item(
                        0, tokenizer=self.tokenizer, intent_map=self.intent_list, slot_map=self.slot_list))
                output = self.model(data)
                if self.accelerator is not None and hasattr(self.model, "module"):
                    loss, intent_loss, slot_loss = self.model.module.compute_loss(
                        pred=output, target=data)
                else:
                    loss, intent_loss, slot_loss = self.model.compute_loss(
                        pred=output, target=data)
                self.logger.log_loss(loss, "Loss", step=step)
                self.logger.log_loss(intent_loss, "Intent Loss", step=step)
                self.logger.log_loss(slot_loss, "Slot Loss", step=step)
                self.optimizer.zero_grad()

                if self.accelerator is not None:
                    self.accelerator.backward(loss)
                else:
                    loss.backward()
                self.optimizer.step()
                self.lr_scheduler.step()
                train_state = {
                    "step": step,
                    "best_metric": self.best_metric,
                    "optimizer": self.optimizer.state_dict(),
                    "lr_scheduler": self.lr_scheduler.state_dict()
                }
                if not self.saver.auto_save_step(self.model, train_state, self.accelerator):
                    if not self.config.evaluator.get("eval_by_epoch") and step % self.config.evaluator.get(
                            "eval_step") == 0 and step != 0:
                        self.best_metric = self.eval(step, self.best_metric)
                step += 1
                progress_bar.update(1)
            if self.config.evaluator.get("eval_by_epoch"):
                self.best_metric = self.eval(step, self.best_metric)
        self.logger.finish()
        return self.best_metric

    def test(self):
        return self.__evaluate(self.model, self.test_dataloader, mode="test")

    def __set_seed(self, seed_value: int):
        """Manually set random seeds.

        Args:
            seed_value (int): random seed
        """
        random.seed(seed_value)
        np.random.seed(seed_value)
        torch.manual_seed(seed_value)
        torch.random.manual_seed(seed_value)
        os.environ['PYTHONHASHSEED'] = str(seed_value)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed_value)
            torch.cuda.manual_seed_all(seed_value)
            torch.backends.cudnn.deterministic = True
            # cudnn benchmarking selects non-deterministic kernels, so it must
            # stay disabled for reproducible runs
            torch.backends.cudnn.benchmark = False
        return

    def __evaluate(self, model, dataloader, mode="dev"):
        model.eval()
        inps = InputData()
        outputs = OutputData()
        for data in dataloader:
            torch.cuda.empty_cache()
            output = model(data)
            if self.accelerator is not None and hasattr(self.model, "module"):
                decode_output = model.module.decode(output, data)
            else:
                decode_output = model.decode(output, data)

            decode_output.map_output(slot_map=self.slot_list,
                                     intent_map=self.intent_list)
            if self.config.model["decoder"].get("slot_classifier"):
                data, decode_output = utils.remove_slot_ignore_index(
                    data, decode_output, ignore_index="#")

            inps.merge_input_data(data)
            outputs.merge_output_data(decode_output)
        if "metric" in self.config.evaluator:
            res = Evaluator.compute_all_metric(
                inps, outputs, intent_label_map=self.intent_dict, metric_list=self.config.evaluator["metric"])
        else:
            res = Evaluator.compute_all_metric(
                inps, outputs, intent_label_map=self.intent_dict)
        self.logger.info(f"Best {mode} metric: " + str(res))
        model.train()
        return outputs, res

    def load(self):
        if self.tokenizer is None:
            with open(os.path.join(self.load_dir, "tokenizer.pkl"), 'rb') as f:
                self.tokenizer = dill.load(f)
        label = utils.load_json(os.path.join(self.load_dir, "label.json"))
        if label["intent"] is None:
            self.intent_list = None
            self.intent_dict = None
        else:
            self.intent_list = label["intent"]
            self.intent_dict = {x: i for i, x in enumerate(label["intent"])}
            self.config.set_intent_label_num(len(self.intent_list))
        if label["slot"] is None:
            self.slot_list = None
            self.slot_dict = None
        else:
            self.slot_list = label["slot"]
            self.slot_dict = {x: i for i, x in enumerate(label["slot"])}
            self.config.set_slot_label_num(len(self.slot_list))
        self.config.set_vocab_size(self.tokenizer.vocab_size)
        if self.accelerator is not None and self.load_dir is not None:
            self.model = torch.load(os.path.join(self.load_dir, "model.pkl"), map_location=torch.device(self.device))
            self.prepared = True
            self.accelerator.load_state(self.load_dir)
            self.accelerator.prepare_model(self.model)
        else:
            self.model = torch.load(os.path.join(
                self.load_dir, "model.pkl"), map_location=torch.device(self.device))
            # if self.config.tokenizer["_tokenizer_name_"] == "word_tokenizer":
            #     self.tokenizer = get_tokenizer_class(self.config.tokenizer["_tokenizer_name_"]).from_file(os.path.join(self.load_dir, "tokenizer.json"))
            # else:
            #     self.tokenizer = get_tokenizer(self.config.tokenizer["_tokenizer_name_"])
            self.model.to(self.device)

    def from_pretrained(self):
        self.config.autoload_template()
        model = PretrainedModelForSLU.from_pretrained(self.config.model["_from_pretrained_"])
        # model = load_model(self.config.model["_from_pretrained_"])
        self.model = model.model
        if self.tokenizer is None:
            self.tokenizer = PreTrainedTokenizerForSLU.from_pretrained(
                self.config.tokenizer["_from_pretrained_"])
            self.config.tokenizer = model.config.tokenizer
            # self.tokenizer = load_tokenizer(self.config.tokenizer["_from_pretrained_"])

        self.model.to(self.device)
        label = model.config._id2label
        self.config.model = model.config.model
        self.intent_list = label["intent"]
        self.slot_list = label["slot"]
        self.intent_dict = {x: i for i, x in enumerate(label["intent"])}
        self.slot_dict = {x: i for i, x in enumerate(label["slot"])}

    def predict(self, text_data):
        self.model.eval()
        tokenizer_config = {key: self.config.tokenizer[key]
                            for key in self.config.tokenizer if key[0] != "_" and key[-1] != "_"}
        align_mode = self.config.tokenizer.get("_align_mode_")
        inputs = self.data_factory.batch_fn(batch=[{"text": text_data.split(" ")}],
                                            device=self.device,
                                            config=tokenizer_config,
                                            enable_label=False,
                                            align_mode=align_mode if align_mode is not None else "general",
                                            label2tensor=False)
        output = self.model(inputs)
        decode_output = self.model.decode(output, inputs)
        decode_output.map_output(slot_map=self.slot_list,
                                 intent_map=self.intent_list)
        if self.config.base.get("multi_intent"):
            intent = decode_output.intent_ids[0]
        else:
            intent = [decode_output.intent_ids[0]]
        input_ids = inputs.input_ids[0].tolist()
        tokens = [self.tokenizer.decode(ids) for ids in input_ids]
        slots = decode_output.slot_ids[0]
        return {"intent": intent, "slot": slots, "text": tokens}
common/saver.py
ADDED
@@ -0,0 +1,80 @@
'''
Author: Qiguang Chen
LastEditors: Qiguang Chen
Date: 2023-02-12 22:23:58
LastEditTime: 2023-02-19 14:14:56
Description: save model checkpoints, tokenizer, labels and outputs.

'''
import json
import os
import queue
import shutil
import torch
import dill
from common import utils


class Saver():
    def __init__(self, config, start_time=None) -> None:
        self.config = config
        if self.config.get("save_dir"):
            self.model_save_dir = self.config["save_dir"]
        else:
            if not os.path.exists("save/"):
                os.mkdir("save/")
            self.model_save_dir = "save/" + start_time
        if not os.path.exists(self.model_save_dir):
            os.mkdir(self.model_save_dir)
        save_mode = config.get("save_mode")
        self.save_mode = save_mode if save_mode is not None else "save-by-eval"

        max_save_num = self.config.get("max_save_num")
        self.max_save_num = max_save_num if max_save_num is not None else 1
        self.save_pool = queue.Queue(maxsize=self.max_save_num)

    def save_tokenizer(self, tokenizer):
        with open(os.path.join(self.model_save_dir, "tokenizer.pkl"), 'wb') as f:
            dill.dump(tokenizer, f)

    def save_label(self, intent_list, slot_list):
        utils.save_json(os.path.join(self.model_save_dir, "label.json"), {"intent": intent_list, "slot": slot_list})

    def save_model(self, model, train_state, accelerator=None):
        step = train_state["step"]
        if self.max_save_num != 1:
            model_save_dir = os.path.join(self.model_save_dir, str(step))
            # evict the oldest checkpoint once the pool is full
            if self.save_pool.full():
                delete_dir = self.save_pool.get()
                shutil.rmtree(delete_dir)
            self.save_pool.put(model_save_dir)
            if not os.path.exists(model_save_dir):
                os.mkdir(model_save_dir)
        else:
            model_save_dir = self.model_save_dir
            if not os.path.exists(model_save_dir):
                os.mkdir(model_save_dir)
        if accelerator is None:
            torch.save(model, os.path.join(model_save_dir, "model.pkl"))
            torch.save(train_state, os.path.join(model_save_dir, "train_state.pkl"), pickle_module=dill)
        else:
            accelerator.wait_for_everyone()
            unwrapped_model = accelerator.unwrap_model(model)
            accelerator.save(unwrapped_model, os.path.join(model_save_dir, "model.pkl"))
            accelerator.save_state(output_dir=model_save_dir)

    def auto_save_step(self, model, train_state, accelerator=None):
        step = train_state["step"]
        if self.save_mode == "save-by-step" and step % self.config.get("save_step") == 0 and step != 0:
            self.save_model(model, train_state, accelerator)
            return True
        else:
            return False

    def save_output(self, outputs, dataset):
        outputs.save(self.model_save_dir, dataset)
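
# Sketch of a typical Saver configuration (plain dict assumed; keys mirror the
# ones read above). With "save-by-step", auto_save_step persists a checkpoint
# every `save_step` steps and keeps at most `max_save_num` step directories:
#   >>> saver = Saver({"save_dir": "save_demo", "save_mode": "save-by-step",
#   ...                "save_step": 100, "max_save_num": 3})
#   >>> saver.auto_save_step(model, {"step": 100, "best_metric": 0.0,
#   ...                              "optimizer": opt.state_dict(),
#   ...                              "lr_scheduler": sched.state_dict()})
#   True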
common/tokenizer.py
ADDED
@@ -0,0 +1,323 @@
import json
import os
from collections import Counter
from collections import OrderedDict
from typing import List

import torch
from ordered_set import OrderedSet
from transformers import AutoTokenizer

from common.utils import download, unzip_file


def get_tokenizer(tokenizer_name: str):
    """Automatically get a tokenizer.

    Args:
        tokenizer_name (str): supports "word_tokenizer" and any pretrained tokenizer on Hugging Face.

    Returns:
        Any: tokenizer object
    """
    if tokenizer_name == "word_tokenizer":
        return WordTokenizer(tokenizer_name)
    else:
        return AutoTokenizer.from_pretrained(tokenizer_name)

def get_tokenizer_class(tokenizer_name: str):
    """Automatically get a tokenizer class.

    Args:
        tokenizer_name (str): supports "word_tokenizer" and any pretrained tokenizer on Hugging Face.

    Returns:
        Any: tokenizer class
    """
    if tokenizer_name == "word_tokenizer":
        return WordTokenizer
    else:
        return AutoTokenizer.from_pretrained

BATCH_STATE = 1
INSTANCE_STATE = 2


class WordTokenizer(object):

    def __init__(self, name):
        self.__name = name
        self.index2instance = OrderedSet()
        self.instance2index = OrderedDict()
        # Counter object recording the frequency
        # of elements occurring in the raw text.
        self.counter = Counter()

        self.__sign_pad = "[PAD]"
        self.add_instance(self.__sign_pad)
        self.__sign_unk = "[UNK]"
        self.add_instance(self.__sign_unk)

    @property
    def padding_side(self):
        return "right"

    @property
    def all_special_ids(self):
        return [self.unk_token_id, self.pad_token_id]

    @property
    def name_or_path(self):
        return self.__name

    @property
    def vocab_size(self):
        return len(self.instance2index)

    @property
    def pad_token_id(self):
        return self.instance2index[self.__sign_pad]

    @property
    def unk_token_id(self):
        return self.instance2index[self.__sign_unk]

    def add_instance(self, instance):
        """Add instances to the alphabet.

        1. Any iterable data structure whose elements
           are of str type is supported.

        2. Added instances are counted, which influences
           the serialization of unknown instances.

        Args:
            instance: a single instance or a list of them.
        """
        if isinstance(instance, (list, tuple)):
            for element in instance:
                self.add_instance(element)
            return

        # We only support elements of str type.
        assert isinstance(instance, str)

        # count the frequency of instances.
        # self.counter[instance] += 1

        if instance not in self.index2instance:
            self.instance2index[instance] = len(self.index2instance)
            self.index2instance.append(instance)

    def __call__(self, instance,
                 return_tensors="pt",
                 is_split_into_words=True,
                 padding=True,
                 add_special_tokens=False,
                 truncation=True,
                 max_length=512,
                 **config):
        if isinstance(instance, (list, tuple)) and isinstance(instance[0], str) and is_split_into_words:
            res = self.get_index(instance)
            state = INSTANCE_STATE
        elif isinstance(instance, str) and not is_split_into_words:
            res = self.get_index(instance.split(" "))
            state = INSTANCE_STATE
        elif not is_split_into_words and isinstance(instance, (list, tuple)):
            res = [self.get_index(ins.split(" ")) for ins in instance]
            state = BATCH_STATE
        else:
            res = [self.get_index(ins) for ins in instance]
            state = BATCH_STATE
        # truncate each sequence to max_length
        if state == BATCH_STATE:
            res = [r[:max_length] if len(r) >= max_length else r for r in res]
        else:
            res = res[:max_length]
        pad_id = self.get_index(self.__sign_pad)
        if padding and state == BATCH_STATE:
            # pad to the longest raw sequence in the batch
            max_len = max([len(x) for x in instance])

            for i in range(len(res)):
                res[i] = res[i] + [pad_id] * (max_len - len(res[i]))
        if return_tensors == "pt":
            input_ids = torch.Tensor(res).long()
            attention_mask = (input_ids != pad_id).long()
        elif state == BATCH_STATE:
            attention_mask = [[1 if r != pad_id else 0 for r in batch] for batch in res]
            input_ids = res
        else:
            input_ids = res
            attention_mask = [1 if r != pad_id else 0 for r in res]
        return TokenizedData(input_ids, token_type_ids=attention_mask, attention_mask=attention_mask)
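
    # Toy usage of the whitespace tokenizer (ids assumed: [PAD]=0, [UNK]=1,
    # then words in insertion order):
    #   >>> tokenizer = WordTokenizer("word_tokenizer")
    #   >>> tokenizer.add_instance(["play", "rock", "music"])
    #   >>> batch = tokenizer([["play", "rock"], ["music"]], return_tensors="pt")
    #   >>> batch.input_ids
    #   tensor([[2, 3],
    #           [4, 0]])
    #   >>> batch.attention_mask
    #   tensor([[1, 1],
    #           [1, 0]])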

    def get_index(self, instance):
        """Serialize the given instance and return its index.

        Unknown words are mapped to the index of "[UNK]".

        Args:
            instance (Any): a single instance or a list of them.
        Return:
            Any: the serialization of the query instance.
        """
        if isinstance(instance, (list, tuple)):
            return [self.get_index(elem) for elem in instance]

        assert isinstance(instance, str)

        try:
            return self.instance2index[instance]
        except KeyError:
            return self.instance2index[self.__sign_unk]

    def decode(self, index):
        """Get the corresponding instance of a query index.

        Throws an exception if the index is invalid.

        Args:
            index (int): the query index, possibly iterable.
        Returns:
            the corresponding instance.
        """
        if isinstance(index, list):
            return [self.decode(elem) for elem in index]
        if isinstance(index, torch.Tensor):
            index = index.tolist()
            return self.decode(index)
        return self.index2instance[index]

    def decode_batch(self, index, **kargs):
        """Get the corresponding instances of a batch of query indexes.

        Throws an exception if an index is invalid.

        Args:
            index (int): the query index, possibly iterable.
        Returns:
            the corresponding instances.
        """
        return self.decode(index)

    def save(self, path):
        """Save the content of the alphabet to a file.

        There are two kinds of saved content:
        1. the tokenizer name;

        2. the token-to-index dictionary, whose elements
           are sorted by their serialized index.

        Args:
            path (str): the path to save the object to.
        """
        with open(path, 'w', encoding="utf8") as fw:
            fw.write(json.dumps({"name": self.__name, "token_map": self.instance2index}))

    @staticmethod
    def from_file(path):
        with open(path, 'r', encoding="utf8") as fw:
            obj = json.load(fw)
        tokenizer = WordTokenizer(obj["name"])
        tokenizer.instance2index = OrderedDict(obj["token_map"])
        # tokenizer.counter = len(tokenizer.instance2index)
        tokenizer.index2instance = OrderedSet(tokenizer.instance2index.keys())
        return tokenizer

    def __len__(self):
        return len(self.index2instance)

    def __str__(self):
        return 'Alphabet {} contains about {} words: \n\t{}'.format(self.name_or_path, len(self), self.index2instance)

    def convert_tokens_to_ids(self, tokens):
        """convert a token sequence to an input ids sequence

        Args:
            tokens (Any): token sequence

        Returns:
            Any: input ids sequence
        """
        try:
            if isinstance(tokens, (list, tuple)):
                return [self.instance2index[x] for x in tokens]
            return self.instance2index[tokens]

        except KeyError:
            return self.instance2index[self.__sign_unk]
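
# Round-trip sketch: persist the vocabulary and reload it (path assumed):
#   >>> tokenizer.save("save/demo_tokenizer.json")
#   >>> restored = WordTokenizer.from_file("save/demo_tokenizer.json")
#   >>> restored.convert_tokens_to_ids("play") == tokenizer.convert_tokens_to_ids("play")
#   True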

class TokenizedData():
    """tokenized output data with input_ids, token_type_ids and attention_mask
    """
    def __init__(self, input_ids, token_type_ids, attention_mask):
        self.input_ids = input_ids
        self.token_type_ids = token_type_ids
        self.attention_mask = attention_mask

    def word_ids(self, index: int) -> List[int or None]:
        """Get the word id list of one sequence (identity mapping for the word tokenizer).

        Args:
            index (int): sequence index in the batch

        Returns:
            List[int or None]: word id list (None at padding positions)
        """
        return [j if self.attention_mask[index][j] != 0 else None for j, x in enumerate(self.input_ids[index])]

    def word_to_tokens(self, index, word_id, **kwargs):
        """Map a word to its token span.

        Args:
            index (int): unused
            word_id (int): word index in the sequence
        """
        return (word_id, word_id + 1)

    def to(self, device):
        """Move tensors to a device.

        Args:
            device (str): supports ["cpu", "cuda"]
        """
        self.input_ids = self.input_ids.to(device)
        self.token_type_ids = self.token_type_ids.to(device)
        self.attention_mask = self.attention_mask.to(device)
        return self


def load_embedding(tokenizer: WordTokenizer, glove_name: str):
    """Load GloVe embeddings from the Stanford server or the local cache.

    Args:
        tokenizer (WordTokenizer): non-pretrained tokenizer
        glove_name (str): GloVe vector file name, e.g. "glove.6B.300d.txt"

    Returns:
        Any: word embedding matrix
    """
    save_path = "save/" + glove_name + ".zip"
    if not os.path.exists(save_path):
        download("http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip#" + glove_name, save_path)
        unzip_file(save_path, "save/" + glove_name)
    dim = int(glove_name.split(".")[-2][:-1])
    # unknown words keep a random vector; padding is zeroed out
    embedding_list = torch.rand((tokenizer.vocab_size, dim))
    embedding_list[tokenizer.pad_token_id] = torch.zeros((1, dim))
    with open("save/" + glove_name + "/" + glove_name, "r", encoding="utf8") as f:
        for line in f.readlines():
            datas = line.split(" ")
            word = datas[0]
            embedding = torch.Tensor([float(datas[i + 1]) for i in range(len(datas) - 1)])
            tokenized = tokenizer.convert_tokens_to_ids(word)
            if isinstance(tokenized, int) and tokenized != tokenizer.unk_token_id:
                embedding_list[tokenized] = embedding

    return embedding_list
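
# Minimal sketch wiring the embedding loader to a word tokenizer; the GloVe
# file name follows the "glove.6B.<dim>d.txt" pattern the loader parses:
#   >>> tokenizer = get_tokenizer("word_tokenizer")
#   >>> tokenizer.add_instance(["play", "rock", "music"])
#   >>> load_embedding(tokenizer, "glove.6B.300d.txt").shape
#   torch.Size([5, 300])
# Rows for [PAD] (zeros) and [UNK] (random) are included in the matrix.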
common/utils.py
ADDED
@@ -0,0 +1,499 @@
import functools
import importlib
import json
import os
import tarfile
from typing import List, Tuple
import zipfile
from collections.abc import Callable
from ruamel import yaml
import requests
import torch
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm
from torch import Tensor
import argparse

class InputData():
    """input data class
    """
    def __init__(self, inputs: List = None):
        """init input data class

        if inputs is None:
            this class can be used to save all InputData in the history by 'merge_input_data(X: InputData)'
        else:
            this class can be used for model input.

        Args:
            inputs (List, optional): inputs with [tokenized_data, slot, intent]. Defaults to None.
        """
        if inputs is None:
            self.slot = []
            self.intent = []
            self.input_ids = None
            self.token_type_ids = None
            self.attention_mask = None
            self.seq_lens = None
        else:
            self.input_ids = inputs[0].input_ids
            self.token_type_ids = None
            if hasattr(inputs[0], "token_type_ids"):
                self.token_type_ids = inputs[0].token_type_ids
            self.attention_mask = inputs[0].attention_mask
            if len(inputs) >= 2:
                self.slot = inputs[1]
            if len(inputs) >= 3:
                self.intent = inputs[2]
            self.seq_lens = self.attention_mask.sum(-1)

    def get_inputs(self):
        """get tokenized_data

        Returns:
            dict: tokenized data
        """
        res = {
            "input_ids": self.input_ids,
            "attention_mask": self.attention_mask
        }
        if self.token_type_ids is not None:
            res["token_type_ids"] = self.token_type_ids
        return res

    def merge_input_data(self, inp: "InputData"):
        """merge another InputData object's slot and intent

        Args:
            inp (InputData): another InputData object
        """
        self.slot += inp.slot
        self.intent += inp.intent

    def get_slot_mask(self, ignore_index: int) -> Tensor:
        """get slot mask

        Args:
            ignore_index (int): ignore index used in slot padding

        Returns:
            Tensor: mask tensor
        """
        mask = self.slot != ignore_index
        mask[:, 0] = torch.ones_like(mask[:, 0]).to(self.slot.device)
        return mask
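
    # Toy illustration of the slot mask (tensor values assumed; -100 is the
    # usual padding ignore index):
    #   >>> inp = InputData()
    #   >>> inp.slot = torch.tensor([[12, 5, -100, -100]])
    #   >>> inp.get_slot_mask(ignore_index=-100)
    #   tensor([[ True,  True, False, False]])
    # Position 0 is always forced on, so sentence-initial tokens are kept even
    # if they carry the ignore index.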

    def get_item(self, index, tokenizer=None, intent_map=None, slot_map=None, ignore_index=-100):
        res = {"input_ids": self.input_ids[index]}
        if tokenizer is not None:
            res["tokens"] = [tokenizer.decode(x) for x in self.input_ids[index]]
        if intent_map is not None:
            intents = self.intent.tolist()
            if isinstance(intents[index], list):
                res["intent"] = [intent_map[int(x)] for x in intents[index]]
            else:
                res["intent"] = intent_map[intents[index]]
        if slot_map is not None:
            res["slot"] = [slot_map[x] if x != ignore_index else "#" for x in self.slot.tolist()[index]]
        return res

class OutputData():
    """output data class
    """
    def __init__(self, intent_ids=None, slot_ids=None):
        """init output data class

        if intent_ids is None and slot_ids is None:
            this class can be used to save all OutputData in the history by 'merge_output_data(X: OutputData)'
        else:
            this class can be used for model output management.

        Args:
            intent_ids (Any, optional): list (Tensor) of intent ids / logits / strings. Defaults to None.
            slot_ids (Any, optional): list (Tensor) of slot ids / logits / strings. Defaults to None.
        """
        if intent_ids is None and slot_ids is None:
            self.intent_ids = []
            self.slot_ids = []
        else:
            if isinstance(intent_ids, ClassifierOutputData):
                self.intent_ids = intent_ids.classifier_output
            else:
                self.intent_ids = intent_ids
            if isinstance(slot_ids, ClassifierOutputData):
                self.slot_ids = slot_ids.classifier_output
            else:
                self.slot_ids = slot_ids

    def map_output(self, slot_map=None, intent_map=None):
        """map intent or slot ids to intent or slot strings.

        Args:
            slot_map (dict, optional): slot id-to-string map. Defaults to None.
            intent_map (dict, optional): intent id-to-string map. Defaults to None.
        """
        if self.slot_ids is not None:
            if slot_map:
                self.slot_ids = [[slot_map[x] if x >= 0 else "#" for x in sid] for sid in self.slot_ids]
        if self.intent_ids is not None:
            if intent_map:
                self.intent_ids = [[intent_map[x] for x in sid] if isinstance(sid, list) else intent_map[sid] for sid in
                                   self.intent_ids]

    def merge_output_data(self, output: "OutputData"):
        """merge another OutputData object's slot and intent

        Args:
            output (OutputData): another OutputData object
        """
        if output.slot_ids is not None:
            self.slot_ids += output.slot_ids
        if output.intent_ids is not None:
            self.intent_ids += output.intent_ids

    def save(self, path: str, original_dataset=None):
        """save all OutputData in the history

        Args:
            path (str): save dir path
            original_dataset (Iterable): original dataset
        """
        # with open(f"{path}/intent.jsonl", "w") as f:
        #     for x in self.intent_ids:
        #         f.write(json.dumps(x) + "\n")
        with open(f"{path}/outputs.jsonl", "w") as f:
            if original_dataset is not None:
                for i, s, d in zip(self.intent_ids, self.slot_ids, original_dataset):
                    f.write(json.dumps({"pred_intent": i, "pred_slot": s, "text": d["text"], "golden_intent": d["intent"], "golden_slot": d["slot"]}) + "\n")
            else:
                for i, s in zip(self.intent_ids, self.slot_ids):
                    f.write(json.dumps({"pred_intent": i, "pred_slot": s}) + "\n")
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
class HiddenData():
|
| 173 |
+
"""Interactive data structure for all model components
|
| 174 |
+
"""
|
| 175 |
+
def __init__(self, intent_hidden, slot_hidden):
|
| 176 |
+
"""init hidden data structure
|
| 177 |
+
|
| 178 |
+
Args:
|
| 179 |
+
intent_hidden (Any): sentence-level or intent hidden state
|
| 180 |
+
slot_hidden (Any): token-level or slot hidden state
|
| 181 |
+
"""
|
| 182 |
+
self.intent_hidden = intent_hidden
|
| 183 |
+
self.slot_hidden = slot_hidden
|
| 184 |
+
self.inputs = None
|
| 185 |
+
self.embedding = None
|
| 186 |
+
|
| 187 |
+
def get_intent_hidden_state(self):
|
| 188 |
+
"""get intent hidden state
|
| 189 |
+
|
| 190 |
+
Returns:
|
| 191 |
+
Any: intent hidden state
|
| 192 |
+
"""
|
| 193 |
+
return self.intent_hidden
|
| 194 |
+
|
| 195 |
+
def get_slot_hidden_state(self):
|
| 196 |
+
"""get slot hidden state
|
| 197 |
+
|
| 198 |
+
Returns:
|
| 199 |
+
Any: slot hidden state
|
| 200 |
+
"""
|
| 201 |
+
return self.slot_hidden
|
| 202 |
+
|
| 203 |
+
def update_slot_hidden_state(self, hidden_state):
|
| 204 |
+
"""update slot hidden state
|
| 205 |
+
|
| 206 |
+
Args:
|
| 207 |
+
hidden_state (Any): slot hidden state to update
|
| 208 |
+
"""
|
| 209 |
+
self.slot_hidden = hidden_state
|
| 210 |
+
|
| 211 |
+
def update_intent_hidden_state(self, hidden_state):
|
| 212 |
+
"""update intent hidden state
|
| 213 |
+
|
| 214 |
+
Args:
|
| 215 |
+
hidden_state (Any): intent hidden state to update
|
| 216 |
+
"""
|
| 217 |
+
self.intent_hidden = hidden_state
|
| 218 |
+
|
| 219 |
+
def add_input(self, inputs: InputData or "HiddenData"):
|
| 220 |
+
"""add last model component input information to next model component
|
| 221 |
+
|
| 222 |
+
Args:
|
| 223 |
+
inputs (InputDataor or HiddenData): last model component input
|
| 224 |
+
"""
|
| 225 |
+
self.inputs = inputs
|
| 226 |
+
|
| 227 |
+
def add_embedding(self, embedding):
|
| 228 |
+
self.embedding = embedding
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
class ClassifierOutputData():
|
| 232 |
+
"""Classifier output data structure of all classifier components
|
| 233 |
+
"""
|
| 234 |
+
def __init__(self, classifier_output):
|
| 235 |
+
self.classifier_output = classifier_output
|
| 236 |
+
self.output_embedding = None
|
| 237 |
+
|
| 238 |
+
def remove_slot_ignore_index(inputs:InputData, outputs:OutputData, ignore_index=-100):
|
| 239 |
+
""" remove padding or extra token in input id and output id
|
| 240 |
+
|
| 241 |
+
Args:
|
| 242 |
+
inputs (InputData): input data with input id
|
| 243 |
+
outputs (OutputData): output data with decoded output id
|
| 244 |
+
ignore_index (int, optional): ignore_index in input_ids. Defaults to -100.
|
| 245 |
+
|
| 246 |
+
Returns:
|
| 247 |
+
InputData: input data removed padding or extra token
|
| 248 |
+
OutputData: output data removed padding or extra token
|
| 249 |
+
"""
|
| 250 |
+
for index, (inp_ss, out_ss) in enumerate(zip(inputs.slot, outputs.slot_ids)):
|
| 251 |
+
temp_inp = []
|
| 252 |
+
temp_out = []
|
| 253 |
+
for inp_s, out_s in zip(list(inp_ss), list(out_ss)):
|
| 254 |
+
if inp_s != ignore_index:
|
| 255 |
+
temp_inp.append(inp_s)
|
| 256 |
+
temp_out.append(out_s)
|
| 257 |
+
|
| 258 |
+
inputs.slot[index] = temp_inp
|
| 259 |
+
outputs.slot_ids[index] = temp_out
|
| 260 |
+
return inputs, outputs
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
def pack_sequence(inputs:Tensor, seq_len:Tensor or List) -> Tensor:
|
| 264 |
+
"""pack sequence data to packed data without padding.
|
| 265 |
+
|
| 266 |
+
Args:
|
| 267 |
+
inputs (Tensor): list(Tensor) of packed sequence inputs
|
| 268 |
+
seq_len (Tensor or List): list(Tensor) of sequence length
|
| 269 |
+
|
| 270 |
+
Returns:
|
| 271 |
+
Tensor: packed inputs
|
| 272 |
+
|
| 273 |
+
Examples:
|
| 274 |
+
inputs = [[x, y, z, PAD, PAD], [x, y, PAD, PAD, PAD]]
|
| 275 |
+
|
| 276 |
+
seq_len = [3,2]
|
| 277 |
+
|
| 278 |
+
return -> [x, y, z, x, y]
|
| 279 |
+
"""
|
| 280 |
+
output = []
|
| 281 |
+
for index, batch in enumerate(inputs):
|
| 282 |
+
output.append(batch[:seq_len[index]])
|
| 283 |
+
return torch.cat(output, dim=0)
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
def unpack_sequence(inputs:Tensor, seq_lens:Tensor or List, padding_value=0) -> Tensor:
|
| 287 |
+
"""unpack sequence data.
|
| 288 |
+
|
| 289 |
+
Args:
|
| 290 |
+
inputs (Tensor): list(Tensor) of packed sequence inputs
|
| 291 |
+
seq_lens (Tensor or List): list(Tensor) of sequence length
|
| 292 |
+
padding_value (int, optional): padding value. Defaults to 0.
|
| 293 |
+
|
| 294 |
+
Returns:
|
| 295 |
+
Tensor: unpacked inputs
|
| 296 |
+
|
| 297 |
+
Examples:
|
| 298 |
+
inputs = [x, y, z, x, y]
|
| 299 |
+
|
| 300 |
+
seq_len = [3,2]
|
| 301 |
+
|
| 302 |
+
return -> [[x, y, z, PAD, PAD], [x, y, PAD, PAD, PAD]]
|
| 303 |
+
"""
|
| 304 |
+
last_idx = 0
|
| 305 |
+
output = []
|
| 306 |
+
for _, seq_len in enumerate(seq_lens):
|
| 307 |
+
output.append(inputs[last_idx:last_idx + seq_len])
|
| 308 |
+
last_idx = last_idx + seq_len
|
| 309 |
+
return pad_sequence(output, batch_first=True, padding_value=padding_value)
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
def get_dict_with_key_prefix(input_dict: dict, prefix=""):
|
| 313 |
+
res = {}
|
| 314 |
+
for t in input_dict:
|
| 315 |
+
res[t + prefix] = input_dict[t]
|
| 316 |
+
return res
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
def download(url: str, fname: str):
|
| 320 |
+
"""download file from url to fname
|
| 321 |
+
|
| 322 |
+
Args:
|
| 323 |
+
url (str): remote server url path
|
| 324 |
+
fname (str): local path to save
|
| 325 |
+
"""
|
| 326 |
+
resp = requests.get(url, stream=True)
|
| 327 |
+
total = int(resp.headers.get('content-length', 0))
|
| 328 |
+
with open(fname, 'wb') as file, tqdm(
|
| 329 |
+
desc=fname,
|
| 330 |
+
total=total,
|
| 331 |
+
unit='iB',
|
| 332 |
+
unit_scale=True,
|
| 333 |
+
unit_divisor=1024,
|
| 334 |
+
) as bar:
|
| 335 |
+
for data in resp.iter_content(chunk_size=1024):
|
| 336 |
+
size = file.write(data)
|
| 337 |
+
bar.update(size)
|
| 338 |
+
|
| 339 |
+
|
| 340 |
+
def tar_gz_data(file_name:str):
|
| 341 |
+
"""use "tar.gz" format to compress data
|
| 342 |
+
|
| 343 |
+
Args:
|
| 344 |
+
file_name (str): file path to tar
|
| 345 |
+
"""
|
| 346 |
+
t = tarfile.open(f"{file_name}.tar.gz", "w:gz")
|
| 347 |
+
|
| 348 |
+
for root, dir, files in os.walk(f"{file_name}"):
|
| 349 |
+
print(root, dir, files)
|
| 350 |
+
for file in files:
|
| 351 |
+
fullpath = os.path.join(root, file)
|
| 352 |
+
t.add(fullpath)
|
| 353 |
+
t.close()
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
def untar(fname:str, dirs:str):
|
| 357 |
+
""" uncompress "tar.gz" file
|
| 358 |
+
|
| 359 |
+
Args:
|
| 360 |
+
fname (str): file path to untar
|
| 361 |
+
dirs (str): target dir path
|
| 362 |
+
"""
|
| 363 |
+
t = tarfile.open(fname)
|
| 364 |
+
t.extractall(path=dirs)
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
def unzip_file(zip_src:str, dst_dir:str):
|
| 368 |
+
""" uncompress "zip" file
|
| 369 |
+
|
| 370 |
+
Args:
|
| 371 |
+
fname (str): file path to unzip
|
| 372 |
+
dirs (str): target dir path
|
| 373 |
+
"""
|
| 374 |
+
r = zipfile.is_zipfile(zip_src)
|
| 375 |
+
if r:
|
| 376 |
+
if not os.path.exists(dst_dir):
|
| 377 |
+
os.mkdir(dst_dir)
|
| 378 |
+
fz = zipfile.ZipFile(zip_src, 'r')
|
| 379 |
+
for file in fz.namelist():
|
| 380 |
+
fz.extract(file, dst_dir)
|
| 381 |
+
else:
|
| 382 |
+
print('This is not zip')
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
def find_callable(target: str) -> Callable:
|
| 386 |
+
""" find callable function / class to instantiate
|
| 387 |
+
|
| 388 |
+
Args:
|
| 389 |
+
target (str): class/module path
|
| 390 |
+
|
| 391 |
+
Raises:
|
| 392 |
+
e: can not import module
|
| 393 |
+
|
| 394 |
+
Returns:
|
| 395 |
+
Callable: return function / class
|
| 396 |
+
"""
|
| 397 |
+
target_module_path, target_callable_path = target.rsplit(".", 1)
|
| 398 |
+
target_callable_paths = [target_callable_path]
|
| 399 |
+
|
| 400 |
+
target_module = None
|
| 401 |
+
while len(target_module_path):
|
| 402 |
+
try:
|
| 403 |
+
target_module = importlib.import_module(target_module_path)
|
| 404 |
+
break
|
| 405 |
+
except Exception as e:
|
| 406 |
+
raise e
|
| 407 |
+
target_callable = target_module
|
| 408 |
+
for attr in reversed(target_callable_paths):
|
| 409 |
+
target_callable = getattr(target_callable, attr)
|
| 410 |
+
|
| 411 |
+
return target_callable
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
def instantiate(config, target="_model_target_", partial="_model_partial_"):
|
| 415 |
+
""" instantiate object by config.
|
| 416 |
+
|
| 417 |
+
Modified from https://github.com/HIT-SCIR/ltp/blob/main/python/core/ltp_core/models/utils/instantiate.py.
|
| 418 |
+
|
| 419 |
+
Args:
|
| 420 |
+
config (Any): configuration
|
| 421 |
+
target (str, optional): key to assign the class to be instantiated. Defaults to "_model_target_".
|
| 422 |
+
partial (str, optional): key to judge object whether should be instantiated partially. Defaults to "_model_partial_".
|
| 423 |
+
|
| 424 |
+
Returns:
|
| 425 |
+
Any: instantiated object
|
| 426 |
+
"""
|
| 427 |
+
if isinstance(config, dict) and target in config:
|
| 428 |
+
target_path = config.get(target)
|
| 429 |
+
target_callable = find_callable(target_path)
|
| 430 |
+
|
| 431 |
+
is_partial = config.get(partial, False)
|
| 432 |
+
target_args = {
|
| 433 |
+
key: instantiate(value)
|
| 434 |
+
for key, value in config.items()
|
| 435 |
+
if key not in [target, partial]
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
if is_partial:
|
| 439 |
+
return functools.partial(target_callable, **target_args)
|
| 440 |
+
else:
|
| 441 |
+
return target_callable(**target_args)
|
| 442 |
+
elif isinstance(config, dict):
|
| 443 |
+
return {key: instantiate(value) for key, value in config.items()}
|
| 444 |
+
else:
|
| 445 |
+
return config
|
| 446 |
+
|
| 447 |
+
|
| 448 |
+
def load_yaml(file):
|
| 449 |
+
""" load data from yaml files.
|
| 450 |
+
|
| 451 |
+
Args:
|
| 452 |
+
file (str): yaml file path.
|
| 453 |
+
|
| 454 |
+
Returns:
|
| 455 |
+
Any: data
|
| 456 |
+
"""
|
| 457 |
+
with open(file, encoding="utf-8") as stream:
|
| 458 |
+
try:
|
| 459 |
+
return yaml.safe_load(stream)
|
| 460 |
+
except yaml.YAMLError as exc:
|
| 461 |
+
raise exc
|
| 462 |
+
|
| 463 |
+
def from_configured(configure_name_or_file:str, model_class:Callable, config_prefix="./config/", **input_config):
|
| 464 |
+
"""load module from pre-configured data
|
| 465 |
+
|
| 466 |
+
Args:
|
| 467 |
+
configure_name_or_file (str): config path -> {config_prefix}/{configure_name_or_file}.yaml
|
| 468 |
+
model_class (Callable): module class
|
| 469 |
+
config_prefix (str, optional): configuration root path. Defaults to "./config/".
|
| 470 |
+
|
| 471 |
+
Returns:
|
| 472 |
+
Any: instantiated object.
|
| 473 |
+
"""
|
| 474 |
+
if os.path.exists(configure_name_or_file):
|
| 475 |
+
configure_file=configure_name_or_file
|
| 476 |
+
else:
|
| 477 |
+
configure_file= os.path.join(config_prefix, configure_name_or_file+".yaml")
|
| 478 |
+
config = load_yaml(configure_file)
|
| 479 |
+
config.update(input_config)
|
| 480 |
+
return model_class(**config)
|
| 481 |
+
|
| 482 |
+
def save_json(file_path, obj):
|
| 483 |
+
with open(file_path, 'w', encoding="utf8") as fw:
|
| 484 |
+
fw.write(json.dumps(obj))
|
| 485 |
+
|
| 486 |
+
def load_json(file_path):
|
| 487 |
+
with open(file_path, 'r', encoding="utf8") as fw:
|
| 488 |
+
res =json.load(fw)
|
| 489 |
+
return res
|
| 490 |
+
|
| 491 |
+
def str2bool(v):
|
| 492 |
+
if isinstance(v, bool):
|
| 493 |
+
return v
|
| 494 |
+
if v.lower() in ('yes', 'true', 't', 'y', '1'):
|
| 495 |
+
return True
|
| 496 |
+
elif v.lower() in ('no', 'false', 'f', 'n', '0'):
|
| 497 |
+
return False
|
| 498 |
+
else:
|
| 499 |
+
raise argparse.ArgumentTypeError('Boolean value expected.')
|
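A quick, self-contained sanity check for `pack_sequence` / `unpack_sequence` above (the tensor values are made up for illustration):

    import torch
    x = torch.tensor([[1, 2, 3, 0, 0],
                      [4, 5, 0, 0, 0]])          # two right-padded sequences
    packed = pack_sequence(x, [3, 2])            # -> tensor([1, 2, 3, 4, 5])
    restored = unpack_sequence(packed, [3, 2])   # -> tensor([[1, 2, 3], [4, 5, 0]])
    assert torch.equal(restored, x[:, :3])       # padding beyond the longest length is not restored
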
config/README.md
ADDED
@@ -0,0 +1,348 @@
# Configuration

## 1. Introduction

Configuration is divided into fine-grained reusable modules:

- `base`: basic configuration
- `logger`: logger setting
- `model_manager`: loading and saving model parameters
- `accelerator`: whether to enable multi-GPU
- `dataset`: dataset management
- `evaluator`: evaluation and metrics setting.
- `tokenizer`: tokenizer initialization and tokenizing setting.
- `optimizer`: optimizer initialization setting.
- `scheduler`: scheduler initialization setting.
- `model`: model construction setting.

From Sec. 2 to Sec. 11, we will describe the configuration in detail. You can also see [Examples](examples/README.md) for a quick start.

NOTE: `_*_` configs are reserved fields in OpenSLU.

## Configuration Item Script
In the OpenSLU configuration, we support a simple calculation script for each configuration item. For example, we can get `dataset_name` with `{dataset.dataset_name}` and fill its value into the python script `'LightChen2333/agif-slu-' + '*'`. (Without the surrounding quotes, the `{dataset.dataset_name}` value will be treated as a variable.)

NOTE: each item with `{}` will be treated as a python script.
```yaml
tokenizer:
  _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'" # Support simple calculation script
```

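For intuition, here is a minimal sketch of how such an item could be resolved. This is an illustration only, not OpenSLU's actual resolver, and the helper name `resolve_item` is invented:

```python
import re

def resolve_item(value: str, config: dict):
    """Substitute `{a.b}` references from `config`, then evaluate the result as python."""
    def lookup(match):
        obj = config
        for key in match.group(1).split("."):
            obj = obj[key]
        return str(obj)
    return eval(re.sub(r"\{([\w.]+)\}", lookup, value))

config = {"dataset": {"dataset_name": "atis"}}
resolve_item("'LightChen2333/agif-slu-' + '{dataset.dataset_name}'", config)
# -> 'LightChen2333/agif-slu-atis'
```
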
## `base` Config
```yaml
# `start_time` will be generated automatically when any config script starts; it does not need to be assigned.
# start_time: xxxxxxxx
base:
  name: "OpenSLU" # project/logger name
  multi_intent: false # whether to enable the multi-intent setting
  train: True # enable train, else enable zero-shot
  test: True # enable test during train.
  device: cuda # device: cuda/cpu
  seed: 42 # random seed
  best_key: EMA # save model by which metric [intent_acc/slot_f1/EMA]
  tokenizer_name: word_tokenizer # tokenizer: word_tokenizer for no pretrained model, else use an [AutoTokenizer] tokenizer name
  add_special_tokens: false # whether to add [CLS], [SEP] special tokens
  epoch_num: 300 # train epoch num
  # eval_step: 280 # if eval_by_epoch = false and eval_step > 0, the model will be evaluated every {eval_step} steps
  eval_by_epoch: true # evaluate model by epoch
  batch_size: 16 # batch size
```
## `logger` Config
```yaml
logger:
  # `wandb` is supported in both single- and multi-GPU settings,
  # `tensorboard` is only supported in multi-GPU,
  # and `fitlog` is only supported in single-GPU
  logger_type: wandb
```
## `model_manager` Config
```yaml
model_manager:
  # if load_dir != `null`, OpenSLU will try to load the checkpoint to continue training,
  # if load_dir == `null`, OpenSLU will restart training.
  load_dir: null
  # The dir path to save the model and training state.
  # if save_dir == `null`, the model will be saved to `save/{start_time}`
  save_dir: save/stack
  # save_mode can be selected in [save-by-step, save-by-eval]
  # `save-by-step` means saving the model every {save_step} steps without evaluation.
  # `save-by-eval` means saving the model with the best validation performance
  save_mode: save-by-eval
  # save_step: 100 # only enabled when save_mode == `save-by-step`
  max_save_num: 1 # The number of best models to be saved.
```
## `accelerator` Config
```yaml
accelerator:
  use_accelerator: false # will enable `accelerator` if use_accelerator is `true`
```
## `dataset` Config
```yaml
dataset:
  # support loading datasets from hugging-face.
  # dataset_name can be selected in [atis, snips, mix-atis, mix-snips]
  dataset_name: atis
  # support assigning any one dataset path; the other dataset splits are the same as the splits in `dataset_name`
  # train: atis # support loading from hugging-face or an assigned local data path.
  # validation: {root}/ATIS/dev.jsonl
  # test: {root}/ATIS/test.jsonl
```
## `evaluator` Config
```yaml
evaluator:
  best_key: EMA # the metric to judge the best model
  eval_by_epoch: true # Evaluate after each epoch if `true`.
  # Evaluate every {eval_step} steps if eval_by_epoch == `false`.
  # eval_step: 1800
  # the metrics below are supported:
  # - intent_acc
  # - slot_f1
  # - EMA
  # - intent_f1
  # - macro_intent_f1
  # - micro_intent_f1
  # NOTE: [intent_f1, macro_intent_f1, micro_intent_f1] are only supported in the multi-intent setting. intent_f1 and macro_intent_f1 are the same metric.
  metric:
    - intent_acc
    - slot_f1
    - EMA
```
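Here `EMA` denotes exact-match accuracy: a sentence counts as correct only when the intent and every slot are predicted correctly. A rough sketch of the idea (not the actual implementation in `common/metric.py`):

```python
def exact_match_accuracy(pred_intents, golden_intents, pred_slots, golden_slots):
    match_num = 0
    for p_i, g_i, p_s, g_s in zip(pred_intents, golden_intents, pred_slots, golden_slots):
        if p_i == g_i and p_s == g_s:  # intent and the whole slot sequence must match
            match_num += 1
    return match_num / len(golden_intents)
```
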
## `tokenizer` Config
```yaml
tokenizer:
  # Init tokenizer. Support `word_tokenizer` and other tokenizers in huggingface.
  _tokenizer_name_: word_tokenizer
  # if `_tokenizer_name_` is not assigned, you can load a pretrained tokenizer from hugging-face.
  # _from_pretrained_: LightChen2333/stack-propagation-slu-atis
  _padding_side_: right # the padding side of the tokenizer, support [left/ right]
  # Align mode between text and slot, support [fast/ general],
  # `general` is supported by most tokenizers, `fast` is supported by only a small portion of tokenizers.
  _align_mode_: fast
  _to_lower_case_: true
  add_special_tokens: false # other tokenizer args; you can add other args to tokenizer initialization except `_*_` format args
  max_length: 512

```
## `optimizer` Config
```yaml
optimizer:
  _model_target_: torch.optim.Adam # Optimizer class / function returning an Optimizer object
  _model_partial_: true # partial load configuration. Here model.parameters() will be added later to complete all Optimizer parameters
  lr: 0.001 # learning rate
  weight_decay: 1e-6 # weight decay
```
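Because `_model_partial_` is `true`, `instantiate` (see `common/utils.py`) returns a `functools.partial` object instead of a finished optimizer; the missing `model.parameters()` argument is supplied later. A minimal sketch of the effect:

```python
import functools
import torch

# what instantiate() effectively builds from the config above:
optimizer_fn = functools.partial(torch.optim.Adam, lr=0.001, weight_decay=1e-6)

model = torch.nn.Linear(4, 2)                 # stand-in model
optimizer = optimizer_fn(model.parameters())  # completed with the deferred argument
```
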
## `scheduler` Config
```yaml
scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true # partial load configuration. Here optimizer and num_training_steps will be added later to complete all scheduler parameters
  name: "linear"
  num_warmup_steps: 0
```
## `model` Config
```yaml
model:
  # _from_pretrained_: LightChen2333/stack-propagation-slu-atis # load the model from hugging-face; none of the parameters below need to be assigned.
  _model_target_: model.OpenSLUModel # the general model class, which can automatically build the model through configuration.

  encoder:
    _model_target_: model.encoder.AutoEncoder # auto-encoder to automatically load the provided encoder model
    encoder_name: self-attention-lstm # support [lstm/ self-attention-lstm] and other pretrained models supported by hugging-face

    embedding: # word embedding layer
      # load_embedding_name: glove.6B.300d.txt # support autoloading glove embeddings.
      embedding_dim: 256 # embedding dim
      dropout_rate: 0.5 # dropout ratio after embedding

    lstm:
      layer_num: 1 # lstm configuration
      bidirectional: true
      output_dim: 256 # a module should set output_dim so that input_dim can be autoloaded in the next module. You can also set input_dim manually.
      dropout_rate: 0.5

    attention: # self-attention configuration
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.5

    return_with_input: true # add input information, like attention_mask, to the decoder module.
    return_sentence_level_hidden: false # whether to return the sentence representation to the decoder module

  decoder:
    _model_target_: model.decoder.StackPropagationDecoder # decoder name
    interaction:
      _model_target_: model.decoder.interaction.StackInteraction # interaction module name
      differentiable: false # interaction module config

    intent_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier # intent classifier module name
      layer_num: 1
      bidirectional: false
      hidden_dim: 64
      force_ratio: 0.9 # teacher-force ratio
      embedding_dim: 8 # intent embedding dim
      ignore_index: -100 # ignore index to compute loss and metric
      dropout_rate: 0.5
      mode: "token-level-intent" # decode mode, support [token-level-intent, intent, slot]
      use_multi: "{base.multi_intent}"
      return_sentence_level: true # whether to return the sentence-level prediction as decoded input

    slot_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: 64
      embedding_dim: 32
      ignore_index: -100
      dropout_rate: 0.5
      mode: "slot"
      use_multi: false
      return_sentence_level: false
```

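All the `_model_target_` keys above are consumed by `instantiate` in `common/utils.py`, which recursively turns the nested configuration dict into nested objects. A toy illustration, using `torch.nn.Linear` as a stand-in target:

```python
from common.utils import instantiate

layer = instantiate({
    "_model_target_": "torch.nn.Linear",
    "in_features": 256,
    "out_features": 128,
})
# equivalent to torch.nn.Linear(in_features=256, out_features=128)
```
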
+
|
| 205 |
+
## Implementing a New Model
|
| 206 |
+
|
| 207 |
+
### 1. Interaction Re-Implement
|
| 208 |
+
Here we take `DCA-Net` as an example:
|
| 209 |
+
|
| 210 |
+
In most cases, you just need to rewrite `Interaction` module:
|
| 211 |
+
|
| 212 |
+
```python
|
| 213 |
+
from common.utils import HiddenData
|
| 214 |
+
from model.decoder.interaction import BaseInteraction
|
| 215 |
+
class DCANetInteraction(BaseInteraction):
|
| 216 |
+
def __init__(self, **config):
|
| 217 |
+
super().__init__(**config)
|
| 218 |
+
self.T_block1 = I_S_Block(self.config["output_dim"], self.config["attention_dropout"], self.config["num_attention_heads"])
|
| 219 |
+
...
|
| 220 |
+
|
| 221 |
+
def forward(self, encode_hidden: HiddenData, **kwargs):
|
| 222 |
+
...
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
and then you should configure your module:
|
| 226 |
+
```yaml
|
| 227 |
+
base:
|
| 228 |
+
...
|
| 229 |
+
|
| 230 |
+
optimizer:
|
| 231 |
+
...
|
| 232 |
+
|
| 233 |
+
scheduler:
|
| 234 |
+
...
|
| 235 |
+
|
| 236 |
+
model:
|
| 237 |
+
_model_target_: model.OpenSLUModel
|
| 238 |
+
encoder:
|
| 239 |
+
_model_target_: model.encoder.AutoEncoder
|
| 240 |
+
encoder_name: lstm
|
| 241 |
+
|
| 242 |
+
embedding:
|
| 243 |
+
load_embedding_name: glove.6B.300d.txt
|
| 244 |
+
embedding_dim: 300
|
| 245 |
+
dropout_rate: 0.5
|
| 246 |
+
|
| 247 |
+
lstm:
|
| 248 |
+
dropout_rate: 0.5
|
| 249 |
+
output_dim: 128
|
| 250 |
+
layer_num: 2
|
| 251 |
+
bidirectional: true
|
| 252 |
+
output_dim: "{model.encoder.lstm.output_dim}"
|
| 253 |
+
return_with_input: true
|
| 254 |
+
return_sentence_level_hidden: false
|
| 255 |
+
|
| 256 |
+
decoder:
|
| 257 |
+
_model_target_: model.decoder.DCANetDecoder
|
| 258 |
+
interaction:
|
| 259 |
+
_model_target_: model.decoder.interaction.DCANetInteraction
|
| 260 |
+
output_dim: "{model.encoder.output_dim}"
|
| 261 |
+
attention_dropout: 0.5
|
| 262 |
+
num_attention_heads: 8
|
| 263 |
+
|
| 264 |
+
intent_classifier:
|
| 265 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
| 266 |
+
mode: "intent"
|
| 267 |
+
input_dim: "{model.decoder.output_dim.output_dim}"
|
| 268 |
+
ignore_index: -100
|
| 269 |
+
|
| 270 |
+
slot_classifier:
|
| 271 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
| 272 |
+
mode: "slot"
|
| 273 |
+
input_dim: "{model.decoder.output_dim.output_dim}"
|
| 274 |
+
ignore_index: -100
|
| 275 |
+
```
|
| 276 |
+
|
| 277 |
+
Oops, you finish all model construction. You can run script as follows to train model:
|
| 278 |
+
```shell
|
| 279 |
+
python run.py -cp config/dca_net.yaml [-ds atis]
|
| 280 |
+
```
|
| 281 |
+
### 2. Decoder Re-Implement
|
| 282 |
+
Sometimes, `interaction then classification` order can not meet your needs. Therefore, you should simply rewrite decoder for flexible interaction order:
|
| 283 |
+
|
| 284 |
+
Here, we take `stack-propagation` as an example:
|
| 285 |
+
1. We should rewrite interaction module for `stack-propagation`
|
| 286 |
+
```python
|
| 287 |
+
from common.utils import ClassifierOutputData, HiddenData
|
| 288 |
+
from model.decoder.interaction.base_interaction import BaseInteraction
|
| 289 |
+
class StackInteraction(BaseInteraction):
|
| 290 |
+
def __init__(self, **config):
|
| 291 |
+
super().__init__(**config)
|
| 292 |
+
...
|
| 293 |
+
|
| 294 |
+
def forward(self, intent_output: ClassifierOutputData, encode_hidden: HiddenData):
|
| 295 |
+
...
|
| 296 |
+
```
|
| 297 |
+
2. We should rewrite `StackPropagationDecoder` for stack-propagation interaction order:
|
| 298 |
+
```python
|
| 299 |
+
from common.utils import HiddenData, OutputData
|
| 300 |
+
class StackPropagationDecoder(BaseDecoder):
|
| 301 |
+
|
| 302 |
+
def forward(self, hidden: HiddenData):
|
| 303 |
+
pred_intent = self.intent_classifier(hidden)
|
| 304 |
+
hidden = self.interaction(pred_intent, hidden)
|
| 305 |
+
pred_slot = self.slot_classifier(hidden)
|
| 306 |
+
return OutputData(pred_intent, pred_slot)
|
| 307 |
+
```
|
| 308 |
+
|
| 309 |
+
3. Then we can easily combine general model by `config/stack-propagation.yaml` configuration file:
|
| 310 |
+
```yaml
|
| 311 |
+
base:
|
| 312 |
+
...
|
| 313 |
+
|
| 314 |
+
...
|
| 315 |
+
|
| 316 |
+
model:
|
| 317 |
+
_model_target_: model.OpenSLUModel
|
| 318 |
+
|
| 319 |
+
encoder:
|
| 320 |
+
...
|
| 321 |
+
|
| 322 |
+
decoder:
|
| 323 |
+
_model_target_: model.decoder.StackPropagationDecoder
|
| 324 |
+
interaction:
|
| 325 |
+
_model_target_: model.decoder.interaction.StackInteraction
|
| 326 |
+
differentiable: false
|
| 327 |
+
|
| 328 |
+
intent_classifier:
|
| 329 |
+
_model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
|
| 330 |
+
... # parameters needed __init__(*)
|
| 331 |
+
mode: "token-level-intent"
|
| 332 |
+
use_multi: false
|
| 333 |
+
return_sentence_level: true
|
| 334 |
+
|
| 335 |
+
slot_classifier:
|
| 336 |
+
_model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
|
| 337 |
+
... # parameters needed __init__(*)
|
| 338 |
+
mode: "slot"
|
| 339 |
+
use_multi: false
|
| 340 |
+
return_sentence_level: false
|
| 341 |
+
```
|
| 342 |
+
4. You can run script as follows to train model:
|
| 343 |
+
```shell
|
| 344 |
+
python run.py -cp config/stack-propagation.yaml
|
| 345 |
+
```
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
|
config/app.yaml
ADDED
@@ -0,0 +1,6 @@
host: 127.0.0.1
port: 7860

is_push_to_public: false
save-path: save/stack/outputs.jsonl
page-size: 2
config/decoder/interaction/stack-propagation.yaml
ADDED
@@ -0,0 +1 @@
differentiable: false
config/examples/README.md
ADDED
@@ -0,0 +1,38 @@
# Examples

Here we introduce some usage of our framework by configuration.

## Reload to train

Firstly, you can run this script to train a `joint-bert` model:
```shell
python run.py -cp config/examples/normal.yaml
```

and you can use `kill` or `Ctrl+C` to kill the training process.

Then, to reload the model and continue training, you can run `reload_to_train.yaml` to reload the checkpoint and training state.
```shell
python run.py -cp config/examples/reload_to_train.yaml
```

The main difference in `reload_to_train.yaml` is the `model_manager` configuration item:
```yaml
...
model_manager:
  load_train_state: True # set to True
  load_dir: save/joint_bert # not null
  ...
...
```

## Load from a Pre-finetuned model.
We upload all models to [LightChen2333](https://huggingface.co/LightChen2333). You can load those models with a simple configuration.
In `from_pretrained.yaml` and `from_pretrained_multi.yaml`, we show two example scripts for loading from hugging face in the single- and multi-intent settings, respectively. The key configuration items are as below:
```yaml
tokenizer:
  _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'" # Support simple calculation script

model:
  _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"
```
config/examples/from_pretrained.yaml
ADDED
@@ -0,0 +1,53 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  train: false
  test: true
  device: cpu
  seed: 42
  epoch_num: 300
  batch_size: 16

logger:
  logger_type: local # wandb is supported in both single- and multi-GPU settings, tensorboard is only supported in multi-GPU, and fitlog is only supported in single-GPU

model_manager:
  load_dir: null
  save_dir: save/joint_bert
  save_mode: save-by-eval # save-by-step
  # save_step: 100
  max_save_num: 1

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

tokenizer:
  _from_pretrained_: "'LightChen2333/joint-bert-slu-' + '{dataset.dataset_name}'"

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _from_pretrained_: "'LightChen2333/joint-bert-slu-' + '{dataset.dataset_name}'"
config/examples/from_pretrained_multi.yaml
ADDED
@@ -0,0 +1,55 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  multi_intent: true
  train: false
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16


logger:
  logger_type: wandb # wandb is supported in both single- and multi-GPU settings, tensorboard is only supported in multi-GPU, and fitlog is only supported in single-GPU

model_manager:
  load_dir: null
  save_dir: save/joint_bert
  save_mode: save-by-eval # save-by-step
  # save_step: 100
  max_save_num: 1

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

tokenizer:
  _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"
config/examples/normal.yaml
ADDED
@@ -0,0 +1,70 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLU-test"
  train: True
  test: True
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 128

model_manager:
  load_dir: null
  save_dir: save/joint_bert

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: bert-base-uncased
  _padding_side_: right
  _align_mode_: general
  add_special_tokens: true

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 4e-6
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: bert-base-uncased
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/examples/reload_to_train.yaml
ADDED
@@ -0,0 +1,71 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLU-test"
  train: True
  test: True
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 128

model_manager:
  load_train_state: True
  load_dir: save/joint_bert
  save_dir: save/joint_bert

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: bert-base-uncased
  _padding_side_: right
  _align_mode_: general
  add_special_tokens: true

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 4e-6
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: bert-base-uncased
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/atis/bi-model.yaml
ADDED
@@ -0,0 +1,106 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/bi-model-atis

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA


tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.BiEncoder
    intent_encoder:
      _model_target_: model.encoder.AutoEncoder
      encoder_name: lstm

      embedding:
        embedding_dim: 256
        dropout_rate: 0.4

      lstm:
        dropout_rate: 0.5
        output_dim: 256
        layer_num: 2
        bidirectional: true

      return_with_input: true
      return_sentence_level_hidden: false

    slot_encoder:
      _model_target_: model.encoder.AutoEncoder
      encoder_name: lstm

      embedding:
        embedding_dim: 256
        dropout_rate: 0.4

      lstm:
        dropout_rate: 0.5
        output_dim: 256
        layer_num: 2
        bidirectional: true

      return_with_input: true
      return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.BaseDecoder
    # teacher_forcing: true
    interaction:
      _model_target_: model.decoder.interaction.BiModelInteraction
      output_dim: 256
      dropout_rate: 0.4

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/atis/dca-net.yaml
ADDED
@@ -0,0 +1,88 @@
device: "Tesla P100-PCIE-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/dca-net-atis

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: lstm

    embedding:
      load_embedding_name: glove.6B.300d.txt
      embedding_dim: 300
      dropout_rate: 0.5

    lstm:
      dropout_rate: 0.5
      output_dim: 128
      layer_num: 2
      bidirectional: true
    output_dim: "{model.encoder.lstm.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.DCANetDecoder
    interaction:
      _model_target_: model.decoder.interaction.DCANetInteraction
      output_dim: "{model.encoder.output_dim}"
      attention_dropout: 0.5
      num_attention_heads: 8

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      input_dim: "{model.encoder.output_dim}"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      ignore_index: -100
config/reproduction/atis/deberta.yaml
ADDED
@@ -0,0 +1,67 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32

model_manager:
  load_dir: null
  save_dir: save/deberta-atis

dataset:
  dataset_name: atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

tokenizer:
  _tokenizer_name_: microsoft/deberta-v3-base
  _padding_side_: right
  add_special_tokens: true
  max_length: 512

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 2e-5
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: microsoft/deberta-v3-base
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/atis/electra.yaml
ADDED
@@ -0,0 +1,67 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: True
  test: True
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32

model_manager:
  load_dir: null
  save_dir: save/electra-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: google/electra-small-discriminator
  _padding_side_: right
  add_special_tokens: true
  max_length: 512

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 2e-5
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: google/electra-small-discriminator
    output_dim: 256
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/atis/joint-bert.yaml
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: True
  test: True
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 128

model_manager:
  load_dir: null
  save_dir: save/joint-bert-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: bert-base-uncased
  _padding_side_: right
  _align_mode_: general
  add_special_tokens: true

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 4e-6
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: bert-base-uncased
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
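Note: `ignore_index: -100` recurs in every classifier block. With a sub-word tokenizer such as `bert-base-uncased` it pairs with the standard trick of labelling only the first piece of each word and masking the continuation pieces out of the slot loss. A sketch under that assumption (OpenSLU's actual alignment is presumably driven by `_align_mode_` in common/tokenizer.py, which is outside this excerpt):

from transformers import AutoTokenizer

def align_slot_labels(words, labels, tokenizer, ignore_index=-100):
    """Label only the first sub-word of each word; mask the rest with -100
    so a CrossEntropyLoss-style slot objective skips them."""
    enc = tokenizer(words, is_split_into_words=True, add_special_tokens=True)
    aligned, prev = [], None
    for word_id in enc.word_ids():
        if word_id is None or word_id == prev:  # special token or continuation piece
            aligned.append(ignore_index)
        else:
            aligned.append(labels[word_id])
        prev = word_id
    return enc, aligned

tok = AutoTokenizer.from_pretrained("bert-base-uncased")
enc, y = align_slot_labels(["flights", "to", "denver"], [0, 0, 1], tok)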
config/reproduction/atis/roberta.yaml
ADDED
@@ -0,0 +1,70 @@
device: "Tesla V100-SXM2-16GB" # Useless info

base:
  name: "OpenSLUv1"
  train: True
  test: True
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32

model_manager:
  load_dir: null
  save_dir: save/roberta-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: roberta-base
  _padding_side_: right
  add_special_tokens: true
  max_length: 512

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 2e-5
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: roberta-base
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
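Note: the pretrained-encoder configs in this directory differ only in tokenizer/encoder name, learning rate, and hidden size, so they all run through the same entry point. Assuming the repository's usual CLI (a run.py taking a --config_path flag, which is not part of this diff), a reproduction run would look like `python run.py --config_path config/reproduction/atis/roberta.yaml`.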
config/reproduction/atis/slot-gated.yaml
ADDED
@@ -0,0 +1,87 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/slot-gated-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: lstm

    embedding:
      embedding_dim: 256
      dropout_rate: 0.4

    lstm:
      dropout_rate: 0.5
      output_dim: 256
      layer_num: 2
      bidirectional: true

    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.BaseDecoder

    interaction:
      _model_target_: model.decoder.interaction.SlotGatedInteraction
      remove_slot_attn: false
      output_dim: 256
      dropout_rate: 0.4

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
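Note: `SlotGatedInteraction` points at the gate of Goo et al. (2018), where the sentence-level intent context re-weights each token's slot context before tagging. A stripped-down sketch of that mechanism (dimension handling and the `remove_slot_attn` branch are omitted; treat it as an illustration, not the class in model/decoder/interaction):

import torch
import torch.nn as nn

class SlotGate(nn.Module):
    """Sketch of the slot gate: a scalar gate per token, computed from the
    slot context and a projection of the intent context, scales the slot
    features fed to the slot classifier."""
    def __init__(self, dim):
        super().__init__()
        self.w = nn.Linear(dim, dim, bias=False)
        self.v = nn.Parameter(torch.randn(dim))

    def forward(self, slot_context, intent_context):
        # slot_context: (batch, seq_len, dim); intent_context: (batch, dim)
        g = torch.tanh(slot_context + self.w(intent_context).unsqueeze(1))
        g = (self.v * g).sum(dim=-1, keepdim=True)  # (batch, seq_len, 1)
        return slot_context * g                     # gated slot features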
config/reproduction/atis/stack-propagation.yaml
ADDED
@@ -0,0 +1,109 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/stack-propagation-atis
  save_mode: save-by-eval # save-by-step
  # save_step: 100
  max_save_num: 1

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  _to_lower_case_: true
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 256
      dropout_rate: 0.55

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.5

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.6

    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.StackPropagationDecoder
    interaction:
      _model_target_: model.decoder.interaction.StackInteraction
      differentiable: false

    intent_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: 64
      embedding_dim: 8
      ignore_index: -100
      dropout_rate: 0.5
      mode: "token-level-intent"
      use_multi: false
      return_sentence_level: true

    slot_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: 64
      embedding_dim: 32
      ignore_index: -100
      dropout_rate: 0.55
      mode: "slot"
      use_multi: false
      return_sentence_level: false
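Note: `mode: "token-level-intent"` combined with `return_sentence_level: true` is the Stack-Propagation setup (Qin et al., 2019): an intent is predicted at every token and the utterance label is the majority vote over non-padding positions. A sketch of that reduction, assuming `token_logits` of shape (batch, seq_len, num_intents) and a 0/1 padding mask:

import torch

def vote_sentence_intent(token_logits, mask):
    """Majority-vote per-token intent predictions into one label per utterance."""
    preds = token_logits.argmax(dim=-1)          # (batch, seq_len)
    votes = []
    for p, m in zip(preds, mask.bool()):
        votes.append(torch.mode(p[m]).values)    # most frequent intent among real tokens
    return torch.stack(votes)                    # (batch,)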
config/reproduction/mix-atis/agif.yaml
ADDED
@@ -0,0 +1,133 @@
device: "NVIDIA GeForce RTX 3080"

base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 100
  batch_size: 32
  ignore_index: -100

model_manager:
  load_dir: null
  save_dir: save/agif-mix-atis

accelerator:
  use_accelerator: false

dataset:
  dataset_name: mix-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4

    unflat_attention:
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.AGIFDecoder
    # teacher_forcing: true
    interaction:
      _model_target_: model.decoder.interaction.AGIFInteraction
      intent_embedding_dim: 128
      input_dim: "{model.encoder.output_dim}"
      hidden_dim: 128
      output_dim: "{model.decoder.interaction.intent_embedding_dim}"
      dropout_rate: 0.4
      alpha: 0.2
      num_heads: 4
      num_layers: 2
      row_normalized: true

    intent_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "intent"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.encoder.output_dim}"
          out_features: 256
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: 256
          out_features: "{base.intent_label_num}"
      dropout_rate: 0.4
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: -100
      weight: 0.3

    slot_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: "{model.decoder.interaction.intent_embedding_dim}"
      embedding_dim: 128
      # loss_fn:
      #   _model_target_: torch.nn.NLLLoss
      ignore_index: -100
      dropout_rate: 0.4
      use_multi: false
      multi_threshold: 0.5
      return_sentence_level: false
      weight: 0.7
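Note: `use_multi: "{base.multi_intent}"` switches the intent head to multi-label mode, trained with BCEWithLogitsLoss and decoded by thresholding sigmoid scores at `multi_threshold`. A sketch of that decoding (the argmax fallback for empty predictions is an assumption, a common safeguard rather than something this diff shows):

import torch

def decode_multi_intent(logits, threshold=0.5):
    """Emit every intent whose sigmoid score clears the threshold."""
    probs = torch.sigmoid(logits)                # (batch, num_intents)
    chosen = probs > threshold
    fallback = probs.argmax(dim=-1)              # assumed fallback: best single intent
    for i in range(chosen.size(0)):
        if not chosen[i].any():
            chosen[i, fallback[i]] = True
    return [row.nonzero(as_tuple=True)[0].tolist() for row in chosen]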
config/reproduction/mix-atis/gl-gin.yaml
ADDED
@@ -0,0 +1,128 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32
  ignore_index: -100

model_manager:
  load_dir: null
  save_dir: save/gl-gin-mix-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

dataset:
  dataset_name: mix-atis

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.GLGINDecoder
    dropout_rate: 0.4
    interaction:
      _model_target_: model.decoder.interaction.GLGINInteraction
      intent_embedding_dim: 64
      input_dim: "{model.encoder.output_dim}"
      hidden_dim: 256
      output_dim: "{model.decoder.interaction.intent_embedding_dim}"
      dropout_rate: 0.4
      alpha: 0.2
      num_heads: 8
      num_layers: 2
      row_normalized: true
      slot_graph_window: 1
      intent_label_num: "{base.intent_label_num}"

    intent_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "token-level-intent"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.encoder.output_dim}"
          out_features: 256
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: 256
          out_features: "{base.intent_label_num}"
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      dropout_rate: 0.4
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: "{base.ignore_index}"

    slot_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "slot"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.decoder.interaction.output_dim}"
          out_features: "{model.decoder.interaction.output_dim}"
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: "{model.decoder.interaction.output_dim}"
          out_features: "{base.slot_label_num}"
      ignore_index: "{base.ignore_index}"
      dropout_rate: 0.4
      use_multi: false
      multi_threshold: 0.5
      return_sentence_level: false
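Note: values such as `"{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"` are cross-references into the same config, with simple arithmetic on top. A sketch of how such strings are presumably resolved (the real resolver belongs to common/config.py, outside this diff):

import re

def resolve_refs(value, root):
    """Expand `{dotted.path}` references against the full config dict and
    evaluate the result when it is pure integer arithmetic."""
    if not isinstance(value, str):
        return value
    def lookup(match):
        node = root
        for key in match.group(1).split("."):
            node = node[key]
        return str(node)
    expanded = re.sub(r"\{([\w.]+)\}", lookup, value)
    if re.fullmatch(r"[\d\s+\-*/()]+", expanded):
        return eval(expanded)  # safe here: only digits and operators remain
    return expanded            # non-arithmetic refs substitute directly

# cfg = {"model": {"encoder": {"lstm": {"output_dim": 256},
#                              "attention": {"output_dim": 128}}}}
# resolve_refs("{model.encoder.lstm.output_dim} + "
#              "{model.encoder.attention.output_dim}", cfg)  # -> 384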
config/reproduction/mix-atis/vanilla.yaml
ADDED
@@ -0,0 +1,95 @@
base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 100
  batch_size: 16
  ignore_index: -100

model_manager:
  load_dir: null
  save_dir: save/vanilla-mix-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.BaseDecoder

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      input_dim: "{model.encoder.output_dim}"
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: "{base.ignore_index}"

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      use_multi: false
      multi_threshold: 0.5
      ignore_index: "{base.ignore_index}"
      return_sentence_level: false
config/reproduction/mix-snips/agif.yaml
ADDED
@@ -0,0 +1,131 @@
device: "Tesla P100-PCIE-16GB"

base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 50
  batch_size: 64
  ignore_index: -100

model_manager:
  load_dir: null
  save_dir: save/agif-mix-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: mix-snips

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4

    unflat_attention:
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.AGIFDecoder
    # teacher_forcing: true
    interaction:
      _model_target_: model.decoder.interaction.AGIFInteraction
      intent_embedding_dim: 128
      input_dim: "{model.encoder.output_dim}"
      hidden_dim: 128
      output_dim: "{model.decoder.interaction.intent_embedding_dim}"
      dropout_rate: 0.4
      alpha: 0.2
      num_heads: 4
      num_layers: 2
      row_normalized: true

    intent_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "intent"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.encoder.output_dim}"
          out_features: 256
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: 256
          out_features: "{base.intent_label_num}"
      dropout_rate: 0.4
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: -100
      weight: 0.3

    slot_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: "{model.decoder.interaction.intent_embedding_dim}"
      embedding_dim: 128
      # loss_fn:
      #   _model_target_: torch.nn.NLLLoss
      ignore_index: -100
      dropout_rate: 0.4
      use_multi: false
      multi_threshold: 0.5
      return_sentence_level: false
      weight: 0.7
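Note: the `weight: 0.3` / `weight: 0.7` pair on the two classifiers reads as a fixed convex combination of the intent and slot objectives. As a one-line sketch of the presumed reduction:

def joint_loss(intent_loss, slot_loss, intent_weight=0.3, slot_weight=0.7):
    """Mix the two task losses with the per-classifier weights from the config."""
    return intent_weight * intent_loss + slot_weight * slot_loss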
config/reproduction/mix-snips/gl-gin.yaml
ADDED
@@ -0,0 +1,131 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 50
  batch_size: 32
  ignore_index: -100


model_manager:
  load_dir: null
  save_dir: save/gl-gin-mix-snips

evaluator:
  best_key: EMA
  eval_by_epoch: false
  eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

dataset:
  dataset_name: mix-snips

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 2
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.GLGINDecoder
    dropout_rate: 0.4
    interaction:
      _model_target_: model.decoder.interaction.GLGINInteraction
      intent_embedding_dim: 256
      input_dim: "{model.encoder.output_dim}"
      hidden_dim: 256
      output_dim: "{model.decoder.interaction.intent_embedding_dim}"
      dropout_rate: 0.4
      alpha: 0.2
      num_heads: 4
      num_layers: 2
      row_normalized: true
      slot_graph_window: 1
      intent_label_num: "{base.intent_label_num}"

    intent_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "token-level-intent"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.encoder.output_dim}"
          out_features: 256
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: 256
          out_features: "{base.intent_label_num}"
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      dropout_rate: 0.4
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: "{base.ignore_index}"
      weight: 0.2

    slot_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "slot"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.decoder.interaction.output_dim}"
          out_features: "{model.decoder.interaction.output_dim}"
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: "{model.decoder.interaction.output_dim}"
          out_features: "{base.slot_label_num}"
      ignore_index: "{base.ignore_index}"
      dropout_rate: 0.4
      use_multi: false
      multi_threshold: 0.5
      weight: 0.8
      return_sentence_level: false
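Note: this is the one config in the batch that evaluates by optimizer step (`eval_by_epoch: false`, `eval_step: 1800`) rather than once per epoch, which suits MixSNIPS's much larger training set. The schedule those two keys imply, as a sketch:

def should_evaluate(step, epoch_end, eval_by_epoch, eval_step):
    """Evaluate after each epoch when `eval_by_epoch` is set, otherwise
    every `eval_step` optimizer steps (1800 in the config above)."""
    if eval_by_epoch:
        return epoch_end
    return step > 0 and step % eval_step == 0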
config/reproduction/mix-snips/vanilla.yaml
ADDED
@@ -0,0 +1,95 @@
base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 100
  batch_size: 16
  ignore_index: -100

model_manager:
  load_dir: null
  save_dir: save/vanilla-mix-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.BaseDecoder

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      input_dim: "{model.encoder.output_dim}"
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: "{base.ignore_index}"

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      use_multi: false
      multi_threshold: 0.5
      ignore_index: "{base.ignore_index}"
      return_sentence_level: false
config/reproduction/snips/bi-model.yaml
ADDED
@@ -0,0 +1,104 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/bi-model-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.BiEncoder
    intent_encoder:
      _model_target_: model.encoder.AutoEncoder
      encoder_name: lstm

      embedding:
        embedding_dim: 256
        dropout_rate: 0.5

      lstm:
        dropout_rate: 0.5
        output_dim: 256
        layer_num: 2
        bidirectional: true

      return_with_input: true
      return_sentence_level_hidden: false

    slot_encoder:
      _model_target_: model.encoder.AutoEncoder
      encoder_name: lstm

      embedding:
        embedding_dim: 256
        dropout_rate: 0.5

      lstm:
        dropout_rate: 0.5
        output_dim: 256
        layer_num: 2
        bidirectional: true

      return_with_input: true
      return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.BaseDecoder
    interaction:
      _model_target_: model.decoder.interaction.BiModelInteraction
      output_dim: 256
      dropout_rate: 0.5

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
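Note: unlike the other configs, `model.encoder.BiEncoder` wraps two independent LSTM encoders, one per task, whose hidden states are then exchanged by `BiModelInteraction` (the bi-model idea of Wang et al., 2018). Structurally, a sketch of what the encoder half amounts to:

import torch.nn as nn

class BiEncoderSketch(nn.Module):
    """Two task-specific encoders over the same input; the decoder's
    interaction module later mixes their hidden states."""
    def __init__(self, intent_encoder, slot_encoder):
        super().__init__()
        self.intent_encoder = intent_encoder
        self.slot_encoder = slot_encoder

    def forward(self, inputs):
        return self.intent_encoder(inputs), self.slot_encoder(inputs)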
config/reproduction/snips/dca_net.yaml
ADDED
@@ -0,0 +1,88 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/dca-net-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: lstm

    embedding:
      load_embedding_name: glove.6B.300d.txt
      embedding_dim: 300
      dropout_rate: 0.4

    lstm:
      dropout_rate: 0.4
      output_dim: 128
      layer_num: 2
      bidirectional: true
    output_dim: "{model.encoder.lstm.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.DCANetDecoder
    interaction:
      _model_target_: model.decoder.interaction.DCANetInteraction
      output_dim: "{model.encoder.output_dim}"
      attention_dropout: 0.4
      num_attention_heads: 8

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      input_dim: "{model.encoder.output_dim}"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      ignore_index: -100
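Note: `load_embedding_name: glove.6B.300d.txt` makes this the only config here that warm-starts the word embeddings. A sketch of the standard loading pattern it implies (file location and vocabulary handling are assumptions; OpenSLU's own loading lives in the embedding module, not in this diff):

import numpy as np
import torch

def load_glove(path, word2idx, dim=300):
    """Copy GloVe rows for known words into an embedding matrix;
    unknown words keep their random initialization."""
    matrix = np.random.normal(scale=0.1, size=(len(word2idx), dim)).astype("float32")
    with open(path, encoding="utf-8") as f:
        for line in f:
            word, *vec = line.rstrip().split(" ")
            if word in word2idx and len(vec) == dim:
                matrix[word2idx[word]] = np.asarray(vec, dtype="float32")
    return torch.from_numpy(matrix)  # e.g. nn.Embedding.from_pretrained(...)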
config/reproduction/snips/deberta.yaml
ADDED
@@ -0,0 +1,70 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32

model_manager:
  load_dir: null
  save_dir: save/deberta-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: microsoft/deberta-v3-base
  _padding_side_: right
  add_special_tokens: true
  max_length: 512

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 2e-5
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: microsoft/deberta-v3-base
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/snips/electra.yaml
ADDED
@@ -0,0 +1,69 @@
device: "Tesla V100-SXM2-16GB"
base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32

model_manager:
  load_dir: null
  save_dir: save/electra-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: google/electra-small-discriminator
  _padding_side_: right
  add_special_tokens: true
  max_length: 512

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 2e-5
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: google/electra-small-discriminator
    output_dim: 256
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/snips/joint-bert.yaml
ADDED
@@ -0,0 +1,75 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 128

model_manager:
  load_dir: null
  save_dir: save/joint-bert-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

metric:
  - intent_acc
  - slot_f1
  - EMA

tokenizer:
  _tokenizer_name_: bert-base-uncased
  _padding_side_: right
  _align_mode_: general
  add_special_tokens: true

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 4e-6
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: bert-base-uncased
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/snips/roberta.yaml
ADDED
@@ -0,0 +1,70 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32

model_manager:
  load_dir: null
  save_dir: save/roberta-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: roberta-base
  _padding_side_: right
  add_special_tokens: true
  max_length: 512

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 2e-5
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: roberta-base
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/snips/slot-gated.yaml
ADDED
@@ -0,0 +1,87 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/slot-gated-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: lstm

    embedding:
      embedding_dim: 256
      dropout_rate: 0.4

    lstm:
      dropout_rate: 0.5
      output_dim: 256
      layer_num: 2
      bidirectional: true

    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.BaseDecoder

    interaction:
      _model_target_: model.decoder.interaction.SlotGatedInteraction
      remove_slot_attn: false
      output_dim: 256
      dropout_rate: 0.4

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/snips/stack-propagation.yaml
ADDED
@@ -0,0 +1,105 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/stack-propagation-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 256
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4

    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.StackPropagationDecoder
    interaction:
      _model_target_: model.decoder.interaction.StackInteraction
      differentiable: false

    intent_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: 64
      embedding_dim: 8
      ignore_index: -100
      dropout_rate: 0.4
      mode: "token-level-intent"
      use_multi: false
      return_sentence_level: true

    slot_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: 64
      embedding_dim: 32
      ignore_index: -100
      dropout_rate: 0.4
      mode: "slot"
      use_multi: false
      return_sentence_level: false
config/visual.yaml
ADDED
@@ -0,0 +1,6 @@
host: 127.0.0.1
port: 7861

is_push_to_public: true
output_path: save/stack/outputs.jsonl
page-size: 2
model/__init__.py
ADDED
@@ -0,0 +1,3 @@
from model.open_slu_model import OpenSLUModel

__all__ = ["OpenSLUModel"]
model/decoder/__init__.py
ADDED
@@ -0,0 +1,5 @@
from model.decoder.agif_decoder import AGIFDecoder
from model.decoder.base_decoder import StackPropagationDecoder, BaseDecoder, DCANetDecoder
from model.decoder.gl_gin_decoder import GLGINDecoder

__all__ = ["StackPropagationDecoder", "BaseDecoder", "DCANetDecoder", "AGIFDecoder", "GLGINDecoder"]
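Note: these re-exports are what let the configs above use the shorter dotted paths interchangeably with the full ones; `_model_target_: model.decoder.BaseDecoder` and `_model_target_: model.decoder.base_decoder.BaseDecoder` resolve to the same class.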