Update README.md
Browse files
README.md
CHANGED
|
@@ -1,3 +1,58 @@
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
| 3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
| 3 |
---
|
| 4 |
+
|
| 5 |
+
## HugIE:基于MRC的Instruction-tuning的统一信息抽取框架
|
| 6 |
+
基本思想和优势:
|
| 7 |
+
- 构建Instruction模板,将实体识别和事件抽取统一为MRC形式;
|
| 8 |
+
- 采用Global Pointer训练抽取器;
|
| 9 |
+
- 只需少量代码即可实现事件抽取,获取实体名称、事件信息。
|
| 10 |
+
|
| 11 |
+
快速使用:
|
| 12 |
+
|
| 13 |
+
```python
|
| 14 |
+
from applications.information_extraction.HugIE.api_test import HugIEAPI
|
| 15 |
+
model_type = 'bert'
|
| 16 |
+
hugie_model_name_or_path = 'wjn1996/wjn1996-hugnlp-hugie-large-zh'
|
| 17 |
+
hugie = HugIEAPI(model_type, hugie_model_name_or_path)
|
| 18 |
+
text = "央广网北京2月23日消息 据中国地震台网正式测定,2月23日8时37分在塔吉克斯坦发生7.2级地震,震源深度10公里,震中位于北纬37.98度,东经73.29度,距我国边境线最近约82公里,地震造成新疆喀什等地震感强烈。"
|
| 19 |
+
|
| 20 |
+
entity = "塔吉克斯坦地震"
|
| 21 |
+
relation = "震源位置"
|
| 22 |
+
predictions, topk_predictions = hugie.request(text, entity, relation=relation)
|
| 23 |
+
print("entity:{}, relation:{}".format(entity, relation))
|
| 24 |
+
print("predictions:\n{}".format(predictions))
|
| 25 |
+
print("topk_predictions:\n{}".format(topk_predictions))
|
| 26 |
+
print("\n\n")
|
| 27 |
+
|
| 28 |
+
"""
|
| 29 |
+
# 实体识别输出结果:
|
| 30 |
+
entity:塔吉克斯坦地震, relation:震源位置
|
| 31 |
+
predictions:
|
| 32 |
+
{0: ['10公里', '距我国边境线最近约82公里', '北纬37.98度,东经73.29度', '北纬37.98度,东经73.29度,距我国边境线最近约82公里']}
|
| 33 |
+
topk_predictions:
|
| 34 |
+
{0: [{'answer': '10公里', 'prob': 0.9895901083946228, 'pos': [(80, 84)]}, {'answer': '距我国边境线最近约82公里', 'prob': 0.8584909439086914, 'pos': [(107, 120)]}, {'answer': '北纬37.98度,东经73.29度', 'prob': 0.7202121615409851, 'pos': [(89, 106)]}, {'answer': '北纬37.98度,东经73.29度,距我国边境线最近约82公里', 'prob': 0.11628123372793198, 'pos': [(89, 120)]}]}
|
| 35 |
+
"""
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
entity = "塔吉克斯坦地震"
|
| 39 |
+
relation = "时间"
|
| 40 |
+
predictions, topk_predictions = hugie.request(text, entity, relation=relation)
|
| 41 |
+
print("entity:{}, relation:{}".format(entity, relation))
|
| 42 |
+
print("predictions:\n{}".format(predictions))
|
| 43 |
+
print("topk_predictions:\n{}".format(topk_predictions))
|
| 44 |
+
print("\n\n")
|
| 45 |
+
|
| 46 |
+
"""
|
| 47 |
+
# 事件信息输出结果:
|
| 48 |
+
entity:塔吉克斯坦地震, relation:时间
|
| 49 |
+
predictions:
|
| 50 |
+
{0: ['2月23日8时37分']}
|
| 51 |
+
topk_predictions:
|
| 52 |
+
{0: [{'answer': '2月23日8时37分', 'prob': 0.9999995231628418, 'pos': [(49, 59)]}]}
|
| 53 |
+
"""
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
---
|
| 57 |
+
|
| 58 |
+
欢迎使用统一NLP开发框架——HugNLP,GitHub地址:[https://github.com/wjn1996/HugNLP](https://github.com/wjn1996/HugNLP)
|