Commit 3037cf1
Parent(s): 8a01860

modeling_flm.py: copyright modification and minor code modification

Browse files
- configuration_flm.py +14 -6
- modeling_flm.py +22 -6
configuration_flm.py CHANGED

@@ -1,6 +1,18 @@
 # coding=utf-8
-# Copyright
-
+# Copyright 2018 The OpenAI Team Authors and HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """ Cofe-AI FLM configuration"""
 
 from transformers.configuration_utils import PretrainedConfig
@@ -11,10 +23,6 @@ from transformers.utils import logging
 logger = logging.get_logger(__name__)
 
 FLM_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    # "freelm": "xxxx/config.json",
-    # "freelm-medium": "xxxx/config.json",
-    # "freelm-large": "xxxx/config.json",
-    # "freelm-xl": "xxxx/config.json",
 }
 
 
modeling_flm.py CHANGED

@@ -1,8 +1,9 @@
 # coding=utf-8
-# Copyright
+# Copyright 2018 The OpenAI Team Authors and HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
 # This code is based on OpenAI's GPT-2 library. It has been modified from its
-# original forms to accommodate
+# original forms to accommodate architectural differences compared to GPT-2.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,12 +16,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""PyTorch FLM model."""
+
 from typing import Optional, Tuple, Union
 
 import math
 import torch
-
-from einops import rearrange, repeat
+from einops import rearrange
 from torch import einsum, nn
 from torch.cuda.amp import autocast
 from transformers.activations import ACT2FN
@@ -31,8 +33,9 @@ from transformers.modeling_outputs import (
 )
 from transformers.modeling_utils import PreTrainedModel
 from transformers.pytorch_utils import find_pruneable_heads_and_indices, prune_conv1d_layer
-from transformers.utils import
+from transformers.utils import logging
 from transformers.utils.model_parallel_utils import assert_device_map, get_device_map
+
 from .configuration_flm import FLMConfig
 
 
@@ -102,6 +105,18 @@ class RotaryEmbedding(nn.Module):
         rotated_k = apply_rotary_emb(freqs_k, k, scale=scale_k ** -1)
         return rotated_q, rotated_k
 
+    def rotate_queries_or_keys(self, t, seq_dim=-2, offset=0):
+        """
+        use this only when xpos is NOT activated.
+        """
+        # t's shape e.g. -> (batchsize, headnum, seqlen, dimofhead)
+        assert not self.use_xpos, 'you must use `.rotate_queries_and_keys` method instead and pass in both queries and keys, for length extrapolatable rotary embeddings'
+        device, seq_len = t.device, t.shape[seq_dim]
+        pos_seq_t = torch.arange(offset, offset + seq_len, device=device, dtype=torch.float32)
+        freqs = self.forward(pos_seq_t, cache_key=f"{offset}:{offset+seq_len}")
+        # freqs seqlen x dim
+        return apply_rotary_emb(freqs, t)
+
     def get_scale(self, t, cache_key=None, offset=0, ):
         assert self.use_xpos, 'This function is only useful for xpos.'
         if exists(cache_key) and cache_key in self.cache_scale:
@@ -372,6 +387,7 @@ class FLMAttention(nn.Module):
 
         batch_size, head_num, k_seq_len, head_features = key.shape
         _, _, q_seq_len, _ = query.shape
+        query_offset = k_seq_len - q_seq_len
         if rotary_embedding is not None:
             query = query.contiguous().view(batch_size * head_num, q_seq_len, head_features)
             key = key.contiguous().view(batch_size * head_num, k_seq_len, head_features)
@@ -381,7 +397,7 @@
                 # query: [batch_size * head_num, seqlen, hn]
                 query, key = rotary_embedding.rotate_queries_and_keys(query, key)
             else:
-                query = rotary_embedding.rotate_queries_or_keys(query)
+                query = rotary_embedding.rotate_queries_or_keys(query, offset=query_offset)
                 key = rotary_embedding.rotate_queries_or_keys(key)
             # batch_size * head_num, k_seq_len(q_seq_len), head_features
             query = query.view(batch_size, head_num, q_seq_len, head_features)