Positional Encoding-Transformer

文章目录

1. 创建一个位置编码类
2. Python代码

1. 创建一个位置编码类

$\begin{equation} PE_{(pos,2i)}=\sin(pos/10000^{2i/d_{model}}) \end{equation}$
$\begin{equation} PE_{(pos,2i+1)}=\cos(pos/10000^{2i/d_{model}}) \end{equation}$

2. Python代码

python">#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @FileName  :PositionEmbedding.py
# @Time      :2024/12/22 12:05
# @Author    :Jason Zhang
import torch
from torch import nn
import torch.nn.functional as F

torch.set_printoptions(precision=3, sci_mode=False)


# PE(pos,2i) = sin(pos/10000**(2i/d_model))
# PE(pos,2i+1) = sin(pos/10000**(2i/d_model))

class PositionEmbedding(object):
    def __init__(self, max_pos: int = 7, power_x: int = 10000, d_model: int = 8):
        self.max_pos = max_pos
        self.d_model = d_model
        self.power_x = power_x
        # self._result = torch.zeros((self.max_pos, self.d_model))
        self._result = nn.Embedding(self.max_pos, self.d_model)

    @property
    def result(self):
        # step1: pos/10000^i/dmodel
        pos_arrange = torch.arange(self.max_pos).reshape((-1, 1)).to(torch.float32)
        pos_ones = torch.ones(self.max_pos).reshape((-1, 1)).to(torch.float32)
        dim_arrange = torch.arange(self.d_model).reshape((1, -1)).to(torch.float32)
        dim_ones = torch.ones(self.d_model).reshape((1, -1)).to(torch.float32)
        pos_mat = pos_arrange @ dim_ones
        dim_mat = pos_ones @ dim_arrange
        dim_mat = torch.pow(self.power_x, dim_mat / self.d_model)
        print(f"pos_mat=\n{pos_mat}")
        print(f"dim_mat=\n{dim_mat}")
        pos_dim_mat = pos_mat / dim_mat
        print(f"pos_dim_mat=\n{pos_dim_mat}")
        even_mat = torch.eye(self.d_model)
        even_mat[:, 1::2] = 0
        even_pos_dim = pos_dim_mat @ even_mat
        even_pos_dim = torch.sin(even_pos_dim)
        print(f"even_pos_dim=\n{even_pos_dim}")

        odd_mat = torch.eye(self.d_model)
        odd_mat[:, 0::2] = 0
        odd_pos_dim = pos_dim_mat @ odd_mat
        cos_ones = torch.ones_like(odd_pos_dim)
        cos_ones[:, 1::2] = 0
        print(f"cos_ones=\n{cos_ones}")
        odd_pos_dim = torch.cos(odd_pos_dim) - cos_ones
        print(f"odd_pos_dim=\n{odd_pos_dim}")
        #  print(f"even_mat=\n{even_mat}")
        #  print(f"odd_mat=\n{odd_mat}")
        my_result = even_pos_dim + odd_pos_dim
        print(f"my_result=\n{my_result}")
        self._result.weight = nn.Parameter(my_result, requires_grad=False)
        print(f"self._result.weight=\n{self._result.weight}")
        return self._result


if __name__ == "__main__":
    run_code = 0
    test_position = PositionEmbedding()
    test_position_result = test_position.result
    print(f"test_position_result=\n{test_position_result}")

结果：

python">pos_mat=
tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1.],
        [2., 2., 2., 2., 2., 2., 2., 2.],
        [3., 3., 3., 3., 3., 3., 3., 3.],
        [4., 4., 4., 4., 4., 4., 4., 4.],
        [5., 5., 5., 5., 5., 5., 5., 5.],
        [6., 6., 6., 6., 6., 6., 6., 6.]])
dim_mat=
tensor([[    1.000,     3.162,    10.000,    31.623,   100.000,   316.228,
          1000.000,  3162.278],
        [    1.000,     3.162,    10.000,    31.623,   100.000,   316.228,
          1000.000,  3162.278],
        [    1.000,     3.162,    10.000,    31.623,   100.000,   316.228,
          1000.000,  3162.278],
        [    1.000,     3.162,    10.000,    31.623,   100.000,   316.228,
          1000.000,  3162.278],
        [    1.000,     3.162,    10.000,    31.623,   100.000,   316.228,
          1000.000,  3162.278],
        [    1.000,     3.162,    10.000,    31.623,   100.000,   316.228,
          1000.000,  3162.278],
        [    1.000,     3.162,    10.000,    31.623,   100.000,   316.228,
          1000.000,  3162.278]])
pos_dim_mat=
tensor([[    0.000,     0.000,     0.000,     0.000,     0.000,     0.000,
             0.000,     0.000],
        [    1.000,     0.316,     0.100,     0.032,     0.010,     0.003,
             0.001,     0.000],
        [    2.000,     0.632,     0.200,     0.063,     0.020,     0.006,
             0.002,     0.001],
        [    3.000,     0.949,     0.300,     0.095,     0.030,     0.009,
             0.003,     0.001],
        [    4.000,     1.265,     0.400,     0.126,     0.040,     0.013,
             0.004,     0.001],
        [    5.000,     1.581,     0.500,     0.158,     0.050,     0.016,
             0.005,     0.002],
        [    6.000,     1.897,     0.600,     0.190,     0.060,     0.019,
             0.006,     0.002]])
even_pos_dim=
tensor([[ 0.000,  0.000,  0.000,  0.000,  0.000,  0.000,  0.000,  0.000],
        [ 0.841,  0.000,  0.100,  0.000,  0.010,  0.000,  0.001,  0.000],
        [ 0.909,  0.000,  0.199,  0.000,  0.020,  0.000,  0.002,  0.000],
        [ 0.141,  0.000,  0.296,  0.000,  0.030,  0.000,  0.003,  0.000],
        [-0.757,  0.000,  0.389,  0.000,  0.040,  0.000,  0.004,  0.000],
        [-0.959,  0.000,  0.479,  0.000,  0.050,  0.000,  0.005,  0.000],
        [-0.279,  0.000,  0.565,  0.000,  0.060,  0.000,  0.006,  0.000]])
cos_ones=
tensor([[1., 0., 1., 0., 1., 0., 1., 0.],
        [1., 0., 1., 0., 1., 0., 1., 0.],
        [1., 0., 1., 0., 1., 0., 1., 0.],
        [1., 0., 1., 0., 1., 0., 1., 0.],
        [1., 0., 1., 0., 1., 0., 1., 0.],
        [1., 0., 1., 0., 1., 0., 1., 0.],
        [1., 0., 1., 0., 1., 0., 1., 0.]])
odd_pos_dim=
tensor([[ 0.000,  1.000,  0.000,  1.000,  0.000,  1.000,  0.000,  1.000],
        [ 0.000,  0.950,  0.000,  1.000,  0.000,  1.000,  0.000,  1.000],
        [ 0.000,  0.807,  0.000,  0.998,  0.000,  1.000,  0.000,  1.000],
        [ 0.000,  0.583,  0.000,  0.996,  0.000,  1.000,  0.000,  1.000],
        [ 0.000,  0.301,  0.000,  0.992,  0.000,  1.000,  0.000,  1.000],
        [ 0.000, -0.010,  0.000,  0.988,  0.000,  1.000,  0.000,  1.000],
        [ 0.000, -0.321,  0.000,  0.982,  0.000,  1.000,  0.000,  1.000]])
my_result=
tensor([[     0.000,      1.000,      0.000,      1.000,      0.000,      1.000,
              0.000,      1.000],
        [     0.841,      0.950,      0.100,      1.000,      0.010,      1.000,
              0.001,      1.000],
        [     0.909,      0.807,      0.199,      0.998,      0.020,      1.000,
              0.002,      1.000],
        [     0.141,      0.583,      0.296,      0.996,      0.030,      1.000,
              0.003,      1.000],
        [    -0.757,      0.301,      0.389,      0.992,      0.040,      1.000,
              0.004,      1.000],
        [    -0.959,     -0.010,      0.479,      0.988,      0.050,      1.000,
              0.005,      1.000],
        [    -0.279,     -0.321,      0.565,      0.982,      0.060,      1.000,
              0.006,      1.000]])
self._result.weight=
Parameter containing:
tensor([[     0.000,      1.000,      0.000,      1.000,      0.000,      1.000,
              0.000,      1.000],
        [     0.841,      0.950,      0.100,      1.000,      0.010,      1.000,
              0.001,      1.000],
        [     0.909,      0.807,      0.199,      0.998,      0.020,      1.000,
              0.002,      1.000],
        [     0.141,      0.583,      0.296,      0.996,      0.030,      1.000,
              0.003,      1.000],
        [    -0.757,      0.301,      0.389,      0.992,      0.040,      1.000,
              0.004,      1.000],
        [    -0.959,     -0.010,      0.479,      0.988,      0.050,      1.000,
              0.005,      1.000],
        [    -0.279,     -0.321,      0.565,      0.982,      0.060,      1.000,
              0.006,      1.000]])
test_position_result=
Embedding(7, 8)