Skip to content

Commit 4f16fdd

Browse files
committed
feat: 搭建 Attention 在各层的基本结构
Signed-off-by: YdrMaster <[email protected]>
1 parent 205dc1a commit 4f16fdd

File tree

7 files changed

+141
-1
lines changed

7 files changed

+141
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#ifndef KERNEL_ATTENTION_H
#define KERNEL_ATTENTION_H

#include "../collector.h"

namespace refactor::kernel {

    /// Kernel collector for the Attention operator: given the target
    /// backend (`_target` from InfoCollector), gathers the kernel
    /// implementations able to execute Attention.
    struct AttentionCollector final : public InfoCollector {
        // Maximum sequence length the attention supports; presumably used
        // to size KV-cache workspace — TODO(review): confirm once the
        // backend kernels are implemented (none are registered yet).
        dim_t maxSeqLen;

        AttentionCollector(decltype(_target), decltype(maxSeqLen)) noexcept;

        // Returns candidate kernels that can handle the given input/output
        // tensors on the configured target.
        std::vector<KernelBox>
        filter(TensorRefs inputs, TensorRefs outputs) const final;
    };

}// namespace refactor::kernel

#endif// KERNEL_ATTENTION_H
+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#include "kernel/collectors/attention.h"
#include "kernel/kernel.h"
#include "kernel/tensor.h"
// #include "../kernels/attention/cpu_kernel.hh"
// #include "../kernels/attention/cuda_kernel.hh"

namespace refactor::kernel {

    AttentionCollector::AttentionCollector(
        decltype(_target) target,
        decltype(maxSeqLen) maxSeqLen_) noexcept
        : InfoCollector(target),
          maxSeqLen(maxSeqLen_) {}

    /// Gathers the kernels able to run Attention on the configured target.
    /// No backend kernel is registered yet (see the commented-out includes
    /// above), so every recognized target currently yields an empty list.
    std::vector<KernelBox>
    AttentionCollector::filter(TensorRefs inputs, TensorRefs outputs) const {
        std::vector<KernelBox> candidates;
        switch (_target) {
            case decltype(_target)::Cpu:
            case decltype(_target)::Nvidia:
            case decltype(_target)::Mlu:
                // placeholder: per-target kernels will be appended here
                break;
            default:
                UNREACHABLEX(void, "Unknown target");
        }
        return candidates;
    }

}// namespace refactor::kernel
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#ifndef COMPUTATION_ATTENTION_H
#define COMPUTATION_ATTENTION_H

#include "../operator.h"

namespace refactor::computation {

    /// Graph-level Attention operator carrying the maximum sequence
    /// length the runtime must support.
    struct Attention final : public Operator {
        // Maximum sequence length; presumably bounds KV-cache allocation —
        // TODO(review): confirm when kernels land.
        dim_t maxSeqLen;

        // `explicit` keeps this consistent with the project's other
        // operators (e.g. RmsNormalization) and prevents an accidental
        // implicit dim_t -> Attention conversion.
        constexpr explicit Attention(decltype(maxSeqLen) maxSeqLen_) noexcept
            : Operator(), maxSeqLen(maxSeqLen_) {}

        // Process-wide unique id for this operator type.
        static size_t typeId() noexcept;
        size_t opTypeId() const noexcept final;
        std::string_view name() const noexcept final;
    };

}// namespace refactor::computation

#endif// COMPUTATION_ATTENTION_H
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#include "computation/operators/attention.h"

namespace refactor::computation {

    // The address of a function-local static byte serves as a
    // process-wide unique id — no central registry needed.
    size_t Attention::typeId() noexcept {
        static uint8_t ID = 1;
        return reinterpret_cast<size_t>(&ID);
    }

    size_t Attention::opTypeId() const noexcept { return typeId(); }

    std::string_view Attention::name() const noexcept { return "Attention"; }

}// namespace refactor::computation
+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#include "computation/operators/attention.h"
#include "attention.hh"
#include "common.h"

namespace refactor::llm {
    using Op = Attention;

    Op::Attention(decltype(maxSeqLen) maxSeqLen_)
        : Operator(), maxSeqLen(maxSeqLen_) {}

    /// Builds an Attention operator from frontend attributes.
    /// "max_seq_len" defaults to 0 when the attribute is absent.
    auto Op::build(ModelContext const &, std::string_view, Attributes attributes) -> OpBox {
        // max_seq_len is an integral length stored in dim_t; read it as an
        // integer instead of float_() so no silent float->int truncation
        // occurs. (The original read float_() — TODO(review): confirm the
        // exporter emits this attribute as an int.)
        auto maxSeqLen = attributes.getOrInsert("max_seq_len", {0}).int_();
        return OpBox(std::make_unique<Op>(maxSeqLen));
    }
    auto Op::typeId() -> size_t {
        // Address of a function-local static acts as a unique type id.
        static uint8_t ID = 1;
        return reinterpret_cast<size_t>(&ID);
    }

    auto Op::opTypeId() const -> size_t { return typeId(); }
    auto Op::opTypeName() const -> std::string_view { return "llm::Attention"; }

    auto Op::infer(TensorRefs inputs, InferOptions const &) const -> InferResult {
        TODO("");// shape inference not implemented yet (skeleton commit)
    }

    auto Op::lower(TensorRefs) const -> computation::OpBox {
        TODO("");// lowering to computation::Attention not implemented yet
    }

}// namespace refactor::llm
+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Fix: the include guard was LLM_RMS_ATTENTION_HH, copy-pasted from
// rms_normalization.hh; use a guard that names THIS header so the two
// files cannot shadow each other.
#ifndef LLM_ATTENTION_HH
#define LLM_ATTENTION_HH

#include "frontend/operator.h"

namespace refactor::llm {
    using namespace frontend;

    /// Frontend Attention operator for the LLM dialect.
    struct Attention final : public Operator {
        // Maximum sequence length, parsed from the "max_seq_len"
        // attribute (0 when absent).
        dim_t maxSeqLen;

        explicit Attention(decltype(maxSeqLen));

        // Factory used by the operator registry.
        static OpBox build(ModelContext const &, std::string_view, Attributes);
        // Process-wide unique id for this operator type.
        static size_t typeId();

        size_t opTypeId() const final;
        std::string_view opTypeName() const final;
        // Output-shape inference (not implemented yet in this commit).
        InferResult infer(TensorRefs, InferOptions const &) const final;
        // Lowering to the computation-level operator (not implemented yet).
        computation::OpBox lower(TensorRefs) const final;
    };

}// namespace refactor::llm

#endif// LLM_ATTENTION_HH

src/08-01llm/src/operators/rms_normalization.hh

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ namespace refactor::llm {
99
struct RmsNormalization final : public Operator {
1010
float epsilon;
1111

12-
RmsNormalization(decltype(epsilon));
12+
explicit RmsNormalization(decltype(epsilon));
1313

1414
static OpBox build(ModelContext const &, std::string_view, Attributes);
1515
static size_t typeId();

0 commit comments

Comments
 (0)