@@ -648,6 +648,25 @@ struct T5Block : public GGMLBlock {
     }
 };
 
+struct T5Projection : public UnaryBlock {
+public:
+    T5Projection(int64_t model_dim, int64_t projection_dim) {
+        blocks["0"] = std::shared_ptr<GGMLBlock>(new Linear(model_dim, projection_dim, false));
+        blocks["3"] = std::shared_ptr<GGMLBlock>(new Linear(projection_dim, projection_dim, false));
+    }
+
+    struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
+        // x: [N, n_token, model_dim]
+        auto wi = std::dynamic_pointer_cast<Linear>(blocks["0"]);
+        auto wo = std::dynamic_pointer_cast<Linear>(blocks["3"]);
+
+        x = wi->forward(ctx, x);
+        x = ggml_relu_inplace(ctx, x);
+        x = wo->forward(ctx, x);
+        return x;
+    }
+};
+
 struct T5Stack : public GGMLBlock {
     int64_t num_layers;
 
@@ -682,6 +701,7 @@ struct T5Stack : public GGMLBlock {
         auto final_layer_norm = std::dynamic_pointer_cast<T5LayerNorm>(blocks["final_layer_norm"]);
 
         x = final_layer_norm->forward(ctx, x);
+
         return x;
     }
 };
@@ -692,9 +712,11 @@ struct T5 : public GGMLBlock {
        int64_t model_dim,
        int64_t ff_dim,
        int64_t num_heads,
-       int64_t vocab_size) {
+       int64_t vocab_size,
+       int64_t projection_dim) {
         blocks["encoder"] = std::shared_ptr<GGMLBlock>(new T5Stack(num_layers, model_dim, model_dim, ff_dim, num_heads));
         blocks["shared"]  = std::shared_ptr<GGMLBlock>(new Embedding(vocab_size, model_dim));
+        blocks["final_projection"] = std::shared_ptr<GGMLBlock>(new T5Projection(model_dim, projection_dim));
     }
 
     struct ggml_tensor* forward(struct ggml_context* ctx,
@@ -709,6 +731,9 @@ struct T5 : public GGMLBlock {
 
         auto x = shared->forward(ctx, input_ids);
         x = encoder->forward(ctx, x, past_bias, attention_mask, relative_position_bucket);
+
+        auto final_projection = std::dynamic_pointer_cast<T5Projection>(blocks["final_projection"]);
+        x = final_projection->forward(ctx, x);
         return x;
     }
 };
@@ -720,12 +745,13 @@ struct T5Runner : public GGMLRunner {
     T5Runner(ggml_backend_t backend,
              std::map<std::string, enum ggml_type>& tensor_types,
              const std::string prefix,
-             int64_t num_layers = 24,
-             int64_t model_dim  = 4096,
-             int64_t ff_dim     = 10240,
-             int64_t num_heads  = 64,
-             int64_t vocab_size = 32128)
-        : GGMLRunner(backend), model(num_layers, model_dim, ff_dim, num_heads, vocab_size) {
+             int64_t num_layers     = 12,
+             int64_t model_dim      = 768,
+             int64_t ff_dim         = 2048,
+             int64_t num_heads      = 12,
+             int64_t vocab_size     = 32128,
+             int64_t projection_dim = 4096)
+        : GGMLRunner(backend), model(num_layers, model_dim, ff_dim, num_heads, vocab_size, projection_dim) {
         model.init(params_ctx, tensor_types, prefix);
     }
 
@@ -861,12 +887,13 @@ struct T5Embedder {
     T5Embedder(ggml_backend_t backend,
                std::map<std::string, enum ggml_type>& tensor_types = empty_tensor_types,
                const std::string prefix = "",
-               int64_t num_layers = 24,
-               int64_t model_dim  = 4096,
-               int64_t ff_dim     = 10240,
-               int64_t num_heads  = 64,
-               int64_t vocab_size = 32128)
-        : model(backend, tensor_types, prefix, num_layers, model_dim, ff_dim, num_heads, vocab_size) {
+               int64_t num_layers     = 12,
+               int64_t model_dim      = 768,
+               int64_t ff_dim         = 2048,
+               int64_t num_heads      = 12,
+               int64_t vocab_size     = 32128,
+               int64_t projection_dim = 4096)
+        : model(backend, tensor_types, prefix, num_layers, model_dim, ff_dim, num_heads, vocab_size, projection_dim) {
     }
 
     void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
@@ -983,4 +1010,4 @@ struct T5Embedder {
     }
 };
 
-#endif  // __T5_HPP__
+#endif  // __T5_HPP__