Program.cs
using GGMLSharp;
using System;
using static GGMLSharp.Structs;

namespace SimpleBackend
{
    internal class Program
    {
        static void Main(string[] args)
        {
            // initialize data of matrices to perform matrix multiplication
            const int rows_A = 4, cols_A = 2;
            float[] matrix_A = new float[rows_A * cols_A]
            {
                2, 8,
                5, 1,
                4, 2,
                8, 6
            };

            const int rows_B = 3, cols_B = 2;
            /* Transpose([
                   10, 9, 5,
                   5, 9, 4
               ]) 2 rows, 3 cols */
            float[] matrix_B = new float[rows_B * cols_B]
            {
                10, 5,
                9, 9,
                5, 4
            };
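
            // Note: MulMat multiplies a by the transpose of b (result = a * b^T),
            // which is why both matrices are stored with the shared inner
            // dimension (2) as their columns.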
            SimpleModel model = LoadModel(matrix_A, matrix_B, rows_A, cols_A, rows_B, cols_B);

            // calculate the temporary memory required to compute
            SafeGGmlGraphAllocr allocr = new SafeGGmlGraphAllocr(model.backend.GetDefaultBufferType());

            // create the worst case graph for memory usage estimation
            SafeGGmlGraph gf = BuildGraph(model);
            gf.Reserve(allocr);
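
            // Reserve pre-allocates the compute buffer for this worst-case graph;
            // the graph tensors are placed into that buffer later, when the graph
            // is allocated again for the actual computation in Compute().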
            ulong mem_size = allocr.GetBufferSize(0);
            Console.WriteLine($"compute buffer size: {mem_size / 1024.0} KB");

            // perform computation
            SafeGGmlTensor result = Compute(model, allocr);

            // bring the data back from the backend memory
            //Native.ggml_backend_tensor_get(result, Marshal.UnsafeAddrOfPinnedArrayElement(out_data, 0), 0, Native.ggml_nbytes(result));
            byte[] backendBytes = result.GetBackend();

            // create an array to print the result
            float[] out_data = DataConverter.ConvertToFloats(backendBytes);

            // expected result:
            // [ 60.00 110.00  54.00  29.00
            //   55.00  90.00 126.00  28.00
            //   50.00  54.00  42.00  64.00 ]
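            // MulMat(a, b) yields a tensor with Shape[0] == rows_A (4) and
            // Shape[1] == rows_B (3), hence the "(transposed result)" wording below.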
Console.WriteLine($"mul mat ({(int)result.Shape[0]} x {(int)result.Shape[1]}) (transposed result):[");
for (int j = 0; j < result.Shape[1] /* rows */; j++)
{
if (j > 0)
{
Console.WriteLine();
}
for (int i = 0; i < result.Shape[0] /* cols */; i++)
{
Console.Write($" {out_data[i * result.Shape[1] + j]}");
}
}
Console.WriteLine(" ]");
// release backend memory used for computation
allocr.Free();
// free memory
model.ctx.Free();
// release backend memory and free backend
//model.buffer.Free();
//Native.ggml_backend_buffer_free(model.buffer);
model.backend.Free();
Console.WriteLine("Done");
Console.ReadKey();
}
        class SimpleModel
        {
            public SafeGGmlTensor a;
            public SafeGGmlTensor b;

            // the backend to perform the computation (CPU, CUDA, METAL)
            public SafeGGmlBackend backend = null;

            // the backend buffer to store the data of tensors a and b
            public SafeGGmlBackendBuffer buffer;

            // the context to define the tensor information (dimensions, size, memory address)
            public SafeGGmlContext ctx;
        };
        static SimpleModel LoadModel(float[] a, float[] b, int rows_A, int cols_A, int rows_B, int cols_B)
        {
            SimpleModel model = new SimpleModel();

            // initialize the backend
            if (SafeGGmlBackend.HasCuda)
            {
                model.backend = SafeGGmlBackend.CudaInit(); // init device 0
            }
            else
            {
                model.backend = SafeGGmlBackend.CpuInit();
            }

            if (model.backend == null)
            {
                Console.WriteLine("ggml_backend_cuda_init() failed.");
                Console.WriteLine("We will use ggml_backend_cpu_init() instead.");

                // if there is no GPU backend, fall back to the CPU backend
                model.backend = SafeGGmlBackend.CpuInit();
            }

            // create context
            model.ctx = new SafeGGmlContext(IntPtr.Zero, NoAllocateMemory: true);
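
            // NoAllocateMemory (ggml's no_alloc) means this context only stores tensor
            // metadata; the actual tensor data lives in the backend buffer created below.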
            // create tensors
            model.a = model.ctx.NewTensor2d(Structs.GGmlType.GGML_TYPE_F32, cols_A, rows_A);
            model.b = model.ctx.NewTensor2d(Structs.GGmlType.GGML_TYPE_F32, cols_B, rows_B);
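            // NewTensor2d follows ggml's convention: the first size is ne[0], the
            // fastest-varying dimension (the columns), and the second is ne[1] (the rows).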

            // create a backend buffer (backend memory) and allocate the tensors from the context
            model.buffer = model.ctx.BackendAllocContextTensors(model.backend);

            // load data from cpu memory to the backend buffer
            model.a.SetBackend(a);
            model.b.SetBackend(b);

            return model;
        }
        static SafeGGmlGraph BuildGraph(SimpleModel model)
        {
            ulong buf_size = Common.TensorOverheadLength * GGML_DEFAULT_GRAPH_SIZE + Common.GraphOverheadLength;
            byte[] buffer = new byte[buf_size];
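            // the context below is created without allocating tensor data, so this buffer
            // only needs to hold tensor and graph metadata for ggml's default graph size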

            // create a temporary context to build the graph
            SafeGGmlContext ctx0 = new SafeGGmlContext(buffer, true);
            SafeGGmlGraph gf = ctx0.NewGraph();

            // result = a * b^T
            SafeGGmlTensor result = ctx0.MulMat(model.a, model.b);
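            // MulMat requires a and b to share ne[0] (the inner dimension, 2 here);
            // the result is a rows_A x rows_B (4 x 3) tensor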

            // build the operation nodes
            gf.BuildForwardExpend(result);

            // delete the temporary context used to build the graph
            ctx0.Free();
            return gf;
        }
        // compute with the backend
        static SafeGGmlTensor Compute(SimpleModel model, SafeGGmlGraphAllocr allocr)
        {
            SafeGGmlGraph gf = BuildGraph(model);

            // allocate the graph tensors from the allocator; this reuses the memory
            // reserved for the previous (worst-case) graph
            gf.GraphAllocate(allocr);
            gf.BackendCompute(model.backend);

            // in this case, the output tensor is the last one in the graph
            return gf.Nodes[gf.NodeCount - 1];
        }
    }
}