We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent ce0442d · commit bc278c8 · Copy full SHA for bc278c8
ggml-impl.h
@@ -18,7 +18,9 @@
18
/* Yields the larger of a and b. Each argument appears twice in the expansion,
 * so the selected operand is evaluated a second time; avoid passing
 * expressions with side effects (e.g. i++). */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
19
20
/**
21
- * Google Brain 16-bit floating point number.
+ * Converts brain16 to float32.
22
+ *
23
+ * The bfloat16 floating point format has the following structure:
24
*
25
* ┌sign
26
* │
@@ -52,17 +54,8 @@
52
54
* │┌─┴─┐┌─┴──────┐
53
55
* 0b0000000000000000 IEEE binary16
56
- * So be warned that converting between them, destroys several bits.
- *
57
* @see IEEE 754-2008
58
*/
59
-struct ggml_bf16_s {
60
- uint16_t bits;
61
-};
62
-
63
-/**
64
- * Converts brain16 to float32.
65
- */
66
static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
67
union {
68
float f;
0 commit comments