无符号浮点数的16位二进制表示 [英] 16 bit binary representation of unsigned float
本文介绍了无符号浮点数的16位二进制表示的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
伙计们
如何获得无符号浮点数的16位二进制表示:
14位尾数(mantissa)和2位指数。
我知道IEEE 754的半精度格式,但它是10位尾数、5位指数和1位符号。
hi guys;
How do I get a 16-bit binary representation of an unsigned float,
with a 14-bit mantissa and a 2-bit exponent?
I am aware of IEEE 754 half precision, but that uses a 10-bit mantissa, a 5-bit exponent and a 1-bit sign.
推荐答案
在所有解决方案下经过大量评论讨论之后,我得出的结论是:这并不是该问题的解决方案。
尽管如此,我还是保留它,因为它展示了这样一种做法:构建自己的类,在计算时来回转换,而只以该特定格式*存储*数值。
[/EDIT]
您可以考虑创建一个功能齐全的值类型(例如包含 NaN 和 Inf 处理),但所有计算都用 double 进行。例如:
After the many comments in all the solutions' threads, I have come to the conclusion that this is not a solution to the question.
I nonetheless leave it standing, since it shows the approach of building your own class and converting back and forth for calculation while only *storing* the value in that particular format.
[/EDIT]
You might consider making a value type with all the bells and whistles, like NaN and Inf handling, but doing all calculations in doubles. E.g.
// Encoding: no sign, exp = 2 bits, significand = 14 bits
//   exp = 0:    0.significand * 2^0
//   exp = 1..3: 1.significand * 2^(exp-1) --> bias = 1
// Special values: 0x0000u = zero
//                 0x0001u = NaN
//                 0x0002u = Inf
// There are no negative exponents
//   --> max number = 0xFFFFu --> (2-2^-14)*2^2 = 8-2^-12 = 7.99975586
//   --> min number = 0x0003u --> (3*2^-14)*2^0 = 3*2^-14 = 0.00018311
struct F142
{
    private const UInt16 _zero = (UInt16)0x0000u;
    private const UInt16 _naN = _zero + 1;
    private const UInt16 _inf = _zero + 2;
    private const UInt16 _min = _zero + 3;
    private const UInt16 _max = (UInt16)0xFFFFu;
    private const UInt16 _significandMask = (UInt16)0x3FFFu;
    private const UInt16 _unit = _significandMask + 1;
    private const int _significandBits = 14;
    private const int _maxExp = 3;

    // Smallest finite non-zero value (raw 0x0003).
    private const double _dMin = 3.0 / _unit;

    // Largest finite value (raw 0xFFFF): (2 - 1/_unit) * 2^2 = 8 - 4/_unit.
    // Any smaller limit would make the top representable value encode as Inf.
    private const double _dMax = 8.0 - 4.0 / _unit;

    // The 16 stored bits: exponent in bits 15..14, significand in bits 13..0.
    private readonly UInt16 _raw;

    // Exponent field (0..3) extracted from the raw bits.
    private UInt16 Exp { get { return (UInt16)((_raw >> _significandBits) & 0x3); } }

    // Converts a double to the raw 16-bit pattern.
    //   NaN, negative numbers and -Infinity  -> NaN pattern
    //   +Infinity and anything above _dMax   -> Inf pattern
    //   anything below _dMin                 -> zero
    // The significand is truncated, not rounded.
    private static UInt16 Encode(double d)
    {
        if (double.IsNaN(d) || d < 0.0) return _naN;   // d < 0.0 also covers -Infinity
        if (d > _dMax) return _inf;                    // also covers +Infinity
        if (d < _dMin) return _zero;

        int exp = 0;
        if (d >= 1.0)
        {
            // Normalise into [1, 2) while counting the exponent (bias 1, at most 3).
            exp = 1;
            while (d >= 2.0 && exp < _maxExp)
            {
                exp++;
                d /= 2.0;
            }
            d -= 1.0;   // drop the implicit leading 1
        }
        // d is now in [0, 1), so d * _unit fits in the 14 significand bits.
        return (UInt16)((exp << _significandBits) | (UInt16)(d * _unit));
    }

    private F142(UInt16 raw) { _raw = raw; }

    // Creates the value nearest below (or equal to) d; see Encode for special cases.
    public F142(double d) { _raw = Encode(d); }

    // Smallest finite non-zero value.
    public static readonly F142 Min = new F142(_min);

    // Largest finite value.
    public static readonly F142 Max = new F142(_max);

    public bool IsNaN { get { return _raw == _naN; } }
    public bool IsInf { get { return _raw == _inf; } }
    public bool IsZero { get { return _raw == _zero; } }

    // Factory equivalent of the double constructor.
    public static F142 FromDouble(double d) { return new F142(d); }

    // Decodes the stored bits back to a double (NaN, +Infinity and 0.0 for the special patterns).
    public double ToDouble()
    {
        if (IsNaN) return double.NaN;
        if (IsInf) return double.PositiveInfinity;
        if (IsZero) return 0.0;

        int exp = Exp;
        double d = (_raw & _significandMask);
        d /= _unit;
        if (exp > 0) d += 1.0;   // implicit leading 1 for exp 1..3
        for (int i = 1; i < exp; ++i)
        {
            d *= 2.0;            // scale by 2^(exp-1)
        }
        return d;
    }
}
使用如下:
Using like this:
// 1.5 lies inside the representable range and is stored as a finite value.
F142 res = F142.FromDouble(3.0 / 2.0);
// 0.0 / 0.0 evaluates to NaN, which is stored as the NaN pattern.
F142 nan1 = F142.FromDouble(0.0 / 0.0);
// Negative inputs have no representation (there is no sign bit) and become NaN.
F142 nan2 = F142.FromDouble(-5.0);
// 17.0 exceeds the largest finite value and becomes Inf.
F142 inf1 = F142.FromDouble(17.0);
// 17.0 / 0.0 evaluates to +Infinity, which is stored as Inf.
F142 inf2 = F142.FromDouble(17.0 / 0.0);
干杯
Andi
Cheers
Andi
经过修改,使其可以按单位缩放。
[/EDIT]
以下代码应适用于 IEC62055-41 无符号 2 位指数、14 位尾数的定点值:
Modified so that it scales to units.
[/EDIT]
The following should work for an IEC62055-41 unsigned 2-bit-exponent, 14-bit-mantissa fixed-point value:
// Quantity kinds a FixU_2_14 value can be written or read as. Each kind fixes how many
// raw counts make up one unit of the quantity.
public enum As
{
    Raw,      // raw number     - unit = 1.0
    Energy,   // kWh            - unit = 0.1
    Power,    // W              - unit = 1.0
    Water,    // m3             - unit = 0.1
    Gas,      // m3             - unit = 1.0
    Time,     // min            - unit = 1.0
    // NOTE(review): this member was originally annotated "unit = 0.00001", but FixU_2_14
    // scales Currency by 10000 (i.e. unit = 0.0001) - confirm which one is intended.
    Currency, // local currency - unit = 0.0001 as implemented
}

// Unsigned 16-bit value with a 2-bit decimal exponent (bits 15..14) and a 14-bit mantissa
// (bits 13..0). The decoded raw count is
//     10^e * mantissa + (0, 1, 11 or 111 for e = 0..3) * 16384
// so the four exponent ranges join without overlap. Encoding truncates towards the next
// lower representable count.
public struct FixU_2_14
{
    private const int MantissaBits = 14;
    private const int MantissaMask = (1 << MantissaBits) - 1;    // 0x3FFF = 16383
    private const int ExponentMask = 0x3;
    private const int MaxExponent = 3;
    private const double MantissaRange = 1 << MantissaBits;      // 16384
    // Largest encodable count: 1000 * 16383 + 111 * 16384 = 18201624.
    private const double MaxCount = 1000.0 * MantissaMask + 111.0 * MantissaRange;

    private readonly UInt16 _raw;

    // The stored 16-bit pattern.
    public UInt16 Bits { get { return _raw; } }

    // Decodes the stored bits and expresses the result in the given unit.
    public double this[As unit]
    {
        get
        {
            double step, offset;
            GetStepAndOffset((_raw >> MantissaBits) & ExponentMask, out step, out offset);
            double mantissa = _raw & MantissaMask;
            return (step * mantissa + offset) / CountsPerUnit(unit);
        }
    }

    // Encodes value, given in the stated unit.
    // Throws ArgumentOutOfRangeException when value is NaN, infinite, negative, or larger
    // than the format can hold after scaling to raw counts. The exception's ActualValue is
    // the caller's original (unscaled) argument.
    public FixU_2_14(double value, As unit)
    {
        if (double.IsNaN(value) || double.IsInfinity(value))
        {
            throw new ArgumentOutOfRangeException("value", value, "Value must be a finite number.");
        }
        double counts = value * CountsPerUnit(unit);
        if (counts < 0.0 || counts > MaxCount)
        {
            throw new ArgumentOutOfRangeException("value", value,
                "Value is outside the range that can be encoded for the given unit.");
        }

        // Pick the smallest exponent whose range still reaches the requested count.
        int exponent = 0;
        double step, offset;
        GetStepAndOffset(exponent, out step, out offset);
        while (exponent < MaxExponent && step * MantissaMask + offset < counts)
        {
            exponent++;
            GetStepAndOffset(exponent, out step, out offset);
        }

        // A count that falls in the gap just below a range's offset gives a quotient in
        // (-1, 0), which the cast truncates to mantissa 0.
        int mantissa = (int)((counts - offset) / step);
        _raw = (UInt16)((exponent << MantissaBits) | (mantissa & MantissaMask));
    }

    // Number of raw counts per one unit of the given quantity kind.
    private static double CountsPerUnit(As unit)
    {
        switch (unit)
        {
            case As.Energy:
            case As.Water: return 10.0;
            case As.Currency: return 10000.0;
            default: return 1.0;
        }
    }

    // For an exponent e: step = 10^e and offset = (0, 1, 11, 111)[e] * 16384.
    private static void GetStepAndOffset(int exponent, out double step, out double offset)
    {
        step = 1.0;
        double ones = 0.0;
        for (int i = 0; i < exponent; ++i)
        {
            step *= 10.0;
            ones = ones * 10.0 + 1.0;
        }
        offset = ones * MantissaRange;
    }
}
一些用法:
// Encodes d as a FixU_2_14 in the given unit, decodes it again, and prints one line with
// the input, the 16 stored bits in hex, the decoded value and the difference between them.
static void WriteFix(double d, As unit)
{
    FixU_2_14 encoded = new FixU_2_14(d, unit);
    double decoded = encoded[unit];
    string unitLabel = "[" + unit + "]";
    Console.WriteLine(
        "{0,9} {4,-10} -> 0x{1:x4} = {2,9} {4,-10} (Delta = {3,3})",
        d,
        encoded.Bits,
        decoded,
        d - decoded,
        unitLabel);
}
...
// Individual samples in several units.
WriteFix(0.0, As.Raw);
WriteFix(1.0, As.Water);
WriteFix(2.0, As.Currency);
WriteFix(25.6, As.Energy);
WriteFix(1638.3, As.Energy);
WriteFix(16384.0, As.Water);
WriteFix(18201624.0, As.Gas);
// Sweep from 1,000,000 to 18,000,000 in steps of one million, as Gas.
for (int millions = 1; millions <= 18; ++millions)
{
    WriteFix(1000000 * millions, As.Gas);
}
这导致
0 [Raw] -> 0x0000 = 0 [Raw] (Delta = 0)
1 [Water] -> 0x000a = 1 [Water] (Delta = 0)
2 [Currency] -> 0x4169 = 1.9994 [Currency] (Delta = 0.000599999999999934)
25.6 [Energy] -> 0x0100 = 25.6 [Energy] (Delta = 0)
1638.3 [Energy] -> 0x3fff = 1638.3 [Energy] (Delta = 0)
16384 [Water] -> 0x7999 = 16383.4 [Water] (Delta = 0.600000000000364)
18201624 [Gas] -> 0xffff = 18201624 [Gas] (Delta = 0)
1000000 [Gas] -> 0xa005 = 999924 [Gas] (Delta = 76)
2000000 [Gas] -> 0xc0b5 = 1999624 [Gas] (Delta = 376)
3000000 [Gas] -> 0xc49d = 2999624 [Gas] (Delta = 376)
4000000 [Gas] -> 0xc885 = 3999624 [Gas] (Delta = 376)
5000000 [Gas] -> 0xcc6d = 4999624 [Gas] (Delta = 376)
6000000 [Gas] -> 0xd055 = 5999624 [Gas] (Delta = 376)
7000000 [Gas] -> 0xd43d = 6999624 [Gas] (Delta = 376)
8000000 [Gas] -> 0xd825 = 7999624 [Gas] (Delta = 376)
9000000 [Gas] -> 0xdc0d = 8999624 [Gas] (Delta = 376)
10000000 [Gas] -> 0xdff5 = 9999624 [Gas] (Delta = 376)
11000000 [Gas] -> 0xe3dd = 10999624 [Gas] (Delta = 376)
12000000 [Gas] -> 0xe7c5 = 11999624 [Gas] (Delta = 376)
13000000 [Gas] -> 0xebad = 12999624 [Gas] (Delta = 376)
14000000 [Gas] -> 0xef95 = 13999624 [Gas] (Delta = 376)
15000000 [Gas] -> 0xf37d = 14999624 [Gas] (Delta = 376)
16000000 [Gas] -> 0xf765 = 15999624 [Gas] (Delta = 376)
17000000 [Gas] -> 0xfb4d = 16999624 [Gas] (Delta = 376)
18000000 [Gas] -> 0xff35 = 17999624 [Gas] (Delta = 376)
干杯
Andi
Cheers
Andi
两位指数?
你确定吗?
这个范围并不大,只有 10^0 到 10^3。
据我所知,没有采用这种分辨率的浮点数"标准"实现:您必须自己实现它。
Two bit exponent?
Are you sure?
That's not a lot of range: 10^0 to 10^3 only.
AFAIK there is no "standard" implementation of a float using that resolution: you will have to implement it yourself.
这篇关于无符号浮点数的16位二进制表示的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文