如何去序列化一个大的,复杂的对象? [英] How to go about serializing a large, complex object?

查看:213
本文介绍了如何去序列化一个大的,复杂的对象?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我有一个“User”类,有40多个私有变量,包括复杂的对象,如私钥/公钥(QCA库)、自定义 QObject 等。我的想法是让该类有一个名为 sign() 的函数,它对自身进行加密、签名和序列化,并返回一个 QByteArray,之后可以将其存储在 SQLite 的 blob 中。



序列化复杂对象的最佳方法是什么?使用 QMetaObject 迭代属性?将它转换为protobuf对象?



是否可以转换为字符数组?

解决方案


是否可以转换为字符数组?


不可以,因为这样转换的是你一无所知的 QObject 内部结构、第二次运行程序时已经失效的指针等。



TL;DR:对于显式数据元素,手动实现是可以的;对于 QObject 和 Q_GADGET 类,利用元对象系统可以省去一些繁琐的工作。



最简单的解决方案可能是为对象和你使用的类型实现 QDataStream 运算符。确保遵循良好的做法:每个将来可能改变其所持数据格式的类,都必须输出一个格式标识符。



例如,让我们来看下面的类:

  class User {
  QString m_name;
  QList<CryptoKey> m_keys;
  QList<Address> m_addresses;
  // Generic property bag; the stream operators dump and load it via
  // dumpProperties / loadProperties.
  QObject m_props;
  ...
  // Friends, so the stream operators can reach the private members.
  friend QDataStream & operator<<(QDataStream &, const User &);
  friend QDataStream & operator>>(QDataStream &, User &);
public:
  ...
};
Q_DECLARE_METATYPE(User) // no semi-colon

类地址{
QString m_line1;
QString m_line2;
QString m_postCode;
...
friend QDataStream& operator<<(QDataStream& const address&);
friend QDataStream& operator>>(QDataStream& Address;&;);
public:
...
};
Q_DECLARE_METATYPE(Address)//无分号!

Q_DECLARE_METATYPE 宏使这些类为 QVariant 和 QMetaType 类型系统所知。因此,例如,可以将 Address 赋值给 QVariant ,将这样的 QVariant 转换为 Address ,将变体直接流式传输到数据流等。



首先,让我们解释如何转储 QObject 属性:

  /// Returns @p names with every entry that starts with "_q_" removed.
QList<QByteArray> publicNames(QList<QByteArray> names) {
  const auto isInternal = [](const QByteArray & candidate) {
    return candidate.startsWith("_q_");
  };
  names.erase(std::remove_if(names.begin(), names.end(), isInternal), names.end());
  return names;
}

/// A property is dumped only when it is stored, not constant, and both
/// readable and writable.
bool isDumpable(const QMetaProperty & prop) {
  if (! prop.isStored() || prop.isConstant()) return false;
  return prop.isReadable() && prop.isWritable();
}

/// Writes every dumpable property of @p obj to @p s.
///
/// Layout: a quint8 format tag (0), the list of public dynamic property
/// names, one QVariant per listed name, then a (name, value) pair for each
/// dumpable static property, terminated by an empty QByteArray.
void dumpProperties(QDataStream & s, const QObject & obj)
{
  s << quint8(0); // format
  const QList<QByteArray> names = publicNames(obj.dynamicPropertyNames());
  s << names;
  for (const QByteArray & name : names) s << obj.property(name.constData());
  const QMetaObject * const mObj = obj.metaObject();
  for (int i = 0; i < mObj->propertyCount(); ++i) {
    const QMetaProperty prop = mObj->property(i);
    if (! isDumpable(prop)) continue;
    const auto name = QByteArray::fromRawData(prop.name(), static_cast<int>(strlen(prop.name())));
    if (! name.isEmpty()) s << name << prop.read(&obj);
  }
  s << QByteArray(); // an empty name ends the static-property section
}

一般来说,如果我们要处理的数据来自一个还没有 m_props 成员的 User ,就需要能够清除这些属性。每次扩展所存储的对象并升级序列化格式时,都会用到这个惯用法。

  /// Clears the public dynamic properties and the dumpable static properties of @p obj.
void clearProperties(QObject & obj)
{
  const QList<QByteArray> names = publicNames(obj.dynamicPropertyNames());
  // A default-constructed (invalid) QVariant is used as the cleared value.
  const QVariant null;
  for (const QByteArray & name : names) obj.setProperty(name.constData(), null);
  const QMetaObject * const mObj = obj.metaObject();
  for (int i = 0; i < mObj->propertyCount(); ++i) {
    const QMetaProperty prop = mObj->property(i);
    if (! isDumpable(prop)) continue;
    // Use the property's own reset function when it has one.
    if (prop.isResettable()) {
      prop.reset(&obj);
      continue;
    }
    prop.write(&obj, null);
  }
}

现在我们知道如何从流中恢复属性:

  /// Restores properties of @p obj from @p s.
///
/// Expects a quint8 format tag, the list of dynamic property names, one
/// QVariant per listed name, then (name, value) pairs until an empty name.
/// Pairs naming an unknown or non-dumpable property are read and skipped.
/// A format tag other than 0 marks the stream as corrupt and reads nothing more.
void loadProperties(QDataStream & s, QObject & obj)
{
  quint8 format = 0;
  s >> format;
  // We only support one format at the moment.
  if (format != 0) {
    s.setStatus(QDataStream::ReadCorruptData);
    return;
  }
  QList<QByteArray> names;
  s >> names;
  for (const QByteArray & name : names) {
    QVariant val;
    s >> val;
    obj.setProperty(name.constData(), val);
  }
  const QMetaObject * const mObj = obj.metaObject();
  forever {
    QByteArray name;
    s >> name;
    if (name.isEmpty()) break;
    // The value is always consumed so the stream stays aligned.
    QVariant value;
    s >> value;
    const int idx = mObj->indexOfProperty(name.constData());
    if (idx < 0) continue;
    const QMetaProperty prop = mObj->property(idx);
    if (! isDumpable(prop)) continue;
    prop.write(&obj, value);
  }
}



我们可以实现流运算符来序列化我们的对象:

  // Expands to nothing; a readability marker for a switch case that intentionally continues into the next one.
  #define fallthrough 

/// Serializes @p user: a quint8 format tag (1), then name, keys, addresses,
/// and finally the property dump of m_props.
QDataStream & operator<<(QDataStream & s, const User & user) {
  s << quint8(1) // format
    << user.m_name << user.m_keys << user.m_addresses;
  // m_props is a member of user; dumpProperties takes the QObject by reference.
  dumpProperties(s, user.m_props);
  return s;
}

/// Deserializes @p user, dispatching on the leading quint8 format tag.
///
/// Format 1 is the layout operator<< writes. Format 0 is treated as an older
/// layout carrying only name and keys (assumption drawn from the members this
/// branch resets — confirm against real format-0 data). Any other tag marks
/// the stream as corrupt and leaves @p user untouched.
QDataStream & operator>>(QDataStream & s, User & user) {
  quint8 format = 0;
  s >> format;
  switch (format) {
  case 0:
    s >> user.m_name >> user.m_keys;
    // Members absent from this format are reset.
    user.m_addresses.clear();
    clearProperties(user.m_props);
    break;
  case 1:
    s >> user.m_name >> user.m_keys >> user.m_addresses;
    loadProperties(s, user.m_props);
    break;
  default:
    s.setStatus(QDataStream::ReadCorruptData);
    break;
  }
  return s;
}

/// Streams @p address as a quint8 format tag (0) followed by line1, line2 and postCode.
QDataStream & operator<<(QDataStream & s, const Address & address) {
  s << quint8(0) // format
    << address.m_line1 << address.m_line2 << address.m_postCode;
  return s;
}

/// Reads @p address from @p s; only format 0 is understood.
/// Any other format tag marks the stream as corrupt and leaves @p address untouched.
QDataStream & operator>>(QDataStream & s, Address & address) {
  quint8 format = 0;
  s >> format;
  switch (format) {
  case 0:
    s >> address.m_line1 >> address.m_line2 >> address.m_postCode;
    break;
  default:
    // Unknown layout: flag the stream rather than silently skipping the payload.
    s.setStatus(QDataStream::ReadCorruptData);
    break;
  }
  return s;
}

只要声明了属性并添加 Q_GADGET 宏(而不是 Q_OBJECT ),属性系统同样适用于任何其他类。该功能从 Qt 5.5 开始支持。



假设我们声明了 Address 类,如下所示:



类地址{
Q_GADGET
Q_PROPERTY(QString line1 MEMBER m_line1)
Q_PROPERTY(QString line2 MEMBER m_line2)
Q_PROPERTY(QString postCode MEMBER m_postCode)

QString m_line1;
QString m_line2;
QString m_postCode;
...
friend QDataStream& operator<<(QDataStream& const address&);
friend QDataStream& operator>>(QDataStream& Address;&;);
public:
...
};

然后,让我们用针对小工具(gadget)修改过的 [dump|clear|load]Properties 来实现数据流运算符:

  /// Streams @p address as a quint8 format tag (0) followed by its property dump.
QDataStream & operator<<(QDataStream & s, const Address & address) {
  s << quint8(0); // format
  dumpProperties(s, &address);
  return s;
}

/// Reads the quint8 format tag, then restores @p address from its property dump.
/// A tag other than 0 marks the stream as corrupt and leaves @p address untouched.
QDataStream & operator>>(QDataStream & s, Address & address) {
  quint8 format = 0;
  s >> format;
  if (format != 0) {
    s.setStatus(QDataStream::ReadCorruptData);
    return s;
  }
  loadProperties(s, &address);
  return s;
}



即使属性集发生了变化,我们也不需要更改格式指示符。我们仍应保留格式指示符,以防将来出现无法再用简单属性转储来表达的其他更改。这在大多数情况下不太可能发生,但必须记住:一旦决定不使用格式说明符,流数据的格式就从此固定下来,之后再也无法更改!



最后,属性处理程序是稍微裁剪和修改的变体,用于 QObject 属性:

  /// Convenience overload: dumps @p gadget using the static meta-object of its type.
template <class Gadget>
void dumpProperties(QDataStream & s, const Gadget * gadget) {
  const QMetaObject & meta = Gadget::staticMetaObject;
  dumpProperties(s, meta, gadget);
}

/// Writes every dumpable property of the gadget described by @p mObj.
///
/// Layout: a quint8 format tag (0), a (name, value) pair per dumpable
/// property, then an empty QByteArray as terminator.
void dumpProperties(QDataStream & s, const QMetaObject & mObj, const void * gadget)
{
  s << quint8(0); // format
  for (int i = 0; i < mObj.propertyCount(); ++i) {
    const QMetaProperty prop = mObj.property(i);
    if (! isDumpable(prop)) continue;
    const auto name = QByteArray::fromRawData(prop.name(), static_cast<int>(strlen(prop.name())));
    if (! name.isEmpty()) s << name << prop.readOnGadget(gadget);
  }
  s << QByteArray(); // an empty name ends the property list
}

template< typename T> void clearProperties(T * gadget) {
clearProperties(T :: staticMetaObject,gadget);
}

void clearProperties(const QMetaObject& mObj,void * gadget)
{
const QVariant null;
for(int i = 0; i auto prop = mObj.property(i);
if(! isDumpable(prop))continue;
if(prop.isResettable()){
prop.resetOnGadget(gadget);
continue;
}
prop.writeOnGadget ;
}
}

模板< typename T> void loadProperties(QDataStream& s,T * gadget){
loadProperties(s, T :: staticMetaObject,gadget);
}

/// Restores gadget properties from @p s.
///
/// Expects a quint8 format tag followed by (name, value) pairs until an empty
/// name. Pairs naming an unknown or non-dumpable property are read and skipped.
/// A format tag other than 0 marks the stream as corrupt and reads nothing more.
void loadProperties(QDataStream & s, const QMetaObject & mObj, void * gadget)
{
  quint8 format = 0;
  s >> format;
  if (format != 0) {
    s.setStatus(QDataStream::ReadCorruptData);
    return;
  }
  forever {
    QByteArray name;
    s >> name;
    if (name.isEmpty()) break;
    // The value is always consumed so the stream stays aligned.
    QVariant value;
    s >> value;
    const int index = mObj.indexOfProperty(name.constData());
    if (index < 0) continue;
    const QMetaProperty prop = mObj.property(index);
    if (! isDumpable(prop)) continue;
    prop.writeOnGadget(gadget, value);
  }
}

TODO loadProperties 实现中没有解决的是清除对象中存在但不存在于序列化中的属性。



当涉及到 QDataStream 格式的内部版本时,确定整个数据流的版本是非常重要的。必须阅读文档



还必须决定如何在软件版本之间处理兼容性。有几种方法:


  1. (最典型且不幸的)没有兼容性:不存储格式信息。新成员以ad-hoc方式添加到序列化。较旧版本的软件在面对较新的数据时将呈现未定义的行为。


  2. 向后兼容性:格式信息存储在每个自定义类型的序列化中。新版本可以正常处理旧版本的数据。旧版本必须检测未处理的格式,中止反序列化,并向用户指示错误。


  3. 完全向后和向前兼容性:每个序列化的自定义类型都存储在 QByteArray 或类似的容器中。这样,您就能知道整个类型的数据记录有多长。 QDataStream 版本必须是固定的。要读取自定义类型,首先读取其字节数组,然后建立一个 QBuffer ,再用 QDataStream 从中读取。您读取那些能够以已知格式解析的元素,并忽略其余数据。这就强制格式采用增量方式演进:较新的格式只能在现有格式之后追加元素。但是,如果较新的格式放弃了旧格式中的某个数据元素,它仍然必须写出该元素,只是使用 null 或其他安全的默认值,以便让旧版本的代码保持“开心”。


如果您担心格式字节可能不够用,可以采用一种可变长度编码方案,称为扩展或扩展八位字节,它常见于各种国际电联(ITU)标准(例如 Q.931 4.5.5 承载能力信息单元)。想法如下:八位字节(字节)的最高位用于指示该值是否需要更多的八位字节来表示。这样每个字节有 7 位用于表示值,1 位用于标记扩展。如果该位被置 1,则继续读取随后的八位字节,并以小端方式将它们拼接到现有值上。下面是一种可能的实现:

  class VarLengthInt {
public:
quint64 val;
VarLengthInt(quint64v):val(v){Q_ASSERT(v <(1ULL<<(7 * 8))) }
operator quint64()const {return val; }
};

/// Writes @p v as 7-bit groups, least-significant group first; every octet
/// except the last has its top bit set to signal that more follow.
QDataStream & operator<<(QDataStream & s, VarLengthInt v) {
  while (v.val > 127) {
    s << static_cast<quint8>((v.val & 0x7F) | 0x80);
    v.val = v.val >> 7;
  }
  Q_ASSERT(v.val <= 127);
  s << static_cast<quint8>(v.val);
  return s;
}

/// Reads a value written by the matching operator<<.
///
/// Octets arrive least-significant group first, so each 7-bit payload is
/// placed above the bits collected so far. A sequence that still signals
/// continuation after 8 octets (56 payload bits) marks the stream as corrupt.
QDataStream & operator>>(QDataStream & s, VarLengthInt & v) {
  v.val = 0;
  for (int shift = 0; ; shift += 7) {
    if (shift >= 7*8) {
      s.setStatus(QDataStream::ReadCorruptData);
      break;
    }
    quint8 octet = 0;
    s >> octet;
    v.val |= static_cast<quint64>(octet & 0x7F) << shift;
    if (! (octet & 0x80)) break;
  }
  return s;
}

VarLengthInt 具有可变长度,并且始终使用给定值可能的最小字节数:1字节直到0x7F,2字节直到0x3FFF,3字节直到0x1F'FFFF,4字节直到0x0FFF'FFFF等。单引号在 C ++ 14整数文字中有效。



它将使用如下:

  /// Serializes @p user with a variable-length format tag (1), then name, keys,
/// addresses, and finally the property dump of m_props.
QDataStream & operator<<(QDataStream & s, const User & user) {
  s << VarLengthInt(1) // format
    << user.m_name << user.m_keys << user.m_addresses;
  // m_props is a member of user; dumpProperties takes the QObject by reference.
  dumpProperties(s, user.m_props);
  return s;
}

/// Reads the variable-length format tag first; the elided part dispatches on it.
QDataStream & operator>>(QDataStream & s, User & user) {
  // Constructed from an explicit value: VarLengthInt declares only a quint64 constructor.
  VarLengthInt format(0);
  s >> format;
  ...
  return s;
}


I have a "User" class with 40+ private variables including complex objects like private/public keys (QCA library), custom QObjects etc. The idea is that the class has a function called sign() which encrypts, signs, serializes itself and returns a QByteArray which can then be stored in a SQLite blob.

What's the best approach to serialize a complex object? Iterating through the properties with QMetaObject? Converting it to a protobuf object?

Could it be cast to a char array?

解决方案

Could it be cast to a char array?

No, because you'd be casting QObject's internals that you know nothing about, pointers that are not valid the second time you run your program, etc.

TL;DR: Implementing it manually is OK for explicit data elements, and leveraging the metaobject system for QObject and Q_GADGET classes will help with some of the drudgery.

The simplest solution might be to implement QDataStream operators for the object and the types you use. Make sure to follow good practice: each class that could conceivably ever change the format of data it holds must emit a format identifier.

For example, let's take the following classes:

// Example aggregate whose private state is serialized through the befriended
// QDataStream operators declared below.
class User {
  QString m_name;
  QList<CryptoKey> m_keys;
  QList<Address> m_addresses;
  // Generic property bag; the stream operators dump and load it via
  // dumpProperties / loadProperties.
  // NOTE(review): QObject is not copyable, which makes User non-copyable too —
  // confirm that this is compatible with Q_DECLARE_METATYPE on the Qt version in use.
  QObject m_props;
  ...
  // Friends, so the stream operators can reach the private members.
  friend QDataStream & operator<<(QDataStream &, const User &);
  friend QDataStream & operator>>(QDataStream &, User &);
public:
  ...
};
Q_DECLARE_METATYPE(User) // no semi-colon

// Plain postal-address record serialized through the befriended
// QDataStream operators.
class Address {
  QString m_line1;
  QString m_line2;
  QString m_postCode;
  ...
  // Friends, so the stream operators can reach the private members.
  friend QDataStream & operator<<(QDataStream &, const Address &);
  friend QDataStream & operator>>(QDataStream &, Address &);
public:
  ...
};
Q_DECLARE_METATYPE(Address) // no semi-colon!

The Q_DECLARE_METATYPE macro makes the classes known to the QVariant and the QMetaType type system. Thus, for example, it's possible to assign an Address to a QVariant, convert such a QVariant to Address, to stream the variant directly to a datastream, etc.

First, let's address how to dump the QObject properties:

/// Returns @p names with every entry that starts with "_q_" removed.
QList<QByteArray> publicNames(QList<QByteArray> names) {
  const auto isInternal = [](const QByteArray & candidate) {
    return candidate.startsWith("_q_");
  };
  const auto firstRemoved = std::remove_if(names.begin(), names.end(), isInternal);
  names.erase(firstRemoved, names.end());
  return names;
}

/// A property is dumped only when it is stored, not constant, and both
/// readable and writable.
bool isDumpable(const QMetaProperty & prop) {
  if (! prop.isStored() || prop.isConstant()) return false;
  return prop.isReadable() && prop.isWritable();
}

/// Writes every dumpable property of @p obj to @p s.
///
/// Layout: a quint8 format tag (0), the list of public dynamic property
/// names, one QVariant per listed name, then a (name, value) pair for each
/// dumpable static property, terminated by an empty QByteArray.
void dumpProperties(QDataStream & s, const QObject & obj)
{
  s << quint8(0); // format
  const QList<QByteArray> names = publicNames(obj.dynamicPropertyNames());
  s << names;
  for (const QByteArray & name : names) s << obj.property(name.constData());
  const QMetaObject * const mObj = obj.metaObject();
  for (int i = 0; i < mObj->propertyCount(); ++i) {
    const QMetaProperty prop = mObj->property(i);
    if (! isDumpable(prop)) continue;
    const auto name = QByteArray::fromRawData(prop.name(), static_cast<int>(strlen(prop.name())));
    if (! name.isEmpty()) s << name << prop.read(&obj);
  }
  s << QByteArray(); // an empty name ends the static-property section
}

In general, if we were to deal with data from a User that didn't have the m_props member, we'd need to be able to clear the properties. This idiom will come up every time you extend the stored object and upgrade the serialization format.

/// Clears the public dynamic properties and the dumpable static properties of @p obj.
void clearProperties(QObject & obj)
{
  const QList<QByteArray> names = publicNames(obj.dynamicPropertyNames());
  // A default-constructed (invalid) QVariant is used as the cleared value.
  const QVariant null;
  for (const QByteArray & name : names) obj.setProperty(name.constData(), null);
  const QMetaObject * const mObj = obj.metaObject();
  for (int i = 0; i < mObj->propertyCount(); ++i) {
    const QMetaProperty prop = mObj->property(i);
    if (! isDumpable(prop)) continue;
    // Use the property's own reset function when it has one.
    if (prop.isResettable()) {
      prop.reset(&obj);
      continue;
    }
    prop.write(&obj, null);
  }
}

Now we know how to restore the properties from a stream:

/// Restores properties of @p obj from @p s.
///
/// Expects a quint8 format tag, the list of dynamic property names, one
/// QVariant per listed name, then (name, value) pairs until an empty name.
/// Pairs naming an unknown or non-dumpable property are read and skipped.
/// A format tag other than 0 marks the stream as corrupt and reads nothing more.
void loadProperties(QDataStream & s, QObject & obj)
{
  quint8 format = 0;
  s >> format;
  // We only support one format at the moment.
  if (format != 0) {
    s.setStatus(QDataStream::ReadCorruptData);
    return;
  }
  QList<QByteArray> names;
  s >> names;
  for (const QByteArray & name : names) {
    QVariant val;
    s >> val;
    obj.setProperty(name.constData(), val);
  }
  const QMetaObject * const mObj = obj.metaObject();
  forever {
    QByteArray name;
    s >> name;
    if (name.isEmpty()) break;
    // The value is always consumed so the stream stays aligned.
    QVariant value;
    s >> value;
    const int idx = mObj->indexOfProperty(name.constData());
    if (idx < 0) continue;
    const QMetaProperty prop = mObj->property(idx);
    if (! isDumpable(prop)) continue;
    prop.write(&obj, value);
  }
}

We can thus implement the stream operators to serialize our objects:

// Expands to nothing; a readability marker for a switch case that intentionally continues into the next one.
#define fallthrough

/// Serializes @p user: a quint8 format tag (1), then name, keys, addresses,
/// and finally the property dump of m_props.
QDataStream & operator<<(QDataStream & s, const User & user) {
  s << quint8(1) // format
    << user.m_name << user.m_keys << user.m_addresses;
  // m_props is a member of user; dumpProperties takes the QObject by reference.
  dumpProperties(s, user.m_props);
  return s;
}

/// Deserializes @p user, dispatching on the leading quint8 format tag.
///
/// Format 1 is the layout operator<< writes. Format 0 is treated as an older
/// layout carrying only name and keys (assumption drawn from the members this
/// branch resets — confirm against real format-0 data). Any other tag marks
/// the stream as corrupt and leaves @p user untouched.
QDataStream & operator>>(QDataStream & s, User & user) {
  quint8 format = 0;
  s >> format;
  switch (format) {
  case 0:
    s >> user.m_name >> user.m_keys;
    // Members absent from this format are reset.
    user.m_addresses.clear();
    clearProperties(user.m_props);
    break;
  case 1:
    s >> user.m_name >> user.m_keys >> user.m_addresses;
    loadProperties(s, user.m_props);
    break;
  default:
    s.setStatus(QDataStream::ReadCorruptData);
    break;
  }
  return s;
}

/// Streams @p address as a quint8 format tag (0) followed by line1, line2 and postCode.
QDataStream & operator<<(QDataStream & s, const Address & address) {
  s << quint8(0);
  s << address.m_line1;
  s << address.m_line2;
  s << address.m_postCode;
  return s;
}

/// Reads @p address from @p s; only format 0 is understood.
/// Any other format tag marks the stream as corrupt and leaves @p address untouched.
QDataStream & operator>>(QDataStream & s, Address & address) {
  quint8 format = 0;
  s >> format;
  switch (format) {
  case 0:
    s >> address.m_line1 >> address.m_line2 >> address.m_postCode;
    break;
  default:
    // Unknown layout: flag the stream rather than silently skipping the payload.
    s.setStatus(QDataStream::ReadCorruptData);
    break;
  }
  return s;
}

The property system will also work for any other class, as long as you declare its properties and add the Q_GADGET macro (instead of Q_OBJECT). This is supported from Qt 5.5 onwards.

Suppose that we declared our Address class as follows:

// Address declared as a gadget: each field is exposed as a property backed
// directly by the member named after MEMBER.
class Address {
  Q_GADGET
  Q_PROPERTY(QString line1 MEMBER m_line1)
  Q_PROPERTY(QString line2 MEMBER m_line2)
  Q_PROPERTY(QString postCode MEMBER m_postCode)

  QString m_line1;
  QString m_line2;
  QString m_postCode;
  ...
  // Friends, so the stream operators can reach the private members.
  friend QDataStream & operator<<(QDataStream &, const Address &);
  friend QDataStream & operator>>(QDataStream &, Address &);
public:
  ...
};

Let's then declare the datastream operators in terms of [dump|clear|load]Properties modified for dealing with gadgets:

/// Streams @p address as a quint8 format tag (0) followed by its property dump.
QDataStream & operator<<(QDataStream & s, const Address & address) {
  const quint8 format = 0;
  s << format;
  const Address * const gadget = &address;
  dumpProperties(s, gadget);
  return s;
}

/// Reads the quint8 format tag, then restores @p address from its property dump.
/// A tag other than 0 marks the stream as corrupt and leaves @p address untouched.
QDataStream & operator>>(QDataStream & s, Address & address) {
  quint8 format = 0;
  s >> format;
  if (format != 0) {
    s.setStatus(QDataStream::ReadCorruptData);
    return s;
  }
  loadProperties(s, &address);
  return s;
}

We do not need to change the format designator even if the property set has been changed. We should retain the format designator in case we had other changes that couldn't be expressed as a simple property dump anymore. This is unlikely in most cases, but one must remember that a decision not to use a format specifier immediately sets the format of the streamed data in stone. It's not subsequently possible to change it!

Finally, the property handlers are slightly cut-down and modified variants of the ones used for the QObject properties:

/// Convenience overload: dumps @p gadget using the static meta-object of its type.
template <class Gadget>
void dumpProperties(QDataStream & s, const Gadget * gadget) {
  const QMetaObject & meta = Gadget::staticMetaObject;
  dumpProperties(s, meta, gadget);
}

/// Writes every dumpable property of the gadget described by @p mObj.
///
/// Layout: a quint8 format tag (0), a (name, value) pair per dumpable
/// property, then an empty QByteArray as terminator.
void dumpProperties(QDataStream & s, const QMetaObject & mObj, const void * gadget)
{
  s << quint8(0); // format
  for (int i = 0; i < mObj.propertyCount(); ++i) {
    const QMetaProperty prop = mObj.property(i);
    if (! isDumpable(prop)) continue;
    const auto name = QByteArray::fromRawData(prop.name(), static_cast<int>(strlen(prop.name())));
    if (! name.isEmpty()) s << name << prop.readOnGadget(gadget);
  }
  s << QByteArray(); // an empty name ends the property list
}

template <typename T> void clearProperties(T * gadget) {
  clearProperties(T::staticMetaObject, gadget);
}

void clearProperties(const QMetaObject & mObj, void * gadget)
{
  const QVariant null;
  for (int i = 0; i < mObj.propertyCount(), ++i) {
    auto prop = mObj.property(i);
    if (! isDumpable(prop)) continue;
    if (prop.isResettable()) {
      prop.resetOnGadget(gadget);
      continue;
    }
    prop.writeOnGadget(gadget, null);
  }
}

template <typename T> void loadProperties(QDataStream & s, T * gadget) {
  loadProperties(s, T::staticMetaObject, gadget);
}

/// Restores gadget properties from @p s.
///
/// Expects a quint8 format tag followed by (name, value) pairs until an empty
/// name. Pairs naming an unknown or non-dumpable property are read and skipped.
/// A format tag other than 0 marks the stream as corrupt and reads nothing more.
void loadProperties(QDataStream & s, const QMetaObject & mObj, void * gadget)
{
  quint8 format = 0;
  s >> format;
  if (format != 0) {
    s.setStatus(QDataStream::ReadCorruptData);
    return;
  }
  forever {
    QByteArray name;
    s >> name;
    if (name.isEmpty()) break;
    // The value is always consumed so the stream stays aligned.
    QVariant value;
    s >> value;
    const int index = mObj.indexOfProperty(name.constData());
    if (index < 0) continue;
    const QMetaProperty prop = mObj.property(index);
    if (! isDumpable(prop)) continue;
    prop.writeOnGadget(gadget, value);
  }
}

TODO An issue that was not addressed in the loadProperties implementations is to clear the properties that are present in the object but not present in the serialization.

It is very important to establish how the entire data stream is versioned when it comes to the internal version of QDataStream formats. The documentation is required reading.

One also has to decide how compatibility is handled between the versions of the software. There are several approaches:

  1. (Most typical and unfortunate) No compatibility: No format information is stored. New members are added to the serialization in an ad-hoc fashion. Older versions of the software will exhibit undefined behavior when faced with newer data. Newer versions will do the same with older data.

  2. Backward compatibility: Format information is stored in the serialization of each custom type. New versions can properly deal with older versions of the data. Older versions must detect an unhandled format, abort deserialization, and indicate an error to the user. Ignoring newer formats leads to undefined behavior.

  3. Full backward-and-forward compatibility: Each serialized custom type is stored in a QByteArray or a similar container. By doing this, you have information on how long the data record for the entire type is. The QDataStream version must be fixed. To read a custom type, its byte array is read first, then a QBuffer is set up that you use a QDataStream to read from. You read the elements you can parse in the formats you know of, and ignore the rest of the data. This forces an incremental approach to formats, where a newer format can only append elements over an existing format. But, if a newer format abandons some data element from an older format, it must still dump it, but with a null or otherwise safe default value that keeps the older versions of your code "happy".

If you think that the format bytes may ever run out, you can employ a variable-length encoding scheme, known as extension or extended octets, familiar across various ITU standards (e.g. Q.931 4.5.5 Bearer Capability information element). The idea is as follows: the highest bit of an octet (byte) is used to indicate whether the value needs more octets for representation. This makes the byte have 7 bits to represent the value, and 1 bit to mark extension. If the bit is set, you read the subsequent octets and concatenate them in little-endian fashion to the existing value. Here is how you might do this:

/// Unsigned integer that is streamed as a variable number of octets, 7 payload bits each.
class VarLengthInt {
public:
  quint64 val;
  /// Defaults to 0 so a value can be declared first and then filled by operator>>.
  /// Asserts that the value fits in 7*8 = 56 bits.
  VarLengthInt(quint64 v = 0) : val(v) { Q_ASSERT(v < (1ULL<<(7*8))); }
  operator quint64() const { return val; }
};

/// Writes @p v as 7-bit groups, least-significant group first; every octet
/// except the last has its top bit set to signal that more follow.
QDataStream & operator<<(QDataStream & s, VarLengthInt v) {
  quint64 & remaining = v.val;
  for (; remaining > 127; remaining >>= 7) {
    const quint64 group = (remaining & 0x7F) | 0x80;
    s << static_cast<quint8>(group);
  }
  Q_ASSERT(remaining <= 127);
  s << static_cast<quint8>(remaining);
  return s;
}

/// Reads a value written by the matching operator<<.
///
/// Octets arrive least-significant group first, so each 7-bit payload is
/// placed above the bits collected so far. A sequence that still signals
/// continuation after 8 octets (56 payload bits) marks the stream as corrupt.
QDataStream & operator>>(QDataStream & s, VarLengthInt & v) {
  v.val = 0;
  for (int shift = 0; ; shift += 7) {
    if (shift >= 7*8) {
      s.setStatus(QDataStream::ReadCorruptData);
      break;
    }
    quint8 octet = 0;
    s >> octet;
    v.val |= static_cast<quint64>(octet & 0x7F) << shift;
    if (! (octet & 0x80)) break;
  }
  return s;
}

The serialization of VarLengthInt has variable length and always uses the minimum number of bytes possible for a given value: 1 byte up to 0x7F, 2 bytes up to 0x3FFF, 3 bytes up to 0x1F'FFFF, 4 bytes up to 0x0FFF'FFFF, etc. Apostrophes are valid in C++14 integer literals.

It would be used as follows:

/// Serializes @p user with a variable-length format tag (1), then name, keys,
/// addresses, and finally the property dump of m_props.
QDataStream & operator<<(QDataStream & s, const User & user) {
  s << VarLengthInt(1) // format
    << user.m_name << user.m_keys << user.m_addresses;
  // m_props is a member of user; dumpProperties takes the QObject by reference.
  dumpProperties(s, user.m_props);
  return s;
}

/// Reads the variable-length format tag first; the elided part dispatches on it.
QDataStream & operator>>(QDataStream & s, User & user) {
  // Constructed from an explicit value: VarLengthInt declares only a quint64 constructor.
  VarLengthInt format(0);
  s >> format;
  ...
  return s;
}

这篇关于如何去序列化一个大的,复杂的对象?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆