Tensorflow 中来自 TFRecords 的训练分类器 [英] Training classifier from TFRecords in Tensorflow

查看:27
本文介绍了Tensorflow 中来自 TFRecords 的训练分类器的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我已经有一些代码可以从 numpy 数组中训练分类器.但是,我的训练数据集非常大.似乎推荐的解决方案是使用 TFRecords.我尝试将 TFRecords 与我自己的数据集一起使用失败了,所以我逐渐将我的代码减少到最小的玩具.

I already have some code which trains a classifier from numpy arrays. However, my training data set is very large. It seems the recommended solution is to use TFRecords. My attempts to use TFRecords with my own data set have failed, so I have gradually reduced my code to a minimal toy.

示例:

import tensorflow as tf

def readsingleexample(serialized):
    print("readsingleexample", serialized)
    feature = dict()
    feature['x'] = tf.FixedLenFeature([], tf.int64)
    feature['label'] = tf.FixedLenFeature([], tf.int64)
    parsed_example = tf.parse_single_example(serialized, features=feature)
    print(parsed_example)
    return parsed_example['x'], parsed_example['label']

def TestParse(filename):
    record_iterator=tf.python_io.tf_record_iterator(path=filename)
    for string_record in record_iterator:
        example=tf.train.Example()
        example.ParseFromString(string_record)
        print(example.features)

def TestRead(filename):
    record_iterator=tf.python_io.tf_record_iterator(path=filename)
    for string_record in record_iterator:
        feats, label = readsingleexample(string_record)
        print(feats, label)

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def TFRecordsTest(filename):

    example=tf.train.Example(features=tf.train.Features(feature={
        'x': _int64_feature(7),
        'label': _int64_feature(4)
        }))
    writer = tf.python_io.TFRecordWriter(filename)
    writer.write(example.SerializeToString())

    record_iterator=tf.python_io.tf_record_iterator(path=filename)
    for string_record in record_iterator:
        example=tf.train.Example()
        example.ParseFromString(string_record)
        print(example.features)

    dataset=tf.data.TFRecordDataset(filenames=[filename])
    dataset=dataset.map(readsingleexample)
    dataset=dataset.repeat()

    def train_input_fn():
        iterator=dataset.make_one_shot_iterator()
        feats_tensor, labels_tensor = iterator.get_next()
        return {"x":feats_tensor}, labels_tensor

    feature_columns = []
    feature_columns.append(tf.feature_column.numeric_column(key='x'))

    classifier = tf.estimator.DNNClassifier(feature_columns=feature_columns,
                                      hidden_units=[10, 10, 10],
                                      n_classes=2)
    classifier.train(input_fn=train_input_fn, steps=1000)

    return

结果如下:

feature {
  key: "label"
  value {
    int64_list {
      value: 4
    }
  }
}
feature {
  key: "x"
  value {
    int64_list {
      value: 7
    }
  }
}

readsingleexample Tensor("arg0:0", shape=(), dtype=string)
{'x': <tf.Tensor 'ParseSingleExample/ParseSingleExample:1' shape=() dtype=int64>, 'label': <tf.Tensor 'ParseSingleExample/ParseSingleExample:0' shape=() dtype=int64>}
WARNING:tensorflow:Using temporary folder as model directory: C:\Users\eeark\AppData\Local\Temp\tmpcl47b2ut
Traceback (most recent call last):
  File "<pyshell#2>", line 1, in <module>
    tfrecords_test.TFRecordsTest(fn)
  File "C:\_P4\user_feindselig\_python\tfrecords_test.py", line 60, in TFRecordsTest
    classifier.train(input_fn=train_input_fn, steps=1000)
  File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\estimator\estimator.py", line 352, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\estimator\estimator.py", line 812, in _train_model
    features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
  File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\estimator\estimator.py", line 793, in _call_model_fn
    model_fn_results = self._model_fn(features=features, **kwargs)
  File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\estimator\canned\dnn.py", line 354, in _model_fn
    config=config)
  File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\estimator\canned\dnn.py", line 185, in _dnn_model_fn
    logits = logit_fn(features=features, mode=mode)
  File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\estimator\canned\dnn.py", line 91, in dnn_logit_fn
    features=features, feature_columns=feature_columns)
  File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 273, in input_layer
    trainable, cols_to_vars)
  File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 198, in _internal_input_layer
    trainable=trainable)
  File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 2080, in _get_dense_tensor
    return inputs.get(self)
  File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 1883, in get
    transformed = column._transform_feature(self)  # pylint: disable=protected-access
  File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 2048, in _transform_feature
    input_tensor = inputs.get(self.key)
  File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 1870, in get
    feature_tensor = self._get_raw_feature_as_tensor(key)
  File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 1924, in _get_raw_feature_as_tensor
    key, feature_tensor))
ValueError: Feature (key: x) cannot have rank 0. Give: Tensor("IteratorGetNext:0", shape=(), dtype=int64, device=/device:CPU:0)

错误是什么意思?可能出什么问题了?

What does the error mean? What could be going wrong?

推荐答案

以下似乎有效:至少没有出现错误.使用 tf.parse_example([serialized], ...) 代替 tf.parse_single_example(serialized, ...).(此外,合成数据中的标签被更改为小于类数.)

The following appears to work: no errors are raised, at least. tf.parse_example([serialized], ...) is used instead of tf.parse_single_example(serialized, ...). (Also, the label in the synthetic data was altered to be less than the number of classes.)

import tensorflow as tf

def readsingleexample(serialized):
    print("readsingleexample", serialized)
    feature = dict()
    feature['x'] = tf.FixedLenFeature([], tf.int64)
    feature['label'] = tf.FixedLenFeature([], tf.int64)
    parsed_example = tf.parse_example([serialized], features=feature)
    print(parsed_example)
    return parsed_example['x'], parsed_example['label']

def TestParse(filename):
    record_iterator=tf.python_io.tf_record_iterator(path=filename)
    for string_record in record_iterator:
        example=tf.train.Example()
        example.ParseFromString(string_record)
        print(example.features)

def TestRead(filename):
    record_iterator=tf.python_io.tf_record_iterator(path=filename)
    for string_record in record_iterator:
        feats, label = readsingleexample(string_record)
        print(feats, label)

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def TFRecordsTest(filename):

    example=tf.train.Example(features=tf.train.Features(feature={
        'x': _int64_feature(7),
        'label': _int64_feature(0)
        }))
    writer = tf.python_io.TFRecordWriter(filename)
    writer.write(example.SerializeToString())

    record_iterator=tf.python_io.tf_record_iterator(path=filename)
    for string_record in record_iterator:
        example=tf.train.Example()
        example.ParseFromString(string_record)
        print(example.features)

    dataset=tf.data.TFRecordDataset(filenames=[filename])
    dataset=dataset.map(readsingleexample)
    dataset=dataset.repeat()

    def train_input_fn():
        iterator=dataset.make_one_shot_iterator()
        feats_tensor, labels_tensor = iterator.get_next()
        return {'x':feats_tensor}, labels_tensor

    feature_columns = []
    feature_columns.append(tf.feature_column.numeric_column(key='x'))

    classifier = tf.estimator.DNNClassifier(feature_columns=feature_columns,
                                      hidden_units=[10, 10, 10],
                                      n_classes=2)
    classifier.train(input_fn=train_input_fn, steps=1000)

    return

这篇关于Tensorflow 中来自 TFRecords 的训练分类器的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆