Windows 10中的keras-gpu,tensorflow,cudatoolkit和cuDNN之间的最新版本兼容性是什么? [英] What are current version compatibility between keras-gpu, tensorflow, cudatoolkit, and cuDNN in windows 10?
问题描述
这些是我当前通过Windows 10 64位计算机上的anaconda安装的版本,如下所示:
These are the versions I currently have installed via anaconda in a windows 10 64-bit machine as shown here:
这是我正在尝试运行的"简单卷积神经网络训练":
This is the "simple convolutional neural network training" I am trying to run:
import numpy as np
import tensorflow as tf
import keras as k
from keras.optimizers import SGD, Adam, Nadam
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# BUG FIX: the original called set_memory_growth(gpus[0], True) but the line
# defining `gpus` was commented out, raising NameError before training began.
# Query the physical GPU list first, and only enable memory growth when a GPU
# is actually present (avoids IndexError on CPU-only machines).
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Allocate GPU memory on demand instead of grabbing it all up front.
    tf.config.experimental.set_memory_growth(gpus[0], True)
# --- Data loading ------------------------------------------------------------
# Absolute Windows paths to the synthetic feature/target .npy files.
data_dir = 'C:\\Users\\shikh\\Mark_Everett_Collaboration\\Data\\Washington-On-Brazos\\Processing_For_ML\\SyntheticDataGeneration\\RegularMesh\\Data_Files\\'
data_file = 'Weighted_Data_12K.npy'
# model_file = 'Weighted_Models_12K.npy'
model_file = 'models_rand_cubes12k.npy'
# Features X and targets y; targets are multiplied by 47000.
# (presumably a physical-units conversion for the susceptibility models --
#  TODO confirm the meaning of this constant with the data owner)
X_raw = np.load(data_dir + data_file); # y_raw = (np.load(data_dir + model_file))
y_raw = (np.load(data_dir + model_file))*47000.
# 80/20 train/test split; fixed random_state for reproducibility.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(X_raw, y_raw, test_size=0.2, random_state = 123)
# Independent min-max scalers for features (ssf) and targets (ssm); each is
# fit on the training split only and reused to transform the test split.
# ssf = StandardScaler()
# ssm = StandardScaler()
ssf = MinMaxScaler()
ssm = MinMaxScaler()
X_tr_ss = ssf.fit_transform(X_train_raw)
X_tst_ss = ssf.transform(X_test_raw)
y_tr_ss = ssm.fit_transform(y_train_raw)
y_tst_ss = ssm.transform(y_test_raw)
# Reshape the 2-D (samples, features) arrays to 3-D (samples, features, 1):
# Conv1D layers expect a trailing channel axis.
n_samp_tr = X_tr_ss.shape[0]; n_samp_tst = X_tst_ss.shape[0]
n_col = X_tr_ss.shape[1]; n_out = y_tr_ss.shape[1]
X_tr_cnn = X_tr_ss.reshape(n_samp_tr, n_col, 1); X_tst_cnn = X_tst_ss.reshape(n_samp_tst, n_col, 1)
y_tr_cnn = y_tr_ss.reshape(n_samp_tr, n_out, 1); y_tst_cnn = y_tst_ss.reshape(n_samp_tst, n_out, 1)
com_name = 'cnn'

def _conv_bn(x, n_filters, idx):
    # One Conv1D(kernel 9, 'same') -> LeakyReLU(0.1) -> BatchNorm unit;
    # `idx` numbers the conv layer so names match 'conv_<idx>_<com_name>'.
    x = k.layers.Conv1D(n_filters, 9, padding = 'same', name = 'conv_' + str(idx) + '_' + com_name)(x)
    x = k.layers.LeakyReLU(alpha=0.1)(x)
    return k.layers.BatchNormalization()(x)

# Encoder/decoder-style 1-D CNN: two downsampling stages, then two
# upsampling stages, finishing with a 1x1 conv that maps to one channel.
in_lyr = k.layers.Input(shape=(n_col,1), name = 'inpt_'+com_name)
x = _conv_bn(in_lyr, 16, 1)
x = _conv_bn(x, 16, 2)
x = k.layers.MaxPooling1D(pool_size = 2, strides = 2, padding = 'same')(x)
x = k.layers.Dropout(0.1)(x)
x = _conv_bn(x, 32, 3)
x = _conv_bn(x, 32, 4)
x = k.layers.MaxPooling1D(pool_size = 2, strides = 2, padding = 'same')(x)
x = k.layers.Dropout(0.1)(x)
x = _conv_bn(x, 64, 5)
x = _conv_bn(x, 64, 6)
x = k.layers.UpSampling1D(size = 2)(x)
x = k.layers.Dropout(0.1)(x)
x = _conv_bn(x, 32, 7)
x = _conv_bn(x, 32, 8)
x = k.layers.UpSampling1D(size = 2)(x)
# Trim one sample off each end so the output length matches the target length.
x = k.layers.Cropping1D(cropping = (1,1))(x)
x = k.layers.Dropout(0.1)(x)
x = _conv_bn(x, 16, 9)
x = _conv_bn(x, 16, 10)
out_lyr = k.layers.Conv1D(1, 1, name = 'out_'+com_name)(x)
model_cnn = k.models.Model(inputs=[in_lyr], outputs=[out_lyr])
# k.utils.plot_model(model_cnn, show_shapes=True)
# model_cnn.summary()
model_cnn.compile(optimizer='adam', loss='mse')
def fit_pred_nd(m_obj, esm, mcps, n_epch, bt_sz, A_train_ss, b_train_ss, A_test_ss, b_test_ss, sm):
    """Train `m_obj` and return (history, train predictions, test predictions).

    esm / mcps are the early-stopping and checkpoint callbacks; `sm` (the
    target scaler) is accepted for interface compatibility but not used here.
    Predictions are returned in the scaled space of the training targets.
    """
    history = m_obj.fit(
        A_train_ss, b_train_ss,
        validation_data = [A_test_ss, b_test_ss],
        callbacks = [esm, mcps],
        epochs = n_epch,
        batch_size = bt_sz,
        verbose = 1,
    )
    train_pred = m_obj.predict(A_train_ss)
    test_pred = m_obj.predict(A_test_ss)
    return history, train_pred, test_pred
def plt_train_results(b_tr_rw, b_tst_rw, b_tr_prd, b_tst_prd, ttl, xl, yl):
    """Scatter-plot per-sample mean |true| vs mean |predicted| values.

    b_tr_rw / b_tst_rw   -- true train/test targets, one row per sample.
    b_tr_prd / b_tst_prd -- corresponding predictions (same leading length).
    ttl, xl, yl          -- plot title and axis labels.
    Displays the figure with plt.show() and returns None.
    """
    # Vectorized per-sample mean of |values|: reshape(len, -1) flattens each
    # sample's remaining axes, matching the original per-row np.mean over a
    # full slice, but in one C-level pass instead of a Python loop per row.
    sum_train_pred = np.column_stack((
        np.abs(b_tr_rw).reshape(len(b_tr_rw), -1).mean(axis=1),
        np.abs(b_tr_prd).reshape(len(b_tr_rw), -1).mean(axis=1),
    ))
    sum_test_pred = np.column_stack((
        np.abs(b_tst_rw).reshape(len(b_tst_rw), -1).mean(axis=1),
        np.abs(b_tst_prd).reshape(len(b_tst_rw), -1).mean(axis=1),
    ))
    fig, axes = plt.subplots()
    axes.scatter(sum_train_pred[:,0], sum_train_pred[:,1], c = 'b', marker = '+', linestyle = 'None', label = 'Training Models')
    axes.scatter(sum_test_pred[:,0], sum_test_pred[:,1], c = 'r', marker = 'x', linestyle = 'None', label = 'Test Models')
    plt.title(ttl)
    axes.grid()
    axes.legend(bbox_to_anchor=(1.45, 1))
    axes.set_xlabel(xl)
    axes.set_ylabel(yl)
    plt.show()
    return
# --- Training configuration --------------------------------------------------
n_patience = 5              # epochs without val_loss improvement before stopping
n_epoch = 300; batch_sz = 1000
mdl_dir = 'C:\\Users\\shikh\\Mark_Everett_Collaboration\\Jupyter_Notebooks\\Saved_Keras_Models\\CNN_Models\\Puzyrev_Inspired\\'
# Checkpoint only the best-val_loss weights; restore them on early stop.
mcp_save = k.callbacks.ModelCheckpoint(mdl_dir + 'wts_1.hdf5', save_best_only=True, monitor='val_loss', mode='min')
early_stopping_monitor = k.callbacks.EarlyStopping(patience=n_patience, restore_best_weights = True)
hist, y_tr_pr, y_tst_pr = fit_pred_nd(model_cnn, early_stopping_monitor, mcp_save, n_epoch, batch_sz, X_tr_cnn, y_tr_cnn, X_tst_cnn, y_tst_cnn, ssm)
# BUG FIX: predictions have shape (samples, n_out, 1) -- the final Conv1D has a
# single filter, so the last axis has size 1 and index [:,:,1] is out of bounds
# (IndexError). Use [:,:,0] to drop the channel axis before inverse-scaling.
susc_train_pred = ssm.inverse_transform(y_tr_pr[:,:,0]); susc_test_pred = ssm.inverse_transform(y_tst_pr[:,:,0])
ttil = 'Train and Test Predictions'; xlbl = 'True value'; ylbl = 'Predicted value'
plt_train_results(y_train_raw, y_test_raw, susc_train_pred, susc_test_pred, ttil, xlbl, ylbl)
错误消息如下: (即使我取消注释脚本开头与gpu相关的命令,错误消息也是一样的.)
The error message is as follows: (It is the same message even if I uncomment the gpu related commands at the beginning of the script).
---------------------------------------------------------------------------
UnknownError Traceback (most recent call last)
<ipython-input-3-fee4da188aac> in <module>
141 # model_cnn.compile(optimizer=nadam_lr, loss='mse')
142 # hist = model_cnn.fit(X_tr_ss, y_tr_ss, validation_data = [X_tst_ss, y_tst_ss], callbacks = [early_stopping_monitor], epochs = n_epoch, batch_size = 1000, verbose = 1)
--> 143 hist, y_tr_pr, y_tst_pr = fit_pred_nd(model_cnn, early_stopping_monitor, mcp_save, n_epoch, batch_sz, X_tr_cnn, y_tr_cnn, X_tst_cnn, y_tst_cnn, ssm)
144 susc_train_pred = ssm.inverse_transform(y_tr_pr[:,:,1]); susc_test_pred = ssm.inverse_transform(y_tst_pr[:,:,1])
145
<ipython-input-3-fee4da188aac> in fit_pred_nd(m_obj, esm, mcps, n_epch, bt_sz, A_train_ss, b_train_ss, A_test_ss, b_test_ss, sm)
102
103 def fit_pred_nd(m_obj, esm, mcps, n_epch, bt_sz, A_train_ss, b_train_ss, A_test_ss, b_test_ss, sm):
--> 104 ht = m_obj.fit(A_train_ss, b_train_ss, validation_data = [A_test_ss, b_test_ss], callbacks = [esm, mcps], epochs = n_epch, batch_size = bt_sz, verbose = 1)
105 b_tr_pr_ss_nd = m_obj.predict(A_train_ss); b_tst_pr_ss_nd = m_obj.predict(A_test_ss)
106 return ht, b_tr_pr_ss_nd, b_tst_pr_ss_nd
~\anaconda3\envs\keras_gpu\lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
1237 steps_per_epoch=steps_per_epoch,
1238 validation_steps=validation_steps,
-> 1239 validation_freq=validation_freq)
1240
1241 def evaluate(self,
~\anaconda3\envs\keras_gpu\lib\site-packages\keras\engine\training_arrays.py in fit_loop(model, fit_function, fit_inputs, out_labels, batch_size, epochs, verbose, callbacks, val_function, val_inputs, shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq)
194 ins_batch[i] = ins_batch[i].toarray()
195
--> 196 outs = fit_function(ins_batch)
197 outs = to_list(outs)
198 for l, o in zip(out_labels, outs):
~\anaconda3\envs\keras_gpu\lib\site-packages\tensorflow\python\keras\backend.py in __call__(self, inputs)
3290
3291 fetched = self._callable_fn(*array_vals,
-> 3292 run_metadata=self.run_metadata)
3293 self._call_fetch_callbacks(fetched[-len(self._fetches):])
3294 output_structure = nest.pack_sequence_as(
~\anaconda3\envs\keras_gpu\lib\site-packages\tensorflow\python\client\session.py in __call__(self, *args, **kwargs)
1456 ret = tf_session.TF_SessionRunCallable(self._session._session,
1457 self._handle, args,
-> 1458 run_metadata_ptr)
1459 if run_metadata:
1460 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
UnknownError: 2 root error(s) found.
(0) Unknown: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
[[{{node conv_1_cnn_1/convolution}}]]
[[Mean_1/_1563]]
(1) Unknown: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
[[{{node conv_1_cnn_1/convolution}}]]
0 successful operations.
0 derived errors ignored.
我尝试在Anaconda中升级cuda和cuDNN,但被告知仍然存在一些冲突.因此,关于如何前进的任何建议都会有很大帮助.非常感谢. -苏维克
I tried upgrading cuda and cuDNN in anaconda and was told there are several conflicts anyway. So, any suggestions on how to move forward will be a big help. Many thanks. - Souvik
响应@Lescurel的问题,来自tensorflow的初始运行警告消息如下:
In response to @Lescurel 's question, the warning messages from tensorflow for the initial run are as follows:
C:\Users\shikh\anaconda3\envs\keras_gpu\lib\site-packages\tensorflow\python\framework\dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint8 = np.dtype([("qint8", np.int8, 1)])
C:\Users\shikh\anaconda3\envs\keras_gpu\lib\site-packages\tensorflow\python\framework\dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint8 = np.dtype([("quint8", np.uint8, 1)])
C:\Users\shikh\anaconda3\envs\keras_gpu\lib\site-packages\tensorflow\python\framework\dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint16 = np.dtype([("qint16", np.int16, 1)])
C:\Users\shikh\anaconda3\envs\keras_gpu\lib\site-packages\tensorflow\python\framework\dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint16 = np.dtype([("quint16", np.uint16, 1)])
C:\Users\shikh\anaconda3\envs\keras_gpu\lib\site-packages\tensorflow\python\framework\dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint32 = np.dtype([("qint32", np.int32, 1)])
C:\Users\shikh\anaconda3\envs\keras_gpu\lib\site-packages\tensorflow\python\framework\dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
np_resource = np.dtype([("resource", np.ubyte, 1)])
C:\Users\shikh\anaconda3\envs\keras_gpu\lib\site-packages\tensorboard\compat\tensorflow_stub\dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint8 = np.dtype([("qint8", np.int8, 1)])
C:\Users\shikh\anaconda3\envs\keras_gpu\lib\site-packages\tensorboard\compat\tensorflow_stub\dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint8 = np.dtype([("quint8", np.uint8, 1)])
C:\Users\shikh\anaconda3\envs\keras_gpu\lib\site-packages\tensorboard\compat\tensorflow_stub\dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint16 = np.dtype([("qint16", np.int16, 1)])
C:\Users\shikh\anaconda3\envs\keras_gpu\lib\site-packages\tensorboard\compat\tensorflow_stub\dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint16 = np.dtype([("quint16", np.uint16, 1)])
C:\Users\shikh\anaconda3\envs\keras_gpu\lib\site-packages\tensorboard\compat\tensorflow_stub\dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint32 = np.dtype([("qint32", np.int32, 1)])
C:\Users\shikh\anaconda3\envs\keras_gpu\lib\site-packages\tensorboard\compat\tensorflow_stub\dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.
还有这个:
WARNING:tensorflow:From C:\Users\shikh\anaconda3\envs\keras_gpu\lib\site-packages\keras\backend\tensorflow_backend.py:4070: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.
推荐答案
如果要升级到tensorflow-gpu
的新版本,有几件事:
There is a couple of things if you want to upgrade to a new version of tensorflow-gpu
:
首先,在使用tensorflow-gpu
以获得兼容版本时,您必须尝试使用conda软件包管理器安装tensorflow-gpu
.这将处理cuDnn和cudatoolkit的兼容性.
First, in using tensorflow-gpu
for having compatible versions together you have to try to install the tensorflow-gpu
using the conda package manager. This will handle the compatibility of cuDnn and cudatoolkit.
第二,自tensorflow-gpu
版本1.14.0起,已经做了几处更改,您必须检查您的代码语法是否与新版本兼容.
Second, since tensorflow-gpu
version 1.14.0 several things have been changed and you have to check whether if your code syntax is compatible with the new version or not.
我在我的Google Colab笔记本中通过此 shared_link 使用新版的软件包和标准方法重写了您的代码.
I rewrote your code in my Google Colab notebook in this shared_link using the new version of packages and standard methods.
一切似乎都能正常工作.
Everything seems to work properly.
这篇关于Windows 10中的keras-gpu,tensorflow,cudatoolkit和cuDNN之间的最新版本兼容性是什么?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!