Interfacing cuSOLVER-sparse using PyCUDA


Problem description

I'm trying to interface the sparse cuSOLVER routine cusolverSpDcsrlsvqr() (>= CUDA 7.0) using PyCUDA and am facing some difficulties: I have tried wrapping the routine the same way the dense cuSOLVER routines are wrapped in scikits-cuda (https://github.com/lebedov/scikits.cuda/blob/master/scikits/cuda/cusolver.py).

However, the code crashes with a segmentation fault when calling the cusolverSpDcsrlsvqr() function. Debugging with cuda-gdb (cuda-gdb --args python -m pycuda.debug test.py; run; bt) yields the following stack trace:


#0  0x00007fffd9e3b71a in cusolverSpXcsrissymHost () from /usr/local/cuda/lib64/libcusolver.so
#1  0x00007fffd9df5237 in hsolverXcsrqr_zeroPivot () from /usr/local/cuda/lib64/libcusolver.so
#2  0x00007fffd9e0c764 in hsolverXcsrqr_analysis_coletree () from /usr/local/cuda/lib64/libcusolver.so
#3  0x00007fffd9f160a0 in cusolverXcsrqr_analysis () from /usr/local/cuda/lib64/libcusolver.so
#4  0x00007fffd9f28d78 in cusolverSpScsrlsvqr () from /usr/local/cuda/lib64/libcusolver.so

which is weird, since I do not call cusolverSpScsrlsvqr(), nor do I think it should call a host function (cusolverSpXcsrissymHost).

This is the code I'm talking about - thanks for your help:

# ### Interface cuSOLVER PyCUDA


import pycuda.gpuarray as gpuarray
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import scipy.sparse as sp
import ctypes


# #### wrap the cuSOLVER cusolverSpDcsrlsvqr() using ctypes

# cuSparse
_libcusparse = ctypes.cdll.LoadLibrary('libcusparse.so')

class cusparseMatDescr_t(ctypes.Structure):
    _fields_ = [
        ('MatrixType', ctypes.c_int),
        ('FillMode', ctypes.c_int),
        ('DiagType', ctypes.c_int),
        ('IndexBase', ctypes.c_int)
        ]
_libcusparse.cusparseCreate.restype = int
_libcusparse.cusparseCreate.argtypes = [ctypes.c_void_p]

_libcusparse.cusparseDestroy.restype = int
_libcusparse.cusparseDestroy.argtypes = [ctypes.c_void_p]

_libcusparse.cusparseCreateMatDescr.restype = int
_libcusparse.cusparseCreateMatDescr.argtypes = [ctypes.c_void_p]


# cuSOLVER
_libcusolver = ctypes.cdll.LoadLibrary('libcusolver.so')



_libcusolver.cusolverSpCreate.restype = int
_libcusolver.cusolverSpCreate.argtypes = [ctypes.c_void_p]

_libcusolver.cusolverSpDestroy.restype = int
_libcusolver.cusolverSpDestroy.argtypes = [ctypes.c_void_p]



_libcusolver.cusolverSpDcsrlsvqr.restype = int
_libcusolver.cusolverSpDcsrlsvqr.argtypes= [ctypes.c_void_p,
                                            ctypes.c_int,
                                            ctypes.c_int,
                                            cusparseMatDescr_t,
                                            ctypes.c_void_p,
                                            ctypes.c_void_p,
                                            ctypes.c_void_p,
                                            ctypes.c_void_p,
                                            ctypes.c_double,
                                            ctypes.c_int,
                                            ctypes.c_void_p,
                                            ctypes.c_void_p]


#### Prepare the matrix and parameters, copy to Device via gpuarray

# coo to csr
val = np.arange(1,5,dtype=np.float64)
col = np.arange(0,4,dtype=np.int32)
row = np.arange(0,4,dtype=np.int32)
A = sp.coo_matrix((val,(row,col))).todense()
Acsr = sp.csr_matrix(A)
b = np.ones(4)
x = np.empty(4)
print('A:' + str(A))
print('b: ' + str(b))


dcsrVal = gpuarray.to_gpu(Acsr.data)
dcsrColInd = gpuarray.to_gpu(Acsr.indices)
dcsrIndPtr = gpuarray.to_gpu(Acsr.indptr)
dx = gpuarray.to_gpu(x)
db = gpuarray.to_gpu(b)
m = ctypes.c_int(4)
nnz = ctypes.c_int(4)
descrA = cusparseMatDescr_t()
reorder = ctypes.c_int(0)
tol = ctypes.c_double(1e-10)
singularity = ctypes.c_int(99)


#create cusparse handle
_cusp_handle = ctypes.c_void_p()
status = _libcusparse.cusparseCreate(ctypes.byref(_cusp_handle))
print('status: ' + str(status))
cusp_handle = _cusp_handle.value

#create MatDescriptor
status = _libcusparse.cusparseCreateMatDescr(ctypes.byref(descrA))
print('status: ' + str(status))

#create cusolver handle
_cuso_handle = ctypes.c_void_p()
status = _libcusolver.cusolverSpCreate(ctypes.byref(_cuso_handle))
print('status: ' + str(status))
cuso_handle = _cuso_handle.value



print('cusp handle: ' + str(cusp_handle))
print('cuso handle: ' + str(cuso_handle))


### Call solver
_libcusolver.cusolverSpDcsrlsvqr(cuso_handle,
                                 m,
                                 nnz,
                                 descrA,
                                 int(dcsrVal.gpudata),
                                 int(dcsrIndPtr.gpudata),
                                 int(dcsrColInd.gpudata),
                                 int(db.gpudata),
                                 tol,
                                 reorder,
                                 int(dx.gpudata),
                                 ctypes.byref(singularity))

# destroy handles
status = _libcusolver.cusolverSpDestroy(cuso_handle)
print('status: ' + str(status))
status = _libcusparse.cusparseDestroy(cusp_handle)
print('status: ' + str(status))


Answer

Setting descrA to ctypes.c_void_p() and replacing cusparseMatDescr_t with ctypes.c_void_p in the cusolverSpDcsrlsvqr wrapper should solve the problem: cusparseMatDescr_t is an opaque handle in the cuSPARSE API, so it has to be passed as a pointer-sized value rather than re-declared as a by-value Python struct.
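
For reference, here is a minimal sketch of the corrected script with that change applied. Everything other than the descrA handling follows the code posted in the question; it assumes libcusparse.so and libcusolver.so can be found by the dynamic loader.

import ctypes
import numpy as np
import scipy.sparse as sp
import pycuda.autoinit
import pycuda.gpuarray as gpuarray

# load the CUDA libraries (assumed to be on the loader search path)
_libcusparse = ctypes.cdll.LoadLibrary('libcusparse.so')
_libcusolver = ctypes.cdll.LoadLibrary('libcusolver.so')

_libcusparse.cusparseCreate.restype = int
_libcusparse.cusparseCreate.argtypes = [ctypes.c_void_p]
_libcusparse.cusparseDestroy.restype = int
_libcusparse.cusparseDestroy.argtypes = [ctypes.c_void_p]
_libcusparse.cusparseCreateMatDescr.restype = int
_libcusparse.cusparseCreateMatDescr.argtypes = [ctypes.c_void_p]

_libcusolver.cusolverSpCreate.restype = int
_libcusolver.cusolverSpCreate.argtypes = [ctypes.c_void_p]
_libcusolver.cusolverSpDestroy.restype = int
_libcusolver.cusolverSpDestroy.argtypes = [ctypes.c_void_p]

# the matrix descriptor is declared as an opaque pointer, not as a struct
_libcusolver.cusolverSpDcsrlsvqr.restype = int
_libcusolver.cusolverSpDcsrlsvqr.argtypes = [ctypes.c_void_p,  # cusolverSp handle
                                             ctypes.c_int,     # m
                                             ctypes.c_int,     # nnz
                                             ctypes.c_void_p,  # descrA
                                             ctypes.c_void_p,  # csrVal
                                             ctypes.c_void_p,  # csrRowPtr
                                             ctypes.c_void_p,  # csrColInd
                                             ctypes.c_void_p,  # b
                                             ctypes.c_double,  # tol
                                             ctypes.c_int,     # reorder
                                             ctypes.c_void_p,  # x
                                             ctypes.c_void_p]  # singularity

# same 4x4 diagonal test system as in the question
val = np.arange(1, 5, dtype=np.float64)
col = np.arange(0, 4, dtype=np.int32)
row = np.arange(0, 4, dtype=np.int32)
Acsr = sp.csr_matrix(sp.coo_matrix((val, (row, col))))
b = np.ones(4)
x = np.empty(4)

dcsrVal = gpuarray.to_gpu(Acsr.data)
dcsrColInd = gpuarray.to_gpu(Acsr.indices)
dcsrIndPtr = gpuarray.to_gpu(Acsr.indptr)
db = gpuarray.to_gpu(b)
dx = gpuarray.to_gpu(x)

# create the handles; descrA is now an opaque c_void_p filled in by cuSPARSE
cusp_handle = ctypes.c_void_p()
cuso_handle = ctypes.c_void_p()
descrA = ctypes.c_void_p()
_libcusparse.cusparseCreate(ctypes.byref(cusp_handle))
_libcusparse.cusparseCreateMatDescr(ctypes.byref(descrA))
_libcusolver.cusolverSpCreate(ctypes.byref(cuso_handle))

singularity = ctypes.c_int(0)
status = _libcusolver.cusolverSpDcsrlsvqr(cuso_handle,
                                          ctypes.c_int(4),         # m
                                          ctypes.c_int(4),         # nnz
                                          descrA,
                                          int(dcsrVal.gpudata),
                                          int(dcsrIndPtr.gpudata),
                                          int(dcsrColInd.gpudata),
                                          int(db.gpudata),
                                          ctypes.c_double(1e-10),  # tol
                                          ctypes.c_int(0),         # reorder = none
                                          int(dx.gpudata),
                                          ctypes.byref(singularity))
print('status: ' + str(status))
print('x: ' + str(dx.get()))

_libcusolver.cusolverSpDestroy(cuso_handle)
_libcusparse.cusparseDestroy(cusp_handle)

With the opaque descriptor the solver should return status 0, and dx.get() should hold the solution of the 4x4 diagonal system (element-wise, b divided by the diagonal of A).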
