在CUDA中使用clang-llvm编译器添加类似于__shared__的内存类型的支持 [英] Adding support for a memory type similar to __shared__ in CUDA using clang-llvm compiler

查看:836
本文介绍了在CUDA中使用clang-llvm编译器添加类似于__shared__的内存类型的支持的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在尝试在 CUDA 中添加一种类似于 __shared__ 的新内存类型 __noc__,并且需要使用 clang-llvm 对其进行编译。以下是参考该回答、为实现对这种新内存类型的解析而采取的步骤:



步骤1:在 clang 的 Attr.td 文件(clang/include/clang/Basic/Attr.td)中,仿照 shared 关键字添加了 noc 关键字。

  def CUDAShared:InheritableAttr {
let Spellings = [GNU<shared>];
let Subjects = SubjectList< [Var]> ;;
let LangOpts = [CUDA];
let Documentation = [Undocumented];
}

def CUDANoc:InheritableAttr {
let Spellings = [Keyword<noc>];
let Subjects = SubjectList< [Var]> ;;
let LangOpts = [CUDA];
let Documentation = [Undocumented];
}

步骤2:与 CUDASharedAttr 类似,在 clang/lib/Sema/SemaDeclAttr.cpp 中添加了 CUDANocAttr。

  case AttributeList :: AT_CUDAShared:
handleSimpleAttribute< CUDASharedAttr>(S,D,Attr);
break;
case AttributeList :: AT_CUDANoc:
handleSimpleAttribute< CUDANocAttr>(S,D,Attr);
printf(\\\
T1:SemaDeclAttr.cpp); // testpoint 1:for noc debugging
break;

步骤3 :在SemaDecl.cpp文件中,添加CUDANocAttr 以强制noc作为静态存储(类似于共享)

  if(getLangOpts()。CUDA){
if(EmitTLSUnsupportedError&& DeclAttrsMatchCUDAMode(getLangOpts(),NewVD))
Diag(D.getDeclSpec()。getThreadStorageClassSpecLoc(),
diag :: err_thread_unsupported);
// CUDA B.2.5:__shared__和__constant__变量暗示了静态
//存储器[duration]。
if(SC == SC_None&& S-> getFnParent()!= nullptr&
(NewVD-> hasAttr< CUDASharedAttr>()||
- > hasAttr< CUDANocAttr>()||
NewVD-> hasAttr< CUDAConstantAttr>())){
NewVD-> setStorageClass(SC_Static);
}
}

步骤4:在 CodeGenModule(llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp)中添加了 NOC,以便能够从 NVPTXAddrSpaceMap 访问 cuda_noc 地址空间。

  else if(D-> hasAttr< CUDASharedAttr>())
AddrSpace = getContext ().getTargetAddressSpace(LangAS :: cuda_shared);
else if(D-> hasAttr< CUDANocAttr>())
AddrSpace = getContext()。getTargetAddressSpace(LangAS :: cuda_noc);
else
AddrSpace = getContext()。getTargetAddressSpace(LangAS :: cuda_device);
}

return AddrSpace;
}

步骤5:将 cuda_noc 添加到 NVPTXAddrSpaceMap 数组中,以支持这种新的地址空间类型。

  static const unsigned NVPTXAddrSpaceMap [] = {
1,// opencl_global
3,// opencl_local
4,// opencl_constant
FIXME:泛型必须添加到目标
0,// opencl_generic
1,// cuda_device
4,// cuda_constant
3,// cuda_shared
6,// cuda_noc
};

步骤6:将宏 #define __noc__ __location__(noc) 添加到文件 clang/lib/Headers/__clang_cuda_runtime_wrapper.h 中,该文件包含了来自 CUDA 的 host_defines.h。



llvm源代码已成功编译并安装。但是当试图编译一个内存类型为__noc__的CUDA源文件时,它会给出以下警告:

warning: unknown attribute 'noc' ignored [-Wunknown-attributes]
__noc__ int c_shared [5];
^

/usr/local/bin/../lib/clang/3.8.0/include/__clang_cuda_runtime_wrapper.h:69:30: note: expanded from macro '__noc__'
#define __noc__ __location__(noc)
^
1生成警告。

从警告中可以看出 __noc__ 被忽略了。在生成的 IR 中,没有观察到与 __noc__ 对应的 addrspace(6)。

从放入文件 clang/lib/Sema/SemaDeclAttr.cpp(步骤2)中的调试 printf 可以看出,AttributeList::AT_CUDANoc 对应的 case 分支没有被执行。



任何建议或思路都会很有帮助。在编译 llvm 源代码之前,是否需要显式运行某个脚本,才能让 *.td 文件中的内容生成为 C++ 源代码?

解决方案

__location__(noc) 会被展开为 __attribute__((noc))。这是 GNU(即 gcc)的属性语法。所以问题出在这一行:

  let Spellings = [Keyword<noc>]; 

为了让 noc 能与 __location__ 宏配合使用,应该使用 GNU<"noc">,而不是 Keyword<"noc">。


I am working towards adding a new memory type similar to __shared__ in CUDA called __noc__ which needs to be compiled using clang-llvm. Following are the steps followed to achieve the parsing for the new memory type taking reference from the answer:

Step 1: In clang's Attr.td file (clang/include/clang/Basic/Attr.td), the noc keyword was added similar to the shared keyword.

def CUDAShared : InheritableAttr {
  let Spellings = [GNU<"shared">];
  let Subjects = SubjectList<[Var]>;
  let LangOpts = [CUDA];
  let Documentation = [Undocumented];
}

def CUDANoc : InheritableAttr {
  let Spellings = [Keyword<"noc">];
  let Subjects = SubjectList<[Var]>;
  let LangOpts = [CUDA];
  let Documentation = [Undocumented];
}

Step 2: Similar to CUDASharedAttr, CUDANocAttr was added in clang/lib/Sema/SemaDeclAttr.cpp.

  case AttributeList::AT_CUDAShared:
    handleSimpleAttribute<CUDASharedAttr>(S, D, Attr);
    break;
  case AttributeList::AT_CUDANoc:
    handleSimpleAttribute<CUDANocAttr>(S, D, Attr);
    printf("\n T1:SemaDeclAttr.cpp"); //testpoint 1 : for noc debugging
    break;

Step 3: In the SemaDecl.cpp file, the CUDANocAttr is added to enforce noc to be a static storage (similar to shared)

  if (getLangOpts().CUDA) {
    if (EmitTLSUnsupportedError && DeclAttrsMatchCUDAMode(getLangOpts(), NewVD))
      Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
           diag::err_thread_unsupported);
    // CUDA B.2.5: "__shared__ and __constant__ variables have implied static
    // storage [duration]."
    if (SC == SC_None && S->getFnParent() != nullptr &&
        (NewVD->hasAttr<CUDASharedAttr>() ||
         NewVD->hasAttr<CUDANocAttr>()||
         NewVD->hasAttr<CUDAConstantAttr>())) {
      NewVD->setStorageClass(SC_Static);
    }
  }

Step 4: NOC is added in CodeGenModule (llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp) to allow the access of cuda_noc address space from NVPTXAddrSpaceMap

    else if (D->hasAttr<CUDASharedAttr>())
      AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_shared);
    else if (D->hasAttr<CUDANocAttr>())
      AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_noc);
    else
      AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_device);
  }

  return AddrSpace;
}

Step 5: The cuda_noc is added to NVPTXAddrSpaceMap array to allow a new type of address space

static const unsigned NVPTXAddrSpaceMap[] = {
    1, // opencl_global
    3, // opencl_local
    4, // opencl_constant
    // FIXME: generic has to be added to the target
    0, // opencl_generic
    1, // cuda_device
    4, // cuda_constant
    3, // cuda_shared
    6, // cuda_noc
};

Step 6: The macro #define __noc__ __location__(noc) is added to the file clang/lib/Headers/__clang_cuda_runtime_wrapper.h where host_defines.h from CUDA is included.

The llvm source code compiled and installed successfully. But when trying to compile a CUDA source file that uses the memory type __noc__, it gives the following warning:

warning: unknown attribute 'noc' ignored [-Wunknown-attributes]
        __noc__ int c_shared[5];
        ^

/usr/local/bin/../lib/clang/3.8.0/include/__clang_cuda_runtime_wrapper.h:69:30: note: expanded from macro '__noc__'
#define __noc__ __location__(noc)
                             ^
1 warning generated.

From the warning it can be observed that __noc__ is ignored. In the generated IR, addrspace(6), which corresponds to __noc__, is not observed.

From the debug printf put into the file clang/lib/Sema/SemaDeclAttr.cpp (step 2), it can be observed that the case for AttributeList::AT_CUDANoc is not getting executed.

Any suggestions or intuitions can greatly help. Is there any script to be run explicitly before compiling the llvm source code for the inputs in the *.td file to appear as C++ source code...

解决方案

__location__(noc) gets expanded to __attribute__((noc)). This is the GNU or gcc attribute syntax. So the issue is with this line:

let Spellings = [Keyword<"noc">];

In order for noc to work with the __location__ macro, you should use GNU<"noc"> instead of Keyword<"noc">.

这篇关于在CUDA中使用clang-llvm编译器添加类似于__shared__的内存类型的支持的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆