NVIDIA PTX 简单入门

先看代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
; Data layout string: "e" selects little-endian, "p:64:64:64" gives 64-bit
; pointers with 64-bit alignment, and "n16:32:64" lists the native integer
; widths. The remaining entries set size/alignment for scalar and vector types.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
; Target triple: 64-bit NVPTX architecture, NVIDIA vendor, CUDA as the OS field.
target triple = "nvptx64-nvidia-cuda"

; NVVM intrinsic that reads the x component of the PTX %tid special register.
; It takes no arguments and returns an i32; the attributes state that it never
; unwinds (nounwind) and does not access memory (readnone).
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone

; Element-wise float addition: C[tid.x] = A[tid.x] + B[tid.x].
;
; %A, %B : input float arrays, pointers in address space 1 (the global
;          address space on NVPTX).
; %C     : output float array, pointer in address space 1.
;
; Each thread handles exactly one element, selected by its tid.x value.
; No bounds check is performed, so every array must hold at least as many
; elements as there are threads launched along x.
define void @kernel(ptr addrspace(1) %A,
                    ptr addrspace(1) %B,
                    ptr addrspace(1) %C) {
body:
  ; Index of the element this thread is responsible for.
  %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()

  ; Left operand: A[tid].
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %A, i32 %tid
  %a = load float, ptr addrspace(1) %a.addr, align 4

  ; Right operand: B[tid].
  %b.addr = getelementptr inbounds float, ptr addrspace(1) %B, i32 %tid
  %b = load float, ptr addrspace(1) %b.addr, align 4

  ; Sum the operands and write the result to C[tid].
  %sum = fadd float %a, %b
  %c.addr = getelementptr inbounds float, ptr addrspace(1) %C, i32 %tid
  store float %sum, ptr addrspace(1) %c.addr, align 4

  ret void
}

; Named metadata listing the NVVM annotation nodes for this module.
!nvvm.annotations = !{!0}
; Annotation node tagging @kernel with the "kernel" property set to 1, which
; marks the function as a kernel entry point rather than a device function.
!0 = !{ptr @kernel, !"kernel", i32 1}
阅读更多
Your browser is out-of-date!

Update your browser to view this website correctly. Update my browser now

×