< !DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
< html >
< head > < title > CUDA Environment Routines< / title >
< meta http-equiv = "Content-Type" content = "text/html; charset=iso-8859-1" >
< meta name = "generator" content = "TeX4ht (https://tug.org/tex4ht/)" >
< meta name = "originator" content = "TeX4ht (https://tug.org/tex4ht/)" >
<!-- html,3 -->
< meta name = "src" content = "userhtml.tex" >
< link rel = "stylesheet" type = "text/css" href = "userhtml.css" >
< / head > < body
>
<!-- l. 87 --> < div class = "crosslinks" > < p class = "noindent" > [< a
href="userhtmlse12.html" >prev< / a > ] [< a
href="userhtmlse12.html#tailuserhtmlse12.html" >prev-tail< / a > ] [< a
href="userhtmlse13.html#tailuserhtmlse13.html">tail< / a > ] [< a
href="userhtml.html# " >up< / a > ] < / p > < / div >
< h3 class = "sectionHead" > < span class = "titlemark" > 13 < / span > < a
id="x20-15500013">< / a > CUDA Environment Routines< / h3 >
<!-- l. 91 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-156000">< / a > psb_cuda_init — Initializes PSBLAS-CUDA environment< / h4 >
< a
id="Q1-20-194">< / a >
< div class = "center"
>
<!-- l. 99 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-110" >
call  psb_cuda_init(ctxt  [,  device])
< / pre >
<!-- l. 103 --> < p class = "nopar" > < / div > < / div >
<!-- l. 108 --> < p class = "noindent" > This subroutine initializes the PSBLAS-CUDA environment.
< dl class = "description" > < dt class = "description" >
<!-- l. 110 --> < p class = "noindent" >
< span
class="pplb7t-">Type:< / span > < / dt > < dd
class="description">
<!-- l. 110 --> < p class = "noindent" > Synchronous.
< / dd > < dt class = "description" >
<!-- l. 111 --> < p class = "noindent" >
< span
class="pplb7t-">On Entry< / span > < / dt > < dd
class="description">
<!-- l. 111 --> < p class = "noindent" >
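< / dd > < dt class = "description" >
< p class = "noindent" >
< span
class="pplb7t-">ctxt< / span > < / dt > < dd
class="description">
< p class = "noindent" > the communication context identifying the virtual parallel machine.< br
class="newline" />Scope: < span
class="pplb7t-">global< / span > .< br
class="newline" />Type: < span
class="pplb7t-">required< / span > .< br
class="newline" />Intent: < span
class="pplb7t-">in< / span > .< br
class="newline" />Specified as: an integer variable.< / dd > < dt class = "description" >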
<!-- l. 112 --> < p class = "noindent" >
< span
class="pplb7t-">device< / span > < / dt > < dd
class="description">
<!-- l. 112 --> < p class = "noindent" > ID of CUDA device to attach to.< br
class="newline" />Scope: < span
class="pplb7t-">local< / span > .< br
class="newline" />Type: < span
class="pplb7t-">optional< / span > .< br
class="newline" />Intent: < span
class="pplb7t-">in< / span > .< br
class="newline" />Specified as: an integer value.   Default: use < code class = "lstinline" > < span style = "color:#000000" > mod< / span > < span style = "color:#000000" > (< / span > < span style = "color:#000000" > iam< / span > < span style = "color:#000000" > ,< / span > < span style = "color:#000000" > ngpu< / span > < span style = "color:#000000" > )< / span > < / code > where < code class = "lstinline" > < span style = "color:#000000" > iam< / span > < / code > is
the calling process index and < code class = "lstinline" > < span style = "color:#000000" > ngpu< / span > < / code > is the total number of CUDA devices
available on the current node.< / dd > < / dl >
<!-- l. 123 --> < p class = "noindent" > < span
class="pplb7t-x-x-120">Notes< / span >
< ol class = "enumerate1" >
< li
class="enumerate" id="x20-156002x1">
<!-- l. 125 --> < p class = "noindent" > A call to this routine must precede any other PSBLAS-CUDA call.< / li > < / ol >
<!-- l. 129 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-157000">< / a > psb_cuda_exit — Exit from PSBLAS-CUDA environment< / h4 >
< a
id="Q1-20-196">< / a >
< div class = "center"
>
<!-- l. 137 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-111" >
call  psb_cuda_exit(ctxt)
< / pre >
<!-- l. 141 --> < p class = "nopar" > < / div > < / div >
<!-- l. 146 --> < p class = "noindent" > This subroutine exits from the PSBLAS-CUDA environment.
< dl class = "description" > < dt class = "description" >
<!-- l. 148 --> < p class = "noindent" >
< span
class="pplb7t-">Type:< / span > < / dt > < dd
class="description">
<!-- l. 148 --> < p class = "noindent" > Synchronous.
< / dd > < dt class = "description" >
<!-- l. 149 --> < p class = "noindent" >
< span
class="pplb7t-">On Entry< / span > < / dt > < dd
class="description">
<!-- l. 149 --> < p class = "noindent" >
< / dd > < dt class = "description" >
<!-- l. 150 --> < p class = "noindent" >
< span
class="pplb7t-">ctxt< / span > < / dt > < dd
class="description">
<!-- l. 150 --> < p class = "noindent" > the communication context identifying the virtual parallel machine.< br
class="newline" />Scope: < span
class="pplb7t-">global< / span > .< br
class="newline" />Type: < span
class="pplb7t-">required< / span > .< br
class="newline" />Intent: < span
class="pplb7t-">in< / span > .< br
class="newline" />Specified as: an integer variable.< / dd > < / dl >
<!-- l. 161 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-158000">< / a > psb_cuda_DeviceSync — Synchronize CUDA device< / h4 >
< a
id="Q1-20-198">< / a >
< div class = "center"
>
<!-- l. 169 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-112" >
call  psb_cuda_DeviceSync()
< / pre >
<!-- l. 173 --> < p class = "nopar" > < / div > < / div >
<!-- l. 178 --> < p class = "noindent" > This subroutine ensures that all previously invoked kernels, i.e. all invocations of
CUDA-side code, have completed.
<!-- l. 182 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-159000">< / a > psb_cuda_getDeviceCount < / h4 >
< a
id="Q1-20-200">< / a >
< div class = "center"
>
<!-- l. 190 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-113" >
ngpus  =    psb_cuda_getDeviceCount()
< / pre >
<!-- l. 194 --> < p class = "nopar" > < / div > < / div >
<!-- l. 199 --> < p class = "noindent" > Get number of devices available on current computing node.
<!-- l. 201 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-160000">< / a > psb_cuda_getDevice < / h4 >
< a
id="Q1-20-202">< / a >
< div class = "center"
>
<!-- l. 209 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-114" >
dev  =  psb_cuda_getDevice()
< / pre >
<!-- l. 213 --> < p class = "nopar" > < / div > < / div >
<!-- l. 218 --> < p class = "noindent" > Get device in use by current process.
<!-- l. 220 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-161000">< / a > psb_cuda_setDevice < / h4 >
< a
id="Q1-20-204">< / a >
< div class = "center"
>
<!-- l. 228 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-115" >
info  =  psb_cuda_setDevice(dev)
< / pre >
<!-- l. 232 --> < p class = "nopar" > < / div > < / div >
<!-- l. 237 --> < p class = "noindent" > Set device to be used by current process.
<!-- l. 239 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-162000">< / a > psb_cuda_DeviceHasUVA < / h4 >
< a
id="Q1-20-206">< / a >
< div class = "center"
>
<!-- l. 247 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-116" >
hasUva  =  psb_cuda_DeviceHasUVA()
< / pre >
<!-- l. 251 --> < p class = "nopar" > < / div > < / div >
<!-- l. 256 --> < p class = "noindent" > Returns true if device currently in use supports UVA (Unified Virtual Addressing).
<!-- l. 259 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-163000">< / a > psb_cuda_WarpSize < / h4 >
< a
id="Q1-20-208">< / a >
< div class = "center"
>
<!-- l. 267 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-117" >
nw  =  psb_cuda_WarpSize()
< / pre >
<!-- l. 271 --> < p class = "nopar" > < / div > < / div >
<!-- l. 276 --> < p class = "noindent" > Returns the warp size.
<!-- l. 279 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-164000">< / a > psb_cuda_MultiProcessors < / h4 >
< a
id="Q1-20-210">< / a >
< div class = "center"
>
<!-- l. 287 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-118" >
nmp  =  psb_cuda_MultiProcessors()
< / pre >
<!-- l. 291 --> < p class = "nopar" > < / div > < / div >
<!-- l. 296 --> < p class = "noindent" > Returns the number of multiprocessors in the CUDA device.
<!-- l. 298 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-165000">< / a > psb_cuda_MaxThreadsPerMP < / h4 >
< a
id="Q1-20-212">< / a >
< div class = "center"
>
<!-- l. 306 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-119" >
nt  =  psb_cuda_MaxThreadsPerMP()
< / pre >
<!-- l. 310 --> < p class = "nopar" > < / div > < / div >
<!-- l. 315 --> < p class = "noindent" > Returns the maximum number of threads per multiprocessor.
<!-- l. 318 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-166000">< / a > psb_cuda_MaxRegistersPerBlock < / h4 >
< a
id="Q1-20-214">< / a >
< div class = "center"
>
<!-- l. 326 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-120" >
nr  =  psb_cuda_MaxRegistersPerBlock()
< / pre >
<!-- l. 330 --> < p class = "nopar" > < / div > < / div >
<!-- l. 335 --> < p class = "noindent" > Returns the maximum number of registers per thread block.
<!-- l. 338 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-167000">< / a > psb_cuda_MemoryClockRate < / h4 >
< a
id="Q1-20-216">< / a >
< div class = "center"
>
<!-- l. 346 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-121" >
cl  =  psb_cuda_MemoryClockRate()
< / pre >
<!-- l. 350 --> < p class = "nopar" > < / div > < / div >
<!-- l. 355 --> < p class = "noindent" > Returns the memory clock rate in kHz, as an integer.
<!-- l. 357 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-168000">< / a > psb_cuda_MemoryBusWidth < / h4 >
< a
id="Q1-20-218">< / a >
< div class = "center"
>
<!-- l. 365 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-122" >
nb  =  psb_cuda_MemoryBusWidth()
< / pre >
<!-- l. 369 --> < p class = "nopar" > < / div > < / div >
<!-- l. 374 --> < p class = "noindent" > Returns the memory bus width in bits.
<!-- l. 376 --> < p class = "noindent" >
< h4 class = "likesubsectionHead" > < a
id="x20-169000">< / a > psb_cuda_MemoryPeakBandwidth < / h4 >
< a
id="Q1-20-220">< / a >
< div class = "center"
>
<!-- l. 384 --> < p class = "noindent" >
< div class = "minipage" > < pre class = "verbatim" id = "verbatim-123" >
bw  =  psb_cuda_MemoryPeakBandwidth()
< / pre >
<!-- l. 388 --> < p class = "nopar" > < / div > < / div >
<!-- l. 392 --> < p class = "noindent" > Returns the peak memory bandwidth in MB/s (real double precision).
<!-- l. 134 --> < p class = "indent" >
<!-- l. 2 --> < div class = "crosslinks" > < p class = "noindent" > [< a
href="userhtmlse12.html" >prev< / a > ] [< a
href="userhtmlse12.html#tailuserhtmlse12.html" >prev-tail< / a > ] [< a
href="userhtmlse13.html" >front< / a > ] [< a
href="userhtml.html# " >up< / a > ] < / p > < / div >
<!-- l. 2 --> < p class = "indent" > < a
id="tailuserhtmlse13.html">< / a >
< / body > < / html >