You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
234 lines
11 KiB
HTML
234 lines
11 KiB
HTML
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
|
|
"http://www.w3.org/TR/html4/loose.dtd">
|
|
<html lang="en">
|
|
<head><title>GPU example</title>
|
|
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
|
<meta name="generator" content="TeX4ht (http://www.tug.org/tex4ht/)">
|
|
<meta name="originator" content="TeX4ht (http://www.tug.org/tex4ht/)">
|
|
<!-- html,3 -->
|
|
<meta name="src" content="userhtml.tex">
|
|
<link rel="stylesheet" type="text/css" href="userhtml.css">
|
|
</head><body
|
|
>
|
|
<!--l. 420--><div class="crosslinks"><p class="noindent"><span
|
|
class="cmr-12">[</span><a
|
|
href="userhtmlsu6.html" ><span
|
|
class="cmr-12">prev</span></a><span
|
|
class="cmr-12">] [</span><a
|
|
href="userhtmlsu6.html#tailuserhtmlsu6.html" ><span
|
|
class="cmr-12">prev-tail</span></a><span
|
|
class="cmr-12">] [</span><a
|
|
href="#tailuserhtmlsu7.html"><span
|
|
class="cmr-12">tail</span></a><span
|
|
class="cmr-12">] [</span><a
|
|
href="userhtmlse4.html#userhtmlsu7.html" ><span
|
|
class="cmr-12">up</span></a><span
|
|
class="cmr-12">] </span></p></div>
|
|
<h4 class="subsectionHead"><span class="titlemark"><span
|
|
class="cmr-12">4.2 </span></span> <a
|
|
id="x16-150004.2"></a><span
|
|
class="cmr-12">GPU example</span></h4>
|
|
<!--l. 422--><p class="noindent" ><span
|
|
class="cmr-12">The code discussed here shows how to set up a program exploiting the combined GPU</span>
|
|
<span
|
|
class="cmr-12">capabilities of PSBLAS and AMG4PSBLAS. The code example is available in the</span>
|
|
<span
|
|
class="cmr-12">source distribution directory </span><span class="obeylines-h"><span class="verb"><span
|
|
class="cmtt-12">amg4psblas/examples/gpu</span></span></span><span
|
|
class="cmr-12">.</span>
|
|
<!--l. 427--><p class="indent" > <span
|
|
class="cmr-12">First of all, we need to include the appropriate modules and declare some auxiliary</span>
|
|
<span
|
|
class="cmr-12">variables:</span>
|
|
|
|
|
|
|
|
<!--l. 429--><p class="indent" > <a
|
|
id="x16-15001r5"></a><hr class="float"><div class="float"
|
|
>
|
|
|
|
|
|
|
|
<div class="center"
|
|
>
|
|
<!--l. 448--><p class="noindent" >
|
|
<div class="minipage"><div class="verbatim" id="verbatim-10">
|
|
program amg_dexample_gpu
|
|
 <br />  use psb_base_mod
|
|
 <br />  use amg_prec_mod
|
|
 <br />  use psb_krylov_mod
|
|
 <br />  use psb_util_mod
|
|
 <br />  use psb_gpu_mod
|
|
 <br />  use data_input
|
|
 <br />  use amg_d_pde_mod
|
|
 <br />  implicit none
|
|
 <br />  .......
|
|
 <br />  ! GPU variables
|
|
 <br />  type(psb_d_hlg_sparse_mat) :: agmold
|
|
 <br />  type(psb_d_vect_gpu)       :: vgmold
|
|
 <br />  type(psb_i_vect_gpu)       :: igmold
|
|
 <br />
|
|
 <br /> 
|
|
</div>
|
|
<!--l. 467--><p class="nopar" ></div></div>
|
|
<br /> <div class="caption"
|
|
><span class="id">Listing 5: </span><span
|
|
class="content">setup of a GPU-enabled test program part one.</span></div><!--tex4ht:label?: x16-15001r5 -->
|
|
|
|
|
|
|
|
</div><hr class="endfloat" />
|
|
<!--l. 474--><p class="indent" > <span
|
|
class="cmr-12">In this particular example we are choosing to employ a </span><span class="obeylines-h"><span class="verb"><span
|
|
class="cmtt-12">HLG</span></span></span> <span
|
|
class="cmr-12">data structure for</span>
|
|
<span
|
|
class="cmr-12">sparse matrices on GPUs; for more information please refer to the PSBLAS-EXT users’</span>
|
|
<span
|
|
class="cmr-12">guide.</span>
|
|
<!--l. 478--><p class="indent" > <span
|
|
class="cmr-12">We then have to initialize the GPU environment, and pass the appropriate MOLD</span>
|
|
<span
|
|
class="cmr-12">variables to the build methods (see also the PSBLAS and PSBLAS-EXT users’</span>
|
|
<span
|
|
class="cmr-12">guides).</span>
|
|
|
|
|
|
|
|
<!--l. 481--><p class="indent" > <a
|
|
id="x16-15002r6"></a><hr class="float"><div class="float"
|
|
>
|
|
|
|
|
|
|
|
<div class="center"
|
|
>
|
|
<!--l. 497--><p class="noindent" >
|
|
<div class="minipage"><div class="verbatim" id="verbatim-11">
|
|
  call psb_init(ctxt)
|
|
 <br />  call psb_info(ctxt,iam,np)
|
|
 <br />  !
|
|
 <br />  ! BEWARE: if you have NGPUS  per node, the default is to
|
|
 <br />  ! attach to mod(IAM,NGPUS)
|
|
 <br />  !
|
|
 <br />  call psb_gpu_init(ctxt)
|
|
 <br />  ......
|
|
 <br />  t1 = psb_wtime()
|
|
 <br />  call prec%smoothers_build(a,desc_a,info, amold=agmold, vmold=vgmold, imold=igmold)
|
|
 <br />
|
|
 <br /> 
|
|
</div>
|
|
<!--l. 512--><p class="nopar" ></div></div>
|
|
<br /> <div class="caption"
|
|
><span class="id">Listing 6: </span><span
|
|
class="content">setup of a GPU-enabled test program part two.</span></div><!--tex4ht:label?: x16-15002r6 -->
|
|
|
|
|
|
|
|
</div><hr class="endfloat" />
|
|
<!--l. 519--><p class="indent" > <span
|
|
class="cmr-12">Finally, we convert the input matrix, the descriptor and the vectors to use a</span>
|
|
<span
|
|
class="cmr-12">GPU-enabled internal storage format. We then preallocate the preconditioner</span>
|
|
<span
|
|
class="cmr-12">workspace before entering the Krylov method. At the end of the code, we close the</span>
|
|
<span
|
|
class="cmr-12">GPU environment.</span>
|
|
|
|
|
|
|
|
<!--l. 523--><p class="indent" > <a
|
|
id="x16-15003r7"></a><hr class="float"><div class="float"
|
|
>
|
|
|
|
|
|
|
|
<div class="center"
|
|
>
|
|
<!--l. 553--><p class="noindent" >
|
|
<div class="minipage"><div class="verbatim" id="verbatim-12">
|
|
  call desc_a%cnv(mold=igmold)
|
|
 <br />  call a%cscnv(info,mold=agmold)
|
|
 <br />  call psb_geasb(x,desc_a,info,mold=vgmold)
|
|
 <br />  call psb_geasb(b,desc_a,info,mold=vgmold)
|
|
 <br />
|
|
 <br />  !
|
|
 <br />  ! iterative method parameters
|
|
 <br />  !
|
|
 <br />  call psb_barrier(ctxt)
|
|
 <br />  call prec%allocate_wrk(info)
|
|
 <br />  t1 = psb_wtime()
|
|
 <br />  call psb_krylov(s_choice%kmethd,a,prec,b,x,s_choice%eps,&
|
|
 <br />       & desc_a,info,itmax=s_choice%itmax,iter=iter,err=err,itrace=s_choice%itrace,&
|
|
 <br />       & istop=s_choice%istopc,irst=s_choice%irst)
|
|
 <br />  call prec%deallocate_wrk(info)
|
|
 <br />  call psb_barrier(ctxt)
|
|
 <br />  tslv = psb_wtime() - t1
|
|
 <br />
|
|
 <br />  ......
|
|
 <br />  call psb_gpu_exit()
|
|
 <br />  call psb_exit(ctxt)
|
|
 <br />  stop
|
|
 <br />
|
|
 <br /> 
|
|
</div>
|
|
<!--l. 580--><p class="nopar" ></div></div>
|
|
<br /> <div class="caption"
|
|
><span class="id">Listing 7: </span><span
|
|
class="content">setup of a GPU-enabled test program part three.</span></div><!--tex4ht:label?: x16-15003r7 -->
|
|
|
|
|
|
|
|
</div><hr class="endfloat" />
|
|
<!--l. 588--><p class="indent" > <span
|
|
class="cmr-12">It is very important to employ smoothers and coarsest solvers that are suited to the</span>
|
|
<span
|
|
class="cmr-12">GPU, i.e. methods that do NOT employ triangular system solve kernels. Methods that</span>
|
|
<span
|
|
class="cmr-12">satisfy this constraint include:</span>
|
|
<ul class="itemize1">
|
|
<li class="itemize"><span class="obeylines-h"><span class="verb"><span
|
|
class="cmtt-12">JACOBI</span></span></span>
|
|
</li>
|
|
<li class="itemize"><span class="obeylines-h"><span class="verb"><span
|
|
class="cmtt-12">BJAC</span></span></span> <span
|
|
class="cmr-12">with the following methods on the local blocks:</span>
|
|
<ul class="itemize2">
|
|
<li class="itemize"><span class="obeylines-h"><span class="verb"><span
|
|
class="cmtt-12">INVK</span></span></span>
|
|
</li>
|
|
<li class="itemize"><span class="obeylines-h"><span class="verb"><span
|
|
class="cmtt-12">INVT</span></span></span>
|
|
</li>
|
|
<li class="itemize"><span class="obeylines-h"><span class="verb"><span
|
|
class="cmtt-12">AINV</span></span></span></li></ul>
|
|
</li></ul>
|
|
<!--l. 600--><p class="noindent" ><span
|
|
class="cmr-12">and their </span><span
|
|
class="cmmi-12">ℓ</span><sub><span
|
|
class="cmr-8">1</span></sub> <span
|
|
class="cmr-12">variants.</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<!--l. 1--><div class="crosslinks"><p class="noindent"><span
|
|
class="cmr-12">[</span><a
|
|
href="userhtmlsu6.html" ><span
|
|
class="cmr-12">prev</span></a><span
|
|
class="cmr-12">] [</span><a
|
|
href="userhtmlsu6.html#tailuserhtmlsu6.html" ><span
|
|
class="cmr-12">prev-tail</span></a><span
|
|
class="cmr-12">] [</span><a
|
|
href="userhtmlsu7.html" ><span
|
|
class="cmr-12">front</span></a><span
|
|
class="cmr-12">] [</span><a
|
|
href="userhtmlse4.html#userhtmlsu7.html" ><span
|
|
class="cmr-12">up</span></a><span
|
|
class="cmr-12">] </span></p></div>
|
|
<!--l. 1--><p class="indent" > <a
|
|
id="tailuserhtmlsu7.html"></a>
|
|
</body></html>
|