Appendix A. Source Code of the Program
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <curand_kernel.h>
#include <time.h>
#include <stdexcept>
#include <helper_cuda.h>
// ---------------------------------------------------------------------------
// Model parameters. The first six values are meant to be edited by the user
// before compiling.
// ---------------------------------------------------------------------------
#define U 0.001       // user-adjustable: constant term of the income U + t*c (presumably the initial capital)
#define T 1           // user-adjustable: time horizon; claims are simulated while the arrival time is < T
#define lambda 1      // user-adjustable: claim arrival rate (MU = 1/lambda is the mean inter-arrival time)
#define theta 0.01    // user-adjustable: second argument (b) passed to ran_pareto
#define alpha 0.01    // user-adjustable: first argument (a) passed to ran_pareto
#define c 100000      // user-adjustable: income per unit of time (multiplies the elapsed time)

// Mean of the exponential inter-arrival time. Parenthesised and computed in
// floating point so that an integer lambda > 1 does not truncate to 0.
#define MU (1.0 / (lambda))
// Claim count per simulation above which the kernel prints a warning.
#define NTMAX 1000000
// Default total number of simulations (overridable with --sims).
#define N_SIMUL 1000000000000000LL
/**
 * Initialises one cuRAND generator state per launched thread.
 *
 * Launch layout: 1D grid of 1D blocks. There is no bounds check, so
 * rngStates must hold at least gridDim.x * blockDim.x elements.
 *
 * @param rngStates  device array receiving the initialised states
 * @param seed       seed shared by all threads; each thread uses its global
 *                   index as the cuRAND sequence number
 */
__global__ void initRNG(curandState *const rngStates,
                        const unsigned int seed)
{
    // Flat global thread index
    const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;

    // Same seed, distinct sequence number per thread, offset 0
    curand_init(seed, tid, 0, &rngStates[tid]);
}
/**
 * Draws an exponentially distributed variate by inversion: -mu * log(u),
 * with u taken from curand_uniform_double.
 *
 * @param state  generator state, advanced by one draw
 * @param mu     multiplier applied to -log(u), i.e. the mean of the distribution
 * @return       the sampled value
 */
__device__ inline double ran_exponential(curandState &state, double mu)
{
    // NOTE(review): cuRAND documents the uniform draw as lying in (0, 1],
    // which keeps log() finite -- confirm against the cuRAND version in use.
    const double u = curand_uniform_double(&state);
    return -mu * log(u);
}
/**
 * Draws a Pareto type II (Lomax) variate by inversion:
 * b * (u^(-1/a) - 1), with u taken from curand_uniform_double.
 *
 * @param state  generator state, advanced by one draw
 * @param a      shape parameter (exponent is -1/a)
 * @param b      scale parameter (multiplies the result)
 * @return       the sampled value
 */
__device__ inline double ran_pareto(curandState &state,
                                    double a, double b)
{
    const double u = curand_uniform_double(&state);
    return b * (pow(u, -1.0 / a) - 1.0);
}
/**
 * Runs n_simul independent simulations per thread and counts, for each one,
 * whether the classical ("base") process and/or the equalized process is
 * ruined before time T.
 *
 * Launch layout: 1D grid of 1D blocks. There is no bounds check, so
 * rngStates must hold at least gridDim.x * blockDim.x states and d_results
 * at least 4 * gridDim.x * blockDim.x counters.
 *
 * Counter layout for thread tid (counts are ADDED to the existing values,
 * so the caller must zero d_results before the first launch):
 *   d_results[4*tid + 0]  only the equalized process is ruined
 *   d_results[4*tid + 1]  only the classical process is ruined
 *   d_results[4*tid + 2]  both processes are ruined
 *   d_results[4*tid + 3]  neither process is ruined
 *
 * @param rngStates  per-thread cuRAND states (read, advanced, written back)
 * @param d_results  per-thread outcome counters, see layout above
 * @param n_simul    number of simulations performed by each thread
 */
__global__ void monte_carlo(curandState *const rngStates,
                            long long int *d_results,
                            long long int n_simul) {
    const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;

    // Work on a local copy of the generator state and on local counters so
    // the loop does not touch the global buffers; both are stored once at
    // the end.
    curandState rng = rngStates[tid];
    long long int n_eq_only = 0;
    long long int n_base_only = 0;
    long long int n_both = 0;
    long long int n_none = 0;

    for (long long int i_simul = 0; i_simul < n_simul; i_simul++) {
        double temps = 0;       // current claim arrival time
        double cc = 0;          // cumulated claim amounts
        double cmoy_seuil = 0;  // smallest value of income / claim count seen so far
        int n = 0;              // number of claims before T
        bool r_base = false;    // classical process ruined

        do {
            // random time step
            temps += ran_exponential(rng, MU);
            if (temps < T) {
                if (n >= NTMAX) {
                    // Warning only: the simulation carries on regardless.
                    printf("Taille insuffisante pour le vecteur des dates...\n");
                }
                // simulates and adds the amount of a claim
                cc += ran_pareto(rng, alpha, theta);

                // classical process: ruin when cumulated claims exceed income
                const double entrees = U + temps * c;
                if (entrees < cc) {
                    r_base = true;
                }
                n++;

                // equalized process: every claim equals the final average
                // cc/n, so ruin happens iff that average exceeds the minimum
                // over claims of (income at the claim) / (claim index)
                const double cmoy_seuil0 = entrees / n;
                if (n == 1 || cmoy_seuil0 < cmoy_seuil) {
                    cmoy_seuil = cmoy_seuil0;
                }
            }
        } while (temps < T);

        // average claim amount versus the equalized threshold
        bool r_eq = false;
        if (n > 0) {
            r_eq = (cc / n) > cmoy_seuil;
        }

        // classify the outcome into one of the 4 cases
        if (r_eq) {
            if (r_base) {
                n_both++;
            } else {
                n_eq_only++;
            }
        } else {
            if (r_base) {
                n_base_only++;
            } else {
                n_none++;
            }
        }
    }

    // Accumulate into the caller's buffer (same semantics as incrementing
    // the global counters once per simulation).
    d_results[4 * tid]     += n_eq_only;
    d_results[4 * tid + 1] += n_base_only;
    d_results[4 * tid + 2] += n_both;
    d_results[4 * tid + 3] += n_none;

    // Persist the advanced generator state for any later launch.
    rngStates[tid] = rng;
}
/**
 * Host driver: selects a GPU, sizes the launch from the device properties,
 * runs the Monte Carlo kernel and prints the ruin statistics.
 *
 * Command-line options (parsed with getCmdLineArgumentString):
 *   --device n   GPU index (default: gpuGetMaxGflopsDeviceId())
 *   --seed n     non-zero RNG seed (default 1234)
 *   --sims n     total number of simulations; values below 100 are read as a
 *                power of ten (0..18)
 *
 * Throws std::runtime_error on CUDA/allocation failures and
 * std::invalid_argument on bad command-line values.
 */
int main(int argc, const char **argv) {
    using std::runtime_error;
    using std::invalid_argument;

    const time_t debut = time(NULL);
    char *value = 0;
    unsigned int seed = 1234;
    int deviceCount = 0;
    cudaError_t cudaResult = cudaSuccess;
    // by default specify GPU Device == 0
    int device = 0;

    // Get number of available devices
    cudaResult = cudaGetDeviceCount(&deviceCount);
    if (cudaResult != cudaSuccess) {
        printf("could not get device count.\n");
        throw runtime_error("cudaGetDeviceCount");
    }

    // --device n
    if (getCmdLineArgumentString(argc, argv, "device", &value)) {
        device = (int)atoi(value);
        // reject negative indices as well as indices past the last device
        if (device < 0 || device >= deviceCount) {
            printf("invalid target device specified on command line (device %d does not exist).\n", device);
            throw invalid_argument("device");
        }
    } else {
        device = gpuGetMaxGflopsDeviceId();
    }
    printf("Device : %d\n", device);

    // --seed n
    if (getCmdLineArgumentString(argc, argv, "seed", &value)) {
        // Check requested seed is valid
        seed = (unsigned int)atoi(value);
        if (seed == 0) {
            printf("specified seed (%u) is invalid, must be non-zero.\n", seed);
            throw invalid_argument("seed");
        }
    }
    printf("Seed : %u\n", seed);

    // sets number of blocks and threads from the device properties
    cudaDeviceProp deviceProperties;
    cudaResult = cudaGetDeviceProperties(&deviceProperties, device);
    if (cudaResult != cudaSuccess) {
        printf("cound not get device properties for device %d.\n", device);
        throw runtime_error("cudaGetDeviceProperties");
    }
    const int n_threads_per_block = deviceProperties.maxThreadsPerBlock;
    const int n_blocks = 10 * deviceProperties.multiProcessorCount;
    printf("Blocs : %d\n", n_blocks);
    printf("Threads par bloc : %d\n", n_threads_per_block);
    const int n_threads = n_blocks * n_threads_per_block;

    long long int n_simul = N_SIMUL;
    // --sims n
    if (getCmdLineArgumentString(argc, argv, "sims", &value)) {
        n_simul = atoll(value);
        if (n_simul < 0) {
            printf("specified number of simulations (%lld) is invalid, must be non-negative.\n", n_simul);
            throw invalid_argument("sims");
        }
        if (n_simul < 100) {
            // if it is smaller than 100, we take the power of 10;
            // 10^19 and above do not fit in a signed 64-bit integer
            if (n_simul > 18) {
                printf("specified power of ten (%lld) is too large, must be at most 18.\n", n_simul);
                throw invalid_argument("sims");
            }
            long long int x = 1;
            for (int i = 0; i < n_simul; i++) x *= 10;
            n_simul = x;
        }
    }
    // Round the total down to a multiple of the thread count, but run at
    // least one simulation per thread so the statistics never divide by 0.
    long long int n_simul_par_thread = n_simul / n_threads;
    if (n_simul_par_thread < 1) {
        n_simul_par_thread = 1;
    }
    n_simul = n_simul_par_thread * n_threads;
    printf("Simulations: %lld\n", n_simul);

    // ----------------------------------------------------------------
    // select device
    cudaResult = cudaSetDevice(device);
    if (cudaResult != cudaSuccess) {
        printf("cound not select device %d.\n", device);
        throw runtime_error("cudaSetDevice");
    }

    // init RNGs
    curandState *d_rngStates = 0;
    cudaResult = cudaMalloc((void **)&d_rngStates,
                            (size_t)n_threads * sizeof(curandState));
    if (cudaResult != cudaSuccess) {
        printf("cound not allocate RNG state memory for device %d.\n", device);
        throw runtime_error("cudaAllocate");
    }
    initRNG<<<n_blocks, n_threads_per_block>>>(d_rngStates, seed);
    // a kernel launch returns nothing: launch failures must be polled
    cudaResult = cudaGetLastError();
    if (cudaResult != cudaSuccess) {
        printf("could not launch initRNG on device %d: %s.\n",
               device, cudaGetErrorString(cudaResult));
        throw runtime_error("initRNG");
    }

    // allocate results memory
    const size_t results_bytes = sizeof(long long int) * 4 * (size_t)n_threads;
    long long int *d_results = 0;
    cudaResult = cudaMalloc((void **)&d_results, results_bytes);
    if (cudaResult != cudaSuccess) {
        printf("cound not allocate results memory for device %d.\n", device);
        throw runtime_error("cudaAllocate");
    }
    // cudaMalloc does not clear memory and the kernel only adds to the
    // counters, so they must start from zero
    cudaResult = cudaMemset(d_results, 0, results_bytes);
    if (cudaResult != cudaSuccess) {
        printf("could not clear results memory for device %d.\n", device);
        throw runtime_error("cudaMemset");
    }

    // call Monte Carlo simulations
    monte_carlo<<<n_blocks, n_threads_per_block>>>(d_rngStates, d_results,
                                                   n_simul_par_thread);
    cudaResult = cudaGetLastError();
    if (cudaResult != cudaSuccess) {
        printf("could not launch monte_carlo on device %d: %s.\n",
               device, cudaGetErrorString(cudaResult));
        throw runtime_error("monte_carlo");
    }
    // wait for completion so that execution errors are reported here
    cudaResult = cudaDeviceSynchronize();
    if (cudaResult != cudaSuccess) {
        printf("monte_carlo failed on device %d: %s.\n",
               device, cudaGetErrorString(cudaResult));
        throw runtime_error("monte_carlo");
    }

    // get back results
    long long int *results = (long long int *)malloc(results_bytes);
    if (results == NULL) {
        printf("could not allocate host results memory.\n");
        throw runtime_error("malloc");
    }
    cudaResult = cudaMemcpy(results, d_results, results_bytes,
                            cudaMemcpyDeviceToHost);
    if (cudaResult != cudaSuccess) {
        printf("cound not get back results for device %d.\n", device);
        throw runtime_error("cudaMemcpy");
    }

    // sum results up: fold every thread's 4 counters into the first 4 slots
    for (int n = 1; n < n_threads; n++) {
        for (int i = 0; i < 4; i++) {
            results[i] += results[i + n * 4];
        }
    }

    // output stats
    printf("EQ + BASE + DEUX + AUCUN = %lld + %lld + %lld + %lld\n",
           results[0], results[1], results[2], results[3]);
    printf("P(ruine_base) = %g\n", ((double)(results[1] + results[2])) / n_simul);
    printf("P(ruine_eq) = %g\n", ((double)(results[0] + results[2])) / n_simul);

    // X = ruine_eq - ruine_base takes values in {-1, 0, 1}:
    // E(X) = (EQ - BASE) / n and E(X^2) = (EQ + BASE) / n
    const double esp = ((double)(results[0] - results[1])) / n_simul;
    const double esp2 = ((double)(results[0] + results[1])) / n_simul;
    printf("E(ruine_eq - ruine_base) = %g\n", esp);
    // 95% confidence interval
    printf("E(ruine_eq - ruine_base) = %g +/- %g\n",
           esp, 1.96 * sqrt((esp2 - esp * esp) / n_simul));

    // time_t is not guaranteed to be int-sized: convert explicitly
    printf("Secondes ecoulees : %ld\n", (long)(time(NULL) - debut));

    // release host and device buffers
    free(results);
    cudaFree(d_results);
    cudaFree(d_rngStates);
    return 0;
}