以下はネットで見つけたサンプルコードですが、こちらの Xeon Phi マシンでオフロード計算できました。
#include<stdio.h>
#include<stdlib.h>
#define INTERVALS 1000000000
/*
 * Approximate pi by midpoint-rule integration of 4 / (1 + x^2) over [0, 1],
 * using INTERVALS equal-width slices, and print the result.
 *
 * The summation loop is marked for offload to the "mic" target and is
 * parallelised with an OpenMP "+" reduction on the running sum.
 *
 * Returns 0.
 */
int main( void )
{
    int i;
    double x;
    double pi = 0.0;
    double step = 1.0 / INTERVALS;   /* width of one slice */

#pragma offload target(mic)
#pragma omp parallel for private(x) reduction(+:pi)
    for( i = 0; i < INTERVALS; i++ )
    {
        /* Slice i spans [i*step, (i+1)*step]; because i starts at 0 the
         * midpoint is (i + 0.5) * step, which keeps every sample in [0, 1]. */
        x = step * ((double)i + 0.5 );
        pi += 4.0 / (1.0 + x * x );
    }

    /* Scale the accumulated heights by the slice width to get the area. */
    pi = step * pi;
    printf( "PI = %lf\n", pi );
    return 0;
}
./sample_offload
[Offload] [MIC 0] [File] sample_offload.c
[Offload] [MIC 0] [Line] 12
[Offload] [MIC 0] [Tag] Tag 0
[Offload] [HOST] [Tag 0] [CPU Time] 0.665600(seconds)
[Offload] [MIC 0] [Tag 0] [CPU->MIC Data] 36 (bytes)
[Offload] [MIC 0] [Tag 0] [MIC Time] 0.245841(seconds)
[Offload] [MIC 0] [Tag 0] [MIC->CPU Data] 36 (bytes)
PI = 3.141593
#include<stdio.h>
#include<stdlib.h>
#define INTERVALS 1000000000
/*
 * Approximate pi by midpoint-rule integration of 4 / (1 + x^2) over [0, 1],
 * using INTERVALS equal-width slices, and print the result.
 *
 * The summation loop is marked for offload to the "mic" target and is
 * parallelised with an OpenMP "+" reduction on the running sum.
 *
 * Returns 0.
 */
int main( void )
{
    int i;
    double x;
    double pi = 0.0;
    double step = 1.0 / INTERVALS;   /* width of one slice */

#pragma offload target(mic)
#pragma omp parallel for private(x) reduction(+:pi)
    for( i = 0; i < INTERVALS; i++ )
    {
        /* Slice i spans [i*step, (i+1)*step]; because i starts at 0 the
         * midpoint is (i + 0.5) * step, which keeps every sample in [0, 1]. */
        x = step * ((double)i + 0.5 );
        pi += 4.0 / (1.0 + x * x );
    }

    /* Scale the accumulated heights by the slice width to get the area. */
    pi = step * pi;
    printf( "PI = %lf\n", pi );
    return 0;
}
./sample_offload
[Offload] [MIC 0] [File] sample_offload.c
[Offload] [MIC 0] [Line] 12
[Offload] [MIC 0] [Tag] Tag 0
[Offload] [HOST] [Tag 0] [CPU Time] 0.665600(seconds)
[Offload] [MIC 0] [Tag 0] [CPU->MIC Data] 36 (bytes)
[Offload] [MIC 0] [Tag 0] [MIC Time] 0.245841(seconds)
[Offload] [MIC 0] [Tag 0] [MIC->CPU Data] 36 (bytes)
PI = 3.141593