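On an Intel HD GPU with the Beignet OpenCL drivers, both indexing (`v[1,1]`) and matrix multiplication (`v * v`) on a `GPUArray` fail because the generated OpenCL kernel does not compile: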
julia> v=GPUArray(rand(2,2))
GPUArray with ctx: CLContext: Intel(R) HD Graphics 5500 BroadWell U-Processor GT2:
2×2 Array{Float64,2}:
0.841527 0.436476
0.23317 0.0211867
julia> v[1,1]
Couldn't compile kernel:
1 : // dependencies
2 : // (GPUArrays.linear_index, Tuple{Transpiler.CLIntrinsics.CLArray{Float64,2},Float32})
3 : uint linear_index_1(__global float * restrict x2unused2, float state)
4 : {
5 : return (get_global_id)(0) + (uint){1};
6 : }
7 : // Type{UInt32}
8 : typedef int Type3UInt324; // placeholder type instance
9 : __constant Type3UInt324 TYP_INST_Type3UInt324;
10 :
11 : // (GPUArrays._ind2sub, Tuple{Tuple{UInt32},UInt32})
12 : uint _ind2sub_5(uint indslast, uint ind)
13 : {
14 : return (uint){ind + (uint){1}};
15 : }
16 : // (Base.argtail, Tuple{UInt32,UInt32})
17 : uint argtail_6(uint x, uint rest)
18 : {
19 : return rest;
20 : }
21 : // (Base.tail, Tuple{Tuple{UInt32,UInt32}})
22 : uint tail_7(uint2 x)
23 : {
24 : uint2 x22_apply_tmp2659;
25 : x22_apply_tmp2659 = x;
26 : return (argtail_6)(x22_apply_tmp2659.s0, (uint){x22_apply_tmp2659.s1});
27 : }
28 : // (GPUArrays._ind2sub, Tuple{Tuple{UInt32,UInt32},UInt32})
29 : uint2 _ind2sub_2(uint2 inds, uint ind)
30 : {
31 : uint l;
32 : uint f;
33 : uint indnext;
34 : uint r1;
35 : r1 = inds.s0;
36 : indnext = ind / r1;
37 : f = (uint){1};
38 : l = r1;
39 : uint x22_apply_tmp2658;
40 : x22_apply_tmp2658 = (_ind2sub_5)((tail_7)(inds), indnext);
41 : return (uint2){(ind - l * indnext) + f, x22_apply_tmp2658};
42 : }
43 : // (GPUArrays.gpu_ind2sub, Tuple{Tuple{UInt32,UInt32},UInt32})
44 : uint2 gpu_ind2sub_2(uint2 dims, uint ind)
45 : {
46 : return (_ind2sub_2)((uint2){dims}, ind - (uint){1});
47 : }
48 : // Tuple{}
49 : typedef int Tuple_; // empty type emitted as an int
50 : // (Base.argtail, Tuple{UInt32})
51 : Tuple_ argtail_8(uint x, Tuple_ rest)
52 : {
53 : return rest;
54 : }
55 : // (Base.tail, Tuple{Tuple{UInt32}})
56 : Tuple_ tail_9(uint x)
57 : {
58 : uint x22_apply_tmp2663;
59 : x22_apply_tmp2663 = x;
60 : return (argtail_8)(x22_apply_tmp2663, (Tuple_){0.0f});
61 : }
62 : // (GPUArrays._sub2ind, Tuple{Tuple{},UInt32,UInt32})
63 : uint _sub2ind_10(Tuple_ x, uint L, uint ind)
64 : {
65 : return ind;
66 : }
67 : // (GPUArrays._sub2ind, Tuple{Tuple{UInt32},UInt32,UInt32,UInt32})
68 : uint _sub2ind_11(uint inds, uint L, uint ind, uint i, Tuple_ I)
69 : {
70 : uint r1;
71 : r1 = inds;
72 : Tuple_ x22_apply_tmp2662;
73 : x22_apply_tmp2662 = I;
74 : return (_sub2ind_10)((tail_9)(inds), L * r1, ind + (i - (uint){1}) * L);
75 : }
76 : // (GPUArrays._sub2ind, Tuple{Tuple{UInt32,UInt32},UInt32,UInt32,UInt32,UInt32})
77 : uint _sub2ind_12(uint2 inds, uint L, uint ind, uint i, uint I)
78 : {
79 : uint r1;
80 : r1 = inds.s0;
81 : uint x22_apply_tmp2661;
82 : x22_apply_tmp2661 = I;
83 : return (_sub2ind_11)((tail_7)(inds), L * r1, ind + (i - (uint){1}) * L, x22_apply_tmp2661, (Tuple_){0.0f});
84 : }
85 : // (GPUArrays.gpu_sub2ind, Tuple{Tuple{UInt32,UInt32},Tuple{UInt32,UInt32}})
86 : uint gpu_sub2ind_3(uint2 dims, uint2 I)
87 : {
88 : uint2 x22_apply_tmp2660;
89 : x22_apply_tmp2660 = I;
90 : return (_sub2ind_12)((uint2){dims}, (uint){1}, (uint){1}, x22_apply_tmp2660.s0, (uint){x22_apply_tmp2660.s1});
91 : }
92 : // ########################
93 : // Main inner function
94 : // (GPUArrays.copy_kernel!, (Float32, Transpiler.CLIntrinsics.CLArray{Float64,2}, Tuple{UInt32,UInt32}, Transpiler.CLIntrinsics.CLArray{Float64,2}, Tuple{UInt32,UInt32}, Tuple{UInt32,UInt32}, Tuple{UInt32,UInt32}, Tuple{UInt32,UInt32}, UInt32))
95 : __kernel void copy_kernel1_4(float state, __global float * restrict dest, uint2 dest_offsets, __global float * restrict src, uint2 src_offsets, uint2 shape, uint2 shape_dest, uint2 shape_source, uint length)
96 : {
97 : uint src_idx;
98 : uint dest_idx;
99 : uint2 idx;
100 : uint i;
101 : i = (linear_index_1)(dest, state);
102 : if(i <= length){
103 : idx = (gpu_ind2sub_2)(shape, i);
104 : dest_idx = (gpu_sub2ind_3)(shape_dest, idx + dest_offsets);
105 : src_idx = (gpu_sub2ind_3)(shape_source, idx + src_offsets);
106 : ;
107 : float _ssavalue_0;
108 : _ssavalue_0 = (src)[src_idx - 0x00000001];
109 : (dest)[dest_idx - 0x00000001] = _ssavalue_0;
110 : ;
111 : };
112 : ;
113 : }
114 :
With following build error:
stringInput.cl:5:13: error: taking address of function is not allowed
stringInput.cl:9:25: error: variable in constant address space must be initialized
stringInput.cl:26:13: error: taking address of function is not allowed
stringInput.cl:40:26: error: taking address of function is not allowed
stringInput.cl:40:39: error: taking address of function is not allowed
stringInput.cl:46:13: error: taking address of function is not allowed
stringInput.cl:60:13: error: taking address of function is not allowed
stringInput.cl:74:13: error: taking address of function is not allowed
stringInput.cl:74:27: error: taking address of function is not allowed
stringInput.cl:83:13: error: taking address of function is not allowed
stringInput.cl:83:27: error: taking address of function is not allowed
stringInput.cl:90:13: error: taking address of function is not allowed
stringInput.cl:101:10: error: taking address of function is not allowed
stringInput.cl:103:16: error: taking address of function is not allowed
stringInput.cl:104:21: error: taking address of function is not allowed
stringInput.cl:105:20: error: taking address of function is not allowed
ERROR: CLError(code=-11, CL_BUILD_PROGRAM_FAILURE)
Stacktrace:
[1] macro expansion at /home/carlo/.julia/v0.6/OpenCL/src/macros.jl:6 [inlined]
[2] #build!#113(::String, ::Bool, ::Function, ::OpenCL.cl.Program) at /home/carlo/.julia/v0.6/OpenCL/src/program.jl:101
[3] (::OpenCL.cl.#kw##build!)(::Array{Any,1}, ::OpenCL.cl.#build!, ::OpenCL.cl.Program) at ./<missing>:0
[4] (::Transpiler.##41#42{Tuple{Float32,GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},UInt32},GPUArrays.#copy_kernel!,OpenCL.cl.CmdQueue,OpenCL.cl.Context,NTuple{9,DataType}})() at /home/carlo/.julia/v0.6/Transpiler/src/clike/opencl/compilation.jl:96
[5] get!(::Transpiler.##41#42{Tuple{Float32,GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},UInt32},GPUArrays.#copy_kernel!,OpenCL.cl.CmdQueue,OpenCL.cl.Context,NTuple{9,DataType}}, ::Dict{Any,Transpiler.CLFunction}, ::Tuple{GPUArrays.#copy_kernel!,NTuple{9,DataType}}) at ./dict.jl:449
[6] Transpiler.CLFunction(::Function, ::Tuple{Float32,GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},UInt32}, ::OpenCL.cl.CmdQueue) at /home/carlo/.julia/v0.6/Transpiler/src/clike/opencl/compilation.jl:77
[7] gpu_call(::Function, ::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::Tuple{GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},UInt32}, ::Int64, ::Void) at /home/carlo/.julia/v0.6/GPUArrays/src/backends/opencl/opencl.jl:220
[8] copy!(::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::CartesianRange{CartesianIndex{2}}, ::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::CartesianRange{CartesianIndex{2}}) at /home/carlo/.julia/v0.6/GPUArrays/src/abstractarray.jl:329
[9] copy!(::Array{Float64,2}, ::CartesianRange{CartesianIndex{2}}, ::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::CartesianRange{CartesianIndex{2}}) at /home/carlo/.julia/v0.6/GPUArrays/src/abstractarray.jl:360
[10] getindex(::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::Int64, ::Int64) at /home/carlo/.julia/v0.6/GPUArrays/src/abstractarray.jl:401
[11] macro expansion at ./REPL.jl:97 [inlined]
[12] (::Base.REPL.##1#2{Base.REPL.REPLBackend})() at ./event.jl:73
julia> v * v
Couldn't compile kernel:
1 : // dependencies
2 : // (GPUArrays.linear_index, Tuple{Transpiler.CLIntrinsics.CLArray{Float64,2},Float32})
3 : uint linear_index_1(__global float * restrict x2unused2, float state)
4 : {
5 : return (get_global_id)(0) + (uint){1};
6 : }
7 : // Type{UInt32}
8 : typedef int Type3UInt324; // placeholder type instance
9 : __constant Type3UInt324 TYP_INST_Type3UInt324;
10 :
11 : // (GPUArrays._ind2sub, Tuple{Tuple{UInt32},UInt32})
12 : uint _ind2sub_5(uint indslast, uint ind)
13 : {
14 : return (uint){ind + (uint){1}};
15 : }
16 : // (Base.argtail, Tuple{UInt32,UInt32})
17 : uint argtail_6(uint x, uint rest)
18 : {
19 : return rest;
20 : }
21 : // (Base.tail, Tuple{Tuple{UInt32,UInt32}})
22 : uint tail_7(uint2 x)
23 : {
24 : uint2 x22_apply_tmp2671;
25 : x22_apply_tmp2671 = x;
26 : return (argtail_6)(x22_apply_tmp2671.s0, (uint){x22_apply_tmp2671.s1});
27 : }
28 : // (GPUArrays._ind2sub, Tuple{Tuple{UInt32,UInt32},UInt32})
29 : uint2 _ind2sub_2(uint2 inds, uint ind)
30 : {
31 : uint l;
32 : uint f;
33 : uint indnext;
34 : uint r1;
35 : r1 = inds.s0;
36 : indnext = ind / r1;
37 : f = (uint){1};
38 : l = r1;
39 : uint x22_apply_tmp2670;
40 : x22_apply_tmp2670 = (_ind2sub_5)((tail_7)(inds), indnext);
41 : return (uint2){(ind - l * indnext) + f, x22_apply_tmp2670};
42 : }
43 : // (GPUArrays.gpu_ind2sub, Tuple{Tuple{UInt32,UInt32},UInt32})
44 : uint2 gpu_ind2sub_2(uint2 dims, uint ind)
45 : {
46 : return (_ind2sub_2)((uint2){dims}, ind - (uint){1});
47 : }
48 : // Tuple{}
49 : typedef int Tuple_; // empty type emitted as an int
50 : // (Base.argtail, Tuple{UInt32})
51 : Tuple_ argtail_8(uint x, Tuple_ rest)
52 : {
53 : return rest;
54 : }
55 : // (Base.tail, Tuple{Tuple{UInt32}})
56 : Tuple_ tail_9(uint x)
57 : {
58 : uint x22_apply_tmp2675;
59 : x22_apply_tmp2675 = x;
60 : return (argtail_8)(x22_apply_tmp2675, (Tuple_){0.0f});
61 : }
62 : // (GPUArrays._sub2ind, Tuple{Tuple{},UInt32,UInt32})
63 : uint _sub2ind_10(Tuple_ x, uint L, uint ind)
64 : {
65 : return ind;
66 : }
67 : // (GPUArrays._sub2ind, Tuple{Tuple{UInt32},UInt32,UInt32,UInt32})
68 : uint _sub2ind_11(uint inds, uint L, uint ind, uint i, Tuple_ I)
69 : {
70 : uint r1;
71 : r1 = inds;
72 : Tuple_ x22_apply_tmp2674;
73 : x22_apply_tmp2674 = I;
74 : return (_sub2ind_10)((tail_9)(inds), L * r1, ind + (i - (uint){1}) * L);
75 : }
76 : // (GPUArrays._sub2ind, Tuple{Tuple{UInt32,UInt32},UInt32,UInt32,UInt32,UInt32})
77 : uint _sub2ind_12(uint2 inds, uint L, uint ind, uint i, uint I)
78 : {
79 : uint r1;
80 : r1 = inds.s0;
81 : uint x22_apply_tmp2673;
82 : x22_apply_tmp2673 = I;
83 : return (_sub2ind_11)((tail_7)(inds), L * r1, ind + (i - (uint){1}) * L, x22_apply_tmp2673, (Tuple_){0.0f});
84 : }
85 : // (GPUArrays.gpu_sub2ind, Tuple{Tuple{UInt32,UInt32},Tuple{UInt32,UInt32}})
86 : uint gpu_sub2ind_3(uint2 dims, uint2 I)
87 : {
88 : uint2 x22_apply_tmp2672;
89 : x22_apply_tmp2672 = I;
90 : return (_sub2ind_12)((uint2){dims}, (uint){1}, (uint){1}, x22_apply_tmp2672.s0, (uint){x22_apply_tmp2672.s1});
91 : }
92 : // ########################
93 : // Main inner function
94 : // (GPUArrays.copy_kernel!, (Float32, Transpiler.CLIntrinsics.CLArray{Float64,2}, Tuple{UInt32,UInt32}, Transpiler.CLIntrinsics.CLArray{Float64,2}, Tuple{UInt32,UInt32}, Tuple{UInt32,UInt32}, Tuple{UInt32,UInt32}, Tuple{UInt32,UInt32}, UInt32))
95 : __kernel void copy_kernel1_4(float state, __global float * restrict dest, uint2 dest_offsets, __global float * restrict src, uint2 src_offsets, uint2 shape, uint2 shape_dest, uint2 shape_source, uint length)
96 : {
97 : uint src_idx;
98 : uint dest_idx;
99 : uint2 idx;
100 : uint i;
101 : i = (linear_index_1)(dest, state);
102 : if(i <= length){
103 : idx = (gpu_ind2sub_2)(shape, i);
104 : dest_idx = (gpu_sub2ind_3)(shape_dest, idx + dest_offsets);
105 : src_idx = (gpu_sub2ind_3)(shape_source, idx + src_offsets);
106 : ;
107 : float _ssavalue_0;
108 : _ssavalue_0 = (src)[src_idx - 0x00000001];
109 : (dest)[dest_idx - 0x00000001] = _ssavalue_0;
110 : ;
111 : };
112 : ;
113 : }
114 :
With following build error:
stringInput.cl:5:13: error: taking address of function is not allowed
stringInput.cl:9:25: error: variable in constant address space must be initialized
stringInput.cl:26:13: error: taking address of function is not allowed
stringInput.cl:40:26: error: taking address of function is not allowed
stringInput.cl:40:39: error: taking address of function is not allowed
stringInput.cl:46:13: error: taking address of function is not allowed
stringInput.cl:60:13: error: taking address of function is not allowed
stringInput.cl:74:13: error: taking address of function is not allowed
stringInput.cl:74:27: error: taking address of function is not allowed
stringInput.cl:83:13: error: taking address of function is not allowed
stringInput.cl:83:27: error: taking address of function is not allowed
stringInput.cl:90:13: error: taking address of function is not allowed
stringInput.cl:101:10: error: taking address of function is not allowed
stringInput.cl:103:16: error: taking address of function is not allowed
stringInput.cl:104:21: error: taking address of function is not allowed
stringInput.cl:105:20: error: taking address of function is not allowed
ERROR: CLError(code=-11, CL_BUILD_PROGRAM_FAILURE)
Stacktrace:
[1] macro expansion at /home/carlo/.julia/v0.6/OpenCL/src/macros.jl:6 [inlined]
[2] #build!#113(::String, ::Bool, ::Function, ::OpenCL.cl.Program) at /home/carlo/.julia/v0.6/OpenCL/src/program.jl:101
[3] (::OpenCL.cl.#kw##build!)(::Array{Any,1}, ::OpenCL.cl.#build!, ::OpenCL.cl.Program) at ./<missing>:0
[4] (::Transpiler.##41#42{Tuple{Float32,GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},UInt32},GPUArrays.#copy_kernel!,OpenCL.cl.CmdQueue,OpenCL.cl.Context,NTuple{9,DataType}})() at /home/carlo/.julia/v0.6/Transpiler/src/clike/opencl/compilation.jl:96
[5] get!(::Transpiler.##41#42{Tuple{Float32,GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},UInt32},GPUArrays.#copy_kernel!,OpenCL.cl.CmdQueue,OpenCL.cl.Context,NTuple{9,DataType}}, ::Dict{Any,Transpiler.CLFunction}, ::Tuple{GPUArrays.#copy_kernel!,NTuple{9,DataType}}) at ./dict.jl:449
[6] Transpiler.CLFunction(::Function, ::Tuple{Float32,GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},UInt32}, ::OpenCL.cl.CmdQueue) at /home/carlo/.julia/v0.6/Transpiler/src/clike/opencl/compilation.jl:77
[7] gpu_call(::Function, ::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::Tuple{GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},Tuple{UInt32,UInt32},UInt32}, ::Int64, ::Void) at /home/carlo/.julia/v0.6/GPUArrays/src/backends/opencl/opencl.jl:220
[8] copy!(::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::CartesianRange{CartesianIndex{2}}, ::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::CartesianRange{CartesianIndex{2}}) at /home/carlo/.julia/v0.6/GPUArrays/src/abstractarray.jl:329
[9] copy!(::Array{Float64,2}, ::CartesianRange{CartesianIndex{2}}, ::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::CartesianRange{CartesianIndex{2}}) at /home/carlo/.julia/v0.6/GPUArrays/src/abstractarray.jl:360
[10] getindex(::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::Int64, ::Int64) at /home/carlo/.julia/v0.6/GPUArrays/src/abstractarray.jl:401
[11] matmul2x2!(::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::Char, ::Char, ::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}) at ./linalg/matmul.jl:670
[12] gemm_wrapper!(::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::Char, ::Char, ::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}) at ./linalg/matmul.jl:360
[13] A_mul_B!(::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}) at ./linalg/matmul.jl:148
[14] *(::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}, ::GPUArrays.GPUArray{Float64,2,OpenCL.cl.Buffer{Float64},GPUArrays.CLBackend.CLContext}) at ./linalg/matmul.jl:146
[15] macro expansion at ./REPL.jl:97 [inlined]
[16] (::Base.REPL.##1#2{Base.REPL.REPLBackend})() at ./event.jl:73