generate.story(z, r'.\images\ex1.jpg')
C:\Users\Dell\Anaconda3\envs\tensorflow-gpu\lib\site-packages\skimage\transform_warps.py:84: UserWarning: The default mode, 'constant', will be changed to 'reflect' in skimage 0.15.
warn("The default mode, 'constant', will be changed to 'reflect' in "
C:\Users\Dell\Anaconda3\envs\tensorflow-gpu\lib\site-packages\lasagne\layers\conv.py:489: UserWarning: The image_shape
keyword argument to tensor.nnet.conv2d
is deprecated, it has been renamed to input_shape
.
border_mode=border_mode)
1 #include <Python.h>
2 #include
3 #include "theano_mod_helper.h"
4 #include "cuda_ndarray.cuh"
5 #include <math.h>
6 #include <numpy/arrayobject.h>
7 #include <numpy/arrayscalars.h>
8 #include "cudnn.h"
9 #include "cudnn_helper.h"
10 //////////////////////
11 //// Support Code
12 //////////////////////
13
14 void _capsule_destructor(PyObject o) {
15 void d = PyCapsule_GetContext(o);
16 void p = PyCapsule_GetPointer(o, NULL);
17 void (f)(void ) = (void ()(void ))d;
18 if (f != NULL) f(p);
19 }
20
21
22 static cudnnHandle_t _handle = NULL;
23
24
25 static int
26 c_set_tensorNd(CudaNdarray var, cudnnTensorDescriptor_t desc) {
27
28 int dim = CudaNdarray_NDIM(var);
29 int strides = (int )malloc(dim * sizeof(int));
30 int default_str = 1;
31 int return_value = 0;
32
33 if (strides != NULL) {
34 for (int i = dim-1; i >= 0; i--)
35 {
36 if (CudaNdarray_HOST_STRIDES(var)[i])
37 strides[i] = CudaNdarray_HOST_STRIDES(var)[i];
38 else
39 strides[i] = default_str;
40 default_str = CudaNdarray_HOST_DIMS(var)[i];
41 }
42
43 cudnnStatus_t err = cudnnSetTensorNdDescriptor(desc, CUDNN_DATA_FLOAT, dim,
44 CudaNdarray_HOST_DIMS(var),
45 strides);
46
47
48 if (err != CUDNN_STATUS_SUCCESS) {
49 PyErr_Format(PyExc_RuntimeError,
50 "Could not set tensorNd descriptor: %s"
51 "dim=%d",
52 cudnnGetErrorString(err), dim);
53
54 return_value = -1;
55 }
56 } else {
57 PyErr_Format(PyExc_MemoryError,
58 "Could not allocate memory for strides array of size %d.",
59 dim);
60
61 return_value = -1;
62 }
63
64 free(strides);
65 return return_value;
66 }
67
68
69 static int
70 c_set_filterNd(CudaNdarray var, cudnnFilterDescriptor_t desc) {
71 if (!CudaNdarray_is_c_contiguous(var)) {
72 PyErr_SetString(PyExc_ValueError,
73 "Only contiguous filters (kernels) are supported.");
74 return -1;
75 }
76 int dim = CudaNdarray_NDIM(var);
77 cudnnStatus_t err = cudnnSetFilterNdDescriptor_v4(desc,
78 CUDNN_DATA_FLOAT,
79 CUDNN_TENSOR_NCHW,
80 dim,
81 CudaNdarray_HOST_DIMS(var));
82 if (err != CUDNN_STATUS_SUCCESS) {
83 PyErr_Format(PyExc_RuntimeError,
84 "Could not set filter descriptor: %s."
85 " dims= %d",
86 cudnnGetErrorString(err), dim);
87 return -1;
88 }
89 return 0;
90 }
91
92
93
94 namespace {
95 struct __struct_compiled_op_md48cd7c806151b0105e1fa2b573cc03b {
96 PyObject __ERROR;
97
98 PyObject storage_V3;
99 PyObject storage_V5;
100 PyObject storage_V7;
101 PyObject storage_V9;
102 PyObject storage_V11;
103 PyObject storage_V13;
104 PyObject storage_V1;
105
106 #define DTYPE_INPUT_0 npy_float32
107 #define TYPENUM_INPUT_0 11
108 #define ITEMSIZE_INPUT_0 4
109 #define DTYPE_INPUT_1 npy_float32
110 #define TYPENUM_INPUT_1 11
111 #define ITEMSIZE_INPUT_1 4
112 #define DTYPE_INPUT_2 npy_float32
113 #define TYPENUM_INPUT_2 11
114 #define ITEMSIZE_INPUT_2 4
115 #define DTYPE_INPUT_4 npy_float32
116 #define TYPENUM_INPUT_4 11
117 #define ITEMSIZE_INPUT_4 4
118 #define DTYPE_INPUT_5 npy_float32
119 #define TYPENUM_INPUT_5 11
120 #define ITEMSIZE_INPUT_5 4
121 #define DTYPE_OUTPUT_0 npy_float32
122 #define TYPENUM_OUTPUT_0 11
123 #define ITEMSIZE_OUTPUT_0 4
124 #define APPLY_SPECIFIC(str) str##_node_md48cd7c806151b0105e1fa2b573cc03b_0
125 #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
126 #define CHOOSE_ALGO 0
127 #define CHOOSE_ALGO_ONCE 0
128 #define CHOOSE_ALGO_TIME 0
129 #define CONV_INPLACE 1
130
131 cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
132 cudnnTensorDescriptor_t APPLY_SPECIFIC(output);
133 cudnnFilterDescriptor_t APPLY_SPECIFIC(kerns);
134
135 / Keep track, from one execution to another, of the dimension of the data
136 and the algorithms, if any, that were selected according to these dimensions
137 and according to the amount of memory available at that time.
138
139 Note : Implementation selection for backward convolution only exists starting
140 at V3.
141 /
142 int APPLY_SPECIFIC(previous_input_shape)[5];
143 int APPLY_SPECIFIC(previous_kerns_shape)[5];
144 int APPLY_SPECIFIC(previous_output_shape)[5];
145 bool APPLY_SPECIFIC(previous_algo_set);
146 cudnnConvolutionFwdAlgo_t APPLY_SPECIFIC(previous_algo);
147 cudnnConvolutionBwdFilterAlgo_t APPLY_SPECIFIC(previous_bwd_f_algo);
148 cudnnConvolutionBwdDataAlgo_t APPLY_SPECIFIC(previous_bwd_d_algo);
149
150
151
152 int
153 APPLY_SPECIFIC(conv_fwd)(CudaNdarray input, CudaNdarray kerns,
154 CudaNdarray om, cudnnConvolutionDescriptor_t desc,
155 float alpha, float beta, CudaNdarray output) {
156
157 cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
158 if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) {
159 PyErr_SetString(PyExc_ValueError,
160 "GpuDnnConv images and kernel must have the same stack size\n");
161 return 1;
162 }
163
164 int nb_dim = CudaNdarray_NDIM(input);
165
166 #ifdef CONV_INPLACE
167 Py_XDECREF(output);
168 output = om;
169 Py_INCREF(output);
170 #else
171 if (CudaNdarray_prep_output(output, nb_dim, CudaNdarray_HOST_DIMS(om)) != 0)
172 return 1;
173 if (beta != 0.0 && CudaNdarray_CopyFromCudaNdarray(output, om))
174 return 1;
175 #endif
176
177 if (CudaNdarray_DIMS(input)[0] == 0 || CudaNdarray_DIMS(kerns)[0] == 0 || CudaNdarray_DIMS(kerns)[1] == 0) {
178 cudaError_t err2 = cudaMemset((output)->devdata, 0,
179 CudaNdarray_SIZE(output) * sizeof(real));
180 if (err2 != cudaSuccess) {
181 PyErr_Format(PyExc_RuntimeError,
182 "GpuDnnConv could not fill the output with zeros: %s",
183 cudaGetErrorString(err2));
184 return 1;
185 }
186 return 0;
187 }
188
189 if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
190 return 1;
191 if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1)
192 return 1;
193 if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
194 return 1;
195
196 {
197 size_t worksize;
198 void workspace;
199 cudnnConvolutionFwdAlgo_t chosen_algo;
200
201
202 if (CHOOSE_ALGO)
203 {
204
205 // A new convolution implementation should be selected, based either on
206 // timing or heuristics if in one of the two following cases :
207 // - The implementation should only be chosen during the first execution
208 // of an apply node and this is the first execution of the apply node.
209 // - The implementation should be chosen as often as necessary and the
210 // shapes of the inputs differ from the last time an implementation
211 // was chosen.
212 bool reuse_previous_algo;
213 if (CHOOSE_ALGO_ONCE)
214 {
215 // Only choose a new implementation of none has been chosen before.
216 reuse_previous_algo = APPLY_SPECIFIC(previous_algo_set);
217 }
218 else
219 {
220 // Reuse the previous implementation if the inputs and the kernels
221 // have the same shapes as they had when the previous implementation
222 // was selected
223 bool same_shapes = true;
224 for (int i = 0; (i < nb_dim) && same_shapes; i++)
225 {
226 same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] ==
227 APPLY_SPECIFIC(previous_input_shape)[i]);
228 same_shapes &= (CudaNdarray_HOST_DIMS(kerns)[i] ==
229 APPLY_SPECIFIC(previous_kerns_shape)[i]);
230 }
231 reuse_previous_algo = same_shapes;
232 }
233
234 // If the previously choosen implementation can't be reused, select a
235 // new one based on the shapes of the current inputs
236 if (!reuse_previous_algo)
237 {
238
239 // Obtain a convolution algorithm appropriate for the input and kernel
240 // shapes. Either by choosing one according to heuristics or by making
241 // cuDNN time every implementation and choose the best one.
242 if (CHOOSE_ALGO_TIME)
243 {
244 // Time the different implementations to choose the best one
245 int requestedCount = 1;
246 int count;
247 cudnnConvolutionFwdAlgoPerf_t choosen_algo_perf;
248 err = cudnnFindConvolutionForwardAlgorithm(_handle,
249 APPLY_SPECIFIC(input),
250 APPLY_SPECIFIC(kerns),
251 desc,
252 APPLY_SPECIFIC(output),
253 requestedCount,
254 &count,
255 &choosen_algo_perf);
256 if (err != CUDNN_STATUS_SUCCESS) {
257 PyErr_Format(PyExc_RuntimeError,
258 "GpuDnnConv: error selecting convolution algo: %s",
259 cudnnGetErrorString(err));
260 return 1;
261 }
262
263 chosen_algo = choosen_algo_perf.algo;
264 }
265 else
266 {
267 // The implementation should be chosen using heuristics based on the
268 // input shapes and the amount of memory available.
269
270 // Get the amount of available memory
271 size_t free = 0, total = 0;
272 cudaError_t err2 = cudaMemGetInfo(&free, &total);
273 if (err2 != cudaSuccess){
274 cudaGetLastError();
275 fprintf(stderr,
276 "Error when trying to find the memory information"
277 " on the GPU: %s\n", cudaGetErrorString(err2));
278 return 1;
279 }
280
281 // Use heuristics to choose the implementation
282 err = cudnnGetConvolutionForwardAlgorithm(_handle,
283 APPLY_SPECIFIC(input),
284 APPLY_SPECIFIC(kerns),
285 desc,
286 APPLY_SPECIFIC(output),
287 CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
288 free,
289 &chosen_algo);
290
291 if (err != CUDNN_STATUS_SUCCESS) {
292 PyErr_Format(PyExc_RuntimeError,
293 "GpuDnnConv: error selecting convolution algo: %s",
294 cudnnGetErrorString(err));
295 return 1;
296 }
297 }
298
299 // Store the shapes of the inputs and kernels as well as the chosen
300 // algorithm for future use.
301 APPLY_SPECIFIC(previous_algo) = chosen_algo;
302 APPLY_SPECIFIC(previous_algo_set) = true;
303 for (int i = 0; i < nb_dim; i++)
304 {
305 APPLY_SPECIFIC(previous_input_shape)[i] =
306 CudaNdarray_HOST_DIMS(input)[i];
307 APPLY_SPECIFIC(previous_kerns_shape)[i] =
308 CudaNdarray_HOST_DIMS(kerns)[i];
309 }
310 }
311 else
312 {
313 // Reuse the previously chosen convolution implementation
314 chosen_algo = APPLY_SPECIFIC(previous_algo);
315 }
316 }
317 else
318 {
319 chosen_algo = CONV_ALGO;
320 }
321
322 if (0){
323 char * a;
324 switch(chosen_algo){
325 case CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM:
326 a = "implicit gemm (0)";
327 break;
328 case CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM:
329 a = "precomp gemm (1)";
330 break;
331 case CUDNN_CONVOLUTION_FWD_ALGO_GEMM:
332 a = "gemm (2)";
333 break;
334 case CUDNN_CONVOLUTION_FWD_ALGO_DIRECT:
335 a = "direct (3)";
336 break;
337 case CUDNN_CONVOLUTION_FWD_ALGO_FFT:
338 a = "fft (4)";
339 break;
340 case CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING:
341 a = "fft tiling (5)";
342 break;
343 #if CUDNN_VERSION > 5000
344 case CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD:
345 a = "winograd (6)";
346 break;
347 #endif
348 }
349 printf("GpuDNNConv: algo %s\n", a);
350 }
351
352 // The FFT implementation (only in V3 and onward) does not support strides,
353 // 1x1 filters or inputs with a spatial dimension larger than 1024.
354 // The tiled-FFT implementation (only in V4 onward) does not support
355 // strides.
356 // If the chosen implementation is FFT or tiled-FFT, validate that it can
357 // be used on the current data and default on a safe implementation if it
358 // can't.
359 // Following code is 2d-specific, but it is fine as FFT and tiled-FFT are
360 // defined only for 2d-filters
361 if ((chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT ||
362 chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) && nb_dim == 4)
363 {
364
365 // Extract the properties of the convolution descriptor
366 int nd;
367 int pad[2];
368 int stride[2];
369 int upscale[2];
370 cudnnConvolutionMode_t mode;
371 cudnnDataType_t data_type;
372 err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
373 upscale, &mode, &data_type);
374
375 if (err != CUDNN_STATUS_SUCCESS) {
376 PyErr_Format(PyExc_RuntimeError,
377 "GpuDnnConv: error getting convolution properties: %s",
378 cudnnGetErrorString(err));
379 return 1;
380 }
381
382 // Extract the spatial size of the filters
383 int filter_h = CudaNdarray_HOST_DIMS(kerns)[2];
384 int filter_w = CudaNdarray_HOST_DIMS(kerns)[3];
385
386 // Extract the spatial size of the input
387 int input_h = CudaNdarray_HOST_DIMS(input)[2];
388 int input_w = CudaNdarray_HOST_DIMS(input)[3];
389
390 // Ensure that the selected implementation supports the requested
391 // convolution. Fall back to a safe implementation otherwise.
392 if (chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT)
393 {
394 if (stride[0] != 1 || stride[1] != 1 || input_h > 1024 ||
395 input_w > 1024 || (filter_h == 1 && filter_w == 1))
396 {
397 chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
398 }
399 }
400 else
401 {
402 // chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
403 if (stride[0] != 1 || stride[1] != 1)
404 {
405 chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
406 }
407 }
408 }
409
410 err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
411 APPLY_SPECIFIC(input),
412 APPLY_SPECIFIC(kerns),
413 desc,
414 APPLY_SPECIFIC(output),
415 chosen_algo,
416 &worksize);
417 if (err == CUDNN_STATUS_NOT_SUPPORTED) {
418 // Fallback to none algo if not supported
419 // TODO: Print a warning
420 chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
421
422 err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
423 APPLY_SPECIFIC(input),
424 APPLY_SPECIFIC(kerns),
425 desc,
426 APPLY_SPECIFIC(output),
427 chosen_algo,
428 &worksize);
429 }
430 if (err != CUDNN_STATUS_SUCCESS) {
431 PyErr_Format(PyExc_RuntimeError,
432 "GpuDnnConv: error getting worksize: %s",
433 cudnnGetErrorString(err));
434 return 1;
435 }
436 workspace = get_work_mem(worksize);
437 if (workspace == NULL && worksize != 0)
438 return 1;
439
440 err = cudnnConvolutionForward(
441 _handle,
442 (void )&alpha,
443 APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(input),
444 APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(kerns),
445 desc,
446 chosen_algo,
447 workspace, worksize,
448 (void )&beta,
449 APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(output));
450 }
451 if (err != CUDNN_STATUS_SUCCESS) {
452 PyErr_Format(PyExc_RuntimeError, "GpuDnnConv: error doing operation: %s",
453 cudnnGetErrorString(err));
454 return 1;
455 }
456 return 0;
457 }
458
459 #undef DTYPE_INPUT_0
460 #undef TYPENUM_INPUT_0
461 #undef ITEMSIZE_INPUT_0
462 #undef DTYPE_INPUT_1
463 #undef TYPENUM_INPUT_1
464 #undef ITEMSIZE_INPUT_1
465 #undef DTYPE_INPUT_2
466 #undef TYPENUM_INPUT_2
467 #undef ITEMSIZE_INPUT_2
468 #undef DTYPE_INPUT_4
469 #undef TYPENUM_INPUT_4
470 #undef ITEMSIZE_INPUT_4
471 #undef DTYPE_INPUT_5
472 #undef TYPENUM_INPUT_5
473 #undef ITEMSIZE_INPUT_5
474 #undef DTYPE_OUTPUT_0
475 #undef TYPENUM_OUTPUT_0
476 #undef ITEMSIZE_OUTPUT_0
477 #undef APPLY_SPECIFIC
478 #undef CONV_ALGO
479 #undef CHOOSE_ALGO
480 #undef CHOOSE_ALGO_ONCE
481 #undef CHOOSE_ALGO_TIME
482 #undef CONV_INPLACE
483
484 __struct_compiled_op_md48cd7c806151b0105e1fa2b573cc03b() {
485 // This is only somewhat safe because we:
486 // 1) Are not a virtual class
487 // 2) Do not use any virtual classes in the members
488 // 3) Deal with mostly POD and pointers
489
490 // If this changes, we would have to revise this, but for
491 // now I am tired of chasing segfaults because
492 // initialization code had an error and some pointer has
493 // a junk value.
494 memset(this, 0, sizeof(this));
495 }
496 ~__struct_compiled_op_md48cd7c806151b0105e1fa2b573cc03b(void) {
497 cleanup();
498 }
499
500 int init(PyObject __ERROR, PyObject storage_V3, PyObject storage_V5, PyObject storage_V7, PyObject storage_V9, PyObject storage_V11, PyObject storage_V13, PyObject storage_V1) {
501 Py_XINCREF(storage_V3);
502 Py_XINCREF(storage_V5);
503 Py_XINCREF(storage_V7);
504 Py_XINCREF(storage_V9);
505 Py_XINCREF(storage_V11);
506 Py_XINCREF(storage_V13);
507 Py_XINCREF(storage_V1);
508 this->storage_V3 = storage_V3;
509 this->storage_V5 = storage_V5;
510 this->storage_V7 = storage_V7;
511 this->storage_V9 = storage_V9;
512 this->storage_V11 = storage_V11;
513 this->storage_V13 = storage_V13;
514 this->storage_V1 = storage_V1;
515
516
517
518
519
520
521
522
523
524 #define DTYPE_INPUT_0 npy_float32
525 #define TYPENUM_INPUT_0 11
526 #define ITEMSIZE_INPUT_0 4
527 #define DTYPE_INPUT_1 npy_float32
528 #define TYPENUM_INPUT_1 11
529 #define ITEMSIZE_INPUT_1 4
530 #define DTYPE_INPUT_2 npy_float32
531 #define TYPENUM_INPUT_2 11
532 #define ITEMSIZE_INPUT_2 4
533 #define DTYPE_INPUT_4 npy_float32
534 #define TYPENUM_INPUT_4 11
535 #define ITEMSIZE_INPUT_4 4
536 #define DTYPE_INPUT_5 npy_float32
537 #define TYPENUM_INPUT_5 11
538 #define ITEMSIZE_INPUT_5 4
539 #define DTYPE_OUTPUT_0 npy_float32
540 #define TYPENUM_OUTPUT_0 11
541 #define ITEMSIZE_OUTPUT_0 4
542 #define APPLY_SPECIFIC(str) str##_node_md48cd7c806151b0105e1fa2b573cc03b_0
543 #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
544 #define CHOOSE_ALGO 0
545 #define CHOOSE_ALGO_ONCE 0
546 #define CHOOSE_ALGO_TIME 0
547 #define CONV_INPLACE 1
548 #define FAIL {
549 if (!PyErr_Occurred()) {
550 PyErr_SetString(PyExc_RuntimeError,
551 "Unexpected error in an Op's C code. "
552 "No Python exception was set.");
553 }
554 return 15;
555 }
556
557
558 cudnnStatus_t APPLY_SPECIFIC(err);
559 APPLY_SPECIFIC(input) = NULL;
560 APPLY_SPECIFIC(output) = NULL;
561 APPLY_SPECIFIC(kerns) = NULL;
562 if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(input))) != CUDNN_STATUS_SUCCESS) {
563 PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
564 "(inp): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
565 FAIL;
566 }
567 if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output))) != CUDNN_STATUS_SUCCESS) {
568 PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
569 "(out): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
570 FAIL;
571 }
572 if ((APPLY_SPECIFIC(err) = cudnnCreateFilterDescriptor(&APPLY_SPECIFIC(kerns))) != CUDNN_STATUS_SUCCESS) {
573 PyErr_Format(PyExc_MemoryError, "could not allocate filter descriptor: %s",
574 cudnnGetErrorString(APPLY_SPECIFIC(err)));
575 FAIL;
576 }
577
578 for (int i = 0; i < 5; i++)
579 {
580 APPLY_SPECIFIC(previous_input_shape)[i] = 0;
581 APPLY_SPECIFIC(previous_kerns_shape)[i] = 0;
582 APPLY_SPECIFIC(previous_output_shape)[i] = 0;
583 }
584
585 APPLY_SPECIFIC(previous_algo_set) = false;
586
587 // Select default implementations for the case where the convolution
588 // implementations should be selected based on the size of the data.
589 APPLY_SPECIFIC(previous_algo) = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
590 APPLY_SPECIFIC(previous_bwd_f_algo) = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
591 APPLY_SPECIFIC(previous_bwd_d_algo) = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
592
593
594 #undef FAIL
595 #undef DTYPE_INPUT_0
596 #undef TYPENUM_INPUT_0
597 #undef ITEMSIZE_INPUT_0
598 #undef DTYPE_INPUT_1
599 #undef TYPENUM_INPUT_1
600 #undef ITEMSIZE_INPUT_1
601 #undef DTYPE_INPUT_2
602 #undef TYPENUM_INPUT_2
603 #undef ITEMSIZE_INPUT_2
604 #undef DTYPE_INPUT_4
605 #undef TYPENUM_INPUT_4
606 #undef ITEMSIZE_INPUT_4
607 #undef DTYPE_INPUT_5
608 #undef TYPENUM_INPUT_5
609 #undef ITEMSIZE_INPUT_5
610 #undef DTYPE_OUTPUT_0
611 #undef TYPENUM_OUTPUT_0
612 #undef ITEMSIZE_OUTPUT_0
613 #undef APPLY_SPECIFIC
614 #undef CONV_ALGO
615 #undef CHOOSE_ALGO
616 #undef CHOOSE_ALGO_ONCE
617 #undef CHOOSE_ALGO_TIME
618 #undef CONV_INPLACE
619 this->__ERROR = __ERROR;
620 return 0;
621 }
622 void cleanup(void) {
623 __label_1:
624
625 double __DUMMY_1;
626 __label_3:
627
628 double __DUMMY_3;
629 __label_5:
630
631 double __DUMMY_5;
632 __label_7:
633
634 double __DUMMY_7;
635 __label_9:
636
637 double __DUMMY_9;
638 __label_11:
639
640 double __DUMMY_11;
641 __label_13:
642
643 double __DUMMY_13;
644 __label_16:
645
646 #define DTYPE_INPUT_0 npy_float32
647 #define TYPENUM_INPUT_0 11
648 #define ITEMSIZE_INPUT_0 4
649 #define DTYPE_INPUT_1 npy_float32
650 #define TYPENUM_INPUT_1 11
651 #define ITEMSIZE_INPUT_1 4
652 #define DTYPE_INPUT_2 npy_float32
653 #define TYPENUM_INPUT_2 11
654 #define ITEMSIZE_INPUT_2 4
655 #define DTYPE_INPUT_4 npy_float32
656 #define TYPENUM_INPUT_4 11
657 #define ITEMSIZE_INPUT_4 4
658 #define DTYPE_INPUT_5 npy_float32
659 #define TYPENUM_INPUT_5 11
660 #define ITEMSIZE_INPUT_5 4
661 #define DTYPE_OUTPUT_0 npy_float32
662 #define TYPENUM_OUTPUT_0 11
663 #define ITEMSIZE_OUTPUT_0 4
664 #define APPLY_SPECIFIC(str) str##_node_md48cd7c806151b0105e1fa2b573cc03b_0
665 #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
666 #define CHOOSE_ALGO 0
667 #define CHOOSE_ALGO_ONCE 0
668 #define CHOOSE_ALGO_TIME 0
669 #define CONV_INPLACE 1
670
671
672 if (APPLY_SPECIFIC(input) != NULL)
673 cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input));
674 if (APPLY_SPECIFIC(output) != NULL)
675 cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output));
676 if (APPLY_SPECIFIC(kerns) != NULL)
677 cudnnDestroyFilterDescriptor(APPLY_SPECIFIC(kerns));
678
679 #undef DTYPE_INPUT_0
680 #undef TYPENUM_INPUT_0
681 #undef ITEMSIZE_INPUT_0
682 #undef DTYPE_INPUT_1
683 #undef TYPENUM_INPUT_1
684 #undef ITEMSIZE_INPUT_1
685 #undef DTYPE_INPUT_2
686 #undef TYPENUM_INPUT_2
687 #undef ITEMSIZE_INPUT_2
688 #undef DTYPE_INPUT_4
689 #undef TYPENUM_INPUT_4
690 #undef ITEMSIZE_INPUT_4
691 #undef DTYPE_INPUT_5
692 #undef TYPENUM_INPUT_5
693 #undef ITEMSIZE_INPUT_5
694 #undef DTYPE_OUTPUT_0
695 #undef TYPENUM_OUTPUT_0
696 #undef ITEMSIZE_OUTPUT_0
697 #undef APPLY_SPECIFIC
698 #undef CONV_ALGO
699 #undef CHOOSE_ALGO
700 #undef CHOOSE_ALGO_ONCE
701 #undef CHOOSE_ALGO_TIME
702 #undef CONV_INPLACE
703 double __DUMMY_16;
704
705 Py_XDECREF(this->storage_V3);
706 Py_XDECREF(this->storage_V5);
707 Py_XDECREF(this->storage_V7);
708 Py_XDECREF(this->storage_V9);
709 Py_XDECREF(this->storage_V11);
710 Py_XDECREF(this->storage_V13);
711 Py_XDECREF(this->storage_V1);
712 }
713 int run(void) {
714 int __failure = 0;
715
716 PyObject py_V1;
717 CudaNdarray * V1;
718 PyObject py_V3;
719 CudaNdarray * V3;
720 PyObject py_V5;
721 CudaNdarray * V5;
722 PyObject py_V7;
723 CudaNdarray * V7;
724 PyObject py_V9;
725
726 cudnnConvolutionDescriptor_t V9;
727
728 PyObject py_V11;
729
730 typedef npy_float32 dtype_V11;
731
732 npy_float32 V11;
733
734 PyObject py_V13;
735
736 typedef npy_float32 dtype_V13;
737
738 npy_float32 V13;
739
740 {
741
742 py_V1 = PyList_GET_ITEM(storage_V1, 0);
743 {Py_XINCREF(py_V1);}
744
745 if (py_V1 == Py_None)
746 {
747 V1 = NULL;
748 }
749 else
750 {
751
752 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
753 // and one ref from the local scope.
754
755 if (CudaNdarray_Check(py_V1))
756 {
757 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
758 V1 = (CudaNdarray)py_V1;
759 //std::cerr << "c_extract " << V1 << '\n';
760
761
762 if (V1->nd != 4)
763 {
764 PyErr_Format(PyExc_RuntimeError,
765 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 4",
766 V1->nd);
767 V1 = NULL;
768 {
769 __failure = 2;
770 if (!PyErr_Occurred()) {
771 PyErr_SetString(PyExc_RuntimeError,
772 "Unexpected error in an Op's C code. "
773 "No Python exception was set.");
774 }
775 goto __label_2;};
776 }
777 //std::cerr << "c_extract " << V1 << " nd check passed\n";
778
779
780 assert(V1);
781 Py_INCREF(py_V1);
782 }
783 else if (py_V1 == Py_None)
784 {
785 PyErr_SetString(PyExc_TypeError,
786 "expected a CudaNdarray, not None");
787 V1 = NULL;
788 {
789 __failure = 2;
790 if (!PyErr_Occurred()) {
791 PyErr_SetString(PyExc_RuntimeError,
792 "Unexpected error in an Op's C code. "
793 "No Python exception was set.");
794 }
795 goto __label_2;};
796 }
797 else
798 {
799 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
800 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
801 V1 = NULL;
802 {
803 __failure = 2;
804 if (!PyErr_Occurred()) {
805 PyErr_SetString(PyExc_RuntimeError,
806 "Unexpected error in an Op's C code. "
807 "No Python exception was set.");
808 }
809 goto __label_2;};
810 }
811 //std::cerr << "c_extract done " << V1 << '\n';
812
813
814 }
815
816 {
817
818 py_V3 = PyList_GET_ITEM(storage_V3, 0);
819 {Py_XINCREF(py_V3);}
820
821 assert(py_V3->ob_refcnt >= 2); // There should be at least one ref from the container object,
822 // and one ref from the local scope.
823
824 if (CudaNdarray_Check(py_V3))
825 {
826 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V3, (py_V3->ob_refcnt));
827 V3 = (CudaNdarray)py_V3;
828 //std::cerr << "c_extract " << V3 << '\n';
829
830
831 if (V3->nd != 4)
832 {
833 PyErr_Format(PyExc_RuntimeError,
834 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 4",
835 V3->nd);
836 V3 = NULL;
837 {
838 __failure = 4;
839 if (!PyErr_Occurred()) {
840 PyErr_SetString(PyExc_RuntimeError,
841 "Unexpected error in an Op's C code. "
842 "No Python exception was set.");
843 }
844 goto __label_4;};
845 }
846 //std::cerr << "c_extract " << V3 << " nd check passed\n";
847
848
849 assert(V3);
850 Py_INCREF(py_V3);
851 }
852 else if (py_V3 == Py_None)
853 {
854 PyErr_SetString(PyExc_TypeError,
855 "expected a CudaNdarray, not None");
856 V3 = NULL;
857 {
858 __failure = 4;
859 if (!PyErr_Occurred()) {
860 PyErr_SetString(PyExc_RuntimeError,
861 "Unexpected error in an Op's C code. "
862 "No Python exception was set.");
863 }
864 goto __label_4;};
865 }
866 else
867 {
868 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V3, (py_V3->ob_refcnt));
869 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
870 V3 = NULL;
871 {
872 __failure = 4;
873 if (!PyErr_Occurred()) {
874 PyErr_SetString(PyExc_RuntimeError,
875 "Unexpected error in an Op's C code. "
876 "No Python exception was set.");
877 }
878 goto __label_4;};
879 }
880 //std::cerr << "c_extract done " << V3 << '\n';
881
882
883 {
884
885 py_V5 = PyList_GET_ITEM(storage_V5, 0);
886 {Py_XINCREF(py_V5);}
887
888 assert(py_V5->ob_refcnt >= 2); // There should be at least one ref from the container object,
889 // and one ref from the local scope.
890
891 if (CudaNdarray_Check(py_V5))
892 {
893 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V5, (py_V5->ob_refcnt));
894 V5 = (CudaNdarray)py_V5;
895 //std::cerr << "c_extract " << V5 << '\n';
896
897
898 if (V5->nd != 4)
899 {
900 PyErr_Format(PyExc_RuntimeError,
901 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 4",
902 V5->nd);
903 V5 = NULL;
904 {
905 __failure = 6;
906 if (!PyErr_Occurred()) {
907 PyErr_SetString(PyExc_RuntimeError,
908 "Unexpected error in an Op's C code. "
909 "No Python exception was set.");
910 }
911 goto __label_6;};
912 }
913 //std::cerr << "c_extract " << V5 << " nd check passed\n";
914
915
916 assert(V5);
917 Py_INCREF(py_V5);
918 }
919 else if (py_V5 == Py_None)
920 {
921 PyErr_SetString(PyExc_TypeError,
922 "expected a CudaNdarray, not None");
923 V5 = NULL;
924 {
925 __failure = 6;
926 if (!PyErr_Occurred()) {
927 PyErr_SetString(PyExc_RuntimeError,
928 "Unexpected error in an Op's C code. "
929 "No Python exception was set.");
930 }
931 goto __label_6;};
932 }
933 else
934 {
935 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V5, (py_V5->ob_refcnt));
936 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
937 V5 = NULL;
938 {
939 __failure = 6;
940 if (!PyErr_Occurred()) {
941 PyErr_SetString(PyExc_RuntimeError,
942 "Unexpected error in an Op's C code. "
943 "No Python exception was set.");
944 }
945 goto __label_6;};
946 }
947 //std::cerr << "c_extract done " << V5 << '\n';
948
949
950 {
951
952 py_V7 = PyList_GET_ITEM(storage_V7, 0);
953 {Py_XINCREF(py_V7);}
954
955 assert(py_V7->ob_refcnt >= 2); // There should be at least one ref from the container object,
956 // and one ref from the local scope.
957
958 if (CudaNdarray_Check(py_V7))
959 {
960 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V7, (py_V7->ob_refcnt));
961 V7 = (CudaNdarray)py_V7;
962 //std::cerr << "c_extract " << V7 << '\n';
963
964
965 if (V7->nd != 4)
966 {
967 PyErr_Format(PyExc_RuntimeError,
968 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 4",
969 V7->nd);
970 V7 = NULL;
971 {
972 __failure = 8;
973 if (!PyErr_Occurred()) {
974 PyErr_SetString(PyExc_RuntimeError,
975 "Unexpected error in an Op's C code. "
976 "No Python exception was set.");
977 }
978 goto __label_8;};
979 }
980 //std::cerr << "c_extract " << V7 << " nd check passed\n";
981
982
983 assert(V7);
984 Py_INCREF(py_V7);
985 }
986 else if (py_V7 == Py_None)
987 {
988 PyErr_SetString(PyExc_TypeError,
989 "expected a CudaNdarray, not None");
990 V7 = NULL;
991 {
992 __failure = 8;
993 if (!PyErr_Occurred()) {
994 PyErr_SetString(PyExc_RuntimeError,
995 "Unexpected error in an Op's C code. "
996 "No Python exception was set.");
997 }
998 goto __label_8;};
999 }
1000 else
1001 {
1002 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V7, (py_V7->ob_refcnt));
1003 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
1004 V7 = NULL;
1005 {
1006 __failure = 8;
1007 if (!PyErr_Occurred()) {
1008 PyErr_SetString(PyExc_RuntimeError,
1009 "Unexpected error in an Op's C code. "
1010 "No Python exception was set.");
1011 }
1012 goto __label_8;};
1013 }
1014 //std::cerr << "c_extract done " << V7 << '\n';
1015
1016
1017 {
1018
1019 py_V9 = PyList_GET_ITEM(storage_V9, 0);
1020 {Py_XINCREF(py_V9);}
1021
1022 V9 = (cudnnConvolutionDescriptor_t)PyCapsule_GetPointer(py_V9, NULL);
1023 if (V9 == NULL) {
1024 __failure = 10;
1025 if (!PyErr_Occurred()) {
1026 PyErr_SetString(PyExc_RuntimeError,
1027 "Unexpected error in an Op's C code. "
1028 "No Python exception was set.");
1029 }
1030 goto __label_10;}
1031
1032 {
1033
1034 py_V11 = PyList_GET_ITEM(storage_V11, 0);
1035 {Py_XINCREF(py_V11);}
1036
1037 if (!PyObject_TypeCheck(py_V11, &PyFloat32ArrType_Type))
1038 {
1039 PyErr_Format(PyExc_ValueError,
1040 "Scalar check failed (npy_float32)");
1041 {
1042 __failure = 12;
1043 if (!PyErr_Occurred()) {
1044 PyErr_SetString(PyExc_RuntimeError,
1045 "Unexpected error in an Op's C code. "
1046 "No Python exception was set.");
1047 }
1048 goto __label_12;}
1049 }
1050
1051 PyArray_ScalarAsCtype(py_V11, &V11);
1052
1053 {
1054
1055 py_V13 = PyList_GET_ITEM(storage_V13, 0);
1056 {Py_XINCREF(py_V13);}
1057
1058 if (!PyObject_TypeCheck(py_V13, &PyFloat32ArrType_Type))
1059 {
1060 PyErr_Format(PyExc_ValueError,
1061 "Scalar check failed (npy_float32)");
1062 {
1063 __failure = 14;
1064 if (!PyErr_Occurred()) {
1065 PyErr_SetString(PyExc_RuntimeError,
1066 "Unexpected error in an Op's C code. "
1067 "No Python exception was set.");
1068 }
1069 goto __label_14;}
1070 }
1071
1072 PyArray_ScalarAsCtype(py_V13, &V13);
1073
1074 {
1075 // Op class GpuDnnConv
1076
1077 #define APPLY_SPECIFIC(str) str##_node_md48cd7c806151b0105e1fa2b573cc03b_0
1078 #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
1079 #define CHOOSE_ALGO 0
1080 #define CHOOSE_ALGO_ONCE 0
1081 #define CHOOSE_ALGO_TIME 0
1082 #define CONV_INPLACE 1
1083 {
1084 if (APPLY_SPECIFIC(conv_fwd)(V3, V5, V7, V9, V11, V13, &V1) != 0) {
1085 {
1086 __failure = 15;
1087 if (!PyErr_Occurred()) {
1088 PyErr_SetString(PyExc_RuntimeError,
1089 "Unexpected error in an Op's C code. "
1090 "No Python exception was set.");
1091 }
1092 goto __label_15;}
1093 }
1094 }
1095 #undef APPLY_SPECIFIC
1096 #undef CONV_ALGO
1097 #undef CHOOSE_ALGO
1098 #undef CHOOSE_ALGO_ONCE
1099 #undef CHOOSE_ALGO_TIME
1100 #undef CONV_INPLACE
1101 __label_15:
1102
1103 double __DUMMY_15;
1104
1105 }
1106 __label_14:
1107
1108 {Py_XDECREF(py_V13);}
1109
1110 double __DUMMY_14;
1111
1112 }
1113 __label_12:
1114
1115 {Py_XDECREF(py_V11);}
1116
1117 double __DUMMY_12;
1118
1119 }
1120 __label_10:
1121
1122 {Py_XDECREF(py_V9);}
1123
1124 double __DUMMY_10;
1125
1126 }
1127 __label_8:
1128
1129 //std::cerr << "cleanup " << py_V7 << " " << V7 << "\n";
1130 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V7, (py_V7->ob_refcnt));
1131 if (V7)
1132 {
1133 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V7, (V7->ob_refcnt));
1134 Py_XDECREF(V7);
1135 }
1136 //std::cerr << "cleanup done" << py_V7 << "\n";
1137
1138 {Py_XDECREF(py_V7);}
1139
1140 double __DUMMY_8;
1141
1142 }
1143 __label_6:
1144
1145 //std::cerr << "cleanup " << py_V5 << " " << V5 << "\n";
1146 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V5, (py_V5->ob_refcnt));
1147 if (V5)
1148 {
1149 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V5, (V5->ob_refcnt));
1150 Py_XDECREF(V5);
1151 }
1152 //std::cerr << "cleanup done" << py_V5 << "\n";
1153
1154 {Py_XDECREF(py_V5);}
1155
1156 double __DUMMY_6;
1157
1158 }
1159 __label_4:
1160
1161 //std::cerr << "cleanup " << py_V3 << " " << V3 << "\n";
1162 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V3, (py_V3->ob_refcnt));
1163 if (V3)
1164 {
1165 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V3, (V3->ob_refcnt));
1166 Py_XDECREF(V3);
1167 }
1168 //std::cerr << "cleanup done" << py_V3 << "\n";
1169
1170 {Py_XDECREF(py_V3);}
1171
1172 double __DUMMY_4;
1173
1174 }
1175 __label_2:
1176
1177 if (!__failure) {
1178
1179 //std::cerr << "sync\n";
1180 if (NULL == V1) {
1181 // failure: sync None to storage
1182 Py_XDECREF(py_V1);
1183 py_V1 = Py_None;
1184 Py_INCREF(py_V1);
1185 }
1186 else
1187 {
1188 if (py_V1 != (PyObject*)V1)
1189 {
1190 Py_XDECREF(py_V1);
1191 py_V1 = (PyObject*)V1;
1192 Py_INCREF(py_V1);
1193 }
1194 assert(py_V1->ob_refcnt);
1195 }
1196
1197 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
1198 {Py_XINCREF(py_V1);}
1199 PyList_SET_ITEM(storage_V1, 0, py_V1);
1200 {Py_XDECREF(old);}
1201 }
1202
1203 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
1204 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
1205 if (V1)
1206 {
1207 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
1208 Py_XDECREF(V1);
1209 }
1210 //std::cerr << "cleanup done" << py_V1 << "\n";
1211
1212 {Py_XDECREF(py_V1);}
1213
1214 double __DUMMY_2;
1215
1216 }
1217
1218
1219 if (__failure) {
1220 // When there is a failure, this code puts the exception
1221 // in __ERROR.
1222 PyObject* err_type = NULL;
1223 PyObject* err_msg = NULL;
1224 PyObject* err_traceback = NULL;
1225 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
1226 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
1227 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
1228 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
1229 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
1230 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
1231 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
1232 PyList_SET_ITEM(__ERROR, 0, err_type);
1233 PyList_SET_ITEM(__ERROR, 1, err_msg);
1234 PyList_SET_ITEM(__ERROR, 2, err_traceback);
1235 {Py_XDECREF(old_err_type);}
1236 {Py_XDECREF(old_err_msg);}
1237 {Py_XDECREF(old_err_traceback);}
1238 }
1239 // The failure code is returned to index what code block failed.
1240 return __failure;
1241
1242 }
1243 };
1244 }
1245
1246
1247 static int __struct_compiled_op_md48cd7c806151b0105e1fa2b573cc03b_executor(__struct_compiled_op_md48cd7c806151b0105e1fa2b573cc03b *self) {
1248 return self->run();
1249 }
1250
1251 static void __struct_compiled_op_md48cd7c806151b0105e1fa2b573cc03b_destructor(PyObject *capsule) {
1252 __struct_compiled_op_md48cd7c806151b0105e1fa2b573cc03b self = (__struct_compiled_op_md48cd7c806151b0105e1fa2b573cc03b )PyCapsule_GetContext(capsule);
1253 delete self;
1254 }
1255
1256 //////////////////////
1257 //// Functions
1258 //////////////////////
1259 static PyObject * instantiate(PyObject * self, PyObject argtuple) {
1260 assert(PyTuple_Check(argtuple));
1261 if (8 != PyTuple_Size(argtuple)){
1262 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 8, got %i", (int)PyTuple_Size(argtuple));
1263 return NULL;
1264 }
1265 __struct_compiled_op_md48cd7c806151b0105e1fa2b573cc03b struct_ptr = new __struct_compiled_op_md48cd7c806151b0105e1fa2b573cc03b();
1266 if (struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4),PyTuple_GET_ITEM(argtuple, 5),PyTuple_GET_ITEM(argtuple, 6),PyTuple_GET_ITEM(argtuple, 7) ) != 0) {
1267 delete struct_ptr;
1268 return NULL;
1269 }
1270 PyObject thunk = PyCapsule_New((void)(&__struct_compiled_op_md48cd7c806151b0105e1fa2b573cc03b_executor), NULL, __struct_compiled_op_md48cd7c806151b0105e1fa2b573cc03b_destructor);
1271 if (thunk != NULL && PyCapsule_SetContext(thunk, struct_ptr) != 0) {
1272 PyErr_Clear();
1273 Py_DECREF(thunk);
1274 thunk = NULL;
1275 }
1276
1277 return thunk; }
1278
1279 //////////////////////
1280 //// Module init
1281 //////////////////////
1282 static PyMethodDef MyMethods[] = {
1283 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
1284 {NULL, NULL, 0, NULL}
1285 };
1286 static struct PyModuleDef moduledef = {
1287 PyModuleDef_HEAD_INIT,
1288 "md48cd7c806151b0105e1fa2b573cc03b",
1289 NULL,
1290 -1,
1291 MyMethods,
1292 };
1293
1294 PyMODINIT_FUNC PyInit_md48cd7c806151b0105e1fa2b573cc03b(void) {
1295 import_array();
1296
1297
1298 {
1299 cudnnStatus_t err;
1300 if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
1301 PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle: %s",
1302 cudnnGetErrorString(err));
1303 #if PY_MAJOR_VERSION >= 3
1304 return NULL;
1305 #else
1306 return;
1307 #endif
1308 }
1309 }
1310
1311 PyObject *m = PyModule_Create(&moduledef);
1312 return m;
1313 }
1314
===============================
C:\Users\Dell\Anaconda3\envs\tensorflow-gpu\lib\site-packages\theano\sandbox\cuda\cuda_ndarray.cuh(17): warning C4005: 'PyString_Check': macro redefinition
C:\Users\Dell\Anaconda3\envs\tensorflow-gpu\lib\site-packages\numpy\core\include\numpy/npy_3kcompat.h(71): note: see previous definition of 'PyString_Check'
C:\Users\Dell\Anaconda3\envs\tensorflow-gpu\lib\site-packages\theano\sandbox\cuda\cuda_ndarray.cuh(18): warning C4005: 'PyString_FromString': macro redefinition
C:\Users\Dell\Anaconda3\envs\tensorflow-gpu\lib\site-packages\numpy\core\include\numpy/npy_3kcompat.h(73): note: see previous definition of 'PyString_FromString'
C:\Users\Dell\Anaconda3\envs\tensorflow-gpu\lib\site-packages\theano\sandbox\cuda\cuda_ndarray.cuh(19): warning C4005: 'PyString_AsString': macro redefinition
C:\Users\Dell\Anaconda3\envs\tensorflow-gpu\lib\site-packages\numpy\core\include\numpy/npy_3kcompat.h(80): note: see previous definition of 'PyString_AsString'
C:\Users\Dell\Anaconda3\envs\tensorflow-gpu\lib\site-packages\theano\sandbox\cuda\cuda_ndarray.cuh(20): warning C4005: 'PyString_FromStringAndSize': macro redefinition
C:\Users\Dell\Anaconda3\envs\tensorflow-gpu\lib\site-packages\numpy\core\include\numpy/npy_3kcompat.h(74): note: see previous definition of 'PyString_FromStringAndSize'
C:\Users\Dell\Anaconda3\envs\tensorflow-gpu\lib\site-packages\theano\sandbox\cuda\cuda_ndarray.cuh(21): warning C4005: 'PyString_Size': macro redefinition
C:\Users\Dell\Anaconda3\envs\tensorflow-gpu\lib\site-packages\numpy\core\include\numpy/npy_3kcompat.h(82): note: see previous definition of 'PyString_Size'
mod.cu(77): error: identifier "cudnnSetFilterNdDescriptor_v4" is undefined