Comments (6)
@qinxianglinya 时间这里你能描述得再清楚一点吗?cudaMemcpyAsync是在主机内存和显存之间异步拷贝数据,因为是异步的,所以函数会立即返回,不会阻塞。后面的cudaStreamSynchronize用于同步,会一直等待该stream上的操作全部执行完成。所以你统计出来的26 ms基本就是执行时间。还是不理解的话,你可以查一下这两个CUDA函数的作用。
from yolo-tensorrt.
下面是我打印的时间信息,不知道是不是打印的有问题。
clock_t start = clock();
m_Context->enqueue(batchSize, m_DeviceBuffers.data(), m_CudaStream, nullptr);
clock_t end = clock();
std::cout << "infer time:" << (float)(end - start) << "ms" << std::endl;
std::cout << m_OutputTensors.size() << std::endl;
clock_t start1 = clock();
for (auto& tensor : m_OutputTensors)
{
NV_CUDA_CHECK(cudaMemcpyAsync(tensor.hostBuffer, m_DeviceBuffers.at(tensor.bindingIndex),
batchSize * tensor.volume * sizeof(float),
cudaMemcpyDeviceToHost, m_CudaStream));
}
clock_t end1 = clock();
std::cout << "gpu to cpu:" << float(end1 - start1) << "ms" << std:: endl;
clock_t start2 = clock();
cudaStreamSynchronize(m_CudaStream);
clock_t end2 = clock();
std::cout << "cudaStreamSynchronize time:" << float(end2 - start2) << "ms" <<std::endl;
from yolo-tensorrt.
Timer timer;
assert(batchSize <= m_BatchSize && "Image batch size exceeds TRT engines batch size");
NV_CUDA_CHECK(cudaMemcpyAsync(m_DeviceBuffers.at(m_InputBindingIndex), input,
batchSize * m_InputSize * sizeof(float), cudaMemcpyHostToDevice,
m_CudaStream));
m_Context->enqueue(batchSize, m_DeviceBuffers.data(), m_CudaStream, nullptr);
for (auto& tensor : m_OutputTensors)
{
NV_CUDA_CHECK(cudaMemcpyAsync(tensor.hostBuffer, m_DeviceBuffers.at(tensor.bindingIndex),
batchSize * tensor.volume * sizeof(float),
cudaMemcpyDeviceToHost, m_CudaStream));
}
cudaStreamSynchronize(m_CudaStream);
timer.out("inference");
from yolo-tensorrt.
Timer timer; assert(batchSize <= m_BatchSize && "Image batch size exceeds TRT engines batch size"); NV_CUDA_CHECK(cudaMemcpyAsync(m_DeviceBuffers.at(m_InputBindingIndex), input, batchSize * m_InputSize * sizeof(float), cudaMemcpyHostToDevice, m_CudaStream)); m_Context->enqueue(batchSize, m_DeviceBuffers.data(), m_CudaStream, nullptr); for (auto& tensor : m_OutputTensors) { NV_CUDA_CHECK(cudaMemcpyAsync(tensor.hostBuffer, m_DeviceBuffers.at(tensor.bindingIndex), batchSize * tensor.volume * sizeof(float), cudaMemcpyDeviceToHost, m_CudaStream)); } cudaStreamSynchronize(m_CudaStream); timer.out("inference");
ok,谢谢
from yolo-tensorrt.
@qinxianglinya 觉得有用的话点个star哈
from yolo-tensorrt.
@qinxianglinya 觉得有用的话点个star哈
嗯嗯
from yolo-tensorrt.
Related Issues (20)
- run engine error HOT 2
- Does this project have minimum CPU requirements? HOT 1
- 使用自己训练yolov5l模型,生成engine后检测锚框不准,在tensorrtx工程上可以正常运行,请问有可能是那部分的问题 HOT 1
- 关于dynamic input size
- where is attempt_download
- 关于yolov5s6减少类别至8的推理结果差异问题
- update yolov7 HOT 1
- 检测结果中id和真实目标的映射
- Are there any mirrors for the weight instead of "MEGA" host?
- 前处理和后处理的时间是不是太长了?
- how to find the corresponding version of yolov5?
- Explicit batch
- trt8 is not supply leaky? HOT 3
- trt8 maxpool的問題 HOT 3
- 检测结果解码时间长 HOT 8
- 有没有留一个专门函数,weights 转 engine 文件的
- 能够支持yolov3-tiny吗?
- yolo7 tiny? HOT 3
- YOLO v8
- Problems with yolov4 when loading engine second time
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Something interesting about the web. A new door to the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Something interesting about visualization, using data art.
-
Game
Something interesting about games, making everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents code.
-
Tencent
China Tencent open source team.
from yolo-tensorrt.