cudaMemcpyAsync code example
Example: cudaMemcpyAsync (device-to-host copies issued on three streams)
// Copies three device arrays back to the host, issuing each copy on its own
// stream, then waits for the device to finish and releases the streams.
//
// Every runtime call is checked. The first failure is kept in `copy_status`;
// when it is not cudaSuccess after this block, the contents of h_array_1..3
// must not be trusted (cudaGetErrorString(copy_status) describes the failure).
//
// NOTE(review): cudaMemcpyAsync only overlaps with other work when the host
// buffers are page-locked (cudaMallocHost / cudaHostAlloc) -- confirm how
// h_array_1..3 are allocated; that is not visible from here.
const int kNumStreams = 3;
cudaStream_t streams[kNumStreams];

// Number of streams that were successfully created; only these are destroyed.
int streams_created = 0;
cudaError_t copy_status = cudaSuccess;

for (; streams_created < kNumStreams; ++streams_created) {
    copy_status = cudaStreamCreate(&streams[streams_created]);
    if (copy_status != cudaSuccess) {
        break;  // streams[streams_created] and later handles are not valid
    }
}

// Enqueue the copies; stop enqueueing as soon as one call reports an error.
if (copy_status == cudaSuccess) {
    copy_status = cudaMemcpyAsync(h_array_1, d_array_1, size_1 * sizeof(int),
                                  cudaMemcpyDeviceToHost, streams[0]);
}
if (copy_status == cudaSuccess) {
    copy_status = cudaMemcpyAsync(h_array_2, d_array_2, size_2 * sizeof(int),
                                  cudaMemcpyDeviceToHost, streams[1]);
}
if (copy_status == cudaSuccess) {
    copy_status = cudaMemcpyAsync(h_array_3, d_array_3, size_3 * sizeof(int),
                                  cudaMemcpyDeviceToHost, streams[2]);
}

// Synchronize even after a failure: copies that were already enqueued may
// still be writing into the host buffers. Errors from the asynchronous copies
// themselves are reported here.
const cudaError_t sync_status = cudaDeviceSynchronize();
if (copy_status == cudaSuccess) {
    copy_status = sync_status;
}

for (int i = 0; i < streams_created; ++i) {
    const cudaError_t destroy_status = cudaStreamDestroy(streams[i]);
    if (copy_status == cudaSuccess) {
        copy_status = destroy_status;  // keep only the first failure
    }
}