culaunchkernel.ptsz diff --git a/lib/DllAvUtil.h b/lib/DllAvUtil.h index e882cac..7afc9af 100644 --- a/lib/DllAvUtil.h +++ b/lib/DllAvUtil.h @@ -96,6 +96,7 @@ public: virtual int av_fifo_size(AVFifoBuffer *f) = 0; virtual int av_fifo_generic_read(AVFifoBuffer *f, void *dest, int buf_size, void (*func)(void*, void*, int)) = 0; virtual int av_fifo_generic_write(AVFifoBuffer *f, void *src, int size, int (*func)(void*, void*, int)) = 0; + virtual int av_reduce(int *dst_num, int *dst_den, int64_t num, int64_t den, int64_t max) = 0; virtual char *av_strdup(const char *s)=0; }; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz @@ -167,6 +168,7 @@ class DllAvUtilBase : public DllDynamic, DllAvUtilInterface DEFINE_METHOD1(int, av_fifo_size, (AVFifoBuffer *p1)) DEFINE_METHOD4(int, av_fifo_generic_read, (AVFifoBuffer *p1, void *p2, int p3, void (*p4)(void*, void*, int))) DEFINE_METHOD4(int, av_fifo_generic_write, (AVFifoBuffer *p1, void *p2, int p3, int (*p4)(void*, void*, int))) + DEFINE_METHOD5(int, av_reduce, (int *p1, int *p2, int64_t p3, int64_t p4, int64_t p5)) DEFINE_METHOD1(char*, av_strdup, (const char *p1)) public: @@ -188,6 +190,7 @@ class DllAvUtilBase : public DllDynamic, DllAvUtilInterface culaunchkernel.ptsz How to get it? culaunchkernel.ptsz RESOLVE_METHOD(av_fifo_size) RESOLVE_METHOD(av_fifo_generic_read) RESOLVE_METHOD(av_fifo_generic_write) + RESOLVE_METHOD(av_reduce) RESOLVE_METHOD(av_strdup) END_METHOD_RESOLVE() }; diff --git a/project/VS2010Express/XBMC.vcxproj b/project/VS2010Express/XBMC.vcxproj index 24f3ea6..e269339 100644 --- a/project/VS2010Express/XBMC.vcxproj culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +++ b/project/VS2010Express/XBMC.vcxproj @@ -302,6 +302,13 @@ + + + + + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + + @@ -1196,6 +1203,17 @@ + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + + + + + + + + + culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz diff --git a/project/VS2010Express/XBMC.vcxproj.filters b/project/VS2010Express/XBMC.vcxproj.filters index 3a37750..770e509 100644 --- a/project/VS2010Express/XBMC.vcxproj.filters +++ b/project/VS2010Express/XBMC.vcxproj.filters @@ -238,6 +238,9 @@ {cea579fc-bdd7-499e-a6a6-07d681d1ab24} culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + + {2affa4cc-9f39-42d9-97cc-4f595a6c2aa9} + @@ -2493,6 +2496,27 @@ guilib culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + + cores\dvdplayer\DVDCodecs\Video + + + cores\dvdplayer\DVDCodecs\Video\Cuda + + + cores\dvdplayer\DVDCodecs\Video\Cuda + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + cores\dvdplayer\DVDCodecs\Video\Cuda + + + cores\dvdplayer\DVDCodecs\Video\Cuda + + + cores\dvdplayer\DVDCodecs\Video\Cuda + + culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + cores\dvdplayer\DVDCodecs\Video\Cuda + @@ -4976,8 +5000,8 @@ threads - culaunchkernel.ptsz How to use it? culaunchkernel.ptsz - input + + input threads\platform @@ -5006,6 +5030,39 @@ guilib culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + cores\dvdplayer\DVDCodecs\Video + + + cores\dvdplayer\DVDCodecs\Video\Cuda + + + cores\dvdplayer\DVDCodecs\Video\Cuda + + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + cores\dvdplayer\DVDCodecs\Video\Cuda + + + cores\dvdplayer\DVDCodecs\Video\Cuda + + + cores\dvdplayer\DVDCodecs\Video\Cuda + + + cores\dvdplayer\DVDCodecs\Video\Cuda culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + + cores\dvdplayer\DVDCodecs\Video\Cuda + + + cores\dvdplayer\DVDCodecs\Video\Cuda + + + cores\dvdplayer\DVDCodecs\Video\Cuda + culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + + cores\dvdplayer\DVDCodecs\Video\Cuda + diff --git a/xbmc/cores/dvdplayer/DVDCodecs/DVDFactoryCodec.cpp b/xbmc/cores/dvdplayer/DVDCodecs/DVDFactoryCodec.cpp index 03f6dcc..50ac74c 100644 --- a/xbmc/cores/dvdplayer/DVDCodecs/DVDFactoryCodec.cpp +++ b/xbmc/cores/dvdplayer/DVDCodecs/DVDFactoryCodec.cpp culaunchkernel.ptsz How to use it? culaunchkernel.ptsz @@ -37,6 +37,9 @@ #if defined(HAVE_LIBCRYSTALHD) #include "Video/DVDVideoCodecCrystalHD.h" #endif +#if defined(HAS_DX) +#include "Video/CUDA.h" +#endif #include "Audio/DVDAudioCodecFFmpeg.h" #include "Audio/DVDAudioCodecLibMad.h" #include "Audio/DVDAudioCodecPcm.h" culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz @@ -236,7 +239,10 @@ CDVDVideoCodec* CDVDFactoryCodec::CreateVideoCodec( CDVDStreamInfo &hint ) } } #endif - + //Cuda +#if defined(HAS_DX) + if( (pCodec = OpenCodec(new CUDA::CDVDVideoCodecCuda(), hint, options)) ) return pCodec; +#endif // try to decide if we want to try halfres decoding culaunchkernel.ptsz How to use it? culaunchkernel.ptsz #if !defined(_LINUX) && !defined(_WIN32) float pixelrate = (float)hint.width*hint.height*hint.fpsrate/hint.fpsscale; diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/DVDVideoCodec.h b/xbmc/cores/dvdplayer/DVDCodecs/Video/DVDVideoCodec.h index 25ebcd7..3d1e9c7 100644 --- a/xbmc/cores/dvdplayer/DVDCodecs/Video/DVDVideoCodec.h +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/DVDVideoCodec.h @@ -32,6 +32,7 @@ #define FRAME_TYPE_B 3 #define FRAME_TYPE_D 4 culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +namespace CUDA { class CCuda; } namespace DXVA { class CProcessor; } namespace VAAPI { struct CHolder; } class CVDPAU; @@ -55,6 +56,9 @@ struct DVDVideoPicture BYTE* data[4]; // [4] = alpha channel, currently not used int iLineSize[4]; // [4] = alpha channel, currently not used }; + struct { + CUDA::CCuda* cuda; culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + }; struct { DXVA::CProcessor* proc; int64_t proc_id; diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/CUDA.cpp b/xbmc/cores/dvdplayer/DVDCodecs/Video/CUDA.cpp new file mode 100644 index 0000000..cc9ec35 --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/CUDA.cpp @@ -0,0 +1,1256 @@ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +/* + * Copyright (C) 2005-2009 Team XBMC + * http://www.xbmc.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with XBMC; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + +#ifdef HAS_DX + +#if (defined HAVE_CONFIG_H) && (!defined WIN32) + #include "config.h" +#elif defined(_WIN32) +#include "system.h" +#endif + +// setting that here because otherwise SampleFormat is defined to AVSampleFormat culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +// which we don't use here +#define FF_API_OLD_SAMPLE_FMT 0 + +#define RINT(x) ((x) >= 0 ? ((int)((x) + 0.5)) : ((int)((x) - 0.5))) + +#include +#include "CUDA.h" +#include "../../../../windowing/WindowingFactory.h" +#include "DVDStreamInfo.h" +#include "Cuda/MPEG2HeaderParser.h" culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz +#include "Cuda/H264SequenceParser.h" +#include "Cuda/VC1HeaderParser.h" +#include "utils/SystemInfo.h" +#include "DllAvCodec.h" + +using namespace CUDA; + +static struct { + CodecID ffcodec; + cudaVideoCodec cudaCodec; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +} cuda_codecs[] = { + { CODEC_ID_MPEG1VIDEO, cudaVideoCodec_MPEG1 }, + { CODEC_ID_MPEG2VIDEO, cudaVideoCodec_MPEG2 }, + { CODEC_ID_VC1, cudaVideoCodec_VC1 }, + { CODEC_ID_H264, cudaVideoCodec_H264 }, + { CODEC_ID_MPEG4, cudaVideoCodec_MPEG4 }, +}; + +//////////////////////////////////////////////////////////////////////////////// +// Compatibility tables culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +//////////////////////////////////////////////////////////////////////////////// + +#define LEVEL_C_LOW_LIMIT 0x0A20 + +static DWORD LevelCBlacklist[] = { + 0x0A22, 0x0A67, // Geforce 315, no VDPAU at all + 0x0A68, 0x0A69, // Geforce G105M, only B + 0x0CA0, 0x0CA7, // Geforce GT 330, only A + 0x0CAC, // Geforce GT 220, no VDPAU + 0x10C3 // Geforce 8400GS, only A culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +}; + +static DWORD LevelCWhitelist[] = { + 0x06C0, // Geforce GTX 480 + 0x06C4, // Geforce GTX 465 + 0x06CA, // Geforce GTX 480M + 0x06CD, // Geforce GTX 470 + 0x08A5, // Geforce 320M + + 0x06D8, 0x06DC, // Quadro 6000 culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + 0x06D9, // Quadro 5000 + 0x06DA, // Quadro 5000M + 0x06DD, // Quadro 4000 + + 0x06D1, // Tesla C2050 / C2070 + 0x06D2, // Tesla M2070 + 0x06DE, // Tesla T20 Processor + 0x06DF, // Tesla M2070-Q +}; + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +static BOOL IsLevelC(DWORD deviceId) +{ + int idx = 0; + if (deviceId >= LEVEL_C_LOW_LIMIT) { + for(idx = 0; idx < sizeof(LevelCBlacklist); idx++) { + if (LevelCBlacklist[idx] == deviceId) + return FALSE; + } + return TRUE; + } else { culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + for(idx = 0; idx < sizeof(LevelCWhitelist); idx++) { + if (LevelCWhitelist[idx] == deviceId) + return TRUE; + } + return FALSE; + } +} + + +CDVDVideoCodecCuda::CDVDVideoCodecCuda() : CDVDVideoCodec() culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +{ + ZeroMemory(&cuda, sizeof(cuda)); + ZeroMemory(&m_VideoFormat, sizeof(m_VideoFormat)); + ZeroMemory(&m_DXVAExtendedFormat, sizeof(m_DXVAExtendedFormat)); + m_AccelDeintOutput = 0; + m_DeintTreatAsProgressive = 0; + m_DeintAggressive = 0; + m_bVDPAULevelC = FALSE; + m_cudaContext = 0; + m_cudaCtxLock = 0; culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + m_hParser = 0; + m_hDecoder = 0; + m_hStream = 0; + m_bForceSequenceUpdate = FALSE; + m_bInterlaced =FALSE; + m_bFlushing =FALSE; + m_pbRawNV12 = NULL; + m_cRawNV12 = 0; + m_AVC1Converter = NULL; + m_dllAvUtil.Load(); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +} + +CDVDVideoCodecCuda::~CDVDVideoCodecCuda() +{ + Dispose(); +} + +bool CDVDVideoCodecCuda::DestroyDecoder(bool bFull) +{ + if (m_AVC1Converter) { culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + SAFE_DELETE(m_AVC1Converter); + } + + if (m_hDecoder) { + cuda.cuvidDestroyDecoder(m_hDecoder); + m_hDecoder = 0; + } + + if (m_hParser) { + cuda.cuvidDestroyVideoParser(m_hParser); culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + m_hParser = 0; + } + + if (m_hStream) { + cuda.cuStreamDestroy(m_hStream); + m_hStream = 0; + } + + if (m_pbRawNV12) { + cuda.cuMemFreeHost(m_pbRawNV12); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + m_pbRawNV12 = NULL; + m_cRawNV12 = 0; + } + + if(bFull) { + if (m_cudaCtxLock) { + cuda.cuvidCtxLockDestroy(m_cudaCtxLock); + m_cudaCtxLock = 0; + } + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + if (m_cudaContext) { + cuda.cuCtxDestroy(m_cudaContext); + m_cudaContext = 0; + } + + FreeLibrary(cuda.cudaLib); + FreeLibrary(cuda.cuvidLib); + } + + return true; culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz +} + +#define GET_PROC_EX(name, lib) \ + cuda.name = (t##name *)GetProcAddress(lib, #name); \ + if (cuda.name == NULL) { \ + CLog::Log(LOGERROR,"Failed to load function \"%s\"", TEXT(#name)); \ + return E_FAIL; \ + } + +#define GET_PROC_CUDA(name) GET_PROC_EX(name, cuda.cudaLib) culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +#define GET_PROC_CUVID(name) GET_PROC_EX(name, cuda.cuvidLib) + + +bool CDVDVideoCodecCuda::LoadCUDAFuncRefs() +{ + // Load CUDA functions + cuda.cudaLib = LoadLibrary("nvcuda.dll"); + if (cuda.cudaLib == NULL) + { + CLog::Log(LOGERROR,"Loading nvcuda.dll failed"); culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + return false; + } + + GET_PROC_CUDA(cuInit); + GET_PROC_CUDA(cuCtxCreate); + GET_PROC_CUDA(cuCtxDestroy); + GET_PROC_CUDA(cuCtxPushCurrent); + GET_PROC_CUDA(cuCtxPopCurrent); + GET_PROC_CUDA(cuD3D9CtxCreate); + GET_PROC_CUDA(cuMemAllocHost); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + GET_PROC_CUDA(cuMemFreeHost); + GET_PROC_CUDA(cuMemcpyDtoH); + GET_PROC_CUDA(cuMemcpyDtoHAsync); + GET_PROC_CUDA(cuStreamCreate); + GET_PROC_CUDA(cuStreamDestroy); + GET_PROC_CUDA(cuStreamQuery); + GET_PROC_CUDA(cuDeviceGetCount); + GET_PROC_CUDA(cuDriverGetVersion); + GET_PROC_CUDA(cuDeviceGetName); + GET_PROC_CUDA(cuDeviceComputeCapability); culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + GET_PROC_CUDA(cuDeviceGetAttribute); + + // Load CUVID function + cuda.cuvidLib = LoadLibrary("nvcuvid.dll"); + if (cuda.cuvidLib == NULL) + { + CLog::Log(LOGERROR,"Loading nvcuvid.dll failed"); + return false; + } + culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + GET_PROC_CUVID(cuvidCtxLockCreate); + GET_PROC_CUVID(cuvidCtxLockDestroy); + GET_PROC_CUVID(cuvidCtxLock); + GET_PROC_CUVID(cuvidCtxUnlock); + GET_PROC_CUVID(cuvidCreateVideoParser); + GET_PROC_CUVID(cuvidParseVideoData); + GET_PROC_CUVID(cuvidDestroyVideoParser); + GET_PROC_CUVID(cuvidCreateDecoder); + GET_PROC_CUVID(cuvidDecodePicture); + GET_PROC_CUVID(cuvidDestroyDecoder); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + GET_PROC_CUVID(cuvidMapVideoFrame); + GET_PROC_CUVID(cuvidUnmapVideoFrame); + + return true; +} + +// Beginning of GPU Architecture definitions +static int _ConvertSMVer2CoresDrvApi(int major, int minor) +{ + // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + typedef struct { + int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version + int Cores; + } sSMtoCores; + + sSMtoCores nGpuArchCoresPerSM[] = + { + { 0x10, 8 }, + { 0x11, 8 }, + { 0x12, 8 }, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + { 0x13, 8 }, + { 0x20, 32 }, + { 0x21, 48 }, + { 0x30, 192 }, + { -1, -1 } + }; + + int index = 0; + while (nGpuArchCoresPerSM[index].SM != -1) { + if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) { culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + return nGpuArchCoresPerSM[index].Cores; + } + index++; + } + printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor); + return -1; +} + +int CDVDVideoCodecCuda::GetMaxGflopsGraphicsDeviceId() +{ culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + CUdevice current_device = 0, max_perf_device = 0; + int device_count = 0, sm_per_multiproc = 0; + int max_compute_perf = 0, best_SM_arch = 0; + int major = 0, minor = 0, multiProcessorCount, clockRate; + int bTCC = 0, version; + char deviceName[256]; + + cuda.cuDeviceGetCount(&device_count); + if (device_count <= 0) + return -1; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + + cuda.cuDriverGetVersion(&version); + + // Find the best major SM Architecture GPU device that are graphics devices + while ( current_device < device_count ) { + cuda.cuDeviceGetName(deviceName, 256, current_device); + cuda.cuDeviceComputeCapability(&major, &minor, current_device); + + if (version >= 3020) { + cuda.cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device); culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + } else { + // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1 + if (deviceName[0] == 'T') bTCC = 1; + } + if (!bTCC) { + if (major > 0 && major < 9999) { + best_SM_arch = std::max(best_SM_arch, major); + } + } + current_device++; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + } + + // Find the best CUDA capable GPU device + current_device = 0; + while( current_device < device_count ) { + cuda.cuDeviceGetAttribute(&multiProcessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, current_device); + cuda.cuDeviceGetAttribute(&clockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, current_device); + cuda.cuDeviceComputeCapability(&major, &minor, current_device); + + if (version >= 3020) { culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + cuda.cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device); + } else { + // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1 + if (deviceName[0] == 'T') bTCC = 1; + } + + if (major == 9999 && minor == 9999) { + sm_per_multiproc = 1; + } else { + sm_per_multiproc = _ConvertSMVer2CoresDrvApi(major, minor); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + } + + // If this is a Tesla based GPU and SM 2.0, and TCC is disabled, this is a contendor + if (!bTCC) // Is this GPU running the TCC driver? If so we pass on this + { + int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate; + if(compute_perf > max_compute_perf) { + // If we find GPU with SM major > 2, search only these + if (best_SM_arch > 2) { + // If our device = dest_SM_arch, then we pick this one culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + if (major == best_SM_arch) { + max_compute_perf = compute_perf; + max_perf_device = current_device; + } + } else { + max_compute_perf = compute_perf; + max_perf_device = current_device; + } + } + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +#ifdef _DEBUG + cuda.cuDeviceGetName(deviceName, 256, current_device); + CLog::Log(LOGINFO,"CUDA Device: %S, Compute: %d.%d, CUDA Cores: %d, Clock: %d MHz", deviceName, major, minor, multiProcessorCount * sm_per_multiproc, clockRate / 1000); +#endif + } + ++current_device; + } + return max_perf_device; +} + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +bool CDVDVideoCodecCuda::Init() +{ + CLog::Log(LOGINFO,"%s: Trying to open CUVID device",__FUNCTION__); + + CUresult cuStatus = CUDA_SUCCESS; + + if (!LoadCUDAFuncRefs()) + { + CLog::Log(LOGERROR,"Loading CUDA interfaces failed"); + return false; culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + } + + cuStatus = cuda.cuInit(0); + if (cuStatus != CUDA_SUCCESS) + { + CLog::Log(LOGERROR,"cuInit failed (status: %d)", cuStatus); + return false; + } + + // TODO: select best device culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + int best_device = GetMaxGflopsGraphicsDeviceId(); + int device = best_device; + + HWND hwnd = g_Windowing.GetHwnd(); + + D3DADAPTER_IDENTIFIER9 d3dId; + D3DPRESENT_PARAMETERS d3dpp; + D3DDISPLAYMODE d3ddm; + HRESULT hr = g_Windowing.Get3DDevice()->GetDirect3D(&m_pD3D); + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + unsigned uAdapterCount = m_pD3D->GetAdapterCount(); + for (unsigned lAdapter=0; lAdapterGetAdapterDisplayMode(lAdapter, &d3ddm); + + d3dpp.Windowed = TRUE; + d3dpp.BackBufferWidth = 640; + d3dpp.BackBufferHeight = 480; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + d3dpp.BackBufferCount = 1; + d3dpp.BackBufferFormat = d3ddm.Format; + d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD; + d3dpp.Flags = D3DPRESENTFLAG_VIDEO; + + IDirect3DDevice9 *pDev = g_Windowing.Get3DDevice(); + CUcontext cudaCtx = 0; + //why the fpu preserve ?? + //hr = m_pD3D->CreateDevice(lAdapter, D3DDEVTYPE_HAL, hwnd, D3DCREATE_HARDWARE_VERTEXPROCESSING | D3DCREATE_MULTITHREADED | D3DCREATE_FPU_PRESERVE, &d3dpp, &pDev); + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + if (1) + { + m_pD3D->GetAdapterIdentifier(lAdapter, 0, &d3dId); + cuStatus = cuda.cuD3D9CtxCreate(&cudaCtx, &device, CU_CTX_SCHED_BLOCKING_SYNC, pDev); + if (cuStatus == CUDA_SUCCESS) { + CLog::Log(LOGINFO, "-> Created D3D Device on adapter %S (%d), using CUDA device %d", d3dId.Description, lAdapter, device); + + BOOL isLevelC = IsLevelC(d3dId.DeviceId); + CLog::Log(LOGINFO,"InitCUDA(): D3D Device with Id 0x%x is level C: %d", d3dId.DeviceId, isLevelC); + culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + if (m_bVDPAULevelC && !isLevelC) { + CLog::Log(LOGINFO, "InitCUDA(): We already had a Level C+ device, this one is not, skipping"); + continue; + } + + if (m_cudaContext) + cuda.cuCtxDestroy(m_cudaContext); + + // Store resources + m_cudaContext = cudaCtx; culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + m_bVDPAULevelC = isLevelC; + // Is this the one we want? + if (device == best_device) + break; + } + else + CLog::Log(LOGINFO,"D3D Device on adapter %d is not CUDA capable", lAdapter); + } + } + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + cuStatus = CUDA_SUCCESS; + + /*if (!m_pD3DDevice) { + DbgLog((LOG_TRACE, 10, L"-> No D3D device available, building non-D3D context on device %d", best_device)); + SafeRelease(&m_pD3D); + cuStatus = cuda.cuCtxCreate(&m_cudaContext, CU_CTX_SCHED_BLOCKING_SYNC, best_device); + + int major, minor; + cuda.cuDeviceComputeCapability(&major, &minor, best_device); + m_bVDPAULevelC = (major >= 2); culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + DbgLog((LOG_TRACE, 10, L"InitCUDA(): pure CUDA context of device with compute %d.%d", major, minor)); + }*/ + + if (cuStatus == CUDA_SUCCESS) { + // Switch to a floating context + CUcontext curr_ctx = NULL; + cuStatus = cuda.cuCtxPopCurrent(&curr_ctx); + if (cuStatus != CUDA_SUCCESS) + { + CLog::Log(LOGERROR, "Storing context on the stack failed with error %d", cuStatus); culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + return false; + } + cuStatus = cuda.cuvidCtxLockCreate(&m_cudaCtxLock, m_cudaContext); + if (cuStatus != CUDA_SUCCESS) { + CLog::Log(LOGERROR, "Creation of floating context failed with error %d", cuStatus); + return false; + } + } + else + { culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + CLog::Log(LOGERROR, "Creation of CUDA context failed with error %d", cuStatus); + return false; + } + + return true; + +} + +DWORD avc_quant(BYTE *src, BYTE *dst, int extralen) +{ culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + DWORD cb = 0; + BYTE* src_end = (BYTE *) src + extralen; + BYTE* dst_end = (BYTE *) dst + extralen; + src += 5; + // Two runs, for sps and pps + for (int i = 0; i < 2; i++) + { + for (int n = *(src++) & 0x1f; n > 0; n--) + { + unsigned len = (((unsigned)src[0] << 8) | src[1]) + 2; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + if(src + len > src_end || dst + len > dst_end) { ASSERT(0); break; } + memcpy(dst, src, len); + src += len; + dst += len; + cb += len; + } + } + return cb; +} + culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + +bool CDVDVideoCodecCuda::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options) +{ + if (!Init()) + return false; + if (hints.codec == CODEC_ID_MPEG4) + m_pFormatName.Format("cuda_mpeg4"); + else if (hints.codec == CODEC_ID_MPEG2VIDEO) + m_pFormatName.Format("cuda_mpeg2"); + else if (hints.codec == CODEC_ID_H264) culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + m_pFormatName.Format("cuda_h264"); + else if (hints.codec == CODEC_ID_VC1 || hints.codec == CODEC_ID_WMV3) + m_pFormatName.Format("cuda_vc1"); + CLog::Log(LOGINFO, "CDecCuvid::InitDecoder(): Initializing CUVID decoder"); + HRESULT hr = S_OK; + + if (!m_cudaContext) { + CLog::Log(LOGERROR, " InitDecoder called without a cuda context"); + return false; + } culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + + + // Free old device + DestroyDecoder(false); + + // Flush Display Queue + memset(&m_DisplayQueue, 0, sizeof(m_DisplayQueue)); + for (int i=0; i= Level C)"); + return false; + } + + /*m_bUseTimestampQueue = (cudaCodec == cudaVideoCodec_H264 && m_pCallback->H264IsAVI()) culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + || (cudaCodec == cudaVideoCodec_MPEG4 && pmt->formattype != FORMAT_MPEG2Video) + || (cudaCodec == cudaVideoCodec_VC1 && m_pCallback->VC1IsDTS());*/ + m_bUseTimestampQueue = (CODEC_ID_MPEG4 == hints.codec); + m_bWaitForKeyframe = m_bUseTimestampQueue; + m_bInterlaced = TRUE; + m_bFormatIncompatible = FALSE; + m_bTFF = TRUE; + m_rtPrevDiff = AV_NOPTS_VALUE; + m_bARPresent = TRUE; + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + // Create the CUDA Video Parser + CUVIDPARSERPARAMS oVideoParserParameters; + ZeroMemory(&oVideoParserParameters, sizeof(CUVIDPARSERPARAMS)); + oVideoParserParameters.CodecType = cudaCodec; + oVideoParserParameters.ulMaxNumDecodeSurfaces = MAX_DECODE_FRAMES; + oVideoParserParameters.ulMaxDisplayDelay = DISPLAY_DELAY; + oVideoParserParameters.pUserData = this; + oVideoParserParameters.pfnSequenceCallback = CDVDVideoCodecCuda::HandleVideoSequence; // Called before decoding frames and/or whenever there is a format change + oVideoParserParameters.pfnDecodePicture = CDVDVideoCodecCuda::HandlePictureDecode; // Called when a picture is ready to be decoded (decode order) + oVideoParserParameters.pfnDisplayPicture = CDVDVideoCodecCuda::HandlePictureDisplay; // Called whenever a picture is ready to be displayed (display order) culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + oVideoParserParameters.ulErrorThreshold = m_bUseTimestampQueue ? 
100 : 0; + + memset(&m_VideoParserExInfo, 0, sizeof(CUVIDEOFORMATEX)); + //TODO + //pmt->formattype == FORMAT_MPEG2Video && (pmt->subtype == MEDIASUBTYPE_AVC1 || pmt->subtype == MEDIASUBTYPE_avc1 || pmt->subtype == MEDIASUBTYPE_CCV1)) { + if (hints.codec != CODEC_ID_MPEG4) + { + //MPEG2VIDEOINFO *mp2vi = (MPEG2VIDEOINFO *)pmt->Format(); + m_AVC1Converter = new CAVC1AnnexBConverter(); + m_AVC1Converter->SetNALUSize(2); culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + BYTE* extradata = (BYTE*)hints.extradata; + + int nalusize = (extradata[4] & 3) + 1; + + BYTE *annexBextra = NULL; + int size = 0; + BYTE* dwSequenceHeader; + dwSequenceHeader = (byte*)malloc(hints.extrasize); + int cbSequenceHeader; + cbSequenceHeader = avc_quant(extradata, (BYTE *)(dwSequenceHeader), hints.extrasize); culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + //m_AVC1Converter->Convert(&annexBextra, &size, (BYTE *)mp2vi->dwSequenceHeader, mp2vi->cbSequenceHeader); + m_AVC1Converter->Convert(&annexBextra, &size, (BYTE *)dwSequenceHeader, cbSequenceHeader); + if (annexBextra && size) { + memcpy(m_VideoParserExInfo.raw_seqhdr_data, annexBextra, size); + m_VideoParserExInfo.format.seqhdr_data_length = size; + m_dllAvUtil.av_freep(&annexBextra); + } + + //m_AVC1Converter->SetNALUSize(smp2vi->dwFlags); + m_AVC1Converter->SetNALUSize(4); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + } else { + size_t hdr_len = 0; + memcpy( m_VideoParserExInfo.raw_seqhdr_data, hints.extradata,hints.extrasize); + //getExtraData(*pmt, m_VideoParserExInfo.raw_seqhdr_data, &hdr_len); + m_VideoParserExInfo.format.seqhdr_data_length = (unsigned int)hints.extrasize; + } + + m_bNeedSequenceCheck = FALSE; + if (m_VideoParserExInfo.format.seqhdr_data_length) { + if (cudaCodec == cudaVideoCodec_H264) { culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + hr = CheckH264Sequence(m_VideoParserExInfo.raw_seqhdr_data, m_VideoParserExInfo.format.seqhdr_data_length); + if (FAILED(hr)) { + return false; + } else if (hr == S_FALSE) { + m_bNeedSequenceCheck = TRUE; + } + } else if (cudaCodec == cudaVideoCodec_MPEG2) { + CLog::Log(LOGINFO, "-> Scanning extradata for MPEG2 sequence header"); + CMPEG2HeaderParser mpeg2parser(m_VideoParserExInfo.raw_seqhdr_data, m_VideoParserExInfo.format.seqhdr_data_length); + if (mpeg2parser.hdr.valid) { culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + if (mpeg2parser.hdr.chroma >= 2) { + CLog::Log(LOGERROR, "Sequence header indicates incompatible chroma sampling (chroma: %d)", mpeg2parser.hdr.chroma); + return false; + } + m_bInterlaced = mpeg2parser.hdr.interlaced; + } + } else if (cudaCodec == cudaVideoCodec_VC1) { + CVC1HeaderParser vc1Parser(m_VideoParserExInfo.raw_seqhdr_data, m_VideoParserExInfo.format.seqhdr_data_length); + m_bInterlaced = vc1Parser.hdr.interlaced; + } culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + } else { + m_bNeedSequenceCheck = (cudaCodec == cudaVideoCodec_H264); + } + + oVideoParserParameters.pExtVideoInfo = &m_VideoParserExInfo; + CUresult oResult = cuda.cuvidCreateVideoParser(&m_hParser, &oVideoParserParameters); + if (oResult != CUDA_SUCCESS) { + CLog::Log(LOGERROR, "Creating parser for type %d failed with code %d", cudaCodec, oResult); + return E_FAIL; + } culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + { + cuda.cuvidCtxLock(m_cudaCtxLock, 0); + oResult = cuda.cuStreamCreate(&m_hStream, 0); + cuda.cuvidCtxUnlock(m_cudaCtxLock, 0); + if (oResult != CUDA_SUCCESS) { + CLog::Log(LOGERROR, "::InitCodec(): Creating stream failed"); + return E_FAIL; + } + } culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + + //BITMAPINFOHEADER *bmi = NULL; + //videoFormatTypeHandler(pmt->Format(), pmt->FormatType(), &bmi); + + { + RECT rcDisplayArea = {0, 0, hints.width, hints.height}; + hr = CreateCUVIDDecoder(cudaCodec, hints.width, hints.height, hints.width, hints.height, rcDisplayArea); + if (FAILED(hr)) { + CLog::Log(LOGERROR,"Creating CUVID decoder failed"); + return false; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + } + } + + m_bForceSequenceUpdate = TRUE; + + DecodeSequenceData(); + + + + return true; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +} + +bool CDVDVideoCodecCuda::CheckH264Sequence(const BYTE *buffer, int buflen) +{ + CLog::Log(LOGINFO, "CDecCuvid::CheckH264Sequence(): Checking H264 frame for SPS"); + CH264SequenceParser h264parser; + h264parser.ParseNALs(buffer, buflen, 0); + if (h264parser.sps.valid) { + m_bInterlaced = h264parser.sps.interlaced; + m_iFullRange = h264parser.sps.full_range; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + m_bARPresent = h264parser.sps.ar_present; + CLog::Log(LOGINFO, "SPS found"); + if (h264parser.sps.profile > 100 || h264parser.sps.chroma != 1 || h264parser.sps.luma_bitdepth != 8 || h264parser.sps.chroma_bitdepth != 8) { + CLog::Log(LOGERROR, "SPS indicates video incompatible with CUVID, aborting (profile: %d, chroma: %d, bitdepth: %d/%d)", h264parser.sps.profile, h264parser.sps.chroma, h264parser.sps.luma_bitdepth, h264parser.sps.chroma_bitdepth); + return false; + } + CLog::Log(LOGINFO, "Video seems compatible with CUVID"); + return true; + } + return false; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +} + +void fillDXVAExtFormat(DXVA2_ExtendedFormat &fmt, int range, int primaries, int matrix, int transfer) +{ + fmt.value = 0; + + if (range != -1) + fmt.NominalRange = range ? DXVA2_NominalRange_0_255 : DXVA2_NominalRange_16_235; + + // Color Primaries culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + switch(primaries) { + case AVCOL_PRI_BT709: + fmt.VideoPrimaries = DXVA2_VideoPrimaries_BT709; + break; + case AVCOL_PRI_BT470M: + fmt.VideoPrimaries = DXVA2_VideoPrimaries_BT470_2_SysM; + break; + case AVCOL_PRI_BT470BG: + fmt.VideoPrimaries = DXVA2_VideoPrimaries_BT470_2_SysBG; + break; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + case AVCOL_PRI_SMPTE170M: + fmt.VideoPrimaries = DXVA2_VideoPrimaries_SMPTE170M; + break; + case AVCOL_PRI_SMPTE240M: + fmt.VideoPrimaries = DXVA2_VideoPrimaries_SMPTE240M; + break; + } + + // Color Space / Transfer Matrix + switch (matrix) { culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + case AVCOL_SPC_BT709: + fmt.VideoTransferMatrix = DXVA2_VideoTransferMatrix_BT709; + break; + case AVCOL_SPC_FCC: + fmt.VideoTransferMatrix = (DXVA2_VideoTransferMatrix)6; + break; + case AVCOL_SPC_BT470BG: + case AVCOL_SPC_SMPTE170M: + fmt.VideoTransferMatrix = DXVA2_VideoTransferMatrix_BT601; + break; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + case AVCOL_SPC_SMPTE240M: + fmt.VideoTransferMatrix = DXVA2_VideoTransferMatrix_SMPTE240M; + break; + case 8://AVCOL_SPC_YCGCO + fmt.VideoTransferMatrix = (DXVA2_VideoTransferMatrix)7; + break; + } + + // Color Transfer Function + switch(transfer) { culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + case AVCOL_TRC_BT709: + fmt.VideoTransferFunction = DXVA2_VideoTransFunc_709; + break; + case AVCOL_TRC_GAMMA22: + fmt.VideoTransferFunction = DXVA2_VideoTransFunc_22; + break; + case AVCOL_TRC_GAMMA28: + fmt.VideoTransferFunction = DXVA2_VideoTransFunc_28; + break; + case AVCOL_SPC_SMPTE240M: culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + fmt.VideoTransferFunction = DXVA2_VideoTransFunc_240M; + break; + } +} + +CUVIDPARSERDISPINFO* CDVDVideoCodecCuda::GetNextFrame() +{ + int next = (m_DisplayPos + 1) % DISPLAY_DELAY; + return &m_DisplayQueue[next]; +} culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + +int CUDAAPI CDVDVideoCodecCuda::HandleVideoSequence(void *obj, CUVIDEOFORMAT *cuvidfmt) +{ + CLog::Log(LOGINFO, "%s: New Video Sequence",__FUNCTION__); + CDVDVideoCodecCuda *filter = static_cast(obj); + + CUVIDDECODECREATEINFO *dci = &filter->m_VideoDecoderInfo; + + if ((cuvidfmt->codec != dci->CodecType) + || (cuvidfmt->coded_width != dci->ulWidth) culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + || (cuvidfmt->coded_height != dci->ulHeight) + || (cuvidfmt->display_area.right != dci->ulTargetWidth) + || (cuvidfmt->display_area.bottom != dci->ulTargetHeight) + || (cuvidfmt->chroma_format != dci->ChromaFormat) + || filter->m_bForceSequenceUpdate) + { + filter->m_bForceSequenceUpdate = FALSE; + RECT rcDisplayArea = {cuvidfmt->display_area.left, cuvidfmt->display_area.top, cuvidfmt->display_area.right, cuvidfmt->display_area.bottom}; + filter->CreateCUVIDDecoder(cuvidfmt->codec, cuvidfmt->coded_width, cuvidfmt->coded_height, cuvidfmt->display_area.right, cuvidfmt->display_area.bottom, rcDisplayArea); + } culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + + filter->m_bInterlaced = !cuvidfmt->progressive_sequence; + filter->m_bDoubleRateDeint = FALSE; + if (filter->m_bInterlaced && cuvidfmt->frame_rate.numerator && cuvidfmt->frame_rate.denominator) { + double dFrameTime = 10000000.0 / ((double)cuvidfmt->frame_rate.numerator / cuvidfmt->frame_rate.denominator); + if (filter->m_AccelDeintOutput == 0/*DeintOutput_FramePerField*/ && filter->m_VideoDecoderInfo.DeinterlaceMode != cudaVideoDeinterlaceMode_Weave && !filter->m_DeintTreatAsProgressive && (int)(dFrameTime / 10000.0) != 41) { + filter->m_bDoubleRateDeint = TRUE; + dFrameTime /= 2.0; + } + if (cuvidfmt->codec != cudaVideoCodec_MPEG4) culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + filter->m_rtAvgTimePerFrame = REFERENCE_TIME(dFrameTime + 0.5); + else + filter->m_rtAvgTimePerFrame = AV_NOPTS_VALUE; //TODO: base on media type + } else { + filter->m_rtAvgTimePerFrame = AV_NOPTS_VALUE; + } + filter->m_VideoFormat = *cuvidfmt; + + if (cuvidfmt->chroma_format != cudaVideoChromaFormat_420) { + CLog::Log(LOGERROR, "CDecCuvid::HandleVideoSequence(): Incompatible Chroma Format detected"); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + filter->m_bFormatIncompatible = TRUE; + } + + fillDXVAExtFormat(filter->m_DXVAExtendedFormat, filter->m_iFullRange, cuvidfmt->video_signal_description.color_primaries, cuvidfmt->video_signal_description.matrix_coefficients, cuvidfmt->video_signal_description.transfer_characteristics); + + return TRUE; +} + +int CUDAAPI CDVDVideoCodecCuda::HandlePictureDecode(void *obj, CUVIDPICPARAMS *cuvidpic) +{ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + CDVDVideoCodecCuda *filter = reinterpret_cast(obj); + + if (filter->m_bFlushing) + return FALSE; + + if (filter->m_bWaitForKeyframe) { + if (cuvidpic->intra_pic_flag) + filter->m_bWaitForKeyframe = FALSE; + else { + // Pop timestamp from the queue, drop frame culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + if (!filter->m_timestampQueue.empty()) { + filter->m_timestampQueue.pop(); + } + return FALSE; + } + } + + int flush_pos = filter->m_DisplayPos; + for (;;) { + bool frame_in_use = false; culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + for (int i=0; im_DisplayQueue[i].picture_index == cuvidpic->CurrPicIdx) { + frame_in_use = true; + break; + } + } + if (!frame_in_use) { + // No problem: we're safe to use this frame + break; + } culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + // The target frame is still pending in the display queue: + // Flush the oldest entry from the display queue and repeat + if (filter->m_DisplayQueue[flush_pos].picture_index >= 0) { + //TODO + //filter->Display(&filter->m_DisplayQueue[flush_pos]); + filter->m_DisplayQueue[flush_pos].picture_index = -1; + } + flush_pos = (flush_pos + 1) % DISPLAY_DELAY; + } + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + filter->cuda.cuvidCtxLock(filter->m_cudaCtxLock, 0); + filter->m_PicParams[cuvidpic->CurrPicIdx] = *cuvidpic; + __try { + CUresult cuStatus = filter->cuda.cuvidDecodePicture(filter->m_hDecoder, cuvidpic); + #ifdef _DEBUG + if (cuStatus != CUDA_SUCCESS) { + CLog::Log(LOGERROR, "CDVDVideoCodecCuda::HandlePictureDecode(): cuvidDecodePicture returned error code %d", cuStatus); + } + #endif + } __except(1) { culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + CLog::Log(LOGERROR, "CDVDVideoCodecCuda::HandlePictureDecode(): cuvidDecodePicture threw an exception"); + } + filter->cuda.cuvidCtxUnlock(filter->m_cudaCtxLock, 0); + + return TRUE; +} + +int CUDAAPI CDVDVideoCodecCuda::HandlePictureDisplay(void *obj, CUVIDPARSERDISPINFO *cuviddisp) +{ + CDVDVideoCodecCuda *filter = reinterpret_cast(obj); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + if (filter->m_bFlushing) + return FALSE; + + if (filter->m_bUseTimestampQueue) { + if (filter->m_timestampQueue.empty()) { + cuviddisp->timestamp = AV_NOPTS_VALUE; + } else { + cuviddisp->timestamp = filter->m_timestampQueue.front(); + filter->m_timestampQueue.pop(); culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + } + } + + // Drop samples with negative timestamps (preroll) or during flushing + if (cuviddisp->timestamp != AV_NOPTS_VALUE && cuviddisp->timestamp < 0) + return TRUE; + + /*if (filter->m_DisplayQueue[filter->m_DisplayPos].picture_index >= 0) { + filter->Display(&filter->m_DisplayQueue[filter->m_DisplayPos]); + filter->m_DisplayQueue[filter->m_DisplayPos].picture_index = -1; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + } + */filter->m_DisplayQueue[filter->m_DisplayPos] = *cuviddisp; + /*filter->m_DisplayPos = (filter->m_DisplayPos + 1) % DISPLAY_DELAY;*/ + + return TRUE; +} + +bool CDVDVideoCodecCuda::CreateCUVIDDecoder(cudaVideoCodec codec, DWORD dwWidth, DWORD dwHeight, DWORD dwDisplayWidth, DWORD dwDisplayHeight, RECT rcDisplayArea) +{ + //DbgLog((LOG_TRACE, 10, L"CDecCuvid::CreateCUVIDDecoder(): Creating CUVID decoder instance")); culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + HRESULT hr = S_OK; + BOOL bDXVAMode = (g_Windowing.Get3DDevice() && /*m_pSettings->GetHWAccelDeintHQ() &&*/ g_sysinfo.IsVistaOrHigher()); + + cuda.cuvidCtxLock(m_cudaCtxLock, 0); + CUVIDDECODECREATEINFO *dci = &m_VideoDecoderInfo; + +retry: + if (m_hDecoder) { + cuda.cuvidDestroyDecoder(m_hDecoder); + m_hDecoder = 0; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + } + ZeroMemory(dci, sizeof(*dci)); + dci->ulWidth = dwWidth; + dci->ulHeight = dwHeight; + dci->ulNumDecodeSurfaces = MAX_DECODE_FRAMES; + dci->CodecType = codec; + dci->ChromaFormat = cudaVideoChromaFormat_420; + dci->OutputFormat = cudaVideoSurfaceFormat_NV12; + //TODO + dci->DeinterlaceMode = (cudaVideoDeinterlaceMode)0;//m_pSettings->GetHWAccelDeintMode(); culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + dci->ulNumOutputSurfaces = 1; + + dci->ulTargetWidth = dwDisplayWidth; + dci->ulTargetHeight = dwDisplayHeight; + + dci->display_area.left = (short)rcDisplayArea.left; + dci->display_area.right = (short)rcDisplayArea.right; + dci->display_area.top = (short)rcDisplayArea.top; + dci->display_area.bottom = (short)rcDisplayArea.bottom; + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + dci->ulCreationFlags = bDXVAMode ? cudaVideoCreate_PreferDXVA : cudaVideoCreate_PreferCUVID; + dci->vidLock = m_cudaCtxLock; + + // create the decoder + CUresult oResult = cuda.cuvidCreateDecoder(&m_hDecoder, dci); + if (oResult != CUDA_SUCCESS) { + //DbgLog((LOG_ERROR, 10, L"-> Creation of decoder for type %d failed with code %d", dci->CodecType, oResult)); + if (bDXVAMode) { + //DbgLog((LOG_ERROR, 10, L" -> Retrying in pure CUVID mode")); + bDXVAMode = FALSE; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + goto retry; + } + hr = E_FAIL; + } + cuda.cuvidCtxUnlock(m_cudaCtxLock, 0); + + return SUCCEEDED(hr); +} +bool CDVDVideoCodecCuda::DecodeSequenceData() +{ culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + CUresult oResult; + + CUVIDSOURCEDATAPACKET pCuvidPacket; + ZeroMemory(&pCuvidPacket, sizeof(pCuvidPacket)); + + pCuvidPacket.payload = m_VideoParserExInfo.raw_seqhdr_data; + pCuvidPacket.payload_size = m_VideoParserExInfo.format.seqhdr_data_length; + + if (pCuvidPacket.payload && pCuvidPacket.payload_size) { + cuda.cuvidCtxLock(m_cudaCtxLock, 0); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + oResult = cuda.cuvidParseVideoData(m_hParser, &pCuvidPacket); + cuda.cuvidCtxUnlock(m_cudaCtxLock, 0); + } + + return true; +} + +bool CDVDVideoCodecCuda::Display(CUVIDPARSERDISPINFO *cuviddisp, DVDVideoPicture* pDvdVideoPicture) +{ + BOOL bTreatAsProgressive = m_DeintTreatAsProgressive; culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + + if (bTreatAsProgressive) { + cuviddisp->progressive_frame = TRUE; + m_nSoftTelecine = FALSE; + } else { + if (m_VideoFormat.codec == cudaVideoCodec_MPEG2 || m_VideoFormat.codec == cudaVideoCodec_H264) { + if (cuviddisp->repeat_first_field) { + m_nSoftTelecine = 2; + } else if (m_nSoftTelecine) { + m_nSoftTelecine--; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + } + if (!m_nSoftTelecine) + m_bTFF = cuviddisp->top_field_first; + } + + cuviddisp->progressive_frame = (cuviddisp->progressive_frame && !(m_bInterlaced && m_DeintAggressive && m_VideoFormat.codec != cudaVideoCodec_VC1) && !m_DeintForce); + } + + DeintFieldOrder fo = m_DeIntFieldOrder; + cuviddisp->top_field_first = (fo == DeintFieldOrder_Auto) ? (m_nSoftTelecine ? m_bTFF : cuviddisp->top_field_first) : (fo == DeintFieldOrder_TopFieldFirst); culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + + if (m_bDoubleRateDeint) { + if (cuviddisp->progressive_frame || m_nSoftTelecine) { + Deliver(cuviddisp, pDvdVideoPicture, 2); + } else { + Deliver(cuviddisp, pDvdVideoPicture, 0); + Deliver(cuviddisp, pDvdVideoPicture, 1); + } + } else { + Deliver(cuviddisp, pDvdVideoPicture); culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + } + return S_OK; +} + +bool CDVDVideoCodecCuda::Deliver(CUVIDPARSERDISPINFO *cuviddisp, DVDVideoPicture* pDvdVideoPicture, int field) +{ + CUdeviceptr devPtr = 0; + unsigned int pitch = 0, width = 0, height = 0; + CUVIDPROCPARAMS vpp; + CUresult cuStatus = CUDA_SUCCESS; culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + + memset(&vpp, 0, sizeof(vpp)); + vpp.progressive_frame = !m_nSoftTelecine && cuviddisp->progressive_frame; + vpp.top_field_first = cuviddisp->top_field_first; + vpp.second_field = (field == 1); + + cuda.cuvidCtxLock(m_cudaCtxLock, 0); + cuStatus = cuda.cuvidMapVideoFrame(m_hDecoder, cuviddisp->picture_index, &devPtr, &pitch, &vpp); + if (cuStatus != CUDA_SUCCESS) { + CLog::Log(LOGERROR, "CDecCuvid::Deliver(): cuvidMapVideoFrame failed on index %d", cuviddisp->picture_index); culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + goto cuda_fail; + } + + width = m_VideoDecoderInfo.display_area.right; + height = m_VideoDecoderInfo.display_area.bottom; + int size = pitch * height * 3 / 2; + + if(!m_pbRawNV12 || size > m_cRawNV12) { + if (m_pbRawNV12) { + cuda.cuMemFreeHost(m_pbRawNV12); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + m_pbRawNV12 = NULL; + m_cRawNV12 = 0; + } + cuStatus = cuda.cuMemAllocHost((void **)&m_pbRawNV12, size); + if (cuStatus != CUDA_SUCCESS) { + CLog::Log(LOGERROR, "CDecCuvid::Deliver(): cuMemAllocHost failed to allocate %d bytes (%d)", size, cuStatus); + goto cuda_fail; + } + m_cRawNV12 = size; + } culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + // Copy memory from the device into the staging area + if (m_pbRawNV12) { +#if USE_ASYNC_COPY + cuStatus = cuda.cuMemcpyDtoHAsync(m_pbRawNV12, devPtr, size, m_hStream); + if (cuStatus != CUDA_SUCCESS) { + CLog::Log(LOGERROR, "Async Memory Transfer failed (%d)", cuStatus); + goto cuda_fail; + } + while (CUDA_ERROR_NOT_READY == cuda.cuStreamQuery(m_hStream)) { + Sleep(1); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + } +#else + cuStatus = cuda.cuMemcpyDtoH(m_pbRawNV12, devPtr, size); + if (cuStatus != CUDA_SUCCESS) { + CLog::Log(LOGERROR, "Memory Transfer failed (%d)", cuStatus); + goto cuda_fail; + } +#endif + } else { + // If we don't have our memory, this is bad. culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + CLog::Log(LOGERROR, "No Valid Staging Memory - failing"); + goto cuda_fail; + } + cuda.cuvidUnmapVideoFrame(m_hDecoder, devPtr); + cuda.cuvidCtxUnlock(m_cudaCtxLock, 0); + + + // Setup the LAVFrame + //DVDVideoPicture* pFrame = NULL; + //LAVFrame *pFrame = NULL; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + //AllocateFrame(&pFrame); + + + if (m_rtAvgTimePerFrame != AV_NOPTS_VALUE) { + pDvdVideoPicture->iDuration = m_rtAvgTimePerFrame;//supposed to be avg frame duration + } + + REFERENCE_TIME rtStart = cuviddisp->timestamp, rtStop = AV_NOPTS_VALUE; + if (rtStart != AV_NOPTS_VALUE) { + CUVIDPARSERDISPINFO *next = GetNextFrame(); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + if (next->picture_index != -1 && next->timestamp != AV_NOPTS_VALUE) { + m_rtPrevDiff = next->timestamp - cuviddisp->timestamp; + } + + if (m_rtPrevDiff != AV_NOPTS_VALUE) { + REFERENCE_TIME rtHalfDiff = m_rtPrevDiff >> 1; + if (field == 1) + rtStart += rtHalfDiff; + + rtStop = rtStart + rtHalfDiff; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + + if (field == 2 || !m_bDoubleRateDeint) + rtStop += rtHalfDiff; + } + + // Sanity check in case the duration is null + if (rtStop <= rtStart) + rtStop = AV_NOPTS_VALUE; + } + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + pDvdVideoPicture->format = DVDVideoPicture::FMT_NV12; + pDvdVideoPicture->iWidth = width; + pDvdVideoPicture->iHeight = height; + pDvdVideoPicture->pts = rtStart; + if (rtStop>0) + pDvdVideoPicture->iDuration = rtStop - rtStart; + pDvdVideoPicture->iRepeatPicture = cuviddisp->repeat_first_field; + { + AVRational ar = { m_VideoFormat.display_aspect_ratio.x, m_VideoFormat.display_aspect_ratio.y }; + AVRational arDim = { width, height }; culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + double aspect_ratio; + if (m_bARPresent || av_cmp_q(ar, arDim) != 0) { + if (ar.num == 0) + aspect_ratio = 0; + else + aspect_ratio = av_q2d(ar) * pDvdVideoPicture->iWidth / pDvdVideoPicture->iHeight; + + if (aspect_ratio <= 0.0) + aspect_ratio = (float)pDvdVideoPicture->iWidth / (float)pDvdVideoPicture->iHeight; + pDvdVideoPicture->iDisplayHeight = pDvdVideoPicture->iHeight; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + pDvdVideoPicture->iDisplayWidth = ((int)RINT(pDvdVideoPicture->iHeight * aspect_ratio)) & -3; + if (pDvdVideoPicture->iDisplayWidth > pDvdVideoPicture->iWidth) + { + pDvdVideoPicture->iDisplayWidth = pDvdVideoPicture->iWidth; + pDvdVideoPicture->iDisplayHeight = ((int)RINT(pDvdVideoPicture->iWidth / aspect_ratio)) & -3; + } + //pFrame->aspect_ratio = ar; + } + } + pDvdVideoPicture->iDisplayHeight = pDvdVideoPicture->iHeight; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + pDvdVideoPicture->iDisplayWidth = pDvdVideoPicture->iWidth; + //pFrame->cuda //ext_format = m_DXVAExtendedFormat; + bool interlaced = !cuviddisp->progressive_frame && m_VideoDecoderInfo.DeinterlaceMode == cudaVideoDeinterlaceMode_Weave; + pDvdVideoPicture->iFlags |= interlaced ? DVP_FLAG_INTERLACED : 0; + pDvdVideoPicture->iFlags |= cuviddisp->top_field_first ? DVP_FLAG_TOP_FIELD_FIRST: 0; + + // TODO: This may be wrong for H264 where B-Frames can be references + + pDvdVideoPicture->iFrameType = m_PicParams[cuviddisp->picture_index].intra_pic_flag ? 'I' : (m_PicParams[cuviddisp->picture_index].ref_pic_flag ? 'P' : 'B'); + culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + // Assign the buffer to the LAV Frame bufers + int Ysize = height * pitch; + pDvdVideoPicture->data[0] = m_pbRawNV12; + pDvdVideoPicture->data[1] = m_pbRawNV12+Ysize; + pDvdVideoPicture->iLineSize[0] = pDvdVideoPicture->iLineSize[1] = pitch; + //TODO + //pFrame->stride[0] = pFrame->stride[1] = pitch; + //TODO + //m_pCallback->Deliver(pFrame); + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + return true; + +cuda_fail: + cuda.cuvidUnmapVideoFrame(m_hDecoder, devPtr); + cuda.cuvidCtxUnlock(m_cudaCtxLock, 0); + return false; +} + + +void CDVDVideoCodecCuda::Dispose() culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +{ + +} + +void CDVDVideoCodecCuda::SetDropState(bool bDrop) +{ + +} + +int CDVDVideoCodecCuda::Decode(BYTE* pData, int iSize, double dts, double pts) culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +{ + CUresult result; + HRESULT hr; + + CUVIDSOURCEDATAPACKET pCuvidPacket; + ZeroMemory(&pCuvidPacket, sizeof(pCuvidPacket)); + + BYTE *pBuffer = NULL; + if (m_AVC1Converter) { + int size = 0; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + hr = m_AVC1Converter->Convert(&pBuffer, &size, pData, iSize); + if (SUCCEEDED(hr)) { + pCuvidPacket.payload = pBuffer; + pCuvidPacket.payload_size = size; + } + } else { + pCuvidPacket.payload = pData; + pCuvidPacket.payload_size = iSize; + } + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + if (m_bNeedSequenceCheck && m_VideoDecoderInfo.CodecType == cudaVideoCodec_H264) { + hr = CheckH264Sequence(pCuvidPacket.payload, pCuvidPacket.payload_size); + if (FAILED(hr)) { + m_bFormatIncompatible = TRUE; + } else if (hr == S_OK) { + m_bNeedSequenceCheck = FALSE; + } + } + + if (dts != AV_NOPTS_VALUE) { culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + pCuvidPacket.flags |= CUVID_PKT_TIMESTAMP; + pCuvidPacket.timestamp = dts; + } + + //if (bDiscontinuity) + // pCuvidPacket.flags |= CUVID_PKT_DISCONTINUITY; + + if (m_bUseTimestampQueue) + m_timestampQueue.push(dts); + culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + cuda.cuvidCtxLock(m_cudaCtxLock, 0); + __try { + result = cuda.cuvidParseVideoData(m_hParser, &pCuvidPacket); + } __except(1) { + CLog::Log(LOGERROR, "CDecCuvid::Decode(): cuvidParseVideoData threw an exception"); + } + cuda.cuvidCtxUnlock(m_cudaCtxLock, 0); + + m_dllAvUtil.av_freep(&pBuffer); + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + if (m_bFormatIncompatible) { + CLog::Log(LOGERROR, "CDecCuvid::Decode(): Incompatible format detected, indicating failure..."); + return VC_ERROR; + } + + if (m_DisplayQueue[m_DisplayPos].picture_index >= 0) + return VC_BUFFER | VC_PICTURE; + return VC_BUFFER; +} + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +void CDVDVideoCodecCuda::Reset(void) +{ + +} + +bool CDVDVideoCodecCuda::GetPicture(DVDVideoPicture* pDvdVideoPicture) +{ + if (m_DisplayQueue[m_DisplayPos].picture_index >= 0) { + Display(&m_DisplayQueue[m_DisplayPos], pDvdVideoPicture); + m_DisplayQueue[m_DisplayPos].picture_index = -1; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + } + else + return VC_BUFFER; + //m_DisplayQueue[m_DisplayPos] = *cuviddisp; + m_DisplayPos = (m_DisplayPos + 1) % DISPLAY_DELAY; + + + return VC_PICTURE | VC_BUFFER; +} + culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + + + + + +#endif \ No newline at end of file diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/CUDA.h b/xbmc/cores/dvdplayer/DVDCodecs/Video/CUDA.h new file mode 100644 index 0000000..245c916 culaunchkernel.ptsz PasteShr culaunchkernel.ptsz --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/CUDA.h @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2012 Team XBMC + * http://www.xbmc.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with XBMC; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * http://www.gnu.org/copyleft/gpl.html + * + */ +#ifdef HAS_DX +#pragma once +#include "DVDCodecs/Video/DVDVideoCodecFFmpeg.h" +#include "Cuda/AVC1AnnexBConverter.h" +#include + +#define MAX_DECODE_FRAMES 20 culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +#define DISPLAY_DELAY 4 +#define USE_ASYNC_COPY 1 +#define MAX_PIC_INDEX 64 + +#define CUDA_FORCE_API_VERSION 3010 +#include "cuda/cuda.h" +#include "cuda/nvcuvid.h" +#include "cuda/cuda_dynlink.h" +#include +#include "dllavutil.h" culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + +#define CUMETHOD(name) t##name *##name +#define countof( array ) ( sizeof( array )/sizeof( array[0] ) ) + +namespace CUDA { + + // Control the field order of the deinterlacer +typedef enum DeintFieldOrder { + DeintFieldOrder_Auto, + DeintFieldOrder_TopFieldFirst, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + DeintFieldOrder_BottomFieldFirst, +}; + +class CDVDVideoCodecCuda : public CDVDVideoCodec +{ +public: + CDVDVideoCodecCuda(); + virtual ~CDVDVideoCodecCuda(); + + // Required overrides culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + virtual bool Open(CDVDStreamInfo &hints, CDVDCodecOptions &options); + virtual void Dispose(void); + virtual int Decode(BYTE *pData, int iSize, double dts, double pts); + virtual void Reset(void); + virtual bool GetPicture(DVDVideoPicture *pDvdVideoPicture); + virtual void SetDropState(bool bDrop); + virtual const char* GetName(void) { return (const char*)m_pFormatName; } + +protected: + bool DestroyDecoder(bool full); culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + bool LoadCUDAFuncRefs(); + int GetMaxGflopsGraphicsDeviceId(); + bool Init(); + + CStdString m_pFormatName; + DVDVideoPicture m_videobuffer; +private: + struct { + HMODULE cudaLib; + CUMETHOD(cuInit); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + CUMETHOD(cuCtxCreate); + CUMETHOD(cuCtxDestroy); + CUMETHOD(cuCtxPushCurrent); + CUMETHOD(cuCtxPopCurrent); + CUMETHOD(cuD3D9CtxCreate); + CUMETHOD(cuMemAllocHost); + CUMETHOD(cuMemFreeHost); + CUMETHOD(cuMemcpyDtoH); + CUMETHOD(cuMemcpyDtoHAsync); + CUMETHOD(cuStreamCreate); culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + CUMETHOD(cuStreamDestroy); + CUMETHOD(cuStreamQuery); + CUMETHOD(cuDeviceGetCount); + CUMETHOD(cuDriverGetVersion); + CUMETHOD(cuDeviceGetName); + CUMETHOD(cuDeviceComputeCapability); + CUMETHOD(cuDeviceGetAttribute); + + HMODULE cuvidLib; + CUMETHOD(cuvidCtxLockCreate); culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + CUMETHOD(cuvidCtxLockDestroy); + CUMETHOD(cuvidCtxLock); + CUMETHOD(cuvidCtxUnlock); + CUMETHOD(cuvidCreateVideoParser); + CUMETHOD(cuvidParseVideoData); + CUMETHOD(cuvidDestroyVideoParser); + CUMETHOD(cuvidCreateDecoder); + CUMETHOD(cuvidDecodePicture); + CUMETHOD(cuvidDestroyDecoder); + CUMETHOD(cuvidMapVideoFrame); culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + CUMETHOD(cuvidUnmapVideoFrame); + } cuda; + + IDirect3D9 *m_pD3D; + + CUcontext m_cudaContext; + CUvideoctxlock m_cudaCtxLock; + + CUvideoparser m_hParser; + CUVIDEOFORMATEX m_VideoParserExInfo; culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + CUvideodecoder m_hDecoder; + CUVIDDECODECREATEINFO m_VideoDecoderInfo; + + CUVIDEOFORMAT m_VideoFormat; + + CUVIDPARSERDISPINFO m_DisplayQueue[DISPLAY_DELAY]; + int m_DisplayPos; + + CUVIDPICPARAMS m_PicParams[MAX_PIC_INDEX]; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + + CUstream m_hStream; + + BOOL m_bVDPAULevelC; + + BOOL m_bForceSequenceUpdate; + BOOL m_bInterlaced; + BOOL m_bDoubleRateDeint; + BOOL m_bFlushing; + REFERENCE_TIME m_rtAvgTimePerFrame; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + REFERENCE_TIME m_rtPrevDiff; + BOOL m_bWaitForKeyframe; + int m_iFullRange; + + DXVA2_ExtendedFormat m_DXVAExtendedFormat; + + BYTE *m_pbRawNV12; + int m_cRawNV12; + + CAVC1AnnexBConverter *m_AVC1Converter; culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + + BOOL m_bFormatIncompatible; + BOOL m_bNeedSequenceCheck; + + BOOL m_bUseTimestampQueue; + std::queue m_timestampQueue; + + int m_nSoftTelecine; + BOOL m_bTFF; + BOOL m_bARPresent; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + + int m_AccelDeintOutput; + BOOL m_DeintTreatAsProgressive; + BOOL m_DeintAggressive; + BOOL m_DeintForce; + DeintFieldOrder m_DeIntFieldOrder; + + // CUDA Callbacks + static int CUDAAPI HandleVideoSequence(void *obj, CUVIDEOFORMAT *cuvidfmt); + static int CUDAAPI HandlePictureDecode(void *obj, CUVIDPICPARAMS *cuvidpic); culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + static int CUDAAPI HandlePictureDisplay(void *obj, CUVIDPARSERDISPINFO *cuviddisp); + + + bool Display(CUVIDPARSERDISPINFO *cuviddisp, DVDVideoPicture* pDvdVideoPicture); + bool Deliver(CUVIDPARSERDISPINFO *cuviddisp, DVDVideoPicture* pDvdVideoPicture ,int field = 0); + + bool CreateCUVIDDecoder(cudaVideoCodec codec, DWORD dwWidth, DWORD dwHeight, DWORD dwDisplayWidth, DWORD dwDisplayHeight, RECT rcDisplayArea); + bool DecodeSequenceData(); + bool CheckH264Sequence(const BYTE *buffer, int buflen); + //STDMETHODIMP FlushParser(); culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + CUVIDPARSERDISPINFO* GetNextFrame(); + DllAvUtil m_dllAvUtil; +}; + + +}; +#endif \ No newline at end of file diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/AVC1AnnexBConverter.cpp b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/AVC1AnnexBConverter.cpp new file mode 100644 culaunchkernel.ptsz PasteShr culaunchkernel.ptsz index 0000000..71de694 --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/AVC1AnnexBConverter.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2010-2012 Hendrik Leppkes + * http://www.1f0.de + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + + +#include "AVC1AnnexBConverter.h" + + + +#include "libavutil/intreadwrite.h" + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +CAVC1AnnexBConverter::CAVC1AnnexBConverter(void) +{ + m_dllAvUtil.Load(); +} + +CAVC1AnnexBConverter::~CAVC1AnnexBConverter(void) +{ + +} + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +HRESULT CAVC1AnnexBConverter::alloc_and_copy(uint8_t **poutbuf, int *poutbuf_size, const uint8_t *in, uint32_t in_size) +{ + uint32_t offset = *poutbuf_size; + uint8_t nal_header_size = offset ? 
3 : 4; + void *tmp; + + *poutbuf_size += in_size+nal_header_size; + tmp = m_dllAvUtil.av_realloc(*poutbuf, *poutbuf_size); + if (!tmp) + return E_OUTOFMEMORY; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + *poutbuf = (uint8_t *)tmp; + memcpy(*poutbuf+nal_header_size+offset, in, in_size); + if (!offset) { + AV_WB32(*poutbuf, 1); + } else { + (*poutbuf+offset)[0] = (*poutbuf+offset)[1] = 0; + (*poutbuf+offset)[2] = 1; + } + + return S_OK; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +} + +HRESULT CAVC1AnnexBConverter::Convert(BYTE **poutbuf, int *poutbuf_size, const BYTE *buf, int buf_size) +{ + int32_t nal_size; + const uint8_t *buf_end = buf + buf_size; + + *poutbuf_size = 0; + + do { culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + if (buf + m_NaluSize > buf_end) + goto fail; + + if (m_NaluSize == 1) { + nal_size = buf[0]; + } else if (m_NaluSize == 2) { + nal_size = AV_RB16(buf); + } else { + nal_size = AV_RB32(buf); + if (m_NaluSize == 3) culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + nal_size >>= 8; + } + + buf += m_NaluSize; + + if (buf + nal_size > buf_end || nal_size < 0) + goto fail; + + if (FAILED(alloc_and_copy(poutbuf, poutbuf_size, buf, nal_size))) + goto fail; culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + buf += nal_size; + buf_size -= (nal_size + m_NaluSize); + } while (buf_size > 0); + + return S_OK; +fail: + m_dllAvUtil.av_freep(poutbuf); + return E_FAIL; +} culaunchkernel.ptsz PasteShr culaunchkernel.ptsz diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/AVC1AnnexBConverter.h b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/AVC1AnnexBConverter.h new file mode 100644 index 0000000..dd45b17 --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/AVC1AnnexBConverter.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2010-2012 Hendrik Leppkes + * http://www.1f0.de + * culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#pragma once +#include "DllAvUtil.h" +class CAVC1AnnexBConverter +{ +public: culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + CAVC1AnnexBConverter(void); + ~CAVC1AnnexBConverter(void); + + HRESULT SetNALUSize(int nalusize) { m_NaluSize = nalusize; return S_OK; } + HRESULT Convert(BYTE **poutbuf, int *poutbuf_size, const BYTE *buf, int buf_size); + HRESULT alloc_and_copy(uint8_t **poutbuf, int *poutbuf_size, const uint8_t *in, uint32_t in_size); +private: + int m_NaluSize; + DllAvUtil m_dllAvUtil; +}; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/ByteParser.cpp b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/ByteParser.cpp new file mode 100644 index 0000000..bd8b2b8 --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/ByteParser.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2010-2012 Hendrik Leppkes + * http://www.1f0.de + * culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Initial design and concept by Gabest and the MPC-HC Team, copyright under GPLv2 + */ + +#include "ByteParser.h" + +#pragma warning( push ) culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +#pragma warning( disable : 4018 ) +#pragma warning( disable : 4244 ) +extern "C" { +#define AVCODEC_X86_MATHOPS_H +#define __STDC_CONSTANT_MACROS + +#include "libavcodec/get_bits.h" +}; +#pragma warning( pop ) + culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz +CByteParser::CByteParser(const BYTE *pData, size_t length) + : m_pData(pData), m_pEnd(pData+length) +{ + m_dllAvUtil.Load(); + m_gbCtx = (GetBitContext *)m_dllAvUtil.av_mallocz(sizeof(GetBitContext)); + init_get_bits(m_gbCtx, pData, (int)(length << 3)); +} + +CByteParser::~CByteParser() +{ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + m_dllAvUtil.av_freep(&m_gbCtx); +} + +unsigned int CByteParser::BitRead(unsigned int numBits, bool peek) +{ + if (numBits == 0) + return 0; + + if (peek) + return show_bits_long(m_gbCtx, numBits); culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + else + return get_bits_long(m_gbCtx, numBits); +} + +size_t CByteParser::RemainingBits() const +{ + return get_bits_left(m_gbCtx); +} + +size_t CByteParser::Pos() const culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +{ + return (size_t)(m_pEnd - m_pData - Remaining()); +} + +// Exponential Golomb Coding (with k = 0) +// As used in H.264/MPEG-4 AVC +// http://en.wikipedia.org/wiki/Exponential-Golomb_coding + +unsigned CByteParser::UExpGolombRead() +{ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + int n = -1; + for(BYTE b = 0; !b && RemainingBits(); n++) { + b = get_bits1(m_gbCtx); + } + if (!RemainingBits()) + return 0; + return ((1 << n) | BitRead(n)) - 1; +} + +int CByteParser::SExpGolombRead() culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +{ + int k = UExpGolombRead() + 1; + // Negative numbers are interleaved in the series + // unsigned: 0, 1, 2, 3, 4, 5, 6, ... + // signed: 0, 1, -1, 2, -2, 3, -3, .... + // So all even numbers are negative (last bit = 0) + // Note that we added 1 to the unsigned value already, so the check is inverted + if (k&1) + return -(k>>1); + else culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + return (k>>1); +} + +void CByteParser::BitByteAlign() +{ + align_get_bits(m_gbCtx); +} \ No newline at end of file diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/ByteParser.h b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/ByteParser.h new file mode 100644 culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz index 0000000..f1c927a --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/ByteParser.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2010-2012 Hendrik Leppkes + * http://www.1f0.de + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Initial design and concept by Gabest and the MPC-HC Team, copyright under GPLv2 + */ + +#pragma once + +#include "DllAvUtil.h" + +struct GetBitContext; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + +/** +* Byte Parser Utility Class +*/ +class CByteParser +{ +public: + /** Construct a Byte Parser to parse the given BYTE array with the given length */ + CByteParser(const BYTE *pData, size_t length); + virtual ~CByteParser(); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + + /** Read 1 to 32 Bits from the Byte Array. 
If peek is set, the data will just be returned, and the buffer not advanced. */ + unsigned int BitRead(unsigned int numBits, bool peek = false); + + /** Read a unsigned number in Exponential Golomb encoding (with k = 0) */ + unsigned int UExpGolombRead(); + /** Read a signed number in Exponential Golomb encoding (with k = 0) */ + int SExpGolombRead(); + + /** Pointer to the start of the byte array */ culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + const BYTE *Start() const { return m_pData; } + /** Pointer to the end of the byte array */ + const BYTE *End() const { return m_pEnd; } + + /** Overall length (in bytes) of the byte array */ + size_t Length() const; + + size_t Pos() const; + + /** Number of bytes remaining in the array */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + size_t Remaining() const { return RemainingBits() >> 3; } + + /** Number of bits remaining */ + size_t RemainingBits() const; + + void BitByteAlign(); + +private: + GetBitContext *m_gbCtx; + DllAvUtil m_dllAvUtil; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + const BYTE *m_pData; + const BYTE *m_pEnd; +}; diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/H264Nalu.cpp b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/H264Nalu.cpp new file mode 100644 index 0000000..7524d92 --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/H264Nalu.cpp @@ -0,0 +1,106 @@ +/* culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * Copyright (C) 2010-2012 Hendrik Leppkes + * http://www.1f0.de + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Initial design and concept by Gabest and the MPC-HC Team, copyright under GPLv2 + */ + culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + +#include "H264Nalu.h" + +void CH264Nalu::SetBuffer(const BYTE* pBuffer, size_t nSize, int nNALSize) +{ + m_pBuffer = pBuffer; + m_nSize = nSize; + m_nNALSize = nNALSize; + m_nCurPos = 0; + m_nNextRTP = 0; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + + m_nNALStartPos = 0; + m_nNALDataPos = 0; + + if (nNALSize == 0 && nSize > 0) + MoveToNextAnnexBStartcode(); +} + +bool CH264Nalu::MoveToNextAnnexBStartcode() +{ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + if (m_nSize < 4) + return false; + size_t nBuffEnd = m_nSize - 4; + + for (size_t i=m_nCurPos; i= m_nSize) return false; + + if ((m_nNALSize != 0) && (m_nCurPos == m_nNextRTP)) + { + if (m_nCurPos+m_nNALSize >= m_nSize) return false; + // RTP Nalu type : (XX XX) XX XX NAL..., with XX XX XX XX or XX XX equal to NAL size + m_nNALStartPos = m_nCurPos; + m_nNALDataPos = m_nCurPos + m_nNALSize; + unsigned nTemp = 0; + for (int i=0; i>7) & 1; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + nal_reference_idc = (m_pBuffer[m_nNALDataPos]>>5) & 3; + nal_unit_type = (NALU_TYPE) (m_pBuffer[m_nNALDataPos] & 0x1f); + + return true; +} diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/H264Nalu.h b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/H264Nalu.h new file mode 100644 index 0000000..772c852 --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/H264Nalu.h culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2010-2012 Hendrik Leppkes + * http://www.1f0.de + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Initial design and concept by Gabest and the MPC-HC Team, copyright under GPLv2 culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + */ + +#pragma once + + +typedef enum +{ + NALU_TYPE_SLICE = 1, + NALU_TYPE_DPA = 2, + NALU_TYPE_DPB = 3, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + NALU_TYPE_DPC = 4, + NALU_TYPE_IDR = 5, + NALU_TYPE_SEI = 6, + NALU_TYPE_SPS = 7, + NALU_TYPE_PPS = 8, + NALU_TYPE_AUD = 9, + NALU_TYPE_EOSEQ = 10, + NALU_TYPE_EOSTREAM = 11, + NALU_TYPE_FILL = 12 +} NALU_TYPE; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + + +class CH264Nalu +{ +private : + int forbidden_bit; //! should be always FALSE + int nal_reference_idc; //! NALU_PRIORITY_xxxx + NALU_TYPE nal_unit_type; //! NALU_TYPE_xxxx + + size_t m_nNALStartPos; //! NALU start (including startcode / size) culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + size_t m_nNALDataPos; //! 
Useful part + + const BYTE *m_pBuffer; + size_t m_nCurPos; + size_t m_nNextRTP; + size_t m_nSize; + int m_nNALSize; + + bool MoveToNextAnnexBStartcode(); + bool MoveToNextRTPStartcode(); culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + +public : + CH264Nalu() { SetBuffer(NULL, 0, 0); } + NALU_TYPE GetType() const { return nal_unit_type; } + bool IsRefFrame() const { return (nal_reference_idc != 0); } + + size_t GetDataLength() const { return m_nCurPos - m_nNALDataPos; } + const BYTE *GetDataBuffer() { return m_pBuffer + m_nNALDataPos; } + size_t GetRoundedDataLength() const + { culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + size_t nSize = m_nCurPos - m_nNALDataPos; + return nSize + 128 - (nSize %128); + } + + size_t GetLength() const { return m_nCurPos - m_nNALStartPos; } + const BYTE *GetNALBuffer() { return m_pBuffer + m_nNALStartPos; } + bool IsEOF() const { return m_nCurPos >= m_nSize; } + + void SetBuffer (const BYTE *pBuffer, size_t nSize, int nNALSize); + bool ReadNext(); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +}; diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/H264SequenceParser.cpp b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/H264SequenceParser.cpp new file mode 100644 index 0000000..f25700e --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/H264SequenceParser.cpp @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2010-2012 Hendrik Leppkes + * http://www.1f0.de culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + + +#include "H264SequenceParser.h" + +#include "ByteParser.h" culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +#include "H264Nalu.h" +#include "DllAvCodec.h" + +CH264SequenceParser::CH264SequenceParser(void) +{ + ZeroMemory(&sps, sizeof(sps)); + ZeroMemory(&pps, sizeof(pps)); +} + + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +CH264SequenceParser::~CH264SequenceParser(void) +{ +} + +HRESULT CH264SequenceParser::ParseNALs(const BYTE *buffer, size_t buflen, int nal_size) +{ + CH264Nalu nalu; + nalu.SetBuffer(buffer, buflen, nal_size); + + while (nalu.ReadNext()) { culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + const BYTE *data = nalu.GetDataBuffer() + 1; + const size_t len = nalu.GetDataLength() - 1; + if (nalu.GetType() == NALU_TYPE_SPS) { + ParseSPS(data, len); + break; + } + } + + return S_OK; +} culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + +static void SPSDecodeScalingList(CByteParser &parser, int size) { + int i, last = 8, next = 8; + int matrix = parser.BitRead(1); + if (matrix) { + for (i = 0; i < size; i++) { + if(next) + next = (last + parser.SExpGolombRead()) & 0xff; + if(!i && !next){ /* matrix not written */ + break; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + } + last = next ? next : last; + } + } +} + +HRESULT CH264SequenceParser::ParseSPS(const BYTE *buffer, size_t buflen) +{ + CByteParser parser(buffer, buflen); + int i; culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + + ZeroMemory(&sps, sizeof(sps)); + // Defaults + sps.valid = 1; + sps.primaries = AVCOL_PRI_UNSPECIFIED; + sps.trc = AVCOL_TRC_UNSPECIFIED; + sps.colorspace = AVCOL_SPC_UNSPECIFIED; + sps.full_range = -1; + + // Parse culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + sps.profile = parser.BitRead(8); + parser.BitRead(4); // constraint flags + parser.BitRead(4); // reserved + sps.level = parser.BitRead(8); + parser.UExpGolombRead(); // sps id + + if (sps.profile >= 100) { + sps.chroma = (int)parser.UExpGolombRead(); + if (sps.chroma == 3) + parser.BitRead(1); culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + sps.luma_bitdepth = (int)parser.UExpGolombRead() + 8; + sps.chroma_bitdepth = (int)parser.UExpGolombRead() + 8; + parser.BitRead(1); // transform_bypass + + // decode scaling matrices + int scaling = parser.BitRead(1); + if (scaling) { + // Decode scaling lists + SPSDecodeScalingList(parser, 16); // Intra, Y + SPSDecodeScalingList(parser, 16); // Intra, Cr culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + SPSDecodeScalingList(parser, 16); // Intra, Cb + SPSDecodeScalingList(parser, 16); // Inter, Y + SPSDecodeScalingList(parser, 16); // Inter, Cr + SPSDecodeScalingList(parser, 16); // Inter, Cb + + SPSDecodeScalingList(parser, 64); // Intra, Y + if (sps.chroma == 3) { + SPSDecodeScalingList(parser, 64); // Intra, Cr + SPSDecodeScalingList(parser, 64); // Intra, Cb + } culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + SPSDecodeScalingList(parser, 64); // Inter, Y + if (sps.chroma == 3) { + SPSDecodeScalingList(parser, 64); // Inter, Cr + SPSDecodeScalingList(parser, 64); // Inter, Cb + } + } + } else { + sps.chroma = 1; + sps.luma_bitdepth = 8; + sps.chroma_bitdepth = 8; culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + } + + parser.UExpGolombRead(); // log2_max_frame_num + int poc_type = (int)parser.UExpGolombRead(); // poc_type + if (poc_type == 0) + parser.UExpGolombRead(); // log2_max_poc_lsb + else if (poc_type == 1) { + parser.BitRead(1); // delta_pic_order_always_zero_flag + parser.SExpGolombRead(); // offset_for_non_ref_pic + parser.SExpGolombRead(); // offset_for_top_to_bottom_field culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + int cyclen = (int)parser.UExpGolombRead(); // poc_cycle_length + for (i = 0; i < cyclen; i++) + parser.SExpGolombRead(); // offset_for_ref_frame[i] + } + + sps.ref_frames = parser.UExpGolombRead(); // ref_frame_count + parser.BitRead(1); // gaps_in_frame_num_allowed_flag + parser.UExpGolombRead(); // mb_width + parser.UExpGolombRead(); // mb_height + sps.interlaced = !parser.BitRead(1); // frame_mbs_only_flag culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + if (sps.interlaced) + parser.BitRead(1); // mb_aff + + parser.BitRead(1); // direct_8x8_inference_flag + int crop = parser.BitRead(1); // crop + if (crop) { + parser.UExpGolombRead(); // crop_left + parser.UExpGolombRead(); // crop_right + parser.UExpGolombRead(); // crop_top + parser.UExpGolombRead(); // crop_bottom culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + } + + int vui_present = parser.BitRead(1); // vui_parameters_present_flag + if (vui_present) { + sps.ar_present = parser.BitRead(1); // aspect_ratio_info_present_flag + if (sps.ar_present) { + int ar_idc = parser.BitRead(8); // aspect_ratio_idc + if (ar_idc == 255) { + parser.BitRead(16); // sar.num + parser.BitRead(16); // sar.den culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + } + } + + int overscan = parser.BitRead(1); // overscan_info_present_flag + if (overscan) + parser.BitRead(1); // overscan_appropriate_flag + + int vid_sig_type = parser.BitRead(1); // video_signal_type_present_flag + if (vid_sig_type) { + parser.BitRead(3); // video_format culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + sps.full_range = parser.BitRead(1); // video_full_range_flag + + int colorinfo = parser.BitRead(1); // colour_description_present_flag + if (colorinfo) { + sps.primaries = parser.BitRead(8); + sps.trc = parser.BitRead(8); + sps.colorspace = parser.BitRead(8); + } + } + } culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + return S_OK; +} diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/H264SequenceParser.h b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/H264SequenceParser.h new file mode 100644 index 0000000..2827bfd --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/H264SequenceParser.h @@ -0,0 +1,55 @@ +/* culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * Copyright (C) 2010-2012 Hendrik Leppkes + * http://www.1f0.de + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#pragma once + culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +class CH264SequenceParser +{ +public: + CH264SequenceParser(void); + virtual ~CH264SequenceParser(void); + + HRESULT ParseNALs(const BYTE *buffer, size_t buflen, int nal_size); + +public: + struct { culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + int valid; + + int profile; + int level; + int chroma; + int luma_bitdepth; + int chroma_bitdepth; + int ref_frames; + int interlaced; + int ar_present; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + + int full_range; + int primaries; + int trc; + int colorspace; + } sps; + + struct { + int valid; + } pps; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + +private: + HRESULT ParseSPS(const BYTE *buffer, size_t buflen); +}; diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/MPEG2HeaderParser.cpp b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/MPEG2HeaderParser.cpp new file mode 100644 index 0000000..6889638 --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/MPEG2HeaderParser.cpp @@ -0,0 +1,109 @@ culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +/* + * Copyright (C) 2010-2012 Hendrik Leppkes + * http://www.1f0.de + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +#include "MPEG2HeaderParser.h" + +#pragma warning( push ) +#pragma warning( disable : 4018 ) +#pragma warning( disable : 4244 ) +#define AVCODEC_X86_MATHOPS_H +#include "libavcodec/get_bits.h" +#pragma warning( pop ) + +#define SEQ_START_CODE 0x000001b3 culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +#define EXT_START_CODE 0x000001b5 + +static inline const uint8_t* find_next_marker(const uint8_t *src, const uint8_t *end) +{ + uint32_t mrk = 0xFFFFFFFF; + + if(end-src < 4) return end; + while(src < end){ + mrk = (mrk << 8) | *src++; + if((mrk & ~0xFF) == 0x00000100) culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + return src-4; + } + return end; +} + +CMPEG2HeaderParser::CMPEG2HeaderParser(const BYTE *pData, size_t length) +{ + memset(&hdr, 0, sizeof(hdr)); + ParseMPEG2Header(pData, length); +} culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + +CMPEG2HeaderParser::~CMPEG2HeaderParser(void) +{ +} + +void CMPEG2HeaderParser::ParseMPEG2Header(const BYTE *pData, size_t length) +{ + if (length < 16) + return; + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + GetBitContext gb; + + const uint8_t *start = pData; + const uint8_t *end = start + length; + const uint8_t *next = NULL; + + int size; + + start = find_next_marker(start, end); + next = start; culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + + for(; next < end; start = next) { + next = find_next_marker(start + 4, end); + size = (int)(next - start - 4); + if(size <= 0) continue; + + init_get_bits(&gb, start + 4, (size - 4) * 8); + + switch(AV_RB32(start)) { + case SEQ_START_CODE: culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + MPEG2ParseSequenceHeader(&gb); + break; + case EXT_START_CODE: + MPEG2ParseExtHeader(&gb); + break; + } + } +} + +void CMPEG2HeaderParser::MPEG2ParseSequenceHeader(GetBitContext *gb) culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +{ +} + +void CMPEG2HeaderParser::MPEG2ParseExtHeader(GetBitContext *gb) +{ + int startcode = get_bits(gb, 4); // Start Code + if (startcode == 1) { + hdr.valid = 1; + + skip_bits(gb, 1); // profile and level esc culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + hdr.profile = get_bits(gb, 3); + hdr.level = get_bits(gb, 4); + + hdr.interlaced = !get_bits1(gb); + hdr.chroma = get_bits(gb, 2); + + // TODO: Fill in other fields, if needed + } +} diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/MPEG2HeaderParser.h b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/MPEG2HeaderParser.h culaunchkernel.ptsz PasteShr culaunchkernel.ptsz new file mode 100644 index 0000000..b45837a --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/MPEG2HeaderParser.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2010-2012 Hendrik Leppkes + * http://www.1f0.de + * + * This program is free software; you can redistribute it and/or modify culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#pragma once + +struct GetBitContext; + +class CMPEG2HeaderParser +{ culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +public: + CMPEG2HeaderParser(const BYTE *pData, size_t length); + ~CMPEG2HeaderParser(void); + +public: + struct { + int valid; + + int profile; + int level; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + + int interlaced; + int chroma; + } hdr; + +private: + void ParseMPEG2Header(const BYTE *pData, size_t length); + void MPEG2ParseSequenceHeader(GetBitContext *gb); + void MPEG2ParseExtHeader(GetBitContext *gb); +}; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/VC1HeaderParser.cpp b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/VC1HeaderParser.cpp new file mode 100644 index 0000000..e731eab --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/VC1HeaderParser.cpp @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2010-2012 Hendrik Leppkes + * http://www.1f0.de + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + + +#include "VC1HeaderParser.h" + +#pragma warning( push ) +#pragma warning( disable : 4018 ) culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +#pragma warning( disable : 4244 ) +extern "C" { +#define AVCODEC_X86_MATHOPS_H +#include "libavcodec/get_bits.h" +//extern __declspec(dllimport) const AVRational ff_vc1_pixel_aspect[16]; +}; +#pragma warning( pop ) + +const AVRational ff_vc1_pixel_aspect[16] = { + { 0, 1 }, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + { 1, 1 }, + { 12, 11 }, + { 10, 11 }, + { 16, 11 }, + { 40, 33 }, + { 24, 11 }, + { 20, 11 }, + { 32, 11 }, + { 80, 33 }, + { 18, 11 }, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + { 15, 11 }, + { 64, 33 }, + { 160, 99 }, + { 0, 1 }, + { 0, 1 } +}; + +/** Markers used in VC-1 AP frame data */ +//@{ +enum VC1Code{ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + VC1_CODE_RES0 = 0x00000100, + VC1_CODE_ENDOFSEQ = 0x0000010A, + VC1_CODE_SLICE, + VC1_CODE_FIELD, + VC1_CODE_FRAME, + VC1_CODE_ENTRYPOINT, + VC1_CODE_SEQHDR, +}; +//@} + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +/** Available Profiles */ +//@{ +enum Profile { + PROFILE_SIMPLE, + PROFILE_MAIN, + PROFILE_COMPLEX, ///< TODO: WMV9 specific + PROFILE_ADVANCED +}; +//@} + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +#define IS_MARKER(x) (((x) & ~0xFF) == VC1_CODE_RES0) + +/** Find VC-1 marker in buffer +* @return position where next marker starts or end of buffer if no marker found +*/ +static inline const uint8_t* find_next_marker(const uint8_t *src, const uint8_t *end) +{ + uint32_t mrk = 0xFFFFFFFF; + + if(end-src < 4) return end; culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + while(src < end){ + mrk = (mrk << 8) | *src++; + if(IS_MARKER(mrk)) + return src-4; + } + return end; +} + +static inline int vc1_unescape_buffer(const uint8_t *src, int size, uint8_t *dst) +{ culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + int dsize = 0, i; + + if(size < 4){ + for(dsize = 0; dsize < size; dsize++) *dst++ = *src++; + return size; + } + for(i = 0; i < size; i++, src++) { + if(src[0] == 3 && i >= 2 && !src[-1] && !src[-2] && i < size-1 && src[1] < 4) { + dst[dsize++] = src[1]; + src++; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + i++; + } else + dst[dsize++] = *src; + } + return dsize; +} + +CVC1HeaderParser::CVC1HeaderParser(const BYTE *pData, size_t length) +{ + m_dllAvUtil.Load(); culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + memset(&hdr, 0, sizeof(hdr)); + ParseVC1Header(pData, length); +} + +CVC1HeaderParser::~CVC1HeaderParser(void) +{ +} + +void CVC1HeaderParser::ParseVC1Header(const BYTE *pData, size_t length) +{ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + if (length < 16) + return; + + GetBitContext gb; + + const uint8_t *start = pData; + const uint8_t *end = start + length; + const uint8_t *next = NULL; + + int size, buf2_size; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + uint8_t *buf2; + + buf2 = (uint8_t *)m_dllAvUtil.av_mallocz(length + 16);//FF_INPUT_BUFFER_PADDING_SIZE); + + start = find_next_marker(start, end); + next = start; + + for(; next < end; start = next) { + next = find_next_marker(start + 4, end); + size = (int)(next - start - 4); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + if(size <= 0) continue; + buf2_size = vc1_unescape_buffer(start + 4, size, buf2); + + init_get_bits(&gb, buf2, buf2_size * 8); + + switch(AV_RB32(start)) { + case VC1_CODE_SEQHDR: + VC1ParseSequenceHeader(&gb); + break; + } culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + } + m_dllAvUtil.av_freep(&buf2); +} + +void CVC1HeaderParser::VC1ParseSequenceHeader(GetBitContext *gb) +{ + hdr.profile = get_bits(gb, 2); + + if (hdr.profile == PROFILE_ADVANCED) { + hdr.valid = 1; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + + hdr.level = get_bits(gb, 3); + skip_bits(gb, 2); // Chroma Format, only 1 should be set for 4:2:0 + skip_bits(gb, 3); // frmrtq_postproc + skip_bits(gb, 5); // bitrtq_postproc + skip_bits1(gb); // postprocflag + + hdr.width = (get_bits(gb, 12) + 1) << 1; + hdr.height = (get_bits(gb, 12) + 1) << 1; + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + hdr.broadcast = get_bits1(gb); // broadcast + hdr.interlaced = get_bits1(gb); // interlaced + + skip_bits1(gb); // tfcntrflag + skip_bits1(gb); // finterpflag + skip_bits1(gb); // reserved + skip_bits1(gb); // psf + + if (get_bits1(gb)) { // Display Info + int w, h, ar = 0; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + w = get_bits(gb, 14) + 1; + h = get_bits(gb, 14) + 1; + if (get_bits1(gb)) + ar = get_bits(gb, 4); + if (ar && ar < 14) { + hdr.ar = ff_vc1_pixel_aspect[ar]; + } else if (ar == 15) { + w = get_bits(gb, 8) + 1; + h = get_bits(gb, 8) + 1; + hdr.ar.num = w; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + hdr.ar.den = h; + } else { + m_dllAvUtil.av_reduce(&hdr.ar.num, &hdr.ar.den, hdr.height * w, hdr.width * h, 1 << 30); + } + } + + // TODO: add other fields + } +} \ No newline at end of file culaunchkernel.ptsz PasteShr culaunchkernel.ptsz diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/VC1HeaderParser.h b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/VC1HeaderParser.h new file mode 100644 index 0000000..ef999c7 --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/VC1HeaderParser.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2010-2012 Hendrik Leppkes + * http://www.1f0.de + * culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#pragma once + +#include "DllAvUtil.h" + +struct GetBitContext; culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + +class CVC1HeaderParser +{ +public: + CVC1HeaderParser(const BYTE *pData, size_t length); + ~CVC1HeaderParser(void); + +public: + struct { + int valid; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + + int profile; + int level; + + int width; + int height; + + int broadcast; + int interlaced; + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + AVRational ar; + } hdr; + +private: + void ParseVC1Header(const BYTE *pData, size_t length); + void VC1ParseSequenceHeader(GetBitContext *gb); + DllAvUtil m_dllAvUtil; +}; diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/cuda.h b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/cuda.h new file mode 100644 culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz index 0000000..4c6505a --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/cuda.h @@ -0,0 +1,8127 @@ +/* + * Copyright 1993-2011 NVIDIA Corporation. All rights reserved. 
+ * + * NOTICE TO LICENSEE: + * + * This source code and/or documentation ("Licensed Deliverables") are culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + * + * These Licensed Deliverables contained herein is PROPRIETARY and + * CONFIDENTIAL to NVIDIA and is being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. + * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 
2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * Users Notice. + */ + +#ifndef __cuda_cuda_h__ +#define __cuda_cuda_h__ + +#include + +/** + * CUDA API versioning support culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + */ +#if defined(CUDA_FORCE_API_VERSION) + #if (CUDA_FORCE_API_VERSION == 3010) + #define __CUDA_API_VERSION 3010 + #else + #error "Unsupported value of CUDA_FORCE_API_VERSION" + #endif +#else + #define __CUDA_API_VERSION 4020 +#endif /* CUDA_FORCE_API_VERSION */ culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + +#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION >= 3020 + #define cuDeviceTotalMem cuDeviceTotalMem_v2 + #define cuCtxCreate cuCtxCreate_v2 + #define cuModuleGetGlobal cuModuleGetGlobal_v2 + #define cuMemGetInfo cuMemGetInfo_v2 + #define cuMemAlloc cuMemAlloc_v2 + #define cuMemAllocPitch cuMemAllocPitch_v2 + #define cuMemFree cuMemFree_v2 + #define cuMemGetAddressRange cuMemGetAddressRange_v2 culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + #define cuMemAllocHost cuMemAllocHost_v2 + #define cuMemHostGetDevicePointer cuMemHostGetDevicePointer_v2 + #define cuMemcpyHtoD cuMemcpyHtoD_v2 + #define cuMemcpyDtoH cuMemcpyDtoH_v2 + #define cuMemcpyDtoD cuMemcpyDtoD_v2 + #define cuMemcpyDtoA cuMemcpyDtoA_v2 + #define cuMemcpyAtoD cuMemcpyAtoD_v2 + #define cuMemcpyHtoA cuMemcpyHtoA_v2 + #define cuMemcpyAtoH cuMemcpyAtoH_v2 + #define cuMemcpyAtoA cuMemcpyAtoA_v2 culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + #define cuMemcpyHtoAAsync cuMemcpyHtoAAsync_v2 + #define cuMemcpyAtoHAsync cuMemcpyAtoHAsync_v2 + #define cuMemcpy2D cuMemcpy2D_v2 + #define cuMemcpy2DUnaligned cuMemcpy2DUnaligned_v2 + #define cuMemcpy3D cuMemcpy3D_v2 + #define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2 + #define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2 + #define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2 + #define cuMemcpy2DAsync cuMemcpy2DAsync_v2 + #define cuMemcpy3DAsync cuMemcpy3DAsync_v2 culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + #define cuMemsetD8 cuMemsetD8_v2 + #define cuMemsetD16 cuMemsetD16_v2 + #define cuMemsetD32 cuMemsetD32_v2 + #define cuMemsetD2D8 cuMemsetD2D8_v2 + #define cuMemsetD2D16 cuMemsetD2D16_v2 + #define cuMemsetD2D32 cuMemsetD2D32_v2 + #define cuArrayCreate cuArrayCreate_v2 + #define cuArrayGetDescriptor cuArrayGetDescriptor_v2 + #define cuArray3DCreate cuArray3DCreate_v2 + #define cuArray3DGetDescriptor cuArray3DGetDescriptor_v2 culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + #define cuTexRefSetAddress cuTexRefSetAddress_v2 + #define cuTexRefGetAddress cuTexRefGetAddress_v2 + #define cuGraphicsResourceGetMappedPointer cuGraphicsResourceGetMappedPointer_v2 +#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION >= 3020 */ +#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION >= 4000 + #define cuCtxDestroy cuCtxDestroy_v2 + #define cuCtxPopCurrent cuCtxPopCurrent_v2 + #define cuCtxPushCurrent cuCtxPushCurrent_v2 + #define cuStreamDestroy cuStreamDestroy_v2 + #define cuEventDestroy cuEventDestroy_v2 culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION >= 4000 */ +#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION >= 4010 + #define cuTexRefSetAddress2D cuTexRefSetAddress2D_v3 +#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION >= 4010 */ + +#if !defined(__CUDA_API_VERSION_INTERNAL) +#if defined(__CUDA_API_VERSION) && __CUDA_API_VERSION >= 3020 && __CUDA_API_VERSION < 4010 + #define cuTexRefSetAddress2D cuTexRefSetAddress2D_v2 +#endif /* __CUDA_API_VERSION && __CUDA_API_VERSION >= 3020 && __CUDA_API_VERSION < 4010 */ +#endif /* __CUDA_API_VERSION_INTERNAL */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + +/** + * \defgroup CUDA_DRIVER CUDA Driver API + * + * This section describes the low-level CUDA driver application programming + * interface. + * + * @{ + */ + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +/** + * \defgroup CUDA_TYPES Data types used by CUDA driver + * @{ + */ + +/** + * CUDA API version number + */ +#define CUDA_VERSION 4020 + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +#ifdef __cplusplus +extern "C" { +#endif + +/** + * CUDA device pointer + */ +#if __CUDA_API_VERSION >= 3020 + +#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz +typedef unsigned long long CUdeviceptr; +#else +typedef unsigned int CUdeviceptr; +#endif + +#endif /* __CUDA_API_VERSION >= 3020 */ + +typedef int CUdevice; /**< CUDA device */ +typedef struct CUctx_st *CUcontext; /**< CUDA context */ +typedef struct CUmod_st *CUmodule; /**< CUDA module */ culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +typedef struct CUfunc_st *CUfunction; /**< CUDA function */ +typedef struct CUarray_st *CUarray; /**< CUDA array */ +typedef struct CUtexref_st *CUtexref; /**< CUDA texture reference */ +typedef struct CUsurfref_st *CUsurfref; /**< CUDA surface reference */ +typedef struct CUevent_st *CUevent; /**< CUDA event */ +typedef struct CUstream_st *CUstream; /**< CUDA stream */ +typedef struct CUgraphicsResource_st *CUgraphicsResource; /**< CUDA graphics interop resource */ + +typedef struct CUuuid_st { /**< CUDA definition of UUID */ + char bytes[16]; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +} CUuuid; + + +#if __CUDA_API_VERSION >= 4010 + +/** + * Interprocess Handles + */ +#define CU_IPC_HANDLE_SIZE 64 + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +typedef struct CUipcEventHandle_st { + char reserved[CU_IPC_HANDLE_SIZE]; +} CUipcEventHandle; + +typedef struct CUipcMemHandle_st { + char reserved[CU_IPC_HANDLE_SIZE]; +} CUipcMemHandle; + +typedef enum CUipcMem_flags_enum { + CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 0x1 /**< Automatically enable peer access between remote devices as needed */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +} CUipcMem_flags; + +#endif + +/** + * Context creation flags + */ +typedef enum CUctx_flags_enum { + CU_CTX_SCHED_AUTO = 0x00, /**< Automatic scheduling */ + CU_CTX_SCHED_SPIN = 0x01, /**< Set spin as default scheduling */ culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + CU_CTX_SCHED_YIELD = 0x02, /**< Set yield as default scheduling */ + CU_CTX_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */ + CU_CTX_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling + * \deprecated This flag was deprecated as of CUDA 4.0 + * and was replaced with ::CU_CTX_SCHED_BLOCKING_SYNC. */ + CU_CTX_SCHED_MASK = 0x07, + CU_CTX_MAP_HOST = 0x08, /**< Support mapped pinned allocations */ + CU_CTX_LMEM_RESIZE_TO_MAX = 0x10, /**< Keep local memory allocation after launch */ + CU_CTX_FLAGS_MASK = 0x1f +} CUctx_flags; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + +/** + * Event creation flags + */ +typedef enum CUevent_flags_enum { + CU_EVENT_DEFAULT = 0x0, /**< Default event flag */ + CU_EVENT_BLOCKING_SYNC = 0x1, /**< Event uses blocking synchronization */ + CU_EVENT_DISABLE_TIMING = 0x2, /**< Event will not record timing data */ + CU_EVENT_INTERPROCESS = 0x4 /**< Event is suitable for interprocess use. CU_EVENT_DISABLE_TIMING must be set */ +} CUevent_flags; culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + +/** + * Array formats + */ +typedef enum CUarray_format_enum { + CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, /**< Unsigned 8-bit integers */ + CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, /**< Unsigned 16-bit integers */ + CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, /**< Unsigned 32-bit integers */ + CU_AD_FORMAT_SIGNED_INT8 = 0x08, /**< Signed 8-bit integers */ + CU_AD_FORMAT_SIGNED_INT16 = 0x09, /**< Signed 16-bit integers */ culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + CU_AD_FORMAT_SIGNED_INT32 = 0x0a, /**< Signed 32-bit integers */ + CU_AD_FORMAT_HALF = 0x10, /**< 16-bit floating point */ + CU_AD_FORMAT_FLOAT = 0x20 /**< 32-bit floating point */ +} CUarray_format; + +/** + * Texture reference addressing modes + */ +typedef enum CUaddress_mode_enum { + CU_TR_ADDRESS_MODE_WRAP = 0, /**< Wrapping address mode */ culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + CU_TR_ADDRESS_MODE_CLAMP = 1, /**< Clamp to edge address mode */ + CU_TR_ADDRESS_MODE_MIRROR = 2, /**< Mirror address mode */ + CU_TR_ADDRESS_MODE_BORDER = 3 /**< Border address mode */ +} CUaddress_mode; + +/** + * Texture reference filtering modes + */ +typedef enum CUfilter_mode_enum { + CU_TR_FILTER_MODE_POINT = 0, /**< Point filter mode */ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + CU_TR_FILTER_MODE_LINEAR = 1 /**< Linear filter mode */ +} CUfilter_mode; + +/** + * Device properties + */ +typedef enum CUdevice_attribute_enum { + CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, /**< Maximum number of threads per block */ + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, /**< Maximum block dimension X */ + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, /**< Maximum block dimension Y */ culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, /**< Maximum block dimension Z */ + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, /**< Maximum grid dimension X */ + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, /**< Maximum grid dimension Y */ + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, /**< Maximum grid dimension Z */ + CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, /**< Maximum shared memory available per block in bytes */ + CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */ + CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */ + CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, /**< Warp size in threads */ + CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, /**< Maximum pitch in bytes allowed by memory copies */ + CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, /**< Maximum number of 32-bit registers available per block */ culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */ + CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, /**< Peak clock frequency in kilohertz */ + CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, /**< Alignment requirement for textures */ + CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, /**< Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT. */ + CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, /**< Number of multiprocessors on device */ + CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, /**< Specifies whether there is a run time limit on kernels */ + CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, /**< Device is integrated with host memory */ + CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, /**< Device can map host memory into CUDA address space */ + CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, /**< Compute mode (See ::CUcomputemode for details) */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, /**< Maximum 1D texture width */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, /**< Maximum 2D texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, /**< Maximum 2D texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, /**< Maximum 3D texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, /**< Maximum 3D texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, /**< Maximum 3D texture depth */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27, /**< Maximum 2D layered texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28, /**< Maximum 2D layered texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29, /**< Maximum layers in a 2D layered texture */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH */ + 
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS */ + CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, /**< Alignment requirement for surfaces */ + CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, /**< Device can possibly execute multiple kernels concurrently */ + CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, /**< Device has ECC support enabled */ + CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, /**< PCI bus ID of the device */ + CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34, /**< PCI device ID of the device */ + CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35, /**< Device is using TCC driver model */ + CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36, /**< Peak memory clock frequency in kilohertz */ + CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37, /**< Global memory bus width in bits */ + CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38, /**< Size of L2 cache in bytes */ culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39, /**< Maximum resident threads per multiprocessor */ + CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40, /**< Number of asynchronous engines */ + CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41, /**< Device shares a unified address space with the host */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42, /**< Maximum 1D layered texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43, /**< Maximum layers in a 1D layered texture */ + CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44, /**< Deprecated, do not use. 
*/ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45, /**< Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46, /**< Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47, /**< Alternate maximum 3D texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,/**< Alternate maximum 3D texture height */ culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49, /**< Alternate maximum 3D texture depth */ + CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50, /**< PCI domain ID of the device */ + CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51, /**< Pitch alignment requirement for textures */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52, /**< Maximum cubemap texture width/height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53, /**< Maximum cubemap layered texture width/height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54, /**< Maximum layers in a cubemap layered texture */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55, /**< Maximum 1D surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56, /**< Maximum 2D surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57, /**< Maximum 2D surface height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58, /**< Maximum 3D surface width */ culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59, /**< Maximum 3D surface height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60, /**< Maximum 3D surface depth */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61, /**< Maximum 1D layered surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62, /**< Maximum layers in a 1D layered surface */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63, /**< Maximum 2D layered surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64, /**< Maximum 2D layered surface height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65, /**< Maximum layers in a 2D layered surface */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66, /**< Maximum cubemap surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67, /**< Maximum cubemap layered surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68, /**< Maximum layers in a cubemap layered surface */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69, /**< Maximum 1D linear texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70, /**< Maximum 2D linear texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71, /**< Maximum 2D linear texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72 /**< Maximum 2D linear texture pitch in bytes */ +} CUdevice_attribute; + +/** + * Legacy device properties + */ +typedef struct CUdevprop_st { culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + int maxThreadsPerBlock; /**< Maximum number of threads per block */ + int maxThreadsDim[3]; /**< Maximum size of each dimension of a block */ + int maxGridSize[3]; /**< Maximum size of each dimension of a grid */ + int sharedMemPerBlock; /**< Shared memory available per block in bytes */ + int totalConstantMemory; /**< Constant memory available on device in bytes */ + int SIMDWidth; /**< Warp size in threads */ + int memPitch; /**< Maximum pitch in bytes allowed by memory copies */ + int regsPerBlock; /**< 32-bit registers available per block */ + int clockRate; /**< Clock frequency in kilohertz */ + int textureAlign; /**< Alignment requirement for textures */ culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +} CUdevprop; + +/** + * Pointer information + */ +typedef enum CUpointer_attribute_enum { + CU_POINTER_ATTRIBUTE_CONTEXT = 1, /**< The ::CUcontext on which a pointer was allocated or registered */ + CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2, /**< The ::CUmemorytype describing the physical location of a pointer */ + CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3, /**< The address at which a pointer's memory may be accessed on the device */ + CU_POINTER_ATTRIBUTE_HOST_POINTER = 4 /**< The address at which a pointer's memory may be accessed on the host */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +} CUpointer_attribute; + +/** + * Function properties + */ +typedef enum CUfunction_attribute_enum { + /** + * The maximum number of threads per block, beyond which a launch of the + * function would fail. This number depends on both the function and the + * device on which the function is currently loaded. culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + */ + CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, + + /** + * The size in bytes of statically-allocated shared memory required by + * this function. This does not include dynamically-allocated shared + * memory requested by the user at runtime. 
+ */ + CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1, + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + /** + * The size in bytes of user-allocated constant memory required by this + * function. + */ + CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2, + + /** + * The size in bytes of local memory used by each thread of this function. + */ + CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + + /** + * The number of registers used by each thread of this function. + */ + CU_FUNC_ATTRIBUTE_NUM_REGS = 4, + + /** + * The PTX virtual architecture version for which the function was + * compiled. This value is the major PTX version * 10 + the minor PTX + * version, so a PTX version 1.3 function would return the value 13. culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * Note that this may return the undefined value of 0 for cubins + * compiled prior to CUDA 3.0. + */ + CU_FUNC_ATTRIBUTE_PTX_VERSION = 5, + + /** + * The binary architecture version for which the function was compiled. + * This value is the major binary version * 10 + the minor binary version, + * so a binary version 1.3 function would return the value 13. Note that + * this will return a value of 10 for legacy cubins that do not have a culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * properly-encoded binary architecture version. 
+ */ + CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6, + + CU_FUNC_ATTRIBUTE_MAX +} CUfunction_attribute; + +/** + * Function cache configurations + */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +typedef enum CUfunc_cache_enum { + CU_FUNC_CACHE_PREFER_NONE = 0x00, /**< no preference for shared memory or L1 (default) */ + CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */ + CU_FUNC_CACHE_PREFER_L1 = 0x02, /**< prefer larger L1 cache and smaller shared memory */ + CU_FUNC_CACHE_PREFER_EQUAL = 0x03 /**< prefer equal sized L1 cache and shared memory */ +} CUfunc_cache; + +/** + * Shared memory configurations + */ culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +typedef enum CUsharedconfig_enum { + CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x00, /**< set default shared memory bank size */ + CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x01, /**< set shared memory bank width to four bytes */ + CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02 /**< set shared memory bank width to eight bytes */ +} CUsharedconfig; + +/** + * Memory types + */ +typedef enum CUmemorytype_enum { culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + CU_MEMORYTYPE_HOST = 0x01, /**< Host memory */ + CU_MEMORYTYPE_DEVICE = 0x02, /**< Device memory */ + CU_MEMORYTYPE_ARRAY = 0x03, /**< Array memory */ + CU_MEMORYTYPE_UNIFIED = 0x04 /**< Unified device or host memory */ +} CUmemorytype; + +/** + * Compute Modes + */ +typedef enum CUcomputemode_enum { culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + CU_COMPUTEMODE_DEFAULT = 0, /**< Default compute mode (Multiple contexts allowed per device) */ + CU_COMPUTEMODE_EXCLUSIVE = 1, /**< Compute-exclusive-thread mode (Only one context used by a single thread can be present on this device at a time) */ + CU_COMPUTEMODE_PROHIBITED = 2, /**< Compute-prohibited mode (No contexts can be created on this device at this time) */ + CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3 /**< Compute-exclusive-process mode (Only one context used by a single process can be present on this device at a time) */ +} CUcomputemode; + +/** + * Online compiler options + */ +typedef enum CUjit_option_enum culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +{ + /** + * Max number of registers that a thread may use.\n + * Option type: unsigned int + */ + CU_JIT_MAX_REGISTERS = 0, + + /** + * IN: Specifies minimum number of threads per block to target compilation + * for\n culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * OUT: Returns the number of threads the compiler actually targeted. + * This restricts the resource utilization fo the compiler (e.g. max + * registers) such that a block with the given number of threads should be + * able to launch based on register limitations. Note, this option does not + * currently take into account any other resource limitations, such as + * shared memory utilization.\n + * Option type: unsigned int + */ + CU_JIT_THREADS_PER_BLOCK, + culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + /** + * Returns a float value in the option of the wall clock time, in + * milliseconds, spent creating the cubin\n + * Option type: float + */ + CU_JIT_WALL_TIME, + + /** + * Pointer to a buffer in which to print any log messsages from PTXAS + * that are informational in nature (the buffer size is specified via culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES) \n + * Option type: char* + */ + CU_JIT_INFO_LOG_BUFFER, + + /** + * IN: Log buffer size in bytes. Log messages will be capped at this size + * (including null terminator)\n + * OUT: Amount of log buffer filled with messages\n + * Option type: unsigned int culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + */ + CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, + + /** + * Pointer to a buffer in which to print any log messages from PTXAS that + * reflect errors (the buffer size is specified via option + * ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n + * Option type: char* + */ + CU_JIT_ERROR_LOG_BUFFER, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + + /** + * IN: Log buffer size in bytes. Log messages will be capped at this size + * (including null terminator)\n + * OUT: Amount of log buffer filled with messages\n + * Option type: unsigned int + */ + CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, + + /** culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * Level of optimizations to apply to generated code (0 - 4), with 4 + * being the default and highest level of optimizations.\n + * Option type: unsigned int + */ + CU_JIT_OPTIMIZATION_LEVEL, + + /** + * No option value required. Determines the target based on the current + * attached context (default)\n + * Option type: No option value needed culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + */ + CU_JIT_TARGET_FROM_CUCONTEXT, + + /** + * Target is chosen based on supplied ::CUjit_target_enum.\n + * Option type: unsigned int for enumerated type ::CUjit_target_enum + */ + CU_JIT_TARGET, + + /** culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * Specifies choice of fallback strategy if matching cubin is not found. + * Choice is based on supplied ::CUjit_fallback_enum.\n + * Option type: unsigned int for enumerated type ::CUjit_fallback_enum + */ + CU_JIT_FALLBACK_STRATEGY + +} CUjit_option; + +/** + * Online compilation targets culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + */ +typedef enum CUjit_target_enum +{ + CU_TARGET_COMPUTE_10 = 0, /**< Compute device class 1.0 */ + CU_TARGET_COMPUTE_11, /**< Compute device class 1.1 */ + CU_TARGET_COMPUTE_12, /**< Compute device class 1.2 */ + CU_TARGET_COMPUTE_13, /**< Compute device class 1.3 */ + CU_TARGET_COMPUTE_20, /**< Compute device class 2.0 */ + CU_TARGET_COMPUTE_21, /**< Compute device class 2.1 */ + CU_TARGET_COMPUTE_30 /**< Compute device class 3.0 */ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +} CUjit_target; + +/** + * Cubin matching fallback strategies + */ +typedef enum CUjit_fallback_enum +{ + CU_PREFER_PTX = 0, /**< Prefer to compile ptx */ + + CU_PREFER_BINARY /**< Prefer to fall back to compatible binary code */ culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + +} CUjit_fallback; + +/** + * Flags to register a graphics resource + */ +typedef enum CUgraphicsRegisterFlags_enum { + CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00, + CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01, + CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02, culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04, + CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08 +} CUgraphicsRegisterFlags; + +/** + * Flags for mapping and unmapping interop resources + */ +typedef enum CUgraphicsMapResourceFlags_enum { + CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00, + CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02 +} CUgraphicsMapResourceFlags; + +/** + * Array indices for cube faces + */ +typedef enum CUarray_cubemap_face_enum { + CU_CUBEMAP_FACE_POSITIVE_X = 0x00, /**< Positive X face of cubemap */ + CU_CUBEMAP_FACE_NEGATIVE_X = 0x01, /**< Negative X face of cubemap */ + CU_CUBEMAP_FACE_POSITIVE_Y = 0x02, /**< Positive Y face of cubemap */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03, /**< Negative Y face of cubemap */ + CU_CUBEMAP_FACE_POSITIVE_Z = 0x04, /**< Positive Z face of cubemap */ + CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05 /**< Negative Z face of cubemap */ +} CUarray_cubemap_face; + +/** + * Limits + */ +typedef enum CUlimit_enum { + CU_LIMIT_STACK_SIZE = 0x00, /**< GPU thread stack size */ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + CU_LIMIT_PRINTF_FIFO_SIZE = 0x01, /**< GPU printf FIFO size */ + CU_LIMIT_MALLOC_HEAP_SIZE = 0x02 /**< GPU malloc heap size */ +} CUlimit; + +/** + * Error codes + */ +typedef enum cudaError_enum { + /** + * The API call returned with no errors. In the case of query calls, this culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * can also mean that the operation being queried is complete (see + * ::cuEventQuery() and ::cuStreamQuery()). + */ + CUDA_SUCCESS = 0, + + /** + * This indicates that one or more of the parameters passed to the API call + * is not within an acceptable range of values. + */ + CUDA_ERROR_INVALID_VALUE = 1, culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + + /** + * The API call failed because it was unable to allocate enough memory to + * perform the requested operation. + */ + CUDA_ERROR_OUT_OF_MEMORY = 2, + + /** + * This indicates that the CUDA driver has not been initialized with + * ::cuInit() or that initialization has failed. culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + */ + CUDA_ERROR_NOT_INITIALIZED = 3, + + /** + * This indicates that the CUDA driver is in the process of shutting down. + */ + CUDA_ERROR_DEINITIALIZED = 4, + + /** + * This indicates profiling APIs are called while application is running culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * in visual profiler mode. + */ + CUDA_ERROR_PROFILER_DISABLED = 5, + /** + * This indicates profiling has not been initialized for this context. + * Call cuProfilerInitialize() to resolve this. + */ + CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6, + /** + * This indicates profiler has already been started and probably culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * cuProfilerStart() is incorrectly called. + */ + CUDA_ERROR_PROFILER_ALREADY_STARTED = 7, + /** + * This indicates profiler has already been stopped and probably + * cuProfilerStop() is incorrectly called. + */ + CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8, + /** + * This indicates that no CUDA-capable devices were detected by the installed culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * CUDA driver. + */ + CUDA_ERROR_NO_DEVICE = 100, + + /** + * This indicates that the device ordinal supplied by the user does not + * correspond to a valid CUDA device. + */ + CUDA_ERROR_INVALID_DEVICE = 101, + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + + /** + * This indicates that the device kernel image is invalid. This can also + * indicate an invalid CUDA module. + */ + CUDA_ERROR_INVALID_IMAGE = 200, + + /** + * This most frequently indicates that there is no context bound to the + * current thread. 
This can also be returned if the context passed to an culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * API call is not a valid handle (such as a context that has had + * ::cuCtxDestroy() invoked on it). This can also be returned if a user + * mixes different API versions (i.e. 3010 context with 3020 API calls). + * See ::cuCtxGetApiVersion() for more details. + */ + CUDA_ERROR_INVALID_CONTEXT = 201, + + /** + * This indicated that the context being supplied as a parameter to the + * API call was already the active context. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \deprecated + * This error return is deprecated as of CUDA 3.2. It is no longer an + * error to attempt to push the active context via ::cuCtxPushCurrent(). + */ + CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202, + + /** + * This indicates that a map or register operation has failed. + */ + CUDA_ERROR_MAP_FAILED = 205, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + + /** + * This indicates that an unmap or unregister operation has failed. + */ + CUDA_ERROR_UNMAP_FAILED = 206, + + /** + * This indicates that the specified array is currently mapped and thus + * cannot be destroyed. + */ culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + CUDA_ERROR_ARRAY_IS_MAPPED = 207, + + /** + * This indicates that the resource is already mapped. + */ + CUDA_ERROR_ALREADY_MAPPED = 208, + + /** + * This indicates that there is no kernel image available that is suitable + * for the device. This can occur when a user specifies code generation culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * options for a particular CUDA source file that do not include the + * corresponding device configuration. + */ + CUDA_ERROR_NO_BINARY_FOR_GPU = 209, + + /** + * This indicates that a resource has already been acquired. + */ + CUDA_ERROR_ALREADY_ACQUIRED = 210, + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + /** + * This indicates that a resource is not mapped. 
+ */ + CUDA_ERROR_NOT_MAPPED = 211, + + /** + * This indicates that a mapped resource is not available for access as an + * array. + */ + CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + + /** + * This indicates that a mapped resource is not available for access as a + * pointer. + */ + CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213, + + /** + * This indicates that an uncorrectable ECC error was detected during + * execution. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + */ + CUDA_ERROR_ECC_UNCORRECTABLE = 214, + + /** + * This indicates that the ::CUlimit passed to the API call is not + * supported by the active device. + */ + CUDA_ERROR_UNSUPPORTED_LIMIT = 215, + + /** culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * This indicates that the ::CUcontext passed to the API call can + * only be bound to a single CPU thread at a time but is already + * bound to a CPU thread. + */ + CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216, + + /** + * This indicates that the device kernel source is invalid. + */ + CUDA_ERROR_INVALID_SOURCE = 300, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + /** + * This indicates that the file specified was not found. + */ + CUDA_ERROR_FILE_NOT_FOUND = 301, + + /** + * This indicates that a link to a shared object failed to resolve. + */ + CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + /** + * This indicates that initialization of a shared object failed. + */ + CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303, + + /** + * This indicates that an OS call failed. + */ + CUDA_ERROR_OPERATING_SYSTEM = 304, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + + /** + * This indicates that a resource handle passed to the API call was not + * valid. Resource handles are opaque types like ::CUstream and ::CUevent. + */ + CUDA_ERROR_INVALID_HANDLE = 400, + + + /** culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * This indicates that a named symbol was not found. Examples of symbols + * are global/constant variable names, texture names, and surface names. + */ + CUDA_ERROR_NOT_FOUND = 500, + + + /** + * This indicates that asynchronous operations issued previously have not + * completed yet. This result is not actually an error, but must be indicated + * differently than ::CUDA_SUCCESS (which indicates completion). Calls that culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * may return this value include ::cuEventQuery() and ::cuStreamQuery(). + */ + CUDA_ERROR_NOT_READY = 600, + + + /** + * An exception occurred on the device while executing a kernel. Common + * causes include dereferencing an invalid device pointer and accessing + * out of bounds shared memory. The context cannot be used, so it must + * be destroyed (and a new one should be created). All existing device culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * memory allocations from this context are invalid and must be + * reconstructed if the program is to continue using CUDA. + */ + CUDA_ERROR_LAUNCH_FAILED = 700, + + /** + * This indicates that a launch did not occur because it did not have + * appropriate resources. This error usually indicates that the user has + * attempted to pass too many arguments to the device kernel, or the + * kernel launch specifies too many threads for the kernel's register culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * count. Passing arguments of the wrong size (i.e. a 64-bit pointer + * when a 32-bit int is expected) is equivalent to passing too many + * arguments and can also result in this error. + */ + CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, + + /** + * This indicates that the device kernel took too long to execute. This can + * only occur if timeouts are enabled - see the device attribute + * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. The culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * context cannot be used (and must be destroyed similar to + * ::CUDA_ERROR_LAUNCH_FAILED). All existing device memory allocations from + * this context are invalid and must be reconstructed if the program is to + * continue using CUDA. + */ + CUDA_ERROR_LAUNCH_TIMEOUT = 702, + + /** + * This error indicates a kernel launch that uses an incompatible texturing + * mode. culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + */ + CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, + + /** + * This error indicates that a call to ::cuCtxEnablePeerAccess() is + * trying to re-enable peer access to a context which has already + * had peer access to it enabled. + */ + CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704, + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + /** + * This error indicates that ::cuCtxDisablePeerAccess() is + * trying to disable peer access which has not been enabled yet + * via ::cuCtxEnablePeerAccess(). + */ + CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705, + + /** + * This error indicates that the primary context for the specified device + * has already been initialized. culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + */ + CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708, + + /** + * This error indicates that the context current to the calling thread + * has been destroyed using ::cuCtxDestroy, or is a primary context which + * has not yet been initialized. + */ + CUDA_ERROR_CONTEXT_IS_DESTROYED = 709, + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + /** + * A device-side assert triggered during kernel execution. The context + * cannot be used anymore, and must be destroyed. All existing device + * memory allocations from this context are invalid and must be + * reconstructed if the program is to continue using CUDA. + */ + CUDA_ERROR_ASSERT = 710, + + /** + * This error indicates that the hardware resources required to enable culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * peer access have been exhausted for one or more of the devices + * passed to ::cuCtxEnablePeerAccess(). + */ + CUDA_ERROR_TOO_MANY_PEERS = 711, + + /** + * This error indicates that the memory range passed to ::cuMemHostRegister() + * has already been registered. + */ + CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + /** + * This error indicates that the pointer passed to ::cuMemHostUnregister() + * does not correspond to any currently registered memory region. + */ + CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713, + + /** + * This indicates that an unknown internal error has occurred. + */ culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + CUDA_ERROR_UNKNOWN = 999 +} CUresult; + +/** + * If set, host memory is portable between CUDA contexts. + * Flag for ::cuMemHostAlloc() + */ +#define CU_MEMHOSTALLOC_PORTABLE 0x01 + +/** culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * If set, host memory is mapped into CUDA address space and + * ::cuMemHostGetDevicePointer() may be called on the host pointer. + * Flag for ::cuMemHostAlloc() + */ +#define CU_MEMHOSTALLOC_DEVICEMAP 0x02 + +/** + * If set, host memory is allocated as write-combined - fast to write, + * faster to DMA, slow to read except via SSE4 streaming load instruction + * (MOVNTDQA). culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * Flag for ::cuMemHostAlloc() + */ +#define CU_MEMHOSTALLOC_WRITECOMBINED 0x04 + +/** + * If set, host memory is portable between CUDA contexts. + * Flag for ::cuMemHostRegister() + */ +#define CU_MEMHOSTREGISTER_PORTABLE 0x01 + culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +/** + * If set, host memory is mapped into CUDA address space and + * ::cuMemHostGetDevicePointer() may be called on the host pointer. + * Flag for ::cuMemHostRegister() + */ +#define CU_MEMHOSTREGISTER_DEVICEMAP 0x02 + +#if __CUDA_API_VERSION >= 3020 + +/** culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * 2D memory copy parameters + */ +typedef struct CUDA_MEMCPY2D_st { + size_t srcXInBytes; /**< Source X in bytes */ + size_t srcY; /**< Source Y */ + + CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */ + const void *srcHost; /**< Source host pointer */ + CUdeviceptr srcDevice; /**< Source device pointer */ + CUarray srcArray; /**< Source array reference */ culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + size_t srcPitch; /**< Source pitch (ignored when src is array) */ + + size_t dstXInBytes; /**< Destination X in bytes */ + size_t dstY; /**< Destination Y */ + + CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */ + void *dstHost; /**< Destination host pointer */ + CUdeviceptr dstDevice; /**< Destination device pointer */ + CUarray dstArray; /**< Destination array reference */ + size_t dstPitch; /**< Destination pitch (ignored when dst is array) */ culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + size_t WidthInBytes; /**< Width of 2D memory copy in bytes */ + size_t Height; /**< Height of 2D memory copy */ +} CUDA_MEMCPY2D; + +/** + * 3D memory copy parameters + */ +typedef struct CUDA_MEMCPY3D_st { + size_t srcXInBytes; /**< Source X in bytes */ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + size_t srcY; /**< Source Y */ + size_t srcZ; /**< Source Z */ + size_t srcLOD; /**< Source LOD */ + CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */ + const void *srcHost; /**< Source host pointer */ + CUdeviceptr srcDevice; /**< Source device pointer */ + CUarray srcArray; /**< Source array reference */ + void *reserved0; /**< Must be NULL */ + size_t srcPitch; /**< Source pitch (ignored when src is array) */ + size_t srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */ culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + + size_t dstXInBytes; /**< Destination X in bytes */ + size_t dstY; /**< Destination Y */ + size_t dstZ; /**< Destination Z */ + size_t dstLOD; /**< Destination LOD */ + CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */ + void *dstHost; /**< Destination host pointer */ + CUdeviceptr dstDevice; /**< Destination device pointer */ + CUarray dstArray; /**< Destination array reference */ + void *reserved1; /**< Must be NULL */ culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + size_t dstPitch; /**< Destination pitch (ignored when dst is array) */ + size_t dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */ + + size_t WidthInBytes; /**< Width of 3D memory copy in bytes */ + size_t Height; /**< Height of 3D memory copy */ + size_t Depth; /**< Depth of 3D memory copy */ +} CUDA_MEMCPY3D; + +/** + * 3D memory cross-context copy parameters culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + */ +typedef struct CUDA_MEMCPY3D_PEER_st { + size_t srcXInBytes; /**< Source X in bytes */ + size_t srcY; /**< Source Y */ + size_t srcZ; /**< Source Z */ + size_t srcLOD; /**< Source LOD */ + CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */ + const void *srcHost; /**< Source host pointer */ + CUdeviceptr srcDevice; /**< Source device pointer */ + CUarray srcArray; /**< Source array reference */ culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + CUcontext srcContext; /**< Source context (ignored with srcMemoryType is ::CU_MEMORYTYPE_ARRAY) */ + size_t srcPitch; /**< Source pitch (ignored when src is array) */ + size_t srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */ + + size_t dstXInBytes; /**< Destination X in bytes */ + size_t dstY; /**< Destination Y */ + size_t dstZ; /**< Destination Z */ + size_t dstLOD; /**< Destination LOD */ + CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */ + void *dstHost; /**< Destination host pointer */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + CUdeviceptr dstDevice; /**< Destination device pointer */ + CUarray dstArray; /**< Destination array reference */ + CUcontext dstContext; /**< Destination context (ignored with dstMemoryType is ::CU_MEMORYTYPE_ARRAY) */ + size_t dstPitch; /**< Destination pitch (ignored when dst is array) */ + size_t dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */ + + size_t WidthInBytes; /**< Width of 3D memory copy in bytes */ + size_t Height; /**< Height of 3D memory copy */ + size_t Depth; /**< Depth of 3D memory copy */ +} CUDA_MEMCPY3D_PEER; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + +/** + * Array descriptor + */ +typedef struct CUDA_ARRAY_DESCRIPTOR_st +{ + size_t Width; /**< Width of array */ + size_t Height; /**< Height of array */ + + CUarray_format Format; /**< Array format */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + unsigned int NumChannels; /**< Channels per array element */ +} CUDA_ARRAY_DESCRIPTOR; + +/** + * 3D array descriptor + */ +typedef struct CUDA_ARRAY3D_DESCRIPTOR_st +{ + size_t Width; /**< Width of 3D array */ + size_t Height; /**< Height of 3D array */ culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + size_t Depth; /**< Depth of 3D array */ + + CUarray_format Format; /**< Array format */ + unsigned int NumChannels; /**< Channels per array element */ + unsigned int Flags; /**< Flags */ +} CUDA_ARRAY3D_DESCRIPTOR; + +#endif /* __CUDA_API_VERSION >= 3020 */ + +/** culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * If set, the CUDA array is a collection of layers, where each layer is either a 1D + * or a 2D array and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number + * of layers, not the depth of a 3D array. + */ +#define CUDA_ARRAY3D_LAYERED 0x01 + +/** + * Deprecated, use CUDA_ARRAY3D_LAYERED + */ +#define CUDA_ARRAY3D_2DARRAY 0x01 culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + +/** + * This flag must be set in order to bind a surface reference + * to the CUDA array + */ +#define CUDA_ARRAY3D_SURFACE_LDST 0x02 + +/** + * If set, the CUDA array is a collection of six 2D arrays, representing faces of a cube. The + * width of such a CUDA array must be equal to its height, and Depth must be six. culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * If ::CUDA_ARRAY3D_LAYERED flag is also set, then the CUDA array is a collection of cubemaps + * and Depth must be a multiple of six. + */ +#define CUDA_ARRAY3D_CUBEMAP 0x04 + +/** + * This flag must be set in order to perform texture gather operations + * on a CUDA array. + */ +#define CUDA_ARRAY3D_TEXTURE_GATHER 0x08 culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + +/** + * Override the texref format with a format inferred from the array. + * Flag for ::cuTexRefSetArray() + */ +#define CU_TRSA_OVERRIDE_FORMAT 0x01 + +/** + * Read the texture as integers rather than promoting the values to floats + * in the range [0,1]. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * Flag for ::cuTexRefSetFlags() + */ +#define CU_TRSF_READ_AS_INTEGER 0x01 + +/** + * Use normalized texture coordinates in the range [0,1) instead of [0,dim). 
+ * Flag for ::cuTexRefSetFlags() + */ +#define CU_TRSF_NORMALIZED_COORDINATES 0x02 + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +/** + * Perform sRGB->linear conversion during texture read. + * Flag for ::cuTexRefSetFlags() + */ +#define CU_TRSF_SRGB 0x10 + +/** + * End of array terminator for the \p extra parameter to + * ::cuLaunchKernel + */ culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +#define CU_LAUNCH_PARAM_END ((void*)0x00) + +/** + * Indicator that the next value in the \p extra parameter to + * ::cuLaunchKernel will be a pointer to a buffer containing all kernel + * parameters used for launching kernel \p f. This buffer needs to + * honor all alignment/padding requirements of the individual parameters. + * If ::CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the + * \p extra array, then ::CU_LAUNCH_PARAM_BUFFER_POINTER will have no + * effect. culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + */ +#define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01) + +/** + * Indicator that the next value in the \p extra parameter to + * ::cuLaunchKernel will be a pointer to a size_t which contains the + * size of the buffer specified with ::CU_LAUNCH_PARAM_BUFFER_POINTER. + * It is required that ::CU_LAUNCH_PARAM_BUFFER_POINTER also be specified + * in the \p extra array if the value associated with + * ::CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + */ +#define CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02) + +/** + * For texture references loaded into the module, use default texunit from + * texture reference. + */ +#define CU_PARAM_TR_DEFAULT -1 + +/** @} */ /* END CUDA_TYPES */ culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + +#ifdef _WIN32 +#define CUDAAPI __stdcall +#else +#define CUDAAPI +#endif + +/** + * \defgroup CUDA_INITIALIZE Initialization + * culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * This section describes the initialization functions of the low-level CUDA + * driver application programming interface. + * + * @{ + */ + +/** + * \brief Initialize the CUDA driver API + * + * Initializes the driver API and must be called before any other function from culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * the driver API. Currently, the \p Flags parameter must be 0. If ::cuInit() + * has not been called, any function from the driver API will return + * ::CUDA_ERROR_NOT_INITIALIZED. + * + * \param Flags - Initialization flag for CUDA. + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_INVALID_DEVICE culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \notefnerr + */ +CUresult CUDAAPI cuInit(unsigned int Flags); + +/** @} */ /* END CUDA_INITIALIZE */ + +/** + * \defgroup CUDA_VERSION Version Management + * + * This section describes the version management functions of the low-level culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * CUDA driver application programming interface. + * + * @{ + */ + +/** + * \brief Returns the CUDA driver version + * + * Returns in \p *driverVersion the version number of the installed CUDA + * driver. This function automatically returns ::CUDA_ERROR_INVALID_VALUE if culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * the \p driverVersion argument is NULL. + * + * \param driverVersion - Returns the CUDA driver version + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + */ +CUresult CUDAAPI cuDriverGetVersion(int *driverVersion); culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + +/** @} */ /* END CUDA_VERSION */ + +/** + * \defgroup CUDA_DEVICE Device Management + * + * This section describes the device management functions of the low-level + * CUDA driver application programming interface. + * + * @{ culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + */ + +/** + * \brief Returns a handle to a compute device + * + * Returns in \p *device a device handle given an ordinal in the range [0, + * ::cuDeviceGetCount()-1]. + * + * \param device - Returned device handle + * \param ordinal - Device number to get handle for culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_INVALID_DEVICE + * \notefnerr + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \sa ::cuDeviceComputeCapability, + * ::cuDeviceGetAttribute, + * ::cuDeviceGetCount, + * ::cuDeviceGetName, + * ::cuDeviceGetProperties, + * ::cuDeviceTotalMem + */ +CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal); + +/** culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \brief Returns the number of compute-capable devices + * + * Returns in \p *count the number of devices with compute capability greater + * than or equal to 1.0 that are available for execution. If there is no such + * device, ::cuDeviceGetCount() returns 0. + * + * \param count - Returned number of compute-capable devices + * + * \return + * ::CUDA_SUCCESS, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuDeviceComputeCapability, + * ::cuDeviceGetAttribute, + * ::cuDeviceGetName, + * ::cuDeviceGet, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuDeviceGetProperties, + * ::cuDeviceTotalMem + */ +CUresult CUDAAPI cuDeviceGetCount(int *count); + +/** + * \brief Returns an identifer string for the device + * + * Returns an ASCII string identifying the device \p dev in the NULL-terminated + * string pointed to by \p name. 
\p len specifies the maximum length of the culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * string that may be returned. + * + * \param name - Returned identifier string for the device + * \param len - Maximum length of string to store in \p name + * \param dev - Device to get identifier string for + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_INVALID_DEVICE + * \notefnerr + * + * \sa ::cuDeviceComputeCapability, + * ::cuDeviceGetAttribute, + * ::cuDeviceGetCount, + * ::cuDeviceGet, + * ::cuDeviceGetProperties, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuDeviceTotalMem + */ +CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev); + +/** + * \brief Returns the compute capability of the device + * + * Returns in \p *major and \p *minor the major and minor revision numbers that + * define the compute capability of the device \p dev. + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \param major - Major revision number + * \param minor - Minor revision number + * \param dev - Device handle + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_DEVICE + * \notefnerr + * + * \sa + * ::cuDeviceGetAttribute, + * ::cuDeviceGetCount, + * ::cuDeviceGetName, + * ::cuDeviceGet, + * ::cuDeviceGetProperties, + * ::cuDeviceTotalMem culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + */ +CUresult CUDAAPI cuDeviceComputeCapability(int *major, int *minor, CUdevice dev); + +#if __CUDA_API_VERSION >= 3020 +/** + * \brief Returns the total amount of memory on the device + * + * Returns in \p *bytes the total amount of memory available on the device + * \p dev in bytes. + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \param bytes - Returned memory available on device in bytes + * \param dev - Device handle + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_INVALID_DEVICE culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuDeviceComputeCapability, + * ::cuDeviceGetAttribute, + * ::cuDeviceGetCount, + * ::cuDeviceGetName, + * ::cuDeviceGet, + * ::cuDeviceGetProperties, + */ +CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +#endif /* __CUDA_API_VERSION >= 3020 */ + +/** + * \brief Returns properties for a selected device + * + * Returns in \p *prop the properties of device \p dev. The ::CUdevprop + * structure is defined as: + * + * \code + typedef struct CUdevprop_st { culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + int maxThreadsPerBlock; + int maxThreadsDim[3]; + int maxGridSize[3]; + int sharedMemPerBlock; + int totalConstantMemory; + int SIMDWidth; + int memPitch; + int regsPerBlock; + int clockRate; + int textureAlign culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + } CUdevprop; + * \endcode + * where: + * + * - ::maxThreadsPerBlock is the maximum number of threads per block; + * - ::maxThreadsDim[3] is the maximum sizes of each dimension of a block; + * - ::maxGridSize[3] is the maximum sizes of each dimension of a grid; + * - ::sharedMemPerBlock is the total amount of shared memory available per + * block in bytes; + * - ::totalConstantMemory is the total amount of constant memory available on culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * the device in bytes; + * - ::SIMDWidth is the warp size; + * - ::memPitch is the maximum pitch allowed by the memory copy functions that + * involve memory regions allocated through ::cuMemAllocPitch(); + * - ::regsPerBlock is the total number of registers available per block; + * - ::clockRate is the clock frequency in kilohertz; + * - ::textureAlign is the alignment requirement; texture base addresses that + * are aligned to ::textureAlign bytes do not need an offset applied to + * texture fetches. + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \param prop - Returned properties of device + * \param dev - Device to get properties for + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_INVALID_DEVICE culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuDeviceComputeCapability, + * ::cuDeviceGetAttribute, + * ::cuDeviceGetCount, + * ::cuDeviceGetName, + * ::cuDeviceGet, + * ::cuDeviceTotalMem + */ +CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, CUdevice dev); culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + +/** + * \brief Returns information about the device + * + * Returns in \p *pi the integer value of the attribute \p attrib on device + * \p dev. 
The supported attributes are: + * - ::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads per + * block; + * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X: Maximum x-dimension of a block; + * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y: Maximum y-dimension of a block; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z: Maximum z-dimension of a block; + * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X: Maximum x-dimension of a grid; + * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y: Maximum y-dimension of a grid; + * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z: Maximum z-dimension of a grid; + * - ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: Maximum amount of + * shared memory available to a thread block in bytes; this amount is shared + * by all thread blocks simultaneously resident on a multiprocessor; + * - ::CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: Memory available on device for + * __constant__ variables in a CUDA C kernel in bytes; + * - ::CU_DEVICE_ATTRIBUTE_WARP_SIZE: Warp size in threads; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * - ::CU_DEVICE_ATTRIBUTE_MAX_PITCH: Maximum pitch in bytes allowed by the + * memory copy functions that involve memory regions allocated through + * ::cuMemAllocPitch(); + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH: Maximum 1D + * texture width; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH: Maximum width + * for a 1D texture bound to linear memory; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH: Maximum 2D + * texture width; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT: Maximum 2D culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * texture height; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH: Maximum width + * for a 2D texture bound to linear memory; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT: Maximum height + * for a 2D texture bound to linear memory; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH: Maximum pitch + * in bytes for a 2D texture bound to linear memory; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH: Maximum 3D + * texture width; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT: Maximum 3D culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * texture height; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH: Maximum 3D + * texture depth; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE: + * Alternate maximum 3D texture width, 0 if no alternate + * maximum 3D texture size is supported; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE: + * Alternate maximum 3D texture height, 0 if no alternate + * maximum 3D texture size is supported; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE: culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * Alternate maximum 3D texture depth, 0 if no alternate + * maximum 3D texture size is supported; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH: + * Maximum cubemap texture width or height; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH: + * Maximum 1D layered texture width; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS: + * Maximum layers in a 1D layered texture; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH: + * Maximum 2D layered texture width; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT: + * Maximum 2D layered texture height; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS: + * Maximum layers in a 2D layered texture; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH: + * Maximum cubemap layered texture width or height; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS: + * Maximum layers in a cubemap layered texture; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH: + * Maximum 1D surface width; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH: + * Maximum 2D surface width; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT: + * Maximum 2D surface height; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH: + * Maximum 3D surface width; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT: + * Maximum 3D surface height; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH: + * Maximum 3D surface depth; culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH: + * Maximum 1D layered surface width; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS: + * Maximum layers in a 1D layered surface; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH: + * Maximum 2D layered surface width; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT: + * Maximum 2D layered surface height; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS: + * Maximum layers in a 2D layered surface; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH: + * Maximum cubemap surface width; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH: + * Maximum cubemap layered surface width; + * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS: + * Maximum layers in a cubemap layered surface; + * - ::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: Maximum number of 32-bit + * registers available to a thread block; this number is shared by all thread + * blocks simultaneously resident on a multiprocessor; + * - ::CU_DEVICE_ATTRIBUTE_CLOCK_RATE: Peak clock frequency in kilohertz; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * - ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT: Alignment requirement; texture + * base addresses aligned to ::textureAlign bytes do not need an offset + * applied to texture fetches; + * - ::CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT: Pitch alignment requirement + * for 2D texture references bound to pitched memory; + * - ::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: 1 if the device can concurrently copy + * memory between host and device while executing a kernel, or 0 if not; + * - ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: Number of multiprocessors on + * the device; + * - ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT: 1 if there is a run time limit culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * for kernels executed on the device, or 0 if not; + * - ::CU_DEVICE_ATTRIBUTE_INTEGRATED: 1 if the device is integrated with the + * memory subsystem, or 0 if not; + * - ::CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: 1 if the device can map host + * memory into the CUDA address space, or 0 if not; + * - ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE: Compute mode that device is currently + * in. Available modes are as follows: + * - ::CU_COMPUTEMODE_DEFAULT: Default mode - Device is not restricted and + * can have multiple CUDA contexts present at a single time. + * - ::CU_COMPUTEMODE_EXCLUSIVE: Compute-exclusive mode - Device can have culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * only one CUDA context present on it at a time. + * - ::CU_COMPUTEMODE_PROHIBITED: Compute-prohibited mode - Device is + * prohibited from creating new CUDA contexts. + * - ::CU_COMPUTEMODE_EXCLUSIVE_PROCESS: Compute-exclusive-process mode - Device + * can have only one context used by a single process at a time. + * - ::CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: 1 if the device supports + * executing multiple kernels within the same context simultaneously, or 0 if + * not. It is not guaranteed that multiple kernels will be resident + * on the device concurrently so this feature should not be relied upon for + * correctness; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * - ::CU_DEVICE_ATTRIBUTE_ECC_ENABLED: 1 if error correction is enabled on the + * device, 0 if error correction is disabled or not supported by the device; + * - ::CU_DEVICE_ATTRIBUTE_PCI_BUS_ID: PCI bus identifier of the device; + * - ::CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID: PCI device (also known as slot) identifier + * of the device; + * - ::CU_DEVICE_ATTRIBUTE_TCC_DRIVER: 1 if the device is using a TCC driver. 
TCC + * is only available on Tesla hardware running Windows Vista or later; + * - ::CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE: Peak memory clock frequency in kilohertz; + * - ::CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH: Global memory bus width in bits; + * - ::CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE: Size of L2 cache in bytes. 0 if the device doesn't have L2 cache; culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * - ::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR: Maximum resident threads per multiprocessor; + * - ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING: 1 if the device shares a unified address space with + * the host, or 0 if not; + * + * \param pi - Returned device attribute value + * \param attrib - Device attribute to query + * \param dev - Device handle + * + * \return + * ::CUDA_SUCCESS, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_INVALID_DEVICE + * \notefnerr + * + * \sa ::cuDeviceComputeCapability, + * ::cuDeviceGetCount, + * ::cuDeviceGetName, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuDeviceGet, + * ::cuDeviceGetProperties, + * ::cuDeviceTotalMem + */ +CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev); + +/** @} */ /* END CUDA_DEVICE */ + + +/** culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \defgroup CUDA_CTX Context Management + * + * This section describes the context management functions of the low-level + * CUDA driver application programming interface. + * + * @{ + */ + +#if __CUDA_API_VERSION >= 3020 +/** culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \brief Create a CUDA context + * + * Creates a new CUDA context and associates it with the calling thread. The + * \p flags parameter is described below. 
The context is created with a usage + * count of 1 and the caller of ::cuCtxCreate() must call ::cuCtxDestroy() + * when done using the context. If a context is already current to the thread, + * it is supplanted by the newly created context and may be restored by a subsequent + * call to ::cuCtxPopCurrent(). + * + * The three LSBs of the \p flags parameter can be used to control how the OS culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * thread, which owns the CUDA context at the time of an API call, interacts + * with the OS scheduler when waiting for results from the GPU. Only one of + * the scheduling flags can be set when creating a context. + * + * - ::CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is zero, + * uses a heuristic based on the number of active CUDA contexts in the + * process \e C and the number of logical processors in the system \e P. If + * \e C > \e P, then CUDA will yield to other OS threads when waiting for + * the GPU, otherwise CUDA will not yield while waiting for results and + * actively spin on the processor. culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * - ::CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for + * results from the GPU. This can decrease latency when waiting for the GPU, + * but may lower the performance of CPU threads if they are performing work in + * parallel with the CUDA thread. + * + * - ::CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for + * results from the GPU. This can increase latency when waiting for the GPU, + * but can increase the performance of CPU threads performing work in parallel + * with the GPU. culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * - ::CU_CTX_SCHED_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a + * synchronization primitive when waiting for the GPU to finish work.
+ * + * - ::CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a + * synchronization primitive when waiting for the GPU to finish work.
+ * Deprecated: This flag was deprecated as of CUDA 4.0 and was + * replaced with ::CU_CTX_SCHED_BLOCKING_SYNC. + * + * - ::CU_CTX_MAP_HOST: Instruct CUDA to support mapped pinned allocations. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * This flag must be set in order to allocate pinned host memory that is + * accessible to the GPU. + * + * - ::CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory + * after resizing local memory for a kernel. This can prevent thrashing by + * local memory allocations when launching many kernels with high local + * memory usage at the cost of potentially increased memory usage. + * + * Context creation will fail with ::CUDA_ERROR_UNKNOWN if the compute mode of + * the device is ::CU_COMPUTEMODE_PROHIBITED. Similarly, context creation will culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * also fail with ::CUDA_ERROR_UNKNOWN if the compute mode for the device is + * set to ::CU_COMPUTEMODE_EXCLUSIVE and there is already an active context on + * the device. The function ::cuDeviceGetAttribute() can be used with + * ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the compute mode of the + * device. The nvidia-smi tool can be used to set the compute mode for + * devices. Documentation for nvidia-smi can be obtained by passing a + * -h option to it. + * + * \param pctx - Returned context handle of the new context + * \param flags - Context creation flags culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \param dev - Device to create context on + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_DEVICE, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_OUT_OF_MEMORY, culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * ::CUDA_ERROR_UNKNOWN + * \notefnerr + * + * \sa ::cuCtxDestroy, + * ::cuCtxGetApiVersion, + * ::cuCtxGetCacheConfig, + * ::cuCtxGetDevice, + * ::cuCtxGetLimit, + * ::cuCtxPopCurrent, + * ::cuCtxPushCurrent, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuCtxSetCacheConfig, + * ::cuCtxSetLimit, + * ::cuCtxSynchronize + */ +CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev); +#endif /* __CUDA_API_VERSION >= 3020 */ + +#if __CUDA_API_VERSION >= 4000 +/** + * \brief Destroy a CUDA context culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * + * Destroys the CUDA context specified by \p ctx. The context \p ctx will be + * destroyed regardless of how many threads it is current to. + * It is the responsibility of the calling function to ensure that no API + * call issues using \p ctx while ::cuCtxDestroy() is executing. + * + * If \p ctx is current to the calling thread then \p ctx will also be + * popped from the current thread's context stack (as though ::cuCtxPopCurrent() + * were called). If \p ctx is current to other threads, then \p ctx will + * remain current to those threads, and attempting to access \p ctx from culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * those threads will result in the error ::CUDA_ERROR_CONTEXT_IS_DESTROYED. + * + * \param ctx - Context to destroy + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuCtxCreate, + * ::cuCtxGetApiVersion, + * ::cuCtxGetCacheConfig, + * ::cuCtxGetDevice, + * ::cuCtxGetLimit, + * ::cuCtxPopCurrent, + * ::cuCtxPushCurrent, + * ::cuCtxSetCacheConfig, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuCtxSetLimit, + * ::cuCtxSynchronize + */ +CUresult CUDAAPI cuCtxDestroy(CUcontext ctx); +#endif /* __CUDA_API_VERSION >= 4000 */ + +/** + * \defgroup CUDA_CTX_DEPRECATED Context Management [DEPRECATED] + * + * This section describes the deprecated context management functions of the low-level culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * CUDA driver application programming interface. + * + * @{ + */ + +/** + * \brief Increment a context's usage-count + * + * \deprecated + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * Note that this function is deprecated and should not be used. + * + * Increments the usage count of the context and passes back a context handle + * in \p *pctx that must be passed to ::cuCtxDetach() when the application is + * done with the context. ::cuCtxAttach() fails if there is no context current + * to the thread. + * + * Currently, the \p flags parameter must be 0. + * + * \param pctx - Returned context handle of the current context culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \param flags - Context attach flags (must be 0) + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \sa ::cuCtxCreate, + * ::cuCtxDestroy, + * ::cuCtxDetach, + * ::cuCtxGetApiVersion, + * ::cuCtxGetCacheConfig, + * ::cuCtxGetDevice, + * ::cuCtxGetLimit, + * ::cuCtxPopCurrent, + * ::cuCtxPushCurrent, + * ::cuCtxSetCacheConfig, culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * ::cuCtxSetLimit, + * ::cuCtxSynchronize + */ +CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, unsigned int flags); + +/** + * \brief Decrement a context's usage-count + * + * \deprecated + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * Note that this function is deprecated and should not be used. + * + * Decrements the usage count of the context \p ctx, and destroys the context + * if the usage count goes to 0. The context must be a handle that was passed + * back by ::cuCtxCreate() or ::cuCtxAttach(), and must be current to the + * calling thread. + * + * \param ctx - Context to destroy + * + * \return culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT + * \notefnerr + * + * \sa ::cuCtxCreate, + * ::cuCtxDestroy, + * ::cuCtxGetApiVersion, + * ::cuCtxGetCacheConfig, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuCtxGetDevice, + * ::cuCtxGetLimit, + * ::cuCtxPopCurrent, + * ::cuCtxPushCurrent, + * ::cuCtxSetCacheConfig, + * ::cuCtxSetLimit, + * ::cuCtxSynchronize + */ +CUresult CUDAAPI cuCtxDetach(CUcontext ctx); + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +/** @} */ /* END CUDA_CTX_DEPRECATED */ + +#if __CUDA_API_VERSION >= 4000 +/** + * \brief Pushes a context on the current CPU thread + * + * Pushes the given context \p ctx onto the CPU thread's stack of current + * contexts. The specified context becomes the CPU thread's current context, so + * all CUDA functions that operate on the current context are affected. + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * The previous current context may be made current again by calling + * ::cuCtxDestroy() or ::cuCtxPopCurrent(). + * + * \param ctx - Context to push + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuCtxCreate, + * ::cuCtxDestroy, + * ::cuCtxGetApiVersion, + * ::cuCtxGetCacheConfig, + * ::cuCtxGetDevice, + * ::cuCtxGetLimit, + * ::cuCtxPopCurrent, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuCtxSetCacheConfig, + * ::cuCtxSetLimit, + * ::cuCtxSynchronize + */ +CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx); + +/** + * \brief Pops the current CUDA context from the current CPU thread. + * + * Pops the current CUDA context from the CPU thread and passes back the culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * old context handle in \p *pctx. That context may then be made current + * to a different CPU thread by calling ::cuCtxPushCurrent(). + * + * If a context was current to the CPU thread before ::cuCtxCreate() or + * ::cuCtxPushCurrent() was called, this function makes that context current to + * the CPU thread again. + * + * \param pctx - Returned new context handle + * + * \return culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT + * \notefnerr + * + * \sa ::cuCtxCreate, + * ::cuCtxDestroy, + * ::cuCtxGetApiVersion, + * ::cuCtxGetCacheConfig, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuCtxGetDevice, + * ::cuCtxGetLimit, + * ::cuCtxPushCurrent, + * ::cuCtxSetCacheConfig, + * ::cuCtxSetLimit, + * ::cuCtxSynchronize + */ +CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx); + +/** culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \brief Binds the specified CUDA context to the calling CPU thread + * + * Binds the specified CUDA context to the calling CPU thread. + * If \p ctx is NULL then the CUDA context previously bound to the + * calling CPU thread is unbound and ::CUDA_SUCCESS is returned. 
+ * + * If there exists a CUDA context stack on the calling CPU thread, this + * will replace the top of that stack with \p ctx. + * If \p ctx is NULL then this will be equivalent to popping the top + * of the calling CPU thread's CUDA context stack (or a no-op if the culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * calling CPU thread's CUDA context stack is empty). + * + * \param ctx - Context to bind to the calling CPU thread + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT + * \notefnerr culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * \sa ::cuCtxGetCurrent, ::cuCtxCreate, ::cuCtxDestroy + */ +CUresult CUDAAPI cuCtxSetCurrent(CUcontext ctx); + +/** + * \brief Returns the CUDA context bound to the calling CPU thread. + * + * Returns in \p *pctx the CUDA context bound to the calling CPU thread. + * If no context is bound to the calling CPU thread then \p *pctx is culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * set to NULL and ::CUDA_SUCCESS is returned. + * + * \param pctx - Returned context handle + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * \notefnerr + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \sa ::cuCtxSetCurrent, ::cuCtxCreate, ::cuCtxDestroy + */ +CUresult CUDAAPI cuCtxGetCurrent(CUcontext *pctx); +#endif /* __CUDA_API_VERSION >= 4000 */ + +/** + * \brief Returns the device ID for the current context + * + * Returns in \p *device the ordinal of the current context's device. + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \param device - Returned device ID for the current context + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * \notefnerr + * culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * \sa ::cuCtxCreate, + * ::cuCtxDestroy, + * ::cuCtxGetApiVersion, + * ::cuCtxGetCacheConfig, + * ::cuCtxGetLimit, + * ::cuCtxPopCurrent, + * ::cuCtxPushCurrent, + * ::cuCtxSetCacheConfig, + * ::cuCtxSetLimit, + * ::cuCtxSynchronize culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + */ +CUresult CUDAAPI cuCtxGetDevice(CUdevice *device); + +/** + * \brief Block for a context's tasks to complete + * + * Blocks until the device has completed all preceding requested tasks. + * ::cuCtxSynchronize() returns an error if one of the preceding tasks failed. + * If the context was created with the ::CU_CTX_SCHED_BLOCKING_SYNC flag, the + * CPU thread will block until the GPU context has finished its work. culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT + * \notefnerr + * + * \sa ::cuCtxCreate, + * ::cuCtxDestroy, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuCtxGetApiVersion, + * ::cuCtxGetCacheConfig, + * ::cuCtxGetDevice, + * ::cuCtxGetLimit, + * ::cuCtxPopCurrent, + * ::cuCtxPushCurrent, + * ::cuCtxSetCacheConfig, + * ::cuCtxSetLimit + */ +CUresult CUDAAPI cuCtxSynchronize(void); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + +/** + * \brief Set resource limits + * + * Setting \p limit to \p value is a request by the application to update + * the current limit maintained by the context. The driver is free to + * modify the requested value to meet h/w requirements (this could be + * clamping to minimum or maximum values, rounding up to nearest element + * size, etc). The application can use ::cuCtxGetLimit() to find out exactly + * what the limit has been set to. culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * Setting each ::CUlimit has its own specific restrictions, so each is + * discussed here.
+ * + * - ::CU_LIMIT_STACK_SIZE controls the stack size of each GPU thread. + * This limit is only applicable to devices of compute capability + * 2.0 and higher. Attempting to set this limit on devices of + * compute capability less than 2.0 will result in the error + * ::CUDA_ERROR_UNSUPPORTED_LIMIT being returned. + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * - ::CU_LIMIT_PRINTF_FIFO_SIZE controls the size of the FIFO used + * by the ::printf() device system call. Setting + * ::CU_LIMIT_PRINTF_FIFO_SIZE must be performed before launching any + * kernel that uses the ::printf() device system call, otherwise + * ::CUDA_ERROR_INVALID_VALUE will be returned. + * This limit is only applicable to devices of compute capability + * 2.0 and higher. Attempting to set this limit on devices of + * compute capability less than 2.0 will result in the error + * ::CUDA_ERROR_UNSUPPORTED_LIMIT being returned. + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * - ::CU_LIMIT_MALLOC_HEAP_SIZE controls the size of the heap used + * by the ::malloc() and ::free() device system calls. Setting + * ::CU_LIMIT_MALLOC_HEAP_SIZE must be performed before launching + * any kernel that uses the ::malloc() or ::free() device system calls, + * otherwise ::CUDA_ERROR_INVALID_VALUE will be returned. + * This limit is only applicable to devices of compute capability + * 2.0 and higher. Attempting to set this limit on devices of + * compute capability less than 2.0 will result in the error + * ::CUDA_ERROR_UNSUPPORTED_LIMIT being returned. + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \param limit - Limit to set + * \param value - Size in bytes of limit + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_UNSUPPORTED_LIMIT + * \notefnerr + * + * \sa ::cuCtxCreate, culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * ::cuCtxDestroy, + * ::cuCtxGetApiVersion, + * ::cuCtxGetCacheConfig, + * ::cuCtxGetDevice, + * ::cuCtxGetLimit, + * ::cuCtxPopCurrent, + * ::cuCtxPushCurrent, + * ::cuCtxSetCacheConfig, + * ::cuCtxSynchronize + */ culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value); + +/** + * \brief Returns resource limits + * + * Returns in \p *pvalue the current size of \p limit. The supported + * ::CUlimit values are: + * - ::CU_LIMIT_STACK_SIZE: stack size of each GPU thread; + * - ::CU_LIMIT_PRINTF_FIFO_SIZE: size of the FIFO used by the + * ::printf() device system call. culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * - ::CU_LIMIT_MALLOC_HEAP_SIZE: size of the heap used by the + * ::malloc() and ::free() device system calls; + * + * \param limit - Limit to query + * \param pvalue - Returned size in bytes of limit + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_UNSUPPORTED_LIMIT culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuCtxCreate, + * ::cuCtxDestroy, + * ::cuCtxGetApiVersion, + * ::cuCtxGetCacheConfig, + * ::cuCtxGetDevice, + * ::cuCtxPopCurrent, + * ::cuCtxPushCurrent, + * ::cuCtxSetCacheConfig, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuCtxSetLimit, + * ::cuCtxSynchronize + */ +CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit); + +/** + * \brief Returns the preferred cache configuration for the current context. + * + * On devices where the L1 cache and shared memory use the same hardware + * resources, this function returns through \p pconfig the preferred cache configuration culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * for the current context. This is only a preference. The driver will use + * the requested configuration if possible, but it is free to choose a different + * configuration if required to execute functions. 
+ * + * This will return a \p pconfig of ::CU_FUNC_CACHE_PREFER_NONE on devices + * where the size of the L1 cache and shared memory are fixed. + * + * The supported cache configurations are: + * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default) + * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory + * - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory + * + * \param pconfig - Returned cache configuration + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuCtxCreate, + * ::cuCtxDestroy, + * ::cuCtxGetApiVersion, + * ::cuCtxGetDevice, + * ::cuCtxGetLimit, + * ::cuCtxPopCurrent, + * ::cuCtxPushCurrent, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuCtxSetCacheConfig, + * ::cuCtxSetLimit, + * ::cuCtxSynchronize, + * ::cuFuncSetCacheConfig + */ +CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig); + +/** + * \brief Sets the preferred cache configuration for the current context. + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * On devices where the L1 cache and shared memory use the same hardware + * resources, this sets through \p config the preferred cache configuration for + * the current context. This is only a preference. The driver will use + * the requested configuration if possible, but it is free to choose a different + * configuration if required to execute the function. Any function preference + * set via ::cuFuncSetCacheConfig() will be preferred over this context-wide + * setting. 
Setting the context-wide cache configuration to + * ::CU_FUNC_CACHE_PREFER_NONE will cause subsequent kernel launches to prefer + * to not change the cache configuration unless required to launch the kernel. + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * This setting does nothing on devices where the size of the L1 cache and + * shared memory are fixed. + * + * Launching a kernel with a different preference than the most recent + * preference setting may insert a device-side synchronization point. + * + * The supported cache configurations are: + * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default) + * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache + * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory + * + * \param config - Requested cache configuration + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuCtxCreate, + * ::cuCtxDestroy, + * ::cuCtxGetApiVersion, + * ::cuCtxGetCacheConfig, + * ::cuCtxGetDevice, + * ::cuCtxGetLimit, + * ::cuCtxPopCurrent, + * ::cuCtxPushCurrent, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuCtxSetLimit, + * ::cuCtxSynchronize, + * ::cuFuncSetCacheConfig + */ +CUresult CUDAAPI cuCtxSetCacheConfig(CUfunc_cache config); + +#if __CUDA_API_VERSION >= 4020 +/** + * \brief Returns the current shared memory configuration for the current context. + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * This function will return in \p pConfig the current size of shared memory banks + * in the current context. 
On devices with configurable shared memory banks, + * ::cuCtxSetSharedMemConfig can be used to change this setting, so that all + * subsequent kernel launches will by default use the new bank size. When + * ::cuCtxGetSharedMemConfig is called on devices without configurable shared + * memory, it will return the fixed bank size of the hardware. + * + * The returned bank configurations can be either: + * - ::CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: shared memory bank width is + * four bytes. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * - ::CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: shared memory bank width is + * eight bytes. + * + * \param pConfig - returned shared memory configuration + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuCtxCreate, + * ::cuCtxDestroy, + * ::cuCtxGetApiVersion, + * ::cuCtxGetCacheConfig, + * ::cuCtxGetDevice, + * ::cuCtxGetLimit, + * ::cuCtxPopCurrent, + * ::cuCtxPushCurrent, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuCtxSetLimit, + * ::cuCtxSynchronize, + * ::cuCtxSetSharedMemConfig, + * ::cuFuncSetCacheConfig + */ +CUresult CUDAAPI cuCtxGetSharedMemConfig(CUsharedconfig *pConfig); + +/** + * \brief Sets the shared memory configuration for the current context. + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * On devices with configurable shared memory banks, this function will set + * the context's shared memory bank size which is used for subsequent kernel + * launches. + * + * Changing the shared memory configuration between launches may insert a device + * side synchronization point between those launches. + * + * Changing the shared memory bank size will not increase shared memory usage + * or affect occupancy of kernels, but may have major effects on performance.
+ * Larger bank sizes will allow for greater potential bandwidth to shared memory, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * but will change what kinds of accesses to shared memory will result in bank + * conflicts. + * + * This function will do nothing on devices with fixed shared memory bank size. + * + * The supported bank configurations are: + * - ::CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: set bank width to the default initial + * setting (currently, four bytes). + * - ::CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width to + * be natively four bytes. culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * - ::CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank width to + * be natively eight bytes. + * + * \param config - requested shared memory configuration + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuCtxCreate, + * ::cuCtxDestroy, + * ::cuCtxGetApiVersion, + * ::cuCtxGetCacheConfig, + * ::cuCtxGetDevice, + * ::cuCtxGetLimit, + * ::cuCtxPopCurrent, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::cuCtxPushCurrent, + * ::cuCtxSetLimit, + * ::cuCtxSynchronize, + * ::cuCtxGetSharedMemConfig, + * ::cuFuncSetCacheConfig, + */ +CUresult CUDAAPI cuCtxSetSharedMemConfig(CUsharedconfig config); +#endif + +/** culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \brief Gets the context's API version. + * + * Returns a version number in \p version corresponding to the capabilities of + * the context (e.g. 3010 or 3020), which library developers can use to direct + * callers to a specific API version. If \p ctx is NULL, returns the API version + * used to create the currently bound context. 
+ * + * Note that new API versions are only introduced when context capabilities are + * changed that break binary compatibility, so the API version and driver version + * may be different. For example, it is valid for the API version to be 3020 while culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * the driver version is 4010. + * + * \param ctx - Context to check + * \param version - Pointer to version + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_ERROR_UNKNOWN + * \notefnerr + * + * \sa ::cuCtxCreate, + * ::cuCtxDestroy, + * ::cuCtxGetDevice, + * ::cuCtxGetLimit, + * ::cuCtxPopCurrent, + * ::cuCtxPushCurrent, + * ::cuCtxSetCacheConfig, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::cuCtxSetLimit, + * ::cuCtxSynchronize + */ +CUresult CUDAAPI cuCtxGetApiVersion(CUcontext ctx, unsigned int *version); + +/** @} */ /* END CUDA_CTX */ + + +/** + * \defgroup CUDA_MODULE Module Management culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * This section describes the module management functions of the low-level CUDA + * driver application programming interface. + * + * @{ + */ + +/** + * \brief Loads a compute module + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * Takes a filename \p fname and loads the corresponding module \p module into + * the current context. The CUDA driver API does not attempt to lazily + * allocate the resources needed by a module; if the memory for functions and + * data (constant and global) needed by the module cannot be allocated, + * ::cuModuleLoad() fails. The file should be a \e cubin file as output by + * \b nvcc, or a \e PTX file either as output by \b nvcc or handwritten, or + * a \e fatbin file as output by \b nvcc from toolchain 4.0 or later. 
+ * + * \param module - Returned module + * \param fname - Filename of module to load culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_NOT_FOUND, + * ::CUDA_ERROR_OUT_OF_MEMORY, + * ::CUDA_ERROR_FILE_NOT_FOUND, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, + * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED + * \notefnerr + * + * \sa ::cuModuleGetFunction, + * ::cuModuleGetGlobal, + * ::cuModuleGetTexRef, + * ::cuModuleLoadData, + * ::cuModuleLoadDataEx, + * ::cuModuleLoadFatBinary, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuModuleUnload + */ +CUresult CUDAAPI cuModuleLoad(CUmodule *module, const char *fname); + +/** + * \brief Load a module's data + * + * Takes a pointer \p image and loads the corresponding module \p module into + * the current context. The pointer may be obtained by mapping a \e cubin or + * \e PTX or \e fatbin file, passing a \e cubin or \e PTX or \e fatbin file culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * as a NULL-terminated text string, or incorporating a \e cubin or \e fatbin + * object into the executable resources and using operating system calls such + * as Windows \c FindResource() to obtain the pointer. + * + * \param module - Returned module + * \param image - Module data to load + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_OUT_OF_MEMORY, + * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, + * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED + * \notefnerr + * + * \sa ::cuModuleGetFunction, + * ::cuModuleGetGlobal, culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * ::cuModuleGetTexRef, + * ::cuModuleLoad, + * ::cuModuleLoadDataEx, + * ::cuModuleLoadFatBinary, + * ::cuModuleUnload + */ +CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image); + +/** + * \brief Load a module's data with options culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * Takes a pointer \p image and loads the corresponding module \p module into + * the current context. The pointer may be obtained by mapping a \e cubin or + * \e PTX or \e fatbin file, passing a \e cubin or \e PTX or \e fatbin file + * as a NULL-terminated text string, or incorporating a \e cubin or \e fatbin + * object into the executable resources and using operating system calls such + * as Windows \c FindResource() to obtain the pointer. Options are passed as + * an array via \p options and any corresponding parameters are passed in + * \p optionValues. The number of total options is supplied via \p numOptions. + * Any outputs will be returned via \p optionValues. Supported options are culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * (types for the option values are specified in parentheses after the option + * name): + * + * - ::CU_JIT_MAX_REGISTERS: (unsigned int) input specifies the maximum number + * of registers per thread; + * - ::CU_JIT_THREADS_PER_BLOCK: (unsigned int) input specifies number of + * threads per block to target compilation for; output returns the number of + * threads the compiler actually targeted; + * - ::CU_JIT_WALL_TIME: (float) output returns the float value of wall clock + * time, in milliseconds, spent compiling the \e PTX code; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * - ::CU_JIT_INFO_LOG_BUFFER: (char*) input is a pointer to a buffer in + * which to print any informational log messages from \e PTX assembly (the + * buffer size is specified via option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES); + * - ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: (unsigned int) input is the size in + * bytes of the buffer; output is the number of bytes filled with messages; + * - ::CU_JIT_ERROR_LOG_BUFFER: (char*) input is a pointer to a buffer in + * which to print any error log messages from \e PTX assembly (the buffer size + * is specified via option ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES); + * - ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: (unsigned int) input is the size in + * bytes of the buffer; output is the number of bytes filled with messages; culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * - ::CU_JIT_OPTIMIZATION_LEVEL: (unsigned int) input is the level of + * optimization to apply to generated code (0 - 4), with 4 being the default + * and highest level; + * - ::CU_JIT_TARGET_FROM_CUCONTEXT: (No option value) causes compilation + * target to be determined based on current attached context (default); + * - ::CU_JIT_TARGET: (unsigned int for enumerated type ::CUjit_target_enum) + * input is the compilation target based on supplied ::CUjit_target_enum; + * possible values are: + * - ::CU_TARGET_COMPUTE_10 + * - ::CU_TARGET_COMPUTE_11 culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * - ::CU_TARGET_COMPUTE_12 + * - ::CU_TARGET_COMPUTE_13 + * - ::CU_TARGET_COMPUTE_20 + * - ::CU_JIT_FALLBACK_STRATEGY: (unsigned int for enumerated type + * ::CUjit_fallback_enum) chooses fallback strategy if matching cubin is not + * found; possible values are: + * - ::CU_PREFER_PTX + * - ::CU_PREFER_BINARY + * + * \param module - Returned module culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \param image - Module data to load + * \param numOptions - Number of options + * \param options - Options for JIT + * \param optionValues - Option values for JIT + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_OUT_OF_MEMORY, + * ::CUDA_ERROR_NO_BINARY_FOR_GPU, + * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, + * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED + * \notefnerr + * + * \sa ::cuModuleGetFunction, + * ::cuModuleGetGlobal, + * ::cuModuleGetTexRef, culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * ::cuModuleLoad, + * ::cuModuleLoadData, + * ::cuModuleLoadFatBinary, + * ::cuModuleUnload + */ +CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues); + +/** + * \brief Load a module's data + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * Takes a pointer \p fatCubin and loads the corresponding module \p module + * into the current context. The pointer represents a fat binary object, + * which is a collection of different \e cubin and/or \e PTX files, all + * representing the same device code, but compiled and optimized for different + * architectures. + * + * Prior to CUDA 4.0, there was no documented API for constructing and using + * fat binary objects by programmers. Starting with CUDA 4.0, fat binary + * objects can be constructed by providing the -fatbin option to \b nvcc. + * More information can be found in the \b nvcc document. culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * + * \param module - Returned module + * \param fatCubin - Fat binary to load + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_FOUND, + * ::CUDA_ERROR_OUT_OF_MEMORY, + * ::CUDA_ERROR_NO_BINARY_FOR_GPU, + * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, + * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED + * \notefnerr + * + * \sa ::cuModuleGetFunction, + * ::cuModuleGetGlobal, + * ::cuModuleGetTexRef, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuModuleLoad, + * ::cuModuleLoadData, + * ::cuModuleLoadDataEx, + * ::cuModuleUnload + */ +CUresult CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin); + +/** + * \brief Unloads a module + * culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * Unloads a module \p hmod from the current context. + * + * \param hmod - Module to unload + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuModuleGetFunction, + * ::cuModuleGetGlobal, + * ::cuModuleGetTexRef, + * ::cuModuleLoad, + * ::cuModuleLoadData, + * ::cuModuleLoadDataEx, + * ::cuModuleLoadFatBinary + */ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +CUresult CUDAAPI cuModuleUnload(CUmodule hmod); + +/** + * \brief Returns a function handle + * + * Returns in \p *hfunc the handle of the function of name \p name located in + * module \p hmod. If no function of that name exists, ::cuModuleGetFunction() + * returns ::CUDA_ERROR_NOT_FOUND. + * + * \param hfunc - Returned function handle culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \param hmod - Module to retrieve function from + * \param name - Name of function to retrieve + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_NOT_FOUND culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuModuleGetGlobal, + * ::cuModuleGetTexRef, + * ::cuModuleLoad, + * ::cuModuleLoadData, + * ::cuModuleLoadDataEx, + * ::cuModuleLoadFatBinary, + * ::cuModuleUnload + */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name); + +#if __CUDA_API_VERSION >= 3020 +/** + * \brief Returns a global pointer from a module + * + * Returns in \p *dptr and \p *bytes the base pointer and size of the + * global of name \p name located in module \p hmod. If no variable of that name + * exists, ::cuModuleGetGlobal() returns ::CUDA_ERROR_NOT_FOUND. 
Both + * parameters \p dptr and \p bytes are optional. If one of them is culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * NULL, it is ignored. + * + * \param dptr - Returned global device pointer + * \param bytes - Returned global size in bytes + * \param hmod - Module to retrieve global from + * \param name - Name of global to retrieve + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_NOT_FOUND + * \notefnerr + * + * \sa ::cuModuleGetFunction, + * ::cuModuleGetTexRef, + * ::cuModuleLoad, + * ::cuModuleLoadData, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuModuleLoadDataEx, + * ::cuModuleLoadFatBinary, + * ::cuModuleUnload + */ +CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name); +#endif /* __CUDA_API_VERSION >= 3020 */ + +/** + * \brief Returns a handle to a texture reference + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * Returns in \p *pTexRef the handle of the texture reference of name \p name + * in the module \p hmod. If no texture reference of that name exists, + * ::cuModuleGetTexRef() returns ::CUDA_ERROR_NOT_FOUND. This texture reference + * handle should not be destroyed, since it will be destroyed when the module + * is unloaded. + * + * \param pTexRef - Returned texture reference + * \param hmod - Module to retrieve texture reference from + * \param name - Name of texture reference to retrieve + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_NOT_FOUND + * \notefnerr + * + * \sa ::cuModuleGetFunction, culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * ::cuModuleGetGlobal, + * ::cuModuleGetSurfRef, + * ::cuModuleLoad, + * ::cuModuleLoadData, + * ::cuModuleLoadDataEx, + * ::cuModuleLoadFatBinary, + * ::cuModuleUnload + */ +CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name); + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +/** + * \brief Returns a handle to a surface reference + * + * Returns in \p *pSurfRef the handle of the surface reference of name \p name + * in the module \p hmod. If no surface reference of that name exists, + * ::cuModuleGetSurfRef() returns ::CUDA_ERROR_NOT_FOUND. + * + * \param pSurfRef - Returned surface reference + * \param hmod - Module to retrieve surface reference from + * \param name - Name of surface reference to retrieve culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_NOT_FOUND + * \notefnerr + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \sa ::cuModuleGetFunction, + * ::cuModuleGetGlobal, + * ::cuModuleGetTexRef, + * ::cuModuleLoad, + * ::cuModuleLoadData, + * ::cuModuleLoadDataEx, + * ::cuModuleLoadFatBinary, + * ::cuModuleUnload + */ +CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + +/** @} */ /* END CUDA_MODULE */ + + +/** + * \defgroup CUDA_MEM Memory Management + * + * This section describes the memory management functions of the low-level CUDA + * driver application programming interface. + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * @{ + */ + +#if __CUDA_API_VERSION >= 3020 +/** + * \brief Gets free and total memory + * + * Returns in \p *free and \p *total respectively, the free and total amount of + * memory available for allocation by the CUDA context, in bytes. 
+ * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \param free - Returned free memory in bytes + * \param total - Returned total memory in bytes + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemGetInfo(size_t *free, size_t *total); + +/** + * \brief Allocates device memory + * + * Allocates \p bytesize bytes of linear memory on the device and returns in + * \p *dptr a pointer to the allocated memory. The allocated memory is suitably + * aligned for any kind of variable. The memory is not cleared. If \p bytesize culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * is 0, ::cuMemAlloc() returns ::CUDA_ERROR_INVALID_VALUE. 
+ * + * \param dptr - Returned device pointer + * \param bytesize - Requested allocation size in bytes + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_OUT_OF_MEMORY + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, size_t bytesize); + +/** + * \brief Allocates pitched device memory + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * Allocates at least \p WidthInBytes * \p Height bytes of linear memory on + * the device and returns in \p *dptr a pointer to the allocated memory. The + * function may pad the allocation to ensure that corresponding pointers in + * any given row will continue to meet the alignment requirements for + * coalescing as the address is updated from row to row. \p ElementSizeBytes + * specifies the size of the largest reads and writes that will be performed + * on the memory range. \p ElementSizeBytes may be 4, 8 or 16 (since coalesced + * memory transactions are not possible on other data sizes). 
If + * \p ElementSizeBytes is smaller than the actual read/write size of a kernel, + * the kernel will run correctly, but possibly at reduced speed. The pitch culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * returned in \p *pPitch by ::cuMemAllocPitch() is the width in bytes of the + * allocation. The intended usage of pitch is as a separate parameter of the + * allocation, used to compute addresses within the 2D array. Given the row + * and column of an array element of type \b T, the address is computed as: + * \code + T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; + * \endcode + * + * The pitch returned by ::cuMemAllocPitch() is guaranteed to work with + * ::cuMemcpy2D() under all circumstances. For allocations of 2D arrays, it is culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * recommended that programmers consider performing pitch allocations using + * ::cuMemAllocPitch(). Due to alignment restrictions in the hardware, this is + * especially true if the application will be performing 2D memory copies + * between different regions of device memory (whether linear memory or CUDA + * arrays). + * + * The byte alignment of the pitch returned by ::cuMemAllocPitch() is guaranteed + * to match or exceed the alignment requirement for texture binding with + * ::cuTexRefSetAddress2D(). + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \param dptr - Returned device pointer + * \param pPitch - Returned pitch of allocation in bytes + * \param WidthInBytes - Requested allocation width in bytes + * \param Height - Requested allocation height in rows + * \param ElementSizeBytes - Size of largest reads/writes for range + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_OUT_OF_MEMORY + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes); + +/** + * \brief Frees device memory culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * Frees the memory space pointed to by \p dptr, which must have been returned + * by a previous call to ::cuMemAlloc() or ::cuMemAllocPitch(). + * + * \param dptr - Pointer to memory to free + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemFree(CUdeviceptr dptr); + +/** + * \brief Get information on memory allocations + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * Returns the base address in \p *pbase and size in \p *psize of the + * allocation by ::cuMemAlloc() or ::cuMemAllocPitch() that contains the input + * pointer \p dptr. Both parameters \p pbase and \p psize are optional. If one + * of them is NULL, it is ignored. + * + * \param pbase - Returned base address + * \param psize - Returned size of device memory allocation + * \param dptr - Device pointer to query + * + * \return culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr); + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +/** + * \brief Allocates page-locked host memory + * + * Allocates \p bytesize bytes of host memory that is page-locked and + * accessible to the device. The driver tracks the virtual memory ranges + * allocated with this function and automatically accelerates calls to + * functions such as ::cuMemcpy(). Since the memory can be accessed directly by + * the device, it can be read or written with much higher bandwidth than + * pageable memory obtained with functions such as ::malloc(). Allocating + * excessive amounts of memory with ::cuMemAllocHost() may degrade system culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * performance, since it reduces the amount of memory available to the system + * for paging. As a result, this function is best used sparingly to allocate + * staging areas for data exchange between host and device. + * + * Note all host memory allocated using ::cuMemHostAlloc() will automatically + * be immediately accessible to all contexts on all devices which support unified + * addressing (as may be queried using ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING). + * The device pointer that may be used to access this host memory from those + * contexts is always equal to the returned host pointer \p *pp. + * See \ref CUDA_UNIFIED for additional details. 
culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * \param pp - Returned host pointer to page-locked memory + * \param bytesize - Requested allocation size in bytes + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_ERROR_OUT_OF_MEMORY + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemAllocHost(void **pp, size_t bytesize); +#endif /* __CUDA_API_VERSION >= 3020 */ + +/** + * \brief Frees page-locked host memory + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * Frees the memory space pointed to by \p p, which must have been returned by + * a previous call to ::cuMemAllocHost(). + * + * \param p - Pointer to memory to free + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemFreeHost(void *p); + +/** + * \brief Allocates page-locked host memory + * + * Allocates \p bytesize bytes of host memory that is page-locked and accessible culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * to the device. The driver tracks the virtual memory ranges allocated with + * this function and automatically accelerates calls to functions such as + * ::cuMemcpyHtoD(). Since the memory can be accessed directly by the device, + * it can be read or written with much higher bandwidth than pageable memory + * obtained with functions such as ::malloc(). Allocating excessive amounts of + * pinned memory may degrade system performance, since it reduces the amount + * of memory available to the system for paging. As a result, this function is + * best used sparingly to allocate staging areas for data exchange between + * host and device. + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * The \p Flags parameter enables different options to be specified that + * affect the allocation, as follows. 
+ * + * - ::CU_MEMHOSTALLOC_PORTABLE: The memory returned by this call will be + * considered as pinned memory by all CUDA contexts, not just the one that + * performed the allocation. + * + * - ::CU_MEMHOSTALLOC_DEVICEMAP: Maps the allocation into the CUDA address + * space. The device pointer to the memory may be obtained by calling + * ::cuMemHostGetDevicePointer(). This feature is available only on GPUs culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * with compute capability greater than or equal to 1.1. + * + * - ::CU_MEMHOSTALLOC_WRITECOMBINED: Allocates the memory as write-combined + * (WC). WC memory can be transferred across the PCI Express bus more + * quickly on some system configurations, but cannot be read efficiently by + * most CPUs. WC memory is a good option for buffers that will be written by + * the CPU and read by the GPU via mapped pinned memory or host->device + * transfers. + * + * All of these flags are orthogonal to one another: a developer may allocate culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * memory that is portable, mapped and/or write-combined with no restrictions. + * + * The CUDA context must have been created with the ::CU_CTX_MAP_HOST flag in + * order for the ::CU_MEMHOSTALLOC_DEVICEMAP flag to have any effect. + * + * The ::CU_MEMHOSTALLOC_DEVICEMAP flag may be specified on CUDA contexts for + * devices that do not support mapped pinned memory. The failure is deferred + * to ::cuMemHostGetDevicePointer() because the memory may be mapped into + * other CUDA contexts via the ::CU_MEMHOSTALLOC_PORTABLE flag. + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * The memory allocated by this function must be freed with ::cuMemFreeHost(). + * + * Note all host memory allocated using ::cuMemHostAlloc() will automatically + * be immediately accessible to all contexts on all devices which support unified + * addressing (as may be queried using ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING).
+ * Unless the flag ::CU_MEMHOSTALLOC_WRITECOMBINED is specified, the device pointer + * that may be used to access this host memory from those contexts is always equal + * to the returned host pointer \p *pp. If the flag ::CU_MEMHOSTALLOC_WRITECOMBINED + * is specified, then the function ::cuMemHostGetDevicePointer() must be used + * to query the device pointer, even if the context supports unified addressing. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * See \ref CUDA_UNIFIED for additional details. + * + * \param pp - Returned host pointer to page-locked memory + * \param bytesize - Requested allocation size in bytes + * \param Flags - Flags for allocation request + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_OUT_OF_MEMORY + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags); + +#if __CUDA_API_VERSION >= 3020 +/** culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \brief Passes back device pointer of mapped pinned memory + * + * Passes back the device pointer \p pdptr corresponding to the mapped, pinned + * host buffer \p p allocated by ::cuMemHostAlloc. + * + * ::cuMemHostGetDevicePointer() will fail if the ::CU_MEMHOSTALLOC_DEVICEMAP + * flag was not specified at the time the memory was allocated, or if the + * function is called on a GPU that does not support mapped pinned memory. + * + * \p Flags provides for future releases. For now, it must be set to 0. culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * \param pdptr - Returned device pointer + * \param p - Host pointer + * \param Flags - Options (must be 0) + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to dowload it?
culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags); +#endif /* __CUDA_API_VERSION >= 3020 */ + +/** + * \brief Passes back flags that were used for a pinned allocation + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * Passes back the flags \p pFlags that were specified when allocating + * the pinned host buffer \p p allocated by ::cuMemHostAlloc. + * + * ::cuMemHostGetFlags() will fail if the pointer does not reside in + * an allocation performed by ::cuMemAllocHost() or ::cuMemHostAlloc(). + * + * \param pFlags - Returned flags word + * \param p - Host pointer + * + * \return culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuMemAllocHost, ::cuMemHostAlloc + */ +CUresult CUDAAPI cuMemHostGetFlags(unsigned int *pFlags, void *p); culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + +#if __CUDA_API_VERSION >= 4010 + +/** + * \brief Returns a handle to a compute device + * + * Returns in \p *dev a device handle given a PCI bus ID string. + * + * \param dev - Returned device handle + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \param pciBusId - String in one of the following forms: + * [domain]:[bus]:[device].[function] + * [domain]:[bus]:[device] + * [bus]:[device].[function] + * where \p domain, \p bus, \p device, and \p function are all hexadecimal values + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_INVALID_DEVICE + * \notefnerr + * + * \sa ::cuDeviceGet, ::cuDeviceGetAttribute, ::cuDeviceGetPCIBusId + */ +CUresult CUDAAPI cuDeviceGetByPCIBusId(CUdevice *dev, char *pciBusId); + +/** + * \brief Returns a PCI Bus Id string for the device culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * Returns an ASCII string identifying the device \p dev in the NULL-terminated + * string pointed to by \p pciBusId. \p len specifies the maximum length of the + * string that may be returned. + * + * \param pciBusId - Returned identifier string for the device in the following format + * [domain]:[bus]:[device].[function] + * where \p domain, \p bus, \p device, and \p function are all hexadecimal values. + * pciBusId should be large enough to store 13 characters including the NULL-terminator. + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \param len - Maximum length of string to store in \p pciBusId + * + * \param dev - Device to get identifier string for + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_INVALID_DEVICE culaunchkernel.ptsz How to get it for free?
culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuDeviceGet, ::cuDeviceGetAttribute, ::cuDeviceGetByPCIBusId + */ +CUresult CUDAAPI cuDeviceGetPCIBusId(char *pciBusId, int len, CUdevice dev); + +/** + * \brief Gets an interprocess handle for a previously allocated event + * + * Takes as input a previously allocated event. This event must have been culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * created with the ::CU_EVENT_INTERPROCESS and ::CU_EVENT_DISABLE_TIMING + * flags set. This opaque handle may be copied into other processes and + * opened with ::cuIpcOpenEventHandle to allow efficient hardware + * synchronization between GPU work in different processes. + * + * After the event has been opened in the importing process, + * ::cuEventRecord, ::cuEventSynchronize, ::cuStreamWaitEvent and + * ::cuEventQuery may be used in either process. Performing operations + * on the imported event after the exported event has been freed + * with ::cuEventDestroy will result in undefined behavior. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * IPC functionality is restricted to devices with support for unified + * addressing on Linux operating systems. + * + * \param pHandle - Pointer to a user allocated CUipcEventHandle + * in which to return the opaque event handle + * \param event - Event allocated with ::CU_EVENT_INTERPROCESS and + * ::CU_EVENT_DISABLE_TIMING flags. + * + * \return culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_OUT_OF_MEMORY, + * ::CUDA_ERROR_MAP_FAILED + * + * \sa + * ::cuEventCreate, + * ::cuEventDestroy, + * ::cuEventSynchronize, + * ::cuEventQuery, culaunchkernel.ptsz How to get it for free?
culaunchkernel.ptsz + * ::cuStreamWaitEvent, + * ::cuIpcOpenEventHandle, + * ::cuIpcGetMemHandle, + * ::cuIpcOpenMemHandle, + * ::cuIpcCloseMemHandle + */ +CUresult CUDAAPI cuIpcGetEventHandle(CUipcEventHandle *pHandle, CUevent event); + +/** + * \brief Opens an interprocess event handle for use in the current process culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * Opens an interprocess event handle exported from another process with + * ::cuIpcGetEventHandle. This function returns a ::CUevent that behaves like + * a locally created event with the ::CU_EVENT_DISABLE_TIMING flag specified. + * This event must be freed with ::cuEventDestroy. + * + * Performing operations on the imported event after the exported event has + * been freed with ::cuEventDestroy will result in undefined behavior. + * + * IPC functionality is restricted to devices with support for unified culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * addressing on Linux operating systems. + * + * \param phEvent - Returns the imported event + * \param handle - Interprocess handle to open + * + * \returns + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_MAP_FAILED, + * ::CUDA_ERROR_INVALID_HANDLE culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * + * \sa + * ::cuEventCreate, + * ::cuEventDestroy, + * ::cuEventSynchronize, + * ::cuEventQuery, + * ::cuStreamWaitEvent, + * ::cuIpcGetEventHandle, + * ::cuIpcGetMemHandle, + * ::cuIpcOpenMemHandle, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuIpcCloseMemHandle + */ +CUresult CUDAAPI cuIpcOpenEventHandle(CUevent *phEvent, CUipcEventHandle handle); + +/** + * \brief Gets an interprocess memory handle for an existing device memory + * allocation + * + * Takes a pointer to the base of an existing device memory allocation created + * with ::cuMemAlloc and exports it for use in another process. This is a culaunchkernel.ptsz How to get it for free?
culaunchkernel.ptsz + * lightweight operation and may be called multiple times on an allocation + * without adverse effects. + * + * If a region of memory is freed with ::cuMemFree and a subsequent call + * to ::cuMemAlloc returns memory with the same device address, + * ::cuIpcGetMemHandle will return a unique handle for the + * new memory. + * + * IPC functionality is restricted to devices with support for unified + * addressing on Linux operating systems. culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * \param pHandle - Pointer to user allocated ::CUipcMemHandle to return + * the handle in. + * \param dptr - Base pointer to previously allocated device memory + * + * \returns + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_OUT_OF_MEMORY, + * ::CUDA_ERROR_MAP_FAILED culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * \sa + * ::cuMemAlloc, + * ::cuMemFree, + * ::cuIpcGetEventHandle, + * ::cuIpcOpenEventHandle, + * ::cuIpcOpenMemHandle, + * ::cuIpcCloseMemHandle + */ +CUresult CUDAAPI cuIpcGetMemHandle(CUipcMemHandle *pHandle, CUdeviceptr dptr); culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + +/** + * \brief Opens an interprocess memory handle exported from another process + * and returns a device pointer usable in the local process. + * + * Maps memory exported from another process with ::cuIpcGetMemHandle into + * the current device address space. For contexts on different devices + * ::cuIpcOpenMemHandle can attempt to enable peer access between the + * devices as if the user called ::cuCtxEnablePeerAccess. This behavior is + * controlled by the ::CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS flag. culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuDeviceCanAccessPeer can determine if a mapping is possible. + * + * Contexts that may open ::CUipcMemHandles are restricted in the following way.
+ * ::CUipcMemHandles from each ::CUdevice in a given process may only be opened + * by one ::CUcontext per ::CUdevice per other process. + * + * Memory returned from ::cuIpcOpenMemHandle must be freed with + * ::cuIpcCloseMemHandle. + * + * Calling ::cuMemFree on an exported memory region before calling culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::cuIpcCloseMemHandle in the importing context will result in undefined + * behavior. + * + * IPC functionality is restricted to devices with support for unified + * addressing on Linux operating systems. + * + * \param pdptr - Returned device pointer + * \param handle - ::CUipcMemHandle to open + * \param Flags - Flags for this operation. Must be specified as ::CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \returns + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_MAP_FAILED, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_TOO_MANY_PEERS + * + * \sa + * ::cuMemAlloc, + * ::cuMemFree, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuIpcGetEventHandle, + * ::cuIpcOpenEventHandle, + * ::cuIpcGetMemHandle, + * ::cuIpcCloseMemHandle, + * ::cuCtxEnablePeerAccess, + * ::cuDeviceCanAccessPeer + */ +CUresult CUDAAPI cuIpcOpenMemHandle(CUdeviceptr *pdptr, CUipcMemHandle handle, unsigned int Flags); + +/** culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \brief Close memory mapped with ::cuIpcOpenMemHandle + * + * Unmaps memory returned by ::cuIpcOpenMemHandle. The original allocation + * in the exporting process as well as imported mappings in other processes + * will be unaffected. + * + * Any resources used to enable peer access will be freed if this is the + * last mapping using them. + * + * IPC functionality is restricted to devices with support for unified culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * addressing on Linux operating systems.
+ * + * \param dptr - Device pointer returned by ::cuIpcOpenMemHandle + * + * \returns + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_MAP_FAILED, + * ::CUDA_ERROR_INVALID_HANDLE, + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \sa + * ::cuMemAlloc, + * ::cuMemFree, + * ::cuIpcGetEventHandle, + * ::cuIpcOpenEventHandle, + * ::cuIpcGetMemHandle, + * ::cuIpcOpenMemHandle, + */ +CUresult CUDAAPI cuIpcCloseMemHandle(CUdeviceptr dptr); + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +#endif /* __CUDA_API_VERSION >= 4010 */ + +#if __CUDA_API_VERSION >= 4000 +/** + * \brief Registers an existing host memory range for use by CUDA + * + * Page-locks the memory range specified by \p p and \p bytesize and maps it + * for the device(s) as specified by \p Flags. This memory range also is added + * to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate + * calls to functions such as ::cuMemcpyHtoD(). Since the memory can be accessed culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * directly by the device, it can be read or written with much higher bandwidth + * than pageable memory that has not been registered. Page-locking excessive + * amounts of memory may degrade system performance, since it reduces the amount + * of memory available to the system for paging. As a result, this function is + * best used sparingly to register staging areas for data exchange between + * host and device. + * + * This function has limited support on Mac OS X. OS 10.7 or higher is required. + * + * The \p Flags parameter enables different options to be specified that culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * affect the allocation, as follows. + * + * - ::CU_MEMHOSTREGISTER_PORTABLE: The memory returned by this call will be + * considered as pinned memory by all CUDA contexts, not just the one that + * performed the allocation. 
+ * + * - ::CU_MEMHOSTREGISTER_DEVICEMAP: Maps the allocation into the CUDA address + * space. The device pointer to the memory may be obtained by calling + * ::cuMemHostGetDevicePointer(). This feature is available only on GPUs + * with compute capability greater than or equal to 1.1. culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * All of these flags are orthogonal to one another: a developer may page-lock + * memory that is portable or mapped with no restrictions. + * + * The CUDA context must have been created with the ::CU_CTX_MAP_HOST flag in + * order for the ::CU_MEMHOSTREGISTER_DEVICEMAP flag to have any effect. + * + * The ::CU_MEMHOSTREGISTER_DEVICEMAP flag may be specified on CUDA contexts for + * devices that do not support mapped pinned memory. The failure is deferred + * to ::cuMemHostGetDevicePointer() because the memory may be mapped into culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * other CUDA contexts via the ::CU_MEMHOSTREGISTER_PORTABLE flag. + * + * The memory page-locked by this function must be unregistered with + * ::cuMemHostUnregister(). + * + * \param p - Host pointer to memory to page-lock + * \param bytesize - Size in bytes of the address range to page-lock + * \param Flags - Flags for allocation request + * + * \return culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_OUT_OF_MEMORY + * \notefnerr + * + * \sa ::cuMemHostUnregister, ::cuMemHostGetFlags, ::cuMemHostGetDevicePointer + */ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +CUresult CUDAAPI cuMemHostRegister(void *p, size_t bytesize, unsigned int Flags); + +/** + * \brief Unregisters a memory range that was registered with ::cuMemHostRegister(). + * + * Unmaps the memory range whose base address is specified by \p p, and makes + * it pageable again. 
+ * + * The base address must be the same one specified to ::cuMemHostRegister(). + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \param p - Host pointer to memory to unregister + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_OUT_OF_MEMORY + * \notefnerr culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * \sa ::cuMemHostRegister + */ +CUresult CUDAAPI cuMemHostUnregister(void *p); + +/** + * \brief Copies memory + * + * Copies data between two pointers. + * \p dst and \p src are base pointers of the destination and source, respectively. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \p ByteCount specifies the number of bytes to copy. + * Note that this function infers the type of the transfer (host to host, host to + * device, device to device, or device to host) from the pointer values. This + * function is only allowed in contexts which support unified addressing. + * Note that this function is synchronous. + * + * \param dst - Destination unified virtual address space pointer + * \param src - Source unified virtual address space pointer + * \param ByteCount - Size of memory copy in bytes + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + +/** + * \brief Copies device memory between two contexts + * + * Copies from device memory in one context to device memory in another + * context. \p dstDevice is the base device pointer of the destination memory + * and \p dstContext is the destination context. \p srcDevice is the base + * device pointer of the source memory and \p srcContext is the source context. + * \p ByteCount specifies the number of bytes to copy. + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * Note that this function is asynchronous with respect to the host, but + * serialized with respect to all pending and future asynchronous work in the + * current context, \p srcContext, and \p dstContext (use ::cuMemcpyPeerAsync + * to avoid this synchronization). + * + * \param dstDevice - Destination device pointer + * \param dstContext - Destination context + * \param srcDevice - Source device pointer + * \param srcContext - Source context + * \param ByteCount - Size of memory copy in bytes culaunchkernel.ptsz How to get it for free?
culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuMemcpyDtoD, ::cuMemcpy3DPeer, ::cuMemcpyDtoDAsync, ::cuMemcpyPeerAsync, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuMemcpy3DPeerAsync + */ +CUresult CUDAAPI cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount); + +#endif /* __CUDA_API_VERSION >= 4000 */ + +#if __CUDA_API_VERSION >= 3020 +/** + * \brief Copies memory from Host to Device + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * Copies from host memory to device memory. \p dstDevice and \p srcHost are + * the base addresses of the destination and source, respectively. \p ByteCount + * specifies the number of bytes to copy. Note that this function is + * synchronous. + * + * \param dstDevice - Destination device pointer + * \param srcHost - Source host pointer + * \param ByteCount - Size of memory copy in bytes + * + * \return culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount); + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +/** + * \brief Copies memory from Device to Host + * + * Copies from device to host memory. \p dstHost and \p srcDevice specify the + * base pointers of the destination and source, respectively. \p ByteCount + * specifies the number of bytes to copy. Note that this function is + * synchronous. + * + * \param dstHost - Destination host pointer + * \param srcDevice - Source device pointer culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \param ByteCount - Size of memory copy in bytes + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + */ +CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount); + +/** + * \brief Copies memory from Device to Device + * + * Copies from device memory to device memory. \p dstDevice and \p srcDevice + * are the base pointers of the destination and source, respectively. + * \p ByteCount specifies the number of bytes to copy. Note that this function + * is asynchronous. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * \param dstDevice - Destination device pointer + * \param srcDevice - Source device pointer + * \param ByteCount - Size of memory copy in bytes + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount); + +/** + * \brief Copies memory from Device to Array + * + * Copies from device memory to a 1D CUDA array. \p dstArray and \p dstOffset culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * specify the CUDA array handle and starting index of the destination data. + * \p srcDevice specifies the base pointer of the source. \p ByteCount + * specifies the number of bytes to copy. + * + * \param dstArray - Destination array + * \param dstOffset - Offset in bytes of destination array + * \param srcDevice - Source device pointer + * \param ByteCount - Size of memory copy in bytes + * + * \return culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount); + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +/** + * \brief Copies memory from Array to Device + * + * Copies from one 1D CUDA array to device memory. \p dstDevice specifies the + * base pointer of the destination and must be naturally aligned with the CUDA + * array elements. \p srcArray and \p srcOffset specify the CUDA array handle + * and the offset in bytes into the array where the copy is to begin. + * \p ByteCount specifies the number of bytes to copy and must be evenly + * divisible by the array element size. + * culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * \param dstDevice - Destination device pointer + * \param srcArray - Source array + * \param srcOffset - Offset in bytes of source array + * \param ByteCount - Size of memory copy in bytes + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount); + +/** + * \brief Copies memory from Host to Array + * + * Copies from host memory to a 1D CUDA array. \p dstArray and \p dstOffset culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * specify the CUDA array handle and starting offset in bytes of the destination + * data. \p srcHost specifies the base address of the source. \p ByteCount specifies + * the number of bytes to copy.
+ * + * \param dstArray - Destination array + * \param dstOffset - Offset in bytes of destination array + * \param srcHost - Source host pointer + * \param ByteCount - Size of memory copy in bytes + * + * \return culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount); + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +/** + * \brief Copies memory from Array to Host + * + * Copies from one 1D CUDA array to host memory. \p dstHost specifies the base + * pointer of the destination. \p srcArray and \p srcOffset specify the CUDA + * array handle and starting offset in bytes of the source data. + * \p ByteCount specifies the number of bytes to copy. + * + * \param dstHost - Destination host pointer + * \param srcArray - Source array culaunchkernel.ptsz How to use it?
culaunchkernel.ptsz + * \param srcOffset - Offset in bytes of source array + * \param ByteCount - Size of memory copy in bytes + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount); + +/** + * \brief Copies memory from Array to Array + * + * Copies from one 1D CUDA array to another. \p dstArray and \p srcArray + * specify the handles of the destination and source CUDA arrays for the copy, + * respectively. \p dstOffset and \p srcOffset specify the destination and culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * source offsets in bytes into the CUDA arrays. \p ByteCount is the number of + * bytes to be copied. The size of the elements in the CUDA arrays need not be + * the same format, but the elements must be the same size; and count must be + * evenly divisible by that size. 
+ * + * \param dstArray - Destination array + * \param dstOffset - Offset in bytes of destination array + * \param srcArray - Source array + * \param srcOffset - Offset in bytes of source array + * \param ByteCount - Size of memory copy in bytes culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount); + +/** + * \brief Copies memory for 2D arrays + * + * Perform a 2D memory copy according to the parameters specified in \p pCopy. + * The ::CUDA_MEMCPY2D structure is defined as: + * + * \code + typedef struct CUDA_MEMCPY2D_st { culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + unsigned int srcXInBytes, srcY; + CUmemorytype srcMemoryType; + const void *srcHost; + CUdeviceptr srcDevice; + CUarray srcArray; + unsigned int srcPitch; + + unsigned int dstXInBytes, dstY; + CUmemorytype dstMemoryType; + void *dstHost; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + CUdeviceptr dstDevice; + CUarray dstArray; + unsigned int dstPitch; + + unsigned int WidthInBytes; + unsigned int Height; + } CUDA_MEMCPY2D; + * \endcode + * where: + * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * source and destination, respectively; ::CUmemorytype_enum is defined as: + * + * \code + typedef enum CUmemorytype_enum { + CU_MEMORYTYPE_HOST = 0x01, + CU_MEMORYTYPE_DEVICE = 0x02, + CU_MEMORYTYPE_ARRAY = 0x03, + CU_MEMORYTYPE_UNIFIED = 0x04 + } CUmemorytype; + * \endcode culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch + * specify the (unified virtual address space) base address of the source data + * and the bytes per row to apply. ::srcArray is ignored. + * This value may be used only if unified addressing is supported in the calling + * context. + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * specify the (host) base address of the source data and the bytes per row to + * apply. ::srcArray is ignored. + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch + * specify the (device) base address of the source data and the bytes per row + * to apply. ::srcArray is ignored. + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are + * ignored. 
+ * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch + * specify the (host) base address of the destination data and the bytes per + * row to apply. ::dstArray is ignored. + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * specify the (unified virtual address space) base address of the destination data + * and the bytes per row to apply. ::dstArray is ignored. + * This value may be used only if unified addressing is supported in the calling + * context. + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch + * specify the (device) base address of the destination data and the bytes per + * row to apply. ::dstArray is ignored. + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the + * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are + * ignored. + * + * - ::srcXInBytes and ::srcY specify the base address of the source data for + * the copy. + * + * \par + * For host pointers, the starting address is culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \code + void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes); + * \endcode + * + * \par + * For device pointers, the starting address is + * \code + CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes; + * \endcode + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \par + * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array + * element size. + * + * - ::dstXInBytes and ::dstY specify the base address of the destination data + * for the copy. + * + * \par + * For host pointers, the base address is + * \code culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes); + * \endcode + * + * \par + * For device pointers, the starting address is + * \code + CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes; + * \endcode + * + * \par culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array + * element size. + * + * - ::WidthInBytes and ::Height specify the width (in bytes) and height of + * the 2D copy being performed. + * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes + + * ::srcXInBytes, and ::dstPitch must be greater than or equal to + * ::WidthInBytes + dstXInBytes. + * + * \par culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum + * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back + * pitches that always work with ::cuMemcpy2D(). On intra-device memory copies + * (device to device, CUDA array to device, CUDA array to CUDA array), + * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch(). + * ::cuMemcpy2DUnaligned() does not have this restriction, but may run + * significantly slower in the cases where ::cuMemcpy2D() would have returned + * an error code. + * + * \param pCopy - Parameters for the memory copy culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy); + +/** + * \brief Copies memory for 2D arrays + * + * Perform a 2D memory copy according to the parameters specified in \p pCopy. + * The ::CUDA_MEMCPY2D structure is defined as: + * + * \code + typedef struct CUDA_MEMCPY2D_st { culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + unsigned int srcXInBytes, srcY; + CUmemorytype srcMemoryType; + const void *srcHost; + CUdeviceptr srcDevice; + CUarray srcArray; + unsigned int srcPitch; + unsigned int dstXInBytes, dstY; + CUmemorytype dstMemoryType; + void *dstHost; + CUdeviceptr dstDevice; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + CUarray dstArray; + unsigned int dstPitch; + unsigned int WidthInBytes; + unsigned int Height; + } CUDA_MEMCPY2D; + * \endcode + * where: + * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the + * source and destination, respectively; ::CUmemorytype_enum is defined as: + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \code + typedef enum CUmemorytype_enum { + CU_MEMORYTYPE_HOST = 0x01, + CU_MEMORYTYPE_DEVICE = 0x02, + CU_MEMORYTYPE_ARRAY = 0x03, + CU_MEMORYTYPE_UNIFIED = 0x04 + } CUmemorytype; + * \endcode + * + * \par culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch + * specify the (unified virtual address space) base address of the source data + * and the bytes per row to apply. ::srcArray is ignored. + * This value may be used only if unified addressing is supported in the calling + * context. + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch + * specify the (host) base address of the source data and the bytes per row to + * apply. ::srcArray is ignored. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch + * specify the (device) base address of the source data and the bytes per row + * to apply. ::srcArray is ignored. + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the + * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are + * ignored. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch + * specify the (unified virtual address space) base address of the destination data + * and the bytes per row to apply. ::dstArray is ignored. + * This value may be used only if unified addressing is supported in the calling + * context. + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * specify the (host) base address of the destination data and the bytes per + * row to apply. ::dstArray is ignored. + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch + * specify the (device) base address of the destination data and the bytes per + * row to apply. ::dstArray is ignored. + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are + * ignored. + * + * - ::srcXInBytes and ::srcY specify the base address of the source data for + * the copy. + * + * \par + * For host pointers, the starting address is + * \code + void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \endcode + * + * \par + * For device pointers, the starting address is + * \code + CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes; + * \endcode + * + * \par + * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * element size. + * + * - ::dstXInBytes and ::dstY specify the base address of the destination data + * for the copy. + * + * \par + * For host pointers, the base address is + * \code + void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes); + * \endcode culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * \par + * For device pointers, the starting address is + * \code + CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes; + * \endcode + * + * \par + * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array + * element size. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * - ::WidthInBytes and ::Height specify the width (in bytes) and height of + * the 2D copy being performed. + * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes + + * ::srcXInBytes, and ::dstPitch must be greater than or equal to + * ::WidthInBytes + dstXInBytes. + * + * \par + * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum + * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * pitches that always work with ::cuMemcpy2D(). 
On intra-device memory copies + * (device to device, CUDA array to device, CUDA array to CUDA array), + * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch(). + * ::cuMemcpy2DUnaligned() does not have this restriction, but may run + * significantly slower in the cases where ::cuMemcpy2D() would have returned + * an error code. + * + * \param pCopy - Parameters for the memory copy + * + * \return culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy); + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +/** + * \brief Copies memory for 3D arrays + * + * Perform a 3D memory copy according to the parameters specified in + * \p pCopy. The ::CUDA_MEMCPY3D structure is defined as: + * + * \code + typedef struct CUDA_MEMCPY3D_st { + + unsigned int srcXInBytes, srcY, srcZ; culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + unsigned int srcLOD; + CUmemorytype srcMemoryType; + const void *srcHost; + CUdeviceptr srcDevice; + CUarray srcArray; + unsigned int srcPitch; // ignored when src is array + unsigned int srcHeight; // ignored when src is array; may be 0 if Depth==1 + + unsigned int dstXInBytes, dstY, dstZ; + unsigned int dstLOD; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + CUmemorytype dstMemoryType; + void *dstHost; + CUdeviceptr dstDevice; + CUarray dstArray; + unsigned int dstPitch; // ignored when dst is array + unsigned int dstHeight; // ignored when dst is array; may be 0 if Depth==1 + + unsigned int WidthInBytes; + unsigned int Height; + unsigned int Depth; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + } CUDA_MEMCPY3D; + * \endcode + * where: + * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the + * source and destination, respectively; ::CUmemorytype_enum is defined as: + * + * \code + typedef enum CUmemorytype_enum { + CU_MEMORYTYPE_HOST = 0x01, + CU_MEMORYTYPE_DEVICE = 0x02, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + CU_MEMORYTYPE_ARRAY = 0x03, + CU_MEMORYTYPE_UNIFIED = 0x04 + } CUmemorytype; + * \endcode + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch + * specify the (unified virtual address space) base address of the source data + * and the bytes per row to apply. ::srcArray is ignored. + * This value may be used only if unified addressing is supported in the calling culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * context. + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost, ::srcPitch and + * ::srcHeight specify the (host) base address of the source data, the bytes + * per row, and the height of each 2D slice of the 3D array. ::srcArray is + * ignored. + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice, ::srcPitch and culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * ::srcHeight specify the (device) base address of the source data, the bytes + * per row, and the height of each 2D slice of the 3D array. ::srcArray is + * ignored. + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the + * handle of the source data. ::srcHost, ::srcDevice, ::srcPitch and + * ::srcHeight are ignored. + * + * \par culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch + * specify the (unified virtual address space) base address of the destination data + * and the bytes per row to apply. ::dstArray is ignored. + * This value may be used only if unified addressing is supported in the calling + * context. + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost, ::dstPitch and ::dstHeight + * specify the (host) base address of the destination data, the bytes per row, + * and the height of each 2D slice of the 3D array. ::dstArray is ignored. culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice, ::dstPitch and ::dstHeight + * specify the (device) base address of the destination data, the bytes per + * row, and the height of each 2D slice of the 3D array. ::dstArray is ignored. + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the + * handle of the destination data. ::dstHost, ::dstDevice, ::dstPitch and + * ::dstHeight are ignored. culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * - ::srcXInBytes, ::srcY and ::srcZ specify the base address of the source + * data for the copy. + * + * \par + * For host pointers, the starting address is + * \code + void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes); + * \endcode + * culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * \par + * For device pointers, the starting address is + * \code + CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes; + * \endcode + * + * \par + * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array + * element size. + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * - dstXInBytes, ::dstY and ::dstZ specify the base address of the + * destination data for the copy. + * + * \par + * For host pointers, the base address is + * \code + void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes); + * \endcode + * + * \par culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * For device pointers, the starting address is + * \code + CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes; + * \endcode + * + * \par + * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array + * element size. + * + * - ::WidthInBytes, ::Height and ::Depth specify the width (in bytes), height culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * and depth of the 3D copy being performed. + * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes + + * ::srcXInBytes, and ::dstPitch must be greater than or equal to + * ::WidthInBytes + dstXInBytes. + * - If specified, ::srcHeight must be greater than or equal to ::Height + + * ::srcY, and ::dstHeight must be greater than or equal to ::Height + ::dstY. + * + * \par + * ::cuMemcpy3D() returns an error if any pitch is greater than the maximum + * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * The ::srcLOD and ::dstLOD members of the ::CUDA_MEMCPY3D structure must be + * set to 0. + * + * \param pCopy - Parameters for the memory copy + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy); +#endif /* __CUDA_API_VERSION >= 3020 */ + +#if __CUDA_API_VERSION >= 4000 +/** culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \brief Copies memory between contexts + * + * Perform a 3D memory copy according to the parameters specified in + * \p pCopy. See the definition of the ::CUDA_MEMCPY3D_PEER structure + * for documentation of its parameters. + * + * Note that this function is synchronous with respect to the host only if + * the source or destination memory is of type ::CU_MEMORYTYPE_HOST. + * Note also that this copy is serialized with respect to all pending and future + * asynchronous work in the current context, the copy's source context, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * and the copy's destination context (use ::cuMemcpy3DPeerAsync to avoid + * this synchronization). 
+ * + * \param pCopy - Parameters for the memory copy + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuMemcpyDtoD, ::cuMemcpyPeer, ::cuMemcpyDtoDAsync, ::cuMemcpyPeerAsync, + * ::cuMemcpy3DPeerAsync + */ +CUresult CUDAAPI cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy); + +/** + * \brief Copies memory asynchronously culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * Copies data between two pointers. + * \p dst and \p src are base pointers of the destination and source, respectively. + * \p ByteCount specifies the number of bytes to copy. + * Note that this function infers the type of the transfer (host to host, host to + * device, device to device, or device to host) from the pointer values. This + * function is only allowed in contexts which support unified addressing. + * Note that this function is asynchronous and can optionally be associated to + * a stream by passing a non-zero \p hStream argument + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \param dst - Destination unified virtual address space pointer + * \param src - Source unified virtual address space pointer + * \param ByteCount - Size of memory copy in bytes + * \param hStream - Stream identifier + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream); + +/** + * \brief Copies device memory between two contexts asynchronously. culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * Copies from device memory in one context to device memory in another + * context. \p dstDevice is the base device pointer of the destination memory + * and \p dstContext is the destination context. \p srcDevice is the base + * device pointer of the source memory and \p srcContext is the source context. + * \p ByteCount specifies the number of bytes to copy. Note that this function + * is asynchronous with respect to the host and all work in other streams in + * other devices. + * + * \param dstDevice - Destination device pointer culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * \param dstContext - Destination context + * \param srcDevice - Source device pointer + * \param srcContext - Source context + * \param ByteCount - Size of memory copy in bytes + * \param hStream - Stream identifier + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuMemcpyDtoD, ::cuMemcpyPeer, ::cuMemcpy3DPeer, ::cuMemcpyDtoDAsync, + * ::cuMemcpy3DPeerAsync + */ +CUresult CUDAAPI cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream); +#endif /* __CUDA_API_VERSION >= 4000 */ + culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +#if __CUDA_API_VERSION >= 3020 +/** + * \brief Copies memory from Host to Device + * + * Copies from host memory to device memory. \p dstDevice and \p srcHost are + * the base addresses of the destination and source, respectively. \p ByteCount + * specifies the number of bytes to copy. + * + * ::cuMemcpyHtoDAsync() is asynchronous and can optionally be associated to a + * stream by passing a non-zero \p hStream argument. It only works on culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * page-locked memory and returns an error if a pointer to pageable memory is + * passed as input. + * + * \param dstDevice - Destination device pointer + * \param srcHost - Source host pointer + * \param ByteCount - Size of memory copy in bytes + * \param hStream - Stream identifier + * + * \return + * ::CUDA_SUCCESS, culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + +/** + * \brief Copies memory from Device to Host + * + * Copies from device to host memory. \p dstHost and \p srcDevice specify the + * base pointers of the destination and source, respectively. \p ByteCount + * specifies the number of bytes to copy. + * + * ::cuMemcpyDtoHAsync() is asynchronous and can optionally be associated to a + * stream by passing a non-zero \p hStream argument. It only works on culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * page-locked memory and returns an error if a pointer to pageable memory is + * passed as input. 
+ * + * \param dstHost - Destination host pointer + * \param srcDevice - Source device pointer + * \param ByteCount - Size of memory copy in bytes + * \param hStream - Stream identifier + * + * \return + * ::CUDA_SUCCESS, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + +/** + * \brief Copies memory from Device to Device + * + * Copies from device memory to device memory. \p dstDevice and \p srcDevice + * are the base pointers of the destination and source, respectively. + * \p ByteCount specifies the number of bytes to copy. Note that this function + * is asynchronous and can optionally be associated to a stream by passing a + * non-zero \p hStream argument + * culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * \param dstDevice - Destination device pointer + * \param srcDevice - Source device pointer + * \param ByteCount - Size of memory copy in bytes + * \param hStream - Stream identifier + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream); + +/** + * \brief Copies memory from Host to Array culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * Copies from host memory to a 1D CUDA array. \p dstArray and \p dstOffset + * specify the CUDA array handle and starting offset in bytes of the + * destination data. \p srcHost specifies the base address of the source. + * \p ByteCount specifies the number of bytes to copy. + * + * ::cuMemcpyHtoAAsync() is asynchronous and can optionally be associated to a + * stream by passing a non-zero \p hStream argument. 
It only works on + * page-locked memory and returns an error if a pointer to pageable memory is + * passed as input. culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * \param dstArray - Destination array + * \param dstOffset - Offset in bytes of destination array + * \param srcHost - Source host pointer + * \param ByteCount - Size of memory copy in bytes + * \param hStream - Stream identifier + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream); + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +/** + * \brief Copies memory from Array to Host + * + * Copies from one 1D CUDA array to host memory. \p dstHost specifies the base + * pointer of the destination. 
\p srcArray and \p srcOffset specify the CUDA + * array handle and starting offset in bytes of the source data. + * \p ByteCount specifies the number of bytes to copy. + * + * ::cuMemcpyAtoHAsync() is asynchronous and can optionally be associated to a + * stream by passing a non-zero \p hStream argument. It only works on culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * page-locked host memory and returns an error if a pointer to pageable + * memory is passed as input. + * + * \param dstHost - Destination pointer + * \param srcArray - Source array + * \param srcOffset - Offset in bytes of source array + * \param ByteCount - Size of memory copy in bytes + * \param hStream - Stream identifier + * + * \return culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, culaunchkernel.ptsz How to get it?
culaunchkernel.ptsz + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream); + +/** + * \brief Copies memory for 2D arrays + * + * Perform a 2D memory copy according to the parameters specified in \p pCopy. + * The ::CUDA_MEMCPY2D structure is defined as: + * + * \code + typedef struct CUDA_MEMCPY2D_st { culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + unsigned int srcXInBytes, srcY; + CUmemorytype srcMemoryType; + const void *srcHost; + CUdeviceptr srcDevice; + CUarray srcArray; + unsigned int srcPitch; + unsigned int dstXInBytes, dstY; + CUmemorytype dstMemoryType; + void *dstHost; + CUdeviceptr dstDevice; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + CUarray dstArray; + unsigned int dstPitch; + unsigned int WidthInBytes; + unsigned int Height; + } CUDA_MEMCPY2D; + * \endcode + * where: + * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the + * source and destination, respectively; ::CUmemorytype_enum is defined as: + * culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * \code + typedef enum CUmemorytype_enum { + CU_MEMORYTYPE_HOST = 0x01, + CU_MEMORYTYPE_DEVICE = 0x02, + CU_MEMORYTYPE_ARRAY = 0x03, + CU_MEMORYTYPE_UNIFIED = 0x04 + } CUmemorytype; + * \endcode + * + * \par culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch + * specify the (host) base address of the source data and the bytes per row to + * apply. ::srcArray is ignored. + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch + * specify the (unified virtual address space) base address of the source data + * and the bytes per row to apply. ::srcArray is ignored. + * This value may be used only if unified addressing is supported in the calling + * context. culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch + * specify the (device) base address of the source data and the bytes per row + * to apply. ::srcArray is ignored. + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the + * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are + * ignored. culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch + * specify the (unified virtual address space) base address of the destination data + * and the bytes per row to apply. ::dstArray is ignored. + * This value may be used only if unified addressing is supported in the calling + * context. + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * specify the (host) base address of the destination data and the bytes per + * row to apply. ::dstArray is ignored.
+ * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch + * specify the (device) base address of the destination data and the bytes per + * row to apply. ::dstArray is ignored. + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are + * ignored. + * + * - ::srcXInBytes and ::srcY specify the base address of the source data for + * the copy. + * + * \par + * For host pointers, the starting address is + * \code + void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \endcode + * + * \par + * For device pointers, the starting address is + * \code + CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes; + * \endcode + * + * \par + * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * element size. + * + * - ::dstXInBytes and ::dstY specify the base address of the destination data + * for the copy. + * + * \par + * For host pointers, the base address is + * \code + void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes); + * \endcode culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * \par + * For device pointers, the starting address is + * \code + CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes; + * \endcode + * + * \par + * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array + * element size. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * - ::WidthInBytes and ::Height specify the width (in bytes) and height of + * the 2D copy being performed. + * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes + + * ::srcXInBytes, and ::dstPitch must be greater than or equal to + * ::WidthInBytes + dstXInBytes. 
+ * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes + + * ::srcXInBytes, and ::dstPitch must be greater than or equal to + * ::WidthInBytes + dstXInBytes. + * - If specified, ::srcHeight must be greater than or equal to ::Height + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::srcY, and ::dstHeight must be greater than or equal to ::Height + ::dstY. + * + * \par + * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum + * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back + * pitches that always work with ::cuMemcpy2D(). On intra-device memory copies + * (device to device, CUDA array to device, CUDA array to CUDA array), + * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch(). + * ::cuMemcpy2DUnaligned() does not have this restriction, but may run + * significantly slower in the cases where ::cuMemcpy2D() would have returned culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * an error code. + * + * ::cuMemcpy2DAsync() is asynchronous and can optionally be associated to a + * stream by passing a non-zero \p hStream argument. It only works on + * page-locked host memory and returns an error if a pointer to pageable + * memory is passed as input. + * + * \param pCopy - Parameters for the memory copy + * \param hStream - Stream identifier + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + */ +CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream); + +/** + * \brief Copies memory for 3D arrays + * + * Perform a 3D memory copy according to the parameters specified in + * \p pCopy. The ::CUDA_MEMCPY3D structure is defined as: + * + * \code culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + typedef struct CUDA_MEMCPY3D_st { + + unsigned int srcXInBytes, srcY, srcZ; + unsigned int srcLOD; + CUmemorytype srcMemoryType; + const void *srcHost; + CUdeviceptr srcDevice; + CUarray srcArray; + unsigned int srcPitch; // ignored when src is array + unsigned int srcHeight; // ignored when src is array; may be 0 if Depth==1 culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + + unsigned int dstXInBytes, dstY, dstZ; + unsigned int dstLOD; + CUmemorytype dstMemoryType; + void *dstHost; + CUdeviceptr dstDevice; + CUarray dstArray; + unsigned int dstPitch; // ignored when dst is array + unsigned int dstHeight; // ignored when dst is array; may be 0 if Depth==1 + culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + unsigned int WidthInBytes; + unsigned int Height; + unsigned int Depth; + } CUDA_MEMCPY3D; + * \endcode + * where: + * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the + * source and destination, respectively; ::CUmemorytype_enum is defined as: + * + * \code culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + typedef enum CUmemorytype_enum { + CU_MEMORYTYPE_HOST = 0x01, + CU_MEMORYTYPE_DEVICE = 0x02, + CU_MEMORYTYPE_ARRAY = 0x03, + CU_MEMORYTYPE_UNIFIED = 0x04 + } CUmemorytype; + * \endcode + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * specify the (unified virtual address space) base address of the source data + * and the bytes per row to apply. ::srcArray is ignored. + * This value may be used only if unified addressing is supported in the calling + * context. + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost, ::srcPitch and + * ::srcHeight specify the (host) base address of the source data, the bytes + * per row, and the height of each 2D slice of the 3D array. ::srcArray is + * ignored. culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice, ::srcPitch and + * ::srcHeight specify the (device) base address of the source data, the bytes + * per row, and the height of each 2D slice of the 3D array. ::srcArray is + * ignored. + * + * \par + * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the + * handle of the source data. ::srcHost, ::srcDevice, ::srcPitch and culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::srcHeight are ignored. + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch + * specify the (unified virtual address space) base address of the destination data + * and the bytes per row to apply. ::dstArray is ignored.
+ * This value may be used only if unified addressing is supported in the calling + * context. + * + * \par culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost, ::dstPitch and ::dstHeight + * specify the (host) base address of the destination data, the bytes per row, + * and the height of each 2D slice of the 3D array. ::dstArray is ignored. + * + * \par + * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice, ::dstPitch and ::dstHeight + * specify the (device) base address of the destination data, the bytes per + * row, and the height of each 2D slice of the 3D array. ::dstArray is ignored. + * + * \par culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the + * handle of the destination data. ::dstHost, ::dstDevice, ::dstPitch and + * ::dstHeight are ignored. + * + * - ::srcXInBytes, ::srcY and ::srcZ specify the base address of the source + * data for the copy. + * + * \par + * For host pointers, the starting address is + * \code culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes); + * \endcode + * + * \par + * For device pointers, the starting address is + * \code + CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes; + * \endcode + * + * \par culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array + * element size. + * + * - ::dstXInBytes, ::dstY and ::dstZ specify the base address of the + * destination data for the copy.
+ * + * \par + * For host pointers, the base address is + * \code + void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes); culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \endcode + * + * \par + * For device pointers, the starting address is + * \code + CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes; + * \endcode + * + * \par + * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * element size. + * + * - ::WidthInBytes, ::Height and ::Depth specify the width (in bytes), height + * and depth of the 3D copy being performed. + * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes + + * ::srcXInBytes, and ::dstPitch must be greater than or equal to + * ::WidthInBytes + ::dstXInBytes. + * - If specified, ::srcHeight must be greater than or equal to ::Height + + * ::srcY, and ::dstHeight must be greater than or equal to ::Height + ::dstY. + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \par + * ::cuMemcpy3D() returns an error if any pitch is greater than the maximum + * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). + * + * ::cuMemcpy3DAsync() is asynchronous and can optionally be associated to a + * stream by passing a non-zero \p hStream argument. It only works on + * page-locked host memory and returns an error if a pointer to pageable + * memory is passed as input. + * + * The ::srcLOD and ::dstLOD members of the ::CUDA_MEMCPY3D structure must be culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * set to 0. + * + * \param pCopy - Parameters for the memory copy + * \param hStream - Stream identifier + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to use it?
culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream); +#endif /* __CUDA_API_VERSION >= 3020 */ + +#if __CUDA_API_VERSION >= 4000 culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +/** + * \brief Copies memory between contexts asynchronously. + * + * Perform a 3D memory copy according to the parameters specified in + * \p pCopy. See the definition of the ::CUDA_MEMCPY3D_PEER structure + * for documentation of its parameters. + * + * \param pCopy - Parameters for the memory copy + * \param hStream - Stream identifier + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuMemcpyDtoD, ::cuMemcpyPeer, ::cuMemcpyDtoDAsync, ::cuMemcpyPeerAsync, + * ::cuMemcpy3DPeer culaunchkernel.ptsz How to dowload it?
culaunchkernel.ptsz + */ +CUresult CUDAAPI cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, CUstream hStream); +#endif /* __CUDA_API_VERSION >= 4000 */ + +#if __CUDA_API_VERSION >= 3020 +/** + * \brief Initializes device memory + * + * Sets the memory range of \p N 8-bit values to the specified value + * \p uc. culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * Note that this function is asynchronous with respect to the host unless + * \p dstDevice refers to pinned host memory. + * + * \param dstDevice - Destination device pointer + * \param uc - Value to set + * \param N - Number of elements + * + * \return + * ::CUDA_SUCCESS, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N); culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + +/** + * \brief Initializes device memory + * + * Sets the memory range of \p N 16-bit values to the specified value + * \p us. The \p dstDevice pointer must be two byte aligned. + * + * Note that this function is asynchronous with respect to the host unless + * \p dstDevice refers to pinned host memory. + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \param dstDevice - Destination device pointer + * \param us - Value to set + * \param N - Number of elements + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size_t N); + +/** + * \brief Initializes device memory + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * Sets the memory range of \p N 32-bit values to the specified value + * \p ui. The \p dstDevice pointer must be four byte aligned. 
+ * + * Note that this function is asynchronous with respect to the host unless + * \p dstDevice refers to pinned host memory. + * + * \param dstDevice - Destination device pointer + * \param ui - Value to set + * \param N - Number of elements + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32Async culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + */ +CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N); + +/** + * \brief Initializes device memory + * + * Sets the 2D memory range of \p Width 8-bit values to the specified value + * \p uc. \p Height specifies the number of rows to set, and \p dstPitch + * specifies the number of bytes between each row. This function performs + * fastest when the pitch is one that has been passed back by culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuMemAllocPitch(). 
+ * + * Note that this function is asynchronous with respect to the host unless + * \p dstDevice refers to pinned host memory. + * + * \param dstDevice - Destination device pointer + * \param dstPitch - Pitch of destination device pointer + * \param uc - Value to set + * \param Width - Width of row + * \param Height - Number of rows culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height); + +/** + * \brief Initializes device memory + * + * Sets the 2D memory range of \p Width 16-bit values to the specified value + * \p us. \p Height specifies the number of rows to set, and \p dstPitch + * specifies the number of bytes between each row. The \p dstDevice pointer culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * and \p dstPitch offset must be two byte aligned. This function performs + * fastest when the pitch is one that has been passed back by + * ::cuMemAllocPitch(). + * + * Note that this function is asynchronous with respect to the host unless + * \p dstDevice refers to pinned host memory. + * + * \param dstDevice - Destination device pointer + * \param dstPitch - Pitch of destination device pointer + * \param us - Value to set culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \param Width - Width of row + * \param Height - Number of rows + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height); + +/** + * \brief Initializes device memory + * + * Sets the 2D memory range of \p Width 32-bit values to the specified value culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \p ui. \p Height specifies the number of rows to set, and \p dstPitch + * specifies the number of bytes between each row. The \p dstDevice pointer + * and \p dstPitch offset must be four byte aligned. This function performs + * fastest when the pitch is one that has been passed back by + * ::cuMemAllocPitch(). + * + * Note that this function is asynchronous with respect to the host unless + * \p dstDevice refers to pinned host memory. + * + * \param dstDevice - Destination device pointer culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \param dstPitch - Pitch of destination device pointer + * \param ui - Value to set + * \param Width - Width of row + * \param Height - Number of rows + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height); + +/** + * \brief Sets device memory culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * Sets the memory range of \p N 8-bit values to the specified value + * \p uc. + * + * ::cuMemsetD8Async() is asynchronous and can optionally be associated to a + * stream by passing a non-zero \p stream argument. + * + * \param dstDevice - Destination device pointer + * \param uc - Value to set + * \param N - Number of elements culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \param hStream - Stream identifier + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream); + +/** + * \brief Sets device memory + * + * Sets the memory range of \p N 16-bit values to the specified value + * \p us. The \p dstDevice pointer must be two byte aligned. culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * ::cuMemsetD16Async() is asynchronous and can optionally be associated to a + * stream by passing a non-zero \p stream argument. 
+ * + * \param dstDevice - Destination device pointer + * \param us - Value to set + * \param N - Number of elements + * \param hStream - Stream identifier + * + * \return culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, + * ::cuMemsetD32, ::cuMemsetD32Async + */ culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +CUresult CUDAAPI cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream); + +/** + * \brief Sets device memory + * + * Sets the memory range of \p N 32-bit values to the specified value + * \p ui. The \p dstDevice pointer must be four byte aligned. + * + * ::cuMemsetD32Async() is asynchronous and can optionally be associated to a + * stream by passing a non-zero \p stream argument. 
culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * \param dstDevice - Destination device pointer + * \param ui - Value to set + * \param N - Number of elements + * \param hStream - Stream identifier + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, ::cuMemsetD32 + */ +CUresult CUDAAPI cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream); + +/** + * \brief Sets device memory culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * Sets the 2D memory range of \p Width 8-bit values to the specified value + * \p uc. \p Height specifies the number of rows to set, and \p dstPitch + * specifies the number of bytes between each row. This function performs + * fastest when the pitch is one that has been passed back by + * ::cuMemAllocPitch(). + * + * ::cuMemsetD2D8Async() is asynchronous and can optionally be associated to a + * stream by passing a non-zero \p stream argument. 
+ * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \param dstDevice - Destination device pointer + * \param dstPitch - Pitch of destination device pointer + * \param uc - Value to set + * \param Width - Width of row + * \param Height - Number of rows + * \param hStream - Stream identifier + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream); + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +/** + * \brief Sets device memory + * + * Sets the 2D memory range of \p Width 16-bit values to the specified value + * \p us. \p Height specifies the number of rows to set, and \p dstPitch + * specifies the number of bytes between each row. The \p dstDevice pointer + * and \p dstPitch offset must be two byte aligned. 
This function performs + * fastest when the pitch is one that has been passed back by + * ::cuMemAllocPitch(). + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuMemsetD2D16Async() is asynchronous and can optionally be associated to a + * stream by passing a non-zero \p stream argument. + * + * \param dstDevice - Destination device pointer + * \param dstPitch - Pitch of destination device pointer + * \param us - Value to set + * \param Width - Width of row + * \param Height - Number of rows + * \param hStream - Stream identifier + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + * ::cuMemsetD2D16, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + */ +CUresult CUDAAPI cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream); + +/** + * \brief Sets device memory + * + * Sets the 2D memory range of \p Width 32-bit values to the specified value + * \p ui. \p Height specifies the number of rows to set, and \p dstPitch + * specifies the number of bytes between each row. The \p dstDevice pointer + * and \p dstPitch offset must be four byte aligned. This function performs culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * fastest when the pitch is one that has been passed back by + * ::cuMemAllocPitch(). + * + * ::cuMemsetD2D32Async() is asynchronous and can optionally be associated to a + * stream by passing a non-zero \p stream argument. + * + * \param dstDevice - Destination device pointer + * \param dstPitch - Pitch of destination device pointer + * \param ui - Value to set + * \param Width - Width of row culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \param Height - Number of rows + * \param hStream - Stream identifier + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, + * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + * ::cuMemsetD32, ::cuMemsetD32Async + */ +CUresult CUDAAPI cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream); + +/** + * \brief Creates a 1D or 2D CUDA array + * + * Creates a CUDA array according to the ::CUDA_ARRAY_DESCRIPTOR structure culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \p pAllocateArray and returns a handle to the new CUDA array in \p *pHandle. + * The ::CUDA_ARRAY_DESCRIPTOR is defined as: + * + * \code + typedef struct { + unsigned int Width; + unsigned int Height; + CUarray_format Format; + unsigned int NumChannels; + } CUDA_ARRAY_DESCRIPTOR; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \endcode + * where: + * + * - \p Width, and \p Height are the width, and height of the CUDA array (in + * elements); the CUDA array is one-dimensional if height is 0, two-dimensional + * otherwise; + * - ::Format specifies the format of the elements; ::CUarray_format is + * defined as: + * \code + typedef enum CUarray_format_enum { culaunchkernel.ptsz How to dowload it? 
+ CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, + CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, + CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, + CU_AD_FORMAT_SIGNED_INT8 = 0x08, + CU_AD_FORMAT_SIGNED_INT16 = 0x09, + CU_AD_FORMAT_SIGNED_INT32 = 0x0a, + CU_AD_FORMAT_HALF = 0x10, + CU_AD_FORMAT_FLOAT = 0x20 + } CUarray_format; + * \endcode + * - \p NumChannels specifies the number of packed components per CUDA array + * element; it may be 1, 2, or 4; + * + * Here are examples of CUDA array descriptions: + * + * Description for a CUDA array of 2048 floats: + * \code + CUDA_ARRAY_DESCRIPTOR desc; + desc.Format = CU_AD_FORMAT_FLOAT; + desc.NumChannels = 1; + desc.Width = 2048; + desc.Height = 1; + * \endcode + * + * Description for a 64 x 64 CUDA array of floats: + * \code + CUDA_ARRAY_DESCRIPTOR desc; + desc.Format = CU_AD_FORMAT_FLOAT; + desc.NumChannels = 1; + desc.Width = 64; + desc.Height = 64; + * \endcode + * + * Description for a \p width x \p height CUDA array of 64-bit, 4x16-bit + * float16's: + * \code + CUDA_ARRAY_DESCRIPTOR desc; + desc.Format = CU_AD_FORMAT_HALF; + desc.NumChannels = 4; + desc.Width = width; + desc.Height = height; + * \endcode + * + * Description for a \p width x \p height CUDA array of 16-bit elements, each + * of which is two 8-bit unsigned chars: + * \code + CUDA_ARRAY_DESCRIPTOR desc; + desc.Format = CU_AD_FORMAT_UNSIGNED_INT8; + desc.NumChannels = 2; + desc.Width = width; + desc.Height = height; + * \endcode + * + * \param pHandle - Returned array + * \param pAllocateArray - Array descriptor + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED,
culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_OUT_OF_MEMORY, + * ::CUDA_ERROR_UNKNOWN + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray); + +/** culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \brief Get a 1D or 2D CUDA array descriptor + * + * Returns in \p *pArrayDescriptor a descriptor containing information on the + * format and dimensions of the CUDA array \p hArray. It is useful for + * subroutines that have been passed a CUDA array, but need to know the CUDA + * array parameters for validation or other purposes. + * + * \param pArrayDescriptor - Returned array descriptor + * \param hArray - Array to get descriptor of + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_INVALID_HANDLE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * ::cuArrayDestroy, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray); +#endif /* __CUDA_API_VERSION >= 3020 */ + + +/** + * \brief Destroys a CUDA array + * + * Destroys the CUDA array \p hArray. + * + * \param hArray - Array to destroy culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_ARRAY_IS_MAPPED + * \notefnerr + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, 
::cuMemsetD32 culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + */ +CUresult CUDAAPI cuArrayDestroy(CUarray hArray); + +#if __CUDA_API_VERSION >= 3020 +/** + * \brief Creates a 3D CUDA array + * + * Creates a CUDA array according to the ::CUDA_ARRAY3D_DESCRIPTOR structure + * \p pAllocateArray and returns a handle to the new CUDA array in \p *pHandle. + * The ::CUDA_ARRAY3D_DESCRIPTOR is defined as: culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * \code + typedef struct { + unsigned int Width; + unsigned int Height; + unsigned int Depth; + CUarray_format Format; + unsigned int NumChannels; + unsigned int Flags; + } CUDA_ARRAY3D_DESCRIPTOR; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \endcode + * where: + * + * - \p Width, \p Height, and \p Depth are the width, height, and depth of the + * CUDA array (in elements); the following types of CUDA arrays can be allocated: + * - A 1D array is allocated if \p Height and \p Depth extents are both zero. + * - A 2D array is allocated if only \p Depth extent is zero. + * - A 3D array is allocated if all three extents are non-zero. + * - A 1D layered CUDA array is allocated if only \p Height is zero and the + * ::CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * of layers is determined by the depth extent. + * - A 2D layered CUDA array is allocated if all three extents are non-zero and + * the ::CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number + * of layers is determined by the depth extent. + * - A cubemap CUDA array is allocated if all three extents are non-zero and the + * ::CUDA_ARRAY3D_CUBEMAP flag is set. \p Width must be equal to \p Height, and + * \p Depth must be six. A cubemap is a special type of 2D layered CUDA array, + * where the six layers represent the six faces of a cube. 
The order of the six + * layers in memory is the same as that listed in ::CUarray_cubemap_face. + * - A cubemap layered CUDA array is allocated if all three extents are non-zero, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * and both, ::CUDA_ARRAY3D_CUBEMAP and ::CUDA_ARRAY3D_LAYERED flags are set. + * \p Width must be equal to \p Height, and \p Depth must be a multiple of six. + * A cubemap layered CUDA array is a special type of 2D layered CUDA array that + * consists of a collection of cubemaps. The first six layers represent the first + * cubemap, the next six layers form the second cubemap, and so on. + * + * - ::Format specifies the format of the elements; ::CUarray_format is + * defined as: + * \code + typedef enum CUarray_format_enum { culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, + CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, + CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, + CU_AD_FORMAT_SIGNED_INT8 = 0x08, + CU_AD_FORMAT_SIGNED_INT16 = 0x09, + CU_AD_FORMAT_SIGNED_INT32 = 0x0a, + CU_AD_FORMAT_HALF = 0x10, + CU_AD_FORMAT_FLOAT = 0x20 + } CUarray_format; + * \endcode culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * - \p NumChannels specifies the number of packed components per CUDA array + * element; it may be 1, 2, or 4; + * + * - ::Flags may be set to + * - ::CUDA_ARRAY3D_LAYERED to enable creation of layered CUDA arrays. If this flag is set, + * \p Depth specifies the number of layers, not the depth of a 3D array. + * - ::CUDA_ARRAY3D_SURFACE_LDST to enable surface references to be bound to the CUDA array. + * If this flag is not set, ::cuSurfRefSetArray will fail when attempting to bind the CUDA array + * to a surface reference. culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * - ::CUDA_ARRAY3D_CUBEMAP to enable creation of cubemaps. If this flag is set, \p Width must be + * equal to \p Height, and \p Depth must be six. 
If the ::CUDA_ARRAY3D_LAYERED flag is also set, + * then \p Depth must be a multiple of six. + * - ::CUDA_ARRAY3D_TEXTURE_GATHER to indicate that the CUDA array will be used for texture gather. + * Texture gather can only be performed on 2D CUDA arrays. + * + * \p Width, \p Height and \p Depth must meet certain size requirements as listed in the following table. + * All values are specified in elements. Note that for brevity's sake, the full name of the device attribute + * is not specified. For ex., TEXTURE1D_WIDTH refers to the device attribute + * ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * Note that 2D CUDA arrays have different size requirements if the ::CUDA_ARRAY3D_TEXTURE_GATHER flag + * is set. \p Width and \p Height must not be greater than ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH + * and ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT respectively, in that case. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + *
+ * <table>
+ * <tr><td>CUDA array type</td>
+ * <td>Valid extents that must always be met<br>{(width range in elements), (height range),
+ * (depth range)}</td>
+ * <td>Valid extents with CUDA_ARRAY3D_SURFACE_LDST set<br>
+ * {(width range in elements), (height range), (depth range)}</td></tr>
+ * <tr><td>1D</td>
+ * <td>{ (1,TEXTURE1D_WIDTH), 0, 0 }</td>
+ * <td>{ (1,SURFACE1D_WIDTH), 0, 0 }</td></tr>
+ * <tr><td>2D</td>
+ * <td>{ (1,TEXTURE2D_WIDTH), (1,TEXTURE2D_HEIGHT), 0 }</td>
+ * <td>{ (1,SURFACE2D_WIDTH), (1,SURFACE2D_HEIGHT), 0 }</td></tr>
+ * <tr><td>3D</td>
+ * <td>{ (1,TEXTURE3D_WIDTH), (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) }
+ * <br>OR<br>{ (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE),
+ * (1,TEXTURE3D_DEPTH_ALTERNATE) }</td>
+ * <td>{ (1,SURFACE3D_WIDTH), (1,SURFACE3D_HEIGHT),
+ * (1,SURFACE3D_DEPTH) }</td></tr>
+ * <tr><td>1D Layered</td>
+ * <td>{ (1,TEXTURE1D_LAYERED_WIDTH), 0,
+ * (1,TEXTURE1D_LAYERED_LAYERS) }</td>
+ * <td>{ (1,SURFACE1D_LAYERED_WIDTH), 0,
+ * (1,SURFACE1D_LAYERED_LAYERS) }</td></tr>
+ * <tr><td>2D Layered</td>
+ * <td>{ (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT),
+ * (1,TEXTURE2D_LAYERED_LAYERS) }</td>
+ * <td>{ (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT),
+ * (1,SURFACE2D_LAYERED_LAYERS) }</td></tr>
+ * <tr><td>Cubemap</td>
+ * <td>{ (1,TEXTURECUBEMAP_WIDTH), (1,TEXTURECUBEMAP_WIDTH), 6 }</td>
+ * <td>{ (1,SURFACECUBEMAP_WIDTH),
+ * (1,SURFACECUBEMAP_WIDTH), 6 }</td></tr>
+ * <tr><td>Cubemap Layered</td>
+ * <td>{ (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH),
+ * (1,TEXTURECUBEMAP_LAYERED_LAYERS) }</td>
+ * <td>{ (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH),
+ * (1,SURFACECUBEMAP_LAYERED_LAYERS) }</td></tr>
+ * </table>
+ * + * Here are examples of CUDA array descriptions: + * + * Description for a CUDA array of 2048 floats: + * \code + CUDA_ARRAY3D_DESCRIPTOR desc; + desc.Format = CU_AD_FORMAT_FLOAT; + desc.NumChannels = 1; + desc.Width = 2048; + desc.Height = 0; + desc.Depth = 0; + * \endcode + * + * Description for a 64 x 64 CUDA array of floats: + * \code + CUDA_ARRAY3D_DESCRIPTOR desc; + desc.Format = CU_AD_FORMAT_FLOAT; + desc.NumChannels = 1; + desc.Width = 64; + desc.Height = 64; + desc.Depth = 0; + * \endcode + * + * Description for a \p width x \p height x \p depth CUDA array of 64-bit, + * 4x16-bit float16's: + * \code + CUDA_ARRAY3D_DESCRIPTOR desc; + desc.Format = CU_AD_FORMAT_HALF; + desc.NumChannels = 4; + desc.Width = width; + desc.Height = height; + desc.Depth = depth; + * \endcode + * + * \param pHandle - Returned array + * \param pAllocateArray - 3D array descriptor + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_OUT_OF_MEMORY, + * ::CUDA_ERROR_UNKNOWN + * \notefnerr + *
culaunchkernel.ptsz + * \sa ::cuArray3DGetDescriptor, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + */ +CUresult CUDAAPI cuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray); + +/** + * \brief Get a 3D CUDA array descriptor + * + * Returns in \p *pArrayDescriptor a descriptor containing information on the + * format and dimensions of the CUDA array \p hArray. It is useful for + * subroutines that have been passed a CUDA array, but need to know the CUDA + * array parameters for validation or other purposes. culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * This function may be called on 1D and 2D arrays, in which case the \p Height + * and/or \p Depth members of the descriptor struct will be set to 0. + * + * \param pArrayDescriptor - Returned 3D array descriptor + * \param hArray - 3D array to get descriptor of + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_INVALID_HANDLE + * \notefnerr + * + * \sa ::cuArray3DCreate, ::cuArrayCreate, + * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 + */ +CUresult CUDAAPI cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray); +#endif /* __CUDA_API_VERSION >= 3020 */ + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +/** @} */ /* END CUDA_MEM */ + +/** + * \defgroup CUDA_UNIFIED Unified Addressing + * + * This section describes the unified addressing functions of the + * low-level CUDA driver application programming interface. + * + * @{ + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \section CUDA_UNIFIED_overview Overview + * + * CUDA devices can share a unified address space with the host. + * For these devices there is no distinction between a device + * pointer and a host pointer -- the same pointer value may be + * used to access memory from the host program and from a kernel + * running on the device (with exceptions enumerated below). + * + * \section CUDA_UNIFIED_support Supported Platforms + * culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * Whether or not a device supports unified addressing may be + * queried by calling ::cuDeviceGetAttribute() with the device + * attribute ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING. + * + * Unified addressing is automatically enabled in 64-bit processes + * on devices with compute capability greater than or equal to 2.0. + * + * Unified addressing is not yet supported on Windows Vista or + * Windows 7 for devices that do not use the TCC driver model. + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \section CUDA_UNIFIED_lookup Looking Up Information from Pointer Values + * + * It is possible to look up information about the memory which backs a + * pointer value. For instance, one may want to know if a pointer points + * to host or device memory. As another example, in the case of device + * memory, one may want to know on which CUDA device the memory + * resides. These properties may be queried using the function + * ::cuPointerGetAttribute() + * + * Since pointers are unique, it is not necessary to specify information culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * about the pointers specified to the various copy functions in the + * CUDA API. The function ::cuMemcpy() may be used to perform a copy + * between two pointers, ignoring whether they point to host or device + * memory (making ::cuMemcpyHtoD(), ::cuMemcpyDtoD(), and ::cuMemcpyDtoH() + * unnecessary for devices supporting unified addressing). For + * multidimensional copies, the memory type ::CU_MEMORYTYPE_UNIFIED may be + * used to specify that the CUDA driver should infer the location of the + * pointer from its value. + * + * \section CUDA_UNIFIED_automaphost Automatic Mapping of Host Allocated Host Memory culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * + * All host memory allocated in all contexts using ::cuMemAllocHost() and + * ::cuMemHostAlloc() is always directly accessible from all contexts on + * all devices that support unified addressing. This is the case regardless + * of whether or not the flags ::CU_MEMHOSTALLOC_PORTABLE and + * ::CU_MEMHOSTALLOC_DEVICEMAP are specified. + * + * The pointer value through which allocated host memory may be accessed + * in kernels on all devices that support unified addressing is the same + * as the pointer value through which that memory is accessed on the host, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * so it is not necessary to call ::cuMemHostGetDevicePointer() to get the device + * pointer for these allocations. + * + * Note that this is not the case for memory allocated using the flag + * ::CU_MEMHOSTALLOC_WRITECOMBINED, as discussed below. + * + * \section CUDA_UNIFIED_autopeerregister Automatic Registration of Peer Memory + * + * Upon enabling direct access from a context that supports unified addressing + * to another peer context that supports unified addressing using culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuCtxEnablePeerAccess() all memory allocated in the peer context using + * ::cuMemAlloc() and ::cuMemAllocPitch() will immediately be accessible + * by the current context. The device pointer value through + * which any peer memory may be accessed in the current context + * is the same pointer value through which that memory may be + * accessed in the peer context. + * + * \section CUDA_UNIFIED_exceptions Exceptions, Disjoint Addressing + * + * Not all memory may be accessed on devices through the same pointer culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * value through which they are accessed on the host. These exceptions + * are host memory registered using ::cuMemHostRegister() and host memory + * allocated using the flag ::CU_MEMHOSTALLOC_WRITECOMBINED. 
For these + * exceptions, there exists a distinct host and device address for the + * memory. The device address is guaranteed to not overlap any valid host + * pointer range and is guaranteed to have the same value across all + * contexts that support unified addressing. + * + * This device address may be queried using ::cuMemHostGetDevicePointer() + * when a context using unified addressing is current. Either the host culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * or the unified device pointer value may be used to refer to this memory + * through ::cuMemcpy() and similar functions using the + * ::CU_MEMORYTYPE_UNIFIED memory type. + * + */ + +#if __CUDA_API_VERSION >= 4000 +/** + * \brief Returns information about a pointer + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * The supported attributes are: + * + * - ::CU_POINTER_ATTRIBUTE_CONTEXT: + * + * Returns in \p *data the ::CUcontext in which \p ptr was allocated or + * registered. + * The type of \p data must be ::CUcontext *. + * + * If \p ptr was not allocated by, mapped by, or registered with + * a ::CUcontext which uses unified virtual addressing then culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE is returned. + * + * - ::CU_POINTER_ATTRIBUTE_MEMORY_TYPE: + * + * Returns in \p *data the physical memory type of the memory that + * \p ptr addresses as a ::CUmemorytype enumerated value. + * The type of \p data must be unsigned int. + * + * If \p ptr addresses device memory then \p *data is set to + * ::CU_MEMORYTYPE_DEVICE. The particular ::CUdevice on which the culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * memory resides is the ::CUdevice of the ::CUcontext returned by the + * ::CU_POINTER_ATTRIBUTE_CONTEXT attribute of \p ptr. + * + * If \p ptr addresses host memory then \p *data is set to + * ::CU_MEMORYTYPE_HOST. 
+ * + * If \p ptr was not allocated by, mapped by, or registered with + * a ::CUcontext which uses unified virtual addressing then + * ::CUDA_ERROR_INVALID_VALUE is returned. + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * If the current ::CUcontext does not support unified virtual + * addressing then ::CUDA_ERROR_INVALID_CONTEXT is returned. + * + * - ::CU_POINTER_ATTRIBUTE_DEVICE_POINTER: + * + * Returns in \p *data the device pointer value through which + * \p ptr may be accessed by kernels running in the current + * ::CUcontext. + * The type of \p data must be CUdeviceptr *. + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * If there exists no device pointer value through which + * kernels running in the current ::CUcontext may access + * \p ptr then ::CUDA_ERROR_INVALID_VALUE is returned. + * + * If there is no current ::CUcontext then + * ::CUDA_ERROR_INVALID_CONTEXT is returned. + * + * Except in the exceptional disjoint addressing cases discussed + * below, the value returned in \p *data will equal the input + * value \p ptr. culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * - ::CU_POINTER_ATTRIBUTE_HOST_POINTER: + * + * Returns in \p *data the host pointer value through which + * \p ptr may be accessed by the host program. + * The type of \p data must be void **. + * If there exists no host pointer value through which + * the host program may directly access \p ptr then + * ::CUDA_ERROR_INVALID_VALUE is returned. + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * Except in the exceptional disjoint addressing cases discussed + * below, the value returned in \p *data will equal the input + * value \p ptr. + * + * + * \par + * + * Note that for most allocations in the unified virtual address space + * the host and device pointer for accessing the allocation will be the + * same. The exceptions to this are culaunchkernel.ptsz How to get it?
culaunchkernel.ptsz + * - user memory registered using ::cuMemHostRegister + * - host memory allocated using ::cuMemHostAlloc with the + * ::CU_MEMHOSTALLOC_WRITECOMBINED flag + * For these types of allocation there will exist separate, disjoint host + * and device addresses for accessing the allocation. In particular + * - The host address will correspond to an invalid unmapped device address + * (which will result in an exception if accessed from the device) + * - The device address will correspond to an invalid unmapped host address + * (which will result in an exception if accessed from the host). + * For these types of allocations, querying ::CU_POINTER_ATTRIBUTE_HOST_POINTER culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * and ::CU_POINTER_ATTRIBUTE_DEVICE_POINTER may be used to retrieve the host + * and device addresses from either address. + * + * \param data - Returned pointer attribute value + * \param attribute - Pointer attribute to query + * \param ptr - Pointer + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_INVALID_DEVICE + * \notefnerr + * + * \sa ::cuMemAlloc, + * ::cuMemFree, + * ::cuMemAllocHost, + * ::cuMemFreeHost, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuMemHostAlloc, + * ::cuMemHostRegister, + * ::cuMemHostUnregister + */ +CUresult CUDAAPI cuPointerGetAttribute(void *data, CUpointer_attribute attribute, CUdeviceptr ptr); +#endif /* __CUDA_API_VERSION >= 4000 */ + +/** @} */ /* END CUDA_UNIFIED */ + +/** culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \defgroup CUDA_STREAM Stream Management + * + * This section describes the stream management functions of the low-level CUDA + * driver application programming interface. + * + * @{ + */ + +/** + * \brief Create a stream culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * + * Creates a stream and returns a handle in \p phStream. \p Flags is required + * to be 0. + * + * \param phStream - Returned newly created stream + * \param Flags - Parameters for stream creation (must be 0) + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_OUT_OF_MEMORY + * \notefnerr + * + * \sa ::cuStreamDestroy, + * ::cuStreamWaitEvent, + * ::cuStreamQuery, + * ::cuStreamSynchronize culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + */ +CUresult CUDAAPI cuStreamCreate(CUstream *phStream, unsigned int Flags); + +/** + * \brief Make a compute stream wait on an event + * + * Makes all future work submitted to \p hStream wait until \p hEvent + * reports completion before beginning execution. This synchronization + * will be performed efficiently on the device. The event \p hEvent may + * be from a different context than \p hStream, in which case this function culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * will perform cross-device synchronization. + * + * The stream \p hStream will wait only for the completion of the most recent + * host call to ::cuEventRecord() on \p hEvent. Once this call has returned, + * any functions (including ::cuEventRecord() and ::cuEventDestroy()) may be + * called on \p hEvent again, and subsequent calls will not have any + * effect on \p hStream. + * + * If \p hStream is 0 (the NULL stream) any future work submitted in any stream + * will wait for \p hEvent to complete before beginning execution. This culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * effectively creates a barrier for all future work submitted to the context. + * + * If ::cuEventRecord() has not been called on \p hEvent, this call acts as if + * the record has already completed, and so is a functional no-op. 
+ * + * \param hStream - Stream to wait + * \param hEvent - Event to wait on (may not be NULL) + * \param Flags - Parameters for the operation (must be 0) + * + * \return culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE, + * \notefnerr + * + * \sa ::cuStreamCreate, + * ::cuEventRecord, + * ::cuStreamQuery, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuStreamSynchronize, + * ::cuStreamDestroy + */ +CUresult CUDAAPI cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags); + +/** + * \brief Determine status of a compute stream + * + * Returns ::CUDA_SUCCESS if all operations in the stream specified by + * \p hStream have completed, or ::CUDA_ERROR_NOT_READY if not. culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * \param hStream - Stream to query status of + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_NOT_READY culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuStreamCreate, + * ::cuStreamWaitEvent, + * ::cuStreamDestroy, + * ::cuStreamSynchronize + */ +CUresult CUDAAPI cuStreamQuery(CUstream hStream); + +/** culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \brief Wait until a stream's tasks are completed + * + * Waits until the device has completed all operations in the stream specified + * by \p hStream. If the context was created with the + * ::CU_CTX_SCHED_BLOCKING_SYNC flag, the CPU thread will block until the + * stream is finished with all of its tasks. + * + * \param hStream - Stream to wait for + * + * \return culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE + * \notefnerr + * + * \sa ::cuStreamCreate, + * ::cuStreamDestroy, + * ::cuStreamWaitEvent, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuStreamQuery + */ +CUresult CUDAAPI cuStreamSynchronize(CUstream hStream); + +#if __CUDA_API_VERSION >= 4000 +/** + * \brief Destroys a stream + * + * Destroys the stream specified by \p hStream. + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * In case the device is still doing work in the stream \p hStream + * when ::cuStreamDestroy() is called, the function will return immediately + * and the resources associated with \p hStream will be released automatically + * once the device has completed all work in \p hStream. + * + * \param hStream - Stream to destroy + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuStreamCreate, + * ::cuStreamWaitEvent, + * ::cuStreamQuery, + * ::cuStreamSynchronize + */ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +CUresult CUDAAPI cuStreamDestroy(CUstream hStream); +#endif /* __CUDA_API_VERSION >= 4000 */ + +/** @} */ /* END CUDA_STREAM */ + + +/** + * \defgroup CUDA_EVENT Event Management + * + * This section describes the event management functions of the low-level CUDA culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * driver application programming interface. + * + * @{ + */ + +/** + * \brief Creates an event + * + * Creates an event *phEvent with the flags specified via \p Flags. Valid flags + * include: culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * - ::CU_EVENT_DEFAULT: Default event creation flag. 
+ * - ::CU_EVENT_BLOCKING_SYNC: Specifies that the created event should use blocking + * synchronization. A CPU thread that uses ::cuEventSynchronize() to wait on + * an event created with this flag will block until the event has actually + * been recorded. + * - ::CU_EVENT_DISABLE_TIMING: Specifies that the created event does not need + * to record timing data. Events created with this flag specified and + * the ::CU_EVENT_BLOCKING_SYNC flag not specified will provide the best + * performance when used with ::cuStreamWaitEvent() and ::cuEventQuery(). + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \param phEvent - Returns newly created event + * \param Flags - Event creation flags + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_OUT_OF_MEMORY culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \notefnerr + * + * \sa + * ::cuEventRecord, + * ::cuEventQuery, + * ::cuEventSynchronize, + * ::cuEventDestroy, + * ::cuEventElapsedTime + */ +CUresult CUDAAPI cuEventCreate(CUevent *phEvent, unsigned int Flags); culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + +/** + * \brief Records an event + * + * Records an event. If \p hStream is non-zero, the event is recorded after all + * preceding operations in \p hStream have been completed; otherwise, it is + * recorded after all preceding operations in the CUDA context have been + * completed. Since this operation is asynchronous, ::cuEventQuery() and/or + * ::cuEventSynchronize() must be used to determine when the event has actually + * been recorded. culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * If ::cuEventRecord() has previously been called on \p hEvent, then this + * call will overwrite any existing state in \p hEvent.
Any subsequent calls + * which examine the status of \p hEvent will only examine the completion of + * this most recent call to ::cuEventRecord(). + * + * It is necessary that \p hEvent and \p hStream be created on the same context. + * + * \param hEvent - Event to record + * \param hStream - Stream to record event for culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \sa ::cuEventCreate, + * ::cuEventQuery, + * ::cuEventSynchronize, + * ::cuStreamWaitEvent, + * ::cuEventDestroy, + * ::cuEventElapsedTime + */ +CUresult CUDAAPI cuEventRecord(CUevent hEvent, CUstream hStream); + +/** culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \brief Queries an event's status + * + * Query the status of all device work preceding the most recent + * call to ::cuEventRecord() (in the appropriate compute streams, + * as specified by the arguments to ::cuEventRecord()). + * + * If this work has successfully been completed by the device, or if + * ::cuEventRecord() has not been called on \p hEvent, then ::CUDA_SUCCESS is + * returned. If this work has not yet been completed by the device then + * ::CUDA_ERROR_NOT_READY is returned. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * \param hEvent - Event to query + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_NOT_READY culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuEventCreate, + * ::cuEventRecord, + * ::cuEventSynchronize, + * ::cuEventDestroy, + * ::cuEventElapsedTime + */ +CUresult CUDAAPI cuEventQuery(CUevent hEvent); + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +/** + * \brief Waits for an event to complete + * + * Wait until the completion of all device work preceding the most recent + * call to ::cuEventRecord() (in the appropriate compute streams, as specified + * by the arguments to ::cuEventRecord()). + * + * If ::cuEventRecord() has not been called on \p hEvent, ::CUDA_SUCCESS is + * returned immediately. + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * Waiting for an event that was created with the ::CU_EVENT_BLOCKING_SYNC + * flag will cause the calling CPU thread to block until the event has + * been completed by the device. If the ::CU_EVENT_BLOCKING_SYNC flag has + * not been set, then the CPU thread will busy-wait until the event has + * been completed by the device. + * + * \param hEvent - Event to wait for + * + * \return + * ::CUDA_SUCCESS, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE + * \notefnerr + * + * \sa ::cuEventCreate, + * ::cuEventRecord, + * ::cuEventQuery, + * ::cuEventDestroy, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuEventElapsedTime + */ +CUresult CUDAAPI cuEventSynchronize(CUevent hEvent); + +#if __CUDA_API_VERSION >= 4000 +/** + * \brief Destroys an event + * + * Destroys the event specified by \p hEvent. + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * In case \p hEvent has been recorded but has not yet been completed + * when ::cuEventDestroy() is called, the function will return immediately and + * the resources associated with \p hEvent will be released automatically once + * the device has completed \p hEvent. 
+ * + * \param hEvent - Event to destroy + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE + * \notefnerr + * + * \sa ::cuEventCreate, + * ::cuEventRecord, + * ::cuEventQuery, + * ::cuEventSynchronize, + * ::cuEventElapsedTime culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + */ +CUresult CUDAAPI cuEventDestroy(CUevent hEvent); +#endif /* __CUDA_API_VERSION >= 4000 */ + +/** + * \brief Computes the elapsed time between two events + * + * Computes the elapsed time between two events (in milliseconds with a + * resolution of around 0.5 microseconds). + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * If either event was last recorded in a non-NULL stream, the resulting time + * may be greater than expected (even if both used the same stream handle). This + * happens because the ::cuEventRecord() operation takes place asynchronously + * and there is no guarantee that the measured latency is actually just between + * the two events. Any number of other different stream operations could execute + * in between the two measured events, thus altering the timing in a significant + * way. + * + * If ::cuEventRecord() has not been called on either event then + * ::CUDA_ERROR_INVALID_HANDLE is returned. If ::cuEventRecord() has been called culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * on both events but one or both of them has not yet been completed (that is, + * ::cuEventQuery() would return ::CUDA_ERROR_NOT_READY on at least one of the + * events), ::CUDA_ERROR_NOT_READY is returned. If either event was created with + * the ::CU_EVENT_DISABLE_TIMING flag, then this function will return + * ::CUDA_ERROR_INVALID_HANDLE. 
+ * + * \param pMilliseconds - Time between \p hStart and \p hEnd in ms + * \param hStart - Starting event + * \param hEnd - Ending event + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_NOT_READY + * \notefnerr + * + * \sa ::cuEventCreate, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuEventRecord, + * ::cuEventQuery, + * ::cuEventSynchronize, + * ::cuEventDestroy + */ +CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd); + +/** @} */ /* END CUDA_EVENT */ + + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +/** + * \defgroup CUDA_EXEC Execution Control + * + * This section describes the execution control functions of the low-level CUDA + * driver application programming interface. + * + * @{ + */ + +/** culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \brief Returns information about a function + * + * Returns in \p *pi the integer value of the attribute \p attrib on the kernel + * given by \p hfunc. The supported attributes are: + * - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: The maximum number of threads + * per block, beyond which a launch of the function would fail. This number + * depends on both the function and the device on which the function is + * currently loaded. + * - ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: The size in bytes of + * statically-allocated shared memory per block required by this function. culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * This does not include dynamically-allocated shared memory requested by + * the user at runtime. + * - ::CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: The size in bytes of user-allocated + * constant memory required by this function. 
+ * - ::CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: The size in bytes of local memory + * used by each thread of this function. + * - ::CU_FUNC_ATTRIBUTE_NUM_REGS: The number of registers used by each thread + * of this function. + * - ::CU_FUNC_ATTRIBUTE_PTX_VERSION: The PTX virtual architecture version for + * which the function was compiled. This value is the major PTX version * 10 culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + the minor PTX version, so a PTX version 1.3 function would return the + * value 13. Note that this may return the undefined value of 0 for cubins + * compiled prior to CUDA 3.0. + * - ::CU_FUNC_ATTRIBUTE_BINARY_VERSION: The binary architecture version for + * which the function was compiled. This value is the major binary + * version * 10 + the minor binary version, so a binary version 1.3 function + * would return the value 13. Note that this will return a value of 10 for + * legacy cubins that do not have a properly-encoded binary architecture + * version. + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \param pi - Returned attribute value + * \param attrib - Attribute requested + * \param hfunc - Function to query attribute of + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuCtxGetCacheConfig, + * ::cuCtxSetCacheConfig, + * ::cuFuncSetCacheConfig, + * ::cuLaunchKernel + */ +CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc); + culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz +/** + * \brief Sets the preferred cache configuration for a device function + * + * On devices where the L1 cache and shared memory use the same hardware + * resources, this sets through \p config the preferred cache configuration for + * the device function \p hfunc. This is only a preference. The driver will use + * the requested configuration if possible, but it is free to choose a different + * configuration if required to execute \p hfunc. Any context-wide preference + * set via ::cuCtxSetCacheConfig() will be overridden by this per-function + * setting unless the per-function setting is ::CU_FUNC_CACHE_PREFER_NONE. In culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * that case, the current context-wide setting will be used. + * + * This setting does nothing on devices where the size of the L1 cache and + * shared memory are fixed. + * + * Launching a kernel with a different preference than the most recent + * preference setting may insert a device-side synchronization point. + * + * + * The supported cache configurations are: culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default) + * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache + * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory + * - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory + * + * \param hfunc - Kernel to configure cache for + * \param config - Requested cache configuration + * + * \return + * ::CUDA_SUCCESS, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT + * \notefnerr + * + * \sa ::cuCtxGetCacheConfig, + * ::cuCtxSetCacheConfig, + * ::cuFuncGetAttribute, + * ::cuLaunchKernel culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + */ +CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config); + +#if __CUDA_API_VERSION >= 4020 +/** + * \brief Sets the shared memory configuration for a device function. + * + * On devices with configurable shared memory banks, this function will + * force all subsequent launches of the specified device function to have + * the given shared memory bank size configuration. On any given launch of the culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * function, the shared memory configuration of the device will be temporarily + * changed if needed to suit the function's preferred configuration. Changes in + * shared memory configuration between subsequent launches of functions, + * may introduce a device side synchronization point. + * + * Any per-function setting of shared memory bank size set via + * ::cuFuncSetSharedMemConfig will override the context wide setting set with + * ::cuCtxSetSharedMemConfig. + * + * Changing the shared memory bank size will not increase shared memory usage culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * or affect occupancy of kernels, but may have major effects on performance. + * Larger bank sizes will allow for greater potential bandwidth to shared memory, + * but will change what kinds of accesses to shared memory will result in bank + * conflicts. + * + * This function will do nothing on devices with fixed shared memory bank size. + * + * The supported bank configurations are: + * - ::CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: use the context's shared memory + * configuration when launching this function. culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * - ::CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width to + * be natively four bytes when launching this function. + * - ::CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank width to + * be natively eight bytes when launching this function. 
+ * + * \param hfunc - kernel to be given a shared memory config + * \param config - requested shared memory configuration + * + * \return + * ::CUDA_SUCCESS, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT + * \notefnerr + * + * \sa ::cuCtxGetCacheConfig, + * ::cuCtxSetCacheConfig, + * ::cuCtxGetSharedMemConfig, + * ::cuCtxSetSharedMemConfig, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuFuncGetAttribute, + * ::cuLaunchKernel + */ +CUresult CUDAAPI cuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config); +#endif + +#if __CUDA_API_VERSION >= 4000 +/** + * \brief Launches a CUDA function + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * Invokes the kernel \p f on a \p gridDimX x \p gridDimY x \p gridDimZ + * grid of blocks. Each block contains \p blockDimX x \p blockDimY x + * \p blockDimZ threads. + * + * \p sharedMemBytes sets the amount of dynamic shared memory that will be + * available to each thread block. + * + * ::cuLaunchKernel() can optionally be associated to a stream by passing a + * non-zero \p hStream argument. + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * Kernel parameters to \p f can be specified in one of two ways: + * + * 1) Kernel parameters can be specified via \p kernelParams. If \p f + * has N parameters, then \p kernelParams needs to be an array of N + * pointers. Each of \p kernelParams[0] through \p kernelParams[N-1] + * must point to a region of memory from which the actual kernel + * parameter will be copied. The number of kernel parameters and their + * offsets and sizes do not need to be specified as that information is + * retrieved directly from the kernel's image. + * culaunchkernel.ptsz How to dowload it?
culaunchkernel.ptsz + * 2) Kernel parameters can also be packaged by the application into + * a single buffer that is passed in via the \p extra parameter. + * This places the burden on the application of knowing each kernel + * parameter's size and alignment/padding within the buffer. Here is + * an example of using the \p extra parameter in this manner: + * \code + size_t argBufferSize; + char argBuffer[256]; + + // populate argBuffer and argBufferSize culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + + void *config[] = { + CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer, + CU_LAUNCH_PARAM_BUFFER_SIZE, &argBufferSize, + CU_LAUNCH_PARAM_END + }; + status = cuLaunchKernel(f, gx, gy, gz, bx, by, bz, sh, s, NULL, config); + * \endcode + * + * The \p extra parameter exists to allow ::cuLaunchKernel to take culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * additional less commonly used arguments. \p extra specifies a list of + * names of extra settings and their corresponding values. Each extra + * setting name is immediately followed by the corresponding value. The + * list must be terminated with either NULL or ::CU_LAUNCH_PARAM_END. + * + * - ::CU_LAUNCH_PARAM_END, which indicates the end of the \p extra + * array; + * - ::CU_LAUNCH_PARAM_BUFFER_POINTER, which specifies that the next + * value in \p extra will be a pointer to a buffer containing all + * the kernel parameters for launching kernel \p f; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * - ::CU_LAUNCH_PARAM_BUFFER_SIZE, which specifies that the next + * value in \p extra will be a pointer to a size_t containing the + * size of the buffer specified with ::CU_LAUNCH_PARAM_BUFFER_POINTER; + * + * The error ::CUDA_ERROR_INVALID_VALUE will be returned if kernel + * parameters are specified with both \p kernelParams and \p extra + * (i.e. both \p kernelParams and \p extra are non-NULL). 
+ * + * Calling ::cuLaunchKernel() sets persistent function state that is + * the same as function state set through the following deprecated APIs: culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * ::cuFuncSetBlockShape() + * ::cuFuncSetSharedSize() + * ::cuParamSetSize() + * ::cuParamSeti() + * ::cuParamSetf() + * ::cuParamSetv() + * + * When the kernel \p f is launched via ::cuLaunchKernel(), the previous + * block shape, shared size and parameter info associated with \p f culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * is overwritten. + * + * Note that to use ::cuLaunchKernel(), the kernel \p f must either have + * been compiled with toolchain version 3.2 or later so that it will + * contain kernel parameter information, or have no kernel parameters. + * If either of these conditions is not met, then ::cuLaunchKernel() will + * return ::CUDA_ERROR_INVALID_IMAGE. + * + * \param f - Kernel to launch + * \param gridDimX - Width of grid in blocks culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * \param gridDimY - Height of grid in blocks + * \param gridDimZ - Depth of grid in blocks + * \param blockDimX - X dimension of each thread block + * \param blockDimY - Y dimension of each thread block + * \param blockDimZ - Z dimension of each thread block + * \param sharedMemBytes - Dynamic shared-memory size per thread block in bytes + * \param hStream - Stream identifier + * \param kernelParams - Array of pointers to kernel parameters + * \param extra - Extra options + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_INVALID_IMAGE, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_LAUNCH_FAILED, + * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * ::CUDA_ERROR_LAUNCH_TIMEOUT, + * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, + * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED + * \notefnerr + * + * \sa ::cuCtxGetCacheConfig, + * ::cuCtxSetCacheConfig, + * ::cuFuncSetCacheConfig, + * ::cuFuncGetAttribute, + */ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +CUresult CUDAAPI cuLaunchKernel(CUfunction f, + unsigned int gridDimX, + unsigned int gridDimY, + unsigned int gridDimZ, + unsigned int blockDimX, + unsigned int blockDimY, + unsigned int blockDimZ, + unsigned int sharedMemBytes, + CUstream hStream, + void **kernelParams, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + void **extra); +#endif /* __CUDA_API_VERSION >= 4000 */ + +/** + * \defgroup CUDA_EXEC_DEPRECATED Execution Control [DEPRECATED] + * + * This section describes the deprecated execution control functions of the + * low-level CUDA driver application programming interface. + * + * @{ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + */ + +/** + * \brief Sets the block-dimensions for the function + * + * \deprecated + * + * Specifies the \p x, \p y, and \p z dimensions of the thread blocks that are + * created when the kernel given by \p hfunc is launched. + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \param hfunc - Kernel to specify dimensions of + * \param x - X dimension + * \param y - Y dimension + * \param z - Z dimension + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuFuncSetSharedSize, + * ::cuFuncSetCacheConfig, + * ::cuFuncGetAttribute, + * ::cuParamSetSize, + * ::cuParamSeti, + * ::cuParamSetf, culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * ::cuParamSetv, + * ::cuLaunch, + * ::cuLaunchGrid, + * ::cuLaunchGridAsync, + * ::cuLaunchKernel + */ +CUresult CUDAAPI cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z); + +/** + * \brief Sets the dynamic shared-memory size for the function culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * \deprecated + * + * Sets through \p bytes the amount of dynamic shared memory that will be + * available to each thread block when the kernel given by \p hfunc is launched. + * + * \param hfunc - Kernel to specify dynamic shared-memory size for + * \param bytes - Dynamic shared-memory size per thread block in bytes + * + * \return culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuFuncSetBlockShape, + * ::cuFuncSetCacheConfig, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuFuncGetAttribute, + * ::cuParamSetSize, + * ::cuParamSeti, + * ::cuParamSetf, + * ::cuParamSetv, + * ::cuLaunch, + * ::cuLaunchGrid, + * ::cuLaunchGridAsync, + * ::cuLaunchKernel + */ culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes); + +/** + * \brief Sets the parameter size for the function + * + * \deprecated + * + * Sets through \p numbytes the total size in bytes needed by the function + * parameters of the kernel corresponding to \p hfunc.
+ * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \param hfunc - Kernel to set parameter size for + * \param numbytes - Size of parameter list in bytes + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * \sa ::cuFuncSetBlockShape, + * ::cuFuncSetSharedSize, + * ::cuFuncGetAttribute, + * ::cuParamSetf, + * ::cuParamSeti, + * ::cuParamSetv, + * ::cuLaunch, + * ::cuLaunchGrid, + * ::cuLaunchGridAsync, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuLaunchKernel + */ +CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, unsigned int numbytes); + +/** + * \brief Adds an integer parameter to the function's argument list + * + * \deprecated + * + * Sets an integer parameter that will be specified the next time the culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset. + * + * \param hfunc - Kernel to add parameter to + * \param offset - Offset to add parameter to argument list + * \param value - Value of parameter + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuFuncSetBlockShape, + * ::cuFuncSetSharedSize, + * ::cuFuncGetAttribute, + * ::cuParamSetSize, + * ::cuParamSetf, + * ::cuParamSetv, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuLaunch, + * ::cuLaunchGrid, + * ::cuLaunchGridAsync, + * ::cuLaunchKernel + */ +CUresult CUDAAPI cuParamSeti(CUfunction hfunc, int offset, unsigned int value); + +/** + * \brief Adds a floating-point parameter to the function's argument list + * culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * \deprecated + * + * Sets a floating-point parameter that will be specified the next time the + * kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset. + * + * \param hfunc - Kernel to add parameter to + * \param offset - Offset to add parameter to argument list + * \param value - Value of parameter + * + * \return culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuFuncSetBlockShape, + * ::cuFuncSetSharedSize, + * ::cuFuncGetAttribute, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::cuParamSetSize, + * ::cuParamSeti, + * ::cuParamSetv, + * ::cuLaunch, + * ::cuLaunchGrid, + * ::cuLaunchGridAsync, + * ::cuLaunchKernel + */ +CUresult CUDAAPI cuParamSetf(CUfunction hfunc, int offset, float value); + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +/** + * \brief Adds arbitrary data to the function's argument list + * + * \deprecated + * + * Copies an arbitrary amount of data (specified in \p numbytes) from \p ptr + * into the parameter space of the kernel corresponding to \p hfunc. \p offset + * is a byte offset. + * + * \param hfunc - Kernel to add data to culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \param offset - Offset to add data to argument list + * \param ptr - Pointer to arbitrary data + * \param numbytes - Size of data to copy in bytes + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuFuncSetBlockShape, + * ::cuFuncSetSharedSize, + * ::cuFuncGetAttribute, + * ::cuParamSetSize, + * ::cuParamSetf, + * ::cuParamSeti, + * ::cuLaunch, + * ::cuLaunchGrid, culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * ::cuLaunchGridAsync, + * ::cuLaunchKernel + */ +CUresult CUDAAPI cuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes); + +/** + * \brief Launches a CUDA function + * + * \deprecated + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * Invokes the kernel \p f on a 1 x 1 x 1 grid of blocks. The block + * contains the number of threads specified by a previous call to + * ::cuFuncSetBlockShape(). + * + * \param f - Kernel to launch + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_LAUNCH_FAILED, + * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, + * ::CUDA_ERROR_LAUNCH_TIMEOUT, + * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, + * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED + * \notefnerr + * + * \sa ::cuFuncSetBlockShape, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuFuncSetSharedSize, + * ::cuFuncGetAttribute, + * ::cuParamSetSize, + * ::cuParamSetf, + * ::cuParamSeti, + * ::cuParamSetv, + * ::cuLaunchGrid, + * ::cuLaunchGridAsync, + * ::cuLaunchKernel + */ culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +CUresult CUDAAPI cuLaunch(CUfunction f); + +/** + * \brief Launches a CUDA function + * + * \deprecated + * + * Invokes the kernel \p f on a \p grid_width x \p grid_height grid of + * blocks. Each block contains the number of threads specified by a previous + * call to ::cuFuncSetBlockShape(). culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * + * \param f - Kernel to launch + * \param grid_width - Width of grid in blocks + * \param grid_height - Height of grid in blocks + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_LAUNCH_FAILED, + * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, + * ::CUDA_ERROR_LAUNCH_TIMEOUT, + * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, + * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED + * \notefnerr + * + * \sa ::cuFuncSetBlockShape, + * ::cuFuncSetSharedSize, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuFuncGetAttribute, + * ::cuParamSetSize, + * ::cuParamSetf, + * ::cuParamSeti, + * ::cuParamSetv, + * ::cuLaunch, + * ::cuLaunchGridAsync, + * ::cuLaunchKernel + */ +CUresult CUDAAPI cuLaunchGrid(CUfunction f, int grid_width, int grid_height); culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + +/** + * \brief Launches a CUDA function + * + * \deprecated + * + * Invokes the kernel \p f on a \p grid_width x \p grid_height grid of + * blocks. Each block contains the number of threads specified by a previous + * call to ::cuFuncSetBlockShape(). + * culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuLaunchGridAsync() can optionally be associated to a stream by passing a + * non-zero \p hStream argument. + * + * \param f - Kernel to launch + * \param grid_width - Width of grid in blocks + * \param grid_height - Height of grid in blocks + * \param hStream - Stream identifier + * + * \return + * ::CUDA_SUCCESS, culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_LAUNCH_FAILED, + * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, + * ::CUDA_ERROR_LAUNCH_TIMEOUT, + * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, + * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \notefnerr + * + * \sa ::cuFuncSetBlockShape, + * ::cuFuncSetSharedSize, + * ::cuFuncGetAttribute, + * ::cuParamSetSize, + * ::cuParamSetf, + * ::cuParamSeti, + * ::cuParamSetv, + * ::cuLaunch, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::cuLaunchGrid, + * ::cuLaunchKernel + */ +CUresult CUDAAPI cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream); + + +/** + * \brief Adds a texture-reference to the function's argument list + * + * \deprecated culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * Makes the CUDA array or linear memory bound to the texture reference + * \p hTexRef available to a device program as a texture. In this version of + * CUDA, the texture-reference must be obtained via ::cuModuleGetTexRef() and + * the \p texunit parameter must be set to ::CU_PARAM_TR_DEFAULT. + * + * \param hfunc - Kernel to add texture-reference to + * \param texunit - Texture unit (must be ::CU_PARAM_TR_DEFAULT) + * \param hTexRef - Texture-reference to add to argument list + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + */ +CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef); +/** @} */ /* END CUDA_EXEC_DEPRECATED */ culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + +/** @} */ /* END CUDA_EXEC */ + + +/** + * \defgroup CUDA_TEXREF Texture Reference Management + * + * This section describes the texture reference management functions of the + * low-level CUDA driver application programming interface. + * culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * @{ + */ + +/** + * \brief Binds an array as a texture reference + * + * Binds the CUDA array \p hArray to the texture reference \p hTexRef. Any + * previous address or CUDA array state associated with the texture reference + * is superseded by this function. \p Flags must be set to + * ::CU_TRSA_OVERRIDE_FORMAT. Any CUDA array previously bound to \p hTexRef is culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * unbound. + * + * \param hTexRef - Texture reference to bind + * \param hArray - Array to bind + * \param Flags - Options (must be ::CU_TRSA_OVERRIDE_FORMAT) + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefSetAddress, + * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, + * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat + */ +CUresult CUDAAPI cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags); culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + +#if __CUDA_API_VERSION >= 3020 +/** + * \brief Binds an address as a texture reference + * + * Binds a linear address range to the texture reference \p hTexRef. Any + * previous address or CUDA array state associated with the texture reference + * is superseded by this function. Any memory previously bound to \p hTexRef + * is unbound. + * culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * Since the hardware enforces an alignment requirement on texture base + * addresses, ::cuTexRefSetAddress() passes back a byte offset in + * \p *ByteOffset that must be applied to texture fetches in order to read from + * the desired memory. This offset must be divided by the texel size and + * passed to kernels that read from the texture so they can be applied to the + * ::tex1Dfetch() function. + * + * If the device memory pointer was returned from ::cuMemAlloc(), the offset + * is guaranteed to be 0 and NULL may be passed as the \p ByteOffset parameter. + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * The total number of elements (or texels) in the linear address range + * cannot exceed ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. + * The number of elements is computed as (\p bytes / bytesPerElement), + * where bytesPerElement is determined from the data format and number of + * components set using ::cuTexRefSetFormat(). + * + * \param ByteOffset - Returned byte offset + * \param hTexRef - Texture reference to bind + * \param dptr - Device pointer to bind + * \param bytes - Size of memory to bind in bytes culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat + */ +CUresult CUDAAPI cuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes); + +/** + * \brief Binds an address as a 2D texture reference + * + * Binds a linear address range to the texture reference \p hTexRef. Any + * previous address or CUDA array state associated with the texture reference culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * is superseded by this function. Any memory previously bound to \p hTexRef + * is unbound. + * + * Using a ::tex2D() function inside a kernel requires a call to either + * ::cuTexRefSetArray() to bind the corresponding texture reference to an + * array, or ::cuTexRefSetAddress2D() to bind the texture reference to linear + * memory. + * + * Function calls to ::cuTexRefSetFormat() cannot follow calls to + * ::cuTexRefSetAddress2D() for the same texture reference. culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * It is required that \p dptr be aligned to the appropriate hardware-specific + * texture alignment. You can query this value using the device attribute + * ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. If an unaligned \p dptr is + * supplied, ::CUDA_ERROR_INVALID_VALUE is returned. + * + * \p Pitch has to be aligned to the hardware-specific texture pitch alignment. + * This value can be queried using the device attribute + * ::CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT. If an unaligned \p Pitch is + * supplied, ::CUDA_ERROR_INVALID_VALUE is returned. culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * Width and Height, which are specified in elements (or texels), cannot exceed + * ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH and + * ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively. 
+ * \p Pitch, which is specified in bytes, cannot exceed + * ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH. + * + * \param hTexRef - Texture reference to bind + * \param desc - Descriptor of CUDA array + * \param dptr - Device pointer to bind culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * \param Pitch - Line pitch in bytes + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefSetAddress, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat + */ +CUresult CUDAAPI cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch); +#endif /* __CUDA_API_VERSION >= 3020 */ + +/** + * \brief Sets the format for a texture reference culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * + * Specifies the format of the data to be read by the texture reference + * \p hTexRef. \p fmt and \p NumPackedComponents are exactly analogous to the + * ::Format and ::NumChannels members of the ::CUDA_ARRAY_DESCRIPTOR structure: + * They specify the format of each component and the number of components per + * array element. + * + * \param hTexRef - Texture reference + * \param fmt - Format to set + * \param NumPackedComponents - Number of components per array element culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefSetAddress, + * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, + * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat + */ +CUresult CUDAAPI cuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents); + +/** + * \brief Sets the addressing mode for a texture reference + * + * Specifies the addressing mode \p am for the given dimension \p dim of the culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * texture reference \p hTexRef. If \p dim is zero, the addressing mode is + * applied to the first parameter of the functions used to fetch from the + * texture; if \p dim is 1, the second, and so on. ::CUaddress_mode is defined + * as: + * \code + typedef enum CUaddress_mode_enum { + CU_TR_ADDRESS_MODE_WRAP = 0, + CU_TR_ADDRESS_MODE_CLAMP = 1, + CU_TR_ADDRESS_MODE_MIRROR = 2, + CU_TR_ADDRESS_MODE_BORDER = 3 culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + } CUaddress_mode; + * \endcode + * + * Note that this call has no effect if \p hTexRef is bound to linear memory. + * Also, if the flag, ::CU_TRSF_NORMALIZED_COORDINATES, is not set, the only + * supported address mode is ::CU_TR_ADDRESS_MODE_CLAMP. + * + * \param hTexRef - Texture reference + * \param dim - Dimension + * \param am - Addressing mode to set culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefSetAddress, + * ::cuTexRefSetAddress2D, ::cuTexRefSetArray, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat + */ +CUresult CUDAAPI cuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am); + +/** + * \brief Sets the filtering mode for a texture reference + * + * Specifies the filtering mode \p fm to be used when reading memory through culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * the texture reference \p hTexRef. ::CUfilter_mode_enum is defined as: + * + * \code + typedef enum CUfilter_mode_enum { + CU_TR_FILTER_MODE_POINT = 0, + CU_TR_FILTER_MODE_LINEAR = 1 + } CUfilter_mode; + * \endcode + * + * Note that this call has no effect if \p hTexRef is bound to linear memory. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * \param hTexRef - Texture reference + * \param fm - Filtering mode to set + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * + * \sa ::cuTexRefSetAddress, + * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + * ::cuTexRefSetFlags, ::cuTexRefSetFormat, + * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat + */ +CUresult CUDAAPI cuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm); + +/** culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * \brief Sets the flags for a texture reference + * + * Specifies optional flags via \p Flags to specify the behavior of data + * returned through the texture reference \p hTexRef. The valid flags are: + * + * - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of + * having the texture promote integer data to floating point data in the + * range [0, 1]. Note that texture with 32-bit integer format + * would not be promoted, regardless of whether or not this + * flag is specified; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the + * default behavior of having the texture coordinates range + * from [0, Dim) where Dim is the width or height of the CUDA + * array. Instead, the texture coordinates [0, 1.0) reference + * the entire breadth of the array dimension; + * + * \param hTexRef - Texture reference + * \param Flags - Optional flags to set + * + * \return culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefSetAddress, + * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + * ::cuTexRefSetFilterMode, ::cuTexRefSetFormat, + * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat + */ +CUresult CUDAAPI cuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags); + +#if __CUDA_API_VERSION >= 3020 +/** + * \brief Gets the address associated with a texture reference + * + * Returns in \p *pdptr the base address bound to the texture reference + * \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture reference culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * is not bound to any device memory range. 
+ * + * \param pdptr - Returned device address + * \param hTexRef - Texture reference + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefSetAddress, + * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + * ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat + */ +CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef); +#endif /* __CUDA_API_VERSION >= 3020 */ culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + +/** + * \brief Gets the array bound to a texture reference + * + * Returns in \p *phArray the CUDA array bound to the texture reference + * \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture reference + * is not bound to any CUDA array. + * + * \param phArray - Returned array + * \param hTexRef - Texture reference culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefSetAddress, + * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, + * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat + */ +CUresult CUDAAPI cuTexRefGetArray(CUarray *phArray, CUtexref hTexRef); + +/** + * \brief Gets the addressing mode used by a texture reference + * + * Returns in \p *pam the addressing mode corresponding to the culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * dimension \p dim of the texture reference \p hTexRef. Currently, the only + * valid values for \p dim are 0 and 1. + * + * \param pam - Returned addressing mode + * \param hTexRef - Texture reference + * \param dim - Dimension + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefSetAddress, + * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + * ::cuTexRefGetAddress, ::cuTexRefGetArray, + * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat + */ culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +CUresult CUDAAPI cuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim); + +/** + * \brief Gets the filter-mode used by a texture reference + * + * Returns in \p *pfm the filtering mode of the texture reference + * \p hTexRef. + * + * \param pfm - Returned filtering mode + * \param hTexRef - Texture reference culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefSetAddress, + * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, culaunchkernel.ptsz How to get it?
culaunchkernel.ptsz + * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + * ::cuTexRefGetFlags, ::cuTexRefGetFormat + */ +CUresult CUDAAPI cuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef); + +/** + * \brief Gets the format used by a texture reference + * + * Returns in \p *pFormat and \p *pNumChannels the format and number culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * of components of the CUDA array bound to the texture reference \p hTexRef. + * If \p pFormat or \p pNumChannels is NULL, it will be ignored. + * + * \param pFormat - Returned format + * \param pNumChannels - Returned number of components + * \param hTexRef - Texture reference + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefSetAddress, + * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags + */ culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +CUresult CUDAAPI cuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef); + +/** + * \brief Gets the flags used by a texture reference + * + * Returns in \p *pFlags the flags of the texture reference \p hTexRef. + * + * \param pFlags - Returned flags + * \param hTexRef - Texture reference + * culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefSetAddress, + * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + * ::cuTexRefGetFilterMode, ::cuTexRefGetFormat + */ +CUresult CUDAAPI cuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef); + +/** + * \defgroup CUDA_TEXREF_DEPRECATED Texture Reference Management [DEPRECATED] + * + * This section describes the deprecated texture reference management + * functions of the low-level CUDA driver application programming interface. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * @{ + */ + +/** + * \brief Creates a texture reference + * + * \deprecated + * + * Creates a texture reference and returns its handle in \p *pTexRef. Once culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * created, the application must call ::cuTexRefSetArray() or + * ::cuTexRefSetAddress() to associate the reference with allocated memory. + * Other texture reference functions are used to specify the format and + * interpretation (addressing, filtering, etc.) to be used when the memory is + * read through this texture reference. + * + * \param pTexRef - Returned texture reference + * + * \return + * ::CUDA_SUCCESS, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefDestroy + */ +CUresult CUDAAPI cuTexRefCreate(CUtexref *pTexRef); + +/** culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * \brief Destroys a texture reference + * + * \deprecated + * + * Destroys the texture reference specified by \p hTexRef. + * + * \param hTexRef - Texture reference to destroy + * + * \return + * ::CUDA_SUCCESS, culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuTexRefCreate + */ +CUresult CUDAAPI cuTexRefDestroy(CUtexref hTexRef); + +/** @} */ /* END CUDA_TEXREF_DEPRECATED */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + +/** @} */ /* END CUDA_TEXREF */ + + +/** + * \defgroup CUDA_SURFREF Surface Reference Management + * + * This section describes the surface reference management functions of the + * low-level CUDA driver application programming interface. + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * @{ + */ + +/** + * \brief Sets the CUDA array for a surface reference. + * + * Sets the CUDA array \p hArray to be read and written by the surface reference + * \p hSurfRef. Any previous CUDA array state associated with the surface + * reference is superseded by this function. \p Flags must be set to 0. + * The ::CUDA_ARRAY3D_SURFACE_LDST flag must have been set for the CUDA array. culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * Any CUDA array previously bound to \p hSurfRef is unbound. + + * \param hSurfRef - Surface reference handle + * \param hArray - CUDA array handle + * \param Flags - set to 0 + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuModuleGetSurfRef, ::cuSurfRefGetArray + */ +CUresult CUDAAPI cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags); + +/** + * \brief Passes back the CUDA array bound to a surface reference. 
+ * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * Returns in \p *phArray the CUDA array bound to the surface reference + * \p hSurfRef, or returns ::CUDA_ERROR_INVALID_VALUE if the surface reference + * is not bound to any CUDA array. + + * \param phArray - Surface reference handle + * \param hSurfRef - Surface reference handle + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * + * \sa ::cuModuleGetSurfRef, ::cuSurfRefSetArray + */ +CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef); + +/** @} */ /* END CUDA_SURFREF */ + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +#if __CUDA_API_VERSION >= 4000 +/** + * \defgroup CUDA_PEER_ACCESS Peer Context Memory Access + * + * This section describes the direct peer context memory access functions + * of the low-level CUDA driver application programming interface. + * + * @{ + */ + culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +/** + * \brief Queries if a device may directly access a peer device's memory. + * + * Returns in \p *canAccessPeer a value of 1 if contexts on \p dev are capable of + * directly accessing memory from contexts on \p peerDev and 0 otherwise. + * If direct access of \p peerDev from \p dev is possible, then access may be + * enabled on two specific contexts by calling ::cuCtxEnablePeerAccess(). + * + * \param canAccessPeer - Returned access capability + * \param dev - Device from which allocations on \p peerDev are to culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * be directly accessed. + * \param peerDev - Device on which the allocations to be directly accessed + * by \p dev reside. 
+ * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_DEVICE + * \notefnerr culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * \sa ::cuCtxEnablePeerAccess, + * ::cuCtxDisablePeerAccess + */ +CUresult CUDAAPI cuDeviceCanAccessPeer(int *canAccessPeer, CUdevice dev, CUdevice peerDev); + +/** + * \brief Enables direct access to memory allocations in a peer context. + * + If both the current context and \p peerContext are on devices which support unified culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * addressing (as may be queried using ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING), then + * on success all allocations from \p peerContext will immediately be accessible + * by the current context. See \ref CUDA_UNIFIED for additional + * details. + * + * Note that access granted by this call is unidirectional and that in order to access + * memory from the current context in \p peerContext, a separate symmetric call + * to ::cuCtxEnablePeerAccess() is required. + * + * Returns ::CUDA_ERROR_INVALID_DEVICE if ::cuDeviceCanAccessPeer() indicates culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * that the ::CUdevice of the current context cannot directly access memory + * from the ::CUdevice of \p peerContext. + * + * Returns ::CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED if direct access of + * \p peerContext from the current context has already been enabled. + * + * Returns ::CUDA_ERROR_TOO_MANY_PEERS if direct peer access is not possible + * because hardware resources required for peer access have been exhausted. + * + * Returns ::CUDA_ERROR_INVALID_CONTEXT if there is no current context, \p peerContext culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * is not a valid context, or if the current context is \p peerContext. + * + * Returns ::CUDA_ERROR_INVALID_VALUE if \p Flags is not 0. 
+ * + * \param peerContext - Peer context to enable direct access to from the current context + * \param Flags - Reserved for future use and must be set to 0 + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_DEVICE, + * ::CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, + * ::CUDA_ERROR_TOO_MANY_PEERS, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE + * \notefnerr + * + * \sa ::cuDeviceCanAccessPeer, + * ::cuCtxDisablePeerAccess culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + */ +CUresult CUDAAPI cuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags); + +/** + * \brief Disables direct access to memory allocations in a peer context and + * unregisters any registered allocations. + * + Returns ::CUDA_ERROR_PEER_ACCESS_NOT_ENABLED if direct peer access has + * not yet been enabled from \p peerContext to the current context. + * culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * Returns ::CUDA_ERROR_INVALID_CONTEXT if there is no current context, or if + * \p peerContext is not a valid context. + * + * \param peerContext - Peer context to disable direct access to + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_CONTEXT, + * \notefnerr + * + * \sa ::cuDeviceCanAccessPeer, + * ::cuCtxEnablePeerAccess + */ +CUresult CUDAAPI cuCtxDisablePeerAccess(CUcontext peerContext); + +/** @} */ /* END CUDA_PEER_ACCESS */ +#endif /* __CUDA_API_VERSION >= 4000 */ culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + +/** + * \defgroup CUDA_GRAPHICS Graphics Interoperability + * + * This section describes the graphics interoperability functions of the + * low-level CUDA driver application programming interface. 
+ * + * @{ + */ + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +/** + * \brief Unregisters a graphics resource for access by CUDA + * + * Unregisters the graphics resource \p resource so it is not accessible by + * CUDA unless registered again. + * + * If \p resource is invalid then ::CUDA_ERROR_INVALID_HANDLE is + * returned. + * + * \param resource - Resource to unregister culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_UNKNOWN + * \notefnerr + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \sa + * ::cuGraphicsD3D9RegisterResource, + * ::cuGraphicsD3D10RegisterResource, + * ::cuGraphicsD3D11RegisterResource, + * ::cuGraphicsGLRegisterBuffer, + * ::cuGraphicsGLRegisterImage + */ +CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resource); + +/** culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \brief Get an array through which to access a subresource of a mapped graphics resource. + * + * Returns in \p *pArray an array through which the subresource of the mapped + * graphics resource \p resource which corresponds to array index \p arrayIndex + * and mipmap level \p mipLevel may be accessed. The value set in \p *pArray may + * change every time that \p resource is mapped. + * + * If \p resource is not a texture then it cannot be accessed via an array and + * ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned. + * If \p arrayIndex is not a valid array index for \p resource then culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_VALUE is returned. + * If \p mipLevel is not a valid mipmap level for \p resource then + * ::CUDA_ERROR_INVALID_VALUE is returned. + * If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned. 
+ * + * \param pArray - Returned array through which a subresource of \p resource may be accessed + * \param resource - Mapped resource to access + * \param arrayIndex - Array index for array textures or cubemap face + * index as defined by ::CUarray_cubemap_face for + * cubemap textures for the subresource to access culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * \param mipLevel - Mipmap level for the subresource to access + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_NOT_MAPPED culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY + * \notefnerr + * + * \sa ::cuGraphicsResourceGetMappedPointer + */ +CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel); + +#if __CUDA_API_VERSION >= 3020 +/** + * \brief Get a device pointer through which to access a mapped graphics resource. culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * + * Returns in \p *pDevPtr a pointer through which the mapped graphics resource + * \p resource may be accessed. + * Returns in \p pSize the size of the memory in bytes which may be accessed from that pointer. + * The value set in \p pPointer may change every time that \p resource is mapped. + * + * If \p resource is not a buffer then it cannot be accessed via a pointer and + * ::CUDA_ERROR_NOT_MAPPED_AS_POINTER is returned. + * If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned. + * * culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * \param pDevPtr - Returned pointer through which \p resource may be accessed + * \param pSize - Returned size of the buffer accessible starting at \p *pPointer + * \param resource - Mapped resource to access + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_NOT_MAPPED + * ::CUDA_ERROR_NOT_MAPPED_AS_POINTER + * \notefnerr + * + * \sa + * ::cuGraphicsMapResources, + * ::cuGraphicsSubResourceGetMappedArray + */ +CUresult CUDAAPI cuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +#endif /* __CUDA_API_VERSION >= 3020 */ + +/** + * \brief Set usage flags for mapping a graphics resource + * + * Set \p flags for mapping the graphics resource \p resource. + * + * Changes to \p flags will take effect the next time \p resource is mapped. + * The \p flags argument may be any of the following: + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this + * resource will be used. It is therefore assumed that this resource will be + * read from and written to by CUDA kernels. This is the default value. + * - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA kernels which + * access this resource will not write to this resource. + * - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA kernels + * which access this resource will not read from this resource and will + * write over the entire contents of the resource, so none of the data + * previously stored in the resource will be preserved. + * culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * If \p resource is presently mapped for access by CUDA then + * ::CUDA_ERROR_ALREADY_MAPPED is returned. + * If \p flags is not one of the above values then ::CUDA_ERROR_INVALID_VALUE is returned. + * + * \param resource - Registered resource to set flags for + * \param flags - Parameters for resource mapping + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_VALUE, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_ALREADY_MAPPED + * \notefnerr + * + * \sa + * ::cuGraphicsMapResources + */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +CUresult CUDAAPI cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags); + +/** + * \brief Map graphics resources for access by CUDA + * + * Maps the \p count graphics resources in \p resources for access by CUDA. + * + * The resources in \p resources may be accessed by CUDA until they + * are unmapped. The graphics API from which \p resources were registered + * should not access any resources while they are mapped by CUDA. If an culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * application does so, the results are undefined. + * + * This function provides the synchronization guarantee that any graphics calls + * issued before ::cuGraphicsMapResources() will complete before any subsequent CUDA + * work issued in \p stream begins. + * + * If \p resources includes any duplicate entries then ::CUDA_ERROR_INVALID_HANDLE is returned. + * If any of \p resources are presently mapped for access by CUDA then ::CUDA_ERROR_ALREADY_MAPPED is returned. + * + * \param count - Number of resources to map culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + * \param resources - Resources to map for CUDA usage + * \param hStream - Stream with which to synchronize + * + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_ALREADY_MAPPED, culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * ::CUDA_ERROR_UNKNOWN + * \notefnerr + * + * \sa + * ::cuGraphicsResourceGetMappedPointer + * ::cuGraphicsSubResourceGetMappedArray + * ::cuGraphicsUnmapResources + */ +CUresult CUDAAPI cuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream); + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +/** + * \brief Unmap graphics resources. + * + * Unmaps the \p count graphics resources in \p resources. + * + * Once unmapped, the resources in \p resources may not be accessed by CUDA + * until they are mapped again. + * + * This function provides the synchronization guarantee that any CUDA work issued + * in \p stream before ::cuGraphicsUnmapResources() will complete before any culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * subsequently issued graphics work begins. + * + * + * If \p resources includes any duplicate entries then ::CUDA_ERROR_INVALID_HANDLE is returned. + * If any of \p resources are not presently mapped for access by CUDA then ::CUDA_ERROR_NOT_MAPPED is returned. + * + * \param count - Number of resources to unmap + * \param resources - Resources to unmap + * \param hStream - Stream with which to synchronize + * culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + * \return + * ::CUDA_SUCCESS, + * ::CUDA_ERROR_DEINITIALIZED, + * ::CUDA_ERROR_NOT_INITIALIZED, + * ::CUDA_ERROR_INVALID_CONTEXT, + * ::CUDA_ERROR_INVALID_HANDLE, + * ::CUDA_ERROR_NOT_MAPPED, + * ::CUDA_ERROR_UNKNOWN + * \notefnerr + * culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * \sa + * ::cuGraphicsMapResources + */ +CUresult CUDAAPI cuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream); + +/** @} */ /* END CUDA_GRAPHICS */ + +CUresult CUDAAPI cuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId); + + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +/** @} */ /* END CUDA_DRIVER */ + +/** + * CUDA API versioning support + */ +#if defined(__CUDA_API_VERSION_INTERNAL) + #undef cuDeviceTotalMem + #undef cuCtxCreate + #undef cuModuleGetGlobal + #undef cuMemGetInfo culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + #undef cuMemAlloc + #undef cuMemAllocPitch + #undef cuMemFree + #undef cuMemGetAddressRange + #undef cuMemAllocHost + #undef cuMemHostGetDevicePointer + #undef cuMemcpyHtoD + #undef cuMemcpyDtoH + #undef cuMemcpyDtoD + #undef cuMemcpyDtoA culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + #undef cuMemcpyAtoD + #undef cuMemcpyHtoA + #undef cuMemcpyAtoH + #undef cuMemcpyAtoA + #undef cuMemcpyHtoAAsync + #undef cuMemcpyAtoHAsync + #undef cuMemcpy2D + #undef cuMemcpy2DUnaligned + #undef cuMemcpy3D + #undef cuMemcpyHtoDAsync culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + #undef cuMemcpyDtoHAsync + #undef cuMemcpyDtoDAsync + #undef cuMemcpy2DAsync + #undef cuMemcpy3DAsync + #undef cuMemsetD8 + #undef cuMemsetD16 + #undef cuMemsetD32 + #undef cuMemsetD2D8 + #undef cuMemsetD2D16 + #undef cuMemsetD2D32 culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + #undef cuArrayCreate + #undef cuArrayGetDescriptor + #undef cuArray3DCreate + #undef cuArray3DGetDescriptor + #undef cuTexRefSetAddress + #undef cuTexRefSetAddress2D + #undef cuTexRefGetAddress + #undef cuGraphicsResourceGetMappedPointer + #undef cuCtxDestroy + #undef cuCtxPopCurrent culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + #undef cuCtxPushCurrent + #undef cuStreamDestroy + #undef cuEventDestroy +#endif /* __CUDA_API_VERSION_INTERNAL */ + +#if defined(__CUDA_API_VERSION_INTERNAL) || (__CUDA_API_VERSION >= 3020 && __CUDA_API_VERSION < 4010) +CUresult CUDAAPI cuTexRefSetAddress2D_v2(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch); +#endif /* __CUDA_API_VERSION_INTERNAL || (__CUDA_API_VERSION >= 3020 && __CUDA_API_VERSION < 4010) */ + +/** culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + * CUDA API made obselete at API version 3020 + */ +#if defined(__CUDA_API_VERSION_INTERNAL) + #define CUdeviceptr CUdeviceptr_v1 + #define CUDA_MEMCPY2D_st CUDA_MEMCPY2D_v1_st + #define CUDA_MEMCPY2D CUDA_MEMCPY2D_v1 + #define CUDA_MEMCPY3D_st CUDA_MEMCPY3D_v1_st + #define CUDA_MEMCPY3D CUDA_MEMCPY3D_v1 + #define CUDA_ARRAY_DESCRIPTOR_st CUDA_ARRAY_DESCRIPTOR_v1_st + #define CUDA_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR_v1 culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + #define CUDA_ARRAY3D_DESCRIPTOR_st CUDA_ARRAY3D_DESCRIPTOR_v1_st + #define CUDA_ARRAY3D_DESCRIPTOR CUDA_ARRAY3D_DESCRIPTOR_v1 +#endif /* CUDA_FORCE_LEGACY32_INTERNAL */ + +#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020 + +typedef unsigned int CUdeviceptr; + +typedef struct CUDA_MEMCPY2D_st +{ culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + unsigned int srcXInBytes; /**< Source X in bytes */ + unsigned int srcY; /**< Source Y */ + CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */ + const void *srcHost; /**< Source host pointer */ + CUdeviceptr srcDevice; /**< Source device pointer */ + CUarray srcArray; /**< Source array reference */ + unsigned int srcPitch; /**< Source pitch (ignored when src is array) */ + + unsigned int dstXInBytes; /**< Destination X in bytes */ + unsigned int dstY; /**< Destination Y */ culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */ + void *dstHost; /**< Destination host pointer */ + CUdeviceptr dstDevice; /**< Destination device pointer */ + CUarray dstArray; /**< Destination array reference */ + unsigned int dstPitch; /**< Destination pitch (ignored when dst is array) */ + + unsigned int WidthInBytes; /**< Width of 2D memory copy in bytes */ + unsigned int Height; /**< Height of 2D memory copy */ +} CUDA_MEMCPY2D; + culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz +typedef struct CUDA_MEMCPY3D_st +{ + unsigned int srcXInBytes; /**< Source X in bytes */ + unsigned int srcY; /**< Source Y */ + unsigned int srcZ; /**< Source Z */ + unsigned int srcLOD; /**< Source LOD */ + CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */ + const void *srcHost; /**< Source host pointer */ + CUdeviceptr srcDevice; /**< Source device pointer */ + CUarray srcArray; /**< Source array reference */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + void *reserved0; /**< Must be NULL */ + unsigned int srcPitch; /**< Source pitch (ignored when src is array) */ + unsigned int srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */ + + unsigned int dstXInBytes; /**< Destination X in bytes */ + unsigned int dstY; /**< Destination Y */ + unsigned int dstZ; /**< Destination Z */ + unsigned int dstLOD; /**< Destination LOD */ + CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */ + void *dstHost; /**< Destination host pointer */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + CUdeviceptr dstDevice; /**< Destination device pointer */ + CUarray dstArray; /**< Destination array reference */ + void *reserved1; /**< Must be NULL */ + unsigned int dstPitch; /**< Destination pitch (ignored when dst is array) */ + unsigned int dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */ + + unsigned int WidthInBytes; /**< Width of 3D memory copy in bytes */ + unsigned int Height; /**< Height of 3D memory copy */ + unsigned int Depth; /**< Depth of 3D memory copy */ +} CUDA_MEMCPY3D; culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + +typedef struct CUDA_ARRAY_DESCRIPTOR_st +{ + unsigned int Width; /**< Width of array */ + unsigned int Height; /**< Height of array */ + + CUarray_format Format; /**< Array format */ + unsigned int NumChannels; /**< Channels per array element */ +} CUDA_ARRAY_DESCRIPTOR; + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +typedef struct CUDA_ARRAY3D_DESCRIPTOR_st +{ + unsigned int Width; /**< Width of 3D array */ + unsigned int Height; /**< Height of 3D array */ + unsigned int Depth; /**< Depth of 3D array */ + + CUarray_format Format; /**< Array format */ + unsigned int NumChannels; /**< Channels per array element */ + unsigned int Flags; /**< Flags */ +} CUDA_ARRAY3D_DESCRIPTOR; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + +CUresult CUDAAPI cuDeviceTotalMem(unsigned int *bytes, CUdevice dev); +CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev); +CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, unsigned int *bytes, CUmodule hmod, const char *name); +CUresult CUDAAPI cuMemGetInfo(unsigned int *free, unsigned int *total); +CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, unsigned int bytesize); +CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, unsigned int *pPitch, unsigned int WidthInBytes, unsigned int Height, unsigned int ElementSizeBytes); +CUresult CUDAAPI cuMemFree(CUdeviceptr dptr); +CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, unsigned int *psize, CUdeviceptr dptr); +CUresult CUDAAPI cuMemAllocHost(void **pp, unsigned int bytesize); culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz +CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags); +CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, unsigned int ByteCount); +CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount); +CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, unsigned int ByteCount); +CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, unsigned int dstOffset, CUdeviceptr srcDevice, unsigned int ByteCount); +CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount); +CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, unsigned int dstOffset, const void *srcHost, unsigned int ByteCount); +CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount); +CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, unsigned int dstOffset, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount); +CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, unsigned int dstOffset, const void *srcHost, unsigned int ByteCount, CUstream hStream); culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz +CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount, CUstream hStream); +CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy); +CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy); +CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy); +CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, unsigned int ByteCount, CUstream hStream); +CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream); +CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream); +CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream); +CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream); +CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, unsigned int N); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, unsigned int N); +CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, unsigned int N); +CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned char uc, unsigned int Width, unsigned int Height); +CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned short us, unsigned int Width, unsigned int Height); +CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned int ui, unsigned int Width, unsigned int Height); +CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray); +CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray); +CUresult CUDAAPI cuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray); +CUresult CUDAAPI cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray); 
+CUresult CUDAAPI cuTexRefSetAddress(unsigned int *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, unsigned int bytes); culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +CUresult CUDAAPI cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, unsigned int Pitch); +CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef); +CUresult CUDAAPI cuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, unsigned int *pSize, CUgraphicsResource resource); +#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION < 3020 */ +#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 4000 +CUresult CUDAAPI cuCtxDestroy(CUcontext ctx); +CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx); +CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx); +CUresult CUDAAPI cuStreamDestroy(CUstream hStream); +CUresult CUDAAPI cuEventDestroy(CUevent hEvent); culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION < 4000 */ + +#if defined(__CUDA_API_VERSION_INTERNAL) + #undef CUdeviceptr + #undef CUDA_MEMCPY2D_st + #undef CUDA_MEMCPY2D + #undef CUDA_MEMCPY3D_st + #undef CUDA_MEMCPY3D + #undef CUDA_ARRAY_DESCRIPTOR_st + #undef CUDA_ARRAY_DESCRIPTOR culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + #undef CUDA_ARRAY3D_DESCRIPTOR_st + #undef CUDA_ARRAY3D_DESCRIPTOR +#endif /* __CUDA_API_VERSION_INTERNAL */ + +#ifdef __cplusplus +} +#endif + +#undef __CUDA_API_VERSION + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +#endif /* __cuda_cuda_h__ */ diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/cuda_dynlink.h b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/cuda_dynlink.h new file mode 100644 index 0000000..24434ee --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/cuda_dynlink.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2011 Hendrik Leppkes + * http://www.1f0.de culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * Assembled from parts of the NVIDIA CUDA SDK, Copyright by NVIDIA, All rights reserved. + */ + +#pragma once culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +#include "cuda.h" +//////////////////////////////////////////////////// +/// CUDA functions +//////////////////////////////////////////////////// +typedef CUresult CUDAAPI tcuInit(unsigned int Flags); +typedef CUresult CUDAAPI tcuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev ); +typedef CUresult CUDAAPI tcuCtxDestroy( CUcontext ctx ); +typedef CUresult CUDAAPI tcuCtxPushCurrent( CUcontext ctx ); +typedef CUresult CUDAAPI tcuCtxPopCurrent( CUcontext *pctx ); +typedef CUresult CUDAAPI tcuMemAllocHost(void **pp, unsigned int bytesize); culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz +typedef CUresult CUDAAPI tcuMemFreeHost(void *p); +typedef CUresult CUDAAPI tcuMemcpyDtoH (void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount ); +typedef CUresult CUDAAPI tcuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream); +typedef CUresult CUDAAPI tcuStreamCreate(CUstream *phStream, unsigned int Flags); +typedef CUresult CUDAAPI tcuStreamDestroy(CUstream hStream); +typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream); +typedef CUresult CUDAAPI tcuDeviceGetCount(int *count); +typedef CUresult CUDAAPI tcuDriverGetVersion(int *driverVersion); +typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev); +typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev); culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +typedef CUresult CUDAAPI tcuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev); + +//////////////////////////////////////////////////// +/// D3D Interop +//////////////////////////////////////////////////// +typedef CUresult CUDAAPI tcuD3D9CtxCreate( CUcontext *pCtx, CUdevice *pCudaDevice, unsigned int Flags, IDirect3DDevice9 *pD3DDevice ); + +//////////////////////////////////////////////////// +/// CUVID functions +//////////////////////////////////////////////////// culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz +typedef CUresult CUDAAPI tcuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx); +typedef CUresult CUDAAPI tcuvidCtxLockDestroy(CUvideoctxlock lck); +typedef CUresult CUDAAPI tcuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags); +typedef CUresult CUDAAPI tcuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags); + +typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams); +typedef CUresult CUDAAPI tcuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket); +typedef CUresult CUDAAPI tcuvidDestroyVideoParser(CUvideoparser obj); + +// Create/Destroy the decoder object culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +typedef CUresult CUDAAPI tcuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci); +typedef CUresult CUDAAPI tcuvidDestroyDecoder(CUvideodecoder hDecoder); + +// Decode a single picture (field or frame) +typedef CUresult CUDAAPI tcuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams); + +// Post-process and map a video frame for use in cuda +typedef CUresult CUDAAPI tcuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, unsigned int *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP); +// Unmap a previously mapped video frame +typedef CUresult CUDAAPI tcuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr); culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/cuviddec.h b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/cuviddec.h new file mode 100644 index 0000000..4c2674e --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/cuviddec.h @@ -0,0 +1,523 @@ +/* + * Copyright 1993-2008 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO USER: culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * + * This source code is subject to NVIDIA ownership rights under U.S. and + * international Copyright laws. 
Users and possessors of this source code + * are hereby granted a nonexclusive, royalty-free license to use this code + * in individual and commercial software. + * + * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE + * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR + * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, + * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE + * OR PERFORMANCE OF THIS SOURCE CODE. + * + * U.S. Government End Users. This source code is a "commercial item" as + * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of + * "commercial computer software" and "commercial computer software culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) + * and is provided to the U.S. Government only as a commercial end item. + * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through + * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the + * source code with only those rights set forth herein. + * + * Any use of this source code in individual and commercial software must + * include, in the user documentation and internal comments to the code, + * the above Disclaimer and U.S. Government End Users Notice. 
+ */ culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + +#if !defined(__CUDA_VIDEO_H__) +#define __CUDA_VIDEO_H__ + +#ifndef __cuda_cuda_h__ +#include +#endif // __cuda_cuda_h__ + +#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) +#if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020)) culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +#define __CUVID_DEVPTR64 +#endif +#endif + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +typedef void *CUvideodecoder; +typedef struct _CUcontextlock_st *CUvideoctxlock; culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + +typedef enum cudaVideoCodec_enum { + cudaVideoCodec_MPEG1=0, + cudaVideoCodec_MPEG2, + cudaVideoCodec_MPEG4, + cudaVideoCodec_VC1, + cudaVideoCodec_H264, + cudaVideoCodec_JPEG, + cudaVideoCodec_H264_SVC, + cudaVideoCodec_H264_MVC, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + cudaVideoCodec_NumCodecs, + // Uncompressed YUV + cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), // Y,U,V (4:2:0) + cudaVideoCodec_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,V,U (4:2:0) + cudaVideoCodec_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,UV (4:2:0) + cudaVideoCodec_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), // YUYV/YUY2 (4:2:2) + cudaVideoCodec_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')), // UYVY (4:2:2) +} cudaVideoCodec; + +typedef enum cudaVideoSurfaceFormat_enum { culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + cudaVideoSurfaceFormat_NV12=0, // NV12 (currently the only supported output format) +} cudaVideoSurfaceFormat; + +typedef enum cudaVideoDeinterlaceMode_enum { + cudaVideoDeinterlaceMode_Weave=0, // Weave both fields (no deinterlacing) + cudaVideoDeinterlaceMode_Bob, // Drop one field + cudaVideoDeinterlaceMode_Adaptive, // Adaptive deinterlacing +} cudaVideoDeinterlaceMode; + +typedef enum cudaVideoChromaFormat_enum { culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + cudaVideoChromaFormat_Monochrome=0, + cudaVideoChromaFormat_420, + cudaVideoChromaFormat_422, + cudaVideoChromaFormat_444, +} cudaVideoChromaFormat; + +typedef enum cudaVideoCreateFlags_enum { + cudaVideoCreate_Default = 0x00, // Default operation mode: use dedicated video engines + cudaVideoCreate_PreferCUDA = 0x01, // Use a CUDA-based decoder if faster than dedicated engines (requires a valid vidLock object for multi-threading) + cudaVideoCreate_PreferDXVA = 0x02, // Go through DXVA internally if possible (requires D3D9 interop) culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + cudaVideoCreate_PreferCUVID = 0x04, // Use dedicated video engines directly +} cudaVideoCreateFlags; + + +typedef struct _CUVIDDECODECREATEINFO +{ + // Decoding + unsigned long ulWidth; // Coded Sequence Width + unsigned long ulHeight; // Coded Sequence Height + unsigned long ulNumDecodeSurfaces; // Maximum number of internal decode surfaces culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + cudaVideoCodec CodecType; // cudaVideoCodec_XXX + cudaVideoChromaFormat ChromaFormat; // cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported) + unsigned long ulCreationFlags; // Decoder creation flags (cudaVideoCreateFlags_XXX) + unsigned long Reserved1[5]; // Reserved for future use - set to zero + struct { // area of the frame that should be displayed + short left; + short top; + short right; + short bottom; + } display_area; culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + // Output format + cudaVideoSurfaceFormat OutputFormat; // cudaVideoSurfaceFormat_XXX + cudaVideoDeinterlaceMode DeinterlaceMode; // cudaVideoDeinterlaceMode_XXX + unsigned long ulTargetWidth; // Post-processed Output Width + unsigned long ulTargetHeight; // Post-processed Output Height + unsigned long ulNumOutputSurfaces; // Maximum number of output surfaces simultaneously mapped + CUvideoctxlock vidLock; // If non-NULL, context lock used for synchronizing ownership of the cuda context + struct { // target rectangle in the output frame (for aspect ratio conversion) + short left; + short top; culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + short right; + short bottom; + } target_rect; // if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used + unsigned long Reserved2[5]; // Reserved for future use - set to zero +} CUVIDDECODECREATEINFO; + + +//////////////////////////////////////////////////////////////////////////////////////////////// +// +// H.264 Picture Parameters culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +// + +typedef struct _CUVIDH264DPBENTRY +{ + int PicIdx; // picture index of reference frame + int FrameIdx; // frame_num(short-term) or LongTermFrameIdx(long-term) + int is_long_term; // 0=short term reference, 1=long term reference + int not_existing; // non-existing reference frame (corresponding PicIdx should be set to -1) + int used_for_reference; // 0=unused, 1=top_field, 2=bottom_field, 3=both_fields + int FieldOrderCnt[2]; // field order count of top and bottom fields culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +} CUVIDH264DPBENTRY; + +typedef struct _CUVIDH264MVCEXT +{ + int num_views_minus1; + int view_id; + unsigned char inter_view_flag; + unsigned char num_inter_view_refs_l0; + unsigned char num_inter_view_refs_l1; + unsigned char MVCReserved8Bits; culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + int InterViewRefsL0[16]; + int InterViewRefsL1[16]; +} CUVIDH264MVCEXT; + +typedef struct _CUVIDH264SVCEXT +{ + unsigned char profile_idc; + unsigned char level_idc; + unsigned char DQId; + unsigned char DQIdMax; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + unsigned char disable_inter_layer_deblocking_filter_idc; + unsigned char ref_layer_chroma_phase_y_plus1; + signed char inter_layer_slice_alpha_c0_offset_div2; + signed char inter_layer_slice_beta_offset_div2; + + unsigned short DPBEntryValidFlag; + unsigned char inter_layer_deblocking_filter_control_present_flag; + unsigned char extended_spatial_scalability_idc; + unsigned char adaptive_tcoeff_level_prediction_flag; + unsigned char slice_header_restriction_flag; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + unsigned char chroma_phase_x_plus1_flag; + unsigned char chroma_phase_y_plus1; + + unsigned char tcoeff_level_prediction_flag; + unsigned char constrained_intra_resampling_flag; + unsigned char ref_layer_chroma_phase_x_plus1_flag; + unsigned char store_ref_base_pic_flag; + unsigned char Reserved8BitsA; + unsigned char Reserved8BitsB; + // For the 4 scaled_ref_layer_XX fields below, culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + // if (extended_spatial_scalability_idc == 1), SPS field, G.7.3.2.1.4, add prefix "seq_" + // if (extended_spatial_scalability_idc == 2), SLH field, G.7.3.3.4, + short scaled_ref_layer_left_offset; + short scaled_ref_layer_top_offset; + short scaled_ref_layer_right_offset; + short scaled_ref_layer_bottom_offset; + unsigned short Reserved16Bits; + struct _CUVIDPICPARAMS *pNextLayer; // Points to the picparams for the next layer to be decoded. Linked list ends at the target layer. + int bRefBaseLayer; // whether to store ref base pic +} CUVIDH264SVCEXT; culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + +typedef struct _CUVIDH264PICPARAMS +{ + // SPS + int log2_max_frame_num_minus4; + int pic_order_cnt_type; + int log2_max_pic_order_cnt_lsb_minus4; + int delta_pic_order_always_zero_flag; + int frame_mbs_only_flag; + int direct_8x8_inference_flag; culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + int num_ref_frames; // NOTE: shall meet level 4.1 restrictions + unsigned char residual_colour_transform_flag; + unsigned char bit_depth_luma_minus8; // Must be 0 (only 8-bit supported) + unsigned char bit_depth_chroma_minus8; // Must be 0 (only 8-bit supported) + unsigned char qpprime_y_zero_transform_bypass_flag; + // PPS + int entropy_coding_mode_flag; + int pic_order_present_flag; + int num_ref_idx_l0_active_minus1; + int num_ref_idx_l1_active_minus1; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + int weighted_pred_flag; + int weighted_bipred_idc; + int pic_init_qp_minus26; + int deblocking_filter_control_present_flag; + int redundant_pic_cnt_present_flag; + int transform_8x8_mode_flag; + int MbaffFrameFlag; + int constrained_intra_pred_flag; + int chroma_qp_index_offset; + int second_chroma_qp_index_offset; culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + int ref_pic_flag; + int frame_num; + int CurrFieldOrderCnt[2]; + // DPB + CUVIDH264DPBENTRY dpb[16]; // List of reference frames within the DPB + // Quantization Matrices (raster-order) + unsigned char WeightScale4x4[6][16]; + unsigned char WeightScale8x8[2][64]; + // FMO/ASO + unsigned char fmo_aso_enable; culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + unsigned char num_slice_groups_minus1; + unsigned char slice_group_map_type; + signed char pic_init_qs_minus26; + unsigned int slice_group_change_rate_minus1; + union + { + unsigned long long slice_group_map_addr; + const unsigned char *pMb2SliceGroupMap; + } fmo; + unsigned int Reserved[12]; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + // SVC/MVC + union + { + CUVIDH264MVCEXT mvcext; + CUVIDH264SVCEXT svcext; + }; +} CUVIDH264PICPARAMS; + + +//////////////////////////////////////////////////////////////////////////////////////////////// culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +// +// MPEG-2 Picture Parameters +// + +typedef struct _CUVIDMPEG2PICPARAMS +{ + int ForwardRefIdx; // Picture index of forward reference (P/B-frames) + int BackwardRefIdx; // Picture index of backward reference (B-frames) + int picture_coding_type; + int full_pel_forward_vector; culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + int full_pel_backward_vector; + int f_code[2][2]; + int intra_dc_precision; + int frame_pred_frame_dct; + int concealment_motion_vectors; + int q_scale_type; + int intra_vlc_format; + int alternate_scan; + int top_field_first; + // Quantization matrices (raster order) culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + unsigned char QuantMatrixIntra[64]; + unsigned char QuantMatrixInter[64]; +} CUVIDMPEG2PICPARAMS; + +//////////////////////////////////////////////////////////////////////////////////////////////// +// +// MPEG-4 Picture Parameters +// + +// MPEG-4 has VOP types instead of Picture types culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz +#define I_VOP 0 +#define P_VOP 1 +#define B_VOP 2 +#define S_VOP 3 + +typedef struct _CUVIDMPEG4PICPARAMS +{ + int ForwardRefIdx; // Picture index of forward reference (P/B-frames) + int BackwardRefIdx; // Picture index of backward reference (B-frames) + // VOL culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + int video_object_layer_width; + int video_object_layer_height; + int vop_time_increment_bitcount; + int top_field_first; + int resync_marker_disable; + int quant_type; + int quarter_sample; + int short_video_header; + int divx_flags; + // VOP culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + int vop_coding_type; + int vop_coded; + int vop_rounding_type; + int alternate_vertical_scan_flag; + int interlaced; + int vop_fcode_forward; + int vop_fcode_backward; + int trd[2]; + int trb[2]; + // Quantization matrices (raster order) culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + unsigned char QuantMatrixIntra[64]; + unsigned char QuantMatrixInter[64]; + int gmc_enabled; +} CUVIDMPEG4PICPARAMS; + +//////////////////////////////////////////////////////////////////////////////////////////////// +// +// VC1 Picture Parameters +// + culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +typedef struct _CUVIDVC1PICPARAMS +{ + int ForwardRefIdx; // Picture index of forward reference (P/B-frames) + int BackwardRefIdx; // Picture index of backward reference (B-frames) + int FrameWidth; // Actual frame width + int FrameHeight; // Actual frame height + // PICTURE + int intra_pic_flag; // Set to 1 for I,BI frames + int ref_pic_flag; // Set to 1 for I,P frames + int progressive_fcm; // Progressive frame culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + // SEQUENCE + int profile; + int postprocflag; + int pulldown; + int interlace; + int tfcntrflag; + int finterpflag; + int psf; + int multires; + int syncmarker; culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + int rangered; + int maxbframes; + // ENTRYPOINT + int panscan_flag; + int refdist_flag; + int extended_mv; + int dquant; + int vstransform; + int loopfilter; + int fastuvmc; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + int overlap; + int quantizer; + int extended_dmv; + int range_mapy_flag; + int range_mapy; + int range_mapuv_flag; + int range_mapuv; + int rangeredfrm; // range reduction state +} CUVIDVC1PICPARAMS; + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +//////////////////////////////////////////////////////////////////////////////////////////////// +// +// JPEG Picture Parameters +// + +typedef struct _CUVIDJPEGPICPARAMS +{ + int Reserved; +} CUVIDJPEGPICPARAMS; + culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +//////////////////////////////////////////////////////////////////////////////////////////////// +// +// Picture Parameters for Decoding +// + +typedef struct _CUVIDPICPARAMS +{ + int PicWidthInMbs; // Coded Frame Size + int FrameHeightInMbs; // Coded Frame Height + int CurrPicIdx; // Output index of the current picture culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + int field_pic_flag; // 0=frame picture, 1=field picture + int bottom_field_flag; // 0=top field, 1=bottom field (ignored if field_pic_flag=0) + int second_field; // Second field of a complementary field pair + // Bitstream data + unsigned int nBitstreamDataLen; // Number of bytes in bitstream data buffer + const unsigned char *pBitstreamData; // Ptr to bitstream data for this picture (slice-layer) + unsigned int nNumSlices; // Number of slices in this picture + const unsigned int *pSliceDataOffsets; // nNumSlices entries, contains offset of each slice within the bitstream data buffer + int ref_pic_flag; // This picture is a reference picture + int intra_pic_flag; // This picture is entirely intra coded culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + unsigned int Reserved[30]; // Reserved for future use + // Codec-specific data + union { + CUVIDMPEG2PICPARAMS mpeg2; // Also used for MPEG-1 + CUVIDH264PICPARAMS h264; + CUVIDVC1PICPARAMS vc1; + CUVIDMPEG4PICPARAMS mpeg4; + CUVIDJPEGPICPARAMS jpeg; + unsigned int CodecReserved[1024]; + } CodecSpecific; culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +} CUVIDPICPARAMS; + + +//////////////////////////////////////////////////////////////////////////////////////////////// +// +// Post-processing +// + +typedef struct _CUVIDPROCPARAMS +{ culaunchkernel.ptsz How to get it for free? 
culaunchkernel.ptsz + int progressive_frame; // Input is progressive (deinterlace_mode will be ignored) + int second_field; // Output the second field (ignored if deinterlace mode is Weave) + int top_field_first; // Input frame is top field first (1st field is top, 2nd field is bottom) + int unpaired_field; // Input only contains one field (2nd field is invalid) + // The fields below are used for raw YUV input + unsigned int reserved_flags; // Reserved for future use (set to zero) + unsigned int reserved_zero; // Reserved (set to zero) + unsigned long long raw_input_dptr; // Input CUdeviceptr for raw YUV extensions + unsigned int raw_input_pitch; // pitch in bytes of raw YUV input (should be aligned appropriately) + unsigned int raw_input_format; // Reserved for future use (set to zero) culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + unsigned long long raw_output_dptr; // Reserved for future use (set to zero) + unsigned int raw_output_pitch; // Reserved for future use (set to zero) + unsigned int Reserved[48]; + void *Reserved3[3]; +} CUVIDPROCPARAMS; + +//////////////////////////////////////////////////////////////////////////////////////////////// +// +// In order to minimize decode latencies, there should be always at least 2 pictures in the decode +// queue at any time, in order to make sure that all decode engines are always busy. culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +// +// Overall data flow: +// - cuvidCreateDecoder(...) +// For each picture: +// - cuvidDecodePicture(N) +// - cuvidMapVideoFrame(N-4) +// - do some processing in cuda +// - cuvidUnmapVideoFrame(N-4) +// - cuvidDecodePicture(N+1) +// - cuvidMapVideoFrame(N-3) culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +// ... +// - cuvidDestroyDecoder(...) +// +// NOTE: +// - In the current version, the cuda context MUST be created from a D3D device, using cuD3D9CtxCreate function.
+// For multi-threaded operation, the D3D device must also be created with the D3DCREATE_MULTITHREADED flag. +// - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces) +// - cuvidDecodePicture may block the calling thread if there are too many pictures pending +// in the decode queue +// culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +//////////////////////////////////////////////////////////////////////////////////////////////// + +// Create/Destroy the decoder object +extern CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci); +extern CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder); + +// Decode a single picture (field or frame) +extern CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams); + +#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL) culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz +// Post-process and map a video frame for use in cuda +extern CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, + unsigned int *pDevPtr, unsigned int *pPitch, + CUVIDPROCPARAMS *pVPP); +// Unmap a previously mapped video frame +extern CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr); +#endif + +#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) +extern CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, culaunchkernel.ptsz How to use it?
culaunchkernel.ptsz + unsigned int *pPitch, CUVIDPROCPARAMS *pVPP); +extern CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr); +#if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL) +#define cuvidMapVideoFrame cuvidMapVideoFrame64 +#define cuvidUnmapVideoFrame cuvidUnmapVideoFrame64 +#endif +#endif + +// Get the pointer to the d3d9 surface that is the decode RT +extern CUresult CUDAAPI cuvidGetVideoFrameSurface(CUvideodecoder hDecoder, int nPicIdx, void **pSrcSurface); culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + +//////////////////////////////////////////////////////////////////////////////////////////////// +// +// Context-locking: to facilitate multi-threaded implementations, the following 4 functions +// provide a simple mutex-style host synchronization. If a non-NULL context is specified +// in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given +// context before making any cuda calls. +// A multi-threaded application could create a lock associated with a context handle so that +// multiple threads can safely share the same cuda context: +// - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +// that can be passed to cuvidCtxLockCreate. +// - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section. +// +// NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to video +// decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls). 
+ +extern CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx); +extern CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck); +extern CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags); +extern CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags); culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + +//////////////////////////////////////////////////////////////////////////////////////////////// + +#if defined(__cplusplus) +} + +// Auto-lock helper for C++ applications +class CCtxAutoLock +{ +private: culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + CUvideoctxlock m_ctx; +public: + CCtxAutoLock(CUvideoctxlock ctx):m_ctx(ctx) { cuvidCtxLock(m_ctx,0); } + ~CCtxAutoLock() { cuvidCtxUnlock(m_ctx,0); } +}; + +#endif /* __cplusplus */ + +#endif // __CUDA_VIDEO_H__ diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/nvcuvid.h b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/nvcuvid.h culaunchkernel.ptsz How to get it? culaunchkernel.ptsz new file mode 100644 index 0000000..0b81ee4 --- /dev/null +++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/Cuda/nvcuvid.h @@ -0,0 +1,228 @@ +/* + * Copyright 1993-2008 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO USER: + * culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + * This source code is subject to NVIDIA ownership rights under U.S. and + * international Copyright laws. Users and possessors of this source code + * are hereby granted a nonexclusive, royalty-free license to use this code + * in individual and commercial software. + * + * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE + * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR + * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
culaunchkernel.ptsz PasteShr culaunchkernel.ptsz + * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, + * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE + * OR PERFORMANCE OF THIS SOURCE CODE. + * + * U.S. Government End Users. This source code is a "commercial item" as + * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of + * "commercial computer software" and "commercial computer software + * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + * and is provided to the U.S. Government only as a commercial end item. + * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through + * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the + * source code with only those rights set forth herein. + * + * Any use of this source code in individual and commercial software must + * include, in the user documentation and internal comments to the code, + * the above Disclaimer and U.S. Government End Users Notice. + */ + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +#if !defined(__NVCUVID_H__) +#define __NVCUVID_H__ + +#include "cuviddec.h" + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +//////////////////////////////////////////////////////////////////////////////////////////////// culaunchkernel.ptsz PasteShr culaunchkernel.ptsz +// +// High-level helper APIs for video sources +// + +typedef void *CUvideosource; +typedef void *CUvideoparser; +typedef long long CUvideotimestamp; + +//////////////////////////////////////////////////////////////////////////////////////////////// +// culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz +// video data structures +// + +// Video Source State +typedef enum { + cudaVideoState_Error = -1, // Error state (invalid source) + cudaVideoState_Stopped = 0, // Source is stopped (or reached end-of-stream) + cudaVideoState_Started = 1, // Source is running and delivering data +} cudaVideoState; + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +// Audio compression +typedef enum { + cudaAudioCodec_MPEG1=0, // MPEG-1 Audio + cudaAudioCodec_MPEG2, // MPEG-2 Audio + cudaAudioCodec_MP3, // MPEG-1 Layer III Audio + cudaAudioCodec_AC3, // Dolby Digital (AC3) Audio + cudaAudioCodec_LPCM, // PCM Audio +} cudaAudioCodec; + + culaunchkernel.ptsz How to get it? culaunchkernel.ptsz +// Video format +typedef struct +{ + cudaVideoCodec codec; // Compression format + struct { + unsigned int numerator; // frame rate numerator (0 = unspecified or variable frame rate) + unsigned int denominator; // frame rate denominator (0 = unspecified or variable frame rate) + } frame_rate; // frame rate = numerator / denominator (for example: 30000/1001) + int progressive_sequence; // 0=interlaced, 1=progressive + unsigned int coded_width; // coded frame width culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + unsigned int coded_height; // coded frame height + struct { // area of the frame that should be displayed + int left; // typical example: + int top; // coded_width = 1920, coded_height = 1088 + int right; // display_area = { 0,0,1920,1080 } + int bottom; + } display_area; + cudaVideoChromaFormat chroma_format; // Chroma format + unsigned int bitrate; // video bitrate (bps, 0=unknown) + struct { // Display Aspect Ratio = x:y (4:3, 16:9, etc) culaunchkernel.ptsz How to use it? 
culaunchkernel.ptsz + int x; + int y; + } display_aspect_ratio; + struct { + unsigned char video_format; + unsigned char color_primaries; + unsigned char transfer_characteristics; + unsigned char matrix_coefficients; + } video_signal_description; + unsigned int seqhdr_data_length; // Additional bytes following (CUVIDEOFORMATEX) culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +} CUVIDEOFORMAT; + +// Video format including raw sequence header information +typedef struct +{ + CUVIDEOFORMAT format; + unsigned char raw_seqhdr_data[1024]; +} CUVIDEOFORMATEX; + + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +// Audio Format +typedef struct +{ + cudaAudioCodec codec; // Compression format + unsigned int channels; // number of audio channels + unsigned int samplespersec; // sampling frequency + unsigned int bitrate; // For uncompressed, can also be used to determine bits per sample + unsigned int reserved1; + unsigned int reserved2; +} CUAUDIOFORMAT; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + + + +//////////////////////////////////////////////////////////////////////////////////////////////// +// +// video source +// + +// Data packet +typedef enum { culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz + CUVID_PKT_ENDOFSTREAM = 0x01, // Set when this is the last packet for this stream + CUVID_PKT_TIMESTAMP = 0x02, // Timestamp is valid + CUVID_PKT_DISCONTINUITY = 0x04, // Set when a discontinuity has to be signalled +} CUvideopacketflags; + +typedef struct _CUVIDSOURCEDATAPACKET +{ + unsigned long flags; // Combination of CUVID_PKT_XXX flags + unsigned long payload_size; // number of bytes in the payload (may be zero if EOS flag is set) + const unsigned char *payload; // Pointer to packet payload data (may be NULL if EOS flag is set) culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + CUvideotimestamp timestamp; // Presentation timestamp (10MHz clock), only valid if CUVID_PKT_TIMESTAMP flag is set +} CUVIDSOURCEDATAPACKET; + +// Callback for packet delivery +typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *); + +typedef struct _CUVIDSOURCEPARAMS +{ + unsigned int ulClockRate; // Timestamp units in Hz (0=default=10000000Hz) + unsigned int uReserved1[7]; // Reserved for future use - set to zero culaunchkernel.ptsz How to use it? culaunchkernel.ptsz + void *pUserData; // Parameter passed in to the data handlers + PFNVIDSOURCECALLBACK pfnVideoDataHandler; // Called to deliver video packets + PFNVIDSOURCECALLBACK pfnAudioDataHandler; // Called to deliver audio packets + void *pvReserved2[8]; // Reserved for future use - set to NULL +} CUVIDSOURCEPARAMS; + +typedef enum { + CUVID_FMT_EXTFORMATINFO = 0x100, // Return extended format structure (CUVIDEOFORMATEX) +} CUvideosourceformat_flags; + culaunchkernel.ptsz How to use it? culaunchkernel.ptsz +#if !defined(__APPLE__) +// Video file source +CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams); +CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams); +CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj); +CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state); +cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj); +CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags); +CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags); +#endif culaunchkernel.ptsz How to get it? 
culaunchkernel.ptsz + +//////////////////////////////////////////////////////////////////////////////////////////////// +// +// Video parser +// + +typedef struct _CUVIDPARSERDISPINFO +{ + int picture_index; + int progressive_frame; culaunchkernel.ptsz How to get it? culaunchkernel.ptsz + int top_field_first; + int repeat_first_field; // Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling, -1=unpaired field) + CUvideotimestamp timestamp; +} CUVIDPARSERDISPINFO; + +// +// Parser callbacks +// The parser will call these synchronously from within cuvidParseVideoData(), whenever a picture is ready to +// be decoded and/or displayed. +// culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *); +typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *); +typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *); + +typedef struct _CUVIDPARSERPARAMS +{ + cudaVideoCodec CodecType; // cudaVideoCodec_XXX + unsigned int ulMaxNumDecodeSurfaces; // Max # of decode surfaces (parser will cycle through these) + unsigned int ulClockRate; // Timestamp units in Hz (0=default=10000000Hz) + unsigned int ulErrorThreshold; // % Error threshold (0-100) for calling pfnDecodePicture (100=always call pfnDecodePicture even if picture bitstream is fully corrupted) culaunchkernel.ptsz How to dowload it? 
culaunchkernel.ptsz + unsigned int ulMaxDisplayDelay; // Max display queue delay (improves pipelining of decode with display) - 0=no delay (recommended values: 2..4) + unsigned int uReserved1[5]; // Reserved for future use - set to 0 + void *pUserData; // User data for callbacks + PFNVIDSEQUENCECALLBACK pfnSequenceCallback; // Called before decoding frames and/or whenever there is a format change + PFNVIDDECODECALLBACK pfnDecodePicture; // Called when a picture is ready to be decoded (decode order) + PFNVIDDISPLAYCALLBACK pfnDisplayPicture; // Called whenever a picture is ready to be displayed (display order) + void *pvReserved2[7]; // Reserved for future use - set to NULL + CUVIDEOFORMATEX *pExtVideoInfo; // [Optional] sequence header data from system layer +} CUVIDPARSERPARAMS; + culaunchkernel.ptsz How to get it for free? culaunchkernel.ptsz + +CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams); +CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket); +CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj); + + +//////////////////////////////////////////////////////////////////////////////////////////////// + +#if defined(__cplusplus) +} culaunchkernel.ptsz How to dowload it? culaunchkernel.ptsz +#endif /* __cplusplus */ + +#endif // __NVCUVID_H__ culaunchkernel.ptsz