// 
// Notice Regarding Standards.  AMD does not provide a license or sublicense to
// any Intellectual Property Rights relating to any standards, including but not
// limited to any audio and/or video codec technologies such as MPEG-2, MPEG-4;
// AVC/H.264; HEVC/H.265; AAC decode/FFMPEG; AAC encode/FFMPEG; VC-1; and MP3
// (collectively, the "Media Technologies"). For clarity, you will pay any
// royalties due for such third party technologies, which may include the Media
// Technologies that are owed as a result of AMD providing the Software to you.
// 
// MIT license 
// 
// Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//

#include <iostream>
#include <limits.h>
#include <locale>

#include "OCLApplication.h"
#ifdef _WIN32
    #define _CRT_NONSTDC_NO_DEPRECATE
    #define _CRT_SECURE_NO_WARNINGS
    #pragma warning(disable: 4996)
#elif __linux__
    #include <sys/mman.h>
    #include <stdio.h>
    #include <string.h>
    #include <GL/glx.h>
#endif

#include <CL/cl_gl.h>





// Puts every OpenCL handle and extension entry point owned by this class
// into a well-defined "not yet initialised" state. Nothing is created here;
// the platform, context and entry points are filled in by Init() and the
// helpers it calls.
OCLApplication::OCLApplication()
{
    // OpenCL platform / context handles
    m_clPlatformId = nullptr;
    m_clContext    = nullptr;

    // SSG file handle (set by P2POpenFile)
    m_clSsgFile = NULL;

    // SSG extension entry points (resolved by InitSsgExtensions)
    clEnqueueWriteSsgFileAMD  = nullptr;
    clEnqueueReadSsgFileAMD   = nullptr;
    clReleaseSsgFileObjectAMD = nullptr;
    clRetainSsgFileObjectAMD  = nullptr;
    clGetSsgFileObjectInfoAMD = nullptr;
    clCreateSsgFileObjectAMD  = nullptr;

#ifdef DX11_APP
    // D3D11 sharing entry points (resolved by InitExtensions)
    clEnqueueReleaseD3D11ObjectsKHR = nullptr;
    clEnqueueAcquireD3D11ObjectsKHR = nullptr;
    clCreateFromD3D11BufferKHR      = nullptr;
#elif OGL_APP
    // the OpenGL flavour always goes through the interop path
    m_IsInteropUsed = true;
#endif
}

// Tears the object down by running the regular termination path, which
// releases the OpenCL objects and then calls the base class Term().
OCLApplication::~OCLApplication()
{
    this->Term();
}


//
//
// Initialize
//
//

bool OCLApplication::FindPlatformID()
{
    // check to see how many platform IDs we have...
    cl_uint numPlatforms = 0;
    cl_int  clStatus     = clGetPlatformIDs(0, NULL, &numPlatforms);
    if (clStatus != CL_SUCCESS)
    {
        std::cout << "Error: Unable get any platform IDs - clGetPlatformIDs() failed." << std::endl;
        return false;
    }
    if (numPlatforms == 0)
    {
        std::cout << "Error: clGetPlatformIDs() returned 0 platforms." << std::endl;
        return false;
    }

    // get the platform IDs
    std::vector<cl_platform_id> platforms;
    platforms.resize(numPlatforms);
    clStatus = clGetPlatformIDs(numPlatforms, platforms.data(), NULL);
    if (clStatus != CL_SUCCESS)
    {
        std::cout << "Error: Unable get any platform IDs - clGetPlatformIDs() failed." << std::endl;
        return false;
    }

    // look for the AMD info...
    for (cl_uint i = 0; i < numPlatforms; ++i)
    {
        char pbuf[1000];
        clStatus = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL);
        if (clStatus != CL_SUCCESS)
        {
            std::cout << "Error: clGetPlatformInfoclGetPlatformInfo() failed." << std::endl;
            continue;
        }

        if (!strcmp(pbuf, "Advanced Micro Devices, Inc."))
        {
            m_clPlatformId = platforms[i];
            return true;
        }
    }

    return false;
}

// Resolves the graphics-interop extension entry points for the selected
// platform (m_clPlatformId).
//
// DX11_APP: looks up the three D3D11 sharing functions and returns true only
//           when all of them were found.
// OGL_APP:  nothing has to be resolved, always returns true.
bool  OCLApplication::InitExtensions()
{
#ifdef DX11_APP
    // small helper so every lookup goes through the same platform
    const auto lookup = [this](const char* entryPoint) -> void*
    {
        return clGetExtensionFunctionAddressForPlatform(m_clPlatformId, entryPoint);
    };

    clCreateFromD3D11BufferKHR      = reinterpret_cast<clCreateFromD3D11BufferKHR_fn>(lookup("clCreateFromD3D11BufferKHR"));
    clEnqueueAcquireD3D11ObjectsKHR = reinterpret_cast<clEnqueueAcquireD3D11ObjectsKHR_fn>(lookup("clEnqueueAcquireD3D11ObjectsKHR"));
    clEnqueueReleaseD3D11ObjectsKHR = reinterpret_cast<clEnqueueReleaseD3D11ObjectsKHR_fn>(lookup("clEnqueueReleaseD3D11ObjectsKHR"));

    // every single entry point is required
    if (clCreateFromD3D11BufferKHR == nullptr)
        return false;
    if (clEnqueueAcquireD3D11ObjectsKHR == nullptr)
        return false;
    return clEnqueueReleaseD3D11ObjectsKHR != nullptr;
#elif OGL_APP
    return true;
#endif
}

bool  OCLApplication::InitSsgExtensions()
{
    clCreateSsgFileObjectAMD = (clCreateSsgFileObjectAMD_fn)clGetExtensionFunctionAddressForPlatform(m_clPlatformId, "clCreateSsgFileObjectAMD");
    clGetSsgFileObjectInfoAMD = (clGetSsgFileObjectInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(m_clPlatformId, "clGetSsgFileObjectInfoAMD");
    clRetainSsgFileObjectAMD = (clRetainSsgFileObjectAMD_fn)clGetExtensionFunctionAddressForPlatform(m_clPlatformId, "clRetainSsgFileObjectAMD");
    clReleaseSsgFileObjectAMD = (clReleaseSsgFileObjectAMD_fn)clGetExtensionFunctionAddressForPlatform(m_clPlatformId, "clReleaseSsgFileObjectAMD");
    clEnqueueReadSsgFileAMD = (clEnqueueReadSsgFileAMD_fn)clGetExtensionFunctionAddressForPlatform(m_clPlatformId, "clEnqueueReadSsgFileAMD");
    clEnqueueWriteSsgFileAMD = (clEnqueueWriteSsgFileAMD_fn)clGetExtensionFunctionAddressForPlatform(m_clPlatformId, "clEnqueueWriteSsgFileAMD");

    return (clCreateSsgFileObjectAMD != nullptr) &&
           (clGetSsgFileObjectInfoAMD != nullptr) &&
           (clRetainSsgFileObjectAMD != nullptr) &&
           (clReleaseSsgFileObjectAMD != nullptr) &&
           (clEnqueueReadSsgFileAMD != nullptr) &&
           (clEnqueueWriteSsgFileAMD != nullptr);
}

bool  OCLApplication::InitCommandQueue()
{
    // add devices if needed
    cl_device_id  clInteroppedDeviceID = NULL;
    std::vector<cl_context_properties> cps;

#ifdef DX11_APP
    if (m_Device)
    {
        clGetDeviceIDsFromD3D11KHR_fn pClGetDeviceIDsFromD3D11KHR = static_cast<clGetDeviceIDsFromD3D11KHR_fn>(clGetExtensionFunctionAddressForPlatform(m_clPlatformId, "clGetDeviceIDsFromD3D11KHR"));
        if (pClGetDeviceIDsFromD3D11KHR == NULL)
        {
            std::cout << "Error: Cannot resolve ClGetDeviceIDsFromD3D11KHR function." << std::endl;
            return false;
        }

        ID3D11Device* pD3DDevice11 = m_Device;
        cl_int clStatus = (*pClGetDeviceIDsFromD3D11KHR)(m_clPlatformId, CL_D3D11_DEVICE_KHR, pD3DDevice11, CL_PREFERRED_DEVICES_FOR_D3D11_KHR, 1, &clInteroppedDeviceID, NULL);
        if (clStatus != CL_SUCCESS)
        {
            std::cout << "Error: pClGetDeviceIDsFromD3D11KHR() failed." << std::endl;
            return false;
        }

        cps.push_back(CL_CONTEXT_D3D11_DEVICE_KHR);
        cps.push_back((cl_context_properties) pD3DDevice11);
    }
#elif OGL_APP
    cps.push_back(CL_GL_CONTEXT_KHR);
  #ifdef _WIN32
    cps.push_back((cl_context_properties) wglGetCurrentContext());
  #elif __linux__
    cps.push_back((cl_context_properties) glXGetCurrentContext());
  #endif

  #ifdef _WIN32
    cps.push_back(CL_WGL_HDC_KHR);
    cps.push_back((cl_context_properties) wglGetCurrentDC());
  #elif __linux__
    cps.push_back(CL_GLX_DISPLAY_KHR);
    cps.push_back((cl_context_properties) glXGetCurrentDisplay());
  #endif
#endif

    if (clInteroppedDeviceID == NULL)
    {
        cl_int clStatus = clGetDeviceIDs(m_clPlatformId, CL_DEVICE_TYPE_GPU, 1, &clInteroppedDeviceID, NULL);
        if (clStatus != CL_SUCCESS)
        {
            std::cout << "Error: clGetDeviceIDs() failed." << std::endl;
            return false;
        }
    }

    cps.push_back(CL_CONTEXT_INTEROP_USER_SYNC);
    cps.push_back(CL_TRUE);

    cps.push_back(CL_CONTEXT_PLATFORM);
    cps.push_back((cl_context_properties) m_clPlatformId);
    cps.push_back(0);

    cl_int clStatus = CL_SUCCESS;
    m_clContext = clCreateContext(&cps[0], 1, &clInteroppedDeviceID, NULL, NULL, &clStatus);
    if (clStatus != CL_SUCCESS)
    {
        std::cout << "Error: clCreateContext() failed." << std::endl;
        return false;
    }

    for (int i = 0; i < m_NumBuffers; i++)
    {
        m_clCommandQueueA[i] = clCreateCommandQueue(m_clContext, clInteroppedDeviceID, (cl_command_queue_properties)NULL, &clStatus);
        if (clStatus != CL_SUCCESS)
        {
            std::cout << "Error: clCreateCommandQueue() failed." << std::endl;
            return false;
        }

        m_clCommandQueueB[i] = clCreateCommandQueue(m_clContext, clInteroppedDeviceID, (cl_command_queue_properties)NULL, &clStatus);
        if (clStatus != CL_SUCCESS)
        {
            std::cout << "Error: clCreateCommandQueue() failed." << std::endl;
            return false;
        }
    }

    return true;
}


// Allocates the per-frame OpenCL buffers and their graphics-interop
// counterparts.
//
// For each of the m_NumBuffers slots:
//   - m_clBuffers[i]  is created with CL_MEM_USE_PERSISTENT_MEM_AMD and
//                     dataBufferSize bytes;
//   - m_iopBuffers[i] is created from the matching D3D11 buffer (DX11_APP) or
//                     GL buffer (OGL_APP) when the graphics side has one.
//
// dataBufferSize: size of each buffer in bytes, must be > 0.
//
// Returns true when every buffer was created; false (after logging) on the
// first failure. Buffers created before the failure are left in the members.
bool  OCLApplication::InitBuffers(long long dataBufferSize)
{
    // zero or negative sizes cannot be allocated
    if (dataBufferSize <= 0)
    {
        std::cout << "Error: Invalid buffer size!" << std::endl;
        return false;
    }

    for (int i = 0; i < m_NumBuffers; i++)
    {
        cl_int clStatus = CL_SUCCESS;
        m_clBuffers[i] = clCreateBuffer(m_clContext, CL_MEM_USE_PERSISTENT_MEM_AMD, static_cast<size_t>(dataBufferSize), nullptr, &clStatus);
        if (clStatus != CL_SUCCESS)
        {
            std::cout << "Error: Unable to create buffer #" << i << " with CL_MEM_USE_PERSISTENT_MEM_AMD" << std::endl;
            return false;
        }

#ifdef __linux__
        // Per the original author's note: buffers get allocated on first use, so
        // under Linux a small blocking read is issued to find out whether the
        // allocation really succeeded (on Windows the allocation is said to fall
        // back to a different region automatically).
        int testRead = 0;
        clStatus = clEnqueueReadBuffer(m_clCommandQueueA[0], m_clBuffers[i], CL_TRUE, 0, sizeof(testRead), &testRead, 0, nullptr, nullptr);
        if (clStatus == CL_INVALID_MEM_OBJECT || clStatus == CL_MEM_OBJECT_ALLOCATION_FAILURE || clStatus == CL_OUT_OF_HOST_MEMORY)
        {
            std::cout << "Error: Buffer " << i << " allocation failed with CL_MEM_USE_PERSISTENT_MEM_AMD" << std::endl;
            return false;
        }
        else if (clStatus != CL_SUCCESS)
        {
            std::cout << "Error: Buffer " << i << " failed to read" << std::endl;
            return false;
        }
#endif

#ifdef DX11_APP
        if (m_DataBuffer[i])
        {
            m_iopBuffers[i] = clCreateFromD3D11BufferKHR(m_clContext, CL_MEM_READ_WRITE, m_DataBuffer[i], &clStatus);
            if (clStatus != CL_SUCCESS)
            {
                // the message names the call that was actually made
                std::cout << "Warning: clCreateFromD3D11BufferKHR failed " << clStatus << std::endl;
                return false;
            }
        }
#elif OGL_APP
        // NOTE(review): the guard tests m_Textures[i] while the interop object is
        // created from m_Buffers[i] - presumably both are created together by the
        // base class; confirm.
        if (m_Textures[i])
        {
            m_iopBuffers[i] = clCreateFromGLBuffer(m_clContext, CL_MEM_READ_WRITE, m_Buffers[i], &clStatus);
            if (clStatus != CL_SUCCESS)
            {
                std::cout << "Warning: clCreateFromGLBuffer failed " << clStatus << std::endl;
                return false;
            }
        }
#endif
    }

    return true;
}

// Full initialisation of the OpenCL application.
//
// Steps, in order (the first failing step logs a message and returns false):
//   1. locate the AMD OpenCL platform,
//   2. initialise the base class (graphics side),
//   3. size the per-buffer containers to m_NumBuffers,
//   4. create the context and command queues,
//   5. resolve the graphics-interop and SSG extension entry points,
//   6. allocate the frame buffers (frame size taken from the settings).
//
// pApplication: owning application object; must not be null.
// argc / argv:  forwarded unchanged to the base class Init().
//
// Returns true when every step succeeded.
bool OCLApplication::Init(IApplication * pApplication, int argc, char ** argv)
{
    // the settings are fetched through this pointer, so it has to be valid
    if (pApplication == nullptr)
    {
        std::cout << "OpenCL: Invalid application object!" << std::endl;
        return false;
    }

    m_Application = pApplication;
    m_Settings = pApplication->GetSettings();

    //////////////////////////
    // figure out the platform we're running on
    if (!FindPlatformID())
    {
        std::cout << "OpenCL: Unable to obtain platform IDs!" << std::endl;
        return false;
    }

    //////////////////////////
    // initialize the base class
    if (!TAppBase::Init(pApplication, argc, argv))
    {
        std::cout << "OpenCL: Unable to initialize base!" << std::endl;
        return false;
    }

    // one entry per buffer in every per-buffer container;
    // the Init* helpers below index into these
    m_iopBuffers.resize(m_NumBuffers, NULL);
    m_clBuffers.resize(m_NumBuffers, NULL);
    m_clCommandQueueA.resize(m_NumBuffers, NULL);
    m_clCommandQueueB.resize(m_NumBuffers, NULL);
    m_clEventA.resize(m_NumBuffers, NULL);
    m_clEventB.resize(m_NumBuffers, NULL);

    //////////////////////////
    // initialize the context and command queues
    if (!InitCommandQueue())
    {
        std::cout << "OpenCL: Unable to initialize the command queue!" << std::endl;
        return false;
    }

    //////////////////////////
    // check for a number of functions we need
    if (!InitExtensions())
    {
        std::cout << "OpenCL: Unable to intialize extension!" << std::endl;
        return false;
    }

    //////////////////////////
    // check for SSG extension support
    if (!InitSsgExtensions())
    {
        std::cout << "OpenCL: Unable to intialize SSG extension!" << std::endl;
        return false;
    }

    //////////////////////////
    // allocate buffers - a single frame is limited to INT_MAX bytes
    const long long dataSize = m_Settings->FrameSize();
    if (dataSize > INT_MAX)
    {
        std::cout << "OpenCL: Buffer too large for allocation!" << std::endl;
        return false;
    }

    if (!InitBuffers(dataSize))
    {
        std::cout << "OpenCL: Unable to initialize all required buffers!" << std::endl;
        return false;
    }

    return true;
}

void OCLApplication::Term()
{
    // delete buffers
    for (int i = 0; i < m_clBuffers.size(); i++)
    {
        if (m_clBuffers[i]) 
        {
            clReleaseMemObject(m_clBuffers[i]);
            m_clBuffers[i] = NULL;
        }
    }
    m_clBuffers.clear();

    for (int i = 0; i < m_iopBuffers.size(); i++)
    {
        if (m_iopBuffers[i]) 
        {
            clReleaseMemObject(m_iopBuffers[i]);
            m_iopBuffers[i] = NULL;
        }
    }
    m_iopBuffers.clear();

    // delete command queues
    for (int i = 0; i < m_clCommandQueueA.size(); i++)
    {
        if (m_clCommandQueueA[i])
        {
            clReleaseCommandQueue(m_clCommandQueueA[i]);
            m_clCommandQueueA[i] = NULL;
        }
    }
    m_clCommandQueueA.clear();

    for (int i = 0; i < m_clCommandQueueB.size(); i++)
    {
        if (m_clCommandQueueB[i])
        {
            clReleaseCommandQueue(m_clCommandQueueB[i]);
            m_clCommandQueueB[i] = NULL;
        }
    }
    m_clCommandQueueB.clear();

    // release the context
    if (m_clContext)
    {
        clReleaseContext(m_clContext);
        m_clContext = NULL;
    }

    // call the base class to complete it's
    // processing too...
    TAppBase::Term();
}

// Stops frame processing: pauses reading, drains both sets of command
// queues and then hands over to the base class.
void OCLApplication::Stop()
{
    // Pause first. According to the original note, the quit request issued by
    // the base class is only posted and returns immediately, so frames could
    // still be processed before the quit takes effect unless reading is
    // paused here.
    Pause(true);

    // block until everything already submitted on the "A" queues is done
    for (const auto queue : m_clCommandQueueA)
    {
        if (!queue)
            continue;
        clFinish(queue);
    }

    // ... and the same for the "B" queues
    for (const auto queue : m_clCommandQueueB)
    {
        if (!queue)
            continue;
        clFinish(queue);
    }

    // let the base class complete its own processing
    TAppBase::Stop();
}


//
//
// Handle a frame
//
//
void OCLApplication::DrawFrame()
{
    // acquire
#ifdef DX11_APP
    cl_int clStatus = clEnqueueAcquireD3D11ObjectsKHR(m_clCommandQueueA[m_waitBuffer], 1, &m_iopBuffers[m_waitBuffer], 0, NULL, NULL);
#elif OGL_APP
    cl_int clStatus = clEnqueueAcquireGLObjects(m_clCommandQueueA[m_waitBuffer], 1, &m_iopBuffers[m_waitBuffer], 0, NULL, NULL);
#endif
    if (clStatus != CL_SUCCESS)
    {
#ifdef DX11_APP
        std::cout << "Warning: clEnqueueAcquireD3D11ObjectsKHR failed" << clStatus << std::endl;
#elif OGL_APP
        std::cout << "Warning: clEnqueueAcquireGLObjects failed" << clStatus << std::endl;
#endif
        return;
    }


    // set-up the event wait list
    cl_event  waitList[2] = { NULL, NULL };
    cl_uint   waitCount   = 0;
    if (m_clEventA[m_waitBuffer] != NULL)
        waitList[waitCount++] = m_clEventA[m_waitBuffer];
    if (m_clEventB[m_waitBuffer] != NULL)
        waitList[waitCount++] = m_clEventB[m_waitBuffer];

    // the image was read into m_clBuffers[0] but DX will be using
    // information from m_clBuffer to display
    // NOTE: m_clBuffers[0] and m_clBuffer are of different types 
    //       which is why the copy is needed
    clStatus = clEnqueueCopyBuffer(m_clCommandQueueA[m_waitBuffer], m_clBuffers[m_waitBuffer], m_iopBuffers[m_waitBuffer], 0, 0, m_Settings->FrameSize(), 
                                   waitCount, (waitCount > 0) ? waitList : NULL, NULL);
    if (clStatus != CL_SUCCESS)
    {
        std::cout << "Warning: clEnqueueCopyBuffer failed" << std::endl;
        return;
    }

    // if we had to wait on any events, time to clean up...
    if (m_clEventA[m_waitBuffer] != NULL)
    {
        clReleaseEvent(m_clEventA[m_waitBuffer]);
        m_clEventA[m_waitBuffer] = NULL;
    }
    if (m_clEventB[m_waitBuffer] != NULL)
    {
        clReleaseEvent(m_clEventB[m_waitBuffer]);
        m_clEventB[m_waitBuffer] = NULL;
    }


    // release
    cl_event clWaitEvent = 0;
#ifdef DX11_APP
    clStatus = clEnqueueReleaseD3D11ObjectsKHR(m_clCommandQueueA[m_waitBuffer], 1, &m_iopBuffers[m_waitBuffer], 0, NULL, &clWaitEvent);
#elif OGL_APP
    clStatus = clEnqueueReleaseGLObjects(m_clCommandQueueA[m_waitBuffer], 1, &m_iopBuffers[m_waitBuffer], 0, NULL, &clWaitEvent);
#endif
    if (clStatus != CL_SUCCESS)
    {
#ifdef DX11_APP
        std::cout << "Warning: clEnqueueReleaseD3D11ObjectsKHR failed" << clStatus << std::endl;
#elif OGL_APP
        std::cout << "Warning: clEnqueueReleaseGLObjects failed" << clStatus << std::endl;
#endif
        return;
    }

    // wait to finish
    clStatus = clWaitForEvents(1, &clWaitEvent);
    if (clStatus != CL_SUCCESS)
    {
        std::cout << "Warning: clWaitForEvents failed" << std::endl;
        return;
    }

    clStatus = clReleaseEvent(clWaitEvent);
    if (clStatus != CL_SUCCESS)
    {
        std::cout << "Warning: clReleaseEvent failed" << std::endl;
        return;
    }

    // get the base class to do the drawing...
    TAppBase::DrawFrame();
}




//
//
// P2P file access through OpenCL extension
//
//

// Tells whether the SSG file extension can be used, i.e. whether all six
// extension entry points have been resolved (are non-null).
bool OCLApplication::P2PIsExtensionActive()
{
    // any unresolved entry point disables the extension
    if (clCreateSsgFileObjectAMD == nullptr)
        return false;
    if (clGetSsgFileObjectInfoAMD == nullptr)
        return false;
    if (clRetainSsgFileObjectAMD == nullptr)
        return false;
    if (clReleaseSsgFileObjectAMD == nullptr)
        return false;
    if (clEnqueueReadSsgFileAMD == nullptr)
        return false;
    if (clEnqueueWriteSsgFileAMD == nullptr)
        return false;

    return true;
}

// Opens `filename` read-only as an SSG file object and caches its size
// (m_SsgFileSize) and block size (m_sectorSize).
//
// filename: multibyte path of the file, converted to wide characters with
//           mbstowcs() (current C locale) before it is passed to the
//           extension; must be non-null and non-empty.
//
// Returns true when the file was opened and both properties were read.
// Returns false when the extension or the context is unavailable, the name
// cannot be converted, or any extension call fails.
//
// NOTE(review): a handle already stored in m_clSsgFile is overwritten without
// being released, and the new handle stays in m_clSsgFile when one of the
// info queries fails - presumably the caller pairs every call with
// P2PCloseFile(); confirm.
bool OCLApplication::P2POpenFile(const char * filename)
{
    // if extension is not active, P2P is not available
    if (!P2PIsExtensionActive())
        return false;

    // if the context doesn't exist, can't call
    // OpenCL functions...
    if (!m_clContext)
        return false;

    if (filename == nullptr)
        return false;

    // convert to widechar...
    // mbstowcs() reports an invalid multibyte sequence as (size_t)-1, which has
    // to be tested explicitly because the return type is unsigned
    const size_t conversionError = static_cast<size_t>(-1);

    const size_t wideLength = mbstowcs(NULL, filename, 0);
    if (wideLength == conversionError || wideLength == 0)
        return false;

    // one extra element for the terminator written by mbstowcs()
    std::wstring  wFileName(wideLength + 1, L'\0');
    const size_t converted = mbstowcs(&wFileName[0], filename, wFileName.size());
    if (converted == conversionError || converted == 0)
        return false;
    wFileName.resize(converted);

    // open the file
    cl_int clStatus = CL_SUCCESS;
    m_clSsgFile = clCreateSsgFileObjectAMD(m_clContext, CL_FILE_READ_ONLY_AMD, wFileName.c_str(), &clStatus);
    if (clStatus != CL_SUCCESS)
    {
        std::cout << "Error: Unable to open file - clCreateSsgFileObjectAMD() failed." << std::endl;
        return false;
    }
    if (m_clSsgFile == NULL)
    {
        std::cout << "Error: File handle is NULL - clCreateSsgFileObjectAMD() failed." << std::endl;
        return false;
    }


    // get the file size information
    size_t  retSize = 0;
    clStatus = clGetSsgFileObjectInfoAMD(m_clSsgFile, CL_FILE_SIZE_AMD, sizeof(m_SsgFileSize), &m_SsgFileSize, &retSize);
    if (clStatus != CL_SUCCESS)
    {
        std::cout << "Error: Unable to retrieve file size - clGetSsgFileObjectInfoAMD() failed." << std::endl;
        return false;
    }
    if (retSize != sizeof(m_SsgFileSize))
    {
        std::cout << "Error: invalid file size information returned - clGetSsgFileObjectInfoAMD() failed." << std::endl;
        return false;
    }


    // get the sector size
    retSize = 0;
    clStatus = clGetSsgFileObjectInfoAMD(m_clSsgFile, CL_FILE_BLOCK_SIZE_AMD, sizeof(m_sectorSize), &m_sectorSize, &retSize);
    if (clStatus != CL_SUCCESS)
    {
        std::cout << "Error: Unable to retrieve file sector size - clGetSsgFileObjectInfoAMD() failed." << std::endl;
        return false;
    }
    if (retSize != sizeof(m_sectorSize))
    {
        std::cout << "Error: invalid file sector size information returned - clGetSsgFileObjectInfoAMD() failed." << std::endl;
        return false;
    }


    return true;
}

// Returns the file size cached in m_SsgFileSize (filled in by P2POpenFile),
// or 0 when the SSG extension is not available.
unsigned long long OCLApplication::P2PGetFileSize()
{
    if (P2PIsExtensionActive())
    {
        // the member has a different type in the two build flavours
#ifdef DX11_APP
        return m_SsgFileSize.QuadPart;
#elif OGL_APP
        return m_SsgFileSize;
#endif
    }

    // extension not active: P2P is not available
    return 0;
}

unsigned long long OCLApplication::P2PGetSectorSize()
{
    // if extension is not active, P2P is not available
    if (!P2PIsExtensionActive())
        return 0;

    return m_sectorSize;
}

bool OCLApplication::P2PReadFile(unsigned long long fileOffset, unsigned long long size)
{
    // if extension is not active, P2P is not available
    if (!P2PIsExtensionActive())
        return false;


    const unsigned int  nrOfRegions = m_Settings->NumThreads();
    if (nrOfRegions == 0)
    {
        cl_int clStatus = clEnqueueReadSsgFileAMD(m_clCommandQueueA[m_readBuffer], m_clBuffers[m_readBuffer], CL_TRUE, 0, size, m_clSsgFile, fileOffset, 0, NULL, NULL);
        if (clStatus != CL_SUCCESS)
        {
            std::cout << "Error: Unable to do the data transfer - clEnqueueReadSsgFileAMD() failed." << std::endl;
            return false;
        }

        clFinish(m_clCommandQueueA[m_readBuffer]);
    }
    else
    {
        const unsigned long long  regionSize  = ((size + m_sectorSize - 1) / m_sectorSize / nrOfRegions) * m_sectorSize;
        for (unsigned int i = 0; i < nrOfRegions; i++)
        {
            const unsigned long long srcOffset  = fileOffset + i * regionSize;
            const unsigned long long destOffset = i * regionSize;
            const unsigned long long length     = (i < nrOfRegions - 1) ? regionSize
                                                                        : size - i * regionSize;

            if (length > 0)
            {
                cl_command_queue  cl_cmdQueue = NULL;
                if (i % 2 == 0)
                    cl_cmdQueue = m_clCommandQueueB[m_readBuffer];
                if (i % 2 == 1)
                    cl_cmdQueue = m_clCommandQueueA[m_readBuffer];

                cl_int clStatus = clEnqueueReadSsgFileAMD(cl_cmdQueue, m_clBuffers[m_readBuffer], CL_FALSE, destOffset, length, m_clSsgFile, srcOffset, 
                                                          0, NULL, (i % 2 == 0) ? &m_clEventA[m_readBuffer] : &m_clEventB[m_readBuffer]);
                if (clStatus != CL_SUCCESS)
                {
                    std::cout << "Error: Unable to do the data transfer - clEnqueueReadSsgFileAMD() failed." << std::endl;
                    return false;
                }
            }
        }
   }

    m_readBuffer = (m_readBuffer + 1) % m_NumBuffers;

    // update the first frame(s) flag - if we sent requests
    // for all frames, now we can start waiting on them to 
    // finish to start displaying them...
    //   m_NumBuffers == 1    -->  m_readBuffer = 0
    //   m_NumBuffers == 2    -->  m_readBuffer = 0
    //   m_NumBuffers == 3    -->  m_readBuffer = 2
    //   m_NumBuffers == 4    -->  m_readBuffer = 3
    // it seems that for double buffering, it's better
    // to submit read(0) and read(1) first and 
    // then go with wait(0), present(0), read(0), even though 
    // present(0), read(0) should cause a bit of serialization 
    // as we can't start reading into the buffer that's in use 
    // till present is done with it
    if (m_IsFirstFrame && 
         (  ((m_NumBuffers != 2) && (m_readBuffer == m_NumBuffers - 1)) ||
            ((m_NumBuffers == 2) && (m_readBuffer == 0))  )  )
        m_IsFirstFrame = false;

    return true;
}

void OCLApplication::P2PCloseFile()
{
    // if extension is not active, P2P is not available
    if (!P2PIsExtensionActive())
        return;

    cl_int clStatus = clReleaseSsgFileObjectAMD(m_clSsgFile);
    if (clStatus != CL_SUCCESS)
    {
        std::cout << "Error: Unable to close file - clReleaseSsgFileObjectAMD() failed." << std::endl;
        return;
    }
}
