/*****************************************************************************\
*
*  Module Name    main.cpp
*  Project        Radeon pro render rendering tutorial
*
*  Description    How to render a scene using tiles. This doesn't require you to
*                 allocate the full framebuffer.
*
*  Copyright 2011 - 2019 Advanced Micro Devices, Inc. (unpublished)
*
*  All rights reserved.  This notice is intended as a precaution against
*  inadvertent publication and does not imply publication or any waiver
*  of confidentiality.  The year included in the foregoing notice is the
*  year of creation of the work.
*
\*****************************************************************************/
#include "RadeonProRender.h"
#include "RprLoadStore.h"
#include "rprDeprecatedApi.h"
#include "Math/mathutils.h"

#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "../3rdParty/stbi/stbi.h"

#include <cassert>
#include <iostream>
#include <vector>
#include <string>

//-------------------Utilities-----------------------------
// Prints the last error message stored in the context when errorCode reports a failure.
//   errorCode  - status returned by a rpr* call; nothing is printed for RPR_SUCCESS.
//   context__  - context to query for RPR_CONTEXT_LAST_ERROR_MESSAGE; nothing is printed when it is null.
// The function only reports: it never aborts and never changes errorCode.
static void StudyErrorCode(rpr_int errorCode, rpr_context context__ = nullptr)
{
	// nothing to report on success, and nothing to query without a context
	if (errorCode == RPR_SUCCESS || !context__)
		return;

	// first query: only ask for the size of the message
	size_t messageSize = 0;
	rpr_int status = rprContextGetInfo(context__, RPR_CONTEXT_LAST_ERROR_MESSAGE, 0, nullptr, &messageSize);
	if (status != RPR_SUCCESS || messageSize < 1)
		return;

	// one extra zeroed byte so printf always reads a terminated string,
	// whatever the API wrote into the first messageSize bytes
	std::vector<char> message(messageSize + 1, '\0');
	status = rprContextGetInfo(context__, RPR_CONTEXT_LAST_ERROR_MESSAGE, messageSize, message.data(), nullptr);
	if (status == RPR_SUCCESS)
	{
		printf("ErrorMessage = %s\n", message.data());
	}
}

// Limits x to the range [a, b]: values below a become a, values above b become b,
// anything else is returned unchanged. The lower bound is tested first.
template <typename T> T clamp(T x, T a, T b)
{
	if (x < a)
	{
		return a;
	}
	if (x > b)
	{
		return b;
	}
	return x;
}
// CHECK is an object-like macro (it takes no arguments). It reports and asserts on the
// local variables named `status` and `context`, so it must FOLLOW a statement that
// assigns `status`:   status = rprSomething(...); CHECK;
// Writing CHECK(rprSomething(...)) expands to the check followed by a parenthesised call
// whose result is discarded, i.e. `status` is never updated.
// The expansion is two statements: only use it inside a braced scope, never as the sole
// body of an unbraced if/else/for.
#ifndef CHECK
#define CHECK        StudyErrorCode(status,context);  assert(status == RPR_SUCCESS);
#endif

// CHECK_C(c) is the same check with the context passed explicitly as `c`; it still reads
// the local variable `status` and also expands to two statements.
#ifndef CHECK_C
#define CHECK_C(c)   StudyErrorCode(status,c);         assert(status == RPR_SUCCESS);
#endif

// Fallback min, only defined when no `min` macro exists yet. Each argument may be
// evaluated twice, so do not pass expressions with side effects.
#ifndef min
#define min(a,b)            (((a) < (b)) ? (a) : (b))
#endif
//------------------------------------------------------------

// Full render target resolution in pixels (4K UHD).
constexpr int RenderTargetSizeX = 3840;
constexpr int RenderTargetSizeY = 2160;

/*
Camera sensor size, chosen so that its aspect ratio equals the render target's.
The height is the 24 used for a standard 35mm sensor; the width is derived from it.
*/
constexpr float SensorY = 24.f;
constexpr float SensorX = SensorY * (static_cast<float>(RenderTargetSizeX) / static_cast<float>(RenderTargetSizeY));

// Number of rprContextRender calls issued per frame buffer (per tile when tiling).
constexpr int maxIterationRendering = 3;

/*
For very large render targets, it is beneficial to break down the framebuffer into smaller render regions (tiles).

sFrameBufferMetadata describes one tiled render: the caller fills in the sizes,
rprextMultiTileRender fills in mData.
*/
struct sFrameBufferMetadata
{
	// Size of the complete image, in pixels.
	int mRenderTargetSizeX = 0;
	int mRenderTargetSizeY = 0;

	// Size of one tile, in pixels.
	int mTileSizeX = 0;
	int mTileSizeY = 0;

	// Stitched image, 3 bytes (R, G, B) per pixel, mRenderTargetSizeX pixels per row.
	std::vector<rpr_uchar> mData;
};
/*
Renders the scene one tile at a time and stitches the tiles into meta.mData.

meta                  - in:  render target and tile sizes in pixels. The tile must be non-empty and
                             strictly smaller than the render target on both axes.
                        out: mData is resized to width * height * 3 bytes and filled with 8-bit RGB.
scene                 - scene whose camera is re-targeted for every tile. The sensor size and lens
                        shift set here are NOT restored when the function returns.
context               - context used to create the tile frame buffers and to render.
maxIterationRendering - number of rprContextRender calls issued for each tile.

API failures are handled through CHECK_C (prints the context error message, then asserts).
*/
void rprextMultiTileRender(sFrameBufferMetadata& meta, rpr_scene scene, rpr_context context, rpr_uint maxIterationRendering)
{
	const int targetW = meta.mRenderTargetSizeX;
	const int targetH = meta.mRenderTargetSizeY;
	const int tileW = meta.mTileSizeX;
	const int tileH = meta.mTileSizeY;

	//a tile must be non-empty (it is used as a divisor) and smaller than the render target
	assert(tileW > 0 && tileH > 0);
	assert(targetW > tileW);
	assert(targetH > tileH);

	//3 bytes (RGB) per pixel; computed in size_t so large targets cannot overflow int
	meta.mData.resize(static_cast<size_t>(targetW) * static_cast<size_t>(targetH) * 3);

	rpr_int status = RPR_SUCCESS;

	//fractional tile counts drive the camera setup, rounded-up counts drive the loops
	const float tilesXf = targetW / static_cast<float>(tileW);
	const float tilesYf = targetH / static_cast<float>(tileH);
	const int tilesX = (targetW + tileW - 1) / tileW;
	const int tilesY = (targetH + tileH - 1) / tileH;

	printf("info:\n");
	printf("  Virtual resolution: %dx%d\n", targetW, targetH);
	printf("  Tile resolution:    %dx%d\n", tileW, tileH);
	printf("  Tiled resolution:   %fx%f\n", tilesXf, tilesYf);
	printf("  Tiled offset:       %f,%f\n", -tilesXf / 2.0f + .5f, -tilesYf / 2.0f + .5f);

	//allocate the tile-sized frame buffers: one to accumulate into, one to resolve into
	rpr_framebuffer_desc desc;
	desc.fb_width = tileW;
	desc.fb_height = tileH;
	rpr_framebuffer_format fmt = { 4, RPR_COMPONENT_TYPE_FLOAT32 };

	rpr_framebuffer frame_buffer = nullptr;
	status = rprContextCreateFrameBuffer(context, fmt, &desc, &frame_buffer); CHECK_C(context);
	rpr_framebuffer frame_bufferSolved = nullptr;
	status = rprContextCreateFrameBuffer(context, fmt, &desc, &frame_bufferSolved); CHECK_C(context);
	status = rprContextSetAOV(context, RPR_AOV_COLOR, frame_buffer); CHECK_C(context);

	//Temporary storage for one resolved tile (4 floats per pixel), needed for the stitching phase.
	size_t frame_buffer_dataSize = 0;
	status = rprFrameBufferGetInfo(frame_bufferSolved, RPR_FRAMEBUFFER_DATA, 0, nullptr, &frame_buffer_dataSize); CHECK_C(context);
	const size_t expectedDataSize = static_cast<size_t>(tileW) * static_cast<size_t>(tileH) * 4 * sizeof(float);
	assert(frame_buffer_dataSize == expectedDataSize);
	//sized for the larger of the two so neither the read back nor the stitching loop can run
	//past the end of the buffer, even when asserts are compiled out
	const size_t stagingBytes = frame_buffer_dataSize > expectedDataSize ? frame_buffer_dataSize : expectedDataSize;
	std::vector<float> frame_buffer_data((stagingBytes + sizeof(float) - 1) / sizeof(float));

	//converts one resolved channel to 8 bits. The range test is done on the float so that
	//NaN and very large values never reach the float->int conversion.
	auto toByte = [](float value) -> rpr_uchar
	{
		if (!(value > 0.f))
			return 0;   //negative, zero or NaN
		if (value >= 1.f)
			return 255;
		return static_cast<rpr_uchar>(static_cast<int>(value * 255.f));
	};

	//we need to change the camera sensor size. It controls aspect ratios which in turn
	//defines how rays are being cast. We are effectively tracing subsets of the sensor
	rpr_camera camera = nullptr;
	status = rprSceneGetCamera(scene, &camera); CHECK_C(context);
	status = rprCameraSetSensorSize(camera, SensorX / tilesXf, SensorY / tilesYf);   CHECK_C(context);

	//render each tile and blit it onto the render target backing store
	for (int yTile = 0; yTile < tilesY; yTile++)
	{
		//shift the render plane by H/2, then advance one tile per row
		const float shiftY = -(tilesYf / 2.0f) + 0.5f + static_cast<float>(yTile);

		for (int xTile = 0; xTile < tilesX; xTile++)
		{
			//shift the render plane by W/2, then advance one tile per column
			const float shiftX = -(tilesXf / 2.0f) + 0.5f + static_cast<float>(xTile);

			//shift the camera viewport to account for the tile offset
			status = rprCameraSetLensShift(camera, shiftX, shiftY);  CHECK_C(context);

			status = rprFrameBufferClear(frame_buffer); CHECK_C(context);
			//render all iterations on the current tile
			for (rpr_uint i = 0; i < maxIterationRendering; ++i)
			{
				// force the framecount, so we ensure each tile is using the same seed.
				status = rprContextSetParameter1u(context, "framecount", i); CHECK_C(context);
				status = rprContextRender(context); CHECK_C(context);
			}
			status = rprContextResolveFrameBuffer(context, frame_buffer, frame_bufferSolved, false); CHECK_C(context);

			//read back
			status = rprFrameBufferGetInfo(frame_bufferSolved, RPR_FRAMEBUFFER_DATA, frame_buffer_dataSize, frame_buffer_data.data(), nullptr); CHECK_C(context);

			//stitch current rendered tile data to the FB
			const int offsetInRenderTargetX = xTile * tileW;
			const int offsetInRenderTargetY = yTile * tileH;

			//tiles on the last row/column may stick out of the render target: only copy the part inside
			const int copyW = (offsetInRenderTargetX + tileW <= targetW) ? tileW : targetW - offsetInRenderTargetX;
			const int copyH = (offsetInRenderTargetY + tileH <= targetH) ? tileH : targetH - offsetInRenderTargetY;

			for (int j = 0; j < copyH; ++j)
			{
				//Vertical flip of both source and destination rows
				const size_t dstRow = static_cast<size_t>(targetH - (j + offsetInRenderTargetY) - 1);
				const size_t srcRow = static_cast<size_t>(tileH - j - 1);

				for (int i = 0; i < copyW; ++i)
				{
					const size_t dst = 3 * (dstRow * targetW + static_cast<size_t>(i + offsetInRenderTargetX));
					const size_t src = 4 * (srcRow * tileW + static_cast<size_t>(i));

					//the data comes from the resolved frame buffer and is stored as is;
					//the 4th channel is not used
					meta.mData[dst + 0] = toByte(frame_buffer_data[src + 0]);
					meta.mData[dst + 1] = toByte(frame_buffer_data[src + 1]);
					meta.mData[dst + 2] = toByte(frame_buffer_data[src + 2]);
				}
			}
		}
	}

	if (frame_buffer) { status = rprObjectDelete(frame_buffer); frame_buffer = nullptr; CHECK_C(context); }
	if (frame_bufferSolved) { status = rprObjectDelete(frame_bufferSolved); frame_bufferSolved = nullptr; CHECK_C(context); }
}

//#define NO_TILE //<--- Uncomment if you want to render to a full FB

/*
Loads the tutorial scene and renders it, either tile by tile (default: one PNG per tile size
128, 256 and 512) or into a single full-size frame buffer when NO_TILE is defined.

Returns 0 on success, -1 when the plugin cannot be registered or the context cannot be created.
Every other API failure goes through CHECK (prints the context error message, then asserts).
*/
int main()
{
	//	enable Radeon ProRender API trace
	//	set this before any rpr API calls
	//	rprContextSetParameter1u(0,"tracing",1);

	std::cout << "Radeon ProRender SDK simple rendering tutorial.\n";
	// Indicates whether the last operation has succeeded or not
	rpr_int status = RPR_SUCCESS;
	rpr_context context = NULL;

	// Register Tahoe ray tracing plugin.
	rpr_int tahoePluginID = rprRegisterPlugin("Tahoe64.dll");
	assert(tahoePluginID != -1);
	if (tahoePluginID == -1)
	{
		// reached only when asserts are compiled out
		std::cout << "Plugin registration failed: Tahoe64.dll.\n";
		return -1;
	}
	rpr_int plugins[] = { tahoePluginID };
	size_t pluginCount = sizeof(plugins) / sizeof(plugins[0]);

	// Create context using a single GPU.
	// CHECK takes no argument, so the result has to be stored in status explicitly.
	status = rprCreateContext(RPR_API_VERSION, plugins, pluginCount, RPR_CREATION_FLAGS_ENABLE_GPU0, NULL, NULL, &context);
	// Check if it is created successfully
	if (status != RPR_SUCCESS)
	{
		std::cout << "Context creation failed: check your OpenCL runtime and driver versions.\n";
		return -1;
	}

	// Set active plugin.
	status = rprContextSetActivePlugin(context, plugins[0]); CHECK;

	rpr_material_system matsys = NULL;
	status = rprContextCreateMaterialSystem(context, 0, &matsys); CHECK;

	std::cout << "Context successfully created.\n";


	rpr_scene scene = NULL;
	status = rprsImport("../../Resources/Meshes/JerryTheOgre.rprs", context, matsys, &scene, false); CHECK;


#ifdef NO_TILE
	{
		// Reference path: one frame buffer covering the whole render target.
		rpr_camera camera = nullptr;
		status = rprSceneGetCamera(scene, &camera); CHECK_C(context);
		status = rprCameraSetSensorSize(camera, SensorX, SensorY);   CHECK_C(context);

		rpr_framebuffer_desc desc;
		desc.fb_width = RenderTargetSizeX;
		desc.fb_height = RenderTargetSizeY;
		rpr_framebuffer_format fmt = { 4, RPR_COMPONENT_TYPE_FLOAT32 };

		rpr_framebuffer frame_buffer = NULL; status = rprContextCreateFrameBuffer(context, fmt, &desc, &frame_buffer); CHECK;
		rpr_framebuffer frame_bufferSolved = NULL; status = rprContextCreateFrameBuffer(context, fmt, &desc, &frame_bufferSolved); CHECK;
		status = rprContextSetAOV(context, RPR_AOV_COLOR, frame_buffer); CHECK;

		status = rprFrameBufferClear(frame_buffer); CHECK;
		for (rpr_uint i = 0; i < maxIterationRendering; ++i)
		{
			// force the framecount, so this render uses the same seeds as the tiled one.
			status = rprContextSetParameter1u(context, "framecount", i); CHECK;
			status = rprContextRender(context); CHECK;
		}
		status = rprContextResolveFrameBuffer(context, frame_buffer, frame_bufferSolved, false); CHECK;

		status = rprFrameBufferSaveToFile(frame_bufferSolved, "feature_multiTile_no_tile.png"); CHECK;

		if (frame_buffer) { status = rprObjectDelete(frame_buffer); frame_buffer = NULL; CHECK; }
		if (frame_bufferSolved) { status = rprObjectDelete(frame_bufferSolved); frame_bufferSolved = NULL; CHECK; }
	}
#else
	// Tiled path: render the same image with square tiles of 128, 256 and 512 pixels
	// and write one PNG per tile size.
	sFrameBufferMetadata meta;
	meta.mRenderTargetSizeX = RenderTargetSizeX;
	meta.mRenderTargetSizeY = RenderTargetSizeY;
	for (int i = 128; i <= 512; i *= 2)
	{
		meta.mTileSizeX = i;
		meta.mTileSizeY = i;
		rprextMultiTileRender(meta, scene, context, maxIterationRendering);

		const std::string fileName = std::string("30_tiled_render") + std::to_string(i) + ".png";
		// 3 components per pixel, RenderTargetSizeX * 3 bytes per row
		if (!stbi_write_png(fileName.c_str(), RenderTargetSizeX, RenderTargetSizeY, 3, meta.mData.data(), RenderTargetSizeX * 3)) { assert(0); }
	}
#endif

	if (scene) { status = rprObjectDelete(scene); scene = NULL; CHECK; }
	if (matsys) { status = rprObjectDelete(matsys); matsys = NULL; CHECK; }
	if (context) { status = rprObjectDelete(context); context = NULL; CHECK; }
	return 0;
}