using System;
using System.Collections.Generic;
using Core.BRG;
using Unity.Collections;
using Unity.Collections.LowLevel.Unsafe;
using Unity.Burst;
using Unity.Mathematics;
using Unity.Jobs;
using UnityEngine;
using UnityEngine.Rendering;
/// <summary>
/// BRG container class that manages instanced rendering through a BatchRendererGroup.
/// </summary>
public unsafe class BRGRenderBasic
{
    // True when BatchRendererGroup reports that instance data must live in a constant buffer (UBO)
    // instead of a raw buffer. The original author notes that this is the case on GLES.
    private bool UseConstantBuffer => BatchRendererGroup.BufferTarget == BatchBufferTarget.ConstantBuffer;
 
    private int m_maxInstances; // Maximum number of instances this container can hold
    private int m_instanceCount; // Number of instances currently in use
    private int m_alignedGPUWindowSize; // Size in bytes of one BRG window
    private int m_maxInstancePerWindow; // Maximum number of instances that fit in one window
    private int m_windowCount; // Number of windows (1 in raw-buffer mode, n in constant-buffer mode)
    private int m_totalGpuBufferSize; // Total size in bytes of the GPU buffer
    // NOTE(review): the element type arguments of the two NativeArray fields below are not visible in
    // this excerpt — confirm against the real file.
    private NativeArray m_transfromBuffer; // System-memory copy of the transform data uploaded to the GPU buffer
    // Allocated in Init with one element per instance; presumably per-instance colors — TODO confirm.
    public NativeArray m_sysmemColorBuffer;
    private bool m_initialized; // Set to true at the end of Init
    private int m_instanceSize; // Size of one instance in bytes (includes the two matrices added by Init)
    private BatchID[] m_batchIDs; // One BatchID per window
    private BatchMaterialID m_materialID; // ID returned by RegisterMaterial
    private BatchMeshID m_meshID; // ID returned by RegisterMesh
    private BatchRendererGroup m_BatchRendererGroup; // The BRG object
    private GraphicsBuffer m_GPUPersistentInstanceData; // GPU instance-data buffer (raw or constant buffer)
    protected BRGSamples m_samples; // Render settings passed to Init (mesh, material, shadow/sorting flags)
    /// <summary>
    /// Creates the BatchRendererGroup, allocates the GPU instance-data buffer and its system-memory
    /// mirrors, registers one batch per window, and registers the mesh and material taken from
    /// <paramref name="samples"/>.
    /// </summary>
    /// <param name="samples">Render settings; its Mesh, Material and shadow/sorting flags are read by this class.</param>
    /// <param name="maxInstances">Maximum number of instances the container can hold. Must be positive.</param>
    /// <param name="instanceSize">
    /// Size in bytes of the per-instance data supplied by the derived class. The 96 bytes needed for the
    /// obj2world and world2obj matrices are added on top of this value.
    /// </param>
    /// <returns>
    /// <c>true</c> when initialization succeeded; <c>false</c> when an argument is invalid, when a single
    /// instance does not fit into one constant-buffer window, or when the required buffer would exceed
    /// <see cref="int.MaxValue"/> bytes. Nothing is allocated on failure.
    /// </returns>
    protected bool Init(BRGSamples samples, int maxInstances, int instanceSize)
    {
        // One 3x4 matrix is stored as three float4 rows of 16 bytes each.
        const int kMatrixBytes = 3 * 16;

        if (maxInstances <= 0 || instanceSize < 0)
        {
            Debug.LogError($"BRGRenderBasic.Init: invalid arguments (maxInstances={maxInstances}, instanceSize={instanceSize}).");
            return false;
        }

        // Reserve room for the obj2world and world2obj matrices on top of the caller-supplied size.
        // The layout is computed in 64 bits so oversized requests are rejected instead of wrapping around.
        long fullInstanceSize = (long)instanceSize + 2 * kMatrixBytes;

        // BRG uses one large GPU buffer. In constant-buffer mode it is split into several "windows" of
        // BatchRendererGroup.GetConstantBufferMaxWindowSize() bytes; otherwise a single window holds everything.
        long windowSize;
        long instancesPerWindow;
        long windowCount;
        if (UseConstantBuffer)
        {
            windowSize = BatchRendererGroup.GetConstantBufferMaxWindowSize();
            instancesPerWindow = windowSize / fullInstanceSize;
            if (instancesPerWindow <= 0)
            {
                // Without this check the window-count division below would divide by zero.
                Debug.LogError($"BRGRenderBasic.Init: one instance ({fullInstanceSize} bytes) does not fit into a constant-buffer window of {windowSize} bytes.");
                return false;
            }
            // Round up so a partially filled last window is still counted.
            windowCount = (maxInstances + instancesPerWindow - 1) / instancesPerWindow;
        }
        else
        {
            // Round the single window up to a multiple of 16 bytes.
            windowSize = (maxInstances * fullInstanceSize + 15) & ~15L;
            instancesPerWindow = maxInstances;
            windowCount = 1;
        }

        long totalSize = windowCount * windowSize;
        if (totalSize > int.MaxValue)
        {
            Debug.LogError($"BRGRenderBasic.Init: required GPU buffer size ({totalSize} bytes) is too large.");
            return false;
        }

        // Release the resources of a previous Init so that re-initialising does not leak them.
        if (m_initialized)
        {
            Shutdown();
            m_initialized = false;
        }

        m_samples = samples;
        m_maxInstances = maxInstances;
        m_instanceCount = 0;
        m_instanceSize = (int)fullInstanceSize;
        m_alignedGPUWindowSize = (int)windowSize;
        m_maxInstancePerWindow = (int)instancesPerWindow;
        m_windowCount = (int)windowCount;
        m_totalGpuBufferSize = (int)totalSize;

        // Create the BRG object with our culling callback.
        m_BatchRendererGroup = new BatchRendererGroup(this.OnPerformCulling, IntPtr.Zero);

        // Constant buffers are created with 16-byte elements, raw buffers with 4-byte elements.
        m_GPUPersistentInstanceData = UseConstantBuffer
            ? new GraphicsBuffer(GraphicsBuffer.Target.Constant, m_totalGpuBufferSize / 16, 16)
            : new GraphicsBuffer(GraphicsBuffer.Target.Raw, m_totalGpuBufferSize / 4, 4);

        // System-memory mirrors: two entries per instance for the transforms, one per instance for the
        // second buffer.
        // NOTE(review): the element type arguments of these two allocations are not visible in this
        // excerpt and are left exactly as found.
        m_transfromBuffer =
            new NativeArray(maxInstances * 2, Allocator.Persistent, NativeArrayOptions.ClearMemory);
        m_sysmemColorBuffer =
            new NativeArray(maxInstances, Allocator.Persistent, NativeArrayOptions.ClearMemory);

        // The two matrix properties have the same window-relative offsets in every window, so their
        // metadata is built once: obj2world at offset 0, world2obj right after all obj2world matrices.
        int objectToWorldID = Shader.PropertyToID("unity_ObjectToWorld");
        int worldToObjectID = Shader.PropertyToID("unity_WorldToObject");
        MetadataValue objectToWorld = CreateMetadataValue(objectToWorldID, 0, true);
        MetadataValue worldToObject =
            CreateMetadataValue(worldToObjectID, m_maxInstancePerWindow * kMatrixBytes, true);
        // Data described by the derived class starts after both matrix arrays.
        int derivedDataOffset = m_maxInstancePerWindow * 2 * kMatrixBytes;

        // Register one batch per window of the GPU buffer.
        m_batchIDs = new BatchID[m_windowCount];
        for (int b = 0; b < m_windowCount; b++)
        {
            // The hook is still invoked once per window, as before, in case an override relies on that.
            var derivedMetadata = ProInitBatchMetadata(derivedDataOffset, m_maxInstancePerWindow);
            var windowMetadata = new NativeArray<MetadataValue>(
                2 + derivedMetadata.Length, Allocator.Temp, NativeArrayOptions.UninitializedMemory);
            windowMetadata[0] = objectToWorld;
            windowMetadata[1] = worldToObject;
            for (int i = 0; i < derivedMetadata.Length; i++)
            {
                windowMetadata[2 + i] = derivedMetadata[i];
            }

            // Byte offset of this window inside the GPU buffer.
            int offset = b * m_alignedGPUWindowSize;
            m_batchIDs[b] = m_BatchRendererGroup.AddBatch(windowMetadata, m_GPUPersistentInstanceData.bufferHandle,
                (uint)offset, UseConstantBuffer ? (uint)m_alignedGPUWindowSize : 0);

            windowMetadata.Dispose();
            derivedMetadata.Dispose();
        }

        // Global bounds come from the overridable hook (very large by default so the group is not culled).
        UnityEngine.Bounds bounds = ProGetBounds();
        m_BatchRendererGroup.SetGlobalBounds(bounds);

        // Register the mesh and the material when they are assigned.
        if (m_samples.Mesh)
        {
            m_meshID = m_BatchRendererGroup.RegisterMesh(m_samples.Mesh);
        }
        if (m_samples.Material)
        {
            m_materialID = m_BatchRendererGroup.RegisterMaterial(m_samples.Material);
        }

        m_initialized = true;
        return true;
    }
    /// <summary>
    /// Returns the global bounds handed to the BatchRendererGroup in Init. The default is a box
    /// centred on the origin that is 1048576 units long on every axis; override to supply tighter bounds.
    /// </summary>
    /// <returns>The bounds to pass to <c>SetGlobalBounds</c>.</returns>
    protected virtual Bounds ProGetBounds()
    {
        const float kEdgeLength = 1048576.0f;
        Vector3 center = Vector3.zero;
        Vector3 size = new Vector3(kEdgeLength, kEdgeLength, kEdgeLength);
        return new Bounds(center, size);
    }
    /// <summary>
    /// Hook that lets a derived class describe additional per-instance properties. Init calls it once per
    /// window, appends the returned entries after the two matrix entries, and disposes the returned array.
    /// The default implementation returns an empty array.
    /// </summary>
    /// <param name="startOffset">Byte offset passed by Init: the first byte after both matrix arrays within a window.</param>
    /// <param name="count">Number of instances per window.</param>
    /// <returns>A temporary array of extra metadata entries; empty by default.</returns>
    // NOTE(review): the NativeArray element type argument is not visible in this excerpt — confirm.
    protected virtual NativeArray ProInitBatchMetadata(int startOffset,int count)
    {
        var noExtraMetadata = new NativeArray(0, Allocator.Temp, NativeArrayOptions.UninitializedMemory);
        return noExtraMetadata;
    }
    /// <summary>
    /// Records the number of active instances and uploads the transform data from the system-memory
    /// buffer to the start of the GPU instance-data buffer.
    /// </summary>
    /// <param name="instanceCount">Number of instances to render; must lie between 0 and the maximum passed to Init.</param>
    /// <returns>
    /// <c>true</c> when the data was uploaded; <c>false</c> when the container is not initialized or
    /// <paramref name="instanceCount"/> is out of range.
    /// </returns>
    protected bool UploadTransformData(int instanceCount)
    {
        // Before Init there is no GPU buffer to write to.
        if (!m_initialized)
            return false;

        // The unsigned comparison rejects negative counts as well as counts above the maximum.
        if ((uint)instanceCount > (uint)m_maxInstances)
            return false;

        m_instanceCount = instanceCount;

        // One window's worth of transforms is uploaded: two entries per instance. In constant-buffer mode
        // a window can hold more instances than the mirror was allocated for (Init sizes it from
        // maxInstances), so the count is clamped to keep SetData inside the source array.
        int elementCount = Math.Min(m_maxInstancePerWindow * 2, m_transfromBuffer.Length);
        m_GPUPersistentInstanceData.SetData(m_transfromBuffer, 0, 0, elementCount);

        return true;
    }
    /// <summary>
    /// Uploads the transform data for <paramref name="instanceCount"/> instances and then lets every
    /// entry of <paramref name="shaderBinds"/> write its own data into the same GPU buffer.
    /// </summary>
    /// <param name="instanceCount">Number of instances to render; must lie between 0 and the maximum passed to Init.</param>
    /// <param name="shaderBinds">
    /// Optional extra uploaders. Each entry has <c>SetData(gpuBuffer, instanceCount)</c> called on it.
    /// NOTE(review): the list's element type argument is not visible in this excerpt — confirm.
    /// </param>
    /// <returns><c>true</c> when the upload happened; <c>false</c> when the transform upload was rejected.</returns>
    public bool UploadGpuData(int instanceCount,List shaderBinds=null)
    {
        // Validation, the instance-count update and the transform upload are shared with
        // UploadTransformData instead of being duplicated here.
        if (!UploadTransformData(instanceCount))
            return false;

        if (shaderBinds != null)
        {
            foreach (var bind in shaderBinds)
            {
                bind.SetData(m_GPUPersistentInstanceData, m_instanceCount);
            }
        }

        return true;
    }
    /// <summary>
    /// Releases everything allocated by Init: the batches, the mesh and material registrations, the
    /// BatchRendererGroup, the GPU buffer and both system-memory buffers. Calling it again, or calling
    /// it before Init, does nothing.
    /// </summary>
    public void Shutdown()
    {
        if (!m_initialized)
            return;

        // Clear the flag first so the culling callback stops emitting draw commands and a repeated call
        // cannot dispose the same objects twice.
        m_initialized = false;
        m_instanceCount = 0;

        for (int b = 0; b < m_batchIDs.Length; b++)
        {
            m_BatchRendererGroup.RemoveBatch(m_batchIDs[b]);
        }

        // Init only registers the mesh/material when they are assigned, so only unregister valid IDs.
        if (!m_materialID.Equals(BatchMaterialID.Null))
        {
            m_BatchRendererGroup.UnregisterMaterial(m_materialID);
            m_materialID = BatchMaterialID.Null;
        }
        if (!m_meshID.Equals(BatchMeshID.Null))
        {
            m_BatchRendererGroup.UnregisterMesh(m_meshID);
            m_meshID = BatchMeshID.Null;
        }

        m_BatchRendererGroup.Dispose();
        m_BatchRendererGroup = null;
        m_GPUPersistentInstanceData.Dispose();
        m_GPUPersistentInstanceData = null;

        // Both mirrors are allocated with Allocator.Persistent in Init and must be freed explicitly.
        // m_sysmemColorBuffer is public, so guard against it having been disposed elsewhere.
        if (m_transfromBuffer.IsCreated)
        {
            m_transfromBuffer.Dispose();
        }
        if (m_sysmemColorBuffer.IsCreated)
        {
            m_sysmemColorBuffer.Dispose();
        }
    }
    /// <summary>
    /// Exposes the system-memory transform buffer together with the GPU buffer dimensions so that
    /// callers can fill the buffer with new content before it is uploaded.
    /// </summary>
    /// <param name="totalSize">Receives the total size of the GPU buffer in bytes.</param>
    /// <param name="alignedWindowSize">Receives the size of one window in bytes.</param>
    /// <returns>The system-memory transform buffer.</returns>
    public NativeArray GetSysmemBuffer(out int totalSize, out int alignedWindowSize)
    {
        (totalSize, alignedWindowSize) = (m_totalGpuBufferSize, m_alignedGPUWindowSize);
        return m_transfromBuffer;
    }
    /// <summary>
    /// Builds a metadata entry for one shader property. The value carries the GPU offset, and bit 31
    /// marks the property as having a different value for every instance.
    /// </summary>
    /// <param name="nameID">Shader property name ID.</param>
    /// <param name="gpuOffset">Offset of the property data in the GPU buffer.</param>
    /// <param name="isPerInstance">Whether the property differs per instance.</param>
    /// <returns>The metadata entry.</returns>
    protected MetadataValue CreateMetadataValue(int nameID, int gpuOffset, bool isPerInstance)
    {
        // Highest bit (bit 31) of the value is the per-instance flag.
        const uint kIsPerInstanceBit = 0x80000000;

        uint packedValue = (uint)gpuOffset;
        if (isPerInstance)
        {
            packedValue |= kIsPerInstanceBit;
        }

        MetadataValue entry = new MetadataValue();
        entry.NameID = nameID;
        entry.Value = packedValue;
        return entry;
    }
    /// <summary>
    /// Allocates an uninitialized native array of <paramref name="count"/> elements for the output of
    /// the BRG culling callback, using <see cref="Allocator.TempJob"/>.
    /// </summary>
    /// <typeparam name="T">Unmanaged element type.</typeparam>
    /// <param name="count">Number of elements to allocate.</param>
    /// <returns>Pointer to the allocated memory.</returns>
    private static T* Malloc<T>(uint count) where T : unmanaged
    {
        return (T*)UnsafeUtility.Malloc(
            UnsafeUtility.SizeOf<T>() * count,
            UnsafeUtility.AlignOf<T>(),
            Allocator.TempJob);
    }
    /// <summary>
    /// Per-frame BRG entry point. Fills <paramref name="cullingOutput"/> with one draw range and one
    /// draw command per used window. No frustum culling is performed: every active instance is
    /// reported as visible.
    /// </summary>
    /// <param name="rendererGroup">The renderer group invoking the callback (unused).</param>
    /// <param name="cullingContext">Culling context (unused).</param>
    /// <param name="cullingOutput">Receives the draw commands in <c>drawCommands[0]</c>.</param>
    /// <param name="userContext">User context passed at construction (unused).</param>
    /// <returns>An empty job handle; all work is done synchronously inside the callback.</returns>
    public virtual JobHandle OnPerformCulling(BatchRendererGroup rendererGroup, BatchCullingContext cullingContext,
        BatchCullingOutput cullingOutput, IntPtr userContext)
    {
        // Nothing can be drawn until Init has run (or after Shutdown).
        if (!m_initialized)
            return new JobHandle();

        // One draw command per window that contains at least one instance (rounded up).
        int maxInstancePerDrawCommand = m_maxInstancePerWindow;
        int drawCommandCount = (m_instanceCount + maxInstancePerDrawCommand - 1) / maxInstancePerDrawCommand;

        BatchCullingOutputDrawCommands drawCommands = new BatchCullingOutputDrawCommands();
        drawCommands.drawCommandCount = drawCommandCount;

        // No per-instance sorting positions are provided. These are set before the struct is copied into
        // cullingOutput; assigning them afterwards would only change the local copy.
        drawCommands.instanceSortingPositions = null;
        drawCommands.instanceSortingPositionFloatCount = 0;

        // A single draw range that every draw command belongs to.
        drawCommands.drawRangeCount = 1;
        drawCommands.drawRanges = Malloc<BatchDrawRange>(1);
        drawCommands.drawRanges[0] = new BatchDrawRange
        {
            drawCommandsBegin = 0,
            drawCommandsCount = (uint)drawCommandCount,
            filterSettings = new BatchFilterSettings
            {
                renderingLayerMask = 1,
                layer = 0,
                motionMode = MotionVectorGenerationMode.Camera,
                // Shadow and sorting behaviour comes from the settings object passed to Init.
                shadowCastingMode = m_samples.castShadows ? ShadowCastingMode.On : ShadowCastingMode.Off,
                receiveShadows = m_samples.receiveShadows,
                staticShadowCaster = m_samples.staticShadowCaster,
                allDepthSorted = m_samples.allDepthSorted
            }
        };

        if (drawCommandCount > 0)
        {
            // Without culling, the visibility list of every draw command is {0,1,2,...}, so one shared
            // list sized for the fullest draw command is enough.
            int visibilityArraySize = Math.Min(maxInstancePerDrawCommand, m_instanceCount);
            drawCommands.visibleInstanceCount = visibilityArraySize;
            drawCommands.visibleInstances = Malloc<int>((uint)visibilityArraySize);
            for (int i = 0; i < visibilityArraySize; i++)
            {
                drawCommands.visibleInstances[i] = i;
            }

            // One BatchDrawCommand per used window (a single one in raw-buffer mode).
            drawCommands.drawCommands = Malloc<BatchDrawCommand>((uint)drawCommandCount);
            int remaining = m_instanceCount;
            for (int b = 0; b < drawCommandCount; b++)
            {
                // Every window is full except possibly the last one.
                int inBatchCount = Math.Min(remaining, maxInstancePerDrawCommand);
                drawCommands.drawCommands[b] = new BatchDrawCommand
                {
                    // All draw commands read the shared visibility list from its start.
                    visibleOffset = 0,
                    visibleCount = (uint)inBatchCount,
                    batchID = m_batchIDs[b],
                    materialID = m_materialID,
                    meshID = m_meshID,
                    submeshIndex = 0,
                    splitVisibilityMask = 0xff,
                    flags = BatchDrawCommandFlags.None,
                    sortingPosition = 0
                };
                remaining -= inBatchCount;
            }
        }

        // Publish the fully populated struct.
        cullingOutput.drawCommands[0] = drawCommands;

        return new JobHandle();
    }
    
}