BRGRenderBasic.cs 21 KB


  1. using System;
  2. using System.Collections.Generic;
  3. using Core.BRG;
  4. using Unity.Collections;
  5. using Unity.Collections.LowLevel.Unsafe;
  6. using Unity.Burst;
  7. using Unity.Mathematics;
  8. using Unity.Jobs;
  9. using UnityEngine;
  10. using UnityEngine.Rendering;
  /*
   * Renders instanced geometry through a BatchRendererGroup (BRG).
   *
   * Per-window GPU data layout handled by this class:
   *   - obj2world matrices (3 * float4 per instance)
   *   - world2obj matrices (3 * float4 per instance)
   *   - any additional per-instance properties described by subclasses
   *     through ProInitBatchMetadata (they start right after the matrices)
   *
   * Do not forget that the data is stored SoA (Structure of Arrays).
   */
  /// <summary>
  /// BRG container: manages instanced rendering through a <see cref="BatchRendererGroup"/>.
  /// </summary>
  public unsafe class BRGRenderBasic
  {
      // Size in bytes of one packed 3x4 matrix (3 * float4).
      private const int kPackedMatrixBytes = 3 * 16;

      // Bytes every instance needs for obj2world + world2obj.
      private const int kMatrixBytesPerInstance = 2 * kPackedMatrixBytes;

      // True when the BRG instance data lives in a constant buffer (UBO) instead of a raw buffer
      // (the original author notes this is the case in GLES mode).
      private bool UseConstantBuffer => BatchRendererGroup.BufferTarget == BatchBufferTarget.ConstantBuffer;

      private int m_maxInstances;                 // maximum number of instances in this container
      private int m_instanceCount;                // current number of instances
      private int m_alignedGPUWindowSize;         // size in bytes of one BRG window
      private int m_maxInstancePerWindow;         // maximum number of instances per window
      private int m_windowCount;                  // number of windows (1 in SSBO mode, n in UBO mode)
      private int m_totalGpuBufferSize;           // total size in bytes of the GPU buffer
      private NativeArray<float3x4> m_transformBuffer; // system-memory copy of the matrix data
      // Allocated in Init (one float4 per instance). This class never uploads it itself.
      public NativeArray<float4> m_sysmemColorBuffer;
      private bool m_initialized;                 // set once Init succeeded, cleared by Shutdown
      private int m_instanceSize;                 // size of one instance in bytes (matrices included)
      private BatchID[] m_batchIDs;               // one batch ID per window
      private BatchMaterialID m_materialID;       // registered material ID
      private BatchMeshID m_meshID;               // registered mesh ID
      private BatchRendererGroup m_BatchRendererGroup; // the BRG object
      private GraphicsBuffer m_GPUPersistentInstanceData; // GPU buffer (SSBO or UBO)
      protected BRGSamples m_samples;

      /// <summary>
      /// Creates the BRG object, allocates the GPU and system-memory buffers and registers
      /// one batch per window.
      /// </summary>
      /// <param name="samples">Render settings; its Mesh, Material and shadow/sorting flags are read by this class.</param>
      /// <param name="maxInstances">Maximum number of instances. Must be greater than zero.</param>
      /// <param name="instanceSize">
      /// Size in bytes of the per-instance data excluding the two matrices; the size of
      /// obj2world and world2obj is added internally. Must not be negative.
      /// </param>
      /// <returns>
      /// True on success. False when the arguments are invalid or when a single instance does not
      /// fit into one constant-buffer window; in that case nothing has been allocated.
      /// </returns>
      protected bool Init(BRGSamples samples, int maxInstances, int instanceSize)
      {
          // Reject invalid sizes before anything is allocated so a failed Init leaks nothing.
          if (maxInstances <= 0 || instanceSize < 0)
          {
              return false;
          }

          // Add room for the obj2world and world2obj matrices.
          instanceSize += kMatrixBytesPerInstance;

          m_instanceSize = instanceSize;
          m_instanceCount = 0;
          m_maxInstances = maxInstances;
          m_samples = samples;

          // BRG uses one large GPU buffer. With a constant buffer it is split into several
          // "windows" of BatchRendererGroup.GetConstantBufferMaxWindowSize() bytes each.
          bool useConstantBuffer = UseConstantBuffer;
          if (useConstantBuffer)
          {
              m_alignedGPUWindowSize = BatchRendererGroup.GetConstantBufferMaxWindowSize();
              m_maxInstancePerWindow = m_alignedGPUWindowSize / instanceSize;
              if (m_maxInstancePerWindow <= 0)
              {
                  // One instance is larger than a window: the window count below would divide by zero.
                  return false;
              }

              // Number of windows, rounded up.
              m_windowCount = (m_maxInstances + m_maxInstancePerWindow - 1) / m_maxInstancePerWindow;
          }
          else
          {
              // Single window holding every instance, rounded up to a multiple of 16 bytes.
              m_alignedGPUWindowSize = (m_maxInstances * instanceSize + 15) & (-16);
              m_maxInstancePerWindow = maxInstances;
              m_windowCount = 1;
          }

          m_totalGpuBufferSize = m_windowCount * m_alignedGPUWindowSize;

          // Create the BRG object with our culling callback.
          m_BatchRendererGroup = new BatchRendererGroup(this.OnPerformCulling, IntPtr.Zero);

          // Constant buffer: 16-byte elements. Raw buffer: 4-byte elements.
          m_GPUPersistentInstanceData = useConstantBuffer
              ? new GraphicsBuffer(GraphicsBuffer.Target.Constant, m_totalGpuBufferSize / 16, 16)
              : new GraphicsBuffer(GraphicsBuffer.Target.Raw, m_totalGpuBufferSize / 4, 4);

          // System-memory copies: two matrices per instance, one color per instance.
          m_transformBuffer =
              new NativeArray<float3x4>(maxInstances * 2, Allocator.Persistent, NativeArrayOptions.ClearMemory);
          m_sysmemColorBuffer =
              new NativeArray<float4>(maxInstances, Allocator.Persistent, NativeArrayOptions.ClearMemory);

          // The two matrix properties are identical for every window, so describe them once:
          // obj2world at offset 0, world2obj right after all obj2world matrices of the window.
          int objectToWorldID = Shader.PropertyToID("unity_ObjectToWorld");
          int worldToObjectID = Shader.PropertyToID("unity_WorldToObject");
          var matrixMetadata =
              new NativeArray<MetadataValue>(2, Allocator.Temp, NativeArrayOptions.UninitializedMemory);
          matrixMetadata[0] = CreateMetadataValue(objectToWorldID, 0, true);
          matrixMetadata[1] = CreateMetadataValue(worldToObjectID, m_maxInstancePerWindow * kPackedMatrixBytes, true);

          // Subclass properties start right after both matrix arrays of the window.
          int extraStartOffset = m_maxInstancePerWindow * kMatrixBytesPerInstance;

          // Register one batch per window of the large GPU buffer.
          m_batchIDs = new BatchID[m_windowCount];
          for (int b = 0; b < m_windowCount; b++)
          {
              // Asked once per window, as before, because the hook is virtual.
              NativeArray<MetadataValue> extraMetadata = ProInitBatchMetadata(extraStartOffset, m_maxInstancePerWindow);
              var windowMetadata = new NativeArray<MetadataValue>(
                  matrixMetadata.Length + extraMetadata.Length, Allocator.Temp, NativeArrayOptions.UninitializedMemory);

              // Element-wise copies on purpose: they stay valid when extraMetadata is empty.
              for (int i = 0; i < matrixMetadata.Length; i++)
              {
                  windowMetadata[i] = matrixMetadata[i];
              }

              for (int i = 0; i < extraMetadata.Length; i++)
              {
                  windowMetadata[matrixMetadata.Length + i] = extraMetadata[i];
              }

              // Byte offset of this window inside the GPU buffer.
              int offset = b * m_alignedGPUWindowSize;
              m_batchIDs[b] = m_BatchRendererGroup.AddBatch(
                  windowMetadata,
                  m_GPUPersistentInstanceData.bufferHandle,
                  (uint)offset,
                  useConstantBuffer ? (uint)m_alignedGPUWindowSize : 0);

              windowMetadata.Dispose();
              extraMetadata.Dispose();
          }

          // The metadata description is no longer needed.
          matrixMetadata.Dispose();

          // Global bounds of the whole group (very large by default, see ProGetBounds).
          UnityEngine.Bounds bounds = ProGetBounds();
          m_BatchRendererGroup.SetGlobalBounds(bounds);

          // Register mesh and material when they are provided.
          if (m_samples.Mesh)
          {
              m_meshID = m_BatchRendererGroup.RegisterMesh(m_samples.Mesh);
          }

          if (m_samples.Material)
          {
              m_materialID = m_BatchRendererGroup.RegisterMaterial(m_samples.Material);
          }

          m_initialized = true;
          return true;
      }

      /// <summary>
      /// Returns the global bounds passed to the BRG. The default is a 1048576-unit cube centered
      /// at the origin; subclasses may override it.
      /// </summary>
      protected virtual Bounds ProGetBounds()
      {
          return new Bounds(new Vector3(0, 0, 0), new Vector3(1048576.0f, 1048576.0f, 1048576.0f));
      }

      /// <summary>
      /// Hook for subclasses to describe additional per-instance shader properties.
      /// Called once per window by <see cref="Init"/>, which disposes the returned array.
      /// </summary>
      /// <param name="startOffset">Byte offset inside the window at which the extra properties start.</param>
      /// <param name="count">Maximum number of instances per window.</param>
      /// <returns>The extra metadata entries; empty by default.</returns>
      protected virtual NativeArray<MetadataValue> ProInitBatchMetadata(int startOffset, int count)
      {
          return new NativeArray<MetadataValue>(0, Allocator.Temp, NativeArrayOptions.UninitializedMemory);
      }

      /// <summary>
      /// Records the current instance count and uploads the matrix data to the GPU buffer.
      /// </summary>
      /// <param name="instanceCount">Number of instances to render; must be within [0, maxInstances].</param>
      /// <returns>False when the container is not initialized or the count is out of range.</returns>
      protected bool UploadTransformData(int instanceCount)
      {
          if (!m_initialized)
          {
              return false;
          }

          // The unsigned comparison also rejects negative counts.
          if ((uint)instanceCount > (uint)m_maxInstances)
          {
              return false;
          }

          m_instanceCount = instanceCount;

          // Two matrices per instance. Clamp to the system-memory array so that a constant-buffer
          // window larger than maxInstances does not make SetData read past the end of the array.
          // NOTE(review): only the first window's worth of matrices is uploaded; confirm whether
          // multi-window (UBO) setups need an upload per window.
          int elementCount = System.Math.Min(m_maxInstancePerWindow * 2, m_transformBuffer.Length);
          m_GPUPersistentInstanceData.SetData(m_transformBuffer, 0, 0, elementCount);
          return true;
      }

      /// <summary>
      /// Uploads the matrix data and then lets every shader bind upload its own data
      /// into the same GPU buffer.
      /// </summary>
      /// <param name="instanceCount">Number of instances to render; must be within [0, maxInstances].</param>
      /// <param name="shaderBinds">Optional binds whose SetData is called with the GPU buffer.</param>
      /// <returns>False when the container is not initialized or the count is out of range.</returns>
      public bool UploadGpuData(int instanceCount, List<BatchShaderBind> shaderBinds = null)
      {
          if (!UploadTransformData(instanceCount))
          {
              return false;
          }

          if (shaderBinds != null)
          {
              for (int i = 0; i < shaderBinds.Count; i++)
              {
                  shaderBinds[i].SetData(m_GPUPersistentInstanceData);
              }
          }

          return true;
      }

      /// <summary>
      /// Releases every batch, registration and buffer owned by this container.
      /// Safe to call more than once.
      /// </summary>
      public void Shutdown()
      {
          if (!m_initialized)
          {
              return;
          }

          // Clear the flag first so the culling callback stops using the resources
          // and a second Shutdown call becomes a no-op.
          m_initialized = false;

          for (int b = 0; b < m_windowCount; b++)
          {
              m_BatchRendererGroup.RemoveBatch(m_batchIDs[b]);
          }

          m_BatchRendererGroup.UnregisterMaterial(m_materialID);
          m_BatchRendererGroup.UnregisterMesh(m_meshID);
          m_BatchRendererGroup.Dispose();
          m_GPUPersistentInstanceData.Dispose();

          if (m_transformBuffer.IsCreated)
          {
              m_transformBuffer.Dispose();
          }

          // The color buffer is allocated with Allocator.Persistent in Init and must be released too.
          if (m_sysmemColorBuffer.IsCreated)
          {
              m_sysmemColorBuffer.Dispose();
          }
      }

      /// <summary>
      /// Returns the system-memory matrix buffer together with the buffer sizes so callers
      /// can fill it with new content.
      /// </summary>
      /// <param name="totalSize">Total size of the GPU buffer in bytes.</param>
      /// <param name="alignedWindowSize">Size of one window in bytes.</param>
      /// <returns>The system-memory matrix buffer.</returns>
      public NativeArray<float3x4> GetSysmemBuffer(out int totalSize, out int alignedWindowSize)
      {
          totalSize = m_totalGpuBufferSize;
          alignedWindowSize = m_alignedGPUWindowSize;
          return m_transformBuffer;
      }

      /// <summary>
      /// Builds a 32-bit metadata value. Bit 31 marks the property as having a different
      /// value for each instance.
      /// </summary>
      /// <param name="nameID">Shader property name ID.</param>
      /// <param name="gpuOffset">Byte offset of the property data.</param>
      /// <param name="isPerInstance">Whether the property differs per instance.</param>
      /// <returns>The metadata value.</returns>
      protected MetadataValue CreateMetadataValue(int nameID, int gpuOffset, bool isPerInstance)
      {
          // Per-instance flag: the highest bit (bit 31).
          const uint kIsPerInstanceBit = 0x80000000;
          return new MetadataValue
          {
              NameID = nameID,
              // OR the flag into the offset for per-instance properties; otherwise keep the plain offset.
              Value = (uint)gpuOffset | (isPerInstance ? (kIsPerInstanceBit) : 0),
          };
      }

      /// <summary>
      /// Allocates an unmanaged array with Allocator.TempJob; used for the buffers handed
      /// to the BRG from the culling callback.
      /// </summary>
      /// <typeparam name="T">Element type.</typeparam>
      /// <param name="count">Number of elements.</param>
      /// <returns>Pointer to the allocated memory.</returns>
      private static T* Malloc<T>(uint count) where T : unmanaged
      {
          return (T*)UnsafeUtility.Malloc(
              UnsafeUtility.SizeOf<T>() * count,
              UnsafeUtility.AlignOf<T>(),
              Allocator.TempJob);
      }

      /// <summary>
      /// BRG culling callback. No culling is performed: every current instance is reported as
      /// visible, using one draw command per window.
      /// </summary>
      /// <param name="rendererGroup">The renderer group.</param>
      /// <param name="cullingContext">Culling context (unused).</param>
      /// <param name="cullingOutput">Receives the draw commands.</param>
      /// <param name="userContext">User context (unused).</param>
      /// <returns>An empty job handle; all work is done synchronously.</returns>
      public virtual JobHandle OnPerformCulling(BatchRendererGroup rendererGroup, BatchCullingContext cullingContext,
          BatchCullingOutput cullingOutput, IntPtr userContext)
      {
          if (!m_initialized)
          {
              return new JobHandle();
          }

          BatchCullingOutputDrawCommands drawCommands = new BatchCullingOutputDrawCommands();

          // One draw command per used window (always at most 1 in SSBO mode).
          int drawCommandCount = (m_instanceCount + m_maxInstancePerWindow - 1) / m_maxInstancePerWindow;
          int maxInstancePerDrawCommand = m_maxInstancePerWindow;
          drawCommands.drawCommandCount = drawCommandCount;

          // A single BatchDrawRange that every draw command belongs to.
          drawCommands.drawRangeCount = 1;
          drawCommands.drawRanges = Malloc<BatchDrawRange>(1);
          drawCommands.drawRanges[0] = new BatchDrawRange
          {
              drawCommandsBegin = 0,
              drawCommandsCount = (uint)drawCommandCount,
              filterSettings = new BatchFilterSettings
              {
                  renderingLayerMask = 1,
                  layer = 0,
                  motionMode = MotionVectorGenerationMode.Camera,
                  // Shadow and sorting behaviour comes from the sample settings.
                  shadowCastingMode = m_samples.castShadows ? ShadowCastingMode.On : ShadowCastingMode.Off,
                  receiveShadows = m_samples.receiveShadows,
                  staticShadowCaster = m_samples.staticShadowCaster,
                  allDepthSorted = m_samples.allDepthSorted
              }
          };

          if (drawCommandCount > 0)
          {
              // Without culling the visibility array is always {0,1,2,3,...}, so one array sized
              // for the fullest draw command is shared by all of them.
              int visibilityArraySize = maxInstancePerDrawCommand;
              if (m_instanceCount < visibilityArraySize)
              {
                  visibilityArraySize = m_instanceCount;
              }

              drawCommands.visibleInstances = Malloc<int>((uint)visibilityArraySize);
              drawCommands.visibleInstanceCount = visibilityArraySize;
              for (int i = 0; i < visibilityArraySize; i++)
              {
                  drawCommands.visibleInstances[i] = i;
              }

              drawCommands.drawCommands = Malloc<BatchDrawCommand>((uint)drawCommandCount);

              // Instances not yet assigned to a draw command.
              int left = m_instanceCount;
              for (int b = 0; b < drawCommandCount; b++)
              {
                  int inBatchCount = left > maxInstancePerDrawCommand ? maxInstancePerDrawCommand : left;
                  drawCommands.drawCommands[b] = new BatchDrawCommand
                  {
                      // Every draw command reads the shared {0,1,2,3...} visibility array from its start.
                      visibleOffset = (uint)0,
                      visibleCount = (uint)inBatchCount,
                      batchID = m_batchIDs[b],
                      materialID = m_materialID,
                      meshID = m_meshID,
                      submeshIndex = 0,
                      splitVisibilityMask = 0xff,
                      flags = BatchDrawCommandFlags.None,
                      sortingPosition = 0
                  };
                  left -= inBatchCount;
              }
          }

          // No per-instance sorting positions. These must be set BEFORE the struct is copied into
          // the output: BatchCullingOutputDrawCommands is a value type, so later writes would be lost.
          drawCommands.instanceSortingPositions = null;
          drawCommands.instanceSortingPositionFloatCount = 0;

          cullingOutput.drawCommands[0] = drawCommands;

          return new JobHandle();
      }
  }