概述
代码版本5.3。书接上回
https://www.yuque.com/chenweilin-tryw7/gbfomp/enpney50q2g2syc9?singleDoc# 《UE 渲染入门 v3》
这里是哪里呢?
BeginRenderingViewFamilies 这个函数准备开始画 3D 场景了
这个函数前面通过 SendAllEndOfFrameUpdates 确保之前已经画完了
SendAllEndOfFrameUpdates 做了一些 tick actor component 结束帧相关,并且把 transform 和其他数据传递给了 GPU
然后做了一些 xxxxxxxxxxxxxxxxx
然后往渲染线程有了如下调用
RenderViewFamilies_RenderThread(RHICmdList, LocalSceneRenderers);
其中有一步
SceneRenderer->Render(GraphBuilder);
这个 SceneRenderer 可能是延迟渲染器,也可能是移动端渲染器
比如 FDeferredShadingSceneRenderer::Render
这个函数一路向下翻翻翻
就到了这里
FGPUScene::Update
// GPU-scene update scope inside the scene renderer's Render(): updates the GPU scene,
// uploads per-view dynamic primitive data, starts deferred instance culling and updates
// the physics field.
{
RDG_CSV_STAT_EXCLUSIVE_SCOPE(GraphBuilder, UpdateGPUScene);
RDG_GPU_STAT_SCOPE(GraphBuilder, GPUSceneUpdate);
if (bIsFirstSceneRenderer) // only the first scene renderer requests the RHI resource flush
{
GraphBuilder.SetFlushResourcesRHI();
}
Scene->GPUScene.Update(GraphBuilder, GetSceneUniforms(), *Scene, ExternalAccessQueue); // GPU scene update
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ViewIndex++) // iterate over all views
{
FViewInfo& View = Views[ViewIndex];
RDG_GPU_MASK_SCOPE(GraphBuilder, View.GPUMask);
Scene->GPUScene.UploadDynamicPrimitiveShaderDataForView(GraphBuilder, *Scene, View, ExternalAccessQueue); // upload the view's dynamic primitive shader data
Scene->GPUScene.DebugRender(GraphBuilder, *Scene, GetSceneUniforms(), View);
}
InstanceCullingManager.BeginDeferredCulling(GraphBuilder, Scene->GPUScene); // begin deferred instance culling
if (Views.Num() > 0)
{
FViewInfo& View = Views[0];
Scene->UpdatePhysicsField(GraphBuilder, View); // physics field update, driven by the first view only
}
}
首先看GPU SCENE Update
GPU SCENE是 CPU Scene的一个镜像
// Entry point for the per-frame GPU-scene update.
// Does nothing when the GPU scene is disabled; otherwise forwards all arguments to
// UpdateInternal under an all-GPU mask scope.
void FGPUScene::Update(FRDGBuilder& GraphBuilder, FSceneUniformBuffer& SceneUB, FScene& Scene, FRDGExternalAccessQueue& ExternalAccessQueue)
{
    // Disabled GPU scene: nothing to update.
    if (!bIsEnabled)
    {
        return;
    }

    RDG_GPU_MASK_SCOPE(GraphBuilder, FRHIGPUMask::All());

    // The update is expected to run while bInBeginEndBlock is set.
    ensure(bInBeginEndBlock);

    UpdateInternal(GraphBuilder, SceneUB, Scene, ExternalAccessQueue);
}
void FGPUScene::UpdateInternal(FRDGBuilder& GraphBuilder, FSceneUniformBuffer& SceneUB, FScene& Scene, FRDGExternalAccessQueue& ExternalAccessQueue)
{
// 性能分析和条件检查跳过
// 。。。
LastDeferredGPUWritePass = EGPUSceneGPUWritePass::None; // 上一次GPU延迟写的实际,可以是None和上一次渲染不透明物体后
if ((CVarGPUSceneUploadEveryFrame.GetValueOnRenderThread() != 0) || bUpdateAllPrimitives) // 控制台变量,是否每一帧需要更新上传
{
// 把所有图元设置为,全部需要改变
PrimitivesToUpdate.Reset();
for (int32 Index = 0; Index < Scene.Primitives.Num(); ++Index)
{
PrimitiveDirtyState[Index] |= EPrimitiveDirtyState::ChangedAll;
PrimitivesToUpdate.Add(Index);
}
// Clear the full instance data range 清空 实例范围的结构体
InstanceRangesToClear.Empty();
InstanceRangesToClear.Add(FInstanceRange{ 0U, uint32(GetInstanceIdUpperBoundGPU()) });
bUpdateAllPrimitives = false;
}
超出范围的 ID 直接移除(RemoveAtSwap:用数组末尾元素填补被移除的位置,不保持原有顺序)
void FGPUScene::UpdateInternal(FRDGBuilder& GraphBuilder, FSceneUniformBuffer& SceneUB, FScene& Scene, FRDGExternalAccessQueue& ExternalAccessQueue)
{
// 。。。
// Store in GPU-scene to enable validation that update has been carried out.
SceneFrameNumber = Scene.GetFrameNumber();
// Strip all out-of-range ID's (left over because of deletes) so we don't need to check later
for (int32 Index = 0; Index < PrimitivesToUpdate.Num();)
{
if (PrimitivesToUpdate[Index] >= Scene.PrimitiveSceneProxies.Num())
{
PrimitivesToUpdate.RemoveAtSwap(Index, 1, false);
}
else
{
++Index;
}
}
check(!BufferState.IsValid());
如何更新 buffer 的
FUploadDataSourceAdapterScenePrimitives& Adapter = *GraphBuilder.AllocObject<FUploadDataSourceAdapterScenePrimitives>(Scene, SceneFrameNumber, MoveTemp(PrimitivesToUpdate), MoveTemp(PrimitiveDirtyState));
UpdateBufferState(GraphBuilder, SceneUB, Scene, Adapter, true);
// Run a pass that clears (Sets ID to invalid) any instances that need it
AddClearInstancesPass(GraphBuilder);
核心是最后一行的
SceneUB.Set(SceneUB::GPUScene, ShaderParameters);
前面都在准备这个参数,然后设置了一些shader参数
// Resizes the GPU-scene buffers tracked in BufferState, creates SRVs for them, and
// publishes the resulting ShaderParameters to SceneUB under SceneUB::GPUScene.
//
// UploadDataSourceAdapter: only its bUpdateNaniteMaterialTables flag is read in this excerpt.
// bIsMainUpdate: when true, the light data buffer is also (re)created.
template<typename FUploadDataSourceAdapter>
void FGPUScene::UpdateBufferState(FRDGBuilder& GraphBuilder, FSceneUniformBuffer& SceneUB, FScene& Scene, const FUploadDataSourceAdapter& UploadDataSourceAdapter, bool bIsMainUpdate)
{
// ... (code omitted in this excerpt)
// Every buffer is sized for at least InitialBufferSize elements.
constexpr int32 InitialBufferSize = 256;
const uint32 SizeReserve = FMath::RoundUpToPowerOfTwo(FMath::Max(DynamicPrimitivesOffset, InitialBufferSize)); // round up to the next power of two
BufferState.PrimitiveBuffer = ResizeStructuredBufferIfNeeded(GraphBuilder, PrimitiveBuffer, SizeReserve * sizeof(FPrimitiveSceneShaderData::Data), TEXT("GPUScene.PrimitiveData"));
// Clamp buffer to be smaller than the MAX_INSTANCE_ID.
const uint32 InstanceSceneDataSizeReserve = FMath::Min(MAX_INSTANCE_ID, FMath::RoundUpToPowerOfTwo(FMath::Max(InstanceSceneDataAllocator.GetMaxSize(), InitialBufferSize)));
// Instance scene data uses the SOA resize helper: NumArrays arrays, NumBytes in total.
FResizeResourceSOAParams ResizeParams;
ResizeParams.NumBytes = InstanceSceneDataSizeReserve * FInstanceSceneShaderData::GetEffectiveNumBytes();
ResizeParams.NumArrays = FInstanceSceneShaderData::GetDataStrideInFloat4s();
BufferState.InstanceSceneDataBuffer = ResizeStructuredBufferSOAIfNeeded(GraphBuilder, InstanceSceneDataBuffer, ResizeParams, TEXT("GPUScene.InstanceSceneData"));
// The SOA stride is stored both on the GPU scene and in the buffer state.
InstanceSceneDataSOAStride = InstanceSceneDataSizeReserve;
BufferState.InstanceSceneDataSOAStride = InstanceSceneDataSizeReserve;
// Payload buffer: the allocator size is counted in float4s, so convert to bytes before rounding.
const uint32 PayloadFloat4Count = FMath::Max(InstancePayloadDataAllocator.GetMaxSize(), InitialBufferSize);
const uint32 InstancePayloadDataSizeReserve = FMath::RoundUpToPowerOfTwo(PayloadFloat4Count * sizeof(FVector4f));
BufferState.InstancePayloadDataBuffer = ResizeStructuredBufferIfNeeded(GraphBuilder, InstancePayloadDataBuffer, InstancePayloadDataSizeReserve, TEXT("GPUScene.InstancePayloadData"));
const uint32 NumNodes = FMath::RoundUpToPowerOfTwo(FMath::Max(Scene.InstanceBVH.GetNumNodes(), InitialBufferSize));
BufferState.InstanceBVHBuffer = ResizeStructuredBufferIfNeeded(GraphBuilder, InstanceBVHBuffer, NumNodes * sizeof(FBVHNode), TEXT("InstanceBVH"));
const bool bNaniteEnabled = DoesPlatformSupportNanite(GMaxRHIShaderPlatform);
if (UploadDataSourceAdapter.bUpdateNaniteMaterialTables && bNaniteEnabled)
{
// Nanite draw commands build raster material tables.
Scene.WaitForCacheNaniteDrawCommandsTask();
for (int32 NaniteMeshPassIndex = 0; NaniteMeshPassIndex < ENaniteMeshPass::Num; ++NaniteMeshPassIndex)
{
Scene.NaniteMaterials[NaniteMeshPassIndex].UpdateBufferState(GraphBuilder, Scene.Primitives.Num());
}
}
const uint32 LightMapDataBufferSize = FMath::RoundUpToPowerOfTwo(FMath::Max(LightmapDataAllocator.GetMaxSize(), InitialBufferSize));
BufferState.LightmapDataBuffer = ResizeStructuredBufferIfNeeded(GraphBuilder, LightmapDataBuffer, LightMapDataBufferSize * sizeof(FLightmapSceneShaderData::Data), TEXT("GPUScene.LightmapData"));
BufferState.LightMapDataBufferSize = LightMapDataBufferSize;
if (bIsMainUpdate)
{
// NOTE(review): LightDataBufferSize is not referenced again in this excerpt; the buffer
// below is sized from Scene.Lights.Num() directly (minimum of one element).
const uint32 LightDataBufferSize = FMath::RoundUpToPowerOfTwo(FMath::Max(Scene.Lights.Num(), InitialBufferSize));
BufferState.LightDataBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(FLightSceneData), FMath::Max(1, Scene.Lights.Num())), TEXT("GPUScene.LightData"));
}
// Build the shader parameter block from the buffer state.
// NOTE(review): when bIsMainUpdate is false, BufferState.LightDataBuffer is not assigned in
// this function - presumably it was set by an earlier main update; confirm.
ShaderParameters.GPUSceneInstanceSceneData = GraphBuilder.CreateSRV(BufferState.InstanceSceneDataBuffer);
ShaderParameters.GPUSceneInstancePayloadData = GraphBuilder.CreateSRV(BufferState.InstancePayloadDataBuffer);
ShaderParameters.GPUScenePrimitiveSceneData = GraphBuilder.CreateSRV(BufferState.PrimitiveBuffer);
ShaderParameters.GPUSceneLightmapData = GraphBuilder.CreateSRV(BufferState.LightmapDataBuffer);
ShaderParameters.GPUSceneLightData = GraphBuilder.CreateSRV(BufferState.LightDataBuffer);
ShaderParameters.InstanceDataSOAStride = InstanceSceneDataSOAStride;
ShaderParameters.NumScenePrimitives = NumScenePrimitives;
ShaderParameters.NumInstances = InstanceSceneDataAllocator.GetMaxSize();
ShaderParameters.GPUSceneFrameNumber = GetSceneFrameNumber();
// Publish the parameters to the scene uniform buffer.
SceneUB.Set(SceneUB::GPUScene, ShaderParameters);
}
回退
void FGPUScene::UpdateInternal(FRDGBuilder& GraphBuilder, FSceneUniformBuffer& SceneUB, FScene& Scene, FRDGExternalAccessQueue& ExternalAccessQueue)
{
// 。。。
// Pull out instances needing only primitive ID update, they still have to go to the general update such that the primitive gets updated (as it moved)
{
FInstanceGPULoadBalancer IdOnlyUpdateData;
for (int32 Index = 0; Index < Adapter.PrimitivesToUpdate.Num(); ++Index)
{
int32 PrimitiveId = Adapter.PrimitivesToUpdate[Index];
check(PrimitiveId < Scene.PrimitiveSceneProxies.Num());
if (Adapter.PrimitiveDirtyState[PrimitiveId] == EPrimitiveDirtyState::ChangedId)
{
const FPrimitiveSceneInfo* PrimitiveSceneInfo = Scene.Primitives[PrimitiveId];
check(PrimitiveSceneInfo->GetInstanceSceneDataOffset() >= 0 || PrimitiveSceneInfo->GetNumInstanceSceneDataEntries() == 0);
IdOnlyUpdateData.Add(PrimitiveSceneInfo->GetInstanceSceneDataOffset(), PrimitiveSceneInfo->GetNumInstanceSceneDataEntries(), GetPrimitiveID(Scene, PrimitiveId));
}
}
AddUpdatePrimitiveIdsPass(GraphBuilder, IdOnlyUpdateData);
}
// The adapter copies the IDs of primitives to update such that any that are (incorrectly) marked for update after are not lost.
PrimitivesToUpdate.Reset();
PrimitiveDirtyState.Init(EPrimitiveDirtyState::None, PrimitiveDirtyState.Num());
{
SCOPED_NAMED_EVENT(UpdateGPUScene, FColor::Green);
QUICK_SCOPE_CYCLE_COUNTER(STAT_UpdateGPUScene);
SCOPE_CYCLE_COUNTER(STAT_UpdateGPUSceneTime);
UploadGeneral<FUploadDataSourceAdapterScenePrimitives>(GraphBuilder, Scene, ExternalAccessQueue, Adapter);
UpdateGPULights(GraphBuilder, Scene);
}
UploadDynamicPrimitiveShaderDataForView
void FGPUScene::UploadDynamicPrimitiveShaderDataForViewInternal(FRDGBuilder& GraphBuilder, FScene& Scene, FViewInfo& View, FRDGExternalAccessQueue& ExternalAccessQueue, bool bIsShadowView)
{
FGPUScenePrimitiveCollector& Collector = View.DynamicPrimitiveCollector;
// Auto-commit if not done (should usually not be done, but sometimes the UploadDynamicPrimitiveShaderDataForViewInternal is called to ensure the
// CachedViewUniformShaderParameters is set on the view.
if (!Collector.bCommitted)
{
Collector.Commit();
}
提交了啥东西?
FGPUScenePrimitiveCollector.FPrimitiveData
/**
* Used to manage dynamic primitives for a given view, during InitViews the data is collected and then can be committed to the GPU-Scene.
* Once committed the range of indices are valid and can be used to calculate the PrimitiveIds.
*/
class FGPUScenePrimitiveCollector
{
// 。。。
// One collected dynamic primitive: its source data, shader parameters, instance count and
// local offsets.
struct FPrimitiveData
{
FMeshBatchDynamicPrimitiveData SourceData; // dynamic primitive/instance data for a mesh batch element (struct shown below)
const FPrimitiveUniformShaderParameters* ShaderParams = nullptr; // primitive uniform shader parameters (struct shown below)
uint32 NumInstances = 0;
// INDEX_NONE marks an offset that has not been assigned.
uint32 LocalInstanceSceneDataOffset = INDEX_NONE;
uint32 LocalPayloadDataOffset = INDEX_NONE;
};
/**
* Dynamic primitive/instance data for a mesh batch element.
*
* NOTES:
* - When applied to a FMeshBatchElement, data provided to the TConstArrayView members are expected to live until the end of the frame on the render thread
* - If `DataWriterGPU` is bound and the TConstArrayView members are left empty, the delegate is expected to write any missing data, as it will not be uploaded
*/
struct FMeshBatchDynamicPrimitiveData
{
TConstArrayView<FInstanceSceneData> InstanceSceneData; // FInstanceSceneData 其实就是一个坐标, 一个点和方向
TConstArrayView<FInstanceDynamicData> InstanceDynamicData;
TConstArrayView<FRenderBounds> InstanceLocalBounds; // AABB
TConstArrayView<float> InstanceCustomData;
FGPUSceneWriteDelegate DataWriterGPU;
EGPUSceneGPUWritePass DataWriterGPUPass = EGPUSceneGPUWritePass::None;
uint16 PayloadDataFlags = 0;
uint32 NumInstanceCustomDataFloats = 0;
FPrimitiveUniformShaderParameters
这里直接展开了一个类和对应函数
// Per-primitive uniform shader parameter struct, declared through the global shader
// parameter struct macros. Code is unchanged; only comments were translated.
BEGIN_GLOBAL_SHADER_PARAMETER_STRUCT(FPrimitiveUniformShaderParameters,ENGINE_API)
SHADER_PARAMETER(uint32, Flags) // flag bits
SHADER_PARAMETER(uint32, InstanceSceneDataOffset) // offset into the instance scene data
SHADER_PARAMETER(uint32, NumInstanceSceneDataEntries) // number of instance scene data entries
SHADER_PARAMETER(int32, SingleCaptureIndex) // Should default to 0 if no reflection captures are provided, as there will be a default black (0,0,0,0) cubemap in that slot
SHADER_PARAMETER(FVector3f, TilePosition)
SHADER_PARAMETER(uint32, PrimitiveComponentId) // TODO: Refactor to use PersistentPrimitiveIndex, ENGINE USE ONLY - will be removed
SHADER_PARAMETER(FMatrix44f, LocalToRelativeWorld) // Always needed
SHADER_PARAMETER(FMatrix44f, RelativeWorldToLocal) // Rarely needed
SHADER_PARAMETER(FMatrix44f, PreviousLocalToRelativeWorld) // Used to calculate velocity
SHADER_PARAMETER(FMatrix44f, PreviousRelativeWorldToLocal) // Rarely used when calculating velocity, if material uses vertex offset along with world->local transform
SHADER_PARAMETER(FMatrix44f, WorldToPreviousWorld) // Used when calculating instance prev local->world for static instances that do not store it (calculated via doubles to resolve precision issues)
SHADER_PARAMETER_EX(FVector3f, InvNonUniformScale, EShaderPrecisionModifier::Half) // Often needed
SHADER_PARAMETER(float, ObjectBoundsX) // Only needed for editor/development
SHADER_PARAMETER(FVector4f, ObjectRelativeWorldPositionAndRadius) // Needed by some materials
SHADER_PARAMETER(FVector3f, ActorRelativeWorldPosition)
SHADER_PARAMETER(uint32, LightmapUVIndex) // Only needed if static lighting is enabled
SHADER_PARAMETER_EX(FVector3f, ObjectOrientation, EShaderPrecisionModifier::Half)
SHADER_PARAMETER(uint32, LightmapDataIndex) // Only needed if static lighting is enabled
SHADER_PARAMETER_EX(FVector4f, NonUniformScale, EShaderPrecisionModifier::Half)
SHADER_PARAMETER(FVector3f, PreSkinnedLocalBoundsMin) // Local space min bounds, pre-skinning
SHADER_PARAMETER(uint32, NaniteResourceID)
SHADER_PARAMETER(FVector3f, PreSkinnedLocalBoundsMax) // Local space bounds, pre-skinning
SHADER_PARAMETER(uint32, NaniteHierarchyOffset)
SHADER_PARAMETER(FVector3f, LocalObjectBoundsMin) // This is used in a custom material function (ObjectLocalBounds.uasset)
SHADER_PARAMETER(float, ObjectBoundsY) // Only needed for editor/development
SHADER_PARAMETER(FVector3f, LocalObjectBoundsMax) // This is used in a custom material function (ObjectLocalBounds.uasset)
SHADER_PARAMETER(float, ObjectBoundsZ) // Only needed for editor/development
SHADER_PARAMETER(FVector3f, InstanceLocalBoundsCenter)
SHADER_PARAMETER(uint32, InstancePayloadDataOffset)
SHADER_PARAMETER(FVector3f, InstanceLocalBoundsExtent)
SHADER_PARAMETER(uint32, InstancePayloadDataStride)
SHADER_PARAMETER(uint32, InstancePayloadExtensionSize)
SHADER_PARAMETER(FVector3f, WireframeColor) // Only needed for editor/development
SHADER_PARAMETER(uint32, PackedNaniteFlags)
SHADER_PARAMETER(FVector3f, LevelColor) // Only needed for editor/development
SHADER_PARAMETER(int32, PersistentPrimitiveIndex)
SHADER_PARAMETER(FVector2f, InstanceDrawDistanceMinMaxSquared)
SHADER_PARAMETER(float, InstanceWPODisableDistanceSquared) // WPO = World Position Offset
SHADER_PARAMETER(uint32, NaniteRayTracingDataOffset)
SHADER_PARAMETER(float, MaxWPOExtent) // WPO = World Position Offset
SHADER_PARAMETER(float, MinMaterialDisplacement)
SHADER_PARAMETER(float, MaxMaterialDisplacement)
SHADER_PARAMETER(uint32, CustomStencilValueAndMask)
SHADER_PARAMETER(uint32, VisibilityFlags)
SHADER_PARAMETER_ARRAY(FVector4f, CustomPrimitiveData, [FCustomPrimitiveData::NumCustomPrimitiveDataFloat4s]) // Custom data per primitive that can be accessed through material expression parameters and modified through UStaticMeshComponent
END_GLOBAL_SHADER_PARAMETER_STRUCT()
FGPUScenePrimitiveCollector.FUploadData
然后是上传数据
也在这里面
class FGPUScenePrimitiveCollector
{
public:
// Upload payload of a primitive collector: the collected primitives plus the offsets and
// totals used when the data is uploaded to the GPU scene.
struct FUploadData
{
TArray<FPrimitiveData, TInlineAllocator<8>> PrimitiveData; // per-primitive data
TArray<uint32> GPUWritePrimitives; // indices into PrimitiveData of primitives handled by a GPU writer delegate
// INDEX_NONE marks an offset that has not been assigned.
uint32 InstanceSceneDataOffset = INDEX_NONE;
uint32 TotalInstanceCount = 0;
uint32 InstancePayloadDataOffset = INDEX_NONE;
uint32 InstancePayloadDataFloat4Count = 0;
bool bIsUploaded = false; // set once the data has been uploaded so it is skipped afterwards
};
/**
* Range in GPU scene allocated to the dynamic primitives.
*/
TRange<int32> PrimitiveIdRange = TRange<int32>::Empty();
FUploadData* UploadData = nullptr; // Owned by FGPUSceneDynamicContext
bool bCommitted = false;
FGPUSceneDynamicContext* GPUSceneDynamicContext = nullptr;
FGPUScene::UploadDynamicPrimitiveShaderDataForViewInternal
回来,我们熟悉了 FGPUScenePrimitiveCollector 这个结构
void FGPUScene::UploadDynamicPrimitiveShaderDataForViewInternal(FRDGBuilder& GraphBuilder, FScene& Scene, FViewInfo& View, FRDGExternalAccessQueue& ExternalAccessQueue, bool bIsShadowView)
{
FGPUScenePrimitiveCollector& Collector = View.DynamicPrimitiveCollector;
// Auto-commit if not done (should usually not be done, but sometimes the UploadDynamicPrimitiveShaderDataForViewInternal is called to ensure the
// CachedViewUniformShaderParameters is set on the view.
if (!Collector.bCommitted)
{
Collector.Commit();
}
const int32 NumPrimitiveDataUploads = Collector.Num();
// 一些check
ensure(Collector.GetPrimitiveIdRange().Size<int32>() == NumPrimitiveDataUploads);
// Make sure we are not trying to upload data that lives in a different context.
ensure(Collector.UploadData == nullptr || CurrentDynamicContext->DymamicPrimitiveUploadData.Find(Collector.UploadData) != INDEX_NONE);
check(BufferState.IsValid());
是否需要上传
// Skip uploading empty & already uploaded data
const bool bNeedsUpload = Collector.UploadData != nullptr && NumPrimitiveDataUploads > 0 && !Collector.UploadData->bIsUploaded;
if (bNeedsUpload)
{
RDG_CSV_STAT_EXCLUSIVE_SCOPE(GraphBuilder, UploadDynamicPrimitiveShaderData);
QUICK_SCOPE_CYCLE_COUNTER(STAT_UploadDynamicPrimitiveShaderData);
// Mark as uploaded first so a later call sees bNeedsUpload == false for this data.
Collector.UploadData->bIsUploaded = true;
const int32 UploadIdStart = Collector.GetPrimitiveIdRange().GetLowerBoundValue();
const int32 InstanceIdStart = Collector.UploadData->InstanceSceneDataOffset;
ensure(UploadIdStart < DynamicPrimitivesOffset);
ensure(InstanceIdStart != INDEX_NONE);
if (bIsShadowView && Scene.GetVirtualShadowMapCache(View) != nullptr) // shadow view, and the scene returns a virtual shadow map cache for it
{
// Enqueue cache invalidations for all dynamic primitives' instances, as they will be removed this frame and are not associated
// with any particular FPrimitiveSceneInfo. Will occur on the next call to UpdateAllPrimitiveSceneInfos
for (const FGPUScenePrimitiveCollector::FPrimitiveData& PrimitiveData : Collector.UploadData->PrimitiveData)
{
ensure(PrimitiveData.LocalInstanceSceneDataOffset != INDEX_NONE);
DynamicPrimitiveInstancesToInvalidate.Add( // dynamic instances queued for invalidation
FInstanceRange // one range per primitive: absolute instance offset + instance count
{
PrimitiveData.LocalInstanceSceneDataOffset + InstanceIdStart,
PrimitiveData.NumInstances
}
);
}
}
// Upload adapter wrapping the collected primitive data and its start offsets.
FUploadDataSourceAdapterDynamicPrimitives& UploadAdapter = *GraphBuilder.AllocObject<FUploadDataSourceAdapterDynamicPrimitives>(
Collector.UploadData->PrimitiveData,
UploadIdStart,
InstanceIdStart,
Collector.UploadData->InstancePayloadDataOffset,
SceneFrameNumber);
// ... (code omitted in this excerpt)
}
// Upload data source adapter for dynamic primitives (abridged excerpt).
// Holds a reference to the collected primitive data together with the start offsets and the
// frame number it is constructed with.
struct FUploadDataSourceAdapterDynamicPrimitives
{
// ... (code omitted in this excerpt)
const TArray<FGPUScenePrimitiveCollector::FPrimitiveData, TInlineAllocator<8>> &PrimitiveData; // collected primitive data (referenced, not copied)
const int32 PrimitiveIDStartOffset; // start offset of the primitive IDs
const int32 InstanceIDStartOffset; // start offset of the instance IDs
const int32 PayloadStartOffset; // start offset of the payload data
const uint32 SceneFrameNumber; // scene frame number
TArray<uint32, SceneRenderingAllocator> PrimitivesIds; // NOTE(review): presumably the IDs of the primitives to upload - confirm against the elided code
};
// Skip uploading empty & already uploaded data
const bool bNeedsUpload = Collector.UploadData != nullptr && NumPrimitiveDataUploads > 0 && !Collector.UploadData->bIsUploaded;
if (bNeedsUpload)
{
// ... (code omitted in this excerpt)
// Upload adapter wrapping the collected primitive data and its start offsets.
FUploadDataSourceAdapterDynamicPrimitives& UploadAdapter = *GraphBuilder.AllocObject<FUploadDataSourceAdapterDynamicPrimitives>(
Collector.UploadData->PrimitiveData,
UploadIdStart,
InstanceIdStart,
Collector.UploadData->InstancePayloadDataOffset,
SceneFrameNumber);
// Not the main update (bIsMainUpdate = false); uses the view's scene uniform buffer.
UpdateBufferState(GraphBuilder, View.GetSceneUniforms(), Scene, UploadAdapter, false);
// Run a pass that clears (Sets ID to invalid) any instances that need it.
AddClearInstancesPass(GraphBuilder);
// Upload the dynamic primitive data through the general upload path.
UploadGeneral<FUploadDataSourceAdapterDynamicPrimitives>(GraphBuilder, Scene, ExternalAccessQueue, UploadAdapter);
}
填充shader参数
FSceneUniformBuffer& SceneUniforms = View.GetSceneUniforms();
FillSceneUniformBuffer(GraphBuilder, SceneUniforms);
// Publishes the GPU-scene parameters into SceneUB.
// Returns false when the GPU scene is disabled or not yet populated; otherwise returns the
// result of SceneUB.Set().
bool FGPUScene::FillSceneUniformBuffer(FRDGBuilder& GraphBuilder, FSceneUniformBuffer& SceneUB) const
{
    // Disabled GPU scene: nothing to publish.
    if (!bIsEnabled)
    {
        return false;
    }

    // Shader parameters already carry a primitive-data SRV: publish them as they are.
    if (ShaderParameters.GPUScenePrimitiveSceneData != nullptr)
    {
        return SceneUB.Set(SceneUB::GPUScene, ShaderParameters);
    }

    if (PrimitiveBuffer != nullptr)
    {
        // Not in an active rendering context, must register the buffers and fill in the data structure.
        FGPUSceneResourceParameters RegisteredParameters;
        RegisteredParameters.GPUSceneInstanceSceneData = GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(InstanceSceneDataBuffer));
        RegisteredParameters.GPUSceneInstancePayloadData = GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(InstancePayloadDataBuffer));
        RegisteredParameters.GPUScenePrimitiveSceneData = GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(PrimitiveBuffer));
        RegisteredParameters.GPUSceneLightmapData = GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(LightmapDataBuffer));
        // Light data comes from the default structured buffer on this path.
        RegisteredParameters.GPUSceneLightData = GraphBuilder.CreateSRV(GSystemTextures.GetDefaultStructuredBuffer(GraphBuilder, sizeof(FLightSceneData)));
        RegisteredParameters.InstanceDataSOAStride = InstanceSceneDataSOAStride;
        RegisteredParameters.NumScenePrimitives = NumScenePrimitives;
        RegisteredParameters.NumInstances = InstanceSceneDataAllocator.GetMaxSize();
        RegisteredParameters.GPUSceneFrameNumber = GetSceneFrameNumber();
        return SceneUB.Set(SceneUB::GPUScene, RegisteredParameters);
    }

    // leave the dummy data in place - the gpu scene is not yet populated
    return false;
}
// Execute any instance data GPU writer callbacks. (Note: Done after the UB update, in case the user requires it)
if (bNeedsUpload)
{
const uint32 PrimitiveIdStart = Collector.GetPrimitiveIdRange().GetLowerBoundValue();
const uint32 InstanceIdStart = Collector.UploadData->InstanceSceneDataOffset;
// Determine if we have any GPU data writers this frame and simultaneously defer any writes that must happen later in the frame
// ImmediateWrites is filled by the loop below.
TArray<uint32, SceneRenderingAllocator> ImmediateWrites;
ImmediateWrites.Reserve(Collector.UploadData->GPUWritePrimitives.Num());
for (uint32 PrimitiveIndex : Collector.UploadData->GPUWritePrimitives)
{
const FGPUScenePrimitiveCollector::FPrimitiveData& PrimData = Collector.UploadData->PrimitiveData[PrimitiveIndex];
const EGPUSceneGPUWritePass GPUWritePass = PrimData.SourceData.DataWriterGPUPass;
// We're going to immediately execute any GPU writers whose write pass is immediate or has already happened this frame
// i.e. decide between immediate and deferred execution.
if (GPUWritePass == EGPUSceneGPUWritePass::None || GPUWritePass <= LastDeferredGPUWritePass)
{
ImmediateWrites.Add(PrimitiveIndex);
}
else
{
// Defer this write to a later GPU write pass
FDeferredGPUWrite DeferredWrite;
DeferredWrite.DataWriterGPU = PrimData.SourceData.DataWriterGPU;
DeferredWrite.ViewId = View.GPUSceneViewId;
DeferredWrite.PrimitiveId = PrimitiveIdStart + PrimitiveIndex;
DeferredWrite.InstanceSceneDataOffset = InstanceIdStart + PrimData.LocalInstanceSceneDataOffset;
// Deferred writes are bucketed by their write pass.
uint32 PassIndex = uint32(PrimData.SourceData.DataWriterGPUPass);
DeferredGPUWritePassDelegates[PassIndex].Add(DeferredWrite);
}
}
if (ImmediateWrites.Num() > 0)
{
// Execute writes that should execute immediately
RDG_EVENT_SCOPE(GraphBuilder, "GPU Writer Delegates");
// Params is shared by all immediate writers; only the per-primitive fields change per iteration.
FGPUSceneWriteDelegateParams Params;
Params.View = &View;
Params.GPUWritePass = EGPUSceneGPUWritePass::None;
GetWriteParameters(GraphBuilder, Params.GPUWriteParams);
for (uint32 PrimitiveIndex : ImmediateWrites)
{
const FGPUScenePrimitiveCollector::FPrimitiveData& PrimData = Collector.UploadData->PrimitiveData[PrimitiveIndex];
Params.PrimitiveId = PrimitiveIdStart + PrimitiveIndex;
Params.InstanceSceneDataOffset = InstanceIdStart + PrimData.LocalInstanceSceneDataOffset;
PrimData.SourceData.DataWriterGPU.Execute(GraphBuilder, Params); // invoke the GPU writer delegate
}
}
}
FDeferredShadingSceneRenderer::Render
回来
我们看完了 Scene->GPUScene.UploadDynamicPrimitiveShaderDataForView
然后继续下去
void FDeferredShadingSceneRenderer::Render(FRDGBuilder& GraphBuilder)
{
{
// 。。。
Scene->GPUScene.Update(GraphBuilder, GetSceneUniforms(), *Scene, ExternalAccessQueue);
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ViewIndex++)
{
FViewInfo& View = Views[ViewIndex];
RDG_GPU_MASK_SCOPE(GraphBuilder, View.GPUMask);
Scene->GPUScene.UploadDynamicPrimitiveShaderDataForView(GraphBuilder, *Scene, View, ExternalAccessQueue);
// 这里继续,DEBUG
Scene->GPUScene.DebugRender(GraphBuilder, *Scene, GetSceneUniforms(), View);
}
// 延迟剔除
InstanceCullingManager.BeginDeferredCulling(GraphBuilder, Scene->GPUScene);
// 更新物理Field
if (Views.Num() > 0)
{
FViewInfo& View = Views[0];
Scene->UpdatePhysicsField(GraphBuilder, View);
}
}
FInstanceCullingManager::BeginDeferredCulling
延迟剔除在做什么
// Begins deferred instance culling (abridged excerpt; the remainder is shown further below).
void FInstanceCullingManager::BeginDeferredCulling(FRDGBuilder& GraphBuilder, FGPUScene& GPUScene)
{
// Flush the registered views.
FlushRegisteredViews(GraphBuilder);
// ... (code omitted in this excerpt)
}
// Re-creates the culling-views structured buffer whenever the number of registered views
// differs from the count recorded in CullingIntermediate.
void FInstanceCullingManager::FlushRegisteredViews(FRDGBuilder& GraphBuilder)
{
    // Recorded count already matches the registered views: nothing to rebuild.
    if (CullingIntermediate.NumViews == CullingViews.Num())
    {
        return;
    }

    // Counts differ: rebuild the buffer and record the new count so both agree again.
    CullingIntermediate.CullingViews = CreateStructuredBuffer(GraphBuilder, TEXT("InstanceCulling.CullingViews"), CullingViews);
    CullingIntermediate.NumViews = CullingViews.Num();
}
回来看
经过一些简单的判断后
DeferredContext = FInstanceCullingContext::CreateDeferredContext(GraphBuilder, GPUScene, this);
// Flushes the registered views and, when batched command building is allowed and there is
// at least one instance and one culling view, creates the deferred culling context.
void FInstanceCullingManager::BeginDeferredCulling(FRDGBuilder& GraphBuilder, FGPUScene& GPUScene)
{
    // Flush the registered views first; this happens unconditionally.
    FlushRegisteredViews(GraphBuilder);

    // Cannot defer pass execution in immediate mode.
    if (AllowBatchedBuildRenderingCommands(GPUScene))
    {
        // If there are no instances, there can be no work to perform later.
        const bool bHasDeferredWork = GPUScene.GetNumInstances() != 0 && CullingViews.Num() != 0;
        if (bHasDeferredWork)
        {
            DeferredContext = FInstanceCullingContext::CreateDeferredContext(GraphBuilder, GPUScene, this);
        }
    }
}
FInstanceCullingDeferredContext *FInstanceCullingContext::CreateDeferredContext(
FRDGBuilder& GraphBuilder,
FGPUScene& GPUScene,
FInstanceCullingManager* InstanceCullingManager)
{
check(InstanceCullingManager != nullptr);
// 一些宏定义的lambda,后面调用了再看吧
// 。。。
// 获取context
const ERHIFeatureLevel::Type FeatureLevel = GPUScene.GetFeatureLevel();
FInstanceCullingDeferredContext* DeferredContext = GraphBuilder.AllocObject<FInstanceCullingDeferredContext>(FeatureLevel, InstanceCullingManager);
const bool bCullInstances = CVarCullInstances.GetValueOnRenderThread() != 0;
const bool bAllowWPODisable = true;
TStaticArray<FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs::FParameters*, static_cast<uint32>(EBatchProcessingMode::Num)> PassParameters;
for (uint32 Mode = 0U; Mode < uint32(EBatchProcessingMode::Num); ++Mode) // 遍历剔除的处理方式
{
PassParameters[Mode] = GraphBuilder.AllocParameters<FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs::FParameters>();
}
出现了一些结构,先看看 FInstanceCullingDeferredContext
// Deferred instance-culling context: extends the merged context with a pointer to the
// manager it was constructed with, RDG resources, and a processed flag.
class FInstanceCullingDeferredContext : public FInstanceCullingMergedContext
{
public:
FInstanceCullingDeferredContext(ERHIFeatureLevel::Type InFeatureLevel, FInstanceCullingManager* InInstanceCullingManager = nullptr)
: FInstanceCullingMergedContext(InFeatureLevel)
, InstanceCullingManager(InInstanceCullingManager)
{}
FInstanceCullingManager* InstanceCullingManager; // manager passed in at construction (may be nullptr by default)
FRDGBufferRef DrawIndirectArgsBuffer = nullptr; // RDG buffer for the indirect draw arguments (per its name)
FRDGBufferRef InstanceDataBuffer = nullptr; // RDG buffer for instance data (per its name)
TRDGUniformBufferRef<FInstanceCullingGlobalUniforms> UniformBuffer = nullptr; // uniform buffer of FInstanceCullingGlobalUniforms
bool bProcessed = false;
// Processes the batched work; takes one pass-parameter struct per EBatchProcessingMode.
void ProcessBatched(TStaticArray<FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs::FParameters*, static_cast<uint32>(EBatchProcessingMode::Num)> PassParameters);
};
看一下类 FInstanceCullingManager
/**
* Manages allocation of indirect arguments and culling jobs for all instanced draws (use the GPU Scene culling).
*/
// Abridged declaration: registers culling views, uploads them, and starts deferred culling.
class FInstanceCullingManager
{
public:
FInstanceCullingManager(FSceneUniformBuffer& SceneUB, bool bInIsEnabled, FRDGBuilder& GraphBuilder); // constructor
~FInstanceCullingManager();
bool IsEnabled() const { return bIsEnabled; } // whether instance culling is enabled
int32 RegisterView(const Nanite::FPackedViewParams& Params); // register a view from packed view params
int32 RegisterView(const FViewInfo& ViewInfo); // register a view from a FViewInfo
// Uploads the registered views when their number has changed.
// NOTE(review): required ordering relative to BuildRenderingCommands is not visible here - confirm against the engine header.
void FlushRegisteredViews(FRDGBuilder& GraphBuilder);
void BeginDeferredCulling(FRDGBuilder& GraphBuilder, FGPUScene& GPUScene); // begin deferred culling
FInstanceCullingIntermediate CullingIntermediate; // holds the uploaded culling-views buffer and its view count
private:
friend class FInstanceCullingContext;
// Populated by FInstanceCullingContext::BuildRenderingCommandsDeferred, used to hold instance culling related data that needs to be passed around
FInstanceCullingDeferredContext *DeferredContext = nullptr;
FSceneUniformBuffer& SceneUB;
TArray<Nanite::FPackedView> CullingViews; // registered views, uploaded by FlushRegisteredViews
bool bIsEnabled;
};
看父类
// Collects batches of instance-culling work from individual contexts and holds the merged
// data derived from them (see MergeBatches / AddBatch).
class FInstanceCullingMergedContext
{
public:
// One queued batch: the context that produced it and where its draw params go.
struct FBatchItem
{
FInstanceCullingContext* Context = nullptr; // culling context
FInstanceCullingDrawParams* Result = nullptr; // culling draw params
};
// Info about a batch of culling work produced by a context, when part of a batched job
// Store once per context, provides start offsets to commands/etc for the context.
struct FContextBatchInfo
{
uint32 IndirectArgsOffset; // offset into the indirect args
uint32 InstanceDataWriteOffset; // write offset into the instance data
uint32 PayloadDataOffset; // offset into the payload data
uint32 CompactionDataOffset;// start offset into the compaction data
uint32 ViewIdsOffset;
uint32 NumViewIds;
uint32 DynamicInstanceIdOffset;
uint32 DynamicInstanceIdMax;
uint32 ItemDataOffset[uint32(EBatchProcessingMode::Num)]; // one entry per EBatchProcessingMode
};
TArray<FBatchItem, SceneRenderingAllocator> Batches; /** Batches of GPU instance culling input data. */
TArray<FBatchItem, SceneRenderingAllocator> AsyncBatches; /** Async (and thus added as to the above as late as possible) Batches of GPU instance culling input data. */
/**
* Merged data, derived in MergeBatches(), follows.
*/
TArray<int32, SceneRenderingAllocator> ViewIds;
//TArray<FMeshDrawCommandInfo, SceneRenderingAllocator> MeshDrawCommandInfos;
TArray<FRHIDrawIndexedIndirectParameters, SceneRenderingAllocator> IndirectArgs;
TArray<uint32, SceneRenderingAllocator> DrawCommandDescs;
TArray<FInstanceCullingContext::FPayloadData, SceneRenderingAllocator> PayloadData;
TArray<uint32, SceneRenderingAllocator> InstanceIdOffsets;
TArray<FInstanceCullingContext::FCompactionData, SceneRenderingAllocator> DrawCommandCompactionData;
TArray<uint32, SceneRenderingAllocator> CompactionBlockDataIndices;
// Per-processing-mode containers: one element per EBatchProcessingMode.
TStaticArray<TInstanceCullingLoadBalancer<SceneRenderingAllocator>, static_cast<uint32>(EBatchProcessingMode::Num)> LoadBalancers;
TStaticArray<TArray<uint32, SceneRenderingAllocator>, static_cast<uint32>(EBatchProcessingMode::Num)> BatchInds;
TArray<FContextBatchInfo, SceneRenderingAllocator> BatchInfos;
ERHIFeatureLevel::Type FeatureLevel = ERHIFeatureLevel::Num;
// if true, the contexts that are supplied through calling AddBatch must all have an 1:1 entry in the resulting merged Batches array
// this adds a check to prevent empty contexts from being added (!HasCullingCommands()).
bool bMustAddAllContexts = false;
// Counters to sum up all sizes to facilitate pre-sizing
uint32 InstanceIdBufferSize = 0U;
TStaticArray<int32, uint32(EBatchProcessingMode::Num)> TotalBatches = TStaticArray<int32, uint32(EBatchProcessingMode::Num)>(InPlace, 0);
TStaticArray<int32, uint32(EBatchProcessingMode::Num)> TotalItems = TStaticArray<int32, uint32(EBatchProcessingMode::Num)>(InPlace, 0);
int32 TotalIndirectArgs = 0;
int32 TotalPayloads = 0;
int32 TotalViewIds = 0;
int32 TotalInstances = 0;
int32 TotalCompactionDrawCommands = 0;
int32 TotalCompactionBlocks = 0;
int32 TotalCompactionInstances = 0;
// Single Previous frame HZB which is shared among all batched contexts, thus only one is allowed (but the same can be used in multiple passes). (Needs atlas or bindless to expand).
FRDGTextureRef PrevHZB = nullptr;
int32 NumCullingViews = 0;
// Merge the queued batches and populate the derived data.
void MergeBatches();
// Queues a context together with the draw params that receive its result.
void AddBatch(FRDGBuilder& GraphBuilder, FInstanceCullingContext* Context, FInstanceCullingDrawParams* InstanceCullingDrawParams);
private:
void AddBatchItem(const FBatchItem& BatchItem);
};
回来
// Excerpt (part 1) of CreateDeferredContext: allocates the deferred context, the per-mode pass
// parameters and, when compaction is enabled, the compaction buffers.
FInstanceCullingDeferredContext *FInstanceCullingContext::CreateDeferredContext(
FRDGBuilder& GraphBuilder,
FGPUScene& GPUScene,
FInstanceCullingManager* InstanceCullingManager)
{
check(InstanceCullingManager != nullptr);
// Several macro-defined lambdas are declared here (elided); they are looked at where they are used.
// ...
const ERHIFeatureLevel::Type FeatureLevel = GPUScene.GetFeatureLevel();
// Allocate the deferred context from the graph builder.
FInstanceCullingDeferredContext* DeferredContext = GraphBuilder.AllocObject<FInstanceCullingDeferredContext>(FeatureLevel, InstanceCullingManager);
const bool bCullInstances = CVarCullInstances.GetValueOnRenderThread() != 0;
const bool bAllowWPODisable = true;
TStaticArray<FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs::FParameters*, static_cast<uint32>(EBatchProcessingMode::Num)> PassParameters;
for (uint32 Mode = 0U; Mode < uint32(EBatchProcessingMode::Num); ++Mode) // one parameter struct per batch processing mode
{
PassParameters[Mode] = GraphBuilder.AllocParameters<FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs::FParameters>();
}
// Create buffers for compacting instances for draw commands that need it
const bool bEnableInstanceCompaction = IsInstanceOrderPreservationAllowed(FeatureLevel); // compaction is enabled when instance order preservation is allowed for this feature level
FRDGBufferSRVRef DrawCommandCompactionDataSRV = nullptr;
FRDGBufferRef CompactInstanceIdsBuffer = nullptr;
FRDGBufferUAVRef CompactInstanceIdsUAV = nullptr;
FRDGBufferRef CompactionBlockCountsBuffer = nullptr;
FRDGBufferUAVRef CompactionBlockCountsUAV = nullptr;
if (bEnableInstanceCompaction) // instance compaction is allowed
{
// [1] Compaction data SRV; the macro packs the CreateStructuredBuffer arguments.
DrawCommandCompactionDataSRV = GraphBuilder.CreateSRV(CreateStructuredBuffer(INST_CULL_CREATE_STRUCT_BUFF_ARGS(DrawCommandCompactionData)));
// [2] Temporary instance-id buffer: same helper as above, with the arguments spelled out instead of using the macro.
CompactInstanceIdsBuffer = CreateStructuredBuffer(
GraphBuilder,
TEXT("InstanceCulling.Compaction.TempInstanceIdsBuffer"),
sizeof(uint32),
INST_CULL_CALLBACK(FMath::Max(DeferredContext->TotalCompactionInstances, 1)),
INST_CULL_CALLBACK(nullptr),
INST_CULL_CALLBACK(0));
CompactInstanceIdsUAV = GraphBuilder.CreateUAV(CompactInstanceIdsBuffer);
// [3] Per-block instance count buffer.
CompactionBlockCountsBuffer = CreateStructuredBuffer(
GraphBuilder,
TEXT("InstanceCulling.Compaction.BlockInstanceCounts"),
sizeof(uint32),
INST_CULL_CALLBACK(FMath::Max(DeferredContext->TotalCompactionBlocks, 1)),
INST_CULL_CALLBACK(nullptr),
INST_CULL_CALLBACK(0));
CompactionBlockCountsUAV = GraphBuilder.CreateUAV(CompactionBlockCountsBuffer);
// We must clear the block counts buffer, as they will be written to using atomic increments
// (this pass clears CompactionBlockCountsUAV to 0).
AddClearUAVPass(GraphBuilder, CompactionBlockCountsUAV, 0);
}
数据上传和缓冲区管理
// Excerpt (part 2) of CreateDeferredContext: structured buffers for the merged batch data and the indirect-args buffer.
FInstanceCullingDeferredContext *FInstanceCullingContext::CreateDeferredContext(
FRDGBuilder& GraphBuilder,
FGPUScene& GPUScene,
FInstanceCullingManager* InstanceCullingManager)
{
// ...
FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs::FParameters PassParametersTmp;
// Four RDG structured buffers (draw command descs, payloads, view ids, batch infos).
FRDGBufferRef DrawCommandDescsRDG = CreateStructuredBuffer(INST_CULL_CREATE_STRUCT_BUFF_ARGS(DrawCommandDescs));
FRDGBufferRef InstanceCullingPayloadsRDG = CreateStructuredBuffer(INST_CULL_CREATE_STRUCT_BUFF_ARGS(PayloadData));
FRDGBufferRef ViewIdsRDG = CreateStructuredBuffer(INST_CULL_CREATE_STRUCT_BUFF_ARGS(ViewIds));
FRDGBufferRef BatchInfosRDG = CreateStructuredBuffer(INST_CULL_CREATE_STRUCT_BUFF_ARGS(BatchInfos));
// Create the indirect-args RDG buffer from a descriptor, a debug name and a callback
// yielding IndirectArgsNumWords * IndirectArgs.Num().
DeferredContext->DrawIndirectArgsBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc(), TEXT("InstanceCulling.DrawIndirectArgsBuffer"), INST_CULL_CALLBACK(IndirectArgsNumWords * DeferredContext->IndirectArgs.Num()));
这个描述
// Excerpt of the RDG buffer descriptor; only one CreateIndirectDesc overload is shown.
struct FRDGBufferDesc
{
// Builds a descriptor for an indirect-argument buffer with the given element size and element count.
// NOTE(review): the CreateDeferredContext excerpts call CreateIndirectDesc() with no arguments, so they
// presumably bind to a different overload than this two-parameter one - confirm against the engine header.
static FRDGBufferDesc CreateIndirectDesc(uint32 BytesPerElement, uint32 NumElements)
{
FRDGBufferDesc Desc;
// Usable as a static draw-indirect buffer that is also bindable as UAV, SRV and vertex buffer.
Desc.Usage = EBufferUsageFlags::Static | EBufferUsageFlags::DrawIndirect | EBufferUsageFlags::UnorderedAccess | EBufferUsageFlags::ShaderResource | EBufferUsageFlags::VertexBuffer;
Desc.BytesPerElement = BytesPerElement;
Desc.NumElements = NumElements;
return Desc;
}
这个枚举描述了缓冲区的使用方式
/**
* Resource usage flags - for vertex and index buffers.
*/
enum class EBufferUsageFlags : uint32
{
// Individual flag values are elided here; see the engine source.
};
回来
// Excerpt (part 3) of CreateDeferredContext: the indirect-args buffer is created and queued for upload.
FInstanceCullingDeferredContext *FInstanceCullingContext::CreateDeferredContext(
FRDGBuilder& GraphBuilder,
FGPUScene& GPUScene,
FInstanceCullingManager* InstanceCullingManager)
{
// ...
// The indirect-args buffer is created here.
DeferredContext->DrawIndirectArgsBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc(), TEXT("InstanceCulling.DrawIndirectArgsBuffer"), INST_CULL_CALLBACK(IndirectArgsNumWords * DeferredContext->IndirectArgs.Num()));
// Queue it for upload; the data pointer and byte size are provided through callbacks.
GraphBuilder.QueueBufferUpload(DeferredContext->DrawIndirectArgsBuffer, INST_CULL_CALLBACK(DeferredContext->IndirectArgs.GetData()), INST_CULL_CALLBACK(GetArrayDataSize(DeferredContext->IndirectArgs)));
// Queues a buffer upload whose initial data pointer and size are supplied by callbacks.
inline void FRDGBuilder::QueueBufferUpload(FRDGBufferRef Buffer, FRDGBufferInitialDataCallback&& InitialDataCallback, FRDGBufferInitialDataSizeCallback&& InitialDataSizeCallback)
{
IF_RDG_ENABLE_DEBUG(UserValidation.ValidateUploadBuffer(Buffer, InitialDataCallback, InitialDataSizeCallback)); // validation, debug builds only
UploadedBuffers.Emplace(Buffer, MoveTemp(InitialDataCallback), MoveTemp(InitialDataSizeCallback)); // record the upload request, then set the flags below
Buffer->bQueuedForUpload = 1;
// NOTE(review): per the flag name this forces the buffer to be treated as non-transient; the earlier note
// ("not persistent, may be gone next frame") looks inverted - confirm against FRDGBuffer.
Buffer->bForceNonTransient = 1;
}
// Excerpt (part 4) of CreateDeferredContext: instance-id buffers and the shared pass parameters (PassParametersTmp).
FInstanceCullingDeferredContext *FInstanceCullingContext::CreateDeferredContext(
FRDGBuilder& GraphBuilder,
FGPUScene& GPUScene,
FInstanceCullingManager* InstanceCullingManager)
{
// ...
FGlobalShaderMap* ShaderMap = GetGlobalShaderMap(FeatureLevel);
// Note: we redundantly clear the instance counts here as there is some issue with replays on certain consoles.
AddClearIndirectArgInstanceCountPass(GraphBuilder, ShaderMap, DeferredContext->DrawIndirectArgsBuffer, INST_CULL_CALLBACK(DeferredContext->IndirectArgs.Num()));
// InstanceIdOffsetBuffer
// not using structured buffer as we want/have to get at it as a vertex buffer
FRDGBufferRef InstanceIdOffsetBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateBufferDesc(sizeof(uint32), 1), TEXT("InstanceCulling.InstanceIdOffsetBuffer"), INST_CULL_CALLBACK(DeferredContext->InstanceIdOffsets.Num()));
GraphBuilder.QueueBufferUpload(InstanceIdOffsetBuffer, INST_CULL_CALLBACK(DeferredContext->InstanceIdOffsets.GetData()), INST_CULL_CALLBACK(DeferredContext->InstanceIdOffsets.GetTypeSize() * DeferredContext->InstanceIdOffsets.Num()));
// InstanceIdsBuffer
FRDGBufferRef InstanceIdsBuffer = GraphBuilder.CreateBuffer(
CreateInstanceIdsBufferDesc(FeatureLevel, 1),
TEXT("InstanceCulling.InstanceIdsBuffer"),
INST_CULL_CALLBACK(GetInstanceIdsNumElements(DeferredContext->FeatureLevel, DeferredContext->InstanceIdBufferSize))
);
FRDGBufferUAVRef InstanceIdsBufferUAV = GraphBuilder.CreateUAV(InstanceIdsBuffer, ERDGUnorderedAccessViewFlags::SkipBarrier);
// ES3_1 exposes the instance-id buffer itself as the instance data buffer; other feature levels expose the offset buffer.
if (FeatureLevel == ERHIFeatureLevel::ES3_1)
{
DeferredContext->InstanceDataBuffer = InstanceIdsBuffer;
}
else
{
DeferredContext->InstanceDataBuffer = InstanceIdOffsetBuffer;
}
// Fetch the GPU-scene shader parameters that are copied into the pass parameters below.
const FGPUSceneResourceParameters GPUSceneParameters = GPUScene.GetShaderParameters();
// Because the view uniforms are not set up by the time this runs
// PassParameters->View = View.ViewUniformBuffer;
// Set up global GPU-scene data instead...
PassParametersTmp.GPUSceneInstanceSceneData = GPUSceneParameters.GPUSceneInstanceSceneData;
PassParametersTmp.GPUSceneInstancePayloadData = GPUSceneParameters.GPUSceneInstancePayloadData;
PassParametersTmp.GPUScenePrimitiveSceneData = GPUSceneParameters.GPUScenePrimitiveSceneData;
PassParametersTmp.GPUSceneLightmapData = GPUSceneParameters.GPUSceneLightmapData;
PassParametersTmp.InstanceSceneDataSOAStride = GPUScene.InstanceSceneDataSOAStride;
PassParametersTmp.GPUSceneFrameNumber = GPUSceneParameters.GPUSceneFrameNumber;
PassParametersTmp.GPUSceneNumInstances = GPUSceneParameters.NumInstances;
PassParametersTmp.GPUSceneNumPrimitives = GPUSceneParameters.NumScenePrimitives;
PassParametersTmp.GPUSceneNumLightmapDataItems = GPUScene.GetNumLightmapDataItems();
PassParametersTmp.DrawCommandDescs = GraphBuilder.CreateSRV(DrawCommandDescsRDG);
PassParametersTmp.InstanceCullingPayloads = GraphBuilder.CreateSRV(InstanceCullingPayloadsRDG);
PassParametersTmp.BatchInfos = GraphBuilder.CreateSRV(BatchInfosRDG);
PassParametersTmp.ViewIds = GraphBuilder.CreateSRV(ViewIdsRDG);
// only one of these will be used in the shader
PassParametersTmp.InstanceIdsBufferOut = InstanceIdsBufferUAV;
PassParametersTmp.InstanceIdsBufferOutMobile = InstanceIdsBufferUAV;
PassParametersTmp.DrawIndirectArgsBufferOut = GraphBuilder.CreateUAV(DeferredContext->DrawIndirectArgsBuffer, PF_R32_UINT, ERDGUnorderedAccessViewFlags::SkipBarrier);
PassParametersTmp.InstanceIdOffsetBuffer = GraphBuilder.CreateSRV(InstanceIdOffsetBuffer, PF_R32_UINT);
// Culling views are bound when instance culling or WPO disable is on (bAllowWPODisable is the constant true in this excerpt).
if (bCullInstances || bAllowWPODisable)
{
PassParametersTmp.InViews = GraphBuilder.CreateSRV(InstanceCullingManager->CullingIntermediate.CullingViews);
PassParametersTmp.NumCullingViews = InstanceCullingManager->CullingIntermediate.NumViews;
}
// Compaction parameters
PassParametersTmp.DrawCommandCompactionData = DrawCommandCompactionDataSRV;
PassParametersTmp.CompactInstanceIdsBufferOut = CompactInstanceIdsUAV;
PassParametersTmp.CompactionBlockCounts = CompactionBlockCountsUAV;
// Record the number of culling views to be able to check that no views referencing out-of bounds views are queued up
DeferredContext->NumCullingViews = InstanceCullingManager->CullingIntermediate.NumViews;
// Excerpt (part 5) of CreateDeferredContext: adds one CullInstances compute pass per batch processing mode.
FInstanceCullingDeferredContext *FInstanceCullingContext::CreateDeferredContext(
FRDGBuilder& GraphBuilder,
FGPUScene& GPUScene,
FInstanceCullingManager* InstanceCullingManager)
{
// ...
// Iterate over every batch processing mode.
for (uint32 Mode = 0U; Mode < uint32(EBatchProcessingMode::Num); ++Mode)
{
// Start from the shared parameters, then fill in the per-mode fields.
*PassParameters[Mode] = PassParametersTmp;
// Batch index buffer for this mode.
FRDGBufferRef BatchIndsRDG = CreateStructuredBuffer(INST_CULL_CREATE_STRUCT_BUFF_ARGS_MODE(BatchInds));
PassParameters[Mode]->BatchInds = GraphBuilder.CreateSRV(BatchIndsRDG);
// Load-balancer data for this mode. NOTE(review): Result is not referenced anywhere in this excerpt.
FInstanceProcessingGPULoadBalancer::FGPUData Result;
FRDGBufferRef BatchBuffer = CreateStructuredBuffer(
GraphBuilder,
TEXT("InstanceCullingLoadBalancer.Batches"),
sizeof(FInstanceProcessingGPULoadBalancer::FPackedBatch),
INST_CULL_CALLBACK_MODE(DeferredContext->LoadBalancers[Mode].GetBatches().Num()),
INST_CULL_CALLBACK_MODE(DeferredContext->LoadBalancers[Mode].GetBatches().GetData()),
INST_CULL_CALLBACK_MODE(GetArrayDataSize(DeferredContext->LoadBalancers[Mode].GetBatches())));
FRDGBufferRef ItemBuffer = CreateStructuredBuffer(
GraphBuilder,
TEXT("InstanceCullingLoadBalancer.Items"),
sizeof(FInstanceProcessingGPULoadBalancer::FPackedItem),
INST_CULL_CALLBACK_MODE(DeferredContext->LoadBalancers[Mode].GetItems().Num()),
INST_CULL_CALLBACK_MODE(DeferredContext->LoadBalancers[Mode].GetItems().GetData()),
INST_CULL_CALLBACK_MODE(GetArrayDataSize(DeferredContext->LoadBalancers[Mode].GetItems())));
// Bind the BatchBuffer and ItemBuffer created above as SRVs.
PassParameters[Mode]->LoadBalancerParameters.BatchBuffer = GraphBuilder.CreateSRV(BatchBuffer);
PassParameters[Mode]->LoadBalancerParameters.ItemBuffer = GraphBuilder.CreateSRV(ItemBuffer);
PassParameters[Mode]->CurrentBatchProcessingMode = Mode;
// HZB setup
const bool bOcclusionCullInstances = FInstanceCullingContext::IsOcclusionCullingEnabled();
if (bOcclusionCullInstances)
{
// Fill with a placeholder as AddPass expects HZBTexture to be valid. ProcessBatched will fill with real HZB textures.
PassParameters[Mode]->HZBTexture = GraphBuilder.RegisterExternalTexture(GSystemTextures.BlackDummy);
PassParameters[Mode]->HZBSize = PassParameters[Mode]->HZBTexture->Desc.Extent;
PassParameters[Mode]->HZBSampler = TStaticSamplerState< SF_Point, AM_Clamp, AM_Clamp, AM_Clamp >::GetRHI();
}
// Select the shader permutation for this mode.
FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs::FPermutationDomain PermutationVector;
PermutationVector.Set<FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs::FBatchedDim>(true);
PermutationVector.Set<FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs::FSingleInstanceModeDim>(EBatchProcessingMode(Mode) == EBatchProcessingMode::UnCulled);
PermutationVector.Set<FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs::FCullInstancesDim>(bCullInstances && EBatchProcessingMode(Mode) != EBatchProcessingMode::UnCulled);
PermutationVector.Set<FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs::FAllowWPODisableDim>(bAllowWPODisable);
PermutationVector.Set<FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs::FOcclusionCullInstancesDim>(bOcclusionCullInstances);
PermutationVector.Set<FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs::FInstanceCompactionDim>(bEnableInstanceCompaction);
// Fetch the compute shader and add the pass; the group count comes from a callback on the mode's load balancer.
auto ComputeShader = ShaderMap->GetShader<FBuildInstanceIdBufferAndCommandsFromPrimitiveIdsCs>(PermutationVector);
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("CullInstances(%s)", BatchProcessingModeStr[Mode]),
ComputeShader,
PassParameters[Mode],
INST_CULL_CALLBACK_MODE(DeferredContext->LoadBalancers[Mode].GetWrappedCsGroupCount()));
}
// Excerpt (part 6) of CreateDeferredContext: the two compaction passes, the global uniform buffer, and the return.
FInstanceCullingDeferredContext *FInstanceCullingContext::CreateDeferredContext(
FRDGBuilder& GraphBuilder,
FGPUScene& GPUScene,
FInstanceCullingManager* InstanceCullingManager)
{
// ...
// TODO: Come up with a way to cull these passes when no compaction is needed. The group count resulting in (0, 0, 0) causes the pass lambdas to not execute,
// but currently cannot cull resource transitions
// Only when instance compaction is enabled.
if (bEnableInstanceCompaction)
{
FRDGBufferRef BlockDestInstanceOffsets = CreateStructuredBuffer(
GraphBuilder,
TEXT("InstanceCulling.Compaction.BlockDestInstanceOffsets"),
sizeof(uint32),
INST_CULL_CALLBACK(FMath::Max<uint32>(DeferredContext->TotalCompactionBlocks, 1U)),
INST_CULL_CALLBACK(nullptr),
INST_CULL_CALLBACK(0));
// Compaction phase one - prefix sum of the compaction "blocks"
{
auto PassParameters2 = GraphBuilder.AllocParameters<FCalculateCompactBlockInstanceOffsetsCs::FParameters>();
PassParameters2->DrawCommandCompactionData = DrawCommandCompactionDataSRV;
PassParameters2->BlockInstanceCounts = GraphBuilder.CreateSRV(CompactionBlockCountsBuffer);
PassParameters2->BlockDestInstanceOffsetsOut = GraphBuilder.CreateUAV(BlockDestInstanceOffsets);
PassParameters2->DrawIndirectArgsBufferOut = PassParametersTmp.DrawIndirectArgsBufferOut;
auto ComputeShader = ShaderMap->GetShader<FCalculateCompactBlockInstanceOffsetsCs>();
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("Instance Compaction Phase 1"),
ComputeShader,
PassParameters2,
[DeferredContext]()
{
return FComputeShaderUtils::GetGroupCountWrapped(DeferredContext->TotalCompactionDrawCommands);
});
}
// Compaction phase two - write instances to compact final location
{
FRDGBufferRef BlockDrawCommandIndices = CreateStructuredBuffer(INST_CULL_CREATE_STRUCT_BUFF_ARGS(CompactionBlockDataIndices));
auto PassParameters2 = GraphBuilder.AllocParameters<FCompactVisibleInstancesCs::FParameters>();
PassParameters2->DrawCommandCompactionData = DrawCommandCompactionDataSRV;
PassParameters2->BlockDrawCommandIndices = GraphBuilder.CreateSRV(BlockDrawCommandIndices);
PassParameters2->InstanceIdsBufferIn = GraphBuilder.CreateSRV(CompactInstanceIdsBuffer);
PassParameters2->BlockDestInstanceOffsets = GraphBuilder.CreateSRV(BlockDestInstanceOffsets);
PassParameters2->InstanceIdsBufferOut = InstanceIdsBufferUAV;
PassParameters2->InstanceIdsBufferOutMobile = InstanceIdsBufferUAV;
auto ComputeShader = ShaderMap->GetShader<FCompactVisibleInstancesCs>();
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("Instance Compaction Phase 2"),
ComputeShader,
PassParameters2,
[PassParameters2, DeferredContext]()
{
return FComputeShaderUtils::GetGroupCountWrapped(DeferredContext->TotalCompactionBlocks);
});
}
}
// Above ES3_1, also build the global instance-culling uniform buffer.
if (FeatureLevel > ERHIFeatureLevel::ES3_1)
{
FInstanceCullingGlobalUniforms* UniformParameters = GraphBuilder.AllocParameters<FInstanceCullingGlobalUniforms>();
UniformParameters->InstanceIdsBuffer = GraphBuilder.CreateSRV(InstanceIdsBuffer);
// PageInfoBuffer is bound to the same InstanceIdsBuffer here.
UniformParameters->PageInfoBuffer = GraphBuilder.CreateSRV(InstanceIdsBuffer);
UniformParameters->BufferCapacity = 0U; // TODO: this is not used at the moment, but is intended for range checks so would have been good.
DeferredContext->UniformBuffer = GraphBuilder.CreateUniformBuffer(UniformParameters);
}
#undef INST_CULL_CREATE_STRUCT_BUFF_ARGS
#undef INST_CULL_CALLBACK
#undef INST_CULL_CALLBACK_MODE
#undef INST_CULL_CREATE_STRUCT_BUFF_ARGS_MODE
return DeferredContext;
}
总结 FInstanceCullingContext::CreateDeferredContext
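该函数本身并不立即执行剔除:它向 RDG 注册通过回调延迟确定大小和内容的缓冲区,并按每种 EBatchProcessingMode 添加 CullInstances 计算 pass(开启实例压缩时再加上 Instance Compaction Phase 1/2),最终目的是通过实例剔除提高渲染效率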
FDeferredShadingSceneRenderer::Render
回退到这里继续看
// Excerpt of the deferred renderer's Render: end of the GPU-scene update scope, then scene texture initialization.
void FDeferredShadingSceneRenderer::Render(FRDGBuilder& GraphBuilder)
{
{
// ...
// Begin deferred instance culling.
InstanceCullingManager.BeginDeferredCulling(GraphBuilder, Scene->GPUScene);
// Update the physics field, using the first view only.
if (Views.Num() > 0)
{
FViewInfo& View = Views[0];
Scene->UpdatePhysicsField(GraphBuilder, View);
}
}
// Initialize the scene textures for this view family.
FSceneTextures::InitializeViewFamily(GraphBuilder, ViewFamily);
FSceneTextures& SceneTextures = GetActiveSceneTextures();
FSceneTextures::InitializeViewFamily
// Excerpt: start of FSceneTextures::InitializeViewFamily, which first delegates to the minimal scene textures.
void FSceneTextures::InitializeViewFamily(FRDGBuilder& GraphBuilder, FViewFamilyInfo& ViewFamily)
{
const FSceneTexturesConfig& Config = ViewFamily.SceneTexturesConfig; // scene texture configuration
FSceneTextures& SceneTextures = ViewFamily.SceneTextures;
FMinimalSceneTextures::InitializeViewFamily(GraphBuilder, ViewFamily);
// ...
FSceneTexturesConfig
/** Struct containing the scene texture configuration used to create scene textures. Use InitializeViewFamily to initialize the
* SceneTexturesConfig structure in the FViewFamilyInfo. A global singleton instance is maintained manually with static Set / Get
* functions, but will soon be deprecated, in preference of using the structure from the FViewFamilyInfo.
*/
struct FSceneTexturesConfig
{
// ...
ESceneTextureExtracts Extracts = ESceneTextureExtracts::All; // Which scene textures to extract. NOTE(review): other values presumably include None / Depth / CustomDepth - confirm against the enum.
EShadingPath ShadingPath = EShadingPath::Num; // Shading path: mobile or deferred
ERHIFeatureLevel::Type FeatureLevel = ERHIFeatureLevel::SM5;
EShaderPlatform ShaderPlatform = SP_PCD3D_SM5;
// Resolution and sampling description
FIntPoint Extent = FIntPoint::ZeroValue; // Extent of all full-resolution textures.
FIntPoint MobilePixelProjectedReflectionExtent = FIntPoint::ZeroValue; // Extent of the mobile Pixel Projected Reflection texture
uint32 SmallDepthDownsampleFactor = 2; // Downsample factors to divide against the full resolution texture extent.
uint32 NumSamples = 1; // Number of MSAA samples used by color and depth targets.
uint32 EditorPrimitiveNumSamples = 1; // Number of MSAA samples used by the editor primitive composition targets.
EPixelFormat ColorFormat = PF_Unknown; // Pixel format to use when creating scene color.
// Create flags when creating scene color / depth textures
ETextureCreateFlags ColorCreateFlags = ETextureCreateFlags::None;
ETextureCreateFlags DepthCreateFlags = ETextureCreateFlags::None;
// Flags passed in from initializer
ETextureCreateFlags ExtraSceneColorCreateFlags = ETextureCreateFlags::None;
ETextureCreateFlags ExtraSceneDepthCreateFlags = ETextureCreateFlags::None;
// Optimized clear values to use for color / depth textures.
FClearValueBinding ColorClearValue = FClearValueBinding::Black;
FClearValueBinding DepthClearValue = FClearValueBinding::DepthFar;
// (Deferred Shading) Dynamic GBuffer configuration used to control allocation and slotting of base pass textures.
FGBufferParams GBufferParams[GBL_Num];
FGBufferBindings GBufferBindings[GBL_Num]; // Each GBuffer binding carries an index, a format and flags
uint32 bRequireMultiView : 1; // (VR) True if scene color and depth should be multi-view allocated.
uint32 bIsUsingGBuffers : 1; // True if platform is using GBuffers.
uint32 bKeepDepthContent : 1; // (Mobile) True if the platform should write depth content back to memory.
uint32 bPreciseDepthAux : 1; // (Mobile) True if SceneDepthAux should use a precise pixel format
uint32 bSamplesCustomStencil : 1; // (Mobile) True if CustomStencil are sampled in a shader
uint32 bMemorylessMSAA : 1; // (Mobile) True if MSAA targets can be memoryless
uint32 bSupportsXRTargetManagerDepthAlloc : 1; // (XR) True if we can request an XR depth swapchain
bool bRequiresAlphaChannel = false; // True if we require an alpha channel for scene color
FMinimalSceneTextures
/** RDG struct containing the minimal set of scene textures common across all rendering configurations. */
struct FMinimalSceneTextures
{
// Initializes the minimal scene textures structure in the FViewFamilyInfo
static RENDERER_API void InitializeViewFamily(FRDGBuilder& GraphBuilder, FViewFamilyInfo& ViewFamily);
FSceneTexturesConfig Config; // Immutable copy of the config used to create scene textures.
// Uniform buffers for deferred or mobile.
TRDGUniformBufferRef<FSceneTextureUniformParameters> UniformBuffer{};
TRDGUniformBufferRef<FMobileSceneTextureUniformParameters> MobileUniformBuffer{};
// Setup modes used when creating uniform buffers. These are updated on demand.
ESceneTextureSetupMode SetupMode = ESceneTextureSetupMode::None;
EMobileSceneTextureSetupMode MobileSetupMode = EMobileSceneTextureSetupMode::None;
// Core scene textures
FRDGTextureMSAA Color{}; // Texture containing scene color information with lighting but without post processing. Will be two textures if MSAA.
FRDGTextureMSAA Depth{}; // Texture containing scene depth. Will be two textures if MSAA.
FRDGTextureSRVRef Stencil{}; // Texture containing a stencil view of the resolved (if MSAA) scene depth.
FRDGTextureMSAA PartialDepth{}; // Textures containing primary depth buffer copied before other meshes are rendered in the secondary depth pass.
FCustomDepthTextures CustomDepth{}; // Textures containing depth / stencil information from the custom depth pass.
/** RDG struct containing the complete set of scene textures for the deferred or mobile renderers. */
struct FSceneTextures : public FMinimalSceneTextures
{
// (Deferred) Texture containing conservative downsampled depth for occlusion.
FRDGTextureRef SmallDepth{};
// (Deferred) Textures containing geometry information for deferred shading.
// GBuffer targets A-F
FRDGTextureRef GBufferA{};
FRDGTextureRef GBufferB{};
FRDGTextureRef GBufferC{};
FRDGTextureRef GBufferD{};
FRDGTextureRef GBufferE{};
FRDGTextureRef GBufferF{};
FRDGTextureMSAA DepthAux{}; // Additional Buffer texture used by mobile
FRDGTextureRef Velocity{}; // Texture containing dynamic motion vectors. Can be bound by the base pass or its own velocity pass.
// (Mobile Local Light Prepass) Textures containing LocalLight Direction and Color
FRDGTextureRef MobileLocalLightTextureA {};
FRDGTextureRef MobileLocalLightTextureB {};
// [SSAO] Texture containing the screen space ambient occlusion result.
FRDGTextureRef ScreenSpaceAO{};
// Texture used by the quad overdraw debug view mode when enabled.
FRDGTextureRef QuadOverdraw{};
// (Mobile) Texture used by mobile PPR in the next frame.
FRDGTextureRef PixelProjectedReflection{};
// Textures used to composite editor primitives. Also used by the base pass when in wireframe mode.
#if WITH_EDITOR
FRDGTextureRef EditorPrimitiveColor{};
FRDGTextureRef EditorPrimitiveDepth{};
#endif
};
FMinimalSceneTextures::InitializeViewFamily
// Creates the scene depth (plus stencil SRV), scene color and custom depth textures on the view family's
// SceneTextures from its SceneTexturesConfig, then marks the family's scene textures as initialized.
void FMinimalSceneTextures::InitializeViewFamily(FRDGBuilder& GraphBuilder, FViewFamilyInfo& ViewFamily)
{
const FSceneTexturesConfig& Config = ViewFamily.SceneTexturesConfig;
FSceneTextures& SceneTextures = ViewFamily.SceneTextures;
checkf(Config.IsValid(), TEXT("Attempted to create scene textures with an empty config."));
SceneTextures.Config = Config;
// Scene Depth
// If not using MSAA, we need to make sure to grab the stereo depth texture if appropriate.
FTexture2DRHIRef StereoDepthRHI;
// Single-sample case: if FindStereoDepthTexture returns a stereo depth texture, register it directly as scene depth.
if (Config.NumSamples == 1 && (StereoDepthRHI = FindStereoDepthTexture(Config.bSupportsXRTargetManagerDepthAlloc, Config.Extent, ETextureCreateFlags::None)) != nullptr)
{
SceneTextures.Depth = RegisterExternalTexture(GraphBuilder, StereoDepthRHI, TEXT("SceneDepthZ"));
SceneTextures.Stencil = GraphBuilder.CreateSRV(FRDGTextureSRVDesc::CreateWithPixelFormat(SceneTextures.Depth.Target, PF_X24_G8));
}
else // MSAA is enabled, or no stereo depth texture was found (this branch can still use a stereo resolve target below)
{
// TODO: Array-size could be values > 2, on upcoming XR devices. This should be part of the config.
// Describe the depth target: a 2-slice array when multi-view is required, a plain 2D texture otherwise.
FRDGTextureDesc Desc(Config.bRequireMultiView ?
FRDGTextureDesc::Create2DArray(SceneTextures.Config.Extent, PF_DepthStencil, Config.DepthClearValue, Config.DepthCreateFlags, 2) :
FRDGTextureDesc::Create2D(SceneTextures.Config.Extent, PF_DepthStencil, Config.DepthClearValue, Config.DepthCreateFlags));
Desc.NumSamples = Config.NumSamples;
// Create the depth texture.
SceneTextures.Depth = GraphBuilder.CreateTexture(Desc, TEXT("SceneDepthZ"));
// With MSAA, a separate single-sample resolve texture is set up as well.
if (Desc.NumSamples > 1)
{
Desc.NumSamples = 1;
if ((StereoDepthRHI = FindStereoDepthTexture(Config.bSupportsXRTargetManagerDepthAlloc, Config.Extent, ETextureCreateFlags::DepthStencilResolveTarget)) != nullptr)
{
ensureMsgf(Desc.ArraySize == StereoDepthRHI->GetDesc().ArraySize, TEXT("Resolve texture does not agree in dimensionality with Target (Resolve.ArraySize=%d, Target.ArraySize=%d)"),
Desc.ArraySize, StereoDepthRHI->GetDesc().ArraySize);
SceneTextures.Depth.Resolve = RegisterExternalTexture(GraphBuilder, StereoDepthRHI, TEXT("SceneDepthZ"));
}
else
{
SceneTextures.Depth.Resolve = GraphBuilder.CreateTexture(Desc, TEXT("SceneDepthZ"));
}
}
// Create the stencil SRV over the depth target.
SceneTextures.Stencil = GraphBuilder.CreateSRV(FRDGTextureSRVDesc::CreateWithPixelFormat(SceneTextures.Depth.Target, PF_X24_G8));
}
// Scene Color
{
const bool bIsMobilePlatform = Config.ShadingPath == EShadingPath::Mobile;
const ETextureCreateFlags sRGBFlag = (bIsMobilePlatform && IsMobileColorsRGB()) ? TexCreate_SRGB : TexCreate_None;
// Create the scene color.
// TODO: Array-size could be values > 2, on upcoming XR devices. This should be part of the config.
FRDGTextureDesc Desc(Config.bRequireMultiView ?
FRDGTextureDesc::Create2DArray(Config.Extent, Config.ColorFormat, Config.ColorClearValue, Config.ColorCreateFlags, 2) :
FRDGTextureDesc::Create2D(Config.Extent, Config.ColorFormat, Config.ColorClearValue, Config.ColorCreateFlags));
Desc.NumSamples = Config.NumSamples;
SceneTextures.Color = CreateTextureMSAA(GraphBuilder, Desc, TEXT("SceneColorMS"), TEXT("SceneColor"), GFastVRamConfig.SceneColor | sRGBFlag);
}
// Custom Depth
SceneTextures.CustomDepth = FCustomDepthTextures::Create(GraphBuilder, Config.Extent, Config.ShaderPlatform);
ViewFamily.bIsSceneTexturesInitialized = true;
}
FSceneTextures::InitializeViewFamily
我们看完了
FMinimalSceneTextures::InitializeViewFamily(GraphBuilder, ViewFamily);
然后回来
// Creates the full set of scene textures for the view family: the minimal set first, then the
// path-specific textures (SSAO, small depth / mobile PPR), velocity, GBuffers, mobile DepthAux,
// editor primitive targets, mobile local-light textures and the quad overdraw debug texture.
void FSceneTextures::InitializeViewFamily(FRDGBuilder& GraphBuilder, FViewFamilyInfo& ViewFamily)
{
const FSceneTexturesConfig& Config = ViewFamily.SceneTexturesConfig; // scene texture configuration
FSceneTextures& SceneTextures = ViewFamily.SceneTextures;
FMinimalSceneTextures::InitializeViewFamily(GraphBuilder, ViewFamily);
// Deferred shading path vs. any other shading path
if (Config.ShadingPath == EShadingPath::Deferred)
{
// Screen Space Ambient Occlusion texture
SceneTextures.ScreenSpaceAO = CreateScreenSpaceAOTexture(GraphBuilder, Config.Extent);
// Small Depth: depth target at an extent downscaled by SmallDepthDownsampleFactor
const FIntPoint SmallDepthExtent = GetDownscaledExtent(Config.Extent, Config.SmallDepthDownsampleFactor);
const FRDGTextureDesc SmallDepthDesc(FRDGTextureDesc::Create2D(SmallDepthExtent, PF_DepthStencil, FClearValueBinding::None, TexCreate_DepthStencilTargetable | TexCreate_ShaderResource));
SceneTextures.SmallDepth = GraphBuilder.CreateTexture(SmallDepthDesc, TEXT("SmallDepthZ"));
}
else
{
// Mobile Screen Space Ambient Occlusion
SceneTextures.ScreenSpaceAO = CreateMobileScreenSpaceAOTexture(GraphBuilder, Config);
if (Config.MobilePixelProjectedReflectionExtent != FIntPoint::ZeroValue)
{
SceneTextures.PixelProjectedReflection = CreateMobilePixelProjectedReflectionTexture(GraphBuilder, Config.MobilePixelProjectedReflectionExtent);
}
}
// Velocity
SceneTextures.Velocity = GraphBuilder.CreateTexture(FVelocityRendering::GetRenderTargetDesc(Config.ShaderPlatform, Config.Extent), TEXT("SceneVelocity"));
// Create the GBuffer textures; each of A-E is created only when its binding index is valid (>= 0).
if (Config.bIsUsingGBuffers)
{
ETextureCreateFlags FlagsToAdd = TexCreate_None;
const FGBufferBindings& Bindings = Config.GBufferBindings[GBL_Default];
if (Bindings.GBufferA.Index >= 0)
{
const FRDGTextureDesc Desc(FRDGTextureDesc::Create2D(Config.Extent, Bindings.GBufferA.Format, FClearValueBinding::Transparent, Bindings.GBufferA.Flags | FlagsToAdd | GFastVRamConfig.GBufferA));
SceneTextures.GBufferA = GraphBuilder.CreateTexture(Desc, TEXT("GBufferA"));
}
if (Bindings.GBufferB.Index >= 0)
{
const FRDGTextureDesc Desc(FRDGTextureDesc::Create2D(Config.Extent, Bindings.GBufferB.Format, FClearValueBinding::Transparent, Bindings.GBufferB.Flags | FlagsToAdd | GFastVRamConfig.GBufferB));
SceneTextures.GBufferB = GraphBuilder.CreateTexture(Desc, TEXT("GBufferB"));
}
if (Bindings.GBufferC.Index >= 0)
{
const FRDGTextureDesc Desc(FRDGTextureDesc::Create2D(Config.Extent, Bindings.GBufferC.Format, FClearValueBinding::Transparent, Bindings.GBufferC.Flags | FlagsToAdd | GFastVRamConfig.GBufferC));
SceneTextures.GBufferC = GraphBuilder.CreateTexture(Desc, TEXT("GBufferC"));
}
if (Bindings.GBufferD.Index >= 0)
{
const FRDGTextureDesc Desc(FRDGTextureDesc::Create2D(Config.Extent, Bindings.GBufferD.Format, FClearValueBinding::Transparent, Bindings.GBufferD.Flags | FlagsToAdd | GFastVRamConfig.GBufferD));
SceneTextures.GBufferD = GraphBuilder.CreateTexture(Desc, TEXT("GBufferD"));
}
if (Bindings.GBufferE.Index >= 0)
{
const FRDGTextureDesc Desc(FRDGTextureDesc::Create2D(Config.Extent, Bindings.GBufferE.Format, FClearValueBinding::Transparent, Bindings.GBufferE.Flags | FlagsToAdd | GFastVRamConfig.GBufferE));
SceneTextures.GBufferE = GraphBuilder.CreateTexture(Desc, TEXT("GBufferE"));
}
// GBufferF is not yet part of the data driven GBuffer info.
if (Config.ShadingPath == EShadingPath::Deferred)
{
ETextureCreateFlags GBufferFCreateFlags;
EPixelFormat GBufferFPixelFormat = GetGBufferFFormatAndCreateFlags(GBufferFCreateFlags);
const FRDGTextureDesc Desc = FRDGTextureDesc::Create2D(Config.Extent, GBufferFPixelFormat, FClearValueBinding({ 0.5f, 0.5f, 0.5f, 0.5f }), GBufferFCreateFlags | FlagsToAdd);
SceneTextures.GBufferF = GraphBuilder.CreateTexture(Desc, TEXT("GBufferF"));
}
}
// Mobile shading path on a platform that requires SceneDepthAux
if (Config.ShadingPath == EShadingPath::Mobile && MobileRequiresSceneDepthAux(Config.ShaderPlatform))
{
const float FarDepth = (float)ERHIZBuffer::FarPlane;
const FLinearColor FarDepthColor(FarDepth, FarDepth, FarDepth, FarDepth);
ETextureCreateFlags MemorylessFlag = TexCreate_None;
// Memoryless when mobile deferred shading is enabled, or when MSAA is on and memoryless MSAA is allowed
if (IsMobileDeferredShadingEnabled(Config.ShaderPlatform) || (Config.NumSamples > 1 && Config.bMemorylessMSAA))
{
// hotfix for a crash on a Mac mobile preview, proper fix is in 5.2
#if !PLATFORM_MAC
MemorylessFlag = TexCreate_Memoryless;
#endif
}
// Create DepthAux
EPixelFormat DepthAuxFormat = GetMobileSceneDepthAuxPixelFormat(Config.ShaderPlatform, Config.bPreciseDepthAux); // pixel format
FRDGTextureDesc Desc = Config.bRequireMultiView ?
FRDGTextureDesc::Create2DArray(Config.Extent, DepthAuxFormat, FClearValueBinding(FarDepthColor), TexCreate_RenderTargetable | TexCreate_ShaderResource | TexCreate_InputAttachmentRead | MemorylessFlag, 2) :
FRDGTextureDesc::Create2D(Config.Extent, DepthAuxFormat, FClearValueBinding(FarDepthColor), TexCreate_RenderTargetable | TexCreate_ShaderResource | TexCreate_InputAttachmentRead| MemorylessFlag);
Desc.NumSamples = Config.NumSamples;
SceneTextures.DepthAux = CreateTextureMSAA(GraphBuilder, Desc, TEXT("SceneDepthAuxMS"), TEXT("SceneDepthAux"));
}
// Editor-only primitive color / depth targets
#if WITH_EDITOR
{
const FRDGTextureDesc ColorDesc(FRDGTextureDesc::Create2D(Config.Extent, PF_B8G8R8A8, FClearValueBinding::Transparent, TexCreate_ShaderResource | TexCreate_RenderTargetable, 1, Config.EditorPrimitiveNumSamples));
SceneTextures.EditorPrimitiveColor = GraphBuilder.CreateTexture(ColorDesc, TEXT("Editor.PrimitivesColor"));
const FRDGTextureDesc DepthDesc(FRDGTextureDesc::Create2D(Config.Extent, PF_DepthStencil, FClearValueBinding::DepthFar, TexCreate_ShaderResource | TexCreate_DepthStencilTargetable, 1, Config.EditorPrimitiveNumSamples));
SceneTextures.EditorPrimitiveDepth = GraphBuilder.CreateTexture(DepthDesc, TEXT("Editor.PrimitivesDepth"));
}
#endif
// Mobile forward prepass local-light textures (A and B)
extern bool MobileForwardEnablePrepassLocalLights(const FStaticShaderPlatform Platform);
if(MobileForwardEnablePrepassLocalLights(Config.ShaderPlatform))
{
FRDGTextureDesc MobileLocalLightTextureADesc = FRDGTextureDesc::Create2D(Config.Extent, PF_FloatR11G11B10, FClearValueBinding::Transparent, TexCreate_RenderTargetable | TexCreate_ShaderResource);
SceneTextures.MobileLocalLightTextureA = GraphBuilder.CreateTexture(MobileLocalLightTextureADesc, TEXT("MobileLocalLightTextureA"));
FRDGTextureDesc MobileLocalLightTextureBDesc = FRDGTextureDesc::Create2D(Config.Extent, PF_A2B10G10R10, FClearValueBinding::Transparent, TexCreate_RenderTargetable | TexCreate_ShaderResource);
SceneTextures.MobileLocalLightTextureB = GraphBuilder.CreateTexture(MobileLocalLightTextureBDesc, TEXT("MobileLocalLightTextureB"));
}
// Quad overdraw debug texture, only when the quad complexity debug view mode is allowed
#if WITH_DEBUG_VIEW_MODES
if (AllowDebugViewShaderMode(DVSM_QuadComplexity, Config.ShaderPlatform, Config.FeatureLevel))
{
FIntPoint QuadOverdrawExtent;
QuadOverdrawExtent.X = 2 * FMath::Max<uint32>((Config.Extent.X + 1) / 2, 1); // The size is time 2 since left side is QuadDescriptor, and right side QuadComplexity.
QuadOverdrawExtent.Y = FMath::Max<uint32>((Config.Extent.Y + 1) / 2, 1);
const FRDGTextureDesc QuadOverdrawDesc(FRDGTextureDesc::Create2D(QuadOverdrawExtent, PF_R32_UINT, FClearValueBinding::None, TexCreate_ShaderResource | TexCreate_RenderTargetable | TexCreate_UAV));
SceneTextures.QuadOverdraw = GraphBuilder.CreateTexture(QuadOverdrawDesc, TEXT("QuadOverdrawTexture"));
}
#endif
}
FDeferredShadingSceneRenderer::Render
回来,我们这里看完了 场景贴图的初始化
void FDeferredShadingSceneRenderer::Render(FRDGBuilder& GraphBuilder)
{
// 。。。
FSceneTextures::InitializeViewFamily(GraphBuilder, ViewFamily);
FSceneTextures& SceneTextures = GetActiveSceneTextures();
准备一些变量
const bool bUseGBuffer = IsUsingGBuffers(ShaderPlatform);
const bool bRenderDeferredLighting = ViewFamily.EngineShowFlags.Lighting
&& FeatureLevel >= ERHIFeatureLevel::SM5
&& ViewFamily.EngineShowFlags.DeferredLighting
&& bUseGBuffer
&& !bHasRayTracedOverlay;
bool bComputeLightGrid = false;
if (RendererOutput == ERendererOutput::FinalSceneColor)
{
if (bUseVirtualTexturing) // 使用虚拟纹理,包含颜色反射率,粗糙度等信息,能模拟木材金属。把这些信息应用在三维模型的表面。比如一个山模型,和地面衔接的地方用上虚拟纹理过度更自然
{
// Note, should happen after the GPU-Scene update to ensure rendering to runtime virtual textures is using the correctly updated scene
FVirtualTextureSystem::Get().EndUpdate(GraphBuilder, MoveTemp(VirtualTextureUpdater), FeatureLevel);
}
// 光线追踪
#if RHI_RAYTRACING
GatherRayTracingWorldInstancesForView(GraphBuilder, ReferenceView, RayTracingScene, InitViewTaskDatas.RayTracingRelevantPrimitives);
#endif // RHI_RAYTRACING
// 。。。
}
FDeferredShadingSceneRenderer::GatherRayTracingWorldInstancesForView
给视图添加光线追踪的实例列表,并将光线追踪的数据添加到视图中
开始都是一些检测
bool FDeferredShadingSceneRenderer::GatherRayTracingWorldInstancesForView(FRDGBuilder& GraphBuilder, FViewInfo& View, FRayTracingScene& RayTracingScene, FRayTracingRelevantPrimitiveTaskData* RayTracingRelevantPrimitiveTaskData)
{
TRACE_CPUPROFILER_EVENT_SCOPE(FDeferredShadingSceneRenderer::GatherRayTracingWorldInstances);
SCOPE_CYCLE_COUNTER(STAT_GatherRayTracingWorldInstances);
if (!bAnyRayTracingPassEnabled)
{
return false;
}
check(RayTracingRelevantPrimitiveTaskData)
// Wait until RayTracingRelevantPrimitiveList is ready
// 等待 这个列表准备好了
if (RayTracingRelevantPrimitiveTaskData->Task.IsValid())
{
RayTracingRelevantPrimitiveTaskData->Task->Wait();
RayTracingRelevantPrimitiveTaskData->Task.SafeRelease();
}
FRayTracingRelevantPrimitiveList& RelevantPrimitiveList = RayTracingRelevantPrimitiveTaskData->List;
// Prepare ray tracing scene instance list
checkf(RelevantPrimitiveList.bValid, TEXT("Ray tracing relevant primitive list is expected to have been created before GatherRayTracingWorldInstancesForView() is called."));
// Check that any invalidated cached uniform expressions have been updated on the rendering thread.
// Normally this work is done through FMaterialRenderProxy::UpdateUniformExpressionCacheIfNeeded,
// however ray tracing material processing (FMaterialShader::GetShaderBindings, which accesses UniformExpressionCache)
// is done on task threads, therefore all work must be done here up-front as UpdateUniformExpressionCacheIfNeeded is not free-threaded.
check(!FMaterialRenderProxy::HasDeferredUniformExpressionCacheRequests());
bool FDeferredShadingSceneRenderer::GatherRayTracingWorldInstancesForView(FRDGBuilder& GraphBuilder, FViewInfo& View, FRayTracingScene& RayTracingScene, FRayTracingRelevantPrimitiveTaskData* RayTracingRelevantPrimitiveTaskData)
{
// 。。。
RayTracingCollector.ClearViewMeshArrays();
看看这个收集器
/**
* Encapsulates the gathering of meshes from the various FPrimitiveSceneProxy classes.
*/
class FMeshElementCollector
{
public:
TChunkedArray<FMeshBatch> MeshBatchStorage; // mesh批次的储存
/**
* A batch of mesh elements, all with the same material and vertex buffer
*/
struct FMeshBatch
{
TArray<FMeshBatchElement,TInlineAllocator<1> > Elements;
const FVertexFactory* VertexFactory; // 顶点工厂
const FMaterialRenderProxy* MaterialRenderProxy; //
const FLightCacheInterface* LCI;
FHitProxyId BatchHitProxyId; // 命中代理ID
float TessellationDisablingShadowMapMeshSize; // This is the threshold that will be used to know if we should use this mesh batch or use one with no tessellation enabled
uint16 MeshIdInPrimitive; // Mesh Id in a primitive. Used for stable sorting of draws belonging to the same primitive
int8 LODIndex;
uint8 SegmentIndex;
/**
* Pass feature relevance flags. Allows a proxy to submit fast representations for passes which can take advantage of it,
* for example separate index buffer for depth-only rendering since vertices can be merged based on position and ignore UV differences.
*/
// 下面是一些 【Pass feature relevance flags】
uint32 CastShadow : 1; // Whether it can be used in shadow renderpasses.
uint32 bUseForMaterial : 1; // Whether it can be used in renderpasses requiring material outputs.
uint32 bUseForDepthPass : 1; // Whether it can be used in depth pass.
uint32 bUseAsOccluder : 1; // Hint whether this mesh is a good occluder.
uint32 bWireframe : 1;
uint32 Type : PT_NumBits; // e.g. PT_TriangleList(default), PT_LineList, ..
uint32 DepthPriorityGroup : SDPG_NumBits; // e.g. SDPG_World (default), SDPG_Foreground
uint32 bCanApplyViewModeOverrides : 1; /** Whether view mode overrides can be applied to this mesh eg unlit, wireframe. */
uint32 bUseWireframeSelectionColoring : 1;
uint32 bUseSelectionOutline : 1;
uint32 bSelectable : 1; /** Whether the mesh batch can be selected through editor selection, aka hit proxies. */
uint32 bDitheredLODTransition : 1; /** Whether the mesh batch should apply dithered LOD. */
uint32 bRenderToVirtualTexture : 1; /** Whether the mesh batch can be rendered to virtual textures. */
uint32 RuntimeVirtualTextureMaterialType : RuntimeVirtualTexture::MaterialType_NumBits; /** What virtual texture material type this mesh batch should be rendered with. */
uint32 bOverlayMaterial : 1; /** Whether mesh is rendered with overlay material. */
#if RHI_RAYTRACING
uint32 CastRayTracedShadow : 1; // Whether it casts ray traced shadow.
#endif
uint32 bViewDependentArguments : 1; // Whether mesh has a view dependent draw arguments.
#if UE_ENABLE_DEBUG_DRAWING
int8 VisualizeHLODIndex; /** Conceptual HLOD index used for the HLOD Coloration visualization. */
int8 VisualizeLODIndex; /** Conceptual LOD index used for the LOD Coloration visualization. */
#endif
/**
* A batch mesh element definition.
*/
struct FMeshBatchElement
{
FRHIUniformBuffer* PrimitiveUniformBuffer; // buff
const TUniformBuffer<FPrimitiveUniformShaderParameters>* PrimitiveUniformBufferResource; // shader参数
FUniformBufferRHIRef LooseParametersUniformBuffer; /** Uniform buffer containing the "loose" parameters that aren't wrapped in other uniform buffers. Those parameters can be unique per mesh batch, e.g. view dependent. */
const FIndexBuffer* IndexBuffer; // 绘制到那个batch的 Index The index buffer to draw the mesh batch with.
FMeshBatchElementDynamicIndexBuffer DynamicIndexBuffer;
union
{
/** If !bIsSplineProxy, Instance runs, where number of runs is specified by NumInstances. Run structure is [StartInstanceIndex, EndInstanceIndex]. */
uint32* InstanceRuns;
/** If bIsSplineProxy, a pointer back to the proxy */
const class FSplineMeshSceneProxy* SplineMeshSceneProxy;
};
const void* UserData;
void* VertexFactoryUserData; // Meaning depends on the vertex factory, e.g. FGPUSkinPassthroughVertexFactory: element index in FGPUSkinCache::CachedElements
FRHIBuffer* IndirectArgsBuffer; // 间接参数的缓冲区
uint32 IndirectArgsOffset; // 间接参数的偏移量
EPrimitiveIdMode PrimitiveIdMode : PrimID_NumBits + 1; // 图元的模式,如何获取 PrimitiveId 的数据方式
uint32 FirstIndex;
uint32 NumPrimitives;
/** Number of instances to draw. If InstanceRuns is valid, this is actually the number of runs in InstanceRuns. */
uint32 NumInstances;
uint32 BaseVertexIndex;
uint32 MinVertexIndex;
uint32 MaxVertexIndex;
int32 UserIndex;
float MinScreenSize;
float MaxScreenSize;
uint32 InstancedLODIndex : 4;
uint32 InstancedLODRange : 4;
uint32 bUserDataIsColorVertexBuffer : 1;
uint32 bIsSplineProxy : 1;
uint32 bIsInstanceRuns : 1;
uint32 bForceInstanceCulling : 1;
uint32 bPreserveInstanceOrder : 1;
// 动态图元的数据,
const FMeshBatchDynamicPrimitiveData* DynamicPrimitiveData;
uint32 DynamicPrimitiveIndex;
uint32 DynamicPrimitiveInstanceSceneDataOffset;
回到这个收集器
/**
* Encapsulates the gathering of meshes from the various FPrimitiveSceneProxy classes.
*/
class FMeshElementCollector
{
public:
TChunkedArray<FMeshBatch> MeshBatchStorage; // mesh批次的储存
TArray<TArray<FMeshBatchAndRelevance, SceneRenderingAllocator>*, TInlineAllocator<2, SceneRenderingAllocator> > MeshBatches;
/**
* A reference to a mesh batch that is added to the collector, together with some cached relevance flags.
*/
struct FMeshBatchAndRelevance
{
const FMeshBatch* Mesh; // 渲染的是什么
const FPrimitiveSceneProxy* PrimitiveSceneProxy; /** The render info for the primitive which created this mesh, required. */
private:
// 状态,不透明材质,有遮罩的材质,在主pass中渲染
uint32 bHasOpaqueMaterial : 1;
uint32 bHasMaskedMaterial : 1;
uint32 bRenderInMainPass : 1;
又回来
/**
* Encapsulates the gathering of meshes from the various FPrimitiveSceneProxy classes.
*/
class FMeshElementCollector
{
public:
TChunkedArray<FMeshBatch> MeshBatchStorage; // mesh批次的储存
TArray<TArray<FMeshBatchAndRelevance, SceneRenderingAllocator>*, TInlineAllocator<2, SceneRenderingAllocator> > MeshBatches;
TArray<int32, TInlineAllocator<2, SceneRenderingAllocator> > NumMeshBatchElementsPerView; // 视口view中又多少个批次元素 /** Number of elements in gathered meshes per view. */
TArray<FSimpleElementCollector*, TInlineAllocator<2, SceneRenderingAllocator> > SimpleElementCollectors; // PDI(FPrimitiveDrawInterface),提供了接口如 SetHitProxy,DrawLine,DrawSprite 等,用于在渲染过程中绘制图元
TArray<FSceneView*, TInlineAllocator<2, SceneRenderingAllocator>> Views; // 收集了哪些视口
TArray<uint16, TInlineAllocator<2, SceneRenderingAllocator>> MeshIdInPrimitivePerView;
TArray<FMaterialRenderProxy*, SceneRenderingAllocator> TemporaryProxies; // Material proxies that will be deleted at the end of the frame.
FSceneRenderingBulkObjectAllocator& OneFrameResources; // Resources that will be deleted at the end of the frame
const FPrimitiveSceneProxy* PrimitiveSceneProxy; // Current primitive being gathered
/** Dynamic buffer pools. */
FGlobalDynamicIndexBuffer* DynamicIndexBuffer;
FGlobalDynamicVertexBuffer* DynamicVertexBuffer;
FGlobalDynamicReadBuffer* DynamicReadBuffer;
ERHIFeatureLevel::Type FeatureLevel;
/** Tracks dynamic primitive data for upload to GPU Scene for every view, when enabled. */
TArray<FGPUScenePrimitiveCollector*, TInlineAllocator<2, SceneRenderingAllocator>> DynamicPrimitiveCollectorPerView;
FDeferredShadingSceneRenderer::GatherRayTracingWorldInstancesForView
回来,我们简单看了一下这个 RayTracingCollector
bool FDeferredShadingSceneRenderer::GatherRayTracingWorldInstancesForView(FRDGBuilder& GraphBuilder, FViewInfo& View, FRayTracingScene& RayTracingScene, FRayTracingRelevantPrimitiveTaskData* RayTracingRelevantPrimitiveTaskData)
{
// 。。。
RayTracingCollector.ClearViewMeshArrays();
// 添加,从view里面拿
FGPUScenePrimitiveCollector DummyDynamicPrimitiveCollector;
RayTracingCollector.AddViewMeshArrays(
&View,
&View.RayTracedDynamicMeshElements,
&View.SimpleElementCollector,
&DummyDynamicPrimitiveCollector,
ViewFamily.GetFeatureLevel(),
&DynamicIndexBufferForInitViews,
&DynamicVertexBufferForInitViews,
&DynamicReadBufferForInitViews
);
// 预留空间
View.DynamicRayTracingMeshCommandStorage.Reserve(Scene->Primitives.Num());
View.VisibleRayTracingMeshCommands.Reserve(Scene->Primitives.Num());
// 用户扩展
extern TSet<IPersistentViewUniformBufferExtension*> PersistentViewUniformBufferExtensions;
for (IPersistentViewUniformBufferExtension* Extension : PersistentViewUniformBufferExtensions)
{
Extension->BeginRenderView(&View);
}
// 创建 FRayTracingMeshResourceCollector 用于材质网格批次的收集资源
View.RayTracingMeshResourceCollector = MakeUnique<FRayTracingMeshResourceCollector>(
Scene->GetFeatureLevel(),
Allocator,
&DynamicIndexBufferForInitViews,
&DynamicVertexBufferForInitViews,
&DynamicReadBufferForInitViews);
// 初始化光线追踪裁剪参数
View.RayTracingCullingParameters.Init(View);
// 材质收集的上下文
FDeferredShadingRayTracingMaterialGatheringContext MaterialGatheringContext
(
Scene,
&View,
ViewFamily,
GraphBuilder,
*View.RayTracingMeshResourceCollector
);
const float CurrentWorldTime = View.Family->Time.GetWorldTimeSeconds();
// Consume output of the relevant primitive gathering task // CoarseMesh 粗网格
RayTracingScene.UsedCoarseMeshStreamingHandles = MoveTemp(RelevantPrimitiveList.UsedCoarseMeshStreamingHandles);
// Inform the coarse mesh streaming manager about all the used streamable render assets in the scene
Nanite::FCoarseMeshStreamingManager* CoarseMeshSM = IStreamingManager::Get().GetNaniteCoarseMeshStreamingManager();
if (CoarseMeshSM)
{
CoarseMeshSM->AddUsedStreamingHandles(RayTracingScene.UsedCoarseMeshStreamingHandles);
}
// 看到stat就是统计相关的了
INC_DWORD_STAT_BY(STAT_VisibleRayTracingPrimitives, RelevantPrimitiveList.DynamicPrimitives.Num() + RelevantPrimitiveList.StaticPrimitives.Num());
// 。。。
{
TRACE_CPUPROFILER_EVENT_SCOPE(GatherRayTracingWorldInstances_DynamicElements);
const bool bParallelMeshBatchSetup = GRayTracingParallelMeshBatchSetup && FApp::ShouldUseThreadingForPerformance(); // 是否使用并行网格的批处理设置
const int64 SharedBufferGenerationID = Scene->GetRayTracingDynamicGeometryCollection()->BeginUpdate(); // 清理
// One unit of work for the ray tracing mesh batch task: the mesh batches of a single
// ray tracing instance plus the scene instance indices they belong to.
// Work items are filled in by the dynamic-primitive loop and consumed by the task
// dispatched from KickRayTracingMeshBatchTask (presumably so mesh batch setup can run in parallel).
struct FRayTracingMeshBatchWorkItem
{
// Proxy that produced the mesh batches; passed to FRayTracingMeshProcessor::AddMeshBatch by the task.
const FPrimitiveSceneProxy* SceneProxy = nullptr;
// Mesh batches owned by this work item (swapped in from the instance when it owns its materials).
TArray<FMeshBatch> MeshBatchesOwned;
// Non-owning view of mesh batches, used when the instance only references its materials.
TArrayView<const FMeshBatch> MeshBatchesView;
// Index returned by FRayTracingScene::AddInstance for the instance. No default initializer here.
uint32 InstanceIndex;
// Index of the separate decal instance, or INDEX_NONE when none was added. No default initializer here.
uint32 DecalInstanceIndex;
// Returns whichever of the two mesh batch containers is populated.
// The checks assert that the owned array and the view are never both non-empty.
TArrayView<const FMeshBatch> GetMeshBatches() const
{
if (MeshBatchesOwned.Num())
{
check(MeshBatchesView.Num() == 0);
return TArrayView<const FMeshBatch>(MeshBatchesOwned);
}
else
{
check(MeshBatchesOwned.Num() == 0);
return MeshBatchesView;
}
}
};
static constexpr uint32 MaxWorkItemsPerPage = 128; // Try to keep individual pages small to avoid slow-path memory allocations
// Fixed-size page of work items. Pages are chained through Next to form a singly linked list
// that one mesh batch task walks from head to tail.
struct FRayTracingMeshBatchTaskPage
{
// Storage for up to MaxWorkItemsPerPage work items.
FRayTracingMeshBatchWorkItem WorkItems[MaxWorkItemsPerPage];
// Number of entries of WorkItems that are in use.
uint32 NumWorkItems = 0;
FRayTracingMeshBatchTaskPage* Next = nullptr; // Next page in the linked list, or nullptr for the last page
};
FRayTracingMeshBatchTaskPage* MeshBatchTaskHead = nullptr;
FRayTracingMeshBatchTaskPage* MeshBatchTaskPage = nullptr;
uint32 NumPendingMeshBatches = 0;
const uint32 RayTracingParallelMeshBatchSize = GRayTracingParallelMeshBatchSize;
auto KickRayTracingMeshBatchTask = [&Allocator = Allocator, &View, &MeshBatchTaskHead, &MeshBatchTaskPage, &NumPendingMeshBatches, Scene = this->Scene]()
{
// 。。。这个内容后面再看吧。。。
}
// Local temporary array of instances used for GetDynamicRayTracingInstances() 临时的光线追踪实例
TArray<FRayTracingInstance> TempRayTracingInstances;
// 可以看到for所有动态图元,然后执行上面那个lambda
for (const FRayTracingRelevantPrimitive& RelevantPrimitive : RelevantPrimitiveList.DynamicPrimitives)
{
const int32 PrimitiveIndex = RelevantPrimitive.PrimitiveIndex;
FPrimitiveSceneInfo* SceneInfo = Scene->Primitives[PrimitiveIndex];
// 取出对应的 SceneProxy,并清空上一个图元遗留的临时数据
FPrimitiveSceneProxy* SceneProxy = Scene->PrimitiveSceneProxies[PrimitiveIndex];
TempRayTracingInstances.Reset();
MaterialGatheringContext.DynamicRayTracingGeometriesToUpdate.Reset();
// 收集实例
SceneProxy->GetDynamicRayTracingInstances(MaterialGatheringContext, TempRayTracingInstances);
// 添加到场景的 FRayTracingDynamicGeometryCollection 光线追踪集合体收集器里面
for (const FRayTracingDynamicGeometryUpdateParams& DynamicRayTracingGeometryUpdate : MaterialGatheringContext.DynamicRayTracingGeometriesToUpdate)
{
Scene->GetRayTracingDynamicGeometryCollection()->AddDynamicMeshBatchForGeometryUpdate(
GraphBuilder.RHICmdList,
Scene,
&View,
SceneProxy,
DynamicRayTracingGeometryUpdate,
PrimitiveIndex
);
}
if (TempRayTracingInstances.Num() > 0)
{
// 。。。
}
}
KickRayTracingMeshBatchTask();
}
if (TempRayTracingInstances.Num() > 0)
看这个前先看看类
TempRayTracingInstances 是一个数组(TArray),元素类型是 FRayTracingInstance
struct FRayTracingInstance
{
const FRayTracingGeometry* Geometry; // 几何体 /** The underlying geometry of this instance specification. */
TArray<FMeshBatch> Materials; // 每个几何段的材质信息,
TArrayView<const FMeshBatch> MaterialsView; /** 上面变量的引用 Similar to Materials, but memory is owned by someone else (i.g. FPrimitiveSceneProxy). */
FRayTracingMaskAndFlags MaskAndFlags; // 标志位状态
bool bApplyLocalBoundsTransform = false; // Whether local bounds scale and center translation should be applied to the instance transform
bool bThinGeometry = false; // Whether the instance is thin(薄) geometry (e.g., Hair strands 发丝)
ERayTracingInstanceLayer InstanceLayer = ERayTracingInstanceLayer::NearField; // 实例的层级,近距离场,远距离场
bool bInstanceMaskAndFlagsDirty = true;
uint32 NumTransforms = 0;
TArray<FMatrix> InstanceTransforms;
TArrayView<const FMatrix> InstanceTransformsView; // 实例矩阵的访问方式
FShaderResourceViewRHIRef InstanceGPUTransformsSRV; // When instance transforms are only available in GPU, this SRV holds them.
/** A ray tracing geometry resource */
class FRayTracingGeometry : public FRenderResource
{
public:
TResourceArray<uint8> RawData; // 数据
#if RHI_RAYTRACING
/** When set to NonSharedVertexBuffers, then shared vertex buffers are not used */
static constexpr int64 NonSharedVertexBuffers = -1;
/**
Vertex buffers for dynamic geometries may be sub-allocated from a shared pool, which is periodically reset and its generation ID is incremented.
Geometries that use the shared buffer must be updated (rebuilt or refit) before they are used for rendering after the pool is reset.
This is validated by comparing the current shared pool generation ID against generation IDs stored in FRayTracingGeometry during latest update.
*/
int64 DynamicGeometrySharedBufferGenerationID = NonSharedVertexBuffers; // 动态几何shader 缓冲池 ID
FRayTracingGeometryInitializer Initializer; // 初始化信息
FRayTracingGeometryRHIRef RayTracingGeometryRHI; // 几何体的RHI
enum class EGeometryStateFlags : uint32 // Flags for tracking the state of RayTracingGeometryRHI.
{
Invalid = 0, // Initial state when the geometry was not created or was created for streaming but not yet streamed in.
RequiresBuild = 1 << 0, // If the geometry needs to be built.
Valid = 1 << 1, // If the geometry was successfully created or streamed in.
StreamedIn = 1 << 2 // Special flag that is used when ray tracing is dynamic to mark the streamed geometry to be recreated when ray tracing is switched on. Only set when mesh streaming is used.
};
FRIEND_ENUM_CLASS_FLAGS(EGeometryStateFlags);
int8 LODIndex = -1;
int32 RayTracingBuildRequestIndex = INDEX_NONE;
int32 RayTracingGeometryHandle = INDEX_NONE; // Only valid when ray tracing is dynamic
EGeometryStateFlags GeometryState = EGeometryStateFlags::Invalid;
if (TempRayTracingInstances.Num() > 0)
{
// 。。。
}
这个if有点多,单独拿出来看一下
if (TempRayTracingInstances.Num() > 0)
{
for (FRayTracingInstance& Instance : TempRayTracingInstances)
{
const FRayTracingGeometry* Geometry = Instance.Geometry; // 遍历所有实例,获取几何体
// 验证
if (!ensureMsgf(Geometry->DynamicGeometrySharedBufferGenerationID == FRayTracingGeometry::NonSharedVertexBuffers
|| Geometry->DynamicGeometrySharedBufferGenerationID == SharedBufferGenerationID,
TEXT("GenerationID %lld, but expected to be %lld or %lld. Geometry debug name: '%s'. ")
TEXT("When shared vertex buffers are used, the contents is expected to be written every frame. ")
TEXT("Possibly AddDynamicMeshBatchForGeometryUpdate() was not called for this geometry."),
Geometry->DynamicGeometrySharedBufferGenerationID, SharedBufferGenerationID, FRayTracingGeometry::NonSharedVertexBuffers,
*Geometry->Initializer.DebugName.ToString()))
{
continue;
}
// If geometry still has pending build request then add to list which requires a force build
// 有构建的请求,就加进来后面处理
if (Geometry->HasPendingBuildRequest())
{
RayTracingScene.GeometriesToBuild.Add(Geometry);
}
// Validate the material/segment counts 验证材质的段数
if (!ensureMsgf(Instance.GetMaterials().Num() == Geometry->Initializer.Segments.Num() ||
(Geometry->Initializer.Segments.Num() == 0 && Instance.GetMaterials().Num() == 1),
TEXT("Ray tracing material assignment validation failed for geometry '%s'. "
"Instance.GetMaterials().Num() = %d, Geometry->Initializer.Segments.Num() = %d, Instance.Mask = 0x%X."),
*Geometry->Initializer.DebugName.ToString(), Instance.GetMaterials().Num(),
Geometry->Initializer.Segments.Num(), Instance.MaskAndFlags.Mask))
{
continue;
}
// Autobuild of InstanceMaskAndFlags if the mask and flags are not built。更新flag
UpdateRayTracingInstanceMaskAndFlagsIfNeeded(Instance, *SceneProxy, &ViewFamily);
// if primitive has mixed decal and non-decal segments we need to have two ray tracing instances
// one containing non-decal segments and the other with decal segments
// masking of segments is done using "hidden" hitgroups
// TODO: Debug Visualization to highlight primitives using this?
// 需要分离贴花实例 Separate Decal Instance
const bool bNeedSeparateDecalInstance = Instance.MaskAndFlags.bAnySegmentsDecal && !Instance.MaskAndFlags.bAllSegmentsDecal;
// 是否排除贴花:开启 GRayTracingExcludeDecals 且该实例的所有段都是贴花(有贴花段且不需要拆分实例)时,直接跳过
if (GRayTracingExcludeDecals && Instance.MaskAndFlags.bAnySegmentsDecal && !bNeedSeparateDecalInstance)
{
continue;
}
// 后面一大段都是赋值这个属性,所以我们看看这个类
FRayTracingGeometryInstance RayTracingInstance;
// 。。。
英文注释很详细
/**
* High level descriptor of one or more instances of a mesh in a ray tracing scene.
* All instances covered by this descriptor will share shader bindings, but may have different transforms and user data.
*/
struct FRayTracingGeometryInstance
{
// Geometry shared by every instance described here. The gather code shown in this file asserts it is non-null before adding the instance.
FRHIRayTracingGeometry* GeometryRHI = nullptr;
// A single physical mesh may be duplicated many times in the scene with different transforms and user data.
// All copies share the same shader binding table entries and therefore will have the same material and shader resources.
TArrayView<const FMatrix> Transforms;
// Offsets into the scene's instance scene data buffer used to get instance transforms from GPUScene
// If BaseInstanceSceneDataOffset != -1, instances are assumed to be continuous.
int32 BaseInstanceSceneDataOffset = -1;
TArrayView<const uint32> InstanceSceneDataOffsets;
// Optional buffer that stores GPU transforms. Used instead of CPU-side transform data.
FShaderResourceViewRHIRef GPUTransformsSRV = nullptr;
// Conservative number of instances. Some of the actual instances may be made inactive if GPU transforms are used.
// Must be less or equal to number of entries in Transforms view if CPU transform data is used.
// Must be less or equal to number of entries in GPUTransformsSRV if it is non-null.
uint32 NumTransforms = 0;
// Each geometry copy can receive a user-provided integer, which can be used to retrieve extra shader parameters or customize appearance.
// This data can be retrieved using GetInstanceUserData() in closest/any hit shaders.
// If UserData view is empty, then DefaultUserData value will be used for all instances.
// If UserData view is used, then it must have the same number of entries as NumInstances.
// NOTE(review): this struct has no NumInstances member; the sentence above presumably means NumTransforms - confirm.
uint32 DefaultUserData = 0;
TArrayView<const uint32> UserData;
// Each geometry copy can have one bit to make it individually deactivated (removed from TLAS while maintaining hit group indexing). Useful for culling.
TArrayView<const uint32> ActivationMask;
// Whether local bounds scale and center translation should be applied to the instance transform.
bool bApplyLocalBoundsTransform = false;
// Mask that will be tested against one provided to TraceRay() in shader code.
// If binary AND of instance mask with ray mask is zero, then the instance is considered not intersected / invisible.
uint8 Mask = 0xFF;
// Scene layer of the instance. The gather code shown in this file assigns an ERayTracingSceneLayer value (Base or Decals) cast to uint8.
uint8 LayerIndex = 0;
// Flags to control triangle back face culling, whether to allow any-hit shaders, etc.
ERayTracingInstanceFlags Flags = ERayTracingInstanceFlags::None;
};
if (TempRayTracingInstances.Num() > 0)
{
for (FRayTracingInstance& Instance : TempRayTracingInstances)
{
// 。。。
// 这一大段都是为了填充这个 RayTracingInstance 并且最后调用 AddInstance
FRayTracingGeometryInstance RayTracingInstance;
RayTracingInstance.GeometryRHI = Geometry->RayTracingGeometryRHI;
checkf(RayTracingInstance.GeometryRHI, TEXT("Ray tracing instance must have a valid geometry."));
RayTracingInstance.DefaultUserData = PrimitiveIndex;
RayTracingInstance.bApplyLocalBoundsTransform = Instance.bApplyLocalBoundsTransform;
RayTracingInstance.LayerIndex = (uint8)(Instance.MaskAndFlags.bAnySegmentsDecal && !bNeedSeparateDecalInstance ? ERayTracingSceneLayer::Decals : ERayTracingSceneLayer::Base);
RayTracingInstance.Mask = Instance.MaskAndFlags.Mask;
if (Instance.MaskAndFlags.bForceOpaque)
{
RayTracingInstance.Flags |= ERayTracingInstanceFlags::ForceOpaque;
}
if (Instance.MaskAndFlags.bDoubleSided)
{
RayTracingInstance.Flags |= ERayTracingInstanceFlags::TriangleCullDisable;
}
AddDebugRayTracingInstanceFlags(RayTracingInstance.Flags);
if (Instance.InstanceGPUTransformsSRV.IsValid())
{
RayTracingInstance.NumTransforms = Instance.NumTransforms;
RayTracingInstance.GPUTransformsSRV = Instance.InstanceGPUTransformsSRV;
}
else
{
if (Instance.OwnsTransforms())
{
// Slow path: copy transforms to the owned storage
checkf(Instance.InstanceTransformsView.Num() == 0, TEXT("InstanceTransformsView is expected to be empty if using InstanceTransforms"));
TArrayView<FMatrix> SceneOwnedTransforms = RayTracingScene.Allocate<FMatrix>(Instance.InstanceTransforms.Num());
FMemory::Memcpy(SceneOwnedTransforms.GetData(), Instance.InstanceTransforms.GetData(), Instance.InstanceTransforms.Num() * sizeof(RayTracingInstance.Transforms[0]));
static_assert(std::is_same_v<decltype(SceneOwnedTransforms[0]), decltype(Instance.InstanceTransforms[0])>, "Unexpected transform type");
RayTracingInstance.NumTransforms = SceneOwnedTransforms.Num();
RayTracingInstance.Transforms = SceneOwnedTransforms;
}
else
{
// Fast path: just reference persistently-allocated transforms and avoid a copy
checkf(Instance.InstanceTransforms.Num() == 0, TEXT("InstanceTransforms is expected to be empty if using InstanceTransformsView"));
RayTracingInstance.NumTransforms = Instance.InstanceTransformsView.Num();
RayTracingInstance.Transforms = Instance.InstanceTransformsView;
}
}
// 光线追踪的实例
const uint32 InstanceIndex = RayTracingScene.AddInstance(RayTracingInstance, SceneProxy, true);
// 贴花的实例
uint32 DecalInstanceIndex = INDEX_NONE;
if (bNeedSeparateDecalInstance && !GRayTracingExcludeDecals)
{
FRayTracingGeometryInstance DecalRayTracingInstance = RayTracingInstance;
DecalRayTracingInstance.LayerIndex = (uint8)ERayTracingSceneLayer::Decals;
DecalInstanceIndex = RayTracingScene.AddInstance(MoveTemp(DecalRayTracingInstance), SceneProxy, true);
}
先看看这个lambda
// Dispatches one task that processes every work item accumulated so far, then resets the
// accumulation state (page list head, current page, pending count) so a new list can be started.
// Does nothing except the reset when no page has been allocated.
auto KickRayTracingMeshBatchTask = [&Allocator = Allocator, &View, &MeshBatchTaskHead, &MeshBatchTaskPage, &NumPendingMeshBatches, Scene = this->Scene]()
{
// Covers task creation, per-task storage allocation, dispatch, and the final reset of the pending state.
if (MeshBatchTaskHead)
{
// Per-task output containers, created through Allocator and registered on the view.
FDynamicRayTracingMeshCommandStorage* TaskDynamicCommandStorage = Allocator.Create<FDynamicRayTracingMeshCommandStorage>();
View.DynamicRayTracingMeshCommandStoragePerTask.Add(TaskDynamicCommandStorage);
FRayTracingMeshCommandOneFrameArray* TaskVisibleCommands = Allocator.Create<FRayTracingMeshCommandOneFrameArray>();
// Reserve one slot per pending mesh batch before the task starts.
TaskVisibleCommands->Reserve(NumPendingMeshBatches);
View.VisibleRayTracingMeshCommandsPerTask.Add(TaskVisibleCommands);
// The task captures the current list head by value; View is captured by reference.
View.AddRayTracingMeshBatchTaskList.Add(FFunctionGraphTask::CreateAndDispatchWhenReady(
[TaskDataHead = MeshBatchTaskHead, &View, Scene, TaskDynamicCommandStorage, TaskVisibleCommands]()
{
FTaskTagScope TaskTagScope(ETaskTag::EParallelRenderingThread);
TRACE_CPUPROFILER_EVENT_SCOPE(RayTracingMeshBatchTask);
FRayTracingMeshBatchTaskPage* Page = TaskDataHead;
// Capacity of the visible command array before processing; compared again after the loop.
const int32 ExpectedMaxVisibieCommands = TaskVisibleCommands->Max();
// Walk the linked list of pages.
while (Page)
{
for (uint32 ItemIndex = 0; ItemIndex < Page->NumWorkItems; ++ItemIndex)
{
const FRayTracingMeshBatchWorkItem& WorkItem = Page->WorkItems[ItemIndex];
TArrayView<const FMeshBatch> MeshBatches = WorkItem.GetMeshBatches();
// One mesh batch per geometry segment; the loop index is passed on as the segment index.
for (int32 SegmentIndex = 0; SegmentIndex < MeshBatches.Num(); SegmentIndex++)
{
const FMeshBatch& MeshBatch = MeshBatches[SegmentIndex];
FDynamicRayTracingMeshCommandContext CommandContext(
*TaskDynamicCommandStorage, *TaskVisibleCommands,
SegmentIndex, WorkItem.InstanceIndex, WorkItem.DecalInstanceIndex);
FMeshPassProcessorRenderState PassDrawRenderState;
FRayTracingMeshProcessor RayTracingMeshProcessor(&CommandContext, Scene, &View, PassDrawRenderState, Scene->CachedRayTracingMeshCommandsMode);
RayTracingMeshProcessor.AddMeshBatch(MeshBatch, 1, WorkItem.SceneProxy);
}
}
FRayTracingMeshBatchTaskPage* NextPage = Page->Next;
Page = NextPage;
}
// NOTE(review): as written this only fails if the capacity shrank; the intent may be to detect unexpected reallocation - confirm.
check(ExpectedMaxVisibieCommands <= TaskVisibleCommands->Max());
}, TStatId(), nullptr, ENamedThreads::AnyThread));
}
// Reset the accumulation state; the dispatched task keeps its own copy of the list head.
MeshBatchTaskHead = nullptr;
MeshBatchTaskPage = nullptr;
NumPendingMeshBatches = 0;
};
if (TempRayTracingInstances.Num() > 0)
{
for (FRayTracingInstance& Instance : TempRayTracingInstances)
{
// 。。。
if (bParallelMeshBatchSetup)
{
if (NumPendingMeshBatches >= RayTracingParallelMeshBatchSize) // 待处理的数量 》 并行的批次数量 就处理
{
KickRayTracingMeshBatchTask();
}
// 需要创建新的任务页
if (MeshBatchTaskPage == nullptr || MeshBatchTaskPage->NumWorkItems == MaxWorkItemsPerPage)
{
FRayTracingMeshBatchTaskPage* NextPage = Allocator.Create<FRayTracingMeshBatchTaskPage>();
if (MeshBatchTaskHead == nullptr)
{
MeshBatchTaskHead = NextPage;
}
if (MeshBatchTaskPage)
{
MeshBatchTaskPage->Next = NextPage;
}
MeshBatchTaskPage = NextPage;
}
//
FRayTracingMeshBatchWorkItem& WorkItem = MeshBatchTaskPage->WorkItems[MeshBatchTaskPage->NumWorkItems];
MeshBatchTaskPage->NumWorkItems++;
NumPendingMeshBatches += Instance.GetMaterials().Num();
if (Instance.OwnsMaterials())
{
Swap(WorkItem.MeshBatchesOwned, Instance.Materials);
}
else
{
WorkItem.MeshBatchesView = Instance.MaterialsView;
}
WorkItem.SceneProxy = SceneProxy;
WorkItem.InstanceIndex = InstanceIndex;
WorkItem.DecalInstanceIndex = DecalInstanceIndex;
}
else
{
TArrayView<const FMeshBatch> InstanceMaterials = Instance.GetMaterials();
for (int32 SegmentIndex = 0; SegmentIndex < InstanceMaterials.Num(); SegmentIndex++)
{
const FMeshBatch& MeshBatch = InstanceMaterials[SegmentIndex];
FDynamicRayTracingMeshCommandContext CommandContext(View.DynamicRayTracingMeshCommandStorage, View.VisibleRayTracingMeshCommands, SegmentIndex, InstanceIndex, DecalInstanceIndex);
FMeshPassProcessorRenderState PassDrawRenderState;
FRayTracingMeshProcessor RayTracingMeshProcessor(&CommandContext, Scene, &View, PassDrawRenderState, Scene->CachedRayTracingMeshCommandsMode);
RayTracingMeshProcessor.AddMeshBatch(MeshBatch, 1, SceneProxy);
}
}
} // for 结束
if (CVarRayTracingDynamicGeometryLastRenderTimeUpdateDistance.GetValueOnRenderThread() > 0.0f)
{
if (FVector::Distance(SceneProxy->GetActorPosition(), View.ViewMatrices.GetViewOrigin()) < CVarRayTracingDynamicGeometryLastRenderTimeUpdateDistance.GetValueOnRenderThread())
{
// Update LastRenderTime for components so that visibility based ticking (like skeletal meshes) can get updated
// We are only doing this for dynamic geometries now
SceneInfo->LastRenderTime = CurrentWorldTime;
SceneInfo->UpdateComponentLastRenderTime(CurrentWorldTime, /*bUpdateLastRenderTimeOnScreen=*/true);
}
}
}
FDeferredShadingSceneRenderer::GatherRayTracingWorldInstancesForView
好长啊啊啊啊啊啊啊啊啊啊啊啊啊啊
// Abridged excerpt: only the declaration of the function-local task type is shown here;
// everything else in the body is elided with "// ..." placeholders.
bool FDeferredShadingSceneRenderer::GatherRayTracingWorldInstancesForView(FRDGBuilder& GraphBuilder, FViewInfo& View, FRayTracingScene& RayTracingScene, FRayTracingRelevantPrimitiveTaskData* RayTracingRelevantPrimitiveTaskData)
{
// ... (earlier part of the function elided)
// Task to iterate over static ray tracing instances, perform auto-instancing and culling.
// This adds final instances to the ray tracing scene and must be done before FRayTracingScene::BuildInitializationData().
struct FRayTracingSceneAddInstancesTask
{
// ... (members elided)
};
// ... (remainder of the function elided)
return true;
}
看 FRayTracingSceneAddInstancesTask
// Graph task type declared locally inside GatherRayTracingWorldInstancesForView.
// It holds references to its inputs and outputs; the work itself happens in DoTask.
// NOTE: this excerpt stops at the DoTask declaration, so the closing "};" of the struct is not shown.
struct FRayTracingSceneAddInstancesTask
{
// Subsequents mode (TrackSubsequents vs. FireAndForget): describes whether the task has to account for other tasks depending on it.
static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }
// No stat is associated with this task: a default-constructed TStatId is returned.
TStatId GetStatId() const { return TStatId(); }
// The task does not request a specific named thread.
ENamedThreads::Type GetDesiredThread() { return ENamedThreads::AnyThread; }
// Inputs
const FScene& Scene;
TChunkedArray<FRayTracingRelevantPrimitive>& RelevantStaticPrimitives;
const FRayTracingCullingParameters& CullingParameters;
const bool bIsPathTracing;
// Outputs
FRayTracingScene& RayTracingScene; // New instances are added into FRayTracingScene::Instances and FRayTracingScene::Allocator is used for temporary data
TArray<FVisibleRayTracingMeshCommand>& VisibleRayTracingMeshCommands; // New elements are added here by this task
TArray<FPrimitiveSceneProxy*>& ProxiesWithDirtyCachedInstance;
// Bookkeeping for one auto-instancing batch: the scene instance indices it maps to plus views over its per-transform data.
struct FAutoInstanceBatch
{
int32 Index = INDEX_NONE;
int32 DecalIndex = INDEX_NONE;
// Copies the next InstanceSceneDataOffset and user data into the current batch, returns true if arrays were re-allocated.
bool Add(FRayTracingScene& InRayTracingScene, uint32 InInstanceSceneDataOffset, uint32 InUserData);
// A batch counts as valid once its offsets view is non-empty.
bool IsValid() const { return InstanceSceneDataOffsets.Num() != 0; }
TArrayView<uint32> InstanceSceneDataOffsets;
TArrayView<uint32> UserData;
uint32 Cursor = 0;
};
// Task entry point; its body is shown separately in these notes.
void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent);
// Abridged excerpt: shows how the add-instances task and the scene-init task are created and chained.
// RelevantPrimitiveList is declared in the elided part of the function.
bool FDeferredShadingSceneRenderer::GatherRayTracingWorldInstancesForView(FRDGBuilder& GraphBuilder, FViewInfo& View, FRayTracingScene& RayTracingScene, FRayTracingRelevantPrimitiveTaskData* RayTracingRelevantPrimitiveTaskData)
{
// ... (earlier part of the function elided)
// Task to iterate over static ray tracing instances, perform auto-instancing and culling.
// This adds final instances to the ray tracing scene and must be done before FRayTracingScene::BuildInitializationData().
struct FRayTracingSceneAddInstancesTask
{
// ... (members elided)
};
// The add-instances task lists the static primitive LOD task as its only prerequisite.
FGraphEventArray AddInstancesTaskPrerequisites;
AddInstancesTaskPrerequisites.Add(RelevantPrimitiveList.StaticPrimitiveLODTask);
// Create the task with those prerequisites; the arguments are grouped into inputs and outputs as marked inline.
FGraphEventRef AddInstancesTask = TGraphTask<FRayTracingSceneAddInstancesTask>::CreateTask(&AddInstancesTaskPrerequisites).ConstructAndDispatchWhenReady(
*Scene, RelevantPrimitiveList.StaticPrimitives, View.RayTracingCullingParameters, bool(View.Family->EngineShowFlags.PathTracing), // inputs
RayTracingScene, View.VisibleRayTracingMeshCommands, View.ProxiesWithDirtyCachedInstance // outputs
);
// Scene init task can run only when all pre-init tasks are complete (including culling tasks that are spawned while adding instances)
// AddInstancesTask is passed as the prerequisite; the lambda captures View and RayTracingScene by reference.
View.RayTracingSceneInitTask = FFunctionGraphTask::CreateAndDispatchWhenReady(
[&View, &RayTracingScene]()
{
TRACE_CPUPROFILER_EVENT_SCOPE(RayTracingSceneInitTask);
View.RayTracingSceneInitData = RayTracingScene.BuildInitializationData();
},
TStatId(), AddInstancesTask, ENamedThreads::AnyThread);
return true;
}
CreateTask 创建的是任务(task)而不是线程;任务被派发后,由执行它的线程调用 DoTask
7-20
// Task body: walks every entry of RelevantStaticPrimitives and adds its ray tracing instance(s) to RayTracingScene
// together with the matching entries in VisibleRayTracingMeshCommands.
// Primitives flagged CacheInstances reuse their cached instance; all others go through the auto-instancing path.
// Per-instance culling closures collected on the way are dispatched as separate graph tasks.
void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
{
FTaskTagScope TaskTagScope(ETaskTag::EParallelRenderingThread);
TRACE_CPUPROFILER_EVENT_SCOPE(RayTracingSceneStaticInstanceTask);
FGraphEventArray CullingTasks;
const bool bAutoInstance = CVarRayTracingAutoInstance.GetValueOnRenderThread() != 0; // whether auto-instancing is enabled, read from the console variable
Experimental::TSherwoodMap<uint64, FAutoInstanceBatch> InstanceBatches; // Instance batches by FRayTracingRelevantPrimitive::InstancingKey()
TArray<FRayTracingCullPrimitiveInstancesClosure> CullInstancesClosures;
// Only pre-size the culling containers when per-instance culling can actually produce closures.
if (CullingParameters.CullingMode != RayTracing::ECullingMode::Disabled && GetRayTracingCullingPerInstance())
{
CullInstancesClosures.Reserve(RelevantStaticPrimitives.Num());
CullingTasks.Reserve(RelevantStaticPrimitives.Num() / 256 + 1);
}
// scan relevant primitives computing hash data to look for duplicate instances
for (const FRayTracingRelevantPrimitive& RelevantPrimitive : RelevantStaticPrimitives)
{
const int32 PrimitiveIndex = RelevantPrimitive.PrimitiveIndex;
FPrimitiveSceneInfo* SceneInfo = Scene.Primitives[PrimitiveIndex];
FPrimitiveSceneProxy* SceneProxy = Scene.PrimitiveSceneProxies[PrimitiveIndex];
ERayTracingPrimitiveFlags Flags = Scene.PrimitiveRayTracingFlags[PrimitiveIndex];
// Path 1: the primitive carries a cached ray tracing instance.
if (EnumHasAnyFlags(Flags, ERayTracingPrimitiveFlags::CacheInstances))
{
const bool bUsingNaniteRayTracing = (Nanite::GetRayTracingMode() != Nanite::ERayTracingMode::Fallback) && SceneProxy->IsNaniteMesh();
if (bUsingNaniteRayTracing)
{
Nanite::GRayTracingManager.AddVisiblePrimitive(SceneInfo);
if (RelevantPrimitive.CachedRayTracingInstance->GeometryRHI == nullptr)
{
// Nanite ray tracing geometry not ready yet, doesn't include primitive in ray tracing scene
continue;
}
}
else if (!RelevantPrimitive.bCachedRayTracingGeometryValid)
{
// cached instance is not valid (eg: was streamed out) need to invalidate for next frame
ProxiesWithDirtyCachedInstance.Add(Scene.PrimitiveSceneProxies[PrimitiveIndex]);
continue;
}
// TODO: Consider requesting a recache of all ray tracing commands during which decals are excluded
// if primitive has mixed decal and non-decal segments we need to have two ray tracing instances
// one containing non-decal segments and the other with decal segments
// masking of segments is done using "hidden" hitgroups
// TODO: Debug Visualization to highlight primitives using this?
const bool bNeedSeparateDecalInstance = RelevantPrimitive.bAnySegmentsDecal && !RelevantPrimitive.bAllSegmentsDecal;
// With decals excluded, a primitive that has decal segments and needs no separate decal instance is skipped entirely.
if (GRayTracingExcludeDecals && RelevantPrimitive.bAnySegmentsDecal && !bNeedSeparateDecalInstance)
{
continue;
}
check(RelevantPrimitive.CachedRayTracingInstance);
const int32 NewInstanceIndex = RayTracingScene.AddInstance(*RelevantPrimitive.CachedRayTracingInstance, SceneProxy, false);
uint32 DecalInstanceIndex = INDEX_NONE;
{
FRayTracingGeometryInstance& NewInstance = RayTracingScene.GetInstance(NewInstanceIndex);
AddDebugRayTracingInstanceFlags(NewInstance.Flags);
// Decals layer only when the primitive has decal segments and needs no separate decal instance; Base otherwise.
NewInstance.LayerIndex = (uint8)(RelevantPrimitive.bAnySegmentsDecal && !bNeedSeparateDecalInstance ? ERayTracingSceneLayer::Decals : ERayTracingSceneLayer::Base);
const Experimental::FHashElementId GroupId = Scene.PrimitiveRayTracingGroupIds[PrimitiveIndex];
const bool bUseGroupBounds = CullingParameters.bCullUsingGroupIds && GroupId.IsValid();
// Per-instance culling is set up only for cached instances with more than one transform that are not culled via group bounds.
if (CullingParameters.CullingMode != RayTracing::ECullingMode::Disabled && GetRayTracingCullingPerInstance() && RelevantPrimitive.CachedRayTracingInstance->NumTransforms > 1 && !bUseGroupBounds)
{
const bool bIsFarFieldPrimitive = EnumHasAnyFlags(Flags, ERayTracingPrimitiveFlags::FarField);
// One bit per transform, stored in 32-bit words allocated from the ray tracing scene.
TArrayView<uint32> InstanceActivationMask = RayTracingScene.Allocate<uint32>(FMath::DivideAndRoundUp(NewInstance.NumTransforms, 32u));
NewInstance.ActivationMask = InstanceActivationMask;
FRayTracingCullPrimitiveInstancesClosure Closure;
Closure.Scene = &Scene;
Closure.SceneInfo = SceneInfo;
Closure.PrimitiveIndex = PrimitiveIndex;
Closure.bIsFarFieldPrimitive = bIsFarFieldPrimitive;
Closure.CullingParameters = &CullingParameters;
Closure.OutInstanceActivationMask = InstanceActivationMask;
CullInstancesClosures.Add(MoveTemp(Closure));
// Once 256 closures have accumulated, hand them to a graph task of their own.
// NOTE(review): relies on the moved-from array being reusable for the following closures - confirm against TArray move semantics.
if (CullInstancesClosures.Num() >= 256)
{
CullingTasks.Add(FFunctionGraphTask::CreateAndDispatchWhenReady([CullInstancesClosures = MoveTemp(CullInstancesClosures)]()
{
for (auto& Closure : CullInstancesClosures)
{
Closure();
}
}, TStatId(), nullptr, ENamedThreads::AnyThread));
}
}
// Mixed decal / non-decal primitive: add a second instance copied from the first, placed in the Decals layer.
if (bNeedSeparateDecalInstance && !GRayTracingExcludeDecals)
{
FRayTracingGeometryInstance DecalRayTracingInstance = NewInstance;
DecalRayTracingInstance.LayerIndex = (uint8)ERayTracingSceneLayer::Decals;
DecalInstanceIndex = RayTracingScene.AddInstance(MoveTemp(DecalRayTracingInstance), SceneProxy, false);
}
}
// At the moment we only support SM & ISMs on this path
check(EnumHasAnyFlags(Flags, ERayTracingPrimitiveFlags::CacheMeshCommands));
const bool bHasDecalInstanceIndex = DecalInstanceIndex != INDEX_NONE;
for (int32 CommandIndex : RelevantPrimitive.CachedRayTracingMeshCommandIndices)
{
const FRayTracingMeshCommand& MeshCommand = Scene.CachedRayTracingMeshCommands[CommandIndex];
{
// On the main instance, decal commands are hidden when a separate decal instance exists.
const bool bHidden = bHasDecalInstanceIndex && MeshCommand.bDecal;
FVisibleRayTracingMeshCommand NewVisibleMeshCommand(&MeshCommand, NewInstanceIndex, bHidden);
VisibleRayTracingMeshCommands.Add(NewVisibleMeshCommand);
}
if(bHasDecalInstanceIndex)
{
// On the decal instance, the non-decal commands are the hidden ones.
const bool bHidden = !MeshCommand.bDecal;
FVisibleRayTracingMeshCommand NewVisibleMeshCommand(&MeshCommand, DecalInstanceIndex, bHidden);
VisibleRayTracingMeshCommands.Add(NewVisibleMeshCommand);
}
}
}
// Path 2: no cached instance; build instances here, optionally merging primitives that share an instancing key.
else
{
const int8 LODIndex = RelevantPrimitive.LODIndex;
if (LODIndex < 0 || !RelevantPrimitive.bStatic)
{
continue; // skip dynamic primitives and other
}
// if primitive has mixed decal and non-decal segments we need to have two ray tracing instances
// one containing non-decal segments and the other with decal segments
// masking of segments is done using "hidden" hitgroups
// TODO: Debug Visualization to highlight primitives using this?
const bool bNeedSeparateDecalInstance = RelevantPrimitive.bAnySegmentsDecal && !RelevantPrimitive.bAllSegmentsDecal;
if (GRayTracingExcludeDecals && RelevantPrimitive.bAnySegmentsDecal && !bNeedSeparateDecalInstance)
{
continue;
}
// NOTE(review): the first clause below skips every primitive with any decal segment when decals are excluded,
// which already covers the check just above and also skips mixed decal / non-decal primitives on this path.
if ((GRayTracingExcludeDecals && RelevantPrimitive.bAnySegmentsDecal)
|| (GRayTracingExcludeTranslucent && RelevantPrimitive.bAllSegmentsTranslucent)
|| (GRayTracingExcludeSky && RelevantPrimitive.bIsSky && !bIsPathTracing))
{
continue;
}
// location if this is a new entry
const uint64 InstanceKey = RelevantPrimitive.InstancingKey();
FAutoInstanceBatch DummyInstanceBatch = { };
// With auto-instancing on, look up (or insert) the batch for this key; otherwise work on the fresh local dummy,
// which is re-created every iteration and therefore always takes the "new batch" branch below.
FAutoInstanceBatch& InstanceBatch = bAutoInstance ? InstanceBatches.FindOrAdd(InstanceKey, DummyInstanceBatch) : DummyInstanceBatch;
if (InstanceBatch.IsValid())
{
// Reusing a previous entry, just append to the instance list.
bool bReallocated = InstanceBatch.Add(RayTracingScene, SceneInfo->GetInstanceSceneDataOffset(), (uint32)PrimitiveIndex);
check(InstanceBatch.Index != INDEX_NONE);
{
FRayTracingGeometryInstance& RayTracingInstance = RayTracingScene.GetInstance(InstanceBatch.Index);
++RayTracingInstance.NumTransforms;
check(RayTracingInstance.NumTransforms == InstanceBatch.Cursor); // sanity check
// If the batch storage moved, re-point the instance at the new views.
if (bReallocated)
{
RayTracingInstance.InstanceSceneDataOffsets = InstanceBatch.InstanceSceneDataOffsets;
RayTracingInstance.UserData = InstanceBatch.UserData;
}
}
// Same update for the batch's separate decal instance, if it has one.
if(InstanceBatch.DecalIndex != INDEX_NONE)
{
FRayTracingGeometryInstance& RayTracingInstance = RayTracingScene.GetInstance(InstanceBatch.DecalIndex);
++RayTracingInstance.NumTransforms;
check(RayTracingInstance.NumTransforms == InstanceBatch.Cursor); // sanity check
if (bReallocated)
{
RayTracingInstance.InstanceSceneDataOffsets = InstanceBatch.InstanceSceneDataOffsets;
RayTracingInstance.UserData = InstanceBatch.UserData;
}
}
}
else
{
// Starting new instance batch
InstanceBatch.Add(RayTracingScene, SceneInfo->GetInstanceSceneDataOffset(), (uint32)PrimitiveIndex);
FRayTracingGeometryInstance RayTracingInstance;
RayTracingInstance.GeometryRHI = RelevantPrimitive.RayTracingGeometryRHI;
checkf(RayTracingInstance.GeometryRHI, TEXT("Ray tracing instance must have a valid geometry."));
RayTracingInstance.InstanceSceneDataOffsets = InstanceBatch.InstanceSceneDataOffsets;
RayTracingInstance.UserData = InstanceBatch.UserData;
RayTracingInstance.NumTransforms = 1;
RayTracingInstance.Mask = RelevantPrimitive.InstanceMask; // When no cached command is found, InstanceMask == 0 and the instance is effectively filtered out
if (RelevantPrimitive.bAllSegmentsOpaque && RelevantPrimitive.bAllSegmentsCastShadow)
{
RayTracingInstance.Flags |= ERayTracingInstanceFlags::ForceOpaque;
}
if (RelevantPrimitive.bTwoSided)
{
RayTracingInstance.Flags |= ERayTracingInstanceFlags::TriangleCullDisable;
}
AddDebugRayTracingInstanceFlags(RayTracingInstance.Flags);
// Same layer selection as on the cached-instance path.
RayTracingInstance.LayerIndex = (uint8)(RelevantPrimitive.bAnySegmentsDecal && !bNeedSeparateDecalInstance ? ERayTracingSceneLayer::Decals : ERayTracingSceneLayer::Base);
InstanceBatch.Index = RayTracingScene.AddInstance(RayTracingInstance, SceneProxy, false);
if (bNeedSeparateDecalInstance && !GRayTracingExcludeDecals)
{
FRayTracingGeometryInstance DecalRayTracingInstance = RayTracingInstance;
DecalRayTracingInstance.LayerIndex = (uint8)ERayTracingSceneLayer::Decals;
InstanceBatch.DecalIndex = RayTracingScene.AddInstance(MoveTemp(DecalRayTracingInstance), SceneProxy, false);
}
const bool bHasDecalInstanceIndex = InstanceBatch.DecalIndex != INDEX_NONE;
// Mesh commands are registered only when the batch is created; appended primitives reuse them.
for (int32 CommandIndex : RelevantPrimitive.CachedRayTracingMeshCommandIndices)
{
if (CommandIndex >= 0)
{
const FRayTracingMeshCommand& MeshCommand = Scene.CachedRayTracingMeshCommands[CommandIndex];
{
const bool bHidden = bHasDecalInstanceIndex && MeshCommand.bDecal;
FVisibleRayTracingMeshCommand NewVisibleMeshCommand(&MeshCommand, InstanceBatch.Index, bHidden);
VisibleRayTracingMeshCommands.Add(NewVisibleMeshCommand);
}
if (bHasDecalInstanceIndex)
{
const bool bHidden = !MeshCommand.bDecal;
FVisibleRayTracingMeshCommand NewVisibleMeshCommand(&MeshCommand, InstanceBatch.DecalIndex, bHidden);
VisibleRayTracingMeshCommands.Add(NewVisibleMeshCommand);
}
}
else
{
// CommandIndex == -1 indicates that the mesh batch has been filtered by FRayTracingMeshProcessor (like the shadow depth pass batch)
// Do nothing in this case
}
}
}
}
}
// Dispatch whatever closures are left. This task is created unconditionally, even when no closures remain.
CullingTasks.Add(FFunctionGraphTask::CreateAndDispatchWhenReady([CullInstancesClosures = MoveTemp(CullInstancesClosures)]()
{
for (auto& Closure : CullInstancesClosures)
{
Closure();
}
}, TStatId(), nullptr, ENamedThreads::AnyThread));
// Register every culling task with this task's completion event.
// NOTE(review): presumably this keeps the task from being reported complete until the culling tasks finish - confirm.
for (FGraphEventRef& CullingTask : CullingTasks)
{
MyCompletionGraphEvent->DontCompleteUntil(CullingTask);
}
}
BuildInitializationData
// Abridged excerpt: only the final step of the function is shown, the creation of the scene-init task.
// AddInstancesTask is created in the elided part of the body.
bool FDeferredShadingSceneRenderer::GatherRayTracingWorldInstancesForView(FRDGBuilder& GraphBuilder, FViewInfo& View, FRayTracingScene& RayTracingScene, FRayTracingRelevantPrimitiveTaskData* RayTracingRelevantPrimitiveTaskData)
{
// ... (earlier part of the function elided)
// Scene init task can run only when all pre-init tasks are complete (including culling tasks that are spawned while adding instances)
// The lambda stores the result of BuildInitializationData() on the view; AddInstancesTask is passed as its prerequisite.
View.RayTracingSceneInitTask = FFunctionGraphTask::CreateAndDispatchWhenReady(
[&View, &RayTracingScene]()
{
TRACE_CPUPROFILER_EVENT_SCOPE(RayTracingSceneInitTask);
View.RayTracingSceneInitData = RayTracingScene.BuildInitializationData();
},
TStatId(), AddInstancesTask, ENamedThreads::AnyThread);
return true;
}
最后一小块
RayTracingScene.BuildInitializationData();
FDeferredShadingSceneRenderer::Render
又回来了
我们看完了
GatherRayTracingWorldInstancesForView
// Abridged excerpt of the deferred renderer's Render(): shows where ray tracing instances are gathered
// and how bComputeLightGrid / bAnyLumenEnabled are derived.
// bComputeLightGrid and the other free names are declared in elided code.
// NOTE: the excerpt ends before the function's closing brace.
void FDeferredShadingSceneRenderer::Render(FRDGBuilder& GraphBuilder)
{
// ... (earlier part of the function elided)
if (RendererOutput == ERendererOutput::FinalSceneColor)
{
// ... (elided)
// Ray tracing
#if RHI_RAYTRACING
GatherRayTracingWorldInstancesForView(GraphBuilder, ReferenceView, RayTracingScene, InitViewTaskDatas.RayTracingRelevantPrimitives); // expanded in the sections above
#endif // RHI_RAYTRACING
bool bAnyLumenEnabled = false;
{
// Initial value: follows deferred lighting when the GBuffer is used, otherwise the Lighting show flag.
if (bUseGBuffer)
{
bComputeLightGrid = bRenderDeferredLighting;
}
else
{
bComputeLightGrid = ViewFamily.EngineShowFlags.Lighting;
}
// Lumen counts as enabled if any view uses it for diffuse indirect lighting or for reflections.
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ViewIndex++)
{
FViewInfo& View = Views[ViewIndex];
bAnyLumenEnabled = bAnyLumenEnabled
|| GetViewPipelineState(View).DiffuseIndirectMethod == EDiffuseIndirectMethod::Lumen
|| GetViewPipelineState(View).ReflectionsMethod == EReflectionsMethod::Lumen;
}
// Any one of the conditions below also forces the light grid on.
bComputeLightGrid |= (
ShouldRenderVolumetricFog() ||
VolumetricCloudWantsToSampleLocalLights(Scene, ViewFamily.EngineShowFlags) ||
ViewFamily.ViewMode != VMI_Lit ||
bAnyLumenEnabled ||
VirtualShadowMapArray.IsEnabled() ||
ShouldVisualizeLightGrid());
}
}
总结
在 FRendererModule::BeginRenderingViewFamilies 的最后
通过 ENQUEUE_RENDER_COMMAND 把对 RenderViewFamilies_RenderThread 的调用投递到渲染线程,这个函数在渲染线程上执行,
到这里,就进入渲染线程了
// Enqueues the scene draw as a render command. The lambda captures a copy of SceneRenderers
// (as LocalSceneRenderers) and DrawSceneEnqueue by value.
ENQUEUE_RENDER_COMMAND(FDrawSceneCommand)(
[LocalSceneRenderers = CopyTemp(SceneRenderers), DrawSceneEnqueue](FRHICommandListImmediate& RHICmdList)
{
// Time elapsed between DrawSceneEnqueue and the moment the command starts running, in milliseconds.
uint64 SceneRenderStart = FPlatformTime::Cycles64();
const float StartDelayMillisec = FPlatformTime::ToMilliseconds64(SceneRenderStart - DrawSceneEnqueue);
CSV_CUSTOM_STAT_GLOBAL(DrawSceneCommand_StartDelay, StartDelayMillisec, ECsvCustomStatOp::Set);
// Render all view families, then flush RHI resources pending deletion.
RenderViewFamilies_RenderThread(RHICmdList, LocalSceneRenderers);
FlushPendingDeleteRHIResources_RenderThread();
});
RenderViewFamilies_RenderThread
他做了一些事情
拿到基础场景:`FScene* const Scene = SceneRenderers[0]->Scene;`
然后调用 FSceneRenderer::RenderThreadBegin(RHICmdList, SceneRenderers); 开始渲染线程上的渲染流程
启动场景渲染。他主要为每个场景FSceneRenderer 添加了AllFamilies, AllFamilyViews字段。然后把第一个场景的 FXSystem 赋值到后面所有场景
FDeferredUpdateResource::UpdateResources(RHICmdList); 更新所有需要延迟更新的资源
for (FSceneRenderer* SceneRenderer : SceneRenderers)
FRDGBuilder GraphBuilder(...) // 构建 RDG
FSceneRenderer::ViewExtensionPreRender_RenderThread(...) // 渲染扩展
const uint64 FamilyRenderStart = FPlatformTime::Cycles64(); // 渲染开始时间
if (bAllowGPUSkinCacheVisualization && SceneRenderer->Views.Num() > 0) { /* 蒙皮相关 */ }
if (ViewFamily.bForceCopyCrossGPU) { GraphBuilder.EnableForceCopyCrossGPU(); } // 处理多GPU相关,这里只是标志位设置
if (ViewFamily.EngineShowFlags.HitProxies) // 渲染命中代理?
SceneRenderer->RenderHitProxies(GraphBuilder);
else:
SceneRenderer->Render(GraphBuilder);
SceneRenderer->FlushCrossGPUFences(GraphBuilder); // 同步操作
GraphBuilder.Execute(); // 执行RDG中排队的任务
*SceneRenderer->ViewFamily.ProfileSceneRenderTime = (float)FPlatformTime::ToSeconds64(FPlatformTime::Cycles64() - FamilyRenderStart); // 记录渲染耗时
for里面 SceneRenderer->Render(GraphBuilder); 是核心
for结束后,做一些收尾了
包括
IsHairStrandsEnabled 头发相关,扩展,统计等。
FDeferredShadingSceneRenderer::Render
SceneRenderer->Render(GraphBuilder);
这个 SceneRenderer有两个子类,一个是移动端的,一个是延迟渲染的,我们在PC。所以走的是 延迟版本的
也就是 FDeferredShadingSceneRenderer::Render
这里面做的事情 (((φ(◎ロ◎;)φ)))。。。 很杂,光线追踪啦,Nanite 啦,虚拟贴图啦。。。