// Compile-time sizing constants for this translation unit.
// NOTE(review): the names suggest these describe the SIMD width and work-group
// layout the compute kernels are built for -- confirm against the shader sources.
19 #define WAVEFRONT_SIZE 32
20 #define WAVEFRONT_BLOCK_MULTIPLIER 2
// GROUP_SIZE is the product of the two values above (32 * 2 = 64).
21 #define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER)
22 #define LINKS_PER_SIMD_LANE 16
// Two-level stringification: STRINGIFY forwards to STRINGIFY2 so that a macro
// argument is expanded first and only then turned into a string literal by '#'.
24 #define STRINGIFY( S ) STRINGIFY2( S )
25 #define STRINGIFY2( S ) #S
// Single-level stringification: the argument is turned into a string literal
// as written, without macro-expanding it first.
36 #define MSTRINGIFY(A) #A
38 #include "HLSL/UpdatePositionsFromVelocities.hlsl"
// File-local shader source strings. Each declaration ends with '=' and is
// completed by the #include on the following line, so the initialiser (and the
// terminating ';') comes from the included .hlsl file.
// NOTE(review): presumably every .hlsl file wraps its text in MSTRINGIFY(...)
// so that it expands to a string literal -- confirm in the HLSL/ directory.
// NOTE(review): these are non-const char* pointers; if they are bound to string
// literals, 'const char*' would be the correct type -- check how the variables
// are consumed elsewhere in this file before changing it.
39 static char* SolvePositionsSIMDBatchedHLSLString =
40 #include "HLSL/SolvePositionsSIMDBatched.hlsl"
41 static char* UpdateNodesHLSLString =
42 #include "HLSL/UpdateNodes.hlsl"
43 static char* UpdatePositionsHLSLString =
44 #include "HLSL/UpdatePositions.hlsl"
45 static char* UpdateConstantsHLSLString =
46 #include "HLSL/UpdateConstants.hlsl"
47 static char* IntegrateHLSLString =
48 #include "HLSL/Integrate.hlsl"
49 static char* ApplyForcesHLSLString =
50 #include "HLSL/ApplyForces.hlsl"
51 static char* UpdateNormalsHLSLString =
52 #include "HLSL/UpdateNormals.hlsl"
53 static char* OutputToVertexArrayHLSLString =
54 #include "HLSL/OutputToVertexArray.hlsl"
55 static char* VSolveLinksHLSLString =
56 #include "HLSL/VSolveLinks.hlsl"
57 static char* ComputeBoundsHLSLString =
58 #include "HLSL/ComputeBounds.hlsl"
// Note: unlike the other includes, this file name starts with a lower-case
// letter; that matters on case-sensitive file systems.
59 static char* SolveCollisionsAndUpdateVelocitiesHLSLString =
60 #include "HLSL/solveCollisionsAndUpdateVelocitiesSIMDBatched.hlsl"
65 m_d3dDevice( d3dDevice ),
82 m_d3dDevice = d3dDevice;
99 int newSize = previousSize + numLinks;
178 m_linkData(m_dx11Device, m_dx11Context)
213 for(
int softBodyIndex = 0; softBodyIndex < softBodies.
size(); ++softBodyIndex )
215 btSoftBody *softBody = softBodies[ softBodyIndex ];
247 int maxTriangles = numTriangles;
251 for(
int vertex = 0; vertex < numVertices; ++vertex )
253 Point3 multPoint(softBody->
m_nodes[vertex].m_x.getX(), softBody->
m_nodes[vertex].m_x.getY(), softBody->
m_nodes[vertex].m_x.getZ());
261 float vertexInverseMass = softBody->
m_nodes[vertex].m_im;
262 desc.setInverseMass(vertexInverseMass);
268 for(
int triangle = 0; triangle < numTriangles; ++triangle )
272 int vertexIndex0 = (softBody->
m_faces[triangle].m_n[0] - &(softBody->
m_nodes[0]));
273 int vertexIndex1 = (softBody->
m_faces[triangle].m_n[1] - &(softBody->
m_nodes[0]));
274 int vertexIndex2 = (softBody->
m_faces[triangle].m_n[2] - &(softBody->
m_nodes[0]));
286 int maxLinks = numLinks;
292 for(
int link = 0; link < numLinks; ++link )
294 int vertexIndex0 = softBody->
m_links[link].m_n[0] - &(softBody->
m_nodes[0]);
295 int vertexIndex1 = softBody->
m_links[link].m_n[1] - &(softBody->
m_nodes[0]);
385 using namespace Vectormath::Aos;
395 for(
int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
402 float massLSC = (invMass0 + invMass1)/linearStiffness;
405 float restLengthSquared = restLength*restLength;
429 constBuffer.
kst = kst;
432 D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
456 ID3D11ShaderResourceView* pViewNULL = NULL;
464 ID3D11UnorderedAccessView* pUAViewNULL = NULL;
465 m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
467 ID3D11Buffer *pBufferNull = NULL;
490 bool returnVal =
true;
501 char maxVerticesPerWavefront[20];
502 char maxBatchesPerWavefront[20];
503 char waveFrontSize[20];
504 char waveFrontBlockMultiplier[20];
513 D3D10_SHADER_MACRO solvePositionsMacros[6] = {
"MAX_NUM_VERTICES_PER_WAVE", maxVerticesPerWavefront,
"MAX_BATCHES_PER_WAVE", maxBatchesPerWavefront,
"WAVEFRONT_SIZE", waveFrontSize,
"WAVEFRONT_BLOCK_MULTIPLIER", waveFrontBlockMultiplier,
"BLOCK_SIZE", blockSize, 0, 0 };
581 for(
int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
587 while( batch < wavefrontBatches.size() && !placed )
590 bool foundSharedVertex =
false;
591 for(
int link = 0; link < wavefront.
size(); ++link )
594 if( (mapOfVerticesInBatches[batch])[vertices.
vertex0] || (mapOfVerticesInBatches[batch])[vertices.
vertex1] )
596 foundSharedVertex =
true;
600 if( !foundSharedVertex )
602 wavefrontBatches[batch].push_back( waveIndex );
604 for(
int link = 0; link < wavefront.
size(); ++link )
607 (mapOfVerticesInBatches[batch])[vertices.
vertex0] =
true;
608 (mapOfVerticesInBatches[batch])[vertices.
vertex1] =
true;
614 if( batch == wavefrontBatches.size() && !placed )
616 wavefrontBatches.resize( batch + 1 );
617 wavefrontBatches[batch].push_back( waveIndex );
620 mapOfVerticesInBatches.
resize( batch + 1 );
623 mapOfVerticesInBatches[batch].
resize( numVertices+1,
false );
626 for(
int link = 0; link < wavefront.
size(); ++link )
629 (mapOfVerticesInBatches[batch])[vertices.
vertex0] =
true;
630 (mapOfVerticesInBatches[batch])[vertices.
vertex1] =
true;
634 mapOfVerticesInBatches.
clear();
640 int currentSize = vectorToUpdate.
size();
641 for(
int i = indexToRemove; i < (currentSize-1); ++i )
643 vectorToUpdate[i] = vectorToUpdate[i+1];
645 if( currentSize > 0 )
646 vectorToUpdate.
resize( currentSize - 1 );
654 vectorToUpdate.
resize( vectorToUpdate.
size() + 1 );
655 for(
int i = (vectorToUpdate.
size() - 1); i > index; --i )
657 vectorToUpdate[i] = vectorToUpdate[i-1];
659 vectorToUpdate[index] = element;
669 while( index < vectorToUpdate.
size() && vectorToUpdate[index] < element )
673 if( index == vectorToUpdate.
size() || vectorToUpdate[index] != element )
679 for(
int linkIndex = 0; linkIndex < linkData.
getNumLinks(); ++linkIndex )
682 numLinksPerVertex[nodes.vertex0]++;
683 numLinksPerVertex[nodes.vertex1]++;
685 int maxLinksPerVertex = 0;
686 for(
int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex )
688 maxLinksPerVertex =
btMax(numLinksPerVertex[vertexIndex], maxLinksPerVertex);
690 maxLinks = maxLinksPerVertex;
693 linksFoundPerVertex.
resize( numVertices, 0 );
695 listOfLinksPerVertex.
resize( maxLinksPerVertex * numVertices );
697 for(
int linkIndex = 0; linkIndex < linkData.
getNumLinks(); ++linkIndex )
702 int vertexIndex = nodes.
vertex0;
703 int linkForVertex = linksFoundPerVertex[nodes.vertex0];
704 int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
706 listOfLinksPerVertex[linkAddress] = linkIndex;
708 linksFoundPerVertex[nodes.vertex0] = linkForVertex + 1;
712 int vertexIndex = nodes.vertex1;
713 int linkForVertex = linksFoundPerVertex[nodes.vertex1];
714 int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
716 listOfLinksPerVertex[linkAddress] = linkIndex;
718 linksFoundPerVertex[nodes.vertex1] = linkForVertex + 1;
726 int linksPerWorkItem,
727 int maxLinksPerWavefront,
739 int maxLinksPerVertex = 0;
743 for(
int linkIndex = 0; linkIndex < linkData.
getNumLinks(); ++linkIndex )
746 numVertices =
btMax( numVertices, nodes.vertex0 + 1 );
747 numVertices =
btMax( numVertices, nodes.vertex1 + 1 );
753 numLinksPerVertex.
resize(0);
754 numLinksPerVertex.
resize( numVertices, 0 );
756 generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
762 int currentVertex = 0;
763 int linksProcessed = 0;
770 int nextWavefront = linksForWavefronts.
size();
771 linksForWavefronts.resize( nextWavefront + 1 );
773 verticesForWavefronts.resize( nextWavefront + 1 );
776 linksForWavefront.
resize(0);
780 while( linksProcessed < linkData.
getNumLinks() && linksForWavefront.
size() < maxLinksPerWavefront )
783 for(
int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.
size() < maxLinksPerWavefront; ++link )
785 int linkAddress = currentVertex * maxLinksPerVertex + link;
786 int linkIndex = listOfLinksPerVertex[linkAddress];
791 if( !processedLink[linkIndex] )
793 linksForWavefront.
push_back( linkIndex );
795 processedLink[linkIndex] =
true;
798 if( v0 == currentVertex )
804 if( verticesToProcess.
size() > 0 )
807 currentVertex = verticesToProcess[0];
815 while( processedLink[searchLink] )
823 for(
int link = 0; link < linksForWavefront.
size(); ++link )
833 batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
837 for(
int link = 0; link < linksForWavefront.
size(); ++link )
839 int linkIndex = linksForWavefront[link];
844 while( batch < batchesWithinWave.size() && !placed )
846 bool foundSharedVertex =
false;
847 if( batchesWithinWave[batch].
size() >= wavefrontSize )
850 foundSharedVertex =
true;
852 for(
int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
861 foundSharedVertex =
true;
866 if( !foundSharedVertex )
868 batchesWithinWave[batch].push_back( linkIndex );
874 if( batch == batchesWithinWave.size() && !placed )
876 batchesWithinWave.resize( batch + 1 );
877 batchesWithinWave[batch].push_back( linkIndex );
904 int numBatches = wavefrontBatches.
size();
905 m_wavefrontBatchStartLengths.resize(0);
907 for(
int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
909 int wavesInBatch = wavefrontBatches[batchIndex].
size();
910 int nextPrefixSum = prefixSum + wavesInBatch;
911 m_wavefrontBatchStartLengths.push_back(
BatchPair( prefixSum, nextPrefixSum - prefixSum ) );
913 prefixSum += wavesInBatch;
918 m_maxVerticesWithinWave = 0;
923 int batchesInCurrentWave = batchesWithinWaves[waveIndex].
size();
924 int verticesInCurrentWave = verticesForWavefronts[waveIndex].
size();
926 m_maxVerticesWithinWave =
btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
930 m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);
933 m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
957 int wavefrontCount = 0;
960 for(
int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
963 int wavefrontsInBatch = batch.
size();
966 for(
int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
969 int originalWavefrontIndex = batch[wavefrontIndex];
971 int verticesUsedByWavefront = wavefrontVertices.
size();
976 for(
int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
978 m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
980 for(
int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
982 m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
990 batchesAndVertices.
numVertices = verticesUsedByWavefront;
991 m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
995 for(
int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.
size(); ++wavefrontBatch )
998 int wavefrontBatchSize = linksInBatch.
size();
1002 for(
int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
1004 int originalLinkAddress = linksInBatch[linkIndex];
1006 m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
1007 m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
1008 m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
1009 m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
1010 m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
1011 m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
1019 m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
1021 for(
int linkIndex = wavefrontBatchSize; linkIndex <
m_wavefrontSize; ++linkIndex )
1025 m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
1026 m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
1027 m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
1028 m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
1029 m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
1037 localPair.
vertex0 = verticesUsedByWavefront + (linkIndex % 16);
1038 localPair.
vertex1 = verticesUsedByWavefront + (linkIndex % 16);
1039 m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;