26 #define WAVEFRONT_SIZE 32
27 #define WAVEFRONT_BLOCK_MULTIPLIER 2
28 #define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER)
29 #define LINKS_PER_SIMD_LANE 16
37 #define MSTRINGIFY(A) #A
39 #include "OpenCLC10/UpdatePositionsFromVelocities.cl"
40 static const char* SolvePositionsCLString =
41 #include "OpenCLC10/SolvePositionsSIMDBatched.cl"
42 static const char* UpdateNodesCLString =
43 #include "OpenCLC10/UpdateNodes.cl"
44 static const char* UpdatePositionsCLString =
45 #include "OpenCLC10/UpdatePositions.cl"
46 static const char* UpdateConstantsCLString =
47 #include "OpenCLC10/UpdateConstants.cl"
48 static const char* IntegrateCLString =
49 #include "OpenCLC10/Integrate.cl"
50 static const char* ApplyForcesCLString =
51 #include "OpenCLC10/ApplyForces.cl"
52 static const char* UpdateFixedVertexPositionsCLString =
53 #include "OpenCLC10/UpdateFixedVertexPositions.cl"
54 static const char* UpdateNormalsCLString =
55 #include "OpenCLC10/UpdateNormals.cl"
56 static const char* VSolveLinksCLString =
57 #include "OpenCLC10/VSolveLinks.cl"
58 static const char* SolveCollisionsAndUpdateVelocitiesCLString =
59 #include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"
61 #include "OpenCLC10/OutputToVertexArray.cl"
66 m_cqCommandQue(queue),
98 int newSize = previousSize + numLinks;
174 m_linkData(queue, ctx)
200 int maxPiterations = 0;
201 int maxViterations = 0;
203 for(
int softBodyIndex = 0; softBodyIndex < softBodies.
size(); ++softBodyIndex )
205 btSoftBody *softBody = softBodies[ softBodyIndex ];
236 int maxTriangles = numTriangles;
240 for(
int vertex = 0; vertex < numVertices; ++vertex )
242 Point3 multPoint(softBody->
m_nodes[vertex].m_x.getX(), softBody->
m_nodes[vertex].m_x.getY(), softBody->
m_nodes[vertex].m_x.getZ());
250 float vertexInverseMass = softBody->
m_nodes[vertex].m_im;
251 desc.setInverseMass(vertexInverseMass);
256 for(
int vertex = numVertices; vertex < maxVertices; ++vertex )
263 for(
int triangle = 0; triangle < numTriangles; ++triangle )
267 int vertexIndex0 = (softBody->
m_faces[triangle].m_n[0] - &(softBody->
m_nodes[0]));
268 int vertexIndex1 = (softBody->
m_faces[triangle].m_n[1] - &(softBody->
m_nodes[0]));
269 int vertexIndex2 = (softBody->
m_faces[triangle].m_n[2] - &(softBody->
m_nodes[0]));
281 int maxLinks = numLinks;
287 for(
int link = 0; link < numLinks; ++link )
289 int vertexIndex0 = softBody->
m_links[link].m_n[0] - &(softBody->
m_nodes[0]);
290 int vertexIndex1 = softBody->
m_links[link].m_n[1] - &(softBody->
m_nodes[0]);
309 if ( piterations > maxPiterations )
310 maxPiterations = piterations;
314 if ( viterations > maxViterations )
315 maxViterations = viterations;
318 for(
int vertex = 0; vertex < numVertices; ++vertex )
320 if ( softBody->
m_nodes[vertex].m_im == 0 )
331 if ( numVertices > 0 )
333 for (
int anchorIndex = 0; anchorIndex < softBody->
m_anchors.
size(); anchorIndex++ )
339 nodeInfo.
clVertexIndex = firstVertex + (int)(anchorNode - firstNode);
340 nodeInfo.
pNode = anchorNode;
365 for(
int softBodyIndex = 0; softBodyIndex <
m_softBodySet.
size(); ++softBodyIndex )
395 using namespace Vectormath::Aos;
405 for(
int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
412 float massLSC = (invMass0 + invMass1)/linearStiffness;
415 float restLengthSquared = restLength*restLength;
503 btAssert( 0 &&
"enqueueNDRangeKernel(m_solvePositionsFromLinksKernel)");
540 btAssert( 0 &&
"enqueueNDRangeKernel(m_solveCollisionsAndUpdateVelocitiesKernel)");
558 const char* additionalMacros=
"";
562 char *wavefrontMacros =
new char[256];
566 "-DMAX_NUM_VERTICES_PER_WAVE=%d -DMAX_BATCHES_PER_WAVE=%d -DWAVEFRONT_SIZE=%d -DWAVEFRONT_BLOCK_MULTIPLIER=%d -DBLOCK_SIZE=%d",
587 delete [] wavefrontMacros;
617 for(
int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
623 while( batch < wavefrontBatches.size() && !placed )
626 bool foundSharedVertex =
false;
627 for(
int link = 0; link < wavefront.
size(); ++link )
630 if( (mapOfVerticesInBatches[batch])[vertices.
vertex0] || (mapOfVerticesInBatches[batch])[vertices.
vertex1] )
632 foundSharedVertex =
true;
636 if( !foundSharedVertex )
638 wavefrontBatches[batch].push_back( waveIndex );
640 for(
int link = 0; link < wavefront.
size(); ++link )
643 (mapOfVerticesInBatches[batch])[vertices.
vertex0] =
true;
644 (mapOfVerticesInBatches[batch])[vertices.
vertex1] =
true;
650 if( batch == wavefrontBatches.size() && !placed )
652 wavefrontBatches.resize( batch + 1 );
653 wavefrontBatches[batch].push_back( waveIndex );
656 mapOfVerticesInBatches.
resize( batch + 1 );
659 mapOfVerticesInBatches[batch].
resize( numVertices+1,
false );
662 for(
int link = 0; link < wavefront.
size(); ++link )
665 (mapOfVerticesInBatches[batch])[vertices.
vertex0] =
true;
666 (mapOfVerticesInBatches[batch])[vertices.
vertex1] =
true;
670 mapOfVerticesInBatches.
clear();
676 int currentSize = vectorToUpdate.
size();
677 for(
int i = indexToRemove; i < (currentSize-1); ++i )
679 vectorToUpdate[i] = vectorToUpdate[i+1];
681 if( currentSize > 0 )
682 vectorToUpdate.
resize( currentSize - 1 );
690 vectorToUpdate.
resize( vectorToUpdate.
size() + 1 );
691 for(
int i = (vectorToUpdate.
size() - 1); i > index; --i )
693 vectorToUpdate[i] = vectorToUpdate[i-1];
695 vectorToUpdate[index] = element;
705 while( index < vectorToUpdate.
size() && vectorToUpdate[index] < element )
709 if( index == vectorToUpdate.
size() || vectorToUpdate[index] != element )
715 for(
int linkIndex = 0; linkIndex < linkData.
getNumLinks(); ++linkIndex )
718 numLinksPerVertex[nodes.vertex0]++;
719 numLinksPerVertex[nodes.vertex1]++;
721 int maxLinksPerVertex = 0;
722 for(
int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex )
724 maxLinksPerVertex =
btMax(numLinksPerVertex[vertexIndex], maxLinksPerVertex);
726 maxLinks = maxLinksPerVertex;
729 linksFoundPerVertex.
resize( numVertices, 0 );
731 listOfLinksPerVertex.
resize( maxLinksPerVertex * numVertices );
733 for(
int linkIndex = 0; linkIndex < linkData.
getNumLinks(); ++linkIndex )
738 int vertexIndex = nodes.
vertex0;
739 int linkForVertex = linksFoundPerVertex[nodes.vertex0];
740 int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
742 listOfLinksPerVertex[linkAddress] = linkIndex;
744 linksFoundPerVertex[nodes.vertex0] = linkForVertex + 1;
748 int vertexIndex = nodes.vertex1;
749 int linkForVertex = linksFoundPerVertex[nodes.vertex1];
750 int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
752 listOfLinksPerVertex[linkAddress] = linkIndex;
754 linksFoundPerVertex[nodes.vertex1] = linkForVertex + 1;
762 int linksPerWorkItem,
763 int maxLinksPerWavefront,
775 int maxLinksPerVertex = 0;
779 for(
int linkIndex = 0; linkIndex < linkData.
getNumLinks(); ++linkIndex )
782 numVertices =
btMax( numVertices, nodes.vertex0 + 1 );
783 numVertices =
btMax( numVertices, nodes.vertex1 + 1 );
789 numLinksPerVertex.
resize(0);
790 numLinksPerVertex.
resize( numVertices, 0 );
792 generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
797 for(
int vertex = 0; vertex < 10; ++vertex )
799 for(
int link = 0; link < numLinksPerVertex[vertex]; ++link )
801 int linkAddress = vertex * maxLinksPerVertex + link;
809 int currentVertex = 0;
810 int linksProcessed = 0;
817 int nextWavefront = linksForWavefronts.
size();
818 linksForWavefronts.resize( nextWavefront + 1 );
820 verticesForWavefronts.resize( nextWavefront + 1 );
823 linksForWavefront.
resize(0);
827 while( linksProcessed < linkData.
getNumLinks() && linksForWavefront.
size() < maxLinksPerWavefront )
830 for(
int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.
size() < maxLinksPerWavefront; ++link )
832 int linkAddress = currentVertex * maxLinksPerVertex + link;
833 int linkIndex = listOfLinksPerVertex[linkAddress];
838 if( !processedLink[linkIndex] )
840 linksForWavefront.
push_back( linkIndex );
842 processedLink[linkIndex] =
true;
845 if( v0 == currentVertex )
851 if( verticesToProcess.
size() > 0 )
854 currentVertex = verticesToProcess[0];
862 while( processedLink[searchLink] )
870 for(
int link = 0; link < linksForWavefront.
size(); ++link )
880 batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
884 for(
int link = 0; link < linksForWavefront.
size(); ++link )
886 int linkIndex = linksForWavefront[link];
891 while( batch < batchesWithinWave.size() && !placed )
893 bool foundSharedVertex =
false;
894 if( batchesWithinWave[batch].
size() >= wavefrontSize )
897 foundSharedVertex =
true;
899 for(
int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
908 foundSharedVertex =
true;
913 if( !foundSharedVertex )
915 batchesWithinWave[batch].push_back( linkIndex );
921 if( batch == batchesWithinWave.size() && !placed )
923 batchesWithinWave.resize( batch + 1 );
924 batchesWithinWave[batch].push_back( linkIndex );
951 int numBatches = wavefrontBatches.
size();
952 m_wavefrontBatchStartLengths.resize(0);
954 for(
int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
956 int wavesInBatch = wavefrontBatches[batchIndex].
size();
957 int nextPrefixSum = prefixSum + wavesInBatch;
958 m_wavefrontBatchStartLengths.push_back(
BatchPair( prefixSum, nextPrefixSum - prefixSum ) );
960 prefixSum += wavesInBatch;
965 m_maxVerticesWithinWave = 0;
970 int batchesInCurrentWave = batchesWithinWaves[waveIndex].
size();
971 int verticesInCurrentWave = verticesForWavefronts[waveIndex].
size();
973 m_maxVerticesWithinWave =
btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
977 m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);
980 m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
1004 int wavefrontCount = 0;
1007 for(
int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
1010 int wavefrontsInBatch = batch.
size();
1013 for(
int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
1016 int originalWavefrontIndex = batch[wavefrontIndex];
1018 int verticesUsedByWavefront = wavefrontVertices.
size();
1023 for(
int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
1025 m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
1027 for(
int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
1029 m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
1037 batchesAndVertices.
numVertices = verticesUsedByWavefront;
1038 m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
1042 for(
int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.
size(); ++wavefrontBatch )
1045 int wavefrontBatchSize = linksInBatch.
size();
1049 for(
int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
1051 int originalLinkAddress = linksInBatch[linkIndex];
1053 m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
1054 m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
1055 m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
1056 m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
1057 m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
1058 m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
1066 m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
1068 for(
int linkIndex = wavefrontBatchSize; linkIndex <
m_wavefrontSize; ++linkIndex )
1072 m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
1073 m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
1074 m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
1075 m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
1076 m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
1084 localPair.
vertex0 = verticesUsedByWavefront + (linkIndex % 16);
1085 localPair.
vertex1 = verticesUsedByWavefront + (linkIndex % 16);
1086 m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;