16 #ifndef DISTRIBUTED_MEMORY_ANALYSIS_IMPLEMENTATION_H
17 #define DISTRIBUTED_MEMORY_ANALYSIS_IMPLEMENTATION_H
22 #define DIS_DEBUG_OUTPUT false
24 #define RUN_DECLARATION false
25 #define ALLGATHER_MPI false
31 template <
class InheritedAttributeType>
33 DistributedMemoryAnalysisBase<InheritedAttributeType>::
34 computeFunctionIndices(
36 InheritedAttributeType rootInheritedValue,
44 std::cout <<
" >>> >>>>>>>>>>>>> starting traversal of nodeCounter (preTraversal)" << std::endl;
46 nodeCounter.traverse(root, rootInheritedValue);
48 funcDecls = nodeCounter.get_funcDecls();
49 initialInheritedValues = nodeCounter.get_initialInheritedValues();
50 std::vector<size_t> &nodeCounts = nodeCounter.get_nodeCounts();
51 std::vector<size_t> &funcWeights = nodeCounter.get_funcWeights();
52 ROSE_ASSERT(funcDecls.size() == initialInheritedValues.size());
53 ROSE_ASSERT(funcDecls.size() == nodeCounts.size());
54 ROSE_ASSERT(funcDecls.size() == funcWeights.size());
57 std::cout <<
" >>> nr of funcs: " << funcDecls.size() <<
" inheritedValues:" << initialInheritedValues.size()
58 <<
" nodeCounts: " << nodeCounts.size() << std::endl;
59 if (funcDecls.size()>0)
60 std::cout <<
" >>> example entry 0: funcs[0] " << funcDecls.back() <<
" inheritedValues[0]:"
61 << initialInheritedValues.back() <<
" nodeCounts[0]: " << nodeCounts.back() << std::endl;
62 std::cout <<
" >>> >>>>>>>>>>>>> done traversal of nodeCounter (preTraversal) \n\n\n" << std::endl;
65 std::vector<size_t>::const_iterator itr;
66 size_t totalNodes = 0;
67 size_t totalWeight = 0;
68 for (itr = nodeCounts.begin(); itr != nodeCounts.end(); ++itr)
70 for (itr = funcWeights.begin(); itr != funcWeights.end(); ++itr)
72 myNodeCounts = nodeCounts;
73 myFuncWeights = funcWeights;
75 if (DistributedMemoryAnalysisBase<InheritedAttributeType>::myID() == 0)
77 nrOfNodes = totalNodes;
80 std::cout <<
"ROOT - splitting functions: " << funcDecls.size() <<
", total nodes: " << totalNodes
81 <<
" totalWeight: " << totalWeight << std::endl;
88 size_t nodesSoFar = 0, nodesOfPredecessors = 0;
90 functionsPerProcess.clear();
91 for (
int rank = 0; rank < processes; rank++)
93 const size_t myNodesHigh = (totalNodes / processes + 1) * (rank + 1);
96 nodesOfPredecessors = nodesSoFar;
97 size_t myNodes = nodesOfPredecessors + nodeCounts[hi];
99 std::cout <<
"SPLITTING - rank : " << rank <<
" myNodesHigh " << myNodesHigh <<
" myNodes: " << myNodes
100 <<
" lo: " << lo <<
" hi: " << hi << std::endl;
103 for (hi = lo + 1; hi < funcDecls.size(); hi++)
106 std::cout <<
" SPLITTING - func-hi : " << hi <<
" myNodes: " << myNodes << std::endl;
107 std::cout <<
" SPLITTING - mynodes > mynodesHigh : " << myNodes <<
" > " << myNodesHigh << std::endl;
108 std::cout <<
" SPLITTING - mynodesHigh - myNodes < nodeCounts["<<hi<<
"]/2 : " << (myNodesHigh-myNodes)
109 <<
" < " << (nodeCounts[hi]/2) << std::endl;
111 if (myNodes > myNodesHigh)
113 else if (myNodesHigh - myNodes < nodeCounts[hi] / 2)
116 std::cout <<
" hi++" << std::endl;
118 myNodes += nodeCounts[hi];
120 nodesSoFar = myNodes;
121 functionsPerProcess.push_back(hi - lo);
124 if (rank == processes - 1)
126 if (hi != funcDecls.size())
128 std::cout <<
"hi: " << hi <<
", funcDecls.size(): " << funcDecls.size() << std::endl;
129 std::cout <<
"It looks like you asked me to analyze too few functions in too many processes."
130 << std::endl <<
" Please give me fewer processes or a larger program to work on." << std::endl;
132 ROSE_ASSERT(hi == funcDecls.size());
140 std::cout <<
"process " << rank <<
": functions [" << lo <<
"," << hi
141 <<
"[ for a total of "
142 << myNodes - nodesOfPredecessors
143 <<
" nodes" << std::endl;
148 return std::make_pair(my_lo, my_hi);
152 struct SortDescending :
public std::binary_function<std::pair<double, size_t>,std::pair<double, size_t>,bool>
154 bool operator()(
const std::pair<double, size_t>& s1,
const std::pair<double, size_t>& s2)
const
156 return (s1.first > s2.first);
160 template <
class InheritedAttributeType>
162 DistributedMemoryAnalysisBase<InheritedAttributeType>::
163 sortFunctions(std::vector<SgFunctionDeclaration*>& funcDecls, std::vector<InheritedAttributeType>& inheritedValues,
164 std::vector<size_t>& nodeCounts, std::vector<size_t>& funcWeights)
167 std::vector<SgFunctionDeclaration*> funcDecls_temp(funcDecls.size());
168 std::vector<InheritedAttributeType> inheritedValues_temp(funcDecls.size());
169 std::vector<size_t> nodeCounts_temp(funcDecls.size());
170 std::vector<size_t> funcWeights_temp(funcDecls.size());
174 std::vector<std::pair<double, size_t> > weights(funcDecls.size());
175 for (
int i=0; i< (int) funcDecls.size(); i++) {
176 double currentWeight = (((double)nodeCounts[i]*funcWeights[i])/(double)funcDecls.size()/100);
177 weights[i].first = currentWeight;
178 weights[i].second = i;
181 for (
int i=0; i< (int) funcDecls.size(); i++) {
182 funcDecls_temp[i]=funcDecls[weights[i].second];
183 inheritedValues_temp[i]=inheritedValues[weights[i].second];
184 nodeCounts_temp[i]=nodeCounts[weights[i].second];
185 funcWeights_temp[i]=funcWeights[weights[i].second];
188 std::cout <<
" function : " << funcDecls_temp[i]->get_qualified_name().str()
189 <<
" weight_mul : " << (weights[i].first) <<
" nodes: " << nodeCounts_temp[i]
190 <<
" weight : " << funcWeights_temp[i] << std::endl;
192 funcDecls.swap(funcDecls_temp);
193 inheritedValues.swap(inheritedValues_temp);
194 nodeCounts.swap(nodeCounts_temp);
195 funcWeights.swap(funcWeights_temp);
203 template <
class InheritedAttributeType>
205 DistributedMemoryAnalysisBase<InheritedAttributeType>::
206 computeFunctionIndicesPerNode(
207 SgNode *root, std::vector<int>& functionToProcessor,
208 InheritedAttributeType rootInheritedValue,
216 std::cout <<
" >>> >>>>>>>>>>>>> starting traversal of nodeCounter (preTraversal)" << std::endl;
218 nodeCounter.traverse(root, rootInheritedValue);
220 funcDecls = nodeCounter.get_funcDecls();
221 initialInheritedValues = nodeCounter.get_initialInheritedValues();
222 std::vector<size_t> &nodeCounts = nodeCounter.get_nodeCounts();
223 std::vector<size_t> &funcWeights = nodeCounter.get_funcWeights();
224 ROSE_ASSERT(funcDecls.size() == initialInheritedValues.size());
225 ROSE_ASSERT(funcDecls.size() == nodeCounts.size());
226 ROSE_ASSERT(funcDecls.size() == funcWeights.size());
228 sortFunctions(funcDecls, initialInheritedValues, nodeCounts, funcWeights);
231 std::cout <<
" >>> nr of funcs: " << funcDecls.size() <<
" inheritedValues:" << initialInheritedValues.size()
232 <<
" nodeCounts: " << nodeCounts.size() << std::endl;
233 if (funcDecls.size()>0)
234 std::cout <<
" >>> example entry 0: funcs[0] " << funcDecls.back() <<
" inheritedValues[0]:"
235 << initialInheritedValues.back() <<
" nodeCounts[0]: " << nodeCounts.back() << std::endl;
236 std::cout <<
" >>> >>>>>>>>>>>>> done traversal of nodeCounter (preTraversal) \n\n\n" << std::endl;
239 std::vector<size_t>::const_iterator itr, itr2;
240 size_t totalNodes = 0;
241 double totalWeight = 0;
242 for (itr = nodeCounts.begin(), itr2 = funcWeights.begin(); itr != nodeCounts.end(); ++itr, ++itr2) {
244 double currentWeight = (((double)(*itr)*(*itr2))/(
double)funcDecls.size()/100);
245 totalWeight += currentWeight;
247 myNodeCounts = nodeCounts;
248 myFuncWeights = funcWeights;
250 if (DistributedMemoryAnalysisBase<InheritedAttributeType>::myID() == 0)
252 nrOfNodes = totalNodes;
255 std::cout <<
"ROOT - splitting functions: " << funcDecls.size() <<
", total nodes: " << totalNodes
256 <<
" totalWeight: " << totalWeight << std::endl;
269 double processorWeight[processes];
270 int nrOfFunctions[processes];
271 for (
int rank = 0; rank < processes; rank++) {
272 processorWeight[rank] = 0;
273 nrOfFunctions[rank]=0;
275 for (
unsigned int i=0; i< funcDecls.size(); i++) {
276 double currentWeight = (((double)nodeCounts[i]*funcWeights[i])/(double)funcDecls.size()/100);
278 int min_rank=start_rank;
280 for (
int rank = start_rank; rank < processes; rank++) {
281 if (processorWeight[rank]<min) {
282 min = processorWeight[rank];
286 processorWeight[min_rank]+=currentWeight;
287 functionToProcessor.push_back(min_rank);
288 nrOfFunctions[min_rank]+=1;
291 std::cout <<
" Function per Processor : " << funcDecls[i]->get_name().str() <<
" weight : " <<
292 currentWeight <<
"/" << totalWeight <<
" on proc: " << min_rank << std::endl;
295 for (
int rank = 0; rank < processes; rank++) {
297 std::cerr <<
" Processor : " << rank <<
" has " << nrOfFunctions[rank] <<
298 " functions. Processor weight: " << processorWeight[rank] << std::endl;
311 template <
class InheritedAttributeType,
class SynthesizedAttributeType>
313 DistributedMemoryTraversal<InheritedAttributeType, SynthesizedAttributeType>::
314 performAnalysis(
SgNode *root, InheritedAttributeType rootInheritedValue,
319 std::cout <<
" >>> >>>>>>>>>>>>> start computeFunctionIndeces" << std::endl;
322 std::pair<int, int> my_limits = computeFunctionIndices(root, rootInheritedValue, preTraversal);
324 std::cout <<
" >>> >>>>>>>>>>>>> done computeFunctionIndeces\n\n\n" << std::endl;
328 std::cout <<
" >>> >>>>>>>>>>>>> start serialize results" << std::endl;
331 std::vector<std::pair<int, void *> > serializedResults;
332 for (
int i = my_limits.first; i < my_limits.second; i++)
334 SynthesizedAttributeType result =
335 analyzeSubtree(DistributedMemoryAnalysisBase<InheritedAttributeType>::funcDecls[i],
336 DistributedMemoryAnalysisBase<InheritedAttributeType>::initialInheritedValues[i]);
337 serializedResults.push_back(serializeAttribute(result));
341 std::cout <<
" >>> >>>>>>>>>>>>> done serialize results\n" << std::endl;
348 std::cout <<
" >>> >>>>>>>>>>>>> start creating buffer" << std::endl;
352 size_t functions = DistributedMemoryAnalysisBase<InheritedAttributeType>::funcDecls.size();
353 int myFunctionsPerProcess = DistributedMemoryAnalysisBase<InheritedAttributeType>::functionsPerProcess[
354 DistributedMemoryAnalysisBase<InheritedAttributeType>::myID()];
355 int *myStateSizes =
new int[myFunctionsPerProcess];
357 for (
int i = 0; i < myFunctionsPerProcess; i++)
359 myStateSizes[i] = serializedResults[i].first;
360 myTotalSize += myStateSizes[i];
362 unsigned char *myBuffer =
new unsigned char[myTotalSize];
365 std::cout <<
" total buffer size = " << myTotalSize <<
" FunctionsPerProcess: " << myFunctionsPerProcess << std::endl;
368 for (
int i = 0; i < myFunctionsPerProcess; i++)
370 std::memcpy(myBuffer + sizeSoFar, serializedResults[i].second, serializedResults[i].first);
371 sizeSoFar += serializedResults[i].first;
374 std::cout <<
" buffer ["<<i<<
"] = " << ((
char*)serializedResults[i].second)
375 <<
" myStateSizes["<<i<<
"] = " << myStateSizes[i] << std::endl;
378 deleteSerializedAttribute(serializedResults[i]);
381 std::cout <<
" >>> >>>>>>>>>>>>> end creating buffer\n" << std::endl;
387 std::cout <<
" >>> >>>>>>>>>>>>> start communication" << std::endl;
390 int *stateSizes = NULL;
391 int *displacements = NULL;
392 if (
ALLGATHER_MPI || DistributedMemoryAnalysisBase<InheritedAttributeType>::myID() == 0) {
393 displacements =
new int[DistributedMemoryAnalysisBase<InheritedAttributeType>::numberOfProcesses()];
394 displacements[0] = 0;
395 for (
int i = 1; i < DistributedMemoryAnalysisBase<InheritedAttributeType>::numberOfProcesses(); i++)
398 displacements[i] = displacements[i-1] + DistributedMemoryAnalysisBase<InheritedAttributeType>::functionsPerProcess[i-1];
400 std::cout <<
" displacements["<<i<<
"] = " << displacements[i] <<
" == functionsPerProcess[i] " << std::endl;
403 stateSizes =
new int[functions];
407 MPI_Allgatherv(myStateSizes, myFunctionsPerProcess, MPI_INT,
408 stateSizes, &DistributedMemoryAnalysisBase<InheritedAttributeType>::functionsPerProcess[0],
409 displacements, MPI_INT, MPI_COMM_WORLD);
411 MPI_Gatherv(myStateSizes, myFunctionsPerProcess, MPI_INT,
412 stateSizes, &DistributedMemoryAnalysisBase<InheritedAttributeType>::functionsPerProcess[0],
413 displacements, MPI_INT, 0, MPI_COMM_WORLD);
417 if (
ALLGATHER_MPI || DistributedMemoryAnalysisBase<InheritedAttributeType>::myID() == 0) {
418 std::cout <<
" MPI_Allgatherv: " << functions << std::endl;
419 for (
unsigned int k=0; k<functions;k++) {
420 std::cout <<
" stateSize["<<k<<
"] = " << stateSizes[k] << std::endl;
428 int *totalStateSizes = NULL;
429 unsigned char* recvbuf = NULL;
430 if (
ALLGATHER_MPI || DistributedMemoryAnalysisBase<InheritedAttributeType>::myID() == 0) {
431 totalStateSizes =
new int[DistributedMemoryAnalysisBase<InheritedAttributeType>::numberOfProcesses()];
433 for (
int i = 0; i < DistributedMemoryAnalysisBase<InheritedAttributeType>::numberOfProcesses(); i++)
435 displacements[i] = totalSize;
436 totalStateSizes[i] = 0;
437 int j_lim = j + DistributedMemoryAnalysisBase<InheritedAttributeType>::functionsPerProcess[i];
439 totalStateSizes[i] += stateSizes[j++];
440 totalSize += totalStateSizes[i];
442 recvbuf =
new unsigned char[totalSize];
448 MPI_Allgatherv(myBuffer, myTotalSize, MPI_UNSIGNED_CHAR,
449 recvbuf, totalStateSizes, displacements, MPI_UNSIGNED_CHAR,
452 MPI_Gatherv(myBuffer, myTotalSize, MPI_UNSIGNED_CHAR,
453 recvbuf, totalStateSizes, displacements, MPI_UNSIGNED_CHAR,
458 std::cout <<
" >>> >>>>>>>>>>>>> end communication\n\n" << std::endl;
463 std::cout <<
" >>> >>>>>>>>>>>>> start deserialize" << std::endl;
465 if (
ALLGATHER_MPI || DistributedMemoryAnalysisBase<InheritedAttributeType>::myID() == 0) {
467 functionResults.clear();
470 for (
int i = 0; i < DistributedMemoryAnalysisBase<InheritedAttributeType>::numberOfProcesses(); i++)
472 int j_lim = j + DistributedMemoryAnalysisBase<InheritedAttributeType>::functionsPerProcess[i];
476 std::pair<int, void *> serializedAttribute
477 = std::make_pair(stateSizes[j], recvbuf + displacements[i] + sizeSoFar);
478 SynthesizedAttributeType attribute = deserializeAttribute(serializedAttribute);
479 functionResults.push_back(attribute);
480 sizeSoFar += stateSizes[j];
487 std::cout <<
" >>> >>>>>>>>>>>>> end deserialize\n" << std::endl;
491 std::cout <<
" >>> >>>>>>>>>>>>> start postTraversal" << std::endl;
496 finalResults = postT.traverse(root,
false);
499 std::cout <<
" >>> >>>>>>>>>>>>> end postTraversal\n" << std::endl;
504 delete[] totalStateSizes;
505 delete[] displacements;
508 delete[] myStateSizes;
519 template <
class InheritedAttributeType>
520 InheritedAttributeType
548 std::cout <<
" >>>> found function : " << funcname << std::endl;
554 if (funcname.find(
"__")!=std::string::npos) {
559 std::cout <<
" >>>> found function : " << funcname << std::endl;
564 return inheritedValue;
577 funcDecls.push_back(funcDecl);
578 initialInheritedValues.push_back(inheritedValue);
581 std::cout <<
" >>>> found defining function declaration: " << funcname
582 <<
" inheritedValue: " << inheritedValue << std::endl;
593 std::cout <<
" . outside function : " << node->
class_name() << std::endl;
595 if (preTraversal != NULL)
598 return inheritedValue;
603 std::cerr <<
" inside function: " << node->
class_name() <<
" nodeCount =" << nodeCount
604 <<
" depth=" << inheritedValue << std::endl;
614 weightAssignOp+=(weightAssignOp)/5;
625 return inheritedValue;
629 template <
class InheritedAttributeType>
642 if (!inFunc && preTraversal != NULL)
658 nodeCounts.push_back(nodeCount);
662 funcWeights.push_back(weightAssignOp);
667 std::cout <<
" destroying - save nodes " << nodeCount << std::endl;
683 template <
class SynthesizedAttributeType>
692 std::string funcname=
"";
695 if (funcname.find(
"__")!=std::string::npos)
704 std::cout <<
" >>> postTraversal - inheritedAttribute: found function: " << funcname << std::endl;
708 std::cout <<
" postTraversal - inheritedAttribute on node: " << node->
class_name() <<
709 " parentEval (inFunction?): " << inFunction << std::endl;
732 template <
class SynthesizedAttributeType>
733 SynthesizedAttributeType
746 std::string funcname=
"none";
749 if (funcname.find(
"__")!=std::string::npos) {
754 std::cout <<
" .. post synthesizedAttribute function: " << funcname <<
" id-nr:" << functionCounter << std::endl;
759 return defaultSynthesizedAttribute(inFunction);
764 std::cout <<
" post in (NODE): >> " << node->
class_name() <<
" currentNode (inFunction?) " << inFunction << std::endl;
773 return functionResults[functionCounter++];
778 if (!synAttrs.empty()) {
779 SynthesizedAttributeType attr = synAttrs.front();
782 return defaultSynthesizedAttribute(inFunction);
787 std::cout <<
" ... EVALUATE synthesized Attribute " << std::endl;