VPR-7.0
cluster.c
1 /*
2  * Main clustering algorithm
3  * Author(s): Vaughn Betz (first revision - VPack), Alexander Marquardt (second revision - T-VPack), Jason Luu (third revision - AAPack)
4  * June 8, 2011
5  */
6 
7 #include <stdio.h>
8 #include <assert.h>
9 #include <string.h>
10 #include <stdlib.h>
11 #include <map>
12 
13 #include "util.h"
14 #include "vpr_types.h"
15 #include "globals.h"
16 #include "cluster.h"
17 #include "heapsort.h"
18 #include "output_clustering.h"
19 #include "output_blif.h"
20 #include "SetupGrid.h"
21 #include "read_xml_arch_file.h"
22 #include "cluster_legality.h"
23 #include "path_delay2.h"
24 #include "path_delay.h"
25 #include "vpr_utils.h"
26 #include "cluster_placement.h"
27 #include "ReadOptions.h"
28 
29 /*#define DEBUG_FAILED_PACKING_CANDIDATES*/
30 
31 #define AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC 2 /* Maximum relative number of pins that can exceed input pins before giving up */
32 #define AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST 5 /* Maximum constant number of pins that can exceed input pins before giving up */
33 
34 #define AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE 30 /* This value is used to determine the max size of the priority queue for candidates that pass the early filter legality test but not the more detailed routing test */
35 #define AAPACK_MAX_NET_SINKS_IGNORE 256 /* The packer looks at all sinks of a net when deciding which candidate block to pack next; for high-fanout nets this is too runtime costly for marginal benefit, so such nets are ignored */
36 #define AAPACK_MAX_HIGH_FANOUT_EXPLORE 10 /* For high-fanout nets that are ignored, consider a maximum of this many nets */
37 
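/* Illustrative sketch (not part of the original file): how the two overuse limits
 * above combine into the per-pin-class cap used by the lookahead pin bookkeeping
 * (see the loop bounds in alloc_and_load_pb_stats() further down). The function
 * name and parameter are placeholders. */
static int example_lookahead_pin_cap(int pin_class_size) {
	/* cap = class size scaled by the relative factor, plus a constant slack */
	return pin_class_size * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC
			+ AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST;
}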
38 #define SCALE_NUM_PATHS 1e-2 /*this value is used as a multiplier to assign a *
39  *slightly higher criticality value to nets that *
40  *affect a large number of critical paths versus *
41  *nets that do not have such a broad effect. *
42  *Note that this multiplier is intentionally very *
43  *small compared to the total criticality because *
44  *we want to make sure that vpack_net criticality is *
45  *primarily determined by slacks, with this acting *
46  *only as a tie-breaker between otherwise equal nets*/
47 #define SCALE_DISTANCE_VAL 1e-4 /*this value is used as a multiplier to assign a *
48  *slightly higher criticality value to nets that *
49  *are otherwise equal but one is farther *
50  *from sources (the farther one being made slightly *
51  *more critical) */
52 
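/* Illustrative sketch (not part of the original file): how the two multipliers
 * above enter a block's seed criticality, mirroring the tnode loop in
 * do_clustering() below; the function name is a placeholder and the tnode type
 * is assumed to be t_tnode as declared in the VPR headers. */
static float example_seed_criticality(const t_tnode *node) {
	/* tie-breakers: number of critical paths affected, then distance from source */
	float num_paths_scaling = SCALE_NUM_PATHS
			* (float) node->prepacked_data->normalized_total_critical_paths;
	float distance_scaling = SCALE_DISTANCE_VAL
			* (float) node->prepacked_data->normalized_T_arr;
	/* slack dominates; the scaled terms only separate otherwise-equal nodes */
	return (1 - node->prepacked_data->normalized_slack) + num_paths_scaling
			+ distance_scaling;
}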
53 enum e_gain_update {
54  GAIN, NO_GAIN
55 };
56 enum e_feasibility {
57  FEASIBLE, INFEASIBLE
58 };
59 enum e_gain_type {
60  HILL_CLIMBING, NOT_HILL_CLIMBING
61 };
62 enum e_removal_policy {
63  REMOVE_CLUSTERED, LEAVE_CLUSTERED
64 };
65 /* TODO: REMOVE_CLUSTERED no longer used, remove */
66 enum e_net_relation_to_clustered_block {
67  INPUT, OUTPUT
68 };
69 
70 enum e_detailed_routing_stages {
71  E_DETAILED_ROUTE_AT_END_ONLY = 0, E_DETAILED_ROUTE_FOR_EACH_ATOM, E_DETAILED_ROUTE_END
72 };
73 
74 
75 /* Linked list structure. Stores one integer (iblk). */
76 struct s_molecule_link {
77  t_pack_molecule *moleculeptr;
78  struct s_molecule_link *next;
79 };
80 
81 /* 1/MARKED_FRAC is the fraction of nets or blocks that must be *
82  * marked in order for the brute force (go through the whole *
83  * data structure linearly) gain update etc. code to be used. *
84  * This is done for speed only; make MARKED_FRAC whatever *
85  * number speeds the code up most. */
86 #define MARKED_FRAC 2
87 
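/* Illustrative sketch (not part of the original file) of the kind of test
 * MARKED_FRAC is meant for: once more than 1/MARKED_FRAC of the blocks have been
 * marked, a plain linear sweep is assumed to be cheaper than walking the marked
 * list. The function name and parameters are placeholders. */
static boolean example_prefer_linear_sweep(int num_marked_blocks, int total_blocks) {
	return (boolean) (num_marked_blocks > total_blocks / MARKED_FRAC);
}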
88 /* Keeps a linked list of the unclustered blocks to speed up looking for *
89  * unclustered blocks with a certain number of *external* inputs. *
90  * [0..lut_size]. Unclustered_list_head[i] points to the head of the *
91  * list of blocks with i inputs to be hooked up via external interconnect. */
92 static struct s_molecule_link *unclustered_list_head;
93 int unclustered_list_head_size;
94 static struct s_molecule_link *memory_pool; /*Declared here so I can free easily.*/
95 
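/* Illustrative sketch (not part of the original file): how a molecule with k
 * external inputs is threaded onto bucket k of unclustered_list_head, mirroring
 * the loading loop in alloc_and_init_clustering() below. The function name is a
 * placeholder. */
static void example_prepend_to_unclustered_bucket(struct s_molecule_link *link,
		t_pack_molecule *molecule) {
	int ext_inps = molecule->num_ext_inputs; /* bucket index = # external inputs */
	link->moleculeptr = molecule;
	link->next = unclustered_list_head[ext_inps].next;
	unclustered_list_head[ext_inps].next = link;
}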
96 /* Does the logical_block that drives the output of this vpack_net also appear as a *
97  * receiver (input) pin of the vpack_net? [0..num_logical_nets-1]. If so, then by how much? This is used *
98  * in the gain routines to avoid double counting the connections from *
99  * the current cluster to other blocks (hence yielding better *
100  * clusterings). The only time a logical_block should connect to the same vpack_net *
101  * twice is when one connection is an output and the other is an input, *
102  * so this should take care of all multiple connections. */
103 static int *net_output_feeds_driving_block_input;
104 
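/* Illustrative sketch (not part of the original file): how the flag above is
 * consulted in the gain routines -- when a net's driver also appears among its
 * sinks, the driving pin (pin 0) is skipped so that block is not counted twice.
 * The function name is a placeholder. */
static int example_first_pin_to_scan(int inet) {
	return (net_output_feeds_driving_block_input[inet] == 0) ? 0 : 1;
}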
105 /* Timing information for blocks */
106 
107 static float *block_criticality = NULL;
108 static int *critindexarray = NULL;
109 
110 /*****************************************/
111 /*local functions*/
112 /*****************************************/
113 
114 static void check_clocks(boolean *is_clock);
115 
116 #if 0
117 static void check_for_duplicate_inputs ();
118 #endif
119 
120 static boolean is_logical_blk_in_pb(int iblk, t_pb *pb);
121 
122 static void add_molecule_to_pb_stats_candidates(t_pack_molecule *molecule,
123  std::map<int, float> &gain, t_pb *pb);
124 
125 static void alloc_and_init_clustering(boolean global_clocks, float alpha,
126  float beta, int max_cluster_size, int max_molecule_inputs,
127  int max_pb_depth, int max_models,
128  t_cluster_placement_stats **cluster_placement_stats,
129  t_pb_graph_node ***primitives_list, t_pack_molecule *molecules_head,
130  int num_molecules);
131 static void free_pb_stats_recursive(t_pb *pb);
132 static void try_update_lookahead_pins_used(t_pb *cur_pb);
133 static void reset_lookahead_pins_used(t_pb *cur_pb);
134 static void compute_and_mark_lookahead_pins_used(int ilogical_block);
135 static void compute_and_mark_lookahead_pins_used_for_pin(
136  t_pb_graph_pin *pb_graph_pin, t_pb *primitive_pb, int inet);
137 static void commit_lookahead_pins_used(t_pb *cur_pb);
138 static boolean check_lookahead_pins_used(t_pb *cur_pb);
139 static boolean primitive_feasible(int iblk, t_pb *cur_pb);
140 static boolean primitive_type_and_memory_feasible(int iblk,
141  const t_pb_type *cur_pb_type, t_pb *memory_class_pb,
142  int sibling_memory_blk);
143 
144 static t_pack_molecule *get_molecule_by_num_ext_inputs(
145  INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb,
146  INP int ext_inps, INP enum e_removal_policy remove_flag,
147  INP t_cluster_placement_stats *cluster_placement_stats_ptr);
148 
149 static t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(
150  INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb,
151  INP t_cluster_placement_stats *cluster_placement_stats_ptr);
152 
153 static t_pack_molecule* get_seed_logical_molecule_with_most_ext_inputs(
154  int max_molecule_inputs);
155 
156 static enum e_block_pack_status try_pack_molecule(
157  INOUTP t_cluster_placement_stats *cluster_placement_stats_ptr,
158  INP t_pack_molecule *molecule, INOUTP t_pb_graph_node **primitives_list,
159  INOUTP t_pb * pb, INP int max_models, INP int max_cluster_size,
160  INP int clb_index, INP int max_nets_in_pb_type, INP int detailed_routing_stage);
161 static enum e_block_pack_status try_place_logical_block_rec(
162  INP t_pb_graph_node *pb_graph_node, INP int ilogical_block,
163  INP t_pb *cb, OUTP t_pb **parent, INP int max_models,
164  INP int max_cluster_size, INP int clb_index,
165  INP int max_nets_in_pb_type,
166  INP t_cluster_placement_stats *cluster_placement_stats_ptr,
167  INP boolean is_root_of_chain, INP t_pb_graph_pin *chain_root_pin);
168 static void revert_place_logical_block(INP int ilogical_block,
169  INP int max_models);
170 
171 static void update_connection_gain_values(int inet, int clustered_block,
172  t_pb * cur_pb,
173  enum e_net_relation_to_clustered_block net_relation_to_clustered_block);
174 
175 static void update_timing_gain_values(int inet, int clustered_block,
176  t_pb* cur_pb,
177  enum e_net_relation_to_clustered_block net_relation_to_clustered_block,
178  t_slack * slacks);
179 
180 static void mark_and_update_partial_gain(int inet, enum e_gain_update gain_flag,
181  int clustered_block, int port_on_clustered_block,
182  int pin_on_clustered_block, boolean timing_driven,
183  boolean connection_driven,
184  enum e_net_relation_to_clustered_block net_relation_to_clustered_block,
185  t_slack * slacks);
186 
187 static void update_total_gain(float alpha, float beta, boolean timing_driven,
188  boolean connection_driven, boolean global_clocks, t_pb *pb);
189 
190 static void update_cluster_stats( INP t_pack_molecule *molecule,
191  INP int clb_index, INP boolean *is_clock, INP boolean global_clocks,
192  INP float alpha, INP float beta, INP boolean timing_driven,
193  INP boolean connection_driven, INP t_slack * slacks);
194 
195 static void start_new_cluster(
196  INP t_cluster_placement_stats *cluster_placement_stats,
197  INP t_pb_graph_node **primitives_list, INP const t_arch * arch,
198  INOUTP t_block *new_cluster, INP int clb_index,
199  INP t_pack_molecule *molecule, INP float aspect,
200  INOUTP int *num_used_instances_type, INOUTP int *num_instances_type,
201  INP int num_models, INP int max_cluster_size,
202  INP int max_nets_in_pb_type, INP int detailed_routing_stage);
203 
204 static t_pack_molecule *get_highest_gain_molecule(
205  INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb,
206  INP enum e_gain_type gain_mode,
207  INP t_cluster_placement_stats *cluster_placement_stats_ptr);
208 
209 static t_pack_molecule *get_molecule_for_cluster(
210  INP enum e_packer_algorithm packer_algorithm, INP t_pb *cur_pb,
211  INP boolean allow_unrelated_clustering,
212  INOUTP int *num_unrelated_clustering_attempts,
213  INP t_cluster_placement_stats *cluster_placement_stats_ptr);
214 
215 static void alloc_and_load_cluster_info(INP int num_clb, INOUTP t_block *clb);
216 
217 static void check_clustering(int num_clb, t_block *clb, boolean *is_clock);
218 
219 static void check_cluster_logical_blocks(t_pb *pb, boolean *blocks_checked);
220 
221 static t_pack_molecule* get_most_critical_seed_molecule(int * indexofcrit);
222 
223 static float get_molecule_gain(t_pack_molecule *molecule, std::map<int, float> &blk_gain);
224 static int compare_molecule_gain(const void *a, const void *b);
225 static int get_net_corresponding_to_pb_graph_pin(t_pb *cur_pb,
226  t_pb_graph_pin *pb_graph_pin);
227 
228 static void print_block_criticalities(const char * fname);
229 
230 /*****************************************/
231 /*globally accessable function*/
232 void do_clustering(const t_arch *arch, t_pack_molecule *molecule_head,
233  int num_models, boolean global_clocks, boolean *is_clock,
234  boolean hill_climbing_flag, char *out_fname, boolean timing_driven,
235  enum e_cluster_seed cluster_seed_type, float alpha, float beta,
236  int recompute_timing_after, float block_delay,
237  float intra_cluster_net_delay, float inter_cluster_net_delay,
238  float aspect, boolean allow_unrelated_clustering,
239  boolean allow_early_exit, boolean connection_driven,
240  enum e_packer_algorithm packer_algorithm, t_timing_inf timing_inf) {
241 
242  /* Does the actual work of clustering multiple netlist blocks *
243  * into clusters. */
244 
245  /* Algorithm employed
246  1. Find type that can legally hold block and create cluster with pb info
247  2. Populate started cluster
248  3. Repeat 1 until no more blocks need to be clustered
249 
250  */
251 
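	/* Illustrative sketch (not part of the original file) of the loop structure
	 * implemented below, with error handling and statistics elided:
	 *
	 *   istart = <seed molecule>;
	 *   while (istart != NULL) {
	 *       start_new_cluster(..., istart, ...);
	 *       next_molecule = get_molecule_for_cluster(...);
	 *       while (next_molecule != NULL && next_molecule != prev_molecule) {
	 *           try_pack_molecule(...);
	 *           next_molecule = get_molecule_for_cluster(...);
	 *       }
	 *       <route-check the cluster, then keep it or tear it down>;
	 *       istart = <next seed molecule>;
	 *   }
	 */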
252  int i, iblk, num_molecules, blocks_since_last_analysis, num_clb, max_nets_in_pb_type,
253  cur_nets_in_pb_type, num_blocks_hill_added, max_cluster_size, cur_cluster_size,
254  max_molecule_inputs, max_pb_depth, cur_pb_depth, num_unrelated_clustering_attempts,
255  indexofcrit, savedindexofcrit /* index of next most timing critical block */,
256  detailed_routing_stage, *hill_climbing_inputs_avail;
257 
258  int *num_used_instances_type, *num_instances_type;
259  /* [0..num_types] Holds array for total number of each cluster_type available */
260 
261  boolean early_exit, is_cluster_legal;
262  enum e_block_pack_status block_pack_status;
263  float crit;
264 
265  t_cluster_placement_stats *cluster_placement_stats, *cur_cluster_placement_stats_ptr;
266  t_pb_graph_node **primitives_list;
267  t_block *clb;
268  t_slack * slacks = NULL;
269  t_pack_molecule *istart, *next_molecule, *prev_molecule, *cur_molecule;
270 
271 #ifdef PATH_COUNTING
272  int inet, ipin;
273 #else
274  int inode;
275  float num_paths_scaling, distance_scaling;
276 #endif
277 
278  /* TODO: This is memory inefficient, fix if causes problems */
279  clb = (t_block*)my_calloc(num_logical_blocks, sizeof(t_block));
280  num_clb = 0;
281  istart = NULL;
282 
283  /* determine bound on cluster size and primitive input size */
284  max_cluster_size = 0;
285  max_molecule_inputs = 0;
286  max_pb_depth = 0;
287  max_nets_in_pb_type = 0;
288 
289  indexofcrit = 0;
290 
291  cur_molecule = molecule_head;
292  num_molecules = 0;
293  while (cur_molecule != NULL) {
294  cur_molecule->valid = TRUE;
295  if (cur_molecule->num_ext_inputs > max_molecule_inputs) {
296  max_molecule_inputs = cur_molecule->num_ext_inputs;
297  }
298  num_molecules++;
299  cur_molecule = cur_molecule->next;
300  }
301 
302  for (i = 0; i < num_types; i++) {
303  if (EMPTY_TYPE == &type_descriptors[i])
304  continue;
305  cur_cluster_size = get_max_primitives_in_pb_type(
306  type_descriptors[i].pb_type);
307  cur_pb_depth = get_max_depth_of_pb_type(type_descriptors[i].pb_type);
308  cur_nets_in_pb_type = get_max_nets_in_pb_type(
309  type_descriptors[i].pb_type);
310  if (cur_cluster_size > max_cluster_size) {
311  max_cluster_size = cur_cluster_size;
312  }
313  if (cur_pb_depth > max_pb_depth) {
314  max_pb_depth = cur_pb_depth;
315  }
316  if (cur_nets_in_pb_type > max_nets_in_pb_type) {
317  max_nets_in_pb_type = cur_nets_in_pb_type;
318  }
319  }
320 
321  if (hill_climbing_flag) {
322  hill_climbing_inputs_avail = (int *) my_calloc(max_cluster_size + 1,
323  sizeof(int));
324  } else {
325  hill_climbing_inputs_avail = NULL; /* if used, die hard */
326  }
327 
328  /* TODO: make better estimate for nx and ny */
329  nx = ny = 1;
330 
331  check_clocks(is_clock);
332 #if 0
333  check_for_duplicate_inputs ();
334 #endif
335  alloc_and_init_clustering(global_clocks, alpha, beta, max_cluster_size,
336  max_molecule_inputs, max_pb_depth, num_models,
337  &cluster_placement_stats, &primitives_list, molecule_head,
338  num_molecules);
339 
340  blocks_since_last_analysis = 0;
341  early_exit = FALSE;
342  num_blocks_hill_added = 0;
343  num_used_instances_type = (int*) my_calloc(num_types, sizeof(int));
344  num_instances_type = (int*) my_calloc(num_types, sizeof(int));
345 
346  assert(max_cluster_size < MAX_SHORT);
347  /* Limit maximum number of elements for each cluster */
348 
349  if (timing_driven) {
350  slacks = alloc_and_load_pre_packing_timing_graph(block_delay,
351  inter_cluster_net_delay, arch->models, timing_inf);
352  do_timing_analysis(slacks, TRUE, FALSE, FALSE);
353 
354  if (getEchoEnabled()) {
357 #ifndef PATH_COUNTING
360 #endif
365  }
366 
367  block_criticality = (float*) my_calloc(num_logical_blocks, sizeof(float));
368 
369  critindexarray = (int*) my_malloc(num_logical_blocks * sizeof(int));
370 
371  for (i = 0; i < num_logical_blocks; i++) {
372  assert(logical_block[i].index == i);
373  critindexarray[i] = i;
374  }
375 
376 #ifdef PATH_COUNTING
377  /* Calculate block criticality from a weighted sum of timing and path criticalities. */
378  for (inet = 0; inet < num_logical_nets; inet++) {
379  for (ipin = 1; ipin <= vpack_net[inet].num_sinks; ipin++) {
380 
381  /* Find the logical block iblk which this pin is a sink on. */
382  iblk = vpack_net[inet].node_block[ipin];
383 
384  /* The criticality of this pin is a sum of its timing and path criticalities. */
385  crit = PACK_PATH_WEIGHT * slacks->path_criticality[inet][ipin]
386  + (1 - PACK_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin];
387 
388  /* The criticality of each block is the maximum of the criticalities of all its pins. */
389  if (block_criticality[iblk] < crit) {
390  block_criticality[iblk] = crit;
391  }
392  }
393  }
394 
395 #else
396  /* Calculate criticality based on slacks and tie breakers (# paths, distance from source) */
397  for (inode = 0; inode < num_tnodes; inode++) {
398  /* Only calculate for tnodes which have valid normalized values.
399  Either all values will be accurate or none will, so we only have
400  to check whether one particular value (normalized_T_arr) is valid
401  Tnodes that do not have both times valid were not part of the analysis.
402  Because we calloc-ed the array criticality, such nodes will have criticality 0, the lowest possible value. */
403  if (has_valid_normalized_T_arr(inode)) {
404  iblk = tnode[inode].block;
405  num_paths_scaling = SCALE_NUM_PATHS
406  * (float) tnode[inode].prepacked_data->normalized_total_critical_paths;
407  distance_scaling = SCALE_DISTANCE_VAL
408  * (float) tnode[inode].prepacked_data->normalized_T_arr;
409  crit = (1 - tnode[inode].prepacked_data->normalized_slack) + num_paths_scaling
410  + distance_scaling;
411  if (block_criticality[iblk] < crit) {
412  block_criticality[iblk] = crit;
413  }
414  }
415  }
416 #endif
417  heapsort(critindexarray, block_criticality, num_logical_blocks, 1);
418 
421  }
422 
423 
424  if (cluster_seed_type == VPACK_TIMING) {
425  istart = get_most_critical_seed_molecule(&indexofcrit);
426  } else {/*max input seed*/
427  istart = get_seed_logical_molecule_with_most_ext_inputs(
428  max_molecule_inputs);
429  }
430 
431  } else /*cluster seed is max input (since there is no timing information)*/ {
432  istart = get_seed_logical_molecule_with_most_ext_inputs(
433  max_molecule_inputs);
434  }
435 
436 
437  while (istart != NULL) {
438  is_cluster_legal = FALSE;
439  savedindexofcrit = indexofcrit;
440  for (detailed_routing_stage = (int)E_DETAILED_ROUTE_AT_END_ONLY; !is_cluster_legal && detailed_routing_stage != (int)E_DETAILED_ROUTE_END; detailed_routing_stage++) {
441  reset_legalizer_for_cluster(&clb[num_clb]);
442 
443  /* start a new cluster and reset all stats */
444  start_new_cluster(cluster_placement_stats, primitives_list, arch,
445  &clb[num_clb], num_clb, istart, aspect, num_used_instances_type,
446  num_instances_type, num_models, max_cluster_size,
447  max_nets_in_pb_type, detailed_routing_stage);
448  vpr_printf(TIO_MESSAGE_INFO, "Complex block %d: %s, type: %s\n",
449  num_clb, clb[num_clb].name, clb[num_clb].type->name);
450  vpr_printf(TIO_MESSAGE_INFO, "\t");
451  fflush(stdout);
452  update_cluster_stats(istart, num_clb, is_clock, global_clocks, alpha,
453  beta, timing_driven, connection_driven, slacks);
454  num_clb++;
455 
456  if (timing_driven && !early_exit) {
457  blocks_since_last_analysis++;
458  /*it doesn't make sense to do a timing analysis here since there *
459  *is only one logical_block clustered; it would not change anything */
460  }
461  cur_cluster_placement_stats_ptr = &cluster_placement_stats[clb[num_clb
462  - 1].type->index];
463  num_unrelated_clustering_attempts = 0;
464  next_molecule = get_molecule_for_cluster(PACK_BRUTE_FORCE,
465  clb[num_clb - 1].pb, allow_unrelated_clustering,
466  &num_unrelated_clustering_attempts,
467  cur_cluster_placement_stats_ptr);
468  prev_molecule = istart;
469  while (next_molecule != NULL && prev_molecule != next_molecule) {
470  block_pack_status = try_pack_molecule(
471  cur_cluster_placement_stats_ptr, next_molecule,
472  primitives_list, clb[num_clb - 1].pb, num_models,
473  max_cluster_size, num_clb - 1, max_nets_in_pb_type, detailed_routing_stage);
474  prev_molecule = next_molecule;
475  if (block_pack_status != BLK_PASSED) {
476  if (next_molecule != NULL) {
477  if (block_pack_status == BLK_FAILED_ROUTE) {
478 #ifdef DEBUG_FAILED_PACKING_CANDIDATES
479  vpr_printf(TIO_MESSAGE_DIRECT, "\tNO_ROUTE:%s type %s\n",
480  next_molecule->logical_block_ptrs[next_molecule->root]->name,
481  next_molecule->logical_block_ptrs[next_molecule->root]->model->name);
482  fflush(stdout);
483  #else
484  vpr_printf(TIO_MESSAGE_DIRECT, ".");
485  #endif
486  } else {
487  #ifdef DEBUG_FAILED_PACKING_CANDIDATES
488  vpr_printf(TIO_MESSAGE_DIRECT, "\tFAILED_CHECK:%s type %s check %d\n",
489  next_molecule->logical_block_ptrs[next_molecule->root]->name,
490  next_molecule->logical_block_ptrs[next_molecule->root]->model->name,
491  block_pack_status);
492  fflush(stdout);
493  #else
494  vpr_printf(TIO_MESSAGE_DIRECT, ".");
495  #endif
496  }
497  }
498 
499  next_molecule = get_molecule_for_cluster(PACK_BRUTE_FORCE,
500  clb[num_clb - 1].pb, allow_unrelated_clustering,
501  &num_unrelated_clustering_attempts,
502  cur_cluster_placement_stats_ptr);
503  continue;
504  } else {
505  /* Continue packing by filling smallest cluster */
506  #ifdef DEBUG_FAILED_PACKING_CANDIDATES
507  vpr_printf(TIO_MESSAGE_DIRECT, "\tPASSED:%s type %s\n",
508  next_molecule->logical_block_ptrs[next_molecule->root]->name,
509  next_molecule->logical_block_ptrs[next_molecule->root]->model->name);
510  fflush(stdout);
511  #else
512  vpr_printf(TIO_MESSAGE_DIRECT, ".");
513  #endif
514  }
515  update_cluster_stats(next_molecule, num_clb - 1, is_clock,
516  global_clocks, alpha, beta, timing_driven,
517  connection_driven, slacks);
518  num_unrelated_clustering_attempts = 0;
519 
520  if (timing_driven && !early_exit) {
521  blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */
522  }
523  next_molecule = get_molecule_for_cluster(PACK_BRUTE_FORCE,
524  clb[num_clb - 1].pb, allow_unrelated_clustering,
525  &num_unrelated_clustering_attempts,
526  cur_cluster_placement_stats_ptr);
527  }
528  vpr_printf(TIO_MESSAGE_DIRECT, "\n");
529  if (detailed_routing_stage == (int)E_DETAILED_ROUTE_AT_END_ONLY) {
530  is_cluster_legal = try_breadth_first_route_cluster();
531  if (is_cluster_legal == TRUE) {
532  vpr_printf(TIO_MESSAGE_INFO, "Passed route at end.\n");
533  } else {
534  vpr_printf(TIO_MESSAGE_INFO, "Failed route at end, repack cluster trying detailed routing at each stage.\n");
535  }
536  } else {
537  is_cluster_legal = TRUE;
538  }
539  if (is_cluster_legal == TRUE) {
541  if (timing_driven) {
542  if (num_blocks_hill_added > 0 && !early_exit) {
543  blocks_since_last_analysis += num_blocks_hill_added;
544  }
545  if (cluster_seed_type == VPACK_TIMING) {
546  istart = get_most_critical_seed_molecule(&indexofcrit);
547  } else { /*max input seed*/
548  istart = get_seed_logical_molecule_with_most_ext_inputs(
549  max_molecule_inputs);
550  }
551  } else
552  /*cluster seed is max input (since there is no timing information)*/
553  istart = get_seed_logical_molecule_with_most_ext_inputs(
554  max_molecule_inputs);
555 
556  free_pb_stats_recursive(clb[num_clb - 1].pb);
557  } else {
558  /* Free up data structures and requeue used molecules */
559  num_used_instances_type[clb[num_clb - 1].type->index]--;
560  free_cb(clb[num_clb - 1].pb);
561  free(clb[num_clb - 1].pb);
562  free(clb[num_clb - 1].name);
563  clb[num_clb - 1].name = NULL;
564  clb[num_clb - 1].pb = NULL;
565  num_clb--;
566  indexofcrit = savedindexofcrit;
567  }
568  }
569  }
570 
572 
573  alloc_and_load_cluster_info(num_clb, clb);
574 
575  check_clustering(num_clb, clb, is_clock);
576 
577  output_clustering(clb, num_clb, global_clocks, is_clock, out_fname, FALSE);
579  output_blif (clb, num_clb, global_clocks, is_clock,
581  }
582 
583  if (hill_climbing_flag) {
584  free(hill_climbing_inputs_avail);
585  }
586  free_cluster_placement_stats(cluster_placement_stats);
587 
588  for (i = 0; i < num_clb; i++) {
589  free_cb(clb[i].pb);
590  free(clb[i].name);
591  free(clb[i].nets);
592  free(clb[i].pb);
593  }
594  free(clb);
595 
596  free(num_used_instances_type);
597  free(num_instances_type);
598  free(unclustered_list_head);
599  free(memory_pool);
601 
602  if (timing_driven) {
603  free(block_criticality);
604  free(critindexarray);
605 
606  block_criticality = NULL;
607  critindexarray = NULL;
608  }
609 
610  if (timing_driven) {
611  free_timing_graph(slacks);
612  }
613 
614  free (primitives_list);
615 
616 }
617 
618 /*****************************************/
619 static void check_clocks(boolean *is_clock) {
620 
621  /* Checks that nets used as clock inputs to latches are never also used *
622  * as VPACK_LUT inputs. It's electrically questionable, and more importantly *
623  * would break the clustering code. */
624 
625  int inet, iblk, ipin;
626  t_model_ports *port;
627 
628  for (iblk = 0; iblk < num_logical_blocks; iblk++) {
629  if (logical_block[iblk].type != VPACK_OUTPAD) {
630  port = logical_block[iblk].model->inputs;
631  while (port) {
632  for (ipin = 0; ipin < port->size; ipin++) {
633  inet = logical_block[iblk].input_nets[port->index][ipin];
634  if (inet != OPEN) {
635  if (is_clock[inet]) {
636  vpr_printf(TIO_MESSAGE_ERROR, "Error in check_clocks.\n");
637  vpr_printf(TIO_MESSAGE_ERROR, "Net %d (%s) is a clock, but also connects to a logic block input on logical_block %d (%s).\n",
638  inet, vpack_net[inet].name, iblk, logical_block[iblk].name);
639  vpr_printf(TIO_MESSAGE_ERROR, "This would break the current clustering implementation and is electrically questionable, so clustering has been aborted.\n");
640  exit(1);
641  }
642  }
643  }
644  port = port->next;
645  }
646  }
647  }
648 }
649 
650 /* Determine if logical block is in pb */
651 static boolean is_logical_blk_in_pb(int iblk, t_pb *pb) {
652  t_pb * cur_pb;
653  cur_pb = logical_block[iblk].pb;
654  while (cur_pb) {
655  if (cur_pb == pb) {
656  return TRUE;
657  }
658  cur_pb = cur_pb->parent_pb;
659  }
660  return FALSE;
661 }
662 
663 /* Add blk to list of feasible blocks sorted according to gain */
664 static void add_molecule_to_pb_stats_candidates(t_pack_molecule *molecule,
665  std::map<int, float> &gain, t_pb *pb) {
666  int i, j;
667 
668  for (i = 0; i < pb->pb_stats->num_feasible_blocks; i++) {
669  if (pb->pb_stats->feasible_blocks[i] == molecule) {
670  return; /* already in queue, do nothing */
671  }
672  }
673 
674  if (pb->pb_stats->num_feasible_blocks
675  >= AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE - 1) {
676  /* maximum size for array, remove smallest gain element and sort */
677  if (get_molecule_gain(molecule, gain)
678  > get_molecule_gain(pb->pb_stats->feasible_blocks[0], gain)) {
679  /* single loop insertion sort */
680  for (j = 0; j < pb->pb_stats->num_feasible_blocks - 1; j++) {
681  if (get_molecule_gain(molecule, gain)
682  <= get_molecule_gain(
683  pb->pb_stats->feasible_blocks[j + 1], gain)) {
684  pb->pb_stats->feasible_blocks[j] = molecule;
685  break;
686  } else {
687  pb->pb_stats->feasible_blocks[j] =
688  pb->pb_stats->feasible_blocks[j + 1];
689  }
690  }
691  if (j == pb->pb_stats->num_feasible_blocks - 1) {
692  pb->pb_stats->feasible_blocks[j] = molecule;
693  }
694  }
695  } else {
696  /* Expand array and single loop insertion sort */
697  for (j = pb->pb_stats->num_feasible_blocks - 1; j >= 0; j--) {
698  if (get_molecule_gain(pb->pb_stats->feasible_blocks[j], gain)
699  > get_molecule_gain(molecule, gain)) {
700  pb->pb_stats->feasible_blocks[j + 1] =
701  pb->pb_stats->feasible_blocks[j];
702  } else {
703  pb->pb_stats->feasible_blocks[j + 1] = molecule;
704  break;
705  }
706  }
707  if (j < 0) {
708  pb->pb_stats->feasible_blocks[0] = molecule;
709  }
710  pb->pb_stats->num_feasible_blocks++;
711  }
712 }
713 
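/* Illustrative sketch (not part of the original file): the invariant the
 * insertion sorts above maintain -- feasible_blocks[] stays ordered from smallest
 * to largest gain, so index 0 is the element evicted when the bounded array
 * (AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE) is full. The function name is a
 * placeholder. */
static boolean example_feasible_blocks_sorted(t_pb *pb, std::map<int, float> &gain) {
	int i;
	for (i = 1; i < pb->pb_stats->num_feasible_blocks; i++) {
		if (get_molecule_gain(pb->pb_stats->feasible_blocks[i - 1], gain)
				> get_molecule_gain(pb->pb_stats->feasible_blocks[i], gain)) {
			return FALSE;
		}
	}
	return TRUE;
}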
714 /*****************************************/
715 static void alloc_and_init_clustering(boolean global_clocks, float alpha,
716  float beta, int max_cluster_size, int max_molecule_inputs,
717  int max_pb_depth, int max_models,
718  t_cluster_placement_stats **cluster_placement_stats,
719  t_pb_graph_node ***primitives_list, t_pack_molecule *molecules_head,
720  int num_molecules) {
721 
722  /* Allocates the main data structures used for clustering and properly *
723  * initializes them. */
724 
725  int i, ext_inps, ipin, driving_blk, inet;
726  struct s_molecule_link *next_ptr;
727  t_pack_molecule *cur_molecule;
728  t_pack_molecule **molecule_array;
729  int max_molecule_size;
730 
732  /**cluster_placement_stats = alloc_and_load_cluster_placement_stats();*/
733 
734  for (i = 0; i < num_logical_blocks; i++) {
736  }
737 
738  /* alloc and load list of molecules to pack */
739  unclustered_list_head = (struct s_molecule_link *) my_calloc(
740  max_molecule_inputs + 1, sizeof(struct s_molecule_link));
741  unclustered_list_head_size = max_molecule_inputs + 1;
742 
743  for (i = 0; i <= max_molecule_inputs; i++) {
744  unclustered_list_head[i].next = NULL;
745  }
746 
747  molecule_array = (t_pack_molecule **) my_malloc(
748  num_molecules * sizeof(t_pack_molecule*));
749  cur_molecule = molecules_head;
750  for (i = 0; i < num_molecules; i++) {
751  assert(cur_molecule != NULL);
752  molecule_array[i] = cur_molecule;
753  cur_molecule = cur_molecule->next;
754  }
755  assert(cur_molecule == NULL);
756  qsort((void*) molecule_array, num_molecules, sizeof(t_pack_molecule*),
757  compare_molecule_gain);
758 
759  memory_pool = (struct s_molecule_link *) my_malloc(
760  num_molecules * sizeof(struct s_molecule_link));
761  next_ptr = memory_pool;
762 
763  for (i = 0; i < num_molecules; i++) {
764  ext_inps = molecule_array[i]->num_ext_inputs;
765  next_ptr->moleculeptr = molecule_array[i];
766  next_ptr->next = unclustered_list_head[ext_inps].next;
767  unclustered_list_head[ext_inps].next = next_ptr;
768  next_ptr++;
769  }
770  free(molecule_array);
771 
772  /* alloc and load net info */
773  net_output_feeds_driving_block_input = (int *) my_malloc(
774  num_logical_nets * sizeof(int));
775 
776  for (inet = 0; inet < num_logical_nets; inet++) {
777  net_output_feeds_driving_block_input[inet] = 0;
778  driving_blk = vpack_net[inet].node_block[0];
779  for (ipin = 1; ipin <= vpack_net[inet].num_sinks; ipin++) {
780  if (vpack_net[inet].node_block[ipin] == driving_blk) {
781  net_output_feeds_driving_block_input[inet]++;
782  }
783  }
784  }
785 
786  /* alloc and load cluster placement info */
787  *cluster_placement_stats = alloc_and_load_cluster_placement_stats();
788 
789  /* alloc array that will store primitives that a molecule gets placed to,
790  primitive_list is referenced by index, for example a logical block in index 2 of a molecule matches to a primitive in index 2 in primitive_list
791  this array must be the size of the biggest molecule
792  */
793  max_molecule_size = 1;
794  cur_molecule = molecules_head;
795  while (cur_molecule != NULL) {
796  if (cur_molecule->num_blocks > max_molecule_size) {
797  max_molecule_size = cur_molecule->num_blocks;
798  }
799  cur_molecule = cur_molecule->next;
800  }
801  *primitives_list = (t_pb_graph_node **)my_calloc(max_molecule_size, sizeof(t_pb_graph_node *));
802 }
803 
804 /*****************************************/
805 static void free_pb_stats_recursive(t_pb *pb) {
806 
807  int i, j;
808  /* Releases all the memory used by clustering data structures. */
809  if (pb) {
810  if (pb->pb_graph_node != NULL) {
811  if (pb->pb_graph_node->pb_type->num_modes != 0) {
812  for (i = 0;
813  i
814  < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children;
815  i++) {
816  for (j = 0;
817  j
818  < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[i].num_pb;
819  j++) {
820  if (pb->child_pbs && pb->child_pbs[i]) {
821  free_pb_stats_recursive(&pb->child_pbs[i][j]);
822  }
823  }
824  }
825  }
826  }
827  free_pb_stats(pb);
828  }
829 }
830 
831 static boolean primitive_feasible(int iblk, t_pb *cur_pb) {
832  const t_pb_type *cur_pb_type;
833  int i;
834  t_pb *memory_class_pb; /* Used for memory class only, for memories, open pins must be the same among siblings */
835  int sibling_memory_blk;
836 
837  cur_pb_type = cur_pb->pb_graph_node->pb_type;
838  memory_class_pb = NULL;
839  sibling_memory_blk = OPEN;
840 
841  assert(cur_pb_type->num_modes == 0);
842  /* primitive */
843  if (cur_pb->logical_block != OPEN && cur_pb->logical_block != iblk) {
844  /* This pb already has a different logical block */
845  return FALSE;
846  }
847 
848  if (cur_pb_type->class_type == MEMORY_CLASS) {
849  /* memory class is special, all siblings must share all nets, including open nets, with the exception of data nets */
850  /* find sibling if one exists */
851  memory_class_pb = cur_pb->parent_pb;
852  for (i = 0; i < cur_pb_type->parent_mode->num_pb_type_children; i++) {
853  if (memory_class_pb->child_pbs[cur_pb->mode][i].name != NULL
854  && memory_class_pb->child_pbs[cur_pb->mode][i].logical_block
855  != OPEN) {
856  sibling_memory_blk =
857  memory_class_pb->child_pbs[cur_pb->mode][i].logical_block;
858  }
859  }
860  if (sibling_memory_blk == OPEN) {
861  memory_class_pb = NULL;
862  }
863  }
864 
865  return primitive_type_and_memory_feasible(iblk, cur_pb_type,
866  memory_class_pb, sibling_memory_blk);
867 }
868 
869 static boolean primitive_type_and_memory_feasible(int iblk,
870  const t_pb_type *cur_pb_type, t_pb *memory_class_pb,
871  int sibling_memory_blk) {
872  t_model_ports *port;
873  int i, j;
874  boolean second_pass;
875 
876  /* check if ports are big enough */
877  /* for memories, also check that pins are the same with existing siblings */
878  port = logical_block[iblk].model->inputs;
879  second_pass = FALSE;
880  while (port || !second_pass) {
881  /* TODO: This is slow if the number of ports are large, fix if becomes a problem */
882  if (!port) {
883  second_pass = TRUE;
884  port = logical_block[iblk].model->outputs;
885  }
886  for (i = 0; i < cur_pb_type->num_ports; i++) {
887  if (cur_pb_type->ports[i].model_port == port) {
888  /* TODO: This is slow, I only need to check from 0 if it is a memory block, other blocks I can check from port->size onwards */
889  for (j = 0; j < port->size; j++) {
890  if (port->dir == IN_PORT && !port->is_clock) {
891  if (memory_class_pb) {
892  if (cur_pb_type->ports[i].port_class == NULL
893  || strstr(cur_pb_type->ports[i].port_class,
894  "data")
895  != cur_pb_type->ports[i].port_class) {
896  if (logical_block[iblk].input_nets[port->index][j]
897  != logical_block[sibling_memory_blk].input_nets[port->index][j]) {
898  return FALSE;
899  }
900  }
901  }
902  if (logical_block[iblk].input_nets[port->index][j]
903  != OPEN
904  && j >= cur_pb_type->ports[i].num_pins) {
905  return FALSE;
906  }
907  } else if (port->dir == OUT_PORT) {
908  if (memory_class_pb) {
909  if (cur_pb_type->ports[i].port_class == NULL
910  || strstr(cur_pb_type->ports[i].port_class,
911  "data")
912  != cur_pb_type->ports[i].port_class) {
913  if (logical_block[iblk].output_nets[port->index][j]
914  != logical_block[sibling_memory_blk].output_nets[port->index][j]) {
915  return FALSE;
916  }
917  }
918  }
919  if (logical_block[iblk].output_nets[port->index][j]
920  != OPEN
921  && j >= cur_pb_type->ports[i].num_pins) {
922  return FALSE;
923  }
924  } else {
925  assert(port->dir == IN_PORT && port->is_clock);
926  assert(j == 0);
927  if (memory_class_pb) {
928  if (logical_block[iblk].clock_net
929  != logical_block[sibling_memory_blk].clock_net) {
930  return FALSE;
931  }
932  }
933  if (logical_block[iblk].clock_net != OPEN
934  && j >= cur_pb_type->ports[i].num_pins) {
935  return FALSE;
936  }
937  }
938  }
939  break;
940  }
941  }
942  if (i == cur_pb_type->num_ports) {
943  if (((logical_block[iblk].model->inputs != NULL) && !second_pass)
944  || (logical_block[iblk].model->outputs != NULL
945  && second_pass)) {
946  /* physical port not found */
947  return FALSE;
948  }
949  }
950  if (port) {
951  port = port->next;
952  }
953  }
954  return TRUE;
955 }
956 
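/* Illustrative sketch (not part of the original file): the memory-class rule
 * enforced above -- a block may join a memory primitive's siblings only if every
 * pin whose port class does not start with "data" (address, write enable, clock,
 * ...) connects to the same net as in the sibling already placed; only the data
 * ports may differ. The function name is a placeholder. */
static boolean example_memory_sibling_pin_ok(int candidate_net, int sibling_net,
		const char *port_class) {
	boolean is_data_port = (boolean) (port_class != NULL
			&& strstr(port_class, "data") == port_class);
	return (boolean) (is_data_port || candidate_net == sibling_net);
}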
957 /*****************************************/
958 static t_pack_molecule *get_molecule_by_num_ext_inputs(
959  INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb,
960  INP int ext_inps, INP enum e_removal_policy remove_flag,
961  INP t_cluster_placement_stats *cluster_placement_stats_ptr) {
962 
963  /* This routine returns a logical_block which has not been clustered, has *
964  * no connection to the current cluster, satisfies the cluster *
965  * clock constraints, is a valid subblock inside the cluster, does not exceed the cluster subblock units available,
966  and has ext_inps external inputs. If *
967  * there is no such logical_block it returns NO_CLUSTER. Remove_flag *
968  * controls whether or not blocks that have already been clustered *
969  * are removed from the unclustered_list data structures. NB: *
970  * to get a logical_block regardless of clock constraints just set clocks_ *
971  * avail > 0. */
972 
973  struct s_molecule_link *ptr, *prev_ptr;
974  int ilogical_blk, i;
975  boolean success;
976 
977  prev_ptr = &unclustered_list_head[ext_inps];
978  ptr = unclustered_list_head[ext_inps].next;
979  while (ptr != NULL) {
980  /* TODO: Get better candidate logical block in future, eg. return most timing critical or some other smarter metric */
981  if (ptr->moleculeptr->valid) {
982  success = TRUE;
983  for (i = 0; i < get_array_size_of_molecule(ptr->moleculeptr); i++) {
984  if (ptr->moleculeptr->logical_block_ptrs[i] != NULL) {
985  ilogical_blk =
986  ptr->moleculeptr->logical_block_ptrs[i]->index;
987  if (!exists_free_primitive_for_logical_block(
988  cluster_placement_stats_ptr, ilogical_blk)) { /* TODO: I should be using a better filtering check especially when I'm dealing with multiple clock/multiple global reset signals where the clock/reset packed in matters, need to do later when I have the circuits to check my work */
989  success = FALSE;
990  break;
991  }
992  }
993  }
994  if (success == TRUE) {
995  return ptr->moleculeptr;
996  }
997  prev_ptr = ptr;
998  }
999 
1000  else if (remove_flag == REMOVE_CLUSTERED) {
1001  assert(0); /* this doesn't work right now with 2 the pass packing for each complex block */
1002  prev_ptr->next = ptr->next;
1003  }
1004 
1005  ptr = ptr->next;
1006  }
1007 
1008  return NULL;
1009 }
1010 
1011 /*****************************************/
1012 static t_pack_molecule *get_free_molecule_with_most_ext_inputs_for_cluster(
1013  INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb,
1014  INP t_cluster_placement_stats *cluster_placement_stats_ptr) {
1015 
1016  /* This routine is used to find new blocks for clustering when there are no feasible *
1017  * blocks with any attraction to the current cluster (i.e. it finds *
1018  * blocks which are unconnected from the current cluster). It returns *
1019  * the logical_block with the largest number of used inputs that satisfies the *
1020  * clocking and number of inputs constraints. If no suitable logical_block is *
1021  * found, the routine returns NO_CLUSTER.
1022  * TODO: Analyze if this function is useful in more detail, also, should probably not include clock in input count
1023  */
1024 
1025  int ext_inps;
1026  int i, j;
1027  t_pack_molecule *molecule;
1028 
1029  int inputs_avail = 0;
1030 
1031  for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) {
1032  for (j = 0; j < cur_pb->pb_graph_node->input_pin_class_size[i]; j++) {
1033  if (cur_pb->pb_stats->input_pins_used[i][j] != OPEN)
1034  inputs_avail++;
1035  }
1036  }
1037 
1038  molecule = NULL;
1039 
1040  if (inputs_avail >= unclustered_list_head_size) {
1041  inputs_avail = unclustered_list_head_size - 1;
1042  }
1043 
1044  for (ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) {
1045  molecule = get_molecule_by_num_ext_inputs(packer_algorithm, cur_pb,
1046  ext_inps, LEAVE_CLUSTERED, cluster_placement_stats_ptr);
1047  if (molecule != NULL) {
1048  break;
1049  }
1050  }
1051  return molecule;
1052 }
1053 
1054 /*****************************************/
1055 static t_pack_molecule* get_seed_logical_molecule_with_most_ext_inputs(
1056  int max_molecule_inputs) {
1057 
1058  /* This routine is used to find the first seed logical_block for the clustering. It returns *
1059  * the logical_block with the largest number of used inputs that satisfies the *
1060  * clocking and number of inputs constraints. If no suitable logical_block is *
1061  * found, the routine returns NO_CLUSTER. */
1062 
1063  int ext_inps;
1064  struct s_molecule_link *ptr;
1065 
1066  for (ext_inps = max_molecule_inputs; ext_inps >= 0; ext_inps--) {
1067  ptr = unclustered_list_head[ext_inps].next;
1068 
1069  while (ptr != NULL) {
1070  if (ptr->moleculeptr->valid) {
1071  return ptr->moleculeptr;
1072  }
1073  ptr = ptr->next;
1074  }
1075  }
1076  return NULL;
1077 }
1078 
1079 /*****************************************/
1080 
1081 /*****************************************/
1082 static void alloc_and_load_pb_stats(t_pb *pb, int max_models,
1083  int max_nets_in_pb_type) {
1084 
1085  /* Call this routine when starting to fill up a new cluster. It resets *
1086  * the gain vector, etc. */
1087 
1088  int i, j;
1089 
1090  pb->pb_stats = new t_pb_stats;
1091 
1092  /* If statement below is for speed. If nets are reasonably low-fanout, *
1093  * only a relatively small number of blocks will be marked, and updating *
1094  * only those logical_block structures will be fastest. If almost all blocks *
1095  * have been touched it should be faster to just run through them all *
1096  * in order (less addressing and better cache locality). */
1097  pb->pb_stats->input_pins_used = (int **) my_malloc(
1098  pb->pb_graph_node->num_input_pin_class * sizeof(int*));
1099  pb->pb_stats->output_pins_used = (int **) my_malloc(
1100  pb->pb_graph_node->num_output_pin_class * sizeof(int*));
1101  pb->pb_stats->lookahead_input_pins_used = (int **) my_malloc(
1102  pb->pb_graph_node->num_input_pin_class * sizeof(int*));
1103  pb->pb_stats->lookahead_output_pins_used = (int **) my_malloc(
1104  pb->pb_graph_node->num_output_pin_class * sizeof(int*));
1108 
1110  for (i = 0; i < pb->pb_graph_node->num_input_pin_class; i++) {
1111  pb->pb_stats->input_pins_used[i] = (int*) my_malloc(
1112  pb->pb_graph_node->input_pin_class_size[i] * sizeof(int));
1113  for (j = 0; j < pb->pb_graph_node->input_pin_class_size[i]; j++) {
1114  pb->pb_stats->input_pins_used[i][j] = OPEN;
1115  }
1116  }
1117 
1118  for (i = 0; i < pb->pb_graph_node->num_output_pin_class; i++) {
1119  pb->pb_stats->output_pins_used[i] = (int*) my_malloc(
1120  pb->pb_graph_node->output_pin_class_size[i] * sizeof(int));
1121  for (j = 0; j < pb->pb_graph_node->output_pin_class_size[i]; j++) {
1122  pb->pb_stats->output_pins_used[i][j] = OPEN;
1123  }
1124  }
1125 
1126  for (i = 0; i < pb->pb_graph_node->num_input_pin_class; i++) {
1127  pb->pb_stats->lookahead_input_pins_used[i] = (int*) my_malloc(
1129  * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC) * sizeof(int));
1130  for (j = 0;
1131  j
1133  * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC + AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST; j++) {
1135  }
1136  }
1137 
1138  for (i = 0; i < pb->pb_graph_node->num_output_pin_class; i++) {
1142  * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC) * sizeof(int));
1143  for (j = 0;
1144  j
1146  * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC + AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST; j++) {
1148  }
1149  }
1150 
1151  pb->pb_stats->gain.clear();
1152  pb->pb_stats->timinggain.clear();
1153  pb->pb_stats->connectiongain.clear();
1154  pb->pb_stats->sharinggain.clear();
1155  pb->pb_stats->hillgain.clear();
1156 
1157  pb->pb_stats->num_pins_of_net_in_pb.clear();
1158 
1159  pb->pb_stats->marked_nets = (int *) my_malloc(
1160  max_nets_in_pb_type * sizeof(int));
1161  pb->pb_stats->marked_blocks = (int *) my_malloc(
1162  num_logical_blocks * sizeof(int));
1163 
1164  pb->pb_stats->num_marked_nets = 0;
1165  pb->pb_stats->num_marked_blocks = 0;
1166 
1168 }
1169 /*****************************************/
1170 
1171 /**
1172  * Try pack molecule into current cluster
1173  */
1174 static enum e_block_pack_status try_pack_molecule(
1175  INOUTP t_cluster_placement_stats *cluster_placement_stats_ptr,
1176  INP t_pack_molecule *molecule, INOUTP t_pb_graph_node **primitives_list,
1177  INOUTP t_pb * pb, INP int max_models, INP int max_cluster_size,
1178  INP int clb_index, INP int max_nets_in_pb_type, INP int detailed_routing_stage) {
1179  int molecule_size, failed_location;
1180  int i;
1181  enum e_block_pack_status block_pack_status;
1182  struct s_linked_vptr *cur_molecule;
1183  t_pb *parent;
1184  t_pb *cur_pb;
1185  t_logical_block *chain_root_block;
1186  boolean is_root_of_chain;
1187  t_pb_graph_pin *chain_root_pin;
1188 
1189 
1190  parent = NULL;
1191 
1192  block_pack_status = BLK_STATUS_UNDEFINED;
1193 
1194  molecule_size = get_array_size_of_molecule(molecule);
1195  failed_location = 0;
1196 
1197  while (block_pack_status != BLK_PASSED) {
1198  save_and_reset_routing_cluster(); /* save current routing information because speculative packing will change routing*/
1199  if (get_next_primitive_list(cluster_placement_stats_ptr, molecule,
1200  primitives_list, clb_index)) {
1201  block_pack_status = BLK_PASSED;
1202 
1203  for (i = 0; i < molecule_size && block_pack_status == BLK_PASSED;
1204  i++) {
1205  assert(
1206  (primitives_list[i] == NULL) == (molecule->logical_block_ptrs[i] == NULL));
1207  failed_location = i + 1;
1208  if (molecule->logical_block_ptrs[i] != NULL) {
1209  if(molecule->type == MOLECULE_FORCED_PACK && molecule->pack_pattern->is_chain && i == molecule->pack_pattern->root_block->block_id) {
1210  chain_root_pin = molecule->pack_pattern->chain_root_pin;
1211  is_root_of_chain = TRUE;
1212  } else {
1213  chain_root_pin = NULL;
1214  is_root_of_chain = FALSE;
1215  }
1216  block_pack_status = try_place_logical_block_rec(
1217  primitives_list[i],
1218  molecule->logical_block_ptrs[i]->index, pb, &parent,
1219  max_models, max_cluster_size, clb_index,
1220  max_nets_in_pb_type, cluster_placement_stats_ptr, is_root_of_chain, chain_root_pin);
1221  }
1222  }
1223  if (block_pack_status == BLK_PASSED) {
1224  /* Check if pin usage is feasible for the current packing assignment */
1225  reset_lookahead_pins_used(pb);
1226  try_update_lookahead_pins_used(pb);
1227  if (!check_lookahead_pins_used(pb)) {
1228  block_pack_status = BLK_FAILED_FEASIBLE;
1229  }
1230  }
1231  if (block_pack_status == BLK_PASSED) {
1232  /* Try to route if heuristic is to route for every atom
1233  Skip routing if heuristic is to route at the end of packing complex block
1234  */
1235  setup_intracluster_routing_for_molecule(molecule, primitives_list);
1236  if (detailed_routing_stage == (int)E_DETAILED_ROUTE_FOR_EACH_ATOM && try_breadth_first_route_cluster() == FALSE) {
1237  /* Cannot pack */
1238  block_pack_status = BLK_FAILED_ROUTE;
1239  } else {
1240  /* Pack successful, commit
1241  TODO: SW Engineering note - may want to update cluster stats here too instead of doing it outside
1242  */
1243  assert(block_pack_status == BLK_PASSED);
1244  if(molecule->type == MOLECULE_FORCED_PACK && molecule->pack_pattern->is_chain) {
1245  /* Chained molecules often take up lots of area and are important, if a chain is packed in, want to rename logic block to match chain name */
1246  chain_root_block = molecule->logical_block_ptrs[molecule->pack_pattern->root_block->block_id];
1247  cur_pb = chain_root_block->pb->parent_pb;
1248  while(cur_pb != NULL) {
1249  free(cur_pb->name);
1250  cur_pb->name = my_strdup(chain_root_block->name);
1251  cur_pb = cur_pb->parent_pb;
1252  }
1253  }
1254  for (i = 0; i < molecule_size; i++) {
1255  if (molecule->logical_block_ptrs[i] != NULL) {
1256  /* invalidate all molecules that share logical block with current molecule */
1257  cur_molecule =
1258  molecule->logical_block_ptrs[i]->packed_molecules;
1259  while (cur_molecule != NULL) {
1260  ((t_pack_molecule*) cur_molecule->data_vptr)->valid =
1261  FALSE;
1262  cur_molecule = cur_molecule->next;
1263  }
1264  commit_primitive(cluster_placement_stats_ptr,
1265  primitives_list[i]);
1266  }
1267  }
1268  }
1269  }
1270  if (block_pack_status != BLK_PASSED) {
1271  for (i = 0; i < failed_location; i++) {
1272  if (molecule->logical_block_ptrs[i] != NULL) {
1273  revert_place_logical_block(
1274  molecule->logical_block_ptrs[i]->index,
1275  max_models);
1276  }
1277  }
1279  }
1280  } else {
1281  block_pack_status = BLK_FAILED_FEASIBLE;
1283  break; /* no more candidate primitives available, this molecule will not pack, return fail */
1284  }
1285  }
1286  return block_pack_status;
1287 }
1288 
1289 /**
1290  * Try place logical block into current primitive location
1291  */
1292 
1293 static enum e_block_pack_status try_place_logical_block_rec(
1294  INP t_pb_graph_node *pb_graph_node, INP int ilogical_block,
1295  INP t_pb *cb, OUTP t_pb **parent, INP int max_models,
1296  INP int max_cluster_size, INP int clb_index,
1297  INP int max_nets_in_pb_type,
1298  INP t_cluster_placement_stats *cluster_placement_stats_ptr,
1299  INP boolean is_root_of_chain, INP t_pb_graph_pin *chain_root_pin) {
1300  int i, j;
1301  boolean is_primitive;
1302  enum e_block_pack_status block_pack_status;
1303 
1304  t_pb *my_parent;
1305  t_pb *pb, *parent_pb;
1306  const t_pb_type *pb_type;
1307 
1308  t_model_ports *root_port;
1309 
1310  my_parent = NULL;
1311 
1312  block_pack_status = BLK_PASSED;
1313 
1314  /* Discover parent */
1315  if (pb_graph_node->parent_pb_graph_node != cb->pb_graph_node) {
1316  block_pack_status = try_place_logical_block_rec(
1317  pb_graph_node->parent_pb_graph_node, ilogical_block, cb,
1318  &my_parent, max_models, max_cluster_size, clb_index,
1319  max_nets_in_pb_type, cluster_placement_stats_ptr, is_root_of_chain, chain_root_pin);
1320  parent_pb = my_parent;
1321  } else {
1322  parent_pb = cb;
1323  }
1324 
1325  /* Create siblings if siblings are not allocated */
1326  if (parent_pb->child_pbs == NULL) {
1327  assert(parent_pb->name == NULL);
1328  parent_pb->logical_block = OPEN;
1329  parent_pb->name = my_strdup(logical_block[ilogical_block].name);
1330  parent_pb->mode = pb_graph_node->pb_type->parent_mode->index;
1331  set_pb_graph_mode(parent_pb->pb_graph_node, 0, 0); /* TODO: default mode is to use mode 0, document this! */
1332  set_pb_graph_mode(parent_pb->pb_graph_node, parent_pb->mode, 1);
1333  parent_pb->child_pbs =
1334  (t_pb **) my_calloc(
1335  parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].num_pb_type_children,
1336  sizeof(t_pb *));
1337  for (i = 0;
1338  i
1339  < parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].num_pb_type_children;
1340  i++) {
1341  parent_pb->child_pbs[i] =
1342  (t_pb *) my_calloc(
1343  parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].pb_type_children[i].num_pb,
1344  sizeof(t_pb));
1345  for (j = 0;
1346  j
1347  < parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].pb_type_children[i].num_pb;
1348  j++) {
1349  parent_pb->child_pbs[i][j].parent_pb = parent_pb;
1350  parent_pb->child_pbs[i][j].logical_block = OPEN;
1351  parent_pb->child_pbs[i][j].pb_graph_node =
1352  &(parent_pb->pb_graph_node->child_pb_graph_nodes[parent_pb->mode][i][j]);
1353  }
1354  }
1355  } else {
1356  assert(parent_pb->mode == pb_graph_node->pb_type->parent_mode->index);
1357  }
1358 
1359  for (i = 0;
1360  i
1361  < parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].num_pb_type_children;
1362  i++) {
1363  if (pb_graph_node->pb_type
1364  == &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].pb_type_children[i]) {
1365  break;
1366  }
1367  }
1368  assert(
1369  i < parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].num_pb_type_children);
1370  pb = &parent_pb->child_pbs[i][pb_graph_node->placement_index];
1371  *parent = pb; /* this pb is parent of it's child that called this function */
1372  assert(pb->pb_graph_node == pb_graph_node);
1373  if (pb->pb_stats == NULL) {
1374  alloc_and_load_pb_stats(pb, max_models, max_nets_in_pb_type);
1375  }
1376  pb_type = pb_graph_node->pb_type;
1377 
1378  is_primitive = (boolean) (pb_type->num_modes == 0);
1379 
1380  if (is_primitive) {
1381  assert(
1382  pb->logical_block == OPEN && logical_block[ilogical_block].pb == NULL && logical_block[ilogical_block].clb_index == NO_CLUSTER);
1383  /* try pack to location */
1384  pb->name = my_strdup(logical_block[ilogical_block].name);
1385  pb->logical_block = ilogical_block;
1386  logical_block[ilogical_block].clb_index = clb_index;
1387  logical_block[ilogical_block].pb = pb;
1388 
1389  if (!primitive_feasible(ilogical_block, pb)) {
1390  /* failed location feasibility check, revert pack */
1391  block_pack_status = BLK_FAILED_FEASIBLE;
1392  }
1393 
1394  if (block_pack_status == BLK_PASSED && is_root_of_chain == TRUE) {
1395  /* is carry chain, must check if this carry chain spans multiple logic blocks or not */
1396  root_port = chain_root_pin->port->model_port;
1397  if(logical_block[ilogical_block].input_nets[root_port->index][chain_root_pin->pin_number] != OPEN) {
1398  /* this carry chain spans multiple logic blocks, must match up correctly with previous chain for this to route */
1399  if(pb_graph_node != chain_root_pin->parent_node) {
1400  /* this location does not match with the dedicated chain input from outside logic block, therefore not feasible */
1401  block_pack_status = BLK_FAILED_FEASIBLE;
1402  }
1403  }
1404  }
1405  }
1406 
1407  return block_pack_status;
1408 }
1409 
1410 /* Revert trial logical block iblock and free up memory space accordingly
1411  */
1412 static void revert_place_logical_block(INP int iblock, INP int max_models) {
1413  t_pb *pb, *next;
1414 
1415  pb = logical_block[iblock].pb;
1416  logical_block[iblock].clb_index = NO_CLUSTER;
1417  logical_block[iblock].pb = NULL;
1418 
1419  if (pb != NULL) {
1420  /* When freeing molecules, the current block might already have been freed by a prior revert
1421  When this happens, no need to do anything beyond basic book keeping at the logical block
1422  */
1423 
1424  next = pb->parent_pb;
1425  free_pb(pb);
1426  pb = next;
1427 
1428  while (pb != NULL) {
1429  /* If this is pb is created only for the purposes of holding new molecule, remove it
1430  Must check if cluster is already freed (which can be the case)
1431  */
1432  next = pb->parent_pb;
1433 
1434  if (pb->child_pbs != NULL && pb->pb_stats != NULL
1435  && pb->pb_stats->num_child_blocks_in_pb == 0) {
1436  set_pb_graph_mode(pb->pb_graph_node, pb->mode, 0); /* default mode is to use mode 1 */
1437  set_pb_graph_mode(pb->pb_graph_node, 0, 1);
1438  if (next != NULL) {
1439  /* If the code gets here, then that means that placing the initial seed molecule failed, don't free the actual complex block itself as the seed needs to find another placement */
1440  free_pb(pb);
1441  }
1442  }
1443  pb = next;
1444  }
1445  }
1446 }
1447 
1448 static void update_connection_gain_values(int inet, int clustered_block,
1449  t_pb *cur_pb,
1450  enum e_net_relation_to_clustered_block net_relation_to_clustered_block) {
1451  /*This function is called when the connectiongain values on the vpack_net*
1452  *inet require updating. */
1453 
1454  int iblk, ipin;
1455  int clb_index;
1456  int num_internal_connections, num_open_connections, num_stuck_connections;
1457 
1458  num_internal_connections = num_open_connections = num_stuck_connections = 0;
1459 
1460  clb_index = logical_block[clustered_block].clb_index;
1461 
1462  /* may wish to speed things up by ignoring clock nets since they are high fanout */
1463 
1464  for (ipin = 0; ipin <= vpack_net[inet].num_sinks; ipin++) {
1465  iblk = vpack_net[inet].node_block[ipin];
1466  if (logical_block[iblk].clb_index == clb_index
1467  && is_logical_blk_in_pb(iblk,
1468  logical_block[clustered_block].pb)) {
1469  num_internal_connections++;
1470  } else if (logical_block[iblk].clb_index == OPEN) {
1471  num_open_connections++;
1472  } else {
1473  num_stuck_connections++;
1474  }
1475  }
1476 
1477  if (net_relation_to_clustered_block == OUTPUT) {
1478  for (ipin = 1; ipin <= vpack_net[inet].num_sinks; ipin++) {
1479  iblk = vpack_net[inet].node_block[ipin];
1480  if (logical_block[iblk].clb_index == NO_CLUSTER) {
1481  /* TODO: Gain function accurate only if net has one connection to block, TODO: Should we handle case where net has multi-connection to block? Gain computation is only off by a bit in this case */
1482  if(cur_pb->pb_stats->connectiongain.count(iblk) == 0) {
1483  cur_pb->pb_stats->connectiongain[iblk] = 0;
1484  }
1485 
1486  if (num_internal_connections > 1) {
1487  cur_pb->pb_stats->connectiongain[iblk] -= 1
1488  / (float) (vpack_net[inet].num_sinks
1489  - (num_internal_connections - 1)
1490  + 1 * num_stuck_connections);
1491  }
1492  cur_pb->pb_stats->connectiongain[iblk] += 1
1493  / (float) (vpack_net[inet].num_sinks
1494  - num_internal_connections
1495  + 1 * num_stuck_connections);
1496  }
1497  }
1498  }
1499 
1500  if (net_relation_to_clustered_block == INPUT) {
1501  /*Calculate the connectiongain for the logical_block which is driving *
1502  *the vpack_net that is an input to a logical_block in the cluster */
1503 
1504  iblk = vpack_net[inet].node_block[0];
1505  if (logical_block[iblk].clb_index == NO_CLUSTER) {
1506  if(cur_pb->pb_stats->connectiongain.count(iblk) == 0) {
1507  cur_pb->pb_stats->connectiongain[iblk] = 0;
1508  }
1509  if (num_internal_connections > 1) {
1510  cur_pb->pb_stats->connectiongain[iblk] -= 1
1511  / (float) (vpack_net[inet].num_sinks
1512  - (num_internal_connections - 1) + 1
1513  + 1 * num_stuck_connections);
1514  }
1515  cur_pb->pb_stats->connectiongain[iblk] += 1
1516  / (float) (vpack_net[inet].num_sinks
1517  - num_internal_connections + 1
1518  + 1 * num_stuck_connections);
1519  }
1520  }
1521 }
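/* Illustrative sketch (not part of the original file): the marginal connection
 * gain credited in the output-net case above. Absorbing a connection is worth
 * more when few sinks remain outside the cluster and none are stuck in other
 * clusters; e.g. a 4-sink net with 2 sinks already inside and none stuck adds
 * 1 / (4 - 2 + 0) = 0.5 to the candidate block. The function name is a
 * placeholder. */
static float example_connection_gain_increment(int num_sinks,
		int num_internal_connections, int num_stuck_connections) {
	return 1 / (float) (num_sinks - num_internal_connections
			+ 1 * num_stuck_connections);
}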
1522 /*****************************************/
1523 static void update_timing_gain_values(int inet, int clustered_block,
1524  t_pb *cur_pb,
1525  enum e_net_relation_to_clustered_block net_relation_to_clustered_block, t_slack * slacks) {
1526 
1527  /*This function is called when the timing_gain values on the vpack_net*
1528  *inet requires updating. */
1529  float timinggain;
1530  int newblk, ifirst;
1531  int iblk, ipin;
1532 
1533  /* Check if this vpack_net lists its driving logical_block twice. If so, avoid *
1534  * double counting this logical_block by skipping the first (driving) pin. */
1535  if (net_output_feeds_driving_block_input[inet] == 0)
1536  ifirst = 0;
1537  else
1538  ifirst = 1;
1539 
1540  if (net_relation_to_clustered_block == OUTPUT
1541  && !vpack_net[inet].is_global) {
1542  for (ipin = ifirst; ipin <= vpack_net[inet].num_sinks; ipin++) {
1543  iblk = vpack_net[inet].node_block[ipin];
1544  if (logical_block[iblk].clb_index == NO_CLUSTER) {
1545 #ifdef PATH_COUNTING
1546  /* Timing gain is a weighted sum of timing and path criticalities. */
1547  timinggain = TIMING_GAIN_PATH_WEIGHT * slacks->path_criticality[inet][ipin]
1548  + (1 - TIMING_GAIN_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin];
1549 #else
1550  /* Timing gain is the timing criticality. */
1551  timinggain = slacks->timing_criticality[inet][ipin];
1552 #endif
1553  if(cur_pb->pb_stats->timinggain.count(iblk) == 0) {
1554  cur_pb->pb_stats->timinggain[iblk] = 0;
1555  }
1556  if (timinggain > cur_pb->pb_stats->timinggain[iblk])
1557  cur_pb->pb_stats->timinggain[iblk] = timinggain;
1558  }
1559  }
1560  }
1561 
1562  if (net_relation_to_clustered_block == INPUT
1563  && !vpack_net[inet].is_global) {
1564  /*Calculate the timing gain for the logical_block which is driving *
1565  *the vpack_net that is an input to a logical_block in the cluster */
1566  newblk = vpack_net[inet].node_block[0];
1567  if (logical_block[newblk].clb_index == NO_CLUSTER) {
1568  for (ipin = 1; ipin <= vpack_net[inet].num_sinks; ipin++) {
1569 #ifdef PATH_COUNTING
1570  /* Timing gain is a weighted sum of timing and path criticalities. */
1571  timinggain = TIMING_GAIN_PATH_WEIGHT * slacks->path_criticality[inet][ipin]
1572  + (1 - TIMING_GAIN_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin];
1573 #else
1574  /* Timing gain is the timing criticality. */
1575  timinggain = slacks->timing_criticality[inet][ipin];
1576 #endif
1577  if(cur_pb->pb_stats->timinggain.count(newblk) == 0) {
1578  cur_pb->pb_stats->timinggain[newblk] = 0;
1579  }
1580  if (timinggain > cur_pb->pb_stats->timinggain[newblk])
1581  cur_pb->pb_stats->timinggain[newblk] = timinggain;
1582 
1583  }
1584  }
1585  }
1586 }
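/* Illustrative sketch (not part of cluster.c): both branches above reduce to
 * "remember the most critical connection seen so far" for each candidate
 * block, keyed by block index in a std::map.  Names below are hypothetical. */
static void record_timing_gain(std::map<int, float> &timinggain, int iblk,
		float criticality) {
	if (timinggain.count(iblk) == 0)
		timinggain[iblk] = 0;
	if (criticality > timinggain[iblk])
		timinggain[iblk] = criticality; /* keep only the maximum criticality */
}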
1587 /*****************************************/
1588 static void mark_and_update_partial_gain(int inet, enum e_gain_update gain_flag,
1589  int clustered_block, int port_on_clustered_block,
1590  int pin_on_clustered_block, boolean timing_driven,
1591  boolean connection_driven,
1592  enum e_net_relation_to_clustered_block net_relation_to_clustered_block,
1593  t_slack * slacks) {
1594 
1595  /* Updates the marked data structures, and if gain_flag is GAIN, *
1596  * the gain when a logical_block is added to a cluster. The *
1597  * sharinggain is the number of inputs that a logical_block shares with *
1598  * blocks that are already in the cluster. Hillgain is the *
1599  * reduction in the number of pins required by adding a logical_block to the *
1600  * cluster. The timinggain is the criticality of the most critical*
1601  * vpack_net between this logical_block and a logical_block in the cluster. */
1602 
1603  int iblk, ipin, ifirst, stored_net;
1604  t_pb *cur_pb;
1605 
1606  cur_pb = logical_block[clustered_block].pb->parent_pb;
1607 
1608 
1609  if (vpack_net[inet].num_sinks > AAPACK_MAX_NET_SINKS_IGNORE) {
1610  /* Optimization: it can be too costly at runtime to mark all sinks of a high-fanout net that probably has no hope of ever being absorbed, so ignore such high-fanout nets */
1611  if(vpack_net[inet].is_global != TRUE) {
1612  /* If there are no low/medium fanout nets left, we may need to consider high-fanout nets for packing, so select one and store it */
1613  while(cur_pb->parent_pb != NULL) {
1614  cur_pb = cur_pb->parent_pb;
1615  }
1616  stored_net = cur_pb->pb_stats->tie_break_high_fanout_net;
1617  if(stored_net == OPEN || vpack_net[inet].num_sinks < vpack_net[stored_net].num_sinks) {
1618  cur_pb->pb_stats->tie_break_high_fanout_net = inet;
1619  }
1620  }
1621  return;
1622  }
1623 
1624  while (cur_pb) {
1625  /* Mark vpack_net as being visited, if necessary. */
1626 
1627  if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(inet) == 0) {
1628  cur_pb->pb_stats->marked_nets[cur_pb->pb_stats->num_marked_nets] =
1629  inet;
1630  cur_pb->pb_stats->num_marked_nets++;
1631  }
1632 
1633  /* Update gains of affected blocks. */
1634 
1635  if (gain_flag == GAIN) {
1636 
1637  /* Check if this vpack_net lists its driving logical_block twice. If so, avoid *
1638  * double counting this logical_block by skipping the first (driving) pin. */
1639 
1640  if (net_output_feeds_driving_block_input[inet] == 0)
1641  ifirst = 0;
1642  else
1643  ifirst = 1;
1644 
1645  if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(inet) == 0) {
1646  for (ipin = ifirst; ipin <= vpack_net[inet].num_sinks; ipin++) {
1647  iblk = vpack_net[inet].node_block[ipin];
1648  if (logical_block[iblk].clb_index == NO_CLUSTER) {
1649 
1650  if (cur_pb->pb_stats->sharinggain.count(iblk) == 0) {
1651  cur_pb->pb_stats->marked_blocks[cur_pb->pb_stats->num_marked_blocks] =
1652  iblk;
1653  cur_pb->pb_stats->num_marked_blocks++;
1654  cur_pb->pb_stats->sharinggain[iblk] = 1;
1655  cur_pb->pb_stats->hillgain[iblk] = 1
1656  - num_ext_inputs_logical_block(iblk);
1657  } else {
1658  cur_pb->pb_stats->sharinggain[iblk]++;
1659  cur_pb->pb_stats->hillgain[iblk]++;
1660  }
1661  }
1662  }
1663  }
1664 
1665  if (connection_driven) {
1666  update_connection_gain_values(inet, clustered_block, cur_pb,
1667  net_relation_to_clustered_block);
1668  }
1669 
1670  if (timing_driven) {
1671  update_timing_gain_values(inet, clustered_block, cur_pb,
1672  net_relation_to_clustered_block, slacks);
1673  }
1674  }
1675  if(cur_pb->pb_stats->num_pins_of_net_in_pb.count(inet) == 0) {
1676  cur_pb->pb_stats->num_pins_of_net_in_pb[inet] = 0;
1677  }
1678  cur_pb->pb_stats->num_pins_of_net_in_pb[inet]++;
1679  cur_pb = cur_pb->parent_pb;
1680  }
1681 }
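/* Illustrative sketch (not part of cluster.c): the while (cur_pb) walk above
 * propagates bookkeeping from the primitive's parent up to the top-level
 * cluster, so a net touching a LUT inside a BLE inside a CLB is counted in
 * every enclosing pb.  Names below are hypothetical. */
static void count_net_at_every_level(t_pb *start_pb, int inet) {
	t_pb *cur_pb;
	for (cur_pb = start_pb; cur_pb != NULL; cur_pb = cur_pb->parent_pb) {
		cur_pb->pb_stats->num_pins_of_net_in_pb[inet]++; /* std::map value-initializes missing keys to 0 */
	}
}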
1682 
1683 /*****************************************/
1684 static void update_total_gain(float alpha, float beta, boolean timing_driven,
1685  boolean connection_driven, boolean global_clocks, t_pb *pb) {
1686 
1687  /*Updates the total gain array to reflect the desired tradeoff between*
1688  *input sharing (sharinggain) and path_length minimization (timinggain)*/
1689 
1690  int i, iblk, j, k;
1691  t_pb * cur_pb;
1692  int num_input_pins, num_output_pins;
1693  int num_used_input_pins, num_used_output_pins;
1694  t_model_ports *port;
1695 
1696  cur_pb = pb;
1697  while (cur_pb) {
1698 
1699  for (i = 0; i < cur_pb->pb_stats->num_marked_blocks; i++) {
1700  iblk = cur_pb->pb_stats->marked_blocks[i];
1701 
1702  if(cur_pb->pb_stats->connectiongain.count(iblk) == 0) {
1703  cur_pb->pb_stats->connectiongain[iblk] = 0;
1704  }
1705  if(cur_pb->pb_stats->sharinggain.count(iblk) == 0) {
1706  cur_pb->pb_stats->sharinggain[iblk] = 0;
1707  }
1708 
1709  /* Todo: This was used to explore different normalization options, can be made more efficient once we decide on which one to use*/
1710  num_input_pins = 0;
1711  port = logical_block[iblk].model->inputs;
1712  j = 0;
1713  num_used_input_pins = 0;
1714  while (port) {
1715  num_input_pins += port->size;
1716  if (!port->is_clock) {
1717  for (k = 0; k < port->size; k++) {
1718  if (logical_block[iblk].input_nets[j][k] != OPEN) {
1719  num_used_input_pins++;
1720  }
1721  }
1722  j++;
1723  }
1724  port = port->next;
1725  }
1726  if (num_input_pins == 0) {
1727  num_input_pins = 1;
1728  }
1729 
1730  num_used_output_pins = 0;
1731  j = 0;
1732  num_output_pins = 0;
1733  port = logical_block[iblk].model->outputs;
1734  while (port) {
1735  num_output_pins += port->size;
1736  for (k = 0; k < port->size; k++) {
1737  if (logical_block[iblk].output_nets[j][k] != OPEN) {
1738  num_used_output_pins++;
1739  }
1740  }
1741  port = port->next;
1742  j++;
1743  }
1744  /* end todo */
1745 
1746  /* Calculate area-only cost function */
1747  if (connection_driven) {
1748  /*try to absorb as many connections as possible*/
1749  /*cur_pb->pb_stats->gain[iblk] = ((1-beta)*(float)cur_pb->pb_stats->sharinggain[iblk] + beta*(float)cur_pb->pb_stats->connectiongain[iblk])/(num_input_pins + num_output_pins);*/
1750  cur_pb->pb_stats->gain[iblk] = ((1 - beta)
1751  * (float) cur_pb->pb_stats->sharinggain[iblk]
1752  + beta * (float) cur_pb->pb_stats->connectiongain[iblk])
1753  / (num_used_input_pins + num_used_output_pins);
1754  } else {
1755  /*cur_pb->pb_stats->gain[iblk] = ((float)cur_pb->pb_stats->sharinggain[iblk])/(num_input_pins + num_output_pins); */
1756  cur_pb->pb_stats->gain[iblk] =
1757  ((float) cur_pb->pb_stats->sharinggain[iblk])
1758  / (num_used_input_pins + num_used_output_pins);
1759 
1760  }
1761 
1762  /* Add in timing driven cost into cost function */
1763  if (timing_driven) {
1764  cur_pb->pb_stats->gain[iblk] = alpha
1765  * cur_pb->pb_stats->timinggain[iblk]
1766  + (1.0 - alpha) * (float) cur_pb->pb_stats->gain[iblk];
1767  }
1768  }
1769  cur_pb = cur_pb->parent_pb;
1770  }
1771 }
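/* Illustrative sketch (not part of cluster.c): a worked example of the blended
 * cost above.  With alpha = 0.75 and beta = 0.9 (example values only), a
 * candidate with sharinggain 2, connectiongain 0.5, timinggain 0.8 and 6 used
 * pins scores:
 *   area  = ((1 - 0.9)*2 + 0.9*0.5) / 6   = 0.108
 *   total = 0.75*0.8 + (1 - 0.75)*0.108   = 0.627
 * Names below are hypothetical. */
static float blended_gain(float alpha, float beta, float sharinggain,
		float connectiongain, float timinggain, int num_used_pins) {
	float area_gain = ((1 - beta) * sharinggain + beta * connectiongain)
			/ (float) num_used_pins;
	return alpha * timinggain + (1 - alpha) * area_gain;
}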
1772 
1773 /*****************************************/
1774 static void update_cluster_stats( INP t_pack_molecule *molecule,
1775  INP int clb_index, INP boolean *is_clock, INP boolean global_clocks,
1776  INP float alpha, INP float beta, INP boolean timing_driven,
1777  INP boolean connection_driven, INP t_slack * slacks) {
1778 
1779  /* Updates cluster stats such as gain, used pins, and clock structures. */
1780 
1781  int ipin, inet;
1782  int new_blk, molecule_size;
1783  int iblock;
1784  t_model_ports *port;
1785  t_pb *cur_pb, *cb;
1786 
1787  /* TODO: what a scary comment from Vaughn, we'll have to watch out for this causing problems */
1788  /* Output can be open so the check is necessary. I don't change *
1789  * the gain for clock outputs when clocks are globally distributed *
1790  * because I assume there is no real need to pack similarly clocked *
1791  * FFs together in that case. Note that updating the gain when the *
1792  * clock driver is placed in a cluster implies that the output of *
1793  * LUTs can be connected to clock inputs internally. Probably not *
1794  * true, but it doesn't make much difference, since it will still *
1795  * make local routing of this clock very short, and none of my *
1796  * benchmarks actually generate local clocks (all come from pads). */
1797 
1798  molecule_size = get_array_size_of_molecule(molecule);
1799  cb = NULL;
1800 
1801  for (iblock = 0; iblock < molecule_size; iblock++) {
1802  if (molecule->logical_block_ptrs[iblock] == NULL) {
1803  continue;
1804  }
1805  new_blk = molecule->logical_block_ptrs[iblock]->index;
1806 
1807  logical_block[new_blk].clb_index = clb_index;
1808 
1809  cur_pb = logical_block[new_blk].pb->parent_pb;
1810  while (cur_pb) {
1811  /* reset list of feasible blocks */
1812  cur_pb->pb_stats->num_feasible_blocks = NOT_VALID;
1813  cur_pb->pb_stats->num_child_blocks_in_pb++;
1814  if (cur_pb->parent_pb == NULL) {
1815  cb = cur_pb;
1816  }
1817  cur_pb = cur_pb->parent_pb;
1818  }
1819 
1820  port = logical_block[new_blk].model->outputs;
1821  while (port) {
1822  for (ipin = 0; ipin < port->size; ipin++) {
1823  inet = logical_block[new_blk].output_nets[port->index][ipin]; /* Output pin first. */
1824  if (inet != OPEN) {
1825  if (!is_clock[inet] || !global_clocks)
1826  mark_and_update_partial_gain(inet, GAIN, new_blk,
1827  port->index, ipin, timing_driven,
1828  connection_driven, OUTPUT, slacks);
1829  else
1830  mark_and_update_partial_gain(inet, NO_GAIN, new_blk,
1831  port->index, ipin, timing_driven,
1832  connection_driven, OUTPUT, slacks);
1833  }
1834  }
1835  port = port->next;
1836  }
1837  port = logical_block[new_blk].model->inputs;
1838  while (port) {
1839  if (port->is_clock) {
1840  port = port->next;
1841  continue;
1842  }
1843  for (ipin = 0; ipin < port->size; ipin++) { /* VPACK_BLOCK input pins. */
1844 
1845  inet = logical_block[new_blk].input_nets[port->index][ipin];
1846  if (inet != OPEN) {
1847  mark_and_update_partial_gain(inet, GAIN, new_blk,
1848  port->index, ipin, timing_driven, connection_driven,
1849  INPUT, slacks);
1850  }
1851  }
1852  port = port->next;
1853  }
1854 
1855  /* Note: The code below ONLY WORKS when nets that go to clock inputs *
1856  * NEVER go to the input of a VPACK_COMB. It doesn't really make electrical *
1857  * sense for that to happen, and I check this in the check_clocks *
1858  * function. Don't disable that sanity check. */
1859  inet = logical_block[new_blk].clock_net; /* Clock input pin. */
1860  if (inet != OPEN) {
1861  if (global_clocks)
1862  mark_and_update_partial_gain(inet, NO_GAIN, new_blk, 0, 0,
1863  timing_driven, connection_driven, INPUT, slacks);
1864  else
1865  mark_and_update_partial_gain(inet, GAIN, new_blk, 0, 0,
1866  timing_driven, connection_driven, INPUT, slacks);
1867 
1868  }
1869 
1870  update_total_gain(alpha, beta, timing_driven, connection_driven,
1871  global_clocks, logical_block[new_blk].pb->parent_pb);
1872 
1873  commit_lookahead_pins_used(cb);
1874  }
1875 
1876 }
1877 
1878 static void start_new_cluster(
1879  INP t_cluster_placement_stats *cluster_placement_stats,
1880  INOUTP t_pb_graph_node **primitives_list, INP const t_arch * arch,
1881  INOUTP t_block *new_cluster, INP int clb_index,
1882  INP t_pack_molecule *molecule, INP float aspect,
1883  INOUTP int *num_used_instances_type, INOUTP int *num_instances_type,
1884  INP int num_models, INP int max_cluster_size,
1885  INP int max_nets_in_pb_type, INP int detailed_routing_stage) {
1886  /* Given a starting seed block, start_new_cluster determines the next cluster type to use.
1887  It expands the FPGA if it cannot find a legal cluster for the logical block.
1888  */
1889  int i, j;
1890  boolean success;
1891  int count;
1892 
1893  assert(new_cluster->name == NULL);
1894  /* Check if this cluster is really empty */
1895 
1896  /* Allocate a dummy initial cluster and load a logical block as a seed and check if it is legal */
1897  new_cluster->name = (char*) my_malloc(
1898  (strlen(molecule->logical_block_ptrs[molecule->root]->name) + 4) * sizeof(char));
1899  sprintf(new_cluster->name, "cb.%s",
1900  molecule->logical_block_ptrs[molecule->root]->name);
1901  new_cluster->nets = NULL;
1902  new_cluster->type = NULL;
1903  new_cluster->pb = NULL;
1904  new_cluster->x = UNDEFINED;
1905  new_cluster->y = UNDEFINED;
1906  new_cluster->z = UNDEFINED;
1907 
1908  success = FALSE;
1909 
1910  while (!success) {
1911  count = 0;
1912  for (i = 0; i < num_types; i++) {
1913  if (num_used_instances_type[i] < num_instances_type[i]) {
1914  new_cluster->type = &type_descriptors[i];
1915  if (new_cluster->type == EMPTY_TYPE) {
1916  continue;
1917  }
1918  new_cluster->pb = (t_pb*)my_calloc(1, sizeof(t_pb));
1919  new_cluster->pb->pb_graph_node =
1920  new_cluster->type->pb_graph_head;
1921  alloc_and_load_pb_stats(new_cluster->pb, num_models,
1922  max_nets_in_pb_type);
1923  new_cluster->pb->parent_pb = NULL;
1924 
1925  alloc_and_load_legalizer_for_cluster(new_cluster, clb_index,
1926  arch);
1927  for (j = 0;
1928  j < new_cluster->type->pb_graph_head->pb_type->num_modes
1929  && !success; j++) {
1930  new_cluster->pb->mode = j;
1931  reset_cluster_placement_stats(&cluster_placement_stats[i]);
1932  set_mode_cluster_placement_stats(
1933  new_cluster->pb->pb_graph_node, j);
1934  success = (boolean) (BLK_PASSED
1935  == try_pack_molecule(&cluster_placement_stats[i],
1936  molecule, primitives_list, new_cluster->pb,
1937  num_models, max_cluster_size, clb_index,
1938  max_nets_in_pb_type, detailed_routing_stage));
1939  }
1940  if (success) {
1941  /* TODO: For now, just grab any working cluster, in the future, heuristic needed to grab best complex block based on supply and demand */
1942  break;
1943  } else {
1944  free_legalizer_for_cluster(new_cluster, TRUE);
1945  free_pb_stats(new_cluster->pb);
1946  free(new_cluster->pb);
1947  }
1948  count++;
1949  }
1950  }
1951  if (count == num_types - 1) {
1952  vpr_printf(TIO_MESSAGE_ERROR, "Cannot find any logic block that can implement molecule.\n");
1953  if (molecule->type == MOLECULE_FORCED_PACK) {
1954  vpr_printf(TIO_MESSAGE_ERROR, "\tPattern %s %s\n",
1955  molecule->pack_pattern->name,
1956  molecule->logical_block_ptrs[molecule->root]->name);
1957  } else {
1958  vpr_printf(TIO_MESSAGE_ERROR, "\tAtom %s\n",
1959  molecule->logical_block_ptrs[molecule->root]->name);
1960  }
1961  exit(1);
1962  }
1963 
1964  /* Expand FPGA size and recalculate number of available cluster types*/
1965  if (!success) {
1966  if (aspect >= 1.0) {
1967  ny++;
1968  nx = nint(ny * aspect);
1969  } else {
1970  nx++;
1971  ny = nint(nx / aspect);
1972  }
1973  vpr_printf(TIO_MESSAGE_INFO, "Not enough resources; expanding FPGA size to x = %d y = %d.\n",
1974  nx, ny);
1975  if ((nx > MAX_SHORT) || (ny > MAX_SHORT)) {
1976  vpr_printf(TIO_MESSAGE_ERROR, "Circuit cannot pack into architecture, architecture size (nx = %d, ny = %d) exceeds packer range.\n",
1977  nx, ny);
1978  exit(1);
1979  }
1980  alloc_and_load_grid(num_instances_type);
1981  freeGrid();
1982  }
1983  }
1984  num_used_instances_type[new_cluster->type->index]++;
1985 }
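/* Illustrative sketch (not part of cluster.c): the resize rule above grows one
 * grid dimension by one and re-derives the other from the aspect ratio, e.g.
 * aspect = 1.5 and ny = 10 gives nx = nint(10 * 1.5) = 15, where nint() is the
 * same rounding macro used above.  Names below are hypothetical. */
static void grow_grid_keeping_aspect(float aspect, int *nx_ptr, int *ny_ptr) {
	if (aspect >= 1.0) {
		(*ny_ptr)++;
		*nx_ptr = nint(*ny_ptr * aspect);
	} else {
		(*nx_ptr)++;
		*ny_ptr = nint(*nx_ptr / aspect);
	}
}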
1986 
1987 /*****************************************/
1988 static t_pack_molecule *get_highest_gain_molecule(
1989  INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb,
1990  INP enum e_gain_type gain_mode,
1991  INP t_cluster_placement_stats *cluster_placement_stats_ptr) {
1992 
1993  /* This routine populates a list of feasible molecules outside the cluster and *
1994  * returns the best candidate for the cluster. Only blocks not currently in a *
1995  * cluster that pass the cluster placement feasibility check are considered. *
1996  * If there are no feasible molecules it returns NULL. */
1997 
1998  int i, j, iblk, index, inet, count;
1999  boolean success;
2000  struct s_linked_vptr *cur;
2001 
2002  t_pack_molecule *molecule;
2003  molecule = NULL;
2004 
2005  if (gain_mode == HILL_CLIMBING) {
2006  vpr_printf(TIO_MESSAGE_ERROR, "Hill climbing not supported yet, error out.\n");
2007  exit(1);
2008  }
2009 
2010  if (cur_pb->pb_stats->num_feasible_blocks == NOT_VALID) {
2011  /* Divide into two cases for speed only. */
2012  /* Typical case: not too many blocks have been marked. */
2013 
2014  cur_pb->pb_stats->num_feasible_blocks = 0;
2015 
2016  if (cur_pb->pb_stats->num_marked_blocks
2017  < num_logical_blocks / MARKED_FRAC) {
2018  for (i = 0; i < cur_pb->pb_stats->num_marked_blocks; i++) {
2019  iblk = cur_pb->pb_stats->marked_blocks[i];
2020  if (logical_block[iblk].clb_index == NO_CLUSTER) {
2021  cur = logical_block[iblk].packed_molecules;
2022  while (cur != NULL) {
2023  molecule = (t_pack_molecule *) cur->data_vptr;
2024  if (molecule->valid) {
2025  success = TRUE;
2026  for (j = 0;
2027  j < get_array_size_of_molecule(molecule);
2028  j++) {
2029  if (molecule->logical_block_ptrs[j] != NULL) {
2030  assert(
2031  molecule->logical_block_ptrs[j]->clb_index == NO_CLUSTER);
2032  if (!exists_free_primitive_for_logical_block(
2033  cluster_placement_stats_ptr,
2034  iblk)) { /* TODO: debating whether to check if placement exists for molecule (more robust) or individual logical blocks (faster) */
2035  success = FALSE;
2036  break;
2037  }
2038  }
2039  }
2040  if (success) {
2041  add_molecule_to_pb_stats_candidates(molecule,
2042  cur_pb->pb_stats->gain, cur_pb);
2043  }
2044  }
2045  cur = cur->next;
2046  }
2047  }
2048  }
2049  } else { /* Some high fanout nets marked lots of blocks. */
2050  for (iblk = 0; iblk < num_logical_blocks; iblk++) {
2051  if (logical_block[iblk].clb_index == NO_CLUSTER) {
2052  cur = logical_block[iblk].packed_molecules;
2053  while (cur != NULL) {
2054  molecule = (t_pack_molecule *) cur->data_vptr;
2055  if (molecule->valid) {
2056  success = TRUE;
2057  for (j = 0;
2058  j < get_array_size_of_molecule(molecule);
2059  j++) {
2060  if (molecule->logical_block_ptrs[j] != NULL) {
2061  assert(
2062  molecule->logical_block_ptrs[j]->clb_index == NO_CLUSTER);
2063  if (!exists_free_primitive_for_logical_block(
2064  cluster_placement_stats_ptr,
2065  iblk)) {
2066  success = FALSE;
2067  break;
2068  }
2069  }
2070  }
2071  if (success) {
2072  add_molecule_to_pb_stats_candidates(molecule,
2073  cur_pb->pb_stats->gain, cur_pb);
2074  }
2075  }
2076  cur = cur->next;
2077  }
2078  }
2079  }
2080  }
2081  }
2082  if(cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net != OPEN) {
2083  /* Because the packer ignores high fanout nets when marking which blocks to consider, use one of the ignored high-fanout nets to pull in loosely related blocks */
2084  reset_tried_but_unused_cluster_placements(cluster_placement_stats_ptr);
2085  inet = cur_pb->pb_stats->tie_break_high_fanout_net;
2086  count = 0;
2087  for (i = 0; i <= vpack_net[inet].num_sinks && count < AAPACK_MAX_HIGH_FANOUT_EXPLORE; i++) {
2088  iblk = vpack_net[inet].node_block[i];
2089  if (logical_block[iblk].clb_index == NO_CLUSTER) {
2090  cur = logical_block[iblk].packed_molecules;
2091  while (cur != NULL) {
2092  molecule = (t_pack_molecule *) cur->data_vptr;
2093  if (molecule->valid) {
2094  success = TRUE;
2095  for (j = 0;
2096  j < get_array_size_of_molecule(molecule);
2097  j++) {
2098  if (molecule->logical_block_ptrs[j] != NULL) {
2099  assert(
2100  molecule->logical_block_ptrs[j]->clb_index == NO_CLUSTER);
2101  if (!exists_free_primitive_for_logical_block(
2102  cluster_placement_stats_ptr,
2103  iblk)) { /* TODO: debating whether to check if placement exists for molecule (more robust) or individual logical blocks (faster) */
2104  success = FALSE;
2105  break;
2106  }
2107  }
2108  }
2109  if (success) {
2110  add_molecule_to_pb_stats_candidates(molecule,
2111  cur_pb->pb_stats->gain, cur_pb);
2112  count++;
2113  }
2114  }
2115  cur = cur->next;
2116  }
2117  }
2118  }
2119  cur_pb->pb_stats->tie_break_high_fanout_net = OPEN; /* Mark off that this high fanout net has been considered */
2120  }
2121  molecule = NULL;
2122  for (j = 0; j < cur_pb->pb_stats->num_feasible_blocks; j++) {
2123  if (cur_pb->pb_stats->num_feasible_blocks != 0) {
2124  cur_pb->pb_stats->num_feasible_blocks--;
2125  index = cur_pb->pb_stats->num_feasible_blocks;
2126  molecule = cur_pb->pb_stats->feasible_blocks[index];
2127  assert(molecule->valid == TRUE);
2128  return molecule;
2129  }
2130  }
2131 
2132  return molecule;
2133 }
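/* Illustrative sketch (not part of cluster.c): the three candidate-collection
 * loops above all walk the same generic linked list of molecules attached to a
 * logical block.  Names below are hypothetical. */
static t_pack_molecule *first_valid_molecule(struct s_linked_vptr *cur) {
	t_pack_molecule *molecule;
	while (cur != NULL) {
		molecule = (t_pack_molecule *) cur->data_vptr;
		if (molecule->valid)
			return molecule;
		cur = cur->next;
	}
	return NULL;
}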
2134 
2135 /*****************************************/
2136 static t_pack_molecule *get_molecule_for_cluster(
2137  INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb,
2138  INP boolean allow_unrelated_clustering,
2139  INOUTP int *num_unrelated_clustering_attempts,
2140  INP t_cluster_placement_stats *cluster_placement_stats_ptr) {
2141 
2142  /* Finds the vpack molecule with the greatest gain that satisfies the *
2143  * input, clock and capacity constraints of the cluster that is *
2144  * passed in. If no suitable molecule is found it returns NULL.
2145  */
2146 
2147  t_pack_molecule *best_molecule;
2148 
2149  /* If cannot pack into primitive, try packing into cluster */
2150 
2151  best_molecule = get_highest_gain_molecule(packer_algorithm, cur_pb,
2152  NOT_HILL_CLIMBING, cluster_placement_stats_ptr);
2153 
2154  /* If no blocks have any gain to the current cluster, the code above *
2155  * will not find anything. However, another logical_block with no inputs in *
2156  * common with the cluster may still be inserted into the cluster. */
2157 
2158  if (allow_unrelated_clustering) {
2159  if (best_molecule == NULL) {
2160  if (*num_unrelated_clustering_attempts == 0) {
2161  best_molecule =
2162  get_free_molecule_with_most_ext_inputs_for_cluster(
2163  packer_algorithm, cur_pb,
2164  cluster_placement_stats_ptr);
2165  (*num_unrelated_clustering_attempts)++;
2166  }
2167  } else {
2168  *num_unrelated_clustering_attempts = 0;
2169  }
2170  }
2171 
2172  return best_molecule;
2173 }
2174 
2175 /*****************************************/
2176 static void alloc_and_load_cluster_info(INP int num_clb, INOUTP t_block *clb) {
2177 
2178  /* Loads all missing clustering info necessary to complete clustering. */
2179  int i, j, i_clb, node_index, ipin, iclass;
2180  int inport, outport, clockport;
2181 
2182  const t_pb_type * pb_type;
2183  t_pb *pb;
2184 
2185  for (i_clb = 0; i_clb < num_clb; i_clb++) {
2186  rr_node = clb[i_clb].pb->rr_graph;
2187  pb_type = clb[i_clb].pb->pb_graph_node->pb_type;
2188  pb = clb[i_clb].pb;
2189 
2190  clb[i_clb].nets = (int*)my_malloc(clb[i_clb].type->num_pins * sizeof(int));
2191  for (i = 0; i < clb[i_clb].type->num_pins; i++) {
2192  clb[i_clb].nets[i] = OPEN;
2193  }
2194 
2195  inport = outport = clockport = 0;
2196  ipin = 0;
2197  /* Assume top-level pb and clb share a one-to-one connection */
2198  for (i = 0; i < pb_type->num_ports; i++) {
2199  if (!pb_type->ports[i].is_clock
2200  && pb_type->ports[i].type == IN_PORT) {
2201  for (j = 0; j < pb_type->ports[i].num_pins; j++) {
2202  iclass = clb[i_clb].type->pin_class[ipin];
2203  assert(clb[i_clb].type->class_inf[iclass].type == RECEIVER);
2204  assert(clb[i_clb].type->is_global_pin[ipin] == pb->pb_graph_node->input_pins[inport][j].port->is_non_clock_global);
2205  node_index =
2206  pb->pb_graph_node->input_pins[inport][j].pin_count_in_cluster;
2207  clb[i_clb].nets[ipin] = rr_node[node_index].net_num;
2208  ipin++;
2209  }
2210  inport++;
2211  } else if (pb_type->ports[i].type == OUT_PORT) {
2212  for (j = 0; j < pb_type->ports[i].num_pins; j++) {
2213  iclass = clb[i_clb].type->pin_class[ipin];
2214  assert(clb[i_clb].type->class_inf[iclass].type == DRIVER);
2215  node_index =
2216  pb->pb_graph_node->output_pins[outport][j].pin_count_in_cluster;
2217  clb[i_clb].nets[ipin] = rr_node[node_index].net_num;
2218  ipin++;
2219  }
2220  outport++;
2221  } else {
2222  assert(
2223  pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT);
2224  for (j = 0; j < pb_type->ports[i].num_pins; j++) {
2225  iclass = clb[i_clb].type->pin_class[ipin];
2226  assert(clb[i_clb].type->class_inf[iclass].type == RECEIVER);
2227  assert(clb[i_clb].type->is_global_pin[ipin]);
2228  node_index =
2229  pb->pb_graph_node->clock_pins[clockport][j].pin_count_in_cluster;
2230  clb[i_clb].nets[ipin] = rr_node[node_index].net_num;
2231  ipin++;
2232  }
2233  clockport++;
2234  }
2235  }
2236  }
2237 }
2238 
2239 /* TODO: Add more error checking, too light */
2240 /*****************************************/
2241 static void check_clustering(int num_clb, t_block *clb, boolean *is_clock) {
2242  int i;
2243  t_pb * cur_pb;
2244  boolean * blocks_checked;
2245 
2246  blocks_checked = (boolean*)my_calloc(num_logical_blocks, sizeof(boolean));
2247 
2248  /*
2249  * Check that each logical block connects to one primitive and that the primitive links up to the parent clb
2250  */
2251  for (i = 0; i < num_blocks; i++) {
2252  if (logical_block[i].pb->logical_block != i) {
2253  vpr_printf(TIO_MESSAGE_ERROR, "pb %s does not contain logical block %s but logical block %s #%d links to pb.\n",
2254  logical_block[i].pb->name, logical_block[i].name, logical_block[i].name, i);
2255  exit(1);
2256  }
2257  cur_pb = logical_block[i].pb;
2258  assert(strcmp(cur_pb->name, logical_block[i].name) == 0);
2259  while (cur_pb->parent_pb) {
2260  cur_pb = cur_pb->parent_pb;
2261  assert(cur_pb->name);
2262  }
2263  if (cur_pb != clb[num_clb].pb) {
2264  vpr_printf(TIO_MESSAGE_ERROR, "CLB %s does not match CLB contained by pb %s.\n",
2265  cur_pb->name, logical_block[i].pb->name);
2266  exit(1);
2267  }
2268  }
2269 
2270  /* Check that I do not have spurious links in children pbs */
2271  for (i = 0; i < num_clb; i++) {
2272  check_cluster_logical_blocks(clb[i].pb, blocks_checked);
2273  }
2274 
2275  for (i = 0; i < num_logical_blocks; i++) {
2276  if (blocks_checked[i] == FALSE) {
2277  vpr_printf(TIO_MESSAGE_ERROR, "Logical block %s #%d not found in any cluster.\n",
2278  logical_block[i].name, i);
2279  exit(1);
2280  }
2281  }
2282 
2283  free(blocks_checked);
2284 }
2285 
2286 /* TODO: May want to check that all logical blocks are actually reached (low priority, nice to have) */
2287 static void check_cluster_logical_blocks(t_pb *pb, boolean *blocks_checked) {
2288  int i, j;
2289  const t_pb_type *pb_type;
2290  boolean has_child;
2291 
2292  has_child = FALSE;
2293  pb_type = pb->pb_graph_node->pb_type;
2294  if (pb_type->num_modes == 0) {
2295  /* primitive */
2296  if (pb->logical_block != OPEN) {
2297  if (blocks_checked[pb->logical_block] != FALSE) {
2298  vpr_printf(TIO_MESSAGE_ERROR, "pb %s contains logical block %s #%d but logical block is already contained in another pb.\n",
2299  pb->name, logical_block[pb->logical_block].name, pb->logical_block);
2300  exit(1);
2301  }
2302  blocks_checked[pb->logical_block] = TRUE;
2303  if (pb != logical_block[pb->logical_block].pb) {
2304  vpr_printf(TIO_MESSAGE_ERROR, "pb %s contains logical block %s #%d but logical block does not link to pb.\n",
2305  pb->name, logical_block[pb->logical_block].name, pb->logical_block);
2306  exit(1);
2307  }
2308  }
2309  } else {
2310  /* this is a container pb, all container pbs must contain children */
2311  for (i = 0; i < pb_type->modes[pb->mode].num_pb_type_children; i++) {
2312  for (j = 0; j < pb_type->modes[pb->mode].pb_type_children[i].num_pb;
2313  j++) {
2314  if (pb->child_pbs[i] != NULL) {
2315  if (pb->child_pbs[i][j].name != NULL) {
2316  has_child = TRUE;
2317  check_cluster_logical_blocks(&pb->child_pbs[i][j],
2318  blocks_checked);
2319  }
2320  }
2321  }
2322  }
2323  assert(has_child);
2324  }
2325 }
2326 
2327 static t_pack_molecule *get_most_critical_seed_molecule(int * indexofcrit) {
2328  /* Do_timing_analysis must be called before this, or this function
2329  * will return garbage. Returns molecule with most critical block;
2330  * if block belongs to multiple molecules, return the biggest molecule. */
2331 
2332  int blkidx;
2333  t_pack_molecule *molecule, *best;
2334  struct s_linked_vptr *cur;
2335 
2336  while (*indexofcrit < num_logical_blocks) {
2337 
2338  blkidx = critindexarray[(*indexofcrit)++];
2339 
2340  if (logical_block[blkidx].clb_index == NO_CLUSTER) {
2341  cur = logical_block[blkidx].packed_molecules;
2342  best = NULL;
2343  while (cur != NULL) {
2344  molecule = (t_pack_molecule *) cur->data_vptr;
2345  if (molecule->valid) {
2346  if (best == NULL
2347  || (best->base_gain) < (molecule->base_gain)) {
2348  best = molecule;
2349  }
2350  }
2351  cur = cur->next;
2352  }
2353  assert(best != NULL);
2354  return best;
2355  }
2356  }
2357 
2358  /*if it makes it to here , there are no more blocks available*/
2359  return NULL;
2360 }
2361 
2362 /* Get the gain of packing a molecule into the current cluster.
2363  The gain is equal to total_block_gain + molecule_base_gain*some_factor - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor
2364 
2365  */
2366 static float get_molecule_gain(t_pack_molecule *molecule, std::map<int, float> &blk_gain) {
2367  float gain;
2368  int i, ipin, iport, inet, iblk;
2369  int num_introduced_inputs_of_indirectly_related_block;
2370  t_model_ports *cur;
2371 
2372  gain = 0;
2373  num_introduced_inputs_of_indirectly_related_block = 0;
2374  for (i = 0; i < get_array_size_of_molecule(molecule); i++) {
2375  if (molecule->logical_block_ptrs[i] != NULL) {
2376  if(blk_gain.count(molecule->logical_block_ptrs[i]->index) > 0) {
2377  gain += blk_gain[molecule->logical_block_ptrs[i]->index];
2378  } else {
2379  /* This block has no connection with current cluster, penalize molecule for having this block
2380  */
2381  cur = molecule->logical_block_ptrs[i]->model->inputs;
2382  iport = 0;
2383  while (cur != NULL) {
2384  if (cur->is_clock != TRUE) {
2385  for (ipin = 0; ipin < cur->size; ipin++) {
2386  inet =
2387  molecule->logical_block_ptrs[i]->input_nets[iport][ipin];
2388  if (inet != OPEN) {
2389  num_introduced_inputs_of_indirectly_related_block++;
2390  for (iblk = 0;
2391  iblk
2392  < get_array_size_of_molecule(
2393  molecule); iblk++) {
2394  if (molecule->logical_block_ptrs[iblk]
2395  != NULL
2396  && vpack_net[inet].node_block[0]
2397  == molecule->logical_block_ptrs[iblk]->index) {
2398  num_introduced_inputs_of_indirectly_related_block--;
2399  break;
2400  }
2401  }
2402  }
2403  }
2404  iport++;
2405  }
2406  cur = cur->next;
2407  }
2408  }
2409  }
2410  }
2411 
2412  gain += molecule->base_gain * 0.0001; /* Use base gain as tie breaker TODO: need to sweep this value and perhaps normalize */
2413  gain -= num_introduced_inputs_of_indirectly_related_block * (0.001);
2414 
2415  return gain;
2416 }
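/* Illustrative sketch (not part of cluster.c): a worked example of the score
 * above.  For a two-block molecule where one block has gain 0.40 in the
 * current cluster, the other block is unrelated and would pull 3 new input
 * nets into the cluster, and the molecule base gain is 2.0:
 *   0.40 + 2.0*0.0001 - 3*0.001 = 0.3972
 * so connectivity to the cluster dominates and base gain only breaks ties.
 * Names below are hypothetical. */
static float molecule_score(float summed_block_gain, float base_gain,
		int num_introduced_input_nets) {
	return summed_block_gain + base_gain * 0.0001
			- num_introduced_input_nets * 0.001;
}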
2417 
2418 static int compare_molecule_gain(const void *a, const void *b) {
2419  float base_gain_a, base_gain_b, diff;
2420  const t_pack_molecule *molecule_a, *molecule_b;
2421  molecule_a = (*(const t_pack_molecule * const *) a);
2422  molecule_b = (*(const t_pack_molecule * const *) b);
2423 
2424  base_gain_a = molecule_a->base_gain;
2425  base_gain_b = molecule_b->base_gain;
2426  diff = base_gain_a - base_gain_b;
2427  if (diff > 0) {
2428  return 1;
2429  }
2430  if (diff < 0) {
2431  return -1;
2432  }
2433  return 0;
2434 }
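/* Illustrative sketch (not part of cluster.c): compare_molecule_gain() is a
 * qsort()-style comparator over an array of t_pack_molecule pointers, so such
 * an array can be ordered by ascending base gain as follows.  The array and
 * count names are hypothetical. */
static void sort_molecules_by_base_gain(t_pack_molecule **molecule_array,
		int num_molecules) {
	qsort(molecule_array, num_molecules, sizeof(t_pack_molecule *),
			compare_molecule_gain);
}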
2435 
2436 /* Determine if speculatively packed cur_pb is pin feasible
2437  * Runtime is actually not that bad for this. It's worst case O(k^2) where k is the number of pb_graph pins. Can use hash tables or make it incremental if this becomes an issue.
2438  */
2439 static void try_update_lookahead_pins_used(t_pb *cur_pb) {
2440  int i, j;
2441  const t_pb_type *pb_type = cur_pb->pb_graph_node->pb_type;
2442 
2443  if (pb_type->num_modes > 0 && cur_pb->name != NULL) {
2444  if (cur_pb->child_pbs != NULL) {
2445  for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children;
2446  i++) {
2447  if (cur_pb->child_pbs[i] != NULL) {
2448  for (j = 0;
2449  j
2450  < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb;
2451  j++) {
2452  try_update_lookahead_pins_used(
2453  &cur_pb->child_pbs[i][j]);
2454  }
2455  }
2456  }
2457  }
2458  } else {
2459  if (pb_type->blif_model != NULL && cur_pb->logical_block != OPEN) {
2460  compute_and_mark_lookahead_pins_used(cur_pb->logical_block);
2461  }
2462  }
2463 }
2464 
2465 /* Resets nets used at different pin classes for determining pin feasibility */
2466 static void reset_lookahead_pins_used(t_pb *cur_pb) {
2467  int i, j;
2468  const t_pb_type *pb_type = cur_pb->pb_graph_node->pb_type;
2469  if (cur_pb->pb_stats == NULL) {
2470  return; /* No pins used, no need to continue */
2471  }
2472 
2473  if (pb_type->num_modes > 0 && cur_pb->name != NULL) {
2474  for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) {
2475  for (j = 0;
2476  j
2477  < cur_pb->pb_graph_node->input_pin_class_size[i]
2478  * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC;
2479  j++) {
2480  cur_pb->pb_stats->lookahead_input_pins_used[i][j] = OPEN;
2481  }
2482  }
2483 
2484  for (i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) {
2485  for (j = 0;
2486  j
2487  < cur_pb->pb_graph_node->output_pin_class_size[i]
2488  * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC;
2489  j++) {
2490  cur_pb->pb_stats->lookahead_output_pins_used[i][j] = OPEN;
2491  }
2492  }
2493 
2494  if (cur_pb->child_pbs != NULL) {
2495  for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children;
2496  i++) {
2497  if (cur_pb->child_pbs[i] != NULL) {
2498  for (j = 0;
2499  j
2500  < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb;
2501  j++) {
2502  reset_lookahead_pins_used(&cur_pb->child_pbs[i][j]);
2503  }
2504  }
2505  }
2506  }
2507  }
2508 }
2509 
2510 /* Determine if pins of speculatively packed pb are legal */
2511 static void compute_and_mark_lookahead_pins_used(int ilogical_block) {
2512  int i, j;
2513  t_pb *cur_pb;
2514  t_pb_graph_node *pb_graph_node;
2515  const t_pb_type *pb_type;
2516  t_port *prim_port;
2517 
2518  int input_port;
2519  int output_port;
2520  int clock_port;
2521 
2522  assert(logical_block[ilogical_block].pb != NULL);
2523 
2524  cur_pb = logical_block[ilogical_block].pb;
2525  pb_graph_node = cur_pb->pb_graph_node;
2526  pb_type = pb_graph_node->pb_type;
2527 
2528  /* Walk through inputs, outputs, and clocks marking pins off of the same class */
2529  /* TODO: This is inelegant design, I should change the primitive ports in pb_type to be input, output, or clock instead of this lookup */
2530  input_port = output_port = clock_port = 0;
2531  for (i = 0; i < pb_type->num_ports; i++) {
2532  prim_port = &pb_type->ports[i];
2533  if (prim_port->is_clock) {
2534  assert(prim_port->type == IN_PORT);
2535  assert(prim_port->num_pins == 1 && clock_port == 0);
2536  /* currently support only one clock for primitives */
2537  if (logical_block[ilogical_block].clock_net != OPEN) {
2538  compute_and_mark_lookahead_pins_used_for_pin(
2539  &pb_graph_node->clock_pins[0][0], cur_pb,
2540  logical_block[ilogical_block].clock_net);
2541  }
2542  clock_port++;
2543  } else if (prim_port->type == IN_PORT) {
2544  for (j = 0; j < prim_port->num_pins; j++) {
2545  if (logical_block[ilogical_block].input_nets[prim_port->model_port->index][j]
2546  != OPEN) {
2547  compute_and_mark_lookahead_pins_used_for_pin(
2548  &pb_graph_node->input_pins[input_port][j], cur_pb,
2549  logical_block[ilogical_block].input_nets[prim_port->model_port->index][j]);
2550  }
2551  }
2552  input_port++;
2553  } else if (prim_port->type == OUT_PORT) {
2554  for (j = 0; j < prim_port->num_pins; j++) {
2555  if (logical_block[ilogical_block].output_nets[prim_port->model_port->index][j]
2556  != OPEN) {
2557  compute_and_mark_lookahead_pins_used_for_pin(
2558  &pb_graph_node->output_pins[output_port][j], cur_pb,
2559  logical_block[ilogical_block].output_nets[prim_port->model_port->index][j]);
2560  }
2561  }
2562  output_port++;
2563  } else {
2564  assert(0);
2565  }
2566  }
2567 }
2568 
2569 /* Given a pin and its assigned net, mark all pin classes that are affected */
2570 static void compute_and_mark_lookahead_pins_used_for_pin(
2571  t_pb_graph_pin *pb_graph_pin, t_pb *primitive_pb, int inet) {
2572  int depth, i;
2573  int pin_class, output_port;
2574  t_pb * cur_pb;
2575  t_pb * check_pb;
2576  const t_pb_type *pb_type;
2577  t_port *prim_port;
2578  t_pb_graph_pin *output_pb_graph_pin;
2579  int count;
2580 
2581  boolean skip, found;
2582 
2583  cur_pb = primitive_pb->parent_pb;
2584 
2585  while (cur_pb) {
2586  depth = cur_pb->pb_graph_node->pb_type->depth;
2587  pin_class = pb_graph_pin->parent_pin_class[depth];
2588  assert(pin_class != OPEN);
2589 
2590  if (pb_graph_pin->port->type == IN_PORT) {
2591  /* find the location of the net driver if it exists in the clb; NULL otherwise */
2592  output_pb_graph_pin = NULL;
2593  if (logical_block[vpack_net[inet].node_block[0]].clb_index
2594  == logical_block[primitive_pb->logical_block].clb_index) {
2595  pb_type =
2596  logical_block[vpack_net[inet].node_block[0]].pb->pb_graph_node->pb_type;
2597  output_port = 0;
2598  found = FALSE;
2599  for (i = 0; i < pb_type->num_ports && !found; i++) {
2600  prim_port = &pb_type->ports[i];
2601  if (prim_port->type == OUT_PORT) {
2602  if (pb_type->ports[i].model_port->index
2603  == vpack_net[inet].node_block_port[0]) {
2604  found = TRUE;
2605  break;
2606  }
2607  output_port++;
2608  }
2609  }
2610  assert(found);
2611  output_pb_graph_pin =
2612  &(logical_block[vpack_net[inet].node_block[0]].pb->pb_graph_node->output_pins[output_port][vpack_net[inet].node_block_pin[0]]);
2613  }
2614 
2615  skip = FALSE;
2616 
2617  /* check if driving pin for input is contained within the currently investigated cluster, if yes, do nothing since no input needs to be used */
2618  if (output_pb_graph_pin != NULL) {
2619  check_pb = logical_block[vpack_net[inet].node_block[0]].pb;
2620  while (check_pb != NULL && check_pb != cur_pb) {
2621  check_pb = check_pb->parent_pb;
2622  }
2623  if (check_pb != NULL) {
2624  for (i = 0;
2625  skip == FALSE
2626  && i
2627  < output_pb_graph_pin->num_connectable_primtive_input_pins[depth];
2628  i++) {
2629  if (pb_graph_pin
2630  == output_pb_graph_pin->list_of_connectable_input_pin_ptrs[depth][i]) {
2631  skip = TRUE;
2632  }
2633  }
2634  }
2635  }
2636 
2637  /* Must use input pin */
2638  if (!skip) {
2639  /* Check if already in pin class, if yes, skip */
2640  skip = FALSE;
2641  for (i = 0;
2642  i
2643  < cur_pb->pb_graph_node->input_pin_class_size[pin_class]
2644  * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC;
2645  i++) {
2646  if (cur_pb->pb_stats->lookahead_input_pins_used[pin_class][i]
2647  == inet) {
2648  skip = TRUE;
2649  }
2650  }
2651  if (!skip) {
2652  /* Net must take up a slot */
2653  for (i = 0;
2654  i
2655  < cur_pb->pb_graph_node->input_pin_class_size[pin_class]
2656  * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC;
2657  i++) {
2658  if (cur_pb->pb_stats->lookahead_input_pins_used[pin_class][i]
2659  == OPEN) {
2660  cur_pb->pb_stats->lookahead_input_pins_used[pin_class][i] =
2661  inet;
2662  break;
2663  }
2664  }
2665  }
2666  }
2667  } else {
2668  assert(pb_graph_pin->port->type == OUT_PORT);
2669 
2670  skip = FALSE;
2671  if (pb_graph_pin->num_connectable_primtive_input_pins[depth]
2672  >= vpack_net[inet].num_sinks) {
2673  /* Important: This runtime penalty looks a lot scarier than it really is. For high fan-out nets, I at most look at the number of pins within the cluster which limits runtime.
2674  DO NOT REMOVE THIS INITIAL FILTER WITHOUT CAREFUL ANALYSIS ON RUNTIME!!!
2675 
2676  Key Observation:
2677  For LUT-based designs it is impossible for the average fanout to exceed the number of LUT inputs so it's usually around 4-5 (pigeon-hole argument, if the average fanout is greater than the
2678  number of LUT inputs, where do the extra connections go? Therefore, average fanout must be capped to a small constant where the constant is equal to the number of LUT inputs). The real danger to runtime
2679  is when the number of sinks of a net gets doubled
2680 
2681  */
2682  for (i = 1; i <= vpack_net[inet].num_sinks; i++) {
2683  if (logical_block[vpack_net[inet].node_block[i]].clb_index
2684  != logical_block[vpack_net[inet].node_block[0]].clb_index) {
2685  break;
2686  }
2687  }
2688  if (i == vpack_net[inet].num_sinks + 1) {
2689  count = 0;
2690  /* TODO: I should cache the absorbed outputs, once net is absorbed, net is forever absorbed, no point in rechecking every time */
2691  for (i = 0;
2692  i
2693  < pb_graph_pin->num_connectable_primtive_input_pins[depth];
2694  i++) {
2695  if (get_net_corresponding_to_pb_graph_pin(cur_pb,
2696  pb_graph_pin->list_of_connectable_input_pin_ptrs[depth][i])
2697  == inet) {
2698  count++;
2699  }
2700  }
2701  if (count == vpack_net[inet].num_sinks) {
2702  skip = TRUE;
2703  }
2704  }
2705  }
2706 
2707  if (!skip) {
2708  /* This output must exit this cluster */
2709  for (i = 0;
2710  i
2711  < cur_pb->pb_graph_node->output_pin_class_size[pin_class]
2712  * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC;
2713  i++) {
2714  assert(
2715  cur_pb->pb_stats->lookahead_output_pins_used[pin_class][i] != inet);
2716  if (cur_pb->pb_stats->lookahead_output_pins_used[pin_class][i]
2717  == OPEN) {
2718  cur_pb->pb_stats->lookahead_output_pins_used[pin_class][i] =
2719  inet;
2720  break;
2721  }
2722  }
2723  }
2724  }
2725 
2726  cur_pb = cur_pb->parent_pb;
2727  }
2728 }
2729 
2730 /* Check if the number of available inputs/outputs for a pin class is sufficient for speculatively packed blocks */
2731 static boolean check_lookahead_pins_used(t_pb *cur_pb) {
2732  int i, j;
2733  int ipin;
2734  const t_pb_type *pb_type = cur_pb->pb_graph_node->pb_type;
2735  boolean success;
2736 
2737  success = TRUE;
2738 
2739  if (pb_type->num_modes > 0 && cur_pb->name != NULL) {
2740  for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class && success;
2741  i++) {
2742  ipin = 0;
2743  for (j = 0;
2744  j
2745  < cur_pb->pb_graph_node->input_pin_class_size[i]
2746  * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC;
2747  j++) {
2748  if (cur_pb->pb_stats->lookahead_input_pins_used[i][j] != OPEN) {
2749  ipin++;
2750  }
2751  }
2752  if (ipin > cur_pb->pb_graph_node->input_pin_class_size[i]) {
2753  success = FALSE;
2754  }
2755  }
2756 
2757  for (i = 0; i < cur_pb->pb_graph_node->num_output_pin_class && success;
2758  i++) {
2759  ipin = 0;
2760  for (j = 0;
2761  j
2762  < cur_pb->pb_graph_node->output_pin_class_size[i]
2763  * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC;
2764  j++) {
2765  if (cur_pb->pb_stats->lookahead_output_pins_used[i][j] != OPEN) {
2766  ipin++;
2767  }
2768  }
2769  if (ipin > cur_pb->pb_graph_node->output_pin_class_size[i]) {
2770  success = FALSE;
2771  }
2772  }
2773 
2774  if (success && cur_pb->child_pbs != NULL) {
2775  for (i = 0;
2776  success
2777  && i
2778  < pb_type->modes[cur_pb->mode].num_pb_type_children;
2779  i++) {
2780  if (cur_pb->child_pbs[i] != NULL) {
2781  for (j = 0;
2782  success
2783  && j
2784  < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb;
2785  j++) {
2786  success = check_lookahead_pins_used(
2787  &cur_pb->child_pbs[i][j]);
2788  }
2789  }
2790  }
2791  }
2792  }
2793  return success;
2794 }
2795 
2796 /* Speculation successful, commit input/output pins used */
2797 static void commit_lookahead_pins_used(t_pb *cur_pb) {
2798  int i, j;
2799  int ipin;
2800  const t_pb_type *pb_type = cur_pb->pb_graph_node->pb_type;
2801 
2802  if (pb_type->num_modes > 0 && cur_pb->name != NULL) {
2803  for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) {
2804  ipin = 0;
2805  for (j = 0;
2806  j
2807  < cur_pb->pb_graph_node->input_pin_class_size[i]
2808  * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC;
2809  j++) {
2810  if (cur_pb->pb_stats->lookahead_input_pins_used[i][j] != OPEN) {
2811  cur_pb->pb_stats->input_pins_used[i][ipin] =
2812  cur_pb->pb_stats->lookahead_input_pins_used[i][j];
2813  ipin++;
2814  }
2815  assert(ipin <= cur_pb->pb_graph_node->input_pin_class_size[i]);
2816  }
2817  }
2818 
2819  for (i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) {
2820  ipin = 0;
2821  for (j = 0;
2822  j
2823  < cur_pb->pb_graph_node->output_pin_class_size[i]
2824  * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC;
2825  j++) {
2826  if (cur_pb->pb_stats->lookahead_output_pins_used[i][j] != OPEN) {
2827  cur_pb->pb_stats->output_pins_used[i][ipin] =
2828  cur_pb->pb_stats->lookahead_output_pins_used[i][j];
2829  ipin++;
2830  }
2831  assert(ipin <= cur_pb->pb_graph_node->output_pin_class_size[i]);
2832  }
2833  }
2834 
2835  if (cur_pb->child_pbs != NULL) {
2836  for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children;
2837  i++) {
2838  if (cur_pb->child_pbs[i] != NULL) {
2839  for (j = 0;
2840  j
2841  < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb;
2842  j++) {
2843  commit_lookahead_pins_used(&cur_pb->child_pbs[i][j]);
2844  }
2845  }
2846  }
2847  }
2848  }
2849 }
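/* Illustrative sketch (not part of cluster.c): the four lookahead routines are
 * used together as a speculative check on pin-class capacity.  The real call
 * sites are spread across try_pack_molecule() and update_cluster_stats(), but
 * the overall flow is roughly the following.  Names below are hypothetical. */
static boolean speculative_pin_feasibility(t_pb *top_level_pb) {
	reset_lookahead_pins_used(top_level_pb); /* clear any previous speculation */
	try_update_lookahead_pins_used(top_level_pb); /* mark pins of speculatively packed blocks */
	if (!check_lookahead_pins_used(top_level_pb))
		return FALSE; /* some pin class would be overused */
	commit_lookahead_pins_used(top_level_pb); /* accept: copy into input/output_pins_used */
	return TRUE;
}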
2850 
2851 /* determine net at given pin location for cluster, return OPEN if none exists */
2852 static int get_net_corresponding_to_pb_graph_pin(t_pb *cur_pb,
2853  t_pb_graph_pin *pb_graph_pin) {
2854  t_pb_graph_node *pb_graph_node;
2855  int i;
2856  t_model_ports *model_port;
2857  int ilogical_block;
2858 
2859  if (cur_pb->name == NULL) {
2860  return OPEN;
2861  }
2862  if (cur_pb->pb_graph_node->pb_type->num_modes != 0) {
2863  pb_graph_node = pb_graph_pin->parent_node;
2864  while (pb_graph_node->parent_pb_graph_node->pb_type->depth
2865  > cur_pb->pb_graph_node->pb_type->depth) {
2866  pb_graph_node = pb_graph_node->parent_pb_graph_node;
2867  }
2868  if (pb_graph_node->parent_pb_graph_node == cur_pb->pb_graph_node) {
2869  if (cur_pb->mode != pb_graph_node->pb_type->parent_mode->index) {
2870  return OPEN;
2871  }
2872  for (i = 0;
2873  i
2874  < cur_pb->pb_graph_node->pb_type->modes[cur_pb->mode].num_pb_type_children;
2875  i++) {
2876  if (pb_graph_node
2877  == &cur_pb->pb_graph_node->child_pb_graph_nodes[cur_pb->mode][i][pb_graph_node->placement_index]) {
2878  break;
2879  }
2880  }
2881  assert(
2882  i < cur_pb->pb_graph_node->pb_type->modes[cur_pb->mode].num_pb_type_children);
2883  return get_net_corresponding_to_pb_graph_pin(
2884  &cur_pb->child_pbs[i][pb_graph_node->placement_index],
2885  pb_graph_pin);
2886  } else {
2887  return OPEN;
2888  }
2889  } else {
2890  ilogical_block = cur_pb->logical_block;
2891  if (ilogical_block == OPEN) {
2892  return OPEN;
2893  } else {
2894  model_port = pb_graph_pin->port->model_port;
2895  if (model_port->is_clock) {
2896  assert(model_port->dir == IN_PORT);
2897  return logical_block[ilogical_block].clock_net;
2898  } else if (model_port->dir == IN_PORT) {
2899  return logical_block[ilogical_block].input_nets[model_port->index][pb_graph_pin->pin_number];
2900  } else {
2901  assert(model_port->dir == OUT_PORT);
2902  return logical_block[ilogical_block].output_nets[model_port->index][pb_graph_pin->pin_number];
2903  }
2904  }
2905  }
2906 }
2907 
2908 static void print_block_criticalities(const char * fname) {
2909  /* Prints criticality and critindexarray for each logical block to a file. */
2910 
2911  int iblock, len;
2912  FILE * fp;
2913  char * name;
2914 
2915  fp = my_fopen(fname, "w", 0);
2916  fprintf(fp, "Index \tLogical block name \tCriticality \tCritindexarray\n\n");
2917  for (iblock = 0; iblock < num_logical_blocks; iblock++) {
2918  name = logical_block[iblock].name;
2919  len = strlen(name);
2920  fprintf(fp, "%d\t%s\t", logical_block[iblock].index, name);
2921  if (len < 8) {
2922  fprintf(fp, "\t\t");
2923  } else if (len < 16) {
2924  fprintf(fp, "\t");
2925  }
2926  fprintf(fp, "%f\t%d\n", block_criticality[iblock], critindexarray[iblock]);
2927  }
2928  fclose(fp);
2929 }
int * node_block_pin
Definition: vpr_types.h:509
boolean is_clock
int get_max_depth_of_pb_type(t_pb_type *pb_type)
Definition: vpr_utils.c:222
std::map< int, float > sharinggain
Definition: vpr_types.h:134
void set_mode_cluster_placement_stats(INP t_pb_graph_node *pb_graph_node, int mode)
#define AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE
Definition: cluster.c:34
static void commit_lookahead_pins_used(t_pb *cur_pb)
Definition: cluster.c:2797
char * name
Definition: vpr_types.h:179
t_pb_graph_pin ** clock_pins
static void try_update_lookahead_pins_used(t_pb *cur_pb)
Definition: cluster.c:2439
enum e_pb_type_class class_type
FILE * my_fopen(const char *fname, const char *flag, int prompt)
Definition: util.c:54
#define AAPACK_MAX_HIGH_FANOUT_EXPLORE
Definition: cluster.c:36
struct s_pb * parent_pb
Definition: vpr_types.h:186
struct s_pb ** child_pbs
Definition: vpr_types.h:185
e_net_relation_to_clustered_block
Definition: cluster.c:66
int ** output_pins_used
Definition: vpr_types.h:159
static int * critindexarray
Definition: cluster.c:108
int num_pins
t_model_ports * model_port
void freeGrid()
Definition: SetupGrid.c:130
int ** input_nets
Definition: vpr_types.h:211
struct s_pb_type * pb_type_children
void free_cluster_legality_checker(void)
static void check_clustering(int num_clb, t_block *clb, boolean *is_clock)
Definition: cluster.c:2241
int tie_break_high_fanout_net
Definition: vpr_types.h:151
#define AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC
Definition: cluster.c:31
t_rr_node * rr_node
Definition: globals.c:70
void heapsort(int *sort_index, float *sort_values, int nelem, int start_index)
Definition: heapsort.c:13
static boolean check_lookahead_pins_used(t_pb *cur_pb)
Definition: cluster.c:2731
int net_num
Definition: vpr_types.h:917
struct s_pb_graph_node * parent_pb_graph_node
int get_max_nets_in_pb_type(const t_pb_type *pb_type)
Definition: vpr_utils.c:196
void free_legalizer_for_cluster(INP t_block *clb, boolean free_local_rr_graph)
int * output_pin_class_size
enum PORTS dir
Definition: logic_types.h:22
t_type_ptr EMPTY_TYPE
Definition: globals.c:41
int num_tnodes
Definition: path_delay.c:144
#define nint(a)
Definition: util.h:24
boolean
Definition: util.h:11
static boolean primitive_type_and_memory_feasible(int iblk, const t_pb_type *cur_pb_type, t_pb *memory_class_pb, int sibling_memory_blk)
Definition: cluster.c:869
#define MARKED_FRAC
Definition: cluster.c:86
t_mode * modes
t_model * models
#define SCALE_DISTANCE_VAL
Definition: cluster.c:47
struct s_pack_molecule ** feasible_blocks
Definition: vpr_types.h:167
void free_pb_stats(t_pb *pb)
Definition: vpr_utils.c:614
static t_pack_molecule * get_molecule_for_cluster(INP enum e_packer_algorithm packer_algorithm, INP t_pb *cur_pb, INP boolean allow_unrelated_clustering, INOUTP int *num_unrelated_clustering_attempts, INP t_cluster_placement_stats *cluster_placement_stats_ptr)
static void compute_and_mark_lookahead_pins_used(int ilogical_block)
Definition: cluster.c:2511
int get_max_primitives_in_pb_type(t_pb_type *pb_type)
Definition: vpr_utils.c:173
std::map< int, int > num_pins_of_net_in_pb
Definition: vpr_types.h:155
int num_marked_blocks
Definition: vpr_types.h:148
void restore_routing_cluster(void)
static void start_new_cluster(INP t_cluster_placement_stats *cluster_placement_stats, INP t_pb_graph_node **primitives_list, INP const t_arch *arch, INOUTP t_block *new_cluster, INP int clb_index, INP t_pack_molecule *molecule, INP float aspect, INOUTP int *num_used_instances_type, INOUTP int *num_instances_type, INP int num_models, INP int max_cluster_size, INP int max_nets_in_pb_type, INP int detailed_routing_stage)
static void update_timing_gain_values(int inet, int clustered_block, t_pb *cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, t_slack *slacks)
Definition: cluster.c:1523
static void check_cluster_logical_blocks(t_pb *pb, boolean *blocks_checked)
Definition: cluster.c:2287
void free_cluster_placement_stats(INOUTP t_cluster_placement_stats *cluster_placement_stats_list)
int unclustered_list_head_size
Definition: cluster.c:93
Definition: cluster.c:67
static t_pack_molecule * get_seed_logical_molecule_with_most_ext_inputs(int max_molecule_inputs)
Definition: cluster.c:1055
void * my_calloc(size_t nelem, size_t size)
Definition: util.c:132
struct s_model_ports * next
Definition: logic_types.h:28
void free_cb(t_pb *pb)
Definition: vpr_utils.c:508
enum PORTS type
static int * net_output_feeds_driving_block_input
Definition: cluster.c:103
e_feasibility
Definition: cluster.c:56
int * node_block
Definition: vpr_types.h:507
struct s_linked_vptr * packed_molecules
Definition: vpr_types.h:228
static void revert_place_logical_block(INP int ilogical_block, INP int max_models)
Definition: cluster.c:1412
int num_logical_nets
Definition: globals.c:17
static int get_net_corresponding_to_pb_graph_pin(t_pb *cur_pb, t_pb_graph_pin *pb_graph_pin)
Definition: cluster.c:2852
void do_timing_analysis(t_slack *slacks, boolean is_prepacked, boolean do_lut_input_balancing, boolean is_final_analysis)
Definition: path_delay.c:1613
#define AAPACK_MAX_NET_SINKS_IGNORE
Definition: cluster.c:35
e_cluster_seed
Definition: vpr_types.h:112
void print_clustering_timing_info(const char *fname)
Definition: path_delay.c:696
char * blif_model
e_gain_update
Definition: cluster.c:53
void commit_primitive(INOUTP t_cluster_placement_stats *cluster_placement_stats, INP t_pb_graph_node *primitive)
t_pb_graph_pin ** output_pins
float ** slack
Definition: vpr_types.h:405
t_type_ptr type
Definition: vpr_types.h:561
boolean is_clock
Definition: logic_types.h:26
#define MAX_SHORT
Definition: vpr_types.h:75
void reset_legalizer_for_cluster(t_block *clb)
void output_blif(t_block *clb, int num_clusters, boolean global_clocks, boolean *is_clock, const char *out_fname, boolean skip_clustering)
Definition: output_blif.c:489
t_cluster_placement_stats * alloc_and_load_cluster_placement_stats(void)
#define INOUTP
Definition: util.h:21
int num_blocks
Definition: globals.c:30
#define AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST
Definition: cluster.c:32
#define UNDEFINED
Definition: vpr_types.h:103
boolean getEchoEnabled(void)
Definition: ReadOptions.c:67
int ** input_pins_used
Definition: vpr_types.h:158
static void reset_lookahead_pins_used(t_pb *cur_pb)
Definition: cluster.c:2466
char * name
Definition: vpr_types.h:560
t_prepacked_tnode_data * prepacked_data
Definition: vpr_types.h:361
Definition: util.h:12
e_gain_type
Definition: cluster.c:59
static struct s_molecule_link * unclustered_list_head
Definition: cluster.c:92
static void alloc_and_init_clustering(boolean global_clocks, float alpha, float beta, int max_cluster_size, int max_molecule_inputs, int max_pb_depth, int max_models, t_cluster_placement_stats **cluster_placement_stats, t_pb_graph_node ***primitives_list, t_pack_molecule *molecules_head, int num_molecules)
Definition: cluster.c:715
void alloc_and_load_grid(INOUTP int *num_instances_type)
Definition: SetupGrid.c:22
static t_pack_molecule * get_molecule_by_num_ext_inputs(INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb, INP int ext_inps, INP enum e_removal_policy remove_flag, INP t_cluster_placement_stats *cluster_placement_stats_ptr)
Definition: cluster.c:958
t_mode * parent_mode
void setup_intracluster_routing_for_molecule(INP t_pack_molecule *molecule, INP t_pb_graph_node **primitive_list)
static float * block_criticality
Definition: cluster.c:107
boolean try_breadth_first_route_cluster(void)
#define SCALE_NUM_PATHS
Definition: cluster.c:38
static void check_clocks(boolean *is_clock)
Definition: cluster.c:619
void save_and_reset_routing_cluster(void)
void set_pb_graph_mode(t_pb_graph_node *pb_graph_node, int mode, int isOn)
e_removal_policy
Definition: cluster.c:62
std::map< int, float > hillgain
Definition: vpr_types.h:142
t_slack * alloc_and_load_pre_packing_timing_graph(float block_delay, float inter_cluster_net_delay, t_model *models, t_timing_inf timing_inf)
Definition: path_delay.c:288
int block
Definition: vpr_types.h:346
int ** lookahead_output_pins_used
Definition: vpr_types.h:162
static void free_pb_stats_recursive(t_pb *pb)
Definition: cluster.c:805
static void * my_malloc(int ibytes)
Definition: graphics.c:499
int get_array_size_of_molecule(t_pack_molecule *molecule)
boolean * is_global
t_tnode * tnode
Definition: path_delay.c:143
void output_clustering(t_block *clb, int num_clusters, boolean global_clocks, boolean *is_clock, char *out_fname, boolean skip_clustering)
int * num_connectable_primtive_input_pins
static t_pack_molecule * get_highest_gain_molecule(INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb, INP enum e_gain_type gain_mode, INP t_cluster_placement_stats *cluster_placement_stats_ptr)
Definition: cluster.c:1988
int num_child_blocks_in_pb
Definition: vpr_types.h:149
static t_pack_molecule * get_most_critical_seed_molecule(int *indexofcrit)
Definition: cluster.c:2327
#define NO_CLUSTER
Definition: vpr_types.h:98
static void alloc_and_load_pb_stats(t_pb *pb, int max_models, int max_nets_in_pb_type)
Definition: cluster.c:1082
#define INP
Definition: util.h:19
int nx
Definition: globals.c:46
static void update_connection_gain_values(int inet, int clustered_block, t_pb *cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block)
Definition: cluster.c:1448
void reset_cluster_placement_stats(INOUTP t_cluster_placement_stats *cluster_placement_stats)
boolean has_valid_normalized_T_arr(int inode)
Definition: path_delay.c:3054
static enum e_block_pack_status try_pack_molecule(INOUTP t_cluster_placement_stats *cluster_placement_stats_ptr, INP t_pack_molecule *molecule, INOUTP t_pb_graph_node **primitives_list, INOUTP t_pb *pb, INP int max_models, INP int max_cluster_size, INP int clb_index, INP int max_nets_in_pb_type, INP int detailed_routing_stage)
Definition: cluster.c:1174
struct s_pb_graph_node * parent_node
static void print_block_criticalities(const char *fname)
Definition: cluster.c:2908
void reset_tried_but_unused_cluster_placements(INOUTP t_cluster_placement_stats *cluster_placement_stats)
int logical_block
Definition: vpr_types.h:181
static void add_molecule_to_pb_stats_candidates(t_pack_molecule *molecule, std::map< int, float > &gain, t_pb *pb)
Definition: cluster.c:664
t_model_ports * inputs
Definition: logic_types.h:35
t_model_ports * outputs
Definition: logic_types.h:36
boolean isEchoFileEnabled(enum e_echo_files echo_option)
Definition: ReadOptions.c:115
e_block_pack_status
Definition: vpr_types.h:116
void do_clustering(const t_arch *arch, t_pack_molecule *molecule_head, int num_models, boolean global_clocks, boolean *is_clock, boolean hill_climbing_flag, char *out_fname, boolean timing_driven, enum e_cluster_seed cluster_seed_type, float alpha, float beta, int recompute_timing_after, float block_delay, float intra_cluster_net_delay, float inter_cluster_net_delay, float aspect, boolean allow_unrelated_clustering, boolean allow_early_exit, boolean connection_driven, enum e_packer_algorithm packer_algorithm, t_timing_inf timing_inf)
Definition: cluster.c:232
std::map< int, float > connectiongain
Definition: vpr_types.h:132
boolean exists_free_primitive_for_logical_block(INOUTP t_cluster_placement_stats *cluster_placement_stats, INP int ilogical_block)
struct s_pb_graph_node *** child_pb_graph_nodes
struct s_linked_vptr * next
Definition: util.h:36
int num_pb_type_children
struct s_pb_graph_pin *** list_of_connectable_input_pin_ptrs
std::map< int, float > gain
Definition: vpr_types.h:128
struct s_pack_molecule * next
Definition: vpr_types.h:257
static char * model
Definition: read_blif.c:45
void * data_vptr
Definition: util.h:35
struct s_pb_stats t_pb_stats
static int compare_molecule_gain(const void *a, const void *b)
Definition: cluster.c:2418
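
compare_molecule_gain() above is the comparison callback used when candidate molecules are sorted by attraction gain. A minimal qsort()-style comparator in the same spirit; the record layout and the tie-breaking rule are simplified choices for illustration:

#include <cstdlib>

struct mol_stats {       /* simplified stand-in for a molecule's sort key */
    float base_gain;
    int   num_blocks;
};

/* Sort descending by gain; as one reasonable tie-break, prefer larger
 * molecules so chains are considered before single primitives. */
static int cmp_gain_desc(const void *a, const void *b)
{
    const mol_stats *ma = static_cast<const mol_stats *>(a);
    const mol_stats *mb = static_cast<const mol_stats *>(b);

    if (ma->base_gain != mb->base_gain)
        return (ma->base_gain < mb->base_gain) ? 1 : -1;
    return mb->num_blocks - ma->num_blocks;
}

/* Usage: qsort(mols, num_mols, sizeof(mol_stats), cmp_gain_desc); */
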
void print_criticality(t_slack *slacks, boolean criticality_is_normalized, const char *fname)
Definition: path_delay.c:559
static t_pack_molecule * get_free_molecule_with_most_ext_inputs_for_cluster(INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb, INP t_cluster_placement_stats *cluster_placement_stats_ptr)
Definition: cluster.c:1012
struct s_pb_type * pb_type
static struct s_molecule_link * memory_pool
Definition: cluster.c:94
int num_types
Definition: globals.c:37
char * name
Definition: logic_types.h:34
float ** timing_criticality
Definition: vpr_types.h:406
static void compute_and_mark_lookahead_pins_used_for_pin(t_pb_graph_pin *pb_graph_pin, t_pb *primitive_pb, int inet)
Definition: cluster.c:2570
std::map< int, float > timinggain
Definition: vpr_types.h:130
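
The std::map<int, float> members listed in these entries (gain, connectiongain, timinggain, hillgain) are sparse per-cluster score tables keyed by logical block index. A small self-contained sketch of that bookkeeping pattern; the update rules shown are illustrative, not the packer's exact formulas:

#include <algorithm>
#include <map>

/* Sparse gain tables keyed by logical block index, mirroring the
 * std::map<int, float> members of t_pb_stats. */
struct gain_tables {
    std::map<int, float> connectiongain;
    std::map<int, float> timinggain;
    std::map<int, float> gain;    /* combined attraction score */
};

/* Credit a block that shares a net with the open cluster.  operator[] on
 * std::map default-constructs a 0.0f entry on first access, so no explicit
 * "have we seen this block before?" check is needed. */
static void credit_block(gain_tables &t, int iblk, float conn_weight, float crit)
{
    t.connectiongain[iblk] += conn_weight;                     /* accumulate connection credit     */
    t.timinggain[iblk] = std::max(t.timinggain[iblk], crit);   /* keep the highest criticality seen */
}
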
#define NOT_VALID
Definition: vpr_types.h:100
int num_feasible_blocks
Definition: vpr_types.h:168
t_port * ports
static boolean is_logical_blk_in_pb(int iblk, t_pb *pb)
Definition: cluster.c:651
int num_logical_blocks
Definition: globals.c:17
void print_slack(float **slack, boolean slack_is_normalized, const char *fname)
Definition: path_delay.c:441
int * input_pin_class_size
int * marked_nets
Definition: vpr_types.h:147
void print_timing_graph(const char *fname)
Definition: path_delay.c:1388
e_detailed_routing_stages
Definition: cluster.c:70
static float get_molecule_gain(t_pack_molecule *molecule, std::map< int, float > &blk_gain)
Definition: cluster.c:2366
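
get_molecule_gain() above folds the per-block gains of a molecule's constituent logical blocks into a single score for candidate selection. A hedged standalone sketch of that aggregation; the map-lookup pattern mirrors the blk_gain argument, while the small multi-block bonus is purely illustrative:

#include <map>

/* Sum the accumulated gains of the blocks forming a molecule; blocks the
 * open cluster has never interacted with simply contribute nothing. */
static float molecule_gain(const int *blocks, int num_blocks,
                           const std::map<int, float> &blk_gain)
{
    float g = 0.0f;
    for (int i = 0; i < num_blocks; ++i) {
        std::map<int, float>::const_iterator it = blk_gain.find(blocks[i]);
        if (it != blk_gain.end())
            g += it->second;                 /* known block: add its gain */
    }
    return g + 0.0001f * (num_blocks - 1);   /* illustrative nudge toward multi-block molecules */
}
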
struct s_type_descriptor * type_descriptors
Definition: globals.c:38
struct s_net * vpack_net
Definition: globals.c:19
char * getEchoFileName(enum e_echo_files echo_option)
Definition: ReadOptions.c:122
t_logical_block ** logical_block_ptrs
Definition: vpr_types.h:248
#define OUTP
Definition: util.h:20
void free_pb(t_pb *pb)
Definition: vpr_utils.c:529
static void mark_and_update_partial_gain(int inet, enum e_gain_update gain_flag, int clustered_block, int port_on_clustered_block, int pin_on_clustered_block, boolean timing_driven, boolean connection_driven, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, t_slack *slacks)
Definition: cluster.c:1588
int ** lookahead_input_pins_used
Definition: vpr_types.h:161
static void alloc_and_load_cluster_info(INP int num_clb, INOUTP t_block *clb)
Definition: cluster.c:2176
int num_marked_nets
Definition: vpr_types.h:148
int mode
Definition: vpr_types.h:183
struct s_pb_stats * pb_stats
Definition: vpr_types.h:190
int num_ext_inputs_logical_block(int iblk)
Definition: vpr_utils.c:458
t_model * model
Definition: vpr_types.h:209
void alloc_and_load_legalizer_for_cluster(INP t_block *clb, INP int clb_index, INP const t_arch *arch)
int ny
Definition: globals.c:47
char * my_strdup(const char *str)
Definition: util.c:101
e_packer_algorithm
Definition: vpr_types.h:590
int ** output_nets
Definition: vpr_types.h:212
void save_cluster_solution(void)
messagelogger vpr_printf
Definition: util.c:17
t_pb_graph_node * pb_graph_node
Definition: vpr_types.h:180
void free_timing_graph(t_slack *slacks)
Definition: path_delay.c:390
char * port_class
int num_sinks
Definition: vpr_types.h:506
boolean is_non_clock_global
static enum e_block_pack_status try_place_logical_block_rec(INP t_pb_graph_node *pb_graph_node, INP int ilogical_block, INP t_pb *cb, OUTP t_pb **parent, INP int max_models, INP int max_cluster_size, INP int clb_index, INP int max_nets_in_pb_type, INP t_cluster_placement_stats *cluster_placement_stats_ptr, INP boolean is_root_of_chain, INP t_pb_graph_pin *chain_root_pin)
Definition: cluster.c:1293
boolean valid
Definition: vpr_types.h:249
void alloc_and_load_cluster_legality_checker(void)
static boolean primitive_feasible(int iblk, t_pb *cur_pb)
Definition: cluster.c:831
boolean get_next_primitive_list(INOUTP t_cluster_placement_stats *cluster_placement_stats, INP t_pack_molecule *molecule, INOUTP t_pb_graph_node **primitives_list, INP int clb_index)
int * node_block_port
Definition: vpr_types.h:508
t_pb_graph_pin ** input_pins
struct s_logical_block * logical_block
Definition: globals.c:20
int * marked_blocks
Definition: vpr_types.h:147
static void update_cluster_stats(INP t_pack_molecule *molecule, INP int clb_index, INP boolean *is_clock, INP boolean global_clocks, INP float alpha, INP float beta, INP boolean timing_driven, INP boolean connection_driven, INP t_slack *slacks)
Definition: cluster.c:1774
static void update_total_gain(float alpha, float beta, boolean timing_driven, boolean connection_driven, boolean global_clocks, t_pb *pb)
Definition: cluster.c:1684
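
update_total_gain() above blends the individual score tables into the single gain map used for candidate ranking, weighted by the timing trade-off alpha and the connection trade-off beta passed to do_clustering(). The sketch below shows one plausible blend, assuming a sharing-gain table alongside the maps listed earlier; the normalization and weighting are illustrative only, not the packer's exact formula:

#include <algorithm>
#include <map>

/* Blend per-block scores into one attraction value per candidate block. */
static void combine_gains(std::map<int, float> &gain,
                          const std::map<int, float> &sharinggain,   /* assumed table */
                          const std::map<int, float> &connectiongain,
                          const std::map<int, float> &timinggain,
                          float alpha, float beta, int num_pins_used)
{
    std::map<int, float>::const_iterator it;
    for (it = sharinggain.begin(); it != sharinggain.end(); ++it) {
        int   iblk   = it->first;
        float conn   = 0.0f;
        float timing = 0.0f;

        std::map<int, float>::const_iterator c = connectiongain.find(iblk);
        if (c != connectiongain.end())
            conn = c->second;
        std::map<int, float>::const_iterator t = timinggain.find(iblk);
        if (t != timinggain.end())
            timing = t->second;

        /* Connection/sharing term, normalized by the pins the block would use... */
        float g = ((1.0f - beta) * it->second + beta * conn) / std::max(1, num_pins_used);
        /* ...then traded off against timing criticality via alpha. */
        gain[iblk] = alpha * timing + (1.0f - alpha) * g;
    }
}
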