VPR-7.0
route_timing.c
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <assert.h>
#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "route_export.h"
#include "route_common.h"
#include "route_tree_timing.h"
#include "route_timing.h"
#include "heapsort.h"
#include "path_delay.h"
#include "net_delay.h"
#include "stats.h"
#include "ReadOptions.h"

/******************** Subroutines local to route_timing.c ********************/

static int get_max_pins_per_net(void);

static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,
        float target_criticality, float astar_fac);

static void timing_driven_expand_neighbours(struct s_heap *current, int inet,
        float bend_cost, float criticality_fac, int target_node,
        float astar_fac, int highfanout_rlim);

static float get_timing_driven_expected_cost(int inode, int target_node,
        float criticality_fac, float R_upstream);

static int get_expected_segs_to_target(int inode, int target_node,
        int *num_segs_ortho_dir_ptr);

static void update_rr_base_costs(int inet, float largest_criticality);

static void timing_driven_check_net_delays(float **net_delay);

static int mark_node_expansion_by_bin(int inet, int target_node,
        t_rt_node * rt_node);

/************************ Subroutine definitions *****************************/
43 
44 boolean try_timing_driven_route(struct s_router_opts router_opts,
45  float **net_delay, t_slack * slacks, t_ivec ** clb_opins_used_locally, boolean timing_analysis_enabled) {
46 
47  /* Timing-driven routing algorithm. The timing graph (includes slack) *
48  * must have already been allocated, and net_delay must have been allocated. *
49  * Returns TRUE if the routing succeeds, FALSE otherwise. */
50 
51  int itry, inet, ipin, i, bends, wirelength, total_wirelength, available_wirelength,
52  segments, *net_index, *sink_order /* [1..max_pins_per_net-1] */;
53  boolean success, is_routable, rip_up_local_opins;
54  float *pin_criticality /* [1..max_pins_per_net-1] */, pres_fac, *sinks,
55  critical_path_delay, init_timing_criticality_val;
56  t_rt_node **rt_node_of_sink; /* [1..max_pins_per_net-1] */
57  clock_t begin,end;
58  sinks = (float*)my_malloc(sizeof(float) * num_nets);
59  net_index = (int*)my_malloc(sizeof(int) * num_nets);
60 
61  for (i = 0; i < num_nets; i++) {
62  sinks[i] = clb_net[i].num_sinks;
63  net_index[i] = i;
64  }
65  heapsort(net_index, sinks, num_nets, 1);
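
    /* heapsort() orders net_index by each net's fanout (the sinks[] values),  *
     * so the routing loop below considers the highest-fanout nets first.      */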

    alloc_timing_driven_route_structs(&pin_criticality, &sink_order,
            &rt_node_of_sink);

    /* First do one routing iteration ignoring congestion to get reasonable   *
     * net delay estimates. Set criticalities to 1 when timing analysis is on *
     * to optimize timing, and to 0 when it is off to optimize routability.   */

    if (timing_analysis_enabled) {
        init_timing_criticality_val = 1.;
    } else {
        init_timing_criticality_val = 0.;
    }

    for (inet = 0; inet < num_nets; inet++) {
        if (clb_net[inet].is_global == FALSE) {
            for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
                slacks->timing_criticality[inet][ipin] = init_timing_criticality_val;
#ifdef PATH_COUNTING
                slacks->path_criticality[inet][ipin] = init_timing_criticality_val;
#endif
            }
        } else {
            /* Set delay of global signals to zero. Non-global net delays are  *
             * set by update_net_delays_from_route_tree() inside               *
             * timing_driven_route_net(), which is only called for non-global  *
             * nets.                                                           */
            for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
                net_delay[inet][ipin] = 0.;
            }
        }
    }

    pres_fac = router_opts.first_iter_pres_fac; /* Typically 0 -> ignore cong. */

    for (itry = 1; itry <= router_opts.max_router_iterations; itry++) {
        begin = clock();
        vpr_printf(TIO_MESSAGE_INFO, "\n");
        vpr_printf(TIO_MESSAGE_INFO, "Routing iteration: %d\n", itry);

        for (i = 0; i < num_nets; i++) {
            inet = net_index[i];
            if (clb_net[inet].is_global == FALSE) { /* Skip global nets. */

                is_routable = timing_driven_route_net(inet, pres_fac,
                        router_opts.max_criticality,
                        router_opts.criticality_exp, router_opts.astar_fac,
                        router_opts.bend_cost, pin_criticality,
                        sink_order, rt_node_of_sink, net_delay[inet], slacks);

                /* Impossible to route? (disconnected rr_graph) */

                if (!is_routable) {
                    vpr_printf(TIO_MESSAGE_INFO, "Routing failed.\n");
                    free_timing_driven_route_structs(pin_criticality,
                            sink_order, rt_node_of_sink);
                    free(net_index);
                    free(sinks);
                    return (FALSE);
                }
            }
        }

        if (itry == 1) {
            /* Early exit for cases where it is obvious that a successful      *
             * route will not be found. Heuristic: if the total wirelength     *
             * used in the first routing iteration exceeds a set fraction of   *
             * the total available wirelength, give up.                        */
            total_wirelength = 0;
            available_wirelength = 0;

            for (i = 0; i < num_rr_nodes; i++) {
                if (rr_node[i].type == CHANX || rr_node[i].type == CHANY) {
                    available_wirelength += 1 + rr_node[i].xhigh
                            - rr_node[i].xlow + rr_node[i].yhigh
                            - rr_node[i].ylow;
                }
            }

            for (inet = 0; inet < num_nets; inet++) {
                if (clb_net[inet].is_global == FALSE
                        && clb_net[inet].num_sinks != 0) { /* Globals don't count. */
                    get_num_bends_and_length(inet, &bends, &wirelength,
                            &segments);

                    total_wirelength += wirelength;
                }
            }
            vpr_printf(TIO_MESSAGE_INFO, "Wire length after first iteration %d, total available wire length %d, ratio %g\n",
                    total_wirelength, available_wirelength,
                    (float) (total_wirelength) / (float) (available_wirelength));
            if ((float) (total_wirelength) / (float) (available_wirelength) > FIRST_ITER_WIRELENTH_LIMIT) {
                vpr_printf(TIO_MESSAGE_INFO, "Wire length usage ratio exceeds limit of %g, fail routing.\n",
                        FIRST_ITER_WIRELENTH_LIMIT);
                free_timing_driven_route_structs(pin_criticality, sink_order,
                        rt_node_of_sink);
                free(net_index);
                free(sinks);
                return FALSE;
            }
        }

        /* Make sure any CLB OPINs used up by subblocks being hooked directly *
         * to them are reserved for that purpose.                             */

        if (itry == 1)
            rip_up_local_opins = FALSE;
        else
            rip_up_local_opins = TRUE;

        reserve_locally_used_opins(pres_fac, rip_up_local_opins,
                clb_opins_used_locally);

        /* Pathfinder guys quit after finding a feasible route. I may want to *
         * keep going longer, trying to improve timing. Think about this some.*/

        success = feasible_routing();
        if (success) {
            vpr_printf(TIO_MESSAGE_INFO, "Successfully routed after %d routing iterations.\n", itry);
            free_timing_driven_route_structs(pin_criticality, sink_order, rt_node_of_sink);
#ifdef DEBUG
            timing_driven_check_net_delays(net_delay);
#endif
            free(net_index);
            free(sinks);
            return (TRUE);
        }

        if (itry == 1) {
            pres_fac = router_opts.initial_pres_fac;
            pathfinder_update_cost(pres_fac, 0.); /* Acc_fac=0 for first iter. */
        } else {
            pres_fac *= router_opts.pres_fac_mult;

            /* Avoid overflow for high iteration counts, even if acc_cost is big */
            pres_fac = std::min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5));

            pathfinder_update_cost(pres_fac, router_opts.acc_fac);
        }
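
        /* The present-congestion factor thus grows geometrically across       *
         * iterations: with, e.g., initial_pres_fac = 0.5 and                  *
         * pres_fac_mult = 1.3, successive iterations use 0.5, 0.65, 0.845,    *
         * ..., gradually forcing nets off of overused routing resources.      */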

        if (timing_analysis_enabled) {
            /* Update slack values by doing another timing analysis.          *
             * timing_driven_route_net() updated the net delay values.        */

            load_timing_graph_net_delays(net_delay);

#ifdef HACK_LUT_PIN_SWAPPING
            do_timing_analysis(slacks, FALSE, TRUE, FALSE);
#else
            do_timing_analysis(slacks, FALSE, FALSE, FALSE);
#endif

            /* Print critical path delay - convert to nanoseconds. */
            critical_path_delay = get_critical_path_delay();
            vpr_printf(TIO_MESSAGE_INFO, "Critical path: %g ns\n", critical_path_delay);
        } else {
            /* If timing analysis is not enabled, make sure that the           *
             * criticalities and the net_delays stay at 0 so that wirelength   *
             * can be optimized.                                               */

            for (inet = 0; inet < num_nets; inet++) {
                for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
                    slacks->timing_criticality[inet][ipin] = 0.;
#ifdef PATH_COUNTING
                    slacks->path_criticality[inet][ipin] = 0.;
#endif
                    net_delay[inet][ipin] = 0.;
                }
            }
        }

        end = clock();
#ifdef CLOCKS_PER_SEC
        vpr_printf(TIO_MESSAGE_INFO, "Routing iteration took %g seconds.\n", (float)(end - begin) / CLOCKS_PER_SEC);
#else
        vpr_printf(TIO_MESSAGE_INFO, "Routing iteration took %g seconds.\n", (float)(end - begin) / CLK_PER_SEC);
#endif

        fflush(stdout);
    }

    vpr_printf(TIO_MESSAGE_INFO, "Routing failed.\n");
    free_timing_driven_route_structs(pin_criticality, sink_order,
            rt_node_of_sink);
    free(net_index);
    free(sinks);
    return (FALSE);
}

void alloc_timing_driven_route_structs(float **pin_criticality_ptr,
        int **sink_order_ptr, t_rt_node *** rt_node_of_sink_ptr) {

    /* Allocates all the structures needed only by the timing-driven router. */

    int max_pins_per_net;
    float *pin_criticality;
    int *sink_order;
    t_rt_node **rt_node_of_sink;

    max_pins_per_net = get_max_pins_per_net();

    pin_criticality = (float *) my_malloc(
            (max_pins_per_net - 1) * sizeof(float));
    *pin_criticality_ptr = pin_criticality - 1; /* First sink is pin #1. */
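
    /* Note the offset trick used for all three arrays in this routine: a      *
     * block of (max_pins_per_net - 1) elements is allocated, and the stored   *
     * pointer is shifted down by one so callers can index it directly by      *
     * sink pin number [1..max_pins_per_net-1]. The matching frees in          *
     * free_timing_driven_route_structs() must add the offset back before      *
     * calling free().                                                         */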

    sink_order = (int *) my_malloc((max_pins_per_net - 1) * sizeof(int));
    *sink_order_ptr = sink_order - 1;

    rt_node_of_sink = (t_rt_node **) my_malloc(
            (max_pins_per_net - 1) * sizeof(t_rt_node *));
    *rt_node_of_sink_ptr = rt_node_of_sink - 1;

    alloc_route_tree_timing_structs();
}

void free_timing_driven_route_structs(float *pin_criticality, int *sink_order,
        t_rt_node ** rt_node_of_sink) {

    /* Frees all the structures needed only by the timing-driven router. */

    free(pin_criticality + 1); /* Starts at index 1. */
    free(sink_order + 1);
    free(rt_node_of_sink + 1);
    free_route_tree_timing_structs();
}

static int get_max_pins_per_net(void) {

    /* Returns the largest number of pins on any non-global net. */

    int inet, max_pins_per_net;

    max_pins_per_net = 0;
    for (inet = 0; inet < num_nets; inet++) {
        if (clb_net[inet].is_global == FALSE) {
            max_pins_per_net = std::max(max_pins_per_net,
                    (clb_net[inet].num_sinks + 1));
        }
    }

    return (max_pins_per_net);
}

boolean timing_driven_route_net(int inet, float pres_fac, float max_criticality,
        float criticality_exp, float astar_fac, float bend_cost,
        float *pin_criticality, int *sink_order,
        t_rt_node ** rt_node_of_sink, float *net_delay, t_slack * slacks) {

    /* Returns TRUE as long as it found some way to hook up this net, even if *
     * that way resulted in overuse of resources (congestion). If there is no *
     * way to route this net, even ignoring congestion, it returns FALSE. In  *
     * this case the rr_graph is disconnected and you can give up. If slacks  *
     * = NULL, give each pin a dummy criticality of 1 (see below).            */

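    /* Overview: rip up any existing routing for this net, compute a          *
     * criticality for each sink from the latest timing analysis, route to    *
     * the sinks one at a time in decreasing order of criticality (each       *
     * search is an A*-style wave expansion seeded with the partial route     *
     * tree), and finally record the new net delays for the next timing       *
     * analysis.                                                              */
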
    int ipin, num_sinks, itarget, target_pin, target_node, inode;
    float target_criticality, old_tcost, new_tcost, largest_criticality,
            old_back_cost, new_back_cost;
    t_rt_node *rt_root;
    struct s_heap *current;
    struct s_trace *new_route_start_tptr;
    int highfanout_rlim;

    /* Rip up any old routing. */

    pathfinder_update_one_cost(trace_head[inet], -1, pres_fac);
    free_traceback(inet);

    for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
        if (!slacks) {
            /* Use a criticality of 1. This makes all nets critical. Note:     *
             * there is a big difference between setting pin criticality to 0  *
             * and setting it to 1. If pin criticality is set to 0, the        *
             * current path delay is completely ignored during routing. With   *
             * a pin criticality of 1, the current path delay to the pin is    *
             * always considered and optimized for.                            */
            pin_criticality[ipin] = 1.0;
        } else {
#ifdef PATH_COUNTING
            /* Pin criticality is based on a weighted sum of timing and path criticalities. */
            pin_criticality[ipin] = ROUTE_PATH_WEIGHT * slacks->path_criticality[inet][ipin]
                    + (1 - ROUTE_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin];
#else
            /* Pin criticality is based on only timing criticality. */
            pin_criticality[ipin] = slacks->timing_criticality[inet][ipin];
#endif
            /* Currently, pin criticality is between 0 and 1. Now shift it     *
             * downwards by 1 - max_criticality (max_criticality is 0.99 by    *
             * default, so shift down by 0.01) and cut off at 0. This means    *
             * that all pins with small criticalities (< 0.01) get criticality *
             * 0 and are ignored entirely, and everything else becomes a bit   *
             * less critical. This effect becomes more pronounced if           *
             * max_criticality is set lower.                                   */
            assert(pin_criticality[ipin] > -0.01 && pin_criticality[ipin] < 1.01);
            pin_criticality[ipin] = std::max(pin_criticality[ipin] - (1.0 - max_criticality), 0.0);

            /* Take pin criticality to some power (1 by default). */
            pin_criticality[ipin] = pow(pin_criticality[ipin], criticality_exp);

            /* Cut off pin criticality at max_criticality. */
            pin_criticality[ipin] = std::min(pin_criticality[ipin], max_criticality);
        }
    }
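
    /* Worked example of the transform above: with max_criticality = 0.99 and  *
     * criticality_exp = 1.0, a raw timing criticality of 0.60 becomes         *
     * max(0.60 - 0.01, 0) = 0.59, then 0.59^1.0 = 0.59, then                  *
     * min(0.59, 0.99) = 0.59; a raw value of 0.005 is clamped to 0 and that   *
     * sink is routed purely for congestion.                                   */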

    num_sinks = clb_net[inet].num_sinks;
    heapsort(sink_order, pin_criticality, num_sinks, 0);

    /* Update base costs according to fanout and criticality rules */

    largest_criticality = pin_criticality[sink_order[1]];
    update_rr_base_costs(inet, largest_criticality);

    mark_ends(inet); /* Only needed to check for multiply-connected SINKs */

    rt_root = init_route_tree_to_source(inet);

    for (itarget = 1; itarget <= num_sinks; itarget++) {
        target_pin = sink_order[itarget];
        target_node = net_rr_terminals[inet][target_pin];

        target_criticality = pin_criticality[target_pin];

        highfanout_rlim = mark_node_expansion_by_bin(inet, target_node,
                rt_root);

        add_route_tree_to_heap(rt_root, target_node, target_criticality,
                astar_fac);

        current = get_heap_head();

        if (current == NULL) { /* Infeasible routing. No possible path for net. */
            vpr_printf(TIO_MESSAGE_INFO, "Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
                    inet, clb_net[inet].name, itarget);
            reset_path_costs();
            free_route_tree(rt_root);
            return (FALSE);
        }

        inode = current->index;

        while (inode != target_node) {
            old_tcost = rr_node_route_inf[inode].path_cost;
            new_tcost = current->cost;

            if (old_tcost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
                old_back_cost = HUGE_POSITIVE_FLOAT;
            else
                old_back_cost = rr_node_route_inf[inode].backward_path_cost;

            new_back_cost = current->backward_path_cost;

            /* I only re-expand a node if both the "known" backward cost is    *
             * lower in the new expansion (this is necessary to prevent loops  *
             * from forming in the routing and causing havoc) *and* the        *
             * expected total cost to the sink is lower than the old value.    *
             * Different R_upstream values could make a path with lower        *
             * back_path_cost less desirable than one with higher cost. Test   *
             * whether or not I should disallow re-expansion based on a        *
             * higher total cost.                                              */

            if (old_tcost > new_tcost && old_back_cost > new_back_cost) {
                rr_node_route_inf[inode].prev_node = current->u.prev_node;
                rr_node_route_inf[inode].prev_edge = current->prev_edge;
                rr_node_route_inf[inode].path_cost = new_tcost;
                rr_node_route_inf[inode].backward_path_cost = new_back_cost;

                if (old_tcost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
                    add_to_mod_list(&rr_node_route_inf[inode].path_cost);

                timing_driven_expand_neighbours(current, inet, bend_cost,
                        target_criticality, target_node, astar_fac,
                        highfanout_rlim);
            }

            free_heap_data(current);
            current = get_heap_head();

            if (current == NULL) { /* Impossible routing. No path for net. */
                vpr_printf(TIO_MESSAGE_INFO, "Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
                        inet, clb_net[inet].name, itarget);
                reset_path_costs();
                free_route_tree(rt_root);
                return (FALSE);
            }

            inode = current->index;
        }

        /* NB: in the code below I keep two records of the partial routing:    *
         * the traceback and the route_tree. The route_tree enables fast       *
         * recomputation of the Elmore delay to each node in the partial       *
         * routing. The traceback lets me reuse all the routines written for   *
         * breadth-first routing, which all take a traceback structure as      *
         * input. Before this routine exits, the route_tree structure is       *
         * destroyed; only the traceback is needed at that point.              */

        rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */
        new_route_start_tptr = update_traceback(current, inet);
        rt_node_of_sink[target_pin] = update_route_tree(current);
        free_heap_data(current);
        pathfinder_update_one_cost(new_route_start_tptr, 1, pres_fac);

        empty_heap();
        reset_path_costs();
    }

    /* For later timing analysis. */

    update_net_delays_from_route_tree(net_delay, rt_node_of_sink, inet);
    free_route_tree(rt_root);
    return (TRUE);
}

static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,
        float target_criticality, float astar_fac) {

    /* Puts the entire partial routing below and including rt_node onto the   *
     * heap (except for those parts marked as not to be expanded) by calling  *
     * itself recursively.                                                    */

    int inode;
    t_rt_node *child_node;
    t_linked_rt_edge *linked_rt_edge;
    float tot_cost, backward_path_cost, R_upstream;

    /* Pre-order depth-first traversal */

    if (rt_node->re_expand) {
        inode = rt_node->inode;
        backward_path_cost = target_criticality * rt_node->Tdel;
        R_upstream = rt_node->R_upstream;
        tot_cost = backward_path_cost
                + astar_fac
                        * get_timing_driven_expected_cost(inode, target_node,
                                target_criticality, R_upstream);
        node_to_heap(inode, tot_cost, NO_PREVIOUS, NO_PREVIOUS,
                backward_path_cost, R_upstream);
    }

    linked_rt_edge = rt_node->u.child_list;

    while (linked_rt_edge != NULL) {
        child_node = linked_rt_edge->child;
        add_route_tree_to_heap(child_node, target_node, target_criticality,
                astar_fac);
        linked_rt_edge = linked_rt_edge->next;
    }
}

static void timing_driven_expand_neighbours(struct s_heap *current, int inet,
        float bend_cost, float criticality_fac, int target_node,
        float astar_fac, int highfanout_rlim) {

    /* Puts all the rr_nodes adjacent to current on the heap. rr_nodes        *
     * outside the expanded bounding box specified in route_bb are not added  *
     * to the heap.                                                           */

    int iconn, to_node, num_edges, inode, iswitch, target_x, target_y;
    t_rr_type from_type, to_type;
    float new_tot_cost, old_back_pcost, new_back_pcost, R_upstream;
    float new_R_upstream, Tdel;

    inode = current->index;
    old_back_pcost = current->backward_path_cost;
    R_upstream = current->R_upstream;
    num_edges = rr_node[inode].num_edges;

    target_x = rr_node[target_node].xhigh;
    target_y = rr_node[target_node].yhigh;

    for (iconn = 0; iconn < num_edges; iconn++) {
        to_node = rr_node[inode].edges[iconn];

        if (rr_node[to_node].xhigh < route_bb[inet].xmin
                || rr_node[to_node].xlow > route_bb[inet].xmax
                || rr_node[to_node].yhigh < route_bb[inet].ymin
                || rr_node[to_node].ylow > route_bb[inet].ymax)
            continue; /* Node is outside (expanded) bounding box. */

        if (clb_net[inet].num_sinks >= HIGH_FANOUT_NET_LIM) {
            if (rr_node[to_node].xhigh < target_x - highfanout_rlim
                    || rr_node[to_node].xlow > target_x + highfanout_rlim
                    || rr_node[to_node].yhigh < target_y - highfanout_rlim
                    || rr_node[to_node].ylow > target_y + highfanout_rlim)
                continue; /* Node is outside high fanout bin. */
        }

        /* Prune away IPINs that lead to blocks other than the target one.     *
         * Avoids the issue of how to cost them properly so they don't get     *
         * expanded before more promising routes, but makes route-throughs     *
         * (via CLBs) impossible. Change this if you want to investigate       *
         * route-throughs.                                                     */

        to_type = rr_node[to_node].type;
        if (to_type == IPIN
                && (rr_node[to_node].xhigh != target_x
                        || rr_node[to_node].yhigh != target_y))
            continue;

        /* new_back_pcost stores the "known" part of the cost to this node --  *
         * the congestion cost of all the routing resources back to the        *
         * existing route plus the known delay of the total path back to the   *
         * source. new_tot_cost is this "known" backward cost + an expected    *
         * cost to get to the target.                                          */

        new_back_pcost = old_back_pcost
                + (1. - criticality_fac) * get_rr_cong_cost(to_node);

        iswitch = rr_node[inode].switches[iconn];
        if (switch_inf[iswitch].buffered) {
            new_R_upstream = switch_inf[iswitch].R;
        } else {
            new_R_upstream = R_upstream + switch_inf[iswitch].R;
        }

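        /* Elmore delay of the step onto to_node: the node is treated as a     *
         * lumped capacitance driven through the upstream resistance plus      *
         * half its own metal resistance (the standard first-order             *
         * approximation for a distributed RC wire), plus the switch's         *
         * intrinsic delay. Delay is weighted by criticality_fac while         *
         * congestion (above) is weighted by (1 - criticality_fac), so         *
         * critical connections optimize delay and non-critical ones           *
         * congestion.                                                         */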
        Tdel = rr_node[to_node].C * (new_R_upstream + 0.5 * rr_node[to_node].R);
        Tdel += switch_inf[iswitch].Tdel;
        new_R_upstream += rr_node[to_node].R;
        new_back_pcost += criticality_fac * Tdel;

        if (bend_cost != 0.) {
            from_type = rr_node[inode].type;
            to_type = rr_node[to_node].type;
            if ((from_type == CHANX && to_type == CHANY)
                    || (from_type == CHANY && to_type == CHANX))
                new_back_pcost += bend_cost;
        }

        new_tot_cost = new_back_pcost
                + astar_fac
                        * get_timing_driven_expected_cost(to_node, target_node,
                                criticality_fac, new_R_upstream);

        node_to_heap(to_node, new_tot_cost, inode, iconn, new_back_pcost,
                new_R_upstream);

    } /* End for all neighbours */
}

static float get_timing_driven_expected_cost(int inode, int target_node,
        float criticality_fac, float R_upstream) {

    /* Determines the expected cost (due to both delay and resource cost) of  *
     * reaching the target node from inode. It doesn't include the cost of    *
     * inode itself -- that's already in the "known" path_cost.               */

    t_rr_type rr_type;
    int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir;
    float expected_cost, cong_cost, Tdel;

    rr_type = rr_node[inode].type;

    if (rr_type == CHANX || rr_type == CHANY) {
        num_segs_same_dir = get_expected_segs_to_target(inode, target_node,
                &num_segs_ortho_dir);
        cost_index = rr_node[inode].cost_index;
        ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;

        cong_cost = num_segs_same_dir * rr_indexed_data[cost_index].base_cost
                + num_segs_ortho_dir
                        * rr_indexed_data[ortho_cost_index].base_cost;
        cong_cost += rr_indexed_data[IPIN_COST_INDEX].base_cost
                + rr_indexed_data[SINK_COST_INDEX].base_cost;

        Tdel = num_segs_same_dir * rr_indexed_data[cost_index].T_linear
                + num_segs_ortho_dir
                        * rr_indexed_data[ortho_cost_index].T_linear
                + num_segs_same_dir * num_segs_same_dir
                        * rr_indexed_data[cost_index].T_quadratic
                + num_segs_ortho_dir * num_segs_ortho_dir
                        * rr_indexed_data[ortho_cost_index].T_quadratic
                + R_upstream
                        * (num_segs_same_dir
                                * rr_indexed_data[cost_index].C_load
                                + num_segs_ortho_dir
                                        * rr_indexed_data[ortho_cost_index].C_load);

        Tdel += rr_indexed_data[IPIN_COST_INDEX].T_linear;

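        /* The delay estimate is linear in the number of segments for          *
         * buffered segment types and quadratic (T_quadratic * N^2) for        *
         * unbuffered, pass-transistor-driven types, where resistance          *
         * accumulates along the chain; the R_upstream * C_load term accounts  *
         * for the upstream resistance charging the capacitance of the         *
         * segments still to be crossed.                                       */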
        expected_cost = criticality_fac * Tdel
                + (1. - criticality_fac) * cong_cost;
        return (expected_cost);
    }

    else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */
        return (rr_indexed_data[SINK_COST_INDEX].base_cost);
    }

    else { /* Change this if you want to investigate route-throughs */
        return (0.);
    }
}

/* Macro used below to ensure that fractions are rounded up, but floating    *
 * point values very close to an integer are rounded to that integer.        */

#define ROUND_UP(x) (ceil (x - 0.001))
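
/* For example, ROUND_UP(1.2) evaluates to 2.0, while ROUND_UP(2.0005) -- a  *
 * value that is an integer up to floating-point noise -- evaluates to 2.0   *
 * rather than 3.0.                                                           */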

static int get_expected_segs_to_target(int inode, int target_node,
        int *num_segs_ortho_dir_ptr) {

    /* Returns the number of segments of the same type as inode that will be  *
     * needed to reach target_node (not including inode) in each direction    *
     * (the same direction (horizontal or vertical) as inode and the          *
     * orthogonal direction).                                                 */

    t_rr_type rr_type;
    int target_x, target_y, num_segs_same_dir, cost_index, ortho_cost_index;
    int no_need_to_pass_by_clb;
    float inv_length, ortho_inv_length, ylow, yhigh, xlow, xhigh;

    target_x = rr_node[target_node].xlow;
    target_y = rr_node[target_node].ylow;
    cost_index = rr_node[inode].cost_index;
    inv_length = rr_indexed_data[cost_index].inv_length;
    ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;
    ortho_inv_length = rr_indexed_data[ortho_cost_index].inv_length;
    rr_type = rr_node[inode].type;

    if (rr_type == CHANX) {
        ylow = rr_node[inode].ylow;
        xhigh = rr_node[inode].xhigh;
        xlow = rr_node[inode].xlow;

        /* Count vertical (orthogonal to inode) segs first. */

        if (ylow > target_y) { /* Coming from a row above target? */
            *num_segs_ortho_dir_ptr =
                    (int)(ROUND_UP((ylow - target_y + 1.) * ortho_inv_length));
            no_need_to_pass_by_clb = 1;
        } else if (ylow < target_y - 1) { /* Below the CLB bottom? */
            *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_y - ylow) *
                    ortho_inv_length));
            no_need_to_pass_by_clb = 1;
        } else { /* In a row that passes by target CLB */
            *num_segs_ortho_dir_ptr = 0;
            no_need_to_pass_by_clb = 0;
        }

        /* Now count horizontal (same dir. as inode) segs. */

        if (xlow > target_x + no_need_to_pass_by_clb) {
            num_segs_same_dir = (int)(ROUND_UP((xlow - no_need_to_pass_by_clb -
                    target_x) * inv_length));
        } else if (xhigh < target_x - no_need_to_pass_by_clb) {
            num_segs_same_dir = (int)(ROUND_UP((target_x - no_need_to_pass_by_clb -
                    xhigh) * inv_length));
        } else {
            num_segs_same_dir = 0;
        }
    }

    else { /* inode is a CHANY */
        ylow = rr_node[inode].ylow;
        yhigh = rr_node[inode].yhigh;
        xlow = rr_node[inode].xlow;

        /* Count horizontal (orthogonal to inode) segs first. */

        if (xlow > target_x) { /* Coming from a column right of target? */
            *num_segs_ortho_dir_ptr = (int)(
                    ROUND_UP((xlow - target_x + 1.) * ortho_inv_length));
            no_need_to_pass_by_clb = 1;
        } else if (xlow < target_x - 1) { /* Left of and not adjacent to the CLB? */
            *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_x - xlow) *
                    ortho_inv_length));
            no_need_to_pass_by_clb = 1;
        } else { /* In a column that passes by target CLB */
            *num_segs_ortho_dir_ptr = 0;
            no_need_to_pass_by_clb = 0;
        }

        /* Now count vertical (same dir. as inode) segs. */

        if (ylow > target_y + no_need_to_pass_by_clb) {
            num_segs_same_dir = (int)(ROUND_UP((ylow - no_need_to_pass_by_clb -
                    target_y) * inv_length));
        } else if (yhigh < target_y - no_need_to_pass_by_clb) {
            num_segs_same_dir = (int)(ROUND_UP((target_y - no_need_to_pass_by_clb -
                    yhigh) * inv_length));
        } else {
            num_segs_same_dir = 0;
        }
    }

    return (num_segs_same_dir);
}

static void update_rr_base_costs(int inet, float largest_criticality) {

    /* Changes the base costs of different types of rr_nodes according to the *
     * criticality, fanout, etc. of the current net being routed (inet).      */

    float fanout, factor;
    int index;

    fanout = clb_net[inet].num_sinks;

    /* Other reasonable values for factor include fanout and 1 */
    factor = sqrt(fanout);
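
    /* For example, a net with 16 sinks gets factor = 4, so the base cost of   *
     * each pass-transistor segment type below is scaled up fourfold relative  *
     * to its saved value for the duration of this net's routing.              */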

    for (index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++) {
        if (rr_indexed_data[index].T_quadratic > 0.) { /* pass transistor */
            rr_indexed_data[index].base_cost =
                    rr_indexed_data[index].saved_base_cost * factor;
        } else {
            rr_indexed_data[index].base_cost =
                    rr_indexed_data[index].saved_base_cost;
        }
    }
}

/* Nets with high fanout can take a very long time to route. Instead of      *
 * searching the net's entire bounding box for each sink, the router         *
 * restricts expansion to a bin around the sink to speed things up.          */
static int mark_node_expansion_by_bin(int inet, int target_node,
        t_rt_node * rt_node) {
    int target_x, target_y;
    int rlim = 1;
    int inode;
    float area;
    boolean success;
    t_linked_rt_edge *linked_rt_edge;
    t_rt_node * child_node;

    target_x = rr_node[target_node].xlow;
    target_y = rr_node[target_node].ylow;

    if (clb_net[inet].num_sinks < HIGH_FANOUT_NET_LIM) {
        /* This algorithm only applies to high fanout nets */
        return 1;
    }

    area = (route_bb[inet].xmax - route_bb[inet].xmin)
            * (route_bb[inet].ymax - route_bb[inet].ymin);
    if (area <= 0) {
        area = 1;
    }

    rlim = (int)(ceil(sqrt((float) area / (float) clb_net[inet].num_sinks)));
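
    /* The initial bin radius is roughly the side length of the average        *
     * bounding-box area per sink: e.g., a 100 x 100 bounding box with 400     *
     * sinks gives rlim = ceil(sqrt(10000 / 400)) = 5.                         */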
    if (rt_node == NULL || rt_node->u.child_list == NULL) {
        /* If the traceback is unknown, set the bin radius to the size of the chip */
        rlim = std::max(nx + 2, ny + 2);
        return rlim;
    }

    success = FALSE;
    /* Quickly determine a feasible bin radius in which to route this sink.    *
     * This is necessary to prevent extremely long runtimes for high fanout    *
     * nets; in the best case it reduces complexity from O(N^2 log N) to       *
     * O(N log N) (Swartz fast router).                                        */
    linked_rt_edge = rt_node->u.child_list;
    while (success == FALSE && linked_rt_edge != NULL) {
        while (linked_rt_edge != NULL && success == FALSE) {
            child_node = linked_rt_edge->child;
            inode = child_node->inode;
            if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
                if (rr_node[inode].xlow <= target_x + rlim
                        && rr_node[inode].xhigh >= target_x - rlim
                        && rr_node[inode].ylow <= target_y + rlim
                        && rr_node[inode].yhigh >= target_y - rlim) {
                    success = TRUE;
                }
            }
            linked_rt_edge = linked_rt_edge->next;
        }

        if (success == FALSE) {
            if (rlim > std::max(nx + 2, ny + 2)) {
                vpr_printf(TIO_MESSAGE_ERROR, "VPR internal error, net %s has paths that are not found in traceback.\n",
                        clb_net[inet].name);
                exit(1);
            }
            /* If the sink is not in the bin, double the bin size until it fits */
            rlim *= 2;
        } else {
            /* Sometimes we might just catch a wire in the end segment; give   *
             * it some channel space to explore.                               */
            rlim += 4;
        }
        linked_rt_edge = rt_node->u.child_list;
    }

    /* Redetermine expansion based on rlim */
    linked_rt_edge = rt_node->u.child_list;
    while (linked_rt_edge != NULL) {
        child_node = linked_rt_edge->child;
        inode = child_node->inode;
        if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
            if (rr_node[inode].xlow <= target_x + rlim
                    && rr_node[inode].xhigh >= target_x - rlim
                    && rr_node[inode].ylow <= target_y + rlim
                    && rr_node[inode].yhigh >= target_y - rlim) {
                child_node->re_expand = TRUE;
            } else {
                child_node->re_expand = FALSE;
            }
        }
        linked_rt_edge = linked_rt_edge->next;
    }
    return rlim;
}

#define ERROR_TOL 0.0001
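
/* ERROR_TOL is used below both as an absolute bound (for nets whose         *
 * recomputed delay is exactly zero, i.e., global nets) and as a relative    *
 * error bound for all other nets.                                           */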

static void timing_driven_check_net_delays(float **net_delay) {

    /* Checks that the net delays computed incrementally during timing-driven *
     * routing match those computed from scratch by the net_delay.c module.   */

    int inet, ipin;
    float **net_delay_check;

    t_chunk list_head_net_delay_check_ch = {NULL, 0, NULL};

    /*struct s_linked_vptr *ch_list_head_net_delay_check;*/

    net_delay_check = alloc_net_delay(&list_head_net_delay_check_ch, clb_net,
            num_nets);
    load_net_delay_from_routing(net_delay_check, clb_net, num_nets);

    for (inet = 0; inet < num_nets; inet++) {
        for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
            if (net_delay_check[inet][ipin] == 0.) { /* Should be only GLOBAL nets */
                if (fabs(net_delay[inet][ipin]) > ERROR_TOL) {
                    vpr_printf(TIO_MESSAGE_ERROR, "in timing_driven_check_net_delays: net %d pin %d.\n",
                            inet, ipin);
                    vpr_printf(TIO_MESSAGE_ERROR, "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
                            net_delay[inet][ipin], net_delay_check[inet][ipin]);
                    exit(1);
                }
            } else {
                if (fabs(1.0 - net_delay[inet][ipin] / net_delay_check[inet][ipin]) > ERROR_TOL) {
                    vpr_printf(TIO_MESSAGE_ERROR, "in timing_driven_check_net_delays: net %d pin %d.\n",
                            inet, ipin);
                    vpr_printf(TIO_MESSAGE_ERROR, "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
                            net_delay[inet][ipin], net_delay_check[inet][ipin]);
                    exit(1);
                }
            }
        }
    }

    free_net_delay(net_delay_check, &list_head_net_delay_check_ch);
    vpr_printf(TIO_MESSAGE_INFO, "Completed net delay value cross check successfully.\n");
}