d0/d67/route__timing_8c_source.html

 #include <stdio.h>

 #include <math.h>

 #include <time.h>

 #include <assert.h>

 #include "util.h"

 #include "vpr_types.h"

 #include "globals.h"

 #include "route_export.h"

 #include "route_common.h"

 #include "route_tree_timing.h"

 #include "route_timing.h"

 #include "heapsort.h"

 #include "path_delay.h"

 #include "net_delay.h"

 #include "stats.h"

 #include "ReadOptions.h"


 /******************** Subroutines local to route_timing.c ********************/


 static int get_max_pins_per_net(void);


 static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,

         float target_criticality, float astar_fac);


 static void timing_driven_expand_neighbours(struct s_heap *current, int inet,

         float bend_cost, float criticality_fac, int target_node,

         float astar_fac, int highfanout_rlim);


 static float get_timing_driven_expected_cost(int inode, int target_node,

         float criticality_fac, float R_upstream);


 static int get_expected_segs_to_target(int inode, int target_node,

         int *num_segs_ortho_dir_ptr);


 static void update_rr_base_costs(int inet, float largest_criticality);


 static void timing_driven_check_net_delays(float **net_delay);


 static int mark_node_expansion_by_bin(int inet, int target_node,

         t_rt_node * rt_node);


 /************************ Subroutine definitions *****************************/


 boolean try_timing_driven_route(struct s_router_opts router_opts,
         float **net_delay, t_slack * slacks, t_ivec ** clb_opins_used_locally, boolean timing_analysis_enabled) {

     /* Timing-driven routing algorithm.  The timing graph (includes slack)      *
      * must have already been allocated, and net_delay must have been allocated. *
      *                                                                           *
      * Each iteration re-routes every non-global net with                        *
      * timing_driven_route_net(), reserves locally used OPINs, tests the result  *
      * with feasible_routing(), and otherwise updates the congestion costs (and, *
      * when timing_analysis_enabled, the slacks) before trying again.            *
      *                                                                           *
      * router_opts            - iteration limit, pres_fac/acc_fac schedule and   *
      *                          the per-net routing parameters.                  *
      * net_delay              - [inet][ipin] delays, written while routing.      *
      * slacks                 - criticalities read by the per-net router.        *
      * clb_opins_used_locally - passed through to reserve_locally_used_opins().  *
      * timing_analysis_enabled- if FALSE, criticalities and net delays are held  *
      *                          at 0 so that routability is optimized instead.   *
      *                                                                           *
      * Returns TRUE if the routing succeeds.  Returns FALSE if some net cannot   *
      * be routed at all, if the first iteration uses more than                   *
      * FIRST_ITER_WIRELENTH_LIMIT of the available wirelength, or if             *
      * max_router_iterations is reached without a feasible routing.              */

     int itry, inet, ipin, i, bends, wirelength, total_wirelength, available_wirelength,
         segments, *net_index, *sink_order /* [1..max_pins_per_net-1] */;
     boolean success, is_routable, rip_up_local_opins;
     float *pin_criticality /* [1..max_pins_per_net-1] */, pres_fac, *sinks,
         critical_path_delay, init_timing_criticality_val;
     t_rt_node **rt_node_of_sink; /* [1..max_pins_per_net-1] */
     clock_t begin,end;

     /* Build the order in which nets are routed: net_index is sorted by    *
      * heapsort() using each net's sink count as the key.                  */
     sinks = (float*)my_malloc(sizeof(float) * num_nets);
     net_index = (int*)my_malloc(sizeof(int) * num_nets);

     for (i = 0; i < num_nets; i++) {
         sinks[i] = clb_net[i].num_sinks;
         net_index[i] = i;
     }
     heapsort(net_index, sinks, num_nets, 1);

     alloc_timing_driven_route_structs(&pin_criticality, &sink_order,
             &rt_node_of_sink);

     /* First do one routing iteration ignoring congestion to
     get reasonable net delay estimates. Set criticalities to 1
     when timing analysis is on to optimize timing, and to 0
     when timing analysis is off to optimize routability. */

     if (timing_analysis_enabled) {
         init_timing_criticality_val = 1.;
     } else {
         init_timing_criticality_val = 0.;
     }

     for (inet = 0; inet < num_nets; inet++) {
         if (clb_net[inet].is_global == FALSE) {
             /* The loop body must be braced: with PATH_COUNTING defined there *
              * are two statements per pin, and without braces the second one  *
              * would run only once, after the loop, at index num_sinks + 1.   */
             for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
                 slacks->timing_criticality[inet][ipin] = init_timing_criticality_val;
 #ifdef PATH_COUNTING
                 slacks->path_criticality[inet][ipin] = init_timing_criticality_val;
 #endif
             }
         } else {
             /* Set delay of global signals to zero. Non-global net
             delays are set by update_net_delays_from_route_tree()
             inside timing_driven_route_net(), which is only called
             for non-global nets. */
             for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
                 net_delay[inet][ipin] = 0.;
             }
         }
     }

     pres_fac = router_opts.first_iter_pres_fac; /* Typically 0 -> ignore cong. */

     for (itry = 1; itry <= router_opts.max_router_iterations; itry++) {
         begin = clock();
         vpr_printf(TIO_MESSAGE_INFO, "\n");
         vpr_printf(TIO_MESSAGE_INFO, "Routing iteration: %d\n", itry);

         for (i = 0; i < num_nets; i++) {
             inet = net_index[i];
             if (clb_net[inet].is_global == FALSE) { /* Skip global nets. */

                 is_routable = timing_driven_route_net(inet, pres_fac,
                     router_opts.max_criticality,
                     router_opts.criticality_exp, router_opts.astar_fac,
                     router_opts.bend_cost, pin_criticality,
                     sink_order, rt_node_of_sink, net_delay[inet], slacks);

                 /* Impossible to route? (disconnected rr_graph) */

                 if (!is_routable) {
                     vpr_printf(TIO_MESSAGE_INFO, "Routing failed.\n");
                     free_timing_driven_route_structs(pin_criticality,
                             sink_order, rt_node_of_sink);
                     free(net_index);
                     free(sinks);
                     return (FALSE);
                 }
             }
         }

         if (itry == 1) {
             /* Early exit code for cases where it is obvious that a successful route will not be found
              Heuristic: If total wirelength used in first routing iteration is X% of total available wirelength, exit
              */
             total_wirelength = 0;
             available_wirelength = 0;

             /* Available wirelength: the span of every channel segment. */
             for (i = 0; i < num_rr_nodes; i++) {
                 if (rr_node[i].type == CHANX || rr_node[i].type == CHANY) {
                     available_wirelength += 1 + rr_node[i].xhigh
                             - rr_node[i].xlow + rr_node[i].yhigh
                             - rr_node[i].ylow;
                 }
             }

             /* Used wirelength: summed over all routed, non-global nets. */
             for (inet = 0; inet < num_nets; inet++) {
                 if (clb_net[inet].is_global == FALSE
                         && clb_net[inet].num_sinks != 0) { /* Globals don't count. */
                     get_num_bends_and_length(inet, &bends, &wirelength,
                             &segments);

                     total_wirelength += wirelength;
                 }
             }
             vpr_printf(TIO_MESSAGE_INFO, "Wire length after first iteration %d, total available wire length %d, ratio %g\n",
                     total_wirelength, available_wirelength,
                     (float) (total_wirelength) / (float) (available_wirelength));
             if ((float) (total_wirelength) / (float) (available_wirelength)> FIRST_ITER_WIRELENTH_LIMIT) {
                 vpr_printf(TIO_MESSAGE_INFO, "Wire length usage ratio exceeds limit of %g, fail routing.\n",
                         FIRST_ITER_WIRELENTH_LIMIT);
                 free_timing_driven_route_structs(pin_criticality, sink_order,
                         rt_node_of_sink);
                 free(net_index);
                 free(sinks);
                 return FALSE;
             }
         }

         /* Make sure any CLB OPINs used up by subblocks being hooked directly     *
          * to them are reserved for that purpose.                                 */

         if (itry == 1)
             rip_up_local_opins = FALSE;
         else
             rip_up_local_opins = TRUE;

         reserve_locally_used_opins(pres_fac, rip_up_local_opins,
                 clb_opins_used_locally);

         /* Pathfinder guys quit after finding a feasible route. I may want to keep *
          * going longer, trying to improve timing.  Think about this some.         */

         success = feasible_routing();
         if (success) {
             vpr_printf(TIO_MESSAGE_INFO, "Successfully routed after %d routing iterations.\n", itry);
             free_timing_driven_route_structs(pin_criticality, sink_order, rt_node_of_sink);
 #ifdef DEBUG
             timing_driven_check_net_delays(net_delay);
 #endif
             free(net_index);
             free(sinks);
             return (TRUE);
         }

         if (itry == 1) {
             pres_fac = router_opts.initial_pres_fac;
             pathfinder_update_cost(pres_fac, 0.); /* Acc_fac=0 for first iter. */
         } else {
             pres_fac *= router_opts.pres_fac_mult;

             /* Avoid overflow for high iteration counts, even if acc_cost is big */
             pres_fac = std::min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5));

             pathfinder_update_cost(pres_fac, router_opts.acc_fac);
         }

         if (timing_analysis_enabled) {
             /* Update slack values by doing another timing analysis.                 *
              * Timing_driven_route_net updated the net delay values.                 */

             load_timing_graph_net_delays(net_delay);

     #ifdef HACK_LUT_PIN_SWAPPING
             do_timing_analysis(slacks, FALSE, TRUE, FALSE);
     #else
             do_timing_analysis(slacks, FALSE, FALSE, FALSE);
     #endif

             /* Print critical path delay - convert to nanoseconds. */
             critical_path_delay = get_critical_path_delay();
             vpr_printf(TIO_MESSAGE_INFO, "Critical path: %g ns\n", critical_path_delay);
         } else {
             /* If timing analysis is not enabled, make sure that the criticalities and the  *
              * net_delays stay as 0 so that wirelength can be optimized.            */

             for (inet = 0; inet < num_nets; inet++) {
                 for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
                     slacks->timing_criticality[inet][ipin] = 0.;
 #ifdef PATH_COUNTING
                     slacks->path_criticality[inet][ipin] = 0.;
 #endif
                     net_delay[inet][ipin] = 0.;
                 }
             }
         }

         end = clock();
         #ifdef CLOCKS_PER_SEC
             vpr_printf(TIO_MESSAGE_INFO, "Routing iteration took %g seconds.\n", (float)(end - begin) / CLOCKS_PER_SEC);
         #else
             vpr_printf(TIO_MESSAGE_INFO, "Routing iteration took %g seconds.\n", (float)(end - begin) / CLK_PER_SEC);
         #endif

         fflush(stdout);
     }

     /* Iteration limit reached without a feasible routing. */
     vpr_printf(TIO_MESSAGE_INFO, "Routing failed.\n");
     free_timing_driven_route_structs(pin_criticality, sink_order,
             rt_node_of_sink);
     free(net_index);
     free(sinks);
     return (FALSE);
 }


 void alloc_timing_driven_route_structs(float **pin_criticality_ptr,
         int **sink_order_ptr, t_rt_node *** rt_node_of_sink_ptr) {

     /* Allocates all the structures needed only by the timing-driven router.   *
      *                                                                         *
      * Three per-sink arrays are allocated and returned through the pointer    *
      * arguments.  Each returned pointer is shifted back by one element so     *
      * that callers index the arrays starting at 1 (the first sink is pin #1). *
      * Release them with free_timing_driven_route_structs().                   */

     int max_pins_per_net, num_entries;
     float *pin_criticality;
     int *sink_order;
     t_rt_node **rt_node_of_sink;

     max_pins_per_net = get_max_pins_per_net();

     /* One entry per sink, i.e. pins minus the driver.  get_max_pins_per_net() *
      * returns 0 when there is no non-global net, which would make the count   *
      * negative and wrap to an enormous allocation size; a result of 1 would   *
      * request zero bytes.  Always allocate at least one entry instead.        */
     num_entries = max_pins_per_net - 1;
     if (num_entries < 1) {
         num_entries = 1;
     }

     pin_criticality = (float *) my_malloc(num_entries * sizeof(float));
     *pin_criticality_ptr = pin_criticality - 1; /* First sink is pin #1. */

     sink_order = (int *) my_malloc(num_entries * sizeof(int));
     *sink_order_ptr = sink_order - 1;

     rt_node_of_sink = (t_rt_node **) my_malloc(
             num_entries * sizeof(t_rt_node *));
     *rt_node_of_sink_ptr = rt_node_of_sink - 1;

     alloc_route_tree_timing_structs();
 }


 void free_timing_driven_route_structs(float *pin_criticality, int *sink_order,

         t_rt_node ** rt_node_of_sink) {


     /* Frees all the stuctures needed only by the timing-driven router.        */


     free(pin_criticality + 1); /* Starts at index 1. */

     free(sink_order + 1);

     free(rt_node_of_sink + 1);

     free_route_tree_timing_structs();

 }


 static int get_max_pins_per_net(void) {

     /* Scans all nets and reports the largest pin count (num_sinks + 1) found *
      * on a non-global net.  Yields 0 when there is no non-global net.        */

     int largest = 0;

     for (int net = 0; net < num_nets; net++) {
         if (clb_net[net].is_global != FALSE)
             continue; /* Global nets are not considered. */

         int pins = clb_net[net].num_sinks + 1;
         if (largest < pins)
             largest = pins;
     }

     return largest;
 }


 boolean timing_driven_route_net(int inet, float pres_fac, float max_criticality,
         float criticality_exp, float astar_fac, float bend_cost,
         float *pin_criticality, int *sink_order,
         t_rt_node ** rt_node_of_sink, float *net_delay, t_slack * slacks) {

     /* Rips up any existing routing of net inet and routes it again, one sink  *
      * at a time in the order produced by heapsort() on the pin criticalities. *
      *                                                                         *
      * Returns TRUE as long as it found some way to hook up this net, even if  *
      * that way resulted in overuse of resources (congestion).  If there is no *
      * way to route this net, even ignoring congestion, it returns FALSE.  In  *
      * this case the rr_graph is disconnected and you can give up.             *
      *                                                                         *
      * If slacks = NULL, every pin is given a criticality of 1 (see below).    *
      *                                                                         *
      * pin_criticality, sink_order and rt_node_of_sink are scratch arrays      *
      * indexed [1..num_sinks]; net_delay receives this net's sink delays via   *
      * update_net_delays_from_route_tree().                                    */

     int ipin, num_sinks, itarget, target_pin, target_node, inode;
     float target_criticality, old_tcost, new_tcost, largest_criticality,
         old_back_cost, new_back_cost;
     t_rt_node *rt_root;
     struct s_heap *current;
     struct s_trace *new_route_start_tptr;
     int highfanout_rlim;

     /* Rip-up any old routing. */

     pathfinder_update_one_cost(trace_head[inet], -1, pres_fac);
     free_traceback(inet);

     for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
         if (!slacks) {
             /* Use criticality of 1. This makes all nets critical.  Note: There is a big difference between setting pin criticality to 0
             compared to 1.  If pin criticality is set to 0, then the current path delay is completely ignored during routing.  By setting
             pin criticality to 1, the current path delay to the pin will always be considered and optimized for */
             pin_criticality[ipin] = 1.0;
         } else {
 #ifdef PATH_COUNTING
             /* Pin criticality is based on a weighted sum of timing and path criticalities. */
             pin_criticality[ipin] =      ROUTE_PATH_WEIGHT  * slacks->path_criticality[inet][ipin]
                                   + (1 - ROUTE_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin];
 #else
             /* Pin criticality is based on only timing criticality. */
             pin_criticality[ipin] = slacks->timing_criticality[inet][ipin];
 #endif
             /* Currently, pin criticality is between 0 and 1. Now shift it downwards
             by 1 - max_criticality (max_criticality is 0.99 by default, so shift down
             by 0.01) and cut off at 0.  This means that all pins with small criticalities
             (<0.01) get criticality 0 and are ignored entirely, and everything
             else becomes a bit less critical. This effect becomes more pronounced if
             max_criticality is set lower. */
             assert(pin_criticality[ipin] > -0.01 && pin_criticality[ipin] < 1.01);
             pin_criticality[ipin] = std::max(pin_criticality[ipin] - (1.0 - max_criticality), 0.0);

             /* Take pin criticality to some power (1 by default). */
             pin_criticality[ipin] = pow(pin_criticality[ipin], criticality_exp);

             /* Cut off pin criticality at max_criticality. */
             pin_criticality[ipin] = std::min(pin_criticality[ipin], max_criticality);
         }
     }

     num_sinks = clb_net[inet].num_sinks;
     heapsort(sink_order, pin_criticality, num_sinks, 0);

     /* Update base costs according to fanout and criticality rules.  A net   *
      * without sinks has no sink_order[1] entry to read, so use 0 for it     *
      * rather than indexing with an uninitialized value.                     */

     if (num_sinks > 0) {
         largest_criticality = pin_criticality[sink_order[1]];
     } else {
         largest_criticality = 0.;
     }
     update_rr_base_costs(inet, largest_criticality);

     mark_ends(inet); /* Only needed to check for multiply-connected SINKs */

     rt_root = init_route_tree_to_source(inet);

     for (itarget = 1; itarget <= num_sinks; itarget++) {
         target_pin = sink_order[itarget];
         target_node = net_rr_terminals[inet][target_pin];

         target_criticality = pin_criticality[target_pin];

         highfanout_rlim = mark_node_expansion_by_bin(inet, target_node,
                 rt_root);

         /* Seed the heap with the routing built so far for this net. */
         add_route_tree_to_heap(rt_root, target_node, target_criticality,
                 astar_fac);

         current = get_heap_head();

         if (current == NULL) { /* Infeasible routing.  No possible path for net. */
             vpr_printf(TIO_MESSAGE_INFO, "Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
                        inet, clb_net[inet].name, itarget);
             reset_path_costs();
             free_route_tree(rt_root);
             return (FALSE);
         }

         inode = current->index;

         /* Pop heap entries until the target SINK itself comes off the heap. */
         while (inode != target_node) {
             old_tcost = rr_node_route_inf[inode].path_cost;
             new_tcost = current->cost;

             if (old_tcost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
                 old_back_cost = HUGE_POSITIVE_FLOAT;
             else
                 old_back_cost = rr_node_route_inf[inode].backward_path_cost;

             new_back_cost = current->backward_path_cost;

             /* I only re-expand a node if both the "known" backward cost is lower  *
              * in the new expansion (this is necessary to prevent loops from       *
              * forming in the routing and causing havoc) *and* the expected total  *
              * cost to the sink is lower than the old value.  Different R_upstream *
              * values could make a path with lower back_path_cost less desirable   *
              * than one with higher cost.  Test whether or not I should disallow   *
              * re-expansion based on a higher total cost.                          */

             if (old_tcost > new_tcost && old_back_cost > new_back_cost) {
                 rr_node_route_inf[inode].prev_node = current->u.prev_node;
                 rr_node_route_inf[inode].prev_edge = current->prev_edge;
                 rr_node_route_inf[inode].path_cost = new_tcost;
                 rr_node_route_inf[inode].backward_path_cost = new_back_cost;

                 if (old_tcost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
                     add_to_mod_list(&rr_node_route_inf[inode].path_cost);

                 timing_driven_expand_neighbours(current, inet, bend_cost,
                         target_criticality, target_node, astar_fac,
                         highfanout_rlim);
             }

             free_heap_data(current);
             current = get_heap_head();

             if (current == NULL) { /* Impossible routing.  No path for net. */
                 vpr_printf(TIO_MESSAGE_INFO, "Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
                          inet, clb_net[inet].name, itarget);
                 reset_path_costs();
                 free_route_tree(rt_root);
                 return (FALSE);
             }

             inode = current->index;
         }

         /* NB:  In the code below I keep two records of the partial routing:  the   *
          * traceback and the route_tree.  The route_tree enables fast recomputation *
          * of the Elmore delay to each node in the partial routing.  The traceback  *
          * lets me reuse all the routines written for breadth-first routing, which  *
          * all take a traceback structure as input.  Before this routine exits the  *
          * route_tree structure is destroyed; only the traceback is needed at that  *
          * point.                                                                   */

         rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */
         new_route_start_tptr = update_traceback(current, inet);
         rt_node_of_sink[target_pin] = update_route_tree(current);
         free_heap_data(current);
         pathfinder_update_one_cost(new_route_start_tptr, 1, pres_fac);

         empty_heap();
         reset_path_costs();
     }

     /* For later timing analysis. */

     update_net_delays_from_route_tree(net_delay, rt_node_of_sink, inet);
     free_route_tree(rt_root);
     return (TRUE);
 }


 static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,
         float target_criticality, float astar_fac) {

     /* Walks the partial routing rooted at rt_node (node first, then each     *
      * child subtree, recursively) and pushes every node flagged re_expand    *
      * onto the heap.  A pushed node's known cost is its delay weighted by    *
      * target_criticality; its total cost adds astar_fac times the expected   *
      * cost of reaching target_node.                                          */

     t_linked_rt_edge *edge;

     if (rt_node->re_expand) {
         int inode = rt_node->inode;
         float R_upstream = rt_node->R_upstream;
         float backward_path_cost = target_criticality * rt_node->Tdel;
         float expected_cost = get_timing_driven_expected_cost(inode,
                 target_node, target_criticality, R_upstream);
         float tot_cost = backward_path_cost + astar_fac * expected_cost;

         /* Nodes already in the route tree have no predecessor in this search. */
         node_to_heap(inode, tot_cost, NO_PREVIOUS, NO_PREVIOUS,
                 backward_path_cost, R_upstream);
     }

     for (edge = rt_node->u.child_list; edge != NULL; edge = edge->next) {
         add_route_tree_to_heap(edge->child, target_node, target_criticality,
                 astar_fac);
     }
 }


 static void timing_driven_expand_neighbours(struct s_heap *current, int inet,

         float bend_cost, float criticality_fac, int target_node,

         float astar_fac, int highfanout_rlim) {


     /* Puts all the rr_nodes adjacent to current on the heap.  rr_nodes outside *

      * the expanded bounding box specified in route_bb are not added to the     *

      * heap.                                                                    */


     int iconn, to_node, num_edges, inode, iswitch, target_x, target_y;

     t_rr_type from_type, to_type;

     float new_tot_cost, old_back_pcost, new_back_pcost, R_upstream;

     float new_R_upstream, Tdel;


     inode = current->index;

     old_back_pcost = current->backward_path_cost;

     R_upstream = current->R_upstream;

     num_edges = rr_node[inode].num_edges;


     target_x = rr_node[target_node].xhigh;

     target_y = rr_node[target_node].yhigh;


     for (iconn = 0; iconn < num_edges; iconn++) {

         to_node = rr_node[inode].edges[iconn];


         if (rr_node[to_node].xhigh < route_bb[inet].xmin

                 || rr_node[to_node].xlow > route_bb[inet].xmax

                 || rr_node[to_node].yhigh < route_bb[inet].ymin

                 || rr_node[to_node].ylow > route_bb[inet].ymax)

             continue; /* Node is outside (expanded) bounding box. */


         if (clb_net[inet].num_sinks >= HIGH_FANOUT_NET_LIM) {

             if (rr_node[to_node].xhigh < target_x - highfanout_rlim

                     || rr_node[to_node].xlow > target_x + highfanout_rlim

                     || rr_node[to_node].yhigh < target_y - highfanout_rlim

                     || rr_node[to_node].ylow > target_y + highfanout_rlim)

                 continue; /* Node is outside high fanout bin. */

         }


         /* Prune away IPINs that lead to blocks other than the target one.  Avoids  *

          * the issue of how to cost them properly so they don't get expanded before *

          * more promising routes, but makes route-throughs (via CLBs) impossible.   *

          * Change this if you want to investigate route-throughs.                   */


         to_type = rr_node[to_node].type;

         if (to_type == IPIN

                 && (rr_node[to_node].xhigh != target_x

                         || rr_node[to_node].yhigh != target_y))

             continue;


         /* new_back_pcost stores the "known" part of the cost to this node -- the   *

          * congestion cost of all the routing resources back to the existing route  *

          * plus the known delay of the total path back to the source.  new_tot_cost *

          * is this "known" backward cost + an expected cost to get to the target.   */


         new_back_pcost = old_back_pcost

                 + (1. - criticality_fac) * get_rr_cong_cost(to_node);


         iswitch = rr_node[inode].switches[iconn];

         if (switch_inf[iswitch].buffered) {

             new_R_upstream = switch_inf[iswitch].R;

         } else {

             new_R_upstream = R_upstream + switch_inf[iswitch].R;

         }


         Tdel = rr_node[to_node].C * (new_R_upstream + 0.5 * rr_node[to_node].R);

         Tdel += switch_inf[iswitch].Tdel;

         new_R_upstream += rr_node[to_node].R;

         new_back_pcost += criticality_fac * Tdel;


         if (bend_cost != 0.) {

             from_type = rr_node[inode].type;

             to_type = rr_node[to_node].type;

             if ((from_type == CHANX && to_type == CHANY)

                     || (from_type == CHANY && to_type == CHANX))

                 new_back_pcost += bend_cost;

         }


         new_tot_cost = new_back_pcost

                 + astar_fac

                         * get_timing_driven_expected_cost(to_node, target_node,

                                 criticality_fac, new_R_upstream);


         node_to_heap(to_node, new_tot_cost, inode, iconn, new_back_pcost,

                 new_R_upstream);


     } /* End for all neighbours */

 }


 static float get_timing_driven_expected_cost(int inode, int target_node,

         float criticality_fac, float R_upstream) {


     /* Determines the expected cost (due to both delay and resouce cost) to reach *

      * the target node from inode.  It doesn't include the cost of inode --       *

      * that's already in the "known" path_cost.                                   */


     t_rr_type rr_type;

     int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir;

     float expected_cost, cong_cost, Tdel;


     rr_type = rr_node[inode].type;


     if (rr_type == CHANX || rr_type == CHANY) {

         num_segs_same_dir = get_expected_segs_to_target(inode, target_node,

                 &num_segs_ortho_dir);

         cost_index = rr_node[inode].cost_index;

         ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;


         cong_cost = num_segs_same_dir * rr_indexed_data[cost_index].base_cost

                 + num_segs_ortho_dir

                         * rr_indexed_data[ortho_cost_index].base_cost;

         cong_cost += rr_indexed_data[IPIN_COST_INDEX].base_cost

                 + rr_indexed_data[SINK_COST_INDEX].base_cost;


         Tdel =

                 num_segs_same_dir * rr_indexed_data[cost_index].T_linear

                         + num_segs_ortho_dir

                                 * rr_indexed_data[ortho_cost_index].T_linear

                         + num_segs_same_dir * num_segs_same_dir

                                 * rr_indexed_data[cost_index].T_quadratic

                         + num_segs_ortho_dir * num_segs_ortho_dir

                                 * rr_indexed_data[ortho_cost_index].T_quadratic

                         + R_upstream

                                 * (num_segs_same_dir

                                         * rr_indexed_data[cost_index].C_load

                                         + num_segs_ortho_dir

                                                 * rr_indexed_data[ortho_cost_index].C_load);


         Tdel += rr_indexed_data[IPIN_COST_INDEX].T_linear;


         expected_cost = criticality_fac * Tdel

                 + (1. - criticality_fac) * cong_cost;

         return (expected_cost);

     }


     else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */

         return (rr_indexed_data[SINK_COST_INDEX].base_cost);

     }


     else { /* Change this if you want to investigate route-throughs */

         return (0.);

     }

 }


 /* Macro used below to ensure that fractions are rounded up, but floating   *
  * point values very close to an integer are rounded to that integer.       *
  * The argument is parenthesized so that an expression containing operators *
  * of lower precedence than '-' (e.g. ?:) is evaluated as a whole before    *
  * the 0.001 tolerance is subtracted.                                       */

 #define ROUND_UP(x) (ceil ((x) - 0.001))


 static int get_expected_segs_to_target(int inode, int target_node,
         int *num_segs_ortho_dir_ptr) {

     /* Estimates how many segments of inode's type are still needed to reach  *
      * target_node, not counting inode itself.  The count along inode's own   *
      * direction is the return value; the count in the orthogonal direction   *
      * is stored through num_segs_ortho_dir_ptr.  Any inode that is not a     *
      * CHANX is handled as a CHANY.                                           */

     const int tgt_x = rr_node[target_node].xlow;
     const int tgt_y = rr_node[target_node].ylow;
     const int same_idx = rr_node[inode].cost_index;
     const float same_inv_len = rr_indexed_data[same_idx].inv_length;
     const int ortho_idx = rr_indexed_data[same_idx].ortho_cost_index;
     const float ortho_inv_len = rr_indexed_data[ortho_idx].inv_length;
     int segs_same, clear_of_clb;
     float lo_x, lo_y, hi_x, hi_y;

     if (rr_node[inode].type == CHANX) {
         lo_y = rr_node[inode].ylow;
         hi_x = rr_node[inode].xhigh;
         lo_x = rr_node[inode].xlow;

         /* Vertical (orthogonal) segments first. */
         if (lo_y > tgt_y) { /* Row above the target. */
             *num_segs_ortho_dir_ptr =
                     (int)(ROUND_UP((lo_y - tgt_y + 1.) * ortho_inv_len));
             clear_of_clb = 1;
         } else if (lo_y < tgt_y - 1) { /* Below the CLB bottom. */
             *num_segs_ortho_dir_ptr =
                     (int)(ROUND_UP((tgt_y - lo_y) * ortho_inv_len));
             clear_of_clb = 1;
         } else { /* Row that passes by the target CLB. */
             *num_segs_ortho_dir_ptr = 0;
             clear_of_clb = 0;
         }

         /* Then horizontal segments (same direction as inode). */
         if (lo_x > tgt_x + clear_of_clb) {
             segs_same = (int)(ROUND_UP((lo_x - clear_of_clb - tgt_x)
                     * same_inv_len));
         } else if (hi_x < tgt_x - clear_of_clb) {
             segs_same = (int)(ROUND_UP((tgt_x - clear_of_clb - hi_x)
                     * same_inv_len));
         } else {
             segs_same = 0;
         }
     } else { /* inode is a CHANY */
         lo_y = rr_node[inode].ylow;
         hi_y = rr_node[inode].yhigh;
         lo_x = rr_node[inode].xlow;

         /* Horizontal (orthogonal) segments first. */
         if (lo_x > tgt_x) { /* Column right of the target. */
             *num_segs_ortho_dir_ptr =
                     (int)(ROUND_UP((lo_x - tgt_x + 1.) * ortho_inv_len));
             clear_of_clb = 1;
         } else if (lo_x < tgt_x - 1) { /* Left of and not adjacent to the CLB. */
             *num_segs_ortho_dir_ptr =
                     (int)(ROUND_UP((tgt_x - lo_x) * ortho_inv_len));
             clear_of_clb = 1;
         } else { /* Column that passes by the target CLB. */
             *num_segs_ortho_dir_ptr = 0;
             clear_of_clb = 0;
         }

         /* Then vertical segments (same direction as inode). */
         if (lo_y > tgt_y + clear_of_clb) {
             segs_same = (int)(ROUND_UP((lo_y - clear_of_clb - tgt_y)
                     * same_inv_len));
         } else if (hi_y < tgt_y - clear_of_clb) {
             segs_same = (int)(ROUND_UP((tgt_y - clear_of_clb - hi_y)
                     * same_inv_len));
         } else {
             segs_same = 0;
         }
     }

     return (segs_same);
 }


 /* Re-derives base_cost for every rr_indexed_data entry from
  * CHANX_COST_INDEX_START up to num_rr_indexed_data, based on the fanout of
  * the net currently being routed (inet).
  *
  *   - Entries with T_quadratic > 0 (labelled "pass transistor" by the
  *     original author) get saved_base_cost * sqrt(num_sinks).
  *   - Every other entry is restored to its saved_base_cost.
  *
  * largest_criticality is part of the signature but is not read here. */
 static void update_rr_base_costs(int inet, float largest_criticality) {
     float num_sinks_f = clb_net[inet].num_sinks;
     /* Other reasonable values for the scale include the fanout itself and 1 */
     float scale = sqrt(num_sinks_f);
     int cost_idx = CHANX_COST_INDEX_START;

     while (cost_idx < num_rr_indexed_data) {
         float saved = rr_indexed_data[cost_idx].saved_base_cost;

         rr_indexed_data[cost_idx].base_cost =
                 (rr_indexed_data[cost_idx].T_quadratic > 0.) ? saved * scale : saved;
         cost_idx++;
     }
 }


 /* Nets that have high fanout can take a very long time to route.  Each sink should be routed contained within a bin instead of the entire bounding box to speed things up */

 static int mark_node_expansion_by_bin(int inet, int target_node,

         t_rt_node * rt_node) {

     int target_x, target_y;

     int rlim = 1;

     int inode;

     float area;

     boolean success;

     t_linked_rt_edge *linked_rt_edge;

     t_rt_node * child_node;


     target_x = rr_node[target_node].xlow;

     target_y = rr_node[target_node].ylow;


     if (clb_net[inet].num_sinks < HIGH_FANOUT_NET_LIM) {

         /* This algorithm only applies to high fanout nets */

         return 1;

     }


     area = (route_bb[inet].xmax - route_bb[inet].xmin)

             * (route_bb[inet].ymax - route_bb[inet].ymin);

     if (area <= 0) {

         area = 1;

     }


     rlim = (int)(ceil(sqrt((float) area / (float) clb_net[inet].num_sinks)));

     if (rt_node == NULL || rt_node->u.child_list == NULL) {

         /* If unknown traceback, set radius of bin to be size of chip */

         rlim = std::max(nx + 2, ny + 2);

         return rlim;

     }


     success = FALSE;

     /* determine quickly a feasible bin radius to route sink for high fanout nets

      this is necessary to prevent super long runtimes for high fanout nets; in best case, a reduction in complexity from O(N^2logN) to O(NlogN) (Swartz fast router)

      */

     linked_rt_edge = rt_node->u.child_list;

     while (success == FALSE && linked_rt_edge != NULL) {

         while (linked_rt_edge != NULL && success == FALSE) {

             child_node = linked_rt_edge->child;

             inode = child_node->inode;

             if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {

                 if (rr_node[inode].xlow <= target_x + rlim

                         && rr_node[inode].xhigh >= target_x - rlim

                         && rr_node[inode].ylow <= target_y + rlim

                         && rr_node[inode].yhigh >= target_y - rlim) {

                     success = TRUE;

                 }

             }

             linked_rt_edge = linked_rt_edge->next;

         }


         if (success == FALSE) {

             if (rlim > std::max(nx + 2, ny + 2)) {

                 vpr_printf(TIO_MESSAGE_ERROR, "VPR internal error, net %s has paths that are not found in traceback.\n",

                         clb_net[inet].name);

                 exit(1);

             }

             /* if sink not in bin, increase bin size until fit */

             rlim *= 2;

         } else {

             /* Sometimes might just catch a wire in the end segment, need to give it some channel space to explore */

             rlim += 4;

         }

         linked_rt_edge = rt_node->u.child_list;

     }


     /* redetermine expansion based on rlim */

     linked_rt_edge = rt_node->u.child_list;

     while (linked_rt_edge != NULL) {

         child_node = linked_rt_edge->child;

         inode = child_node->inode;

         if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {

             if (rr_node[inode].xlow <= target_x + rlim

                     && rr_node[inode].xhigh >= target_x - rlim

                     && rr_node[inode].ylow <= target_y + rlim

                     && rr_node[inode].yhigh >= target_y - rlim) {

                 child_node->re_expand = TRUE;

             } else {

                 child_node->re_expand = FALSE;

             }

         }

         linked_rt_edge = linked_rt_edge->next;

     }

     return rlim;

 }


 #define ERROR_TOL 0.0001


 static void timing_driven_check_net_delays(float **net_delay) {


     /* Checks that the net delays computed incrementally during timing driven    *

      * routing match those computed from scratch by the net_delay.c module.      */


     int inet, ipin;

     float **net_delay_check;


     t_chunk list_head_net_delay_check_ch = {NULL, 0, NULL};


     /*struct s_linked_vptr *ch_list_head_net_delay_check;*/


     net_delay_check = alloc_net_delay(&list_head_net_delay_check_ch, clb_net,

             num_nets);

     load_net_delay_from_routing(net_delay_check, clb_net, num_nets);


     for (inet = 0; inet < num_nets; inet++) {

         for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {

             if (net_delay_check[inet][ipin] == 0.) { /* Should be only GLOBAL nets */

                 if (fabs(net_delay[inet][ipin]) > ERROR_TOL) {

                     vpr_printf(TIO_MESSAGE_ERROR, "in timing_driven_check_net_delays: net %d pin %d.\n",

                             inet, ipin);

                     vpr_printf(TIO_MESSAGE_ERROR, "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",

                             net_delay[inet][ipin], net_delay_check[inet][ipin]);

                     exit(1);

                 }

             } else {

                 if (fabs(1.0 - net_delay[inet][ipin] / net_delay_check[inet][ipin]) > ERROR_TOL) {

                     vpr_printf(TIO_MESSAGE_ERROR, "in timing_driven_check_net_delays: net %d pin %d.\n",

                             inet, ipin);

                     vpr_printf(TIO_MESSAGE_ERROR, "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",

                             net_delay[inet][ipin], net_delay_check[inet][ipin]);

                     exit(1);

                 }

             }

         }

     }


     free_net_delay(net_delay_check, &list_head_net_delay_check_ch);

     vpr_printf(TIO_MESSAGE_INFO, "Completed net delay value cross check successfully.\n");

 }

feasible_routing
boolean feasible_routing(void)
Definition: route_common.c:298

s_rr_node::xhigh
short xhigh
Definition: vpr_types.h:891

CHANY
Definition: vpr_types.h:854

s_chunk
Definition: util.h:57

s_rr_node::num_edges
short num_edges
Definition: vpr_types.h:901

load_net_delay_from_routing
void load_net_delay_from_routing(float **net_delay, struct s_net *nets, int n_nets)
Definition: net_delay.c:136

s_rr_node::R
float R
Definition: vpr_types.h:906

s_heap::index
int index
Definition: route_common.h:4

s_trace::index
int index
Definition: vpr_types.h:866

get_rr_cong_cost
float get_rr_cong_cost(int inode)
Definition: route_common.c:532

s_rr_node::cost_index
short cost_index
Definition: vpr_types.h:897

s_rr_indexed_data::base_cost
float base_cost
Definition: vpr_types.h:970

s_rr_node::edges
int * edges
Definition: vpr_types.h:903

s_heap::backward_path_cost
float backward_path_cost
Definition: route_common.h:11

free_timing_driven_route_structs
void free_timing_driven_route_structs(float *pin_criticality, int *sink_order, t_rt_node **rt_node_of_sink)
Definition: route_timing.c:279

init_route_tree_to_source
t_rt_node * init_route_tree_to_source(int inet)
Definition: route_tree_timing.c:155

get_heap_head
struct s_heap * get_heap_head(void)
Definition: route_common.c:949

rr_node
t_rr_node * rr_node
Definition: globals.c:70

heapsort
void heapsort(int *sort_index, float *sort_values, int nelem, int start_index)
Definition: heapsort.c:13

free_traceback
void free_traceback(int inet)
Definition: route_common.c:587

s_heap::prev_node
int prev_node
Definition: route_common.h:7

route_tree_timing.h

mark_ends
void mark_ends(int inet)
Definition: route_common.c:546

rr_node_route_inf
t_rr_node_route_inf * rr_node_route_inf
Definition: route_common.c:21

s_rt_node::re_expand
short re_expand
Definition: route_tree_timing.h:24

s_trace::iswitch
short iswitch
Definition: vpr_types.h:867

s_bb::xmax
int xmax
Definition: vpr_types.h:533

s_heap
Definition: route_common.h:3

s_rt_node::R_upstream
float R_upstream
Definition: route_tree_timing.h:27

rr_indexed_data
t_rr_indexed_data * rr_indexed_data
Definition: globals.c:74

NO_PREVIOUS
#define NO_PREVIOUS
Definition: vpr_types.h:887

get_critical_path_delay
float get_critical_path_delay(void)
Definition: path_delay.c:3060

s_rr_node::ylow
short ylow
Definition: vpr_types.h:892

s_router_opts::pres_fac_mult
float pres_fac_mult
Definition: vpr_types.h:698

s_rr_indexed_data::ortho_cost_index
int ortho_cost_index
Definition: vpr_types.h:972

IPIN_COST_INDEX
Definition: vpr_types.h:1013

free_heap_data
void free_heap_data(struct s_heap *hptr)
Definition: route_common.c:1019

net_delay
static float ** net_delay
Definition: timing_place_lookup.c:76

s_rr_node::C
float C
Definition: vpr_types.h:907

s_router_opts::max_router_iterations
int max_router_iterations
Definition: vpr_types.h:701

s_router_opts::first_iter_pres_fac
float first_iter_pres_fac
Definition: vpr_types.h:696

num_rr_indexed_data
int num_rr_indexed_data
Definition: globals.c:73

alloc_route_tree_timing_structs
void alloc_route_tree_timing_structs(void)
Definition: route_tree_timing.c:53

SINK_COST_INDEX
Definition: vpr_types.h:1011

ROUND_UP
#define ROUND_UP(x)
Definition: route_timing.c:653

ERROR_TOL
#define ERROR_TOL
Definition: route_timing.c:855

s_linked_rt_edge::next
struct s_linked_rt_edge * next
Definition: route_tree_timing.h:6

s_rr_indexed_data::T_quadratic
float T_quadratic
Definition: vpr_types.h:976

vpr_types.h

pathfinder_update_cost
void pathfinder_update_cost(float pres_fac, float acc_fac)
Definition: route_common.c:363

IPIN
Definition: vpr_types.h:854

t_rr_node_route_inf::path_cost
float path_cost
Definition: route_common.h:38

num_nets
int num_nets
Definition: globals.c:27

FIRST_ITER_WIRELENTH_LIMIT
#define FIRST_ITER_WIRELENTH_LIMIT
Definition: vpr_types.h:88

get_max_pins_per_net
static int get_max_pins_per_net(void)
Definition: route_timing.c:290

s_heap::R_upstream
float R_upstream
Definition: route_common.h:12

do_timing_analysis
void do_timing_analysis(t_slack *slacks, boolean is_prepacked, boolean do_lut_input_balancing, boolean is_final_analysis)
Definition: path_delay.c:1613

ReadOptions.h

update_net_delays_from_route_tree
void update_net_delays_from_route_tree(float *net_delay, t_rt_node **rt_node_of_sink, int inet)
Definition: route_tree_timing.c:479

globals.h

s_switch_inf::R
float R
Definition: physical_types.h:746

stats.h

s_linked_rt_edge
Definition: route_tree_timing.h:3

FALSE
Definition: util.h:12

reset_path_costs
void reset_path_costs(void)
Definition: route_common.c:490

pres_fac
static float pres_fac
Definition: cluster_legality.c:31

get_num_bends_and_length
void get_num_bends_and_length(int inet, int *bends_ptr, int *len_ptr, int *segments_ptr)
Definition: stats.c:276

min
#define min(a, b)
Definition: graphics.c:174

clb_opins_used_locally
static t_ivec ** clb_opins_used_locally
Definition: timing_place_lookup.c:88

t_rr_node_route_inf::prev_node
int prev_node
Definition: route_common.h:35

get_timing_driven_expected_cost
static float get_timing_driven_expected_cost(int inode, int target_node, float criticality_fac, float R_upstream)
Definition: route_timing.c:595

s_router_opts::bend_cost
float bend_cost
Definition: vpr_types.h:700

s_heap::u
union s_heap::@0 u

HUGE_POSITIVE_FLOAT
#define HUGE_POSITIVE_FLOAT
Definition: vpr_types.h:79

my_malloc
static void * my_malloc(int ibytes)
Definition: graphics.c:499

route_export.h

is_global
boolean * is_global
Definition: globals_declare.h:5

s_rt_node
Definition: route_tree_timing.h:17

s_rr_indexed_data::T_linear
float T_linear
Definition: vpr_types.h:975

heapsort.h

s_heap::prev_edge
int prev_edge
Definition: route_common.h:10

max
#define max(a, b)
Definition: graphics.c:171

pin_criticality
static float * pin_criticality
Definition: timing_place_lookup.c:77

add_route_tree_to_heap
static void add_route_tree_to_heap(t_rt_node *rt_node, int target_node, float target_criticality, float astar_fac)
Definition: route_timing.c:471

clb_net
struct s_net * clb_net
Definition: globals.c:28

free_route_tree
void free_route_tree(t_rt_node *rt_node)
Definition: route_tree_timing.c:458

num_rr_nodes
int num_rr_nodes
Definition: globals.c:69

s_router_opts::initial_pres_fac
float initial_pres_fac
Definition: vpr_types.h:697

nx
int nx
Definition: globals.c:46

net_delay.h

s_rt_node::u
union s_rt_node::@1 u

route_common.h

free_route_tree_timing_structs
void free_route_tree_timing_structs(void)
Definition: route_tree_timing.c:67

util.h

trace_head
struct s_trace ** trace_head
Definition: globals.c:64

switch_inf
struct s_switch_inf * switch_inf
Definition: globals.c:83

alloc_timing_driven_route_structs
void alloc_timing_driven_route_structs(float **pin_criticality_ptr, int **sink_order_ptr, t_rt_node ***rt_node_of_sink_ptr)
Definition: route_timing.c:253

timing_driven_expand_neighbours
static void timing_driven_expand_neighbours(struct s_heap *current, int inet, float bend_cost, float criticality_fac, int target_node, float astar_fac, int highfanout_rlim)
Definition: route_timing.c:507

t_rr_node_route_inf::prev_edge
short prev_edge
Definition: route_common.h:40

s_trace
Definition: vpr_types.h:865

s_heap::cost
float cost
Definition: route_common.h:5

node_to_heap
void node_to_heap(int inode, float cost, int prev_node, int prev_edge, float backward_path_cost, float R_upstream)
Definition: route_common.c:562

free_net_delay
void free_net_delay(float **net_delay, t_chunk *chunk_list_ptr)
Definition: net_delay.c:127

s_router_opts::astar_fac
float astar_fac
Definition: vpr_types.h:707

s_ivec
Definition: util.h:47

timing_driven_check_net_delays
static void timing_driven_check_net_delays(float **net_delay)
Definition: route_timing.c:857

alloc_net_delay
float ** alloc_net_delay(t_chunk *chunk_list_ptr, struct s_net *nets, int n_nets)
Definition: net_delay.c:103

s_switch_inf::Tdel
float Tdel
Definition: physical_types.h:749

s_rt_node::child_list
t_linked_rt_edge * child_list
Definition: route_tree_timing.h:19

pathfinder_update_one_cost
void pathfinder_update_one_cost(struct s_trace *route_segment_start, int add_or_sub, float pres_fac)
Definition: route_common.c:315

s_rr_indexed_data::saved_base_cost
float saved_base_cost
Definition: vpr_types.h:971

s_rr_node::yhigh
short yhigh
Definition: vpr_types.h:893

timing_driven_route_net
boolean timing_driven_route_net(int inet, float pres_fac, float max_criticality, float criticality_exp, float astar_fac, float bend_cost, float *pin_criticality, int *sink_order, t_rt_node **rt_node_of_sink, float *net_delay, t_slack *slacks)
Definition: route_timing.c:307

update_traceback
struct s_trace * update_traceback(struct s_heap *hptr, int inet)
Definition: route_common.c:421

t_rr_type
enum e_rr_type t_rr_type

update_rr_base_costs
static void update_rr_base_costs(int inet, float largest_criticality)
Definition: route_timing.c:744

s_rr_node::switches
short * switches
Definition: vpr_types.h:904

s_slack::timing_criticality
float ** timing_criticality
Definition: vpr_types.h:406

s_router_opts::max_criticality
float max_criticality
Definition: vpr_types.h:708

CHANX_COST_INDEX_START
Definition: vpr_types.h:1014

get_expected_segs_to_target
static int get_expected_segs_to_target(int inode, int target_node, int *num_segs_ortho_dir_ptr)
Definition: route_timing.c:655

update_route_tree
t_rt_node * update_route_tree(struct s_heap *hptr)
Definition: route_tree_timing.c:181

HIGH_FANOUT_NET_LIM
#define HIGH_FANOUT_NET_LIM
Definition: vpr_types.h:86

mark_node_expansion_by_bin
static int mark_node_expansion_by_bin(int inet, int target_node, t_rt_node *rt_node)
Definition: route_timing.c:769

s_linked_rt_edge::child
struct s_rt_node * child
Definition: route_tree_timing.h:4

add_to_mod_list
void add_to_mod_list(float *fptr)
Definition: route_common.c:898

s_rr_node::xlow
short xlow
Definition: vpr_types.h:890

SINK
Definition: vpr_types.h:854

reserve_locally_used_opins
void reserve_locally_used_opins(float pres_fac, boolean rip_up_local_opins, t_ivec **clb_opins_used_locally)
Definition: route_common.c:1208

s_router_opts::acc_fac
float acc_fac
Definition: vpr_types.h:699

net_rr_terminals
int ** net_rr_terminals
Definition: globals.c:78

s_router_opts::criticality_exp
float criticality_exp
Definition: vpr_types.h:709

t_rr_node_route_inf::backward_path_cost
float backward_path_cost
Definition: route_common.h:39

rt_node_of_sink
static t_rt_node ** rt_node_of_sink
Definition: timing_place_lookup.c:79

route_bb
struct s_bb * route_bb
Definition: route_common.c:23

empty_heap
void empty_heap(void)
Definition: route_common.c:991

s_rr_indexed_data::C_load
float C_load
Definition: vpr_types.h:977

s_bb::xmin
int xmin
Definition: vpr_types.h:532

t_rr_node_route_inf::target_flag
short target_flag
Definition: route_common.h:41

ny
int ny
Definition: globals.c:47

vpr_printf
messagelogger vpr_printf
Definition: util.c:17

s_rr_indexed_data::inv_length
float inv_length
Definition: vpr_types.h:974

s_net::num_sinks
int num_sinks
Definition: vpr_types.h:506

try_timing_driven_route
boolean try_timing_driven_route(struct s_router_opts router_opts, float **net_delay, t_slack *slacks, t_ivec **clb_opins_used_locally, boolean timing_analysis_enabled)
Definition: route_timing.c:44

load_timing_graph_net_delays
void load_timing_graph_net_delays(float **net_delay)
Definition: path_delay.c:368

s_rt_node::inode
int inode
Definition: route_tree_timing.h:25

path_delay.h

route_timing.h

CHANX
Definition: vpr_types.h:854

s_slack
Definition: vpr_types.h:402

s_rr_node::type
t_rr_type type
Definition: vpr_types.h:902

TRUE
Definition: util.h:12

sink_order
static int * sink_order
Definition: timing_place_lookup.c:78

s_rt_node::Tdel
float Tdel
Definition: route_tree_timing.h:28

s_router_opts
Definition: vpr_types.h:695