d5/d28/place_8c_source.html

 /*#include <stdlib.h> */

 #include <stdio.h>

 #include <math.h>

 #include <assert.h>

 #include "util.h"

 #include "vpr_types.h"

 #include "globals.h"

 #include "place.h"

 #include "read_place.h"

 #include "draw.h"

 #include "place_and_route.h"

 #include "net_delay.h"

 #include "path_delay.h"

 #include "timing_place_lookup.h"

 #include "timing_place.h"

 #include "place_stats.h"

 #include "read_xml_arch_file.h"

 #include "ReadOptions.h"

 #include "vpr_utils.h"

 #include "place_macro.h"


 /************** Types and defines local to place.c ***************************/


 /* Cut-off for incremental bounding box updates.                          *
  * 4 is fastest -- I checked.                                             *
  * To turn off incremental bounding box updates, set this to a huge value */

 #define SMALL_NET 4


 /* This defines the error tolerance for floating point variables used in  *
  * cost computation. 0.01 means that there is a 1% error tolerance.       */

 #define ERROR_TOL .01


 /* This defines the maximum number of swap attempts before invoking the   *
  * once-in-a-while placement legality check as well as the floating point *
  * round-off check.                                                       */

 #define MAX_MOVES_BEFORE_RECOMPUTE 50000


 /* The maximum number of tries when trying to place a carry chain at a    *
  * random location before trying exhaustive placement - find the first    *
  * legal position and place it during initial placement.                  */

 #define MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY 4


 /* Flags for the states of the bounding box.                              *
  * Stored as char for memory efficiency.                                  */

 #define NOT_UPDATED_YET 'N'

 #define UPDATED_ONCE 'U'

 #define GOT_FROM_SCRATCH 'S'


 /* Selects how comp_cost evaluates the bounding boxes.                    *
  *   NORMAL - use the method that generates updateable bounding boxes,    *
  *            for speed.                                                  *
  *   CHECK  - compute all bounding boxes from scratch using a very simple *
  *            routine, to allow checks of the other costs.                */
 enum cost_methods {
     NORMAL = 0,
     CHECK = 1
 };


 /* Outcome of a swap attempt in the placement swap routines.              *
  *   REJECTED - the swap was rejected.                                    *
  *   ACCEPTED - the swap was accepted.                                    *
  *   ABORTED  - the swap was aborted (due to the limitations placed on    *
  *              the carry chain support at this point).                   */
 enum swap_result {
     REJECTED = 0,
     ACCEPTED = 1,
     ABORTED = 2
 };


 /* Upper bound applied to the inverse timing cost.  It stops the inverse
  * timing cost from going to infinity with very lax timing constraints,
  * which avoids multiplying by a gigantic inverse_prev_timing_cost when
  * auto-normalizing.  The exact value of this cost has relatively little
  * impact, but should not be large enough to be on the order of timing
  * costs for normal constraints. */

 #define MAX_INV_TIMING_COST 1.e9


 /********************** Data Structure Definition ***************************/

 /* Records the move of a single block during placement.         *
  * block_num:        the index of the moved block               *
  * xold, yold, zold: the coordinates the block is moved from    *
  * xnew, ynew, znew: the coordinates the block is moved to      *
  * swapped_to_empty: flag stored with the move; presumably set  *
  *                   when the destination held no block --      *
  *                   TODO confirm against the swap code.        */
 struct s_pl_moved_block {
     int block_num;
     int xold;
     int xnew;
     int yold;
     int ynew;
     int zold;
     int znew;
     int swapped_to_empty;
 };
 typedef struct s_pl_moved_block t_pl_moved_block;


 /* The list of blocks to be moved in a swap during placement.   *
  * num_moved_blocks: total number of blocks moved when          *
  *                   swapping two blocks.                       *
  * moved_blocks:     list of per-block move records,            *
  *                   [0...num_moved_blocks-1]                   */
 struct s_pl_blocks_to_be_moved {
     int num_moved_blocks;
     t_pl_moved_block *moved_blocks;
 };
 typedef struct s_pl_blocks_to_be_moved t_pl_blocks_to_be_moved;


 /********************** Variables local to place.c ***************************/


 /* Cost of a net, and a temporary cost of a net used during move assessment. *
  * Both arrays are indexed by net.                                           */

 static float *net_cost = NULL, *temp_net_cost = NULL; /* [0..num_nets-1] */


 /* One legal position (x, y, z) for a block type. */
 struct s_legal_pos {
     int x;
     int y;
     int z;
 };
 typedef struct s_legal_pos t_legal_pos;


 static t_legal_pos **legal_pos = NULL; /* [0..num_types-1][0..type_tsize - 1] */

 static int *num_legal_pos = NULL; /* presumably the per-type entry counts for legal_pos, i.e. [0..num_types-1] -- TODO confirm against the allocation code */


 /* [0...num_nets-1]                                                              *
  * A flag array to indicate whether the specific bounding box has been updated   *
  * in this particular swap or not. If it has been updated before, the code       *
  * must use the updated data, instead of the out-of-date data passed into the    *
  * subroutine, particularly used in try_swap(). The value NOT_UPDATED_YET        *
  * indicates that the net has not been updated before. UPDATED_ONCE indicates    *
  * that the net has been updated once; if it is going to be updated again, the   *
  * values from the previous update must be used. GOT_FROM_SCRATCH is only        *
  * applicable for nets larger than SMALL_NET and it indicates that the           *
  * particular bounding box could not be updated incrementally, hence the         *
  * bounding box was computed from scratch, so the bounding box is definitely     *
  * right -- DO NOT update it again.                                              */

 static char * bb_updated_before = NULL;


 /* [0..num_nets-1][1..num_pins-1]. The value of the timing-driven portion *
  * of the cost function. These arrays will be set to                      *
  * (criticality * delay) for each point to point connection.              */

 static float **point_to_point_timing_cost = NULL;

 static float **temp_point_to_point_timing_cost = NULL;


 /* [0..num_nets-1][1..num_pins-1]. The value of the delay for each        *
  * connection in the circuit.                                             */

 static float **point_to_point_delay_cost = NULL;

 static float **temp_point_to_point_delay_cost = NULL;


 /* [0..num_blocks-1][0..pins_per_clb-1]. Indicates which pin on the net   *
  * this block corresponds to; this is only required during timing-driven  *
  * placement. It is used to allow us to update individual connections on  *
  * each net.                                                              */

 static int **net_pin_index = NULL;


 /* [0..num_nets-1].  Store the bounding box coordinates (bb_coords) and the  *
  * number of blocks on each edge of a net's bounding box (bb_num_on_edges,   *
  * to allow efficient updates), respectively.                                */


 static struct s_bb *bb_coords = NULL, *bb_num_on_edges = NULL;


 /* Store the information on the blocks to be moved in a swap during     *
  * placement, in the form of an array of structs instead of a struct    *
  * with arrays, for cache efficiency.                                   */

 static t_pl_blocks_to_be_moved blocks_affected;


 /* The arrays below are used to precompute the inverse of the average   *
  * number of tracks per channel between [subhigh] and [sublow].  Access *
  * them as chan?_place_cost_fac[subhigh][sublow].  They are used to     *
  * speed up the computation of the cost function that takes the length  *
  * of the net bounding box in each dimension, divided by the average    *
  * number of tracks in that direction; for other cost functions they    *
  * will never be used.                                                  *
  * Index ranges: chanx_place_cost_fac covers [0...ny] and               *
  * chany_place_cost_fac covers [0...nx].                                *
  * No explicit initializer is given; as objects with static storage     *
  * duration both pointers start out null.                               */

 static float **chanx_place_cost_fac, **chany_place_cost_fac;


 /* The following arrays are used by the try_swap function for speed.   *
  * Each of them is indexed [0...num_nets-1].                           */

 static struct s_bb *ts_bb_coord_new = NULL;

 static struct s_bb *ts_bb_edge_new = NULL;

 static int *ts_nets_to_update = NULL;


 /* The pl_macros array stores all the carry chain placement macros,    *
  * [0...num_pl_macros-1].                                              */

 static t_pl_macro * pl_macros = NULL;

 static int num_pl_macros;


 /* These file-scoped variables keep track of the number of swaps       *
  * rejected, accepted or aborted. The total number of swap attempts    *
  * is the sum of the three numbers. try_place() resets all of the      *
  * counters below, including num_ts_called, to zero when it starts.    *
  * NOTE(review): num_ts_called presumably counts calls to try_swap --  *
  * confirm where it is incremented.                                    */

 static int num_swap_rejected = 0;

 static int num_swap_accepted = 0;

 static int num_swap_aborted = 0;

 static int num_ts_called = 0;


 /* Expected crossing counts for nets with different numbers of pins.   *
  * Taken from ICCAD 94 pp. 690 - 695, with linear interpolation        *
  * applied by the original author.  A net's bounding box is multiplied *
  * by the matching entry to better estimate wire length for higher     *
  * fanout nets. Each entry is the correction factor for the            *
  * fanout index-1.  Ten entries per row; valid indices are [0..49].    */
 static const float cross_count[50] = {
         /* [ 0.. 9] */
         1.0, 1.0, 1.0, 1.0828, 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493,
         /* [10..19] */
         1.4974, 1.5455, 1.5937, 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924,
         /* [20..29] */
         1.9288, 1.9652, 2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016, 2.2334,
         /* [30..39] */
         2.2646, 2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064, 2.5356,
         /* [40..49] */
         2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671, 2.7933
 };


 /********************* Static subroutines local to place.c *******************/

 #ifdef VERBOSE
     static void print_clb_placement(const char *fname);
 #endif

 /* Forward declarations of the file-local routines.  Functions that take *
  * no arguments are declared with (void) rather than an empty parameter  *
  * list, so that every declaration below is a full prototype and the     *
  * compiler checks the argument count at each call.                      */

 static void alloc_and_load_placement_structs(
         float place_cost_exp, float ***old_region_occ_x,
         float ***old_region_occ_y, struct s_placer_opts placer_opts,
         t_direct_inf *directs, int num_directs);

 static void alloc_and_load_try_swap_structs(void);

 static void free_placement_structs(
         float **old_region_occ_x, float **old_region_occ_y,
         struct s_placer_opts placer_opts);

 static void alloc_and_load_for_fast_cost_update(float place_cost_exp);

 static void free_fast_cost_update(void);

 static void alloc_legal_placements(void);
 static void load_legal_placements(void);

 static void free_legal_placements(void);

 static int check_macro_can_be_placed(int imacro, int itype, int x, int y, int z);

 static int try_place_macro(int itype, int ichoice, int imacro, int * free_locations);

 static void initial_placement_pl_macros(int macros_max_num_tries, int * free_locations);

 static void initial_placement_blocks(int * free_locations, enum e_pad_loc_type pad_loc_type);

 static void initial_placement(enum e_pad_loc_type pad_loc_type,
         char *pad_loc_file);

 static float comp_bb_cost(enum cost_methods method);

 static int setup_blocks_affected(int b_from, int x_to, int y_to, int z_to);

 static int find_affected_blocks(int b_from, int x_to, int y_to, int z_to);

 static enum swap_result try_swap(float t, float *cost, float *bb_cost, float *timing_cost,
         float rlim, float **old_region_occ_x,
         float **old_region_occ_y,
         enum e_place_algorithm place_algorithm, float timing_tradeoff,
         float inverse_prev_bb_cost, float inverse_prev_timing_cost,
         float *delay_cost);

 static void check_place(float bb_cost, float timing_cost,
         enum e_place_algorithm place_algorithm,
         float delay_cost);

 static float starting_t(float *cost_ptr, float *bb_cost_ptr,
         float *timing_cost_ptr, float **old_region_occ_x,
         float **old_region_occ_y,
         struct s_annealing_sched annealing_sched, int max_moves, float rlim,
         enum e_place_algorithm place_algorithm, float timing_tradeoff,
         float inverse_prev_bb_cost, float inverse_prev_timing_cost,
         float *delay_cost_ptr);

 static void update_t(float *t, float std_dev, float rlim, float success_rat,
         struct s_annealing_sched annealing_sched);

 static void update_rlim(float *rlim, float success_rat);

 static int exit_crit(float t, float cost,
         struct s_annealing_sched annealing_sched);

 static int count_connections(void);

 static double get_std_dev(int n, double sum_x_squared, double av_x);

 static float recompute_bb_cost(void);

 static float comp_td_point_to_point_delay(int inet, int ipin);

 static void update_td_cost(void);

 static void comp_delta_td_cost(float *delta_timing, float *delta_delay);

 static void comp_td_costs(float *timing_cost, float *connection_delay_sum);

 static enum swap_result assess_swap(float delta_c, float t);

 static boolean find_to(int x_from, int y_from, t_type_ptr type, float rlim, int *x_to, int *y_to);

 static void get_non_updateable_bb(int inet, struct s_bb *bb_coord_new);

 static void update_bb(int inet, struct s_bb *bb_coord_new,
         struct s_bb *bb_edge_new, int xold, int yold, int xnew, int ynew);

 static int find_affected_nets(int *nets_to_update);

 static float get_net_cost(int inet, struct s_bb *bb_ptr);

 static void get_bb_from_scratch(int inet, struct s_bb *coords,
         struct s_bb *num_on_edges);

 static double get_net_wirelength_estimate(int inet, struct s_bb *bbptr);

 static void free_try_swap_arrays(void);


 /*****************************************************************************/

 /* RESEARCH TODO: Bounding Box and rlim need to be redone for heterogeneous to prevent a QoR penalty */

 void try_place(struct s_placer_opts placer_opts,

         struct s_annealing_sched annealing_sched,

         t_chan_width_dist chan_width_dist, struct s_router_opts router_opts,

         struct s_det_routing_arch det_routing_arch, t_segment_inf * segment_inf,

         t_timing_inf timing_inf, t_direct_inf *directs, int num_directs) {


     /* Does almost all the work of placing a circuit.  Width_fac gives the   *

      * width of the widest channel.  Place_cost_exp says what exponent the   *

      * width should be taken to when calculating costs.  This allows a       *

      * greater bias for anisotropic architectures.                           */


     int tot_iter, inner_iter, success_sum, move_lim, moves_since_cost_recompute, width_fac,

         num_connections, inet, ipin, outer_crit_iter_count, inner_crit_iter_count,

         inner_recompute_limit, swap_result;

     float t, success_rat, rlim, cost, timing_cost, bb_cost, new_bb_cost, new_timing_cost,

         delay_cost, new_delay_cost, place_delay_value, inverse_prev_bb_cost, inverse_prev_timing_cost,

         oldt, **old_region_occ_x, **old_region_occ_y, **net_delay = NULL, crit_exponent,

         first_rlim, final_rlim, inverse_delta_rlim, critical_path_delay = UNDEFINED,

         **remember_net_delay_original_ptr; /*used to free net_delay if it is re-assigned */

     double av_cost, av_bb_cost, av_timing_cost, av_delay_cost, sum_of_squares, std_dev;

     int total_swap_attempts;

     float reject_rate;

     float accept_rate;

     float abort_rate;

     char msg[BUFSIZE];

     t_slack * slacks = NULL;


     /* Allocated here because it goes into timing critical code where each memory allocation is expensive */


     remember_net_delay_original_ptr = NULL; /*prevents compiler warning */


     /* init file scope variables */

     num_swap_rejected = 0;

     num_swap_accepted = 0;

     num_swap_aborted = 0;

     num_ts_called = 0;


     if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE

             || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE

             || placer_opts.enable_timing_computations) {

         /*do this before the initial placement to avoid messing up the initial placement */

         slacks = alloc_lookups_and_criticalities(chan_width_dist, router_opts,

                 det_routing_arch, segment_inf, timing_inf, &net_delay, directs, num_directs);


         remember_net_delay_original_ptr = net_delay;


         /*#define PRINT_LOWER_BOUND */

 #ifdef PRINT_LOWER_BOUND

         /*print the crit_path, assuming delay between blocks that are*

          *block_dist apart*/


         if (placer_opts.block_dist <= nx)

         place_delay_value =

         delta_clb_to_clb[placer_opts.block_dist][0];

         else if (placer_opts.block_dist <= ny)

         place_delay_value =

         delta_clb_to_clb[0][placer_opts.block_dist];

         else

         place_delay_value = delta_clb_to_clb[nx][ny];


         vpr_printf(TIO_MESSAGE_INFO, "\n");

         vpr_printf(TIO_MESSAGE_INFO, "Lower bound assuming delay of %g\n", place_delay_value);


         load_constant_net_delay(net_delay, place_delay_value);

         load_timing_graph_net_delays(net_delay);

         do_timing_analysis(slacks, FALSE, FALSE, TRUE);


         if (getEchoEnabled()) {

             if(isEchoFileEnabled(E_ECHO_PLACEMENT_CRITICAL_PATH))

                 print_critical_path(getEchoFileName(E_ECHO_PLACEMENT_CRITICAL_PATH));

             if(isEchoFileEnabled(E_ECHO_PLACEMENT_LOWER_BOUND_SINK_DELAYS))

                 print_sink_delays(getEchoFileName(E_ECHO_PLACEMENT_LOWER_BOUND_SINK_DELAYS));

             if(isEchoFileEnabled(E_ECHO_PLACEMENT_LOGIC_SINK_DELAYS))

                 print_sink_delays(getEchoFileName(E_ECHO_PLACEMENT_LOGIC_SINK_DELAYS));

         }


         /*also print sink delays assuming 0 delay between blocks,

          * this tells us how much logic delay is on each path */


         load_constant_net_delay(net_delay, 0);

         load_timing_graph_net_delays(net_delay);

         do_timing_analysis(slacks, FALSE, FALSE, TRUE);


 #endif


     }


     width_fac = placer_opts.place_chan_width;


     init_chan(width_fac, chan_width_dist);


     alloc_and_load_placement_structs(

             placer_opts.place_cost_exp,

             &old_region_occ_x, &old_region_occ_y, placer_opts,

             directs, num_directs);


     initial_placement(placer_opts.pad_loc_type, placer_opts.pad_loc_file);

     init_draw_coords((float) width_fac);


     /* Storing the number of pins on each type of block makes the swap routine *

      * slightly more efficient.                                                */


     /* Gets initial cost and loads bounding boxes. */


     if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE

             || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {

         bb_cost = comp_bb_cost(NORMAL);


         crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */


         num_connections = count_connections();

         vpr_printf(TIO_MESSAGE_INFO, "\n");

         vpr_printf(TIO_MESSAGE_INFO, "There are %d point to point connections in this circuit.\n", num_connections);

         vpr_printf(TIO_MESSAGE_INFO, "\n");


         if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) {

             for (inet = 0; inet < num_nets; inet++)

                 for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)

                     timing_place_crit[inet][ipin] = 0; /*dummy crit values */


             comp_td_costs(&timing_cost, &delay_cost); /*first pass gets delay_cost, which is used

              * in criticality computations in the next call

              * to comp_td_costs. */

             place_delay_value = delay_cost / num_connections; /*used for computing criticalities */

             load_constant_net_delay(net_delay, place_delay_value, clb_net,

                     num_nets);


         } else

             place_delay_value = 0;


         if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {

             net_delay = point_to_point_delay_cost; /*this keeps net_delay up to date with      *

              * *the same values that the placer is using  *

              * *point_to_point_delay_cost is computed each*

              * *time that comp_td_costs is called, and is *

              * *also updated after any swap is accepted   */

         }


         load_timing_graph_net_delays(net_delay);

         do_timing_analysis(slacks, FALSE, FALSE, FALSE);

         load_criticalities(slacks, crit_exponent);

         if (getEchoEnabled()) {

             if(isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH))

                 print_timing_graph(getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH));

             if(isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_SLACK))

                 print_slack(slacks->slack, FALSE, getEchoFileName(E_ECHO_INITIAL_PLACEMENT_SLACK));

             if(isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_CRITICALITY))

                 print_criticality(slacks, FALSE, getEchoFileName(E_ECHO_INITIAL_PLACEMENT_CRITICALITY));

         }

         outer_crit_iter_count = 1;


         /*now we can properly compute costs  */

         comp_td_costs(&timing_cost, &delay_cost); /*also vpr_printf proper values into point_to_point_delay_cost */


         inverse_prev_timing_cost = 1 / timing_cost;

         inverse_prev_bb_cost = 1 / bb_cost;

         cost = 1; /*our new cost function uses normalized values of           */

         /*bb_cost and timing_cost, the value of cost will be reset  */

         /*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */

     } else { /*BOUNDING_BOX_PLACE */

         cost = bb_cost = comp_bb_cost(NORMAL);

         timing_cost = 0;

         delay_cost = 0;

         place_delay_value = 0;

         outer_crit_iter_count = 0;

         num_connections = 0;

         crit_exponent = 0;


         inverse_prev_timing_cost = 0; /*inverses not used */

         inverse_prev_bb_cost = 0;

     }


     move_lim = (int) (annealing_sched.inner_num * pow(num_blocks, 1.3333));


     if (placer_opts.inner_loop_recompute_divider != 0)

         inner_recompute_limit = (int) (0.5

                 + (float) move_lim

                         / (float) placer_opts.inner_loop_recompute_divider);

     else

         /*don't do an inner recompute */

         inner_recompute_limit = move_lim + 1;


     /* Sometimes I want to run the router with a random placement.  Avoid *

      * using 0 moves to stop division by 0 and 0 length vector problems,  *

      * by setting move_lim to 1 (which is still too small to do any       *

      * significant optimization).                                         */


     if (move_lim <= 0)

         move_lim = 1;


     rlim = (float) std::max(nx + 1, ny + 1);


     first_rlim = rlim; /*used in timing-driven placement for exponent computation */

     final_rlim = 1;

     inverse_delta_rlim = 1 / (first_rlim - final_rlim);


     t = starting_t(&cost, &bb_cost, &timing_cost,

             old_region_occ_x, old_region_occ_y,

             annealing_sched, move_lim, rlim,

             placer_opts.place_algorithm, placer_opts.timing_tradeoff,

             inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost);

     tot_iter = 0;

     moves_since_cost_recompute = 0;

     vpr_printf(TIO_MESSAGE_INFO, "Initial placement cost: %g bb_cost: %g td_cost: %g delay_cost: %g\n",

                 cost, bb_cost, timing_cost, delay_cost);

     vpr_printf(TIO_MESSAGE_INFO, "\n");


 #ifndef SPEC

     vpr_printf(TIO_MESSAGE_INFO, "%9s %9s %11s %11s %11s %11s %8s %8s %7s %7s %7s %9s %7s\n",

             "---------", "---------", "-----------", "-----------", "-----------", "-----------",

             "--------", "--------", "-------", "-------", "-------", "---------", "-------");

     vpr_printf(TIO_MESSAGE_INFO, "%9s %9s %11s %11s %11s %11s %8s %8s %7s %7s %7s %9s %7s\n",

             "T", "Cost", "Av BB Cost", "Av TD Cost", "Av Tot Del",

             "P to P Del", "d_max", "Ac Rate", "Std Dev", "R limit", "Exp",

             "Tot Moves", "Alpha");

     vpr_printf(TIO_MESSAGE_INFO, "%9s %9s %11s %11s %11s %11s %8s %8s %7s %7s %7s %9s %7s\n",

             "---------", "---------", "-----------", "-----------", "-----------", "-----------",

             "--------", "--------", "-------", "-------", "-------", "---------", "-------");

 #endif


     sprintf(msg, "Initial Placement.  Cost: %g  BB Cost: %g  TD Cost %g  Delay Cost: %g \t Channel Factor: %d",

         cost, bb_cost, timing_cost, delay_cost, width_fac);

     update_screen(MAJOR, msg, PLACEMENT, FALSE);


     while (exit_crit(t, cost, annealing_sched) == 0) {


         if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE

                 || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {

             cost = 1;

         }


         av_cost = 0.;

         av_bb_cost = 0.;

         av_delay_cost = 0.;

         av_timing_cost = 0.;

         sum_of_squares = 0.;

         success_sum = 0;


         if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE

                 || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {


             if (outer_crit_iter_count >= placer_opts.recompute_crit_iter

                     || placer_opts.inner_loop_recompute_divider != 0) {

 #ifdef VERBOSE

                 vpr_printf(TIO_MESSAGE_INFO, "Outer loop recompute criticalities\n");

 #endif

                 place_delay_value = delay_cost / num_connections;


                 if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE)

                     load_constant_net_delay(net_delay, place_delay_value,

                             clb_net, num_nets);

                 /*note, for path_based, the net delay is not updated since it is current,

                  *because it accesses point_to_point_delay array */


                 load_timing_graph_net_delays(net_delay);

                 do_timing_analysis(slacks, FALSE, FALSE, FALSE);

                 load_criticalities(slacks, crit_exponent);

                 /*recompute costs from scratch, based on new criticalities */

                 comp_td_costs(&timing_cost, &delay_cost);

                 outer_crit_iter_count = 0;

             }

             outer_crit_iter_count++;


             /*at each temperature change we update these values to be used     */

             /*for normalizing the tradeoff between timing and wirelength (bb)  */

             inverse_prev_bb_cost = 1 / bb_cost;

             /*Prevent inverse timing cost from going to infinity */

             inverse_prev_timing_cost = std::min(1 / timing_cost, (float)MAX_INV_TIMING_COST);

         }


         inner_crit_iter_count = 1;


         for (inner_iter = 0; inner_iter < move_lim; inner_iter++) {

             swap_result = try_swap(t, &cost, &bb_cost, &timing_cost, rlim,

                     old_region_occ_x,

                     old_region_occ_y,

                     placer_opts.place_algorithm, placer_opts.timing_tradeoff,

                     inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost);

             if (swap_result == ACCEPTED) {


                 /* Move was accepted.  Update statistics that are useful for the annealing schedule. */

                 success_sum++;

                 av_cost += cost;

                 av_bb_cost += bb_cost;

                 av_timing_cost += timing_cost;

                 av_delay_cost += delay_cost;

                 sum_of_squares += cost * cost;

                 num_swap_accepted++;

             } else if (swap_result == ABORTED) {

                 num_swap_aborted++;

             } else { // swap_result == REJECTED

                 num_swap_rejected++;

             }


             if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE

                     || placer_opts.place_algorithm

                             == PATH_TIMING_DRIVEN_PLACE) {


                 /* Do we want to re-timing analyze the circuit to get updated slack and criticality values?

                  * We do this only once in a while, since it is expensive.

                  */

                 if (inner_crit_iter_count >= inner_recompute_limit

                         && inner_iter != move_lim - 1) { /*on last iteration don't recompute */


                     inner_crit_iter_count = 0;

 #ifdef VERBOSE

                     vpr_printf(TIO_MESSAGE_TRACE, "Inner loop recompute criticalities\n");

 #endif

                     if (placer_opts.place_algorithm

                             == NET_TIMING_DRIVEN_PLACE) {

                         /* Use a constant delay per connection as the delay estimate, rather than

                          * estimating based on the current placement.  Not a great idea, but not the

                          * default.

                          */

                         place_delay_value = delay_cost / num_connections;

                         load_constant_net_delay(net_delay, place_delay_value,

                                 clb_net, num_nets);

                     }


                     /* Using the delays in net_delay, do a timing analysis to update slacks and

                      * criticalities; then update the timing cost since it will change.

                      */

                     load_timing_graph_net_delays(net_delay);

                     do_timing_analysis(slacks, FALSE, FALSE, FALSE);

                     load_criticalities(slacks, crit_exponent);

                     comp_td_costs(&timing_cost, &delay_cost);

                 }

                 inner_crit_iter_count++;

             }

 #ifdef VERBOSE

             vpr_printf(TIO_MESSAGE_TRACE, "t = %g  cost = %g   bb_cost = %g timing_cost = %g move = %d dmax = %g\n",

                     t, cost, bb_cost, timing_cost, inner_iter, delay_cost);

             if (fabs(bb_cost - comp_bb_cost(CHECK)) > bb_cost * ERROR_TOL)

                 exit(1);

 #endif

         }


         /* Lines below prevent too much round-off error from accumulating *

          * in the cost over many iterations.  This round-off can lead to  *

          * error checks failing because the cost is different from what   *

          * you get when you recompute from scratch.                       */


         moves_since_cost_recompute += move_lim;

         if (moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) {

             new_bb_cost = recompute_bb_cost();

             if (fabs(new_bb_cost - bb_cost) > bb_cost * ERROR_TOL) {

                 vpr_printf(TIO_MESSAGE_ERROR, "in try_place: new_bb_cost = %g, old bb_cost = %g\n",

                         new_bb_cost, bb_cost);

                 exit(1);

             }

             bb_cost = new_bb_cost;


             if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE

                     || placer_opts.place_algorithm

                             == PATH_TIMING_DRIVEN_PLACE) {

                 comp_td_costs(&new_timing_cost, &new_delay_cost);

                 if (fabs(new_timing_cost - timing_cost) > timing_cost * ERROR_TOL) {

                     vpr_printf(TIO_MESSAGE_ERROR, "in try_place: new_timing_cost = %g, old timing_cost = %g\n",

                             new_timing_cost, timing_cost);

                     exit(1);

                 }

                 if (fabs(new_delay_cost - delay_cost) > delay_cost * ERROR_TOL) {

                     vpr_printf(TIO_MESSAGE_ERROR, "in try_place: new_delay_cost = %g, old delay_cost = %g\n",

                             new_delay_cost, delay_cost);

                     exit(1);

                 }

                 timing_cost = new_timing_cost;

             }


             if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) {

                 cost = new_bb_cost;

             }

             moves_since_cost_recompute = 0;

         }


         tot_iter += move_lim;

         success_rat = ((float) success_sum) / move_lim;

         if (success_sum == 0) {

             av_cost = cost;

             av_bb_cost = bb_cost;

             av_timing_cost = timing_cost;

             av_delay_cost = delay_cost;

         } else {

             av_cost /= success_sum;

             av_bb_cost /= success_sum;

             av_timing_cost /= success_sum;

             av_delay_cost /= success_sum;

         }

         std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);


         oldt = t; /* for finding and printing alpha. */

         update_t(&t, std_dev, rlim, success_rat, annealing_sched);


 #ifndef SPEC

         critical_path_delay = get_critical_path_delay();

         vpr_printf(TIO_MESSAGE_INFO, "%9.5f %9.5g %11.6g %11.6g %11.6g %11.6g %8.4f %8.4f %7.4f %7.4f %7.4f %9d %7.4f\n",

                 oldt, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, place_delay_value,

                 critical_path_delay, success_rat, std_dev, rlim, crit_exponent, tot_iter, t / oldt);

 #endif


         sprintf(msg, "Cost: %g  BB Cost %g  TD Cost %g  Temperature: %g",

                 cost, bb_cost, timing_cost, t);

         update_screen(MINOR, msg, PLACEMENT, FALSE);

         update_rlim(&rlim, success_rat);


         if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE

                 || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {

             crit_exponent = (1 - (rlim - final_rlim) * inverse_delta_rlim)

                     * (placer_opts.td_place_exp_last

                             - placer_opts.td_place_exp_first)

                     + placer_opts.td_place_exp_first;

         }

 #ifdef VERBOSE

         if (getEchoEnabled()) {

             print_clb_placement("first_iteration_clb_placement.echo");

         }

 #endif

     }


     t = 0; /* freeze out */

     av_cost = 0.;

     av_bb_cost = 0.;

     av_timing_cost = 0.;

     sum_of_squares = 0.;

     av_delay_cost = 0.;

     success_sum = 0;


     if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE

             || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {

         /*at each temperature change we update these values to be used     */

         /*for normalizing the tradeoff between timing and wirelength (bb)  */

         if (outer_crit_iter_count >= placer_opts.recompute_crit_iter

                 || placer_opts.inner_loop_recompute_divider != 0) {


 #ifdef VERBOSE

             vpr_printf(TIO_MESSAGE_INFO, "Outer loop recompute criticalities\n");

 #endif

             place_delay_value = delay_cost / num_connections;


             if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE)

                 load_constant_net_delay(net_delay, place_delay_value, clb_net,

                         num_nets);


             load_timing_graph_net_delays(net_delay);

             do_timing_analysis(slacks, FALSE, FALSE, FALSE);

             load_criticalities(slacks, crit_exponent);

             /* recompute criticalities */

             comp_td_costs(&timing_cost, &delay_cost);

             outer_crit_iter_count = 0;

         }

         outer_crit_iter_count++;


         inverse_prev_bb_cost = 1 / (bb_cost);

         /*Prevent inverse timing cost from going to infinity */

         inverse_prev_timing_cost = std::min(1 / timing_cost, (float)MAX_INV_TIMING_COST);

     }


     inner_crit_iter_count = 1;


     for (inner_iter = 0; inner_iter < move_lim; inner_iter++) {

         swap_result = try_swap(t, &cost, &bb_cost, &timing_cost, rlim,

                 old_region_occ_x, old_region_occ_y,

                 placer_opts.place_algorithm, placer_opts.timing_tradeoff,

                 inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost);


         if (swap_result == ACCEPTED) {

             success_sum++;

             av_cost += cost;

             av_bb_cost += bb_cost;

             av_delay_cost += delay_cost;

             av_timing_cost += timing_cost;

             sum_of_squares += cost * cost;


             if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE

                     || placer_opts.place_algorithm

                             == PATH_TIMING_DRIVEN_PLACE) {


                 if (inner_crit_iter_count >= inner_recompute_limit

                         && inner_iter != move_lim - 1) {


                     inner_crit_iter_count = 0;

 #ifdef VERBOSE

                     vpr_printf(TIO_MESSAGE_TRACE, "Inner loop recompute criticalities\n");

 #endif

                     if (placer_opts.place_algorithm

                             == NET_TIMING_DRIVEN_PLACE) {

                         place_delay_value = delay_cost / num_connections;

                         load_constant_net_delay(net_delay, place_delay_value,

                                 clb_net, num_nets);

                     }


                     load_timing_graph_net_delays(net_delay);

                     do_timing_analysis(slacks, FALSE, FALSE, FALSE);

                     load_criticalities(slacks, crit_exponent);

                     comp_td_costs(&timing_cost, &delay_cost);

                 }

                 inner_crit_iter_count++;

             }

             num_swap_accepted++;

         } else if (swap_result == ABORTED) {

             num_swap_aborted++;

         } else {

             num_swap_rejected++;

         }


 #ifdef VERBOSE

         vpr_printf(TIO_MESSAGE_INFO, "t = %g, cost = %g, move = %d\n", t, cost, tot_iter);

 #endif

     }

     tot_iter += move_lim;

     success_rat = ((float) success_sum) / move_lim;

     if (success_sum == 0) {

         av_cost = cost;

         av_bb_cost = bb_cost;

         av_delay_cost = delay_cost;

         av_timing_cost = timing_cost;

     } else {

         av_cost /= success_sum;

         av_bb_cost /= success_sum;

         av_delay_cost /= success_sum;

         av_timing_cost /= success_sum;

     }


     std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);


 #ifndef SPEC

     vpr_printf(TIO_MESSAGE_INFO, "%9.5f %9.5g %11.6g %11.6g %11.6g %11.6g %8s %8.4f %7.4f %7.4f %7.4f %9d\n",

             t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, place_delay_value,

             " ", success_rat, std_dev, rlim, crit_exponent, tot_iter);

 #endif


     // TODO:

     // 1. print a message about number of aborted moves.

     // 2. add some subroutine hierarchy!  Too big!

     // 3. put statistics counters (av_cost, success_sum, etc.) in a struct so a

     // pointer to it can be passed around.


 #ifdef VERBOSE

     if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_END_CLB_PLACEMENT)) {

         print_clb_placement(getEchoFileName(E_ECHO_END_CLB_PLACEMENT));

     }

 #endif


     check_place(bb_cost, timing_cost,

             placer_opts.place_algorithm, delay_cost);


     if (placer_opts.enable_timing_computations

             && placer_opts.place_algorithm == BOUNDING_BOX_PLACE) {

         /*need this done since the timing data has not been kept up to date*

          *in bounding_box mode */

         for (inet = 0; inet < num_nets; inet++)

             for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)

                 timing_place_crit[inet][ipin] = 0; /*dummy crit values */

         comp_td_costs(&timing_cost, &delay_cost); /*computes point_to_point_delay_cost */

     }


     if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE

             || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE

             || placer_opts.enable_timing_computations) {

         net_delay = point_to_point_delay_cost; /*this makes net_delay up to date with    *

          *the same values that the placer is using*/

         load_timing_graph_net_delays(net_delay);


         do_timing_analysis(slacks, FALSE, FALSE, FALSE);


         if (getEchoEnabled()) {

             if(isEchoFileEnabled(E_ECHO_PLACEMENT_SINK_DELAYS))

                 print_sink_delays(getEchoFileName(E_ECHO_PLACEMENT_SINK_DELAYS));

             if(isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_SLACK))

                 print_slack(slacks->slack, FALSE, getEchoFileName(E_ECHO_FINAL_PLACEMENT_SLACK));

             if(isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_CRITICALITY))

                 print_criticality(slacks, FALSE, getEchoFileName(E_ECHO_FINAL_PLACEMENT_CRITICALITY));

             if(isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH))

                 print_timing_graph(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH));

             if(isEchoFileEnabled(E_ECHO_PLACEMENT_CRIT_PATH))

                 print_critical_path(getEchoFileName(E_ECHO_PLACEMENT_CRIT_PATH));

         }


         /* Print critical path delay. */

         critical_path_delay = get_critical_path_delay();

         vpr_printf(TIO_MESSAGE_INFO, "\n");

         vpr_printf(TIO_MESSAGE_INFO, "Placement estimated critical path delay: %g ns\n", critical_path_delay);

     }


     sprintf(msg, "Placement. Cost: %g  bb_cost: %g td_cost: %g Channel Factor: %d",

             cost, bb_cost, timing_cost, width_fac);

     vpr_printf(TIO_MESSAGE_INFO, "Placement cost: %g, bb_cost: %g, td_cost: %g, delay_cost: %g\n",

             cost, bb_cost, timing_cost, delay_cost);

     update_screen(MAJOR, msg, PLACEMENT, FALSE);


     // Print out swap statistics

     total_swap_attempts = num_swap_rejected + num_swap_accepted + num_swap_aborted;

     reject_rate = num_swap_rejected / total_swap_attempts;

     accept_rate = num_swap_accepted / total_swap_attempts;

     abort_rate = num_swap_aborted / total_swap_attempts;

     vpr_printf(TIO_MESSAGE_INFO, "Placement total # of swap attempts: %d\n", total_swap_attempts);

     vpr_printf(TIO_MESSAGE_INFO, "\tSwap reject rate: %g\n", reject_rate);

     vpr_printf(TIO_MESSAGE_INFO, "\tSwap accept rate: %g\n", accept_rate);

     vpr_printf(TIO_MESSAGE_INFO, "\tSwap abort rate: %g\n", abort_rate);


 #ifdef SPEC

     vpr_printf(TIO_MESSAGE_INFO, "Total moves attempted: %d.0\n", tot_iter);

 #endif


     free_placement_structs(

                 old_region_occ_x, old_region_occ_y,

                 placer_opts);

     if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE

             || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE

             || placer_opts.enable_timing_computations) {


         net_delay = remember_net_delay_original_ptr;

         free_lookups_and_criticalities(&net_delay, slacks);

     }


     free_try_swap_arrays();

 }


 static int count_connections() {
     /* Returns the total number of sinks over all nets in clb_net that are
      * not flagged is_global.  Sinks of global nets are left out of the
      * total. */
     int total_sinks = 0;
     int net_idx;

     for (net_idx = 0; net_idx < num_nets; net_idx++) {
         if (!clb_net[net_idx].is_global) {
             total_sinks += clb_net[net_idx].num_sinks;
         }
     }

     return total_sinks;
 }


 static double get_std_dev(int n, double sum_x_squared, double av_x) {
     /* Sample standard deviation of a data set x, computed from aggregates:
      *   n             - number of sample points
      *   sum_x_squared - sum of x^2 over the n points
      *   av_x          - mean of x
      * Returns 0 when n <= 1 or when the computed variance is not positive.
      * Everything stays in double precision because round-off matters in
      * the initial temperature std_dev calculation for big circuits. */
     double variance;

     if (n <= 1) {
         return 0.;
     }

     variance = (sum_x_squared - n * av_x * av_x) / (double) (n - 1);

     /* Very small variances sometimes round to a negative number. */
     return (variance > 0.) ? sqrt(variance) : 0.;
 }


 static void update_rlim(float *rlim, float success_rat) {
     /* Rescales the range limit *rlim by (1 - 0.44 + success_rat) so the
      * acceptance probability is steered towards 0.44, then clamps the
      * result to the interval [1, max(nx + 1, ny + 1)].  rlim is kept as a
      * float so it can change gradually at low temperatures. */
     const float largest_rlim = (nx < ny ? ny : nx) + 1;
     float scaled = (*rlim) * (1. - 0.44 + success_rat);

     if (largest_rlim < scaled) {
         scaled = largest_rlim;
     }
     if (scaled < (float)1.) {
         scaled = (float)1.;
     }

     *rlim = scaled;
 }


 /* Cools the annealing temperature *t by one step of the selected schedule.
  *   USER_SCHED : *t is multiplied by annealing_sched.alpha_t.
  *   otherwise  : (AUTO_SCHED) *t is multiplied by a factor picked from the
  *                success ratio and the range limit (see below).
  * std_dev is not used by the live code.  It fed an older schedule based on
  * the standard deviation, t *= max(0.5, exp(-LAMBDA * t / std_dev)) with
  * LAMBDA = .7 (and t = 0 when std_dev == 0), which bogged down horribly for
  * big circuits (alu4 and especially bigkey_mod). */
 static void update_t(float *t, float std_dev, float rlim, float success_rat,
         struct s_annealing_sched annealing_sched) {
     double cooling_factor;

     if (annealing_sched.type == USER_SCHED) {
         *t = annealing_sched.alpha_t * (*t);
         return;
     }

     /* AUTO_SCHED */
     if (success_rat > 0.96) {
         cooling_factor = 0.5;
     } else if (success_rat > 0.8) {
         cooling_factor = 0.9;
     } else if (success_rat > 0.15 || rlim > 1.) {
         cooling_factor = 0.95;
     } else {
         cooling_factor = 0.8;
     }

     *t = (*t) * cooling_factor;
 }


 static int exit_crit(float t, float cost,
         struct s_annealing_sched annealing_sched) {
     /* Returns 1 when the annealing exit criterion is met, 0 otherwise.
      *   USER_SCHED : met once t drops below annealing_sched.exit_t.
      *   otherwise  : (automatic schedule) met once t drops below
      *                0.005 * cost / num_nets. */
     if (annealing_sched.type == USER_SCHED) {
         return (t < annealing_sched.exit_t) ? 1 : 0;
     }

     return (t < 0.005 * cost / num_nets) ? 1 : 0;
 }


 static float starting_t(float *cost_ptr, float *bb_cost_ptr,

         float *timing_cost_ptr, float **old_region_occ_x,

         float **old_region_occ_y,

         struct s_annealing_sched annealing_sched, int max_moves, float rlim,

         enum e_place_algorithm place_algorithm, float timing_tradeoff,

         float inverse_prev_bb_cost, float inverse_prev_timing_cost,

         float *delay_cost_ptr) {


     /* Finds the starting temperature (hot condition).              */


     int i, num_accepted, move_lim, swap_result;

     double std_dev, av, sum_of_squares; /* Double important to avoid round off */


     if (annealing_sched.type == USER_SCHED)

         return (annealing_sched.init_t);


     move_lim = std::min(max_moves, num_blocks);


     num_accepted = 0;

     av = 0.;

     sum_of_squares = 0.;


     /* Try one move per block.  Set t high so essentially all accepted. */


     for (i = 0; i < move_lim; i++) {

         swap_result = try_swap(HUGE_POSITIVE_FLOAT, cost_ptr, bb_cost_ptr, timing_cost_ptr, rlim,

                 old_region_occ_x, old_region_occ_y,

                 place_algorithm, timing_tradeoff,

                 inverse_prev_bb_cost, inverse_prev_timing_cost, delay_cost_ptr);


         if (swap_result == ACCEPTED) {

             num_accepted++;

             av += *cost_ptr;

             sum_of_squares += *cost_ptr * (*cost_ptr);

             num_swap_accepted++;

         } else if (swap_result == ABORTED) {

             num_swap_aborted++;

         } else {

             num_swap_rejected++;

         }

     }


     if (num_accepted != 0)

         av /= num_accepted;

     else

         av = 0.;


     std_dev = get_std_dev(num_accepted, sum_of_squares, av);


 #ifdef DEBUG

     if (num_accepted != move_lim) {

         vpr_printf(TIO_MESSAGE_WARNING, "Starting t: %d of %d configurations accepted.\n", num_accepted, move_lim);

     }

 #endif


 #ifdef VERBOSE

     vpr_printf(TIO_MESSAGE_INFO, "std_dev: %g, average cost: %g, starting temp: %g\n", std_dev, av, 20. * std_dev);

 #endif


     /* Set the initial temperature to 20 times the standard of deviation */

     /* so that the initial temperature adjusts according to the circuit */

     return (20. * std_dev);

 }


 static int setup_blocks_affected(int b_from, int x_to, int y_to, int z_to) {

     /* Tentatively moves block b_from to grid location (x_to, y_to, z_to) and
      * logs the move in blocks_affected.
      *
      * If the destination holds a block b_to, that block is moved to b_from's
      * old location and logged as well, unless b_to belongs to a placement
      * macro.  In that case nothing is modified and the swap is aborted.
      *
      * Only the block[] coordinates change here; grid[] and the nets are left
      * alone.  Returns abort_swap: TRUE if the swap must be aborted, FALSE
      * otherwise. */

     int slot, imacro;
     const int x_from = block[b_from].x;
     const int y_from = block[b_from].y;
     const int z_from = block[b_from].z;
     const int b_to = grid[x_to][y_to].blocks[z_to];

     if (b_to != EMPTY) {
         // Does not allow a swap with a macro yet
         get_imacro_from_iblk(&imacro, b_to, pl_macros, num_pl_macros);
         if (imacro != -1) {
             return (TRUE);
         }

         // The displaced block takes over the from location
         block[b_to].x = x_from;
         block[b_to].y = y_from;
         block[b_to].z = z_from;
     }

     // Swap the block, dont swap the nets yet
     block[b_from].x = x_to;
     block[b_from].y = y_to;
     block[b_from].z = z_to;

     // Log the move of b_from
     slot = blocks_affected.num_moved_blocks;
     blocks_affected.moved_blocks[slot].block_num = b_from;
     blocks_affected.moved_blocks[slot].xold = x_from;
     blocks_affected.moved_blocks[slot].xnew = x_to;
     blocks_affected.moved_blocks[slot].yold = y_from;
     blocks_affected.moved_blocks[slot].ynew = y_to;
     blocks_affected.moved_blocks[slot].zold = z_from;
     blocks_affected.moved_blocks[slot].znew = z_to;
     if (b_to == EMPTY) {
         blocks_affected.moved_blocks[slot].swapped_to_empty = TRUE;
     } else {
         blocks_affected.moved_blocks[slot].swapped_to_empty = FALSE;
     }
     blocks_affected.num_moved_blocks ++;

     if (b_to != EMPTY) {
         // Log the reverse move of the displaced block
         slot = blocks_affected.num_moved_blocks;
         blocks_affected.moved_blocks[slot].block_num = b_to;
         blocks_affected.moved_blocks[slot].xold = x_to;
         blocks_affected.moved_blocks[slot].xnew = x_from;
         blocks_affected.moved_blocks[slot].yold = y_to;
         blocks_affected.moved_blocks[slot].ynew = y_from;
         blocks_affected.moved_blocks[slot].zold = z_to;
         blocks_affected.moved_blocks[slot].znew = z_from;
         blocks_affected.moved_blocks[slot].swapped_to_empty = FALSE;
         blocks_affected.num_moved_blocks ++;
     }

     return (FALSE);
 }


 static int find_affected_blocks(int b_from, int x_to, int y_to, int z_to) {

     /* Finds and sets up the blocks_affected array for a proposed move of
      * b_from to grid location (x_to, y_to, z_to).
      *
      * If b_from is not part of a placement macro, the move is handed straight
      * to setup_blocks_affected.  If it is, every member of the macro is moved
      * by the same (x, y, z) offset so that the relative placement of the
      * macro is maintained.
      *
      * Returns abort_swap: TRUE as soon as a member would land off the chip,
      * on a z sub-location outside the destination tile's capacity, or
      * setup_blocks_affected aborts; FALSE otherwise.  On abort, members that
      * were already processed stay recorded in blocks_affected with their
      * block[] coordinates modified; try_swap restores them from those
      * records. */

     int imacro, imember;
     int x_swap_offset, y_swap_offset, z_swap_offset;
     int curr_b_from, curr_x_to, curr_y_to, curr_z_to;
     int abort_swap = FALSE;

     get_imacro_from_iblk(&imacro, b_from, pl_macros, num_pl_macros);
     if (imacro == -1) {
         // This is not a macro - the from and to info can be used as given
         return (setup_blocks_affected(b_from, x_to, y_to, z_to));
     }

     // b_from is part of a macro, the whole macro has to be swapped.
     // Record the relative position of the swap before any block is moved.
     x_swap_offset = x_to - block[b_from].x;
     y_swap_offset = y_to - block[b_from].y;
     z_swap_offset = z_to - block[b_from].z;

     for (imember = 0; imember < pl_macros[imacro].num_blocks && abort_swap == FALSE; imember++) {

         // Gets the new from and to info for every block in the macro
         // cannot use the old from and to info
         curr_b_from = pl_macros[imacro].members[imember].blk_index;

         curr_x_to = block[curr_b_from].x + x_swap_offset;
         curr_y_to = block[curr_b_from].y + y_swap_offset;
         curr_z_to = block[curr_b_from].z + z_swap_offset;

         // Make sure that the swap_to location is still on the chip and that
         // the z sub-location exists on the destination tile.  The capacity
         // test comes last so grid[][] is only indexed once x and y are known
         // to be in range; without it, grid[..].blocks[curr_z_to] would be
         // read out of bounds.
         if (curr_x_to < 1 || curr_x_to > nx || curr_y_to < 1 || curr_y_to > ny
                 || curr_z_to < 0
                 || curr_z_to >= grid[curr_x_to][curr_y_to].type->capacity) {
             abort_swap = TRUE;
         } else {
             abort_swap = setup_blocks_affected(curr_b_from, curr_x_to, curr_y_to, curr_z_to);
         }

     } // Finish going through all the blocks in the macro

     return (abort_swap);
 }


 static enum swap_result try_swap(float t, float *cost, float *bb_cost, float *timing_cost,

         float rlim, float **old_region_occ_x,

         float **old_region_occ_y,

         enum e_place_algorithm place_algorithm, float timing_tradeoff,

         float inverse_prev_bb_cost, float inverse_prev_timing_cost,

         float *delay_cost) {


     /* Picks some block and moves it to another spot.  If this spot is   *
      * occupied, switch the blocks.  Assess the change in cost function  *
      * and accept or reject the move.                                    *
      *                                                                   *
      * Parameters:                                                       *
      *   t            - temperature handed to assess_swap.               *
      *   cost, bb_cost, timing_cost, delay_cost                          *
      *                - running cost totals; the deltas of the move are  *
      *                  added to them only when the move is accepted     *
      *                  (timing_cost and delay_cost only for the two     *
      *                  timing-driven place algorithms).                 *
      *   rlim         - the range limiter, handed to find_to.            *
      *   old_region_occ_x, old_region_occ_y                              *
      *                - not referenced in the body of this function.     *
      *   place_algorithm, timing_tradeoff, inverse_prev_bb_cost,         *
      *   inverse_prev_timing_cost                                        *
      *                - select and weight the cost function (see the     *
      *                  delta_c computation below).                      *
      *                                                                   *
      * Returns:                                                          *
      *   REJECTED - find_to found no destination (nothing was changed),  *
      *              or assess_swap did not accept the move (block        *
      *              positions and net flags are restored).               *
      *   ACCEPTED - the move was kept; costs, bounding boxes, net costs  *
      *              and grid[] have been updated.                        *
      *   ABORTED  - find_affected_blocks asked for an abort; block       *
      *              positions are restored.                              *
      * In the non-aborted case the value returned is whatever            *
      * assess_swap produced.                                             */


     enum swap_result keep_switch;

     int b_from, x_from, y_from, z_from, x_to, y_to, z_to;

     int num_nets_affected;

     float delta_c, bb_delta_c, timing_delta_c, delay_delta_c;

     int inet, iblk, bnum, iblk_pin, inet_affected;

     int abort_swap = FALSE;


     /* Count every call, whatever its outcome. */
     num_ts_called ++;


     /* I'm using negative values of temp_net_cost as a flag, so DO NOT   *

      * use cost functions that can go negative.                          */


     delta_c = 0; /* Change in cost due to this swap. */

     bb_delta_c = 0;

     timing_delta_c = 0;

     delay_delta_c = 0.0;


     /* Pick a random block to be swapped with another random block    */
     /* NOTE(review): presumably my_irand(n) is uniform over [0, n] -- confirm. */

     b_from = my_irand(num_blocks - 1);


     /* If the pins are fixed we never move them from their initial    *

      * random locations.  The code below could be made more efficient *

      * by using the fact that pins appear first in the block list,    *

      * but this shouldn't cause any significant slowdown and won't be *

      * broken if I ever change the parser so that the pins aren't     *

      * necessarily at the start of the block list.                    */
     /* Note: this loop only ends once a block with isFixed != TRUE is drawn. */

     while (block[b_from].isFixed == TRUE) {

         b_from = my_irand(num_blocks - 1);

     }


     x_from = block[b_from].x;

     y_from = block[b_from].y;

     z_from = block[b_from].z;


     /* No destination available: reject before any state has been touched. */
     if (!find_to(x_from, y_from, block[b_from].type, rlim, &x_to,

             &y_to))

         return REJECTED;


     /* Sub-location 0 by default; drawn at random when the destination *
      * tile type has a capacity above 1.                               */
     z_to = 0;

     if (grid[x_to][y_to].type->capacity > 1) {

         z_to = my_irand(grid[x_to][y_to].type->capacity - 1);

     }


     /* Make the switch in order to make computing the new bounding *

      * box simpler.  If the cost increase is too high, switch them *

      * back.  (block data structures switched, clbs not switched   *

      * until success of move is determined.)                       *

      * Also check that whether those are the only 2 blocks         *

      * to be moved - check for carry chains and other placement    *

      * macros.                                                     */


     /* Check whether the from_block is part of a macro first.      *

      * If it is, the whole macro has to be moved. Calculate the    *

      * x, y, z offsets of the swap to maintain relative placements *

      * of the blocks. Abort the swap if the to_block is part of a  *

      * macro (not supported yet).                                  */


     abort_swap = find_affected_blocks(b_from, x_to, y_to, z_to);


     if (abort_swap == FALSE) {


         // Find all the nets affected by this swap

         num_nets_affected = find_affected_nets(ts_nets_to_update);


         /* Go through all the pins in all the blocks moved and update the bounding boxes.  *

          * Do not update the net cost here since it should only be updated once per net,   *

          * not once per pin                                                                */

         for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++)

         {

             bnum = blocks_affected.moved_blocks[iblk].block_num;


             /* Go through all the pins in the moved block */

             for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++)

             {

                 inet = block[bnum].nets[iblk_pin];

                 /* Unconnected pins and global nets do not contribute. */
                 if (inet == OPEN)

                     continue;

                 if (clb_net[inet].is_global)

                     continue;


                 /* Nets with fewer than SMALL_NET sinks get a from-scratch   *
                  * bounding box; larger nets are updated incrementally from  *
                  * the old and new pin coordinates (block y plus the pin's   *
                  * pin_height).                                              */
                 if (clb_net[inet].num_sinks < SMALL_NET) {

                     if(bb_updated_before[inet] == NOT_UPDATED_YET)

                         /* Brute force bounding box recomputation, once only for speed. */

                         get_non_updateable_bb(inet, &ts_bb_coord_new[inet]);

                 } else {

                     update_bb(inet, &ts_bb_coord_new[inet],

                             &ts_bb_edge_new[inet],

                             blocks_affected.moved_blocks[iblk].xold,

                             blocks_affected.moved_blocks[iblk].yold + block[bnum].type->pin_height[iblk_pin],

                             blocks_affected.moved_blocks[iblk].xnew,

                             blocks_affected.moved_blocks[iblk].ynew + block[bnum].type->pin_height[iblk_pin]);

                 }

             }

         }


         /* Now update the cost function. The cost is only updated once for every net  *

          * May have to do major optimizations here later.                             */

         for (inet_affected = 0; inet_affected < num_nets_affected; inet_affected++) {

             inet = ts_nets_to_update[inet_affected];


             temp_net_cost[inet] = get_net_cost(inet, &ts_bb_coord_new[inet]);

             bb_delta_c += temp_net_cost[inet] - net_cost[inet];

         }


         if (place_algorithm == NET_TIMING_DRIVEN_PLACE

                 || place_algorithm == PATH_TIMING_DRIVEN_PLACE) {

             /*in this case we redefine delta_c as a combination of timing and bb.  *

              *additionally, we normalize all values, therefore delta_c is in       *

              *relation to 1*/


             comp_delta_td_cost(&timing_delta_c, &delay_delta_c);


             delta_c = (1 - timing_tradeoff) * bb_delta_c * inverse_prev_bb_cost

                     + timing_tradeoff * timing_delta_c * inverse_prev_timing_cost;

         } else {

             delta_c = bb_delta_c;

         }


         /* assess_swap decides from delta_c and t; only ACCEPTED keeps the move, *
          * any other result is handled as a rejection below.                    */

         keep_switch = assess_swap(delta_c, t);


         if (keep_switch == ACCEPTED) {

             *cost = *cost + delta_c;

             *bb_cost = *bb_cost + bb_delta_c;


             if (place_algorithm == NET_TIMING_DRIVEN_PLACE

                     || place_algorithm == PATH_TIMING_DRIVEN_PLACE) {

                 /*update the point_to_point_timing_cost and point_to_point_delay_cost

                  * values from the temporary values */

                 *timing_cost = *timing_cost + timing_delta_c;

                 *delay_cost = *delay_cost + delay_delta_c;


                 update_td_cost();

             }


             /* update net cost functions and reset flags. */

             for (inet_affected = 0; inet_affected < num_nets_affected; inet_affected++) {

                 inet = ts_nets_to_update[inet_affected];


                 bb_coords[inet] = ts_bb_coord_new[inet];

                 /* Edge counts are only committed for nets updated incrementally. */
                 if (clb_net[inet].num_sinks >= SMALL_NET)

                     bb_num_on_edges[inet] = ts_bb_edge_new[inet];


                 net_cost[inet] = temp_net_cost[inet];


                 /* negative temp_net_cost value is acting as a flag. */

                 temp_net_cost[inet] = -1;

                 bb_updated_before[inet] = NOT_UPDATED_YET;

             }


             /* Update clb data structures since we kept the move. */

             /* Swap physical location */

             for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {


                 x_to = blocks_affected.moved_blocks[iblk].xnew;

                 y_to = blocks_affected.moved_blocks[iblk].ynew;

                 z_to = blocks_affected.moved_blocks[iblk].znew;


                 x_from = blocks_affected.moved_blocks[iblk].xold;

                 y_from = blocks_affected.moved_blocks[iblk].yold;

                 z_from = blocks_affected.moved_blocks[iblk].zold;


                 b_from = blocks_affected.moved_blocks[iblk].block_num;


                 grid[x_to][y_to].blocks[z_to] = b_from;


                 /* A move into an empty spot also shifts the usage counts and *
                  * vacates the old spot (marked with -1).  For a two-way swap *
                  * the partner's record overwrites the old spot instead.      */
                 if (blocks_affected.moved_blocks[iblk].swapped_to_empty == TRUE) {

                     grid[x_to][y_to].usage++;

                     grid[x_from][y_from].usage--;

                     grid[x_from][y_from].blocks[z_from] = -1;

                 }


             } // Finish updating clb for all blocks


         } else { /* Move was rejected.  */


             /* Reset the net cost function flags first. */

             for (inet_affected = 0; inet_affected < num_nets_affected; inet_affected++) {

                 inet = ts_nets_to_update[inet_affected];

                 temp_net_cost[inet] = -1;

                 bb_updated_before[inet] = NOT_UPDATED_YET;

             }


             /* Restore the block data structures to their state before the move. */

             for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {

                 b_from = blocks_affected.moved_blocks[iblk].block_num;


                 block[b_from].x = blocks_affected.moved_blocks[iblk].xold;

                 block[b_from].y = blocks_affected.moved_blocks[iblk].yold;

                 block[b_from].z = blocks_affected.moved_blocks[iblk].zold;

             }

         }


         /* Resets the num_moved_blocks, but do not free blocks_moved array. Defensive Coding */

         blocks_affected.num_moved_blocks = 0;


         //check_place(*bb_cost, *timing_cost, place_algorithm, *delay_cost);


         return (keep_switch);

     } else {

         /* Swap aborted.  find_affected_nets was never called on this path, *
          * so there are no net flags to reset; only the block positions     *
          * recorded so far have to be undone.                               */


         /* Restore the block data structures to their state before the move. */

         for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {

             b_from = blocks_affected.moved_blocks[iblk].block_num;


             block[b_from].x = blocks_affected.moved_blocks[iblk].xold;

             block[b_from].y = blocks_affected.moved_blocks[iblk].yold;

             block[b_from].z = blocks_affected.moved_blocks[iblk].zold;

         }


         /* Resets the num_moved_blocks, but do not free blocks_moved array. Defensive Coding */

         blocks_affected.num_moved_blocks = 0;


         return ABORTED;

     }

 }


 static int find_affected_nets(int *nets_to_update) {

     /* Collects into nets_to_update every net touched by the blocks currently
      * listed in blocks_affected, each net once, and returns how many were
      * collected.
      *
      * Pins that are OPEN or that connect to a global net are skipped.  A
      * negative temp_net_cost entry means "not collected yet"; collecting a
      * net sets its entry to 1. so that further pins on the same net are
      * ignored. */

     int moved_idx, pin_idx, net_id, blk_id;
     int net_count = 0;

     /* Go through all the blocks moved */
     for (moved_idx = 0; moved_idx < blocks_affected.num_moved_blocks; moved_idx++) {
         blk_id = blocks_affected.moved_blocks[moved_idx].block_num;

         /* Go through all the pins in the moved block */
         for (pin_idx = 0; pin_idx < block[blk_id].type->num_pins; pin_idx++) {
             net_id = block[blk_id].nets[pin_idx];

             if (net_id == OPEN || clb_net[net_id].is_global)
                 continue;

             /* Already flagged by an earlier pin. */
             if (!(temp_net_cost[net_id] < 0.))
                 continue;

             nets_to_update[net_count] = net_id;
             net_count++;

             /* Flag to say we've marked this net. */
             temp_net_cost[net_id] = 1.;
         }
     }

     return net_count;
 }


 static boolean find_to(int x_from, int y_from, t_type_ptr type, float rlim, int *x_to, int *y_to) {
     /* Picks a random destination (*x_to, *y_to) for a block of the given
      * type currently at (x_from, y_from), restricted to a window of
      * half-width rlim (capped at nx + 1 / ny + 1) around the source.
      *
      * A candidate is rejected when it equals the source location, lies
      * outside the window, or sits on a grid tile of a different type.
      * Candidates are redrawn until one is accepted or the try budget
      * (2 * min(active_area / type->height, num_legal_pos) + 10) is spent.
      *
      * Returns TRUE with *x_to / *y_to set on success, FALSE when the budget
      * ran out (in which case *x_to / *y_to hold the last rejected draw). */
     assert(type == grid[x_from][y_from].type);

     const int rlx = (int)std::min((float)nx + 1, rlim);
     const int rly = (int)std::min((float)ny + 1, rlim); /* Separate y limit for aspect_ratio != 1. */
     const int active_area = 4 * rlx * rly;

     /* Search window, clipped to the 0..nx+1 / 0..ny+1 grid. */
     const int min_x = std::max(0, x_from - rlx);
     const int max_x = std::min(nx + 1, x_from + rlx);
     const int min_y = std::max(0, y_from - rly);
     const int max_y = std::min(ny + 1, y_from + rly);

 #ifdef DEBUG
     if (rlx < 1 || rlx > nx + 1) {
         vpr_printf(TIO_MESSAGE_ERROR, "in find_to: rlx = %d\n", rlx);
         exit(1);
     }
 #endif

     const int block_index = type->index;
     int num_tries = 0;

     for (;;) {
         /* Give up once the random search has used its budget. */
         if (num_tries >= 2 * std::min(active_area / type->height, num_legal_pos[block_index]) + 10)
             return FALSE;
         num_tries++;

         if (nx / 4 < rlx || ny / 4 < rly || num_legal_pos[block_index] < active_area) {
             /* Large window or few legal positions: draw straight from the
              * table of legal positions for this block type. */
             const int ipos = my_irand(num_legal_pos[block_index] - 1);
             *x_to = legal_pos[block_index][ipos].x;
             *y_to = legal_pos[block_index][ipos].y;
         } else {
             /* Small window: draw a point inside the window, x first. */
             const int x_rel = my_irand(std::max(0, max_x - min_x));
             *x_to = min_x + x_rel;
             const int y_rel = my_irand(std::max(0, max_y - min_y));
             *y_to = min_y + y_rel;
             *y_to -= grid[*x_to][*y_to].offset; /* Align to the tile's base row. */
         }

         boolean is_legal = TRUE;
         if ((x_from == *x_to && y_from == *y_to)
                 || *x_to > max_x || *x_to < min_x
                 || *y_to > max_y || *y_to < min_y
                 || grid[*x_to][*y_to].type != grid[x_from][y_from].type) {
             is_legal = FALSE;
         }

         assert(*x_to >= 0 && *x_to <= nx + 1);
         assert(*y_to >= 0 && *y_to <= ny + 1);

         if (is_legal != FALSE)
             break;
     }

 #ifdef DEBUG
     if (*x_to < 0 || *x_to > nx + 1 || *y_to < 0 || *y_to > ny + 1) {
         vpr_printf(TIO_MESSAGE_ERROR, "in routine find_to: (x_to,y_to) = (%d,%d)\n", *x_to, *y_to);
         exit(1);
     }
 #endif

     assert(type == grid[*x_to][*y_to].type);
     return TRUE;
 }


 static enum swap_result assess_swap(float delta_c, float t) {
     /* Decides whether a move with cost change delta_c is kept at
      * temperature t.
      *
      *  - delta_c <= 0: always ACCEPTED.
      *  - t == 0:       REJECTED (only non-worsening moves pass).
      *  - otherwise:    ACCEPTED when exp(-delta_c / t) exceeds a fresh
      *                  my_frand() draw, REJECTED otherwise. */
     float fnum;

     if (delta_c <= 0) {
 #ifdef SPEC         /* Reduce variation in final solution due to round off */
         /* Draw (and discard) a random number on this path as well. */
         fnum = my_frand();
 #endif
         return ACCEPTED;
     }

     if (t == 0.)
         return REJECTED;

     fnum = my_frand();
     const float prob_fac = exp(-delta_c / t);

     return (prob_fac > fnum) ? ACCEPTED : REJECTED;
 }


 static float recompute_bb_cost(void) {
     /* Re-sums the stored per-net costs of all non-global nets to shed
      * round-off accumulated by incremental updates.  The bounding boxes
      * themselves are not recomputed; net_cost[] is taken as-is. */
     float total = 0;

     for (int inet = 0; inet < num_nets; inet++) {
         if (clb_net[inet].is_global)
             continue; /* Global nets do not contribute. */

         total += net_cost[inet];
     }

     return total;
 }


 static float comp_td_point_to_point_delay(int inet, int ipin) {


     /*returns the delay of one point to point connection */


     int source_block, sink_block;

     int delta_x, delta_y;

     t_type_ptr source_type, sink_type;

     float delay_source_to_sink;


     delay_source_to_sink = 0.;


     source_block = clb_net[inet].node_block[0];

     source_type = block[source_block].type;


     sink_block = clb_net[inet].node_block[ipin];

     sink_type = block[sink_block].type;


     assert(source_type != NULL);

     assert(sink_type != NULL);


     delta_x = abs(block[sink_block].x - block[source_block].x);

     delta_y = abs(block[sink_block].y - block[source_block].y);


     /* TODO low priority: Could be merged into one look-up table */

     /* Note: This heuristic is terrible on Quality of Results.

      * A much better heuristic is to create a more comprehensive lookup table but

      * it's too late in the release cycle to do this.  Pushing until the next release */

     if (source_type == IO_TYPE) {

         if (sink_type == IO_TYPE)

             delay_source_to_sink = delta_io_to_io[delta_x][delta_y];

         else

             delay_source_to_sink = delta_io_to_clb[delta_x][delta_y];

     } else {

         if (sink_type == IO_TYPE)

             delay_source_to_sink = delta_clb_to_io[delta_x][delta_y];

         else

             delay_source_to_sink = delta_clb_to_clb[delta_x][delta_y];

     }

     if (delay_source_to_sink < 0) {

         vpr_printf(TIO_MESSAGE_ERROR, "in comp_td_point_to_point_delay: Bad delay_source_to_sink value delta(%d, %d) delay of %g\n", delta_x, delta_y, delay_source_to_sink);

         vpr_printf(TIO_MESSAGE_ERROR, "in comp_td_point_to_point_delay: Delay is less than 0\n");

         exit(1);

     }


     return (delay_source_to_sink);

 }


 static void update_td_cost(void) {

     /* Update the point_to_point_timing_cost values from the temporary *

      * values for all connections that have changed.                   */


     int iblk_pin, net_pin, inet, ipin;

     int iblk, iblk2, bnum, driven_by_moved_block;


     /* Go through all the blocks moved. */

     for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++)

     {

         bnum = blocks_affected.moved_blocks[iblk].block_num;

         for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++) {


             inet = block[bnum].nets[iblk_pin];


             if (inet == OPEN)

                 continue;


             if (clb_net[inet].is_global)

                 continue;


             net_pin = net_pin_index[bnum][iblk_pin];


             if (net_pin != 0) {


                 driven_by_moved_block = FALSE;

                 for (iblk2 = 0; iblk2 < blocks_affected.num_moved_blocks; iblk2++)

                 {   if (clb_net[inet].node_block[0] == blocks_affected.moved_blocks[iblk2].block_num)

                         driven_by_moved_block = TRUE;

                 }


                 /* The following "if" prevents the value from being updated twice. */

                 if (driven_by_moved_block == FALSE) {

                     point_to_point_delay_cost[inet][net_pin] =

                             temp_point_to_point_delay_cost[inet][net_pin];

                     temp_point_to_point_delay_cost[inet][net_pin] = -1;

                     point_to_point_timing_cost[inet][net_pin] =

                             temp_point_to_point_timing_cost[inet][net_pin];

                     temp_point_to_point_timing_cost[inet][net_pin] = -1;

                 }

             } else { /* This net is being driven by a moved block, recompute */

                 /* All point to point connections on this net. */

                 for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {

                     point_to_point_delay_cost[inet][ipin] =

                             temp_point_to_point_delay_cost[inet][ipin];

                     temp_point_to_point_delay_cost[inet][ipin] = -1;

                     point_to_point_timing_cost[inet][ipin] =

                             temp_point_to_point_timing_cost[inet][ipin];

                     temp_point_to_point_timing_cost[inet][ipin] = -1;

                 } /* Finished updating the pin */

             }

         } /* Finished going through all the pins in the moved block */

     } /* Finished going through all the blocks moved */

 }


 static void comp_delta_td_cost(float *delta_timing, float *delta_delay) {


     /*a net that is being driven by a moved block must have all of its  */

     /*sink timing costs recomputed. A net that is driving a moved block */

     /*must only have the timing cost on the connection driving the input */

     /*pin computed */


     int inet, net_pin, ipin;

     float delta_timing_cost, delta_delay_cost, temp_delay;

     int iblk, iblk2, bnum, iblk_pin, driven_by_moved_block;


     delta_timing_cost = 0.;

     delta_delay_cost = 0.;


     /* Go through all the blocks moved */

     for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++)

     {

         bnum = blocks_affected.moved_blocks[iblk].block_num;

         /* Go through all the pins in the moved block */

         for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++) {

             inet = block[bnum].nets[iblk_pin];


             if (inet == OPEN)

                 continue;


             if (clb_net[inet].is_global)

                 continue;


             net_pin = net_pin_index[bnum][iblk_pin];


             if (net_pin != 0) {

                 /* If this net is being driven by a block that has moved, we do not    *

                  * need to compute the change in the timing cost (here) since it will  *

                  * be computed in the fanout of the net on  the driving block, also    *

                  * computing it here would double count the change, and mess up the    *

                  * delta_timing_cost value.                                            */

                 driven_by_moved_block = FALSE;

                 for (iblk2 = 0; iblk2 < blocks_affected.num_moved_blocks; iblk2++)

                 {   if (clb_net[inet].node_block[0] == blocks_affected.moved_blocks[iblk2].block_num)

                         driven_by_moved_block = TRUE;

                 }


                 if (driven_by_moved_block == FALSE) {

                     temp_delay = comp_td_point_to_point_delay(inet, net_pin);

                     temp_point_to_point_delay_cost[inet][net_pin] = temp_delay;


                     temp_point_to_point_timing_cost[inet][net_pin] =

                         timing_place_crit[inet][net_pin] * temp_delay;

                     delta_timing_cost += temp_point_to_point_timing_cost[inet][net_pin]

                         - point_to_point_timing_cost[inet][net_pin];

                     delta_delay_cost += temp_point_to_point_delay_cost[inet][net_pin]

                             - point_to_point_delay_cost[inet][net_pin];

                 }

             } else { /* This net is being driven by a moved block, recompute */

                 /* All point to point connections on this net. */

                 for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {

                     temp_delay = comp_td_point_to_point_delay(inet, ipin);

                     temp_point_to_point_delay_cost[inet][ipin] = temp_delay;


                     temp_point_to_point_timing_cost[inet][ipin] =

                         timing_place_crit[inet][ipin] * temp_delay;

                     delta_timing_cost += temp_point_to_point_timing_cost[inet][ipin]

                         - point_to_point_timing_cost[inet][ipin];

                     delta_delay_cost += temp_point_to_point_delay_cost[inet][ipin]

                             - point_to_point_delay_cost[inet][ipin];


                 } /* Finished updating the pin */

             }

         } /* Finished going through all the pins in the moved block */

     } /* Finished going through all the blocks moved */


     *delta_timing = delta_timing_cost;

     *delta_delay = delta_delay_cost;

 }


 static void comp_td_costs(float *timing_cost, float *connection_delay_sum) {
     /* Recomputes, from scratch, the delay and timing cost of every
      * point-to-point connection on every non-global net.  The timing cost
      * of a connection is its delay multiplied by its criticality
      * (timing_place_crit).
      *
      * The permanent per-connection arrays are overwritten with the fresh
      * values and the matching temporary entries are reset to -1
      * ("undefined").
      *
      * Outputs: *timing_cost is the sum of all connection timing costs and
      * *connection_delay_sum the sum of all connection delays.  No clamping
      * is applied to either sum. */
     float timing_sum = 0.;
     float delay_sum = 0.;

     for (int inet = 0; inet < num_nets; inet++) {
         if (clb_net[inet].is_global)
             continue;

         for (int ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
             const float conn_delay = comp_td_point_to_point_delay(inet, ipin);
             const float conn_timing = conn_delay * timing_place_crit[inet][ipin];

             point_to_point_delay_cost[inet][ipin] = conn_delay;
             temp_point_to_point_delay_cost[inet][ipin] = -1; /* Undefined */

             point_to_point_timing_cost[inet][ipin] = conn_timing;
             temp_point_to_point_timing_cost[inet][ipin] = -1; /* Undefined */

             delay_sum += conn_delay;
             timing_sum += conn_timing;
         }
     }

     *timing_cost = timing_sum;
     *connection_delay_sum = delay_sum;
 }


 static float comp_bb_cost(enum cost_methods method) {


     /* Finds the cost from scratch.  Done only when the placement   *

      * has been radically changed (i.e. after initial placement).   *

      * Otherwise find the cost change incrementally.  If method     *

      * check is NORMAL, we find bounding boxes that are updateable  *

      * for the larger nets.  If method is CHECK, all bounding boxes *

      * are found via the non_updateable_bb routine, to provide a    *

      * cost which can be used to check the correctness of the       *

      * other routine.                                               */


     int inet;

     float cost;

     double expected_wirelength;


     cost = 0;

     expected_wirelength = 0.0;


     for (inet = 0; inet < num_nets; inet++) { /* for each net ... */


         if (clb_net[inet].is_global == FALSE) { /* Do only if not global. */


             /* Small nets don't use incremental updating on their bounding boxes, *

              * so they can use a fast bounding box calculator.                    */


             if (clb_net[inet].num_sinks >= SMALL_NET && method == NORMAL) {

                 get_bb_from_scratch(inet, &bb_coords[inet],

                         &bb_num_on_edges[inet]);

             } else {

                 get_non_updateable_bb(inet, &bb_coords[inet]);

             }


             net_cost[inet] = get_net_cost(inet, &bb_coords[inet]);

             cost += net_cost[inet];

             if (method == CHECK)

                 expected_wirelength += get_net_wirelength_estimate(inet,

                         &bb_coords[inet]);

         }

     }


     if (method == CHECK) {

         vpr_printf(TIO_MESSAGE_INFO, "\n");

         vpr_printf(TIO_MESSAGE_INFO, "BB estimate of min-dist (placement) wirelength: %.0f\n", expected_wirelength);

     }

     return (cost);

 }


 static void free_placement_structs(
         float **old_region_occ_x, float **old_region_occ_y,
         struct s_placer_opts placer_opts) {

     /* Frees the major structures needed by the placer (and not needed
      * elsewhere).
      *
      * old_region_occ_x / old_region_occ_y are not used by this routine.
      * placer_opts selects whether the timing-driven cost arrays exist: they
      * are released only under the same condition that
      * alloc_and_load_placement_structs allocates them.
      *
      * All pointers released here are reset to NULL afterwards (defensive
      * coding). */

     int inet, imacro;

     free_legal_placements();
     free_fast_cost_update();

     if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
             || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE
             || placer_opts.enable_timing_computations) {
         for (inet = 0; inet < num_nets; inet++) {
             /* Each row pointer was moved back by one element after
              * allocation so it could be indexed from 1; move it forward
              * again to recover the address that was actually allocated. */
             free(point_to_point_delay_cost[inet] + 1);
             free(point_to_point_timing_cost[inet] + 1);
             free(temp_point_to_point_delay_cost[inet] + 1);
             free(temp_point_to_point_timing_cost[inet] + 1);
         }
         free(point_to_point_delay_cost);
         free(temp_point_to_point_delay_cost);

         free(point_to_point_timing_cost);
         free(temp_point_to_point_timing_cost);

         point_to_point_delay_cost = NULL;
         temp_point_to_point_delay_cost = NULL;
         point_to_point_timing_cost = NULL;
         temp_point_to_point_timing_cost = NULL;
     }

     /* net_pin_index is allocated by alloc_and_load_placement_structs
      * regardless of the placement algorithm, so it must be released
      * regardless of it too; freeing it only in the timing-driven case
      * leaked it for every other placement run. */
     free_matrix(net_pin_index, 0, num_blocks - 1, 0, sizeof(int));
     net_pin_index = NULL;

     free(net_cost);
     free(temp_net_cost);
     free(bb_num_on_edges);
     free(bb_coords);

     free_placement_macros_structs();

     for (imacro = 0; imacro < num_pl_macros; imacro++)
         free(pl_macros[imacro].members);
     free(pl_macros);

     net_cost = NULL; /* Defensive coding. */
     temp_net_cost = NULL;
     bb_num_on_edges = NULL;
     bb_coords = NULL;
     pl_macros = NULL;

     /* Frees up all the data structure used in vpr_utils. */
     free_port_pin_from_blk_pin();
     free_blk_pin_from_port_pin();

 }


 static void alloc_and_load_placement_structs(
         float place_cost_exp, float ***old_region_occ_x,
         float ***old_region_occ_y, struct s_placer_opts placer_opts,
         t_direct_inf *directs, int num_directs) {

     /* Allocates the major structures needed only by the placer, primarily
      * for computing costs quickly and such.
      *
      * place_cost_exp is forwarded to alloc_and_load_for_fast_cost_update.
      * *old_region_occ_x / *old_region_occ_y are set to NULL (they are not
      * meant to be used).
      * placer_opts decides whether the timing-driven per-connection cost
      * arrays are allocated.
      * directs / num_directs are forwarded to
      * alloc_and_load_placement_macros. */

     int inet, ipin;

     alloc_legal_placements();
     load_legal_placements();

     if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
             || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE
             || placer_opts.enable_timing_computations) {
         /* Allocate structures associated with timing driven placement */
         /* [0..num_nets-1][1..num_pins-1]  */
         point_to_point_delay_cost = (float **) my_malloc(
                 num_nets * sizeof(float *));
         temp_point_to_point_delay_cost = (float **) my_malloc(
                 num_nets * sizeof(float *));

         point_to_point_timing_cost = (float **) my_malloc(
                 num_nets * sizeof(float *));
         temp_point_to_point_timing_cost = (float **) my_malloc(
                 num_nets * sizeof(float *));

         for (inet = 0; inet < num_nets; inet++) {

             /* In the following, subtract one so index starts at *
              * 1 instead of 0 */
             point_to_point_delay_cost[inet] = (float *) my_malloc(
                     clb_net[inet].num_sinks * sizeof(float));
             point_to_point_delay_cost[inet]--;

             temp_point_to_point_delay_cost[inet] = (float *) my_malloc(
                     clb_net[inet].num_sinks * sizeof(float));
             temp_point_to_point_delay_cost[inet]--;

             point_to_point_timing_cost[inet] = (float *) my_malloc(
                     clb_net[inet].num_sinks * sizeof(float));
             point_to_point_timing_cost[inet]--;

             temp_point_to_point_timing_cost[inet] = (float *) my_malloc(
                     clb_net[inet].num_sinks * sizeof(float));
             temp_point_to_point_timing_cost[inet]--;
         }

         /* Give every entry a defined starting value.  The timing-cost
          * arrays are zeroed together with the delay-cost arrays so that no
          * entry is ever read while still uninitialized. */
         for (inet = 0; inet < num_nets; inet++) {
             for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
                 point_to_point_delay_cost[inet][ipin] = 0;
                 temp_point_to_point_delay_cost[inet][ipin] = 0;
                 point_to_point_timing_cost[inet][ipin] = 0;
                 temp_point_to_point_timing_cost[inet][ipin] = 0;
             }
         }
     }

     net_cost = (float *) my_malloc(num_nets * sizeof(float));
     temp_net_cost = (float *) my_malloc(num_nets * sizeof(float));
     bb_updated_before = (char*)my_calloc(num_nets, sizeof(char));

     /* Used to store costs for moves not yet made and to indicate when a net's   *
      * cost has been recomputed. temp_net_cost[inet] < 0 means net's cost hasn't *
      * been recomputed.                                                          */

     for (inet = 0; inet < num_nets; inet++){
         bb_updated_before[inet] = NOT_UPDATED_YET;
         temp_net_cost[inet] = -1.;
     }

     bb_coords = (struct s_bb *) my_malloc(num_nets * sizeof(struct s_bb));
     bb_num_on_edges = (struct s_bb *) my_malloc(num_nets * sizeof(struct s_bb));

     /* Shouldn't use them; crash hard if I do!   */
     *old_region_occ_x = NULL;
     *old_region_occ_y = NULL;

     alloc_and_load_for_fast_cost_update(place_cost_exp);

     net_pin_index = alloc_and_load_net_pin_index();

     alloc_and_load_try_swap_structs();

     num_pl_macros = alloc_and_load_placement_macros(directs, num_directs, &pl_macros);
 }


 static void alloc_and_load_try_swap_structs() {
     /* One-time allocation of the scratch storage used while evaluating a
      * swap.  The per-net arrays hold num_nets entries and the moved-block
      * list holds num_blocks entries, so they can accommodate any number of
      * affected nets / moved blocks.  Everything comes from my_calloc. */

     /* Per-net scratch: candidate bounding boxes, edge counts, net list. */
     ts_bb_coord_new = (struct s_bb *) my_calloc(num_nets, sizeof(*ts_bb_coord_new));
     ts_bb_edge_new = (struct s_bb *) my_calloc(num_nets, sizeof(*ts_bb_edge_new));
     ts_nets_to_update = (int *) my_calloc(num_nets, sizeof(*ts_nets_to_update));

     /* Per-block scratch: list of blocks moved by the current swap. */
     blocks_affected.moved_blocks = (t_pl_moved_block*)my_calloc(
             num_blocks, sizeof(*blocks_affected.moved_blocks));
     blocks_affected.num_moved_blocks = 0;
 }


 static void get_bb_from_scratch(int inet, struct s_bb *coords,

         struct s_bb *num_on_edges) {


     /* This routine finds the bounding box of each net from scratch (i.e.    *

      * from only the block location information).  It updates both the       *

      * coordinate and number of pins on each edge information.  It         *

      * should only be called when the bounding box information is not valid. */


     int ipin, bnum, pnum, x, y, xmin, xmax, ymin, ymax;

     int xmin_edge, xmax_edge, ymin_edge, ymax_edge;

     int n_pins;


     n_pins = clb_net[inet].num_sinks + 1;


     bnum = clb_net[inet].node_block[0];

     pnum = clb_net[inet].node_block_pin[0];


     x = block[bnum].x;

     y = block[bnum].y + block[bnum].type->pin_height[pnum];


     x = std::max(std::min(x, nx), 1);

     y = std::max(std::min(y, ny), 1);


     xmin = x;

     ymin = y;

     xmax = x;

     ymax = y;

     xmin_edge = 1;

     ymin_edge = 1;

     xmax_edge = 1;

     ymax_edge = 1;


     for (ipin = 1; ipin < n_pins; ipin++) {

         bnum = clb_net[inet].node_block[ipin];

         pnum = clb_net[inet].node_block_pin[ipin];

         x = block[bnum].x;

         y = block[bnum].y + block[bnum].type->pin_height[pnum];


         /* Code below counts IO blocks as being within the 1..nx, 1..ny clb array. *

          * This is because channels do not go out of the 0..nx, 0..ny range, and   *

          * I always take all channels impinging on the bounding box to be within   *

          * that bounding box.  Hence, this "movement" of IO blocks does not affect *

          * the which channels are included within the bounding box, and it         *

          * simplifies the code a lot.                                              */


         x = std::max(std::min(x, nx), 1);

         y = std::max(std::min(y, ny), 1);


         if (x == xmin) {

             xmin_edge++;

         }

         if (x == xmax) { /* Recall that xmin could equal xmax -- don't use else */

             xmax_edge++;

         } else if (x < xmin) {

             xmin = x;

             xmin_edge = 1;

         } else if (x > xmax) {

             xmax = x;

             xmax_edge = 1;

         }


         if (y == ymin) {

             ymin_edge++;

         }

         if (y == ymax) {

             ymax_edge++;

         } else if (y < ymin) {

             ymin = y;

             ymin_edge = 1;

         } else if (y > ymax) {

             ymax = y;

             ymax_edge = 1;

         }

     }


     /* Copy the coordinates and number on edges information into the proper   *

      * structures.                                                            */

     coords->xmin = xmin;

     coords->xmax = xmax;

     coords->ymin = ymin;

     coords->ymax = ymax;


     num_on_edges->xmin = xmin_edge;

     num_on_edges->xmax = xmax_edge;

     num_on_edges->ymin = ymin_edge;

     num_on_edges->ymax = ymax_edge;

 }


 static double get_net_wirelength_estimate(int inet, struct s_bb *bbptr) {
     /* WMF: Estimates the wirelength of net inet from its bounding box
      * *bbptr: (x span + y span) scaled by the net's expected crossing
      * count.
      *
      * The crossing count depends on the pin count (num_sinks + 1):
      *   up to 50 pins -> cross_count[] table lookup,
      *   51 to 84 pins -> linear extrapolation,
      *   85 pins or more -> quadratic extrapolation. */
     const int num_pins = clb_net[inet].num_sinks + 1;
     double crossing;

     if (num_pins >= 85) {
         crossing = 2.7933 + 0.011 * num_pins
                 - 0.0000018 * num_pins * num_pins;
     } else if (num_pins > 50) {
         crossing = 2.7933 + 0.02616 * (num_pins - 50);
     } else {
         crossing = cross_count[num_pins - 1];
     }

     /* Could insert a check for xmin == xmax.  In that case, assume  *
      * connection will be made with no bends and hence no x-cost.    *
      * Same thing for y-cost.                                        */

     /* x-direction contribution first, then y-direction. */
     return (bbptr->xmax - bbptr->xmin + 1) * crossing
             + (bbptr->ymax - bbptr->ymin + 1) * crossing;
 }


 static float get_net_cost(int inet, struct s_bb *bbptr) {
     /* Returns the placement cost of net inet given its bounding box
      * *bbptr.
      *
      * Each direction contributes span * crossing * cost factor, where the
      * x span uses chanx_place_cost_fac[ymax][ymin - 1] and the y span uses
      * chany_place_cost_fac[xmax][xmin - 1].  The crossing count comes from
      * the cross_count[] table for nets of up to 50 pins and is linearly
      * extrapolated for larger nets. */
     const int num_pins = clb_net[inet].num_sinks + 1;
     float crossing;

     if (num_pins <= 50) {
         crossing = cross_count[num_pins - 1];
     } else {
         crossing = 2.7933 + 0.02616 * (num_pins - 50);
         /*    crossing = 3.0;    Old value  */
     }

     /* Could insert a check for xmin == xmax.  In that case, assume  *
      * connection will be made with no bends and hence no x-cost.    *
      * Same thing for y-cost.                                        */

     /* x-direction term. */
     float ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing
             * chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1];

     /* y-direction term. */
     ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing
             * chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1];

     return ncost;
 }


 static void get_non_updateable_bb(int inet, struct s_bb *bb_coord_new) {


     /* Finds the bounding box of a net and stores its coordinates in the  *

      * bb_coord_new data structure.  This routine should only be called   *

      * for small nets, since it does not determine enough information for *

      * the bounding box to be updated incrementally later.                *

      * Currently assumes channels on both sides of the CLBs forming the   *

      * edges of the bounding box can be used.  Essentially, I am assuming *

      * the pins always lie on the outside of the bounding box.            */


     int k, xmax, ymax, xmin, ymin, x, y;

     int bnum, pnum;


     bnum = clb_net[inet].node_block[0];

     pnum = clb_net[inet].node_block_pin[0];

     x = block[bnum].x;

     y = block[bnum].y + block[bnum].type->pin_height[pnum];


     xmin = x;

     ymin = y;

     xmax = x;

     ymax = y;


     for (k = 1; k < (clb_net[inet].num_sinks + 1); k++) {

         bnum = clb_net[inet].node_block[k];

         pnum = clb_net[inet].node_block_pin[k];

         x = block[bnum].x;

         y = block[bnum].y + block[bnum].type->pin_height[pnum];


         if (x < xmin) {

             xmin = x;

         } else if (x > xmax) {

             xmax = x;

         }


         if (y < ymin) {

             ymin = y;

         } else if (y > ymax) {

             ymax = y;

         }

     }


     /* Now I've found the coordinates of the bounding box.  There are no *

      * channels beyond nx and ny, so I want to clip to that.  As well,   *

      * since I'll always include the channel immediately below and the   *

      * channel immediately to the left of the bounding box, I want to    *

      * clip to 1 in both directions as well (since minimum channel index *

      * is 0).  See route.c for a channel diagram.                        */


     bb_coord_new->xmin = std::max(std::min(xmin, nx), 1);

     bb_coord_new->ymin = std::max(std::min(ymin, ny), 1);

     bb_coord_new->xmax = std::max(std::min(xmax, nx), 1);

     bb_coord_new->ymax = std::max(std::min(ymax, ny), 1);

 }


 /*
  * update_bb -- incrementally update the bounding box of net inet after one
  * of its pins has moved from (xold, yold) to (xnew, ynew).
  *
  * Parameters:
  *   inet          index of the net being updated.
  *   bb_coord_new  receives the updated xmin/xmax/ymin/ymax coordinates.
  *   bb_edge_new   receives the updated number of pins on each box edge.
  *   xold, yold    pin position before the move.
  *   xnew, ynew    pin position after the move.
  *
  * All four position arguments are first clamped to [1, nx] x [1, ny].
  *
  * The baseline the update starts from depends on bb_updated_before[inet]:
  *   GOT_FROM_SCRATCH  the function returns immediately and writes nothing.
  *   NOT_UPDATED_YET   baseline is bb_coords[inet] / bb_num_on_edges[inet];
  *                     the net is then marked UPDATED_ONCE.
  *   anything else     baseline is bb_coord_new / bb_edge_new themselves, so
  *                     source and destination are the same objects and the
  *                     "copy unchanged" assignments below are self-assignments.
  *
  * Whenever the pin leaves an edge on which it was the only pin (edge count
  * of 1), the new edge cannot be derived incrementally: get_bb_from_scratch()
  * is called, the net is marked GOT_FROM_SCRATCH, and the function returns.
  * Note that for the y direction this can happen after the x fields of the
  * outputs have already been written.
  */
 static void update_bb(int inet, struct s_bb *bb_coord_new,
         struct s_bb *bb_edge_new, int xold, int yold, int xnew, int ynew) {

     /* Updates the bounding box of a net by storing its coordinates in    *
      * the bb_coord_new data structure and the number of blocks on each   *
      * edge in the bb_edge_new data structure.  This routine should only  *
      * be called for large nets, since it has some overhead relative to   *
      * just doing a brute force bounding box calculation.  The bounding   *
      * box coordinate and edge information for inet must be valid before  *
      * this routine is called.                                            *
      * Currently assumes channels on both sides of the CLBs forming the   *
      * edges of the bounding box can be used.  Essentially, I am assuming *
      * the pins always lie on the outside of the bounding box.            *
      * The x and y coordinates are the pin's x and y coordinates.         */
     /* IO blocks are considered to be one cell in for simplicity.         */

     /* Baseline (pre-move) edge counts and coordinates; see header comment. */
     struct s_bb *curr_bb_edge, *curr_bb_coord;

     /* Clamp every coordinate into [1, nx] / [1, ny]. */
     xnew = std::max(std::min(xnew, nx), 1);
     ynew = std::max(std::min(ynew, ny), 1);
     xold = std::max(std::min(xold, nx), 1);
     yold = std::max(std::min(yold, ny), 1);

     /* Check if the net had been updated before. */
     if (bb_updated_before[inet] == GOT_FROM_SCRATCH)
     {   /* The net had been updated from scratch, DO NOT update again! */
         return;
     }
     else if (bb_updated_before[inet] == NOT_UPDATED_YET)
     {   /* The net had NOT been updated before, could use the old values */
         curr_bb_coord = &bb_coords[inet];
         curr_bb_edge = &bb_num_on_edges[inet];
         bb_updated_before[inet] = UPDATED_ONCE;
     }
     else
     {   /* The net had been updated before, must use the new values */
         curr_bb_coord = bb_coord_new;
         curr_bb_edge = bb_edge_new;
     }

     /* Check if I can update the bounding box incrementally. */

     if (xnew < xold) { /* Move to left. */

         /* Update the xmax fields for coordinates and number of edges first. */

         if (xold == curr_bb_coord->xmax) { /* Old position at xmax. */
             if (curr_bb_edge->xmax == 1) {
                 /* The only pin on the xmax edge left it: recompute fully. */
                 get_bb_from_scratch(inet, bb_coord_new, bb_edge_new);
                 bb_updated_before[inet] = GOT_FROM_SCRATCH;
                 return;
             } else {
                 /* Other pins remain on xmax: same edge, one pin fewer. */
                 bb_edge_new->xmax = curr_bb_edge->xmax - 1;
                 bb_coord_new->xmax = curr_bb_coord->xmax;
             }
         }

         else { /* Move to left, old position was not at xmax. */
             bb_coord_new->xmax = curr_bb_coord->xmax;
             bb_edge_new->xmax = curr_bb_edge->xmax;
         }

         /* Now do the xmin fields for coordinates and number of edges. */

         if (xnew < curr_bb_coord->xmin) { /* Moved past xmin */
             bb_coord_new->xmin = xnew;
             bb_edge_new->xmin = 1;
         }

         else if (xnew == curr_bb_coord->xmin) { /* Moved to xmin */
             bb_coord_new->xmin = xnew;
             bb_edge_new->xmin = curr_bb_edge->xmin + 1;
         }

         else { /* Xmin unchanged. */
             bb_coord_new->xmin = curr_bb_coord->xmin;
             bb_edge_new->xmin = curr_bb_edge->xmin;
         }
     }

     /* End of move to left case. */
     else if (xnew > xold) { /* Move to right. */

         /* Update the xmin fields for coordinates and number of edges first. */

         if (xold == curr_bb_coord->xmin) { /* Old position at xmin. */
             if (curr_bb_edge->xmin == 1) {
                 /* The only pin on the xmin edge left it: recompute fully. */
                 get_bb_from_scratch(inet, bb_coord_new, bb_edge_new);
                 bb_updated_before[inet] = GOT_FROM_SCRATCH;
                 return;
             } else {
                 bb_edge_new->xmin = curr_bb_edge->xmin - 1;
                 bb_coord_new->xmin = curr_bb_coord->xmin;
             }
         }

         else { /* Move to right, old position was not at xmin. */
             bb_coord_new->xmin = curr_bb_coord->xmin;
             bb_edge_new->xmin = curr_bb_edge->xmin;
         }

         /* Now do the xmax fields for coordinates and number of edges. */

         if (xnew > curr_bb_coord->xmax) { /* Moved past xmax. */
             bb_coord_new->xmax = xnew;
             bb_edge_new->xmax = 1;
         }

         else if (xnew == curr_bb_coord->xmax) { /* Moved to xmax */
             bb_coord_new->xmax = xnew;
             bb_edge_new->xmax = curr_bb_edge->xmax + 1;
         }

         else { /* Xmax unchanged. */
             bb_coord_new->xmax = curr_bb_coord->xmax;
             bb_edge_new->xmax = curr_bb_edge->xmax;
         }
     }
     /* End of move to right case. */
     else { /* xnew == xold -- no x motion. */
         bb_coord_new->xmin = curr_bb_coord->xmin;
         bb_coord_new->xmax = curr_bb_coord->xmax;
         bb_edge_new->xmin = curr_bb_edge->xmin;
         bb_edge_new->xmax = curr_bb_edge->xmax;
     }

     /* Now account for the y-direction motion. */

     if (ynew < yold) { /* Move down. */

         /* Update the ymax fields for coordinates and number of edges first. */

         if (yold == curr_bb_coord->ymax) { /* Old position at ymax. */
             if (curr_bb_edge->ymax == 1) {
                 /* The only pin on the ymax edge left it: recompute fully. */
                 get_bb_from_scratch(inet, bb_coord_new, bb_edge_new);
                 bb_updated_before[inet] = GOT_FROM_SCRATCH;
                 return;
             } else {
                 bb_edge_new->ymax = curr_bb_edge->ymax - 1;
                 bb_coord_new->ymax = curr_bb_coord->ymax;
             }
         }

         else { /* Move down, old position was not at ymax. */
             bb_coord_new->ymax = curr_bb_coord->ymax;
             bb_edge_new->ymax = curr_bb_edge->ymax;
         }

         /* Now do the ymin fields for coordinates and number of edges. */

         if (ynew < curr_bb_coord->ymin) { /* Moved past ymin */
             bb_coord_new->ymin = ynew;
             bb_edge_new->ymin = 1;
         }

         else if (ynew == curr_bb_coord->ymin) { /* Moved to ymin */
             bb_coord_new->ymin = ynew;
             bb_edge_new->ymin = curr_bb_edge->ymin + 1;
         }

         else { /* ymin unchanged. */
             bb_coord_new->ymin = curr_bb_coord->ymin;
             bb_edge_new->ymin = curr_bb_edge->ymin;
         }
     }
     /* End of move down case. */
     else if (ynew > yold) { /* Moved up. */

         /* Update the ymin fields for coordinates and number of edges first. */

         if (yold == curr_bb_coord->ymin) { /* Old position at ymin. */
             if (curr_bb_edge->ymin == 1) {
                 /* The only pin on the ymin edge left it: recompute fully. */
                 get_bb_from_scratch(inet, bb_coord_new, bb_edge_new);
                 bb_updated_before[inet] = GOT_FROM_SCRATCH;
                 return;
             } else {
                 bb_edge_new->ymin = curr_bb_edge->ymin - 1;
                 bb_coord_new->ymin = curr_bb_coord->ymin;
             }
         }

         else { /* Moved up, old position was not at ymin. */
             bb_coord_new->ymin = curr_bb_coord->ymin;
             bb_edge_new->ymin = curr_bb_edge->ymin;
         }

         /* Now do the ymax fields for coordinates and number of edges. */

         if (ynew > curr_bb_coord->ymax) { /* Moved past ymax. */
             bb_coord_new->ymax = ynew;
             bb_edge_new->ymax = 1;
         }

         else if (ynew == curr_bb_coord->ymax) { /* Moved to ymax */
             bb_coord_new->ymax = ynew;
             bb_edge_new->ymax = curr_bb_edge->ymax + 1;
         }

         else { /* ymax unchanged. */
             bb_coord_new->ymax = curr_bb_coord->ymax;
             bb_edge_new->ymax = curr_bb_edge->ymax;
         }
     }
     /* End of move up case. */
     else { /* ynew == yold -- no y motion. */
         bb_coord_new->ymin = curr_bb_coord->ymin;
         bb_coord_new->ymax = curr_bb_coord->ymax;
         bb_edge_new->ymin = curr_bb_edge->ymin;
         bb_edge_new->ymax = curr_bb_edge->ymax;
     }

     /* NOTE(review): this condition cannot be true here.  The               *
      * NOT_UPDATED_YET branch near the top already stored UPDATED_ONCE, and *
      * nothing on the paths that reach this point writes the flag again.    */
     if (bb_updated_before[inet] == NOT_UPDATED_YET)
         bb_updated_before[inet] = UPDATED_ONCE;
 }


 /*
  * alloc_legal_placements -- allocate the per-type legal position tables.
  *
  * Allocates legal_pos (one row per block type) and num_legal_pos (zeroed
  * counters), then walks every grid tile in [0, nx+1] x [0, ny+1]:
  *   - the tile's usage is reset to 0 and every slot is set to EMPTY;
  *   - for tiles whose offset is 0, each slot adds one to the counter of
  *     the tile's type.
  * Finally each legal_pos row is sized to hold exactly the counted number
  * of positions.  The rows are left unfilled by this routine.
  */
 static void alloc_legal_placements() {
     int col, row, slot, type_idx;

     legal_pos = (t_legal_pos **) my_malloc(num_types * sizeof(t_legal_pos *));
     num_legal_pos = (int *) my_calloc(num_types, sizeof(int));

     /* Clear occupancy and tally the positions available to each type. */
     for (col = 0; col < nx + 2; col++) {
         for (row = 0; row < ny + 2; row++) {
             grid[col][row].usage = 0;

             for (slot = 0; slot < grid[col][row].type->capacity; slot++) {
                 grid[col][row].blocks[slot] = EMPTY;

                 /* Only tiles with a zero offset contribute positions. */
                 if (grid[col][row].offset != 0)
                     continue;

                 num_legal_pos[grid[col][row].type->index] += 1;
             }
         }
     }

     /* One array of positions per type, sized from the tally above. */
     type_idx = 0;
     while (type_idx < num_types) {
         legal_pos[type_idx] = (t_legal_pos *) my_malloc(
                 num_legal_pos[type_idx] * sizeof(t_legal_pos));
         type_idx++;
     }
 }


 static void load_legal_placements() {

     int i, j, k, itype;

     int *index;


     index = (int *) my_calloc(num_types, sizeof(int));


     for (i = 0; i <= nx + 1; i++) {

         for (j = 0; j <= ny + 1; j++) {

             for (k = 0; k < grid[i][j].type->capacity; k++) {

                 if (grid[i][j].offset == 0) {

                     itype = grid[i][j].type->index;

                     legal_pos[itype][index[itype]].x = i;

                     legal_pos[itype][index[itype]].y = j;

                     legal_pos[itype][index[itype]].z = k;

                     index[itype]++;

                 }

             }

         }

     }

     free(index);

 }


 /*
  * free_legal_placements -- release the legal position tables.
  *
  * Frees every per-type row of legal_pos, the legal_pos row table itself
  * and the num_legal_pos counters.  Both globals are reset to NULL
  * afterwards so that no dangling pointer is left behind, and a NULL
  * legal_pos is tolerated, which makes a repeated call harmless.
  */
 static void free_legal_placements() {
     int itype;

     if (legal_pos != NULL) {
         for (itype = 0; itype < num_types; itype++) {
             free(legal_pos[itype]);
         }
         free(legal_pos); /* Free the mapping list */
         legal_pos = NULL;
     }

     free(num_legal_pos); /* free(NULL) is a no-op */
     num_legal_pos = NULL;
 }


 /*
  * check_macro_can_be_placed -- test whether macro imacro fits on the grid
  * when its head block is put at (x, y, z).
  *
  * Parameters:
  *   imacro   index into pl_macros.
  *   itype    type index every member location must have.
  *   x, y, z  candidate location of the head; each member is located at
  *            the head position plus its x/y/z offset.
  *
  * Returns TRUE when every member location
  *   - lies inside the grid, i.e. 0 <= x <= nx+1 and 0 <= y <= ny+1,
  *   - belongs to a tile whose type index equals itype,
  *   - has a z index inside that tile's capacity, and
  *   - is currently OPEN;
  * otherwise FALSE.  The lower bounds and the z range are validated before
  * the grid is indexed, so a member offset that points off the chip or
  * past the tile capacity yields FALSE rather than an out-of-bounds read.
  */
 static int check_macro_can_be_placed(int imacro, int itype, int x, int y, int z) {

     int imember;
     int member_x, member_y, member_z;

     for (imember = 0; imember < pl_macros[imacro].num_blocks; imember++) {
         member_x = x + pl_macros[imacro].members[imember].x_offset;
         member_y = y + pl_macros[imacro].members[imember].y_offset;
         member_z = z + pl_macros[imacro].members[imember].z_offset;

         // The member must sit inside the chip's dimensions.
         if (member_x < 0 || member_x > nx + 1
                 || member_y < 0 || member_y > ny + 1) {
             return (FALSE);
         }

         // The location must accept blocks of this type.
         if (grid[member_x][member_y].type->index != itype) {
             return (FALSE);
         }

         // member_z must name a slot that exists at this location.
         if (member_z < 0
                 || member_z >= grid[member_x][member_y].type->capacity) {
             return (FALSE);
         }

         // The slot must still be free.
         if (grid[member_x][member_y].blocks[member_z] != OPEN) {
             return (FALSE);
         }
     }

     // No member was rejected.
     return (TRUE);
 }


 static int try_place_macro(int itype, int ichoice, int imacro, int * free_locations){


     int x, y, z, member_x, member_y, member_z, imember;


     int macro_placed = FALSE;


     // Choose a random position for the head

     x = legal_pos[itype][ichoice].x;

     y = legal_pos[itype][ichoice].y;

     z = legal_pos[itype][ichoice].z;


     // If that location is occupied, do nothing.

     if (grid[x][y].blocks[z] != OPEN) {

         return (macro_placed);

     }


     int macro_can_be_placed = check_macro_can_be_placed(imacro, itype, x, y, z);


     if (macro_can_be_placed == TRUE) {


         // Place down the macro

         macro_placed = TRUE;

         for (imember = 0; imember < pl_macros[imacro].num_blocks; imember++) {


             member_x = x + pl_macros[imacro].members[imember].x_offset;

             member_y = y + pl_macros[imacro].members[imember].y_offset;

             member_z = z + pl_macros[imacro].members[imember].z_offset;


             block[pl_macros[imacro].members[imember].blk_index].x = member_x;

             block[pl_macros[imacro].members[imember].blk_index].y = member_y;

             block[pl_macros[imacro].members[imember].blk_index].z = member_z;


             grid[member_x][member_y].blocks[member_z] = pl_macros[imacro].members[imember].blk_index;

             grid[member_x][member_y].usage++;


             // Could not ensure that the randomiser would not pick this location again

             // So, would have to do a lazy removal - whenever I come across a block that could not be placed,

             // go ahead and remove it from the legal_pos[][] array


         } // Finish placing all the members in the macro


     } // End of this choice of legal_pos


     return (macro_placed);


 }


 /*
  * initial_placement_pl_macros -- place every placement macro.
  *
  * For each macro, in index order:
  *   1. The type is taken from the macro's first member (all members are
  *      assumed to share it).  If fewer candidate locations of that type
  *      remain than the macro has blocks, an error is printed and the
  *      program exits.
  *   2. Up to macros_max_num_tries candidate positions chosen through
  *      my_irand() are tried as the head location.
  *   3. If none succeeded, every candidate in legal_pos[type] is tried in
  *      order; if that fails as well an error is printed and the program
  *      exits.
  *
  * free_locations[] is only read here; it is handed on to try_place_macro().
  */
 static void initial_placement_pl_macros(int macros_max_num_tries, int * free_locations) {

     int placed;
     int m, head_blk, type_idx, attempt, pick;

     /* Macros are harder to place.  Do them first */
     for (m = 0; m < num_pl_macros; m++) {

         // Assume that all the blocks in the macro are of the same type
         head_blk = pl_macros[m].members[0].blk_index;
         type_idx = block[head_blk].type->index;

         if (free_locations[type_idx] < pl_macros[m].num_blocks) {
             vpr_printf (TIO_MESSAGE_ERROR, "Initial placement failed.\n");
             vpr_printf (TIO_MESSAGE_ERROR, "Could not place macro length %d with head block %s (#%d); not enough free locations of type %s (#%d).\n",
                     pl_macros[m].num_blocks, block[head_blk].name, head_blk, type_descriptors[type_idx].name, type_idx);
             vpr_printf (TIO_MESSAGE_INFO, "VPR cannot auto-size for your circuit, please resize the FPGA manually.\n");
             exit(1);
         }

         // Phase 1: a bounded number of randomly chosen head positions
         placed = FALSE;
         attempt = 0;
         while (attempt < macros_max_num_tries && placed == FALSE) {
             pick = my_irand(free_locations[type_idx] - 1);
             placed = try_place_macro(type_idx, pick, m, free_locations);
             attempt++;
         }

         if (placed != FALSE) {
             // Placed successfully, move on to the next macro
             continue;
         }

         // Phase 2: exhaustive scan, taking the first candidate that is legal
         for (pick = 0; pick < free_locations[type_idx] && placed == FALSE; pick++) {
             placed = try_place_macro(type_idx, pick, m, free_locations);
         }

         // Nothing worked even after the exhaustive scan: give up
         if (placed == FALSE) {
             vpr_printf (TIO_MESSAGE_ERROR, "Initial placement failed.\n");
             vpr_printf (TIO_MESSAGE_ERROR, "Could not place macro length %d with head block %s (#%d); not enough free locations of type %s (#%d).\n",
                     pl_macros[m].num_blocks, block[head_blk].name, head_blk, type_descriptors[type_idx].name, type_idx);
             vpr_printf (TIO_MESSAGE_INFO, "Please manually size the FPGA because VPR can't do this yet.\n");
             exit(1);
         }
     }
 }


 /*
  * initial_placement_blocks -- place every block that has no position yet.
  *
  * Blocks whose x is not -1 are skipped as already placed.  Blocks of
  * IO_TYPE are skipped as well when pad_loc_type is USER.
  *
  * For every other block a candidate is picked from the first
  * free_locations[type] entries of legal_pos[type] through my_irand(); the
  * slot must be OPEN (asserted).  The block is recorded in both grid and
  * block, and the chosen entry is overwritten by the last live entry while
  * free_locations[type] shrinks by one, so it cannot be picked again.
  *
  * If a type has no candidates left, an error is printed and the program
  * exits.
  */
 static void initial_placement_blocks(int * free_locations, enum e_pad_loc_type pad_loc_type) {

     int b, type_idx;
     int pick, last, px, py, pz;

     for (b = 0; b < num_blocks; b++) {

         /* Already placed. */
         if (block[b].x != -1) {
             continue;
         }

         /* Don't do IOs if the user specifies IOs. */
         if (block[b].type == IO_TYPE && pad_loc_type == USER) {
             continue;
         }

         type_idx = block[b].type->index;
         if (free_locations[type_idx] <= 0) {
             vpr_printf (TIO_MESSAGE_ERROR, "Initial placement failed.\n");
             vpr_printf (TIO_MESSAGE_ERROR, "Could not place block %s (#%d); no free locations of type %s (#%d).\n",
                     block[b].name, b, type_descriptors[type_idx].name, type_idx);
             exit(1);
         }

         /* Pick among the entries that are still live. */
         last = free_locations[type_idx] - 1;
         pick = my_irand(last);
         px = legal_pos[type_idx][pick].x;
         py = legal_pos[type_idx][pick].y;
         pz = legal_pos[type_idx][pick].z;

         /* The position has to be OPEN before the block is put down. */
         assert (grid[px][py].blocks[pz] == OPEN);

         grid[px][py].blocks[pz] = b;
         grid[px][py].usage++;

         block[b].x = px;
         block[b].y = py;
         block[b].z = pz;

         /* Retire the used entry: move the last live entry into its place
          * and shorten the live range, which is cheaper than shifting the
          * whole array.
          */
         legal_pos[type_idx][pick] = legal_pos[type_idx][last];
         free_locations[type_idx] = last;
     }
 }


 static void initial_placement(enum e_pad_loc_type pad_loc_type,

         char *pad_loc_file) {


     /* Randomly places the blocks to create an initial placement. We rely on

      * the legal_pos array already being loaded.  That legal_pos[itype] is an

      * array that gives every legal value of (x,y,z) that can accomodate a block.

      * The number of such locations is given by num_legal_pos[itype].

      */

     int i, j, k, iblk, itype, x, y, z, ichoice;

     int *free_locations; /* [0..num_types-1].

                           * Stores how many locations there are for this type that *might* still be free.

                           * That is, this stores the number of entries in legal_pos[itype] that are worth considering

                           * as you look for a free location.

                           */


     free_locations = (int *) my_malloc(num_types * sizeof(int));

     for (itype = 0; itype < num_types; itype++) {

         free_locations[itype] = num_legal_pos[itype];

     }


     /* We'll use the grid to record where everything goes. Initialize to the grid has no

      * blocks placed anywhere.

      */

     for (i = 0; i <= nx + 1; i++) {

         for (j = 0; j <= ny + 1; j++) {

             grid[i][j].usage = 0;

             itype = grid[i][j].type->index;

             for (k = 0; k < type_descriptors[itype].capacity; k++) {

                 grid[i][j].blocks[k] = OPEN;

             }

         }

     }


     /* Similarly, mark all blocks as not being placed yet. */

     for (iblk = 0; iblk < num_blocks; iblk++) {

         block[iblk].x = -1;

         block[iblk].y = -1;

         block[iblk].z = -1;

     }


     initial_placement_pl_macros(MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY, free_locations);


     // All the macros are placed, update the legal_pos[][] array

     for (itype = 0; itype < num_types; itype++) {

         assert (free_locations[itype] >= 0);

         for (ichoice = 0; ichoice < free_locations[itype]; ichoice++) {

             x = legal_pos[itype][ichoice].x;

             y = legal_pos[itype][ichoice].y;

             z = legal_pos[itype][ichoice].z;


             // Check if that location is occupied.  If it is, remove from legal_pos

             if (grid[x][y].blocks[z] != OPEN) {

                 legal_pos[itype][ichoice] = legal_pos[itype][free_locations[itype] - 1];

                 free_locations[itype]--;


                 // After the move, I need to check this particular entry again

                 ichoice--;

                 continue;

             }

         }

     } // Finish updating the legal_pos[][] and free_locations[] array


     initial_placement_blocks(free_locations, pad_loc_type);


     if (pad_loc_type == USER) {

         read_user_pad_loc(pad_loc_file);

     }


     /* Restore legal_pos */

     load_legal_placements();


 #ifdef VERBOSE

     vpr_printf(TIO_MESSAGE_INFO, "At end of initial_placement.\n");

     if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_INITIAL_CLB_PLACEMENT)) {

         print_clb_placement(getEchoFileName(E_ECHO_INITIAL_CLB_PLACEMENT));

     }

 #endif

     free(free_locations);

 }


 /*
  * free_fast_cost_update -- release the chanx_place_cost_fac and
  * chany_place_cost_fac tables.
  *
  * Rows 0..ny of the x table and rows 0..nx of the y table are freed,
  * followed by each row table itself; both globals are then set to NULL.
  */
 static void free_fast_cost_update(void) {
     int row;

     /* x-directed channel factors: ny + 1 rows. */
     row = 0;
     while (row <= ny) {
         free(chanx_place_cost_fac[row]);
         row++;
     }
     free(chanx_place_cost_fac);
     chanx_place_cost_fac = NULL;

     /* y-directed channel factors: nx + 1 rows. */
     row = 0;
     while (row <= nx) {
         free(chany_place_cost_fac[row]);
         row++;
     }
     free(chany_place_cost_fac);
     chany_place_cost_fac = NULL;
 }


 /*
  * alloc_and_load_for_fast_cost_update -- build the chanx_place_cost_fac
  * and chany_place_cost_fac lookup tables.
  *
  * After this call, chan?_place_cost_fac[high][low] (with high >= low)
  * holds
  *
  *     ((high - low + 1) / (sum of channel widths low..high)) ^ place_cost_exp
  *
  * i.e. the inverse of the average number of tracks per channel between
  * low and high, raised to place_cost_exp.  Storing the inverse lets the
  * cost function multiply instead of divide; exponents other than one
  * penalize narrow channels more heavily.
  *
  * Only the lower triangle is stored: row i has i + 1 entries.  The x
  * table has ny + 1 rows and uses chan_width_x; the y table has nx + 1
  * rows and uses chan_width_y.
  */
 static void alloc_and_load_for_fast_cost_update(float place_cost_exp) {

     int lo, hi, row;
     float *cell;

     /* Triangular storage for both tables. */
     chanx_place_cost_fac = (float **) my_malloc((ny + 1) * sizeof(float *));
     for (row = 0; row <= ny; row++) {
         chanx_place_cost_fac[row] = (float *) my_malloc((row + 1) * sizeof(float));
     }

     chany_place_cost_fac = (float **) my_malloc((nx + 1) * sizeof(float *));
     for (row = 0; row <= nx; row++) {
         chany_place_cost_fac[row] = (float *) my_malloc((row + 1) * sizeof(float));
     }

     /* x-directed channels, pass 1: total tracks in channels lo..hi,       *
      * inclusive.  Each row extends the sums of the row above it by the    *
      * width of channel hi.                                                */
     chanx_place_cost_fac[0][0] = chan_width_x[0];
     for (hi = 1; hi <= ny; hi++) {
         for (lo = 0; lo < hi; lo++) {
             chanx_place_cost_fac[hi][lo] =
                     chanx_place_cost_fac[hi - 1][lo] + chan_width_x[hi];
         }
         chanx_place_cost_fac[hi][hi] = chan_width_x[hi];
     }

     /* x-directed channels, pass 2: turn each total into the inverse       *
      * average, then raise it to place_cost_exp.  The intermediate value   *
      * is stored back into the float cell before pow() is applied.         */
     for (hi = 0; hi <= ny; hi++) {
         for (lo = 0; lo <= hi; lo++) {
             cell = &chanx_place_cost_fac[hi][lo];
             *cell = (hi - lo + 1.) / *cell;
             *cell = pow((double) *cell, (double) place_cost_exp);
         }
     }

     /* y-directed channels, pass 1: same running totals. */
     chany_place_cost_fac[0][0] = chan_width_y[0];
     for (hi = 1; hi <= nx; hi++) {
         for (lo = 0; lo < hi; lo++) {
             chany_place_cost_fac[hi][lo] =
                     chany_place_cost_fac[hi - 1][lo] + chan_width_y[hi];
         }
         chany_place_cost_fac[hi][hi] = chan_width_y[hi];
     }

     /* y-directed channels, pass 2: inverse average to the given power. */
     for (hi = 0; hi <= nx; hi++) {
         for (lo = 0; lo <= hi; lo++) {
             cell = &chany_place_cost_fac[hi][lo];
             *cell = (hi - lo + 1.) / *cell;
             *cell = pow((double) *cell, (double) place_cost_exp);
         }
     }
 }


 static void check_place(float bb_cost, float timing_cost,

         enum e_place_algorithm place_algorithm,

         float delay_cost) {


     /* Checks that the placement has not confused our data structures. *

      * i.e. the clb and block structures agree about the locations of  *

      * every block, blocks are in legal spots, etc.  Also recomputes   *

      * the final placement cost from scratch and makes sure it is      *

      * within roundoff of what we think the cost is.                   */


     static int *bdone;

     int i, j, k, error = 0, bnum;

     float bb_cost_check;

     int usage_check;

     float timing_cost_check, delay_cost_check;

     int imacro, imember, head_iblk, member_iblk, member_x, member_y, member_z;


     bb_cost_check = comp_bb_cost(CHECK);

     vpr_printf(TIO_MESSAGE_INFO, "bb_cost recomputed from scratch: %g\n", bb_cost_check);

     if (fabs(bb_cost_check - bb_cost) > bb_cost * ERROR_TOL) {

         vpr_printf(TIO_MESSAGE_ERROR, "bb_cost_check: %g and bb_cost: %g differ in check_place.\n", bb_cost_check, bb_cost);

         error++;

     }


     if (place_algorithm == NET_TIMING_DRIVEN_PLACE

             || place_algorithm == PATH_TIMING_DRIVEN_PLACE) {

         comp_td_costs(&timing_cost_check, &delay_cost_check);

         vpr_printf(TIO_MESSAGE_INFO, "timing_cost recomputed from scratch: %g\n", timing_cost_check);

         if (fabs(timing_cost_check - timing_cost) > timing_cost * ERROR_TOL) {

             vpr_printf(TIO_MESSAGE_ERROR, "timing_cost_check: %g and timing_cost: %g differ in check_place.\n",

                        timing_cost_check, timing_cost);

             error++;

         }

         vpr_printf(TIO_MESSAGE_INFO, "delay_cost recomputed from scratch: %g\n", delay_cost_check);

         if (fabs(delay_cost_check - delay_cost) > delay_cost * ERROR_TOL) {

             vpr_printf(TIO_MESSAGE_ERROR, "delay_cost_check: %g and delay_cost: %g differ in check_place.\n",

                     delay_cost_check, delay_cost);

             error++;

         }

     }


     bdone = (int *) my_malloc(num_blocks * sizeof(int));

     for (i = 0; i < num_blocks; i++)

         bdone[i] = 0;


     /* Step through grid array. Check it against block array. */

     for (i = 0; i <= (nx + 1); i++)

         for (j = 0; j <= (ny + 1); j++) {

             if (grid[i][j].usage > grid[i][j].type->capacity) {

                 vpr_printf(TIO_MESSAGE_ERROR, "Block at grid location (%d,%d) overused. Usage is %d.\n",

                         i, j, grid[i][j].usage);

                 error++;

             }

             usage_check = 0;

             for (k = 0; k < grid[i][j].type->capacity; k++) {

                 bnum = grid[i][j].blocks[k];

                 if (EMPTY == bnum)

                     continue;


                 if (block[bnum].type != grid[i][j].type) {

                     vpr_printf(TIO_MESSAGE_ERROR, "Block %d type does not match grid location (%d,%d) type.\n",

                             bnum, i, j);

                     error++;

                 }

                 if ((block[bnum].x != i) || (block[bnum].y != j)) {

                     vpr_printf(TIO_MESSAGE_ERROR, "Block %d location conflicts with grid(%d,%d) data.\n",

                             bnum, i, j);

                     error++;

                 }

                 ++usage_check;

                 bdone[bnum]++;

             }

             if (usage_check != grid[i][j].usage) {

                 vpr_printf(TIO_MESSAGE_ERROR, "Location (%d,%d) usage is %d, but has actual usage %d.\n",

                         i, j, grid[i][j].usage, usage_check);

                 error++;

             }

         }


     /* Check that every block exists in the grid and block arrays somewhere. */

     for (i = 0; i < num_blocks; i++)

         if (bdone[i] != 1) {

             vpr_printf(TIO_MESSAGE_ERROR, "Block %d listed %d times in data structures.\n",

                     i, bdone[i]);

             error++;

         }

     free(bdone);


     /* Check the pl_macro placement are legal - blocks are in the proper relative position. */

     for (imacro = 0; imacro < num_pl_macros; imacro++) {


         head_iblk = pl_macros[imacro].members[0].blk_index;


         for (imember = 0; imember < pl_macros[imacro].num_blocks; imember++) {


             member_iblk = pl_macros[imacro].members[imember].blk_index;


             // Compute the suppossed member's x,y,z location

             member_x = block[head_iblk].x + pl_macros[imacro].members[imember].x_offset;

             member_y = block[head_iblk].y + pl_macros[imacro].members[imember].y_offset;

             member_z = block[head_iblk].z + pl_macros[imacro].members[imember].z_offset;


             // Check the block data structure first

             if (block[member_iblk].x != member_x

                     || block[member_iblk].y != member_y

                     || block[member_iblk].z != member_z) {

                 vpr_printf(TIO_MESSAGE_ERROR, "Block %d in pl_macro #%d is not placed in the proper orientation.\n",

                         member_iblk, imacro);

                 error++;

             }


             // Then check the grid data structure

             if (grid[member_x][member_y].blocks[member_z] != member_iblk) {

                 vpr_printf(TIO_MESSAGE_ERROR, "Block %d in pl_macro #%d is not placed in the proper orientation.\n",

                         member_iblk, imacro);

                 error++;

             }

         } // Finish going through all the members

     } // Finish going through all the macros


     if (error == 0) {

         vpr_printf(TIO_MESSAGE_INFO, "\n");

         vpr_printf(TIO_MESSAGE_INFO, "Completed placement consistency check successfully.\n");

         vpr_printf(TIO_MESSAGE_INFO, "\n");

         vpr_printf(TIO_MESSAGE_INFO, "Swaps called: %d\n", num_ts_called);


 #ifdef PRINT_REL_POS_DISTR

         print_relative_pos_distr(void);

 #endif

     } else {

         vpr_printf(TIO_MESSAGE_INFO, "\n");

         vpr_printf(TIO_MESSAGE_ERROR, "Completed placement consistency check, %d errors found.\n", error);

         vpr_printf(TIO_MESSAGE_INFO, "Aborting program.\n");

         exit(1);

     }


 }


 #ifdef VERBOSE
 /*
  * print_clb_placement -- write the placement of every block to a file.
  *
  * fname is opened for writing through my_fopen().  The output is a title
  * line, a blank line, a column header, and then one line per block giving
  * its index, name and (x, y, z) position.  Only compiled when VERBOSE is
  * defined.
  */
 static void print_clb_placement(const char *fname) {

     FILE *out;
     int b;

     out = my_fopen(fname, "w", 0);

     fputs("Complex block placements:\n\n", out);
     fputs("Block #\tName\t(X, Y, Z).\n", out);

     b = 0;
     while (b < num_blocks) {
         fprintf(out, "#%d\t%s\t(%d, %d, %d).\n", b, block[b].name, block[b].x, block[b].y, block[b].z);
         b++;
     }

     fclose(out);
 }
 #endif


 /*
  * free_try_swap_arrays -- release the scratch arrays used by try_swap.
  *
  * Does nothing when ts_bb_coord_new is NULL.  Otherwise frees
  * ts_bb_coord_new, ts_bb_edge_new, ts_nets_to_update,
  * blocks_affected.moved_blocks and bb_updated_before, sets each of them
  * to NULL and resets blocks_affected.num_moved_blocks to 0.
  */
 static void free_try_swap_arrays(void) {

     /* ts_bb_coord_new doubles as the "arrays are allocated" marker. */
     if (ts_bb_coord_new == NULL) {
         return;
     }

     free(ts_bb_coord_new);
     ts_bb_coord_new = NULL;

     free(ts_bb_edge_new);
     ts_bb_edge_new = NULL;

     free(ts_nets_to_update);
     ts_nets_to_update = NULL;

     free(blocks_affected.moved_blocks);
     blocks_affected.moved_blocks = NULL;
     blocks_affected.num_moved_blocks = 0;

     free(bb_updated_before);
     bb_updated_before = NULL;
 }


s_grid_tile::type
t_type_ptr type
Definition: vpr_types.h:522

s_net::node_block_pin
int * node_block_pin
Definition: vpr_types.h:509

s_type_descriptor::pin_height
int * pin_height
Definition: physical_types.h:625

comp_bb_cost
static float comp_bb_cost(enum cost_methods method)
Definition: place.c:1866

MAX_MOVES_BEFORE_RECOMPUTE
#define MAX_MOVES_BEFORE_RECOMPUTE
Definition: place.c:36

s_pl_macro_member::blk_index
int blk_index
Definition: place_macro.h:147

load_legal_placements
static void load_legal_placements()
Definition: place.c:2532

free_try_swap_arrays
static void free_try_swap_arrays(void)
Definition: place.c:3108

s_pl_moved_block
Definition: place.c:79

PATH_TIMING_DRIVEN_PLACE
Definition: vpr_types.h:633

chany_place_cost_fac
static float ** chany_place_cost_fac
Definition: place.c:171

find_affected_nets
static int find_affected_nets(int *nets_to_update)
Definition: place.c:1482

update_screen
void update_screen(int priority, char *msg, enum pic_type pic_on_screen_val, boolean crit_path_button_enabled)
Definition: draw.c:156

my_fopen
FILE * my_fopen(const char *fname, const char *flag, int prompt)
Definition: util.c:54

num_swap_aborted
static int num_swap_aborted
Definition: place.c:189

ts_bb_edge_new
static struct s_bb * ts_bb_edge_new
Definition: place.c:176

check_place
static void check_place(float bb_cost, float timing_cost, enum e_place_algorithm place_algorithm, float delay_cost)
Definition: place.c:2950

s_pl_moved_block::swapped_to_empty
int swapped_to_empty
Definition: place.c:87

draw.h

s_grid_tile::usage
int usage
Definition: vpr_types.h:524

s_pl_moved_block::xold
int xold
Definition: place.c:81

free_port_pin_from_blk_pin
void free_port_pin_from_blk_pin(void)
Definition: vpr_utils.c:736

get_imacro_from_iblk
void get_imacro_from_iblk(int *imacro, int iblk, t_pl_macro *macros, int num_macros)
Definition: place_macro.c:362

s_legal_pos
Definition: place.c:110

place.h

t_pl_blocks_to_be_moved
struct s_pl_blocks_to_be_moved t_pl_blocks_to_be_moved

s_legal_pos::z
int z
Definition: place.c:113

s_placer_opts::enable_timing_computations
boolean enable_timing_computations
Definition: vpr_types.h:646

PLACEMENT
Definition: vpr_types.h:463

comp_delta_td_cost
static void comp_delta_td_cost(float *delta_timing, float *delta_delay)
Definition: place.c:1754

place_and_route.h

get_bb_from_scratch
static void get_bb_from_scratch(int inet, struct s_bb *coords, struct s_bb *num_on_edges)
Definition: place.c:2081

free_matrix
void free_matrix(void *vptr, int nrmin, int nrmax, int ncmin, size_t elsize)
Definition: util.c:573

s_bb::xmax
int xmax
Definition: vpr_types.h:533

initial_placement
static void initial_placement(enum e_pad_loc_type pad_loc_type, char *pad_loc_file)
Definition: place.c:2772

legal_pos
static t_legal_pos ** legal_pos
Definition: place.c:116

s_grid_tile::offset
int offset
Definition: vpr_types.h:523

s_annealing_sched::alpha_t
float alpha_t
Definition: vpr_types.h:628

s_placer_opts::pad_loc_file
char * pad_loc_file
Definition: vpr_types.h:643

s_bb::ymin
int ymin
Definition: vpr_types.h:534

MAJOR
#define MAJOR
Definition: vpr_types.h:73

E_ECHO_FINAL_PLACEMENT_SLACK
Definition: ReadOptions.h:103

get_critical_path_delay
float get_critical_path_delay(void)
Definition: path_delay.c:3060

E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH
Definition: ReadOptions.h:102

s_placer_opts::pad_loc_type
enum e_pad_loc_type pad_loc_type
Definition: vpr_types.h:642

timing_place_crit
float ** timing_place_crit
Definition: timing_place.c:12

net_delay
static float ** net_delay
Definition: timing_place_lookup.c:76

s_block::x
int x
Definition: vpr_types.h:563

s_pl_moved_block::block_num
int block_num
Definition: place.c:80

recompute_bb_cost
static float recompute_bb_cost(void)
Definition: place.c:1630

s_pl_blocks_to_be_moved::num_moved_blocks
int num_moved_blocks
Definition: place.c:99

delta_clb_to_clb
float ** delta_clb_to_clb
Definition: timing_place_lookup.c:67

chan_width_x
int * chan_width_x
Definition: globals.c:56

GOT_FROM_SCRATCH
#define GOT_FROM_SCRATCH
Definition: place.c:47

free_lookups_and_criticalities
void free_lookups_and_criticalities(float ***net_delay, t_slack *slacks)
Definition: timing_place.c:141

s_det_routing_arch
Definition: vpr_types.h:757

assess_swap
static enum swap_result assess_swap(float delta_c, float t)
Definition: place.c:1600

ts_bb_coord_new
static struct s_bb * ts_bb_coord_new
Definition: place.c:175

my_calloc
void * my_calloc(size_t nelem, size_t size)
Definition: util.c:132

alloc_legal_placements
static void alloc_legal_placements()
Definition: place.c:2507

vpr_types.h

initial_placement_blocks
static void initial_placement_blocks(int *free_locations, enum e_pad_loc_type pad_loc_type)
Definition: place.c:2713

get_std_dev
static double get_std_dev(int n, double sum_x_squared, double av_x)
Definition: place.c:947

s_placer_opts::timing_tradeoff
float timing_tradeoff
Definition: vpr_types.h:638

num_nets
int num_nets
Definition: globals.c:27

s_net::node_block
int * node_block
Definition: vpr_types.h:507

delta_io_to_clb
float ** delta_io_to_clb
Definition: timing_place_lookup.c:66

alloc_and_load_try_swap_structs
static void alloc_and_load_try_swap_structs()
Definition: place.c:2065

chan_width_y
int * chan_width_y
Definition: globals.c:57

s_pl_moved_block::zold
int zold
Definition: place.c:85

print_critical_path
void print_critical_path(const char *fname)
Definition: path_delay.c:2458

do_timing_analysis
void do_timing_analysis(t_slack *slacks, boolean is_prepacked, boolean do_lut_input_balancing, boolean is_final_analysis)
Definition: path_delay.c:1613

s_pl_macro::num_blocks
int num_blocks
Definition: place_macro.h:158

e_pad_loc_type
e_pad_loc_type
Definition: vpr_types.h:478

comp_td_point_to_point_delay
static float comp_td_point_to_point_delay(int inet, int ipin)
Definition: place.c:1652

place_macro.h

s_pl_macro_member::y_offset
int y_offset
Definition: place_macro.h:149

ReadOptions.h

E_ECHO_END_CLB_PLACEMENT
Definition: ReadOptions.h:100

s_slack::slack
float ** slack
Definition: vpr_types.h:405

MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY
#define MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY
Definition: place.c:41

timing_place_lookup.h

globals.h

s_block::type
t_type_ptr type
Definition: vpr_types.h:561

s_pl_macro_member::z_offset
int z_offset
Definition: place_macro.h:150

BUFSIZE
#define BUFSIZE
Definition: graphics.c:184

starting_t
static float starting_t(float *cost_ptr, float *bb_cost_ptr, float *timing_cost_ptr, float **old_region_occ_x, float **old_region_occ_y, struct s_annealing_sched annealing_sched, int max_moves, float rlim, enum e_place_algorithm place_algorithm, float timing_tradeoff, float inverse_prev_bb_cost, float inverse_prev_timing_cost, float *delay_cost_ptr)
Definition: place.c:1045

num_blocks
int num_blocks
Definition: globals.c:30

USER
Definition: vpr_types.h:479

s_annealing_sched::inner_num
float inner_num
Definition: vpr_types.h:626

try_swap
static enum swap_result try_swap(float t, float *cost, float *bb_cost, float *timing_cost, float rlim, float **old_region_occ_x, float **old_region_occ_y, enum e_place_algorithm place_algorithm, float timing_tradeoff, float inverse_prev_bb_cost, float inverse_prev_timing_cost, float *delay_cost)
Definition: place.c:1252

UNDEFINED
#define UNDEFINED
Definition: vpr_types.h:103

getEchoEnabled
boolean getEchoEnabled(void)
Definition: ReadOptions.c:67

ERROR_TOL
#define ERROR_TOL
Definition: place.c:31

s_pl_macro::members
t_pl_macro_member * members
Definition: place_macro.h:159

find_to
static boolean find_to(int x_from, int y_from, t_type_ptr type, float rlim, int *x_to, int *y_to)
Definition: place.c:1520

s_legal_pos::y
int y
Definition: place.c:112

FALSE
Definition: util.h:12

bb_updated_before
static char * bb_updated_before
Definition: place.c:132

s_bb
Definition: vpr_types.h:531

s_type_descriptor
Definition: physical_types.h:616

s_placer_opts::td_place_exp_first
float td_place_exp_first
Definition: vpr_types.h:648

free_fast_cost_update
static void free_fast_cost_update(void)
Definition: place.c:2852

s_type_descriptor::capacity
int capacity
Definition: physical_types.h:620

E_ECHO_PLACEMENT_CRIT_PATH
Definition: ReadOptions.h:105

s_block::y
int y
Definition: vpr_types.h:564

BOUNDING_BOX_PLACE
Definition: vpr_types.h:633

min
#define min(a, b)
Definition: graphics.c:174

blocks_affected
static t_pl_blocks_to_be_moved blocks_affected
Definition: place.c:161

s_timing_inf
Definition: physical_types.h:203

comp_td_costs
static void comp_td_costs(float *timing_cost, float *connection_delay_sum)
Definition: place.c:1829

ts_nets_to_update
static int * ts_nets_to_update
Definition: place.c:177

EMPTY
#define EMPTY
Definition: vpr_types.h:90

num_swap_accepted
static int num_swap_accepted
Definition: place.c:188

get_net_cost
static float get_net_cost(int inet, struct s_bb *bb_ptr)
Definition: place.c:2204

num_ts_called
static int num_ts_called
Definition: place.c:190

init_draw_coords
void init_draw_coords(float width_val)
Definition: draw.c:430

chanx_place_cost_fac
static float ** chanx_place_cost_fac
Definition: place.c:171

HUGE_POSITIVE_FLOAT
#define HUGE_POSITIVE_FLOAT
Definition: vpr_types.h:79

temp_point_to_point_delay_cost
static float ** temp_point_to_point_delay_cost
Definition: place.c:143

load_criticalities
void load_criticalities(t_slack *slacks, float crit_exponent)
Definition: timing_place.c:81

num_legal_pos
static int * num_legal_pos
Definition: place.c:117

my_malloc
static void * my_malloc(int ibytes)
Definition: graphics.c:499

NET_TIMING_DRIVEN_PLACE
Definition: vpr_types.h:633

is_global
boolean * is_global
Definition: globals_declare.h:5

check_macro_can_be_placed
static int check_macro_can_be_placed(int imacro, int itype, int x, int y, int z)
Definition: place.c:2565

my_frand
float my_frand(void)
Definition: util.c:738

max
#define max(a, b)
Definition: graphics.c:171

s_placer_opts::block_dist
int block_dist
Definition: vpr_types.h:639

MAX_INV_TIMING_COST
#define MAX_INV_TIMING_COST
Definition: place.c:64

block
struct s_block * block
Definition: globals.c:31

free_placement_macros_structs
void free_placement_macros_structs(void)
Definition: place_macro.c:421

bb_num_on_edges
static struct s_bb * bb_num_on_edges
Definition: place.c:155

clb_net
struct s_net * clb_net
Definition: globals.c:28

s_annealing_sched::exit_t
float exit_t
Definition: vpr_types.h:629

s_placer_opts::td_place_exp_last
float td_place_exp_last
Definition: vpr_types.h:650

update_td_cost
static void update_td_cost(void)
Definition: place.c:1699

nx
int nx
Definition: globals.c:46

ABORTED
Definition: place.c:61

net_delay.h

init_chan
void init_chan(int cfactor, t_chan_width_dist chan_width_dist)
Definition: place_and_route.c:564

timing_place.h

alloc_and_load_net_pin_index
int ** alloc_and_load_net_pin_index()
Definition: vpr_utils.c:651

delta_io_to_io
float ** delta_io_to_io
Definition: timing_place_lookup.c:69

NOT_UPDATED_YET
#define NOT_UPDATED_YET
Definition: place.c:45

free_placement_structs
static void free_placement_structs(float **old_region_occ_x, float **old_region_occ_y, struct s_placer_opts placer_opts)
Definition: place.c:1913

net_cost
static float * net_cost
Definition: place.c:107

t_pl_moved_block
struct s_pl_moved_block t_pl_moved_block

s_pl_moved_block::xnew
int xnew
Definition: place.c:82

delta_clb_to_io
float ** delta_clb_to_io
Definition: timing_place_lookup.c:68

util.h

s_pl_macro
Definition: place_macro.h:157

num_swap_rejected
static int num_swap_rejected
Definition: place.c:187

read_user_pad_loc
void read_user_pad_loc(char *pad_loc_file)
Definition: read_place.c:134

isEchoFileEnabled
boolean isEchoFileEnabled(enum e_echo_files echo_option)
Definition: ReadOptions.c:115

point_to_point_delay_cost
static float ** point_to_point_delay_cost
Definition: place.c:142

E_ECHO_PLACEMENT_LOWER_BOUND_SINK_DELAYS
Definition: ReadOptions.h:109

print_sink_delays
void print_sink_delays(const char *fname)
Definition: timing_place.c:58

s_pl_blocks_to_be_moved
Definition: place.c:98

E_ECHO_PLACEMENT_LOGIC_SINK_DELAYS
Definition: ReadOptions.h:110

place_stats.h

find_affected_blocks
static int find_affected_blocks(int b_from, int x_to, int y_to, int z_to)
Definition: place.c:1192

free_blk_pin_from_port_pin
void free_blk_pin_from_port_pin(void)
Definition: vpr_utils.c:840

temp_point_to_point_timing_cost
static float ** temp_point_to_point_timing_cost
Definition: place.c:138

try_place_macro
static int try_place_macro(int itype, int ichoice, int imacro, int *free_locations)
Definition: place.c:2599

E_ECHO_INITIAL_CLB_PLACEMENT
Definition: ReadOptions.h:96

update_rlim
static void update_rlim(float *rlim, float success_rat)
Definition: place.c:969

grid
struct s_grid_tile ** grid
Definition: globals.c:59

alloc_and_load_placement_structs
static void alloc_and_load_placement_structs(float place_cost_exp, float ***old_region_occ_x, float ***old_region_occ_y, struct s_placer_opts placer_opts, t_direct_inf *directs, int num_directs)
Definition: place.c:1975

s_type_descriptor::index
int index
Definition: physical_types.h:653

REJECTED
Definition: place.c:61

update_bb
static void update_bb(int inet, struct s_bb *bb_coord_new, struct s_bb *bb_edge_new, int xold, int yold, int xnew, int ynew)
Definition: place.c:2292

s_placer_opts::place_cost_exp
float place_cost_exp
Definition: vpr_types.h:640

SMALL_NET
#define SMALL_NET
Definition: place.c:27

free_legal_placements
static void free_legal_placements()
Definition: place.c:2554

E_ECHO_PLACEMENT_SINK_DELAYS
Definition: ReadOptions.h:101

NORMAL
Definition: place.c:54

E_ECHO_INITIAL_PLACEMENT_CRITICALITY
Definition: ReadOptions.h:99

s_placer_opts::recompute_crit_iter
int recompute_crit_iter
Definition: vpr_types.h:645

CHECK
Definition: place.c:54

count_connections
static int count_connections(void)
Definition: place.c:930

s_placer_opts::place_algorithm
enum e_place_algorithm place_algorithm
Definition: vpr_types.h:637

s_grid_tile::blocks
int * blocks
Definition: vpr_types.h:525

print_criticality
void print_criticality(t_slack *slacks, boolean criticality_is_normalized, const char *fname)
Definition: path_delay.c:559

temp_net_cost
static float * temp_net_cost
Definition: place.c:107

s_direct_inf
Definition: physical_types.h:769

E_ECHO_PLACEMENT_CRITICAL_PATH
Definition: ReadOptions.h:108

my_irand
int my_irand(int imax)
Definition: util.c:710

USER_SCHED
Definition: vpr_types.h:458

load_constant_net_delay
void load_constant_net_delay(float **net_delay, float delay_value, struct s_net *nets, int n_nets)
Definition: net_delay.c:175

s_placer_opts
Definition: vpr_types.h:636

s_annealing_sched::type
enum sched_type type
Definition: vpr_types.h:625

num_types
int num_types
Definition: globals.c:37

s_annealing_sched
Definition: vpr_types.h:624

vpr_utils.h

IO_TYPE
t_type_ptr IO_TYPE
Definition: globals.c:40

exit_crit
static int exit_crit(float t, float cost, struct s_annealing_sched annealing_sched)
Definition: place.c:1023

alloc_lookups_and_criticalities
t_slack * alloc_lookups_and_criticalities(t_chan_width_dist chan_width_dist, struct s_router_opts router_opts, struct s_det_routing_arch det_routing_arch, t_segment_inf *segment_inf, t_timing_inf timing_inf, float ***net_delay, INP t_direct_inf *directs, INP int num_directs)
Definition: timing_place.c:121

alloc_and_load_for_fast_cost_update
static void alloc_and_load_for_fast_cost_update(float place_cost_exp)
Definition: place.c:2866

s_type_descriptor::num_pins
int num_pins
Definition: physical_types.h:619

OPEN
Definition: slre.c:50

s_block::nets
int * nets
Definition: vpr_types.h:562

print_slack
void print_slack(float **slack, boolean slack_is_normalized, const char *fname)
Definition: path_delay.c:441

cross_count
static const float cross_count[50]
Definition: place.c:197

point_to_point_timing_cost
static float ** point_to_point_timing_cost
Definition: place.c:137

print_timing_graph
void print_timing_graph(const char *fname)
Definition: path_delay.c:1388

s_block::z
int z
Definition: vpr_types.h:565

num_pl_macros
static int num_pl_macros
Definition: place.c:182

s_placer_opts::inner_loop_recompute_divider
int inner_loop_recompute_divider
Definition: vpr_types.h:647

type_descriptors
struct s_type_descriptor * type_descriptors
Definition: globals.c:38

getEchoFileName
char * getEchoFileName(enum e_echo_files echo_option)
Definition: ReadOptions.c:122

E_ECHO_INITIAL_PLACEMENT_SLACK
Definition: ReadOptions.h:98

s_pl_blocks_to_be_moved::moved_blocks
t_pl_moved_block * moved_blocks
Definition: place.c:100

s_pl_moved_block::ynew
int ynew
Definition: place.c:84

cost_methods
cost_methods
Definition: place.c:53

E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH
Definition: ReadOptions.h:97

t_legal_pos
struct s_legal_pos t_legal_pos

ACCEPTED
Definition: place.c:61

bb_coords
static struct s_bb * bb_coords
Definition: place.c:155

pl_macros
static t_pl_macro * pl_macros
Definition: place.c:181

read_xml_arch_file.h

E_ECHO_FINAL_PLACEMENT_CRITICALITY
Definition: ReadOptions.h:104

setup_blocks_affected
static int setup_blocks_affected(int b_from, int x_to, int y_to, int z_to)
Definition: place.c:1110

s_placer_opts::place_chan_width
int place_chan_width
Definition: vpr_types.h:641

s_bb::xmin
int xmin
Definition: vpr_types.h:532

net_pin_index
static int ** net_pin_index
Definition: place.c:149

initial_placement_pl_macros
static void initial_placement_pl_macros(int macros_max_num_tries, int *free_locations)
Definition: place.c:2647

ny
int ny
Definition: globals.c:47

s_annealing_sched::init_t
float init_t
Definition: vpr_types.h:627

vpr_printf
messagelogger vpr_printf
Definition: util.c:17

get_non_updateable_bb
static void get_non_updateable_bb(int inet, struct s_bb *bb_coord_new)
Definition: place.c:2237

s_pl_moved_block::znew
int znew
Definition: place.c:86

update_t
static void update_t(float *t, float std_dev, float rlim, float success_rat, struct s_annealing_sched annealing_sched)
Definition: place.c:983

s_net::num_sinks
int num_sinks
Definition: vpr_types.h:506

MINOR
#define MINOR
Definition: vpr_types.h:72

load_timing_graph_net_delays
void load_timing_graph_net_delays(float **net_delay)
Definition: path_delay.c:368

e_place_algorithm
e_place_algorithm
Definition: vpr_types.h:632

alloc_and_load_placement_macros
int alloc_and_load_placement_macros(t_direct_inf *directs, int num_directs, t_pl_macro **macros)
Definition: place_macro.c:281

s_chan_width_dist
Definition: physical_types.h:680

path_delay.h

s_slack
Definition: vpr_types.h:402

s_segment_inf
Definition: physical_types.h:714

s_pl_moved_block::yold
int yold
Definition: place.c:83

swap_result
swap_result
Definition: place.c:60

get_net_wirelength_estimate
static double get_net_wirelength_estimate(int inet, struct s_bb *bbptr)
Definition: place.c:2169

try_place
void try_place(struct s_placer_opts placer_opts, struct s_annealing_sched annealing_sched, t_chan_width_dist chan_width_dist, struct s_router_opts router_opts, struct s_det_routing_arch det_routing_arch, t_segment_inf *segment_inf, t_timing_inf timing_inf, t_direct_inf *directs, int num_directs)
Definition: place.c:310

UPDATED_ONCE
#define UPDATED_ONCE
Definition: place.c:46

TRUE
Definition: util.h:12

s_legal_pos::x
int x
Definition: place.c:111

s_pl_macro_member::x_offset
int x_offset
Definition: place_macro.h:148

s_router_opts
Definition: vpr_types.h:695

s_bb::ymax
int ymax
Definition: vpr_types.h:535

read_place.h