VPR-7.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros
place.c
Go to the documentation of this file.
1 /*#include <stdlib.h> */
2 #include <stdio.h>
3 #include <math.h>
4 #include <assert.h>
5 #include "util.h"
6 #include "vpr_types.h"
7 #include "globals.h"
8 #include "place.h"
9 #include "read_place.h"
10 #include "draw.h"
11 #include "place_and_route.h"
12 #include "net_delay.h"
13 #include "path_delay.h"
14 #include "timing_place_lookup.h"
15 #include "timing_place.h"
16 #include "place_stats.h"
17 #include "read_xml_arch_file.h"
18 #include "ReadOptions.h"
19 #include "vpr_utils.h"
20 #include "place_macro.h"
21 
22 /************** Types and defines local to place.c ***************************/
23 
24 /* Cut off for incremental bounding box updates. *
25  * 4 is fastest -- I checked. */
26 /* To turn off incremental bounding box updates, set this to a huge value */
27 #define SMALL_NET 4
28 
29 /* This defines the error tolerance for floating points variables used in *
30  * cost computation. 0.01 means that there is a 1% error tolerance. */
31 #define ERROR_TOL .01
32 
33 /* This defines the maximum number of swap attempts before invoking the *
34  * once-in-a-while placement legality check as well as floating point *
35  * variables round-offs check. */
36 #define MAX_MOVES_BEFORE_RECOMPUTE 50000
37 
38 /* The maximum number of tries when trying to place a carry chain at a *
39  * random location before trying exhaustive placement - find the fist *
40  * legal position and place it during initial placement. */
41 #define MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY 4
42 
43 /* Flags for the states of the bounding box. *
44  * Stored as char for memory efficiency. */
45 #define NOT_UPDATED_YET 'N'
46 #define UPDATED_ONCE 'U'
47 #define GOT_FROM_SCRATCH 'S'
48 
49 /* For comp_cost. NORMAL means use the method that generates updateable *
50  * bounding boxes for speed. CHECK means compute all bounding boxes from *
51  * scratch using a very simple routine to allow checks of the other *
52  * costs. */
55 };
56 
57 /* This is for the placement swap routines. A swap attempt could be *
58  * rejected, accepted or aborted (due to the limitations placed on the *
59  * carry chain support at this point). */
62 };
63 
64 #define MAX_INV_TIMING_COST 1.e9
65 /* Stops inverse timing cost from going to infinity with very lax timing constraints,
66 which avoids multiplying by a gigantic inverse_prev_timing_cost when auto-normalizing.
67 The exact value of this cost has relatively little impact, but should not be
68 large enough to be on the order of timing costs for normal constraints. */
69 
70 /********************** Data Sturcture Definition ***************************/
71 /* Stores the information of the move for a block that is *
72  * moved during placement *
73  * block_num: the index of the moved block *
74  * xold: the x_coord that the block is moved from *
75  * xnew: the x_coord that the block is moved to *
76  * yold: the y_coord that the block is moved from *
77  * xnew: the x_coord that the block is moved to *
78  */
79 typedef struct s_pl_moved_block {
80  int block_num;
81  int xold;
82  int xnew;
83  int yold;
84  int ynew;
85  int zold;
86  int znew;
89 
90 /* Stores the list of blocks to be moved in a swap during *
91  * placement. *
92  * num_moved_blocks: total number of blocks moved when *
93  * swapping two blocks. *
94  * moved blocks: a list of moved blocks data structure with *
95  * information on the move. *
96  * [0...num_moved_blocks-1] *
97  */
98 typedef struct s_pl_blocks_to_be_moved {
102 
103 
104 /********************** Variables local to place.c ***************************/
105 
106 /* Cost of a net, and a temporary cost of a net used during move assessment. */
107 static float *net_cost = NULL, *temp_net_cost = NULL; /* [0..num_nets-1] */
108 
109 /* legal positions for type */
110 typedef struct s_legal_pos {
111  int x;
112  int y;
113  int z;
114 }t_legal_pos;
115 
116 static t_legal_pos **legal_pos = NULL; /* [0..num_types-1][0..type_tsize - 1] */
117 static int *num_legal_pos = NULL; /* [0..num_legal_pos-1] */
118 
119 /* [0...num_nets-1] *
120  * A flag array to indicate whether the specific bounding box has been updated *
121  * in this particular swap or not. If it has been updated before, the code *
122  * must use the updated data, instead of the out-of-date data passed into the *
123  * subroutine, particularly used in try_swap(). The value NOT_UPDATED_YET *
124  * indicates that the net has not been updated before, UPDATED_ONCE indicated *
125  * that the net has been updated once, if it is going to be updated again, the *
126  * values from the previous update must be used. GOT_FROM_SCRATCH is only *
127  * applicable for nets larger than SMALL_NETS and it indicates that the *
128  * particular bounding box cannot be updated incrementally before, hence the *
129  * bounding box is got from scratch, so the bounding box would definitely be *
130  * right, DO NOT update again. *
131  * [0...num_nets-1] */
132 static char * bb_updated_before = NULL;
133 
134 /* [0..num_nets-1][1..num_pins-1]. What is the value of the timing */
135 /* driven portion of the cost function. These arrays will be set to */
136 /* (criticality * delay) for each point to point connection. */
137 static float **point_to_point_timing_cost = NULL;
138 static float **temp_point_to_point_timing_cost = NULL;
139 
140 /* [0..num_nets-1][1..num_pins-1]. What is the value of the delay */
141 /* for each connection in the circuit */
142 static float **point_to_point_delay_cost = NULL;
143 static float **temp_point_to_point_delay_cost = NULL;
144 
145 /* [0..num_blocks-1][0..pins_per_clb-1]. Indicates which pin on the net */
146 /* this block corresponds to, this is only required during timing-driven */
147 /* placement. It is used to allow us to update individual connections on */
148 /* each net */
149 static int **net_pin_index = NULL;
150 
151 /* [0..num_nets-1]. Store the bounding box coordinates and the number of *
152  * blocks on each of a net's bounding box (to allow efficient updates), *
153  * respectively. */
154 
155 static struct s_bb *bb_coords = NULL, *bb_num_on_edges = NULL;
156 
157 /* Store the information on the blocks to be moved in a swap during *
158  * placement, in the form of array of structs instead of struct with *
159  * arrays for cache effifiency *
160  */
162 
163 /* The arrays below are used to precompute the inverse of the average *
164  * number of tracks per channel between [subhigh] and [sublow]. Access *
165  * them as chan?_place_cost_fac[subhigh][sublow]. They are used to *
166  * speed up the computation of the cost function that takes the length *
167  * of the net bounding box in each dimension, divided by the average *
168  * number of tracks in that direction; for other cost functions they *
169  * will never be used. *
170  * [0...ny] [0...nx] */
172 
173 /* The following arrays are used by the try_swap function for speed. */
174 /* [0...num_nets-1] */
175 static struct s_bb *ts_bb_coord_new = NULL;
176 static struct s_bb *ts_bb_edge_new = NULL;
177 static int *ts_nets_to_update = NULL;
178 
179 /* The pl_macros array stores all the carry chains placement macros. *
180  * [0...num_pl_macros-1] */
181 static t_pl_macro * pl_macros = NULL;
182 static int num_pl_macros;
183 
184 /* These file-scoped variables keep track of the number of swaps *
185  * rejected, accepted or aborted. The total number of swap attempts *
186  * is the sum of the three number. */
187 static int num_swap_rejected = 0;
188 static int num_swap_accepted = 0;
189 static int num_swap_aborted = 0;
190 static int num_ts_called = 0;
191 
192 /* Expected crossing counts for nets with different #'s of pins. From *
193  * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me). *
194  * Multiplied to bounding box of a net to better estimate wire length *
195  * for higher fanout nets. Each entry is the correction factor for the *
196  * fanout index-1 */
197 static const float cross_count[50] = { /* [0..49] */1.0, 1.0, 1.0, 1.0828, 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493, 1.4974,
198  1.5455, 1.5937, 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924,
199  1.9288, 1.9652, 2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016,
200  2.2334, 2.2646, 2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772,
201  2.5064, 2.5356, 2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148,
202  2.7410, 2.7671, 2.7933 };
203 
204 /********************* Static subroutines local to place.c *******************/
205 #ifdef VERBOSE
206  static void print_clb_placement(const char *fname);
207 #endif
208 
210  float place_cost_exp, float ***old_region_occ_x,
211  float ***old_region_occ_y, struct s_placer_opts placer_opts,
212  t_direct_inf *directs, int num_directs);
213 
214 static void alloc_and_load_try_swap_structs();
215 
216 static void free_placement_structs(
217  float **old_region_occ_x, float **old_region_occ_y,
218  struct s_placer_opts placer_opts);
219 
220 static void alloc_and_load_for_fast_cost_update(float place_cost_exp);
221 
222 static void free_fast_cost_update(void);
223 
224 static void alloc_legal_placements();
225 static void load_legal_placements();
226 
227 static void free_legal_placements();
228 
229 static int check_macro_can_be_placed(int imacro, int itype, int x, int y, int z);
230 
231 static int try_place_macro(int itype, int ichoice, int imacro, int * free_locations);
232 
233 static void initial_placement_pl_macros(int macros_max_num_tries, int * free_locations);
234 
235 static void initial_placement_blocks(int * free_locations, enum e_pad_loc_type pad_loc_type);
236 
237 static void initial_placement(enum e_pad_loc_type pad_loc_type,
238  char *pad_loc_file);
239 
240 static float comp_bb_cost(enum cost_methods method);
241 
242 static int setup_blocks_affected(int b_from, int x_to, int y_to, int z_to);
243 
244 static int find_affected_blocks(int b_from, int x_to, int y_to, int z_to);
245 
246 static enum swap_result try_swap(float t, float *cost, float *bb_cost, float *timing_cost,
247  float rlim, float **old_region_occ_x,
248  float **old_region_occ_y,
249  enum e_place_algorithm place_algorithm, float timing_tradeoff,
250  float inverse_prev_bb_cost, float inverse_prev_timing_cost,
251  float *delay_cost);
252 
253 static void check_place(float bb_cost, float timing_cost,
254  enum e_place_algorithm place_algorithm,
255  float delay_cost);
256 
257 static float starting_t(float *cost_ptr, float *bb_cost_ptr,
258  float *timing_cost_ptr, float **old_region_occ_x,
259  float **old_region_occ_y,
260  struct s_annealing_sched annealing_sched, int max_moves, float rlim,
261  enum e_place_algorithm place_algorithm, float timing_tradeoff,
262  float inverse_prev_bb_cost, float inverse_prev_timing_cost,
263  float *delay_cost_ptr);
264 
265 static void update_t(float *t, float std_dev, float rlim, float success_rat,
266  struct s_annealing_sched annealing_sched);
267 
268 static void update_rlim(float *rlim, float success_rat);
269 
270 static int exit_crit(float t, float cost,
271  struct s_annealing_sched annealing_sched);
272 
273 static int count_connections(void);
274 
275 static double get_std_dev(int n, double sum_x_squared, double av_x);
276 
277 static float recompute_bb_cost(void);
278 
279 static float comp_td_point_to_point_delay(int inet, int ipin);
280 
281 static void update_td_cost(void);
282 
283 static void comp_delta_td_cost(float *delta_timing, float *delta_delay);
284 
285 static void comp_td_costs(float *timing_cost, float *connection_delay_sum);
286 
287 static enum swap_result assess_swap(float delta_c, float t);
288 
289 static boolean find_to(int x_from, int y_from, t_type_ptr type, float rlim, int *x_to, int *y_to);
290 
291 static void get_non_updateable_bb(int inet, struct s_bb *bb_coord_new);
292 
293 static void update_bb(int inet, struct s_bb *bb_coord_new,
294  struct s_bb *bb_edge_new, int xold, int yold, int xnew, int ynew);
295 
296 static int find_affected_nets(int *nets_to_update);
297 
298 static float get_net_cost(int inet, struct s_bb *bb_ptr);
299 
300 static void get_bb_from_scratch(int inet, struct s_bb *coords,
301  struct s_bb *num_on_edges);
302 
303 static double get_net_wirelength_estimate(int inet, struct s_bb *bbptr);
304 
305 static void free_try_swap_arrays(void);
306 
307 
308 /*****************************************************************************/
309 /* RESEARCH TODO: Bounding Box and rlim need to be redone for heterogeneous to prevent a QoR penalty */
310 void try_place(struct s_placer_opts placer_opts,
311  struct s_annealing_sched annealing_sched,
312  t_chan_width_dist chan_width_dist, struct s_router_opts router_opts,
313  struct s_det_routing_arch det_routing_arch, t_segment_inf * segment_inf,
314  t_timing_inf timing_inf, t_direct_inf *directs, int num_directs) {
315 
316  /* Does almost all the work of placing a circuit. Width_fac gives the *
317  * width of the widest channel. Place_cost_exp says what exponent the *
318  * width should be taken to when calculating costs. This allows a *
319  * greater bias for anisotropic architectures. */
320 
321  int tot_iter, inner_iter, success_sum, move_lim, moves_since_cost_recompute, width_fac,
322  num_connections, inet, ipin, outer_crit_iter_count, inner_crit_iter_count,
323  inner_recompute_limit, swap_result;
324  float t, success_rat, rlim, cost, timing_cost, bb_cost, new_bb_cost, new_timing_cost,
325  delay_cost, new_delay_cost, place_delay_value, inverse_prev_bb_cost, inverse_prev_timing_cost,
326  oldt, **old_region_occ_x, **old_region_occ_y, **net_delay = NULL, crit_exponent,
327  first_rlim, final_rlim, inverse_delta_rlim, critical_path_delay = UNDEFINED,
328  **remember_net_delay_original_ptr; /*used to free net_delay if it is re-assigned */
329  double av_cost, av_bb_cost, av_timing_cost, av_delay_cost, sum_of_squares, std_dev;
330  int total_swap_attempts;
331  float reject_rate;
332  float accept_rate;
333  float abort_rate;
334  char msg[BUFSIZE];
335  t_slack * slacks = NULL;
336 
337  /* Allocated here because it goes into timing critical code where each memory allocation is expensive */
338 
339  remember_net_delay_original_ptr = NULL; /*prevents compiler warning */
340 
341  /* init file scope variables */
342  num_swap_rejected = 0;
343  num_swap_accepted = 0;
344  num_swap_aborted = 0;
345  num_ts_called = 0;
346 
347  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
348  || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE
349  || placer_opts.enable_timing_computations) {
350  /*do this before the initial placement to avoid messing up the initial placement */
351  slacks = alloc_lookups_and_criticalities(chan_width_dist, router_opts,
352  det_routing_arch, segment_inf, timing_inf, &net_delay, directs, num_directs);
353 
354  remember_net_delay_original_ptr = net_delay;
355 
356  /*#define PRINT_LOWER_BOUND */
357 #ifdef PRINT_LOWER_BOUND
358  /*print the crit_path, assuming delay between blocks that are*
359  *block_dist apart*/
360 
361  if (placer_opts.block_dist <= nx)
362  place_delay_value =
363  delta_clb_to_clb[placer_opts.block_dist][0];
364  else if (placer_opts.block_dist <= ny)
365  place_delay_value =
366  delta_clb_to_clb[0][placer_opts.block_dist];
367  else
368  place_delay_value = delta_clb_to_clb[nx][ny];
369 
370  vpr_printf(TIO_MESSAGE_INFO, "\n");
371  vpr_printf(TIO_MESSAGE_INFO, "Lower bound assuming delay of %g\n", place_delay_value);
372 
373  load_constant_net_delay(net_delay, place_delay_value);
374  load_timing_graph_net_delays(net_delay);
375  do_timing_analysis(slacks, FALSE, FALSE, TRUE);
376 
377  if (getEchoEnabled()) {
384  }
385 
386  /*also print sink delays assuming 0 delay between blocks,
387  * this tells us how much logic delay is on each path */
388 
389  load_constant_net_delay(net_delay, 0);
390  load_timing_graph_net_delays(net_delay);
391  do_timing_analysis(slacks, FALSE, FALSE, TRUE);
392 
393 #endif
394 
395  }
396 
397  width_fac = placer_opts.place_chan_width;
398 
399  init_chan(width_fac, chan_width_dist);
400 
402  placer_opts.place_cost_exp,
403  &old_region_occ_x, &old_region_occ_y, placer_opts,
404  directs, num_directs);
405 
406  initial_placement(placer_opts.pad_loc_type, placer_opts.pad_loc_file);
407  init_draw_coords((float) width_fac);
408 
409  /* Storing the number of pins on each type of block makes the swap routine *
410  * slightly more efficient. */
411 
412  /* Gets initial cost and loads bounding boxes. */
413 
414  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
415  || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
416  bb_cost = comp_bb_cost(NORMAL);
417 
418  crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */
419 
420  num_connections = count_connections();
421  vpr_printf(TIO_MESSAGE_INFO, "\n");
422  vpr_printf(TIO_MESSAGE_INFO, "There are %d point to point connections in this circuit.\n", num_connections);
423  vpr_printf(TIO_MESSAGE_INFO, "\n");
424 
425  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) {
426  for (inet = 0; inet < num_nets; inet++)
427  for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
428  timing_place_crit[inet][ipin] = 0; /*dummy crit values */
429 
430  comp_td_costs(&timing_cost, &delay_cost); /*first pass gets delay_cost, which is used
431  * in criticality computations in the next call
432  * to comp_td_costs. */
433  place_delay_value = delay_cost / num_connections; /*used for computing criticalities */
434  load_constant_net_delay(net_delay, place_delay_value, clb_net,
435  num_nets);
436 
437  } else
438  place_delay_value = 0;
439 
440  if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
441  net_delay = point_to_point_delay_cost; /*this keeps net_delay up to date with *
442  * *the same values that the placer is using *
443  * *point_to_point_delay_cost is computed each*
444  * *time that comp_td_costs is called, and is *
445  * *also updated after any swap is accepted */
446  }
447 
448  load_timing_graph_net_delays(net_delay);
449  do_timing_analysis(slacks, FALSE, FALSE, FALSE);
450  load_criticalities(slacks, crit_exponent);
451  if (getEchoEnabled()) {
458  }
459  outer_crit_iter_count = 1;
460 
461  /*now we can properly compute costs */
462  comp_td_costs(&timing_cost, &delay_cost); /*also vpr_printf proper values into point_to_point_delay_cost */
463 
464  inverse_prev_timing_cost = 1 / timing_cost;
465  inverse_prev_bb_cost = 1 / bb_cost;
466  cost = 1; /*our new cost function uses normalized values of */
467  /*bb_cost and timing_cost, the value of cost will be reset */
468  /*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */
469  } else { /*BOUNDING_BOX_PLACE */
470  cost = bb_cost = comp_bb_cost(NORMAL);
471  timing_cost = 0;
472  delay_cost = 0;
473  place_delay_value = 0;
474  outer_crit_iter_count = 0;
475  num_connections = 0;
476  crit_exponent = 0;
477 
478  inverse_prev_timing_cost = 0; /*inverses not used */
479  inverse_prev_bb_cost = 0;
480  }
481 
482  move_lim = (int) (annealing_sched.inner_num * pow(num_blocks, 1.3333));
483 
484  if (placer_opts.inner_loop_recompute_divider != 0)
485  inner_recompute_limit = (int) (0.5
486  + (float) move_lim
487  / (float) placer_opts.inner_loop_recompute_divider);
488  else
489  /*don't do an inner recompute */
490  inner_recompute_limit = move_lim + 1;
491 
492  /* Sometimes I want to run the router with a random placement. Avoid *
493  * using 0 moves to stop division by 0 and 0 length vector problems, *
494  * by setting move_lim to 1 (which is still too small to do any *
495  * significant optimization). */
496 
497  if (move_lim <= 0)
498  move_lim = 1;
499 
500  rlim = (float) std::max(nx + 1, ny + 1);
501 
502  first_rlim = rlim; /*used in timing-driven placement for exponent computation */
503  final_rlim = 1;
504  inverse_delta_rlim = 1 / (first_rlim - final_rlim);
505 
506  t = starting_t(&cost, &bb_cost, &timing_cost,
507  old_region_occ_x, old_region_occ_y,
508  annealing_sched, move_lim, rlim,
509  placer_opts.place_algorithm, placer_opts.timing_tradeoff,
510  inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost);
511  tot_iter = 0;
512  moves_since_cost_recompute = 0;
513  vpr_printf(TIO_MESSAGE_INFO, "Initial placement cost: %g bb_cost: %g td_cost: %g delay_cost: %g\n",
514  cost, bb_cost, timing_cost, delay_cost);
515  vpr_printf(TIO_MESSAGE_INFO, "\n");
516 
517 #ifndef SPEC
518  vpr_printf(TIO_MESSAGE_INFO, "%9s %9s %11s %11s %11s %11s %8s %8s %7s %7s %7s %9s %7s\n",
519  "---------", "---------", "-----------", "-----------", "-----------", "-----------",
520  "--------", "--------", "-------", "-------", "-------", "---------", "-------");
521  vpr_printf(TIO_MESSAGE_INFO, "%9s %9s %11s %11s %11s %11s %8s %8s %7s %7s %7s %9s %7s\n",
522  "T", "Cost", "Av BB Cost", "Av TD Cost", "Av Tot Del",
523  "P to P Del", "d_max", "Ac Rate", "Std Dev", "R limit", "Exp",
524  "Tot Moves", "Alpha");
525  vpr_printf(TIO_MESSAGE_INFO, "%9s %9s %11s %11s %11s %11s %8s %8s %7s %7s %7s %9s %7s\n",
526  "---------", "---------", "-----------", "-----------", "-----------", "-----------",
527  "--------", "--------", "-------", "-------", "-------", "---------", "-------");
528 #endif
529 
530  sprintf(msg, "Initial Placement. Cost: %g BB Cost: %g TD Cost %g Delay Cost: %g \t Channel Factor: %d",
531  cost, bb_cost, timing_cost, delay_cost, width_fac);
533 
534  while (exit_crit(t, cost, annealing_sched) == 0) {
535 
536  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
537  || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
538  cost = 1;
539  }
540 
541  av_cost = 0.;
542  av_bb_cost = 0.;
543  av_delay_cost = 0.;
544  av_timing_cost = 0.;
545  sum_of_squares = 0.;
546  success_sum = 0;
547 
548  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
549  || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
550 
551  if (outer_crit_iter_count >= placer_opts.recompute_crit_iter
552  || placer_opts.inner_loop_recompute_divider != 0) {
553 #ifdef VERBOSE
554  vpr_printf(TIO_MESSAGE_INFO, "Outer loop recompute criticalities\n");
555 #endif
556  place_delay_value = delay_cost / num_connections;
557 
558  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE)
559  load_constant_net_delay(net_delay, place_delay_value,
560  clb_net, num_nets);
561  /*note, for path_based, the net delay is not updated since it is current,
562  *because it accesses point_to_point_delay array */
563 
564  load_timing_graph_net_delays(net_delay);
565  do_timing_analysis(slacks, FALSE, FALSE, FALSE);
566  load_criticalities(slacks, crit_exponent);
567  /*recompute costs from scratch, based on new criticalities */
568  comp_td_costs(&timing_cost, &delay_cost);
569  outer_crit_iter_count = 0;
570  }
571  outer_crit_iter_count++;
572 
573  /*at each temperature change we update these values to be used */
574  /*for normalizing the tradeoff between timing and wirelength (bb) */
575  inverse_prev_bb_cost = 1 / bb_cost;
576  /*Prevent inverse timing cost from going to infinity */
577  inverse_prev_timing_cost = std::min(1 / timing_cost, (float)MAX_INV_TIMING_COST);
578  }
579 
580  inner_crit_iter_count = 1;
581 
582  for (inner_iter = 0; inner_iter < move_lim; inner_iter++) {
583  swap_result = try_swap(t, &cost, &bb_cost, &timing_cost, rlim,
584  old_region_occ_x,
585  old_region_occ_y,
586  placer_opts.place_algorithm, placer_opts.timing_tradeoff,
587  inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost);
588  if (swap_result == ACCEPTED) {
589 
590  /* Move was accepted. Update statistics that are useful for the annealing schedule. */
591  success_sum++;
592  av_cost += cost;
593  av_bb_cost += bb_cost;
594  av_timing_cost += timing_cost;
595  av_delay_cost += delay_cost;
596  sum_of_squares += cost * cost;
598  } else if (swap_result == ABORTED) {
600  } else { // swap_result == REJECTED
602  }
603 
604 
605  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
606  || placer_opts.place_algorithm
608 
609  /* Do we want to re-timing analyze the circuit to get updated slack and criticality values?
610  * We do this only once in a while, since it is expensive.
611  */
612  if (inner_crit_iter_count >= inner_recompute_limit
613  && inner_iter != move_lim - 1) { /*on last iteration don't recompute */
614 
615  inner_crit_iter_count = 0;
616 #ifdef VERBOSE
617  vpr_printf(TIO_MESSAGE_TRACE, "Inner loop recompute criticalities\n");
618 #endif
619  if (placer_opts.place_algorithm
621  /* Use a constant delay per connection as the delay estimate, rather than
622  * estimating based on the current placement. Not a great idea, but not the
623  * default.
624  */
625  place_delay_value = delay_cost / num_connections;
626  load_constant_net_delay(net_delay, place_delay_value,
627  clb_net, num_nets);
628  }
629 
630  /* Using the delays in net_delay, do a timing analysis to update slacks and
631  * criticalities; then update the timing cost since it will change.
632  */
633  load_timing_graph_net_delays(net_delay);
634  do_timing_analysis(slacks, FALSE, FALSE, FALSE);
635  load_criticalities(slacks, crit_exponent);
636  comp_td_costs(&timing_cost, &delay_cost);
637  }
638  inner_crit_iter_count++;
639  }
640 #ifdef VERBOSE
641  vpr_printf(TIO_MESSAGE_TRACE, "t = %g cost = %g bb_cost = %g timing_cost = %g move = %d dmax = %g\n",
642  t, cost, bb_cost, timing_cost, inner_iter, delay_cost);
643  if (fabs(bb_cost - comp_bb_cost(CHECK)) > bb_cost * ERROR_TOL)
644  exit(1);
645 #endif
646  }
647 
648  /* Lines below prevent too much round-off error from accumulating *
649  * in the cost over many iterations. This round-off can lead to *
650  * error checks failing because the cost is different from what *
651  * you get when you recompute from scratch. */
652 
653  moves_since_cost_recompute += move_lim;
654  if (moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) {
655  new_bb_cost = recompute_bb_cost();
656  if (fabs(new_bb_cost - bb_cost) > bb_cost * ERROR_TOL) {
657  vpr_printf(TIO_MESSAGE_ERROR, "in try_place: new_bb_cost = %g, old bb_cost = %g\n",
658  new_bb_cost, bb_cost);
659  exit(1);
660  }
661  bb_cost = new_bb_cost;
662 
663  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
664  || placer_opts.place_algorithm
666  comp_td_costs(&new_timing_cost, &new_delay_cost);
667  if (fabs(new_timing_cost - timing_cost) > timing_cost * ERROR_TOL) {
668  vpr_printf(TIO_MESSAGE_ERROR, "in try_place: new_timing_cost = %g, old timing_cost = %g\n",
669  new_timing_cost, timing_cost);
670  exit(1);
671  }
672  if (fabs(new_delay_cost - delay_cost) > delay_cost * ERROR_TOL) {
673  vpr_printf(TIO_MESSAGE_ERROR, "in try_place: new_delay_cost = %g, old delay_cost = %g\n",
674  new_delay_cost, delay_cost);
675  exit(1);
676  }
677  timing_cost = new_timing_cost;
678  }
679 
680  if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) {
681  cost = new_bb_cost;
682  }
683  moves_since_cost_recompute = 0;
684  }
685 
686  tot_iter += move_lim;
687  success_rat = ((float) success_sum) / move_lim;
688  if (success_sum == 0) {
689  av_cost = cost;
690  av_bb_cost = bb_cost;
691  av_timing_cost = timing_cost;
692  av_delay_cost = delay_cost;
693  } else {
694  av_cost /= success_sum;
695  av_bb_cost /= success_sum;
696  av_timing_cost /= success_sum;
697  av_delay_cost /= success_sum;
698  }
699  std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
700 
701  oldt = t; /* for finding and printing alpha. */
702  update_t(&t, std_dev, rlim, success_rat, annealing_sched);
703 
704 #ifndef SPEC
705  critical_path_delay = get_critical_path_delay();
706  vpr_printf(TIO_MESSAGE_INFO, "%9.5f %9.5g %11.6g %11.6g %11.6g %11.6g %8.4f %8.4f %7.4f %7.4f %7.4f %9d %7.4f\n",
707  oldt, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, place_delay_value,
708  critical_path_delay, success_rat, std_dev, rlim, crit_exponent, tot_iter, t / oldt);
709 #endif
710 
711  sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: %g",
712  cost, bb_cost, timing_cost, t);
714  update_rlim(&rlim, success_rat);
715 
716  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
717  || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
718  crit_exponent = (1 - (rlim - final_rlim) * inverse_delta_rlim)
719  * (placer_opts.td_place_exp_last
720  - placer_opts.td_place_exp_first)
721  + placer_opts.td_place_exp_first;
722  }
723 #ifdef VERBOSE
724  if (getEchoEnabled()) {
725  print_clb_placement("first_iteration_clb_placement.echo");
726  }
727 #endif
728  }
729 
730  t = 0; /* freeze out */
731  av_cost = 0.;
732  av_bb_cost = 0.;
733  av_timing_cost = 0.;
734  sum_of_squares = 0.;
735  av_delay_cost = 0.;
736  success_sum = 0;
737 
738  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
739  || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
740  /*at each temperature change we update these values to be used */
741  /*for normalizing the tradeoff between timing and wirelength (bb) */
742  if (outer_crit_iter_count >= placer_opts.recompute_crit_iter
743  || placer_opts.inner_loop_recompute_divider != 0) {
744 
745 #ifdef VERBOSE
746  vpr_printf(TIO_MESSAGE_INFO, "Outer loop recompute criticalities\n");
747 #endif
748  place_delay_value = delay_cost / num_connections;
749 
750  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE)
751  load_constant_net_delay(net_delay, place_delay_value, clb_net,
752  num_nets);
753 
754  load_timing_graph_net_delays(net_delay);
755  do_timing_analysis(slacks, FALSE, FALSE, FALSE);
756  load_criticalities(slacks, crit_exponent);
757  /*recompute criticaliies */
758  comp_td_costs(&timing_cost, &delay_cost);
759  outer_crit_iter_count = 0;
760  }
761  outer_crit_iter_count++;
762 
763  inverse_prev_bb_cost = 1 / (bb_cost);
764  /*Prevent inverse timing cost from going to infinity */
765  inverse_prev_timing_cost = std::min(1 / timing_cost, (float)MAX_INV_TIMING_COST);
766  }
767 
768  inner_crit_iter_count = 1;
769 
770  for (inner_iter = 0; inner_iter < move_lim; inner_iter++) {
771  swap_result = try_swap(t, &cost, &bb_cost, &timing_cost, rlim,
772  old_region_occ_x, old_region_occ_y,
773  placer_opts.place_algorithm, placer_opts.timing_tradeoff,
774  inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost);
775 
776  if (swap_result == ACCEPTED) {
777  success_sum++;
778  av_cost += cost;
779  av_bb_cost += bb_cost;
780  av_delay_cost += delay_cost;
781  av_timing_cost += timing_cost;
782  sum_of_squares += cost * cost;
783 
784  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
785  || placer_opts.place_algorithm
787 
788  if (inner_crit_iter_count >= inner_recompute_limit
789  && inner_iter != move_lim - 1) {
790 
791  inner_crit_iter_count = 0;
792 #ifdef VERBOSE
793  vpr_printf(TIO_MESSAGE_TRACE, "Inner loop recompute criticalities\n");
794 #endif
795  if (placer_opts.place_algorithm
797  place_delay_value = delay_cost / num_connections;
798  load_constant_net_delay(net_delay, place_delay_value,
799  clb_net, num_nets);
800  }
801 
802  load_timing_graph_net_delays(net_delay);
803  do_timing_analysis(slacks, FALSE, FALSE, FALSE);
804  load_criticalities(slacks, crit_exponent);
805  comp_td_costs(&timing_cost, &delay_cost);
806  }
807  inner_crit_iter_count++;
808  }
810  } else if (swap_result == ABORTED) {
812  } else {
814  }
815 
816 #ifdef VERBOSE
817  vpr_printf(TIO_MESSAGE_INFO, "t = %g, cost = %g, move = %d\n", t, cost, tot_iter);
818 #endif
819  }
820  tot_iter += move_lim;
821  success_rat = ((float) success_sum) / move_lim;
822  if (success_sum == 0) {
823  av_cost = cost;
824  av_bb_cost = bb_cost;
825  av_delay_cost = delay_cost;
826  av_timing_cost = timing_cost;
827  } else {
828  av_cost /= success_sum;
829  av_bb_cost /= success_sum;
830  av_delay_cost /= success_sum;
831  av_timing_cost /= success_sum;
832  }
833 
834  std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
835 
836 #ifndef SPEC
837  vpr_printf(TIO_MESSAGE_INFO, "%9.5f %9.5g %11.6g %11.6g %11.6g %11.6g %8s %8.4f %7.4f %7.4f %7.4f %9d\n",
838  t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, place_delay_value,
839  " ", success_rat, std_dev, rlim, crit_exponent, tot_iter);
840 #endif
841 
842  // TODO:
843  // 1. print a message about number of aborted moves.
844  // 2. add some subroutine hierarchy! Too big!
845  // 3. put statistics counters (av_cost, success_sum, etc.) in a struct so a
846  // pointer to it can be passed around.
847 
848 #ifdef VERBOSE
850  print_clb_placement(getEchoFileName(E_ECHO_END_CLB_PLACEMENT));
851  }
852 #endif
853 
854  check_place(bb_cost, timing_cost,
855  placer_opts.place_algorithm, delay_cost);
856 
857  if (placer_opts.enable_timing_computations
858  && placer_opts.place_algorithm == BOUNDING_BOX_PLACE) {
859  /*need this done since the timing data has not been kept up to date*
860  *in bounding_box mode */
861  for (inet = 0; inet < num_nets; inet++)
862  for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
863  timing_place_crit[inet][ipin] = 0; /*dummy crit values */
864  comp_td_costs(&timing_cost, &delay_cost); /*computes point_to_point_delay_cost */
865  }
866 
867  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
868  || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE
869  || placer_opts.enable_timing_computations) {
870  net_delay = point_to_point_delay_cost; /*this makes net_delay up to date with *
871  *the same values that the placer is using*/
872  load_timing_graph_net_delays(net_delay);
873 
874  do_timing_analysis(slacks, FALSE, FALSE, FALSE);
875 
876  if (getEchoEnabled()) {
887  }
888 
889  /* Print critical path delay. */
890  critical_path_delay = get_critical_path_delay();
891  vpr_printf(TIO_MESSAGE_INFO, "\n");
892  vpr_printf(TIO_MESSAGE_INFO, "Placement estimated critical path delay: %g ns\n", critical_path_delay);
893  }
894 
895  sprintf(msg, "Placement. Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d",
896  cost, bb_cost, timing_cost, width_fac);
897  vpr_printf(TIO_MESSAGE_INFO, "Placement cost: %g, bb_cost: %g, td_cost: %g, delay_cost: %g\n",
898  cost, bb_cost, timing_cost, delay_cost);
900 
901  // Print out swap statistics
902  total_swap_attempts = num_swap_rejected + num_swap_accepted + num_swap_aborted;
903  reject_rate = num_swap_rejected / total_swap_attempts;
904  accept_rate = num_swap_accepted / total_swap_attempts;
905  abort_rate = num_swap_aborted / total_swap_attempts;
906  vpr_printf(TIO_MESSAGE_INFO, "Placement total # of swap attempts: %d\n", total_swap_attempts);
907  vpr_printf(TIO_MESSAGE_INFO, "\tSwap reject rate: %g\n", reject_rate);
908  vpr_printf(TIO_MESSAGE_INFO, "\tSwap accept rate: %g\n", accept_rate);
909  vpr_printf(TIO_MESSAGE_INFO, "\tSwap abort rate: %g\n", abort_rate);
910 
911 
912 #ifdef SPEC
913  vpr_printf(TIO_MESSAGE_INFO, "Total moves attempted: %d.0\n", tot_iter);
914 #endif
915 
917  old_region_occ_x, old_region_occ_y,
918  placer_opts);
919  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
920  || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE
921  || placer_opts.enable_timing_computations) {
922 
923  net_delay = remember_net_delay_original_ptr;
924  free_lookups_and_criticalities(&net_delay, slacks);
925  }
926 
928 }
929 
930 static int count_connections() {
931  /*only count non-global connections */
932 
933  int count, inet;
934 
935  count = 0;
936 
937  for (inet = 0; inet < num_nets; inet++) {
938 
939  if (clb_net[inet].is_global)
940  continue;
941 
942  count += clb_net[inet].num_sinks;
943  }
944  return (count);
945 }
946 
947 static double get_std_dev(int n, double sum_x_squared, double av_x) {
948 
949  /* Returns the standard deviation of data set x. There are n sample points, *
950  * sum_x_squared is the summation over n of x^2 and av_x is the average x. *
951  * All operations are done in double precision, since round off error can be *
952  * a problem in the initial temp. std_dev calculation for big circuits. */
953 
954  double std_dev;
955 
956  if (n <= 1)
957  std_dev = 0.;
958  else
959  std_dev = (sum_x_squared - n * av_x * av_x) / (double) (n - 1);
960 
961  if (std_dev > 0.) /* Very small variances sometimes round negative */
962  std_dev = sqrt(std_dev);
963  else
964  std_dev = 0.;
965 
966  return (std_dev);
967 }
968 
969 static void update_rlim(float *rlim, float success_rat) {
970 
971  /* Update the range limited to keep acceptance prob. near 0.44. Use *
972  * a floating point rlim to allow gradual transitions at low temps. */
973 
974  float upper_lim;
975 
976  *rlim = (*rlim) * (1. - 0.44 + success_rat);
977  upper_lim = std::max(nx + 1, ny + 1);
978  *rlim = std::min(*rlim, upper_lim);
979  *rlim = std::max(*rlim, (float)1.);
980 }
981 
982 /* Update the temperature according to the annealing schedule selected. */
983 static void update_t(float *t, float std_dev, float rlim, float success_rat,
984  struct s_annealing_sched annealing_sched) {
985 
986  /* float fac; */
987 
988  if (annealing_sched.type == USER_SCHED) {
989  *t = annealing_sched.alpha_t * (*t);
990  }
991 
992  /* Old standard deviation based stuff is below. This bogs down horribly
993  * for big circuits (alu4 and especially bigkey_mod). */
994  /* #define LAMBDA .7 */
995  /* ------------------------------------ */
996 #if 0
997  else if (std_dev == 0.)
998  {
999  *t = 0.;
1000  }
1001  else
1002  {
1003  fac = exp(-LAMBDA * (*t) / std_dev);
1004  fac = max(0.5, fac);
1005  *t = (*t) * fac;
1006  }
1007 #endif
1008  /* ------------------------------------- */
1009 
1010  else { /* AUTO_SCHED */
1011  if (success_rat > 0.96) {
1012  *t = (*t) * 0.5;
1013  } else if (success_rat > 0.8) {
1014  *t = (*t) * 0.9;
1015  } else if (success_rat > 0.15 || rlim > 1.) {
1016  *t = (*t) * 0.95;
1017  } else {
1018  *t = (*t) * 0.8;
1019  }
1020  }
1021 }
1022 
1023 static int exit_crit(float t, float cost,
1024  struct s_annealing_sched annealing_sched) {
1025 
1026  /* Return 1 when the exit criterion is met. */
1027 
1028  if (annealing_sched.type == USER_SCHED) {
1029  if (t < annealing_sched.exit_t) {
1030  return (1);
1031  } else {
1032  return (0);
1033  }
1034  }
1035 
1036  /* Automatic annealing schedule */
1037 
1038  if (t < 0.005 * cost / num_nets) {
1039  return (1);
1040  } else {
1041  return (0);
1042  }
1043 }
1044 
1045 static float starting_t(float *cost_ptr, float *bb_cost_ptr,
1046  float *timing_cost_ptr, float **old_region_occ_x,
1047  float **old_region_occ_y,
1048  struct s_annealing_sched annealing_sched, int max_moves, float rlim,
1049  enum e_place_algorithm place_algorithm, float timing_tradeoff,
1050  float inverse_prev_bb_cost, float inverse_prev_timing_cost,
1051  float *delay_cost_ptr) {
1052 
1053  /* Finds the starting temperature (hot condition). */
1054 
1055  int i, num_accepted, move_lim, swap_result;
1056  double std_dev, av, sum_of_squares; /* Double important to avoid round off */
1057 
1058  if (annealing_sched.type == USER_SCHED)
1059  return (annealing_sched.init_t);
1060 
1061  move_lim = std::min(max_moves, num_blocks);
1062 
1063  num_accepted = 0;
1064  av = 0.;
1065  sum_of_squares = 0.;
1066 
1067  /* Try one move per block. Set t high so essentially all accepted. */
1068 
1069  for (i = 0; i < move_lim; i++) {
1070  swap_result = try_swap(HUGE_POSITIVE_FLOAT, cost_ptr, bb_cost_ptr, timing_cost_ptr, rlim,
1071  old_region_occ_x, old_region_occ_y,
1072  place_algorithm, timing_tradeoff,
1073  inverse_prev_bb_cost, inverse_prev_timing_cost, delay_cost_ptr);
1074 
1075  if (swap_result == ACCEPTED) {
1076  num_accepted++;
1077  av += *cost_ptr;
1078  sum_of_squares += *cost_ptr * (*cost_ptr);
1080  } else if (swap_result == ABORTED) {
1081  num_swap_aborted++;
1082  } else {
1084  }
1085  }
1086 
1087  if (num_accepted != 0)
1088  av /= num_accepted;
1089  else
1090  av = 0.;
1091 
1092  std_dev = get_std_dev(num_accepted, sum_of_squares, av);
1093 
1094 #ifdef DEBUG
1095  if (num_accepted != move_lim) {
1096  vpr_printf(TIO_MESSAGE_WARNING, "Starting t: %d of %d configurations accepted.\n", num_accepted, move_lim);
1097  }
1098 #endif
1099 
1100 #ifdef VERBOSE
1101  vpr_printf(TIO_MESSAGE_INFO, "std_dev: %g, average cost: %g, starting temp: %g\n", std_dev, av, 20. * std_dev);
1102 #endif
1103 
1104  /* Set the initial temperature to 20 times the standard of deviation */
1105  /* so that the initial temperature adjusts according to the circuit */
1106  return (20. * std_dev);
1107 }
1108 
1109 
1110 static int setup_blocks_affected(int b_from, int x_to, int y_to, int z_to) {
1111 
1112  /* Find all the blocks affected when b_from is swapped with b_to.
1113  * Returns abort_swap. */
1114 
1115  int imoved_blk, imacro;
1116  int x_from, y_from, z_from, b_to;
1117  int abort_swap = FALSE;
1118 
1119  x_from = block[b_from].x;
1120  y_from = block[b_from].y;
1121  z_from = block[b_from].z;
1122 
1123  b_to = grid[x_to][y_to].blocks[z_to];
1124 
1125  // Check whether the to_location is empty
1126  if (b_to == EMPTY) {
1127 
1128  // Swap the block, dont swap the nets yet
1129  block[b_from].x = x_to;
1130  block[b_from].y = y_to;
1131  block[b_from].z = z_to;
1132 
1133  // Sets up the blocks moved
1134  imoved_blk = blocks_affected.num_moved_blocks;
1135  blocks_affected.moved_blocks[imoved_blk].block_num = b_from;
1136  blocks_affected.moved_blocks[imoved_blk].xold = x_from;
1137  blocks_affected.moved_blocks[imoved_blk].xnew = x_to;
1138  blocks_affected.moved_blocks[imoved_blk].yold = y_from;
1139  blocks_affected.moved_blocks[imoved_blk].ynew = y_to;
1140  blocks_affected.moved_blocks[imoved_blk].zold = z_from;
1141  blocks_affected.moved_blocks[imoved_blk].znew = z_to;
1142  blocks_affected.moved_blocks[imoved_blk].swapped_to_empty = TRUE;
1143  blocks_affected.num_moved_blocks ++;
1144 
1145  } else {
1146 
1147  // Does not allow a swap with a macro yet
1148  get_imacro_from_iblk(&imacro, b_to, pl_macros, num_pl_macros);
1149  if (imacro != -1) {
1150  abort_swap = TRUE;
1151  return (abort_swap);
1152  }
1153 
1154  // Swap the block, dont swap the nets yet
1155  block[b_to].x = x_from;
1156  block[b_to].y = y_from;
1157  block[b_to].z = z_from;
1158 
1159  block[b_from].x = x_to;
1160  block[b_from].y = y_to;
1161  block[b_from].z = z_to;
1162 
1163  // Sets up the blocks moved
1164  imoved_blk = blocks_affected.num_moved_blocks;
1165  blocks_affected.moved_blocks[imoved_blk].block_num = b_from;
1166  blocks_affected.moved_blocks[imoved_blk].xold = x_from;
1167  blocks_affected.moved_blocks[imoved_blk].xnew = x_to;
1168  blocks_affected.moved_blocks[imoved_blk].yold = y_from;
1169  blocks_affected.moved_blocks[imoved_blk].ynew = y_to;
1170  blocks_affected.moved_blocks[imoved_blk].zold = z_from;
1171  blocks_affected.moved_blocks[imoved_blk].znew = z_to;
1172  blocks_affected.moved_blocks[imoved_blk].swapped_to_empty = FALSE;
1173  blocks_affected.num_moved_blocks ++;
1174 
1175  imoved_blk = blocks_affected.num_moved_blocks;
1176  blocks_affected.moved_blocks[imoved_blk].block_num = b_to;
1177  blocks_affected.moved_blocks[imoved_blk].xold = x_to;
1178  blocks_affected.moved_blocks[imoved_blk].xnew = x_from;
1179  blocks_affected.moved_blocks[imoved_blk].yold = y_to;
1180  blocks_affected.moved_blocks[imoved_blk].ynew = y_from;
1181  blocks_affected.moved_blocks[imoved_blk].zold = z_to;
1182  blocks_affected.moved_blocks[imoved_blk].znew = z_from;
1183  blocks_affected.moved_blocks[imoved_blk].swapped_to_empty = FALSE;
1184  blocks_affected.num_moved_blocks ++;
1185 
1186  } // Finish swapping the blocks and setting up blocks_affected
1187 
1188  return (abort_swap);
1189 
1190 }
1191 
1192 static int find_affected_blocks(int b_from, int x_to, int y_to, int z_to) {
1193 
1194  /* Finds and set ups the affected_blocks array.
1195  * Returns abort_swap. */
1196 
1197  int imacro, imember;
1198  int x_swap_offset, y_swap_offset, z_swap_offset, x_from, y_from, z_from, b_to;
1199  int curr_b_from, curr_x_from, curr_y_from, curr_z_from, curr_b_to, curr_x_to, curr_y_to, curr_z_to;
1200  int abort_swap = FALSE;
1201 
1202  x_from = block[b_from].x;
1203  y_from = block[b_from].y;
1204  z_from = block[b_from].z;
1205 
1206  b_to = grid[x_to][y_to].blocks[z_to];
1207 
1208  get_imacro_from_iblk(&imacro, b_from, pl_macros, num_pl_macros);
1209  if ( imacro != -1) {
1210  // b_from is part of a macro, I need to swap the whole macro
1211 
1212  // Record down the relative position of the swap
1213  x_swap_offset = x_to - x_from;
1214  y_swap_offset = y_to - y_from;
1215  z_swap_offset = z_to - z_from;
1216 
1217  for (imember = 0; imember < pl_macros[imacro].num_blocks && abort_swap == FALSE; imember++) {
1218 
1219  // Gets the new from and to info for every block in the macro
1220  // cannot use the old from and to info
1221  curr_b_from = pl_macros[imacro].members[imember].blk_index;
1222 
1223  curr_x_from = block[curr_b_from].x;
1224  curr_y_from = block[curr_b_from].y;
1225  curr_z_from = block[curr_b_from].z;
1226 
1227  curr_x_to = curr_x_from + x_swap_offset;
1228  curr_y_to = curr_y_from + y_swap_offset;
1229  curr_z_to = curr_z_from + z_swap_offset;
1230 
1231  // Make sure that the swap_to location is still on the chip
1232  if (curr_x_to < 1 || curr_x_to > nx || curr_y_to < 1 || curr_y_to > ny || curr_z_to < 0) {
1233  abort_swap = TRUE;
1234  } else {
1235  curr_b_to = grid[curr_x_to][curr_y_to].blocks[curr_z_to];
1236  abort_swap = setup_blocks_affected(curr_b_from, curr_x_to, curr_y_to, curr_z_to);
1237  }
1238 
1239  } // Finish going through all the blocks in the macro
1240 
1241  } else {
1242 
1243  // This is not a macro - I could use the from and to info from before
1244  abort_swap = setup_blocks_affected(b_from, x_to, y_to, z_to);
1245 
1246  } // Finish handling cases for blocks in macro and otherwise
1247 
1248  return (abort_swap);
1249 
1250 }
1251 
1252 static enum swap_result try_swap(float t, float *cost, float *bb_cost, float *timing_cost,
1253  float rlim, float **old_region_occ_x,
1254  float **old_region_occ_y,
1255  enum e_place_algorithm place_algorithm, float timing_tradeoff,
1256  float inverse_prev_bb_cost, float inverse_prev_timing_cost,
1257  float *delay_cost) {
1258 
1259  /* Picks some block and moves it to another spot. If this spot is *
1260  * occupied, switch the blocks. Assess the change in cost function *
1261  * and accept or reject the move. If rejected, return 0. If *
1262  * accepted return 1. Pass back the new value of the cost function. *
1263  * rlim is the range limiter. */
1264 
1265  enum swap_result keep_switch;
1266  int b_from, x_from, y_from, z_from, x_to, y_to, z_to;
1267  int num_nets_affected;
1268  float delta_c, bb_delta_c, timing_delta_c, delay_delta_c;
1269  int inet, iblk, bnum, iblk_pin, inet_affected;
1270  int abort_swap = FALSE;
1271 
1272  num_ts_called ++;
1273 
1274  /* I'm using negative values of temp_net_cost as a flag, so DO NOT *
1275  * use cost functions that can go negative. */
1276 
1277  delta_c = 0; /* Change in cost due to this swap. */
1278  bb_delta_c = 0;
1279  timing_delta_c = 0;
1280  delay_delta_c = 0.0;
1281 
1282  /* Pick a random block to be swapped with another random block */
1283  b_from = my_irand(num_blocks - 1);
1284 
1285  /* If the pins are fixed we never move them from their initial *
1286  * random locations. The code below could be made more efficient *
1287  * by using the fact that pins appear first in the block list, *
1288  * but this shouldn't cause any significant slowdown and won't be *
1289  * broken if I ever change the parser so that the pins aren't *
1290  * necessarily at the start of the block list. */
1291  while (block[b_from].isFixed == TRUE) {
1292  b_from = my_irand(num_blocks - 1);
1293  }
1294 
1295  x_from = block[b_from].x;
1296  y_from = block[b_from].y;
1297  z_from = block[b_from].z;
1298 
1299  if (!find_to(x_from, y_from, block[b_from].type, rlim, &x_to,
1300  &y_to))
1301  return REJECTED;
1302 
1303  z_to = 0;
1304  if (grid[x_to][y_to].type->capacity > 1) {
1305  z_to = my_irand(grid[x_to][y_to].type->capacity - 1);
1306  }
1307 
1308  /* Make the switch in order to make computing the new bounding *
1309  * box simpler. If the cost increase is too high, switch them *
1310  * back. (block data structures switched, clbs not switched *
1311  * until success of move is determined.) *
1312  * Also check that whether those are the only 2 blocks *
1313  * to be moved - check for carry chains and other placement *
1314  * macros. */
1315 
1316  /* Check whether the from_block is part of a macro first. *
1317  * If it is, the whole macro has to be moved. Calculate the *
1318  * x, y, z offsets of the swap to maintain relative placements *
1319  * of the blocks. Abort the swap if the to_block is part of a *
1320  * macro (not supported yet). */
1321 
1322  abort_swap = find_affected_blocks(b_from, x_to, y_to, z_to);
1323 
1324  if (abort_swap == FALSE) {
1325 
1326  // Find all the nets affected by this swap
1327  num_nets_affected = find_affected_nets(ts_nets_to_update);
1328 
1329  /* Go through all the pins in all the blocks moved and update the bounding boxes. *
1330  * Do not update the net cost here since it should only be updated once per net, *
1331  * not once per pin */
1332  for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++)
1333  {
1334  bnum = blocks_affected.moved_blocks[iblk].block_num;
1335 
1336  /* Go through all the pins in the moved block */
1337  for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++)
1338  {
1339  inet = block[bnum].nets[iblk_pin];
1340  if (inet == OPEN)
1341  continue;
1342  if (clb_net[inet].is_global)
1343  continue;
1344 
1345  if (clb_net[inet].num_sinks < SMALL_NET) {
1346  if(bb_updated_before[inet] == NOT_UPDATED_YET)
1347  /* Brute force bounding box recomputation, once only for speed. */
1348  get_non_updateable_bb(inet, &ts_bb_coord_new[inet]);
1349  } else {
1350  update_bb(inet, &ts_bb_coord_new[inet],
1351  &ts_bb_edge_new[inet],
1352  blocks_affected.moved_blocks[iblk].xold,
1353  blocks_affected.moved_blocks[iblk].yold + block[bnum].type->pin_height[iblk_pin],
1354  blocks_affected.moved_blocks[iblk].xnew,
1355  blocks_affected.moved_blocks[iblk].ynew + block[bnum].type->pin_height[iblk_pin]);
1356  }
1357  }
1358  }
1359 
1360  /* Now update the cost function. The cost is only updated once for every net *
1361  * May have to do major optimizations here later. */
1362  for (inet_affected = 0; inet_affected < num_nets_affected; inet_affected++) {
1363  inet = ts_nets_to_update[inet_affected];
1364 
1365  temp_net_cost[inet] = get_net_cost(inet, &ts_bb_coord_new[inet]);
1366  bb_delta_c += temp_net_cost[inet] - net_cost[inet];
1367  }
1368 
1369  if (place_algorithm == NET_TIMING_DRIVEN_PLACE
1370  || place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
1371  /*in this case we redefine delta_c as a combination of timing and bb. *
1372  *additionally, we normalize all values, therefore delta_c is in *
1373  *relation to 1*/
1374 
1375  comp_delta_td_cost(&timing_delta_c, &delay_delta_c);
1376 
1377  delta_c = (1 - timing_tradeoff) * bb_delta_c * inverse_prev_bb_cost
1378  + timing_tradeoff * timing_delta_c * inverse_prev_timing_cost;
1379  } else {
1380  delta_c = bb_delta_c;
1381  }
1382 
1383  /* 1 -> move accepted, 0 -> rejected. */
1384  keep_switch = assess_swap(delta_c, t);
1385 
1386  if (keep_switch == ACCEPTED) {
1387  *cost = *cost + delta_c;
1388  *bb_cost = *bb_cost + bb_delta_c;
1389 
1390  if (place_algorithm == NET_TIMING_DRIVEN_PLACE
1391  || place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
1392  /*update the point_to_point_timing_cost and point_to_point_delay_cost
1393  * values from the temporary values */
1394  *timing_cost = *timing_cost + timing_delta_c;
1395  *delay_cost = *delay_cost + delay_delta_c;
1396 
1397  update_td_cost();
1398  }
1399 
1400  /* update net cost functions and reset flags. */
1401  for (inet_affected = 0; inet_affected < num_nets_affected; inet_affected++) {
1402  inet = ts_nets_to_update[inet_affected];
1403 
1404  bb_coords[inet] = ts_bb_coord_new[inet];
1405  if (clb_net[inet].num_sinks >= SMALL_NET)
1406  bb_num_on_edges[inet] = ts_bb_edge_new[inet];
1407 
1408  net_cost[inet] = temp_net_cost[inet];
1409 
1410  /* negative temp_net_cost value is acting as a flag. */
1411  temp_net_cost[inet] = -1;
1413  }
1414 
1415  /* Update clb data structures since we kept the move. */
1416  /* Swap physical location */
1417  for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {
1418 
1419  x_to = blocks_affected.moved_blocks[iblk].xnew;
1420  y_to = blocks_affected.moved_blocks[iblk].ynew;
1421  z_to = blocks_affected.moved_blocks[iblk].znew;
1422 
1423  x_from = blocks_affected.moved_blocks[iblk].xold;
1424  y_from = blocks_affected.moved_blocks[iblk].yold;
1425  z_from = blocks_affected.moved_blocks[iblk].zold;
1426 
1427  b_from = blocks_affected.moved_blocks[iblk].block_num;
1428 
1429  grid[x_to][y_to].blocks[z_to] = b_from;
1430 
1431  if (blocks_affected.moved_blocks[iblk].swapped_to_empty == TRUE) {
1432  grid[x_to][y_to].usage++;
1433  grid[x_from][y_from].usage--;
1434  grid[x_from][y_from].blocks[z_from] = -1;
1435  }
1436 
1437  } // Finish updating clb for all blocks
1438 
1439  } else { /* Move was rejected. */
1440 
1441  /* Reset the net cost function flags first. */
1442  for (inet_affected = 0; inet_affected < num_nets_affected; inet_affected++) {
1443  inet = ts_nets_to_update[inet_affected];
1444  temp_net_cost[inet] = -1;
1446  }
1447 
1448  /* Restore the block data structures to their state before the move. */
1449  for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {
1450  b_from = blocks_affected.moved_blocks[iblk].block_num;
1451 
1452  block[b_from].x = blocks_affected.moved_blocks[iblk].xold;
1453  block[b_from].y = blocks_affected.moved_blocks[iblk].yold;
1454  block[b_from].z = blocks_affected.moved_blocks[iblk].zold;
1455  }
1456  }
1457 
1458  /* Resets the num_moved_blocks, but do not free blocks_moved array. Defensive Coding */
1459  blocks_affected.num_moved_blocks = 0;
1460 
1461  //check_place(*bb_cost, *timing_cost, place_algorithm, *delay_cost);
1462 
1463  return (keep_switch);
1464  } else {
1465 
1466  /* Restore the block data structures to their state before the move. */
1467  for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {
1468  b_from = blocks_affected.moved_blocks[iblk].block_num;
1469 
1470  block[b_from].x = blocks_affected.moved_blocks[iblk].xold;
1471  block[b_from].y = blocks_affected.moved_blocks[iblk].yold;
1472  block[b_from].z = blocks_affected.moved_blocks[iblk].zold;
1473  }
1474 
1475  /* Resets the num_moved_blocks, but do not free blocks_moved array. Defensive Coding */
1476  blocks_affected.num_moved_blocks = 0;
1477 
1478  return ABORTED;
1479  }
1480 }
1481 
1482 static int find_affected_nets(int *nets_to_update) {
1483 
1484  /* Puts a list of all the nets that are changed by the swap into *
1485  * nets_to_update. Returns the number of affected nets. */
1486 
1487  int iblk, iblk_pin, inet, bnum, num_affected_nets;
1488 
1489  num_affected_nets = 0;
1490  /* Go through all the blocks moved */
1491  for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++)
1492  {
1493  bnum = blocks_affected.moved_blocks[iblk].block_num;
1494 
1495  /* Go through all the pins in the moved block */
1496  for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++)
1497  {
1498  /* Updates the pins_to_nets array, set to -1 if *
1499  * that pin is not connected to any net or it is a *
1500  * global pin that does not need to be updated */
1501  inet = block[bnum].nets[iblk_pin];
1502  if (inet == OPEN)
1503  continue;
1504  if (clb_net[inet].is_global)
1505  continue;
1506 
1507  if (temp_net_cost[inet] < 0.) {
1508  /* Net not marked yet. */
1509  nets_to_update[num_affected_nets] = inet;
1510  num_affected_nets++;
1511 
1512  /* Flag to say we've marked this net. */
1513  temp_net_cost[inet] = 1.;
1514  }
1515  }
1516  }
1517  return num_affected_nets;
1518 }
1519 
1520 static boolean find_to(int x_from, int y_from, t_type_ptr type, float rlim, int *x_to, int *y_to) {
1521 
1522  /* Returns the point to which I want to swap, properly range limited.
1523  * rlim must always be between 1 and nx (inclusive) for this routine
1524  * to work. Assumes that a column only contains blocks of the same type.
1525  */
1526 
1527  int x_rel, y_rel, rlx, rly, min_x, max_x, min_y, max_y;
1528  int num_tries;
1529  int active_area;
1530  boolean is_legal;
1531  int block_index, ipos;
1532 
1533  assert(type == grid[x_from][y_from].type);
1534 
1535  rlx = (int)std::min((float)nx + 1, rlim);
1536  rly = (int)std::min((float)ny + 1, rlim); /* Added rly for aspect_ratio != 1 case. */
1537  active_area = 4 * rlx * rly;
1538 
1539  min_x = std::max(0, x_from - rlx);
1540  max_x = std::min(nx + 1, x_from + rlx);
1541  min_y = std::max(0, y_from - rly);
1542  max_y = std::min(ny + 1, y_from + rly);
1543 
1544 #ifdef DEBUG
1545  if (rlx < 1 || rlx > nx + 1) {
1546  vpr_printf(TIO_MESSAGE_ERROR, "in find_to: rlx = %d\n", rlx);
1547  exit(1);
1548  }
1549 #endif
1550 
1551  num_tries = 0;
1552  block_index = type->index;
1553 
1554  do { /* Until legal */
1555  is_legal = TRUE;
1556 
1557  /* Limit the number of tries when searching for an alternative position */
1558  if(num_tries >= 2 * std::min(active_area / type->height, num_legal_pos[block_index]) + 10) {
1559  /* Tried randomly searching for a suitable position */
1560  return FALSE;
1561  } else {
1562  num_tries++;
1563  }
1564  if(nx / 4 < rlx ||
1565  ny / 4 < rly ||
1566  num_legal_pos[block_index] < active_area) {
1567  ipos = my_irand(num_legal_pos[block_index] - 1);
1568  *x_to = legal_pos[block_index][ipos].x;
1569  *y_to = legal_pos[block_index][ipos].y;
1570  } else {
1571  x_rel = my_irand(std::max(0, max_x - min_x));
1572  *x_to = min_x + x_rel;
1573  y_rel = my_irand(std::max(0, max_y - min_y));
1574  *y_to = min_y + y_rel;
1575  *y_to = (*y_to) - grid[*x_to][*y_to].offset; /* align it */
1576  }
1577 
1578  if((x_from == *x_to) && (y_from == *y_to)) {
1579  is_legal = FALSE;
1580  } else if(*x_to > max_x || *x_to < min_x || *y_to > max_y || *y_to < min_y) {
1581  is_legal = FALSE;
1582  } else if(grid[*x_to][*y_to].type != grid[x_from][y_from].type) {
1583  is_legal = FALSE;
1584  }
1585 
1586  assert(*x_to >= 0 && *x_to <= nx + 1);
1587  assert(*y_to >= 0 && *y_to <= ny + 1);
1588  } while (is_legal == FALSE);
1589 
1590 #ifdef DEBUG
1591  if (*x_to < 0 || *x_to > nx + 1 || *y_to < 0 || *y_to > ny + 1) {
1592  vpr_printf(TIO_MESSAGE_ERROR, "in routine find_to: (x_to,y_to) = (%d,%d)\n", *x_to, *y_to);
1593  exit(1);
1594  }
1595 #endif
1596  assert(type == grid[*x_to][*y_to].type);
1597  return TRUE;
1598 }
1599 
1600 static enum swap_result assess_swap(float delta_c, float t) {
1601 
1602  /* Returns: 1 -> move accepted, 0 -> rejected. */
1603 
1604  enum swap_result accept;
1605  float prob_fac, fnum;
1606 
1607  if (delta_c <= 0) {
1608 
1609 #ifdef SPEC /* Reduce variation in final solution due to round off */
1610  fnum = my_frand();
1611 #endif
1612 
1613  accept = ACCEPTED;
1614  return (accept);
1615  }
1616 
1617  if (t == 0.)
1618  return (REJECTED);
1619 
1620  fnum = my_frand();
1621  prob_fac = exp(-delta_c / t);
1622  if (prob_fac > fnum) {
1623  accept = ACCEPTED;
1624  } else {
1625  accept = REJECTED;
1626  }
1627  return (accept);
1628 }
1629 
1630 static float recompute_bb_cost(void) {
1631 
1632  /* Recomputes the cost to eliminate roundoff that may have accrued. *
1633  * This routine does as little work as possible to compute this new *
1634  * cost. */
1635 
1636  int inet;
1637  float cost;
1638 
1639  cost = 0;
1640 
1641  for (inet = 0; inet < num_nets; inet++) { /* for each net ... */
1642  if (clb_net[inet].is_global == FALSE) { /* Do only if not global. */
1643 
1644  /* Bounding boxes don't have to be recomputed; they're correct. */
1645  cost += net_cost[inet];
1646  }
1647  }
1648 
1649  return (cost);
1650 }
1651 
1652 static float comp_td_point_to_point_delay(int inet, int ipin) {
1653 
1654  /*returns the delay of one point to point connection */
1655 
1656  int source_block, sink_block;
1657  int delta_x, delta_y;
1658  t_type_ptr source_type, sink_type;
1659  float delay_source_to_sink;
1660 
1661  delay_source_to_sink = 0.;
1662 
1663  source_block = clb_net[inet].node_block[0];
1664  source_type = block[source_block].type;
1665 
1666  sink_block = clb_net[inet].node_block[ipin];
1667  sink_type = block[sink_block].type;
1668 
1669  assert(source_type != NULL);
1670  assert(sink_type != NULL);
1671 
1672  delta_x = abs(block[sink_block].x - block[source_block].x);
1673  delta_y = abs(block[sink_block].y - block[source_block].y);
1674 
1675  /* TODO low priority: Could be merged into one look-up table */
1676  /* Note: This heuristic is terrible on Quality of Results.
1677  * A much better heuristic is to create a more comprehensive lookup table but
1678  * it's too late in the release cycle to do this. Pushing until the next release */
1679  if (source_type == IO_TYPE) {
1680  if (sink_type == IO_TYPE)
1681  delay_source_to_sink = delta_io_to_io[delta_x][delta_y];
1682  else
1683  delay_source_to_sink = delta_io_to_clb[delta_x][delta_y];
1684  } else {
1685  if (sink_type == IO_TYPE)
1686  delay_source_to_sink = delta_clb_to_io[delta_x][delta_y];
1687  else
1688  delay_source_to_sink = delta_clb_to_clb[delta_x][delta_y];
1689  }
1690  if (delay_source_to_sink < 0) {
1691  vpr_printf(TIO_MESSAGE_ERROR, "in comp_td_point_to_point_delay: Bad delay_source_to_sink value delta(%d, %d) delay of %g\n", delta_x, delta_y, delay_source_to_sink);
1692  vpr_printf(TIO_MESSAGE_ERROR, "in comp_td_point_to_point_delay: Delay is less than 0\n");
1693  exit(1);
1694  }
1695 
1696  return (delay_source_to_sink);
1697 }
1698 
1699 static void update_td_cost(void) {
1700  /* Update the point_to_point_timing_cost values from the temporary *
1701  * values for all connections that have changed. */
1702 
1703  int iblk_pin, net_pin, inet, ipin;
1704  int iblk, iblk2, bnum, driven_by_moved_block;
1705 
1706  /* Go through all the blocks moved. */
1707  for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++)
1708  {
1709  bnum = blocks_affected.moved_blocks[iblk].block_num;
1710  for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++) {
1711 
1712  inet = block[bnum].nets[iblk_pin];
1713 
1714  if (inet == OPEN)
1715  continue;
1716 
1717  if (clb_net[inet].is_global)
1718  continue;
1719 
1720  net_pin = net_pin_index[bnum][iblk_pin];
1721 
1722  if (net_pin != 0) {
1723 
1724  driven_by_moved_block = FALSE;
1725  for (iblk2 = 0; iblk2 < blocks_affected.num_moved_blocks; iblk2++)
1726  { if (clb_net[inet].node_block[0] == blocks_affected.moved_blocks[iblk2].block_num)
1727  driven_by_moved_block = TRUE;
1728  }
1729 
1730  /* The following "if" prevents the value from being updated twice. */
1731  if (driven_by_moved_block == FALSE) {
1732  point_to_point_delay_cost[inet][net_pin] =
1733  temp_point_to_point_delay_cost[inet][net_pin];
1734  temp_point_to_point_delay_cost[inet][net_pin] = -1;
1735  point_to_point_timing_cost[inet][net_pin] =
1736  temp_point_to_point_timing_cost[inet][net_pin];
1737  temp_point_to_point_timing_cost[inet][net_pin] = -1;
1738  }
1739  } else { /* This net is being driven by a moved block, recompute */
1740  /* All point to point connections on this net. */
1741  for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
1742  point_to_point_delay_cost[inet][ipin] =
1743  temp_point_to_point_delay_cost[inet][ipin];
1744  temp_point_to_point_delay_cost[inet][ipin] = -1;
1745  point_to_point_timing_cost[inet][ipin] =
1746  temp_point_to_point_timing_cost[inet][ipin];
1747  temp_point_to_point_timing_cost[inet][ipin] = -1;
1748  } /* Finished updating the pin */
1749  }
1750  } /* Finished going through all the pins in the moved block */
1751  } /* Finished going through all the blocks moved */
1752 }
1753 
1754 static void comp_delta_td_cost(float *delta_timing, float *delta_delay) {
1755 
1756  /*a net that is being driven by a moved block must have all of its */
1757  /*sink timing costs recomputed. A net that is driving a moved block */
1758  /*must only have the timing cost on the connection driving the input */
1759  /*pin computed */
1760 
1761  int inet, net_pin, ipin;
1762  float delta_timing_cost, delta_delay_cost, temp_delay;
1763  int iblk, iblk2, bnum, iblk_pin, driven_by_moved_block;
1764 
1765  delta_timing_cost = 0.;
1766  delta_delay_cost = 0.;
1767 
1768  /* Go through all the blocks moved */
1769  for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++)
1770  {
1771  bnum = blocks_affected.moved_blocks[iblk].block_num;
1772  /* Go through all the pins in the moved block */
1773  for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++) {
1774  inet = block[bnum].nets[iblk_pin];
1775 
1776  if (inet == OPEN)
1777  continue;
1778 
1779  if (clb_net[inet].is_global)
1780  continue;
1781 
1782  net_pin = net_pin_index[bnum][iblk_pin];
1783 
1784  if (net_pin != 0) {
1785  /* If this net is being driven by a block that has moved, we do not *
1786  * need to compute the change in the timing cost (here) since it will *
1787  * be computed in the fanout of the net on the driving block, also *
1788  * computing it here would double count the change, and mess up the *
1789  * delta_timing_cost value. */
1790  driven_by_moved_block = FALSE;
1791  for (iblk2 = 0; iblk2 < blocks_affected.num_moved_blocks; iblk2++)
1792  { if (clb_net[inet].node_block[0] == blocks_affected.moved_blocks[iblk2].block_num)
1793  driven_by_moved_block = TRUE;
1794  }
1795 
1796  if (driven_by_moved_block == FALSE) {
1797  temp_delay = comp_td_point_to_point_delay(inet, net_pin);
1798  temp_point_to_point_delay_cost[inet][net_pin] = temp_delay;
1799 
1800  temp_point_to_point_timing_cost[inet][net_pin] =
1801  timing_place_crit[inet][net_pin] * temp_delay;
1802  delta_timing_cost += temp_point_to_point_timing_cost[inet][net_pin]
1803  - point_to_point_timing_cost[inet][net_pin];
1804  delta_delay_cost += temp_point_to_point_delay_cost[inet][net_pin]
1805  - point_to_point_delay_cost[inet][net_pin];
1806  }
1807  } else { /* This net is being driven by a moved block, recompute */
1808  /* All point to point connections on this net. */
1809  for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
1810  temp_delay = comp_td_point_to_point_delay(inet, ipin);
1811  temp_point_to_point_delay_cost[inet][ipin] = temp_delay;
1812 
1813  temp_point_to_point_timing_cost[inet][ipin] =
1814  timing_place_crit[inet][ipin] * temp_delay;
1815  delta_timing_cost += temp_point_to_point_timing_cost[inet][ipin]
1816  - point_to_point_timing_cost[inet][ipin];
1817  delta_delay_cost += temp_point_to_point_delay_cost[inet][ipin]
1818  - point_to_point_delay_cost[inet][ipin];
1819 
1820  } /* Finished updating the pin */
1821  }
1822  } /* Finished going through all the pins in the moved block */
1823  } /* Finished going through all the blocks moved */
1824 
1825  *delta_timing = delta_timing_cost;
1826  *delta_delay = delta_delay_cost;
1827 }
1828 
1829 static void comp_td_costs(float *timing_cost, float *connection_delay_sum) {
1830  /* Computes the cost (from scratch) due to the delays and criticalities *
1831  * on all point to point connections, we define the timing cost of *
1832  * each connection as criticality*delay. */
1833 
1834  int inet, ipin;
1835  float loc_timing_cost, loc_connection_delay_sum, temp_delay_cost,
1836  temp_timing_cost;
1837 
1838  loc_timing_cost = 0.;
1839  loc_connection_delay_sum = 0.;
1840 
1841  for (inet = 0; inet < num_nets; inet++) { /* For each net ... */
1842  if (clb_net[inet].is_global == FALSE) { /* Do only if not global. */
1843 
1844  for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
1845 
1846  temp_delay_cost = comp_td_point_to_point_delay(inet, ipin);
1847  temp_timing_cost = temp_delay_cost * timing_place_crit[inet][ipin];
1848 
1849  loc_connection_delay_sum += temp_delay_cost;
1850  point_to_point_delay_cost[inet][ipin] = temp_delay_cost;
1851  temp_point_to_point_delay_cost[inet][ipin] = -1; /* Undefined */
1852 
1853  point_to_point_timing_cost[inet][ipin] = temp_timing_cost;
1854  temp_point_to_point_timing_cost[inet][ipin] = -1; /* Undefined */
1855  loc_timing_cost += temp_timing_cost;
1856  }
1857  }
1858  }
1859 
1860  /* Make sure timing cost does not go above MIN_TIMING_COST. */
1861  *timing_cost = loc_timing_cost;
1862 
1863  *connection_delay_sum = loc_connection_delay_sum;
1864 }
1865 
1866 static float comp_bb_cost(enum cost_methods method) {
1867 
1868  /* Finds the cost from scratch. Done only when the placement *
1869  * has been radically changed (i.e. after initial placement). *
1870  * Otherwise find the cost change incrementally. If method *
1871  * check is NORMAL, we find bounding boxes that are updateable *
1872  * for the larger nets. If method is CHECK, all bounding boxes *
1873  * are found via the non_updateable_bb routine, to provide a *
1874  * cost which can be used to check the correctness of the *
1875  * other routine. */
1876 
1877  int inet;
1878  float cost;
1879  double expected_wirelength;
1880 
1881  cost = 0;
1882  expected_wirelength = 0.0;
1883 
1884  for (inet = 0; inet < num_nets; inet++) { /* for each net ... */
1885 
1886  if (clb_net[inet].is_global == FALSE) { /* Do only if not global. */
1887 
1888  /* Small nets don't use incremental updating on their bounding boxes, *
1889  * so they can use a fast bounding box calculator. */
1890 
1891  if (clb_net[inet].num_sinks >= SMALL_NET && method == NORMAL) {
1892  get_bb_from_scratch(inet, &bb_coords[inet],
1893  &bb_num_on_edges[inet]);
1894  } else {
1895  get_non_updateable_bb(inet, &bb_coords[inet]);
1896  }
1897 
1898  net_cost[inet] = get_net_cost(inet, &bb_coords[inet]);
1899  cost += net_cost[inet];
1900  if (method == CHECK)
1901  expected_wirelength += get_net_wirelength_estimate(inet,
1902  &bb_coords[inet]);
1903  }
1904  }
1905 
1906  if (method == CHECK) {
1907  vpr_printf(TIO_MESSAGE_INFO, "\n");
1908  vpr_printf(TIO_MESSAGE_INFO, "BB estimate of min-dist (placement) wirelength: %.0f\n", expected_wirelength);
1909  }
1910  return (cost);
1911 }
1912 
1914  float **old_region_occ_x, float **old_region_occ_y,
1915  struct s_placer_opts placer_opts) {
1916 
1917  /* Frees the major structures needed by the placer (and not needed *
1918  * elsewhere). */
1919 
1920  int inet, imacro;
1921 
1924 
1925  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
1926  || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE
1927  || placer_opts.enable_timing_computations) {
1928  for (inet = 0; inet < num_nets; inet++) {
1929  /*add one to the address since it is indexed from 1 not 0 */
1930 
1931  point_to_point_delay_cost[inet]++;
1932  free(point_to_point_delay_cost[inet]);
1933 
1935  free(point_to_point_timing_cost[inet]);
1936 
1938  free(temp_point_to_point_delay_cost[inet]);
1939 
1941  free(temp_point_to_point_timing_cost[inet]);
1942  }
1945 
1948 
1949  free_matrix(net_pin_index, 0, num_blocks - 1, 0, sizeof(int));
1950  }
1951 
1952  free(net_cost);
1953  free(temp_net_cost);
1954  free(bb_num_on_edges);
1955  free(bb_coords);
1956 
1958 
1959  for (imacro = 0; imacro < num_pl_macros; imacro ++)
1960  free(pl_macros[imacro].members);
1961  free(pl_macros);
1962 
1963  net_cost = NULL; /* Defensive coding. */
1964  temp_net_cost = NULL;
1965  bb_num_on_edges = NULL;
1966  bb_coords = NULL;
1967  pl_macros = NULL;
1968 
1969  /* Frees up all the data structure used in vpr_utils. */
1972 
1973 }
1974 
1976  float place_cost_exp, float ***old_region_occ_x,
1977  float ***old_region_occ_y, struct s_placer_opts placer_opts,
1978  t_direct_inf *directs, int num_directs) {
1979 
1980  /* Allocates the major structures needed only by the placer, primarily for *
1981  * computing costs quickly and such. */
1982 
1983  int inet, ipin, max_pins_per_clb, i;
1984 
1987 
1988  max_pins_per_clb = 0;
1989  for (i = 0; i < num_types; i++) {
1990  max_pins_per_clb = std::max(max_pins_per_clb, type_descriptors[i].num_pins);
1991  }
1992 
1993  if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
1994  || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE
1995  || placer_opts.enable_timing_computations) {
1996  /* Allocate structures associated with timing driven placement */
1997  /* [0..num_nets-1][1..num_pins-1] */
1998  point_to_point_delay_cost = (float **) my_malloc(
1999  num_nets * sizeof(float *));
2001  num_nets * sizeof(float *));
2002 
2004  num_nets * sizeof(float *));
2006  num_nets * sizeof(float *));
2007 
2008  for (inet = 0; inet < num_nets; inet++) {
2009 
2010  /* In the following, subract one so index starts at *
2011  * 1 instead of 0 */
2012  point_to_point_delay_cost[inet] = (float *) my_malloc(
2013  clb_net[inet].num_sinks * sizeof(float));
2014  point_to_point_delay_cost[inet]--;
2015 
2016  temp_point_to_point_delay_cost[inet] = (float *) my_malloc(
2017  clb_net[inet].num_sinks * sizeof(float));
2019 
2020  point_to_point_timing_cost[inet] = (float *) my_malloc(
2021  clb_net[inet].num_sinks * sizeof(float));
2023 
2024  temp_point_to_point_timing_cost[inet] = (float *) my_malloc(
2025  clb_net[inet].num_sinks * sizeof(float));
2027  }
2028  for (inet = 0; inet < num_nets; inet++) {
2029  for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
2030  point_to_point_delay_cost[inet][ipin] = 0;
2031  temp_point_to_point_delay_cost[inet][ipin] = 0;
2032  }
2033  }
2034  }
2035 
2036  net_cost = (float *) my_malloc(num_nets * sizeof(float));
2037  temp_net_cost = (float *) my_malloc(num_nets * sizeof(float));
2038  bb_updated_before = (char*)my_calloc(num_nets, sizeof(char));
2039 
2040  /* Used to store costs for moves not yet made and to indicate when a net's *
2041  * cost has been recomputed. temp_net_cost[inet] < 0 means net's cost hasn't *
2042  * been recomputed. */
2043 
2044  for (inet = 0; inet < num_nets; inet++){
2046  temp_net_cost[inet] = -1.;
2047  }
2048 
2049  bb_coords = (struct s_bb *) my_malloc(num_nets * sizeof(struct s_bb));
2050  bb_num_on_edges = (struct s_bb *) my_malloc(num_nets * sizeof(struct s_bb));
2051 
2052  /* Shouldn't use them; crash hard if I do! */
2053  *old_region_occ_x = NULL;
2054  *old_region_occ_y = NULL;
2055 
2056  alloc_and_load_for_fast_cost_update(place_cost_exp);
2057 
2059 
2061 
2062  num_pl_macros = alloc_and_load_placement_macros(directs, num_directs, &pl_macros);
2063 }
2064 
2066  /* Allocate the local bb_coordinate storage, etc. only once. */
2067  /* Allocate with size num_nets for any number of nets affected. */
2068  ts_bb_coord_new = (struct s_bb *) my_calloc(
2069  num_nets, sizeof(struct s_bb));
2070  ts_bb_edge_new = (struct s_bb *) my_calloc(
2071  num_nets, sizeof(struct s_bb));
2072  ts_nets_to_update = (int *) my_calloc(num_nets, sizeof(int));
2073 
2074  /* Allocate with size num_blocks for any number of moved block. */
2075  blocks_affected.moved_blocks = (t_pl_moved_block*)my_calloc(
2076  num_blocks, sizeof(t_pl_moved_block) );
2077  blocks_affected.num_moved_blocks = 0;
2078 
2079 }
2080 
2081 static void get_bb_from_scratch(int inet, struct s_bb *coords,
2082  struct s_bb *num_on_edges) {
2083 
2084  /* This routine finds the bounding box of each net from scratch (i.e. *
2085  * from only the block location information). It updates both the *
2086  * coordinate and number of pins on each edge information. It *
2087  * should only be called when the bounding box information is not valid. */
2088 
2089  int ipin, bnum, pnum, x, y, xmin, xmax, ymin, ymax;
2090  int xmin_edge, xmax_edge, ymin_edge, ymax_edge;
2091  int n_pins;
2092 
2093  n_pins = clb_net[inet].num_sinks + 1;
2094 
2095  bnum = clb_net[inet].node_block[0];
2096  pnum = clb_net[inet].node_block_pin[0];
2097 
2098  x = block[bnum].x;
2099  y = block[bnum].y + block[bnum].type->pin_height[pnum];
2100 
2101  x = std::max(std::min(x, nx), 1);
2102  y = std::max(std::min(y, ny), 1);
2103 
2104  xmin = x;
2105  ymin = y;
2106  xmax = x;
2107  ymax = y;
2108  xmin_edge = 1;
2109  ymin_edge = 1;
2110  xmax_edge = 1;
2111  ymax_edge = 1;
2112 
2113  for (ipin = 1; ipin < n_pins; ipin++) {
2114  bnum = clb_net[inet].node_block[ipin];
2115  pnum = clb_net[inet].node_block_pin[ipin];
2116  x = block[bnum].x;
2117  y = block[bnum].y + block[bnum].type->pin_height[pnum];
2118 
2119  /* Code below counts IO blocks as being within the 1..nx, 1..ny clb array. *
2120  * This is because channels do not go out of the 0..nx, 0..ny range, and *
2121  * I always take all channels impinging on the bounding box to be within *
2122  * that bounding box. Hence, this "movement" of IO blocks does not affect *
2123  * the which channels are included within the bounding box, and it *
2124  * simplifies the code a lot. */
2125 
2126  x = std::max(std::min(x, nx), 1);
2127  y = std::max(std::min(y, ny), 1);
2128 
2129  if (x == xmin) {
2130  xmin_edge++;
2131  }
2132  if (x == xmax) { /* Recall that xmin could equal xmax -- don't use else */
2133  xmax_edge++;
2134  } else if (x < xmin) {
2135  xmin = x;
2136  xmin_edge = 1;
2137  } else if (x > xmax) {
2138  xmax = x;
2139  xmax_edge = 1;
2140  }
2141 
2142  if (y == ymin) {
2143  ymin_edge++;
2144  }
2145  if (y == ymax) {
2146  ymax_edge++;
2147  } else if (y < ymin) {
2148  ymin = y;
2149  ymin_edge = 1;
2150  } else if (y > ymax) {
2151  ymax = y;
2152  ymax_edge = 1;
2153  }
2154  }
2155 
2156  /* Copy the coordinates and number on edges information into the proper *
2157  * structures. */
2158  coords->xmin = xmin;
2159  coords->xmax = xmax;
2160  coords->ymin = ymin;
2161  coords->ymax = ymax;
2162 
2163  num_on_edges->xmin = xmin_edge;
2164  num_on_edges->xmax = xmax_edge;
2165  num_on_edges->ymin = ymin_edge;
2166  num_on_edges->ymax = ymax_edge;
2167 }
2168 
2169 static double get_net_wirelength_estimate(int inet, struct s_bb *bbptr) {
2170 
2171  /* WMF: Finds the estimate of wirelength due to one net by looking at *
2172  * its coordinate bounding box. */
2173 
2174  double ncost, crossing;
2175 
2176  /* Get the expected "crossing count" of a net, based on its number *
2177  * of pins. Extrapolate for very large nets. */
2178 
2179  if (((clb_net[inet].num_sinks + 1) > 50)
2180  && ((clb_net[inet].num_sinks + 1) < 85)) {
2181  crossing = 2.7933 + 0.02616 * ((clb_net[inet].num_sinks + 1) - 50);
2182  } else if ((clb_net[inet].num_sinks + 1) >= 85) {
2183  crossing = 2.7933 + 0.011 * (clb_net[inet].num_sinks + 1)
2184  - 0.0000018 * (clb_net[inet].num_sinks + 1)
2185  * (clb_net[inet].num_sinks + 1);
2186  } else {
2187  crossing = cross_count[(clb_net[inet].num_sinks + 1) - 1];
2188  }
2189 
2190  /* Could insert a check for xmin == xmax. In that case, assume *
2191  * connection will be made with no bends and hence no x-cost. *
2192  * Same thing for y-cost. */
2193 
2194  /* Cost = wire length along channel * cross_count / average *
2195  * channel capacity. Do this for x, then y direction and add. */
2196 
2197  ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing;
2198 
2199  ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing;
2200 
2201  return (ncost);
2202 }
2203 
2204 static float get_net_cost(int inet, struct s_bb *bbptr) {
2205 
2206  /* Finds the cost due to one net by looking at its coordinate bounding *
2207  * box. */
2208 
2209  float ncost, crossing;
2210 
2211  /* Get the expected "crossing count" of a net, based on its number *
2212  * of pins. Extrapolate for very large nets. */
2213 
2214  if ((clb_net[inet].num_sinks + 1) > 50) {
2215  crossing = 2.7933 + 0.02616 * ((clb_net[inet].num_sinks + 1) - 50);
2216  /* crossing = 3.0; Old value */
2217  } else {
2218  crossing = cross_count[(clb_net[inet].num_sinks + 1) - 1];
2219  }
2220 
2221  /* Could insert a check for xmin == xmax. In that case, assume *
2222  * connection will be made with no bends and hence no x-cost. *
2223  * Same thing for y-cost. */
2224 
2225  /* Cost = wire length along channel * cross_count / average *
2226  * channel capacity. Do this for x, then y direction and add. */
2227 
2228  ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing
2229  * chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1];
2230 
2231  ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing
2232  * chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1];
2233 
2234  return (ncost);
2235 }
2236 
2237 static void get_non_updateable_bb(int inet, struct s_bb *bb_coord_new) {
2238 
2239  /* Finds the bounding box of a net and stores its coordinates in the *
2240  * bb_coord_new data structure. This routine should only be called *
2241  * for small nets, since it does not determine enough information for *
2242  * the bounding box to be updated incrementally later. *
2243  * Currently assumes channels on both sides of the CLBs forming the *
2244  * edges of the bounding box can be used. Essentially, I am assuming *
2245  * the pins always lie on the outside of the bounding box. */
2246 
2247  int k, xmax, ymax, xmin, ymin, x, y;
2248  int bnum, pnum;
2249 
2250  bnum = clb_net[inet].node_block[0];
2251  pnum = clb_net[inet].node_block_pin[0];
2252  x = block[bnum].x;
2253  y = block[bnum].y + block[bnum].type->pin_height[pnum];
2254 
2255  xmin = x;
2256  ymin = y;
2257  xmax = x;
2258  ymax = y;
2259 
2260  for (k = 1; k < (clb_net[inet].num_sinks + 1); k++) {
2261  bnum = clb_net[inet].node_block[k];
2262  pnum = clb_net[inet].node_block_pin[k];
2263  x = block[bnum].x;
2264  y = block[bnum].y + block[bnum].type->pin_height[pnum];
2265 
2266  if (x < xmin) {
2267  xmin = x;
2268  } else if (x > xmax) {
2269  xmax = x;
2270  }
2271 
2272  if (y < ymin) {
2273  ymin = y;
2274  } else if (y > ymax) {
2275  ymax = y;
2276  }
2277  }
2278 
2279  /* Now I've found the coordinates of the bounding box. There are no *
2280  * channels beyond nx and ny, so I want to clip to that. As well, *
2281  * since I'll always include the channel immediately below and the *
2282  * channel immediately to the left of the bounding box, I want to *
2283  * clip to 1 in both directions as well (since minimum channel index *
2284  * is 0). See route.c for a channel diagram. */
2285 
2286  bb_coord_new->xmin = std::max(std::min(xmin, nx), 1);
2287  bb_coord_new->ymin = std::max(std::min(ymin, ny), 1);
2288  bb_coord_new->xmax = std::max(std::min(xmax, nx), 1);
2289  bb_coord_new->ymax = std::max(std::min(ymax, ny), 1);
2290 }
2291 
2292 static void update_bb(int inet, struct s_bb *bb_coord_new,
2293  struct s_bb *bb_edge_new, int xold, int yold, int xnew, int ynew) {
2294 
2295  /* Updates the bounding box of a net by storing its coordinates in *
2296  * the bb_coord_new data structure and the number of blocks on each *
2297  * edge in the bb_edge_new data structure. This routine should only *
2298  * be called for large nets, since it has some overhead relative to *
2299  * just doing a brute force bounding box calculation. The bounding *
2300  * box coordinate and edge information for inet must be valid before *
2301  * this routine is called. *
2302  * Currently assumes channels on both sides of the CLBs forming the *
2303  * edges of the bounding box can be used. Essentially, I am assuming *
2304  * the pins always lie on the outside of the bounding box. *
2305  * The x and y coordinates are the pin's x and y coordinates. */
2306  /* IO blocks are considered to be one cell in for simplicity. */
2307 
2308  struct s_bb *curr_bb_edge, *curr_bb_coord;
2309 
2310  xnew = std::max(std::min(xnew, nx), 1);
2311  ynew = std::max(std::min(ynew, ny), 1);
2312  xold = std::max(std::min(xold, nx), 1);
2313  yold = std::max(std::min(yold, ny), 1);
2314 
2315  /* Check if the net had been updated before. */
2316  if (bb_updated_before[inet] == GOT_FROM_SCRATCH)
2317  { /* The net had been updated from scratch, DO NOT update again! */
2318  return;
2319  }
2320  else if (bb_updated_before[inet] == NOT_UPDATED_YET)
2321  { /* The net had NOT been updated before, could use the old values */
2322  curr_bb_coord = &bb_coords[inet];
2323  curr_bb_edge = &bb_num_on_edges[inet];
2325  }
2326  else
2327  { /* The net had been updated before, must use the new values */
2328  curr_bb_coord = bb_coord_new;
2329  curr_bb_edge = bb_edge_new;
2330  }
2331 
2332  /* Check if I can update the bounding box incrementally. */
2333 
2334  if (xnew < xold) { /* Move to left. */
2335 
2336  /* Update the xmax fields for coordinates and number of edges first. */
2337 
2338  if (xold == curr_bb_coord->xmax) { /* Old position at xmax. */
2339  if (curr_bb_edge->xmax == 1) {
2340  get_bb_from_scratch(inet, bb_coord_new, bb_edge_new);
2342  return;
2343  } else {
2344  bb_edge_new->xmax = curr_bb_edge->xmax - 1;
2345  bb_coord_new->xmax = curr_bb_coord->xmax;
2346  }
2347  }
2348 
2349  else { /* Move to left, old postion was not at xmax. */
2350  bb_coord_new->xmax = curr_bb_coord->xmax;
2351  bb_edge_new->xmax = curr_bb_edge->xmax;
2352  }
2353 
2354  /* Now do the xmin fields for coordinates and number of edges. */
2355 
2356  if (xnew < curr_bb_coord->xmin) { /* Moved past xmin */
2357  bb_coord_new->xmin = xnew;
2358  bb_edge_new->xmin = 1;
2359  }
2360 
2361  else if (xnew == curr_bb_coord->xmin) { /* Moved to xmin */
2362  bb_coord_new->xmin = xnew;
2363  bb_edge_new->xmin = curr_bb_edge->xmin + 1;
2364  }
2365 
2366  else { /* Xmin unchanged. */
2367  bb_coord_new->xmin = curr_bb_coord->xmin;
2368  bb_edge_new->xmin = curr_bb_edge->xmin;
2369  }
2370  }
2371 
2372  /* End of move to left case. */
2373  else if (xnew > xold) { /* Move to right. */
2374 
2375  /* Update the xmin fields for coordinates and number of edges first. */
2376 
2377  if (xold == curr_bb_coord->xmin) { /* Old position at xmin. */
2378  if (curr_bb_edge->xmin == 1) {
2379  get_bb_from_scratch(inet, bb_coord_new, bb_edge_new);
2381  return;
2382  } else {
2383  bb_edge_new->xmin = curr_bb_edge->xmin - 1;
2384  bb_coord_new->xmin = curr_bb_coord->xmin;
2385  }
2386  }
2387 
2388  else { /* Move to right, old position was not at xmin. */
2389  bb_coord_new->xmin = curr_bb_coord->xmin;
2390  bb_edge_new->xmin = curr_bb_edge->xmin;
2391  }
2392 
2393  /* Now do the xmax fields for coordinates and number of edges. */
2394 
2395  if (xnew > curr_bb_coord->xmax) { /* Moved past xmax. */
2396  bb_coord_new->xmax = xnew;
2397  bb_edge_new->xmax = 1;
2398  }
2399 
2400  else if (xnew == curr_bb_coord->xmax) { /* Moved to xmax */
2401  bb_coord_new->xmax = xnew;
2402  bb_edge_new->xmax = curr_bb_edge->xmax + 1;
2403  }
2404 
2405  else { /* Xmax unchanged. */
2406  bb_coord_new->xmax = curr_bb_coord->xmax;
2407  bb_edge_new->xmax = curr_bb_edge->xmax;
2408  }
2409  }
2410  /* End of move to right case. */
2411  else { /* xnew == xold -- no x motion. */
2412  bb_coord_new->xmin = curr_bb_coord->xmin;
2413  bb_coord_new->xmax = curr_bb_coord->xmax;
2414  bb_edge_new->xmin = curr_bb_edge->xmin;
2415  bb_edge_new->xmax = curr_bb_edge->xmax;
2416  }
2417 
2418  /* Now account for the y-direction motion. */
2419 
2420  if (ynew < yold) { /* Move down. */
2421 
2422  /* Update the ymax fields for coordinates and number of edges first. */
2423 
2424  if (yold == curr_bb_coord->ymax) { /* Old position at ymax. */
2425  if (curr_bb_edge->ymax == 1) {
2426  get_bb_from_scratch(inet, bb_coord_new, bb_edge_new);
2428  return;
2429  } else {
2430  bb_edge_new->ymax = curr_bb_edge->ymax - 1;
2431  bb_coord_new->ymax = curr_bb_coord->ymax;
2432  }
2433  }
2434 
2435  else { /* Move down, old postion was not at ymax. */
2436  bb_coord_new->ymax = curr_bb_coord->ymax;
2437  bb_edge_new->ymax = curr_bb_edge->ymax;
2438  }
2439 
2440  /* Now do the ymin fields for coordinates and number of edges. */
2441 
2442  if (ynew < curr_bb_coord->ymin) { /* Moved past ymin */
2443  bb_coord_new->ymin = ynew;
2444  bb_edge_new->ymin = 1;
2445  }
2446 
2447  else if (ynew == curr_bb_coord->ymin) { /* Moved to ymin */
2448  bb_coord_new->ymin = ynew;
2449  bb_edge_new->ymin = curr_bb_edge->ymin + 1;
2450  }
2451 
2452  else { /* ymin unchanged. */
2453  bb_coord_new->ymin = curr_bb_coord->ymin;
2454  bb_edge_new->ymin = curr_bb_edge->ymin;
2455  }
2456  }
2457  /* End of move down case. */
2458  else if (ynew > yold) { /* Moved up. */
2459 
2460  /* Update the ymin fields for coordinates and number of edges first. */
2461 
2462  if (yold == curr_bb_coord->ymin) { /* Old position at ymin. */
2463  if (curr_bb_edge->ymin == 1) {
2464  get_bb_from_scratch(inet, bb_coord_new, bb_edge_new);
2466  return;
2467  } else {
2468  bb_edge_new->ymin = curr_bb_edge->ymin - 1;
2469  bb_coord_new->ymin = curr_bb_coord->ymin;
2470  }
2471  }
2472 
2473  else { /* Moved up, old position was not at ymin. */
2474  bb_coord_new->ymin = curr_bb_coord->ymin;
2475  bb_edge_new->ymin = curr_bb_edge->ymin;
2476  }
2477 
2478  /* Now do the ymax fields for coordinates and number of edges. */
2479 
2480  if (ynew > curr_bb_coord->ymax) { /* Moved past ymax. */
2481  bb_coord_new->ymax = ynew;
2482  bb_edge_new->ymax = 1;
2483  }
2484 
2485  else if (ynew == curr_bb_coord->ymax) { /* Moved to ymax */
2486  bb_coord_new->ymax = ynew;
2487  bb_edge_new->ymax = curr_bb_edge->ymax + 1;
2488  }
2489 
2490  else { /* ymax unchanged. */
2491  bb_coord_new->ymax = curr_bb_coord->ymax;
2492  bb_edge_new->ymax = curr_bb_edge->ymax;
2493  }
2494  }
2495  /* End of move up case. */
2496  else { /* ynew == yold -- no y motion. */
2497  bb_coord_new->ymin = curr_bb_coord->ymin;
2498  bb_coord_new->ymax = curr_bb_coord->ymax;
2499  bb_edge_new->ymin = curr_bb_edge->ymin;
2500  bb_edge_new->ymax = curr_bb_edge->ymax;
2501  }
2502 
2503  if (bb_updated_before[inet] == NOT_UPDATED_YET)
2505 }
2506 
2507 static void alloc_legal_placements() {
2508  int i, j, k;
2509 
2510  legal_pos = (t_legal_pos **) my_malloc(num_types * sizeof(t_legal_pos *));
2511  num_legal_pos = (int *) my_calloc(num_types, sizeof(int));
2512 
2513  /* Initialize all occupancy to zero. */
2514 
2515  for (i = 0; i <= nx + 1; i++) {
2516  for (j = 0; j <= ny + 1; j++) {
2517  grid[i][j].usage = 0;
2518  for (k = 0; k < grid[i][j].type->capacity; k++) {
2519  grid[i][j].blocks[k] = EMPTY;
2520  if (grid[i][j].offset == 0) {
2521  num_legal_pos[grid[i][j].type->index]++;
2522  }
2523  }
2524  }
2525  }
2526 
2527  for (i = 0; i < num_types; i++) {
2528  legal_pos[i] = (t_legal_pos *) my_malloc(num_legal_pos[i] * sizeof(t_legal_pos));
2529  }
2530 }
2531 
2532 static void load_legal_placements() {
2533  int i, j, k, itype;
2534  int *index;
2535 
2536  index = (int *) my_calloc(num_types, sizeof(int));
2537 
2538  for (i = 0; i <= nx + 1; i++) {
2539  for (j = 0; j <= ny + 1; j++) {
2540  for (k = 0; k < grid[i][j].type->capacity; k++) {
2541  if (grid[i][j].offset == 0) {
2542  itype = grid[i][j].type->index;
2543  legal_pos[itype][index[itype]].x = i;
2544  legal_pos[itype][index[itype]].y = j;
2545  legal_pos[itype][index[itype]].z = k;
2546  index[itype]++;
2547  }
2548  }
2549  }
2550  }
2551  free(index);
2552 }
2553 
2554 static void free_legal_placements() {
2555  int i;
2556  for (i = 0; i < num_types; i++) {
2557  free(legal_pos[i]);
2558  }
2559  free(legal_pos); /* Free the mapping list */
2560  free(num_legal_pos);
2561 }
2562 
2563 
2564 
2565 static int check_macro_can_be_placed(int imacro, int itype, int x, int y, int z) {
2566 
2567  int imember;
2568  int member_x, member_y, member_z;
2569 
2570  // Every macro can be placed until proven otherwise
2571  int macro_can_be_placed = TRUE;
2572 
2573  // Check whether all the members can be placed
2574  for (imember = 0; imember < pl_macros[imacro].num_blocks; imember++) {
2575  member_x = x + pl_macros[imacro].members[imember].x_offset;
2576  member_y = y + pl_macros[imacro].members[imember].y_offset;
2577  member_z = z + pl_macros[imacro].members[imember].z_offset;
2578 
2579  // Check whether the location could accept block of this type
2580  // Then check whether the location could still accomodate more blocks
2581  // Also check whether the member position is valid, that is the member's location
2582  // still within the chip's dimemsion and the member_z is allowed at that location on the grid
2583  if (member_x <= nx+1 && member_y <= ny+1
2584  && grid[member_x][member_y].type->index == itype
2585  && grid[member_x][member_y].blocks[member_z] == OPEN) {
2586  // Can still accomodate blocks here, check the next position
2587  continue;
2588  } else {
2589  // Cant be placed here - skip to the next try
2590  macro_can_be_placed = FALSE;
2591  break;
2592  }
2593  }
2594 
2595  return (macro_can_be_placed);
2596 }
2597 
2598 
2599 static int try_place_macro(int itype, int ichoice, int imacro, int * free_locations){
2600 
2601  int x, y, z, member_x, member_y, member_z, imember;
2602 
2603  int macro_placed = FALSE;
2604 
2605  // Choose a random position for the head
2606  x = legal_pos[itype][ichoice].x;
2607  y = legal_pos[itype][ichoice].y;
2608  z = legal_pos[itype][ichoice].z;
2609 
2610  // If that location is occupied, do nothing.
2611  if (grid[x][y].blocks[z] != OPEN) {
2612  return (macro_placed);
2613  }
2614 
2615  int macro_can_be_placed = check_macro_can_be_placed(imacro, itype, x, y, z);
2616 
2617  if (macro_can_be_placed == TRUE) {
2618 
2619  // Place down the macro
2620  macro_placed = TRUE;
2621  for (imember = 0; imember < pl_macros[imacro].num_blocks; imember++) {
2622 
2623  member_x = x + pl_macros[imacro].members[imember].x_offset;
2624  member_y = y + pl_macros[imacro].members[imember].y_offset;
2625  member_z = z + pl_macros[imacro].members[imember].z_offset;
2626 
2627  block[pl_macros[imacro].members[imember].blk_index].x = member_x;
2628  block[pl_macros[imacro].members[imember].blk_index].y = member_y;
2629  block[pl_macros[imacro].members[imember].blk_index].z = member_z;
2630 
2631  grid[member_x][member_y].blocks[member_z] = pl_macros[imacro].members[imember].blk_index;
2632  grid[member_x][member_y].usage++;
2633 
2634  // Could not ensure that the randomiser would not pick this location again
2635  // So, would have to do a lazy removal - whenever I come across a block that could not be placed,
2636  // go ahead and remove it from the legal_pos[][] array
2637 
2638  } // Finish placing all the members in the macro
2639 
2640  } // End of this choice of legal_pos
2641 
2642  return (macro_placed);
2643 
2644 }
2645 
2646 
2647 static void initial_placement_pl_macros(int macros_max_num_tries, int * free_locations) {
2648 
2649  int macro_placed;
2650  int imacro, iblk, itype, itry, ichoice;
2651 
2652  /* Macros are harder to place. Do them first */
2653  for (imacro = 0; imacro < num_pl_macros; imacro++) {
2654 
2655  // Every macro are not placed in the beginnning
2656  macro_placed = FALSE;
2657 
2658  // Assume that all the blocks in the macro are of the same type
2659  iblk = pl_macros[imacro].members[0].blk_index;
2660  itype = block[iblk].type->index;
2661  if (free_locations[itype] < pl_macros[imacro].num_blocks) {
2662  vpr_printf (TIO_MESSAGE_ERROR, "Initial placement failed.\n");
2663  vpr_printf (TIO_MESSAGE_ERROR, "Could not place macro length %d with head block %s (#%d); not enough free locations of type %s (#%d).\n",
2664  pl_macros[imacro].num_blocks, block[iblk].name, iblk, type_descriptors[itype].name, itype);
2665  vpr_printf (TIO_MESSAGE_INFO, "VPR cannot auto-size for your circuit, please resize the FPGA manually.\n");
2666  exit(1);
2667  }
2668 
2669  // Try to place the macro first, if can be placed - place them, otherwise try again
2670  for (itry = 0; itry < macros_max_num_tries && macro_placed == FALSE; itry++) {
2671 
2672  // Choose a random position for the head
2673  ichoice = my_irand(free_locations[itype] - 1);
2674 
2675  // Try to place the macro
2676  macro_placed = try_place_macro(itype, ichoice, imacro, free_locations);
2677 
2678  } // Finished all tries
2679 
2680 
2681  if (macro_placed == FALSE){
2682  // if a macro still could not be placed after macros_max_num_tries times,
2683  // go through the chip exhaustively to find a legal placement for the macro
2684  // place the macro on the first location that is legal
2685  // then set macro_placed = TRUE;
2686  // if there are no legal positions, error out
2687 
2688  // Exhaustive placement of carry macros
2689  for (ichoice = 0; ichoice < free_locations[itype] && macro_placed == FALSE; ichoice++) {
2690 
2691  // Try to place the macro
2692  macro_placed = try_place_macro(itype, ichoice, imacro, free_locations);
2693 
2694  } // Exhausted all the legal placement position for this macro
2695 
2696  // If macro could not be placed after exhaustive placement, error out
2697  if (macro_placed == FALSE) {
2698  // Error out
2699  vpr_printf (TIO_MESSAGE_ERROR, "Initial placement failed.\n");
2700  vpr_printf (TIO_MESSAGE_ERROR, "Could not place macro length %d with head block %s (#%d); not enough free locations of type %s (#%d).\n",
2701  pl_macros[imacro].num_blocks, block[iblk].name, iblk, type_descriptors[itype].name, itype);
2702  vpr_printf (TIO_MESSAGE_INFO, "Please manually size the FPGA because VPR can't do this yet.\n");
2703  exit(1);
2704  }
2705 
2706  } else {
2707  // This macro has been placed successfully, proceed to place the next macro
2708  continue;
2709  }
2710  } // Finish placing all the pl_macros successfully
2711 }
2712 
2713 static void initial_placement_blocks(int * free_locations, enum e_pad_loc_type pad_loc_type) {
2714 
2715  /* Place blocks that are NOT a part of any macro.
2716  * We'll randomly place each block in the clustered netlist, one by one.
2717  */
2718 
2719  int iblk, itype;
2720  int ichoice, x, y, z;
2721 
2722  for (iblk = 0; iblk < num_blocks; iblk++) {
2723 
2724  if (block[iblk].x != -1) {
2725  // block placed.
2726  continue;
2727  }
2728  /* Don't do IOs if the user specifies IOs; we'll read those locations later. */
2729  if (!(block[iblk].type == IO_TYPE && pad_loc_type == USER)) {
2730 
2731  /* Randomly select a free location of the appropriate type
2732  * for iblk. We have a linearized list of all the free locations
2733  * that can accomodate a block of that type in free_locations[itype].
2734  * Choose one randomly and put iblk there. Then we don't want to pick that
2735  * location again, so remove it from the free_locations array.
2736  */
2737  itype = block[iblk].type->index;
2738  if (free_locations[itype] <= 0) {
2739  vpr_printf (TIO_MESSAGE_ERROR, "Initial placement failed.\n");
2740  vpr_printf (TIO_MESSAGE_ERROR, "Could not place block %s (#%d); no free locations of type %s (#%d).\n",
2741  block[iblk].name, iblk, type_descriptors[itype].name, itype);
2742  exit(1);
2743  }
2744 
2745  ichoice = my_irand(free_locations[itype] - 1);
2746  x = legal_pos[itype][ichoice].x;
2747  y = legal_pos[itype][ichoice].y;
2748  z = legal_pos[itype][ichoice].z;
2749 
2750  // Make sure that the position is OPEN before placing the block down
2751  assert (grid[x][y].blocks[z] == OPEN);
2752 
2753  grid[x][y].blocks[z] = iblk;
2754  grid[x][y].usage++;
2755 
2756  block[iblk].x = x;
2757  block[iblk].y = y;
2758  block[iblk].z = z;
2759 
2760  /* Ensure randomizer doesn't pick this location again, since it's occupied. Could shift all the
2761  * legal positions in legal_pos to remove the entry (choice) we just used, but faster to
2762  * just move the last entry in legal_pos to the spot we just used and decrement the
2763  * count of free_locations.
2764  */
2765  legal_pos[itype][ichoice] = legal_pos[itype][free_locations[itype] - 1]; /* overwrite used block position */
2766  free_locations[itype]--;
2767 
2768  }
2769  }
2770 }
2771 
2772 static void initial_placement(enum e_pad_loc_type pad_loc_type,
2773  char *pad_loc_file) {
2774 
2775  /* Randomly places the blocks to create an initial placement. We rely on
2776  * the legal_pos array already being loaded. That legal_pos[itype] is an
2777  * array that gives every legal value of (x,y,z) that can accomodate a block.
2778  * The number of such locations is given by num_legal_pos[itype].
2779  */
2780  int i, j, k, iblk, itype, x, y, z, ichoice;
2781  int *free_locations; /* [0..num_types-1].
2782  * Stores how many locations there are for this type that *might* still be free.
2783  * That is, this stores the number of entries in legal_pos[itype] that are worth considering
2784  * as you look for a free location.
2785  */
2786 
2787  free_locations = (int *) my_malloc(num_types * sizeof(int));
2788  for (itype = 0; itype < num_types; itype++) {
2789  free_locations[itype] = num_legal_pos[itype];
2790  }
2791 
2792  /* We'll use the grid to record where everything goes. Initialize to the grid has no
2793  * blocks placed anywhere.
2794  */
2795  for (i = 0; i <= nx + 1; i++) {
2796  for (j = 0; j <= ny + 1; j++) {
2797  grid[i][j].usage = 0;
2798  itype = grid[i][j].type->index;
2799  for (k = 0; k < type_descriptors[itype].capacity; k++) {
2800  grid[i][j].blocks[k] = OPEN;
2801  }
2802  }
2803  }
2804 
2805  /* Similarly, mark all blocks as not being placed yet. */
2806  for (iblk = 0; iblk < num_blocks; iblk++) {
2807  block[iblk].x = -1;
2808  block[iblk].y = -1;
2809  block[iblk].z = -1;
2810  }
2811 
2813 
2814  // All the macros are placed, update the legal_pos[][] array
2815  for (itype = 0; itype < num_types; itype++) {
2816  assert (free_locations[itype] >= 0);
2817  for (ichoice = 0; ichoice < free_locations[itype]; ichoice++) {
2818  x = legal_pos[itype][ichoice].x;
2819  y = legal_pos[itype][ichoice].y;
2820  z = legal_pos[itype][ichoice].z;
2821 
2822  // Check if that location is occupied. If it is, remove from legal_pos
2823  if (grid[x][y].blocks[z] != OPEN) {
2824  legal_pos[itype][ichoice] = legal_pos[itype][free_locations[itype] - 1];
2825  free_locations[itype]--;
2826 
2827  // After the move, I need to check this particular entry again
2828  ichoice--;
2829  continue;
2830  }
2831  }
2832  } // Finish updating the legal_pos[][] and free_locations[] array
2833 
2834  initial_placement_blocks(free_locations, pad_loc_type);
2835 
2836  if (pad_loc_type == USER) {
2837  read_user_pad_loc(pad_loc_file);
2838  }
2839 
2840  /* Restore legal_pos */
2842 
2843 #ifdef VERBOSE
2844  vpr_printf(TIO_MESSAGE_INFO, "At end of initial_placement.\n");
2846  print_clb_placement(getEchoFileName(E_ECHO_INITIAL_CLB_PLACEMENT));
2847  }
2848 #endif
2849  free(free_locations);
2850 }
2851 
2852 static void free_fast_cost_update(void) {
2853  int i;
2854 
2855  for (i = 0; i <= ny; i++)
2856  free(chanx_place_cost_fac[i]);
2857  free(chanx_place_cost_fac);
2858  chanx_place_cost_fac = NULL;
2859 
2860  for (i = 0; i <= nx; i++)
2861  free(chany_place_cost_fac[i]);
2862  free(chany_place_cost_fac);
2863  chany_place_cost_fac = NULL;
2864 }
2865 
2866 static void alloc_and_load_for_fast_cost_update(float place_cost_exp) {
2867 
2868  /* Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac *
2869  * arrays with the inverse of the average number of tracks per channel *
2870  * between [subhigh] and [sublow]. This is only useful for the cost *
2871  * function that takes the length of the net bounding box in each *
2872  * dimension divided by the average number of tracks in that direction. *
2873  * For other cost functions, you don't have to bother calling this *
2874  * routine; when using the cost function described above, however, you *
2875  * must always call this routine after you call init_chan and before *
2876  * you do any placement cost determination. The place_cost_exp factor *
2877  * specifies to what power the width of the channel should be taken -- *
2878  * larger numbers make narrower channels more expensive. */
2879 
2880  int low, high, i;
2881 
2882  /* Access arrays below as chan?_place_cost_fac[subhigh][sublow]. Since *
2883  * subhigh must be greater than or equal to sublow, we only need to *
2884  * allocate storage for the lower half of a matrix. */
2885 
2886  chanx_place_cost_fac = (float **) my_malloc((ny + 1) * sizeof(float *));
2887  for (i = 0; i <= ny; i++)
2888  chanx_place_cost_fac[i] = (float *) my_malloc((i + 1) * sizeof(float));
2889 
2890  chany_place_cost_fac = (float **) my_malloc((nx + 1) * sizeof(float *));
2891  for (i = 0; i <= nx; i++)
2892  chany_place_cost_fac[i] = (float *) my_malloc((i + 1) * sizeof(float));
2893 
2894  /* First compute the number of tracks between channel high and channel *
2895  * low, inclusive, in an efficient manner. */
2896 
2898 
2899  for (high = 1; high <= ny; high++) {
2900  chanx_place_cost_fac[high][high] = chan_width_x[high];
2901  for (low = 0; low < high; low++) {
2902  chanx_place_cost_fac[high][low] =
2903  chanx_place_cost_fac[high - 1][low] + chan_width_x[high];
2904  }
2905  }
2906 
2907  /* Now compute the inverse of the average number of tracks per channel *
2908  * between high and low. The cost function divides by the average *
2909  * number of tracks per channel, so by storing the inverse I convert *
2910  * this to a faster multiplication. Take this final number to the *
2911  * place_cost_exp power -- numbers other than one mean this is no *
2912  * longer a simple "average number of tracks"; it is some power of *
2913  * that, allowing greater penalization of narrow channels. */
2914 
2915  for (high = 0; high <= ny; high++)
2916  for (low = 0; low <= high; low++) {
2917  chanx_place_cost_fac[high][low] = (high - low + 1.)
2918  / chanx_place_cost_fac[high][low];
2919  chanx_place_cost_fac[high][low] = pow(
2920  (double) chanx_place_cost_fac[high][low],
2921  (double) place_cost_exp);
2922  }
2923 
2924  /* Now do the same thing for the y-directed channels. First get the *
2925  * number of tracks between channel high and channel low, inclusive. */
2926 
2928 
2929  for (high = 1; high <= nx; high++) {
2930  chany_place_cost_fac[high][high] = chan_width_y[high];
2931  for (low = 0; low < high; low++) {
2932  chany_place_cost_fac[high][low] =
2933  chany_place_cost_fac[high - 1][low] + chan_width_y[high];
2934  }
2935  }
2936 
2937  /* Now compute the inverse of the average number of tracks per channel *
2938  * between high and low. Take to specified power. */
2939 
2940  for (high = 0; high <= nx; high++)
2941  for (low = 0; low <= high; low++) {
2942  chany_place_cost_fac[high][low] = (high - low + 1.)
2943  / chany_place_cost_fac[high][low];
2944  chany_place_cost_fac[high][low] = pow(
2945  (double) chany_place_cost_fac[high][low],
2946  (double) place_cost_exp);
2947  }
2948 }
2949 
2950 static void check_place(float bb_cost, float timing_cost,
2951  enum e_place_algorithm place_algorithm,
2952  float delay_cost) {
2953 
2954  /* Checks that the placement has not confused our data structures. *
2955  * i.e. the clb and block structures agree about the locations of *
2956  * every block, blocks are in legal spots, etc. Also recomputes *
2957  * the final placement cost from scratch and makes sure it is *
2958  * within roundoff of what we think the cost is. */
2959 
2960  static int *bdone;
2961  int i, j, k, error = 0, bnum;
2962  float bb_cost_check;
2963  int usage_check;
2964  float timing_cost_check, delay_cost_check;
2965  int imacro, imember, head_iblk, member_iblk, member_x, member_y, member_z;
2966 
2967  bb_cost_check = comp_bb_cost(CHECK);
2968  vpr_printf(TIO_MESSAGE_INFO, "bb_cost recomputed from scratch: %g\n", bb_cost_check);
2969  if (fabs(bb_cost_check - bb_cost) > bb_cost * ERROR_TOL) {
2970  vpr_printf(TIO_MESSAGE_ERROR, "bb_cost_check: %g and bb_cost: %g differ in check_place.\n", bb_cost_check, bb_cost);
2971  error++;
2972  }
2973 
2974  if (place_algorithm == NET_TIMING_DRIVEN_PLACE
2975  || place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
2976  comp_td_costs(&timing_cost_check, &delay_cost_check);
2977  vpr_printf(TIO_MESSAGE_INFO, "timing_cost recomputed from scratch: %g\n", timing_cost_check);
2978  if (fabs(timing_cost_check - timing_cost) > timing_cost * ERROR_TOL) {
2979  vpr_printf(TIO_MESSAGE_ERROR, "timing_cost_check: %g and timing_cost: %g differ in check_place.\n",
2980  timing_cost_check, timing_cost);
2981  error++;
2982  }
2983  vpr_printf(TIO_MESSAGE_INFO, "delay_cost recomputed from scratch: %g\n", delay_cost_check);
2984  if (fabs(delay_cost_check - delay_cost) > delay_cost * ERROR_TOL) {
2985  vpr_printf(TIO_MESSAGE_ERROR, "delay_cost_check: %g and delay_cost: %g differ in check_place.\n",
2986  delay_cost_check, delay_cost);
2987  error++;
2988  }
2989  }
2990 
2991  bdone = (int *) my_malloc(num_blocks * sizeof(int));
2992  for (i = 0; i < num_blocks; i++)
2993  bdone[i] = 0;
2994 
2995  /* Step through grid array. Check it against block array. */
2996  for (i = 0; i <= (nx + 1); i++)
2997  for (j = 0; j <= (ny + 1); j++) {
2998  if (grid[i][j].usage > grid[i][j].type->capacity) {
2999  vpr_printf(TIO_MESSAGE_ERROR, "Block at grid location (%d,%d) overused. Usage is %d.\n",
3000  i, j, grid[i][j].usage);
3001  error++;
3002  }
3003  usage_check = 0;
3004  for (k = 0; k < grid[i][j].type->capacity; k++) {
3005  bnum = grid[i][j].blocks[k];
3006  if (EMPTY == bnum)
3007  continue;
3008 
3009  if (block[bnum].type != grid[i][j].type) {
3010  vpr_printf(TIO_MESSAGE_ERROR, "Block %d type does not match grid location (%d,%d) type.\n",
3011  bnum, i, j);
3012  error++;
3013  }
3014  if ((block[bnum].x != i) || (block[bnum].y != j)) {
3015  vpr_printf(TIO_MESSAGE_ERROR, "Block %d location conflicts with grid(%d,%d) data.\n",
3016  bnum, i, j);
3017  error++;
3018  }
3019  ++usage_check;
3020  bdone[bnum]++;
3021  }
3022  if (usage_check != grid[i][j].usage) {
3023  vpr_printf(TIO_MESSAGE_ERROR, "Location (%d,%d) usage is %d, but has actual usage %d.\n",
3024  i, j, grid[i][j].usage, usage_check);
3025  error++;
3026  }
3027  }
3028 
3029  /* Check that every block exists in the grid and block arrays somewhere. */
3030  for (i = 0; i < num_blocks; i++)
3031  if (bdone[i] != 1) {
3032  vpr_printf(TIO_MESSAGE_ERROR, "Block %d listed %d times in data structures.\n",
3033  i, bdone[i]);
3034  error++;
3035  }
3036  free(bdone);
3037 
3038  /* Check the pl_macro placement are legal - blocks are in the proper relative position. */
3039  for (imacro = 0; imacro < num_pl_macros; imacro++) {
3040 
3041  head_iblk = pl_macros[imacro].members[0].blk_index;
3042 
3043  for (imember = 0; imember < pl_macros[imacro].num_blocks; imember++) {
3044 
3045  member_iblk = pl_macros[imacro].members[imember].blk_index;
3046 
3047  // Compute the suppossed member's x,y,z location
3048  member_x = block[head_iblk].x + pl_macros[imacro].members[imember].x_offset;
3049  member_y = block[head_iblk].y + pl_macros[imacro].members[imember].y_offset;
3050  member_z = block[head_iblk].z + pl_macros[imacro].members[imember].z_offset;
3051 
3052  // Check the block data structure first
3053  if (block[member_iblk].x != member_x
3054  || block[member_iblk].y != member_y
3055  || block[member_iblk].z != member_z) {
3056  vpr_printf(TIO_MESSAGE_ERROR, "Block %d in pl_macro #%d is not placed in the proper orientation.\n",
3057  member_iblk, imacro);
3058  error++;
3059  }
3060 
3061  // Then check the grid data structure
3062  if (grid[member_x][member_y].blocks[member_z] != member_iblk) {
3063  vpr_printf(TIO_MESSAGE_ERROR, "Block %d in pl_macro #%d is not placed in the proper orientation.\n",
3064  member_iblk, imacro);
3065  error++;
3066  }
3067  } // Finish going through all the members
3068  } // Finish going through all the macros
3069 
3070  if (error == 0) {
3071  vpr_printf(TIO_MESSAGE_INFO, "\n");
3072  vpr_printf(TIO_MESSAGE_INFO, "Completed placement consistency check successfully.\n");
3073  vpr_printf(TIO_MESSAGE_INFO, "\n");
3074  vpr_printf(TIO_MESSAGE_INFO, "Swaps called: %d\n", num_ts_called);
3075 
3076 #ifdef PRINT_REL_POS_DISTR
3077  print_relative_pos_distr(void);
3078 #endif
3079  } else {
3080  vpr_printf(TIO_MESSAGE_INFO, "\n");
3081  vpr_printf(TIO_MESSAGE_ERROR, "Completed placement consistency check, %d errors found.\n", error);
3082  vpr_printf(TIO_MESSAGE_INFO, "Aborting program.\n");
3083  exit(1);
3084  }
3085 
3086 }
3087 
3088 #ifdef VERBOSE
3089 static void print_clb_placement(const char *fname) {
3090 
3091  /* Prints out the clb placements to a file. */
3092 
3093  FILE *fp;
3094  int i;
3095 
3096  fp = my_fopen(fname, "w", 0);
3097  fprintf(fp, "Complex block placements:\n\n");
3098 
3099  fprintf(fp, "Block #\tName\t(X, Y, Z).\n");
3100  for(i = 0; i < num_blocks; i++) {
3101  fprintf(fp, "#%d\t%s\t(%d, %d, %d).\n", i, block[i].name, block[i].x, block[i].y, block[i].z);
3102  }
3103 
3104  fclose(fp);
3105 }
3106 #endif
3107 
3108 static void free_try_swap_arrays(void) {
3109  if(ts_bb_coord_new != NULL) {
3110  free(ts_bb_coord_new);
3111  free(ts_bb_edge_new);
3112  free(ts_nets_to_update);
3113  free(blocks_affected.moved_blocks);
3114  free(bb_updated_before);
3115 
3116  ts_bb_coord_new = NULL;
3117  ts_bb_edge_new = NULL;
3118  ts_nets_to_update = NULL;
3119  blocks_affected.moved_blocks = NULL;
3120  blocks_affected.num_moved_blocks = 0;
3121  bb_updated_before = NULL;
3122  }
3123 }
3124 
t_type_ptr type
Definition: vpr_types.h:522
int * node_block_pin
Definition: vpr_types.h:509
static float comp_bb_cost(enum cost_methods method)
Definition: place.c:1866
#define MAX_MOVES_BEFORE_RECOMPUTE
Definition: place.c:36
static void load_legal_placements()
Definition: place.c:2532
static void free_try_swap_arrays(void)
Definition: place.c:3108
static float ** chany_place_cost_fac
Definition: place.c:171
static int find_affected_nets(int *nets_to_update)
Definition: place.c:1482
void update_screen(int priority, char *msg, enum pic_type pic_on_screen_val, boolean crit_path_button_enabled)
Definition: draw.c:156
FILE * my_fopen(const char *fname, const char *flag, int prompt)
Definition: util.c:54
static int num_swap_aborted
Definition: place.c:189
static struct s_bb * ts_bb_edge_new
Definition: place.c:176
static void check_place(float bb_cost, float timing_cost, enum e_place_algorithm place_algorithm, float delay_cost)
Definition: place.c:2950
int swapped_to_empty
Definition: place.c:87
void free_port_pin_from_blk_pin(void)
Definition: vpr_utils.c:736
void get_imacro_from_iblk(int *imacro, int iblk, t_pl_macro *macros, int num_macros)
Definition: place_macro.c:362
struct s_pl_blocks_to_be_moved t_pl_blocks_to_be_moved
boolean enable_timing_computations
Definition: vpr_types.h:646
static void comp_delta_td_cost(float *delta_timing, float *delta_delay)
Definition: place.c:1754
static void get_bb_from_scratch(int inet, struct s_bb *coords, struct s_bb *num_on_edges)
Definition: place.c:2081
void free_matrix(void *vptr, int nrmin, int nrmax, int ncmin, size_t elsize)
Definition: util.c:573
int xmax
Definition: vpr_types.h:533
static void initial_placement(enum e_pad_loc_type pad_loc_type, char *pad_loc_file)
Definition: place.c:2772
static t_legal_pos ** legal_pos
Definition: place.c:116
char * pad_loc_file
Definition: vpr_types.h:643
int ymin
Definition: vpr_types.h:534
#define MAJOR
Definition: vpr_types.h:73
float get_critical_path_delay(void)
Definition: path_delay.c:3060
enum e_pad_loc_type pad_loc_type
Definition: vpr_types.h:642
float ** timing_place_crit
Definition: timing_place.c:12
static float ** net_delay
int x
Definition: vpr_types.h:563
int block_num
Definition: place.c:80
static float recompute_bb_cost(void)
Definition: place.c:1630
float ** delta_clb_to_clb
int * chan_width_x
Definition: globals.c:56
#define GOT_FROM_SCRATCH
Definition: place.c:47
void free_lookups_and_criticalities(float ***net_delay, t_slack *slacks)
Definition: timing_place.c:141
static enum swap_result assess_swap(float delta_c, float t)
Definition: place.c:1600
static struct s_bb * ts_bb_coord_new
Definition: place.c:175
void * my_calloc(size_t nelem, size_t size)
Definition: util.c:132
static void alloc_legal_placements()
Definition: place.c:2507
static void initial_placement_blocks(int *free_locations, enum e_pad_loc_type pad_loc_type)
Definition: place.c:2713
static double get_std_dev(int n, double sum_x_squared, double av_x)
Definition: place.c:947
float timing_tradeoff
Definition: vpr_types.h:638
int num_nets
Definition: globals.c:27
int * node_block
Definition: vpr_types.h:507
float ** delta_io_to_clb
static void alloc_and_load_try_swap_structs()
Definition: place.c:2065
int * chan_width_y
Definition: globals.c:57
void print_critical_path(const char *fname)
Definition: path_delay.c:2458
void do_timing_analysis(t_slack *slacks, boolean is_prepacked, boolean do_lut_input_balancing, boolean is_final_analysis)
Definition: path_delay.c:1613
int num_blocks
Definition: place_macro.h:158
e_pad_loc_type
Definition: vpr_types.h:478
static float comp_td_point_to_point_delay(int inet, int ipin)
Definition: place.c:1652
float ** slack
Definition: vpr_types.h:405
#define MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY
Definition: place.c:41
t_type_ptr type
Definition: vpr_types.h:561
#define BUFSIZE
Definition: graphics.c:184
static float starting_t(float *cost_ptr, float *bb_cost_ptr, float *timing_cost_ptr, float **old_region_occ_x, float **old_region_occ_y, struct s_annealing_sched annealing_sched, int max_moves, float rlim, enum e_place_algorithm place_algorithm, float timing_tradeoff, float inverse_prev_bb_cost, float inverse_prev_timing_cost, float *delay_cost_ptr)
Definition: place.c:1045
int num_blocks
Definition: globals.c:30
static enum swap_result try_swap(float t, float *cost, float *bb_cost, float *timing_cost, float rlim, float **old_region_occ_x, float **old_region_occ_y, enum e_place_algorithm place_algorithm, float timing_tradeoff, float inverse_prev_bb_cost, float inverse_prev_timing_cost, float *delay_cost)
Definition: place.c:1252
#define UNDEFINED
Definition: vpr_types.h:103
boolean getEchoEnabled(void)
Definition: ReadOptions.c:67
#define ERROR_TOL
Definition: place.c:31
t_pl_macro_member * members
Definition: place_macro.h:159
static boolean find_to(int x_from, int y_from, t_type_ptr type, float rlim, int *x_to, int *y_to)
Definition: place.c:1520
Definition: util.h:12
static char * bb_updated_before
Definition: place.c:132
float td_place_exp_first
Definition: vpr_types.h:648
static void free_fast_cost_update(void)
Definition: place.c:2852
int y
Definition: vpr_types.h:564
#define min(a, b)
Definition: graphics.c:174
static t_pl_blocks_to_be_moved blocks_affected
Definition: place.c:161
static void comp_td_costs(float *timing_cost, float *connection_delay_sum)
Definition: place.c:1829
static int * ts_nets_to_update
Definition: place.c:177
#define EMPTY
Definition: vpr_types.h:90
static int num_swap_accepted
Definition: place.c:188
static float get_net_cost(int inet, struct s_bb *bb_ptr)
Definition: place.c:2204
static int num_ts_called
Definition: place.c:190
void init_draw_coords(float width_val)
Definition: draw.c:430
static float ** chanx_place_cost_fac
Definition: place.c:171
#define HUGE_POSITIVE_FLOAT
Definition: vpr_types.h:79
static float ** temp_point_to_point_delay_cost
Definition: place.c:143
void load_criticalities(t_slack *slacks, float crit_exponent)
Definition: timing_place.c:81
static int * num_legal_pos
Definition: place.c:117
static void * my_malloc(int ibytes)
Definition: graphics.c:499
boolean * is_global
static int check_macro_can_be_placed(int imacro, int itype, int x, int y, int z)
Definition: place.c:2565
float my_frand(void)
Definition: util.c:738
#define max(a, b)
Definition: graphics.c:171
#define MAX_INV_TIMING_COST
Definition: place.c:64
struct s_block * block
Definition: globals.c:31
void free_placement_macros_structs(void)
Definition: place_macro.c:421
static struct s_bb * bb_num_on_edges
Definition: place.c:155
struct s_net * clb_net
Definition: globals.c:28
float td_place_exp_last
Definition: vpr_types.h:650
static void update_td_cost(void)
Definition: place.c:1699
int nx
Definition: globals.c:46
Definition: place.c:61
void init_chan(int cfactor, t_chan_width_dist chan_width_dist)
int ** alloc_and_load_net_pin_index()
Definition: vpr_utils.c:651
float ** delta_io_to_io
#define NOT_UPDATED_YET
Definition: place.c:45
static void free_placement_structs(float **old_region_occ_x, float **old_region_occ_y, struct s_placer_opts placer_opts)
Definition: place.c:1913
static float * net_cost
Definition: place.c:107
struct s_pl_moved_block t_pl_moved_block
float ** delta_clb_to_io
static int num_swap_rejected
Definition: place.c:187
void read_user_pad_loc(char *pad_loc_file)
Definition: read_place.c:134
boolean isEchoFileEnabled(enum e_echo_files echo_option)
Definition: ReadOptions.c:115
static float ** point_to_point_delay_cost
Definition: place.c:142
void print_sink_delays(const char *fname)
Definition: timing_place.c:58
static int find_affected_blocks(int b_from, int x_to, int y_to, int z_to)
Definition: place.c:1192
void free_blk_pin_from_port_pin(void)
Definition: vpr_utils.c:840
static float ** temp_point_to_point_timing_cost
Definition: place.c:138
static int try_place_macro(int itype, int ichoice, int imacro, int *free_locations)
Definition: place.c:2599
static void update_rlim(float *rlim, float success_rat)
Definition: place.c:969
struct s_grid_tile ** grid
Definition: globals.c:59
static void alloc_and_load_placement_structs(float place_cost_exp, float ***old_region_occ_x, float ***old_region_occ_y, struct s_placer_opts placer_opts, t_direct_inf *directs, int num_directs)
Definition: place.c:1975
Definition: place.c:61
static void update_bb(int inet, struct s_bb *bb_coord_new, struct s_bb *bb_edge_new, int xold, int yold, int xnew, int ynew)
Definition: place.c:2292
float place_cost_exp
Definition: vpr_types.h:640
#define SMALL_NET
Definition: place.c:27
static void free_legal_placements()
Definition: place.c:2554
Definition: place.c:54
int recompute_crit_iter
Definition: vpr_types.h:645
Definition: place.c:54
static int count_connections(void)
Definition: place.c:930
enum e_place_algorithm place_algorithm
Definition: vpr_types.h:637
int * blocks
Definition: vpr_types.h:525
void print_criticality(t_slack *slacks, boolean criticality_is_normalized, const char *fname)
Definition: path_delay.c:559
static float * temp_net_cost
Definition: place.c:107
int my_irand(int imax)
Definition: util.c:710
void load_constant_net_delay(float **net_delay, float delay_value, struct s_net *nets, int n_nets)
Definition: net_delay.c:175
enum sched_type type
Definition: vpr_types.h:625
int num_types
Definition: globals.c:37
t_type_ptr IO_TYPE
Definition: globals.c:40
static int exit_crit(float t, float cost, struct s_annealing_sched annealing_sched)
Definition: place.c:1023
t_slack * alloc_lookups_and_criticalities(t_chan_width_dist chan_width_dist, struct s_router_opts router_opts, struct s_det_routing_arch det_routing_arch, t_segment_inf *segment_inf, t_timing_inf timing_inf, float ***net_delay, INP t_direct_inf *directs, INP int num_directs)
Definition: timing_place.c:121
static void alloc_and_load_for_fast_cost_update(float place_cost_exp)
Definition: place.c:2866
Definition: slre.c:50
int * nets
Definition: vpr_types.h:562
void print_slack(float **slack, boolean slack_is_normalized, const char *fname)
Definition: path_delay.c:441
static const float cross_count[50]
Definition: place.c:197
static float ** point_to_point_timing_cost
Definition: place.c:137
void print_timing_graph(const char *fname)
Definition: path_delay.c:1388
int z
Definition: vpr_types.h:565
static int num_pl_macros
Definition: place.c:182
int inner_loop_recompute_divider
Definition: vpr_types.h:647
struct s_type_descriptor * type_descriptors
Definition: globals.c:38
char * getEchoFileName(enum e_echo_files echo_option)
Definition: ReadOptions.c:122
t_pl_moved_block * moved_blocks
Definition: place.c:100
cost_methods
Definition: place.c:53
struct s_legal_pos t_legal_pos
Definition: place.c:61
static struct s_bb * bb_coords
Definition: place.c:155
static t_pl_macro * pl_macros
Definition: place.c:181
static int setup_blocks_affected(int b_from, int x_to, int y_to, int z_to)
Definition: place.c:1110
int place_chan_width
Definition: vpr_types.h:641
int xmin
Definition: vpr_types.h:532
static int ** net_pin_index
Definition: place.c:149
static void initial_placement_pl_macros(int macros_max_num_tries, int *free_locations)
Definition: place.c:2647
int ny
Definition: globals.c:47
messagelogger vpr_printf
Definition: util.c:17
static void get_non_updateable_bb(int inet, struct s_bb *bb_coord_new)
Definition: place.c:2237
static void update_t(float *t, float std_dev, float rlim, float success_rat, struct s_annealing_sched annealing_sched)
Definition: place.c:983
int num_sinks
Definition: vpr_types.h:506
#define MINOR
Definition: vpr_types.h:72
void load_timing_graph_net_delays(float **net_delay)
Definition: path_delay.c:368
e_place_algorithm
Definition: vpr_types.h:632
int alloc_and_load_placement_macros(t_direct_inf *directs, int num_directs, t_pl_macro **macros)
Definition: place_macro.c:281
swap_result
Definition: place.c:60
static double get_net_wirelength_estimate(int inet, struct s_bb *bbptr)
Definition: place.c:2169
void try_place(struct s_placer_opts placer_opts, struct s_annealing_sched annealing_sched, t_chan_width_dist chan_width_dist, struct s_router_opts router_opts, struct s_det_routing_arch det_routing_arch, t_segment_inf *segment_inf, t_timing_inf timing_inf, t_direct_inf *directs, int num_directs)
Definition: place.c:310
#define UPDATED_ONCE
Definition: place.c:46
Definition: util.h:12
int ymax
Definition: vpr_types.h:535