VPR-7.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros
rr_graph_area.c File Reference
#include <math.h>
#include "util.h"
#include "vpr_types.h"
#include <assert.h>
#include "globals.h"
#include "rr_graph_util.h"
#include "rr_graph_area.h"
+ Include dependency graph for rr_graph_area.c:

Go to the source code of this file.

Functions

static void count_bidir_routing_transistors (int num_switch, float R_minW_nmos, float R_minW_pmos)
 
static void count_unidir_routing_transistors (t_segment_inf *segment_inf, float R_minW_nmos, float R_minW_pmos)
 
static float get_cblock_trans (int *num_inputs_to_cblock, int max_inputs_to_cblock, float trans_cblock_to_lblock_buf, float trans_sram_bit)
 
static float * alloc_and_load_unsharable_switch_trans (int num_switch, float trans_sram_bit, float R_minW_nmos)
 
static float * alloc_and_load_sharable_switch_trans (int num_switch, float trans_sram_bit, float R_minW_nmos, float R_minW_pmos)
 
static float trans_per_buf (float Rbuf, float R_minW_nmos, float R_minW_pmos)
 
static float trans_per_mux (int num_inputs, float trans_sram_bit, float pass_trans_area)
 
static float trans_per_R (float Rtrans, float R_minW_trans)
 
void count_routing_transistors (enum e_directionality directionality, int num_switch, t_segment_inf *segment_inf, float R_minW_nmos, float R_minW_pmos)
 

Function Documentation

static float * alloc_and_load_sharable_switch_trans ( int  num_switch,
float  trans_sram_bit,
float  R_minW_nmos,
float  R_minW_pmos 
)
static

Definition at line 500 of file rr_graph_area.c.

501  {
502 
503  /* Loads up an array that says how many transistor are needed to implement *
504  * the sharable portion of each switch type. The SRAM bit of a switch and *
505  * the pass transistor (forming either the entire switch or the output part *
506  * of a tri-state buffer) are both unsharable. Only the buffer part of a *
507  * buffer switch is sharable. */
508 
509  float *sharable_switch_trans, Rbuf;
510  int i;
511 
512  sharable_switch_trans = (float *) my_malloc(num_switch * sizeof(float));
513 
514  for (i = 0; i < num_switch; i++) {
515 
516  if (switch_inf[i].buffered == FALSE) {
517  sharable_switch_trans[i] = 0.;
518  } else { /* Buffer. Set Rbuf = Rpass = 1/2 Rtotal. */
519  Rbuf = switch_inf[i].R / 2.;
520  sharable_switch_trans[i] = trans_per_buf(Rbuf, R_minW_nmos,
521  R_minW_pmos);
522  }
523  }
524 
525  return (sharable_switch_trans);
526 }
static float trans_per_buf(float Rbuf, float R_minW_nmos, float R_minW_pmos)
Definition: util.h:12
static void * my_malloc(int ibytes)
Definition: graphics.c:499
struct s_switch_inf * switch_inf
Definition: globals.c:83

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static float * alloc_and_load_unsharable_switch_trans ( int  num_switch,
float  trans_sram_bit,
float  R_minW_nmos 
)
static

Definition at line 471 of file rr_graph_area.c.

472  {
473 
474  /* Loads up an array that says how many transistors are needed to implement *
475  * the unsharable portion of each switch type. The SRAM bit of a switch and *
476  * the pass transistor (forming either the entire switch or the output part *
477  * of a tri-state buffer) are both unsharable. */
478 
479  float *unsharable_switch_trans, Rpass;
480  int i;
481 
482  unsharable_switch_trans = (float *) my_malloc(num_switch * sizeof(float));
483 
484  for (i = 0; i < num_switch; i++) {
485 
486  if (switch_inf[i].buffered == FALSE) {
487  Rpass = switch_inf[i].R;
488  } else { /* Buffer. Set Rpass = Rbuf = 1/2 Rtotal. */
489  Rpass = switch_inf[i].R / 2.;
490  }
491 
492  unsharable_switch_trans[i] = trans_per_R(Rpass, R_minW_nmos)
493  + trans_sram_bit;
494  }
495 
496  return (unsharable_switch_trans);
497 }
Definition: util.h:12
static void * my_malloc(int ibytes)
Definition: graphics.c:499
struct s_switch_inf * switch_inf
Definition: globals.c:83
static float trans_per_R(float Rtrans, float R_minW_trans)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void count_bidir_routing_transistors ( int  num_switch,
float  R_minW_nmos,
float  R_minW_pmos 
)
static

Definition at line 59 of file rr_graph_area.c.

60  {
61 
62  /* Tri-state buffers are designed as a buffer followed by a pass transistor. *
63  * I make Rbuffer = Rpass_transitor = 1/2 Rtri-state_buffer. *
64  * I make the pull-up and pull-down sides of the buffer the same strength -- *
65  * i.e. I make the p transistor R_minW_pmos / R_minW_nmos wider than the n *
66  * transistor. *
67  * *
68  * I generate two area numbers in this routine: ntrans_sharing and *
69  * ntrans_no_sharing. ntrans_sharing exactly reflects what the timing *
70  * analyzer, etc. works with -- each switch is a completely self contained *
71  * pass transistor or tri-state buffer. In the case of tri-state buffers *
72  * this is rather pessimisitic. The inverter chain part of the buffer (as *
73  * opposed to the pass transistor + SRAM output part) can be shared by *
74  * several switches in the same location. Obviously all the switches from *
75  * an OPIN can share one buffer. Also, CHANX and CHANY switches at the same *
76  * spot (i,j) on a single segment can share a buffer. For a more realistic *
77  * area number I assume all buffered switches from a node that are at the *
78  * *same (i,j) location* can share one buffer. Only the lowest resistance *
79  * (largest) buffer is implemented. In practice, you might want to build *
80  * something that is 1.5x or 2x the largest buffer, so this may be a bit *
81  * optimistic (but I still think it's pretty reasonable). */
82 
83  int *num_inputs_to_cblock; /* [0..num_rr_nodes-1], but all entries not */
84 
85  /* corresponding to IPINs will be 0. */
86 
87  boolean * cblock_counted; /* [0..max(nx,ny)] -- 0th element unused. */
88  float *shared_buffer_trans; /* [0..max_nx,ny)] */
89  float *unsharable_switch_trans, *sharable_switch_trans; /* [0..num_switch-1] */
90 
91  t_rr_type from_rr_type, to_rr_type;
92  int from_node, to_node, iedge, num_edges, maxlen;
93  int iswitch, i, j, iseg, max_inputs_to_cblock;
94  float input_cblock_trans, shared_opin_buffer_trans;
95  const float trans_sram_bit = 6.;
96 
97  /* Two variables below are the accumulator variables that add up all the *
98  * transistors in the routing. Make doubles so that they don't stop *
99  * incrementing once adding a switch makes a change of less than 1 part in *
100  * 10^7 to the total. If this still isn't good enough (adding 1 part in *
101  * 10^15 will still be thrown away), compute the transistor count in *
102  * "chunks", by adding up inodes 1 to 1000, 1001 to 2000 and then summing *
103  * the partial sums together. */
104 
105  double ntrans_sharing, ntrans_no_sharing;
106 
107  /* Buffers from the routing to the ipin cblock inputs, and from the ipin *
108  * cblock outputs to the logic block, respectively. Assume minimum size n *
109  * transistors, and ptransistors sized to make the pull-up R = pull-down R. */
110 
111  float trans_track_to_cblock_buf;
112  float trans_cblock_to_lblock_buf;
113 
114  ntrans_sharing = 0.;
115  ntrans_no_sharing = 0.;
116  max_inputs_to_cblock = 0;
117 
118  /* Assume the two buffers below are 4x minimum drive strength (enough to *
119  * drive a fanout of up to 16 pretty nicely -- should cover a reasonable *
120  * wiring C plus the fanout. */
121 
122  trans_track_to_cblock_buf = trans_per_buf(R_minW_nmos / 4., R_minW_nmos,
123  R_minW_pmos);
124 
125  trans_cblock_to_lblock_buf = trans_per_buf(R_minW_nmos / 4., R_minW_nmos,
126  R_minW_pmos);
127 
128  num_inputs_to_cblock = (int *) my_calloc(num_rr_nodes, sizeof(int));
129 
130  maxlen = std::max(nx, ny) + 1;
131  cblock_counted = (boolean *) my_calloc(maxlen, sizeof(boolean));
132  shared_buffer_trans = (float *) my_calloc(maxlen, sizeof(float));
133 
134  unsharable_switch_trans = alloc_and_load_unsharable_switch_trans(num_switch,
135  trans_sram_bit, R_minW_nmos);
136 
137  sharable_switch_trans = alloc_and_load_sharable_switch_trans(num_switch,
138  trans_sram_bit, R_minW_nmos, R_minW_pmos);
139 
140  for (from_node = 0; from_node < num_rr_nodes; from_node++) {
141 
142  from_rr_type = rr_node[from_node].type;
143 
144  switch (from_rr_type) {
145 
146  case CHANX:
147  case CHANY:
148  num_edges = rr_node[from_node].num_edges;
149 
150  for (iedge = 0; iedge < num_edges; iedge++) {
151 
152  to_node = rr_node[from_node].edges[iedge];
153  to_rr_type = rr_node[to_node].type;
154 
155  switch (to_rr_type) {
156 
157  case CHANX:
158  case CHANY:
159  iswitch = rr_node[from_node].switches[iedge];
160 
161  if (switch_inf[iswitch].buffered) {
162  iseg = seg_index_of_sblock(from_node, to_node);
163  shared_buffer_trans[iseg] = std::max(
164  shared_buffer_trans[iseg],
165  sharable_switch_trans[iswitch]);
166 
167  ntrans_no_sharing += unsharable_switch_trans[iswitch]
168  + sharable_switch_trans[iswitch];
169  ntrans_sharing += unsharable_switch_trans[iswitch];
170  } else if (from_node < to_node) {
171 
172  /* Pass transistor shared by two edges -- only count once. *
173  * Also, no part of a pass transistor is sharable. */
174 
175  ntrans_no_sharing += unsharable_switch_trans[iswitch];
176  ntrans_sharing += unsharable_switch_trans[iswitch];
177  }
178  break;
179 
180  case IPIN:
181  num_inputs_to_cblock[to_node]++;
182  max_inputs_to_cblock = std::max(max_inputs_to_cblock,
183  num_inputs_to_cblock[to_node]);
184 
185  iseg = seg_index_of_cblock(from_rr_type, to_node);
186 
187  if (cblock_counted[iseg] == FALSE) {
188  cblock_counted[iseg] = TRUE;
189  ntrans_sharing += trans_track_to_cblock_buf;
190  ntrans_no_sharing += trans_track_to_cblock_buf;
191  }
192  break;
193 
194  default:
195  vpr_printf(TIO_MESSAGE_ERROR, "in count_routing_transistors:\n");
196  vpr_printf(TIO_MESSAGE_ERROR, "\tUnexpected connection from node %d (type %d) to node %d (type %d).\n",
197  from_node, from_rr_type, to_node, to_rr_type);
198  exit(1);
199  break;
200 
201  } /* End switch on to_rr_type. */
202 
203  } /* End for each edge. */
204 
205  /* Now add in the shared buffer transistors, and reset some flags. */
206 
207  if (from_rr_type == CHANX) {
208  for (i = rr_node[from_node].xlow - 1;
209  i <= rr_node[from_node].xhigh; i++) {
210  ntrans_sharing += shared_buffer_trans[i];
211  shared_buffer_trans[i] = 0.;
212  }
213 
214  for (i = rr_node[from_node].xlow; i <= rr_node[from_node].xhigh;
215  i++)
216  cblock_counted[i] = FALSE;
217 
218  } else { /* CHANY */
219  for (j = rr_node[from_node].ylow - 1;
220  j <= rr_node[from_node].yhigh; j++) {
221  ntrans_sharing += shared_buffer_trans[j];
222  shared_buffer_trans[j] = 0.;
223  }
224 
225  for (j = rr_node[from_node].ylow; j <= rr_node[from_node].yhigh;
226  j++)
227  cblock_counted[j] = FALSE;
228 
229  }
230  break;
231 
232  case OPIN:
233  num_edges = rr_node[from_node].num_edges;
234  shared_opin_buffer_trans = 0.;
235 
236  for (iedge = 0; iedge < num_edges; iedge++) {
237  iswitch = rr_node[from_node].switches[iedge];
238  ntrans_no_sharing += unsharable_switch_trans[iswitch]
239  + sharable_switch_trans[iswitch];
240  ntrans_sharing += unsharable_switch_trans[iswitch];
241 
242  shared_opin_buffer_trans = std::max(shared_opin_buffer_trans,
243  sharable_switch_trans[iswitch]);
244  }
245 
246  ntrans_sharing += shared_opin_buffer_trans;
247  break;
248 
249  default:
250  break;
251 
252  } /* End switch on from_rr_type */
253  } /* End for all nodes */
254 
255  free(cblock_counted);
256  free(shared_buffer_trans);
257  free(unsharable_switch_trans);
258  free(sharable_switch_trans);
259 
260  /* Now add in the input connection block transistors. */
261 
262  input_cblock_trans = get_cblock_trans(num_inputs_to_cblock,
263  max_inputs_to_cblock, trans_cblock_to_lblock_buf, trans_sram_bit);
264 
265  free(num_inputs_to_cblock);
266 
267  ntrans_sharing += input_cblock_trans;
268  ntrans_no_sharing += input_cblock_trans;
269 
270  vpr_printf(TIO_MESSAGE_INFO, "\n");
271  vpr_printf(TIO_MESSAGE_INFO, "Routing area (in minimum width transistor areas)...\n");
272  vpr_printf(TIO_MESSAGE_INFO, "\tAssuming no buffer sharing (pessimistic). Total: %#g, per logic tile: %#g\n",
273  ntrans_no_sharing, ntrans_no_sharing / (float) (nx * ny));
274  vpr_printf(TIO_MESSAGE_INFO, "\tAssuming buffer sharing (slightly optimistic). Total: %#g, per logic tile: %#g\n",
275  ntrans_sharing, ntrans_sharing / (float) (nx * ny));
276  vpr_printf(TIO_MESSAGE_INFO, "\n");
277 }
short xhigh
Definition: vpr_types.h:891
short num_edges
Definition: vpr_types.h:901
int seg_index_of_cblock(t_rr_type from_rr_type, int to_node)
Definition: rr_graph_util.c:40
int * edges
Definition: vpr_types.h:903
t_rr_node * rr_node
Definition: globals.c:70
void * my_calloc(size_t nelem, size_t size)
Definition: util.c:132
static float trans_per_buf(float Rbuf, float R_minW_nmos, float R_minW_pmos)
Definition: util.h:12
int seg_index_of_sblock(int from_node, int to_node)
Definition: rr_graph_util.c:52
static float * alloc_and_load_unsharable_switch_trans(int num_switch, float trans_sram_bit, float R_minW_nmos)
#define max(a, b)
Definition: graphics.c:171
int num_rr_nodes
Definition: globals.c:69
int nx
Definition: globals.c:46
static float * alloc_and_load_sharable_switch_trans(int num_switch, float trans_sram_bit, float R_minW_nmos, float R_minW_pmos)
struct s_switch_inf * switch_inf
Definition: globals.c:83
short yhigh
Definition: vpr_types.h:893
enum e_rr_type t_rr_type
short * switches
Definition: vpr_types.h:904
static float get_cblock_trans(int *num_inputs_to_cblock, int max_inputs_to_cblock, float trans_cblock_to_lblock_buf, float trans_sram_bit)
int ny
Definition: globals.c:47
messagelogger vpr_printf
Definition: util.c:17
t_rr_type type
Definition: vpr_types.h:902
Definition: util.h:12

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void count_routing_transistors ( enum e_directionality  directionality,
int  num_switch,
t_segment_inf *  segment_inf,
float  R_minW_nmos,
float  R_minW_pmos 
)

Definition at line 36 of file rr_graph_area.c.

38  {
39 
40  /* Counts how many transistors are needed to implement the FPGA routing *
41  * resources. Call this only when an rr_graph exists. It does not count *
42  * the transistors used in logic blocks, but it counts the transistors in *
43  * the input connection block multiplexers and in the output pin drivers and *
44  * pass transistors. NB: this routine assumes pass transistors always *
45  * generate two edges (one forward, one backward) between two nodes. *
46  * Physically, this is what happens -- make sure your rr_graph does it. *
47  * *
48  * I assume a minimum width transistor takes 1 unit of area. A double-width *
49  * transistor takes the twice the diffusion width, but the same spacing, so *
50  * I assume it takes 1.5x the area of a minimum-width transitor. */
51  if (directionality == BI_DIRECTIONAL) {
52  count_bidir_routing_transistors(num_switch, R_minW_nmos, R_minW_pmos);
53  } else {
54  assert(directionality == UNI_DIRECTIONAL);
55  count_unidir_routing_transistors(segment_inf, R_minW_nmos, R_minW_pmos);
56  }
57 }
static void count_unidir_routing_transistors(t_segment_inf *segment_inf, float R_minW_nmos, float R_minW_pmos)
static void count_bidir_routing_transistors(int num_switch, float R_minW_nmos, float R_minW_pmos)
Definition: rr_graph_area.c:59

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void count_unidir_routing_transistors ( t_segment_inf segment_inf,
float  R_minW_nmos,
float  R_minW_pmos 
)
static

Definition at line 279 of file rr_graph_area.c.

280  {
281  boolean * cblock_counted; /* [0..max(nx,ny)] -- 0th element unused. */
282  int *num_inputs_to_cblock; /* [0..num_rr_nodes-1], but all entries not */
283 
284  /* corresponding to IPINs will be 0. */
285 
286  t_rr_type from_rr_type, to_rr_type;
287  int i, j, iseg, from_node, to_node, iedge, num_edges, maxlen;
288  int max_inputs_to_cblock, cost_index, seg_type, switch_type;
289  float input_cblock_trans;
290  const float trans_sram_bit = 6.;
291 
292  /* Two variables below are the accumulator variables that add up all the *
293  * transistors in the routing. Make doubles so that they don't stop *
294  * incrementing once adding a switch makes a change of less than 1 part in *
295  * 10^7 to the total. If this still isn't good enough (adding 1 part in *
296  * 10^15 will still be thrown away), compute the transistor count in *
297  * "chunks", by adding up inodes 1 to 1000, 1001 to 2000 and then summing *
298  * the partial sums together. */
299 
300  double ntrans;
301 
302  /* Buffers from the routing to the ipin cblock inputs, and from the ipin *
303  * cblock outputs to the logic block, respectively. Assume minimum size n *
304  * transistors, and ptransistors sized to make the pull-up R = pull-down R. */
305 
306  float trans_track_to_cblock_buf;
307  float trans_cblock_to_lblock_buf;
308 
309  max_inputs_to_cblock = 0;
310 
311  /* Assume the two buffers below are 4x minimum drive strength (enough to *
312  * drive a fanout of up to 16 pretty nicely -- should cover a reasonable *
313  * wiring C plus the fanout. */
314 
315  trans_track_to_cblock_buf = trans_per_buf(R_minW_nmos / 4., R_minW_nmos,
316  R_minW_pmos);
317 
318  trans_cblock_to_lblock_buf = trans_per_buf(R_minW_nmos / 4., R_minW_nmos,
319  R_minW_pmos);
320 
321  num_inputs_to_cblock = (int *) my_calloc(num_rr_nodes, sizeof(int));
322  maxlen = std::max(nx, ny) + 1;
323  cblock_counted = (boolean *) my_calloc(maxlen, sizeof(boolean));
324 
325  ntrans = 0;
326  for (from_node = 0; from_node < num_rr_nodes; from_node++) {
327 
328  from_rr_type = rr_node[from_node].type;
329 
330  switch (from_rr_type) {
331 
332  case CHANX:
333  case CHANY:
334  num_edges = rr_node[from_node].num_edges;
335  cost_index = rr_node[from_node].cost_index;
336  seg_type = rr_indexed_data[cost_index].seg_index;
337  switch_type = segment_inf[seg_type].wire_switch;
338  assert(
339  segment_inf[seg_type].wire_switch == segment_inf[seg_type].opin_switch);
340  assert(switch_inf[switch_type].mux_trans_size >= 1);
341  /* can't be smaller than min sized transistor */
342 
343  assert(rr_node[from_node].num_opin_drivers == 0);
344  /* undir has no opin or wire switches */
345  assert(rr_node[from_node].num_wire_drivers == 0);
346  /* undir has no opin or wire switches */
347 
348  /* Each wire segment begins with a multipexer followed by a driver for unidirectional */
349  /* Each multiplexer contains all the fan-in to that routing node */
350  /* Add up area of multiplexer */
351  ntrans += trans_per_mux(rr_node[from_node].fan_in, trans_sram_bit,
352  switch_inf[switch_type].mux_trans_size);
353 
354  /* Add up area of buffer */
355  if (switch_inf[switch_type].buf_size == 0) {
356  ntrans += trans_per_buf(switch_inf[switch_type].R, R_minW_nmos,
357  R_minW_pmos);
358  } else {
359  ntrans += switch_inf[switch_type].buf_size;
360  }
361 
362  for (iedge = 0; iedge < num_edges; iedge++) {
363 
364  to_node = rr_node[from_node].edges[iedge];
365  to_rr_type = rr_node[to_node].type;
366 
367  switch (to_rr_type) {
368 
369  case CHANX:
370  case CHANY:
371  break;
372 
373  case IPIN:
374  num_inputs_to_cblock[to_node]++;
375  max_inputs_to_cblock = std::max(max_inputs_to_cblock,
376  num_inputs_to_cblock[to_node]);
377  iseg = seg_index_of_cblock(from_rr_type, to_node);
378 
379  if (cblock_counted[iseg] == FALSE) {
380  cblock_counted[iseg] = TRUE;
381  ntrans += trans_track_to_cblock_buf;
382  }
383  break;
384 
385  default:
386  vpr_printf(TIO_MESSAGE_ERROR, "in count_routing_transistors:\n");
387  vpr_printf(TIO_MESSAGE_ERROR, "\tUnexpected connection from node %d (type %d) to node %d (type %d).\n",
388  from_node, from_rr_type, to_node, to_rr_type);
389  exit(1);
390  break;
391 
392  } /* End switch on to_rr_type. */
393 
394  } /* End for each edge. */
395 
396  /* Reset some flags */
397  if (from_rr_type == CHANX) {
398  for (i = rr_node[from_node].xlow; i <= rr_node[from_node].xhigh;
399  i++)
400  cblock_counted[i] = FALSE;
401 
402  } else { /* CHANY */
403  for (j = rr_node[from_node].ylow; j <= rr_node[from_node].yhigh;
404  j++)
405  cblock_counted[j] = FALSE;
406 
407  }
408  break;
409  case OPIN:
410  break;
411 
412  default:
413  break;
414 
415  } /* End switch on from_rr_type */
416  } /* End for all nodes */
417 
418  /* Now add in the input connection block transistors. */
419 
420  input_cblock_trans = get_cblock_trans(num_inputs_to_cblock,
421  max_inputs_to_cblock, trans_cblock_to_lblock_buf, trans_sram_bit);
422 
423  free(cblock_counted);
424  free(num_inputs_to_cblock);
425 
426  ntrans += input_cblock_trans;
427 
428  vpr_printf(TIO_MESSAGE_INFO, "\n");
429  vpr_printf(TIO_MESSAGE_INFO, "Routing area (in minimum width transistor areas)...\n");
430  vpr_printf(TIO_MESSAGE_INFO, "\tTotal routing area: %#g, per logic tile: %#g\n", ntrans, ntrans / (float) (nx * ny));
431 }
short xhigh
Definition: vpr_types.h:891
short num_edges
Definition: vpr_types.h:901
short cost_index
Definition: vpr_types.h:897
int seg_index_of_cblock(t_rr_type from_rr_type, int to_node)
Definition: rr_graph_util.c:40
int * edges
Definition: vpr_types.h:903
t_rr_node * rr_node
Definition: globals.c:70
t_rr_indexed_data * rr_indexed_data
Definition: globals.c:74
void * my_calloc(size_t nelem, size_t size)
Definition: util.c:132
static float trans_per_buf(float Rbuf, float R_minW_nmos, float R_minW_pmos)
Definition: util.h:12
#define max(a, b)
Definition: graphics.c:171
int num_rr_nodes
Definition: globals.c:69
int nx
Definition: globals.c:46
struct s_switch_inf * switch_inf
Definition: globals.c:83
static float trans_per_mux(int num_inputs, float trans_sram_bit, float pass_trans_area)
short yhigh
Definition: vpr_types.h:893
enum e_rr_type t_rr_type
static float get_cblock_trans(int *num_inputs_to_cblock, int max_inputs_to_cblock, float trans_cblock_to_lblock_buf, float trans_sram_bit)
int ny
Definition: globals.c:47
messagelogger vpr_printf
Definition: util.c:17
t_rr_type type
Definition: vpr_types.h:902
Definition: util.h:12

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static float get_cblock_trans ( int *  num_inputs_to_cblock,
int  max_inputs_to_cblock,
float  trans_cblock_to_lblock_buf,
float  trans_sram_bit 
)
static

Definition at line 433 of file rr_graph_area.c.

435  {
436 
437  /* Computes the transistors in the input connection block multiplexers and *
438  * the buffers from connection block outputs to the logic block input pins. *
439  * For speed, I precompute the number of transistors in the multiplexers of *
440  * interest. */
441 
442  float *trans_per_cblock; /* [0..max_inputs_to_cblock] */
443  float trans_count;
444  int i, num_inputs;
445 
446  trans_per_cblock = (float *) my_malloc(
447  (max_inputs_to_cblock + 1) * sizeof(float));
448 
449  trans_per_cblock[0] = 0.; /* i.e., not an IPIN or no inputs */
450 
451  /* With one or more inputs, add the mux and output buffer. I add the output *
452  * buffer even when the number of inputs = 1 (i.e. no mux) because I assume *
453  * I need the drivability just for metal capacitance. */
454 
455  for (i = 1; i <= max_inputs_to_cblock; i++)
456  trans_per_cblock[i] = trans_per_mux(i, trans_sram_bit,
457  ipin_mux_trans_size) + trans_cblock_to_lblock_buf;
458 
459  trans_count = 0.;
460 
461  for (i = 0; i < num_rr_nodes; i++) {
462  num_inputs = num_inputs_to_cblock[i];
463  trans_count += trans_per_cblock[num_inputs];
464  }
465 
466  free(trans_per_cblock);
467  return (trans_count);
468 }
static void * my_malloc(int ibytes)
Definition: graphics.c:499
int num_rr_nodes
Definition: globals.c:69
static float trans_per_mux(int num_inputs, float trans_sram_bit, float pass_trans_area)
float ipin_mux_trans_size
Definition: globals.c:14

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static float trans_per_buf ( float  Rbuf,
float  R_minW_nmos,
float  R_minW_pmos 
)
static

Definition at line 528 of file rr_graph_area.c.

528  {
529 
530  /* Returns the number of minimum width transistor area equivalents needed to *
531  * implement this buffer. Assumes a stage ratio of 4, and equal strength *
532  * pull-up and pull-down paths. */
533 
534  int num_stage, istage;
535  float trans_count, stage_ratio, Rstage;
536 
537  if (Rbuf > 0.6 * R_minW_nmos || Rbuf <= 0.) { /* Use a single-stage buffer */
538  trans_count = trans_per_R(Rbuf, R_minW_nmos)
539  + trans_per_R(Rbuf, R_minW_pmos);
540  } else { /* Use a multi-stage buffer */
541 
542  /* Target stage ratio = 4. 1 minimum width buffer, then num_stage bigger *
543  * ones. */
544 
545  num_stage = nint(log10(R_minW_nmos / Rbuf) / log10(4.));
546  num_stage = std::max(num_stage, 1);
547  stage_ratio = pow((float)(R_minW_nmos / Rbuf), (float)( 1. / (float) num_stage));
548 
549  Rstage = R_minW_nmos;
550  trans_count = 0.;
551 
552  for (istage = 0; istage <= num_stage; istage++) {
553  trans_count += trans_per_R(Rstage, R_minW_nmos)
554  + trans_per_R(Rstage, R_minW_pmos);
555  Rstage /= stage_ratio;
556  }
557  }
558 
559  return (trans_count);
560 }
#define nint(a)
Definition: util.h:24
#define max(a, b)
Definition: graphics.c:171
static float trans_per_R(float Rtrans, float R_minW_trans)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static float trans_per_mux ( int  num_inputs,
float  trans_sram_bit,
float  pass_trans_area 
)
static

Definition at line 562 of file rr_graph_area.c.

/* Returns the number of transistors needed to build a pass transistor mux.
 * DOES NOT include input buffers or any output buffer.
 *
 * num_inputs      : number of mux inputs.
 * trans_sram_bit  : area of one SRAM configuration bit.
 * pass_trans_area : area of one pass transistor.
 *
 * Sizing rules:
 *   - 0 or 1 inputs : no mux at all, returns 0.
 *   - 2 inputs      : two pass transistors controlled by one SRAM bit.
 *   - 3 or 4 inputs : one level, one-hot encoded (one pass transistor and
 *                     one SRAM bit per input).
 *   - 5 or more     : two levels, with about sqrt(num_inputs) second-stage
 *                     pass transistors. */
static float trans_per_mux(int num_inputs, float trans_sram_bit,
        float pass_trans_area) {

    float pass_trans, sram_trans;

    if (num_inputs <= 1) {
        return (0);
    }

    if (num_inputs == 2) {
        pass_trans = 2 * pass_trans_area;
        sram_trans = 1 * trans_sram_bit;
    } else if (num_inputs <= 4) {
        /* One-hot encoding */
        pass_trans = num_inputs * pass_trans_area;
        sram_trans = num_inputs * trans_sram_bit;
    } else {
        /* This is a large multiplexer so design it using a two-level
         * multiplexer.  The + 0.00001 makes sure exact square roots don't
         * get rounded down to one lower level. */
        int num_second_stage_trans =
                (int)floor((float)sqrt((float)num_inputs) + 0.00001);

        pass_trans = (num_inputs + num_second_stage_trans) * pass_trans_area;

        /* One SRAM bit per first-stage select line plus one per
         * second-stage transistor. */
        sram_trans = (ceil(
                (float) num_inputs / num_second_stage_trans - 0.00001)
                + num_second_stage_trans) * trans_sram_bit;

        if (num_second_stage_trans == 2) {
            /* Can use one bit instead of a two-bit one-hot encoding for the
             * second stage.  Eliminates one SRAM bit counted earlier. */
            sram_trans -= 1 * trans_sram_bit;
        }
    }

    return (pass_trans + sram_trans);
}

+ Here is the caller graph for this function:

static float trans_per_R ( float  Rtrans,
float  R_minW_trans 
)
static

Definition at line 602 of file rr_graph_area.c.

/* Returns the number of minimum width transistor area equivalents needed
 * to make a transistor with resistance Rtrans, given that the resistance of
 * a minimum width transistor of this type is R_minW_trans.
 *
 * A non-positive Rtrans is treated as nonsense and a resistance at or above
 * R_minW_trans needs nothing wider than minimum width; both return 1.
 *
 * Otherwise: area = minimum width area (1) + 0.5 for each additional unit
 * of width.  The 50% factor takes into account the "overlapping" that
 * occurs in horizontally-paralleled transistors, and the need for only one
 * spacing, not two (i.e. two min W transistors need two spaces; a 2W
 * transistor needs only 1). */
static float trans_per_R(float Rtrans, float R_minW_trans) {

    if (Rtrans <= 0. || Rtrans >= R_minW_trans) {
        return (1.);
    }

    /* Width in minimum-width units is R_minW_trans / Rtrans, so
     * 1 + 0.5 * (width - 1) simplifies to 0.5 * width + 0.5. */
    return ((float) (0.5 * R_minW_trans / Rtrans + 0.5));
}

+ Here is the caller graph for this function: