dc/dfb/path__delay_8c_source.html

 #include <stdio.h>

 #include <string.h>

 #include <limits.h>

 #include <math.h>

 #include "util.h"

 #include "vpr_types.h"

 #include "globals.h"

 #include "path_delay.h"

 #include "path_delay2.h"

 #include "net_delay.h"

 #include "vpr_utils.h"

 #include <assert.h>

 #include "read_xml_arch_file.h"

 #include "ReadOptions.h"

 #include "read_sdc.h"

 #include "stats.h"


 /**************************** Top-level summary ******************************


 Timing analysis by Vaughn Betz, Jason Luu, and Michael Wainberg.


 Timing analysis is a three-step process:

 1. Interpret the constraints specified by the user in an SDC (Synopsys Design

 Constraints) file or, if none is specified, use default constraints.

 2. Convert the pre- or post-packed netlist (depending on the stage of the

 flow) into a timing graph, where nodes represent pins and edges represent

 dependencies and delays between pins (see "Timing graph structure", below).

 3. Traverse the timing graph to obtain information about which connections

 to optimize, as well as statistics for the user.


 Steps 1 and 2 are performed through one of two helper functions:

 alloc_and_load_timing_graph and alloc_and_load_pre_packing_timing_graph.

 The first step is to create the timing graph, which is stored in the array

 tnode ("timing node").  This is done through alloc_and_load_tnodes (post-

 packed) or alloc_and_load_tnodes_from_prepacked_netlist. Then, the timing

 graph is topologically sorted ("levelized") in

 alloc_and_load_timing_graph_levels, to allow for faster traversals later.


 read_sdc reads the SDC file, interprets its contents and stores them in

 the data structure g_sdc.  (This data structure does not need to remain

 global but it is probably easier, since its information is used in both

 netlists and only needs to be read in once.)


 load_clock_domain_and_clock_and_io_delay then gives each flip-flop and I/O

 the index of a constrained clock from the SDC file in g_sdc->constrained_

 clocks, or -1 if an I/O is unconstrained.


 process_constraints does a pre-traversal through the timing graph and prunes

 all constraints between domains that never intersect so they are not analysed.


 Step 3 is performed through do_timing_analysis.  For each constraint between

 a pair of clock domains we do a forward traversal from the "source" domain

 to the "sink" domain to compute each tnode's arrival time, the time when the

 latest signal would arrive at the node.  We also do a backward traversal to

 compute required time, the time when the earliest signal has to leave the

 node to meet the constraint.  The "slack" of each sink pin on each net is

 basically the difference between the two times.


 If path counting is on, we calculate a forward and backward path weight in

 do_path_counting.  These represent the importance of paths fanning,

 respectively, into and out of this pin, in such a way that paths with a

 higher slack are discounted exponentially in importance.


 If path counting is off and we are using the pre-packed netlist, we also

 calculate normalized costs for the clusterer (normalized arrival time, slack

 and number of critical paths) in normalized_costs. The clusterer uses these

 to calculate a criticality for each block.


 Finally, in update_slacks, we calculate the slack for each sink pin on each net

 for printing, as well as a derived metric, timing criticality, which the

 optimizers actually use. If path counting is on, we calculate a path criticality

 from the forward and backward weights on each tnode. */


 /************************* Timing graph structure ****************************


 Author:  V. Betz


 We can build timing graphs that match either the primitive (logical_block)

 netlist (of basic elements before clustering, like FFs and LUTs) or that

 match the clustered netlist (block).  You pass in the is_pre_packed flag to

 say which kind of netlist and timing graph you are working with.


 Every used (not OPEN) block pin becomes a timing node, both on primitive

 blocks and (if you�re building the timing graph that matches a clustered

 netlist) on clustered blocks.  For the clustered (not pre_packed) timing

 graph, every used pin within a clustered (pb_type) block also becomes a timing

 node.  So a CLB that contains ALMs that contains LUTs will have nodes created

 for the CLB pins, ALM pins and LUT pins, with edges that connect them as

 specified in the clustered netlist.  Unused (OPEN) pins don't create any

 timing nodes.


 The primitive blocks have edges from the tnodes that represent input pins and

 output pins that represent the timing dependencies within these lowest level

 blocks (e.g. LUTs and FFs and IOs).  The exact edges created come from reading

 the architecture file.  A LUT for example will have delays specified from all

 its inputs to its output, and hence we will create a tedge from each tnode

 corresponding to an input pin of the LUT to the tnode that represents the LUT

 output pin.  The delay marked on each edge is read from the architecture file.


 A FF has nodes representing its pins, but also two extra nodes, representing

 the TN_FF_SINK and TN_FF_SOURCE.  Those two nodes represent the internal storage

 node of the FF.  The TN_FF_SINK has edges going to it from the regular FF inputs

 (but not the clock input), with appropriate delays.  It has no outgoing edges,

 since it terminates timing paths. The TN_FF_SOURCE has an edge going from it to

 the FF output pin, with the appropriate delay.  TN_FF_SOURCES have no edges; they

 start timing paths.  FF clock pins have no outgoing edges, but the delay to

 them can be looked up, and is used in the timing traversals to compute clock

 delay for slack computations.


 In the timing graph I create, input pads and constant generators have no

 inputs (incoming edges), just like TN_FF_SOURCES.  Every input pad and output

 pad is represented by two tnodes -- an input pin/source and an output pin/

 sink.  For an input pad the input source comes from off chip and has no fanin,

 while the output pin drives signals within the chip.  For output pads, the

 input pin is driven by signal (net) within the chip, and the output sink node

 goes off chip and has no fanout (out-edges).  I need two nodes to respresent

 things like pads because I mark all delay on tedges, not on tnodes.


 One other subblock that needs special attention is a constant generator.

 This has no used inputs, but its output is used.  I create an extra tnode,

 a dummy input, in addition to the output pin tnode.  The dummy tnode has

 no fanin.  Since constant generators really generate their outputs at T =

 -infinity, I set the delay from the input tnode to the output to a large-

 magnitude negative number.  This guarantees every block that needs the

 output of a constant generator sees it available very early.


 The main structure of the timing graph is given by the nodes and the edges

 that connect them.  We also store some extra information on tnodes that

 (1) lets us figure out how to map from a tnode back to the netlist pin (or

 other item) it represents and (2) figure out what clock domain it is on, if

 it is a timing path start point (no incoming edges) or end point (no outgoing

 edges) and (3) lets us figure out the delay of the clock to that node, if it

 is a timing path start or end point.


 To map efficiently from tedges back to the netlist pins, we create the tedge

 array driven by a tnode the represents a netlist output pin *in the same order

 as the netlist net pins*.  That means the edge index for the tedge array from

 such a tnode guarantees iedge = net_pin_index  1.  The code to map slacks

 from the timing graph back to the netlist relies on this. */


 /*************************** Global variables *******************************/


 t_tnode *tnode = NULL; /* [0..num_tnodes - 1] */

 int num_tnodes = 0; /* Number of nodes (pins) in the timing graph */


 /******************** Variables local to this module ************************/


 #define NUM_BUCKETS 5 /* Used when printing slack and criticality. */


 /* Variables for "chunking" the tedge memory.  If the head pointer in tedge_ch is NULL, *

  * no timing graph exists now.                                              */


 static t_chunk tedge_ch = {NULL, 0, NULL};


 static struct s_net *timing_nets = NULL;


 static int num_timing_nets = 0;


 static t_timing_stats * f_timing_stats = NULL; /* Critical path delay and worst-case slack per constraint. */


 static int * f_net_to_driver_tnode;

 /* [0..num_nets - 1]. Gives the index of the tnode that drives each net.

 Used for both pre- and post-packed netlists. If you just want the number

 of edges on the driver tnode, use:

     num_edges = timing_nets[inet].num_sinks;

 instead of the equivalent but more convoluted:

     driver_tnode = f_net_to_driver_tnode[inet];

     num_edges = tnode[driver_tnode].num_edges;

 Only use this array if you want the actual edges themselves or the index

 of the driver tnode. */


 /***************** Subroutines local to this module *************************/


 static t_slack * alloc_slacks(void);


 static void update_slacks(t_slack * slacks, int source_clock_domain, int sink_clock_domain, float criticality_denom,

     boolean update_slack);


 static void alloc_and_load_tnodes(t_timing_inf timing_inf);


 static void alloc_and_load_tnodes_from_prepacked_netlist(float block_delay,

         float inter_cluster_net_delay);


 static void alloc_timing_stats(void);


 static float do_timing_analysis_for_constraint(int source_clock_domain, int sink_clock_domain,

     boolean is_prepacked, boolean is_final_analysis, long * max_critical_input_paths_ptr,

     long * max_critical_output_paths_ptr);


 #ifdef PATH_COUNTING

 static void do_path_counting(float criticality_denom);

 #endif


 static void do_lut_rebalancing();

 static void load_tnode(INP t_pb_graph_pin *pb_graph_pin, INP int iblock,

         INOUTP int *inode, INP t_timing_inf timing_inf);


 #ifndef PATH_COUNTING

 static void update_normalized_costs(float T_arr_max_this_domain, long max_critical_input_paths,

         long max_critical_output_paths);

 #endif


 static void print_primitive_as_blif (FILE *fpout, int iblk);


 static void set_and_balance_arrival_time(int to_node, int from_node, float Tdel, boolean do_lut_input_balancing);


 static void load_clock_domain_and_clock_and_io_delay(boolean is_prepacked);


 static char * find_tnode_net_name(int inode, boolean is_prepacked);


 static t_tnode * find_ff_clock_tnode(int inode, boolean is_prepacked);


 static inline int get_tnode_index(t_tnode * node);


 static inline boolean has_valid_T_arr(int inode);


 static inline boolean has_valid_T_req(int inode);


 static int find_clock(char * net_name);


 static int find_input(char * net_name);


 static int find_output(char * net_name);


 static int find_cf_constraint(char * source_clock_name, char * sink_ff_name);


 static void propagate_clock_domain_and_skew(int inode);


 static void process_constraints(void);


 static void print_global_criticality_stats(FILE * fp, float ** criticality, const char * singular_name, const char * capitalized_plural_name);


 static void print_timing_constraint_info(const char *fname);


 static void print_spaces(FILE * fp, int num_spaces);


 /********************* Subroutine definitions *******************************/


 t_slack * alloc_and_load_timing_graph(t_timing_inf timing_inf) {


     /* This routine builds the graph used for timing analysis.  Every cb pin is a

      * timing node (tnode).  The connectivity between pins is                   *

      * represented by timing edges (tedges).  All delay is marked on edges, not *

      * on nodes.  Returns two arrays that will store slack values:              *

      * slack and criticality ([0..num_nets-1][1..num_pins]).           */


     /*  For pads, only the first two pin locations are used (input to pad is first,

      * output of pad is second).  For CLBs, all OPEN pins on the cb have their

      * mapping set to OPEN so I won't use it by mistake.                          */


     int num_sinks;

     t_slack * slacks = NULL;

     boolean do_process_constraints = FALSE;


     if (tedge_ch.chunk_ptr_head != NULL) {

         vpr_printf(TIO_MESSAGE_ERROR, "in alloc_and_load_timing_graph: An old timing graph still exists.\n");

         exit(1);

     }

     num_timing_nets = num_nets;

     timing_nets = clb_net;


     alloc_and_load_tnodes(timing_inf);


     num_sinks = alloc_and_load_timing_graph_levels();


     check_timing_graph(num_sinks);


     slacks = alloc_slacks();


     if (g_sdc == NULL) {

         /* the SDC timing constraints only need to be read in once; *

          * if they haven't been already, do it now                  */

         read_sdc(timing_inf);

         do_process_constraints = TRUE;

     }


     load_clock_domain_and_clock_and_io_delay(FALSE);


     if (do_process_constraints)

         process_constraints();


     if (f_timing_stats == NULL)

         alloc_timing_stats();


     return slacks;

 }


 t_slack * alloc_and_load_pre_packing_timing_graph(float block_delay,

         float inter_cluster_net_delay, t_model *models, t_timing_inf timing_inf) {


     /* This routine builds the graph used for timing analysis.  Every technology-

      * mapped netlist pin is a timing node (tnode).  The connectivity between pins is *

      * represented by timing edges (tedges).  All delay is marked on edges, not *

      * on nodes.  Returns two arrays that will store slack values:               *

      * slack and criticality ([0..num_nets-1][1..num_pins]).           */


     /*  For pads, only the first two pin locations are used (input to pad is first,

      * output of pad is second).  For CLBs, all OPEN pins on the cb have their

      * mapping set to OPEN so I won't use it by mistake.                          */


     int num_sinks;

     t_slack * slacks = NULL;

     boolean do_process_constraints = FALSE;


     if (tedge_ch.chunk_ptr_head != NULL) {

         vpr_printf(TIO_MESSAGE_ERROR, "in alloc_and_load_timing_graph: An old timing graph still exists.\n");

         exit(1);

     }


     num_timing_nets = num_logical_nets;

     timing_nets = vpack_net;


     alloc_and_load_tnodes_from_prepacked_netlist(block_delay,

             inter_cluster_net_delay);


     num_sinks = alloc_and_load_timing_graph_levels();


     slacks = alloc_slacks();


     check_timing_graph(num_sinks);


     if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_PRE_PACKING_TIMING_GRAPH_AS_BLIF)) {

         print_timing_graph_as_blif(getEchoFileName(E_ECHO_PRE_PACKING_TIMING_GRAPH_AS_BLIF), models);

     }


     if (g_sdc == NULL) {

         /* the SDC timing constraints only need to be read in once; *

          * if they haven't been already, do it now                  */

         read_sdc(timing_inf);

         do_process_constraints = TRUE;

     }


     load_clock_domain_and_clock_and_io_delay(TRUE);


     if (do_process_constraints)

         process_constraints();


     if (f_timing_stats == NULL)

         alloc_timing_stats();


     return slacks;

 }


 static t_slack * alloc_slacks(void) {


     /* Allocates the slack, criticality and path_criticality structures

     ([0..num_nets-1][1..num_pins-1]). Chunk allocated to save space. */


     int inet;

     t_slack * slacks = (t_slack *) my_malloc(sizeof(t_slack));


     slacks->slack   = (float **) my_malloc(num_timing_nets * sizeof(float *));

     slacks->timing_criticality = (float **) my_malloc(num_timing_nets * sizeof(float *));

 #ifdef PATH_COUNTING

     slacks->path_criticality = (float **) my_malloc(num_timing_nets * sizeof(float *));

 #endif

     for (inet = 0; inet < num_timing_nets; inet++) {

         slacks->slack[inet]   = (float *) my_chunk_malloc((timing_nets[inet].num_sinks + 1) * sizeof(float), &tedge_ch);

         slacks->timing_criticality[inet] = (float *) my_chunk_malloc((timing_nets[inet].num_sinks + 1) * sizeof(float), &tedge_ch);

 #ifdef PATH_COUNTING

         slacks->path_criticality[inet] = (float *) my_chunk_malloc((timing_nets[inet].num_sinks + 1) * sizeof(float), &tedge_ch);

 #endif

     }


     return slacks;

 }


 void load_timing_graph_net_delays(float **net_delay) {


     /* Sets the delays of the inter-CLB nets to the values specified by          *

      * net_delay[0..num_nets-1][1..num_pins-1].  These net delays should have    *

      * been allocated and loaded with the net_delay routines.  This routine      *

      * marks the corresponding edges in the timing graph with the proper delay.  */


     int inet, ipin, inode;

     t_tedge *tedge;


     for (inet = 0; inet < num_timing_nets; inet++) {

         inode = f_net_to_driver_tnode[inet];

         tedge = tnode[inode].out_edges;


         /* Note that the edges of a tnode corresponding to a CLB or INPAD opin must  *

          * be in the same order as the pins of the net driven by the tnode.          */


         for (ipin = 1; ipin < (timing_nets[inet].num_sinks + 1); ipin++)

             tedge[ipin - 1].Tdel = net_delay[inet][ipin];

     }

 }


 void free_timing_graph(t_slack * slacks) {


     int inode;


     if (tedge_ch.chunk_ptr_head == NULL) {

         vpr_printf(TIO_MESSAGE_ERROR, "in free_timing_graph: No timing graph to free.\n");

         exit(1);

     }


     free_chunk_memory(&tedge_ch);


     if (tnode[0].prepacked_data) {

         /* If we allocated prepacked_data for the first node,

         it must be allocated for all other nodes too. */

         for (inode = 0; inode < num_tnodes; inode++) {

             free(tnode[inode].prepacked_data);

         }

     }

     free(tnode);

     free(f_net_to_driver_tnode);

     free_ivec_vector(tnodes_at_level, 0, num_tnode_levels - 1);


     free(slacks->slack);

     free(slacks->timing_criticality);

 #ifdef PATH_COUNTING

     free(slacks->path_criticality);

 #endif

     free(slacks);


     tnode = NULL;

     num_tnodes = 0;

     f_net_to_driver_tnode = NULL;

     tnodes_at_level = NULL;

     num_tnode_levels = 0;

     slacks = NULL;

 }


 void free_timing_stats(void) {

     int i;

     if(f_timing_stats != NULL) {

         for (i = 0; i < g_sdc->num_constrained_clocks; i++) {

             free(f_timing_stats->cpd[i]);

             free(f_timing_stats->least_slack[i]);

         }

         free(f_timing_stats->cpd);

         free(f_timing_stats->least_slack);

         free(f_timing_stats);

     }

     f_timing_stats = NULL;

 }


 void print_slack(float ** slack, boolean slack_is_normalized, const char *fname) {


     /* Prints slacks into a file. */


     int inet, iedge, ibucket, driver_tnode, num_edges, num_unused_slacks = 0;

     t_tedge * tedge;

     FILE *fp;

     float max_slack = HUGE_NEGATIVE_FLOAT, min_slack = HUGE_POSITIVE_FLOAT,

         total_slack = 0, total_negative_slack = 0, bucket_size, slk;

     int slacks_in_bucket[NUM_BUCKETS];


     fp = my_fopen(fname, "w", 0);


     if (slack_is_normalized) {

         fprintf(fp, "The following slacks have been normalized to be non-negative by "

                     "relaxing the required times to the maximum arrival time.\n\n");

     }


     /* Go through slack once to get the largest and smallest slack, both for reporting and

        so that we can delimit the buckets. Also calculate the total negative slack in the design. */

     for (inet = 0; inet < num_timing_nets; inet++) {

         num_edges = timing_nets[inet].num_sinks;

         for (iedge = 0; iedge < num_edges; iedge++) {

             slk = slack[inet][iedge + 1];

             if (slk < HUGE_POSITIVE_FLOAT - 1) { /* if slack was analysed */

                 max_slack = std::max(max_slack, slk);

                 min_slack = std::min(min_slack, slk);

                 total_slack += slk;

                 if (slk < NEGATIVE_EPSILON) {

                     total_negative_slack -= slk; /* By convention, we'll have total_negative_slack be a positive number. */

                 }

             } else { /* slack was never analysed */

                 num_unused_slacks++;

             }

         }

     }


     if (max_slack > HUGE_NEGATIVE_FLOAT + 1) {

         fprintf(fp, "Largest slack in design: %g\n", max_slack);

     } else {

         fprintf(fp, "Largest slack in design: --\n");

     }


     if (min_slack < HUGE_POSITIVE_FLOAT - 1) {

         fprintf(fp, "Smallest slack in design: %g\n", min_slack);

     } else {

         fprintf(fp, "Smallest slack in design: --\n");

     }


     fprintf(fp, "Total slack in design: %g\n", total_slack);

     fprintf(fp, "Total negative slack: %g\n", total_negative_slack);


     if (max_slack - min_slack > EPSILON) { /* Only sort the slacks into buckets if not all slacks are the same (if they are identical, no need to sort). */

         /* Initialize slacks_in_bucket, an array counting how many slacks are within certain linearly-spaced ranges (buckets). */

         for (ibucket = 0; ibucket < NUM_BUCKETS; ibucket++) {

             slacks_in_bucket[ibucket] = 0;

         }


         /* The size of each bucket is the range of slacks, divided by the number of buckets. */

         bucket_size = (max_slack - min_slack)/NUM_BUCKETS;


         /* Now, pass through again, incrementing the number of slacks in the nth bucket

             for each slack between (min_slack + n*bucket_size) and (min_slack + (n+1)*bucket_size). */


         for (inet = 0; inet < num_timing_nets; inet++) {

             num_edges = timing_nets[inet].num_sinks;

             for (iedge = 0; iedge < num_edges; iedge++) {

                 slk = slack[inet][iedge + 1];

                 if (slk < HUGE_POSITIVE_FLOAT - 1) {

                     /* We have to watch out for the special case where slack = max_slack, in which case ibucket = NUM_BUCKETS and we go out of bounds of the array. */

                     ibucket = std::min(NUM_BUCKETS - 1, (int) ((slk - min_slack)/bucket_size));

                     assert(ibucket >= 0 && ibucket < NUM_BUCKETS);

                     slacks_in_bucket[ibucket]++;

                 }

             }

         }


         /* Now print how many slacks are in each bucket. */

         fprintf(fp, "\n\nRange\t\t");

         for (ibucket = 0; ibucket < NUM_BUCKETS; ibucket++) {

             fprintf(fp, "%.1e to ", min_slack);

             min_slack += bucket_size;

             fprintf(fp, "%.1e\t", min_slack);

         }

         fprintf(fp, "Not analysed");

         fprintf(fp, "\nSlacks in range\t\t");

         for (ibucket = 0; ibucket < NUM_BUCKETS; ibucket++) {

             fprintf(fp, "%d\t\t\t", slacks_in_bucket[ibucket]);

         }

         fprintf(fp, "%d", num_unused_slacks);

     }


     /* Finally, print all the slacks, organized by net. */

     fprintf(fp, "\n\nNet #\tDriver_tnode\tto_node\tSlack\n\n");


     for (inet = 0; inet < num_timing_nets; inet++) {

         driver_tnode = f_net_to_driver_tnode[inet];

         num_edges = tnode[driver_tnode].num_edges;

         tedge = tnode[driver_tnode].out_edges;

         slk = slack[inet][1];

         if (slk < HUGE_POSITIVE_FLOAT - 1) {

             fprintf(fp, "%5d\t%5d\t\t%5d\t%g\n", inet, driver_tnode, tedge[0].to_node, slk);

         } else { /* Slack is meaningless, so replace with --. */

             fprintf(fp, "%5d\t%5d\t\t%5d\t--\n", inet, driver_tnode, tedge[0].to_node);

         }

         for (iedge = 1; iedge < num_edges; iedge++) { /* newline and indent subsequent edges after the first */

             slk = slack[inet][iedge+1];

             if (slk < HUGE_POSITIVE_FLOAT - 1) {

                 fprintf(fp, "\t\t\t%5d\t%g\n", tedge[iedge].to_node, slk);

             } else { /* Slack is meaningless, so replace with --. */

                 fprintf(fp, "\t\t\t%5d\t--\n", tedge[iedge].to_node);

             }

         }

     }


     fclose(fp);

 }


 void print_criticality(t_slack * slacks, boolean criticality_is_normalized, const char *fname) {


     /* Prints timing criticalities (and path criticalities if enabled) into a file. */


     int inet, iedge, driver_tnode, num_edges;

     t_tedge * tedge;

     FILE *fp;


     fp = my_fopen(fname, "w", 0);


     if (criticality_is_normalized) {

         fprintf(fp, "Timing criticalities have been normalized to be non-negative by "

                     "relaxing the required times to the maximum arrival time.\n\n");

     }


     print_global_criticality_stats(fp, slacks->timing_criticality, "timing criticality", "Timing criticalities");

 #ifdef PATH_COUNTING

     print_global_criticality_stats(fp, slacks->path_criticality, "path criticality", "Path criticalities");

 #endif


     /* Finally, print all the criticalities, organized by net. */

     fprintf(fp, "\n\nNet #\tDriver_tnode\t to_node\tTiming criticality"

 #ifdef PATH_COUNTING

         "\tPath criticality"

 #endif

         "\n");


     for (inet = 0; inet < num_timing_nets; inet++) {

         driver_tnode = f_net_to_driver_tnode[inet];

         num_edges = tnode[driver_tnode].num_edges;

         tedge = tnode[driver_tnode].out_edges;


         fprintf(fp, "\n%5d\t%5d\t\t%5d\t\t%.6f", inet, driver_tnode, tedge[0].to_node, slacks->timing_criticality[inet][1]);

 #ifdef PATH_COUNTING

         fprintf(fp, "\t\t%g", slacks->path_criticality[inet][1]);

 #endif

         for (iedge = 1; iedge < num_edges; iedge++) { /* newline and indent subsequent edges after the first */

             fprintf(fp, "\n\t\t\t%5d\t\t%.6f", tedge[iedge].to_node, slacks->timing_criticality[inet][iedge+1]);

 #ifdef PATH_COUNTING

             fprintf(fp, "\t\t%g", slacks->path_criticality[inet][iedge+1]);

 #endif

         }

     }


     fclose(fp);

 }


 static void print_global_criticality_stats(FILE * fp, float ** criticality, const char * singular_name, const char * capitalized_plural_name) {


     /* Prints global stats for timing or path criticality to the file pointed to by fp,

     including maximum criticality, minimum criticality, total criticality in the design,

     and the number of criticalities within various ranges, or buckets. */


     int inet, iedge, num_edges, ibucket, criticalities_in_bucket[NUM_BUCKETS];

     float crit, max_criticality = HUGE_NEGATIVE_FLOAT, min_criticality = HUGE_POSITIVE_FLOAT,

         total_criticality = 0, bucket_size;


     /* Go through criticality once to get the largest and smallest timing criticality,

     both for reporting and so that we can delimit the buckets. */

     for (inet = 0; inet < num_timing_nets; inet++) {

         num_edges = timing_nets[inet].num_sinks;

         for (iedge = 0; iedge < num_edges; iedge++) {

             crit = criticality[inet][iedge + 1];

             max_criticality = std::max(max_criticality, crit);

             min_criticality = std::min(min_criticality, crit);

             total_criticality += crit;

         }

     }


     fprintf(fp, "Largest %s in design: %g\n", singular_name, max_criticality);

     fprintf(fp, "Smallest %s in design: %g\n", singular_name, min_criticality);

     fprintf(fp, "Total %s in design: %g\n", singular_name, total_criticality);


     if (max_criticality - min_criticality > EPSILON) { /* Only sort the criticalities into buckets if not all criticalities are the same (if they are identical, no need to sort). */

         /* Initialize criticalities_in_bucket, an array counting how many criticalities are within certain linearly-spaced ranges (buckets). */

         for (ibucket = 0; ibucket < NUM_BUCKETS; ibucket++) {

             criticalities_in_bucket[ibucket] = 0;

         }


         /* The size of each bucket is the range of criticalities, divided by the number of buckets. */

         bucket_size = (max_criticality - min_criticality)/NUM_BUCKETS;


         /* Now, pass through again, incrementing the number of criticalities in the nth bucket

             for each criticality between (min_criticality + n*bucket_size) and (min_criticality + (n+1)*bucket_size). */


         for (inet = 0; inet < num_timing_nets; inet++) {

             num_edges = timing_nets[inet].num_sinks;

             for (iedge = 0; iedge < num_edges; iedge++) {

                 crit = criticality[inet][iedge + 1];

                 /* We have to watch out for the special case where criticality = max_criticality, in which case ibucket = NUM_BUCKETS and we go out of bounds of the array. */

                 ibucket = std::min(NUM_BUCKETS - 1, (int) ((crit - min_criticality)/bucket_size));

                 assert(ibucket >= 0 && ibucket < NUM_BUCKETS);

                 criticalities_in_bucket[ibucket]++;

             }

         }


         /* Now print how many criticalities are in each bucket. */

         fprintf(fp, "\nRange\t\t");

         for (ibucket = 0; ibucket < NUM_BUCKETS; ibucket++) {

             fprintf(fp, "%.1e to ", min_criticality);

             min_criticality += bucket_size;

             fprintf(fp, "%.1e\t", min_criticality);

         }

         fprintf(fp, "\n%s in range\t\t", capitalized_plural_name);

         for (ibucket = 0; ibucket < NUM_BUCKETS; ibucket++) {

             fprintf(fp, "%d\t\t\t", criticalities_in_bucket[ibucket]);

         }

     }

     fprintf(fp, "\n\n");

 }


 void print_net_delay(float **net_delay, const char *fname) {


     /* Prints the net delays into a file. */


     int inet, iedge, driver_tnode, num_edges;

     t_tedge * tedge;

     FILE *fp;


     fp = my_fopen(fname, "w", 0);


     fprintf(fp, "Net #\tDriver_tnode\tto_node\tDelay\n\n");


     for (inet = 0; inet < num_timing_nets; inet++) {

         driver_tnode = f_net_to_driver_tnode[inet];

         num_edges = tnode[driver_tnode].num_edges;

         tedge = tnode[driver_tnode].out_edges;

         fprintf(fp, "%5d\t%5d\t\t%5d\t%g\n", inet, driver_tnode, tedge[0].to_node, net_delay[inet][1]);

         for (iedge = 1; iedge < num_edges; iedge++) { /* newline and indent subsequent edges after the first */

             fprintf(fp, "\t\t\t%5d\t%g\n", tedge[iedge].to_node, net_delay[inet][iedge+1]);

         }

     }


     fclose(fp);

 }


 #ifndef PATH_COUNTING

 void print_clustering_timing_info(const char *fname) {

     /* Print information from tnodes which is used by the clusterer. */

     int inode;

     FILE *fp;


     fp = my_fopen(fname, "w", 0);


     fprintf(fp, "inode  ");

     if (g_sdc->num_constrained_clocks <= 1) {

         /* These values are from the last constraint analysed,

         so they're not meaningful unless there was only one constraint. */

         fprintf(fp, "Critical input paths  Critical output paths  ");

     }

     fprintf(fp, "Normalized slack  Normalized Tarr  Normalized total crit paths\n");

     for (inode = 0; inode < num_tnodes; inode++) {

         fprintf(fp, "%d\t", inode);

         /* Only print normalized values for tnodes which have valid normalized values. (If normalized_T_arr is valid, the others will be too.) */

         if (has_valid_normalized_T_arr(inode)) {

             if (g_sdc->num_constrained_clocks <= 1) {

                 fprintf(fp, "%ld\t\t\t%ld\t\t\t", tnode[inode].prepacked_data->num_critical_input_paths, tnode[inode].prepacked_data->num_critical_output_paths);

             }

             fprintf(fp, "%f\t%f\t%f\n", tnode[inode].prepacked_data->normalized_slack, tnode[inode].prepacked_data->normalized_T_arr, tnode[inode].prepacked_data->normalized_total_critical_paths);

         } else {

             if (g_sdc->num_constrained_clocks <= 1) {

                 fprintf(fp, "--\t\t\t--\t\t\t");

             }

             fprintf(fp, "--\t\t--\t\t--\n");

         }

     }


     fclose(fp);

 }

 #endif

 /* Count # of tnodes, allocates space, and loads the tnodes and its associated edges */

 static void alloc_and_load_tnodes(t_timing_inf timing_inf) {

     int i, j, k;

     int inode;

     int num_nodes_in_block;

     int count;

     int iblock, irr_node;

     int inet, dport, dpin, dblock, dnode;

     int normalized_pin, normalization;

     t_pb_graph_pin *ipb_graph_pin;

     t_rr_node *local_rr_graph, *d_rr_graph;

     int num_dangling_pins;


     f_net_to_driver_tnode = (int*)my_malloc(num_timing_nets * sizeof(int));


     for (i = 0; i < num_timing_nets; i++) {

         f_net_to_driver_tnode[i] = OPEN;

     }


     /* allocate space for tnodes */

     num_tnodes = 0;

     for (i = 0; i < num_blocks; i++) {

         num_nodes_in_block = 0;

         for (j = 0; j < block[i].pb->pb_graph_node->total_pb_pins; j++) {

             if (block[i].pb->rr_graph[j].net_num != OPEN) {

                 if (block[i].pb->rr_graph[j].pb_graph_pin->type == PB_PIN_INPAD

                         || block[i].pb->rr_graph[j].pb_graph_pin->type

                                 == PB_PIN_OUTPAD

                         || block[i].pb->rr_graph[j].pb_graph_pin->type

                                 == PB_PIN_SEQUENTIAL) {

                     num_nodes_in_block += 2;

                 } else {

                     num_nodes_in_block++;

                 }

             }

         }

         num_tnodes += num_nodes_in_block;

     }

     tnode = (t_tnode*)my_calloc(num_tnodes, sizeof(t_tnode));


     /* load tnodes with all info except edge info */

     /* populate tnode lookups for edge info */

     inode = 0;

     for (i = 0; i < num_blocks; i++) {

         for (j = 0; j < block[i].pb->pb_graph_node->total_pb_pins; j++) {

             if (block[i].pb->rr_graph[j].net_num != OPEN) {

                 assert(tnode[inode].pb_graph_pin == NULL);

                 load_tnode(block[i].pb->rr_graph[j].pb_graph_pin, i, &inode,

                         timing_inf);

             }

         }

     }

     assert(inode == num_tnodes);

     num_dangling_pins = 0;


     /* load edge delays and initialize clock domains to OPEN

     and prepacked_data (which is not used post-packing) to NULL. */

     for (i = 0; i < num_tnodes; i++) {

         tnode[i].clock_domain = OPEN;

         tnode[i].prepacked_data = NULL;


         /* 3 primary scenarios for edge delays

          1.  Point-to-point delays inside block

          2.

          */

         count = 0;

         iblock = tnode[i].block;

         switch (tnode[i].type) {

         case TN_INPAD_OPIN:

         case TN_INTERMEDIATE_NODE:

         case TN_PRIMITIVE_OPIN:

         case TN_FF_OPIN:

         case TN_CB_IPIN:

             /* fanout is determined by intra-cluster connections */

             /* Allocate space for edges  */

             irr_node = tnode[i].pb_graph_pin->pin_count_in_cluster;

             local_rr_graph = block[iblock].pb->rr_graph;

             ipb_graph_pin = local_rr_graph[irr_node].pb_graph_pin;


             if (ipb_graph_pin->parent_node->pb_type->max_internal_delay

                     != UNDEFINED) {

                 if (pb_max_internal_delay == UNDEFINED) {

                     pb_max_internal_delay =

                             ipb_graph_pin->parent_node->pb_type->max_internal_delay;

                     pbtype_max_internal_delay =

                             ipb_graph_pin->parent_node->pb_type;

                 } else if (pb_max_internal_delay

                         < ipb_graph_pin->parent_node->pb_type->max_internal_delay) {

                     pb_max_internal_delay =

                             ipb_graph_pin->parent_node->pb_type->max_internal_delay;

                     pbtype_max_internal_delay =

                             ipb_graph_pin->parent_node->pb_type;

                 }

             }


             for (j = 0; j < block[iblock].pb->rr_graph[irr_node].num_edges;

                     j++) {

                 dnode = local_rr_graph[irr_node].edges[j];

                 if ((local_rr_graph[dnode].prev_node == irr_node)

                         && (j == local_rr_graph[dnode].prev_edge)) {

                     count++;

                 }

             }

             assert(count > 0);

             tnode[i].num_edges = count;

             tnode[i].out_edges = (t_tedge *) my_chunk_malloc(

                     count * sizeof(t_tedge), &tedge_ch);


             /* Load edges */

             count = 0;

             for (j = 0; j < local_rr_graph[irr_node].num_edges; j++) {

                 dnode = local_rr_graph[irr_node].edges[j];

                 if ((local_rr_graph[dnode].prev_node == irr_node)

                         && (j == local_rr_graph[dnode].prev_edge)) {

                     assert(

                             (ipb_graph_pin->output_edges[j]->num_output_pins == 1) && (local_rr_graph[ipb_graph_pin->output_edges[j]->output_pins[0]->pin_count_in_cluster].net_num == local_rr_graph[irr_node].net_num));


                     tnode[i].out_edges[count].Tdel =

                             ipb_graph_pin->output_edges[j]->delay_max;

                     tnode[i].out_edges[count].to_node =

                             get_tnode_index(local_rr_graph[dnode].tnode);


                     if (vpack_net[local_rr_graph[irr_node].net_num].is_const_gen

                             == TRUE && tnode[i].type == TN_PRIMITIVE_OPIN) {

                         tnode[i].out_edges[count].Tdel = HUGE_NEGATIVE_FLOAT;

                         tnode[i].type = TN_CONSTANT_GEN_SOURCE;

                     }


                     count++;

                 }

             }

             assert(count > 0);


             break;

         case TN_PRIMITIVE_IPIN:

             /* Pin info comes from pb_graph block delays

              */

             /*there would be no slack information if timing analysis is off*/

             if (timing_inf.timing_analysis_enabled)

             {

             irr_node = tnode[i].pb_graph_pin->pin_count_in_cluster;

             local_rr_graph = block[iblock].pb->rr_graph;

             ipb_graph_pin = local_rr_graph[irr_node].pb_graph_pin;

             tnode[i].num_edges = ipb_graph_pin->num_pin_timing;

             tnode[i].out_edges = (t_tedge *) my_chunk_malloc(

                     ipb_graph_pin->num_pin_timing * sizeof(t_tedge),

                     &tedge_ch);

             k = 0;


             for (j = 0; j < tnode[i].num_edges; j++) {

                 /* Some outpins aren't used, ignore these.  Only consider output pins that are used */

                 if (local_rr_graph[ipb_graph_pin->pin_timing[j]->pin_count_in_cluster].net_num

                         != OPEN) {

                     tnode[i].out_edges[k].Tdel =

                             ipb_graph_pin->pin_timing_del_max[j];

                     tnode[i].out_edges[k].to_node =

                             get_tnode_index(local_rr_graph[ipb_graph_pin->pin_timing[j]->pin_count_in_cluster].tnode);

                     assert(tnode[i].out_edges[k].to_node != OPEN);

                     k++;

                 }

             }

             tnode[i].num_edges -= (j - k); /* remove unused edges */

             if (tnode[i].num_edges == 0) {

                 /* Dangling pin */

                 num_dangling_pins++;

             }

             }

             break;

         case TN_CB_OPIN:

             /* load up net info */

             irr_node = tnode[i].pb_graph_pin->pin_count_in_cluster;

             local_rr_graph = block[iblock].pb->rr_graph;

             ipb_graph_pin = local_rr_graph[irr_node].pb_graph_pin;

             assert(local_rr_graph[irr_node].net_num != OPEN);

             inet = vpack_to_clb_net_mapping[local_rr_graph[irr_node].net_num];

             assert(inet != OPEN);

             f_net_to_driver_tnode[inet] = i;

             tnode[i].num_edges = clb_net[inet].num_sinks;

             tnode[i].out_edges = (t_tedge *) my_chunk_malloc(

                     clb_net[inet].num_sinks * sizeof(t_tedge),

                     &tedge_ch);

             for (j = 1; j <= clb_net[inet].num_sinks; j++) {

                 dblock = clb_net[inet].node_block[j];

                 normalization = block[dblock].type->num_pins

                         / block[dblock].type->capacity;

                 normalized_pin = clb_net[inet].node_block_pin[j]

                         % normalization;

                 d_rr_graph = block[dblock].pb->rr_graph;

                 dpin = OPEN;

                 dport = OPEN;

                 count = 0;


                 for (k = 0;

                         k < block[dblock].pb->pb_graph_node->num_input_ports

                                 && dpin == OPEN; k++) {

                     if (normalized_pin >= count

                             && (count

                                     + block[dblock].pb->pb_graph_node->num_input_pins[k]

                                     > normalized_pin)) {

                         dpin = normalized_pin - count;

                         dport = k;

                         break;

                     }

                     count += block[dblock].pb->pb_graph_node->num_input_pins[k];

                 }

                 if (dpin == OPEN) {

                     for (k = 0;

                             k

                                     < block[dblock].pb->pb_graph_node->num_output_ports

                                     && dpin == OPEN; k++) {

                         count +=

                                 block[dblock].pb->pb_graph_node->num_output_pins[k];

                     }

                     for (k = 0;

                             k < block[dblock].pb->pb_graph_node->num_clock_ports

                                     && dpin == OPEN; k++) {

                         if (normalized_pin >= count

                                 && (count

                                         + block[dblock].pb->pb_graph_node->num_clock_pins[k]

                                         > normalized_pin)) {

                             dpin = normalized_pin - count;

                             dport = k;

                         }

                         count +=

                                 block[dblock].pb->pb_graph_node->num_clock_pins[k];

                     }

                     assert(dpin != OPEN);

                     assert(

                             inet == vpack_to_clb_net_mapping[d_rr_graph[block[dblock].pb->pb_graph_node->clock_pins[dport][dpin].pin_count_in_cluster].net_num]);

                     tnode[i].out_edges[j - 1].to_node =

                             get_tnode_index(d_rr_graph[block[dblock].pb->pb_graph_node->clock_pins[dport][dpin].pin_count_in_cluster].tnode);

                 } else {

                     assert(dpin != OPEN);

                     assert(

                             inet == vpack_to_clb_net_mapping[d_rr_graph[block[dblock].pb->pb_graph_node->input_pins[dport][dpin].pin_count_in_cluster].net_num]);

                     /* delays are assigned post routing */

                     tnode[i].out_edges[j - 1].to_node =

                             get_tnode_index(d_rr_graph[block[dblock].pb->pb_graph_node->input_pins[dport][dpin].pin_count_in_cluster].tnode);

                 }

                 tnode[i].out_edges[j - 1].Tdel = 0;

                 assert(inet != OPEN);

             }

             break;

         case TN_OUTPAD_IPIN:

         case TN_INPAD_SOURCE:

         case TN_OUTPAD_SINK:

         case TN_FF_SINK:

         case TN_FF_SOURCE:

         case TN_FF_IPIN:

         case TN_FF_CLOCK:

             break;

         default:

             vpr_printf(TIO_MESSAGE_ERROR, "Consistency check failed: Unknown tnode type %d.\n", tnode[i].type);

             assert(0);

             break;

         }

     }

     if(num_dangling_pins > 0) {

         vpr_printf(TIO_MESSAGE_WARNING, "Unconnected logic in design, number of dangling tnodes = %d\n", num_dangling_pins);

     }

 }


 /* Allocate timing graph for pre packed netlist

  Count number of tnodes first

  Then connect up tnodes with edges

  */

 static void alloc_and_load_tnodes_from_prepacked_netlist(float block_delay,

         float inter_cluster_net_delay) {

     int i, j, k;

     t_model *model;

     t_model_ports *model_port;

     t_pb_graph_pin *from_pb_graph_pin, *to_pb_graph_pin;

     int inode, inet;

     int incr;

     int count;


     f_net_to_driver_tnode = (int*)my_malloc(num_logical_nets * sizeof(int));


     for (i = 0; i < num_logical_nets; i++) {

         f_net_to_driver_tnode[i] = OPEN;

     }


     /* allocate space for tnodes */

     num_tnodes = 0;

     for (i = 0; i < num_logical_blocks; i++) {

         model = logical_block[i].model;

         logical_block[i].clock_net_tnode = NULL;

         if (logical_block[i].type == VPACK_INPAD) {

             logical_block[i].output_net_tnodes = (t_tnode***)my_calloc(1,

                     sizeof(t_tnode**));

             num_tnodes += 2;

         } else if (logical_block[i].type == VPACK_OUTPAD) {

             logical_block[i].input_net_tnodes = (t_tnode***)my_calloc(1, sizeof(t_tnode**));

             num_tnodes += 2;

         } else {

             if (logical_block[i].clock_net == OPEN) {

                 incr = 1;

             } else {

                 incr = 2;

             }

             j = 0;

             model_port = model->inputs;

             while (model_port) {

                 if (model_port->is_clock == FALSE) {

                     for (k = 0; k < model_port->size; k++) {

                         if (logical_block[i].input_nets[j][k] != OPEN) {

                             num_tnodes += incr;

                         }

                     }

                     j++;

                 } else {

                     num_tnodes++;

                 }

                 model_port = model_port->next;

             }

             logical_block[i].input_net_tnodes = (t_tnode ***)my_calloc(j, sizeof(t_tnode**));


             j = 0;

             model_port = model->outputs;

             while (model_port) {

                 for (k = 0; k < model_port->size; k++) {

                     if (logical_block[i].output_nets[j][k] != OPEN) {

                         num_tnodes += incr;

                     }

                 }

                 j++;

                 model_port = model_port->next;

             }

             logical_block[i].output_net_tnodes = (t_tnode ***)my_calloc(j,

                     sizeof(t_tnode**));

         }

     }

     tnode = (t_tnode *)my_calloc(num_tnodes, sizeof(t_tnode));


     /* Allocate space for prepacked_data, which is only used pre-packing. */

     for (inode = 0; inode < num_tnodes; inode++) {

         tnode[inode].prepacked_data = (t_prepacked_tnode_data *) my_malloc(sizeof(t_prepacked_tnode_data));

     }


     /* load tnodes, alloc edges for tnodes, load all known tnodes */

     inode = 0;

     for (i = 0; i < num_logical_blocks; i++) {

         model = logical_block[i].model;

         if (logical_block[i].type == VPACK_INPAD) {

             logical_block[i].output_net_tnodes[0] = (t_tnode **)my_calloc(1,

                     sizeof(t_tnode*));

             logical_block[i].output_net_tnodes[0][0] = &tnode[inode];

             f_net_to_driver_tnode[logical_block[i].output_nets[0][0]] = inode;

             tnode[inode].prepacked_data->model_pin = 0;

             tnode[inode].prepacked_data->model_port = 0;

             tnode[inode].prepacked_data->model_port_ptr = model->outputs;

             tnode[inode].block = i;

             tnode[inode].type = TN_INPAD_OPIN;


             tnode[inode].num_edges =

                     vpack_net[logical_block[i].output_nets[0][0]].num_sinks;

             tnode[inode].out_edges = (t_tedge *) my_chunk_malloc(

                     tnode[inode].num_edges * sizeof(t_tedge),

                     &tedge_ch);

             tnode[inode + 1].num_edges = 1;

             tnode[inode + 1].out_edges = (t_tedge *) my_chunk_malloc(

                     1 * sizeof(t_tedge), &tedge_ch);

             tnode[inode + 1].out_edges->Tdel = 0;

             tnode[inode + 1].out_edges->to_node = inode;

             tnode[inode + 1].type = TN_INPAD_SOURCE;

             tnode[inode + 1].block = i;

             inode += 2;

         } else if (logical_block[i].type == VPACK_OUTPAD) {

             logical_block[i].input_net_tnodes[0] = (t_tnode **)my_calloc(1,

                     sizeof(t_tnode*));

             logical_block[i].input_net_tnodes[0][0] = &tnode[inode];

             tnode[inode].prepacked_data->model_pin = 0;

             tnode[inode].prepacked_data->model_port = 0;

             tnode[inode].prepacked_data->model_port_ptr = model->inputs;

             tnode[inode].block = i;

             tnode[inode].type = TN_OUTPAD_IPIN;

             tnode[inode].num_edges = 1;

             tnode[inode].out_edges = (t_tedge *) my_chunk_malloc(

                     1 * sizeof(t_tedge), &tedge_ch);

             tnode[inode].out_edges->Tdel = 0;

             tnode[inode].out_edges->to_node = inode + 1;

             tnode[inode + 1].type = TN_OUTPAD_SINK;

             tnode[inode + 1].block = i;

             tnode[inode + 1].num_edges = 0;

             tnode[inode + 1].out_edges = NULL;

             inode += 2;

         } else {

             j = 0;

             model_port = model->outputs;

             while (model_port) {

                 logical_block[i].output_net_tnodes[j] = (t_tnode **)my_calloc(

                         model_port->size, sizeof(t_tnode*));

                 for (k = 0; k < model_port->size; k++) {

                     if (logical_block[i].output_nets[j][k] != OPEN) {

                         tnode[inode].prepacked_data->model_pin = k;

                         tnode[inode].prepacked_data->model_port = j;

                         tnode[inode].prepacked_data->model_port_ptr = model_port;

                         tnode[inode].block = i;

                         f_net_to_driver_tnode[logical_block[i].output_nets[j][k]] =

                                 inode;

                         logical_block[i].output_net_tnodes[j][k] =

                                 &tnode[inode];


                         tnode[inode].num_edges =

                                 vpack_net[logical_block[i].output_nets[j][k]].num_sinks;

                         tnode[inode].out_edges = (t_tedge *) my_chunk_malloc(

                                 tnode[inode].num_edges * sizeof(t_tedge),

                                 &tedge_ch);


                         if (logical_block[i].clock_net == OPEN) {

                             tnode[inode].type = TN_PRIMITIVE_OPIN;

                             inode++;

                         } else {

                             /* load delays from predicted clock-to-Q time */

                             from_pb_graph_pin = get_pb_graph_node_pin_from_model_port_pin(model_port, k, logical_block[i].expected_lowest_cost_primitive);

                             tnode[inode].type = TN_FF_OPIN;

                             tnode[inode + 1].num_edges = 1;

                             tnode[inode + 1].out_edges =

                                     (t_tedge *) my_chunk_malloc(

                                             1 * sizeof(t_tedge),

                                             &tedge_ch);

                             tnode[inode + 1].out_edges->to_node = inode;

                             tnode[inode + 1].out_edges->Tdel = from_pb_graph_pin->tsu_tco;

                             tnode[inode + 1].type = TN_FF_SOURCE;

                             tnode[inode + 1].block = i;

                             inode += 2;

                         }

                     }

                 }

                 j++;

                 model_port = model_port->next;

             }


             j = 0;

             model_port = model->inputs;

             while (model_port) {

                 if (model_port->is_clock == FALSE) {

                     logical_block[i].input_net_tnodes[j] = (t_tnode **)my_calloc(

                             model_port->size, sizeof(t_tnode*));

                     for (k = 0; k < model_port->size; k++) {

                         if (logical_block[i].input_nets[j][k] != OPEN) {

                             tnode[inode].prepacked_data->model_pin = k;

                             tnode[inode].prepacked_data->model_port = j;

                             tnode[inode].prepacked_data->model_port_ptr = model_port;

                             tnode[inode].block = i;

                             logical_block[i].input_net_tnodes[j][k] =

                                     &tnode[inode];

                             from_pb_graph_pin = get_pb_graph_node_pin_from_model_port_pin(model_port, k, logical_block[i].expected_lowest_cost_primitive);

                             if (logical_block[i].clock_net == OPEN) {

                                 /* load predicted combinational delays to predicted edges */

                                 tnode[inode].type = TN_PRIMITIVE_IPIN;

                                 tnode[inode].out_edges =

                                         (t_tedge *) my_chunk_malloc(

                                         from_pb_graph_pin->num_pin_timing * sizeof(t_tedge),

                                                 &tedge_ch);

                                 count = 0;

                                 for(int m = 0; m < from_pb_graph_pin->num_pin_timing; m++) {

                                     to_pb_graph_pin = from_pb_graph_pin->pin_timing[m];

                                     if(logical_block[i].output_nets[to_pb_graph_pin->port->model_port->index][to_pb_graph_pin->pin_number] == OPEN) {

                                         continue;

                                     }

                                     tnode[inode].out_edges[count].Tdel = from_pb_graph_pin->pin_timing_del_max[m];

                                     tnode[inode].out_edges[count].to_node =

                                         get_tnode_index(logical_block[i].output_net_tnodes[to_pb_graph_pin->port->model_port->index][to_pb_graph_pin->pin_number]);

                                     count++;

                                 }

                                 tnode[inode].num_edges = count;

                                 inode++;

                             } else {

                                 /* load predicted setup time */

                                 tnode[inode].type = TN_FF_IPIN;

                                 tnode[inode].num_edges = 1;

                                 tnode[inode].out_edges =

                                         (t_tedge *) my_chunk_malloc(

                                                 1 * sizeof(t_tedge),

                                                 &tedge_ch);

                                 tnode[inode].out_edges->to_node = inode + 1;

                                 tnode[inode].out_edges->Tdel = from_pb_graph_pin->tsu_tco;

                                 tnode[inode + 1].type = TN_FF_SINK;

                                 tnode[inode + 1].num_edges = 0;

                                 tnode[inode + 1].out_edges = NULL;

                                 tnode[inode + 1].block = i;

                                 inode += 2;

                             }

                         }

                     }

                     j++;

                 } else {

                     if (logical_block[i].clock_net != OPEN) {

                         assert(logical_block[i].clock_net_tnode == NULL);

                         logical_block[i].clock_net_tnode = &tnode[inode];

                         tnode[inode].block = i;

                         tnode[inode].prepacked_data->model_pin = 0;

                         tnode[inode].prepacked_data->model_port = 0;

                         tnode[inode].prepacked_data->model_port_ptr = model_port;

                         tnode[inode].num_edges = 0;

                         tnode[inode].out_edges = NULL;

                         tnode[inode].type = TN_FF_CLOCK;

                         inode++;

                     }

                 }

                 model_port = model_port->next;

             }

         }

     }

     assert(inode == num_tnodes);


     /* load edge delays and initialize clock domains to OPEN. */

     for (i = 0; i < num_tnodes; i++) {

         tnode[i].clock_domain = OPEN;


         /* 3 primary scenarios for edge delays

          1.  Point-to-point delays inside block

          2.

          */

         count = 0;

         switch (tnode[i].type) {

         case TN_INPAD_OPIN:

         case TN_PRIMITIVE_OPIN:

         case TN_FF_OPIN:

             /* fanout is determined by intra-cluster connections */

             /* Allocate space for edges  */

             inet =

                     logical_block[tnode[i].block].output_nets[tnode[i].prepacked_data->model_port][tnode[i].prepacked_data->model_pin];

             assert(inet != OPEN);


             for (j = 1; j <= vpack_net[inet].num_sinks; j++) {

                 if (vpack_net[inet].is_const_gen) {

                     tnode[i].out_edges[j - 1].Tdel = HUGE_NEGATIVE_FLOAT;

                     tnode[i].type = TN_CONSTANT_GEN_SOURCE;

                 } else {

                     tnode[i].out_edges[j - 1].Tdel = inter_cluster_net_delay;

                 }

                 if (vpack_net[inet].is_global) {

                     assert(

                             logical_block[vpack_net[inet].node_block[j]].clock_net == inet);

                     tnode[i].out_edges[j - 1].to_node =

                             get_tnode_index(logical_block[vpack_net[inet].node_block[j]].clock_net_tnode);

                 } else {

                     assert(

                         logical_block[vpack_net[inet].node_block[j]].input_net_tnodes[vpack_net[inet].node_block_port[j]][vpack_net[inet].node_block_pin[j]] != NULL);

                     tnode[i].out_edges[j - 1].to_node =

                             get_tnode_index(logical_block[vpack_net[inet].node_block[j]].input_net_tnodes[vpack_net[inet].node_block_port[j]][vpack_net[inet].node_block_pin[j]]);

                 }

             }

             assert(tnode[i].num_edges == vpack_net[inet].num_sinks);

             break;

         case TN_PRIMITIVE_IPIN:

         case TN_OUTPAD_IPIN:

         case TN_INPAD_SOURCE:

         case TN_OUTPAD_SINK:

         case TN_FF_SINK:

         case TN_FF_SOURCE:

         case TN_FF_IPIN:

         case TN_FF_CLOCK:

             break;

         default:

             vpr_printf(TIO_MESSAGE_ERROR, "Consistency check failed: Unknown tnode type %d.\n", tnode[i].type);

             assert(0);

             break;

         }

     }


     for (i = 0; i < num_logical_nets; i++) {

         assert(f_net_to_driver_tnode[i] != OPEN);

     }

 }


 static void load_tnode(INP t_pb_graph_pin *pb_graph_pin, INP int iblock,

         INOUTP int *inode, INP t_timing_inf timing_inf) {

     int i;

     i = *inode;

     tnode[i].pb_graph_pin = pb_graph_pin;

     tnode[i].block = iblock;

     block[iblock].pb->rr_graph[pb_graph_pin->pin_count_in_cluster].tnode =

             &tnode[i];

     if (tnode[i].pb_graph_pin->parent_node->pb_type->blif_model == NULL) {

         assert(tnode[i].pb_graph_pin->type == PB_PIN_NORMAL);

         if (tnode[i].pb_graph_pin->parent_node->parent_pb_graph_node == NULL) {

             if (tnode[i].pb_graph_pin->port->type == IN_PORT) {

                 tnode[i].type = TN_CB_IPIN;

             } else {

                 assert(tnode[i].pb_graph_pin->port->type == OUT_PORT);

                 tnode[i].type = TN_CB_OPIN;

             }

         } else {

             tnode[i].type = TN_INTERMEDIATE_NODE;

         }

     } else {

         if (tnode[i].pb_graph_pin->type == PB_PIN_INPAD) {

             assert(tnode[i].pb_graph_pin->port->type == OUT_PORT);

             tnode[i].type = TN_INPAD_OPIN;

             tnode[i + 1].num_edges = 1;

             tnode[i + 1].out_edges = (t_tedge *) my_chunk_malloc(

                     1 * sizeof(t_tedge), &tedge_ch);

             tnode[i + 1].out_edges->Tdel = 0;

             tnode[i + 1].out_edges->to_node = i;

             tnode[i + 1].pb_graph_pin = pb_graph_pin; /* Necessary for propagate_clock_domain_and_skew(). */

             tnode[i + 1].type = TN_INPAD_SOURCE;

             tnode[i + 1].block = iblock;

             (*inode)++;

         } else if (tnode[i].pb_graph_pin->type == PB_PIN_OUTPAD) {

             assert(tnode[i].pb_graph_pin->port->type == IN_PORT);

             tnode[i].type = TN_OUTPAD_IPIN;

             tnode[i].num_edges = 1;

             tnode[i].out_edges = (t_tedge *) my_chunk_malloc(

                     1 * sizeof(t_tedge), &tedge_ch);

             tnode[i].out_edges->Tdel = 0;

             tnode[i].out_edges->to_node = i + 1;

             tnode[i + 1].pb_graph_pin = pb_graph_pin; /* Necessary for find_tnode_net_name(). */

             tnode[i + 1].type = TN_OUTPAD_SINK;

             tnode[i + 1].block = iblock;

             tnode[i + 1].num_edges = 0;

             tnode[i + 1].out_edges = NULL;

             (*inode)++;

         } else if (tnode[i].pb_graph_pin->type == PB_PIN_SEQUENTIAL) {

             if (tnode[i].pb_graph_pin->port->type == IN_PORT) {

                 tnode[i].type = TN_FF_IPIN;

                 tnode[i].num_edges = 1;

                 tnode[i].out_edges = (t_tedge *) my_chunk_malloc(

                         1 * sizeof(t_tedge), &tedge_ch);

                 tnode[i].out_edges->Tdel = pb_graph_pin->tsu_tco;

                 tnode[i].out_edges->to_node = i + 1;

                 tnode[i + 1].pb_graph_pin = pb_graph_pin;

                 tnode[i + 1].type = TN_FF_SINK;

                 tnode[i + 1].block = iblock;

                 tnode[i + 1].num_edges = 0;

                 tnode[i + 1].out_edges = NULL;

             } else {

                 assert(tnode[i].pb_graph_pin->port->type == OUT_PORT);

                 tnode[i].type = TN_FF_OPIN;

                 tnode[i + 1].num_edges = 1;

                 tnode[i + 1].out_edges = (t_tedge *) my_chunk_malloc(

                         1 * sizeof(t_tedge), &tedge_ch);

                 tnode[i + 1].out_edges->Tdel = pb_graph_pin->tsu_tco;

                 tnode[i + 1].out_edges->to_node = i;

                 tnode[i + 1].pb_graph_pin = pb_graph_pin;

                 tnode[i + 1].type = TN_FF_SOURCE;

                 tnode[i + 1].block = iblock;

             }

             (*inode)++;

         } else if (tnode[i].pb_graph_pin->type == PB_PIN_CLOCK) {

             tnode[i].type = TN_FF_CLOCK;

             tnode[i].num_edges = 0;

             tnode[i].out_edges = NULL;

         } else {

             if (tnode[i].pb_graph_pin->port->type == IN_PORT) {

                 assert(tnode[i].pb_graph_pin->type == PB_PIN_TERMINAL);

                 tnode[i].type = TN_PRIMITIVE_IPIN;

             } else {

                 assert(tnode[i].pb_graph_pin->port->type == OUT_PORT);

                 assert(tnode[i].pb_graph_pin->type == PB_PIN_TERMINAL);

                 tnode[i].type = TN_PRIMITIVE_OPIN;

             }

         }

     }

     (*inode)++;

 }


 void print_timing_graph(const char *fname) {


     /* Prints the timing graph into a file. */


     FILE *fp;

     int inode, iedge, ilevel, i;

     t_tedge *tedge;

     e_tnode_type itype;

     const char *tnode_type_names[] = {  "TN_INPAD_SOURCE", "TN_INPAD_OPIN", "TN_OUTPAD_IPIN",


             "TN_OUTPAD_SINK", "TN_CB_IPIN", "TN_CB_OPIN", "TN_INTERMEDIATE_NODE",

             "TN_PRIMITIVE_IPIN", "TN_PRIMITIVE_OPIN", "TN_FF_IPIN", "TN_FF_OPIN", "TN_FF_SINK",

             "TN_FF_SOURCE", "TN_FF_CLOCK", "TN_CONSTANT_GEN_SOURCE" };


     fp = my_fopen(fname, "w", 0);


     fprintf(fp, "num_tnodes: %d\n", num_tnodes);

     fprintf(fp, "Node #\tType\t\tipin\tiblk\tDomain\tSkew\tI/O Delay\t# edges\t"

             "to_node     Tdel\n\n");


     for (inode = 0; inode < num_tnodes; inode++) {

         fprintf(fp, "%d\t", inode);


         itype = tnode[inode].type;

         fprintf(fp, "%-15.15s\t", tnode_type_names[itype]);


         if (tnode[inode].pb_graph_pin != NULL) {

             fprintf(fp, "%d\t%d\t",

                     tnode[inode].pb_graph_pin->pin_count_in_cluster,

                     tnode[inode].block);

         } else {

             fprintf(fp, "\t%d\t", tnode[inode].block);

         }


         if (itype == TN_FF_CLOCK || itype == TN_FF_SOURCE || itype == TN_FF_SINK) {

             fprintf(fp, "%d\t%.3e\t\t", tnode[inode].clock_domain, tnode[inode].clock_delay);

         } else if (itype == TN_INPAD_SOURCE) {

             fprintf(fp, "%d\t\t%.3e\t", tnode[inode].clock_domain, tnode[inode].out_edges[0].Tdel);

         } else if (itype == TN_OUTPAD_SINK) {

             assert(tnode[inode-1].type == TN_OUTPAD_IPIN); /* Outpad ipins should be one prior in the tnode array */

             fprintf(fp, "%d\t\t%.3e\t", tnode[inode].clock_domain, tnode[inode-1].out_edges[0].Tdel);

         } else {

             fprintf(fp, "\t\t\t\t");

         }


         fprintf(fp, "%d", tnode[inode].num_edges);


         /* Print all edges after edge 0 on separate lines */

         tedge = tnode[inode].out_edges;

         if (tnode[inode].num_edges > 0) {

             fprintf(fp, "\t%4d\t%7.3g", tedge[0].to_node, tedge[0].Tdel);

             for (iedge = 1; iedge < tnode[inode].num_edges; iedge++) {

                 fprintf(fp, "\n\t\t\t\t\t\t\t\t\t\t%4d\t%7.3g", tedge[iedge].to_node, tedge[iedge].Tdel);

             }

         }

         fprintf(fp, "\n");

     }


     fprintf(fp, "\n\nnum_tnode_levels: %d\n", num_tnode_levels);


     for (ilevel = 0; ilevel < num_tnode_levels; ilevel++) {

         fprintf(fp, "\n\nLevel: %d  Num_nodes: %d\nNodes:", ilevel,

                 tnodes_at_level[ilevel].nelem);

         for (i = 0; i < tnodes_at_level[ilevel].nelem; i++)

             fprintf(fp, "\t%d", tnodes_at_level[ilevel].list[i]);

     }


     fprintf(fp, "\n");

     fprintf(fp, "\n\nNet #\tNet_to_driver_tnode\n");


     for (i = 0; i < num_nets; i++)

         fprintf(fp, "%4d\t%6d\n", i, f_net_to_driver_tnode[i]);


     if (g_sdc->num_constrained_clocks == 1) {

         /* Arrival and required times, and forward and backward weights, will be meaningless for multiclock

         designs, since the values currently on the graph will only correspond to the most recent traversal. */

         fprintf(fp, "\n\nNode #\t\tT_arr\t\tT_req"

 #ifdef PATH_COUNTING

             "\tForward weight\tBackward weight"

 #endif

             "\n\n");


         for (inode = 0; inode < num_tnodes; inode++) {

             if (tnode[inode].T_arr > HUGE_NEGATIVE_FLOAT + 1) {

                 fprintf(fp, "%d\t%12g", inode, tnode[inode].T_arr);

             } else {

                 fprintf(fp, "%d\t\t   -", inode);

             }

             if (tnode[inode].T_req < HUGE_POSITIVE_FLOAT - 1) {

                 fprintf(fp, "\t%12g", tnode[inode].T_req);

             } else {

                 fprintf(fp, "\t\t   -");

             }

 #ifdef PATH_COUNTING

             fprintf(fp, "\t%12g\t%12g\n", tnode[inode].forward_weight, tnode[inode].backward_weight);

 #endif

         }

     }


     fclose(fp);

 }

 static void process_constraints(void) {

     /* Removes all constraints between domains which never intersect. We need to do this

     so that criticality_denom in do_timing_analysis is not affected by unused constraints.

     BFS through the levelized graph once for each source domain. Whenever we get to a sink,

     mark off that we've used that sink clock domain.  After each traversal, set all unused

     constraints to DO_NOT_ANALYSE.


     Also, print g_sdc->domain_constraints, constrained I/Os and override constraints,

     and convert g_sdc->domain_constraints and flip-flop-level override constraints

     to be in seconds rather than nanoseconds. We don't need to normalize g_sdc->cc_constraints

     because they're already on the g_sdc->domain_constraints matrix, and we don't need

     to normalize constrained_ios because we already did the normalization when

     we put the delays onto the timing graph in load_clock_domain_and_clock_and_io_delay. */


     int source_clock_domain, sink_clock_domain, inode, ilevel, num_at_level, i,

         num_edges, iedge, to_node, icf, ifc, iff;

     t_tedge * tedge;

     float constraint;

     boolean * constraint_used = (boolean *) my_malloc(g_sdc->num_constrained_clocks * sizeof(boolean));


     for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) {

         /* We're going to use arrival time to flag which nodes we've reached,

         even though the values we put in will not correspond to actual arrival times.

         Nodes which are reached on this traversal will get an arrival time of 0.

         Reset arrival times now to an invalid number. */

         for (inode = 0; inode < num_tnodes; inode++) {

             tnode[inode].T_arr = HUGE_NEGATIVE_FLOAT;

         }


         /* Reset all constraint_used entries. */

         for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {

             constraint_used[sink_clock_domain] = FALSE;

         }


         /* Set arrival times for each top-level tnode on this clock domain. */

         num_at_level = tnodes_at_level[0].nelem;

         for (i = 0; i < num_at_level; i++) {

             inode = tnodes_at_level[0].list[i];

             if (tnode[inode].clock_domain == source_clock_domain) {

                 tnode[inode].T_arr = 0.;

             }

         }


         for (ilevel = 0; ilevel < num_tnode_levels; ilevel++) { /* Go down one level at a time. */

             num_at_level = tnodes_at_level[ilevel].nelem;


             for (i = 0; i < num_at_level; i++) {

                 inode = tnodes_at_level[ilevel].list[i];    /* Go through each of the tnodes at the level we're on. */

                 if (has_valid_T_arr(inode)) {   /* If this tnode has been used */

                     num_edges = tnode[inode].num_edges;

                     if (num_edges == 0) { /* sink */

                         /* We've reached the sink domain of this tnode, so set constraint_used

                         to true for this tnode's clock domain (if it has a valid one). */

                         sink_clock_domain = tnode[inode].clock_domain;

                         if (sink_clock_domain != -1) {

                             constraint_used[sink_clock_domain] = TRUE;

                         }

                     } else {

                         /* Set arrival time to a valid value (0.) for each tnode in this tnode's fanout. */

                         tedge = tnode[inode].out_edges;

                         for (iedge = 0; iedge < num_edges; iedge++) {

                             to_node = tedge[iedge].to_node;

                             tnode[to_node].T_arr = 0.;

                         }

                     }

                 }

             }

         }


         /* At the end of the source domain traversal, see which sink domains haven't been hit,

         and set the constraint for the pair of source and sink domains to DO_NOT_ANALYSE */


         for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {

             if (!constraint_used[sink_clock_domain]) {

                 g_sdc->domain_constraint[source_clock_domain][sink_clock_domain] = DO_NOT_ANALYSE;

             }

         }

     }


     free(constraint_used);


     /* Print constraints */

     if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_TIMING_CONSTRAINTS)) {

         print_timing_constraint_info(getEchoFileName(E_ECHO_TIMING_CONSTRAINTS));

     }


     /* Convert g_sdc->domain_constraint and ff-level override constraints to be in seconds, not nanoseconds. */

     for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) {

         for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {

             constraint = g_sdc->domain_constraint[source_clock_domain][sink_clock_domain];

             if (constraint > NEGATIVE_EPSILON) { /* if constraint does not equal DO_NOT_ANALYSE */

                 g_sdc->domain_constraint[source_clock_domain][sink_clock_domain] = constraint * 1e-9;

             }

         }

     }

     for (icf = 0; icf < g_sdc->num_cf_constraints; icf++) {

         g_sdc->cf_constraints[icf].constraint *= 1e-9;

     }

     for (ifc = 0; ifc < g_sdc->num_fc_constraints; ifc++) {

         g_sdc->fc_constraints[ifc].constraint *= 1e-9;

     }

     for (iff = 0; iff < g_sdc->num_ff_constraints; iff++) {

         g_sdc->ff_constraints[iff].constraint *= 1e-9;

     }


     /* Finally, free g_sdc->cc_constraints since all of its info is contained in g_sdc->domain_constraint. */

     free_override_constraint(g_sdc->cc_constraints, g_sdc->num_cc_constraints);

 }


 static void alloc_timing_stats(void) {


     /* Allocate f_timing_stats data structure. */


     int i;


     f_timing_stats = (t_timing_stats *) my_malloc(sizeof(t_timing_stats));

     f_timing_stats->cpd = (float **) my_malloc(g_sdc->num_constrained_clocks * sizeof(float *));

     f_timing_stats->least_slack = (float **) my_malloc(g_sdc->num_constrained_clocks * sizeof(float *));

     for (i = 0; i < g_sdc->num_constrained_clocks; i++) {

         f_timing_stats->cpd[i] = (float *) my_malloc(g_sdc->num_constrained_clocks * sizeof(float));

         f_timing_stats->least_slack[i] = (float *) my_malloc(g_sdc->num_constrained_clocks * sizeof(float));

     }

 }


 void do_timing_analysis(t_slack * slacks, boolean is_prepacked, boolean do_lut_input_balancing, boolean is_final_analysis) {


 /*  Performs timing analysis on the circuit.  Before this routine is called, t_slack * slacks

     must have been allocated, and the circuit must have been converted into a timing graph.

     The nodes of the timing graph represent pins and the edges between them represent delays

     and m dependencies from one pin to another.  Most elements are modeled as a pair of nodes so

     that the delay through the element can be marked on the edge between them (e.g.

     TN_INPAD_SOURCE->TN_INPAD_OPIN, TN_OUTPAD_IPIN->TN_OUTPAD_SINK, TN_PRIMITIVE_OPIN->

     TN_PRIMITIVE_OPIN, etc.).


     The timing graph nodes are stored as an array, tnode [0..num_tnodes - 1]. Each tnode

     includes an array of all edges, tedge, which fan out from it. Each tedge includes the

     index of the node on its far end (in the tnode array), and the delay to that node.


     The timing graph has sources at each TN_FF_SOURCE (Q output), TN_INPAD_SOURCE (input I/O pad)

     and TN_CONSTANT_GEN_SOURCE (constant 1 or 0 generator) node and sinks at TN_FF_SINK (D input)

     and TN_OUTPAD_SINK (output I/O pad) nodes. Two traversals, one forward (sources to sinks)

     and one backward, are performed for each valid constraint (one which is not DO_NOT_ANALYSE)

     between a source and a sink clock domain in the matrix g_sdc->domain_constraint

     [0..g_sdc->num_constrained_clocks - 1][0..g_sdc->num_constrained_clocks - 1]. This matrix has been

     pruned so that all domain pairs with no paths between them have been set to DO_NOT_ANALYSE.


     During the traversal pair for each constraint, all nodes in the fanout of sources on the

     source clock domain are assigned a T_arr, the arrival time of the last input signal to the node.

     All nodes in the fanin of sinks on the sink clock domain are assigned a T_req, the required

     arrival time of the last input signal to the node if the critical path for this constraint is

     not to be lengthened.  Nodes which receive both a valid T_arr and T_req are flagged with

     used_on_this_traversal, and nodes which are used on at least one traversal pair are flagged

     with has_valid_slack so that later functions know the slack is valid.


     After each traversal pair, a slack is calculated for each sink pin on each net (or equivalently,

     each connection or tedge fanning out from that net's driver tnode). Slack is calculated as:

         T_req (dest node) - T_arr (source node) - Tdel (edge)

     and represents the amount of delay which could be added to this connection before the critical

     path delay for this constraint would worsen.  Edges on the critical path have a slack of 0.

     Slacks which are never used are set to HUGE_POSITIVE_FLOAT.


     The optimizers actually use a metric called timing_criticality.  Timing criticality is defined

     as 1 - slack / criticality_denom, where the normalization factor criticality_denom is the max

     of all arrival times in the constraint and the constraint itself (T_req-relaxed slacks) or

     all arrival times and constraints in the design (shifted slacks).  See below for a further

     discussion of these two regimes.  Timing criticality is always between 0 (not critical) and 1

     (very critical).  Unlike slack, which is set to HUGE_POSITIVE_FLOAT for unanalysed connections,

     timing criticality is 0 for these, meaning no special check has to be made for which connections

     have been analysed.


     If path counting is on (PATH_COUNTING is defined in vpr_types.h), the optimizers use a weighted

     sum of timing_criticality and path_criticality, the latter of which is an estimate of the

     importance of the number of paths using a particular connection.  As a path's timing_criticality

     decreases, it will become exponentially less important to the path_criticality of any connection

     which this path uses.  Path criticality also goes from 0 (not critical or unanalysed) to 1.


     Slack and criticalities are only calculated if both the driver of the net and the sink pin were

     used_on_this_traversal, and are only overwritten if lower than previously-obtained values.

     The optimizers actually use criticality rather than slack, but slack is useful for human

     designers and so we calculate it only if we need to print it.


     This routine outputs slack and criticality to t_slack * slacks. It also stores least slack and

     critical path delay per constraint [0..g_sdc->num_constrained_clocks - 1][0..g_sdc->num_constrained_clocks - 1]

     in the file-scope variable f_timing_stats.


     Is_prepacked flags whether to calculate normalized costs for the clusterer (normalized_slack,

     normalized_Tarr, normalized_total_critical_paths). Setting this to FALSE saves time in post-

     packed timing analyses.


     Do_lut_input_balancing flags whether to rebalance LUT inputs.  LUT rebalancing takes advantage of

     the fact that different LUT inputs often have different delays. Since we can freely permute which

     LUT inputs are used by just changing the logic in the LUT, these LUT permutations can be performed

     late into the routing stage of the flow.


     Is_final_analysis flags whether this is the final, analysis pass.  If it is, the analyser will

     compute actual slacks instead of relaxed ones. We "relax" slacks by setting the required time to

     the maximum arrival time for tight constraints so that no slacks are negative (which confuses

     the optimizers). This is called "T_req-relaxed" slack.  However, human designers want to see

     actual slack values, so we report those in the final analysis.  The alternative way of making

     slacks positive is shifting them upwards by the value of the largest negative slack, after

     all traversals are complete ("shifted slacks"), which can be enabled by changing SLACK_DEFINITION

     from 'R' to 'S' in path_delay.h.


     To do: flip-flop to flip-flop and flip-flop to clock domain constraints (set_false_path, set_max_delay,

     and especially set_multicycle_path). All the info for these constraints is contained in g_sdc->fc_constraints and

     g_sdc->ff_constraints, but graph traversals are not included yet. Probably, an entire traversal will be needed for

     each constraint. Clock domain to flip-flop constraints are coded but not tested, and are done within

     existing traversals. */


     int i, j, source_clock_domain, sink_clock_domain, inode, inet, ipin;


 #if defined PATH_COUNTING || SLACK_DEFINITION == 'S'

     int iedge, num_edges;

 #endif


 #ifdef PATH_COUNTING

     float max_path_criticality = HUGE_NEGATIVE_FLOAT /* used to normalize path_criticalities */;

 #endif


     boolean update_slack = (boolean) (is_final_analysis || getEchoEnabled());

                                  /* Only update slack values if we need to print it, i.e.

                                  for the final output file (is_final_analysis) or echo files. */


     float criticality_denom; /* (SLACK_DEFINITION == 'R' only) For a particular constraint, the maximum of

                              the constraint and all arrival times for the constraint's traversal. Used to

                              normalize the clusterer's normalized_slack and, more importantly, criticality. */


     long max_critical_output_paths, max_critical_input_paths;

     t_pb *pb;


 #if SLACK_DEFINITION == 'S'

     float smallest_slack_in_design = HUGE_POSITIVE_FLOAT;

     /* Shift all slacks upwards by this number if it is negative. */


     float criticality_denom_global = HUGE_NEGATIVE_FLOAT;

     /* Denominator of criticality for shifted - max of all arrival times and all constraints. */

 #endif


     /* Reset LUT input rebalancing. */

     for (inode = 0; inode < num_tnodes; inode++) {

         if (tnode[inode].type == TN_PRIMITIVE_OPIN && tnode[inode].pb_graph_pin != NULL) {

             pb = block[tnode[inode].block].pb->rr_node_to_pb_mapping[tnode[inode].pb_graph_pin->pin_count_in_cluster];

             if (pb != NULL && pb->lut_pin_remap != NULL) {

                 /* this is a LUT primitive, do pin swapping */

                 assert(pb->pb_graph_node->pb_type->num_output_pins == 1 && pb->pb_graph_node->pb_type->num_clock_pins == 0); /* ensure LUT properties are valid */

                 assert(pb->pb_graph_node->num_input_ports == 1);

                 /* If all input pins are known, perform LUT input delay rebalancing, do nothing otherwise */

                 for (i = 0; i < pb->pb_graph_node->num_input_pins[0]; i++) {

                     pb->lut_pin_remap[i] = OPEN;

                 }

             }

         }

     }


     /* Reset all values which need to be reset once per

     timing analysis, rather than once per traversal pair. */


     /* Reset slack and criticality */

     for (inet = 0; inet < num_timing_nets; inet++) {

         for (ipin = 1; ipin <= timing_nets[inet].num_sinks; ipin++) {

             slacks->slack[inet][ipin]              = HUGE_POSITIVE_FLOAT;

             slacks->timing_criticality[inet][ipin] = 0.;

 #ifdef PATH_COUNTING

             slacks->path_criticality[inet][ipin] = 0.;

 #endif

         }

     }


     /* Reset f_timing_stats. */

     for (i = 0; i < g_sdc->num_constrained_clocks; i++) {

         for (j = 0; j < g_sdc->num_constrained_clocks; j++) {

             f_timing_stats->cpd[i][j]         = HUGE_NEGATIVE_FLOAT;

             f_timing_stats->least_slack[i][j] = HUGE_POSITIVE_FLOAT;

         }

     }


 #ifndef PATH_COUNTING

     /* Reset normalized values for clusterer. */

     if (is_prepacked) {

         for (inode = 0; inode < num_tnodes; inode++) {

             tnode[inode].prepacked_data->normalized_slack = HUGE_POSITIVE_FLOAT;

             tnode[inode].prepacked_data->normalized_T_arr = HUGE_NEGATIVE_FLOAT;

             tnode[inode].prepacked_data->normalized_total_critical_paths = HUGE_NEGATIVE_FLOAT;

         }

     }

 #endif


     if (do_lut_input_balancing) {

         do_lut_rebalancing();

     }


     /* For each valid constraint (pair of source and sink clock domains), we do one

     forward and one backward topological traversal to find arrival and required times,

     in do_timing_analysis_for_constraint. If path counting is on, we then do another,

     simpler traversal to find forward and backward weights, relying on the largest

     required time we found from the first traversal.  After each constraint's traversals,

     we update the slacks, timing criticalities and (if necessary) path criticalities or

     normalized costs used by the clusterer. */


     for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) {

         for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {

             if (g_sdc->domain_constraint[source_clock_domain][sink_clock_domain] > NEGATIVE_EPSILON) { /* i.e. != DO_NOT_ANALYSE */


                 /* Perform the forward and backward traversal for this constraint. */

                 criticality_denom = do_timing_analysis_for_constraint(source_clock_domain, sink_clock_domain,

                     is_prepacked, is_final_analysis, &max_critical_input_paths, &max_critical_output_paths);

 #ifdef PATH_COUNTING

                 /* Weight the importance of each net, used in slack calculation. */

                 do_path_counting(criticality_denom);

 #endif


                 /* Update the slack and criticality for each edge of each net which was

                 analysed on the most recent traversal and has a lower (slack) or

                 higher (criticality) value than before. */

                 update_slacks(slacks, source_clock_domain, sink_clock_domain, criticality_denom, update_slack);


 #ifndef PATH_COUNTING

                 /* Update the normalized costs used by the clusterer. */

                 if (is_prepacked) {

                     update_normalized_costs(criticality_denom, max_critical_input_paths, max_critical_output_paths);

                 }

 #endif


 #if SLACK_DEFINITION == 'S'

                 /* Set criticality_denom_global to the max of criticality_denom over all traversals. */

                 criticality_denom_global = std::max(criticality_denom_global, criticality_denom);

 #endif

             }

         }

     }


 #ifdef PATH_COUNTING

     /* Normalize path criticalities by the largest value in the

     circuit. Otherwise, path criticalities would be unbounded. */


     for (inet = 0; inet < num_timing_nets; inet++) {

         num_edges = timing_nets[inet].num_sinks;

         for (iedge = 0; iedge < num_edges; iedge++) {

             max_path_criticality = std::max(max_path_criticality, slacks->path_criticality[inet][iedge + 1]);

         }

     }


     for (inet = 0; inet < num_timing_nets; inet++) {

         num_edges = timing_nets[inet].num_sinks;

         for (iedge = 0; iedge < num_edges; iedge++) {

             slacks->path_criticality[inet][iedge + 1] /= max_path_criticality;

         }

     }


 #endif


 #if SLACK_DEFINITION == 'S'

     if (!is_final_analysis) {


         /* Find the smallest slack in the design. */

         for (i = 0; i < g_sdc->num_constrained_clocks; i++) {

             for (j = 0; j < g_sdc->num_constrained_clocks; j++) {

                 smallest_slack_in_design = std::min(smallest_slack_in_design, f_timing_stats->least_slack[i][j]);

             }

         }


         /* Increase all slacks by the value of the smallest slack in the design, if it's negative. */

         if (smallest_slack_in_design < 0) {

             for (inet = 0; inet < num_timing_nets; inet++) {

                 num_edges = timing_nets[inet].num_sinks;

                 for (iedge = 0; iedge < num_edges; iedge++) {

                     slacks->slack[inet][iedge + 1] -= smallest_slack_in_design;

                     /* Remember, smallest_slack_in_design is negative, so we're INCREASING all the slacks. */

                     /* Note that if slack was equal to HUGE_POSITIVE_FLOAT, it will still be equal to more than this,

                     so it will still be ignored when we calculate criticalities. */

                 }

             }

         }

     }


     /* We can now calculate criticalities, only after we normalize slacks. */

     for (inet = 0; inet < num_timing_nets; inet++) {

         num_edges = timing_nets[inet].num_sinks;

         for (iedge = 0; iedge < num_edges; iedge++) {

             if (slacks->slack[inet][iedge + 1] < HUGE_POSITIVE_FLOAT - 1) { /* if the slack is valid */

                 slacks->timing_criticality[inet][iedge + 1] = 1 - slacks->slack[inet][iedge + 1]/criticality_denom_global;

             }

             /* otherwise, criticality remains 0, as it was initialized */

         }

     }

 #endif

 }


 static void do_lut_rebalancing() {


     int inode, num_at_level, i, ilevel, num_edges, iedge, to_node;

     t_tedge * tedge;


     /* Reset all arrival times to a very large negative number. */

     for (inode = 0; inode < num_tnodes; inode++) {

         tnode[inode].T_arr = HUGE_NEGATIVE_FLOAT;

     }


     /* Set arrival times for each top-level tnode. */

     num_at_level = tnodes_at_level[0].nelem;

     for (i = 0; i < num_at_level; i++) {

         inode = tnodes_at_level[0].list[i];

         if (tnode[inode].type == TN_FF_SOURCE) {

             /* Set the arrival time of this flip-flop tnode to its clock skew. */

             tnode[inode].T_arr = tnode[inode].clock_delay;

         } else if (tnode[inode].type == TN_INPAD_SOURCE) {

             /* There's no such thing as clock skew for external clocks. The closest equivalent,

             input delay, is already marked on the edge coming out from this node.

             As a result, the signal can be said to arrive at t = 0. */

             tnode[inode].T_arr = 0.;

         }

     }


     /* Now we actually start the forward topological traversal, to compute arrival times. */

     for (ilevel = 0; ilevel < num_tnode_levels; ilevel++) { /* For each level of our levelized timing graph... */

         num_at_level = tnodes_at_level[ilevel].nelem;       /* ...there are num_at_level tnodes at that level. */


         for (i = 0; i < num_at_level; i++) {

             inode = tnodes_at_level[ilevel].list[i];        /* Go through each of the tnodes at the level we're on. */


             num_edges = tnode[inode].num_edges;             /* Get the number of edges fanning out from the node we're visiting */

             tedge = tnode[inode].out_edges;                 /* Get the list of edges from the node we're visiting */


             for (iedge = 0; iedge < num_edges; iedge++) {   /* Now go through each edge coming out from this tnode */

                 to_node = tedge[iedge].to_node;             /* Get the index of the destination tnode of this edge. */


                 /* The arrival time T_arr at the destination node is set to the maximum of all the

                 possible arrival times from all edges fanning in to the node.

                 The arrival time represents the latest time that all inputs must arrive at a node.

                 LUT input rebalancing also occurs at this step. */

                 set_and_balance_arrival_time(to_node, inode, tedge[iedge].Tdel, TRUE);

             }

         }

     }

 }


 static float do_timing_analysis_for_constraint(int source_clock_domain, int sink_clock_domain,

     boolean is_prepacked, boolean is_final_analysis, long * max_critical_input_paths_ptr,

     long * max_critical_output_paths_ptr) {


     /* Performs a single forward and backward traversal for the domain pair

     source_clock_domain and sink_clock_domain. Returns the denominator that

     will be later used to normalize criticality - the maximum of all arrival

     times from this traversal and the constraint for this pair of domains.

     Also returns the maximum number of critical input and output paths of any

     node analysed for this constraint, passed by reference from do_timing_analysis. */


     int inode, num_at_level, i, total, ilevel, num_edges, iedge, to_node, icf;

     float constraint, Tdel, T_req, max_Tarr = HUGE_NEGATIVE_FLOAT;

                                    /* Max of all arrival times for this constraint -

                                       used to relax required times. */

     t_tedge * tedge;

     int num_dangling_nodes;

     boolean found;

     long max_critical_input_paths = 0, max_critical_output_paths = 0;


     /* Reset all values which need to be reset once per

     traversal pair, rather than once per timing analysis. */


     /* Reset all arrival and required times. */

     for (inode = 0; inode < num_tnodes; inode++) {

         tnode[inode].T_arr = HUGE_NEGATIVE_FLOAT;

         tnode[inode].T_req = HUGE_POSITIVE_FLOAT;

     }


 #ifndef PATH_COUNTING

     /* Reset num_critical_output_paths. */

     if (is_prepacked) {

         for (inode = 0; inode < num_tnodes; inode++) {

             tnode[inode].prepacked_data->num_critical_output_paths = 0;

         }

     }

 #endif


     /* Set arrival times for each top-level tnode on this source domain. */

     num_at_level = tnodes_at_level[0].nelem;

     for (i = 0; i < num_at_level; i++) {

         inode = tnodes_at_level[0].list[i];


         if (tnode[inode].clock_domain == source_clock_domain) {


             if (tnode[inode].type == TN_FF_SOURCE) {

                 /* Set the arrival time of this flip-flop tnode to its clock skew. */

                 tnode[inode].T_arr = tnode[inode].clock_delay;


             } else if (tnode[inode].type == TN_INPAD_SOURCE) {

                 /* There's no such thing as clock skew for external clocks, and

                 input delay is already marked on the edge coming out from this node.

                 As a result, the signal can be said to arrive at t = 0. */

                 tnode[inode].T_arr = 0.;

             }


         }

     }


     /* Compute arrival times with a forward topological traversal from sources

     (TN_FF_SOURCE, TN_INPAD_SOURCE, TN_CONSTANT_GEN_SOURCE) to sinks (TN_FF_SINK, TN_OUTPAD_SINK). */


     total = 0;                                              /* We count up all tnodes to error-check at the end. */

     for (ilevel = 0; ilevel < num_tnode_levels; ilevel++) { /* For each level of our levelized timing graph... */

         num_at_level = tnodes_at_level[ilevel].nelem;       /* ...there are num_at_level tnodes at that level. */

         total += num_at_level;


         for (i = 0; i < num_at_level; i++) {

             inode = tnodes_at_level[ilevel].list[i];        /* Go through each of the tnodes at the level we're on. */

             if (tnode[inode].T_arr < NEGATIVE_EPSILON) {    /* If the arrival time is less than 0 (i.e. HUGE_NEGATIVE_FLOAT)... */

                 continue;                                   /* End this iteration of the num_at_level for loop since

                                                             this node is not part of the clock domain we're analyzing.

                                                             (If it were, it would have received an arrival time already.) */

             }


             num_edges = tnode[inode].num_edges;             /* Get the number of edges fanning out from the node we're visiting */

             tedge = tnode[inode].out_edges;                 /* Get the list of edges from the node we're visiting */

 #ifndef PATH_COUNTING

             if (is_prepacked && ilevel == 0) {

                 tnode[inode].prepacked_data->num_critical_input_paths = 1;  /* Top-level tnodes have one locally-critical input path. */

             }


             /* Using a somewhat convoluted procedure inherited from T-VPack,

             count how many locally-critical input paths fan into each tnode,

             and also find the maximum number over all tnodes. */

             if (is_prepacked) {

                 for (iedge = 0; iedge < num_edges; iedge++) {

                     to_node = tedge[iedge].to_node;

                     if (fabs(tnode[to_node].T_arr - (tnode[inode].T_arr + tedge[iedge].Tdel)) < EPSILON) {

                         /* If the "local forward slack" (T_arr(to_node) - T_arr(inode) - T_del) for this edge

                         is 0 (i.e. the path from inode to to_node is locally as critical as any other path to

                         to_node), add to_node's num critical input paths to inode's number. */

                         tnode[to_node].prepacked_data->num_critical_input_paths += tnode[inode].prepacked_data->num_critical_input_paths;

                     } else if (tnode[to_node].T_arr < (tnode[inode].T_arr + tedge[iedge].Tdel)) {

                         /* If the "local forward slack" for this edge is negative,

                         reset to_node's num critical input paths to inode's number. */

                         tnode[to_node].prepacked_data->num_critical_input_paths = tnode[inode].prepacked_data->num_critical_input_paths;

                     }

                     /* Set max_critical_input_paths to the maximum number of critical

                     input paths for all tnodes analysed on this traversal. */

                     if (tnode[to_node].prepacked_data->num_critical_input_paths > max_critical_input_paths) {

                         max_critical_input_paths = tnode[to_node].prepacked_data->num_critical_input_paths;

                     }

                 }

             }

 #endif

             for (iedge = 0; iedge < num_edges; iedge++) {   /* Now go through each edge coming out from this tnode */

                 to_node = tedge[iedge].to_node;             /* Get the index of the destination tnode of this edge. */


                 /* The arrival time T_arr at the destination node is set to the maximum of all

                 the possible arrival times from all edges fanning in to the node. The arrival

                 time represents the latest time that all inputs must arrive at a node. LUT input

                 rebalancing also occurs at this step. */

                 set_and_balance_arrival_time(to_node, inode, tedge[iedge].Tdel, FALSE);


                 /* Since we updated the destination node (to_node), change the max arrival

                 time for the forward traversal if to_node's arrival time is greater than

                 the existing maximum. */

                 max_Tarr = std::max(max_Tarr, tnode[to_node].T_arr);

             }

         }

     }


     assert(total == num_tnodes);

     num_dangling_nodes = 0;

     /* Compute required times with a backward topological traversal from sinks to sources. */


     for (ilevel = num_tnode_levels - 1; ilevel >= 0; ilevel--) {

         num_at_level = tnodes_at_level[ilevel].nelem;


         for (i = 0; i < num_at_level; i++) {

             inode = tnodes_at_level[ilevel].list[i];

             num_edges = tnode[inode].num_edges;


             if (ilevel == 0) {

                 if (!(tnode[inode].type == TN_INPAD_SOURCE || tnode[inode].type == TN_FF_SOURCE || tnode[inode].type == TN_CONSTANT_GEN_SOURCE)) {

                     vpr_printf(TIO_MESSAGE_ERROR, "Timing graph started on unexpected node %s.%s[%d].\n",

                             tnode[inode].pb_graph_pin->parent_node->pb_type->name,

                             tnode[inode].pb_graph_pin->port->name,

                             tnode[inode].pb_graph_pin->pin_number);

                     vpr_printf(TIO_MESSAGE_ERROR, "This is a VPR internal error, contact VPR development team.\n");

                     exit(1);

                 }

             } else {

                 if ((tnode[inode].type == TN_INPAD_SOURCE || tnode[inode].type == TN_FF_SOURCE || tnode[inode].type == TN_CONSTANT_GEN_SOURCE)) {

                     vpr_printf(TIO_MESSAGE_ERROR, "Timing graph discovered unexpected edge to node %s.%s[%d].\n",

                             tnode[inode].pb_graph_pin->parent_node->pb_type->name,

                             tnode[inode].pb_graph_pin->port->name,

                             tnode[inode].pb_graph_pin->pin_number);

                     vpr_printf(TIO_MESSAGE_ERROR, "This is a VPR internal error, contact VPR development team.\n");

                     exit(1);

                 }

             }


             /* Unlike the forward traversal, the sinks are all on different levels, so we always have to

             check whether a node is a sink. We give every sink on the sink clock domain we're considering

             a valid required time. Every non-sink node in the fanin of one of these sinks and the fanout of

             some source from the forward traversal also gets a valid required time. */


             if (num_edges == 0) { /* sink */


                 if (tnode[inode].type == TN_FF_CLOCK || tnode[inode].T_arr < HUGE_NEGATIVE_FLOAT + 1) {

                     continue; /* Skip nodes on the clock net itself, and nodes with unset arrival times. */

                 }


                 if (!(tnode[inode].type == TN_OUTPAD_SINK || tnode[inode].type == TN_FF_SINK)) {

                     if(is_prepacked) {

                         vpr_printf(TIO_MESSAGE_WARNING, "Pin on block %s.%s[%d] not used\n",

                                                     logical_block[tnode[inode].block].name,

                                                     tnode[inode].prepacked_data->model_port_ptr->name,

                                                     tnode[inode].prepacked_data->model_pin);

                     }

                     num_dangling_nodes++;

                     /* Note: Still need to do standard traversals with dangling pins so that algorithm runs properly, but T_arr and T_Req to values such that it dangling nodes do not affect actual timing values */

                 }


                 /* Skip nodes not on the sink clock domain of the

                 constraint we're currently considering */

                 if (tnode[inode].clock_domain != sink_clock_domain) {

                     continue;

                 }


                 /* See if there's an override constraint between the source clock domain (name is

                 g_sdc->constrained_clocks[source_clock_domain].name) and the flip-flop or outpad we're at

                 now (name is find_tnode_net_name(inode, is_prepacked)). We test if

                 g_sdc->num_cf_constraints > 0 first so that we can save time looking up names in the vast

                 majority of cases where there are no such constraints. */


                 if (g_sdc->num_cf_constraints > 0 && (icf = find_cf_constraint(g_sdc->constrained_clocks[source_clock_domain].name, find_tnode_net_name(inode, is_prepacked))) != -1) {

                     constraint = g_sdc->cf_constraints[icf].constraint;

                     if (constraint < NEGATIVE_EPSILON) {

                         /* Constraint is DO_NOT_ANALYSE (-1) for this particular sink. */

                         continue;

                     }

                 } else { /* Use the default constraint from g_sdc->domain_constraint. */

                     constraint = g_sdc->domain_constraint[source_clock_domain][sink_clock_domain];

                     /* Constraint is guaranteed to be valid since we checked for it at the very beginning. */

                 }


                 /* Now we know we should analyse this tnode. */


 #if SLACK_DEFINITION == 'R'

                 /* Assign the required time T_req for this leaf node, taking into account clock skew. T_req is the

                 time all inputs to a tnode must arrive by before it would degrade this constraint's critical path delay.


                 Relax the required time at the sink node to be non-negative by taking the max of the "real" required

                 time (constraint + tnode[inode].clock_delay) and the max arrival time in this domain (max_Tarr), except

                 for the final analysis where we report actual slack. We do this to prevent any slacks from being

                 negative, since negative slacks are not used effectively by the optimizers.


                 E.g. if we have a 10 ns constraint and it takes 14 ns to get here, we'll have a slack of at most -4 ns

                 for any edge along the path that got us here.  If we say the required time is 14 ns (no less than the

                 arrival time), we don't have a negative slack anymore.  However, in the final timing analysis, the real

                 slacks are computed (that's what human designers care about), not the relaxed ones. */


                 if (is_final_analysis) {

                     tnode[inode].T_req =     constraint + tnode[inode].clock_delay;

                 } else {

                     tnode[inode].T_req = std::max(constraint + tnode[inode].clock_delay, max_Tarr);

                 }

 #else

                 /* Don't do the relaxation and always set T_req equal to the "real" required time. */

                 tnode[inode].T_req = constraint + tnode[inode].clock_delay;

 #endif


                 /* Store the largest critical path delay for this constraint (source domain AND sink domain)

                 in the matrix critical_path_delay. C.P.D. = T_arr at destination - clock skew at destination

                 = (datapath delay + clock delay to source) - clock delay to destination.


                 Critical path delay is really telling us how fast we can run the source clock before we can no

                 longer meet this constraint.  e.g. If the datapath delay is 10 ns, the clock delay at source is

                 2 ns and the clock delay at destination is 5 ns, then C.P.D. is 7 ns by the above formula. We

                 can run the source clock at 7 ns because the clock skew gives us 3 ns extra to meet the 10 ns

                 datapath delay. */


                 f_timing_stats->cpd[source_clock_domain][sink_clock_domain] =

                     std::max(f_timing_stats->cpd[source_clock_domain][sink_clock_domain],

                        (tnode[inode].T_arr - tnode[inode].clock_delay));


 #ifndef PATH_COUNTING

                 if (is_prepacked) {

                     tnode[inode].prepacked_data->num_critical_output_paths = 1; /* Bottom-level tnodes have one locally-critical input path. */

                 }

 #endif

             } else { /* not a sink */


                 assert(!(tnode[inode].type == TN_OUTPAD_SINK || tnode[inode].type == TN_FF_SINK || tnode[inode].type == TN_FF_CLOCK));


                 /* We need to skip this node unless it is on a path from source_clock_domain to

                 sink_clock_domain.  We need to skip all nodes which:

                    1. Fan out to the sink domain but do not fan in from the source domain.

                    2. Fan in from the source domain but do not fan out to the sink domain.

                    3. Do not fan in or out to either domain.

                 If a node does not fan in from the source domain, it will not have a valid arrival time.

                 So cases 1 and 3 can be skipped by continuing if T_arr = HUGE_NEGATIVE_FLOAT. We cannot

                 treat case 2 as simply since the required time for this node has not yet been assigned,

                 so we have to look at the required time for every node in its immediate fanout instead. */


                 /* Cases 1 and 3 */

                 if (tnode[inode].T_arr < HUGE_NEGATIVE_FLOAT + 1) {

                     continue; /* Skip nodes with unset arrival times. */

                 }


                 /* Case 2 */

                 found = FALSE;

                 tedge = tnode[inode].out_edges;

                 for (iedge = 0; iedge < num_edges && !found; iedge++) {

                     to_node = tedge[iedge].to_node;

                     if (tnode[to_node].T_req < HUGE_POSITIVE_FLOAT) {

                         found = TRUE;

                     }

                 }

                 if (!found) {

                     continue;

                 }


                 /* Now we know this node is on a path from source_clock_domain to

                 sink_clock_domain, and needs to be analyzed. */


                 /* Opposite to T_arr, set T_req to the MINIMUM of the

                 required times of all edges fanning OUT from this node. */

                 for (iedge = 0; iedge < num_edges; iedge++) {

                     to_node = tedge[iedge].to_node;

                     Tdel = tedge[iedge].Tdel;

                     T_req = tnode[to_node].T_req;

                     tnode[inode].T_req = std::min(tnode[inode].T_req, T_req - Tdel);


                     /* Update least slack per constraint. This is NOT the same as the minimum slack we will

                     calculate on this traversal for post-packed netlists, which only count inter-cluster

                     slacks. We only look at edges adjacent to sink nodes on the sink clock domain since

                     all paths go through one of these edges. */

                     if (tnode[to_node].num_edges == 0 && tnode[to_node].clock_domain == sink_clock_domain) {

                         f_timing_stats->least_slack[source_clock_domain][sink_clock_domain] =

                             std::min(f_timing_stats->least_slack[source_clock_domain][sink_clock_domain],

                                (T_req - Tdel - tnode[inode].T_arr));

                     }

                 }

 #ifndef PATH_COUNTING


                 /* Similar to before, we count how many locally-critical output paths fan out from each tnode,

                 and also find the maximum number over all tnodes. Unlike for input paths, where we haven't set

                 the arrival time at to_node before analysing it, here the required time is set at both nodes,

                 so the "local backward slack" (T_req(to_node) - T_req(inode) - T_del) will never be negative.

                 Hence, we only have to test if the "local backward slack" is 0. */

                 if (is_prepacked) {

                     for (iedge = 0; iedge < num_edges; iedge++) {

                         to_node = tedge[iedge].to_node;

                         /* If the "local backward slack" (T_arr(to_node) - T_arr(inode) - T_del) for this edge

                         is 0 (i.e. the path from inode to to_node is locally as critical as any other path to

                         to_node), add to_node's num critical output paths to inode's number. */

                         if (fabs(tnode[to_node].T_req - (tnode[inode].T_req + tedge[iedge].Tdel)) < EPSILON) {

                             tnode[inode].prepacked_data->num_critical_output_paths += tnode[to_node].prepacked_data->num_critical_output_paths;

                         }

                         /* Set max_critical_output_paths to the maximum number of critical

                         output paths for all tnodes analysed on this traversal. */

                         if (tnode[to_node].prepacked_data->num_critical_output_paths > max_critical_output_paths) {

                             max_critical_output_paths = tnode[to_node].prepacked_data->num_critical_output_paths;

                         }

                     }

                 }

 #endif

             }

         }

     }


     /* Return max critical input/output paths for this constraint through

     the pointers we passed in. */

     if (max_critical_input_paths_ptr && max_critical_output_paths_ptr) {

         *max_critical_input_paths_ptr = max_critical_input_paths;

         *max_critical_output_paths_ptr = max_critical_output_paths;

     }


     if(num_dangling_nodes > 0 && (is_final_analysis || is_prepacked)) {

         vpr_printf(TIO_MESSAGE_WARNING, "%d unused pins \n",  num_dangling_nodes);

     }


     /* The criticality denominator is the maximum of the max

     arrival time and the constraint for this domain pair. */

     return std::max(max_Tarr, g_sdc->domain_constraint[source_clock_domain][sink_clock_domain]);

 }

 #ifdef PATH_COUNTING

 static void do_path_counting(float criticality_denom) {

     /* Count the importance of the number of paths going through each net

     by giving each net a forward and backward path weight. This is the first step of

     "A Novel Net Weighting Algorithm for Timing-Driven Placement" (Kong, 2002).

     We only visit nodes with set arrival and required times, so this function

     must be called after do_timing_analysis_for_constraints, which sets T_arr and T_req. */


     int inode, num_at_level, i, ilevel, num_edges, iedge, to_node;

     t_tedge * tedge;

     float forward_local_slack, backward_local_slack, discount;


     /* Reset forward and backward weights for all tnodes. */

     for (inode = 0; inode < num_tnodes; inode++) {

         tnode[inode].forward_weight = 0;

         tnode[inode].backward_weight = 0;

     }


     /* Set foward weights for each top-level tnode. */

     num_at_level = tnodes_at_level[0].nelem;

     for (i = 0; i < num_at_level; i++) {

         inode = tnodes_at_level[0].list[i];

         tnode[inode].forward_weight = 1.;

     }


     /* Do a forward topological traversal to populate forward weights. */

     for (ilevel = 0; ilevel < num_tnode_levels; ilevel++) {

         num_at_level = tnodes_at_level[ilevel].nelem;


         for (i = 0; i < num_at_level; i++) {

             inode = tnodes_at_level[ilevel].list[i];

             if (!(has_valid_T_arr(inode) && has_valid_T_req(inode))) {

                 continue;

             }

             tedge = tnode[inode].out_edges;

             num_edges = tnode[inode].num_edges;

             for (iedge = 0; iedge < num_edges; iedge++) {

                 to_node = tedge[iedge].to_node;

                 if (!(has_valid_T_arr(to_node) && has_valid_T_req(to_node))) {

                     continue;

                 }

                 forward_local_slack = tnode[to_node].T_arr - tnode[inode].T_arr - tedge[iedge].Tdel;

                 discount = pow((float) DISCOUNT_FUNCTION_BASE, -1 * forward_local_slack / criticality_denom);

                 tnode[to_node].forward_weight += discount * tnode[inode].forward_weight;

             }

         }

     }


     /* Do a backward topological traversal to populate backward weights.

     Since the sinks are all on different levels, we have to check for them as we go. */

     for (ilevel = num_tnode_levels - 1; ilevel >= 0; ilevel--) {

         num_at_level = tnodes_at_level[ilevel].nelem;


         for (i = 0; i < num_at_level; i++) {

             inode = tnodes_at_level[ilevel].list[i];

             if (!(has_valid_T_arr(inode) && has_valid_T_req(inode))) {

                 continue;

             }

             num_edges = tnode[inode].num_edges;

             if (num_edges == 0) { /* sink */

                 tnode[inode].backward_weight = 1.;

             } else {

                 tedge = tnode[inode].out_edges;

                 for (iedge = 0; iedge < num_edges; iedge++) {

                     to_node = tedge[iedge].to_node;

                     if (!(has_valid_T_arr(to_node) && has_valid_T_req(to_node))) {

                         continue;

                     }

                     backward_local_slack = tnode[to_node].T_req - tnode[inode].T_req - tedge[iedge].Tdel;

                     discount = pow((float) DISCOUNT_FUNCTION_BASE, -1 * backward_local_slack / criticality_denom);

                     tnode[inode].backward_weight += discount * tnode[to_node].backward_weight;

                 }

             }

         }

     }

 }

 #endif


 static void update_slacks(t_slack * slacks, int source_clock_domain, int sink_clock_domain, float criticality_denom,

     boolean update_slack) {


     /* Updates the slack and criticality of each sink pin, or equivalently

     each edge, of each net. Go through the list of nets. If the net's

     driver tnode has been used, go through each tedge of that tnode and

     take the minimum of the slack/criticality for this traversal and the

     existing values. Since the criticality_denom can vary greatly between

     traversals, we have to update slack and criticality separately.

     Only update slack if we need to print it later (update_slack == TRUE).


     Note: there is a correspondence in indexing between out_edges and the

     net data structure: out_edges[iedge] = net[inet].node_block[iedge + 1]

     There is an offset of 1 because net[inet].node_block includes the driver

     node at index 0, while out_edges is part of the driver node and does

     not bother to refer to itself. */


     int inet, iedge, inode, to_node, num_edges;

     t_tedge *tedge;

     float T_arr, Tdel, T_req, slk, timing_criticality;


     for (inet = 0; inet < num_timing_nets; inet++) {

         inode = f_net_to_driver_tnode[inet];

         T_arr = tnode[inode].T_arr;


         if (!(has_valid_T_arr(inode) && has_valid_T_req(inode))) {

             continue; /* Only update this net on this traversal if its

                       driver node has been updated on this traversal. */

         }


         num_edges = tnode[inode].num_edges;

         tedge = tnode[inode].out_edges;


         for (iedge = 0; iedge < num_edges; iedge++) {

             to_node = tedge[iedge].to_node;

             if (!(has_valid_T_arr(to_node) && has_valid_T_req(to_node))) {

                 continue; /* Only update this edge on this traversal if this

                           particular sink node has been updated on this traversal. */

             }

             Tdel = tedge[iedge].Tdel;

             T_req = tnode[to_node].T_req;


             if (update_slack) {

                 /* Update the slack for this edge. */

                 slk = T_req - T_arr - Tdel;

                 if (slk < slacks->slack[inet][iedge + 1]) {

                 /* Only update on this traversal if this edge would have

                 lower slack from this traversal than its current value. */

                     slacks->slack[inet][iedge + 1] = slk;

                 }

             }


 #if SLACK_DEFINITION == 'R'

             /* Since criticality_denom is not the same on each traversal,

             we have to update criticality separately. */

             timing_criticality = 1 - (T_req - T_arr - Tdel)/criticality_denom;

             if (timing_criticality > slacks->timing_criticality[inet][iedge + 1]) {

                 slacks->timing_criticality[inet][iedge + 1] = timing_criticality;

             }

     #ifdef PATH_COUNTING

             /* Also update path criticality separately.  Kong uses slack / T_crit for the exponent,

             which is equivalent to criticality - 1.  Depending on how PATH_COUNTING is defined,

             different functional forms are used. */

         #if PATH_COUNTING == 'S' /* Use sum of forward and backward weights. */

             slacks->path_criticality[inet][iedge + 1] = max(slacks->path_criticality[inet][iedge + 1],

                 (tnode[inode].forward_weight + tnode[to_node].backward_weight) *

                 pow((float) FINAL_DISCOUNT_FUNCTION_BASE, timing_criticality - 1));

         #elif PATH_COUNTING == 'P' /* Use product of forward and backward weights. */

             slacks->path_criticality[inet][iedge + 1] = max(slacks->path_criticality[inet][iedge + 1],

                 tnode[inode].forward_weight * tnode[to_node].backward_weight *

                 pow((float) FINAL_DISCOUNT_FUNCTION_BASE, timing_criticality - 1));

         #elif PATH_COUNTING == 'L' /* Use natural log of product of forward and backward weights. */

             slacks->path_criticality[inet][iedge + 1] = max(slacks->path_criticality[inet][iedge + 1],

                 log(tnode[inode].forward_weight * tnode[to_node].backward_weight) *

                 pow((float) FINAL_DISCOUNT_FUNCTION_BASE, timing_criticality - 1));

         #elif PATH_COUNTING == 'R' /* Use product of natural logs of forward and backward weights. */

             slacks->path_criticality[inet][iedge + 1] = max(slacks->path_criticality[inet][iedge + 1],

                 log(tnode[inode].forward_weight) * log(tnode[to_node].backward_weight) *

                 pow((float) FINAL_DISCOUNT_FUNCTION_BASE, timing_criticality - 1));

         #endif

     #endif

 #endif

         }

     }

 }


 void print_lut_remapping(const char *fname) {

     FILE *fp;

     int inode, i;

     t_pb *pb;


     fp = my_fopen(fname, "w", 0);

     fprintf(fp, "# LUT_Name\tinput_pin_mapping\n");


     for (inode = 0; inode < num_tnodes; inode++) {

         /* Print LUT input rebalancing */

         if (tnode[inode].type == TN_PRIMITIVE_OPIN && tnode[inode].pb_graph_pin != NULL) {

             pb = block[tnode[inode].block].pb->rr_node_to_pb_mapping[tnode[inode].pb_graph_pin->pin_count_in_cluster];

             if (pb != NULL && pb->lut_pin_remap != NULL) {

                 assert(pb->pb_graph_node->pb_type->num_output_pins == 1 && pb->pb_graph_node->pb_type->num_clock_pins == 0); /* ensure LUT properties are valid */

                 assert(pb->pb_graph_node->num_input_ports == 1);

                 fprintf(fp, "%s", pb->name);

                 for (i = 0; i < pb->pb_graph_node->num_input_pins[0]; i++) {

                     fprintf(fp, "\t%d", pb->lut_pin_remap[i]);

                 }

                 fprintf(fp, "\n");

             }

         }

     }


     fclose(fp);

 }


 void print_critical_path(const char *fname) {


     /* Prints the critical path to a file. */


     t_linked_int *critical_path_head, *critical_path_node;

     FILE *fp;

     int non_global_nets_on_crit_path, global_nets_on_crit_path;

     int tnodes_on_crit_path, inode, iblk, inet;

     e_tnode_type type;

     float total_net_delay, total_logic_delay, Tdel;


     critical_path_head = allocate_and_load_critical_path();

     critical_path_node = critical_path_head;


     fp = my_fopen(fname, "w", 0);


     non_global_nets_on_crit_path = 0;

     global_nets_on_crit_path = 0;

     tnodes_on_crit_path = 0;

     total_net_delay = 0.;

     total_logic_delay = 0.;


     while (critical_path_node != NULL) {

         Tdel = print_critical_path_node(fp, critical_path_node);

         inode = critical_path_node->data;

         type = tnode[inode].type;

         tnodes_on_crit_path++;


         if (type == TN_CB_OPIN) {

             get_tnode_block_and_output_net(inode, &iblk, &inet);


             if (!timing_nets[inet].is_global)

                 non_global_nets_on_crit_path++;

             else

                 global_nets_on_crit_path++;


             total_net_delay += Tdel;

         } else {

             total_logic_delay += Tdel;

         }


         critical_path_node = critical_path_node->next;

     }


     fprintf(fp, "\nTnodes on critical path: %d  Non-global nets on critical path: %d."

             "\n", tnodes_on_crit_path, non_global_nets_on_crit_path);

     fprintf(fp, "Global nets on critical path: %d.\n", global_nets_on_crit_path);

     fprintf(fp, "Total logic delay: %g (s)  Total net delay: %g (s)\n",

             total_logic_delay, total_net_delay);


     vpr_printf(TIO_MESSAGE_INFO, "Nets on critical path: %d normal, %d global.\n",

             non_global_nets_on_crit_path, global_nets_on_crit_path);


     vpr_printf(TIO_MESSAGE_INFO, "Total logic delay: %g (s), total net delay: %g (s)\n",

             total_logic_delay, total_net_delay);


     /* Make sure total_logic_delay and total_net_delay add

     up to critical path delay,within 5 decimal places. */

     assert(total_logic_delay + total_net_delay - get_critical_path_delay()/1e9 < 1e-5);


     fclose(fp);

     free_int_list(&critical_path_head);

 }


 t_linked_int * allocate_and_load_critical_path(void) {


     /* Finds the critical path and returns a list of the tnodes on the critical *

      * path in a linked list, from the path SOURCE to the path SINK.            */


     t_linked_int *critical_path_head, *curr_crit_node, *prev_crit_node;

     int inode, iedge, to_node, num_at_level_zero, i, j, crit_node = OPEN, num_edges;

     int source_clock_domain = UNDEFINED, sink_clock_domain = UNDEFINED;

     float min_slack = HUGE_POSITIVE_FLOAT, slack;

     t_tedge *tedge;


     /* If there's only one clock, we can use the arrival and required times

     currently on the timing graph to find the critical path. If there are multiple

     clocks, however, the values currently on the timing graph correspond to the

     last constraint (pair of clock domains) analysed, which may not be the constraint

     with the critical path. In this case, we have to find the constraint with the

     least slack and redo the timing analysis for this constraint so we get the right

     values onto the timing graph. */


     if (g_sdc->num_constrained_clocks > 1) {

         /* The critical path belongs to the source and sink clock domains

         with the least slack. Find these clock domains now. */


         for (i = 0; i < g_sdc->num_constrained_clocks; i++) {

             for (j = 0; j < g_sdc->num_constrained_clocks; j++) {

                 if (min_slack > f_timing_stats->least_slack[i][j]) {

                     min_slack = f_timing_stats->least_slack[i][j];

                     source_clock_domain = i;

                     sink_clock_domain = j;

                 }

             }

         }


         /* Do a timing analysis for this clock domain pair only.

         Set is_prepacked to FALSE since we don't care about the clusterer's normalized values.

         Set is_final_analysis to FALSE to get actual, rather than relaxed, slacks.

         Set max critical input/output paths to NULL since they aren't used unless is_prepacked is TRUE. */

         do_timing_analysis_for_constraint(source_clock_domain, sink_clock_domain, FALSE, FALSE, (long*)NULL, (long*)NULL);

     }


     /* Start at the source (level-0) tnode with the least slack (T_req-T_arr).

     This will form the head of our linked list of tnodes on the critical path. */

     min_slack = HUGE_POSITIVE_FLOAT;

     num_at_level_zero = tnodes_at_level[0].nelem;

     for (i = 0; i < num_at_level_zero; i++) {

         inode = tnodes_at_level[0].list[i];

         if (has_valid_T_arr(inode) && has_valid_T_req(inode)) { /* Valid arrival and required times */

             slack = tnode[inode].T_req - tnode[inode].T_arr;

             if (slack < min_slack) {

                 crit_node = inode;

                 min_slack = slack;

             }

         }

     }

     critical_path_head = (t_linked_int *) my_malloc(sizeof(t_linked_int));

     critical_path_head->data = crit_node;

     assert(crit_node != OPEN);

     prev_crit_node = critical_path_head;

     num_edges = tnode[crit_node].num_edges;


     /* Keep adding the tnode in this tnode's fanout which has the least slack

     to our critical path linked list, then jump to that tnode and repeat, until

     we hit a tnode with no edges, which is the sink of the critical path. */

     while (num_edges != 0) {

         curr_crit_node = (t_linked_int *) my_malloc(sizeof(t_linked_int));

         prev_crit_node->next = curr_crit_node;

         tedge = tnode[crit_node].out_edges;

         min_slack = HUGE_POSITIVE_FLOAT;


         for (iedge = 0; iedge < num_edges; iedge++) {

             to_node = tedge[iedge].to_node;

             if (has_valid_T_arr(to_node) && has_valid_T_req(to_node)) { /* Valid arrival and required times */

                 slack = tnode[to_node].T_req - tnode[to_node].T_arr;

                 if (slack < min_slack) {

                     crit_node = to_node;

                     min_slack = slack;

                 }

             }

         }


         curr_crit_node->data = crit_node;

         prev_crit_node = curr_crit_node;

         num_edges = tnode[crit_node].num_edges;

     }


     prev_crit_node->next = NULL;

     return (critical_path_head);

 }


 void get_tnode_block_and_output_net(int inode, int *iblk_ptr, int *inet_ptr) {


     /* Returns the index of the block that this tnode is part of.  If the tnode *

      * is a TN_CB_OPIN or TN_INPAD_OPIN (i.e. if it drives a net), the net index is  *

      * returned via inet_ptr.  Otherwise inet_ptr points at OPEN.               */


     int inet, ipin, iblk;

     e_tnode_type tnode_type;


     iblk = tnode[inode].block;

     tnode_type = tnode[inode].type;


     if (tnode_type == TN_CB_OPIN) {

         ipin = tnode[inode].pb_graph_pin->pin_count_in_cluster;

         inet = block[iblk].pb->rr_graph[ipin].net_num;

         assert(inet != OPEN);

         inet = vpack_to_clb_net_mapping[inet];

     } else {

         inet = OPEN;

     }


     *iblk_ptr = iblk;

     *inet_ptr = inet;

 }


 void do_constant_net_delay_timing_analysis(t_timing_inf timing_inf,

         float constant_net_delay_value) {


     /* Does a timing analysis (simple) where it assumes that each net has a      *

      * constant delay value.  Used only when operation == TIMING_ANALYSIS_ONLY.  */


     /*struct s_linked_vptr *net_delay_chunk_list_head;*/


     t_chunk net_delay_ch = {NULL, 0, NULL};

     t_slack * slacks = NULL;

     float **net_delay = NULL;


     slacks = alloc_and_load_timing_graph(timing_inf);

     net_delay = alloc_net_delay(&net_delay_ch, timing_nets,

             num_timing_nets);


     load_constant_net_delay(net_delay, constant_net_delay_value, timing_nets,

             num_timing_nets);

     load_timing_graph_net_delays(net_delay);


     do_timing_analysis(slacks, FALSE, FALSE, TRUE);


     if (getEchoEnabled()) {

         if(isEchoFileEnabled(E_ECHO_CRITICAL_PATH))

             print_critical_path("critical_path.echo");

         if(isEchoFileEnabled(E_ECHO_TIMING_GRAPH))

             print_timing_graph(getEchoFileName(E_ECHO_TIMING_GRAPH));

         if(isEchoFileEnabled(E_ECHO_SLACK))

             print_slack(slacks->slack, TRUE, getEchoFileName(E_ECHO_SLACK));

         if(isEchoFileEnabled(E_ECHO_CRITICALITY))

             print_criticality(slacks, TRUE, getEchoFileName(E_ECHO_CRITICALITY));

         if(isEchoFileEnabled(E_ECHO_NET_DELAY))

             print_net_delay(net_delay, getEchoFileName(E_ECHO_NET_DELAY));

     }


     print_timing_stats();


     free_timing_graph(slacks);

     free_net_delay(net_delay, &net_delay_ch);

 }

 #ifndef PATH_COUNTING

 static void update_normalized_costs(float criticality_denom, long max_critical_input_paths,

         long max_critical_output_paths) {

     int inode;


     /* Update the normalized costs for the clusterer.  On each traversal, each cost is

     updated for tnodes analysed on this traversal if it would give this tnode a higher

     criticality when calculating block criticality for the clusterer. */


     assert(criticality_denom != 0); /* Possible if timing analysis is being run pre-packing

                                     with all delays set to 0. This is not currently done,

                                     but if you're going to do it, you need to decide how

                                     best to normalize these values to suit your purposes. */


     for (inode = 0; inode < num_tnodes; inode++) {

         /* Only calculate for tnodes which have valid arrival and required times. */

         if (has_valid_T_arr(inode) && has_valid_T_req(inode)) {

             tnode[inode].prepacked_data->normalized_slack = std::min(tnode[inode].prepacked_data->normalized_slack,

                 (tnode[inode].T_req - tnode[inode].T_arr)/criticality_denom);

             tnode[inode].prepacked_data->normalized_T_arr = std::max(tnode[inode].prepacked_data->normalized_T_arr,

                 tnode[inode].T_arr/criticality_denom);

             tnode[inode].prepacked_data->normalized_total_critical_paths = std::max(tnode[inode].prepacked_data->normalized_total_critical_paths,

                     ((float) tnode[inode].prepacked_data->num_critical_input_paths + tnode[inode].prepacked_data->num_critical_output_paths) /

                              ((float) max_critical_input_paths + max_critical_output_paths));

         }

     }

 }

 #endif

 /* Set new arrival time

     Special code for LUTs to enable LUT input delay balancing

 */

 static void set_and_balance_arrival_time(int to_node, int from_node, float Tdel, boolean do_lut_input_balancing) {

     int i, j;

     t_pb *pb;

     boolean rebalance;

     t_tnode *input_tnode;


     boolean *assigned = NULL;

     int fastest_unassigned_pin, most_crit_tnode, most_crit_pin;

     float min_delay, highest_T_arr, balanced_T_arr;


     /* Normal case for determining arrival time */

     tnode[to_node].T_arr = std::max(tnode[to_node].T_arr, tnode[from_node].T_arr + Tdel);


     /* Do LUT input rebalancing for LUTs */

     if (do_lut_input_balancing && tnode[to_node].type == TN_PRIMITIVE_OPIN && tnode[to_node].pb_graph_pin != NULL) {

         pb = block[tnode[to_node].block].pb->rr_node_to_pb_mapping[tnode[to_node].pb_graph_pin->pin_count_in_cluster];

         if (pb != NULL && pb->lut_pin_remap != NULL) {

             /* this is a LUT primitive, do pin swapping */

             assert(pb->pb_graph_node->pb_type->num_output_pins == 1 && pb->pb_graph_node->pb_type->num_clock_pins == 0); /* ensure LUT properties are valid */

             assert(pb->pb_graph_node->num_input_ports == 1);

             assert(tnode[from_node].block == tnode[to_node].block);


             /* assign from_node to default location */

             assert(pb->lut_pin_remap[tnode[from_node].pb_graph_pin->pin_number] == OPEN);

             pb->lut_pin_remap[tnode[from_node].pb_graph_pin->pin_number] = tnode[from_node].pb_graph_pin->pin_number;


             /* If all input pins are known, perform LUT input delay rebalancing, do nothing otherwise */

             rebalance = TRUE;

             for (i = 0; i < pb->pb_graph_node->num_input_pins[0]; i++) {

                 input_tnode = block[tnode[to_node].block].pb->rr_graph[pb->pb_graph_node->input_pins[0][i].pin_count_in_cluster].tnode;

                 if (input_tnode != NULL && pb->lut_pin_remap[i] == OPEN) {

                     rebalance = FALSE;

                 }

             }

             if (rebalance == TRUE) {

                 /* Rebalance LUT inputs so that the most critical paths get the fastest inputs */

                 balanced_T_arr = OPEN;

                 assigned = (boolean*)my_calloc(pb->pb_graph_node->num_input_pins[0], sizeof(boolean));

                 /* Clear pin remapping */

                 for (i = 0; i < pb->pb_graph_node->num_input_pins[0]; i++) {

                     pb->lut_pin_remap[i] = OPEN;

                 }

                 /* load new T_arr and pin mapping */

                 for (i = 0; i < pb->pb_graph_node->num_input_pins[0]; i++) {

                     /* Find fastest physical input pin of LUT */

                     fastest_unassigned_pin = OPEN;

                     min_delay = OPEN;

                     for (j = 0; j < pb->pb_graph_node->num_input_pins[0]; j++) {

                         if (pb->lut_pin_remap[j] == OPEN) {

                             if (fastest_unassigned_pin == OPEN) {

                                 fastest_unassigned_pin = j;

                                 min_delay = pb->pb_graph_node->input_pins[0][j].pin_timing_del_max[0];

                             } else if (min_delay > pb->pb_graph_node->input_pins[0][j].pin_timing_del_max[0]) {

                                 fastest_unassigned_pin = j;

                                 min_delay = pb->pb_graph_node->input_pins[0][j].pin_timing_del_max[0];

                             }

                         }

                     }

                     assert(fastest_unassigned_pin != OPEN);


                     /* Find most critical LUT input pin in user circuit */

                     most_crit_tnode = OPEN;

                     highest_T_arr = OPEN;

                     most_crit_pin = OPEN;

                     for (j = 0; j < pb->pb_graph_node->num_input_pins[0]; j++) {

                         input_tnode = block[tnode[to_node].block].pb->rr_graph[pb->pb_graph_node->input_pins[0][j].pin_count_in_cluster].tnode;

                         if (input_tnode != NULL && assigned[j] == FALSE) {

                             if (most_crit_tnode == OPEN) {

                                 most_crit_tnode = get_tnode_index(input_tnode);

                                 highest_T_arr = input_tnode->T_arr;

                                 most_crit_pin = j;

                             } else if (highest_T_arr < input_tnode->T_arr) {

                                 most_crit_tnode = get_tnode_index(input_tnode);

                                 highest_T_arr = input_tnode->T_arr;

                                 most_crit_pin = j;

                             }

                         }

                     }


                     if (most_crit_tnode == OPEN) {

                         break;

                     } else {

                         assert(tnode[most_crit_tnode].num_edges == 1);

                         tnode[most_crit_tnode].out_edges[0].Tdel = min_delay;

                         pb->lut_pin_remap[fastest_unassigned_pin] = most_crit_pin;

                         assigned[most_crit_pin] = TRUE;

                         if (balanced_T_arr < min_delay + highest_T_arr) {

                             balanced_T_arr = min_delay + highest_T_arr;

                         }

                     }

                 }

                 free(assigned);

                 if (balanced_T_arr != OPEN) {

                     tnode[to_node].T_arr = balanced_T_arr;

                 }

             }

         }

     }

 }


 static void load_clock_domain_and_clock_and_io_delay(boolean is_prepacked) {

 /* Loads clock domain and clock delay onto TN_FF_SOURCE and TN_FF_SINK tnodes.

 The clock domain of each clock is its index in g_sdc->constrained_clocks.

 We do this by matching each clock input pad to a constrained clock name, then

 propagating forward its domain index to all flip-flops fed by it (TN_FF_CLOCK

 tnodes), then later looking up the TN_FF_CLOCK tnode corresponding to every

 TN_FF_SOURCE and TN_FF_SINK tnode. We also add up the delays along the clock net

 to each TN_FF_CLOCK tnode to give it (and the SOURCE/SINK nodes) a clock delay.


 Also loads input delay/output delay (from set_input_delay or set_output_delay SDC

 constraints) onto the tedges between TN_INPAD_SOURCE/OPIN and TN_OUTPAD_IPIN/SINK

 tnodes.  Finds fanout of each clock domain, including virtual (external) clocks.

 Marks unconstrained I/Os with a dummy clock domain (-1). */


     int i, iclock, inode, num_at_level, clock_index, input_index, output_index;

     char * net_name;

     t_tnode * clock_node;


     /* Wipe fanout of each clock domain in g_sdc->constrained_clocks. */

     for (iclock = 0; iclock < g_sdc->num_constrained_clocks; iclock++) {

         g_sdc->constrained_clocks[iclock].fanout = 0;

     }


     /* First, visit all TN_INPAD_SOURCE tnodes */

     num_at_level = tnodes_at_level[0].nelem;    /* There are num_at_level top-level tnodes. */

     for (i = 0; i < num_at_level; i++) {

         inode = tnodes_at_level[0].list[i];     /* Iterate through each tnode. inode is the index of the tnode in the array tnode. */

         if (tnode[inode].type == TN_INPAD_SOURCE) { /* See if this node is the start of an I/O pad (as oppposed to a flip-flop source). */

             net_name = find_tnode_net_name(inode, is_prepacked);

             if ((clock_index = find_clock(net_name)) != -1) { /* We have a clock inpad. */

                 /* Set clock skew to 0 at the source and propagate skew

                 recursively to all connected nodes, adding delay as we go.

                 Set the clock domain to the index of the clock in the

                 g_sdc->constrained_clocks array and propagate unchanged. */

                 tnode[inode].clock_delay = 0.;

                 tnode[inode].clock_domain = clock_index;

                 propagate_clock_domain_and_skew(inode);


                 /* Set the clock domain of this clock inpad to -1, so that we do not analyse it.

                 If we did not do this, the clock net would be analysed on the same iteration of

                 the timing analyzer as the flip-flops it drives! */

                 tnode[inode].clock_domain = -1;


             } else if ((input_index = find_input(net_name)) != -1) {

                 /* We have a constrained non-clock inpad - find the clock it's constrained on. */

                 clock_index = find_clock(g_sdc->constrained_inputs[input_index].clock_name);

                 assert(clock_index != -1);

                 /* The clock domain for this input is that of its virtual clock */

                 tnode[inode].clock_domain = clock_index;

                 /* Increment the fanout of this virtual clock domain. */

                 g_sdc->constrained_clocks[clock_index].fanout++;

                 /* Mark input delay specified in SDC file on the timing graph edge leading out from the TN_INPAD_SOURCE node. */

                 tnode[inode].out_edges[0].Tdel = g_sdc->constrained_inputs[input_index].delay * 1e-9; /* normalize to be in seconds not ns */

             } else { /* We have an unconstrained input - mark with dummy clock domain and do not analyze. */

                 tnode[inode].clock_domain = -1;

             }

         }

     }


     /* Second, visit all TN_OUTPAD_SINK tnodes. Unlike for TN_INPAD_SOURCE tnodes,

     we have to search the entire tnode array since these are all on different levels. */

     for (inode = 0; inode < num_tnodes; inode++) {

         if (tnode[inode].type == TN_OUTPAD_SINK) {

             /*  Since the pb_graph_pin of TN_OUTPAD_SINK tnodes points to NULL, we have to find the net

             from the pb_graph_pin of the corresponding TN_OUTPAD_IPIN node.

             Exploit the fact that the TN_OUTPAD_IPIN node will always be one prior in the tnode array. */

             assert(tnode[inode - 1].type == TN_OUTPAD_IPIN);

             net_name = find_tnode_net_name(inode, is_prepacked);

             output_index = find_output(net_name + 4); /* the + 4 removes the prefix "out:" automatically prepended to outputs */

             if (output_index != -1) {

                 /* We have a constrained outpad, find the clock it's constrained on. */

                 clock_index = find_clock(g_sdc->constrained_outputs[output_index].clock_name);

                 assert(clock_index != -1);

                 /* The clock doain for this output is that of its virtual clock */

                 tnode[inode].clock_domain = clock_index;

                 /* Increment the fanout of this virtual clock domain. */

                 g_sdc->constrained_clocks[clock_index].fanout++;

                 /* Mark output delay specified in SDC file on the timing graph edge leading into the TN_OUTPAD_SINK node.

                 However, this edge is part of the corresponding TN_OUTPAD_IPIN node.

                 Exploit the fact that the TN_OUTPAD_IPIN node will always be one prior in the tnode array. */

                 tnode[inode - 1].out_edges[0].Tdel = g_sdc->constrained_outputs[output_index].delay * 1e-9; /* normalize to be in seconds not ns */


             } else { /* We have an unconstrained input - mark with dummy clock domain and do not analyze. */

                 tnode[inode].clock_domain = -1;

             }

         }

     }


     /* Third, visit all TN_FF_SOURCE and TN_FF_SINK tnodes, and transfer the clock domain and skew from their corresponding TN_FF_CLOCK tnodes*/

     for (inode = 0; inode < num_tnodes; inode++) {

         if (tnode[inode].type == TN_FF_SOURCE || tnode[inode].type == TN_FF_SINK) {

             clock_node = find_ff_clock_tnode(inode, is_prepacked);

             tnode[inode].clock_domain = clock_node->clock_domain;

             tnode[inode].clock_delay   = clock_node->clock_delay;

         }

     }

 }


 static void propagate_clock_domain_and_skew(int inode) {

 /* Given a tnode indexed by inode (which is part of a clock net),

  * propagate forward the clock domain (unchanged) and skew (adding the delay of edges) to all nodes in its fanout.

  * We then call recursively on all children in a depth-first search.  If num_edges is 0, we should be at an TN_FF_CLOCK tnode; we then set the

  * TN_FF_SOURCE and TN_FF_SINK nodes to have the same clock domain and skew as the TN_FF_CLOCK node.  We implicitly rely on a tnode not being

  * part of two separate clock nets, since undefined behaviour would result if one DFS overwrote the results of another.  This may

  * be problematic in cases of multiplexed or locally-generated clocks. */


     int iedge, to_node;

     t_tedge * tedge;


     tedge = tnode[inode].out_edges; /* Get the list of edges from the node we're visiting. */


     if (!tedge) { /* Leaf/sink node; base case of the recursion. */

         assert(tnode[inode].type == TN_FF_CLOCK);

         assert(tnode[inode].clock_domain != -1); /* Make sure clock domain is valid. */

         g_sdc->constrained_clocks[tnode[inode].clock_domain].fanout++;

         return;

     }


     for (iedge = 0; iedge < tnode[inode].num_edges; iedge++) {  /* Go through each edge coming out from this tnode */

         to_node = tedge[iedge].to_node;

         /* Propagate clock skew forward along this clock net, adding the delay of the wires (edges) of the clock network. */

         tnode[to_node].clock_delay = tnode[inode].clock_delay + tedge[iedge].Tdel;

         /* Propagate clock domain forward unchanged */

         tnode[to_node].clock_domain = tnode[inode].clock_domain;

         /* Finally, call recursively on the destination tnode. */

         propagate_clock_domain_and_skew(to_node);

     }

 }


 static char * find_tnode_net_name(int inode, boolean is_prepacked) {

     /* Finds the name of the net which a tnode (inode) is on (different for pre-/post-packed netlists). */


     int logic_block; /* Name chosen not to conflict with the array logical_block */

     char * net_name;

     t_pb * physical_block;

     t_pb_graph_pin * pb_graph_pin;


     logic_block = tnode[inode].block;

     if (is_prepacked) {

         net_name = logical_block[logic_block].name;

     } else {

         physical_block = block[logic_block].pb;

         pb_graph_pin = tnode[inode].pb_graph_pin;

         net_name = physical_block->rr_node_to_pb_mapping[pb_graph_pin->pin_count_in_cluster]->name;

     }

     return net_name;

 }


 static t_tnode * find_ff_clock_tnode(int inode, boolean is_prepacked) {

     /* Finds the TN_FF_CLOCK tnode on the same flipflop as an TN_FF_SOURCE or TN_FF_SINK tnode. */


     int logic_block; /* Name chosen not to conflict with the array logical_block */

     t_tnode * ff_clock_tnode;

     t_rr_node * rr_graph;

     t_pb_graph_node * parent_pb_graph_node;

     t_pb_graph_pin * ff_source_or_sink_pb_graph_pin, * clock_pb_graph_pin;


     logic_block = tnode[inode].block;

     if (is_prepacked) {

         ff_clock_tnode = logical_block[logic_block].clock_net_tnode;

     } else {

         rr_graph = block[logic_block].pb->rr_graph;

         ff_source_or_sink_pb_graph_pin = tnode[inode].pb_graph_pin;

         parent_pb_graph_node = ff_source_or_sink_pb_graph_pin->parent_node;

         /* Make sure there's only one clock port and only one clock pin in that port */

         assert(parent_pb_graph_node->num_clock_ports == 1);

         assert(parent_pb_graph_node->num_clock_pins[0] == 1);

         clock_pb_graph_pin = &parent_pb_graph_node->clock_pins[0][0];

         ff_clock_tnode = rr_graph[clock_pb_graph_pin->pin_count_in_cluster].tnode;

     }

     assert(ff_clock_tnode != NULL);

     assert(ff_clock_tnode->type == TN_FF_CLOCK);

     return ff_clock_tnode;

 }


 static int find_clock(char * net_name) {

 /* Given a string net_name, find whether it's the name of a clock in the array g_sdc->constrained_clocks.  *

  * if it is, return the clock's index in g_sdc->constrained_clocks; if it's not, return -1. */

     int index;

     for (index = 0; index < g_sdc->num_constrained_clocks; index++) {

         if (strcmp(net_name, g_sdc->constrained_clocks[index].name) == 0) {

             return index;

         }

     }

     return -1;

 }


 static int find_input(char * net_name) {

 /* Given a string net_name, find whether it's the name of a constrained input in the array g_sdc->constrained_inputs.  *

  * if it is, return its index in g_sdc->constrained_inputs; if it's not, return -1. */

     int index;

     for (index = 0; index < g_sdc->num_constrained_inputs; index++) {

         if (strcmp(net_name, g_sdc->constrained_inputs[index].name) == 0) {

             return index;

         }

     }

     return -1;

 }


 static int find_output(char * net_name) {

 /* Given a string net_name, find whether it's the name of a constrained output in the array g_sdc->constrained_outputs.  *

  * if it is, return its index in g_sdc->constrained_outputs; if it's not, return -1. */

     int index;

     for (index = 0; index < g_sdc->num_constrained_outputs; index++) {

         if (strcmp(net_name, g_sdc->constrained_outputs[index].name) == 0) {

             return index;

         }

     }

     return -1;

 }


 static int find_cf_constraint(char * source_clock_name, char * sink_ff_name) {

     /* Given a source clock domain and a sink flip-flop, find out if there's an override constraint between them.

     If there is, return the index in g_sdc->cf_constraints; if there is not, return -1. */

     int icf, isource, isink;


     for (icf = 0; icf < g_sdc->num_cf_constraints; icf++) {

         for (isource = 0; isource < g_sdc->cf_constraints[icf].num_source; isource++) {

             if (strcmp(g_sdc->cf_constraints[icf].source_list[isource], source_clock_name) == 0) {

                 for (isink = 0; isink < g_sdc->cf_constraints[icf].num_sink; isink++) {

                     if (strcmp(g_sdc->cf_constraints[icf].sink_list[isink], sink_ff_name) == 0) {

                         return icf;

                     }

                 }

             }

         }

     }

     return -1;

 }


 static inline int get_tnode_index(t_tnode * node) {

     /* Returns the index of pointer_to_tnode in the array tnode [0..num_tnodes - 1]

     using pointer arithmetic. */

     return node - tnode;

 }


 static inline boolean has_valid_T_arr(int inode) {

     /* Has this tnode's arrival time been changed from its original value of HUGE_NEGATIVE_FLOAT? */

     return (boolean) (tnode[inode].T_arr > HUGE_NEGATIVE_FLOAT + 1);

 }


 static inline boolean has_valid_T_req(int inode) {

     /* Has this tnode's required time been changed from its original value of HUGE_POSITIVE_FLOAT? */

     return (boolean) (tnode[inode].T_req < HUGE_POSITIVE_FLOAT - 1);

 }


 #ifndef PATH_COUNTING

 boolean has_valid_normalized_T_arr(int inode) {

     /* Has this tnode's normalized_T_arr been changed from its original value of HUGE_NEGATIVE_FLOAT? */

     return (boolean) (tnode[inode].prepacked_data->normalized_T_arr > HUGE_NEGATIVE_FLOAT + 1);

 }

 #endif


 float get_critical_path_delay(void) {

     /* Finds the critical path delay, which is the minimum clock period required to meet the constraint

     corresponding to the pair of source and sink clock domains with the least slack in the design. */


     int source_clock_domain, sink_clock_domain;

     float least_slack_in_design = HUGE_POSITIVE_FLOAT, critical_path_delay = UNDEFINED;


     if (!g_sdc) return UNDEFINED; /* If timing analysis is off, for instance. */


     for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) {

         for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {

             if (least_slack_in_design > f_timing_stats->least_slack[source_clock_domain][sink_clock_domain]) {

                 least_slack_in_design = f_timing_stats->least_slack[source_clock_domain][sink_clock_domain];

                 critical_path_delay = f_timing_stats->cpd[source_clock_domain][sink_clock_domain];

             }

         }

     }


     return critical_path_delay * 1e9; /* Convert to nanoseconds */

 }


 void print_timing_stats(void) {


     /* Prints critical path delay/fmax, least slack in design, and, for multiple-clock designs,

     minimum required clock period to meet each constraint, least slack per constraint,

     geometric average clock frequency, and fanout-weighted geometric average clock frequency. */


     int source_clock_domain, sink_clock_domain, clock_domain, fanout, total_fanout = 0,

         num_netlist_clocks_with_intra_domain_paths = 0;

     float geomean_period = 1., least_slack_in_design = HUGE_POSITIVE_FLOAT, critical_path_delay = UNDEFINED;

     double fanout_weighted_geomean_period = 1.;

     boolean found;


     /* Find critical path delay. If the pb_max_internal_delay is greater than this, it becomes

      the limiting factor on critical path delay, so print that instead, with a special message. */


     for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) {

         for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {

             if (least_slack_in_design > f_timing_stats->least_slack[source_clock_domain][sink_clock_domain]) {

                 least_slack_in_design = f_timing_stats->least_slack[source_clock_domain][sink_clock_domain];

                 critical_path_delay = f_timing_stats->cpd[source_clock_domain][sink_clock_domain];

             }

         }

     }


     if (pb_max_internal_delay != UNDEFINED && pb_max_internal_delay > critical_path_delay) {

         critical_path_delay = pb_max_internal_delay;

         vpr_printf(TIO_MESSAGE_INFO, "Final critical path: %g ns\n", 1e9 * critical_path_delay);

         vpr_printf(TIO_MESSAGE_INFO, "\t(capped by fmax of block type %s)\n", pbtype_max_internal_delay->name);


     } else {

         vpr_printf(TIO_MESSAGE_INFO, "Final critical path: %g ns\n", 1e9 * critical_path_delay);

     }


     if (g_sdc->num_constrained_clocks <= 1) {

         /* Although critical path delay is always well-defined, it doesn't make sense to talk about fmax for multi-clock circuits */

         vpr_printf(TIO_MESSAGE_INFO, "f_max: %g MHz\n", 1e-6 / critical_path_delay);

     }


     /* Also print the least slack in the design */

     vpr_printf(TIO_MESSAGE_INFO, "\n");

     vpr_printf(TIO_MESSAGE_INFO, "Least slack in design: %g ns\n", 1e9 * least_slack_in_design);

     vpr_printf(TIO_MESSAGE_INFO, "\n");


     if (g_sdc->num_constrained_clocks > 1) { /* Multiple-clock design */


         /* Print minimum possible clock period to meet each constraint. Convert to nanoseconds. */


         vpr_printf(TIO_MESSAGE_INFO, "Minimum possible clock period to meet each constraint (including skew effects):\n");

         for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) {

             /* Print the intra-domain constraint if it was analysed. */

             if (g_sdc->domain_constraint[source_clock_domain][source_clock_domain] > NEGATIVE_EPSILON) {

                 vpr_printf(TIO_MESSAGE_INFO, "%s to %s: %g ns (%g MHz)\n",

                         g_sdc->constrained_clocks[source_clock_domain].name,

                         g_sdc->constrained_clocks[source_clock_domain].name,

                         1e9 * f_timing_stats->cpd[source_clock_domain][source_clock_domain],

                         1e-6 / f_timing_stats->cpd[source_clock_domain][source_clock_domain]);

             } else {

                 vpr_printf(TIO_MESSAGE_INFO, "%s to %s: --\n",

                         g_sdc->constrained_clocks[source_clock_domain].name,

                         g_sdc->constrained_clocks[source_clock_domain].name);

             }

             /* Then, print all other constraints on separate lines, indented. We re-print

             the source clock domain's name so there's no ambiguity when parsing. */

             for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {

                 if (source_clock_domain == sink_clock_domain) continue; /* already done that */

                 if (g_sdc->domain_constraint[source_clock_domain][sink_clock_domain] > NEGATIVE_EPSILON) {

                     /* If this domain pair was analysed */

                     vpr_printf(TIO_MESSAGE_INFO, "\t%s to %s: %g ns (%g MHz)\n",

                             g_sdc->constrained_clocks[source_clock_domain].name,

                             g_sdc->constrained_clocks[sink_clock_domain].name,

                             1e9 * f_timing_stats->cpd[source_clock_domain][sink_clock_domain],

                             1e-6 / f_timing_stats->cpd[source_clock_domain][sink_clock_domain]);

                 } else {

                     vpr_printf(TIO_MESSAGE_INFO, "\t%s to %s: --\n",

                             g_sdc->constrained_clocks[source_clock_domain].name,

                             g_sdc->constrained_clocks[sink_clock_domain].name);

                 }

             }

         }


         /* Print least slack per constraint. */


         vpr_printf(TIO_MESSAGE_INFO, "\n");

         vpr_printf(TIO_MESSAGE_INFO, "Least slack per constraint:\n");

         for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) {

             /* Print the intra-domain slack if valid. */

             if (f_timing_stats->least_slack[source_clock_domain][source_clock_domain] < HUGE_POSITIVE_FLOAT - 1) {

                 vpr_printf(TIO_MESSAGE_INFO, "%s to %s: %g ns\n",

                         g_sdc->constrained_clocks[source_clock_domain].name,

                         g_sdc->constrained_clocks[source_clock_domain].name,

                         1e9 * f_timing_stats->least_slack[source_clock_domain][source_clock_domain]);

             } else {

                 vpr_printf(TIO_MESSAGE_INFO, "%s to %s: --\n",

                         g_sdc->constrained_clocks[source_clock_domain].name,

                         g_sdc->constrained_clocks[source_clock_domain].name);

             }

             /* Then, print all other slacks on separate lines. */

             for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {

                 if (source_clock_domain == sink_clock_domain) continue; /* already done that */

                 if (f_timing_stats->least_slack[source_clock_domain][sink_clock_domain] < HUGE_POSITIVE_FLOAT - 1) {

                     /* If this domain pair was analysed and has a valid slack */

                     vpr_printf(TIO_MESSAGE_INFO, "\t%s to %s: %g ns\n",

                             g_sdc->constrained_clocks[source_clock_domain].name,

                             g_sdc->constrained_clocks[sink_clock_domain].name,

                             1e9 * f_timing_stats->least_slack[source_clock_domain][sink_clock_domain]);

                 } else {

                     vpr_printf(TIO_MESSAGE_INFO, "\t%s to %s: --\n",

                             g_sdc->constrained_clocks[source_clock_domain].name,

                             g_sdc->constrained_clocks[sink_clock_domain].name);

                 }

             }

         }


         /* Calculate geometric mean f_max (fanout-weighted and unweighted) from the diagonal (intra-domain) entries of critical_path_delay,

         excluding domains without intra-domain paths (for which the timing constraint is DO_NOT_ANALYSE) and virtual clocks. */

         found = FALSE;

         for (clock_domain = 0; clock_domain < g_sdc->num_constrained_clocks; clock_domain++) {

             if (g_sdc->domain_constraint[clock_domain][clock_domain] > NEGATIVE_EPSILON && g_sdc->constrained_clocks[clock_domain].is_netlist_clock) {

                 geomean_period *= f_timing_stats->cpd[clock_domain][clock_domain];

                 fanout = g_sdc->constrained_clocks[clock_domain].fanout;

                 fanout_weighted_geomean_period *= pow((double) f_timing_stats->cpd[clock_domain][clock_domain], fanout);

                 total_fanout += fanout;

                 num_netlist_clocks_with_intra_domain_paths++;

                 found = TRUE;

             }

         }

         if (found) { /* Only print these if we found at least one clock domain with intra-domain paths. */

             geomean_period = pow(geomean_period, (float) 1/num_netlist_clocks_with_intra_domain_paths);

             fanout_weighted_geomean_period = pow(fanout_weighted_geomean_period, (double) 1/total_fanout);

             /* Convert to MHz */

             vpr_printf(TIO_MESSAGE_INFO, "\n");

             vpr_printf(TIO_MESSAGE_INFO, "Geometric mean intra-domain period: %g ns (%g MHz)\n",

                     1e9 * geomean_period, 1e-6 / geomean_period);

             vpr_printf(TIO_MESSAGE_INFO, "Fanout-weighted geomean intra-domain period: %g ns (%g MHz)\n",

                     1e9 * fanout_weighted_geomean_period, 1e-6 / fanout_weighted_geomean_period);

         }


         vpr_printf(TIO_MESSAGE_INFO, "\n");

     }

 }


 static void print_timing_constraint_info(const char *fname) {

     /* Prints the contents of g_sdc->domain_constraint, g_sdc->constrained_clocks, constrained_ios and g_sdc->cc_constraints to a file. */


     FILE * fp;

     int source_clock_domain, sink_clock_domain, i, j;

     int * clock_name_length = (int *) my_malloc(g_sdc->num_constrained_clocks * sizeof(int)); /* Array of clock name lengths */

     int max_clock_name_length = INT_MIN;

     char * clock_name;


     fp = my_fopen(fname, "w", 0);


     /* Get lengths of each clock name and max length so we can space the columns properly. */

     for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {

         clock_name = g_sdc->constrained_clocks[sink_clock_domain].name;

         clock_name_length[sink_clock_domain] = strlen(clock_name);

         if (clock_name_length[sink_clock_domain] > max_clock_name_length) {

             max_clock_name_length = clock_name_length[sink_clock_domain];

         }

     }


     /* First, combine info from g_sdc->domain_constraint and g_sdc->constrained_clocks into a matrix

     (they're indexed the same as each other). */


     fprintf(fp, "Timing constraints in ns (source clock domains down left side, sink along top).\n"

                 "A value of -1.00 means the pair of source and sink domains will not be analysed.\n\n");


     print_spaces(fp, max_clock_name_length + 4);


     for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {

         fprintf(fp, "%s", g_sdc->constrained_clocks[sink_clock_domain].name);

         /* Minimum column width of 8 */

         print_spaces(fp, std::max(8 - clock_name_length[sink_clock_domain], 4));

     }

     fprintf(fp, "\n");


     for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) {

         fprintf(fp, "%s", g_sdc->constrained_clocks[source_clock_domain].name);

         print_spaces(fp, max_clock_name_length + 4 - clock_name_length[source_clock_domain]);

         for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {

             fprintf(fp, "%5.2f", g_sdc->domain_constraint[source_clock_domain][sink_clock_domain]);

             /* Minimum column width of 8 */

             print_spaces(fp, std::max(clock_name_length[sink_clock_domain] - 1, 3));

         }

         fprintf(fp, "\n");

     }


     free(clock_name_length);


     /* Second, print I/O constraints. */

     fprintf(fp, "\nList of constrained inputs (note: constraining a clock net input has no effect):\n");

     for (i = 0; i < g_sdc->num_constrained_inputs; i++) {

         fprintf(fp, "Input name %s on clock %s with input delay %.2f ns\n",

             g_sdc->constrained_inputs[i].name, g_sdc->constrained_inputs[i].clock_name, g_sdc->constrained_inputs[i].delay);

     }


     fprintf(fp, "\nList of constrained outputs:\n");

     for (i = 0; i < g_sdc->num_constrained_outputs; i++) {

         fprintf(fp, "Output name %s on clock %s with output delay %.2f ns\n",

             g_sdc->constrained_outputs[i].name, g_sdc->constrained_outputs[i].clock_name, g_sdc->constrained_outputs[i].delay);

     }


     /* Third, print override constraints. */

     fprintf(fp, "\nList of override constraints (non-default constraints created by set_false_path, set_clock_groups, \nset_max_delay, and set_multicycle_path):\n");


     for (i = 0; i < g_sdc->num_cc_constraints; i++) {

         fprintf(fp, "Clock domain");

         for (j = 0; j < g_sdc->cc_constraints[i].num_source; j++) {

             fprintf(fp, " %s,", g_sdc->cc_constraints[i].source_list[j]);

         }

         fprintf(fp, " to clock domain");

         for (j = 0; j < g_sdc->cc_constraints[i].num_sink - 1; j++) {

             fprintf(fp, " %s,", g_sdc->cc_constraints[i].sink_list[j]);

         } /* We have to print the last one separately because we don't want a comma after it. */

         if (g_sdc->cc_constraints[i].num_multicycles == 0) { /* not a multicycle constraint */

             fprintf(fp, " %s: %.2f ns\n", g_sdc->cc_constraints[i].sink_list[j], g_sdc->cc_constraints[i].constraint);

         } else { /* multicycle constraint */

             fprintf(fp, " %s: %d multicycles\n", g_sdc->cc_constraints[i].sink_list[j], g_sdc->cc_constraints[i].num_multicycles);

         }

     }


     for (i = 0; i < g_sdc->num_cf_constraints; i++) {

         fprintf(fp, "Clock domain");

         for (j = 0; j < g_sdc->cf_constraints[i].num_source; j++) {

             fprintf(fp, " %s,", g_sdc->cf_constraints[i].source_list[j]);

         }

         fprintf(fp, " to flip-flop");

         for (j = 0; j < g_sdc->cf_constraints[i].num_sink - 1; j++) {

             fprintf(fp, " %s,", g_sdc->cf_constraints[i].sink_list[j]);

         } /* We have to print the last one separately because we don't want a comma after it. */

         if (g_sdc->cf_constraints[i].num_multicycles == 0) { /* not a multicycle constraint */

             fprintf(fp, " %s: %.2f ns\n", g_sdc->cf_constraints[i].sink_list[j], g_sdc->cf_constraints[i].constraint);

         } else { /* multicycle constraint */

             fprintf(fp, " %s: %d multicycles\n", g_sdc->cf_constraints[i].sink_list[j], g_sdc->cf_constraints[i].num_multicycles);

         }

     }


     for (i = 0; i < g_sdc->num_fc_constraints; i++) {

         fprintf(fp, "Flip-flop");

         for (j = 0; j < g_sdc->fc_constraints[i].num_source; j++) {

             fprintf(fp, " %s,", g_sdc->fc_constraints[i].source_list[j]);

         }

         fprintf(fp, " to clock domain");

         for (j = 0; j < g_sdc->fc_constraints[i].num_sink - 1; j++) {

             fprintf(fp, " %s,", g_sdc->fc_constraints[i].sink_list[j]);

         } /* We have to print the last one separately because we don't want a comma after it. */

         if (g_sdc->fc_constraints[i].num_multicycles == 0) { /* not a multicycle constraint */

             fprintf(fp, " %s: %.2f ns\n", g_sdc->fc_constraints[i].sink_list[j], g_sdc->fc_constraints[i].constraint);

         } else { /* multicycle constraint */

             fprintf(fp, " %s: %d multicycles\n", g_sdc->fc_constraints[i].sink_list[j], g_sdc->fc_constraints[i].num_multicycles);

         }

     }


     for (i = 0; i < g_sdc->num_ff_constraints; i++) {

         fprintf(fp, "Flip-flop");

         for (j = 0; j < g_sdc->ff_constraints[i].num_source; j++) {

             fprintf(fp, " %s,", g_sdc->ff_constraints[i].source_list[j]);

         }

         fprintf(fp, " to flip-flop");

         for (j = 0; j < g_sdc->ff_constraints[i].num_sink - 1; j++) {

             fprintf(fp, " %s,", g_sdc->ff_constraints[i].sink_list[j]);

         } /* We have to print the last one separately because we don't want a comma after it. */

         if (g_sdc->ff_constraints[i].num_multicycles == 0) { /* not a multicycle constraint */

             fprintf(fp, " %s: %.2f ns\n", g_sdc->ff_constraints[i].sink_list[j], g_sdc->ff_constraints[i].constraint);

         } else { /* multicycle constraint */

             fprintf(fp, " %s: %d multicycles\n", g_sdc->ff_constraints[i].sink_list[j], g_sdc->ff_constraints[i].num_multicycles);

         }

     }


     fclose(fp);

 }


 static void print_spaces(FILE * fp, int num_spaces) {

     /* Prints num_spaces spaces to file pointed to by fp. */

     for ( ; num_spaces > 0; num_spaces--) {

         fprintf(fp, " ");

     }

 }


 void print_timing_graph_as_blif (const char *fname, t_model *models) {

     struct s_model_ports *port;

     struct s_linked_vptr *p_io_removed;

     /* Prints out the critical path to a file.  */


     FILE *fp;

     int i, j;


     fp = my_fopen(fname, "w", 0);


     fprintf(fp, ".model %s\n", blif_circuit_name);


     fprintf(fp, ".inputs ");

     for (i = 0; i < num_logical_blocks; i++) {

         if (logical_block[i].type == VPACK_INPAD) {

             fprintf(fp, "\\\n%s ", logical_block[i].name);

         }

     }

     p_io_removed = circuit_p_io_removed;

     while (p_io_removed) {

         fprintf(fp, "\\\n%s ", (char *) p_io_removed->data_vptr);

         p_io_removed = p_io_removed->next;

     }


     fprintf(fp, "\n");


     fprintf(fp, ".outputs ");

     for (i = 0; i < num_logical_blocks; i++) {

         if (logical_block[i].type == VPACK_OUTPAD) {

             /* Outputs have a "out:" prepended to them, must remove */

             fprintf(fp, "\\\n%s ", &logical_block[i].name[4]);

         }

     }

     fprintf(fp, "\n");

     fprintf(fp, ".names unconn\n");

     fprintf(fp, " 0\n\n");


     /* Print out primitives */

     for (i = 0; i < num_logical_blocks; i++) {

         print_primitive_as_blif (fp, i);

     }


     /* Print out tnode connections */

     for (i = 0; i < num_tnodes; i++) {

         if (tnode[i].type != TN_PRIMITIVE_IPIN && tnode[i].type != TN_FF_SOURCE

                 && tnode[i].type != TN_INPAD_SOURCE

                 && tnode[i].type != TN_OUTPAD_IPIN) {

             for (j = 0; j < tnode[i].num_edges; j++) {

                 fprintf(fp, ".names tnode_%d tnode_%d\n", i,

                     tnode[i].out_edges[j].to_node);

                 fprintf(fp, "1 1\n\n");

             }

         }

     }


     fprintf(fp, ".end\n\n");


     /* Print out .subckt models */

     while (models) {

         fprintf(fp, ".model %s\n", models->name);

         fprintf(fp, ".inputs ");

         port = models->inputs;

         while (port) {

             if (port->size > 1) {

                 for (j = 0; j < port->size; j++) {

                     fprintf(fp, "%s[%d] ", port->name, j);

                 }

             } else {

                 fprintf(fp, "%s ", port->name);

             }

             port = port->next;

         }

         fprintf(fp, "\n");

         fprintf(fp, ".outputs ");

         port = models->outputs;

         while (port) {

             if (port->size > 1) {

                 for (j = 0; j < port->size; j++) {

                     fprintf(fp, "%s[%d] ", port->name, j);

                 }

             } else {

                 fprintf(fp, "%s ", port->name);

             }

             port = port->next;

         }

         fprintf(fp, "\n.blackbox\n.end\n\n");

         fprintf(fp, "\n\n");

         models = models->next;

     }

     fclose(fp);

 }


 static void print_primitive_as_blif (FILE *fpout, int iblk) {

     int i, j;

     struct s_model_ports *port;

     struct s_linked_vptr *truth_table;

     t_rr_node *irr_graph;

     t_pb_graph_node *pb_graph_node;

     int node;


     /* Print primitives found in timing graph in blif format based on whether this is a logical primitive or a physical primitive */


     if (logical_block[iblk].type == VPACK_INPAD) {

         if (logical_block[iblk].pb == NULL) {

             fprintf(fpout, ".names %s tnode_%d\n", logical_block[iblk].name,

                     get_tnode_index(logical_block[iblk].output_net_tnodes[0][0]));

         } else {

             fprintf(fpout, ".names %s tnode_%d\n", logical_block[iblk].name,

                     get_tnode_index(logical_block[iblk].pb->rr_graph[logical_block[iblk].pb->pb_graph_node->output_pins[0][0].pin_count_in_cluster].tnode));

         }

         fprintf(fpout, "1 1\n\n");

     } else if (logical_block[iblk].type == VPACK_OUTPAD) {

         /* outputs have the symbol out: automatically prepended to it, must remove */

         if (logical_block[iblk].pb == NULL) {

             fprintf(fpout, ".names tnode_%d %s\n",

                     get_tnode_index(logical_block[iblk].input_net_tnodes[0][0]),

                     &logical_block[iblk].name[4]);

         } else {

             /* avoid the out: from output pad naming */

             fprintf(fpout, ".names tnode_%d %s\n",

                     get_tnode_index(logical_block[iblk].pb->rr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][0].pin_count_in_cluster].tnode),

                     (logical_block[iblk].name + 4));

         }

         fprintf(fpout, "1 1\n\n");

     } else if (strcmp(logical_block[iblk].model->name, "latch") == 0) {

         fprintf(fpout, ".latch ");

         node = OPEN;


         if (logical_block[iblk].pb == NULL) {

             i = 0;

             port = logical_block[iblk].model->inputs;

             while (port) {

                 if (!port->is_clock) {

                     assert(port->size == 1);

                     for (j = 0; j < port->size; j++) {

                         if (logical_block[iblk].input_net_tnodes[i][j] != NULL) {

                             fprintf(fpout, "tnode_%d ",

                                     get_tnode_index(logical_block[iblk].input_net_tnodes[i][j]));

                         } else {

                             assert(0);

                         }

                     }

                     i++;

                 }

                 port = port->next;

             }

             assert(i == 1);


             i = 0;

             port = logical_block[iblk].model->outputs;

             while (port) {

                 assert(port->size == 1);

                 for (j = 0; j < port->size; j++) {

                     if (logical_block[iblk].output_net_tnodes[i][j] != NULL) {

                         node =

                                 get_tnode_index(logical_block[iblk].output_net_tnodes[i][j]);

                         fprintf(fpout, "latch_%s re ",

                                 logical_block[iblk].name);

                     } else {

                         assert(0);

                     }

                 }

                 i++;

                 port = port->next;

             }

             assert(i == 1);


             i = 0;

             port = logical_block[iblk].model->inputs;

             while (port) {

                 if (port->is_clock) {

                     for (j = 0; j < port->size; j++) {

                         if (logical_block[iblk].clock_net_tnode != NULL) {

                             fprintf(fpout, "tnode_%d 0\n\n",

                                     get_tnode_index(logical_block[iblk].clock_net_tnode));

                         } else {

                             assert(0);

                         }

                     }

                     i++;

                 }

                 port = port->next;

             }

             assert(i == 1);

         } else {

             irr_graph = logical_block[iblk].pb->rr_graph;

             assert(

                     irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][0].pin_count_in_cluster].net_num != OPEN);

             fprintf(fpout, "tnode_%d ",

                     get_tnode_index(irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][0].pin_count_in_cluster].tnode));

             node =

                     get_tnode_index(irr_graph[logical_block[iblk].pb->pb_graph_node->output_pins[0][0].pin_count_in_cluster].tnode);

             fprintf(fpout, "latch_%s re ", logical_block[iblk].name);

             assert(

                     irr_graph[logical_block[iblk].pb->pb_graph_node->clock_pins[0][0].pin_count_in_cluster].net_num != OPEN);

             fprintf(fpout, "tnode_%d 0\n\n",

                     get_tnode_index(irr_graph[logical_block[iblk].pb->pb_graph_node->clock_pins[0][0].pin_count_in_cluster].tnode));

         }

         assert(node != OPEN);

         fprintf(fpout, ".names latch_%s tnode_%d\n", logical_block[iblk].name,

                 node);

         fprintf(fpout, "1 1\n\n");

     } else if (strcmp(logical_block[iblk].model->name, "names") == 0) {

         fprintf(fpout, ".names ");

         node = OPEN;


         if (logical_block[iblk].pb == NULL) {

             i = 0;

             port = logical_block[iblk].model->inputs;

             while (port) {

                 assert(!port->is_clock);

                 for (j = 0; j < port->size; j++) {

                     if (logical_block[iblk].input_net_tnodes[i][j] != NULL) {

                         fprintf(fpout, "tnode_%d ",

                                 get_tnode_index(logical_block[iblk].input_net_tnodes[i][j]));

                     } else {

                         break;

                     }

                 }

                 i++;

                 port = port->next;

             }

             assert(i == 1);


             i = 0;

             port = logical_block[iblk].model->outputs;

             while (port) {

                 assert(port->size == 1);

                 fprintf(fpout, "lut_%s\n", logical_block[iblk].name);

                 node = get_tnode_index(logical_block[iblk].output_net_tnodes[0][0]);

                 assert(node != OPEN);

                 i++;

                 port = port->next;

             }

             assert(i == 1);

         } else {

             irr_graph = logical_block[iblk].pb->rr_graph;

             assert(logical_block[iblk].pb->pb_graph_node->num_input_ports == 1);

             for (i = 0;

                     i < logical_block[iblk].pb->pb_graph_node->num_input_pins[0];

                     i++) {

                 if(logical_block[iblk].input_nets[0][i] != OPEN) {

                     for (j = 0; j < logical_block[iblk].pb->pb_graph_node->num_input_pins[0]; j++) {

                         if (irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][j].pin_count_in_cluster].net_num

                             != OPEN) {

                             if (irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][j].pin_count_in_cluster].net_num == logical_block[iblk].input_nets[0][i]) {

                                 fprintf(fpout, "tnode_%d ",

                                 get_tnode_index(irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][j].pin_count_in_cluster].tnode));

                                 break;

                             }

                         }

                     }

                     assert(j < logical_block[iblk].pb->pb_graph_node->num_input_pins[0]);

                 }

             }

             assert(

                     logical_block[iblk].pb->pb_graph_node->num_output_ports == 1);

             assert(

                     logical_block[iblk].pb->pb_graph_node->num_output_pins[0] == 1);

             fprintf(fpout, "lut_%s\n", logical_block[iblk].name);

             node =

                     get_tnode_index(irr_graph[logical_block[iblk].pb->pb_graph_node->output_pins[0][0].pin_count_in_cluster].tnode);

         }

         assert(node != OPEN);

         truth_table = logical_block[iblk].truth_table;

         while (truth_table) {

             fprintf(fpout, "%s\n", (char*) truth_table->data_vptr);

             truth_table = truth_table->next;

         }

         fprintf(fpout, "\n");

         fprintf(fpout, ".names lut_%s tnode_%d\n", logical_block[iblk].name,

                 node);

         fprintf(fpout, "1 1\n\n");

     } else {

         /* This is a standard .subckt blif structure */

         fprintf(fpout, ".subckt %s ", logical_block[iblk].model->name);

         if (logical_block[iblk].pb == NULL) {

             i = 0;

             port = logical_block[iblk].model->inputs;

             while (port) {

                 if (!port->is_clock) {

                     for (j = 0; j < port->size; j++) {

                         if (logical_block[iblk].input_net_tnodes[i][j] != NULL) {

                             if (port->size > 1) {

                                 fprintf(fpout, "\\\n%s[%d]=tnode_%d ",

                                         port->name, j,

                                         get_tnode_index(logical_block[iblk].input_net_tnodes[i][j]));

                             } else {

                                 fprintf(fpout, "\\\n%s=tnode_%d ", port->name,

                                         get_tnode_index(logical_block[iblk].input_net_tnodes[i][j]));

                             }

                         } else {

                             if (port->size > 1) {

                                 fprintf(fpout, "\\\n%s[%d]=unconn ", port->name,

                                         j);

                             } else {

                                 fprintf(fpout, "\\\n%s=unconn ", port->name);

                             }

                         }

                     }

                     i++;

                 }

                 port = port->next;

             }


             i = 0;

             port = logical_block[iblk].model->outputs;

             while (port) {

                 for (j = 0; j < port->size; j++) {

                     if (logical_block[iblk].output_net_tnodes[i][j] != NULL) {

                         if (port->size > 1) {

                             fprintf(fpout, "\\\n%s[%d]=%s ", port->name, j,

                                     vpack_net[logical_block[iblk].output_nets[i][j]].name);

                         } else {

                             fprintf(fpout, "\\\n%s=%s ", port->name,

                                     vpack_net[logical_block[iblk].output_nets[i][j]].name);

                         }

                     } else {

                         if (port->size > 1) {

                             fprintf(fpout, "\\\n%s[%d]=unconn_%d_%s_%d ",

                                     port->name, j, iblk, port->name, j);

                         } else {

                             fprintf(fpout, "\\\n%s=unconn_%d_%s ", port->name,

                                     iblk, port->name);

                         }

                     }

                 }

                 i++;

                 port = port->next;

             }


             i = 0;

             port = logical_block[iblk].model->inputs;

             while (port) {

                 if (port->is_clock) {

                     assert(port->size == 1);

                     if (logical_block[iblk].clock_net_tnode != NULL) {

                         fprintf(fpout, "\\\n%s=tnode_%d ", port->name,

                                 get_tnode_index(logical_block[iblk].clock_net_tnode));

                     } else {

                         fprintf(fpout, "\\\n%s=unconn ", port->name);

                     }

                     i++;

                 }

                 port = port->next;

             }


             fprintf(fpout, "\n\n");


             i = 0;

             port = logical_block[iblk].model->outputs;

             while (port) {

                 for (j = 0; j < port->size; j++) {

                     if (logical_block[iblk].output_net_tnodes[i][j] != NULL) {

                         fprintf(fpout, ".names %s tnode_%d\n",

                                 vpack_net[logical_block[iblk].output_nets[i][j]].name,

                                 get_tnode_index(logical_block[iblk].output_net_tnodes[i][j]));

                         fprintf(fpout, "1 1\n\n");

                     }

                 }

                 i++;

                 port = port->next;

             }

         } else {

             irr_graph = logical_block[iblk].pb->rr_graph;

             pb_graph_node = logical_block[iblk].pb->pb_graph_node;

             for (i = 0; i < pb_graph_node->num_input_ports; i++) {

                 for (j = 0; j < pb_graph_node->num_input_pins[i]; j++) {

                     if (irr_graph[pb_graph_node->input_pins[i][j].pin_count_in_cluster].net_num

                             != OPEN) {

                         if (pb_graph_node->num_input_pins[i] > 1) {

                             fprintf(fpout, "\\\n%s[%d]=tnode_%d ",

                                     pb_graph_node->input_pins[i][j].port->name,

                                     j,

                                     get_tnode_index(irr_graph[pb_graph_node->input_pins[i][j].pin_count_in_cluster].tnode));

                         } else {

                             fprintf(fpout, "\\\n%s=tnode_%d ",

                                     pb_graph_node->input_pins[i][j].port->name,

                                     get_tnode_index(irr_graph[pb_graph_node->input_pins[i][j].pin_count_in_cluster].tnode));

                         }

                     } else {

                         if (pb_graph_node->num_input_pins[i] > 1) {

                             fprintf(fpout, "\\\n%s[%d]=unconn ",

                                     pb_graph_node->input_pins[i][j].port->name,

                                     j);

                         } else {

                             fprintf(fpout, "\\\n%s=unconn ",

                                     pb_graph_node->input_pins[i][j].port->name);

                         }

                     }

                 }

             }

             for (i = 0; i < pb_graph_node->num_output_ports; i++) {

                 for (j = 0; j < pb_graph_node->num_output_pins[i]; j++) {

                     if (irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num

                             != OPEN) {

                         if (pb_graph_node->num_output_pins[i] > 1) {

                             fprintf(fpout, "\\\n%s[%d]=%s ",

                                     pb_graph_node->output_pins[i][j].port->name,

                                     j,

                                     vpack_net[irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num].name);

                         } else {

                             char* port_name =

                                     pb_graph_node->output_pins[i][j].port->name;

                             int pin_count =

                                     pb_graph_node->output_pins[i][j].pin_count_in_cluster;

                             int node_index = irr_graph[pin_count].net_num;

                             char* node_name = vpack_net[node_index].name;

                             fprintf(fpout, "\\\n%s=%s ", port_name, node_name);

                         }

                     } else {

                         if (pb_graph_node->num_output_pins[i] > 1) {

                             fprintf(fpout, "\\\n%s[%d]=unconn ",

                                     pb_graph_node->output_pins[i][j].port->name,

                                     j);

                         } else {

                             fprintf(fpout, "\\\n%s=unconn ",

                                     pb_graph_node->output_pins[i][j].port->name);

                         }

                     }

                 }

             }

             for (i = 0; i < pb_graph_node->num_clock_ports; i++) {

                 for (j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {

                     if (irr_graph[pb_graph_node->clock_pins[i][j].pin_count_in_cluster].net_num

                             != OPEN) {

                         if (pb_graph_node->num_clock_pins[i] > 1) {

                             fprintf(fpout, "\\\n%s[%d]=tnode_%d ",

                                     pb_graph_node->clock_pins[i][j].port->name,

                                     j,

                                     get_tnode_index(irr_graph[pb_graph_node->clock_pins[i][j].pin_count_in_cluster].tnode));

                         } else {

                             fprintf(fpout, "\\\n%s=tnode_%d ",

                                     pb_graph_node->clock_pins[i][j].port->name,

                                     get_tnode_index(irr_graph[pb_graph_node->clock_pins[i][j].pin_count_in_cluster].tnode));

                         }

                     } else {

                         if (pb_graph_node->num_clock_pins[i] > 1) {

                             fprintf(fpout, "\\\n%s[%d]=unconn ",

                                     pb_graph_node->clock_pins[i][j].port->name,

                                     j);

                         } else {

                             fprintf(fpout, "\\\n%s=unconn ",

                                     pb_graph_node->clock_pins[i][j].port->name);

                         }

                     }

                 }

             }


             fprintf(fpout, "\n\n");

             /* connect up output port names to output tnodes */

             for (i = 0; i < pb_graph_node->num_output_ports; i++) {

                 for (j = 0; j < pb_graph_node->num_output_pins[i]; j++) {

                     if (irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num

                             != OPEN) {

                         fprintf(fpout, ".names %s tnode_%d\n",

                                 vpack_net[irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num].name,

                                 get_tnode_index(irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].tnode));

                         fprintf(fpout, "1 1\n\n");

                     }

                 }

             }

         }

     }

 }

find_cf_constraint
static int find_cf_constraint(char *source_clock_name, char *sink_ff_name)
Definition: path_delay.c:3018

s_net::node_block_pin
int * node_block_pin
Definition: vpr_types.h:509

alloc_and_load_tnodes_from_prepacked_netlist
static void alloc_and_load_tnodes_from_prepacked_netlist(float block_delay, float inter_cluster_net_delay)
Definition: path_delay.c:995

tnodes_at_level
struct s_ivec * tnodes_at_level
Definition: path_delay2.c:12

TN_FF_SOURCE
Definition: vpr_types.h:315

s_chunk
Definition: util.h:57

s_linked_int
Definition: util.h:39

print_timing_stats
void print_timing_stats(void)
Definition: path_delay.c:3081

get_pb_graph_node_pin_from_model_port_pin
t_pb_graph_pin * get_pb_graph_node_pin_from_model_port_pin(t_model_ports *model_port, int model_pin, t_pb_graph_node *pb_graph_node)
Definition: vpr_utils.c:303

tedge_ch
static t_chunk tedge_ch
Definition: path_delay.c:153

s_rr_node::num_edges
short num_edges
Definition: vpr_types.h:901

s_pb::name
char * name
Definition: vpr_types.h:179

TN_PRIMITIVE_OPIN
Definition: vpr_types.h:311

check_timing_graph
void check_timing_graph(int num_sinks)
Definition: path_delay2.c:161

s_pb_graph_node::clock_pins
t_pb_graph_pin ** clock_pins
Definition: physical_types.h:504

read_sdc.h

my_fopen
FILE * my_fopen(const char *fname, const char *flag, int prompt)
Definition: util.c:54

find_ff_clock_tnode
static t_tnode * find_ff_clock_tnode(int inode, boolean is_prepacked)
Definition: path_delay.c:2955

s_rr_node::edges
int * edges
Definition: vpr_types.h:903

s_pb_graph_node::num_output_pins
int * num_output_pins
Definition: physical_types.h:511

NUM_BUCKETS
#define NUM_BUCKETS
Definition: path_delay.c:148

s_timing_constraints::num_ff_constraints
int num_ff_constraints
Definition: vpr_types.h:448

PB_PIN_CLOCK
Definition: physical_types.h:358

num_timing_nets
static int num_timing_nets
Definition: path_delay.c:157

s_tnode::T_req
float T_req
Definition: vpr_types.h:344

s_port::model_port
t_model_ports * model_port
Definition: physical_types.h:227

print_timing_graph_as_blif
void print_timing_graph_as_blif(const char *fname, t_model *models)
Definition: path_delay.c:3360

s_logical_block::input_nets
int ** input_nets
Definition: vpr_types.h:211

free_override_constraint
void free_override_constraint(t_override_constraint *&constraint_array, int num_constraints)
Definition: read_sdc.c:1297

s_model_ports::name
char * name
Definition: logic_types.h:23

s_prepacked_tnode_data::num_critical_output_paths
long num_critical_output_paths
Definition: vpr_types.h:326

s_net::is_const_gen
boolean is_const_gen
Definition: vpr_types.h:511

s_tedge
Definition: vpr_types.h:294

s_prepacked_tnode_data::num_critical_input_paths
long num_critical_input_paths
Definition: vpr_types.h:326

circuit_p_io_removed
struct s_linked_vptr * circuit_p_io_removed
Definition: globals.c:90

s_pb_graph_node::total_pb_pins
int total_pb_pins
Definition: physical_types.h:517

s_rr_node::net_num
int net_num
Definition: vpr_types.h:917

s_prepacked_tnode_data
Definition: vpr_types.h:320

E_ECHO_SLACK
Definition: ReadOptions.h:129

s_pb::rr_graph
struct s_rr_node * rr_graph
Definition: vpr_types.h:188

PB_PIN_SEQUENTIAL
Definition: physical_types.h:354

num_tnodes
int num_tnodes
Definition: path_delay.c:144

s_model_ports
Definition: logic_types.h:21

s_rr_node::tnode
t_tnode * tnode
Definition: vpr_types.h:919

boolean
boolean
Definition: util.h:11

alloc_slacks
static t_slack * alloc_slacks(void)
Definition: path_delay.c:344

TN_INPAD_SOURCE
Definition: vpr_types.h:302

s_linked_int::data
int data
Definition: util.h:40

PB_PIN_TERMINAL
Definition: physical_types.h:357

pb_max_internal_delay
float pb_max_internal_delay
Definition: globals.c:93

s_pb_graph_pin::pin_timing_del_max
float * pin_timing_del_max
Definition: physical_types.h:388

do_lut_rebalancing
static void do_lut_rebalancing()
Definition: path_delay.c:1877

get_critical_path_delay
float get_critical_path_delay(void)
Definition: path_delay.c:3060

s_timing_stats
Definition: vpr_types.h:383

s_timing_stats::least_slack
float ** least_slack
Definition: vpr_types.h:399

s_ivec::list
int * list
Definition: util.h:49

s_pb_graph_edge::output_pins
t_pb_graph_pin ** output_pins
Definition: physical_types.h:465

find_input
static int find_input(char *net_name)
Definition: path_delay.c:2994

net_delay
static float ** net_delay
Definition: timing_place_lookup.c:76

s_pb_graph_pin::output_edges
struct s_pb_graph_edge ** output_edges
Definition: physical_types.h:376

s_clock::is_netlist_clock
boolean is_netlist_clock
Definition: vpr_types.h:370

s_pb_graph_pin
Definition: physical_types.h:371

print_primitive_as_blif
static void print_primitive_as_blif(FILE *fpout, int iblk)
Definition: path_delay.c:3452

my_chunk_malloc
void * my_chunk_malloc(size_t size, t_chunk *chunk_info)
Definition: util.c:184

s_tnode::T_arr
float T_arr
Definition: vpr_types.h:343

s_timing_inf::timing_analysis_enabled
boolean timing_analysis_enabled
Definition: physical_types.h:204

s_override_constraint::num_sink
int num_sink
Definition: vpr_types.h:420

TN_FF_CLOCK
Definition: vpr_types.h:316

f_timing_stats
static t_timing_stats * f_timing_stats
Definition: path_delay.c:159

my_calloc
void * my_calloc(size_t nelem, size_t size)
Definition: util.c:132

get_tnode_block_and_output_net
void get_tnode_block_and_output_net(int inode, int *iblk_ptr, int *inet_ptr)
Definition: path_delay.c:2611

s_model_ports::next
struct s_model_ports * next
Definition: logic_types.h:28

load_tnode
static void load_tnode(INP t_pb_graph_pin *pb_graph_pin, INP int iblock, INOUTP int *inode, INP t_timing_inf timing_inf)
Definition: path_delay.c:1297

vpr_types.h

e_tnode_type
e_tnode_type
Definition: vpr_types.h:300

s_pb_graph_pin::pin_number
int pin_number
Definition: physical_types.h:373

s_tnode
Definition: vpr_types.h:333

num_nets
int num_nets
Definition: globals.c:27

s_pb::lut_pin_remap
int * lut_pin_remap
Definition: vpr_types.h:197

s_net::node_block
int * node_block
Definition: vpr_types.h:507

pbtype_max_internal_delay
const t_pb_type * pbtype_max_internal_delay
Definition: globals.c:94

TN_OUTPAD_IPIN
Definition: vpr_types.h:304

s_logical_block::truth_table
struct s_linked_vptr * truth_table
Definition: vpr_types.h:227

s_tnode::clock_domain
int clock_domain
Definition: vpr_types.h:354

allocate_and_load_critical_path
t_linked_int * allocate_and_load_critical_path(void)
Definition: path_delay.c:2522

DO_NOT_ANALYSE
#define DO_NOT_ANALYSE
Definition: path_delay.h:4

num_logical_nets
int num_logical_nets
Definition: globals.c:17

print_critical_path
void print_critical_path(const char *fname)
Definition: path_delay.c:2458

do_timing_analysis
void do_timing_analysis(t_slack *slacks, boolean is_prepacked, boolean do_lut_input_balancing, boolean is_final_analysis)
Definition: path_delay.c:1613

s_timing_constraints::domain_constraint
float ** domain_constraint
Definition: vpr_types.h:431

print_clustering_timing_info
void print_clustering_timing_info(const char *fname)
Definition: path_delay.c:696

s_pb_graph_pin::pin_timing
struct s_pb_graph_pin ** pin_timing
Definition: physical_types.h:387

s_net::name
char * name
Definition: vpr_types.h:505

TN_INTERMEDIATE_NODE
Definition: vpr_types.h:308

ReadOptions.h

s_pb_graph_node::output_pins
t_pb_graph_pin ** output_pins
Definition: physical_types.h:503

s_pb::rr_node_to_pb_mapping
struct s_pb ** rr_node_to_pb_mapping
Definition: vpr_types.h:189

s_slack::slack
float ** slack
Definition: vpr_types.h:405

s_chunk::chunk_ptr_head
struct s_linked_vptr * chunk_ptr_head
Definition: util.h:58

globals.h

s_block::type
t_type_ptr type
Definition: vpr_types.h:561

s_override_constraint::num_source
int num_source
Definition: vpr_types.h:419

propagate_clock_domain_and_skew
static void propagate_clock_domain_and_skew(int inode)
Definition: path_delay.c:2905

s_model_ports::is_clock
boolean is_clock
Definition: logic_types.h:26

INOUTP
#define INOUTP
Definition: util.h:21

num_blocks
int num_blocks
Definition: globals.c:30

blif_circuit_name
char * blif_circuit_name
Definition: globals.c:21

s_pb_graph_pin::type
enum e_pb_graph_pin_type type
Definition: physical_types.h:385

UNDEFINED
#define UNDEFINED
Definition: vpr_types.h:103

stats.h

getEchoEnabled
boolean getEchoEnabled(void)
Definition: ReadOptions.c:67

TN_OUTPAD_SINK
Definition: vpr_types.h:305

s_timing_constraints::num_constrained_outputs
int num_constrained_outputs
Definition: vpr_types.h:436

PB_PIN_OUTPAD
Definition: physical_types.h:356

s_model_ports::index
int index
Definition: logic_types.h:30

s_tnode::prepacked_data
t_prepacked_tnode_data * prepacked_data
Definition: vpr_types.h:361

load_clock_domain_and_clock_and_io_delay
static void load_clock_domain_and_clock_and_io_delay(boolean is_prepacked)
Definition: path_delay.c:2807

FALSE
Definition: util.h:12

s_override_constraint::num_multicycles
int num_multicycles
Definition: vpr_types.h:422

s_model_ports::size
int size
Definition: logic_types.h:24

s_tnode::num_edges
int num_edges
Definition: vpr_types.h:342

TN_CB_IPIN
Definition: vpr_types.h:306

num_tnode_levels
int num_tnode_levels
Definition: path_delay2.c:10

s_timing_constraints::num_cf_constraints
int num_cf_constraints
Definition: vpr_types.h:442

s_type_descriptor::capacity
int capacity
Definition: physical_types.h:620

s_io::name
char * name
Definition: vpr_types.h:377

print_spaces
static void print_spaces(FILE *fp, int num_spaces)
Definition: path_delay.c:3353

min
#define min(a, b)
Definition: graphics.c:174

s_timing_inf
Definition: physical_types.h:203

s_timing_stats::cpd
float ** cpd
Definition: vpr_types.h:398

s_pb_graph_edge::num_output_pins
int num_output_pins
Definition: physical_types.h:466

TN_INPAD_OPIN
Definition: vpr_types.h:303

s_logical_block::clock_net_tnode
struct s_tnode * clock_net_tnode
Definition: vpr_types.h:225

OUT_PORT
Definition: logic_types.h:19

s_prepacked_tnode_data::normalized_slack
float normalized_slack
Definition: vpr_types.h:327

do_timing_analysis_for_constraint
static float do_timing_analysis_for_constraint(int source_clock_domain, int sink_clock_domain, boolean is_prepacked, boolean is_final_analysis, long *max_critical_input_paths_ptr, long *max_critical_output_paths_ptr)
Definition: path_delay.c:1926

s_timing_constraints::fc_constraints
t_override_constraint * fc_constraints
Definition: vpr_types.h:446

alloc_and_load_pre_packing_timing_graph
t_slack * alloc_and_load_pre_packing_timing_graph(float block_delay, float inter_cluster_net_delay, t_model *models, t_timing_inf timing_inf)
Definition: path_delay.c:288

s_tnode::block
int block
Definition: vpr_types.h:346

PB_PIN_INPAD
Definition: physical_types.h:355

HUGE_POSITIVE_FLOAT
#define HUGE_POSITIVE_FLOAT
Definition: vpr_types.h:79

read_sdc
void read_sdc(t_timing_inf timing_inf)
Definition: read_sdc.c:115

s_prepacked_tnode_data::normalized_total_critical_paths
float normalized_total_critical_paths
Definition: vpr_types.h:328

my_malloc
static void * my_malloc(int ibytes)
Definition: graphics.c:499

s_timing_constraints::cf_constraints
t_override_constraint * cf_constraints
Definition: vpr_types.h:443

do_constant_net_delay_timing_analysis
void do_constant_net_delay_timing_analysis(t_timing_inf timing_inf, float constant_net_delay_value)
Definition: path_delay.c:2636

s_pb_graph_edge::delay_max
float delay_max
Definition: physical_types.h:469

is_global
boolean * is_global
Definition: globals_declare.h:5

tnode
t_tnode * tnode
Definition: path_delay.c:143

s_logical_block::name
char * name
Definition: vpr_types.h:207

s_logical_block::input_net_tnodes
struct s_tnode *** input_net_tnodes
Definition: vpr_types.h:223

print_lut_remapping
void print_lut_remapping(const char *fname)
Definition: path_delay.c:2430

vpack_to_clb_net_mapping
int * vpack_to_clb_net_mapping
Definition: globals.c:34

s_pb_type::name
char * name
Definition: physical_types.h:554

s_model::next
struct s_model * next
Definition: logic_types.h:40

max
#define max(a, b)
Definition: graphics.c:171

s_timing_constraints::num_constrained_inputs
int num_constrained_inputs
Definition: vpr_types.h:433

block
struct s_block * block
Definition: globals.c:31

s_pb_graph_node::num_input_ports
int num_input_ports
Definition: physical_types.h:506

clb_net
struct s_net * clb_net
Definition: globals.c:28

s_ivec::nelem
int nelem
Definition: util.h:48

s_clock::fanout
int fanout
Definition: vpr_types.h:371

INP
#define INP
Definition: util.h:19

net_delay.h

free_ivec_vector
void free_ivec_vector(struct s_ivec *ivec_vector, int nrmin, int nrmax)
Definition: util.c:498

s_port::name
char * name
Definition: physical_types.h:226

has_valid_normalized_T_arr
boolean has_valid_normalized_T_arr(int inode)
Definition: path_delay.c:3054

s_override_constraint::sink_list
char ** sink_list
Definition: vpr_types.h:418

s_pb_graph_pin::parent_node
struct s_pb_graph_node * parent_node
Definition: physical_types.h:379

TN_PRIMITIVE_IPIN
Definition: vpr_types.h:310

process_constraints
static void process_constraints(void)
Definition: path_delay.c:1489

net_delay_ch
static t_chunk net_delay_ch
Definition: timing_place.c:15

s_clock::name
char * name
Definition: vpr_types.h:369

update_slacks
static void update_slacks(t_slack *slacks, int source_clock_domain, int sink_clock_domain, float criticality_denom, boolean update_slack)
Definition: path_delay.c:2344

free_int_list
void free_int_list(t_linked_int **int_list_head_ptr)
Definition: util.c:341

s_timing_constraints::num_constrained_clocks
int num_constrained_clocks
Definition: vpr_types.h:428

E_ECHO_CRITICALITY
Definition: ReadOptions.h:130

util.h

s_prepacked_tnode_data::normalized_T_arr
float normalized_T_arr
Definition: vpr_types.h:329

print_timing_constraint_info
static void print_timing_constraint_info(const char *fname)
Definition: path_delay.c:3222

s_model::inputs
t_model_ports * inputs
Definition: logic_types.h:35

alloc_and_load_tnodes
static void alloc_and_load_tnodes(t_timing_inf timing_inf)
Definition: path_delay.c:730

s_model
Definition: logic_types.h:33

s_model::outputs
t_model_ports * outputs
Definition: logic_types.h:36

E_ECHO_TIMING_GRAPH
Definition: ReadOptions.h:116

isEchoFileEnabled
boolean isEchoFileEnabled(enum e_echo_files echo_option)
Definition: ReadOptions.c:115

get_tnode_index
static int get_tnode_index(t_tnode *node)
Definition: path_delay.c:3037

f_net_to_driver_tnode
static int * f_net_to_driver_tnode
Definition: path_delay.c:161

s_timing_constraints::num_cc_constraints
int num_cc_constraints
Definition: vpr_types.h:439

E_ECHO_CRITICAL_PATH
Definition: ReadOptions.h:128

s_prepacked_tnode_data::model_pin
int model_pin
Definition: vpr_types.h:323

s_tedge::Tdel
float Tdel
Definition: vpr_types.h:297

s_linked_vptr::next
struct s_linked_vptr * next
Definition: util.h:36

PB_PIN_NORMAL
Definition: physical_types.h:353

s_rr_node
Definition: vpr_types.h:889

path_delay2.h

free_net_delay
void free_net_delay(float **net_delay, t_chunk *chunk_list_ptr)
Definition: net_delay.c:127

s_pb_graph_node::num_input_pins
int * num_input_pins
Definition: physical_types.h:510

s_io::delay
float delay
Definition: vpr_types.h:379

s_timing_constraints::constrained_clocks
t_clock * constrained_clocks
Definition: vpr_types.h:429

s_tnode::pb_graph_pin
t_pb_graph_pin * pb_graph_pin
Definition: vpr_types.h:358

model
static char * model
Definition: read_blif.c:45

s_linked_vptr::data_vptr
void * data_vptr
Definition: util.h:35

s_pb_graph_pin::tsu_tco
float tsu_tco
Definition: physical_types.h:386

alloc_net_delay
float ** alloc_net_delay(t_chunk *chunk_list_ptr, struct s_net *nets, int n_nets)
Definition: net_delay.c:103

alloc_and_load_timing_graph
t_slack * alloc_and_load_timing_graph(t_timing_inf timing_inf)
Definition: path_delay.c:239

alloc_timing_stats
static void alloc_timing_stats(void)
Definition: path_delay.c:1598

EPSILON
#define EPSILON
Definition: vpr_types.h:83

VPACK_OUTPAD
Definition: vpr_types.h:108

E_ECHO_NET_DELAY
Definition: ReadOptions.h:115

s_tnode::out_edges
t_tedge * out_edges
Definition: vpr_types.h:336

print_criticality
void print_criticality(t_slack *slacks, boolean criticality_is_normalized, const char *fname)
Definition: path_delay.c:559

TN_FF_SINK
Definition: vpr_types.h:314

s_pb_graph_node::pb_type
struct s_pb_type * pb_type
Definition: physical_types.h:498

timing_nets
static struct s_net * timing_nets
Definition: path_delay.c:155

update_normalized_costs
static void update_normalized_costs(float T_arr_max_this_domain, long max_critical_input_paths, long max_critical_output_paths)
Definition: path_delay.c:2677

VPACK_INPAD
Definition: vpr_types.h:108

load_constant_net_delay
void load_constant_net_delay(float **net_delay, float delay_value, struct s_net *nets, int n_nets)
Definition: net_delay.c:175

free_timing_stats
void free_timing_stats(void)
Definition: path_delay.c:427

s_logical_block::pb
t_pb * pb
Definition: vpr_types.h:220

s_model::name
char * name
Definition: logic_types.h:34

s_pb_graph_node::num_output_ports
int num_output_ports
Definition: physical_types.h:507

print_global_criticality_stats
static void print_global_criticality_stats(FILE *fp, float **criticality, const char *singular_name, const char *capitalized_plural_name)
Definition: path_delay.c:606

vpr_utils.h

s_slack::timing_criticality
float ** timing_criticality
Definition: vpr_types.h:406

has_valid_T_req
static boolean has_valid_T_req(int inode)
Definition: path_delay.c:3048

s_rr_node::pb_graph_pin
t_pb_graph_pin * pb_graph_pin
Definition: vpr_types.h:918

s_tedge::to_node
int to_node
Definition: vpr_types.h:296

s_prepacked_tnode_data::model_port_ptr
t_model_ports * model_port_ptr
Definition: vpr_types.h:324

s_io::clock_name
char * clock_name
Definition: vpr_types.h:378

s_override_constraint::source_list
char ** source_list
Definition: vpr_types.h:417

find_clock
static int find_clock(char *net_name)
Definition: path_delay.c:2982

s_timing_constraints::constrained_outputs
t_io * constrained_outputs
Definition: vpr_types.h:437

num_logical_blocks
int num_logical_blocks
Definition: globals.c:17

s_type_descriptor::num_pins
int num_pins
Definition: physical_types.h:619

OPEN
Definition: slre.c:50

print_slack
void print_slack(float **slack, boolean slack_is_normalized, const char *fname)
Definition: path_delay.c:441

s_timing_constraints::ff_constraints
t_override_constraint * ff_constraints
Definition: vpr_types.h:449

s_pb_graph_pin::pin_count_in_cluster
int pin_count_in_cluster
Definition: physical_types.h:380

s_block::pb
t_pb * pb
Definition: vpr_types.h:567

s_linked_int::next
struct s_linked_int * next
Definition: util.h:41

TN_CB_OPIN
Definition: vpr_types.h:307

s_prepacked_tnode_data::model_port
int model_port
Definition: vpr_types.h:323

print_timing_graph
void print_timing_graph(const char *fname)
Definition: path_delay.c:1388

print_critical_path_node
float print_critical_path_node(FILE *fp, t_linked_int *critical_path_node)
Definition: path_delay2.c:199

s_pb_graph_node::num_clock_ports
int num_clock_ports
Definition: physical_types.h:508

find_output
static int find_output(char *net_name)
Definition: path_delay.c:3006

s_timing_constraints::cc_constraints
t_override_constraint * cc_constraints
Definition: vpr_types.h:440

s_tnode::type
e_tnode_type type
Definition: vpr_types.h:335

s_logical_block::output_net_tnodes
struct s_tnode *** output_net_tnodes
Definition: vpr_types.h:224

vpack_net
struct s_net * vpack_net
Definition: globals.c:19

getEchoFileName
char * getEchoFileName(enum e_echo_files echo_option)
Definition: ReadOptions.c:122

s_tnode::clock_delay
float clock_delay
Definition: vpr_types.h:355

find_tnode_net_name
static char * find_tnode_net_name(int inode, boolean is_prepacked)
Definition: path_delay.c:2936

TN_CONSTANT_GEN_SOURCE
Definition: vpr_types.h:317

s_rr_node::prev_edge
int prev_edge
Definition: vpr_types.h:916

s_pb
Definition: vpr_types.h:178

g_sdc
t_timing_constraints * g_sdc
Definition: read_sdc.c:65

read_xml_arch_file.h

s_pb_type::max_internal_delay
float max_internal_delay
Definition: physical_types.h:572

E_ECHO_PRE_PACKING_TIMING_GRAPH_AS_BLIF
Definition: ReadOptions.h:119

free_chunk_memory
void free_chunk_memory(t_chunk *chunk_info)
Definition: util.c:270

s_logical_block::model
t_model * model
Definition: vpr_types.h:209

s_pb_type::num_output_pins
int num_output_pins
Definition: physical_types.h:567

TN_FF_IPIN
Definition: vpr_types.h:312

NEGATIVE_EPSILON
#define NEGATIVE_EPSILON
Definition: vpr_types.h:84

E_ECHO_TIMING_CONSTRAINTS
Definition: ReadOptions.h:127

s_logical_block::output_nets
int ** output_nets
Definition: vpr_types.h:212

s_net
Definition: vpr_types.h:504

s_pb_graph_node
Definition: physical_types.h:497

vpr_printf
messagelogger vpr_printf
Definition: util.c:17

s_linked_vptr
Definition: util.h:34

s_pb::pb_graph_node
t_pb_graph_node * pb_graph_node
Definition: vpr_types.h:180

set_and_balance_arrival_time
static void set_and_balance_arrival_time(int to_node, int from_node, float Tdel, boolean do_lut_input_balancing)
Definition: path_delay.c:2707

free_timing_graph
void free_timing_graph(t_slack *slacks)
Definition: path_delay.c:390

HUGE_NEGATIVE_FLOAT
#define HUGE_NEGATIVE_FLOAT
Definition: vpr_types.h:80

s_net::num_sinks
int num_sinks
Definition: vpr_types.h:506

s_pb_graph_node::num_clock_pins
int * num_clock_pins
Definition: physical_types.h:512

print_net_delay
void print_net_delay(float **net_delay, const char *fname)
Definition: path_delay.c:670

load_timing_graph_net_delays
void load_timing_graph_net_delays(float **net_delay)
Definition: path_delay.c:368

path_delay.h

s_slack
Definition: vpr_types.h:402

s_timing_constraints::constrained_inputs
t_io * constrained_inputs
Definition: vpr_types.h:434

has_valid_T_arr
static boolean has_valid_T_arr(int inode)
Definition: path_delay.c:3043

IN_PORT
Definition: logic_types.h:19

s_net::node_block_port
int * node_block_port
Definition: vpr_types.h:508

s_pb_graph_pin::port
t_port * port
Definition: physical_types.h:372

s_pb_graph_node::input_pins
t_pb_graph_pin ** input_pins
Definition: physical_types.h:502

s_override_constraint::constraint
float constraint
Definition: vpr_types.h:421

logical_block
struct s_logical_block * logical_block
Definition: globals.c:20

s_pb_type::num_clock_pins
int num_clock_pins
Definition: physical_types.h:565

TRUE
Definition: util.h:12

s_pb_graph_pin::num_pin_timing
int num_pin_timing
Definition: physical_types.h:389

s_timing_constraints::num_fc_constraints
int num_fc_constraints
Definition: vpr_types.h:445

TN_FF_OPIN
Definition: vpr_types.h:313

alloc_and_load_timing_graph_levels
int alloc_and_load_timing_graph_levels(void)
Definition: path_delay2.c:81