calyx_opt/analysis/compaction_analysis.rs

use crate::analysis::{ControlOrder, PromotionAnalysis};
use calyx_ir::{self as ir};
use ir::GetAttributes;
use itertools::Itertools;
use petgraph::{algo, graph::NodeIndex};
use std::collections::HashMap;

use super::read_write_set::AssignmentAnalysis;

/// Struct to perform compaction on `seq`s.
/// It will only work if you call `update_cont_read_writes` on each component
/// that you run it on.
#[derive(Debug, Default)]
pub struct CompactionAnalysis {
    /// Cells read by the component's continuous assignments.
    cont_reads: Vec<ir::RRC<ir::Cell>>,
    /// Cells written by the component's continuous assignments.
    cont_writes: Vec<ir::RRC<ir::Cell>>,
}

impl CompactionAnalysis {
    /// Updates self so that compaction will take continuous assignments into account
    pub fn update_cont_read_writes(&mut self, comp: &mut ir::Component) {
        let (cont_reads, cont_writes) = (
            comp.continuous_assignments
                .iter()
                .analysis()
                .cell_reads()
                .collect(),
            comp.continuous_assignments
                .iter()
                .analysis()
                .cell_writes()
                .collect(),
        );
        self.cont_reads = cont_reads;
        self.cont_writes = cont_writes;
    }

    // Given a total_order and sorted schedule, builds a vec of the original seq.
    // Note that this function assumes the `total_order` and `sorted_schedule`
    // represent a completely sequential schedule.
    fn recover_seq(
        mut total_order: petgraph::graph::DiGraph<Option<ir::Control>, ()>,
        sorted_schedule: Vec<(NodeIndex, u64)>,
    ) -> Vec<ir::Control> {
        sorted_schedule
            .into_iter()
            .map(|(i, _)| total_order[i].take().unwrap())
            .collect_vec()
    }

    /// Takes a vec of ctrl stmts and turns it into a compacted schedule.
    /// If compaction doesn't lead to any latency decreases, it just returns
    /// a vec of stmts in the original order.
    /// If it can compact, then it returns a vec with one
    /// element: a compacted static par.
    pub fn compact_control_vec(
        &mut self,
        stmts: Vec<ir::Control>,
        promotion_analysis: &mut PromotionAnalysis,
        builder: &mut ir::Builder,
    ) -> Vec<ir::Control> {
        // Records the corresponding node indices that each control program
        // has data dependency on.
        let mut dependency: HashMap<NodeIndex, Vec<NodeIndex>> = HashMap::new();
        // Records the latency of corresponding control operator for each
        // node index.
        let mut latency_map: HashMap<NodeIndex, u64> = HashMap::new();
        // Records the scheduled start time of corresponding control operator
        // for each node index.
        let mut schedule: HashMap<NodeIndex, u64> = HashMap::new();

        let og_latency: u64 = stmts
            .iter()
            .map(PromotionAnalysis::get_inferred_latency)
            .sum();

        let mut total_order = ControlOrder::<false>::get_dependency_graph_seq(
            stmts.into_iter(),
            (&self.cont_reads, &self.cont_writes),
            &mut dependency,
            &mut latency_map,
        );

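        // A topological order of the dependency graph lets us compute, for each
        // stmt, the earliest start time at which all of its dependencies have
        // finished (an ASAP schedule). `total_time` is then the latency of the
        // compacted schedule.
        //
        // Illustrative example (hypothetical latencies): given stmts A (2 cycles),
        // B (3 cycles), and C (1 cycle), where only C depends on A, the schedule
        // is A@0, B@0, C@2, giving total_time = 3 versus og_latency = 6.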
        if let Ok(order) = algo::toposort(&total_order, None) {
            let mut total_time: u64 = 0;

            // First we build the schedule.
            for i in order {
                // Start time is when the latest dependency finishes
                let start = dependency
                    .get(&i)
                    .unwrap()
                    .iter()
                    .map(|node| schedule[node] + latency_map[node])
                    .max()
                    .unwrap_or(0);
                schedule.insert(i, start);
                total_time = std::cmp::max(start + latency_map[&i], total_time);
            }

            // We sort the schedule by start time, breaking ties by node index
            // so the resulting order is deterministic.
            let mut sorted_schedule: Vec<(NodeIndex, u64)> =
                schedule.into_iter().collect();
            sorted_schedule
                .sort_by(|(k1, v1), (k2, v2)| (v1, k1).cmp(&(v2, k2)));

            if total_time == og_latency {
                // If we can't compact at all, then just recover and return
                // the original seq.
                return Self::recover_seq(total_order, sorted_schedule);
            }

            // Threads for the static par, where each entry is (thread, thread_latency)
            let mut par_threads: Vec<(Vec<ir::Control>, u64)> = Vec::new();

            // We encode the schedule while trying to minimize the number of
            // par threads.
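            // Greedy packing: each stmt is placed on the first existing thread
            // that is free by the stmt's start time (padded with a no-op group
            // if the thread finishes early); otherwise a new thread is created.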
            'outer: for (i, start) in sorted_schedule {
                let control = total_order[i].take().unwrap();
                for (thread, thread_latency) in par_threads.iter_mut() {
                    if *thread_latency <= start {
                        if *thread_latency < start {
                            // Need a no-op group so the schedule starts correctly
                            let no_op = builder.add_static_group(
                                "no-op",
                                start - *thread_latency,
                            );
                            thread.push(ir::Control::Static(
                                ir::StaticControl::Enable(ir::StaticEnable {
                                    group: no_op,
                                    attributes: ir::Attributes::default(),
                                }),
                            ));
                            *thread_latency = start;
                        }
                        thread.push(control);
                        *thread_latency += latency_map[&i];
                        continue 'outer;
                    }
                }
                // We must create a new par thread.
                if start > 0 {
                    // If start > 0, then we must add a delay to the start of the
                    // group.
                    let no_op = builder.add_static_group("no-op", start);
                    let no_op_enable = ir::Control::Static(
                        ir::StaticControl::Enable(ir::StaticEnable {
                            group: no_op,
                            attributes: ir::Attributes::default(),
                        }),
                    );
                    par_threads.push((
                        vec![no_op_enable, control],
                        start + latency_map[&i],
                    ));
                } else {
                    par_threads.push((vec![control], latency_map[&i]));
                }
            }
            // Convert each par thread into a single ir::StaticControl, wrapping
            // multi-stmt threads in a StaticSeq.
            let mut par_control_threads: Vec<ir::StaticControl> = Vec::new();
            for (thread, thread_latency) in par_threads {
                let mut promoted_stmts = thread
                    .into_iter()
                    .map(|mut stmt| {
                        promotion_analysis.convert_to_static(&mut stmt, builder)
                    })
                    .collect_vec();
                if promoted_stmts.len() == 1 {
                    // Don't wrap in static seq if we don't need to.
                    par_control_threads.push(promoted_stmts.pop().unwrap());
                } else {
                    par_control_threads.push(ir::StaticControl::Seq(
                        ir::StaticSeq {
                            stmts: promoted_stmts,
                            attributes: ir::Attributes::default(),
                            latency: thread_latency,
                        },
                    ));
                }
            }
            // Double checking that we have built the static par correctly.
            let max: Option<u64> =
                par_control_threads.iter().map(|c| c.get_latency()).max();
            assert!(
                max.unwrap() == total_time,
                "The schedule expects latency {}. The static par that was built has latency {}",
                total_time,
                max.unwrap()
            );

            let mut s_par = ir::StaticControl::Par(ir::StaticPar {
                stmts: par_control_threads,
                attributes: ir::Attributes::default(),
                latency: total_time,
            });
            s_par.get_mut_attributes().insert(ir::BoolAttr::Promoted, 1);
            vec![ir::Control::Static(s_par)]
        } else {
            panic!(
                "Error when producing topo sort. Dependency graph has a cycle."
            );
        }
    }
}
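
// A minimal usage sketch of the API above: roughly how a pass might invoke
// compaction on the stmts of a `seq`. The surrounding context (`comp`, `sigs`,
// `stmts`, and `promotion_analysis`) is assumed to be supplied by the calling
// pass; `ir::Builder::new(comp, sigs)` is assumed as the way to obtain a builder.
//
//     let mut compaction = CompactionAnalysis::default();
//     compaction.update_cont_read_writes(comp);
//     let mut builder = ir::Builder::new(comp, sigs);
//     let compacted =
//         compaction.compact_control_vec(stmts, promotion_analysis, &mut builder);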