Browse Source

Add the possibility to run multiple times in succession and use a real dataset as the IP source instead of random generation

niels 1 year ago
parent
commit
eaafc13ec1

+ 0 - 1
responder-bench/src/main.rs

@@ -10,7 +10,6 @@ enum Commands {
     Process(process::Options)
 }
 
-
 fn main() -> anyhow::Result<()> {
     env_logger::init();
     let commands = Commands::parse();

+ 175 - 115
responder-bench/src/process.rs

@@ -6,7 +6,7 @@ use regex::Regex;
 
 use crate::run::{BENCH_BASE_PATH, BENCH_DATA_PATH};
 
-fn json_from_file<P>(path: P) -> anyhow::Result<HashMap<String, f64>>
+fn json_from_file<P>(path: P) -> anyhow::Result<HashMap<String, String>>
 where
     P: Debug,
     P: AsRef<Path>
@@ -26,16 +26,14 @@ where
     };
     let mut map = HashMap::new();
     for (key, val) in json_obj.iter() {
-        let val: f64 = match val {
+        let val: String = match val {
             JsonValue::Number(n) => {
-                f64::from(*n)
+                f64::from(*n).to_string()
             },
             JsonValue::String(_) | JsonValue::Short(_) => {
                 val.as_str()
                    .unwrap()
-                   .strip_suffix("%")
-                   .ok_or(anyhow!("Expected percentage String, got: {:?}", val))?
-                   .parse()?
+                   .to_string()
             },
             _ => bail!("Expected json number or string, got: {:?}", val),
         };
@@ -127,146 +125,208 @@ pub fn process(opts: Options) -> anyhow::Result<()> {
     let mut path = PathBuf::new();
     path.push(BENCH_BASE_PATH);
     path.push(BENCH_DATA_PATH);
-    path.push(opts.seed);
 
     let zmap_stats_regex = Regex::new(r"^[^\(;]+(?:\(.+left\))?; send: [^\(;]+done \(([\d\.]+) ([KMG]?)p/s avg\); recv: [^\(;]+\(([\d\.]+) ([KMG]?)p/s avg\); drops: [^\(;]+\(([\d\.]+) ([KMG]?)p/s avg\); hitrate: [^;]+$")?;
 
-    let header_row = [
-        "type", "filter-type",
-        "subnet_size", "hitrate", "bloom_filter_bits", "bloom_filter_hash_count", "zmap_scanrate",
+    let run_header_row = [
+        "run", "type", "filter-type",
+        "subnet_size", "dataset_index", "filter_bits", "bloom_filter_hash_count", "zmap_scanrate",
         "bpf_run_time_total", "bpf_run_count", "bpf_memory_lock",
-        "filter_intern_build_time", "filter_intern_write_time",
-        "filter_extern_time_clock", "filter_extern_cpu_p", "filter_extern_kernel_secs", "filter_extern_user_secs",
         "zmap_send_rate", "zmap_receive_rate", "zmap_drop_rate",
         "false_positive_count", "false_negative_count"
     ];
-    let mut data_rows = vec![];
-    data_rows.push(header_row.map(str::to_string));
 
-    for subnet_dir in path.read_dir().context(format!("Failed to read subnet dirs in path {:?}", &path))? {
-        let subnet_dir = subnet_dir.context(format!("Failed to read file info on file in path {:?}", &path))?;
-        if !subnet_dir
+    let build_header_row = [
+        "type", "filter-type",
+        "subnet_size", "dataset_index", "bloom_filter_bits", "bloom_filter_hash_count",
+        "ips_subnet", "ips_entries", "ips_hitrate",
+        "filter_intern_build_time", "filter_intern_write_time",
+        "filter_extern_time_clock", "filter_extern_cpu_p", "filter_extern_kernel_secs", "filter_extern_user_secs",
+    ];
+
+    let mut run_data_rows = vec![];
+    run_data_rows.push(run_header_row.map(str::to_string));
+
+    let mut build_data_rows = vec![];
+    build_data_rows.push(build_header_row.map(str::to_string));
+
+    let get_or_default = |map: &mut HashMap<String, String>, k: &str| map
+        .remove(k).unwrap_or(String::from("-1"));
+
+    for run_dir in path.read_dir().context(format!("Failed to read subnet dirs in path {:?}", &path))? {
+        let run_dir = run_dir.context(format!("Failed to read file info on file in path {:?}", &path))?;
+        if !run_dir
             .file_type()
-            .context(format!("Failed to read file info on file {:?}", subnet_dir.path()))?
+            .context(format!("Failed to read file info on file {:?}", run_dir.path()))?
             .is_dir() {
-                bail!("Expected dir at {:?}", subnet_dir.path())
+                bail!("Expected dir at {:?}", run_dir.path());
             }
-        let subnet = subnet_dir.file_name().into_string().map_err(|e| anyhow!(format!("{:?}", e)))?;
-        for hitrate_dir in subnet_dir.path().read_dir().context(format!("Failed to read hitrate dirs in path {:?}", subnet_dir.path()))? {
-            let hitrate_dir = hitrate_dir.context(format!("Failed to read file info on file in path {:?}",subnet_dir.path()))?;
-            if !hitrate_dir
+        let run = run_dir.file_name().into_string().map_err(|e| anyhow!(format!("{:?}", e)))?;
+        let is_build = run == "build";
+
+        let seed_path = run_dir.path().join(opts.seed.to_owned());
+
+        for subnet_dir in seed_path.read_dir().context(format!("Failed to read subnet dirs in path {:?}", &path))? {
+            let subnet_dir = subnet_dir.context(format!("Failed to read file info on file in path {:?}", &path))?;
+            if !subnet_dir
                 .file_type()
-                .context(format!("Failed to read file info on file {:?}", hitrate_dir.path()))?
+                .context(format!("Failed to read file info on file {:?}", subnet_dir.path()))?
                 .is_dir() {
-                    bail!("Expected dir at {:?}", hitrate_dir.path())
+                    bail!("Expected dir at {:?}", subnet_dir.path())
                 }
-            let hitrate = hitrate_dir.file_name().into_string().map_err(|e| anyhow!(format!("{:?}", e)))?;
-            let in_ips = ips_from_file(hitrate_dir.path().join("ips.txt")).context(format!("Failed to read ips from {:?}/ips.txt", hitrate_dir.path()))?;
-            for bloom_dir in hitrate_dir.path().read_dir().context(format!("Failed to read bloom dirs in path {:?}", hitrate_dir.path()))? {
-                let bloom_dir = bloom_dir.context(format!("Failed to read file info on file in path {:?}", hitrate_dir.path()))?;
-                if !bloom_dir
+            let subnet = subnet_dir.file_name().into_string().map_err(|e| anyhow!(format!("{:?}", e)))?;
+            for dataset_dir in subnet_dir.path().read_dir().context(format!("Failed to read hitrate dirs in path {:?}", subnet_dir.path()))? {
+                let dataset_dir = dataset_dir.context(format!("Failed to read file info on file in path {:?}",subnet_dir.path()))?;
+                if !dataset_dir
                     .file_type()
-                    .context(format!("Failed to read file info on file {:?}", bloom_dir.path()))?
+                    .context(format!("Failed to read file info on file {:?}", dataset_dir.path()))?
                     .is_dir() {
-                        continue;
+                        bail!("Expected dir at {:?}", dataset_dir.path())
                     }
-                let bloom_folder_name = bloom_dir
-                    .file_name()
-                    .into_string()
-                    .map_err(|e| anyhow!(format!("{:?}", e)))?;
-                let (test_type, filter_type, bloom_bits, bloom_hashes) = if bloom_folder_name.contains('-') {
-                    let (bloom_bits, rem) = bloom_folder_name.split_once("-")
-                                                             .ok_or(anyhow!("Expected filename with -, got {:?}", bloom_dir.file_name()))?;
-
-                    let (filter_type, rem) = rem.split_once("-").unwrap_or((rem, ""));
-
-                    let (bloom_hashes, bpf_enabled) = if filter_type == "bloom" {
-                        rem.split_once("-").map(|(c, rem)| (c,rem=="bpf")).unwrap_or((rem, false))
-                    } else {
-                        ("0", rem == "bpf")
-                    };
-
-                    let bloom_bits = bloom_bits.to_string();
-                    let bloom_hashes = bloom_hashes.to_string();
-                    let test_type = if bpf_enabled {
-                        "bpf-stats"
-                    } else {
-                        "normal"
-                    };
-                    (test_type, filter_type, bloom_bits, bloom_hashes)
+                let dataset_index = dataset_dir.file_name().into_string().map_err(|e| anyhow!(format!("{:?}", e)))?;
+                let mut in_ips_path = path.clone();
+                in_ips_path.push("build");
+                in_ips_path.push(&opts.seed);
+                in_ips_path.push(&subnet);
+                in_ips_path.push(&dataset_index);
+                in_ips_path.push("ips.txt");
+
+                let in_ips = ips_from_file(&in_ips_path).context(format!("Failed to read ips from {:?}", &in_ips_path))?;
+                let ips_info = if is_build {
+                    Some(json_from_file(dataset_dir.path().join("ips-info.json")).context(format!("Failed to read ip info from {:?}/ips-info.json", dataset_dir.path()))?)
                 } else {
-                    ("baseline","none", String::from("-1"), String::from("-1"))
+                    None
                 };
-
-                let bloom_path = bloom_dir.path();
-
-                let mut filter_intern_time = json_from_file(bloom_path.join("filter_intern_time.json"))
-                    .context(format!("Failed to parse filter_intern_time.json for {:?}", bloom_path))?;
-                let mut filter_extern_time = json_from_file(bloom_path.join("filter_extern_time.json"))
-                    .context(format!("Failed to parse filter_extern_time.json  for {:?}", bloom_path))?;
-
-                for scan_rate_dir in bloom_dir.path().read_dir().context(format!("Failed to read scan rate dirs in path {:?}", bloom_dir.path()))? {
-                    let scan_rate_dir = scan_rate_dir.context(format!("Failed to read file info on file in path {:?}", bloom_dir.path()))?;
-                    if !scan_rate_dir
+                for filter_dir in dataset_dir.path().read_dir().context(format!("Failed to read bloom dirs in path {:?}", dataset_dir.path()))? {
+                    let filter_dir = filter_dir.context(format!("Failed to read file info on file in path {:?}", dataset_dir.path()))?;
+                    if !filter_dir
                         .file_type()
-                        .context(format!("Failed to read file info on file {:?}", scan_rate_dir.path()))?
+                        .context(format!("Failed to read file info on file {:?}", filter_dir.path()))?
                         .is_dir() {
                             continue;
                         }
-                    let scan_rate = scan_rate_dir.file_name().to_str().unwrap().to_string();
-
-                    let wd_path = scan_rate_dir.path();
-                    let mut bpf_stats = json_from_file(wd_path.join("bpf_stats.json"))
-                        .context(format!("Failed to parse bpf_stats.json for {:?}", wd_path))?;
-                    let out_ips = ips_from_file(wd_path.join("zmap_out_ips.txt"))
-                        .context(format!("Failed to parse zmap_out_ips.txt from {:?}", wd_path))?;
-                    let zmap_stats = zmap_stats(wd_path.join("zmap_stats.txt"), &zmap_stats_regex)
-                        .context(format!("Failed to parse zmap_stats.txt from {:?}", wd_path))?;
-
-                    let get_or_default = |map: &mut HashMap<String, f64>, k: &str| map
-                        .get(k).unwrap_or(&-1f64).to_string();
-
-                    let data_row = (|| {
-                        Ok([
-                            test_type.to_owned(),
-                            filter_type.to_owned(),
-                            subnet.clone(), hitrate.clone(), bloom_bits.clone(), bloom_hashes.clone(), scan_rate.clone(),
-                            get_or_default(&mut bpf_stats, "run_time"),
-                            get_or_default(&mut bpf_stats, "run_count"),
-                            get_or_default(&mut bpf_stats, "mem_lock"),
-                            get_or_default(&mut filter_intern_time, "build"),
-                            get_or_default(&mut filter_intern_time, "write"),
-                            get_or_default(&mut filter_extern_time, "clock"),
-                            get_or_default(&mut filter_extern_time, "cpu_p"),
-                            get_or_default(&mut filter_extern_time, "kernel_s"),
-                            get_or_default(&mut filter_extern_time, "user_s"),
-                            zmap_stats.0.to_string(),
-                            zmap_stats.1.to_string(),
-                            zmap_stats.2.to_string(),
-                            out_ips.difference(&in_ips).count().to_string(),
-                            in_ips.difference(&out_ips).count().to_string(),
-                        ])
-                    })().map_err(|key: String| {
-                        anyhow!("Failed to read data point {} for {:?}", key, wd_path)
-                    })?;
-
-                    let mut f = File::create(wd_path.join("data_row.csv"))?;
-                    f.write_all(header_row.join(",").as_bytes())?;
-                    f.write(&[b'\n'])?;
-                    f.write_all(data_row.join(",").as_bytes())?;
-                    data_rows.push(data_row);
+                    let filter_folder_name = filter_dir
+                        .file_name()
+                        .into_string()
+                        .map_err(|e| anyhow!(format!("{:?}", e)))?;
+                    let (test_type, filter_type, bits, bloom_hashes) = if filter_folder_name.contains('-') {
+                        let (bits, rem) = filter_folder_name.split_once("-")
+                                                                 .ok_or(anyhow!("Expected filename with -, got {:?}", filter_dir.file_name()))?;
+
+                        let (filter_type, rem) = rem.split_once("-").unwrap_or((rem, ""));
+
+                        let (bloom_hashes, bpf_enabled) = if filter_type == "bloom" {
+                            rem.split_once("-").map(|(c, rem)| (c,rem=="bpf")).unwrap_or((rem, false))
+                        } else {
+                            ("0", rem == "bpf")
+                        };
+
+                        let bits = bits.to_string();
+                        let bloom_hashes = bloom_hashes.to_string();
+                        let test_type = if bpf_enabled {
+                            "bpf-stats"
+                        } else {
+                            "normal"
+                        };
+                    (test_type, filter_type, bits, bloom_hashes)
+                    } else {
+                        ("baseline","none", String::from("-1"), String::from("-1"))
+                    };
+
+                    if is_build {
+                        let bloom_path = filter_dir.path();
+                        let mut ips_info = ips_info.clone().unwrap();
+
+                        let mut filter_intern_time = json_from_file(bloom_path.join("filter_intern_time.json"))
+                            .context(format!("Failed to parse filter_intern_time.json for {:?}", bloom_path))?;
+                        let mut filter_extern_time = json_from_file(bloom_path.join("filter_extern_time.json"))
+                            .context(format!("Failed to parse filter_extern_time.json  for {:?}", bloom_path))?;
+
+                        let build_data_row = (|| {
+                            Ok([
+                                test_type.to_owned(),
+                                filter_type.to_owned(),
+                                subnet.clone(), dataset_index.clone(), bits.clone(), bloom_hashes.clone(),
+                                get_or_default(&mut ips_info, "subnet"),
+                                get_or_default(&mut ips_info, "entries"),
+                                get_or_default(&mut ips_info, "hitrate"),
+                                get_or_default(&mut filter_intern_time, "build"),
+                                get_or_default(&mut filter_intern_time, "write"),
+                                get_or_default(&mut filter_extern_time, "clock"),
+                                get_or_default(&mut filter_extern_time, "cpu_p"),
+                                get_or_default(&mut filter_extern_time, "kernel_s"),
+                                get_or_default(&mut filter_extern_time, "user_s"),
+                            ])
+                        })().map_err(|key: String| {
+                            anyhow!("Failed to read build data point {} for {:?}", key, bloom_path)
+                        })?;
+
+                        let mut f = File::create(bloom_path.join("data_row.csv"))?;
+                        f.write_all(build_header_row.join(",").as_bytes())?;
+                        f.write(&[b'\n'])?;
+                        f.write_all(build_data_row.join(",").as_bytes())?;
+                        build_data_rows.push(build_data_row);
+                    } else {
+                        for scan_rate_dir in filter_dir.path().read_dir().context(format!("Failed to read scan rate dirs in path {:?}", filter_dir.path()))? {
+                            let scan_rate_dir = scan_rate_dir.context(format!("Failed to read file info on file in path {:?}", filter_dir.path()))?;
+                            if !scan_rate_dir
+                                .file_type()
+                                .context(format!("Failed to read file info on file {:?}", scan_rate_dir.path()))?
+                                .is_dir() {
+                                    continue;
+                                }
+                            let scan_rate = scan_rate_dir.file_name().to_str().unwrap().to_string();
+
+                            let wd_path = scan_rate_dir.path();
+                            let mut bpf_stats = json_from_file(wd_path.join("bpf_stats.json"))
+                                .context(format!("Failed to parse bpf_stats.json for {:?}", wd_path))?;
+                            let out_ips = ips_from_file(wd_path.join("zmap_out_ips.txt"))
+                                .context(format!("Failed to parse zmap_out_ips.txt from {:?}", wd_path))?;
+                            let zmap_stats = zmap_stats(wd_path.join("zmap_stats.txt"), &zmap_stats_regex)
+                                .context(format!("Failed to parse zmap_stats.txt from {:?}", wd_path))?;
+
+                            let run_data_row = (|| {
+                                Ok([
+                                    run.to_owned(),
+                                    test_type.to_owned(),
+                                    filter_type.to_owned(),
+                                    subnet.clone(), dataset_index.clone(), bits.clone(), bloom_hashes.clone(), scan_rate.clone(),
+                                    get_or_default(&mut bpf_stats, "run_time"),
+                                    get_or_default(&mut bpf_stats, "run_count"),
+                                    get_or_default(&mut bpf_stats, "mem_lock"),
+                                    zmap_stats.0.to_string(),
+                                    zmap_stats.1.to_string(),
+                                    zmap_stats.2.to_string(),
+                                    out_ips.difference(&in_ips).count().to_string(),
+                                    in_ips.difference(&out_ips).count().to_string(),
+                                ])
+                            })().map_err(|key: String| {
+                                anyhow!("Failed to read data point {} for {:?}", key, wd_path)
+                            })?;
+
+                            let mut f = File::create(wd_path.join("data_row.csv"))?;
+                            f.write_all(run_header_row.join(",").as_bytes())?;
+                            f.write(&[b'\n'])?;
+                            f.write_all(run_data_row.join(",").as_bytes())?;
+                            run_data_rows.push(run_data_row);
 
+                        }
+                    }
                 }
             }
         }
     }
 
-    let data = data_rows.into_iter()
-             .map(|row| row.join(","))
-             .fold(String::new(), |a, b| a + &b + "\n");
+    let run_data = run_data_rows.into_iter()
+                        .map(|row| row.join(","))
+                        .fold(String::new(), |a, b| a + &b + "\n");
 
+    let build_data = build_data_rows.into_iter()
+                        .map(|row| row.join(","))
+                        .fold(String::new(), |a, b| a + &b + "\n");
 
-    File::create(path.join("data.csv"))?.write_all(data.as_bytes())?;
+    File::create(path.join("run_data.csv"))?.write_all(run_data.as_bytes())?;
+    File::create(path.join("build_data.csv"))?.write_all(build_data.as_bytes())?;
 
 
     Ok(())

+ 126 - 47
responder-bench/src/run.rs

@@ -1,8 +1,8 @@
-use std::{path::{Path, PathBuf}, fs::{File, self, OpenOptions}, io::{BufWriter, Write, self, Read, BufRead}, net::Ipv4Addr, process::{Command, self, Stdio}, time::{Instant, Duration}, thread};
+use std::{path::{Path, PathBuf}, fs::{File, self, OpenOptions}, io::{BufWriter, Write, self, Read, BufRead, BufReader}, net::Ipv4Addr, process::{Command, self, Stdio}, time::{Instant, Duration}, thread, collections::HashSet};
 use json::JsonValue;
 use log::info;
 use rand::prelude::*;
-use anyhow::{Context, ensure, anyhow};
+use anyhow::{Context, ensure, anyhow, bail};
 
 mod args;
 mod error;
@@ -17,9 +17,80 @@ pub const BENCH_BIN_PATH: &str = "bin";
 pub const BENCH_LOG_PATH: &str = "log";
 
 const XDP_LOAD_TIMEOUT_SECS: u64 = 5;
+const HITRATE_ALLOWED_DIFFERENCE: f64 = 0.25;
+const HITRATE_GENERATION_ALLOWED_ATTEMPTS: usize = 50;
 
 const PRIVILEGE_RUNNER: [&str;1] = ["sudo"];
 
+#[derive(Clone, Debug)]
+struct IpDataSet {
+    path: PathBuf,
+    subnet_size: usize,
+    set: HashSet<Ipv4Addr>
+}
+
+impl IpDataSet {
+    fn from(path: PathBuf, subnet_size: usize) -> anyhow::Result<Self> {
+        let ip_file = File::open(&path).context(format!("Failed to open ip file {:?}", &path))?;
+        let reader = BufReader::new(ip_file);
+        let ips: HashSet<_> = reader
+            .lines()
+            .map(|s| s
+                 .map_err(|e| anyhow!(e))
+                 .and_then(|s| s.parse::<Ipv4Addr>().context("Failed to parse ipv4 from ip file")))
+            .collect::<Result<HashSet<_>,_>>()?;
+
+        Ok(Self {
+            path,
+            subnet_size,
+            set: ips
+        })
+    }
+
+    fn generate_subnet(&self, subnet_size: usize, seed: u64) -> anyhow::Result<HashSet<Ipv4Addr>> {
+        if subnet_size == self.subnet_size {
+            return Ok(self.set.clone())
+        } else if subnet_size > self.subnet_size {
+            bail!("Could not generate subnet of size {}, bigger than dataset ({})", subnet_size, self.subnet_size);
+        } else {
+            let mut rng = SmallRng::seed_from_u64(seed);
+            let mask = (1u64 << (self.subnet_size as u64)) - (1u64 << (subnet_size as u64));
+            let mask = mask as u32;
+
+            let hitrate = self.set.len() as f64 / ((1u64 << (self.subnet_size as u64)) as f64);
+            info!("{} {} {}", hitrate * (1f64 - HITRATE_ALLOWED_DIFFERENCE), hitrate, hitrate*(1f64 + HITRATE_ALLOWED_DIFFERENCE));
+            for _ in 0..HITRATE_GENERATION_ALLOWED_ATTEMPTS {
+                let subnet = mask & rng.next_u32();
+                if let Some(res) = self.try_generate_subnet(subnet_size, mask, subnet, hitrate) {
+                    if res.len() == 0 {
+                        bail!("Failed to generate subnet of size {} with hitrate {}: results in empty set", subnet_size, hitrate)
+                    }
+                    return Ok(res)
+                }
+            }
+            bail!("Failed to generate subnet of size {} with hitrate {} after {} attempts", subnet_size, hitrate, HITRATE_GENERATION_ALLOWED_ATTEMPTS);
+        }
+    }
+
+    fn try_generate_subnet(&self,subnet_size: usize, mask: u32, subnet: u32, hitrate: f64) -> Option<HashSet<Ipv4Addr>> {
+        let mut new_set = HashSet::new();
+
+        for ip in self.set.iter() {
+            if u32::from(*ip) & mask == subnet {
+                new_set.insert(*ip);
+            }
+        }
+
+        let new_hitrate = (new_set.len() as f64) / ((1u64 << subnet_size) as f64);
+        if (((new_hitrate as f64) / (hitrate as f64)) - 1f64).abs() > HITRATE_ALLOWED_DIFFERENCE {
+            info!("{}/{}: {}", Ipv4Addr::from(subnet), 32 - subnet_size, new_hitrate);
+            return None
+        }
+
+        Some(new_set)
+    }
+}
+
 fn log<R>(log_path: &Path, reader: &mut R, name: &str) -> anyhow::Result<()>
 where
     R: ?Sized,
@@ -57,10 +128,14 @@ pub fn run() -> Result<(), anyhow::Error> {
     let cores: u32 = 4;
 
     let seed: u64 = 0x1337133713371337;
-    let scan_sizes: Vec<u64> = vec![8, 16];//, 24];//,32]; // TODO 8 only for test purposes
+
+    let times: u64 = 2;
+
+    let scan_sizes: Vec<u64> = vec![16];//, 24];//,32]; // TODO 8 only for test purposes
     // let scan_sizes: Vec<u64> = vec![24];
     // let hit_rates: Vec<f64> = vec![0.001, 0.0032,0.01,0.032,0.1];
-    let hit_rates: Vec<f64> = vec![0.02];
+    info!("Loading in data sets");
+    let ip_data_sets: Vec<IpDataSet> = vec![IpDataSet::from(PathBuf::from("./data/http-scan.txt"), 32usize)?];
     // let false_positive_rates: Vec<TestType> = vec![Baseline, EmptyFilter,Normal(0.1),Normal(0.01),Normal(0.001),Normal(0.0001)];
     let false_positive_rates: Vec<TestType> = vec![Baseline, Normal(0.001), BpfStats(0.001)];
     let filter_types: Vec<FilterType> = vec![Bitmap, Bloom];
@@ -68,20 +143,24 @@ pub fn run() -> Result<(), anyhow::Error> {
     // let scan_rates: Vec<u64> = vec![316_000, 562_000, 1_000_000, 1_780_000, 3_160_000];
     let scan_rates: Vec<u64> = vec![500000, 629463, 792447, 997631, 1255943, 1581139, 1990536, 2505936, 3154787, 3971641, 5000000];
 
+    let mut ip_sets = vec![];
 
-    for scan_size in &scan_sizes {
-        for hit_rate in &hit_rates {
-            let data_args = DataArgs::from(seed, *scan_size, *hit_rate);
-            if data_args.entries == 0 {
-                info!("Skipping {}; no entries", data_args);
-                continue;
-            }
+    for (i, ip_data_set) in ip_data_sets.iter().enumerate() {
+        for scan_size in &scan_sizes {
+            let data_args = DataArgs::from(seed, *scan_size, i as u64);
 
             info!("Building IP file for {}", data_args);
-            let (ip_file_path, subnet) = build_ip_file(data_args)
+            let (ip_file_path, subnet, entries, hitrate) = build_ip_file(ip_data_set, data_args)
                 .context(format!("Building ip file for {}", data_args))?;
+            ip_sets.push((*scan_size, i, ip_file_path, subnet, entries));
+            info!("{}: subnet: {}, entries: {}, hitrate: {}", data_args, subnet, entries, hitrate);
+        }
+    }
 
-            info!("subnet for {} is {}", data_args, subnet);
+
+    for time in 0..times {
+        for (scan_size, data_index, ip_file_path, subnet, entries) in &ip_sets {
+            let data_args = DataArgs::from(seed, *scan_size, *data_index as u64);
 
             for test_type in &false_positive_rates {
                 let filter_types = match test_type {
@@ -94,7 +173,7 @@ pub fn run() -> Result<(), anyhow::Error> {
                 };
 
                 for filter_type in filter_types {
-                    let bloom_args = FilterArgs::from(data_args, *test_type, *filter_type);
+                    let bloom_args = FilterArgs::from(data_args, *test_type, *filter_type, *entries);
                     info!("Building binaries for {} {}", data_args, bloom_args);
                     build_binaries(data_args, bloom_args)
                         .context(format!("Failed to build binaries for {} {}", data_args, bloom_args))?;
@@ -113,7 +192,7 @@ pub fn run() -> Result<(), anyhow::Error> {
 
                     for scan_rate in &scan_rates {
                         let scan_args = ScanArgs::new(*scan_rate);
-                        let args = BenchArgs {data_args, bloom_filter_args: bloom_args, scan_args};
+                        let args = BenchArgs {n: time, data_args, bloom_filter_args: bloom_args, scan_args};
 
                         let run_output = (|| {
                             fs::create_dir_all(args.wd_path())
@@ -142,7 +221,7 @@ pub fn run() -> Result<(), anyhow::Error> {
                             }
 
                             info!("Running zmap for {}", args);
-                            let zmap_result = run_zmap(args, subnet)
+                            let zmap_result = run_zmap(args, *subnet)
                                 .context(format!("Running zmap for {}", args));
                             if let Err(e) = zmap_result {
                                 return Err((handle, e));
@@ -231,46 +310,45 @@ fn clean() -> anyhow::Result<()> {
     Ok(())
 }
 
-fn next_ip(rng: &mut SmallRng, mask: u32) -> u32 {
-    loop {
-        let ip = rng.next_u32() & mask;
-        if ip & 0xff000000 != 0x7f000000 {
-            // can not have ips in 127.0.0.0/8
-            break ip;
-        }
-    }
-}
-
-fn build_ip_file(data_args: DataArgs) -> anyhow::Result<(PathBuf, Ipv4Addr)> {
+fn build_ip_file(data_set: &IpDataSet, data_args: DataArgs) -> anyhow::Result<(PathBuf, Ipv4Addr, u64, f64)> {
     let mut path = PathBuf::new();
     path.push(BENCH_BASE_PATH);
     path.push(BENCH_DATA_PATH);
+    path.push("build");
     path.push(data_args.rel_path());
+    let mut info_path = path.clone();
     path.push("ips.txt");
+    info_path.push("ips-info.json");
 
     fs::create_dir_all(path.parent().unwrap())?;
 
-    let ip_file = File::create(&path)?;
-    let mut writer = BufWriter::new(ip_file);
+    let ip_set = data_set.generate_subnet(data_args.scan_subnet_size as usize, data_args.seed)?;
 
-    let mut rng = SmallRng::seed_from_u64(data_args.seed);
+    let subnet = u32::from(*ip_set.iter().next().ok_or(anyhow!("dataset empty: {:?}", data_set.path))?) & ((((1u64 << 32u64) - 1) << data_args.scan_subnet_size) as u32);
+    let subnet = Ipv4Addr::from(subnet);
 
-    let lower_subnet_mask = ((1u64 << (data_args.scan_subnet_size)) - 1u64) as u32;
-    let upper_subnet_mask = u32::MAX - lower_subnet_mask;
+    let entries = ip_set.len() as u64;
+    let hitrate = (entries as f64) / ((1u64 << data_args.scan_subnet_size) as f64);
 
-    let subnet = next_ip(&mut rng, upper_subnet_mask);
+    let ip_file = File::create(&path)?;
+    let mut writer = BufWriter::new(ip_file);
 
-    for _ in 0..data_args.entries {
-        let ip = subnet | next_ip(&mut rng, lower_subnet_mask);
-        writer.write(Ipv4Addr::from(ip).to_string().as_bytes())?;
+    for ip in ip_set.into_iter() {
+        writer.write(ip.to_string().as_bytes())?;
         writer.write(b"\n")?;
     }
 
-    Ok((path, Ipv4Addr::from(subnet)))
+    let mut ip_info_file = File::create(&info_path)?;
+    ip_info_file.write_all(format!("{{\"subnet\": \"{}\", \"entries\": {}, \"hitrate\": {}}}", subnet.to_string(), entries, hitrate).as_bytes())?;
+
+    Ok((path, Ipv4Addr::from(subnet), entries, hitrate))
 }
 
 fn build_binaries(data_args: DataArgs, bloom_args: FilterArgs) -> anyhow::Result<()> {
     let bin_path = BenchArgs::bin_bin_path(data_args, bloom_args);
+    if bin_path.exists() {
+        return Ok(())
+    }
     fs::create_dir_all(&bin_path).context("Failed to create bench dir")?;
     let output = Command::new("cargo")
         .args([
@@ -294,13 +372,16 @@ fn build_binaries(data_args: DataArgs, bloom_args: FilterArgs) -> anyhow::Result
     Ok(())
 }
 
-fn build_filter(data_args: DataArgs, bloom_args: FilterArgs, filter_type: FilterType, ip_file_path: &Path) -> anyhow::Result<PathBuf> {
+fn build_filter(data_args: DataArgs, filter_args: FilterArgs, filter_type: FilterType, ip_file_path: &Path) -> anyhow::Result<PathBuf> {
     let filter_file = match filter_type {
         Bloom => "ips.bfb",
         Bitmap => "ips.fb",
     };
 
-    let path = BenchArgs::bin_wd_path(data_args, bloom_args).join(filter_file);
+    let path = BenchArgs::bin_wd_path(data_args, filter_args).join(filter_file);
+    if path.exists() {
+        return Ok(path)
+    }
     fs::create_dir_all(path.parent().unwrap()).context("Failed to create bench dir")?;
 
     let filter_type_string = filter_type.to_string().to_lowercase();
@@ -308,13 +389,13 @@ fn build_filter(data_args: DataArgs, bloom_args: FilterArgs, filter_type: Filter
     let output = Command::new("/usr/bin/time")
         .args([
             // time args
-            "-o", BenchArgs::bin_wd_path(data_args, bloom_args).join("filter_extern_time.json").to_str().unwrap(),
+            "-o", BenchArgs::bin_wd_path(data_args, filter_args).join("filter_extern_time.json").to_str().unwrap(),
             "--format", "{\"clock\": %e, \"cpu_p\": \"%P\", \"kernel_s\": %S, \"user_s\": %U}",
 
             // actual command
-            BenchArgs::bin_bin_path(data_args, bloom_args).join("tools/build_filter").to_str().unwrap(),
+            BenchArgs::bin_bin_path(data_args, filter_args).join("tools/build_filter").to_str().unwrap(),
             "--force",
-            "--timing-path", BenchArgs::bin_wd_path(data_args, bloom_args).join("filter_intern_time.json").to_str().unwrap(),
+            "--timing-path", BenchArgs::bin_wd_path(data_args, filter_args).join("filter_intern_time.json").to_str().unwrap(),
             filter_type_string.as_str(),
             ip_file_path.to_str().unwrap(),
             path.to_str().unwrap()
@@ -326,7 +407,7 @@ fn build_filter(data_args: DataArgs, bloom_args: FilterArgs, filter_type: Filter
         .output()
         .context("Failed to run build_filter binary")?;
 
-    let log_path = BenchArgs::bin_log_path(data_args, bloom_args);
+    let log_path = BenchArgs::bin_log_path(data_args, filter_args);
     log_both(&log_path, &mut output.stderr.as_slice(), &mut output.stdout.as_slice(), "build-filter")?;
     ensure!(output.status.success(), CommandError::new(output, log_path));
 
@@ -351,8 +432,6 @@ fn load_xdp(bench_args: BenchArgs, filter_path: &Path) -> Result<(process::Child
         "--filter-type", filter_type.as_str(),
     ]);
 
-    println!("{:?}", args);
-
     let mut handle = Command::new(args.remove(0))
         .args(args)
         .env("RUST_LOG", "info")
@@ -516,7 +595,7 @@ fn run_zmap(bench_args: BenchArgs, subnet: Ipv4Addr) -> anyhow::Result<process::
     let seed = bench_args.data_args.seed.to_string();
     let mut args = Vec::from(PRIVILEGE_RUNNER);
     args.extend_from_slice(&[
-        "zmap",
+        "./zmap",
         subnet_string.as_str(),
         "--target-port=80",
         "--interface", interface,
@@ -526,7 +605,7 @@ fn run_zmap(bench_args: BenchArgs, subnet: Ipv4Addr) -> anyhow::Result<process::
         "--sender-threads=7",
         "--cooldown-time=1",
         "--seed", seed.as_str(),
-        "--blacklist-file=blocklist",
+        "--blocklist-file=blocklist",
         "--max-sendto-failures=-1"
     ]);
     let output = Command::new(args.remove(0))

+ 12 - 11
responder-bench/src/run/args.rs

@@ -10,6 +10,7 @@ const ADDRESS_MIN_BITS: u64 = 3;
 
 #[derive(Debug, Copy, Clone)]
 pub struct BenchArgs {
+    pub n: u64,
     pub data_args: DataArgs,
     pub bloom_filter_args: FilterArgs,
     pub scan_args: ScanArgs
@@ -17,7 +18,7 @@ pub struct BenchArgs {
 
 impl Display for BenchArgs {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "BenchArgs({}, {}, {})", self.data_args, self.bloom_filter_args, self.scan_args)
+        write!(f, "BenchArgs({}, {}, {}, {})", self.n, self.data_args, self.bloom_filter_args, self.scan_args)
     }
 }
 
@@ -25,13 +26,12 @@ impl Display for BenchArgs {
 pub struct DataArgs {
     pub seed: u64,
     pub scan_subnet_size: u64,
-    pub hit_rate: f64,
-    pub entries: u64,
+    pub data_set_index: u64,
 }
 
 impl Display for DataArgs {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "Data(seed: {}, subnet: {}, hit rate: {})", self.seed, self.scan_subnet_size, self.hit_rate)
+        write!(f, "Data(seed: {}, subnet: {}, dataset index: {})", self.seed, self.scan_subnet_size, self.data_set_index)
     }
 }
 
@@ -64,6 +64,7 @@ impl Display for ScanArgs {
 impl BenchArgs {
     pub fn rel_path(&self) -> PathBuf {
         let mut path = PathBuf::new();
+        path.push(PathBuf::from(self.n.to_string()));
         path.push(self.data_args.rel_path());
         path.push(self.bloom_filter_args.rel_path());
         path.push(self.scan_args.rel_path());
@@ -95,6 +96,7 @@ impl BenchArgs {
         let mut path = PathBuf::new();
         path.push(BENCH_BASE_PATH);
         path.push(BENCH_LOG_PATH);
+        path.push("build");
         path.push(data_args.rel_path());
         path.push(bloom_args.rel_path());
         return path;
@@ -104,6 +106,7 @@ impl BenchArgs {
         let mut path = PathBuf::new();
         path.push(BENCH_BASE_PATH);
         path.push(BENCH_DATA_PATH);
+        path.push("build");
         path.push(data_args.rel_path());
         path.push(bloom_args.rel_path());
         return path;
@@ -119,13 +122,11 @@ impl BenchArgs {
 }
 
 impl DataArgs {
-    pub fn from(seed: u64, scan_subnet_size: u64, hit_rate: f64) -> Self {
-        let entries = (((1u64 << scan_subnet_size) as f64) * hit_rate).round() as u64;
+    pub fn from(seed: u64, scan_subnet_size: u64, data_set_index: u64) -> Self {
         Self {
             seed,
             scan_subnet_size,
-            hit_rate,
-            entries
+            data_set_index,
         }
     }
 
@@ -133,19 +134,19 @@ impl DataArgs {
         let mut path = PathBuf::new();
         path.push(self.seed.to_string());
         path.push(self.scan_subnet_size.to_string());
-        path.push(self.hit_rate.to_string());
+        path.push(self.data_set_index.to_string());
         return path;
     }
 }
 
 impl FilterArgs {
-    pub fn from(data_args: DataArgs, test_type: TestType, filter_type: FilterType) -> FilterArgs {
+    pub fn from(data_args: DataArgs, test_type: TestType, filter_type: FilterType, entries: u64) -> FilterArgs {
         match test_type {
             Normal(false_hit_rate) | BpfStats(false_hit_rate) => {
                 match filter_type {
                     Bloom => {
                         let hash_count = (-false_hit_rate.log2()).round();
-                        let size = (data_args.entries as f64) * (hash_count/ 2f64.ln());
+                        let size = (entries as f64) * (hash_count/ 2f64.ln());
                         let address_bits = size.log2().round() as u64;
                         let address_bits = ADDRESS_MIN_BITS.max(address_bits);
                         let chunk_address_bits = CHUNK_MAX_BITS.min(address_bits);

+ 0 - 2
responder-common/src/lib.rs

@@ -1,8 +1,6 @@
 #![no_std]
 
 pub mod filter {
-    use core::str::FromStr;
-
     use konst::primitive::parse_usize;
     use konst::unwrap_ctx;
     use konst::option::unwrap_or;

+ 2 - 2
responder/src/main.rs

@@ -25,9 +25,9 @@ struct Opt {
     iface: String,
     #[clap(short, long, default_value = "syn")]
     scan_type: String,
-    #[clap(short, long, default_value = "bitmap")]
+    #[clap(long, default_value = "bitmap")]
     filter_type: String,
-    #[clap(short, long)]
+    #[clap(long)]
     filter_path: PathBuf,
     #[clap(default_value = DEFAULT_TARGET, long)]
     target: PathBuf,