123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273 |
- use std::{fmt::Debug, path::{Path, PathBuf}, fs::File, io::{Read, BufReader, BufRead, Write}, collections::{HashMap, HashSet}, net::Ipv4Addr};
- use anyhow::{Context, bail, anyhow};
- use clap::Parser;
- use json::JsonValue;
- use regex::Regex;
- use crate::run::{BENCH_BASE_PATH, BENCH_DATA_PATH};
- fn json_from_file<P>(path: P) -> anyhow::Result<HashMap<String, f64>>
- where
- P: Debug,
- P: AsRef<Path>
- {
- if !path.as_ref().exists() {
- return Ok(HashMap::new());
- }
- let mut vec = vec![];
- File::open(&path)
- .context(format!("Failed to open {:?}", path))?
- .read_to_end(&mut vec)?;
- let json_val = json::parse(String::from_utf8(vec)?.as_str())?;
- let json_obj = if let JsonValue::Object(o) = json_val {
- o
- } else {
- bail!("Expected json object, got: {}", json_val);
- };
- let mut map = HashMap::new();
- for (key, val) in json_obj.iter() {
- let val: f64 = match val {
- JsonValue::Number(n) => {
- f64::from(*n)
- },
- JsonValue::String(_) | JsonValue::Short(_) => {
- val.as_str()
- .unwrap()
- .strip_suffix("%")
- .ok_or(anyhow!("Expected percentage String, got: {:?}", val))?
- .parse()?
- },
- _ => bail!("Expected json number or string, got: {:?}", val),
- };
- map.insert(String::from(key), val);
- }
- Ok(map)
- }
- fn ips_from_file<P>(path: P) -> anyhow::Result<HashSet<Ipv4Addr>>
- where
- P: Debug,
- P: AsRef<Path>
- {
- let f = File::open(path)?;
- let reader = BufReader::new(f);
- let mut hashset = HashSet::new();
- for line in reader.lines() {
- let line = line?;
- hashset.insert(line.parse()?);
- }
- Ok(hashset)
- }
- fn parse_rate(rate: &str, unit: &str) -> anyhow::Result<f64> {
- let multiplier = match unit {
- "G" => 1_000_000_000f64,
- "M" => 1_000_000f64,
- "K" => 1_000f64,
- "" => 1f64,
- m => bail!("Unknown unit {} (rate: {})", m, rate)
- };
- let rate: f64 = rate.parse()?;
- return Ok(rate * multiplier)
- }
- fn zmap_stats<P>(path: P, regex: &Regex) -> anyhow::Result<(f64, f64, f64)>
- where
- P: Debug,
- P: AsRef<Path>
- {
- let f = File::open(path)?;
- let reader = BufReader::new(f);
- let mut rates = None;
- for line in reader.lines() {
- let line = line?;
- if let Some(capture) = regex.captures(&line) {
- let result: anyhow::Result<_> = (|| {
- let send_rate = capture
- .get(1)
- .ok_or(anyhow!("Capture group 1 did not match"))?;
- let send_rate_unit = capture
- .get(2)
- .ok_or(anyhow!("Capture group 2 did not match"))?;
- let send_rate = parse_rate(send_rate.as_str(), send_rate_unit.as_str())?;
- let receive_rate = capture
- .get(3)
- .ok_or(anyhow!("Capture group 3 did not match"))?;
- let receive_rate_unit = capture
- .get(4)
- .ok_or(anyhow!("Capture group 4 did not match"))?;
- let receive_rate = parse_rate(receive_rate.as_str(), receive_rate_unit.as_str())?;
- let drop_rate = capture
- .get(5)
- .ok_or(anyhow!("Capture group 5 did not match"))?;
- let drop_rate_unit = capture
- .get(6)
- .ok_or(anyhow!("Capture group 6 did not match"))?;
- let drop_rate = parse_rate(drop_rate.as_str(), drop_rate_unit.as_str())?;
- Ok((send_rate, receive_rate, drop_rate))
- })();
- rates = Some(result.context(format!("Failed to parse stats line: '{}'", line))?);
- }
- }
- rates.ok_or(anyhow!("Failed to find final stats line"))
- }
// Command-line options for the `process` subcommand.
// NOTE(review): plain `//` comments on purpose — clap turns `///` doc
// comments on derived structs/fields into --help text, which would change
// the program's user-visible output.
#[derive(Debug, Parser)]
pub struct Options {
    // Benchmark run seed: selects the <seed> subdirectory under
    // BENCH_BASE_PATH/BENCH_DATA_PATH that `process` walks.
    seed: String
}
- pub fn process(opts: Options) -> anyhow::Result<()> {
- let mut path = PathBuf::new();
- path.push(BENCH_BASE_PATH);
- path.push(BENCH_DATA_PATH);
- path.push(opts.seed);
- let zmap_stats_regex = Regex::new(r"^[^\(;]+(?:\(.+left\))?; send: [^\(;]+done \(([\d\.]+) ([KMG]?)p/s avg\); recv: [^\(;]+\(([\d\.]+) ([KMG]?)p/s avg\); drops: [^\(;]+\(([\d\.]+) ([KMG]?)p/s avg\); hitrate: [^;]+$")?;
- let header_row = [
- "type", "filter-type",
- "subnet_size", "hitrate", "bloom_filter_bits", "bloom_filter_hash_count", "zmap_scanrate",
- "bpf_run_time_total", "bpf_run_count", "bpf_memory_lock",
- "filter_intern_build_time", "filter_intern_write_time",
- "filter_extern_time_clock", "filter_extern_cpu_p", "filter_extern_kernel_secs", "filter_extern_user_secs",
- "zmap_send_rate", "zmap_receive_rate", "zmap_drop_rate",
- "false_positive_count", "false_negative_count"
- ];
- let mut data_rows = vec![];
- data_rows.push(header_row.map(str::to_string));
- for subnet_dir in path.read_dir().context(format!("Failed to read subnet dirs in path {:?}", &path))? {
- let subnet_dir = subnet_dir.context(format!("Failed to read file info on file in path {:?}", &path))?;
- if !subnet_dir
- .file_type()
- .context(format!("Failed to read file info on file {:?}", subnet_dir.path()))?
- .is_dir() {
- bail!("Expected dir at {:?}", subnet_dir.path())
- }
- let subnet = subnet_dir.file_name().into_string().map_err(|e| anyhow!(format!("{:?}", e)))?;
- for hitrate_dir in subnet_dir.path().read_dir().context(format!("Failed to read hitrate dirs in path {:?}", subnet_dir.path()))? {
- let hitrate_dir = hitrate_dir.context(format!("Failed to read file info on file in path {:?}",subnet_dir.path()))?;
- if !hitrate_dir
- .file_type()
- .context(format!("Failed to read file info on file {:?}", hitrate_dir.path()))?
- .is_dir() {
- bail!("Expected dir at {:?}", hitrate_dir.path())
- }
- let hitrate = hitrate_dir.file_name().into_string().map_err(|e| anyhow!(format!("{:?}", e)))?;
- let in_ips = ips_from_file(hitrate_dir.path().join("ips.txt")).context(format!("Failed to read ips from {:?}/ips.txt", hitrate_dir.path()))?;
- for bloom_dir in hitrate_dir.path().read_dir().context(format!("Failed to read bloom dirs in path {:?}", hitrate_dir.path()))? {
- let bloom_dir = bloom_dir.context(format!("Failed to read file info on file in path {:?}", hitrate_dir.path()))?;
- if !bloom_dir
- .file_type()
- .context(format!("Failed to read file info on file {:?}", bloom_dir.path()))?
- .is_dir() {
- continue;
- }
- let bloom_folder_name = bloom_dir
- .file_name()
- .into_string()
- .map_err(|e| anyhow!(format!("{:?}", e)))?;
- let (test_type, filter_type, bloom_bits, bloom_hashes) = if bloom_folder_name.contains('-') {
- let (bloom_bits, rem) = bloom_folder_name.split_once("-")
- .ok_or(anyhow!("Expected filename with -, got {:?}", bloom_dir.file_name()))?;
- let (filter_type, rem) = rem.split_once("-").unwrap_or((rem, ""));
- let (bloom_hashes, bpf_enabled) = if filter_type == "bloom" {
- rem.split_once("-").map(|(c, rem)| (c,rem=="bpf")).unwrap_or((rem, false))
- } else {
- ("0", rem == "bpf")
- };
- let bloom_bits = bloom_bits.to_string();
- let bloom_hashes = bloom_hashes.to_string();
- let test_type = if bpf_enabled {
- "bpf-stats"
- } else {
- "normal"
- };
- (test_type, filter_type, bloom_bits, bloom_hashes)
- } else {
- ("baseline","none", String::from("-1"), String::from("-1"))
- };
- let bloom_path = bloom_dir.path();
- let mut filter_intern_time = json_from_file(bloom_path.join("filter_intern_time.json"))
- .context(format!("Failed to parse filter_intern_time.json for {:?}", bloom_path))?;
- let mut filter_extern_time = json_from_file(bloom_path.join("filter_extern_time.json"))
- .context(format!("Failed to parse filter_extern_time.json for {:?}", bloom_path))?;
- for scan_rate_dir in bloom_dir.path().read_dir().context(format!("Failed to read scan rate dirs in path {:?}", bloom_dir.path()))? {
- let scan_rate_dir = scan_rate_dir.context(format!("Failed to read file info on file in path {:?}", bloom_dir.path()))?;
- if !scan_rate_dir
- .file_type()
- .context(format!("Failed to read file info on file {:?}", scan_rate_dir.path()))?
- .is_dir() {
- continue;
- }
- let scan_rate = scan_rate_dir.file_name().to_str().unwrap().to_string();
- let wd_path = scan_rate_dir.path();
- let mut bpf_stats = json_from_file(wd_path.join("bpf_stats.json"))
- .context(format!("Failed to parse bpf_stats.json for {:?}", wd_path))?;
- let out_ips = ips_from_file(wd_path.join("zmap_out_ips.txt"))
- .context(format!("Failed to parse zmap_out_ips.txt from {:?}", wd_path))?;
- let zmap_stats = zmap_stats(wd_path.join("zmap_stats.txt"), &zmap_stats_regex)
- .context(format!("Failed to parse zmap_stats.txt from {:?}", wd_path))?;
- let get_or_default = |map: &mut HashMap<String, f64>, k: &str| map
- .get(k).unwrap_or(&-1f64).to_string();
- let data_row = (|| {
- Ok([
- test_type.to_owned(),
- filter_type.to_owned(),
- subnet.clone(), hitrate.clone(), bloom_bits.clone(), bloom_hashes.clone(), scan_rate.clone(),
- get_or_default(&mut bpf_stats, "run_time"),
- get_or_default(&mut bpf_stats, "run_count"),
- get_or_default(&mut bpf_stats, "mem_lock"),
- get_or_default(&mut filter_intern_time, "build"),
- get_or_default(&mut filter_intern_time, "write"),
- get_or_default(&mut filter_extern_time, "clock"),
- get_or_default(&mut filter_extern_time, "cpu_p"),
- get_or_default(&mut filter_extern_time, "kernel_s"),
- get_or_default(&mut filter_extern_time, "user_s"),
- zmap_stats.0.to_string(),
- zmap_stats.1.to_string(),
- zmap_stats.2.to_string(),
- out_ips.difference(&in_ips).count().to_string(),
- in_ips.difference(&out_ips).count().to_string(),
- ])
- })().map_err(|key: String| {
- anyhow!("Failed to read data point {} for {:?}", key, wd_path)
- })?;
- let mut f = File::create(wd_path.join("data_row.csv"))?;
- f.write_all(header_row.join(",").as_bytes())?;
- f.write(&[b'\n'])?;
- f.write_all(data_row.join(",").as_bytes())?;
- data_rows.push(data_row);
- }
- }
- }
- }
- let data = data_rows.into_iter()
- .map(|row| row.join(","))
- .fold(String::new(), |a, b| a + &b + "\n");
- File::create(path.join("data.csv"))?.write_all(data.as_bytes())?;
- Ok(())
- }
|