use anyhow::{ensure, Result};
use bincode::Options;
use bitvec::array::BitArray;
use bitvec::order::Lsb0;
use bitvec::prelude::*;
use bytes::Bytes;
use geohash::Coord;
use kdam::format::size_of;
use kdam::BarIter;
use log::debug;
use log::kv::Source;
use rmp_serde::{Deserializer as MsgPackDeserializer, Serializer as MsgPackSerializer};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::collections::HashMap;
use std::collections::HashSet;
use std::fmt::Pointer;
use std::fs::File;
use std::io::Read;
use std::io::{self, Write};
use std::{hash::Hash, ops::Deref};
use zstd::stream::{read::Decoder as ZstdDecoder, write::Encoder as ZstdEncoder};
pub type MiniMap = HashMap<String, u8>;
pub type Mac = [u8; 6];
pub type GeoSegment = [u8; 2];
pub type Blip = (Coord, Mac, std::string::String);
pub type Blips = Vec<Blip>;
pub const GEOHASH_DEPTHS: [usize; 4] = [0, 2, 4, 6];
pub fn generate_fingerprint_wifi_unused(geo: &str, macaddr: &Mac, ssid: &str) -> String {
const SHORTGEO: usize = 5;
const SHORTHASH: usize = 4;
let mut h = blake3::Hasher::new();
h.update(geo[..SHORTGEO].as_bytes());
h.update(macaddr);
h.update(ssid.as_bytes());
let mut result = String::with_capacity(20);
result.push('1');
result.push_str(&geo[..SHORTGEO]);
result.push('-');
result.push_str(&h.finalize().to_hex()[..SHORTHASH]);
result
}
pub fn parse_macaddr(mac: &str) -> Result<Mac> {
let chunks: Vec<&str> = mac.split(':').collect();
if chunks.len() != 6 {
anyhow::bail!("Invalid MAC address {}", mac);
}
let mut out = [0u8; 6];
for (i, part) in chunks.iter().enumerate() {
out[i] = u8::from_str_radix(part, 16)?;
}
Ok(out)
}
pub fn haversine_distance_coord(a: Coord, b: Coord) -> f64 {
const EARTH_RADIUS: f64 = 6371000.0;
let a_lat = a.y.to_radians();
let b_lat = b.y.to_radians();
let dlat = b_lat - a_lat;
let dlon = b.x.to_radians() - a.x.to_radians();
let a = (dlat / 2.0).sin().powi(2) + a_lat.cos() * b_lat.cos() * (dlon / 2.0).sin().powi(2);
let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
EARTH_RADIUS * c
}
pub fn haversine_distance_gh(a: &Coord, gh_b: &str) -> f64 {
let b = geohash::decode_bbox(gh_b).unwrap().center();
let lat1_rad = a.y.to_radians();
let lat2_rad = b.y.to_radians();
let dlat = lat2_rad - lat1_rad;
let dlon = b.x.to_radians() - a.x.to_radians();
let a =
(dlat / 2.0).sin().powi(2) + lat1_rad.cos() * lat2_rad.cos() * (dlon / 2.0).sin().powi(2);
let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
let earth_radius_m = 6371000.0;
earth_radius_m * c
}
pub fn latlon_to_geohash(latitude: f64, longitude: f64) -> String {
let c = Coord {
x: longitude,
y: latitude,
};
geohash::encode(c, 9usize).unwrap()
}
pub fn coord_to_geohash(c: Coord) -> String {
geohash::encode(c, 9usize).unwrap()
}
type Lsm = f32;
const GEOHASH_PRECISION: [(usize, f32); 9] = [
(1, 2500.0),
(2, 156.0),
(3, 20.0),
(4, 2.4),
(5, 0.61),
(6, 0.076),
(7, 0.019),
(8, 0.0024),
(9, 0.0006),
];
const GEOHASH_CONV_HACK: f32 = 1500.0;
fn geohash_length_to_lsm(len: &usize) -> Lsm {
match GEOHASH_PRECISION.get(*len) {
Some((_, max_err_km)) => max_err_km * GEOHASH_CONV_HACK,
None => f32::MAX,
}
}
pub fn geohash_to_plocation(geohash: &str) -> Result<Plocation> {
let loc = geohash::decode_bbox(geohash)?.center();
let lsm = geohash_length_to_lsm(&geohash.len());
Ok(Plocation { loc, lsm })
}
fn lsm_to_geohash_length(stdev: Lsm) -> usize {
for &(length, error_km) in GEOHASH_PRECISION.iter() {
if stdev >= error_km * GEOHASH_CONV_HACK {
return length - 1;
}
}
GEOHASH_PRECISION.len()
}
pub const FINGERPRINT_LEN: usize = 6;
const DENSITY_COMPRESSION: [usize; 3] = [2, 4, 6];
const _: () = {
if (FINGERPRINT_LEN != DENSITY_COMPRESSION[DENSITY_COMPRESSION.len() - 1]) {
panic!("Compile-time assertion failed: a is not less than b");
}
};
pub fn generate_fingerprint_wifi(geohash: &str, macaddr: &Mac, ssid: &str) -> String {
let mut h = blake3::Hasher::new();
h.update(geohash.as_bytes());
h.update(macaddr);
h.update(ssid.as_bytes());
let mut result = "".to_string();
result.push_str(&h.finalize().to_hex()[..FINGERPRINT_LEN]);
result
}
const GEOHASH_GRID_SIZE: usize = 32;
const BITSET_SIZE: usize = GEOHASH_GRID_SIZE * GEOHASH_GRID_SIZE;
pub struct GeoBitSet {
data: [u128; BITSET_SIZE / 128],
}
impl GeoBitSet {
fn new() -> Self {
GeoBitSet {
data: [0; BITSET_SIZE / 128],
}
}
fn insert(&mut self, geosegment: &GeoSegment) {
let idx = geosegment_to_index(geosegment);
let (word, bit) = (idx / 128, idx % 128);
self.data[word] |= 1 << bit;
}
fn contains(&self, geosegment: &GeoSegment) -> bool {
let idx = geosegment_to_index(geosegment);
let (word, bit) = (idx / 128, idx % 128);
(self.data[word] & (1 << bit)) != 0
}
}
pub type BloomHash = BitArray<[u8; 128], Lsb0>;
fn get_segments(b: &BloomHash) -> Vec<GeoSegment> {
let mut out = vec![];
for (i, bit) in b.iter().enumerate() {
if *bit {
out.push(index_to_geosegment(i));
}
}
out
}
#[derive(Serialize, Deserialize, Debug)]
pub struct BloomHashStore {
kv: HashMap<String, BloomHash>,
}
impl BloomHashStore {
pub fn new() -> Self {
Self { kv: HashMap::new() }
}
fn insert_one(&mut self, fingerprint: &str, geosegment: &GeoSegment) {
let pos = geosegment_to_index(geosegment);
self.kv
.entry(fingerprint.into())
.or_insert(BloomHash::ZERO)
.set(pos, true);
}
pub fn insert(&mut self, fingerprint: &str, geosegment: &GeoSegment) {
let gpl = fingerprint.len();
for i in DENSITY_COMPRESSION {
if fingerprint.len() < i {
return;
}
self.insert_one(&fingerprint[0..i], geosegment)
}
}
pub fn insert_datapoint(&mut self, lat: f64, lon: f64, mac: &Mac, ssid: &str) {
let c = geohash::Coord { x: lon, y: lat };
let geoh = coord_to_geohash(c);
for s in GEOHASH_DEPTHS {
let geopath = &geoh[..s];
let fp = generate_fingerprint_wifi(geopath, mac, ssid);
let b = geoh[s..s + 2].as_bytes();
let geosegment: GeoSegment = [b[0], b[1]];
self.insert(&fp, &geosegment)
}
}
pub fn merge_store(&mut self, other: &BloomHashStore) {
for (key, other_val) in &other.kv {
if let Some(val) = self.kv.get_mut(key) {
*val |= *other_val;
} else {
self.kv.insert(key.clone(), *other_val);
}
}
}
fn set(&mut self, key: String, bit_array: BitArray<[u8; 128], Lsb0>) {
self.kv.insert(key, bit_array);
}
pub fn extract(&self, fingerprint: &str) -> &BloomHash {
let mut last: Option<&BloomHash> = None;
let mut prev: Option<&BloomHash> = None;
for fp_len in DENSITY_COMPRESSION {
if fingerprint.len() < fp_len {
break;
}
if let Some(v) = self.kv.get(&fingerprint[0..fp_len]) {
(prev, last) = (last, Some(v));
let ones = v.count_ones();
}
}
if let Some(v) = last {
v
} else {
&BitArray::ZERO
}
}
fn get_segments(&self, key: &str) -> Vec<GeoSegment> {
if let Some(v) = self.kv.get(key) {
get_segments(v)
} else {
vec![]
}
}
pub fn pack_then_compress(&self) -> Result<Vec<u8>> {
let mut buf = Vec::new();
self.serialize(&mut MsgPackSerializer::new(&mut buf))?;
let out = zstd::stream::encode_all(&buf[..], 0)?;
Ok(out)
}
pub fn decompress_and_unpack(buf: &[u8]) -> Result<Self> {
let deco = zstd::stream::decode_all(buf)?;
let mut x = MsgPackDeserializer::new(&deco[..]);
let out = Self::deserialize(&mut x)?;
Ok(out)
}
pub fn show(&self) -> Vec<(&str, Vec<GeoSegment>)> {
let mut keys: Vec<&String> = self.kv.keys().collect();
keys.sort();
keys.into_iter()
.map(|k| (k.as_str(), self.get_segments(k)))
.collect()
}
pub fn stats(&self) -> String {
use std::mem::size_of;
let s = self.kv.len() * (size_of::<BloomHash>() + 4);
format!("Number of keys {} Estimated bytes: {}", self.kv.len(), s)
}
pub fn key_count(&self) -> usize {
self.kv.len()
}
fn create_and_insert(&mut self, key: String) -> &mut BitArray<[u8; 128], Lsb0> {
self.kv.entry(key).or_insert(BitArray::ZERO)
}
}
pub fn geosegment_to_string(segment: GeoSegment) -> String {
let mut out = String::with_capacity(2);
for &b in &segment {
out.push(b as char);
}
out
}
pub fn pile_up_blooms(blooms: Vec<BloomHash>) -> [u8; BITSET_SIZE] {
let mut out: [u8; BITSET_SIZE] = [0; BITSET_SIZE];
for b in blooms.iter() {
for (i, bit) in b.iter().enumerate() {
out[i] += *bit as u8;
}
}
out
}
pub fn pick_best_segment(bts: [u8; BITSET_SIZE]) -> (f32, GeoSegment) {
let mut highest_val: u32 = 0;
let mut second_highest_val: u32 = 0;
let mut best = 0;
for (i, &v) in bts.iter().enumerate() {
let v = v as u32;
if v > highest_val {
(second_highest_val, highest_val, best) = (highest_val, v, i);
}
}
let selectiveness = (highest_val - second_highest_val) as f32 / highest_val as f32;
(selectiveness, index_to_geosegment(best))
}
fn geosegment_to_index(gs: &GeoSegment) -> usize {
geohash_char_to_index(gs[0]) as usize * GEOHASH_GRID_SIZE
+ geohash_char_to_index(gs[1]) as usize
}
fn index_to_geosegment(i: usize) -> GeoSegment {
[
geohash_index_to_char(i / GEOHASH_GRID_SIZE),
geohash_index_to_char(i % GEOHASH_GRID_SIZE),
]
}
pub fn lookup_step(kv: &BloomHashStore, blips: &Blips, geopath: &str) -> (f32, String) {
let blooms: Vec<BloomHash> = blips
.iter()
.map(|(_, mac, ssid)| {
let fingerprint = generate_fingerprint_wifi(geopath, mac, ssid);
kv.extract(&fingerprint).clone()
})
.collect();
let (selectiveness, b) = pick_best_segment(pile_up_blooms(blooms));
(selectiveness, geosegment_to_string(b))
}
const GEOHASH_ALPHABET: [char; 32] = [
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k',
'm', 'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
];
pub fn geohash_index_to_char(i: usize) -> u8 {
debug_assert!(i <= 32);
GEOHASH_ALPHABET[i] as u8
}
const fn build_lookup_table() -> [u8; 128] {
let mut out = [u8::MIN; 128];
let mut i = 0;
while i < GEOHASH_ALPHABET.len() {
let ch = GEOHASH_ALPHABET[i];
out[ch as usize] = i as u8;
i += 1;
}
out
}
const GEOHASH_LOOKUP_TABLE: [u8; 128] = build_lookup_table();
pub fn geohash_char_to_index(c: u8) -> u8 {
GEOHASH_LOOKUP_TABLE[c as usize]
}
#[derive(Serialize, Deserialize, Clone)]
pub struct Upload {
pub uuid: u32,
}
#[derive(Debug, Copy, Clone)]
pub struct Plocation {
pub loc: Coord,
pub lsm: Lsm,
}
type Plocations = Vec<Plocation>;
pub const NOWHERE: Plocation = Plocation {
loc: Coord { x: 0., y: 0. },
lsm: f32::MAX,
};
pub fn load_previous_location() -> Plocation {
NOWHERE
}
pub fn fetch_geoip_locations() -> Plocations {
vec![]
}
pub fn fetch_wifi_bt_location(current_pl: &Plocation) -> Plocation {
NOWHERE
}
fn quartiles(mut values: Vec<f64>) -> (f64, f64) {
values.sort_by(|a, b| a.partial_cmp(b).unwrap());
let q1_idx = (0.25 * (values.len() - 1) as f64) as usize;
let q3_idx = (0.75 * (values.len() - 1) as f64) as usize;
(values[q1_idx], values[q3_idx])
}
pub fn fuse_location_sources(locations: Vec<Plocation>) -> Plocation {
for p in locations {
if p.lsm < 10000. {
return p;
}
}
todo!();
let (q1_x, q3_x) = quartiles(locations.iter().map(|p| p.loc.x).collect());
let (q1_y, q3_y) = quartiles(locations.iter().map(|p| p.loc.y).collect());
let thresh_x = 1.5 * (q3_x - q1_x);
let thresh_y = 1.5 * (q3_y - q1_y);
let mut tot_x = 0.0;
let mut tot_y = 0.0;
let mut tot_weight = 0.0;
for p in locations {
if p.lsm == f32::MAX
|| p.loc.x < q1_x - thresh_x
|| (p.loc.x > q3_x + thresh_x)
|| (p.loc.y < q1_y - thresh_y)
|| (p.loc.y > q3_y + thresh_y)
{
debug!("[fuse] ignoring {p:?}");
continue;
}
let weight = 1.0 / (p.lsm.powi(2) as f64);
tot_x += p.loc.x * weight;
tot_y += p.loc.y * weight;
tot_weight += weight;
}
debug!("[fuse] tot w {tot_weight:?}");
if tot_weight > 0.0 {
let combined_lsm = (1.0 / tot_weight).sqrt() as f32;
Plocation {
loc: Coord {
x: tot_x / tot_weight,
y: tot_y / tot_weight,
},
lsm: combined_lsm,
}
} else {
NOWHERE
}
}
pub fn draw_minimap_num(m: &MiniMap) {
if m.is_empty() {
println!("empty map!");
return;
}
let max_v = m.values().max().unwrap();
for c1 in GEOHASH_ALPHABET.iter() {
for c2 in GEOHASH_ALPHABET.iter() {
if let Some(v) = m.get(&format!("{}{}", c1, c2)) {
let g = (255. * (1. - (*v as f32 / *max_v as f32))) as u8;
print!("\x1b[38;2;{0};{0};{0}m{v}", g);
} else {
print!(" ")
}
}
println!();
}
println!("\x1b[0m");
}
pub fn draw_minimap(m: &MiniMap) {
if m.is_empty() {
println!("empty map!");
return;
}
let max_v = m.values().max().unwrap();
for c1 in GEOHASH_ALPHABET.iter() {
for c2 in GEOHASH_ALPHABET.iter() {
if let Some(v) = m.get(&format!("{}{}", c1, c2)) {
let g = (255. * (1. - (*v as f32 / *max_v as f32))) as u8;
print!("\x1b[38;2;{0};{0};{0}m█", g);
} else {
print!(" ")
}
}
}
println!("\x1b[0m(@)");
println!("Filled cells: {}", m.len());
}
pub fn draw_minimap2d(m: &MiniMap) {
if m.is_empty() {
println!("empty map!");
return;
}
let max_v = m.values().max().unwrap();
for c1 in GEOHASH_ALPHABET.iter() {
for c2 in GEOHASH_ALPHABET.iter() {
if let Some(v) = m.get(&format!("{}{}", c1, c2)) {
let g = (255. * (1. - (*v as f32 / *max_v as f32))) as u8;
print!("\x1b[38;2;{0};{0};{0}m██", g);
} else {
print!(" ")
}
}
}
println!("\x1b[0m(@)");
}
pub fn gini(map: &MiniMap) {
let mut vals: Vec<u8> = map.values().cloned().collect();
vals.sort();
let n = vals.len() as f64;
let total_sum: f64 = vals.iter().sum::<u8>() as f64;
let mut cumulative_sum: f64 = 0.0;
let mut cumulative_sum_ranks: f64 = 0.0;
for (rank, &v) in vals.iter().enumerate() {
cumulative_sum += v as f64;
cumulative_sum_ranks += cumulative_sum / total_sum * (rank as f64 + 1.0);
}
let gini_coeff = (n + 1.0 - 2.0 * cumulative_sum_ranks / total_sum) / n;
println!("Gini coefficient: {:.4}", gini_coeff);
}
#[derive(Debug, Serialize, Deserialize)]
pub struct MLSGeoSubmit {
pub items: Vec<MLSGeoSubItem>,
}
#[allow(dead_code, non_snake_case)]
#[derive(Debug, Serialize, Deserialize)]
pub struct MLSGeoSubItem {
pub timestamp: Option<i64>,
pub position: Option<Position>,
pub bluetoothBeacons: Option<Vec<BluetoothBeacon>>,
pub cellTowers: Option<Vec<CellTower>>,
pub wifiAccessPoints: Option<Vec<WiFiAccessPoint>>,
}
#[allow(dead_code, non_snake_case)]
#[derive(Debug, Serialize, Deserialize)]
pub struct Position {
pub latitude: f64,
pub longitude: f64,
pub accuracy: Option<f64>,
pub age: Option<i64>,
pub altitude: Option<f64>,
pub altitudeAccuracy: Option<f64>,
pub heading: Option<f64>,
pub pressure: Option<f64>,
pub speed: Option<f64>,
pub source: Option<String>,
}
#[allow(dead_code, non_snake_case)]
#[derive(Debug, Serialize, Deserialize)]
struct BluetoothBeacon {
macAddress: String,
name: Option<String>,
age: Option<i64>,
signalStrength: Option<i64>,
}
#[allow(dead_code, non_snake_case)]
#[derive(Debug, Serialize, Deserialize)]
pub struct CellTower {
pub radioType: String,
pub mobileCountryCode: i32,
pub mobileNetworkCode: i32,
pub locationAreaCode: Option<i32>,
pub cellId: i32,
pub age: Option<i64>,
pub asu: Option<i32>,
pub primaryScramblingCode: Option<i32>,
pub serving: Option<i32>,
pub signalStrength: Option<i64>,
pub timingAdvance: Option<i32>,
}
#[allow(dead_code, non_snake_case)]
#[derive(Debug, Serialize, Deserialize)]
pub struct WiFiAccessPoint {
pub macAddress: String,
pub age: Option<i64>,
pub channel: Option<i32>,
pub frequency: Option<i32>,
pub radioType: Option<String>,
pub signalToNoiseRatio: Option<i64>,
pub signalStrength: Option<i64>,
pub ssid: Option<String>,
}
#[cfg(test)]
mod test {
use crate::*;
use geohash::Coord;
#[test]
fn test_geobitset() {
for i in 0..32 {
let c = geohash_index_to_char(i);
let i2 = geohash_char_to_index(c);
assert_eq!(i as u8, i2);
}
}
#[test]
fn test_geosegment_index() {
assert_eq!(geosegment_to_index(&[b'0', b'0']), 0);
assert_eq!(geosegment_to_index(&[b'0', b'z']), 31);
assert_eq!(geosegment_to_index(&[b'z', b'0']), 992);
assert_eq!(geosegment_to_index(&[b'z', b'z']), 1023);
assert_eq!(index_to_geosegment(0), [b'0', b'0']);
assert_eq!(index_to_geosegment(31), [b'0', b'z']);
assert_eq!(index_to_geosegment(992), [b'z', b'0']);
assert_eq!(index_to_geosegment(1023), [b'z', b'z']);
}
macro_rules! gss {
($($s:expr),* $(,)?) => {{
let mut vec = Vec::new();
$(
let bytes = $s.as_bytes();
assert_eq!(bytes.len(), 2);
vec.push([bytes[0], bytes[1]]);
)*
vec
}};
}
#[test]
fn test_bloomhashstore() {
let mut a = BloomHashStore::new();
a.insert("0bcdef", &[b'0', b'0']);
println!("{:?}", a.show());
assert_eq!(
a.show(),
[
("0b", vec![[b'0', b'0']]),
("0bcd", vec![[b'0', b'0']]),
("0bcdef", vec![[b'0', b'0']])
]
);
let mut b = BloomHashStore::new();
b.insert("0bcdef", &[b'b', b'b']);
a.merge_store(&b);
assert_eq!(
a.show(),
[
("0b", vec![[b'0', b'0'], [b'b', b'b']]),
("0bcd", vec![[b'0', b'0'], [b'b', b'b']]),
("0bcdef", vec![[b'0', b'0'], [b'b', b'b']]),
]
);
a.insert("0bcdww", &[b'w', b'w']);
assert_eq!(get_segments(a.extract("0bcdef0000")), gss!("00", "bb"));
assert_eq!(get_segments(a.extract("0bcdef")), gss!("00", "bb"));
assert_eq!(get_segments(a.extract("0bcd")), gss!("00", "bb", "ww"));
}
#[test]
fn test_fuse_locations() {
let w_locations = vec![
Plocation {
loc: Coord { x: 3.0, y: 5.0 },
lsm: 3.,
},
Plocation {
loc: Coord { x: 4.0, y: 4.5 },
lsm: 2.,
},
Plocation {
loc: Coord { x: 5.0, y: 4.0 },
lsm: 1.,
},
Plocation {
loc: Coord { x: 6.0, y: 3.5 },
lsm: 1.,
},
NOWHERE,
];
let out = fuse_location_sources(w_locations);
assert_eq!(
out.loc,
Coord {
x: 5.223529411764705,
y: 3.888235294117647
}
);
assert_eq!(out.lsm, 0.65079135);
}
#[test]
fn lsm() {
for geohash_len in 0..9 {
let lsm = geohash_length_to_lsm(&geohash_len);
let glen = lsm_to_geohash_length(lsm);
assert_eq!(geohash_len, glen);
}
assert_eq!(lsm_to_geohash_length(1.), 8);
assert_eq!(lsm_to_geohash_length(10.), 7);
assert_eq!(lsm_to_geohash_length(100.), 6);
assert_eq!(lsm_to_geohash_length(1_000.), 4);
assert_eq!(lsm_to_geohash_length(10_000.), 3);
assert_eq!(lsm_to_geohash_length(100_000.), 2);
assert_eq!(lsm_to_geohash_length(1_000_000.), 1);
}
}