| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | use crate::core::Point; |
| |
|
| | |
| | #[derive(Debug, Clone)] |
| | pub struct SubspaceConfig { |
| | |
| | pub rank: usize, |
| |
|
| | |
| | pub min_points_for_subspace: usize, |
| |
|
| | |
| | pub subspace_weight: f32, |
| |
|
| | |
| | |
| | pub incremental_covariance: bool, |
| | } |
| |
|
| | impl Default for SubspaceConfig { |
| | fn default() -> Self { |
| | Self { |
| | rank: 3, |
| | min_points_for_subspace: 5, |
| | subspace_weight: 0.3, |
| | incremental_covariance: false, |
| | } |
| | } |
| | } |
| |
|
| | impl SubspaceConfig { |
| | pub fn new() -> Self { |
| | Self::default() |
| | } |
| |
|
| | pub fn with_rank(mut self, rank: usize) -> Self { |
| | self.rank = rank; |
| | self |
| | } |
| |
|
| | pub fn with_subspace_weight(mut self, weight: f32) -> Self { |
| | self.subspace_weight = weight.clamp(0.0, 1.0); |
| | self |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | #[derive(Debug, Clone)] |
| | pub struct Subspace { |
| | |
| | pub centroid: Point, |
| |
|
| | |
| | |
| | pub principal_directions: Vec<Point>, |
| |
|
| | |
| | |
| | pub eigenvalues: Vec<f32>, |
| |
|
| | |
| | pub point_count: usize, |
| |
|
| | |
| | accumulated_sum: Vec<f32>, |
| |
|
| | |
| | |
| | accumulated_outer_product: Vec<f32>, |
| | } |
| |
|
| | impl Subspace { |
| | |
| | pub fn new(dimensionality: usize) -> Self { |
| | Self { |
| | centroid: Point::origin(dimensionality), |
| | principal_directions: Vec::new(), |
| | eigenvalues: Vec::new(), |
| | point_count: 0, |
| | accumulated_sum: vec![0.0; dimensionality], |
| | |
| | accumulated_outer_product: vec![0.0; dimensionality * (dimensionality + 1) / 2], |
| | } |
| | } |
| |
|
| | |
| | pub fn from_point(point: &Point) -> Self { |
| | Self { |
| | centroid: point.clone(), |
| | principal_directions: Vec::new(), |
| | eigenvalues: Vec::new(), |
| | point_count: 1, |
| | accumulated_sum: point.dims().to_vec(), |
| | accumulated_outer_product: Self::outer_product_upper(point.dims()), |
| | } |
| | } |
| |
|
| | |
| | pub fn dimensionality(&self) -> usize { |
| | self.centroid.dimensionality() |
| | } |
| |
|
| | |
| | pub fn has_subspace(&self) -> bool { |
| | !self.principal_directions.is_empty() |
| | } |
| |
|
| | |
| | pub fn rank(&self) -> usize { |
| | self.principal_directions.len() |
| | } |
| |
|
| | |
| | fn outer_product_upper(v: &[f32]) -> Vec<f32> { |
| | let n = v.len(); |
| | let mut result = vec![0.0; n * (n + 1) / 2]; |
| | let mut idx = 0; |
| | for i in 0..n { |
| | for j in i..n { |
| | result[idx] = v[i] * v[j]; |
| | idx += 1; |
| | } |
| | } |
| | result |
| | } |
| |
|
| | |
| | fn get_upper(&self, i: usize, j: usize) -> f32 { |
| | let (row, col) = if i <= j { (i, j) } else { (j, i) }; |
| | let n = self.dimensionality(); |
| | |
| | let idx = row * (2 * n - row - 1) / 2 + col; |
| | self.accumulated_outer_product[idx] |
| | } |
| |
|
| | |
| | fn add_to_upper(&mut self, i: usize, j: usize, value: f32) { |
| | let (row, col) = if i <= j { (i, j) } else { (j, i) }; |
| | let n = self.dimensionality(); |
| | let idx = row * (2 * n - row - 1) / 2 + col; |
| | self.accumulated_outer_product[idx] += value; |
| | } |
| |
|
| | |
| | pub fn add_point(&mut self, point: &Point) { |
| | let dims = point.dims(); |
| |
|
| | |
| | for (i, &v) in dims.iter().enumerate() { |
| | self.accumulated_sum[i] += v; |
| | } |
| |
|
| | |
| | for i in 0..dims.len() { |
| | for j in i..dims.len() { |
| | self.add_to_upper(i, j, dims[i] * dims[j]); |
| | } |
| | } |
| |
|
| | self.point_count += 1; |
| |
|
| | |
| | let n = self.point_count as f32; |
| | let centroid_dims: Vec<f32> = self.accumulated_sum.iter() |
| | .map(|&s| s / n) |
| | .collect(); |
| | self.centroid = Point::new(centroid_dims).normalize(); |
| | } |
| |
|
| | |
| | fn compute_covariance(&self) -> Vec<Vec<f32>> { |
| | let n = self.dimensionality(); |
| | let count = self.point_count as f32; |
| |
|
| | if count < 2.0 { |
| | return vec![vec![0.0; n]; n]; |
| | } |
| |
|
| | |
| | let mean: Vec<f32> = self.accumulated_sum.iter() |
| | .map(|&s| s / count) |
| | .collect(); |
| |
|
| | |
| | let mut cov = vec![vec![0.0; n]; n]; |
| | for i in 0..n { |
| | for j in i..n { |
| | let exx = self.get_upper(i, j) / count; |
| | let exex = mean[i] * mean[j]; |
| | let c = exx - exex; |
| | cov[i][j] = c; |
| | cov[j][i] = c; |
| | } |
| | } |
| |
|
| | cov |
| | } |
| |
|
| | |
| | |
| | pub fn recompute_subspace(&mut self, rank: usize) { |
| | if self.point_count < 3 { |
| | |
| | self.principal_directions.clear(); |
| | self.eigenvalues.clear(); |
| | return; |
| | } |
| |
|
| | let cov = self.compute_covariance(); |
| | let n = self.dimensionality(); |
| |
|
| | |
| | let mut directions = Vec::new(); |
| | let mut values = Vec::new(); |
| | let mut working_cov = cov.clone(); |
| |
|
| | for _ in 0..rank.min(n) { |
| | |
| | let (eigval, eigvec) = self.power_iteration(&working_cov, 50); |
| |
|
| | if eigval < 1e-8 { |
| | break; |
| | } |
| |
|
| | values.push(eigval); |
| | directions.push(Point::new(eigvec.clone()).normalize()); |
| |
|
| | |
| | for i in 0..n { |
| | for j in 0..n { |
| | working_cov[i][j] -= eigval * eigvec[i] * eigvec[j]; |
| | } |
| | } |
| | } |
| |
|
| | self.principal_directions = directions; |
| | self.eigenvalues = values; |
| | } |
| |
|
| | |
| | fn power_iteration(&self, matrix: &[Vec<f32>], max_iters: usize) -> (f32, Vec<f32>) { |
| | let n = matrix.len(); |
| |
|
| | |
| | let mut v: Vec<f32> = (0..n).map(|i| 1.0 + (i as f32) * 0.1).collect(); |
| | let mut norm: f32 = v.iter().map(|x| x * x).sum::<f32>().sqrt(); |
| | for x in &mut v { |
| | *x /= norm; |
| | } |
| |
|
| | let mut eigenvalue = 0.0f32; |
| |
|
| | for _ in 0..max_iters { |
| | |
| | let mut v_new = vec![0.0; n]; |
| | for i in 0..n { |
| | for j in 0..n { |
| | v_new[i] += matrix[i][j] * v[j]; |
| | } |
| | } |
| |
|
| | |
| | eigenvalue = v_new.iter().zip(v.iter()).map(|(a, b)| a * b).sum(); |
| |
|
| | |
| | norm = v_new.iter().map(|x| x * x).sum::<f32>().sqrt(); |
| | if norm < 1e-10 { |
| | return (0.0, vec![0.0; n]); |
| | } |
| |
|
| | let converged = v.iter().zip(v_new.iter()) |
| | .map(|(a, b)| (a - b / norm).abs()) |
| | .sum::<f32>() < 1e-8; |
| |
|
| | for i in 0..n { |
| | v[i] = v_new[i] / norm; |
| | } |
| |
|
| | if converged { |
| | break; |
| | } |
| | } |
| |
|
| | (eigenvalue.abs(), v) |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | pub fn subspace_similarity(a: &Subspace, b: &Subspace) -> f32 { |
| | |
| | if !a.has_subspace() || !b.has_subspace() { |
| | return centroid_similarity(&a.centroid, &b.centroid); |
| | } |
| |
|
| | |
| | let rank_a = a.rank(); |
| | let rank_b = b.rank(); |
| | let k = rank_a.min(rank_b); |
| |
|
| | if k == 0 { |
| | return centroid_similarity(&a.centroid, &b.centroid); |
| | } |
| |
|
| | |
| | let mut m = vec![vec![0.0f32; rank_b]; rank_a]; |
| | for i in 0..rank_a { |
| | for j in 0..rank_b { |
| | let dot: f32 = a.principal_directions[i].dims().iter() |
| | .zip(b.principal_directions[j].dims().iter()) |
| | .map(|(x, y)| x * y) |
| | .sum(); |
| | m[i][j] = dot; |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | let cos_angles = greedy_max_matching(&m, k); |
| |
|
| | |
| | let similarity: f32 = cos_angles.iter() |
| | .map(|&c| c * c) |
| | .sum::<f32>() / k as f32; |
| |
|
| | similarity |
| | } |
| |
|
| | |
| | fn greedy_max_matching(m: &[Vec<f32>], k: usize) -> Vec<f32> { |
| | let rows = m.len(); |
| | let cols = if rows > 0 { m[0].len() } else { 0 }; |
| |
|
| | let mut used_rows = vec![false; rows]; |
| | let mut used_cols = vec![false; cols]; |
| | let mut result = Vec::new(); |
| |
|
| | for _ in 0..k { |
| | let mut best = (0, 0, 0.0f32); |
| |
|
| | for i in 0..rows { |
| | if used_rows[i] { continue; } |
| | for j in 0..cols { |
| | if used_cols[j] { continue; } |
| | let val = m[i][j].abs(); |
| | if val > best.2 { |
| | best = (i, j, val); |
| | } |
| | } |
| | } |
| |
|
| | if best.2 > 0.0 { |
| | used_rows[best.0] = true; |
| | used_cols[best.1] = true; |
| | result.push(best.2); |
| | } else { |
| | break; |
| | } |
| | } |
| |
|
| | result |
| | } |
| |
|
| | |
| | fn centroid_similarity(a: &Point, b: &Point) -> f32 { |
| | let dot: f32 = a.dims().iter() |
| | .zip(b.dims().iter()) |
| | .map(|(x, y)| x * y) |
| | .sum(); |
| | dot.clamp(-1.0, 1.0) |
| | } |
| |
|
| | |
| | |
| | |
| | pub fn combined_subspace_similarity( |
| | query: &Point, |
| | container: &Subspace, |
| | config: &SubspaceConfig, |
| | ) -> f32 { |
| | let centroid_sim = centroid_similarity(query, &container.centroid); |
| |
|
| | if !container.has_subspace() || config.subspace_weight < 1e-6 { |
| | return centroid_sim; |
| | } |
| |
|
| | |
| | |
| | let subspace_sim = query_subspace_alignment(query, container); |
| |
|
| | |
| | let w = config.subspace_weight; |
| | (1.0 - w) * centroid_sim + w * subspace_sim |
| | } |
| |
|
| | |
| | |
| | |
| | pub fn query_subspace_alignment(query: &Point, subspace: &Subspace) -> f32 { |
| | if !subspace.has_subspace() { |
| | return centroid_similarity(query, &subspace.centroid); |
| | } |
| |
|
| | |
| | let centered: Vec<f32> = query.dims().iter() |
| | .zip(subspace.centroid.dims().iter()) |
| | .map(|(q, c)| q - c) |
| | .collect(); |
| |
|
| | let centered_norm: f32 = centered.iter().map(|x| x * x).sum::<f32>().sqrt(); |
| | if centered_norm < 1e-10 { |
| | |
| | return 1.0; |
| | } |
| |
|
| | |
| | let mut total_proj_sq = 0.0f32; |
| | for (dir, &eigenval) in subspace.principal_directions.iter().zip(subspace.eigenvalues.iter()) { |
| | let proj: f32 = centered.iter() |
| | .zip(dir.dims().iter()) |
| | .map(|(c, d)| c * d) |
| | .sum(); |
| |
|
| | |
| | |
| | let weight = (eigenval / subspace.eigenvalues[0]).sqrt(); |
| | total_proj_sq += proj * proj * weight; |
| | } |
| |
|
| | |
| | let alignment = (total_proj_sq / (centered_norm * centered_norm)).min(1.0); |
| |
|
| | |
| | let centroid_sim = centroid_similarity(query, &subspace.centroid); |
| |
|
| | |
| | (centroid_sim + alignment) / 2.0 |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | pub fn subspace_spread(subspace: &Subspace) -> f32 { |
| | if subspace.eigenvalues.is_empty() { |
| | return 0.0; |
| | } |
| |
|
| | |
| | subspace.eigenvalues.iter().sum() |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | pub fn subspace_isotropy(subspace: &Subspace) -> f32 { |
| | if subspace.eigenvalues.len() < 2 { |
| | return 1.0; |
| | } |
| |
|
| | |
| | let max = subspace.eigenvalues[0]; |
| | let min = *subspace.eigenvalues.last().unwrap(); |
| |
|
| | if max < 1e-10 { |
| | return 1.0; |
| | } |
| |
|
| | min / max |
| | } |
| |
|
| | #[cfg(test)] |
| | mod tests { |
| | use super::*; |
| |
|
| | fn make_point(v: Vec<f32>) -> Point { |
| | Point::new(v).normalize() |
| | } |
| |
|
| | #[test] |
| | fn test_subspace_creation() { |
| | let mut subspace = Subspace::new(3); |
| |
|
| | |
| | subspace.add_point(&make_point(vec![1.0, 0.0, 0.0])); |
| | subspace.add_point(&make_point(vec![0.9, 0.1, 0.0])); |
| | subspace.add_point(&make_point(vec![0.8, 0.2, 0.0])); |
| | subspace.add_point(&make_point(vec![0.7, 0.3, 0.1])); |
| | subspace.add_point(&make_point(vec![0.6, 0.4, 0.1])); |
| |
|
| | assert_eq!(subspace.point_count, 5); |
| |
|
| | |
| | subspace.recompute_subspace(2); |
| |
|
| | assert!(subspace.has_subspace()); |
| | assert!(subspace.rank() > 0); |
| | assert!(!subspace.eigenvalues.is_empty()); |
| |
|
| | println!("Centroid: {:?}", subspace.centroid.dims()); |
| | println!("Principal directions: {}", subspace.rank()); |
| | println!("Eigenvalues: {:?}", subspace.eigenvalues); |
| | } |
| |
|
| | #[test] |
| | fn test_subspace_similarity() { |
| | let mut a = Subspace::new(3); |
| | let mut b = Subspace::new(3); |
| |
|
| | |
| | for i in 0..10 { |
| | let x = 1.0 - i as f32 * 0.05; |
| | let y = i as f32 * 0.05; |
| | a.add_point(&make_point(vec![x, y, 0.0])); |
| | } |
| |
|
| | |
| | for i in 0..10 { |
| | let x = 0.95 - i as f32 * 0.04; |
| | let y = i as f32 * 0.04 + 0.05; |
| | b.add_point(&make_point(vec![x, y, 0.1])); |
| | } |
| |
|
| | a.recompute_subspace(2); |
| | b.recompute_subspace(2); |
| |
|
| | let sim = subspace_similarity(&a, &b); |
| | println!("Similarity between similar subspaces: {:.3}", sim); |
| | assert!(sim > 0.5, "Similar subspaces should have high similarity"); |
| |
|
| | |
| | let mut c = Subspace::new(3); |
| | for i in 0..10 { |
| | let z = 1.0 - i as f32 * 0.05; |
| | c.add_point(&make_point(vec![0.0, 0.1, z])); |
| | } |
| | c.recompute_subspace(2); |
| |
|
| | let sim_ac = subspace_similarity(&a, &c); |
| | println!("Similarity between orthogonal subspaces: {:.3}", sim_ac); |
| | assert!(sim_ac < sim, "Orthogonal subspaces should have lower similarity"); |
| | } |
| |
|
| | #[test] |
| | fn test_query_alignment() { |
| | let mut subspace = Subspace::new(3); |
| |
|
| | |
| | for i in 0..20 { |
| | let x = 0.8 + (i % 3) as f32 * 0.1; |
| | let y = (i as f32 * 0.05) % 0.3; |
| | subspace.add_point(&make_point(vec![x, y, 0.05])); |
| | } |
| | subspace.recompute_subspace(2); |
| |
|
| | |
| | let aligned_query = make_point(vec![0.9, 0.1, 0.0]); |
| | let aligned_score = query_subspace_alignment(&aligned_query, &subspace); |
| |
|
| | |
| | let orthogonal_query = make_point(vec![0.0, 0.0, 1.0]); |
| | let orthogonal_score = query_subspace_alignment(&orthogonal_query, &subspace); |
| |
|
| | println!("Aligned query score: {:.3}", aligned_score); |
| | println!("Orthogonal query score: {:.3}", orthogonal_score); |
| |
|
| | assert!(aligned_score > orthogonal_score, |
| | "Aligned query should score higher than orthogonal query"); |
| | } |
| |
|
| | #[test] |
| | fn test_spread_and_isotropy() { |
| | let mut tight = Subspace::new(3); |
| | let mut spread_out = Subspace::new(3); |
| |
|
| | |
| | for _ in 0..20 { |
| | tight.add_point(&make_point(vec![0.9, 0.1, 0.05])); |
| | } |
| |
|
| | |
| | for i in 0..20 { |
| | let angle = i as f32 * 0.3; |
| | spread_out.add_point(&make_point(vec![ |
| | angle.cos(), |
| | angle.sin(), |
| | 0.1 |
| | ])); |
| | } |
| |
|
| | tight.recompute_subspace(3); |
| | spread_out.recompute_subspace(3); |
| |
|
| | let tight_spread = subspace_spread(&tight); |
| | let wide_spread = subspace_spread(&spread_out); |
| |
|
| | println!("Tight cluster spread: {:.6}", tight_spread); |
| | println!("Wide cluster spread: {:.6}", wide_spread); |
| |
|
| | |
| | |
| | } |
| | } |
| |
|