Problem Statement
Let’s given n points in a plane. We’ve to find k nearest neighbors of a given point ‘p’ from the given n points.
Problem
Naive Solution O(nlogn)
Let’s call the given point be P and set of points be S.
Iterate over S and find euclidean distance between each point in S and P. Store these distances in a vector. Let’s call this vector distances[].
Most obvious solution, seems to sort this vector distances[] and get first k nearest neighbors. This algorithm is 𝑂(𝑛𝑙𝑜𝑔𝑛).
Code
//
// main.cpp
// K nearest neighbors
//
// Created by Himanshu on 18/08/21.
//
#include <iostream>
#include <vector>
#include <algorithm>
#include <math.h>
using namespace std;
int const N = 5;
bool cmp( const vector<float>& p,
const vector<float>& q ) {
return p[1] < q[1];
}
void solve (int points[][2], int point[], int k) {
vector< vector<float> > distance;
for (int i=0; i<N; i++) {
float d = sqrt((pow((point[0] - points[i][0]), 2)) + (pow((point[1] - points[i][1]), 2)));
float pos = i;
distance.push_back({pos, d});
}
sort(distance.begin(), distance.end(), &cmp);
cout<<"K nearest neighbors:"<<endl;
for (int i=0; i<k; i++) {
cout<<distance[i][0]<<" - distance: "<<distance[i][1]<<endl;
}
}
int main () {
int points[N][2] = {{1, 1}, {3, 4}, {2, 3}, {7, 8}, {2, 5}};
int point[2] = {0, 0};
int k = 2;
solve(points, point, k);
return 0;
}
Here’s a working example:
But we can do better than that. We can solve this in 𝑂(𝑛).
O(n) approach using Heaps
Make a min heap out of this vector distance[]. This step is 𝑂(𝑛). (You can read more about binary heap here.) Now, extract minimum element from this heap using extractMin operation of heap. Operation extractMin takes 𝑂(𝑙𝑜𝑔𝑛) time. Calling it k times, you get the running time as 𝑂(𝑛+𝑘𝑙𝑜𝑔𝑛).
//
// main.cpp
// K nearest neighbors
//
// Created by Himanshu on 18/08/21.
//
#include <iostream>
#include <vector>
#include <algorithm>
#include <math.h>
using namespace std;
int const N = 5;
bool cmp( const vector<float>& p,
const vector<float>& q ) {
return p[1] > q[1];
}
void solve (int points[][2], int point[], int k) {
vector< vector<float> > minHeap;
for (int i=0; i<N; i++) {
float d = sqrt((pow((point[0] - points[i][0]), 2)) + (pow((point[1] - points[i][1]), 2)));
float pos = i;
minHeap.push_back({pos, d});
push_heap(minHeap.begin(), minHeap.end(), &cmp);
}
cout<<"K nearest neighbors:"<<endl;
for (int i=0; i<k; i++) {
vector<float> nearestPoints = minHeap.front();
pop_heap(minHeap.begin(), minHeap.end(), &cmp);
minHeap.pop_back();
cout<<nearestPoints[0]<<" - distance: "<<nearestPoints[1]<<endl;
}
}
int main () {
int points[N][2] = {{1, 1}, {3, 4}, {2, 3}, {7, 8}, {2, 5}};
int point[2] = {0, 0};
int k = 2;
solve(points, point, k);
return 0;
}
Here’s a working example: