Reservoir sampling is a category of randomised algorithms for choosing a simple random sample, without replacement, of k items from a population of an unknown size: N (∞).
Select k random numbers from a stream of numbers
Solution I
1. One by one select an item from stream[0…N-1].
2. If the selected item is not previously selected, then put it in the reservoir.
To check, if this item was previously selected or not, search in the reservoir.
It won’t work, if k is large or input is in the form of an infinite stream.
Code Implementation
void printReservoir (vector<int> reservoir) {
cout<<"Items in the reservoir:"<<endl;
for (int item: reservoir) {
cout<<item<<" ";
}
cout<<endl;
}
bool checkItemPresent (vector<int> reservoir, int x) {
for (int item: reservoir) {
if (item == x) {
return true;
}
}
return false;
}
void selectReservoir (int stream[], int N, int k) {
int i; //current index of elements in stream[]
// reservoir[] vector contain selected items.
// Initialise it with first k elements of stream[]
vector<int> reservoir;
for (i = 0; i < k; i++) {
reservoir.push_back(stream[i]);
}
printReservoir(reservoir);
// N is the number of elements in the stream
while (i < N) {
if (!checkItemPresent(reservoir, stream[i])) {
int j = rand()%k;
reservoir[j] = stream[i];
printReservoir(reservoir);
}
i++;
}
}
O(k*N) Time complexity, O(k) Space complexity
Solution II
1. Copy first k elements of stream in an array: reservoir[0…k-1] 2. for (i = k+1 to Nth item) a) Select a random number between 0 to i where i is the current item in stream. Let the number be j. (To generate a random number you can use: C++ STL inbuilt function – rand().) b) If j ∈ [0…k) replace reservoir[j] with stream[i].
Code Implementation
void printReservoir (vector<int> reservoir) {
cout<<"Items in the reservoir:"<<endl;
for (int item: reservoir) {
cout<<item<<" ";
}
cout<<endl;
}
void selectReservoirOptimized (int stream[], int N, int k) {
int i; //current index of elements in stream[]
// reservoir[] vector contain selected items.
// Initialise it with first k elements of stream[]
vector<int> reservoir;
for (i = 0; i < k; i++) {
reservoir.push_back(stream[i]);
}
printReservoir(reservoir);
// N is the number of elements in the stream
while (i < N) {
int r = rand()%(i+1);
if (r < k) {
reservoir[r] = stream[i];
}
printReservoir(reservoir);
i++;
}
}
O(N) Time complexity, O(k) Space complexity
Solution III
for each iteration i (1 to n):
a) Generate a random number between 0 and i [i is the current stream count]:
r = rand() % (i+1)
b) if (i == r):
return (x = stream[i])
else,
return x (previous result)
for i == 0, x = stream[i]
Code Implementation
void selectReservoirItem (int stream[], int N) {
int i=0; //current index of elements in stream[]
int x = stream[i];
// N is the number of elements in the stream
while (i < N) {
int r = rand()%(i+1);
if (r == i) {
x = stream[i];
}
cout<<x<<endl;
i++;
}
}
O(N) Time complexity, O(1) Space complexity
Reservoir Sampling Code Implementation
//
// main.cpp
// Reservoir Sampling
//
// Created by Himanshu on 27/11/22.
//
#include <iostream>
#include <vector>
using namespace std;
void printReservoir (vector<int> reservoir) {
cout<<"Items in the reservoir:"<<endl;
for (int item: reservoir) {
cout<<item<<" ";
}
cout<<endl;
}
bool checkItemPresent (vector<int> reservoir, int x) {
for (int item: reservoir) {
if (item == x) {
return true;
}
}
return false;
}
void selectReservoir (int stream[], int N, int k) {
int i; //current index of elements in stream[]
// reservoir[] vector contain selected items.
// Initialise it with first k elements of stream[]
vector<int> reservoir;
for (i = 0; i < k; i++) {
reservoir.push_back(stream[i]);
}
printReservoir(reservoir);
// N is the number of elements in the stream
while (i < N) {
if (!checkItemPresent(reservoir, stream[i])) {
int j = rand()%k;
reservoir[j] = stream[i];
printReservoir(reservoir);
}
i++;
}
}
void selectReservoirOptimized (int stream[], int N, int k) {
int i; //current index of elements in stream[]
// reservoir[] vector contain selected items.
// Initialise it with first k elements of stream[]
vector<int> reservoir;
for (i = 0; i < k; i++) {
reservoir.push_back(stream[i]);
}
printReservoir(reservoir);
// N is the number of elements in the stream
while (i < N) {
int r = rand()%(i+1);
if (r < k) {
reservoir[r] = stream[i];
}
printReservoir(reservoir);
i++;
}
}
void selectReservoirItem (int stream[], int N) {
int i=0; //current index of elements in stream[]
int x = stream[i];
// N is the number of elements in the stream
while (i < N) {
int r = rand()%(i+1);
if (r == i) {
x = stream[i];
}
cout<<x<<endl;
i++;
}
}
int main () {
int stream[] = {1, 2, 5, 5, 3, 7, 8, 8, 8, 11, 12};
int N = 11;
int k = 4;
cout<<"Reservoir Sampling (Solution I)"<<endl;
selectReservoir(stream, N, k);
cout<<endl<<"Reservoir Sampling Optimised (Solution II)"<<endl;
selectReservoirOptimized(stream, N, k);
cout<<endl<<"Reservoir item (Solution III)"<<endl;
selectReservoirItem(stream, N);
return 0;
}
Output
Reservoir Sampling (Solution I) Items in the reservoir: 1 2 5 5 Items in the reservoir: 1 2 5 3 Items in the reservoir: 1 2 7 3 Items in the reservoir: 1 8 7 3 Items in the reservoir: 1 8 7 11 Items in the reservoir: 1 12 7 11 Reservoir Sampling Optimised (Solution II) Items in the reservoir: 1 2 5 5 Items in the reservoir: 3 2 5 5 Items in the reservoir: 3 2 5 5 Items in the reservoir: 3 2 8 5 Items in the reservoir: 3 8 8 5 Items in the reservoir: 3 8 8 5 Items in the reservoir: 3 8 11 5 Items in the reservoir: 3 8 11 5 Reservoir item (Solution III) 1 2 5 5 5 5 5 5 8 8 8
Here’s a working example: Reservoir Sampling