Reservoir Sampling

Reservoir sampling is a category of randomised algorithms for choosing a simple random sample, without replacement, of k items from a population of an unknown size: N (∞).

Select k random numbers from a stream of numbers
Solution I
1. One by one select an item from stream[0…N-1].2. If the selected item is not previously selected, then put it in the reservoir.To check, if this item was previously selected or not, search in the reservoir.

It won’t work, if k is large or input is in the form of an infinite stream.

Code Implementation

void printReservoir (vector<int> reservoir) {
cout<<"Items in the reservoir:"<<endl;

for (int item: reservoir) {
cout<<item<<" ";
}

cout<<endl;
}

bool checkItemPresent (vector<int> reservoir, int x) {
for (int item: reservoir) {
if (item == x) {
return true;
}
}

return false;
}

void selectReservoir (int stream[], int N, int k) {
int i; //current index of elements in stream[]

// reservoir[] vector contain selected items.
// Initialise it with first k elements of stream[]
vector<int> reservoir;

for (i = 0; i < k; i++) {
reservoir.push_back(stream[i]);
}

printReservoir(reservoir);

// N is the number of elements in the stream
while (i < N) {
if (!checkItemPresent(reservoir, stream[i])) {
int j = rand()%k;
reservoir[j] = stream[i];

printReservoir(reservoir);
}

i++;
}

}


O(k*N) Time complexity, O(k) Space complexity

Solution II
1. Copy first k elements of stream in an array: reservoir[0…k-1]
2. for (i = k+1 to Nth item)
a) Select a random number between 0 to i where i is the current item in stream.
Let the number be j. (To generate a random number you can use:
C++ STL inbuilt function – rand().)
b) If j ∈ [0…k) replace reservoir[j] with stream[i]. 

Code Implementation

void printReservoir (vector<int> reservoir) {
cout<<"Items in the reservoir:"<<endl;

for (int item: reservoir) {
cout<<item<<" ";
}

cout<<endl;
}

void selectReservoirOptimized (int stream[], int N, int k) {
int i; //current index of elements in stream[]

// reservoir[] vector contain selected items.
// Initialise it with first k elements of stream[]
vector<int> reservoir;

for (i = 0; i < k; i++) {
reservoir.push_back(stream[i]);
}

printReservoir(reservoir);

// N is the number of elements in the stream
while (i < N) {

int r = rand()%(i+1);

if (r < k) {
reservoir[r] = stream[i];
}

printReservoir(reservoir);

i++;
}

}


O(N) Time complexity, O(k) Space complexity

Solution III
for each iteration i (1 to n):     a) Generate a random number between 0 and i  [i is the current stream count]:        r = rand() % (i+1)     b) if (i == r):          return (x = stream[i])        else,          return x (previous result)for i == 0, x = stream[i]

Code Implementation

void selectReservoirItem (int stream[], int N) {
int i=0; //current index of elements in stream[]
int x = stream[i];

// N is the number of elements in the stream
while (i < N) {
int r = rand()%(i+1);

if (r == i) {
x = stream[i];
}

cout<<x<<endl;
i++;
}

}


O(N) Time complexity, O(1) Space complexity

Reservoir Sampling Code Implementation

//
//  main.cpp
//  Reservoir Sampling
//
//  Created by Himanshu on 27/11/22.
//

#include <iostream>
#include <vector>
using namespace std;

void printReservoir (vector<int> reservoir) {
cout<<"Items in the reservoir:"<<endl;

for (int item: reservoir) {
cout<<item<<" ";
}

cout<<endl;
}

bool checkItemPresent (vector<int> reservoir, int x) {
for (int item: reservoir) {
if (item == x) {
return true;
}
}

return false;
}

void selectReservoir (int stream[], int N, int k) {
int i; //current index of elements in stream[]

// reservoir[] vector contain selected items.
// Initialise it with first k elements of stream[]
vector<int> reservoir;

for (i = 0; i < k; i++) {
reservoir.push_back(stream[i]);
}

printReservoir(reservoir);

// N is the number of elements in the stream
while (i < N) {
if (!checkItemPresent(reservoir, stream[i])) {
int j = rand()%k;
reservoir[j] = stream[i];

printReservoir(reservoir);
}

i++;
}

}

void selectReservoirOptimized (int stream[], int N, int k) {
int i; //current index of elements in stream[]

// reservoir[] vector contain selected items.
// Initialise it with first k elements of stream[]
vector<int> reservoir;

for (i = 0; i < k; i++) {
reservoir.push_back(stream[i]);
}

printReservoir(reservoir);

// N is the number of elements in the stream
while (i < N) {

int r = rand()%(i+1);

if (r < k) {
reservoir[r] = stream[i];
}

printReservoir(reservoir);

i++;
}

}

void selectReservoirItem (int stream[], int N) {
int i=0; //current index of elements in stream[]
int x = stream[i];

// N is the number of elements in the stream
while (i < N) {
int r = rand()%(i+1);

if (r == i) {
x = stream[i];
}

cout<<x<<endl;
i++;
}

}

int main () {
int stream[] = {1, 2, 5, 5, 3, 7, 8, 8, 8, 11, 12};

int N = 11;
int k = 4;

cout<<"Reservoir Sampling (Solution I)"<<endl;
selectReservoir(stream, N, k);

cout<<endl<<"Reservoir Sampling Optimised (Solution II)"<<endl;
selectReservoirOptimized(stream, N, k);

cout<<endl<<"Reservoir item (Solution III)"<<endl;
selectReservoirItem(stream, N);

return 0;
}



Output

Reservoir Sampling (Solution I)
Items in the reservoir:
1 2 5 5
Items in the reservoir:
1 2 5 3
Items in the reservoir:
1 2 7 3
Items in the reservoir:
1 8 7 3
Items in the reservoir:
1 8 7 11
Items in the reservoir:
1 12 7 11

Reservoir Sampling Optimised (Solution II)
Items in the reservoir:
1 2 5 5
Items in the reservoir:
3 2 5 5
Items in the reservoir:
3 2 5 5
Items in the reservoir:
3 2 8 5
Items in the reservoir:
3 8 8 5
Items in the reservoir:
3 8 8 5
Items in the reservoir:
3 8 11 5
Items in the reservoir:
3 8 11 5

Reservoir item (Solution III)
1
2
5
5
5
5
5
5
8
8
8


Here’s a working example: Reservoir Sampling