Reservoir Sampling

Reservoir sampling is a category of randomised algorithms for choosing a simple random sample, without replacement, of k items from a population of an unknown size: N (∞).

Select k random numbers from a stream of numbers
Solution I
1. One by one select an item from stream[0…N-1].
2. If the selected item is not previously selected, then put it in the reservoir.

To check, if this item was previously selected or not, search in the reservoir.

It won’t work, if k is large or input is in the form of an infinite stream.

Code Implementation

void printReservoir (vector<int> reservoir) {
    cout<<"Items in the reservoir:"<<endl;
    
    for (int item: reservoir) {
        cout<<item<<" ";
    }
    
    cout<<endl;
}

bool checkItemPresent (vector<int> reservoir, int x) {
    for (int item: reservoir) {
        if (item == x) {
            return true;
        }
    }
    
    return false;
}

void selectReservoir (int stream[], int N, int k) {
    int i; //current index of elements in stream[]
    
    // reservoir[] vector contain selected items.
    // Initialise it with first k elements of stream[]
    vector<int> reservoir;
    
    for (i = 0; i < k; i++) {
        reservoir.push_back(stream[i]);
    }
    
    printReservoir(reservoir);
    
    // N is the number of elements in the stream
    while (i < N) {
        if (!checkItemPresent(reservoir, stream[i])) {
            int j = rand()%k;
            reservoir[j] = stream[i];
            
            printReservoir(reservoir);
        }
        
        i++;
    }
    
}

O(k*N) Time complexity, O(k) Space complexity

Solution II
1. Copy first k elements of stream in an array: reservoir[0…k-1]
2. for (i = k+1 to Nth item)
     a) Select a random number between 0 to i where i is the current item in stream. 
        Let the number be j. (To generate a random number you can use:
        C++ STL inbuilt function – rand().)
     b) If j ∈ [0…k) replace reservoir[j] with stream[i]. 

Code Implementation

void printReservoir (vector<int> reservoir) {
    cout<<"Items in the reservoir:"<<endl;
    
    for (int item: reservoir) {
        cout<<item<<" ";
    }
    
    cout<<endl;
}

void selectReservoirOptimized (int stream[], int N, int k) {
    int i; //current index of elements in stream[]
    
    // reservoir[] vector contain selected items.
    // Initialise it with first k elements of stream[]
    vector<int> reservoir;
    
    for (i = 0; i < k; i++) {
        reservoir.push_back(stream[i]);
    }
    
    printReservoir(reservoir);
    
    // N is the number of elements in the stream
    while (i < N) {
        
        int r = rand()%(i+1);
        
        if (r < k) {
            reservoir[r] = stream[i];
        }
        
        printReservoir(reservoir);
        
        i++;
    }
    
}

O(N) Time complexity, O(k) Space complexity

Solution III
for each iteration i (1 to n):
a) Generate a random number between 0 and i  [i is the current stream count]:
r = rand() % (i+1)
b) if (i == r):
return (x = stream[i])
else,
return x (previous result)

for i == 0, x = stream[i]

Code Implementation

void selectReservoirItem (int stream[], int N) {
    int i=0; //current index of elements in stream[]
    int x = stream[i];
    
    // N is the number of elements in the stream
    while (i < N) {
        int r = rand()%(i+1);
        
        if (r == i) {
            x = stream[i];
        }
        
        cout<<x<<endl;
        i++;
    }
    
}

O(N) Time complexity, O(1) Space complexity

Reservoir Sampling Code Implementation

//
//  main.cpp
//  Reservoir Sampling
//
//  Created by Himanshu on 27/11/22.
//

#include <iostream>
#include <vector>
using namespace std;

void printReservoir (vector<int> reservoir) {
    cout<<"Items in the reservoir:"<<endl;
    
    for (int item: reservoir) {
        cout<<item<<" ";
    }
    
    cout<<endl;
}

bool checkItemPresent (vector<int> reservoir, int x) {
    for (int item: reservoir) {
        if (item == x) {
            return true;
        }
    }
    
    return false;
}

void selectReservoir (int stream[], int N, int k) {
    int i; //current index of elements in stream[]
    
    // reservoir[] vector contain selected items.
    // Initialise it with first k elements of stream[]
    vector<int> reservoir;
    
    for (i = 0; i < k; i++) {
        reservoir.push_back(stream[i]);
    }
    
    printReservoir(reservoir);
    
    // N is the number of elements in the stream
    while (i < N) {
        if (!checkItemPresent(reservoir, stream[i])) {
            int j = rand()%k;
            reservoir[j] = stream[i];
            
            printReservoir(reservoir);
        }
        
        i++;
    }
    
}

void selectReservoirOptimized (int stream[], int N, int k) {
    int i; //current index of elements in stream[]
    
    // reservoir[] vector contain selected items.
    // Initialise it with first k elements of stream[]
    vector<int> reservoir;
    
    for (i = 0; i < k; i++) {
        reservoir.push_back(stream[i]);
    }
    
    printReservoir(reservoir);
    
    // N is the number of elements in the stream
    while (i < N) {
        
        int r = rand()%(i+1);
        
        if (r < k) {
            reservoir[r] = stream[i];
        }
        
        printReservoir(reservoir);
        
        i++;
    }
    
}

void selectReservoirItem (int stream[], int N) {
    int i=0; //current index of elements in stream[]
    int x = stream[i];
    
    // N is the number of elements in the stream
    while (i < N) {
        int r = rand()%(i+1);
        
        if (r == i) {
            x = stream[i];
        }
        
        cout<<x<<endl;
        i++;
    }
    
}

int main () {
    int stream[] = {1, 2, 5, 5, 3, 7, 8, 8, 8, 11, 12};
    
    int N = 11;
    int k = 4;
    
    cout<<"Reservoir Sampling (Solution I)"<<endl;
    selectReservoir(stream, N, k);
    
    cout<<endl<<"Reservoir Sampling Optimised (Solution II)"<<endl;
    selectReservoirOptimized(stream, N, k);
    
    cout<<endl<<"Reservoir item (Solution III)"<<endl;
    selectReservoirItem(stream, N);
    
    return 0;
}

Output

Reservoir Sampling (Solution I)
Items in the reservoir:
1 2 5 5 
Items in the reservoir:
1 2 5 3 
Items in the reservoir:
1 2 7 3 
Items in the reservoir:
1 8 7 3 
Items in the reservoir:
1 8 7 11 
Items in the reservoir:
1 12 7 11 

Reservoir Sampling Optimised (Solution II)
Items in the reservoir:
1 2 5 5 
Items in the reservoir:
3 2 5 5 
Items in the reservoir:
3 2 5 5 
Items in the reservoir:
3 2 8 5 
Items in the reservoir:
3 8 8 5 
Items in the reservoir:
3 8 8 5 
Items in the reservoir:
3 8 11 5 
Items in the reservoir:
3 8 11 5 

Reservoir item (Solution III)
1
2
5
5
5
5
5
5
8
8
8

Here’s a working example: Reservoir Sampling

Leave a Reply

Your email address will not be published. Required fields are marked *