Cleanup scripts for Postgres and MongoDB databases

In order to remove old session and interactions data we have to clear archive service database entries and binary storage database entries. The archive service is using a PostgreSQL (Postgres) database, while the binary storage service is storing data in a MongoDB database.

In order to access the databases referenced in this cleanup guide we’re using two CLI  tools. These tools are psql and mongosh.
The psql command-line client tool is used to interact with the PostgreSQL database engine. 

The MongoDB Shell, mongosh, is a JavaScript and Node.js environment for interacting with a MongoDB deployment in Atlas, locally, or on another remote host.
Depending of specific use case these tools can be installed separately or executed from within already configured database containers.

Here are link with installation instructions for psql and mongosh should a local setup be required.

The following scripts should only be used with version 4.4 and later of the LumenVox software

Removing old sessions, interactions and related audio data

step 1 - extracting audio data MongoDB references into mongoIds.txt file
psql <postgres_connection_string> -t -A -c "SELECT sa.storage_ref FROM storage_audio sa INNER JOIN session s ON s.id=sa.session_id WHERE s.end_date < NOW() - INTERVAL '30 days'" >> mongoIds.txt

postgres_connection_string will be in a format similar to this:
postgres://<username>:<password>@<host>:<port>/<db_name>


step 2 - remove all Postgres session entries older than 30 days in this example
psql <postgres_connection_string> -c "DELETE FROM session WHERE end_date < NOW() - INTERVAL '30 days'"

Removing old grammars data (optional)

Since grammar caching is used, the number of archived grammars will be much less than the number of actual sessions and interactions. Archived grammars can be cleared with the following steps: 

step 1 - extracting grammar data MongoDB references into mongoIds.txt file
psql <postgres_connection_string> -t -A -c "SELECT storage_ref FROM storage_grammar WHERE created_date < NOW() - INTERVAL '30 days'" >> mongoIds.txt


step 2 - remove all Postgres grammar entries older than 30 days in this example
psql <postgres_connection_string> -c "DELETE FROM storage_grammar WHERE created_date < NOW() - INTERVAL '30 days'"

Removing old SSML data (optional)

step 1 - extracting SSML data MongoDB references into mongoIds.txt file
psql <postgres_connection_string> -t -A -c "SELECT storage_ref FROM storage_ssml WHERE created_date < NOW() - INTERVAL '30 days'" >> mongoIds.txt


step 2 - remove all Postgres SSML entries older than 30 days in this example
psql <postgres_connection_string> -c "DELETE FROM storage_ssml WHERE created_date < NOW() - INTERVAL '30 days'"

 

Removing old phrases data (optional)

psql <postgres_connection_string> -c "DELETE FROM storage_phrases WHERE created_date < NOW() - INTERVAL '30 days'"

Removing MongoDB references

In the previous steps we generated a mongoIds.txt file which now contains a list of MongoDB entries. To remove those entries from MongoDB we have to convert them into UUID binary representation used by MongoDB ID. This is done with the steps below: 

step 1 - create a script file

Create a cleanMongoDB.js file with the following content

// UUID conversion helper functions 
// https://github.com/mongodb/mongo-csharp-driver/blob/master/uuidhelpers.js 
function CSUUID(uuid) {     
        var hex = uuid.replace(/[{}-]/g, ""); // remove extra characters    
        var a = hex.substr(6, 2) + hex.substr(4, 2) + hex.substr(2, 2) + hex.substr(0, 2);     
        var b = hex.substr(10, 2) + hex.substr(8, 2);     
        var c = hex.substr(14, 2) + hex.substr(12, 2);     
       var d = hex.substr(16, 16);     
       hex = a + b + c + d;     
       var base64 = HexToBase64(hex);    
       return new BinData(3, base64); 


function HexToBase64(hex) {     
       var base64Digits = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";     
       var base64 = "";     
       var group;     
       for (var i = 0; i < 30; i += 6) {         
             group = parseInt(hex.substr(i, 6), 16);         
             base64 += base64Digits[(group >> 18) & 0x3f];         
             base64 += base64Digits[(group >> 12) & 0x3f];         
             base64 += base64Digits[(group >> 6) & 0x3f];         
             base64 += base64Digits[group & 0x3f];    
    group = parseInt(hex.substr(30, 2), 16);     
      base64 += base64Digits[(group >> 2) & 0x3f];     
      base64 += base64Digits[(group << 4) & 0x3f];     
     base64 += "==";     
     return base64; 


function main() {     
      const idsFilePath = 'mongoIds.txt';     
    // split file content into array of string guids     
    const ids=fs.readFileSync(idsFilePath).toString().split('\n');     
   console.log(db.BinaryStorage.deleteMany({_id: { $in: ids.map(CSUUID)}})); 


main();


step 2 - remove MongoDB entries
mongosh <mongo_connection_string> cleanMongoDB.js

The expected output after running the script should contain the following:

{ acknowledged: true, deletedCount: 0 }, where deletedCount is indicating the number of removed MongoDB entries.


 


Was this article helpful?
Copyright (C) 2001-2024, Ai Software, LLC d/b/a LumenVox