#!/usr/bin/env python3
################################################################################
#
# Copyright (c) 2019, Perforce Software, Inc.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1.  Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#
# 2.  Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# = Date
#
# $Date: 2019/10/14 $
#
# = Description
#
# Parse a checkpoint for database records without a corresponding
# db.domain record.
# Orphaned records are written to a journal patch which can be replayed
# on the Helix server to remove this unnecessary data.
#
# = Usage
#
#   orphanedMetadataDetection.py a_checkpoint|a_checkpoint.gz
#
################################################################################

from __future__ import print_function

import re
import sys
import os
import gzip
import mimetypes


def cmdUsage():
    """Exit the program with the command-line usage message."""
    sys.exit("Usage: orphanedMetadataDetection.py a_checkpoint|a_checkpoint.gz")


def _openCheckpoint(filename):
    """Open a checkpoint for binary reading, via gzip when the name says so."""
    if mimetypes.guess_type(filename)[1] == 'gzip':
        return gzip.open(filename, "rb")
    return open(filename, "rb")


def _toDeleteRecord(line):
    """Return `line` with a leading @pv@ (put) marker turned into @dv@ (delete).

    bytes.replace() does plain substring replacement, so the "start of line"
    anchor has to be expressed with startswith() rather than a '^' in the
    search string. Lines that do not start with @pv@ are returned unchanged.
    """
    if line.startswith(b"@pv@"):
        return b"@dv@" + line[len(b"@pv@"):]
    return line


def main():
    """Scan a checkpoint for records whose owning db.domain record is missing.

    The checkpoint named by sys.argv[1] (plain or gzip-compressed) is read
    line by line until the first db.change record. Names found in db.domain
    records are remembered; db.have*, db.working and db.locks records that
    reference an unknown client, and db.label records that reference an
    unknown domain, are written as delete records to jnl.patch.gz in the
    current working directory. Progress and the final verdict go to stderr.

    A jnl.patch.gz left over from a previous run is removed first, and the
    file is only created when at least one orphaned record is found.
    """
    if len(sys.argv) < 2:
        cmdUsage()

    filename = sys.argv[1]

    # for Python 2 compatibility purpose:
    stderr = getattr(sys.stderr, 'buffer', sys.stderr)

    clientSpecRE = re.compile(b'@pv@ [0-9]* @db.domain@ @(.*?)@ 99 @.*')
    haveRE = re.compile(b'@pv@ [0-9]* @db.have.*?@ @//(.*?)/.*')
    haverpRE = re.compile(b'@pv@ [0-9]* @db.have.rp@ @//(.*?)/.*')
    workingRE = re.compile(b'@pv@ [0-9]* @db.working@ @//(.*?)/.*')
    locksRE = re.compile(b'@pv@ [0-9]* @db.locks@ @//.*?@ @(.*?)@ .*')
    labelSpecRE = re.compile(b'@pv@ [0-9]* @db.domain@ @(.*?)@ .*')
    labelRE = re.compile(b'@pv@ [0-9]* @db.label@ @(.*?)@.*')
    tableRE = re.compile(b'@pv@ [0-9]* @(.*?)@ .*')

    # Tried in this order; the first pattern that matches decides the line.
    # haveRE already matches every db.have* table name, db.have.rp included.
    clientOwnedREs = (haveRE, haverpRE, workingRE, locksRE)

    jnlPatchName = str.encode(os.getcwd()) + b"/jnl.patch.gz"
    if os.path.isfile(jnlPatchName):
        os.remove(jnlPatchName)

    # Sets keep the per-line membership tests constant-time.
    clients = set()
    labels = set()
    lastTable = None
    jnlPatch = None

    ckp = _openCheckpoint(filename)
    try:
        for line in ckp:
            match = tableRE.search(line)
            if match:
                table = match.group(1)
                if table == b"db.change":
                    # stop, no need to look further
                    break
                if table != lastTable:
                    stderr.write(b"Processing " + table + b"...\n")
                    stderr.flush()
                    lastTable = table

            match = clientSpecRE.search(line)
            if match:
                clients.add(match.group(1))
                continue

            # None: no pattern matched yet; True/False: matched, orphaned or not.
            orphaned = None
            for pattern in clientOwnedREs:
                match = pattern.search(line)
                if match:
                    orphaned = match.group(1) not in clients
                    break

            if orphaned is None:
                # Any db.domain record that is not a client spec lands here.
                match = labelSpecRE.search(line)
                if match:
                    labels.add(match.group(1))
                    continue
                match = labelRE.search(line)
                if match:
                    orphaned = match.group(1) not in labels

            if orphaned:
                if jnlPatch is None:
                    # Created lazily so a clean checkpoint leaves no patch file.
                    jnlPatch = gzip.open(jnlPatchName, "wb")
                jnlPatch.write(_toDeleteRecord(line))
    finally:
        ckp.close()
        if jnlPatch is not None:
            jnlPatch.close()

    if jnlPatch is not None:
        stderr.write(b"Found some database records without a corresponding db.domain record\n")
        stderr.write(b"Check and replay " + jnlPatchName + b" to remove these records\n")
    else:
        stderr.write(b"All the database records have a corresponding db.domain record\n")


if __name__ == '__main__':
    main()