#!/usr/bin/env python
################################################################################
#
# Copyright (c) 2023, Perforce Software, Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# DATE
#
#   $Date: 2024/08/20 $
#
# SYNOPSIS
#
#   unicodeModeSwitch.py [-v] [-r P4ROOT] [-C charset] -u superuser
#
# DESCRIPTION
#
#   Switch a Helix Core server to the unicode mode:
#   - non utf8 metadata are patched
#   - server depot files with non utf8 filename are renamed
#
#   The character encoding of the non utf8 metadata is detected automatically
#   but can be overridden with the -C option.
#   Only one charset can be used/detected, if multiple non-utf8 character
#   encodings have been used, the switch may fail or data will be
#   incorrectly converted to utf8.
#
#   If P4ROOT is not provided, it defaults to the current
#   working directory.
#
#   A Helix Core user with super privileges is required to run
#   the "p4 fstat -Ob" commands, which are used to identify the location
#   of the server depot files.
#
# REQUIREMENT
#
#   * this script was tested with Python 3.11 on Windows and Linux
#   * p4d 2017.1 or greater in the environment PATH
#   * python chardet library, unless the charset is given with -C
#     (can be installed by running the "pip install chardet" command)
#
################################################################################

import argparse
import re
import gzip
import os
import sys
import mimetypes
import subprocess
from pathlib import Path

try:
    import chardet
except ImportError:
    # chardet is only used to auto-detect the charset; when -C is given the
    # script can run without it, so a missing library is reported lazily.
    chardet = None


def _run(args, verbose, **kwargs):
    """Run the command *args* (an argv list) without a shell and capture its output.

    Extra keyword arguments are forwarded to subprocess.run().
    Exits the script with an error message if the executable cannot be found.
    Returns the subprocess.CompletedProcess.
    """
    try:
        result = subprocess.run(args, capture_output=True, **kwargs)
    except FileNotFoundError:
        sys.exit("Error: " + os.fsdecode(args[0]) +
                 " executable not found, please add it to your PATH")
    if verbose:
        print("cmd:", args)
        print("result:", result)
    return result


def _runP4dOrExit(args, verbose):
    """Run a p4d command and exit the script if it wrote anything to stderr."""
    # Decode leniently: the output is only used in messages, and a server that
    # still holds non utf8 data may print bytes the locale cannot decode.
    result = _run(args, verbose, text=True, errors="replace")
    if result.stderr:
        print("\n" + " ".join(args))
        print(result.stderr)
        sys.exit("Switch to unicode mode failed\n" + result.stdout)
    return result


def createRenameDict(revList, p4root, p4user, charset, verbose):
    """Map the current path of each server depot file to its utf8 path.

    revList  -- revision specs (bytes, "<lbrFile>@=<change>") as returned by
                createJnlPatch()
    p4root   -- P4ROOT directory, served through an "rsh:" port
    p4user   -- Helix Core user used to run "p4 fstat -Ob"
    charset  -- character encoding of the non utf8 data
    verbose  -- print every command and its result

    Revisions for which fstat reports no lbrPath are skipped.
    Exits the script if the user cannot be logged in.
    Returns a dict {source Path: target Path}.
    """
    p4port = 'rsh:p4d -r "' + p4root + '" -i'
    p4 = ["p4", "-u", p4user, "-C", "none", "-p", p4port]

    # "login -s" writes to stderr when there is no valid ticket; in that case
    # run an interactive login. Its prompt is captured, hence the print.
    if _run(p4 + ["login", "-s"], verbose).stderr:
        print("Enter password for '" + p4user + "':")
        if _run(p4 + ["login"], verbose).stderr:
            sys.exit("Password invalid.")

    fstat = p4 + ["-F", "%lbrPath%", "fstat", "-Ob"]
    # On POSIX the revision is passed as raw bytes so that non utf8 file names
    # reach p4 unchanged; the fixed arguments are encoded once to match.
    fstatBytes = [os.fsencode(arg) for arg in fstat]

    renameDict = {}
    for rev in revList:
        if sys.platform == 'win32':
            result = _run(fstat + [rev.decode(charset, errors="backslashreplace")],
                          verbose, encoding=charset)
            lbrPath = result.stdout.strip()
            if not lbrPath:
                continue
            source = Path(lbrPath)
            # NOTE(review): same conversion as the original script (utf8 bytes
            # of the reported path re-read as charset) -- confirm on Windows.
            target = Path(lbrPath.encode().decode(charset))
        else:
            result = _run(fstatBytes + [rev], verbose)
            lbrPath = result.stdout.strip()
            if not lbrPath:
                continue
            # surrogateescape keeps the undecodable bytes, so the Path still
            # designates the existing file on disk.
            source = Path(lbrPath.decode(errors="surrogateescape"))
            target = Path(lbrPath.decode(charset))
        # Several revisions can share one depot file: the first mapping wins.
        renameDict.setdefault(source, target)
    return renameDict


def renameFiles(renameDict, charset, verbose):
    """Rename every server depot file of renameDict to its utf8 path.

    The target directory is created when needed, and the source directory
    (and its parents) are removed once they are empty.
    """
    for source, target in renameDict.items():
        target.parents[0].mkdir(parents=True, exist_ok=True)
        source.rename(target)
        try:
            os.removedirs(source.parents[0])
        except OSError as error:
            # Best effort: the directory usually still holds other files.
            if verbose:
                print(error)
    print("Server depot files with a filename encoded in " + charset + " have been renamed using utf8")


def createJnlPatch(charset, jnl_invalid_utf8, jnl_patch):
    """Write the journal patch converting the records of jnl_invalid_utf8 to utf8.

    For each "@pv@" record (which may span several lines) the patch gets the
    original record turned into a "@dv@" record, followed by the same record
    decoded from charset and re-encoded in utf8.

    charset          -- character encoding of the non utf8 data
    jnl_invalid_utf8 -- journal of invalid records (gzip is supported)
    jnl_patch        -- path of the patch file to create

    Raises UnicodeDecodeError if a record cannot be decoded with charset.
    Returns the list of b"<lbrFile>@=<change>" specs found in the db.rev and
    db.revsh records, without duplicates, in order of appearance.
    """
    jnlMarker = re.compile(rb'@nx@|@ex@|@mx')
    jnlRec = re.compile(rb'@pv@ .*')
    jnlRev = re.compile(rb'@pv@ \d+ @(db.rev|db.revsh)@ @.*@ \d+ \d+ \d+ (\d+) .* @(.*)@ @.*@ .*')

    if mimetypes.guess_type(jnl_invalid_utf8)[1] == 'gzip':
        opener = gzip.open
    else:
        opener = open

    revList = []
    seen = set()
    with opener(jnl_invalid_utf8, "rb") as jnlInvalidUtf8, open(jnl_patch, "wb") as jnlPatch:
        line = jnlInvalidUtf8.readline()
        while line:
            matchRev = jnlRev.match(line)
            if matchRev:
                rev = matchRev.group(3) + b"@=" + matchRev.group(2)
                if rev not in seen:
                    seen.add(rev)
                    revList.append(rev)
            isRecord = jnlRec.match(line)
            block = line
            line = jnlInvalidUtf8.readline()
            if not isRecord:
                continue
            # Gather the continuation lines of the record: it ends at the next
            # record or journal marker, which the outer loop then handles.
            while line and not (jnlMarker.match(line) or jnlRec.match(line)):
                block += line
                line = jnlInvalidUtf8.readline()
            jnlPatch.write(block.replace(b"@pv@", b"@dv@"))
            jnlPatch.write(block.decode(charset).encode())
    return revList


def patchMetadata(p4root, jnl_patch, charset, verbose):
    """Replay jnl_patch into the server metadata with "p4d -jr"."""
    _runP4dOrExit(["p4d", "-r", p4root, "-jr", jnl_patch], verbose)
    print("Metadata encoded in " + charset + " have been patched using utf8")


def runP4dXi(p4root, verbose):
    """Switch the server to unicode mode with "p4d -xi"."""
    _runP4dOrExit(["p4d", "-r", p4root, "-xi"], verbose)


def _detectCharset(jnl_invalid_utf8):
    """Detect the character encoding of jnl_invalid_utf8 with chardet.

    Exits the script if chardet is not installed or cannot identify an
    encoding. Returns the encoding name.
    """
    if chardet is None:
        sys.exit("Error: the python chardet library is required to detect the "
                 "character encoding, please install it or use the -C option")
    result = chardet.detect(Path(jnl_invalid_utf8).read_bytes())
    # chardet reports an undecided detection as {"encoding": None, ...}.
    if not result or not result.get("encoding"):
        sys.exit("Switch to unicode mode failed.\nUnable to identify a character encoding for non utf8 data")
    charset = result["encoding"]
    if result.get("language"):
        language = "language = " + result["language"] + ", "
    else:
        language = ""
    confidence = "confidence = " + str(round(float(result["confidence"])*100)) + "%"
    print("Character encoding detected for non utf8 data: " + charset + " (" + language + confidence + ")")
    return charset


def unicodeModeSwitch(p4root, charset, p4user, verbose):
    """Switch the server located in p4root to unicode mode.

    "p4d -xi" is tried first. If it does not report the switch, the
    jnl.invalid-utf8 file expected in p4root is used to patch the metadata
    and rename the server depot files before running "p4d -xi" again.

    p4root  -- P4ROOT directory
    charset -- character encoding of the non utf8 data; detected when empty
    p4user  -- Helix Core user with super privileges
    verbose -- print every command and its result
    """
    result = _run(["p4d", "-r", p4root, "-xi"], verbose, text=True, errors="replace")
    if result.stdout == "Server switched to Unicode mode.\n":
        print("All metadata are already encoded in utf8, no patch was required before switching!")
    else:
        jnl_invalid_utf8 = str(Path(p4root) / "jnl.invalid-utf8")
        print(jnl_invalid_utf8)
        if not os.path.isfile(jnl_invalid_utf8):
            sys.exit("Error: " + jnl_invalid_utf8 + " does not exist")
        if not charset:
            charset = _detectCharset(jnl_invalid_utf8)
        jnl_patch = str(Path(p4root) / "jnl.patch")
        revList = createJnlPatch(charset, jnl_invalid_utf8, jnl_patch)
        # The depot file locations must be queried before the metadata are
        # patched; the files themselves are renamed once the switch is done.
        renameDict = createRenameDict(revList, p4root, p4user, charset, verbose)
        patchMetadata(p4root, jnl_patch, charset, verbose)
        runP4dXi(p4root, verbose)
        renameFiles(renameDict, charset, verbose)
    print("Switch to unicode mode was completed succesfully!")


def main():
    """Parse the command line, check the p4d version and run the switch."""
    parser = argparse.ArgumentParser(prog='unicodeModeSwitch', usage='%(prog)s [-v] [-r P4ROOT] [-C charset] -u superuser')
    parser.add_argument("-r", "--p4root", dest="p4root", help="P4ROOT directory")
    parser.add_argument("-C", "--charset", dest="charset", help="character encoding")
    parser.add_argument("-u", "--user", dest="p4user", required=True, help="Helix Core superuser")
    parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true")
    args = parser.parse_args()

    minP4DVersion = (2017, 1)
    result = _run(["p4d", "-V"], False)
    if result.stderr:
        sys.exit("Error: p4d executable not found, please add it to your PATH")
    match = re.search(rb'P4D/[^/]+/(\d+)\.(\d+)/', result.stdout)
    if not match:
        sys.exit("Error: p4d version cannot be detected")
    # Compare as integers: a float would misorder a release such as 2017.10.
    if (int(match.group(1)), int(match.group(2))) < minP4DVersion:
        sys.exit("Error: p4d/metadata must be " + ".".join(map(str, minP4DVersion)) + " or greater, please upgrade")

    if not args.p4root:
        p4root = str(Path.cwd())
    elif not Path(args.p4root).is_absolute():
        p4root = str(Path(args.p4root).resolve())
    else:
        p4root = args.p4root
    unicodeModeSwitch(p4root, args.charset, args.p4user, args.verbose)


if __name__ == '__main__':
    main()
# Change history:
#
# | #  | Change | User           | Description | Committed |
# |----|--------|----------------|-------------|-----------|
# | #1 | 30549  | Pascal Soccard | Switch a Helix Core server to the unicode mode: - non utf8 metadata are patched - server depot files with non utf8 filename are renamed | |