#!/usr/bin/env python
################################################################################
#
# Copyright (c) 2023, Perforce Software, Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# DATE
#
# $Date: 2024/08/20 $
#
# SYNOPSIS
#
# unicodeModeSwitch.py [-v] [-r P4ROOT] [-C charset] -u superuser
#
# DESCRIPTION
#
# Switch a Helix Core server to the unicode mode:
# - non utf8 metadata are patched
# - server depot files with non utf8 filename are renamed
#
# The character encoding of the non utf8 metadata is detected automatically
# but can be overridden with the -C option.
# Only one charset can be used/detected: if multiple non-utf8 character
# encodings have been used, the switch may fail or data may be
# incorrectly converted to utf8.
#
# If P4ROOT is not provided, it defaults to the current
# working directory.
#
# A Helix Core user with super privileges is required to run
# the "p4 fstat -Ob" commands, which are used to identify the location
# of the server depot files.
#
# REQUIREMENT
#
# * this script was tested with Python 3.11 on Windows and Linux
# * p4d 2017.1 or greater in the environment PATH
# * python chardet library
# (can be installed by running the "pip install chardet" command)
#
################################################################################
import argparse
import re
import gzip
import os
import sys
import mimetypes
import subprocess
import chardet
from pathlib import Path
def createRenameDict(revList, p4root, p4user, charset, verbose):
    """Map server depot files to the utf8-named paths they must be renamed to.

    Checks the login status of ``p4user`` with ``p4 login -s`` and, when that
    command writes anything to stderr, runs ``p4 login``. Then
    ``p4 -F %lbrPath% fstat -Ob`` is run once per entry of ``revList`` and
    every non-empty answer is recorded.

    Args:
        revList: revision specifications as ``bytes`` (they are concatenated
            to a bytes command line, or decoded with ``charset`` on Windows).
        p4root: P4ROOT directory handed to ``p4d -r`` through the rsh port.
        p4user: user passed to ``p4 -u``.
        charset: character encoding of the non utf8 data.
        verbose: when true, print every command and its result.

    Returns:
        dict mapping the ``Path`` reported by fstat (source) to the ``Path``
        obtained by decoding that same answer with ``charset`` (target). Only
        the first target seen for a given source is kept.

    Raises:
        SystemExit: if ``p4 login`` writes anything to stderr.
    """
    # The port string makes p4 start "p4d -r <p4root> -i" itself.
    p4port = "rsh:p4d -r \\\"" + p4root + "\\\" -i"
    p4base = "p4 -u " + p4user + " -C none -p \"" + p4port + "\""

    cmd = p4base + " login -s"
    result = subprocess.run(cmd, shell=True, capture_output=True)
    if verbose:
        print("cmd:", cmd)
        print("result:", result)
    # Any stderr output from "login -s" is treated as "not logged in".
    if result.stderr:
        print("Enter password for '" + p4user + "':")
        cmd = p4base + " login"
        result = subprocess.run(cmd, shell=True, capture_output=True)
        if verbose:
            print("cmd:", cmd)
            print("result:", result)
        if result.stderr:
            # sys.exit rather than the site-provided exit(), which is meant
            # for interactive sessions and may be missing (e.g. python -S).
            sys.exit("Password invalid.")

    # NOTE(review): each revision is interpolated into a shell command line;
    # depot paths containing shell metacharacters (for instance '"' or '$')
    # would be altered by the shell. Consider passing an argument list.
    cmd = p4base + " -F %lbrPath% fstat -Ob"
    renameDict = {}
    for rev in revList:
        if sys.platform == 'win32':
            revCmd = cmd + " \"" + rev.decode(charset, errors="backslashreplace") + "\""
            result = subprocess.run(revCmd, shell=True, capture_output=True, encoding=charset)
            if verbose:
                print("cmd:", revCmd)
                print("result:", result)
            if not result.stdout:
                # fstat reported no path for this revision: nothing to rename.
                continue
            lbrPath = result.stdout.strip()
            source = Path(lbrPath)
            # stdout was decoded with charset; the target is that text
            # encoded as utf8 and decoded with charset again.
            target = Path(lbrPath.encode().decode(charset))
        else:
            revCmd = cmd.encode() + b" \"" + rev + b"\""
            result = subprocess.run(revCmd, shell=True, capture_output=True)
            if verbose:
                print(b"cmd: " + revCmd)
                print("result:", result)
            if not result.stdout:
                # fstat reported no path for this revision: nothing to rename.
                continue
            lbrPath = result.stdout.strip()
            # surrogateescape keeps the undecodable bytes so that the Path
            # still designates the file as named on disk.
            source = Path(lbrPath.decode(errors="surrogateescape"))
            target = Path(lbrPath.decode(charset))
        # Keep the first target recorded for a given source.
        renameDict.setdefault(source, target)
    return renameDict
def renameFiles(renameDict, charset, verbose):
    """Rename every server depot file listed in ``renameDict``.

    Args:
        renameDict: dict mapping a source ``Path`` to its target ``Path``.
        charset: character encoding name, only used in the final message.
        verbose: when true, print the error raised while pruning the
            directories left behind by a rename.
    """
    for oldPath, newPath in renameDict.items():
        # The target directory may not exist yet.
        newPath.parents[0].mkdir(parents=True, exist_ok=True)
        oldPath.rename(newPath)
        # Best effort: prune the source directory (and its parents) if the
        # rename left them empty; a failure here is not fatal.
        try:
            os.removedirs(oldPath.parents[0])
        except OSError as err:
            if verbose:
                print(err)
    print("Server depot files with a filename encoded in " + charset + " have been renamed using utf8")
def createJnlPatch(charset, jnl_invalid_utf8, jnl_patch):
    """Write a journal patch that re-encodes the non utf8 records as utf8.

    Every record starting with ``@pv@`` in ``jnl_invalid_utf8`` (together
    with its continuation lines) is written twice to ``jnl_patch``: first
    with its leading ``@pv@`` turned into ``@dv@``, then decoded with
    ``charset`` and re-encoded as utf8.
    (Presumably ``@dv@`` deletes the old row and ``@pv@`` puts the new one -
    confirm against the Helix Core journal documentation.)

    Args:
        charset: character encoding of the non utf8 data.
        jnl_invalid_utf8: path of the journal to read; opened with gzip when
            ``mimetypes`` reports a gzip encoding for that name.
        jnl_patch: path of the patch journal to create (overwritten).

    Returns:
        list of ``bytes`` revision specifications ``<path>@=<change>``, one
        per distinct db.rev/db.revsh record, in order of first appearance.

    Raises:
        UnicodeDecodeError: if a record cannot be decoded with ``charset``.
    """
    # Raw literals: "\d" is an invalid escape in a plain bytes literal.
    jnlMarker = re.compile(rb'@nx@|@ex@|@mx')
    jnlRec = re.compile(rb'@pv@ .*')
    jnlRev = re.compile(rb'@pv@ \d+ @(db.rev|db.revsh)@ @.*@ \d+ \d+ \d+ (\d+) .* @(.*)@ @.*@ .*')

    if mimetypes.guess_type(jnl_invalid_utf8)[1] == 'gzip':
        openJournal = gzip.open
    else:
        openJournal = open

    revList = []
    seenRevs = set()  # O(1) duplicate check; revList keeps the order
    # Context managers close both files even if decoding a record fails.
    with openJournal(jnl_invalid_utf8, "rb") as jnlInvalidUtf8, \
            open(jnl_patch, "wb") as jnlPatch:
        line = jnlInvalidUtf8.readline()
        while line:
            matchRev = jnlRev.match(line)
            if matchRev:
                rev = matchRev.group(3) + b"@=" + matchRev.group(2)
                if rev not in seenRevs:
                    seenRevs.add(rev)
                    revList.append(rev)

            isRecord = jnlRec.match(line)
            if not isRecord:
                line = jnlInvalidUtf8.readline()
                continue

            # Collect the record: its first line plus every following line up
            # to the next marker or the next record.
            parts = [line]
            line = jnlInvalidUtf8.readline()
            while line and not (jnlMarker.match(line) or jnlRec.match(line)):
                parts.append(line)
                line = jnlInvalidUtf8.readline()
            block = b"".join(parts)

            # The record always starts with "@pv@"; replace only that leading
            # marker so that text such as "@@pv@@" inside the record is kept.
            jnlPatch.write(block.replace(b"@pv@", b"@dv@", 1))
            jnlPatch.write(block.decode(charset).encode())
    return revList
def patchMetadata(p4root, jnl_patch, charset, verbose):
    """Replay the patch journal into the server metadata with ``p4d -jr``.

    Args:
        p4root: P4ROOT directory handed to ``p4d -r``.
        jnl_patch: path of the patch journal to replay.
        charset: character encoding name, only used in the final message.
        verbose: when true, print the command and its result.

    Raises:
        SystemExit: if p4d writes anything to stderr; the exit message
            carries whatever p4d printed on stdout.
    """
    cmd = "p4d -r \"" + p4root + "\" -jr \"" + jnl_patch + "\""
    result = subprocess.run(cmd, shell=True, capture_output=True)
    if verbose:
        print("cmd:", cmd)
        print("result:", result)
    if result.stderr:
        # The command is a plain string, so result.args[0] would only be its
        # first character: print the whole command instead.
        print("\n" + cmd)
        # Output is captured as bytes; decode it leniently so that reporting
        # the failure cannot itself fail on non utf8 output.
        print(result.stderr.decode(errors="replace"))
        sys.exit("Switch to unicode mode failed\n" + result.stdout.decode(errors="replace"))
    print("Metadata encoded in " + charset + " have been patched using utf8")
def runP4dXi(p4root, verbose):
    """Run ``p4d -xi`` on ``p4root`` and abort if it reports an error.

    Args:
        p4root: P4ROOT directory handed to ``p4d -r``.
        verbose: when true, print the command and its result.

    Raises:
        SystemExit: if p4d writes anything to stderr; the exit message
            carries whatever p4d printed on stdout.
    """
    cmd = "p4d -r \"" + p4root + "\" -xi"
    result = subprocess.run(cmd, shell=True, capture_output=True)
    if verbose:
        print("cmd:", cmd)
        print("result:", result)
    if result.stderr:
        # The command is a plain string, so result.args[0] would only be its
        # first character: print the whole command instead.
        print("\n" + cmd)
        # Output is captured as bytes; decode it leniently so that reporting
        # the failure cannot itself fail on non utf8 output.
        print(result.stderr.decode(errors="replace"))
        sys.exit("Switch to unicode mode failed\n" + result.stdout.decode(errors="replace"))
def unicodeModeSwitch(p4root, charset, p4user, verbose):
    """Switch the server rooted at ``p4root`` to unicode mode.

    ``p4d -xi`` is attempted first. Unless it answers exactly
    "Server switched to Unicode mode.", the ``jnl.invalid-utf8`` file found
    in ``p4root`` is used to build a patch journal, the metadata are patched,
    ``p4d -xi`` is run again and the server depot files are renamed.

    Args:
        p4root: P4ROOT directory.
        charset: character encoding of the non utf8 data; when empty or
            ``None`` it is detected with chardet from ``jnl.invalid-utf8``.
        p4user: Helix Core user used to run the ``p4`` commands.
        verbose: when true, print every command and its result.

    Raises:
        SystemExit: if ``jnl.invalid-utf8`` does not exist, if no character
            encoding can be detected, or if one of the p4/p4d steps fails.
    """
    cmd = "p4d -r \"" + p4root + "\" -xi"
    # errors="replace": p4d output that is not valid in the locale encoding
    # must not abort the switch with a UnicodeDecodeError.
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True, errors="replace")
    if verbose:
        print("cmd:", cmd)
        print("result:", result)
    if result.stdout == "Server switched to Unicode mode.\n":
        print("All metadata are already encoded in utf8, no patch was required before switching!")
    else:
        jnl_invalid_utf8 = str(Path(p4root + "/jnl.invalid-utf8"))
        print(jnl_invalid_utf8)
        if not os.path.isfile(jnl_invalid_utf8):
            # sys.exit rather than the site-provided exit(), which is meant
            # for interactive sessions and may be missing (e.g. python -S).
            sys.exit("Error: " + jnl_invalid_utf8 + " does not exist")
        if not charset:
            with open(jnl_invalid_utf8, 'rb') as jnlFile:
                detected = chardet.detect(jnlFile.read())
            # chardet returns a dict even when detection fails, with a None
            # encoding: the dict being non-empty proves nothing.
            if not detected or not detected.get("encoding"):
                sys.exit("Switch to unicode mode failed.\nUnable to identify a character encoding for non utf8 data")
            charset = detected["encoding"]
            detectedLanguage = detected.get("language")
            if detectedLanguage:
                language = "language = " + detectedLanguage + ", "
            else:
                language = ""
            confidence = "confidence = " + str(round(float(detected["confidence"])*100)) + "%"
            print("Character encoding detected for non utf8 data: " + charset + " (" + language + confidence + ")")
        jnl_patch = str(Path(p4root + "/jnl.patch"))
        revList = createJnlPatch(charset, jnl_invalid_utf8, jnl_patch)
        # The rename map is built before the metadata are patched; the files
        # themselves are renamed last, once the server has been switched.
        renameDict = createRenameDict(revList, p4root, p4user, charset, verbose)
        patchMetadata(p4root, jnl_patch, charset, verbose)
        runP4dXi(p4root, verbose)
        renameFiles(renameDict, charset, verbose)
    print("Switch to unicode mode was completed succesfully!")
if __name__ == '__main__':
    # Command line: unicodeModeSwitch.py [-v] [-r P4ROOT] [-C charset] -u superuser
    parser = argparse.ArgumentParser(prog='unicodeModeSwitch', usage='%(prog)s [-v] [-r P4ROOT] [-C charset] -u superuser')
    parser.add_argument("-r", "--p4root", dest="p4root", help="P4ROOT directory")
    parser.add_argument("-C", "--charset", dest="charset", help="character encoding")
    parser.add_argument("-u", "--user", dest="p4user", required=True, help="Helix Core superuser")
    parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true")
    args = parser.parse_args()

    # Check that p4d can be run and is recent enough.
    minP4DVersion = 2017.1
    cmd = "p4d -V"
    result = subprocess.run(cmd, shell=True, capture_output=True)
    if result.stderr:
        # sys.exit rather than the site-provided exit(), which is meant for
        # interactive sessions and may be missing (e.g. python -S).
        sys.exit("Error: p4d executable not found, please add it to your PATH")
    # Raw literal (no invalid escapes) and a literal dot between the year
    # and the single-digit release number of the version string.
    match = re.search(rb'P4D/.*/(\d+\.\d)/.*', result.stdout, re.DOTALL)
    if not match:
        sys.exit("Error: p4d version cannot be detected")
    # The version is compared as a float, which is only valid because the
    # pattern captures exactly one digit after the dot.
    if float(match.group(1)) < minP4DVersion:
        sys.exit("Error: p4d/metadata must be " + str(minP4DVersion) + " or greater, please upgrade")

    # P4ROOT defaults to the current working directory; a relative path is
    # made absolute, an absolute one is used as given.
    p4root = args.p4root
    if not p4root:
        p4root = str(Path.cwd())
    elif not Path(p4root).is_absolute():
        p4root = str(Path(p4root).resolve())

    charset = args.charset
    p4user = args.p4user
    verbose = args.verbose
    unicodeModeSwitch(p4root, charset, p4user, verbose)
# | # | Change | User | Description | Committed | |
# |---|---|---|---|---|---|
# | #1 | 30549 | Pascal Soccard | Switch a Helix Core server to the unicode mode: - non utf8 metadata are patched - server depot files with non utf8 filename are renamed | | |