unicodeModeSwitch.py #1

  • //
  • guest/
  • pascal_soccard/
  • Scripts/
  • server/
  • unicodeModeSwitch.py
  • View
  • Commits
  • Open Download .zip Download (10 KB)
#!/usr/bin/env python

################################################################################
#
# Copyright (c) 2023, Perforce Software, Inc.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1.  Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#
# 2.  Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# DATE
#   
#   $Date: 2024/08/20 $
#
# SYNOPSIS
# 
#   unicodeModeSwitch.py [-v] [-r P4ROOT] [-C charset] -u superuser
#
# DESCRIPTION
#
#   Switch a Helix Core server to the unicode mode:
#   - non utf8 metadata are patched
#   - server depot files with non utf8 filename are renamed
#
#   The character encoding of the non utf8 metadata is detected automatically
#   but can be overridden with the -C option.
#   Only one charset can be used/detected: if multiple non-utf8 character
#   encodings have been used, the switch may fail or data will be
#   incorrectly converted to utf8.
#
#   If P4ROOT is not provided, it defaults to the current
#   working directory.
#
#   A Helix Core user with super privileges is required to run 
#   the "p4 fstat -Ob" commands, which are used to identify the location
#   of the server depot files.
#
# REQUIREMENT
#
#   * this script was tested with Python 3.11 on Windows and Linux
#   * p4d 2017.1 or greater in the environment PATH
#   * python chardet library
#     (can be installed by running the "pip install chardet" command)
#
################################################################################

import argparse
import re
import gzip
import os
import sys
import mimetypes
import subprocess
import chardet
from pathlib import Path

def createRenameDict(revList, p4root, p4user, charset, verbose):
    """Build the mapping of server depot files that have to be renamed.

    Logs ``p4user`` in through an ``rsh:`` port that starts ``p4d`` on
    ``p4root``, then runs ``p4 -F %lbrPath% fstat -Ob`` once per entry of
    ``revList`` and records the path printed by the command.

    Args:
        revList: revision specifications as bytes, as returned by
            createJnlPatch (still in their original, non utf8 encoding).
        p4root: P4ROOT directory of the server.
        p4user: Helix Core user running the commands.
        charset: character encoding of the non utf8 data.
        verbose: when true, print every command and its result.

    Returns:
        dict mapping the path reported by ``fstat`` (``Path``) to the same
        path converted with ``charset`` (``Path``). Revisions for which
        ``fstat`` prints nothing are left out.

    Raises:
        SystemExit: if the login still writes to stderr after the password
            has been requested.
    """
    p4port = "rsh:p4d -r \\\"" + p4root + "\\\" -i"
    p4base = "p4 -u " + p4user + " -C none -p \"" + p4port + "\""

    # "login -s" only reports the ticket status; anything on stderr is
    # taken as "not logged in".
    cmd = p4base + " login -s"
    result = subprocess.run(cmd, shell=True, capture_output=True)
    if verbose:
        print("cmd:", cmd)
        print("result:", result)
    if result.stderr:
        # The prompt written by p4 is captured, so print our own; stdin is
        # not redirected and is still available to the p4 command.
        print("Enter password for '" + p4user + "':")
        cmd = p4base + " login"
        result = subprocess.run(cmd, shell=True, capture_output=True)
        if verbose:
            print("cmd:", cmd)
            print("result:", result)
        if result.stderr:
            # sys.exit rather than the site-provided exit(), as in the rest
            # of the script.
            sys.exit("Password invalid.")

    cmd = p4base + " -F %lbrPath% fstat -Ob"
    renameDict = {}
    for rev in revList:
        if sys.platform == 'win32':
            # On Windows the command line is text: decode the revision with
            # the source charset and let subprocess decode the output.
            revCmd = cmd + " \"" + rev.decode(charset, errors="backslashreplace") + "\""
            result = subprocess.run(revCmd, shell=True, capture_output=True, encoding=charset)
            if verbose:
                print("cmd:", revCmd)
                print("result:", result)
            if not result.stdout:
                continue
            lbrPath = result.stdout.strip()
            source = Path(lbrPath)
            # NOTE(review): the utf8 bytes of the path are read back with
            # ``charset``; presumably this matches how the file name appears
            # on disk after the switch - confirm before changing.
            target = Path(lbrPath.encode().decode(charset))
        else:
            # Elsewhere the raw bytes of the revision are passed untouched.
            revCmd = cmd.encode() + b" \"" + rev + b"\""
            result = subprocess.run(revCmd, shell=True, capture_output=True)
            if verbose:
                print(b"cmd: " + revCmd)
                print("result:", result)
            if not result.stdout:
                continue
            lbrPath = result.stdout.strip()
            # surrogateescape keeps the invalid utf8 bytes so that the Path
            # still designates the existing file.
            source = Path(lbrPath.decode(errors="surrogateescape"))
            target = Path(lbrPath.decode(charset))
        # Several revisions can share one file; keep the first mapping.
        if source not in renameDict:
            renameDict[source] = target
    return renameDict

def renameFiles(renameDict, charset, verbose):
    """Move each server depot file to its utf8 file name.

    Args:
        renameDict: dict mapping the current ``Path`` of a file to the
            ``Path`` it has to be moved to.
        charset: character encoding the file names were encoded in; only
            used in the final message.
        verbose: when true, print the error raised while removing the
            directories of the old path.
    """
    for oldPath, newPath in renameDict.items():
        # The destination directory tree may not exist yet.
        newPath.parents[0].mkdir(parents=True, exist_ok=True)
        oldPath.rename(newPath)
        oldDir = oldPath.parents[0]
        try:
            # Remove the old directory and its parents once they are empty.
            os.removedirs(oldDir)
        except OSError as error:
            # Best effort: a directory that cannot be removed is left alone.
            if verbose:
                print(error)
    print("Server depot files with a filename encoded in " + charset + " have been renamed using utf8")

def createJnlPatch(charset, jnl_invalid_utf8, jnl_patch):
    """Write the journal patch that converts the non utf8 records to utf8.

    Every ``@pv@`` record of ``jnl_invalid_utf8`` (including the lines that
    follow it up to the next record or ``@nx@``/``@ex@``/``@mx`` marker) is
    written twice to ``jnl_patch``: first as a ``@dv@`` record with the
    original bytes, then as a ``@pv@`` record decoded with ``charset`` and
    encoded again in utf8.

    Args:
        charset: character encoding of the non utf8 data.
        jnl_invalid_utf8: path of the journal to read; it is read through
            gzip when ``mimetypes`` reports a gzip encoding for the name.
        jnl_patch: path of the journal patch to write.

    Returns:
        list of bytes ``<file>@=<change>``, one per distinct value found in
        the ``db.rev``/``db.revsh`` records, in order of first appearance.

    Raises:
        UnicodeDecodeError: if a record cannot be decoded with ``charset``.
    """
    # Raw bytes literals: "\d" is an invalid escape sequence otherwise.
    jnlMarker = re.compile(rb'@nx@|@ex@|@mx')
    jnlRec = re.compile(rb'@pv@ .*')
    jnlRev = re.compile(rb'@pv@ \d+ @(db.rev|db.revsh)@ @.*@ \d+ \d+ \d+ (\d+) .* @(.*)@ @.*@ .*')

    if mimetypes.guess_type(jnl_invalid_utf8)[1] == 'gzip':
        opener = gzip.open
    else:
        opener = open

    revList = []
    seenRevs = set()  # constant-time duplicate check, revList keeps the order
    # Context managers close both files even when decoding fails.
    with opener(jnl_invalid_utf8, "rb") as jnlInvalidUtf8, open(jnl_patch, "wb") as jnlPatch:
        line = jnlInvalidUtf8.readline()
        while line:
            if not jnlRec.match(line):
                # Markers and stray lines are not part of any record.
                line = jnlInvalidUtf8.readline()
                continue

            matchRev = jnlRev.match(line)
            if matchRev:
                rev = matchRev.group(3) + b"@=" + matchRev.group(2)
                if rev not in seenRevs:
                    seenRevs.add(rev)
                    revList.append(rev)

            # A record may span several lines: gather everything up to the
            # next record or marker.
            blockLines = [line]
            line = jnlInvalidUtf8.readline()
            while line and not jnlMarker.match(line) and not jnlRec.match(line):
                blockLines.append(line)
                line = jnlInvalidUtf8.readline()
            block = b"".join(blockLines)

            # Only the leading operation code is changed; an "@pv@" found
            # inside the record text (e.g. an escaped "@@pv@@") must stay as
            # it is, otherwise the delete record no longer matches the data.
            jnlPatch.write(block.replace(b"@pv@", b"@dv@", 1))
            jnlPatch.write(block.decode(charset).encode())
    return revList

def patchMetadata(p4root, jnl_patch, charset, verbose):
    """Replay the journal patch into the server metadata (``p4d -jr``).

    Args:
        p4root: P4ROOT directory of the server.
        jnl_patch: path of the journal patch to replay.
        charset: character encoding of the patched data; only used in the
            final message.
        verbose: when true, print the command and its result.

    Raises:
        SystemExit: if the command writes anything to stderr.
    """
    cmd = "p4d -r \"" + p4root + "\" -jr \"" + jnl_patch + "\""
    result = subprocess.run(cmd, shell=True, capture_output=True)
    if verbose:
        print("cmd:", cmd)
        print("result:", result)
    if result.stderr:
        # The command was passed as a single string, so result.args[0] would
        # only be its first character: print the whole command instead.
        print("\n" + cmd)
        # The output is captured as bytes; decode it leniently so that the
        # failure is reported instead of raising a TypeError.
        print(result.stderr.decode(errors="replace"))
        sys.exit("Switch to unicode mode failed\n" + result.stdout.decode(errors="replace"))
    print("Metadata encoded in " + charset + " have been patched using utf8")

def runP4dXi(p4root, verbose):
    """Run ``p4d -xi`` on the server to switch it to unicode mode.

    Args:
        p4root: P4ROOT directory of the server.
        verbose: when true, print the command and its result.

    Raises:
        SystemExit: if the command writes anything to stderr.
    """
    cmd = "p4d -r \"" + p4root + "\" -xi"
    result = subprocess.run(cmd, shell=True, capture_output=True)
    if verbose:
        print("cmd:", cmd)
        print("result:", result)
    if result.stderr:
        # The command was passed as a single string, so result.args[0] would
        # only be its first character: print the whole command instead.
        print("\n" + cmd)
        # The output is captured as bytes; decode it leniently so that the
        # failure is reported instead of raising a TypeError.
        print(result.stderr.decode(errors="replace"))
        sys.exit("Switch to unicode mode failed\n" + result.stdout.decode(errors="replace"))

def unicodeModeSwitch(p4root, charset, p4user, verbose):
    """Switch the server to unicode mode, patching non utf8 data if needed.

    ``p4d -xi`` is run first. When it does not report that the server was
    switched, the ``jnl.invalid-utf8`` file of ``p4root`` is used to build a
    journal patch, the server depot files to rename are identified, the
    metadata are patched, ``p4d -xi`` is run again and the files are renamed.

    Args:
        p4root: P4ROOT directory of the server.
        charset: character encoding of the non utf8 data; detected with
            chardet from ``jnl.invalid-utf8`` when empty or None.
        p4user: Helix Core user used to run the p4 commands.
        verbose: when true, print the commands and their results.

    Raises:
        SystemExit: if ``jnl.invalid-utf8`` does not exist, if no character
            encoding can be detected, or if one of the steps fails.
    """
    cmd = "p4d -r \"" + p4root + "\" -xi"
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    if verbose:
        print("cmd:", cmd)
        print("result:", result)
    if result.stdout == "Server switched to Unicode mode.\n":
        print("All metadata are already encoded in utf8, no patch was required before switching!")
    else:
        jnl_invalid_utf8 = str(Path(p4root + "/jnl.invalid-utf8"))
        print(jnl_invalid_utf8)

        if not os.path.isfile(jnl_invalid_utf8):
            # sys.exit rather than the site-provided exit(), as in the rest
            # of the script.
            sys.exit("Error: " + jnl_invalid_utf8 + " does not exist")

        if not charset:
            with open(jnl_invalid_utf8, 'rb') as jnlFile:
                detected = chardet.detect(jnlFile.read())
            # chardet returns a dict even when nothing was recognised; the
            # failure shows as a missing/None "encoding" entry.
            if not detected or not detected.get("encoding"):
                sys.exit("Switch to unicode mode failed.\nUnable to identify a character encoding for non utf8 data")
            charset = detected["encoding"]
            # The language is optional information: skip it when empty/None.
            if detected.get("language"):
                language = "language = " + detected["language"] + ", "
            else:
                language = ""
            confidence = "confidence = " + str(round(float(detected["confidence"])*100)) + "%"
            print("Character encoding detected for non utf8 data: " + charset + " (" + language + confidence + ")")

        jnl_patch = str(Path(p4root + "/jnl.patch"))

        revList = createJnlPatch(charset, jnl_invalid_utf8, jnl_patch)

        # The rename mapping is built before the metadata are patched, the
        # files themselves are only renamed once the switch has succeeded.
        renameDict = createRenameDict(revList, p4root, p4user, charset, verbose)

        patchMetadata(p4root, jnl_patch, charset, verbose)

        runP4dXi(p4root, verbose)

        renameFiles(renameDict, charset, verbose)

    print("Switch to unicode mode was completed succesfully!")

if __name__ == '__main__':
   parser = argparse.ArgumentParser(prog='unicodeModeSwitch', usage='%(prog)s [-v] [-r P4ROOT] [-C charset] -u superuser')
   parser.add_argument("-r", "--p4root", dest="p4root", help="P4ROOT directory")
   parser.add_argument("-C", "--charset", dest="charset", help="character encoding")
   parser.add_argument("-u", "--user", dest="p4user", required=True, help="Helix Core superuser")
   parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true")
   args = parser.parse_args()
   minP4DVersion = 2017.1
   cmd = "p4d -V"
   result = subprocess.run(cmd, shell=True, capture_output=True)
   if result.stderr:    
       exit("Error: p4d executable not found, please add it to your PATH")
   match = re.search(b'P4D\/.*\/(\d+.\d)\/.*', result.stdout, re.DOTALL)
   if not match:
      sys.exit("Error: p4d version cannot be detected")
   if float(match.group(1)) < minP4DVersion:
      sys.exit("Error: p4d/metadata must be " + str(minP4DVersion) + " or greater, please upgrade")
   p4root = args.p4root
   if not p4root:
      p4root = str(Path.cwd())
   else:
      if not Path(p4root).is_absolute():
         p4root = str(Path(p4root).resolve())
   charset = args.charset
   p4user = args.p4user
   verbose = args.verbose
   unicodeModeSwitch(p4root, charset, p4user, verbose)
# Change User Description Committed
#1 30549 Pascal Soccard Switch a Helix Core server to the unicode mode:
#   - non utf8 metadata are patched
#   - server depot files with non utf8 filename are renamed