strbuf.cc #1

/*
 * Copyright 1995, 1996 Perforce Software.  All rights reserved.
 *
 * This file is part of Perforce - the FAST SCM System.
 */

/*
 * strings.cc - support for StrBuf, StrPtr
 */

# include <stdhdrs.h>
# include <charman.h>
# include <math.h>

# include "strbuf.h"
# include "strdict.h"
# include "strops.h"

/*
 * Null strings for StrRef::Null() and empty StrBuf
 */

char StrBuf::nullStrBuf[8] = "\0\0\0\0\0\0\0";
StrRef StrRef::null( "", 0 );

/*
 * StrPtr::CCompare/SCompare/SCompareN/SEqual
 *
 * These following methods alone handle the case sensitivity switch
 * between UNIX and NT servers.  They include support for an experimental
 * "hybrid" form of case handling.
 *
 * Even the case insensitive comparators start with a case sensitive
 * compare, since that is faster and handles the bulk of data.  Only
 * if the case sensitive compare finds a difference, and we are being
 * case insensitive, do we proceed to the slower case folding compare.
 *
 * Here are the case handling modes:
 *
 *	ST_UNIX: Compare with case significant.
 *
 *		Ax < ax
 *		aa > AX
 *
 *	ST_WINDOWS: Compare with case ignored.
 *
 *		Ax == ax
 *		aa < AX
 *
 *	ST_HYBRID: First compare with case ignored, then significant.
 *
 *		Ax < ax
 *		aa < AX
 *
 *		This leads to an ordering like:
 *
 *			//depot/main/a
 *			//depot/main/b
 *			//depot/Main/c
 *			//depot/main/c
 *			//depot/main/d
 *
 *		That is, it looks like WINDOWS order, but still maintains
 *		uniqueness.  Because ordering depends on seeing the whole
 *		string before going back and looking for case differences,
 *		this can complicate code which attempts to do its own char
 *		by char sorting.  Thus the character based SCompare() and
 *		SEqual() differ only for the HYBRID case: SCompare ignores
 *		the case, while SEqual() heeds it.
 *
 * And the methods:
 *
 *	StrPtr::CCompare() 
 *		Compare with case ignored.
 *
 *	StrPtr::NCompare() 
 *		Compare with case ignored, natural order comparison.
 *
 *	StrPtr::SCompare( string )
 *		Compare with case significant (UNIX).
 *		Compare with case ignored (WINDOWS)
 *		Compare with case ignored then significant (HYBRID)
 *
 *	StrPtr::SCompareN( string, len )
 *		Compare with case significant (UNIX).
 *		Compare with case ignored (WINDOWS)
 *		Compare with case ignored; if EOS then significant (HYBRID)
 *
 *	StrPtr::SCompare( char )
 *		Compare with case significant (UNIX).
 *		Compare with case ignored (WINDOWS,HYBRID)
 *
 *	StrPtr::SEqual( char )
 *		Compare with case significant (UNIX,HYBRID).
 *		Compare with case ignored (WINDOWS)
 */

# ifdef CASE_INSENSITIVE
StrPtr::CaseUse StrPtr::caseUse = ST_WINDOWS;
# else
StrPtr::CaseUse StrPtr::caseUse = ST_UNIX;
# endif
bool		StrPtr::foldingSet = 0;

int 
StrPtr::CCompare( const char *sa, const char *sb )
{
	register const unsigned char *a = (const unsigned char *)sa;
	register const unsigned char *b = (const unsigned char *)sb;

	// Case significant part (fast)

	while( *a && *a == *b )
	    ++a, ++b;

	// Case ignored part (slow)

	while( *a && tolowerq( *a ) == tolowerq( *b ) )
	    ++a, ++b;

	return tolowerq( *a ) - tolowerq( *b );
}

int 
StrPtr::SCompare( const char *sa, const char *sb )
{
	register const unsigned char *a = (const unsigned char *)sa;
	register const unsigned char *b = (const unsigned char *)sb;

	// Case significant compare for UNIX

	while( *a && *a == *b )
	    ++a, ++b;

	int cmpCased = *a - *b;

	if( caseUse == ST_UNIX )
	    return cmpCased;

	// Proceed with case ignored compare for WINDOWS, HYBRID

	while( *a && tolowerq( *a ) == tolowerq( *b ) )
	    ++a, ++b;

	int cmpFolded = tolowerq( *a ) - tolowerq( *b );

	if( cmpFolded || caseUse == ST_WINDOWS ) 
	    return cmpFolded;

	// If case-ignored matched and HYBRID, now return first case mismatch

	return cmpCased;
}

/*
 * NCompare(),  original code taken from:
 *
 * strnatcmp.c -- Perform 'natural order' comparisons of strings in C.
 * Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net>
 *
 * see:   
 * http://computer.perforce.com/newwiki/index.php?title=3rd_Party_Software
 * 
 */

int
StrPtr::NCompare( const char *a, const char *b )
{
	const unsigned char *ca = (const unsigned char *)a;
	const unsigned char *cb = (const unsigned char *)b;
	int result;

	for( ;; )
	{
	    while( isAspace( ca ) ) ++ca;
	    while( isAspace( cb ) ) ++cb;

	    if( !*ca && !*cb )
	        return 0;

	    if ( isAdigit( ca ) && isAdigit( cb ) ) 
	    {
	        if( *ca == '0' || *cb == '0' ) 
	        {
	            if( ( result = NCompareLeft( ca, cb ) ) != 0 )
	                return result;
	        }
	        else 
	        {
	            if( ( result = NCompareRight( ca, cb ) ) != 0 )
	                return result;
	        }
	    }

	    // For now,  always ignore case

	    if( tolowerq( *ca ) < tolowerq( *cb ) ) return -1;
	    if( tolowerq( *ca ) > tolowerq( *cb ) ) return +1;

	    ++ca; ++cb;
	}
}

int 
StrPtr::NCompareLeft( const unsigned char *a, const unsigned char *b )
{
	for( ;; a++, b++ ) 
	{
	    if ( !isAdigit( a ) && !isAdigit( b ) ) 
	        return 0;
            else if( !isAdigit( a ) ) 
	        return -1;
	    else if( !isAdigit( b ) ) 
	        return +1;
	    else if( *a < *b ) 
	        return -1;
	    else if( *a > *b ) 
	        return +1;
	}  
	// NOT-REACHED!
}

int 
StrPtr::NCompareRight( const unsigned char *a, const unsigned char *b )
{
	int bias = 0;

	for( ;; a++, b++ ) 
	{
	    if( !isAdigit( a ) && !isAdigit( b ) ) 
	        return bias;
	    else if( !isAdigit( a ) ) 
	        return -1;
	    else if( !isAdigit( b ) ) 
	        return +1;
	    else if( *a < *b ) 
	        { if( !bias ) bias = -1; } 
	    else if (*a > *b) 
	        { if( !bias ) bias = +1; } 
	    else if ( !*a && !*b )
	        return bias;
	}
	// NOT-REACHED!
}

int     
StrPtr::SCompareN( const StrPtr &s ) const
{
	register const unsigned char *a = (const unsigned char *)buffer;
	register const unsigned char *b = (const unsigned char *)s.buffer;
	register int n = length;

	// Case significant compare for UNIX

	while( n && *a && *a == *b )
	    ++a, ++b, --n;

	if( !n )
	    return 0;

	int cmpCased = *a - *b;

	if( caseUse == ST_UNIX )
	    return cmpCased;

	// Proceed with case folding compare for WINDOWS, HYBRID

	while( n && *a && tolowerq( *a ) == tolowerq( *b ) )
	    ++a, ++b, --n;

	if( !n ) 
	    return 0;

	int cmpFolded = tolowerq( *a ) - tolowerq( *b );

	if( cmpFolded || caseUse == ST_WINDOWS ) 
	    return cmpFolded;

	// If case-ignored matched and HYBRID, now return first case mismatch

	return cmpCased;
}

int 
StrPtr::SCompareF( unsigned char a, unsigned char b )
{
	return caseUse == ST_UNIX ? a-b : tolowerq( a ) - tolowerq( b );
}

int 
StrPtr::SEqualF( unsigned char a, unsigned char b )
{
	return caseUse == ST_WINDOWS ? tolowerq( a ) == tolowerq( b ) : a==b;
}

/*
 * StrPtr::Atoi64() - our own strtoll()
 * StrPtr::Itoa64() - a cheesy sprintf()
 */

P4INT64
StrPtr::Atoi64( const char *p )
{
	P4INT64 value = 0;
	int sign = 0;

	/* Skip any leading blanks.  */

	while( isAspace( p ) )
	    ++p;

	/* Check for a sign.  */

	if( *p == '+' )
	    ++p;
	else if( *p == '-' ) 
	    ++p, sign = 1;

	/* Convert the digits */

	while( isAdigit( p ) )
	    value = value * 10 + ( *p++ - '0' );

	/* Add the sign */

	return sign ? -value : value;
}

char *
StrPtr::Itoa64( P4INT64 v, char *buffer )
{
	// Our own cheesy sprintf( buf, "%d", v );
	// We work backwards from the end of the buffer.
	// We return the beginning of the buffer

	int neg;
	if( neg = ( v < 0 ) )
	    v = -v;

	*--buffer = 0;

	do
	{
	    *--buffer = ( v % 10 ) + '0';
	    v = v / 10;
	}
	while( v );

	if( neg )
	    *--buffer = '-';

	return buffer;
}

char *
StrPtr::Itox( unsigned int v, char *buffer )
{
	// Our own cheesy sprintf( buf, "%x", v );
	// We work backwards from the end of the buffer.
	// We return the beginning of the buffer

	*--buffer = 0;

	do
	{
	    *--buffer = StrOps::OtoX( v & 0xf );
	    v >>= 4;
	}
	while( v );

	*--buffer = 'x';
	*--buffer = '0';

	return buffer;
}

// A legal number follows the simple grammar: ( )*[+-]\d+
//
bool
StrPtr::IsNumeric() const
{
	const char *p = Text();

	while( isAspace( p ) ) ++p;

	if( *p == '+' || *p == '-' ) ++p;

	const char *q = p;

	while( isAdigit( p ) ) ++p;

	return *p == '\0' && ( p - q ) > 0;
}

int
StrPtr::EndsWith( const char *s, int l ) const
{
	if( Length() < l )
	    return 0;

	const char *p = Text() + ( Length() - l );

	while( l-- > 0 )
	    if( *p++ != *s++ )
	        return 0;

	return 1;
}

/*
 * StrBuf::Append() - append to a buffer
 */

void
StrBuf::Append( const char *buf, p4size_t len )
{
	// This code is the functional equivalent of:
	//
	//	Extend( buf, len );
	//	Terminate();
	//

	char *s = Alloc( len + 1 );
	memmove( s, buf, len );
	s[ len ] = '\0'; 
	--length;
}

void
StrBuf::Append( const char *buf )
{
	// Use buf's EOS
	int len = strlen( buf ) + 1;
	memmove( Alloc( len ), buf, len );
	--length;
}

void
StrBuf::Append( const StrPtr *t )
{
	char *s = Alloc( t->Length() + 1 );
	memmove( s, t->Text(), t->Length() );
	s[ t->Length() ] = '\0'; 
	--length;
}

void
StrBuf::UAppend( const char *buf, p4size_t len )
{
	// This code is the functional equivalent of:
	//
	//	Extend( buf, len );
	//	Terminate();
	//

	char *s = Alloc( len + 1 );
	memcpy( s, buf, len );
	s[ len ] = '\0'; 
	--length;
}

void
StrBuf::UAppend( const char *buf )
{
	// Use buf's EOS
	int len = strlen( buf ) + 1;
	memcpy( Alloc( len ), buf, len );
	--length;
}

void
StrBuf::UAppend( const StrPtr *t )
{
	char *s = Alloc( t->Length() + 1 );
	memcpy( s, t->Text(), t->Length() );
	s[ t->Length() ] = '\0'; 
	--length;
}

/*
 * StrBuf::Grow() - grow a string buffer
 */

void
StrBuf::Grow( p4size_t oldlen )
{
	// Resize?
	// If Growing an existing string, double needed size ('cause
	// we'll grow again).  But if allocating a string to begin
	// with, add an extra byte for the null terminator that gets
	// tacked on.

	size = length;

	if( buffer != nullStrBuf )
	{
	    // geometric growth
	    size = ( size + 30 ) * 3 / 2;
	    char *o = buffer;
	    buffer = new char[ size ];
	    memcpy( buffer, o, oldlen );
	    delete []o;
	}
	else
	{
	    if( size < 4096 )
		size += 1;
	    buffer = new char[ size ];
	}
}

/*
 * StrBuf::BlockAppend() - append a large block to a buffer
 */

void
StrBuf::BlockAppend( const char *buf, p4size_t len )
{
	// This code is the functional equivalent of:
	//
	//	Extend( buf, len );
	//	Terminate();
	//
	// except that it doesn't over-allocate

	char *s = BlockAlloc( len + 1 );
	memmove( s, buf, len );
	s[ len ] = '\0'; 
	--length;
}

void
StrBuf::BlockAppend( const char *buf )
{
	// Use buf's EOS
	int len = strlen( buf ) + 1;
	memmove( BlockAlloc( len ), buf, len );
	--length;
}

void
StrBuf::BlockAppend( const StrPtr *t )
{
	char *s = BlockAlloc( t->Length() + 1 );
	memmove( s, t->Text(), t->Length() );
	s[ t->Length() ] = '\0'; 
	--length;
}

void
StrBuf::UBlockAppend( const char *buf, p4size_t len )
{
	// This code is the functional equivalent of:
	//
	//	Extend( buf, len );
	//	Terminate();
	//
	// except that it doesn't over-allocate

	char *s = BlockAlloc( len + 1 );
	memcpy( s, buf, len );
	s[ len ] = '\0'; 
	--length;
}

void
StrBuf::UBlockAppend( const char *buf )
{
	// Use buf's EOS
	int len = strlen( buf ) + 1;
	memcpy( BlockAlloc( len ), buf, len );
	--length;
}

void
StrBuf::UBlockAppend( const StrPtr *t )
{
	char *s = BlockAlloc( t->Length() + 1 );
	memcpy( s, t->Text(), t->Length() );
	s[ t->Length() ] = '\0'; 
	--length;
}

/*
 * StrBuf::Reserve() - Ensure that there is the requested  space
 * - don't over-allocate
 * - intended for large requests (>= 128 KB)
 */

void
StrBuf::Reserve( p4size_t oldlen )
{
	size = length;

	if( buffer != nullStrBuf )
	{
	    // geometric growth
	    char *o = buffer;
	    buffer = new char[ size ];
	    memcpy( buffer, o, oldlen );
	    delete []o;
	}
	else
	{
	    buffer = new char[ size ];
	}
}

void
StrBuf::Fill(const char *buf, p4size_t len)
{
    if (len > Length() )
        len = Length();
    memset( Text(), *buf, len);
}

void
StrBuf::TruncateBlanks()
{
	const char *p = buffer;
	const char *blank = 0;
	while( *p )
	{
	    if( *p != ' ' )
		blank = 0;
	    else if( !blank )
	        blank = p;

	    p++;
	}
	if( blank )
	{
	    SetEnd( (char *)blank );
	    Terminate();
	}
}

void
StrBuf::TrimBlanks()
{
	const char *start = buffer;
	const char *blank = 0;
	while( *start == ' ' )
	    start++;

	const char *p = start;
	while( *p )
	{
	    if( *p != ' ' )
		blank = 0;
	    else if( !blank )
	        blank = p;

	    p++;
	}
	int l = blank ? blank - start : p - start;
	if( l != length )
	{
	    memmove( buffer, start, l );
	    buffer[ l ] = '\0'; 
	    length = l;
	}
}

/**
 * StrBuf::CompressTail
 *
 * @brief Chop the common substring off tail and
 *        prepend with the offset value of the substring
 *        in the passed string.
 *
 * @param the string to match
 * @return offset into string argument
 *         -1 if error
 *         0 if not compressed
 */
int
StrBuf::EncodeTail( StrPtr &s, const char *replaceBytes )
{
	/*
	 * If replaceBytes is NULL, then ASSUME we shall compress
	 * the string. If replaceBytes is not NULL and the contents
	 * does not match the first two bytes of our buffer (e.g. "//")
	 * then we return assuming that the buffer has already been
	 * compressed. (This happens in DbOpen::Put since the record
	 * is written in two parts, the keys then the non key columns).
	 * To compress our string, compare a string with a supplied argument s.
	 * Find the count of trailing identical characters then
	 * truncate the common substring from the end. The
	 * get the offset value for the common string into the supplied
	 * argument, convert to 2-byte hex value and prepend to the leading
	 * non-matching piece. (max offset 255 for FF)
	 */

	// job075871: advance past the client name so that substring
	// is not considered in the match process.
	register int i = 2;
	while( i < s.Length() && s.buffer[i] != '/' )
	    i++;
	if( s.buffer[i] != '/' )
	    return 0;

	register int shortest = length < (s.Length() - i)? length : (s.Length() - i);
	// bail early if one string is empty
	if( shortest == 0 )
	    return 0;

	// reset counter
	i = 0;

	register const unsigned char *a = (const unsigned char *)(s.buffer + s.Length() - 1);
	register const unsigned char *b = (const unsigned char *)(buffer + length - 1);

	if( replaceBytes && strncmp(buffer, replaceBytes, 2) != 0)
		return 0;
	while( i < shortest && *a == *b )
	{
	    a--;
	    b--;
	    i++;
	}

	// sanity check
	if ( i > (length - 2) )
	{
	    // If client path contains full depot path
	    // back off a character so that we have enough
	    // room to store offset.
	    if( i == (length - 1) )
		i--;
	    else
		return -1;
	}

	/*
	 * get the offset value into the passed StrPtr where the
	 * common substring starts.
	 */
	register int v = s.Length() - i;

	if ((i == 0) || (v > 255) )
	{
	    // nothing to compress or
	    // the offset is too large do not compress the tail
	    return 0;
	}

	// chop off common part of path
	length -= i;
	Terminate();

	// prepend this value to the first 2 bytes of our buffer
	*(buffer+1) = StrOps::OtoX( v & 0xf ); v >>= 4;
	*(buffer)   = v ? StrOps::OtoX( v & 0xf ) : '0';
	return (s.Length() - i);
}

int
StrBuf::DecodeTail( StrPtr &s, const char *replaceBytes )
{
	/*
	 * If replaceBytes is NULL, then ASSUMES a compressed string.
	 * If replaceBytes is not NULL and contains the
	 * identical first two bytes of our buffer (e.g. "//")
	 * then we return assuming that the buffer is uncompressed.
	 * This StrBuf will may grow to accommodate the common
	 * substring which will be appended.  If replacedBytes
	 * is non-NULL then the first two bytes will replace
	 * those currently in the buffer.
	 */

	/* Sanity Checks */

	// bail early if target string is empty
	if( s.Length() == 0 )
	    return -1;

	// bail if not enough room for offset in source
	if( length < 2 )
	    return 0;

	// bail if passed replacement string and it has already been replaced
	if( replaceBytes && strlen(replaceBytes) >= 2 && strncmp(buffer, replaceBytes, 2) == 0)
	    return 0;

	int v = ( StrOps::XtoO( buffer[0] ) << 4 )
	      | ( StrOps::XtoO( buffer[1] ) << 0 );

	// santity check offset value
	if( v <= 2 || v > 255 )
	    return -1;

	if( replaceBytes && strlen(replaceBytes) >= 2 )
	{
	    buffer[0] = replaceBytes[0];
	    buffer[1] = replaceBytes[1];
	}
	if( s.Length() >= v )
	    Append( s.buffer + v );
	else
	    return -1;
	return v;
}

void
StrBuf::Compress( StrPtr *s )
{
	// Compare a string with a supplied argument 
	// Find the count of leading identical characters then
	// convert to 2-byte hex value and append the trailing 
	// non-matching piece.  (max 255 for FF)

	register const unsigned char *a = (const unsigned char *)buffer;
	register const unsigned char *b = (const unsigned char *)s->buffer;
	register int n = length;
	register int i = 0;

	while( n && *a && *a == *b && ++i < 256 )
	    ++a, ++b, --n;

	// leading characters in common (max 255)

	int v = ( length - n );
	int newSize = (length - v) + 4;
	int l = v;

	char *newBuffer = new char[ newSize ];
	*(newBuffer+1) = StrOps::OtoX( v & 0xf ); v >>= 4;
	*(newBuffer)   = v ? StrOps::OtoX( v & 0xf ) : '0';
	memcpy( newBuffer + 2, buffer + l, n );
	newBuffer[ n + 2 ] = '\0';

	delete []buffer;

	buffer = newBuffer;
	length = n + 2;
	size = newSize;
}

void
StrBuf::UnCompress( StrPtr *s )
{
	// ASSUMES a compressed string, note uncompressed size
	// could be much bigger since Alloc uses Grow(), but
	// typically this strbuf will be copied into another
	// record format where it will be correctly sized.

	register int n = length;

	int v = ( StrOps::XtoO( buffer[0] ) << 4 )
	      | ( StrOps::XtoO( buffer[1] ) << 0 );

	int extra = v - 2;

	if( extra > 0 )
	    Alloc( extra + 1 );

	memmove( buffer + v, buffer + 2, n - 2 );
	memcpy( buffer, s->buffer, v );
	buffer[ n + extra ] = '\0';
	length = n + extra;
}

void
StrFixed::SetBufferSize( p4size_t l )
{
	if( length == l )
	    return;

	delete []buffer;
	this->length = l;
	this->buffer = new char[ l ];
}

/*
 * Converts a 64-bit integer to a "human-readable" format. Is passed the
 * end of the buffer, works backward, returns the beginning of the buffer.
 */
char *
StrHuman::Itoa64( P4INT64 v, char *buffer, int f )
{
	static const char suffix[] = "BKMGTP";
	P4INT64 factor = f;
 
	P4INT64 current = v;
	int     remainder = 0;
	const char *s = suffix;
 
	for( ; s < suffix + 6; s++)
	{
	    if( current < factor )
	        break;
	    else
	    {
	        remainder = (current * 100 / factor ) % 100;
	        current = current / factor;
	    }
	}
	// remainder is a percentage (0 .. 99). Now round that to the nearest
	// 10, and if we get a 10, carry that up to increment current.

# if OS_NT
	// Visual Studio versions prior to VS2013 don't have "round"...
	remainder = (int)floor( ( remainder / (double)10 ) + 0.5 );
# else
	remainder = (int)round( remainder / (double)10 );
# endif

	*--buffer = 0;
	*--buffer = *s;
 
	if( remainder == 10 )
	    current++;
	else if( remainder )
	{
	    do
	    {
	        *--buffer = ( remainder % 10 ) + '0';
	        remainder = remainder / 10;
	    }
	    while( remainder );
 
	    *--buffer = '.';
	}
 
	do
	{
	    *--buffer = ( current % 10 ) + '0';
	    current = current / 10;
	}
	while( current );
 
	return buffer;
}
#	Change	User	Description
#1	21443	dannyz_snps	"Forking branch 2016-1 of perforce_software-p4 to dannyz_snps-p4."
//guest/perforce_software/p4/2016-1/support/strbuf.cc
#1	19472	Liz Lam	Initial add of the 2016.1 p4/p4api source code.