/*
* Copyright 2001 Perforce Software. All rights reserved.
*
* This file is part of Perforce - the FAST SCM System.
*/
// Forward declarations of classes defined elsewhere.  This header only
// names them (through pointers), so the complete types are not needed.
class CharSetUTF8Valid;
class CharStep;
class StrBuf;
class StrDict;
class StrPtr;
/*
* CharSetCvt.h - Character set converters
*/
/*
 * CharSetCvt - common base class of the character set converters
 * declared in this header.
 *
 * Instances are handed out by FindCvt() / FindCachedCvt(), or produced
 * from an existing converter with Clone() / ReverseCvt().  The copy
 * constructor and assignment operator are declared private so that
 * converters cannot be copied or assigned.
 */
class CharSetCvt : public CharSetApi {
    public:
        // Error codes.  lasterr starts out as 0, i.e. NONE.
        // NOTE(review): presumably these are the values LastErr() reports
        // -- confirm against the implementation.
        enum Errors {
            NONE = 0, NOMAPPING, PARTIALCHAR
        };

        // Look up a converter from charset 'from' to charset 'to'.
        // NOTE(review): ownership of the result is not stated here
        // (contrast with FindCachedCvt below) -- confirm with the
        // implementation.
        static CharSetCvt *FindCvt(CharSet from, CharSet to);

        // Do not delete the CharSetCvt* returned by FindCachedCvt.
        // Those converters are kept in a global cache.
        static CharSetCvt *FindCachedCvt(CharSet from, CharSet to);

        virtual ~CharSetCvt();

        // If you call ReverseCvt or Clone you must delete the returned
        // converter.
        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        // Convert bytes from [*sourcestart, sourceend) into
        // [*targetstart, targetend).
        // NOTE(review): the pointer-to-pointer arguments are presumably
        // advanced past the consumed/produced bytes -- confirm.
        virtual int Cvt(const char **sourcestart, const char *sourceend,
                        char **targetstart, char *targetend);

        virtual int LastErr();
        virtual void ResetErr();

        /* Convert buffer into an allocated buffer; caller must free
           the result. */
        virtual char *CvtBuffer(const char *, int len, int *retlen = 0);

        /* Convert buffer into a managed buffer; caller must copy the
           result out before calling this again. */
        virtual const char *FastCvt(const char *, int len, int *retlen = 0);

        /* Convert buffer into a managed buffer; caller must copy the
           result out before calling this again.  Substitutes '?' for
           bad mappings. */
        virtual const char *FastCvtQues(const char *, int len, int *retlen = 0);

        virtual void IgnoreBOM();

        // Reset the counters to their initial state (line 1, char 0).
        void ResetCnt() { linecnt = 1; charcnt = 0; }

        // Read-only accessors for the counters.  They are const so that
        // they can also be used through a const converter.
        int LineCnt() const { return linecnt; }
        int CharCnt() const { return charcnt; }

        static int Utf8Fold( const StrPtr *, StrBuf * );

        // One entry of a character mapping table: code 'cfrom' maps to
        // code 'cto'.
        struct MapEnt {
            unsigned short cfrom, cto;
        };

        // Lookup tables defined elsewhere.
        // NOTE(review): by name these support UTF-8 decoding -- confirm.
        static char bytesFromUTF8[];
        static unsigned long offsetsFromUTF8[];
        static unsigned long minimumFromUTF8[];

    protected:
        friend class CharSetCvtCache; // for the following default constructor

        CharSetCvt() : lasterr(0), linecnt(1), charcnt(0), fastbuf(0), fastsize(0)
        {}

        int lasterr;
        int linecnt;
        int charcnt;

        // Helpers for checking / dumping mapping tables.
        void doverify( MapEnt *, int, MapEnt *, int );
        void dodump( MapEnt *, int );
        virtual void printmap( unsigned short, unsigned short, unsigned short );
        virtual void printmap( unsigned short, unsigned short );

        virtual CharStep *FromCharStep(char *);

        static unsigned short MapThru( unsigned short, const MapEnt *,
                                       int, unsigned short );

    private:
        // NOTE(review): presumably the managed buffer behind FastCvt /
        // FastCvtQues and its size -- confirm.
        char *fastbuf;
        int fastsize;

        CharSetCvt(const CharSetCvt &); // to prevent copies
        void operator =(const CharSetCvt &); // to prevent assignment
};
/*
 * CharSetCvtFromUTF8 - intermediate base class shared by the UTF8-to-X
 * converters and the UTF16/UTF32 family declared in this header.
 * It adds the checkBOM flag (0 after construction) and overrides
 * IgnoreBOM() and FromCharStep().
 */
class CharSetCvtFromUTF8 : public CharSetCvt {
    protected:
        // Only derived classes may construct this.
        CharSetCvtFromUTF8()
            : checkBOM(0)
        {
        }

        virtual void IgnoreBOM();
        virtual CharStep *FromCharStep( char * );

        int checkBOM;
};
/*
 * CharSetCvtUTF8UTF8 - UTF8 to UTF8 converter.  Holds a direction, a
 * flags word and a pointer to a CharSetUTF8Valid validator.
 */
class CharSetCvtUTF8UTF8 : public CharSetCvtFromUTF8 {
    public:
        CharSetCvtUTF8UTF8(int dir, int f);
        ~CharSetCvtUTF8UTF8();

        // dir: direction, 1 is to the client, -1 is to the server.
        // f:   flags, taken from the values below.
#define UTF8_WRITE_BOM 1
#define UTF8_VALID_CHECK 2

        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);

    private:
        int direction;
        int flags;
        CharSetUTF8Valid *validator;
};
/*
 * CharSetCvtUTF16 - shared base of the CharSetCvtUTF816, UTF168, UTF832
 * and UTF328 converters.  Everything here is protected; the class is
 * only usable through those derived classes.
 */
class CharSetCvtUTF16 : public CharSetCvtFromUTF8 {
    protected:
        // Derived classes pass their (i, b) constructor arguments here.
        CharSetCvtUTF16(int, int);

        // NOTE(review): presumably byte-order and BOM handling state --
        // confirm against the implementation.
        int invert;
        int fileinvert;
        int bom;

        virtual void IgnoreBOM();
};
/*
 * CharSetCvtUTF816 - converter built on CharSetCvtUTF16.
 * NOTE(review): by name, UTF8 to UTF16 -- confirm.
 */
class CharSetCvtUTF816 : public CharSetCvtUTF16 {
    public:
        // Both arguments are forwarded unchanged to CharSetCvtUTF16.
        CharSetCvtUTF816(int i = -1, int b = 0)
            : CharSetCvtUTF16(i, b)
        {
        }

        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);
};
/*
 * CharSetCvtUTF168 - converter built on CharSetCvtUTF16.
 * NOTE(review): by name, UTF16 to UTF8 -- confirm.
 */
class CharSetCvtUTF168 : public CharSetCvtUTF16 {
    public:
        // Both arguments are forwarded unchanged to CharSetCvtUTF16.
        CharSetCvtUTF168(int i = -1, int b = 0)
            : CharSetCvtUTF16(i, b)
        {
        }

        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);
};
/*
 * CharSetCvtUTF832 - converter built on CharSetCvtUTF16.
 * NOTE(review): by name, UTF8 to UTF32 -- confirm.
 */
class CharSetCvtUTF832 : public CharSetCvtUTF16 {
    public:
        // Both arguments are forwarded unchanged to CharSetCvtUTF16.
        CharSetCvtUTF832(int i = -1, int b = 0)
            : CharSetCvtUTF16(i, b)
        {
        }

        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);
};
/*
 * CharSetCvtUTF328 - converter built on CharSetCvtUTF16.
 * NOTE(review): by name, UTF32 to UTF8 -- confirm.
 */
class CharSetCvtUTF328 : public CharSetCvtUTF16 {
    public:
        // Both arguments are forwarded unchanged to CharSetCvtUTF16.
        CharSetCvtUTF328(int i = -1, int b = 0)
            : CharSetCvtUTF16(i, b)
        {
        }

        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);
};
/*
 * CharSetCvtUTF8to8859_1 - UTF8 to ISO 8859-1 converter (per its name).
 * Overrides only Clone(), ReverseCvt() and Cvt().
 */
class CharSetCvtUTF8to8859_1 : public CharSetCvtFromUTF8 {
    public:
        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);
};
/*
 * CharSetCvt8859_1toUTF8 - ISO 8859-1 to UTF8 converter (per its name).
 * Derives directly from CharSetCvt and overrides only Clone(),
 * ReverseCvt() and Cvt().
 */
class CharSetCvt8859_1toUTF8 : public CharSetCvt {
    public:
        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);
};
/*
 * CharSetCvtUTF8toShiftJis - UTF8 to Shift-JIS converter (per its name),
 * backed by the static UCS2toShiftJis mapping table.
 */
class CharSetCvtUTF8toShiftJis : public CharSetCvtFromUTF8 {
    public:
        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);

    private:
        static MapEnt UCS2toShiftJis[];

        // The free functions verifymaps() and dumpmaps() get access to
        // the private table and the reporting helpers below.
        friend void verifymaps();
        friend void dumpmaps();

        void mapreport(MapEnt *, int);
        void mapreport();

        virtual void printmap( unsigned short, unsigned short, unsigned short );
        virtual void printmap( unsigned short, unsigned short );

        // NOTE(review): presumably the entry count of UCS2toShiftJis --
        // confirm.
        static int MapCount();
};
/*
 * CharSetCvtShiftJistoUTF8 - Shift-JIS to UTF8 converter (per its name),
 * backed by the static ShiftJistoUCS2 mapping table.
 */
class CharSetCvtShiftJistoUTF8 : public CharSetCvt {
    public:
        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);

        virtual CharStep *FromCharStep( char * );

    private:
        static MapEnt ShiftJistoUCS2[];

        // The free functions verifymaps() and dumpmaps() get access to
        // the private table and the reporting helpers below.
        friend void verifymaps();
        friend void dumpmaps();

        void mapreport(MapEnt *, int);
        void mapreport();

        virtual void printmap( unsigned short, unsigned short, unsigned short );
        virtual void printmap( unsigned short, unsigned short );

        // NOTE(review): presumably the entry count of ShiftJistoUCS2 --
        // confirm.
        static int MapCount();
};
/*
 * CharSetCvtUTF8toEUCJP - UTF8 to EUC-JP converter (per its name),
 * backed by the static UCS2toEUCJP mapping table.
 */
class CharSetCvtUTF8toEUCJP : public CharSetCvtFromUTF8 {
    public:
        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);

    private:
        static MapEnt UCS2toEUCJP[];

        // The free functions verifymaps() and dumpmaps() get access to
        // the private table and the reporting helpers below.
        friend void verifymaps();
        friend void dumpmaps();

        void mapreport(MapEnt *, int);
        void mapreport();

        virtual void printmap( unsigned short, unsigned short, unsigned short );
        virtual void printmap( unsigned short, unsigned short );

        // NOTE(review): presumably the entry count of UCS2toEUCJP --
        // confirm.
        static int MapCount();
};
/*
 * CharSetCvtEUCJPtoUTF8 - EUC-JP to UTF8 converter (per its name),
 * backed by the static EUCJPtoUCS2 mapping table.
 */
class CharSetCvtEUCJPtoUTF8 : public CharSetCvt {
    public:
        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);

        virtual CharStep *FromCharStep( char * );

    private:
        static MapEnt EUCJPtoUCS2[];

        // The free functions verifymaps() and dumpmaps() get access to
        // the private table and the reporting helpers below.
        friend void verifymaps();
        friend void dumpmaps();

        void mapreport(MapEnt *, int);
        void mapreport();

        virtual void printmap( unsigned short, unsigned short, unsigned short );
        virtual void printmap( unsigned short, unsigned short );

        // NOTE(review): presumably the entry count of EUCJPtoUCS2 --
        // confirm.
        static int MapCount();
};
/*
 * SimpleCharSet - table bundle consumed by CharSetCvtUTF8toSimple and
 * CharSetCvtSimpletoUTF8, which each hold a pointer to one.
 * NOTE(review): presumably toMap/toMapSize serve the UTF8-to-charset
 * direction and fromMap/fromOffset the reverse -- confirm.
 */
struct SimpleCharSet {
    const CharSetCvt::MapEnt   *toMap;
    int                         toMapSize;
    const unsigned short       *fromMap;
    int                         fromOffset;
};
/*
 * CharSetCvtUTF8toSimple - UTF8 to "simple" charset converter driven by
 * a SimpleCharSet description.
 */
class CharSetCvtUTF8toSimple : public CharSetCvtFromUTF8 {
    public:
        // NOTE(review): the int presumably selects one of several
        // predefined SimpleCharSet tables -- confirm.
        CharSetCvtUTF8toSimple(int);

        // Use the caller supplied description; only the pointer is kept.
        CharSetCvtUTF8toSimple(const SimpleCharSet *s)
            : charinfo(s)
        {
        }

        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);

    private:
        const SimpleCharSet *charinfo;
};
/*
 * CharSetCvtSimpletoUTF8 - "simple" charset to UTF8 converter driven by
 * a SimpleCharSet description.
 */
class CharSetCvtSimpletoUTF8 : public CharSetCvt {
    public:
        // NOTE(review): the int presumably selects one of several
        // predefined SimpleCharSet tables -- confirm.
        CharSetCvtSimpletoUTF8(int);

        // Use the caller supplied description; only the pointer is kept.
        CharSetCvtSimpletoUTF8(const SimpleCharSet *s)
            : charinfo(s)
        {
        }

        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);

    private:
        const SimpleCharSet *charinfo;
};
/*
 * CharSetCvtUTF8toCp - shared base of the UTF8 to code page converters
 * (Cp949, Cp936, Cp950 below).  The derived class supplies the mapping
 * table and its size; Cvt() is implemented once here.
 */
class CharSetCvtUTF8toCp : public CharSetCvtFromUTF8 {
    protected:
        // Remember the table pointer and size handed in by the subclass.
        CharSetCvtUTF8toCp( const MapEnt *tMap, int toSz )
            : toMap(tMap),
              toMapSize(toSz)
        {
        }

    public:
        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);

    private:
        const MapEnt *toMap;
        int toMapSize;

        virtual void printmap( unsigned short, unsigned short, unsigned short );
        virtual void printmap( unsigned short, unsigned short );
};
/*
 * CharSetCvtUTF8toCp949 - UTF8 to code page 949 converter.  Hands its
 * UCS2toCp949 table and MapCount() to the CharSetCvtUTF8toCp base.
 */
class CharSetCvtUTF8toCp949 : public CharSetCvtUTF8toCp
{
    public:
        CharSetCvtUTF8toCp949()
            : CharSetCvtUTF8toCp( UCS2toCp949, MapCount() )
        {
        }

        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        // Passed to the base class as the size of UCS2toCp949.
        static int MapCount();

    private:
        static MapEnt UCS2toCp949[];

        // verifymaps() and dumpmaps() may use the private members.
        friend void verifymaps();
        friend void dumpmaps();

        void mapreport( MapEnt *, int );
        void mapreport();
};
/*
 * CharSetCvtUTF8toCp936 - UTF8 to code page 936 converter.  Hands its
 * UCS2toCp936 table and MapCount() to the CharSetCvtUTF8toCp base.
 */
class CharSetCvtUTF8toCp936 : public CharSetCvtUTF8toCp
{
    public:
        CharSetCvtUTF8toCp936()
            : CharSetCvtUTF8toCp( UCS2toCp936, MapCount() )
        {
        }

        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        // Passed to the base class as the size of UCS2toCp936.
        static int MapCount();

    private:
        static MapEnt UCS2toCp936[];

        // verifymaps() and dumpmaps() may use the private members.
        friend void verifymaps();
        friend void dumpmaps();

        void mapreport( MapEnt *, int );
        void mapreport();
};
/*
 * CharSetCvtUTF8toCp950 - UTF8 to code page 950 converter.  Hands its
 * UCS2toCp950 table and MapCount() to the CharSetCvtUTF8toCp base.
 */
class CharSetCvtUTF8toCp950 : public CharSetCvtUTF8toCp
{
    public:
        CharSetCvtUTF8toCp950()
            : CharSetCvtUTF8toCp( UCS2toCp950, MapCount() )
        {
        }

        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        // Passed to the base class as the size of UCS2toCp950.
        static int MapCount();

    private:
        static MapEnt UCS2toCp950[];

        // verifymaps() and dumpmaps() may use the private members.
        friend void verifymaps();
        friend void dumpmaps();

        void mapreport( MapEnt *, int );
        void mapreport();
};
/*
 * CharSetCvtCptoUTF8 - shared base of the code page to UTF8 converters
 * (Cp949, Cp936, Cp950 below).  The derived class supplies the mapping
 * table and its size and must implement isDoubleByte(); Cvt() is
 * implemented once here.
 */
class CharSetCvtCptoUTF8 : public CharSetCvt {
    protected:
        // Remember the table pointer and size handed in by the subclass.
        CharSetCvtCptoUTF8( const MapEnt *tMap, int toSz )
            : toMap(tMap),
              toMapSize(toSz)
        {
        }

    public:
        virtual int Cvt(const char **sourcestart,
                        const char *sourceend,
                        char **targetstart,
                        char *targetend);

    private:
        const MapEnt *toMap;
        int toMapSize;

        // Pure virtual: each concrete code page provides its own test.
        // NOTE(review): presumably nonzero when leadByte starts a
        // two-byte character -- confirm.
        virtual int isDoubleByte( int leadByte ) = 0;

        virtual void printmap( unsigned short, unsigned short, unsigned short );
        virtual void printmap( unsigned short, unsigned short );
};
/*
 * CharSetCvtCp949toUTF8 - code page 949 to UTF8 converter.  Hands its
 * Cp949toUCS2 table and MapCount() to the CharSetCvtCptoUTF8 base and
 * implements the isDoubleByte() hook that base requires.
 */
class CharSetCvtCp949toUTF8 : public CharSetCvtCptoUTF8
{
    public:
        CharSetCvtCp949toUTF8() : CharSetCvtCptoUTF8( Cp949toUCS2, MapCount() ) {}

        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        // Passed to the base class as the size of Cp949toUCS2.
        static int MapCount();

        // Overrides CharSetCvt::FromCharStep.  It was already virtual
        // by inheritance; the keyword is spelled out to match the other
        // FromCharStep overrides in this header.
        virtual CharStep *FromCharStep( char * );

    private:
        static MapEnt Cp949toUCS2[];

        // verifymaps() and dumpmaps() may use the private members.
        friend void verifymaps();
        friend void dumpmaps();

        virtual int isDoubleByte( int leadByte );

        void mapreport(MapEnt *, int);
        void mapreport();
};
/*
 * CharSetCvtCp936toUTF8 - code page 936 to UTF8 converter.  Hands its
 * Cp936toUCS2 table and MapCount() to the CharSetCvtCptoUTF8 base and
 * implements the isDoubleByte() hook that base requires.
 */
class CharSetCvtCp936toUTF8 : public CharSetCvtCptoUTF8
{
    public:
        CharSetCvtCp936toUTF8() : CharSetCvtCptoUTF8( Cp936toUCS2, MapCount() ) {}

        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        // Passed to the base class as the size of Cp936toUCS2.
        static int MapCount();

        // Overrides CharSetCvt::FromCharStep.  It was already virtual
        // by inheritance; the keyword is spelled out to match the other
        // FromCharStep overrides in this header.
        virtual CharStep *FromCharStep( char * );

    private:
        static MapEnt Cp936toUCS2[];

        // verifymaps() and dumpmaps() may use the private members.
        friend void verifymaps();
        friend void dumpmaps();

        virtual int isDoubleByte( int leadByte );

        void mapreport(MapEnt *, int);
        void mapreport();
};
/*
 * CharSetCvtCp950toUTF8 - code page 950 to UTF8 converter.  Hands its
 * Cp950toUCS2 table and MapCount() to the CharSetCvtCptoUTF8 base and
 * implements the isDoubleByte() hook that base requires.
 */
class CharSetCvtCp950toUTF8 : public CharSetCvtCptoUTF8
{
    public:
        CharSetCvtCp950toUTF8() : CharSetCvtCptoUTF8( Cp950toUCS2, MapCount() ) {}

        virtual CharSetCvt *Clone();
        virtual CharSetCvt *ReverseCvt();

        // Passed to the base class as the size of Cp950toUCS2.
        static int MapCount();

        // Overrides CharSetCvt::FromCharStep.  It was already virtual
        // by inheritance; the keyword is spelled out to match the other
        // FromCharStep overrides in this header.
        virtual CharStep *FromCharStep( char * );

    private:
        static MapEnt Cp950toUCS2[];

        // verifymaps() and dumpmaps() may use the private members.
        friend void verifymaps();
        friend void dumpmaps();

        virtual int isDoubleByte( int leadByte );

        void mapreport(MapEnt *, int);
        void mapreport();
};