RcsReader.java #1

package com.perforce.cvs.parser;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.perforce.common.asset.ContentProperty;
import com.perforce.config.CFG;
import com.perforce.config.Config;
import com.perforce.config.ConfigException;
import com.perforce.cvs.parser.rcstypes.RcsObject;
import com.perforce.cvs.parser.rcstypes.RcsObjectAdmin;
import com.perforce.cvs.parser.rcstypes.RcsObjectBlock;
import com.perforce.cvs.parser.rcstypes.RcsObjectDelta;
import com.perforce.cvs.parser.rcstypes.RcsObjectNum;

public class RcsReader {

	private Logger logger = LoggerFactory.getLogger(RcsReader.class);

	// the RCS ',v' file handed to the constructor
	private File rcsFile;
	// path derived from rcsFile by parseBasePath()
	private String rcsPath;
	// admin phrases collected by parseRcsAdmin()
	private RcsObjectAdmin rcsAdmin;
	// the 'desc' block; only assigned when the constructor is asked for content
	private RcsObject rcsDesc;
	// delta blocks keyed by the string form of their ID
	private Map<String, RcsObjectDelta> rcsDeltas = new HashMap<String, RcsObjectDelta>();
	// file properties; EXECUTE is added when the file is executable
	private List<ContentProperty> props = new ArrayList<ContentProperty>();

	// reader over the RCS file; closed at the end of the constructor
	private CvsLineReader cvsLineReader;

	/**
	 * Reads and parses the given RCS ',v' file.
	 * 
	 * The admin section is parsed first, then blocks are read one at a time
	 * until an empty block is returned. Blocks carrying a date are stored as
	 * deltas; later blocks carrying a log or text are merged into the stored
	 * delta with the same ID.
	 * 
	 * @param file
	 *            the RCS file to read
	 * @param getContent
	 *            when false, parsing stops as soon as the 'desc' block is
	 *            reached, so no description, logs or text are loaded
	 * @throws Exception
	 *             if the file cannot be read or parsed
	 */
	public RcsReader(File file, boolean getContent) throws Exception {
		rcsFile = file;
		rcsPath = parseBasePath();
		rcsAdmin = new RcsObjectAdmin();
		cvsLineReader = new CvsLineReader(rcsFile.toString());

		// make sure the reader is released even when parsing fails part way
		try {
			// parse permission bits
			if (file.canExecute()) {
				props.add(ContentProperty.EXECUTE);
			}

			parseRcsAdmin();
			if (logger.isTraceEnabled()) {
				logger.trace(rcsAdmin.toString());
			}

			RcsObjectDelta rcsObject = parseRcsDeltas();
			while (!rcsObject.isEmpty()) {

				if (rcsObject.containsKey(RcsSchema.DATE)) {
					rcsDeltas.put(rcsObject.getID().toString(), rcsObject);
				}

				if (rcsObject.containsKey(RcsSchema.DESC)) {
					// Exit early if content is not needed
					if (!getContent) {
						break;
					}
					rcsDesc = rcsObject;
				}

				// find log and add to the stored delta with the same ID key;
				// the stored object is updated in place, no re-insert needed
				if (rcsObject.containsKey(RcsSchema.LOG)) {
					RcsObjectDelta set = rcsDeltas.get(rcsObject.getID().toString());
					if (set != null) {
						set.add(RcsSchema.LOG, rcsObject.getLog());
					}
				}

				// same for the text block
				if (rcsObject.containsKey(RcsSchema.TEXT)) {
					RcsObjectDelta set = rcsDeltas.get(rcsObject.getID().toString());
					if (set != null) {
						set.add(RcsSchema.TEXT, rcsObject.getBlock());
					}
				}

				rcsObject = parseRcsDeltas();
			}
		} finally {
			cvsLineReader.close();
		}
	}

	/**
	 * Returns the admin object that was filled while parsing the RCS file.
	 * 
	 * @return the admin object created in the constructor
	 */
	public RcsObjectAdmin getAdmin() {
		return rcsAdmin;
	}

	/**
	 * Builds a list with one RcsObjectNum for every delta key held by this
	 * reader.
	 * 
	 * @return a newly allocated list of IDs
	 */
	public ArrayList<RcsObjectNum> getIDs() {
		ArrayList<RcsObjectNum> ids = new ArrayList<RcsObjectNum>(rcsDeltas.size());
		for (Map.Entry<String, RcsObjectDelta> entry : rcsDeltas.entrySet()) {
			ids.add(new RcsObjectNum(entry.getKey()));
		}
		return ids;
	}

	/**
	 * Looks up the delta stored under the string form of the given ID.
	 * 
	 * @param id
	 *            the ID to look up
	 * @return the matching delta, or null if none is stored
	 */
	public RcsObjectDelta getDelta(RcsObjectNum id) {
		return rcsDeltas.get(id.toString());
	}

	/**
	 * Returns the 'desc' block of the RCS file.
	 * 
	 * @return the description block, or null when none was stored (for
	 *         example when the reader was constructed without content)
	 */
	public RcsObject getDesc() {
		return rcsDesc;
	}

	/**
	 * Returns a File object to the RCS ',v' file.
	 * 
	 * @return the file that was passed to the constructor
	 */
	public File getRcsFile() {
		return rcsFile;
	}

	/**
	 * Reads lines up to the first empty line (or end of input) and feeds them
	 * to parsePhrase() for the admin object. Lines are joined together until
	 * one ends with ';', at which point the joined text is parsed.
	 * 
	 * @throws Exception
	 *             if reading or phrase parsing fails
	 */
	private void parseRcsAdmin() throws Exception {
		StringBuilder pending = new StringBuilder();

		for (String current = getLine(); current != null; current = getLine()) {
			// an empty line marks the end of the admin section
			if (current.isEmpty()) {
				if (logger.isTraceEnabled()) {
					logger.trace("end of admin");
				}
				return;
			}

			// collect the line; parse once the phrase list is terminated
			pending.append(current);
			if (current.endsWith(";")) {
				parsePhrase(pending.toString(), rcsAdmin);
				pending.setLength(0);
			}
		}
	}

	/**
	 * Fetches the next line from the underlying line reader.
	 * 
	 * @return whatever the reader's getLine() returns
	 * @throws IOException
	 *             if the reader fails
	 */
	private String getLine() throws IOException {
		return cvsLineReader.getLine();
	}

	/**
	 * Reads the next block from the file.
	 * 
	 * Lines are skipped until one contains a '.' (stored as the block ID) or
	 * starts with "desc" (the following log is stored as the description).
	 * The lines after that are parsed as phrases until an empty line or the
	 * end of input.
	 * 
	 * @return the parsed block; it has no entries when the input ran out
	 *         before a block header was found
	 * @throws Exception
	 *             if reading or phrase parsing fails
	 */
	private RcsObjectDelta parseRcsDeltas() throws Exception {
		RcsObjectDelta delta = new RcsObjectDelta();
		StringBuilder pending = new StringBuilder();

		// locate the block header; the line after the header is always
		// fetched before leaving this loop
		String current = getLine();
		boolean headerFound = false;
		while (current != null && !headerFound) {
			if (current.contains(".")) {
				// block might be delta e.g. 1.1
				delta.add(RcsSchema.ID, current);
				headerFound = true;
			} else if (current.startsWith("desc")) {
				// block might be a description e.g. desc
				delta.add(RcsSchema.DESC, parseLog());
				headerFound = true;
			}
			current = getLine();
		}

		for (; current != null; current = getLine()) {
			// an empty line ends the block
			if (current.isEmpty()) {
				if (logger.isTraceEnabled()) {
					logger.trace("end of delta");
				}
				return delta;
			}

			// until a date has been seen every line is parsed on its own
			if (!delta.containsKey(RcsSchema.DATE)) {
				parsePhrase(current, delta);
				continue;
			}

			// afterwards lines are joined until one ends with ';'
			pending.append(current);
			if (current.endsWith(";")) {
				parsePhrase(pending.toString(), delta);
				pending.setLength(0);
			}
		}
		return delta;
	}

	/**
	 * Splits a line into ';' separated phrases and stores a value for each
	 * phrase on the given RCS object. The first whitespace separated token of
	 * a phrase is turned into the key via RcsSchema.parse(); how the value is
	 * obtained depends on that key.
	 * 
	 * @param line
	 *            one or more phrases
	 * @param rcs
	 *            the object the key/value pairs are added to
	 * @throws Exception
	 *             if reading a log or text block fails
	 */
	private void parsePhrase(String line, RcsObject rcs) throws Exception {

		// Split on ';' -- the delimiter itself is not kept in the phrases
		String[] phrases = line.split(";");
		for (String phrase : phrases) {

			// tidy up phrase
			phrase = phrase.trim();

			// split into key/value pairs
			String args[] = phrase.split("\\s+");

			// find key and detect if there is a value
			RcsSchema type = RcsSchema.parse(args[0]);

			// store remainder (measured on the whole line, not the phrase;
			// only used by the TEXT case)
			int pos = line.indexOf(args[0]) + args[0].length();
			String remainder = line.substring(pos);

			// process value for key
			switch (type) {
			case LOG:
				// the log body is read from the following line(s)
				String log = parseLog();
				rcs.add(type, log);
				break;

			case TEXT:
				// hand back whatever followed the keyword so that parseText()
				// sees it as the start of the text block
				cvsLineReader.returnLine(remainder);
				RcsObjectBlock block = parseText();
				rcs.add(type, block);
				break;

			case SYMBOLS:
				// the whole phrase (keyword included) is stored
				rcs.add(type, phrase);
				break;

			case BRANCHES:
				// all tokens after the keyword, each followed by a space
				StringBuffer sb = new StringBuffer();
				for (int i = 1; i < args.length; i++) {
					sb.append(args[i] + " ");
				}
				rcs.add(type, sb.toString());
				break;

			case COMMENT:
			case EXPAND:
				if (line.contains("@")) {
					// value is everything between the first and last '@' of
					// the whole line; returning here means any later phrases
					// on the same line are not processed
					int begin = line.indexOf("@") + 1;
					int end = line.lastIndexOf("@");
					String comment = line.substring(begin, end);
					rcs.add(type, comment);
					return;
				} else {
					rcs.add(type, "");
				}
				// NOTE(review): there is no break here, so without an '@'
				// control falls through into BRANCH and a second add() is
				// made when a value token exists -- confirm this is intended

			case BRANCH:
				if (args.length > 1) {
					// parse remainder for values
					String r = args[1];
					r = r.trim();
					rcs.add(type, r);
				}
				break;

			default:
				if (args.length > 1) {
					// parse remainder for values
					String r = args[1];
					r = r.trim();
					rcs.add(type, r);
				} else {
					rcs.add(type, "");
				}
				break;
			}
		}
	}

	/**
	 * Reads an '@' delimited string starting on the next line and returns it
	 * with every "@@" decoded to a single '@'. Lines are joined with '\n'.
	 * 
	 * Anything that follows the terminating '@' on the same line is handed
	 * back to the line reader unmodified, so that escaped "@@" sequences in
	 * it can still be decoded by whoever reads it next.
	 * 
	 * @return the decoded string, or null if there is no next line or it does
	 *         not start with '@'
	 * @throws Exception
	 *             if reading fails
	 */
	private String parseLog() throws Exception {
		StringBuilder log = new StringBuilder();

		String line = getLine();
		// guard against end of input as well as a missing opening '@'
		if (line == null || !line.startsWith("@")) {
			return null;
		}

		// remove starting '@'
		line = line.substring(1);

		while (line != null) {
			int term = findLogTerminator(line);
			if (term >= 0) {
				// return the text after the terminator, taken from the
				// original line and not from a copy with "@@" substituted
				if (term < line.length() - 1) {
					cvsLineReader.returnLine(line.substring(term + 1));
				}
				log.append(line.substring(0, term).replace("@@", "@"));
				break;
			}

			// no terminator on this line: keep it and carry on
			log.append(line.replace("@@", "@"));
			log.append("\n");
			line = getLine();
		}
		return log.toString();
	}

	/**
	 * Finds the first '@' in the line that is not part of an "@@" pair,
	 * scanning from left to right.
	 * 
	 * @param line
	 *            the line to scan
	 * @return index of the terminating '@', or -1 if there is none
	 */
	private static int findLogTerminator(String line) {
		int i = 0;
		while (i < line.length()) {
			if (line.charAt(i) != '@') {
				i++;
			} else if (i + 1 < line.length() && line.charAt(i + 1) == '@') {
				// escaped '@', skip the pair
				i += 2;
			} else {
				return i;
			}
		}
		return -1;
	}

	/**
	 * Check the buffer starts with an '@' and return a buffer less the starting
	 * '@', else null.
	 * 
	 * @param buf
	 *            the buffer to check; may be null or empty
	 * @return a new buffer holding everything after the leading '@', or null
	 *         if the buffer is null, empty or does not start with '@'
	 */
	private ByteArrayOutputStream startAtpersand(ByteArrayOutputStream buf) {
		// nothing to inspect: avoids a NullPointerException or an
		// ArrayIndexOutOfBoundsException on b[0]
		if (buf == null || buf.size() == 0) {
			return null;
		}

		byte[] b = buf.toByteArray();
		if (b[0] != '@') {
			return null;
		}

		ByteArrayOutputStream out = new ByteArrayOutputStream();
		out.write(b, 1, b.length - 1);
		return out;
	}

	/**
	 * Copies the buffer while dropping the second '@' of every "@@" pair, so
	 * that "@@" becomes "@" and "@@@@" becomes "@@".
	 * 
	 * @param buf
	 *            the encoded bytes
	 * @return a new buffer with the decoded bytes
	 */
	private ByteArrayOutputStream decodeAtpersand(ByteArrayOutputStream buf) {
		byte[] raw = buf.toByteArray();
		ByteArrayOutputStream decoded = new ByteArrayOutputStream();

		// true only when the previously written byte was an '@'
		boolean pairOpen = false;
		for (int i = 0; i < raw.length; i++) {
			byte current = raw[i];
			if (pairOpen && current == '@') {
				// second half of a pair: skip it and start afresh
				pairOpen = false;
				continue;
			}
			decoded.write(current);
			pairOpen = (current == '@');
		}
		return decoded;
	}

	/**
	 * Reports whether the buffer holds an odd number of '@' bytes. Buffers
	 * shorter than two bytes always yield false.
	 * 
	 * @param buf
	 *            the raw (still encoded) bytes of a line
	 * @return true if the number of '@' bytes is odd
	 * @throws ConfigException
	 *             declared for callers; not raised by the code in this method
	 */
	private boolean endAtpersand(ByteArrayOutputStream buf)
			throws ConfigException {
		// too short to be considered
		if (buf.size() < 2) {
			return false;
		}

		// flip on every '@'; ends up true for an odd number of them
		boolean odd = false;
		for (byte b : buf.toByteArray()) {
			if (b == '@') {
				odd = !odd;
			}
		}
		return odd;
	}

	/**
	 * Reads an '@' delimited text block from the reader's raw data.
	 * 
	 * Every chunk is decoded ("@@" to "@") and added to the result. The chunk
	 * for which endAtpersand() reports true is the last one: its final two
	 * bytes are cut off and it is only added when something is left.
	 * 
	 * @return the collected chunks, or null if the first chunk does not start
	 *         with '@'
	 * @throws Exception
	 *             if reading fails
	 */
	private RcsObjectBlock parseText() throws Exception {
		RcsObjectBlock block = new RcsObjectBlock();

		// only maintained when trace logging is on
		int total = 0;
		StringBuilder trace = new StringBuilder();

		// check and remove starting '@'
		ByteArrayOutputStream raw = startAtpersand(cvsLineReader.getData());
		if (raw == null) {
			return null;
		}

		for (; raw != null; raw = cvsLineReader.getData()) {
			ByteArrayOutputStream decoded = decodeAtpersand(raw);
			boolean last = endAtpersand(raw);

			ByteArrayOutputStream piece = decoded;
			if (last) {
				// copy buffer, but trim ending '@\n' chars
				piece = new ByteArrayOutputStream();
				piece.write(decoded.toByteArray(), 0, decoded.size() - 2);
			}

			// the final piece is dropped when trimming left it empty
			if (!last || piece.size() > 0) {
				block.add(piece);
				if (logger.isTraceEnabled()) {
					total += piece.size();
					trace.append("parse:");
					trace.append(piece.size());
					trace.append(":");
					trace.append(total);
					trace.append(last ? ":END" : " ");
				}
			}

			if (last) {
				break;
			}
		}

		if (logger.isTraceEnabled()) {
			logger.trace(trace.toString());
			logger.trace("total[" + block.size() + "] " + total);
		}
		return block;
	}

	/**
	 * Derives the base path of the RCS file from its absolute path by
	 * removing, in this order: the configured CVS root prefix, the configured
	 * module prefix, a leading '/', the ',v' extension and the last "Attic"
	 * directory component.
	 * 
	 * @return the reduced path
	 * @throws Exception
	 *             if the configuration cannot be read
	 */
	private String parseBasePath() throws Exception {
		String cvsroot = (String) Config.get(CFG.CVS_ROOT);
		String module = (String) Config.get(CFG.CVS_MODULE);

		String base = getRcsFile().getAbsolutePath();
		// remove CVSROOT from path
		if (cvsroot != null && base.startsWith(cvsroot)) {
			base = base.substring(cvsroot.length());
		}
		// remove MODULE from path
		if (module != null && base.startsWith(module)) {
			base = base.substring(module.length());
		}
		// remove leading '/'
		if (base.startsWith("/")) {
			base = base.substring(1);
		}
		// remove ',v' extension
		if (base.endsWith(",v")) {
			base = base.substring(0, base.lastIndexOf(",v"));
		}
		// remove the last "Attic/" directory from the base path; it must be a
		// whole path component, so names that merely contain "Attic" (or a
		// path that ends with it) are left alone
		final String attic = "Attic/";
		int p = base.lastIndexOf(attic);
		while (p > 0 && base.charAt(p - 1) != '/') {
			p = base.lastIndexOf(attic, p - 1);
		}
		if (p >= 0) {
			base = base.substring(0, p) + base.substring(p + attic.length());
		}
		return base;
	}

	/**
	 * Returns the path computed by parseBasePath() when the reader was
	 * constructed.
	 * 
	 * @return the base path of the RCS file
	 */
	public String getPath() {
		return rcsPath;
	}

	/**
	 * Returns the content properties detected for the file. The constructor
	 * adds EXECUTE when the file is executable.
	 * 
	 * @return the internal property list itself, not a copy
	 */
	public List<ContentProperty> getProps() {
		return props;
	}
}

#	Change	User	Description
#1	13876	Paul Allen	Rename/move file(s)
//guest/paul_allen/p4convert-maven/src/com/perforce/cvs/parser/RcsReader.java
#1	13873	Paul Allen	Branching using p4convert-maven
//guest/perforce_software/p4convert/src/com/perforce/cvs/parser/RcsReader.java
#14	12440	Paul Allen	CVS: More efficient parsing of RCS files.
#13	12195	Paul Allen	CVS - Use the RCS 'expand' field to detect BINARY files. If set this will take precedence over the type map. - Added test case061
#12	11760	Paul Allen	Use default label description and append label type. Previously I used the change description, which is ok for Automatic labels, but Static labels may include more than one change. Includes: - Fix parsing of RCS descriptions (support descriptions without new line) - Update to test cases.
#11	11745	Paul Allen	CVS: Parse non-standard RCS file, where 'log' and 'text' are on the same line. Support alternative delete method - added test case 054 to support parse changes - added test case 055 to support alternative delete method
#10	11457	Paul Allen	CVS: Support an empty 'branch' symbol. Generally the branch symbol is only added with an ID, however the RCS spec does not specify that an ID is mandatory. This change is to support manufactured CVS data from 3rd party import tools. - 'symbol' test case053
#9	11064	Paul Allen	CVS: added detection and support for +x revisions - extended testcase 040 to test for exec bits
#8	10919	Paul Allen	CVS parse detection of 'comment' in RCS header. - Includes test case 049 parse-comment
#7	10774	Paul Allen	CVS: Updated end of RCS content block detection. An @ character at the end of the line followed by a blank line could fool the converter into exiting the block early. Detection now uses original line with expanded @@ and counts odd/even. Added test case 048
#6	10719	Paul Allen	CVS: Activate old label code and add config option. disabled by default -- for the moment until it works (set log level from trace -> debug)
#5	10655	Paul Allen	Fixed buffered writer. Fills buffer with one or more lines up to 8K and then save remainder. Includes line/EOF (null) detection.
#4	10653	Paul Allen	Debugging data for low level byte operations.
#3	10499	Paul Allen	CVS TestCase: binary-file Fix decodeAtpersand() method to handle a byte stream with "@@@@" to produce "@@". - Added Import mode testcase 032
#2	10497	Paul Allen	New low-level RCS reader using a byte[] to manage CVS lines. Designed to help with the processing of BINARY data in RCS files. The line reading code still looks for a unix style '\n', but has a MAX LINE (hard coded to 10K). The RcsObjectBlock uses a ByteArrayOutputStream to store lines and parsers uses byte logic. (passes basic cvs/svn unit tests)
#1	9807	Paul Allen	Initial import of p4-convert (from change 894340)