package com.perforce.cvs.parser; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.perforce.common.asset.ContentProperty; import com.perforce.config.CFG; import com.perforce.config.Config; import com.perforce.config.ConfigException; import com.perforce.cvs.parser.rcstypes.RcsObject; import com.perforce.cvs.parser.rcstypes.RcsObjectAdmin; import com.perforce.cvs.parser.rcstypes.RcsObjectBlock; import com.perforce.cvs.parser.rcstypes.RcsObjectDelta; import com.perforce.cvs.parser.rcstypes.RcsObjectNum; public class RcsReader { private Logger logger = LoggerFactory.getLogger(RcsReader.class); private File rcsFile; private String rcsPath; private RcsObjectAdmin rcsAdmin; private RcsObject rcsDesc; private Map<String, RcsObjectDelta> rcsDeltas = new HashMap<String, RcsObjectDelta>(); private List<ContentProperty> props = new ArrayList<ContentProperty>(); private CvsLineReader cvsLineReader; public RcsReader(File file, boolean getContent) throws Exception { rcsFile = file; rcsPath = parseBasePath(); rcsAdmin = new RcsObjectAdmin(); cvsLineReader = new CvsLineReader(rcsFile.toString()); // parse permission bits if (file.canExecute()) { props.add(ContentProperty.EXECUTE); } parseRcsAdmin(); if (logger.isTraceEnabled()) { logger.trace(rcsAdmin.toString()); } RcsObjectDelta rcsObject = parseRcsDeltas(); while (!rcsObject.isEmpty()) { if (rcsObject.containsKey(RcsSchema.DATE)) { rcsDeltas.put(rcsObject.getID().toString(), rcsObject); } if (rcsObject.containsKey(RcsSchema.DESC)) { // Exit early if content is not needed if(!getContent) { break; } rcsDesc = rcsObject; } // find log and add to rcsDeltas matching the same ID key if (rcsObject.containsKey(RcsSchema.LOG)) { String key = rcsObject.getID().toString(); if (rcsDeltas.containsKey(key)) { RcsObjectDelta set = 
rcsDeltas.get(key); set.add(RcsSchema.LOG, rcsObject.getLog()); rcsDeltas.put(key, set); } } if (rcsObject.containsKey(RcsSchema.TEXT)) { String key = rcsObject.getID().toString(); if (rcsDeltas.containsKey(key)) { RcsObjectDelta set = rcsDeltas.get(key); set.add(RcsSchema.TEXT, rcsObject.getBlock()); rcsDeltas.put(key, set); } } rcsObject = parseRcsDeltas(); } cvsLineReader.close(); } public RcsObjectAdmin getAdmin() { return rcsAdmin; } public ArrayList<RcsObjectNum> getIDs() { ArrayList<RcsObjectNum> list = new ArrayList<RcsObjectNum>(); for (String key : rcsDeltas.keySet()) { list.add(new RcsObjectNum(key)); } return list; } public RcsObjectDelta getDelta(RcsObjectNum id) { String key = id.toString(); RcsObjectDelta delta = rcsDeltas.get(key); return delta; } public RcsObject getDesc() { return rcsDesc; } /** * Returns a File object to the RCS ',v' file. * * @return */ public File getRcsFile() { return rcsFile; } private void parseRcsAdmin() throws Exception { String line = getLine(); StringBuffer sb = new StringBuffer(); while (line != null) { // drop out on empty line if (line.isEmpty()) { if (logger.isTraceEnabled()) { logger.trace("end of admin"); } return; } // add all phrases in line if (line.endsWith(";")) { sb.append(line); parsePhrase(sb.toString(), rcsAdmin); sb = new StringBuffer(); } else { sb.append(line); } // get next line line = getLine(); } } private String getLine() throws IOException { String line = cvsLineReader.getLine(); return line; } private RcsObjectDelta parseRcsDeltas() throws Exception { RcsObjectDelta rcsObject = new RcsObjectDelta(); // find and read delta number String line = getLine(); StringBuffer sb = new StringBuffer(); while (line != null) { // block might be delta e.g. 1.1 if (line.contains(".")) { rcsObject.add(RcsSchema.ID, line); line = getLine(); break; } // block might be a description e.g. 
desc if (line.startsWith("desc")) { String log = parseLog(); rcsObject.add(RcsSchema.DESC, log); line = getLine(); break; } line = getLine(); } while (line != null) { // drop out on empty line (end of delta block) if (line.isEmpty()) { if (logger.isTraceEnabled()) { logger.trace("end of delta"); } return rcsObject; } // add all phrases in line if (rcsObject.containsKey(RcsSchema.DATE)) { if (line.endsWith(";")) { sb.append(line); parsePhrase(sb.toString(), rcsObject); sb = new StringBuffer(); } else { sb.append(line); } } else { parsePhrase(line, rcsObject); } // get next line line = getLine(); } return rcsObject; } private void parsePhrase(String line, RcsObject rcs) throws Exception { // The "(?<=;)" is a cleaver (positive lookbehind) regex that leaves // the ';' in the string String[] phrases = line.split(";"); for (String phrase : phrases) { // tidy up phrase phrase = phrase.trim(); // split into key/value pairs String args[] = phrase.split("\\s+"); // find key and detect if there is a value RcsSchema type = RcsSchema.parse(args[0]); // store remainder int pos = line.indexOf(args[0]) + args[0].length(); String remainder = line.substring(pos); // process value for key switch (type) { case LOG: String log = parseLog(); rcs.add(type, log); break; case TEXT: cvsLineReader.returnLine(remainder); RcsObjectBlock block = parseText(); rcs.add(type, block); break; case SYMBOLS: rcs.add(type, phrase); break; case BRANCHES: StringBuffer sb = new StringBuffer(); for (int i = 1; i < args.length; i++) { sb.append(args[i] + " "); } rcs.add(type, sb.toString()); break; case COMMENT: case EXPAND: if (line.contains("@")) { int begin = line.indexOf("@") + 1; int end = line.lastIndexOf("@"); String comment = line.substring(begin, end); rcs.add(type, comment); return; } else { rcs.add(type, ""); } case BRANCH: if (args.length > 1) { // parse remainder for values String r = args[1]; r = r.trim(); rcs.add(type, r); } break; default: if (args.length > 1) { // parse remainder for values 
String r = args[1]; r = r.trim(); rcs.add(type, r); } else { rcs.add(type, ""); } break; } } } private String parseLog() throws Exception { StringBuffer log = new StringBuffer(); String line = getLine(); if (!line.startsWith("@")) return null; // remove starting '@' line = line.substring(1); while (line != null) { // check for terminating '@' String end = line.replaceAll("@@", "_"); if (end.contains("@")) { int pos = end.indexOf("@"); if (!end.endsWith("@")) { String remainder = end.substring(pos + 1); cvsLineReader.returnLine(remainder); } line = line.replaceAll("@@", "@"); line = line.substring(0, pos); log.append(line); break; } else { line = line.replaceAll("@@", "@"); log.append(line); log.append("\n"); line = getLine(); } } return log.toString(); } /** * Check the buffer starts with an '@' and return a buffer less the starting * '@', else null. * * @param buf * @return */ private ByteArrayOutputStream startAtpersand(ByteArrayOutputStream buf) { ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] b = buf.toByteArray(); if (b[0] == '@') { out.write(b, 1, buf.size() - 1); return out; } else { return null; } } private ByteArrayOutputStream decodeAtpersand(ByteArrayOutputStream buf) { ByteArrayOutputStream out = new ByteArrayOutputStream(); byte last = '\0'; for (byte b : buf.toByteArray()) { if (b == '@' && last == '@') { // don't write and invalidate last char to process @@@@ -> @@ last = '\0'; } else { out.write(b); last = b; } } return out; } private boolean endAtpersand(ByteArrayOutputStream buf) throws ConfigException { int size = buf.size(); // exit early if less than 2 chars if (size < 2) { return false; } byte[] bytes = buf.toByteArray(); // count '@' in line, if even then not end '@' int count = 0; for (byte b : bytes) { if (b == '@') { count++; } } if ((count % 2) != 0) { return true; } return false; } private RcsObjectBlock parseText() throws Exception { RcsObjectBlock lines = new RcsObjectBlock(); // to help with debug int sum = 0; 
StringBuffer sb = new StringBuffer(); // check and remove starting '@' ByteArrayOutputStream line = cvsLineReader.getData(); line = startAtpersand(line); if (line == null) return null; while (line != null) { // replace '@@' with '@' ByteArrayOutputStream clean = new ByteArrayOutputStream(); clean = decodeAtpersand(line); // exit if ending with '@\n' if (endAtpersand(line)) { ByteArrayOutputStream out = new ByteArrayOutputStream(); // copy buffer, but trim ending '@\n' chars out.write(clean.toByteArray(), 0, clean.size() - 2); if (out.size() > 0) { lines.add(out); if (logger.isTraceEnabled()) { sum += out.size(); sb.append("parse:"); sb.append(out.size()); sb.append(":"); sb.append(sum); sb.append(":END"); } } break; } else { lines.add(clean); if (logger.isTraceEnabled()) { sum += clean.size(); sb.append("parse:"); sb.append(clean.size()); sb.append(":"); sb.append(sum); sb.append(" "); } } // get next line; line = cvsLineReader.getData(); } if (logger.isTraceEnabled()) { logger.trace(sb.toString()); logger.trace("total[" + lines.size() + "] " + sum); } return lines; } private String parseBasePath() throws Exception { String cvsroot = (String) Config.get(CFG.CVS_ROOT); String module = (String) Config.get(CFG.CVS_MODULE); String base = getRcsFile().getAbsolutePath(); // remove CVSROOT from path if (base.startsWith(cvsroot)) { base = base.substring(cvsroot.length()); } // remove MODULE from path if (base.startsWith(module)) { base = base.substring(module.length()); } // remove leading '/' if (base.startsWith("/")) { base = base.substring(1); } // remove ',v' extension if (base.endsWith(",v")) { base = base.substring(0, base.lastIndexOf(",v")); } // remove attic from base path if (base.contains("Attic")) { int p = base.lastIndexOf("Attic"); base = base.substring(0, p) + base.substring(p + 6); } return base; } public String getPath() { return rcsPath; } public List<ContentProperty> getProps() { return props; } }
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#1 | 13876 | Paul Allen | Rename/move file(s) | ||
//guest/paul_allen/p4convert-maven/src/com/perforce/cvs/parser/RcsReader.java | |||||
#1 | 13873 | Paul Allen | Branching using p4convert-maven | ||
//guest/perforce_software/p4convert/src/com/perforce/cvs/parser/RcsReader.java | |||||
#14 | 12440 | Paul Allen | CVS: More efficient parsing of RCS files. | ||
#13 | 12195 | Paul Allen |
CVS - Use the RCS 'expand' field to detect BINARY files. If set this will take precedence over the type map. - Added test case061 |
||
#12 | 11760 | Paul Allen |
Use default label description and append label type. Previously I used the change description, which is ok for Automatic labels, but Static labels may include more than one change. Includes: - Fix parsing of RCS descriptions (support descriptions without new line) - Update to test cases. |
||
#11 | 11745 | Paul Allen |
CVS: Parse non-standard RCS file, where 'log' and 'text' are on the same line. Support alternative delete method - added test case 054 to support parse changes - added test case 055 to support alternative delete method |
||
#10 | 11457 | Paul Allen |
CVS: Support an empty 'branch' symbol. Generally the branch symbol is only added with an ID, however the RCS spec does not specify that an ID is mandatory. This change is to support manufactured CVS data from 3rd party import tools. - 'symbol' test case053 |
||
#9 | 11064 | Paul Allen |
CVS: added detection and support for +x revisions - extended testcase 040 to test for exec bits |
||
#8 | 10919 | Paul Allen |
CVS parse detection of 'comment' in RCS header. - Includes test case 049 parse-comment |
||
#7 | 10774 | Paul Allen |
CVS: Updated end of RCS content block detection. An @ character at the end of the line followed by a blank line could fool the converter into exiting the block early. Detection now uses original line with expanded @@ and counts odd/even. Added test case 048 |
||
#6 | 10719 | Paul Allen |
CVS: Activate old label code and add config option. disabled by default -- for the moment until it works (set log level from trace -> debug) |
||
#5 | 10655 | Paul Allen |
Fixed buffered writer. Fills buffer with one or more lines up to 8K and then saves the remainder. Includes line/EOF (null) detection. |
||
#4 | 10653 | Paul Allen | Debugging data for low level byte operations. | ||
#3 | 10499 | Paul Allen |
CVS TestCase: binary-file Fix decodeAtpersand() method to handle a byte stream with "@@@@" to produce "@@". - Added Import mode testcase 032 |
||
#2 | 10497 | Paul Allen |
New low-level RCS reader using a byte[] to manage CVS lines. Designed to help with the processing of BINARY data in RCS files. The line reading code still looks for a unix style '\n', but has a MAX LINE (hard coded to 10K). The RcsObjectBlock uses a ByteArrayOutputStream to store lines and the parsers use byte logic. (passes basic cvs/svn unit tests) |
||
#1 | 9807 | Paul Allen | Initial import of p4-convert (from change 894340) |