#!/usr/bin/env python3.3
'''
/*
 * Copyright (c) 2015, Charles McLouth
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL STEWART LORD BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

auditconverter2 - This python script converts audit logs for ingestion into the Helix Threat Detection analytics engine.
It converts all input files to be encoded as utf-8.
It converts all structured log formats to standard P4AUDIT format.
It compresses output files

see auditconverter2 -h for usage

$Id: //guest/cmclouth/projects/auditconverter/src/auditconverter2.py#5 $
*/
'''

import logging
import sys
import os
import argparse
import datetime
#import operator
import auditconverter

scriptversion = "1.0"
scriptname = os.path.basename(sys.argv[0])

# standard format: '%s %s@%s %s %s %s#%s'
# structured format: '6,,,%s,,,%s,%s,,%s,,,,%s,%s,%s'
# (self.f_date, self.f_user, self.f_client, self.f_host, self.f_action, self.f_file, self.f_rev)
P4AUDIT_RECORDFORMAT = '%s %s@%s %s %s %s#%s'
STRUCTURED_RECORDFORMAT = '6,,,%s,,,%s,%s,,%s,,,,%s,%s,%s'


def isDEBUG(record):
    ''' logging filter: accept only DEBUG records '''
    return record.levelname == 'DEBUG'


def isINFO(record):
    ''' logging filter: accept only INFO records '''
    return record.levelname == 'INFO'


def isWARN(record):
    ''' logging filter: accept only WARNING records '''
    return record.levelname == 'WARNING'


def isERROR(record):
    ''' logging filter: accept only ERROR and CRITICAL records '''
    return record.levelname in ['ERROR', 'CRITICAL']


def processInputParams(pargs=None):
    '''
    process commandline arguments and validate them

    pargs   - list of argument strings WITHOUT the program name.  When None
              (the default) argparse falls back to sys.argv[1:].  The former
              default of sys.argv included the program name, which argparse
              rejected as an unrecognized positional argument.
    returns - the argparse namespace with inputFile, output, logformat and
              compress attributes.
    raises  - SystemExit (through ArgumentParser.error) when a required option
              is missing, inputFile is not an existing file, or output is not
              an existing directory.
    '''
    gParser = argparse.ArgumentParser()
    gParser.description = "This python script converts audit logs for ingesting into the Helix Threat Detection analytics engine.\n"\
        "It converts all input files to be encoded as utf-8, converts all structured log formats to standard P4AUDIT format."\
        "Optionally it will anonymize the data in the output log files and compress them."
    gParser.add_argument('-V', '--version', action='version', version='%(prog)s ' + scriptversion)
    gParser.add_argument('-i', '--inputFile', dest='inputFile', metavar='inputFile',
                         help='audit log to convert.')
    gParser.add_argument('-o', '--output', dest='output', metavar='output',
                         help='a directory to write converted log files to.')
    gParser.add_argument('-f', '--format', dest='logformat', metavar='logformat',
                         type=int, default=0,
                         help='Output record format. 0 (zero) for P4AUDIT 1 (one) for Structured Audit Log.')
    gParser.add_argument('-c', '--compress', dest='compress', action='store_true',
                         help='compress output log files with gzip compatible compression.')

    args = gParser.parse_args(pargs)

    # both -i and -o are required.  argparse always creates the attributes
    # (they default to None), so a None test is sufficient.
    if args.inputFile is None:
        gParser.print_help()
        gParser.error('inputFile (-i) are required.')
    # this test used to look at args.inputFile, so a missing -o was never
    # reported here and only failed later while building the output path.
    if args.output is None:
        gParser.print_help()
        gParser.error('output (-o) is required.')

    # validate inputs (gParser.error() exits, so both values are set here)
    if not os.path.isfile(args.inputFile):
        gParser.print_help()
        gParser.error("invalid inputFile (-i) '%s'" % (args.inputFile))
    if not os.path.isdir(args.output):
        gParser.print_help()
        gParser.error("invalid output directory (-o) '%s'" % (args.output))

    return args


def _buildLogger(name):
    ''' create the script logger: INFO goes to stdout, everything else to stderr '''
    logger = logging.getLogger(name)
    logger.propagate = False
    logger.setLevel(logging.INFO)

    # (stream, level, format, filter); a stream of None makes StreamHandler
    # use sys.stderr.  Each handler only emits the level(s) its filter accepts.
    handlerSpecs = (
        (None, logging.DEBUG, '%(levelname)s:%(filename)s:%(lineno)d:%(funcName)s:%(message)s', isDEBUG),
        (sys.stdout, logging.INFO, '%(message)s', isINFO),
        (None, logging.WARNING, '%(message)s', isWARN),
        (None, logging.ERROR, '%(message)s', isERROR),
    )
    for stream, level, recordFormat, levelFilter in handlerSpecs:
        handler = logging.StreamHandler(stream)
        handler.setLevel(level)
        handler.setFormatter(logging.Formatter(recordFormat))
        handler.addFilter(levelFilter)
        logger.addHandler(handler)
    return logger


def _outputFileName(inputFile, outputDir, compress):
    ''' derive the output path: <outputDir>/<input name without .gz>.utf8[.gz] '''
    fileNameOut = os.path.basename(inputFile)
    if inputFile.endswith('.gz'):
        fileNameOut = fileNameOut[0:len(fileNameOut)-3]
    fileNameOut += '.utf8'
    if compress:
        fileNameOut += '.gz'
    return os.path.join(outputDir, fileNameOut)


def _convertFile(args, logger):
    ''' convert args.inputFile record by record into args.output '''
    if args.logformat == 1:
        outputRecordFormat = STRUCTURED_RECORDFORMAT
    else:
        outputRecordFormat = P4AUDIT_RECORDFORMAT

    utf8converter = auditconverter.UTF8Converter()
    # handed to the writer as its max write size; presumably None means the
    # output is never split -- confirm against auditconverter.AuditFileIO
    maxWriteSize = None
    fileName = args.inputFile
    fRead = None
    fWrite = None
    dtStart = datetime.datetime.now()
    try:
        fileNameOut = _outputFileName(fileName, args.output, args.compress)
        fRead = auditconverter.AuditFileIO(fileName, True, False, utf8converter, None)
        fWrite = auditconverter.AuditFileIO(fileNameOut, False, False, utf8converter, maxWriteSize)
        logger.info('Processing file: %s converting to: %s' % (fRead.fileName, fWrite.fileName))

        while True:
            try:
                aRecord = fRead.readRecord(True)
                # eof
                if aRecord is None:
                    break
                # write output
                fWrite.writeLine(aRecord.getLine(outputRecordFormat))
            except auditconverter.AuditException as e:
                # a bad record is reported and skipped; str(e) carries the
                # details, so the exception arguments are not unpacked here.
                logger.error(str(e))
            if fRead.linesRead % 100000 == 0:
                logger.info('Progress: %s converting to: %s with %d lines.'
                            % (fRead.fileName, fWrite.fileName, fRead.linesRead))

        # disabled check kept from the original:
        # if fRead.linesRead != fWrite.linesWritten: logger.warning('Lines read: %d; Lines written: %d' % (fRead.linesRead, fWrite.linesWritten))

        # total_seconds() covers the whole elapsed time; timedelta.seconds
        # alone drops whole days.  Never report less than one second so the
        # rate calculation cannot divide by zero.
        seconds = max(1, int((datetime.datetime.now() - dtStart).total_seconds()))
        logger.info('Completed: %s converting to: %s with %d lines in %d seconds (%d lines/second.)'
                    % (fRead.fileName, fWrite.fileName, fRead.linesRead, seconds, int(fRead.linesRead/seconds)))
    except Exception:
        # KeyboardInterrupt/SystemExit are deliberately not caught, so the
        # run can still be interrupted; the files are closed below either way.
        logger.exception('unknown exception processing input file: %s' % (fileName))
    finally:
        # close the writer even when closing the reader fails
        try:
            if fRead is not None:
                fRead.close()
        finally:
            if fWrite is not None:
                fWrite.close()


if __name__ == '__main__':
    logger = _buildLogger(scriptname)
    # the imported auditconverter module logs through the same logger
    auditconverter.logger = logger

    args = processInputParams(sys.argv[1:])
    _convertFile(args, logger)
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#5 | 18156 | Charlie McLouth | Initialize logger for imported auditconverter | ||
#4 | 18155 | Charlie McLouth | Correct bug in the options | ||
#3 | 18148 | Charlie McLouth | process a single file | ||
#2 | 18145 | Charlie McLouth | rebaseline with version | ||
#1 | 18142 | Charlie McLouth | Branching for new version | ||
//guest/cmclouth/projects/auditconverter/src/auditconverter.py | |||||
#28 | 18124 | Charlie McLouth | Add linefeed to exception output when line data is UTF-8 | ||
#27 | 18123 | Charlie McLouth | Added re-validate after attempted fix | ||
#26 | 18122 | Charlie McLouth | Add linefeed to error line | ||
#25 | 18121 | Charlie McLouth | make exception parameters variable length | ||
#24 | 18120 | Charlie McLouth | Changed exception parameters to be variable length | ||
#23 | 18119 | Charlie McLouth | Sort files numerically | ||
#22 | 18118 | Charlie McLouth |
Changed to default to case-insensitive. switch now turns it to case-sensitive. |
||
#21 | 18117 | Charlie McLouth | Now it will try to fix data where the user is null or unknown | ||
#20 | 18116 | Charlie McLouth | always case-insensitive when anonymizing | ||
#19 | 18115 | Charlie McLouth | strip pound sign from record format and apply back in string conversion | ||
#18 | 18114 | Charlie McLouth | Added option to dump the ip/user map | ||
#17 | 18113 | Charlie McLouth | Make Record validation optional | ||
#16 | 18112 | Charlie McLouth | Write problem records to error file | ||
#15 | 18111 | Charlie McLouth | Added Exception handling | ||
#14 | 18110 | Charlie McLouth | Added reverse lookups for decoding anonymization | ||
#13 | 18109 | Charlie McLouth | Alter file io to write binary output files seems to write output slightly faster | ||
#12 | 18108 | Charlie McLouth | Added support for splitting output files | ||
#11 | 18107 | Charlie McLouth | Refactored file io | ||
#10 | 18106 | Charlie McLouth |
refactored parse and join operations; added option for case-insensitive |
||
#9 | 18105 | Charlie McLouth | Fixed double newline | ||
#8 | 18104 | Charlie McLouth | Small fix to lineending problem | ||
#7 | 18103 | Charlie McLouth | upped dbversion to support the transition of project anonymization | ||
#6 | 18102 | Charlie McLouth |
Fixed host anonymization to work with the Interset connector. also added database versioning to migrate the anonymization db for EA |
||
#5 | 18101 | Charlie McLouth | Handle non-audit line in the structured log | ||
#4 | 18100 | Charlie McLouth | Resolved a bug where a depotFile did not have a revision specified | ||
#3 | 18099 | Charlie McLouth | changed Version | ||
#2 | 18098 | Charlie McLouth |
auditconverter converts audit logs from local encoding to utf-8, converts structured logs to standard P4AUDIT, and anonymizes the fields for Helix Threat Detection |
||
#1 | 18097 | Charlie McLouth | Rename project |