Something quick I whipped up last night, after noticing that once my log file grew past 4GB the WorldOfLogs parser would no longer do real-time logging.

Edit: It turns out that the WoW client itself stopped logging, even though the log file was a little over the expected limit (4,334,806,196 bytes).

Note: The code is just a one-off script; things are hard-coded, and it’s pretty slow (100MB a minute).

#!/usr/bin/env python

import re
import datetime

# One hour, in seconds. CombatLog.process() compares the time between two
# consecutive log lines against this value to decide when to start a new
# output file.
GAP_SIZE_IN_SECONDS = 60 * 60

class CombatLog:
    """Split one large WoW combat log into a separate file per session.

    The source log is read line by line. Every line is expected to start
    with an ``m/d hh:mm:ss.msec`` timestamp. Whenever a line is stamped more
    than GAP_SIZE_IN_SECONDS after the previous parsable line, the current
    output file is closed and a new one is started.

    All print calls take a single argument so that the output is the same
    under Python 2 and Python 3.
    """

    # m/d hh:mm:ss.msec, e.g. "6/6 21:04:29.435 ". Compiled once because it
    # is applied to every line of a potentially multi-gigabyte file.
    TIMESTAMP_PATTERN = re.compile(r"^(\d+)/(\d+) (\d+):(\d+):(\d+)\.(\d+) ")

    def __init__(self, filename, year=2009):
        """Remember which log to split.

        filename -- path of the combat log to read.
        year     -- year assumed for every timestamp, because log lines only
                    carry month and day. Defaults to 2009.
        """
        self.filename = filename
        self.year = year

    def process(self):
        """Copy every parsable line of the log into per-session files.

        Lines without a parsable timestamp are reported on stdout and
        skipped. A progress message is printed every 100000 lines. The
        source file and the last output file are closed even if an error
        interrupts the run.
        """
        last_timestamp = None
        line_count = 0
        split_log = None

        try:
            with open(self.filename) as source:
                for line in source:
                    line_count += 1
                    timestamp = self.parse_timestamp(line)

                    if timestamp is None:
                        print("Unparsable data on line %d" % (line_count,))
                        print(repr(line))
                        print("")
                        continue

                    if split_log is None:
                        # First parsable line: open the first output file.
                        split_log = Appender(timestamp)
                        print("Starting new file %s" % (split_log.filename,))
                    elif self._gap_exceeded(last_timestamp, timestamp,
                                            GAP_SIZE_IN_SECONDS):
                        # Close the old log file, and start a new one
                        split_log.close()
                        split_log = Appender(timestamp)
                        print("Starting new file %s" % (split_log.filename,))

                    split_log.append(line)
                    last_timestamp = timestamp

                    if line_count % 100000 == 0:
                        print("Processed %d lines" % (line_count,))
        finally:
            if split_log is not None:
                split_log.close()

    @staticmethod
    def _gap_exceeded(previous, current, limit_seconds):
        """Return True if `current` is more than `limit_seconds` after `previous`.

        Uses total_seconds() rather than the `seconds` attribute: `seconds`
        drops whole days (so a gap of one day and ten minutes looks like ten
        minutes) and is large and positive for small negative differences.
        A timestamp that goes backwards never counts as a gap.
        """
        elapsed = current - previous
        return elapsed.total_seconds() > limit_seconds

    def parse_timestamp(self, line):
        """Return the datetime at the start of `line`, or None.

        None is returned when the line does not start with a timestamp, or
        when its fields do not form a real date/time in self.year (for
        example month 13, or 2/29 in a non-leap year), so that process()
        reports the line as unparsable instead of aborting.
        """
        matches = self.TIMESTAMP_PATTERN.search(line)
        if matches is None:
            return None

        month, day, hour, minute, second, millis = [
            int(field) for field in matches.groups()
        ]

        try:
            # The log records milliseconds; datetime wants microseconds.
            return datetime.datetime(self.year, month, day,
                                     hour, minute, second, millis * 1000)
        except ValueError:
            return None


class Appender:
    """Output file for one split-out stretch of the combat log.

    The file name is built from the timestamp passed to the constructor,
    in the form WoWCombatLog_<YYYYmmdd_HHMMSS>.txt, and is opened relative
    to the current working directory.
    """

    def __init__(self, timestamp):
        stamp = timestamp.strftime("%Y%m%d_%H%M%S")
        self.filename = "".join(("WoWCombatLog_", stamp, ".txt"))
        # Append mode: an existing file with the same name is extended
        # rather than truncated.
        self.handle = open(self.filename, 'a')

    def append(self, line):
        """Write `line` to the output file exactly as given."""
        output = self.handle
        output.write(line)

    def close(self):
        """Close the underlying file handle."""
        output = self.handle
        output.close()


def main(argv=None):
    """Split the combat log named on the command line.

    argv -- list of command-line arguments without the program name;
            defaults to sys.argv[1:]. The first argument is the path of
            the log to split. Without arguments the original hard-coded
            path "../WoWCombatLog.20090904.txt" is used.
    """
    import sys

    args = sys.argv[1:] if argv is None else argv
    path = args[0] if args else "../WoWCombatLog.20090904.txt"

    splitter = CombatLog(path)
    splitter.process()

    # Single-argument print(...) prints the same text on Python 2 and 3.
    print("Done")


# Only run when executed as a script, so importing this module does not
# start processing a multi-gigabyte file.
if __name__ == "__main__":
    main()

Comment from Cryoclasm on August 9th 2011

Here’s an improved version of the timestamp function that eliminates the hard-coded year 2009. It should work properly as long as the clock hasn’t been set back since the start of the last log (may be an issue for speed-kill runs when DST ends) and the oldest log entry is less than a year old.

def parse_timestamp(self, line):
    """Return the datetime at the start of `line`, or None.

    Log lines carry only month and day, so the year is inferred: the
    current year is tried first, and if that is not a real date or lies in
    the future, the previous year is used instead. This relies on the
    oldest log entry being less than a year old and on the clock not having
    been set back since the log was written.

    None is returned when the line does not start with a timestamp or when
    no valid, non-future date can be formed from it.
    """
    # m/d hh:mm:ss.msec
    # 6/6 21:04:29.435
    regex = r"^(\d+)/(\d+) (\d+):(\d+):(\d+)\.(\d+) "

    matches = re.search(regex, line)

    if matches is None:
        return None

    month, day, hour, minute, second, millis = [
        int(field) for field in matches.groups()
    ]

    now = datetime.datetime.now()

    # datetime objects are immutable, so the year cannot be decremented in
    # place; build a candidate for each year instead.
    for year in (now.year, now.year - 1):
        try:
            # The log records milliseconds; datetime wants microseconds.
            timestamp = datetime.datetime(year, month, day,
                                          hour, minute, second, millis * 1000)
        except ValueError:
            # Not a real date in this year (e.g. 2/29 in a non-leap year).
            continue

        if timestamp <= now:
            return timestamp

    return None