帮朋友分析 VPS 性能时写的小东西。虽然没有“产品化”,不过我觉得已经基本可用,可以发布出来。
有需求的话,可以按需稍加修改,以获取想要的信息。:-)
#!/usr/bin/env python
"""Summarize an access log: requests per minute and number of distinct IPs.

Usage: parse_apache_log.py <log_file>

Every line that looks like an access-log entry is recorded per client IP;
entries whose timestamp falls on TARGET_DATE are additionally counted per
'HH:MM:' bucket.  The code runs unchanged on Python 2 and Python 3.
"""
import io
import re
import sys

# Client IP -> list of access_record objects, in the order they were read.
g_ip_record = {}
# 'HH:MM:' bucket (as returned by get_hour_min) -> number of requests.
g_time_record = {}

# Day whose requests are bucketed by default (date as written inside the
# log's [...] timestamp field).
TARGET_DATE = '24/Jun/2011'

# Timestamp remainder after the date: capture 'HH:MM:' and require seconds
# plus at least one more character after it.
_HOUR_MIN_TAIL = r':(\d+:\d+:)\d+.+'


def _compile_hour_min_pattern(date):
    """Return the compiled regex extracting 'HH:MM:' from a stamp on *date*."""
    # re.escape keeps the date literal even if it contains regex specials.
    return re.compile(re.escape(date) + _HOUR_MIN_TAIL)


# Compiled once at import time instead of on every get_hour_min() call.
_DEFAULT_HOUR_MIN_PATTERN = _compile_hour_min_pattern(TARGET_DATE)

# Groups: (1) leading dotted-quad IP, (2) contents of the [...] field,
# (3) the last double-quoted field on the line (the user agent in Apache's
# combined log format).  To restrict the analysis to one resource, put its
# path before the final quoted group, e.g. r'...\].+compass_lite_update.xml.+"(.+)"$'.
_LOG_LINE_PATTERN = re.compile(r'(^\d+\.\d+\.\d+\.\d+).+\[(.+)\].+"(.+)"$')


def analyze_by_ip():
    """Print how many distinct client IPs were recorded."""
    print('number of IPs : %d' % len(g_ip_record))


def analyze_by_time():
    """Print the request count of every recorded minute, in sorted key order."""
    # sorted() works on both Python 2 and 3; dict.keys().sort() is 2-only.
    for hour_min in sorted(g_time_record):
        print('%s : %d' % (hour_min, g_time_record[hour_min]))


def analyze():
    """Print the per-minute report followed by the distinct-IP count."""
    analyze_by_time()
    analyze_by_ip()


def get_hour_min(time, date=TARGET_DATE):
    """Return the 'HH:MM:' part of a log timestamp, or '' if it is off-date.

    Args:
        time: timestamp text such as '24/Jun/2011:10:15:32 +0800'.
        date: the date the timestamp must start with; defaults to
            TARGET_DATE, which reproduces the original hard-coded behavior.

    Returns:
        The hour and minute including the trailing colon (e.g. '10:15:'),
        or an empty string when *time* does not start with *date* in the
        expected layout.
    """
    if date == TARGET_DATE:
        pattern = _DEFAULT_HOUR_MIN_PATTERN
    else:
        pattern = _compile_hour_min_pattern(date)
    match = pattern.match(time)
    return match.group(1) if match else ''


class access_record(object):
    """Timestamp and browser (user-agent) strings of a single request."""

    def __init__(self, time, browser):
        self.time = time
        self.browser = browser


def process_access_record(ip, time, browser):
    """Record one parsed request in g_ip_record and g_time_record.

    The request is always appended to the list kept for *ip*.  It is counted
    in g_time_record only when get_hour_min() recognizes its timestamp.
    """
    g_ip_record.setdefault(ip, []).append(access_record(time, browser))

    hour_min = get_hour_min(time)
    if hour_min:
        g_time_record[hour_min] = g_time_record.get(hour_min, 0) + 1


def set_parse_pattern():
    """Return the compiled regex used to split a log line into its fields."""
    return _LOG_LINE_PATTERN


def parse_log_line(line_data, pattern):
    """Split one log line into (ip, time, browser).

    Args:
        line_data: a single line of the log, with or without trailing newline.
        pattern: compiled regex with three groups, as returned by
            set_parse_pattern().

    Returns:
        A 3-tuple of strings, or (None, None, None) when the line does not
        match *pattern*.
    """
    match = pattern.match(line_data)
    if not match:
        return (None, None, None)
    return (match.group(1), match.group(2), match.group(3))


def main(argv=None):
    """Parse the log file named on the command line and print the report.

    Args:
        argv: argument list including the program name; defaults to sys.argv.

    Exits with status -1 after printing a usage line when the argument count
    is wrong.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        print('Usage: parse_apache_log.py <log_file>')
        sys.exit(-1)

    log_pattern = set_parse_pattern()
    # io.open behaves the same on Python 2 and 3.  errors='replace' keeps a
    # stray non-UTF-8 byte in a request or user agent from aborting the run,
    # and the with-block closes the file even if parsing raises.
    with io.open(argv[1], 'r', encoding='utf-8', errors='replace') as log_f:
        for line_data in log_f:
            ip, time, browser = parse_log_line(line_data, log_pattern)
            if ip and time and browser:
                process_access_record(ip, time, browser)

    analyze()


if __name__ == '__main__':
    main()