一个apache log分析工具

帮朋友分析 VPS 性能时写的小工具。还没有“产品化”,不过我觉得已经基本可用,可以发布出来了。

有需要的话,可以按需稍加修改,以获取想要的信息 :-)


#!/usr/bin/env python

import re
import sys


# Client IP -> list of access_record objects collected for that IP.
g_ip_record = dict()
# Time bucket string returned by get_hour_min() -> number of requests in it.
g_time_record = dict()

def analyze_by_ip():
	"""Print the number of distinct keys (client IPs) held in g_ip_record."""
	ip_count = len(g_ip_record)
	print('number of IPs : %d' % ip_count)
	


def analyze_by_time():
	"""Print one '<bucket> : <count>' line per g_time_record entry.

	Entries are emitted in ascending order of the bucket string.
	"""
	# Keys are unique, so sorting the (key, count) pairs orders by key alone.
	for hour_min, hits in sorted(g_time_record.items()):
		print('%s : %d' % (hour_min, hits))

def analyze():
	"""Run every report in order: the per-time histogram, then the IP count."""
	for report in (analyze_by_time, analyze_by_ip):
		report()
	

def get_hour_min(time, date='24/Jun/2011'):
	"""Extract the 'HH:MM:' bucket from an Apache timestamp of a given day.

	Args:
		time: timestamp text as found between the square brackets of a log
			line, e.g. '24/Jun/2011:10:15:30 +0800'.
		date: the 'DD/Mon/YYYY' day to accept. Defaults to '24/Jun/2011';
			pass another day instead of editing the pattern by hand.

	Returns:
		The hour and minute including both trailing colons (e.g. '10:15:'),
		or '' when the timestamp does not start with `date` or is malformed.
	"""
	# re.escape keeps the caller-supplied date literal; the raw string keeps
	# the \d escapes out of Python's own string-escape processing.
	pattern = re.compile(re.escape(date) + r':(\d+:\d+:)\d+.+')
	match = pattern.match(time)
	return match.group(1) if match else ''


class access_record:
	"""A single logged request: its timestamp text and its client string."""

	def __init__(self, time, browser):
		# Both values are stored exactly as handed in; no parsing happens here.
		self.time, self.browser = time, browser

def process_access_record(ip, time, browser):
	"""Record one parsed request in the two module-level tables.

	The request is appended to g_ip_record[ip]; if get_hour_min(time) yields
	a non-empty bucket, that bucket's counter in g_time_record is incremented.
	"""
	g_ip_record.setdefault(ip, []).append(access_record(time, browser))

	hour_min = get_hour_min(time)
	if not hour_min:
		# Timestamp did not match the day being analysed: nothing to count.
		return
	g_time_record[hour_min] = g_time_record.get(hour_min, 0) + 1


def set_parse_pattern():
	"""Build the compiled regex used to split one access-log line.

	Returns:
		A compiled pattern with three groups:
		1. the dotted-quad IPv4 address at the start of the line,
		2. the text between the square brackets (the timestamp),
		3. the content of the last double-quoted field on the line
		   (presumably the user agent in a combined-format log; confirm
		   against the actual log format).
	"""
	# To restrict the analysis to one URL, insert its path between the
	# bracket group and the final quoted group of this pattern.
	return re.compile(r'(^\d+\.\d+\.\d+\.\d+).+\[(.+)\].+"(.+)"$')


def parse_log_line(line_data, pattern):
	"""Apply `pattern` to the start of `line_data` and return its 3 groups.

	Returns:
		(ip, time, browser) taken from groups 1-3 of the match, or
		(None, None, None) when the line does not match.
	"""
	match = pattern.match(line_data)
	if not match:
		return (None, None, None)
	return match.group(1, 2, 3)

def main():
	"""Parse the log file named on the command line and print the reports.

	Expects exactly one argument (the log file path); otherwise prints the
	usage line and exits with status -1. Every line that yields an IP, a
	timestamp and a client string is recorded, then analyze() is run.
	"""
	if len(sys.argv) != 2:
		print('Usage: parse_apache_log.py <log_file>')
		# sys.exit is always available; the bare exit() helper is only
		# injected by the site module.
		sys.exit(-1)

	log_pattern = set_parse_pattern()

	# The with-block closes the file even if processing a line raises.
	with open(sys.argv[1], "r") as log_f:
		for line_data in log_f:
			ip, time, browser = parse_log_line(line_data, log_pattern)
			if ip and time and browser:
				process_access_record(ip, time, browser)

	analyze()

# Run the analysis only when executed as a script, not when imported.
if __name__ == '__main__':
	main()



python小玩具:生成自动切换的桌面背景 for debian squeeze

#!/usr/bin/python

#Copyright William LI null@live.cn


import os
import shutil
import re
import sys
from collections import deque


# Directory that receives the copied image files and the slideshow XML.
WALLPAPER_INSTALL_PATH = '/usr/share/backgrounds/hubble'

# Stays empty at module level: generate_wallpaper_config() assigns a local of
# the same name without declaring it global.
WALLPAPER_CONFIG_FILE = ''

# XML file written by generate_wallpaper_config() to describe the wallpaper.
GNOME_WALLPAPER_CONFIG = '/usr/share/gnome-background-properties/hubble.xml'


def install_wallpaper_files(image_file_list, install_path=None):
	"""Copy every listed image into the wallpaper install directory.

	Args:
		image_file_list: paths of the source files; each is copied under its
			base name, so files sharing a base name overwrite one another.
		install_path: destination directory. Defaults to
			WALLPAPER_INSTALL_PATH when omitted.

	The destination is created (including missing parents) when absent.
	If it exists but is not a directory, a message is printed and the
	process exits with status 1.
	"""
	if install_path is None:
		install_path = WALLPAPER_INSTALL_PATH

	if not os.path.exists(install_path):
		print(install_path + " doesnot exist, creating new one.")
		# makedirs also creates missing parent directories, which a plain
		# mkdir would fail on.
		os.makedirs(install_path)

	if not os.path.isdir(install_path):
		print(install_path + " is not a directory.")
		# Non-zero status so calling scripts can detect the failure.
		sys.exit(1)

	# install the image files
	for src_image_file in image_file_list:
		dst_image_file = os.path.join(install_path, os.path.basename(src_image_file))
		shutil.copyfile(src_image_file, dst_image_file)


def static_config(fhandle, image_file):
	"""Write one <static> element naming `image_file` to `fhandle`.

	The element carries a fixed <duration> of 60.0 and is emitted with a
	single write() call, starting with a newline and ending without one.
	"""
	pieces = [
		"\n\t<static>",
		"\n\t\t<duration>60.0</duration>",
		"\n\t\t<file>", image_file, "</file>",
		"\n\t</static>",
	]
	fhandle.write("".join(pieces))

def transition_config(fhandle, from_file, to_file):
	"""Write one <transition> element from `from_file` to `to_file`.

	The element carries a fixed <duration> of 5.0 and is emitted with a
	single write() call, starting with a newline and ending without one.
	"""
	pieces = [
		"\n\t<transition>",
		"\n\t\t<duration>5.0</duration>",
		"\n\t\t<from>", from_file, "</from>",
		"\n\t\t<to>", to_file, "</to>",
		"\n\t</transition>",
	]
	fhandle.write("".join(pieces))


def generate_wallpaper_config(image_file_list):
	"""Write the GNOME wallpaper entry and the slideshow XML it points to.

	Two files are produced:
	  * GNOME_WALLPAPER_CONFIG - a <wallpapers> document naming the slideshow;
	  * <WALLPAPER_INSTALL_PATH>/background-hubble.xml - a <background>
	    document with, for every image, a <static> entry followed by a
	    <transition> to the next image; the last image transitions back to
	    the first so the sequence is cyclic.

	Args:
		image_file_list: source image paths. Only their base names are used;
			each is referenced under WALLPAPER_INSTALL_PATH.

	Raises:
		IndexError: if image_file_list is empty. The exception type is kept
			for backward compatibility, but it is raised before any file is
			opened so no half-written config is left behind.
	"""
	if not image_file_list:
		raise IndexError("image_file_list is empty; no slideshow can be generated")

	slideshow_file = os.path.join(WALLPAPER_INSTALL_PATH, "background-hubble.xml")

	# The XML declaration has to be the very first thing in the document, so
	# nothing (not even a newline) is written ahead of it.
	gnome_xml = "\n".join([
		'<?xml version="1.0" encoding="UTF-8"?> ',
		'<!DOCTYPE wallpapers SYSTEM "gnome-wp-list.dtd"> ',
		'<wallpapers> ',
		'  <wallpaper deleted="false"> ',
		'     ',
		'    <name>Hubble</name> ',
		# Derived from the install path so both files always agree.
		'    <filename>' + slideshow_file + '</filename> ',
		'    <options>scale</options> ',
		'  </wallpaper> ',
		'</wallpapers>',
		'\t',
	])
	with open(GNOME_WALLPAPER_CONFIG, "w") as config_file:
		config_file.write(gnome_xml)

	background_header = "\n".join([
		"",
		"<background>",
		"\t<starttime>",
		"\t\t<year>2011</year>",
		"\t\t<month>01</month>",
		"\t\t<day>01</day>",
		"\t\t<hour>00</hour>",
		"\t\t<minute>00</minute>",
		"\t\t<second>00</second>",
		"\t</starttime>",
		"\t<!-- This animation starts 2011 new year midnight. -->",
	])

	installed_files = [
		os.path.join(WALLPAPER_INSTALL_PATH, os.path.basename(image_file))
		for image_file in image_file_list
	]

	with open(slideshow_file, "w") as config_file:
		config_file.write(background_header)
		for index, from_file in enumerate(installed_files):
			# Modulo wraps the last image around to the first one.
			to_file = installed_files[(index + 1) % len(installed_files)]
			static_config(config_file, from_file)
			transition_config(config_file, from_file, to_file)
		config_file.write("\n</background>")


def main():
	"""Install every file found under IMAGE_DIR and generate the slideshow.

	IMAGE_DIR is the first command-line argument. All files found by walking
	it recursively are collected (no filtering by file type is done), copied
	by install_wallpaper_files() and referenced by
	generate_wallpaper_config(). A missing argument, a missing or
	non-directory IMAGE_DIR, or an IMAGE_DIR without files prints a message
	and exits with status 1.
	"""
	if len(sys.argv) < 2:
		print("Help : " + sys.argv[0] + " IMAGE_DIR ")
		sys.exit(1)

	image_dir = sys.argv[1]

	if not os.path.exists(image_dir):
		print(image_dir + " doesnot exist.")
		sys.exit(1)

	if not os.path.isdir(image_dir):
		print(image_dir + " is not a directory.")
		sys.exit(1)

	image_file_list = []
	for root, _dirs, files in os.walk(image_dir):
		print(root + " contains : ")
		for filename in files:
			image_path = os.path.join(root, filename)
			print("\t" + image_path)
			image_file_list.append(image_path)

	# Without any file there is nothing to cycle through; stop before the
	# config generation fails on the empty list.
	if not image_file_list:
		print(image_dir + " contains no files.")
		sys.exit(1)

	print("Installing wallpaper files:")
	install_wallpaper_files(image_file_list)
	print("done.")

	print("Preparing wallpaper config file:")
	generate_wallpaper_config(image_file_list)
	print("done.")



	
# Build the wallpaper slideshow only when executed as a script.
if __name__ == "__main__":
	main()



python小玩具:自动下载哈勃望远镜照片壁纸

#!/usr/bin/python
# William Li  null@live.cn

import os
import urllib
import urllib2
import re
from threading import Thread
from Queue import Queue

# Number of download worker threads started by main().
THREAD_NUM = 5

# Queue shared by main() (producer) and the download workers (consumers).
work_queue = Queue()

def _fetch_wallpaper(page_url, url_pattern):
	"""Download the wallpaper image linked from one gallery page, if any."""
	response = urllib2.urlopen(page_url)
	try:
		html = response.read()
	finally:
		response.close()

	match = url_pattern.search(html)
	if not match:
		return

	img_download_url = match.group(0)
	img_file_name = match.group('img_file_name')
	if os.path.isfile(img_file_name):
		print("File already downloaded.")
	else:
		print("Downloading " + img_file_name + " from " + img_download_url)
		urllib.urlretrieve(img_download_url, img_file_name)


def download_worker():
	"""Thread body: take page URLs from work_queue until told to stop.

	Each queue item is a wallpaper page URL; the image it links to is saved
	in the current working directory under the file name captured by the
	pattern. The string "stop_working" ends the loop. A failure on one URL
	is reported and the worker moves on to the next item, so the worker
	stays alive to consume its stop marker.
	"""
	# Compiled once per worker instead of once per queue item.
	url_pattern = re.compile("http://imgsrc.hubblesite.org/hu/db/images/(?P<img_file_name>.+_wallpaper.jpg)")
	while True:
		url_line = work_queue.get()
		try:
			if url_line == "stop_working":
				print("Job done.")
				break
			_fetch_wallpaper(url_line, url_pattern)
		except Exception as err:
			# Worker boundary: report and keep serving the queue.
			print("Failed to process " + url_line + ": " + str(err))
		finally:
			# Mark the item done only after it has really been handled, so
			# work_queue.join() reflects completed work.
			work_queue.task_done()
		

def main():
	"""Scrape the Hubble wallpaper gallery and download with worker threads.

	Fetches the gallery index, derives one 2048x1280 wallpaper page URL per
	matched album link, starts THREAD_NUM download_worker threads, queues
	the page URLs followed by one "stop_working" marker per thread, then
	waits for the threads and for the queue to drain.
	"""
	print("start.")
	gallery_html = urllib2.urlopen("http://hubblesite.org/gallery/wallpaper").read()

	album_paths = re.compile(r"/gallery/wallpaper/pr\w+/").findall(gallery_html)

	img_url_list = [
		"http://hubblesite.org" + album_path + "2048x1280_wallpaper/"
		for album_path in album_paths
	]
	print(img_url_list)

	# Workers are started before anything is queued; they block on get().
	threads = [Thread(target=download_worker) for _ in range(THREAD_NUM)]
	for worker in threads:
		worker.start()

	for page_url in img_url_list:
		print("Opening " + page_url)
		work_queue.put(page_url)

	# One stop marker per worker so every thread leaves its loop.
	for _ in range(THREAD_NUM):
		work_queue.put("stop_working")

	for worker in threads:
		worker.join()

	print("waiting all jobs to be done.")
	work_queue.join()
	print("All done.")

# Start the downloader only when executed as a script.
if __name__ == "__main__":
	main()