Here’s the code for those images. Having something of a scientific background, I’m inclined to think that I’m a step ahead of those other graphers I mentioned, because I’m exposing my methodology (ha!) to public scrutiny. Open source is really just “peer review” in disguise. The program writes its data to stdout, which can be piped to a file, and creates the 3 plots as PNGs. As the comments say, it needs a little more work to handle the DST switch properly. BeautifulSoup is probably overkill, since I only parse one attribute out of one tag. The program takes a couple of hours to run, mostly because of the need to be polite: it queries the digg servers for 672 hourly intervals, with a 15-second delay after each one.
from BeautifulSoup import BeautifulStoneSoup
from rpy import *
import os
import urllib
import datetime
import time
import sys
# please change
digg_appkey= urllib.quote('http://example.com/example', '')
class AppURLopener(urllib.FancyURLopener):
# userAgent string (please change)
version = 'FillYourBots'
# Digg API endpoints: promoted ("popular") stories and all submitted stories.
endpoint_promoted = 'http://digg.com/tools/services?endPoint=/stories/popular&type=xml'
endpoint_upcoming = 'http://digg.com/tools/services?endPoint=/stories&type=xml'

# Install the custom opener so urllib.urlopen() uses it.
urllib._urlopener = AppURLopener()

# San Francisco is the center of the universe: run the whole process in
# Pacific time.  TZ must be set before tzset() re-reads it.
os.environ['TZ'] = 'US/Pacific'
time.tzset()
# it's currently May, so we're dealing with DST for the next few months.
# this will need updating before the times switch

# Sampling window: 4 weeks of hourly intervals (30 days ago to 2 days ago).
# Read the clock once so both ends of the window come from the same instant.
now = datetime.datetime.now()
# Start on the hour: zero out minutes, seconds and microseconds
start_date = (now - datetime.timedelta(days=30)).replace(
    minute=0, second=0, microsecond=0)
# 2 days in the past to allow for promotion
end_date = now - datetime.timedelta(days=2)

# Accumulators for upcoming (all submitted) and promoted story counts.
# Each one MUST be its own list object: a chained assignment such as
# `a = b = [...]` binds both names to the same list, which would add the
# upcoming and promoted counts into one shared set of cells.
hourly_totals = [0] * 24
hourly_prom_totals = [0] * 24
# 7 rows (0=Mon .. 6=Sun) of 24 hourly cells; every row is a distinct list
dayhour_totals = [[0] * 24 for _ in range(7)]
dayhour_prom_totals = [[0] * 24 for _ in range(7)]
day_totals = [0] * 7
day_prom_totals = [0] * 7
while start_date < end_date:
interval_start = time.mktime(start_date.timetuple())
interval_end = interval_start + 3600
url = '%s&appkey=%s&min_submit_date=%d&max_submit_date=%d' % (endpoint_upcoming,
digg_appkey, interval_start, interval_end)
instring = urllib.urlopen(url).read()
d = BeautifulStoneSoup(urllib.urlopen(url).read())
promoted_url = '%s&appkey=%s&min_submit_date=%d&max_submit_date=%d' %
(endpoint_promoted, digg_appkey, interval_start, interval_end)
dp = BeautifulStoneSoup(urllib.urlopen(promoted_url).read())
data_line = (start_date.strftime('%Y/%m/%d %H:%M'),
int(d.stories['total']),
int(dp.stories['total']),
float(dp.stories['total'])/float(d.stories['total'])*100,
)
sys.stderr.write('Date: %s Total Upcoming: %d Total Promoted: %d %12.10f\n' % data_line)
print data_line
# need hour and weekday in PST/PDT
hour = start_date.hour
# isoweekday - 1 means 0=Mon, 6=Sun
weekday = start_date.isoweekday()-1
print weekday, hour, interval_start
hourly_totals[hour] += int(d.stories['total'])
hourly_prom_totals[hour] += int(dp.stories['total'])
dayhour_totals[weekday][hour] += int(d.stories['total'])
dayhour_prom_totals[weekday][hour] += int(dp.stories['total'])
day_totals[weekday] += int(d.stories['total'])
day_prom_totals[weekday] += int(dp.stories['total'])
start_date = start_date + datetime.timedelta(hours=1)
# be polite
time.sleep(15)
print 'Hourly Totals:'
for i in range(24):
print i, hourly_prom_totals[i], hourly_totals[i],
float(hourly_prom_totals[i])/hourly_totals[i]*100
# do some plotting
hourly_outfile = 'hours.png'
x = range(24)
y = [float(hourly_prom_totals[p])/hourly_totals[p]*100 for p in hourly_prom_totals]
r.bitmap('hours.png', res=200)
xlabels = [ "%d" % (i,) for i in x ]
ylabels = [0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5]
r.barplot(y, xlab="Hour", ylab="POP (%)", names_arg=xlabels, ylim=(0, 1.5),
main="Digg.com POP% By Hour (Pacific Time)")
print 'Daily Totals:'
for i in range(7):
print i, day_totals[i], day_prom_totals[i], float(day_prom_totals[i])/day_totals[i]*100
x = range(7)
y = [float(day_prom_totals[p])/day_totals[p]*100 for p in day_prom_totals]
print day_totals
print day_prom_totals
r.bitmap('days.png', res=200)
xlabels = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
r.barplot(y, xlab="Day", ylab="POP (%)", names_arg=xlabels, ylim=(0, 1.5),
main="Digg.com POP% By Day (Pacific Time)")
print 'Day-hour Totals:'
y = []
for i in range(7):
for j in range(24):
print i, j, dayhour_prom_totals[i][j], dayhour_totals[i][j],
float(dayhour_prom_totals[i][j])/dayhour_totals[i][j]*100
y.append(float(dayhour_prom_totals[i][j])/dayhour_totals[i][j]*100)
r.bitmap('dayhours.png', res=200)
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
xlabels = ['%s %2d:00' % (days[i], j) for i in range(7) for j in range(24)]
r.barplot(y, xlab="Day, Hour", ylab="POP (%)", names_arg=xlabels, ylim=(0, 2.5),
main="Digg.com POP% By Day/Hour (Pacific Time)")
0 responses so far ↓
There are no comments yet...Kick things off by filling out the form below.
Leave a Comment