#!/usr/bin/env python
"""Replay logged searches against a live search backend.

Reads TAB-separated log lines from stdin, paces them to match their logged
timestamps, and sends a sample of them to a MediaWiki Special:Search URL
using a pool of worker processes.
"""

import sys
import random
import urllib
import urllib2
from multiprocessing import Process, Queue
from Queue import Full
import time
import calendar


def send_line(search, destination):
    # Since requests come in with one second timestamp resolution we assume
    # they came in at some random point within that second.
    time.sleep(random.uniform(0, 1))
    start = time.time()
    params = "fulltext=Search&srbackend=CirrusSearch"
    # Re-quote the search so spaces and non-ASCII characters form a valid URL.
    url = "%s/%s?%s" % (destination, urllib.quote(search), params)
    urllib2.urlopen(url)
    print "Fetched ({:07.3f}) {}".format(time.time() - start, url)


def hostname(wiki):
    """Guesses the hostname for a wiki database name like "enwiki"."""
    wiki = wiki.split(":")[1]
    if wiki == "commonswiki":
        return "commons.wikimedia.org"
    if wiki[2:] == "wiki":
        return wiki[0:2] + ".wikipedia.org"
    # Not perfect but decent
    return wiki[0:2] + "." + wiki[2:] + ".org"


def send_lines(percent, jobs, destination):
    queue = Queue(jobs)  # Only allow a backlog of one per job

    # Spawn jobs.  Note that we just spawn them as daemons because we don't
    # want to bother signaling them when the main process is done and we
    # don't care if they die when it finishes either.  In fact, we'd love
    # for them to die immediately because we want to stop sending requests
    # when the main process stops.
    def work(queue):
        while True:
            try:
                (host, search) = queue.get()
                if "%s" in destination:
                    resolved_destination = destination % host
                else:
                    resolved_destination = destination
                if host == "commons.wikimedia.org":
                    # Searches on commons are file searches.
                    search = "File:" + search
                send_line(search, resolved_destination)
            except (KeyboardInterrupt, SystemExit):
                break
            except Exception:
                # A bad log line or a failed request shouldn't kill the
                # worker; skip it and take the next one.
                continue
    for i in range(jobs):
        p = Process(target=work, args=(queue,))
        p.daemon = True
        p.start()

    # Got to read stdin line by line because iterating over it buffers,
    # especially on old pythons.
    line = sys.stdin.readline()
    target_lag = None
    while line:
        # Sample the requested percentage of the log.
        if random.uniform(0, 100) > percent:
            line = sys.stdin.readline()
            continue
        s = line.strip().split("\t")
        # Timestamps look like 2014-01-01T00:00:00Z; parse them as UTC.
        target_time = calendar.timegm(
            time.strptime(s[1][:-1] + "UTC", "%Y-%m-%dT%H:%M:%S%Z"))
        lag = time.time() - target_time
        if target_lag is None:
            # Remember how far behind the log we started so we can replay
            # the searches at their original pace.
            target_lag = lag
        wait_time = target_lag - lag
        if wait_time >= 0:
            print "Sleeping %s to stay %s behind the logged time." % \
                (wait_time, target_lag)
            time.sleep(wait_time)
        try:
            # Don't block: dropping requests beats falling behind the log.
            queue.put((hostname(s[2]), urllib.unquote(s[3])), False)
        except Full:
            print "Couldn't keep up so dropping the request"
        line = sys.stdin.readline()


if __name__ == "__main__":
    from optparse import OptionParser
    parser = OptionParser(usage="usage: %prog [options]")
    parser.add_option("-p", dest="percent", type="int", default=1,
                      metavar="N",
                      help="send this percent of the logged searches")
    parser.add_option("-j", "--jobs", type="int", default=1, metavar="JOBS",
                      help="number of processes used to send searches")
    parser.add_option("-d", "--destination", dest="destination",
                      type="string", metavar="DESTINATION",
                      default="http://127.0.0.1:8080/wiki/Special:Search",
                      help="where to send the searches; use %s as a "
                           "placeholder for the hostname derived from "
                           "the log line")
    (options, args) = parser.parse_args()
    try:
        send_lines(options.percent, options.jobs, options.destination)
    except KeyboardInterrupt:
        pass  # This is how we expect to exit anyway
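
# Example invocation (a sketch, not from the original source: the script and
# log file names are hypothetical, and the field layout is what the parser
# above expects, i.e. field 2 an ISO-8601 UTC timestamp, field 3 a
# colon-prefixed wiki db name, field 4 a url-encoded search term):
#
#   zcat searches.log.gz | ./replay_searches.py -p 10 -j 8 \
#       -d 'http://%s/wiki/Special:Search'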