Monday, July 28, 2008

PycURL example

Here's a small sample of Python code demonstrating the use of PycURL, the Python interface to libcurl. It does the same thing as my cURL example: it requests a login page, posts the login form, and then downloads a file to disk. Refer to this page for a detailed list of libcurl options.


import pycurl, StringIO
# Constants

# Site endpoints
LOGIN_URL = 'http://interesting.website.com/LogIn.asp'
DOWNLOAD_URL = 'http://interesting.website.com/do_it.asp?do=0&something=0&interesting=0'
DOWNLOAD_REFERER = 'http://interesting.website.com/referer.asp'

# Request details
USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 6.0)'
LOGIN_POST_DATA = 'FormField=URL%20Encoded%20Value'  # form body, already URL-encoded

# Local output
DOWNLOADED_FILE = r'C:\temp\downloaded_file.txt'
FILE_MODE = 'wb'  # binary write mode

# Fetch the login page, post the login form, then download a file to disk,
# reusing one curl handle so the same in-memory cookie engine is in effect
# for all three requests.

# Set up objects
dev_null = StringIO.StringIO()  # in-memory sink for response bodies we discard
slurpp = pycurl.Curl()

try:
    # Request login page
    slurpp.setopt(pycurl.USERAGENT, USER_AGENT)
    slurpp.setopt(pycurl.FOLLOWLOCATION, 1)
    #slurpp.setopt(pycurl.AUTOREFERER, 1) # not yet implemented in pycURL
    slurpp.setopt(pycurl.WRITEFUNCTION, dev_null.write)
    # An empty COOKIEFILE name loads no file but switches on cookie handling.
    slurpp.setopt(pycurl.COOKIEFILE, '')
    slurpp.setopt(pycurl.URL, LOGIN_URL)
    slurpp.perform()

    # Log in to site (same URL as above, this time as a POST)
    slurpp.setopt(pycurl.POSTFIELDS, LOGIN_POST_DATA)
    slurpp.setopt(pycurl.POST, 1)
    slurpp.perform()

    # Download relevant data
    slurpp.setopt(pycurl.HTTPGET, 1)  # switch the handle back from POST to GET
    slurpp.setopt(pycurl.URL, DOWNLOAD_URL)
    slurpp.setopt(pycurl.REFERER, DOWNLOAD_REFERER)
    # open() rather than file(): same result here, and open() is the
    # documented way to open a file.
    outfile = open(DOWNLOADED_FILE, FILE_MODE)
    try:
        slurpp.setopt(pycurl.WRITEFUNCTION, outfile.write)
        slurpp.perform()
    finally:
        # Close (and flush) the output file even if the transfer fails.
        outfile.close()
finally:
    # Clean up and close out, whether or not any request raised.
    slurpp.close()
    dev_null.close()

No comments:

Post a Comment