python

Multiprocess Downloader

장곰부대 2018. 3. 5. 17:16
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import multiprocessing
import os
import requests
 
########################################################################
class MultiProcDownloader(object):
    """
    Downloads urls with Python's multiprocessing module

    One child process is started per url. Each process fetches its url with
    ``requests`` and writes the response body to a file in the current
    working directory, named after the last path segment of the url.
    """

    # Seconds ``requests`` may wait on the server before giving up; without
    # a timeout a stalled server would block the worker process forever.
    TIMEOUT = 30

    #----------------------------------------------------------------------
    def __init__(self, urls):
        """
        Initialize class with list of urls

        :param urls: iterable of url strings to download
        """
        self.urls = urls

    #----------------------------------------------------------------------
    def run(self):
        """
        Download the urls and wait for the processes to finish
        """
        jobs = []
        for url in self.urls:
            process = multiprocessing.Process(target=self.worker, args=(url,))
            jobs.append(process)
            process.start()
        # Join only after every process has been started, so the downloads
        # run side by side instead of one after another.
        for job in jobs:
            job.join()

    #----------------------------------------------------------------------
    def worker(self, url):
        """
        The target method that the process uses to download the specified url

        :param url: address of the file to download
        :raises ValueError: if no file name can be derived from the url
        :raises requests.exceptions.RequestException: on a connection error,
            a timeout or a 4xx/5xx response
        """
        fname = self._filename_from_url(url)
        msg = "Starting download of %s" % fname
        # A single pre-formatted argument prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (msg, multiprocessing.current_process().name))
        r = requests.get(url, timeout=self.TIMEOUT)
        # Check the status before touching the disk so an HTTP error page is
        # never saved under the name of the requested file.
        r.raise_for_status()
        with open(fname, "wb") as f:
            f.write(r.content)

    #----------------------------------------------------------------------
    @staticmethod
    def _filename_from_url(url):
        """
        Return the local file name for url: its last path segment with any
        query string or fragment removed

        :raises ValueError: if the url has no last path segment
        """
        # The fragment comes last in a url, so cut it before the query.
        path = url.split("#", 1)[0].split("?", 1)[0]
        fname = os.path.basename(path)
        if not fname:
            # Fail before downloading rather than on open("") afterwards.
            raise ValueError("Cannot derive a file name from url: %r" % (url,))
        return fname
 
#----------------------------------------------------------------------
if __name__ == "__main__":
    # Local import: sys is only needed when the module is run as a script.
    import sys

    # The urls to download are taken from the command line, e.g.
    #   python downloader.py http://example.com/a.pdf http://example.com/b.pdf
    urls = sys.argv[1:]
    if not urls:
        # Nothing to download: show how to call the script and exit non-zero.
        sys.exit("usage: %s URL [URL ...]" % sys.argv[0])
    downloader = MultiProcDownloader(urls)
    downloader.run()
cs