From 4b6584b6fede7bf47e776df908d5eea7b448b6bf Mon Sep 17 00:00:00 2001
From: Juro Oravec
Date: Fri, 16 Aug 2019 23:16:18 +0200
Subject: [PATCH] allow USER_AGENT_LIST to be list or function

The USER_AGENT_LIST setting may now be given in one of three forms:

* a list or tuple, which is used as the user agent list as is;
* a callable, which is called with no arguments and whose return
  value is used as the user agent list;
* any other value, which is passed to open() as before and read as
  one user agent per line.

When the setting is unset or empty, the middleware still falls back to
USER_AGENT (or to the user_agent argument it was constructed with).

---
 random_useragent.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/random_useragent.py b/random_useragent.py
index 0240eaf..2ed086b 100644
--- a/random_useragent.py
+++ b/random_useragent.py
@@ -25,16 +25,20 @@ class RandomUserAgentMiddleware(UserAgentMiddleware):
     def __init__(self, settings, user_agent='Scrapy'):
         super(RandomUserAgentMiddleware, self).__init__()
         self.user_agent = user_agent
-        user_agent_list_file = settings.get('USER_AGENT_LIST')
-        if not user_agent_list_file:
-            # If USER_AGENT_LIST_FILE settings is not set,
+        user_agent_list_source = settings.get('USER_AGENT_LIST')
+        if not user_agent_list_source:
+            # If the USER_AGENT_LIST setting is not set,
             # Use the default USER_AGENT or whatever was
             # passed to the middleware.
             ua = settings.get('USER_AGENT', user_agent)
             self.user_agent_list = [ua]
+        elif isinstance(user_agent_list_source, (list, tuple)):
+            self.user_agent_list = user_agent_list_source
+        elif callable(user_agent_list_source):
+            self.user_agent_list = user_agent_list_source()
         else:
-            with open(user_agent_list_file, 'r') as f:
-                self.user_agent_list = [line.strip() for line in f.readlines()]
+            with open(user_agent_list_source, 'r') as f:
+                self.user_agent_list = [line.strip() for line in f]
 
     @classmethod
     def from_crawler(cls, crawler):