def execute(argv=None, settings=None):
    """Entry point of the ``scrapy`` command-line tool (settings-resolution
    prelude).

    Resolves the command-line arguments and the :class:`Settings` object to
    use: falls back to ``sys.argv``, then to the legacy ``scrapy.conf``
    singleton, then to the project settings, before running the deprecation
    check and re-publishing the settings on ``scrapy.conf`` for backwards
    compatibility.

    :param argv: command-line arguments; defaults to ``sys.argv``.
    :param settings: a pre-built Settings object, or None to discover one.
    """
    if argv is None:
        argv = sys.argv

    # --- backwards compatibility for scrapy.conf.settings singleton ---
    # If old code already populated the deprecated scrapy.conf module,
    # reuse its settings object instead of building a fresh one.
    if settings is None and 'scrapy.conf' in sys.modules:
        from scrapy import conf
        if hasattr(conf, 'settings'):
            settings = conf.settings
    # ------------------------------------------------------------------

    if settings is None:
        settings = get_project_settings()
        # set EDITOR from environment if available
        try:
            editor = os.environ['EDITOR']
        except KeyError:
            pass
        else:
            settings['EDITOR'] = editor
    check_deprecated_settings(settings)

    # --- backwards compatibility for scrapy.conf.settings singleton ---
    # Mirror the resolved settings back onto the deprecated scrapy.conf
    # module, silencing the deprecation warning that importing it raises.
    import warnings
    from scrapy.exceptions import ScrapyDeprecationWarning
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ScrapyDeprecationWarning)
        from scrapy import conf
        conf.settings = settings
    # ------------------------------------------------------------------

    # NOTE(review): in the original cmdline.py, execute() continues past this
    # excerpt (command discovery and dispatch); only the prelude is shown here.
definit_env(project='default', set_syspath=True): """Initialize environment to use command-line tool from inside a project dir. This sets the Scrapy settings module and modifies the Python path to be able to locate the project module. """ cfg = get_config() if cfg.has_option('settings', project): os.environ['SCRAPY_SETTINGS_MODULE'] = cfg.get('settings', project) closest = closest_scrapy_cfg() if closest: projdir = os.path.dirname(closest) if set_syspath and projdir notin sys.path: sys.path.append(projdir)
# Body of get_project_settings() — the `def` line is outside this excerpt.
# Create a Settings instance; importing scrapy.settings.Settings also loads
# the defaults from default_settings.py in the same package.
settings = Settings()
# Path of the user's settings module, read from the environment.
settings_module_path = os.environ.get(ENVVAR)
# Overlay the project settings on top of the defaults, if configured.
if settings_module_path:
    settings.setmodule(settings_module_path, priority='project')

# XXX: remove this hack
# NOTE(review): unpickling data taken from an environment variable is unsafe
# if that variable can be influenced by untrusted input.
pickled_settings = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
if pickled_settings:
    settings.setdict(pickle.loads(pickled_settings), priority='project')

# XXX: deprecate and remove this functionality
# Any SCRAPY_-prefixed environment variable overrides the matching setting.
env_overrides = {k[7:]: v for k, v in os.environ.items()
                 if k.startswith('SCRAPY_')}
if env_overrides:
    settings.setdict(env_overrides, priority='project')

# Warn about deprecated/obsolete settings still in use.
check_deprecated_settings(settings)

# --- backwards compatibility for scrapy.conf.settings singleton ---
# Publish the settings on the deprecated scrapy.conf module, silencing
# the deprecation warning its import raises.
import warnings
from scrapy.exceptions import ScrapyDeprecationWarning
with warnings.catch_warnings():
    warnings.simplefilter("ignore", ScrapyDeprecationWarning)
    from scrapy import conf
    conf.settings = settings
# ------------------------------------------------------------------
此处执行check_deprecated_settings函数
1 2 3 4 5 6 7
defcheck_deprecated_settings(settings): deprecated = [x for x in DEPRECATED_SETTINGS if settings[x[0]] isnotNone] if deprecated: msg = "You are using the following settings which are deprecated or obsolete" msg += " (ask scrapy-users@googlegroups.com for alternatives):" msg = msg + "\n " + "\n ".join("%s: %s" % x for x in deprecated) warnings.warn(msg, ScrapyDeprecationWarning)
defclosest_scrapy_cfg(path='.', prevpath=None): """Return the path to the closest scrapy.cfg file by traversing the current directory and its parents """ if path == prevpath: return'' path = os.path.abspath(path) cfgfile = os.path.join(path, 'scrapy.cfg') if os.path.exists(cfgfile): return cfgfile return closest_scrapy_cfg(os.path.dirname(path), path)
def_iter_command_classes(module_name): # TODO: add `name` attribute to commands and and merge this function with # scrapy.utils.spider.iter_spider_classes for module in walk_modules(module_name): for obj in vars(module).values(): if inspect.isclass(obj) and \ issubclass(obj, ScrapyCommand) and \ obj.__module__ == module.__name__ and \ not obj == ScrapyCommand: yield obj
def_get_commands_from_module(module, inproject): d = {} #将module也即commands文件夹文件带入 for cmd in _iter_command_classes(module): if inproject ornot cmd.requires_project: cmdname = cmd.__module__.split('.')[-1] d[cmdname] = cmd() return d
def_get_commands_from_entry_points(inproject, group='scrapy.commands'): cmds = {} for entry_point in pkg_resources.iter_entry_points(group): obj = entry_point.load() if inspect.isclass(obj): cmds[entry_point.name] = obj() else: raise Exception("Invalid entry point %s" % entry_point.name) return cmds
{'bench': <scrapy.commands.bench.Command object at 0x180d597f98>, 'check': <scrapy.commands.check.Command object at 0x180d5975c0>, 'crawl': <scrapy.commands.crawl.Command object at 0x180d57ef98>, 'edit': <scrapy.commands.edit.Command object at 0x180d57ee48>, 'fetch': <scrapy.commands.fetch.Command object at 0x180d57eb70>, 'genspider': <scrapy.commands.genspider.Command object at 0x180d57eb38>, 'list': <scrapy.commands.list.Command object at 0x180d57edd8>, 'parse': <scrapy.commands.parse.Command object at 0x180d5b1080>, 'runspider': <scrapy.commands.runspider.Command object at 0x180d5832e8>, 'settings': <scrapy.commands.settings.Command object at 0x180d5833c8>, 'shell': <scrapy.commands.shell.Command object at 0x180d5b54a8>, 'startproject': <scrapy.commands.startproject.Command object at 0x180d5b5438>, 'version': <scrapy.commands.version.Command object at 0x180d5b5470>, 'view': <scrapy.commands.view.Command object at 0x180d5b55c0>}