scrapy-plugins · Gallaecio · Apr 5, 2019
diff --git a/scrapy_splash/middleware.py b/scrapy_splash/middleware.py
@@ -4,6 +4,7 @@
 import copy
 import json
 import logging
+import re
 import warnings
 from collections import defaultdict
 
@@ -233,6 +234,13 @@ def __init__(self, crawler, splash_base_url, slot_policy, log_400):
     def from_crawler(cls, crawler):
         splash_base_url = crawler.settings.get('SPLASH_URL',
                                                cls.default_splash_url)
+        # Fail fast on a scheme-less SPLASH_URL such as 'localhost:1234'.
+        # URL schemes are case-insensitive, so 'HTTP://' must pass as well.
+        if not re.match(r'^https?://', splash_base_url, re.IGNORECASE):
+            raise NotConfigured(
+                'The SPLASH_URL setting does not start with http:// or '
+                'https://: {}'.format(splash_base_url)
+            )
         log_400 = crawler.settings.getbool('SPLASH_LOG_400', True)
         slot_policy = crawler.settings.get('SPLASH_SLOT_POLICY',
                                            cls.default_policy)

diff --git a/tests/test_middleware.py b/tests/test_middleware.py
@@ -4,8 +4,10 @@
 import json
 import base64
 
+from pytest import raises
 import scrapy
 from scrapy.core.engine import ExecutionEngine
+from scrapy.exceptions import NotConfigured
 from scrapy.utils.test import get_crawler
 from scrapy.http import Response, TextResponse
 from scrapy.downloadermiddlewares.httpcache import HttpCacheMiddleware
@@ -765,3 +767,17 @@ def test_adjust_timeout():
     })
     req2 = mw.process_request(req2, None)
     assert req2.meta['download_timeout'] == 30
+
+
+def test_bad_splash_url():
+    """A SPLASH_URL without an http(s):// scheme must raise NotConfigured."""
+    crawler = _get_crawler({'SPLASH_URL': 'localhost:1234'})
+    with raises(NotConfigured, match='SPLASH_URL'):
+        SplashMiddleware.from_crawler(crawler)
+
+
+def test_bad_slot_policy():
+    """An unrecognised SPLASH_SLOT_POLICY value must raise NotConfigured."""
+    crawler = _get_crawler({'SPLASH_SLOT_POLICY': 'asdf'})
+    with raises(NotConfigured):
+        SplashMiddleware.from_crawler(crawler)