Source code for scylla.providers.base_provider

from requests_html import HTML

from ..database import ProxyIP


[docs]class BaseProvider(object): """BaseProvider is the abstract class for the proxy providers :raises NotImplementedError: [if urls() or parse() is not implemented] """ _sleep = 0 def __init__(self): pass def __str__(self): return self.__class__.__name__
[docs] def sleep_seconds(self) -> int: """Return a sleep time for each request, by default it is 0 :return: sleep time in seconds """ return self._sleep
[docs] def urls(self) -> [str]: """Return a list of url strings for crawling :return: [a list of url strings] :rtype: [str] """ raise NotImplementedError
[docs] def parse(self, html: HTML) -> [ProxyIP]: """Parse the document in order to get a list of proxies :param html: the HTML object from requests-html :return: a list of proxy ips """ raise NotImplementedError
[docs] @staticmethod def should_render_js() -> bool: """Whether needs js rendering By default, it is False. :return: a boolean value indicating whether or not js rendering is needed :rtype: bool """ return False