Source code for scylla.worker

from typing import Union

import requests
from requests_html import HTMLSession, HTMLResponse, HTML

from scylla.loggings import logger


class Worker:
    """Fetch web pages through a shared ``HTMLSession``.

    The session is created once per worker and reused for every request;
    call :meth:`stop` to close it when the worker is no longer needed.
    """

    def __init__(self):
        """Initialize the worker object with a fresh ``HTMLSession``."""
        self.session = HTMLSession()

    def stop(self):
        """Close the underlying session.

        Declared as an instance method so that ``worker.stop()`` and the
        ``self.stop()`` call in :meth:`get_html` actually work.
        """
        self.session.close()

    def get_html(self, url: str, render_js: bool = True) -> Union[HTML, None]:
        """Get the parsed HTML of a specific URL.

        :param url: the URL to fetch
        :param render_js: whether to render JavaScript on the fetched page
            before returning it, defaults to True
        :return: the ``html`` attribute of the response when the response is
            OK; ``None`` when the request fails, is interrupted, or the
            response is not OK
        """
        try:
            # TODO: load config for timeout
            response: HTMLResponse = self.session.get(url, timeout=30)
        except requests.RequestException:
            logger.warning('[Worker] Cannot get this url: ' + url)
            return None
        except (KeyboardInterrupt, SystemExit, InterruptedError):
            # An interrupt during the request closes the session and is
            # reported to the caller as "no result" instead of propagating.
            self.stop()
            return None

        if not response.ok:
            return None

        if render_js:
            logger.debug('starting render js...')
            response.html.render(wait=1.5, timeout=10.0)
            logger.debug('end render js...')

        return response.html