Source code for amazonorders.util

__copyright__ = "Copyright (c) 2024-2025 Alex Laird"
__license__ = "MIT"

import importlib
import logging
import re
from typing import List, Union, Optional, Callable, Any

from bs4 import Tag, BeautifulSoup
from requests import Response

from amazonorders.selectors import Selector

logger = logging.getLogger(__name__)


class AmazonSessionResponse:
    """
    Pairs a :class:`requests.Response` with the HTML document parsed from its body, so callers can
    reach both the raw response and the parsed tree from a single object.
    """

    def __init__(self,
                 response: Response,
                 bs4_parser: str) -> None:
        """
        :param response: The response object to wrap.
        :param bs4_parser: The parser name handed to ``BeautifulSoup`` when parsing the response text.
        """
        #: The request's response object.
        self.response: Response = response
        #: The parsed HTML from the response.
        self.parsed: Tag = BeautifulSoup(response.text, bs4_parser)
def _selector_text_matches(tag: Tag, selector: Selector) -> bool:
    """
    Check whether the text of ``tag`` satisfies the text filter configured on ``selector``.

    ``selector.text`` takes precedence and must equal the tag's whitespace-stripped text exactly. Otherwise
    ``selector.text_contains`` is compared case-insensitively as a substring of the tag's text. When the
    selector defines neither filter, nothing matches.

    :param tag: The ``Tag`` whose text is inspected.
    :param selector: The selector carrying the ``text`` / ``text_contains`` filter.
    :return: ``True`` if the tag's text satisfies the filter, ``False`` otherwise.
    """
    exact = selector.text
    if exact is not None:
        return tag.text.strip() == exact

    fragment = selector.text_contains
    if fragment is not None:
        return fragment.lower() in tag.text.lower()

    # No text filter configured on the selector
    return False
def select(parsed: Tag,
           selector: Union[List[Union[str, Selector]], Union[str, Selector]]) -> List[Tag]:
    """
    This is a helper function that extends BeautifulSoup's `select() <https://www.crummy.com/software/
    BeautifulSoup/bs4/doc/#css-selectors-through-the-css-property>`_ method to allow for multiple selectors.
    The ``selector`` can be either a ``str`` or a ``list``. If a ``list`` is given, each selector in the list will be
    tried until one is found to return a populated list of ``Tag``'s, and that value will be returned.

    A plain ``str`` entry is used as a CSS selector as-is. A ``Selector`` entry is first matched by its
    ``css_selector``, and the resulting tags are then filtered by the selector's text criteria.

    :param parsed: The ``Tag`` from which to attempt selection.
    :param selector: The CSS selector(s) for the field.
    :return: The selected tags, or an empty list if no selector produced a match.
    :raises TypeError: If an entry is neither a ``str`` nor a ``Selector``.
    """
    if isinstance(selector, (str, Selector)):
        selector = [selector]

    for s in selector:
        if isinstance(s, Selector):
            # Collect the matching tags themselves. Extending the list with a Tag (``list += tag``) would
            # iterate the Tag and add its *children* instead, and drop childless matches entirely.
            tags = [t for t in parsed.select(s.css_selector)
                    if t and _selector_text_matches(t, s)]
        elif isinstance(s, str):
            tags = parsed.select(s)
        else:
            raise TypeError(f"Invalid selector type: {type(s)}")

        # The first selector that yields anything wins
        if tags:
            return tags

    return []
def select_one(parsed: Tag,
               selector: Union[List[Union[str, Selector]], Union[str, Selector]]) -> Optional[Tag]:
    """
    This is a helper function that extends BeautifulSoup's `select_one() <https://www.crummy.com/software/
    BeautifulSoup/bs4/doc/#css-selectors-through-the-css-property>`_ method to allow for multiple selectors.
    The ``selector`` can be either a ``str`` or a ``list``. If a ``list`` is given, each selector in the list will be
    tried until one is found to return a populated ``Tag``, and that value will be returned.

    For a ``Selector`` entry, only the single tag returned by ``select_one()`` for its ``css_selector`` is
    checked against the selector's text criteria; if that tag does not satisfy them, the entry is treated
    as having no match and the next selector is tried.

    :param parsed: The ``Tag`` from which to attempt selection.
    :param selector: The CSS selector(s) for the field.
    :return: The selected tag, or ``None`` if no selector produced a match.
    :raises TypeError: If an entry is neither a ``str`` nor a ``Selector``.
    """
    candidates = [selector] if isinstance(selector, (str, Selector)) else selector

    for s in candidates:
        if isinstance(s, Selector):
            found = parsed.select_one(s.css_selector)
            # Discard the CSS match when its text does not satisfy the selector
            if not (found and _selector_text_matches(found, s)):
                found = None
        elif isinstance(s, str):
            found = parsed.select_one(s)
        else:
            raise TypeError(f"Invalid selector type: {type(s)}")

        if found:
            return found

    return None
def to_type(value: str) -> Union[int, float, bool, str, None]:
    """
    Attempt to convert ``value`` to its primitive type of ``int``, ``float``, or ``bool``.

    Conversions are attempted in that order: ``int`` first, then ``float``, and finally a case-insensitive
    comparison against ``"true"`` / ``"false"``. If nothing applies, ``value`` is returned unchanged.

    If ``value`` is an empty string (or otherwise falsy), ``None`` will be returned.

    :param value: The value to convert.
    :return: The converted value.
    """
    if not value:
        return None

    # Numeric conversions, most specific first; only a ValueError moves on to the next attempt
    for convert in (int, float):
        try:
            return convert(value)
        except ValueError:
            continue

    if isinstance(value, str):
        lowered = value.lower()
        if lowered == "true":
            return True
        if lowered == "false":
            return False

    return value
def load_class(package: List[str], clazz: str) -> Union[Callable, Any]:
    """
    Import the given class from the given package, and return it.

    :param package: The package, as a list of path segments that are joined with ``.`` to form the
        module name to import.
    :param clazz: The name of the attribute (typically a class) to fetch from the imported module.
    :return: The attribute named ``clazz`` from the imported module.
    """
    module_name = ".".join(package)
    module = importlib.import_module(module_name)

    return getattr(module, clazz)
def cleanup_html_text(text: str) -> str:
    """
    Cleanup excessive whitespace within text that comes from an HTML block.

    Line returns are turned into sentence breaks (``". "``), runs of whitespace are collapsed to a single
    space, runs of periods before whitespace or the end of the text are reduced to one, and the result
    always ends with a period.

    :param text: The text to clean up.
    :return: The cleaned up text.
    """
    # Drop surrounding whitespace before looking at line structure
    cleaned = text.strip()

    # Collapse blank lines to a single line return, then make each line return a sentence break
    cleaned = re.sub(r"\n\s*\n+", "\n", cleaned).replace("\n", ". ")

    # Squash any leftover run of whitespace down to one space
    cleaned = re.sub(r"\s\s+", " ", cleaned)

    # A run of periods followed by whitespace or the end of the text becomes a single period
    cleaned = re.sub("\\.+($|\\s)", r".\1", cleaned)

    # Guarantee the terminating period
    return cleaned if cleaned.endswith(".") else cleaned + "."