Skip to content

Extract URL data using a regex

python
  @staticmethod
    def extract_url_data(url: str) -> dict[str, str]:
        """Split a URL-like string into its prefix, domain, port and suffix.

        The returned dict has four keys, taken from the named groups of the
        pattern below:

        * ``prefix`` -- ``http:``/``https:`` followed by up to two slashes,
          or up to two leading slashes/backslashes (possibly empty).
        * ``domain`` -- a run of word characters, dots, dashes and
          underscores that starts with a word character.
        * ``port``   -- ``:`` followed by digits, *including* the colon, or
          an empty string when no port is present.
        * ``suffix`` -- everything after the port (possibly empty).

        Args:
            url: The string to parse.

        Returns:
            The dict described above, or an empty dict when ``url`` is falsy
            or does not match the pattern.
        """
        if not url:
            return {}

        # URL schemes are case-insensitive, so "HTTP://host" must be parsed
        # the same way as "http://host"; without IGNORECASE the scheme itself
        # would be captured as the domain.
        regex = re.compile(
            r"^((?P<prefix>https?\:[\/]{0,2}|[\\\/]{0,2})(?P<domain>\w+[\.\-\_\w]*)(?P<port>(\:{1}\d+)?)(?P<suffix>.*))$",
            re.IGNORECASE,
        )

        # The pattern is anchored at both ends, so it can match at most once:
        # a single match() call is enough.
        found = regex.match(url)
        if found is None:
            return {}

        return found.groupdict()

    @classmethod
    def extract_api_url(cls, url: str) -> str | None:
        """Return the ``domain`` and ``port`` parts of ``url`` joined together.

        The URL is parsed with ``cls.extract_url_data``; the "domain" and
        "port" entries of its result are concatenated with no separator.

        Args:
            url: The string to parse.

        Returns:
            The concatenated domain and port, or ``None`` when the parsed
            data has no (or an empty) "domain" entry.
        """
        parts = cls.extract_url_data(url)
        host = parts.get("domain")

        # Without a domain there is nothing meaningful to return.
        if not host:
            return None

        return "".join((host, parts.get("port")))