From cdb9c3ecd5014f698959a8df2b2732df427923ae Mon Sep 17 00:00:00 2001 From: "Skylar \"The Cobra\" Widulski" Date: Mon, 21 Aug 2023 15:21:43 -0400 Subject: [PATCH] Fix infinite redirect issue Signed-off-by: Skylar "The Cobra" Widulski --- main.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/main.py b/main.py index 2ed1936..00ed5ad 100644 --- a/main.py +++ b/main.py @@ -5,19 +5,17 @@ import requests import html import re from bs4 import BeautifulSoup - +from urllib.parse import quote, unquote def scrape(url): data = requests.get(url) our_path = re.sub(r".*://.*/", "/", request.url) path = re.sub(r".*://.*/", "/", data.url) - print() - print(our_path) - print(path) - print() - if our_path != path: - return f"REDIRECT {path}" + if our_path != path and \ + quote(unquote(re.sub("[?&=]", "", our_path))) != re.sub("[?&=]", "", path): + # this is bad ^ + return f"REDIRECT {path}" ret = [] soup = BeautifulSoup(data.text, "html.parser") for div in soup.find_all("div"):