From 5ee3df4d830ead2f19d0150619a06ce5ac841b7b Mon Sep 17 00:00:00 2001
From: Georgi Nikolov
Date: Sun, 26 Jan 2025 14:59:16 +0200
Subject: Fixed the inside pages' resources not being downloaded

---
 controllers/archive.php | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/controllers/archive.php b/controllers/archive.php
index a8d8598..e730719 100644
--- a/controllers/archive.php
+++ b/controllers/archive.php
@@ -73,7 +73,7 @@ class DownloadPage {
 
     function getCorrectLinkPattern($page_url) : string {
         // NOTE: Offset by 2 because of the '//' of the protocol
-        $page_url = substr($page_url, strpos($page_url, "//") + 2, strlen($page_url));
+        $page_url = substr($page_url, strpos($page_url, "//"), strlen($page_url));
         return $page_url;
     }
 
@@ -139,7 +139,13 @@ class DownloadPage {
             return $relativeUrl;
         }
         // Otherwise resolve it agains the base url
-        return rtrim($baseUrl, '/') . '/' . ltrim($relativeUrl, '/');
+        // Get only the domain with the protocol
+        $pattern = '/((^.*\/\/|.{0,0})[a-z0-9A-Z\.]+)(\/\w+|$)/';
+        $actualUrl = $baseUrl;
+        if (preg_match($pattern, $baseUrl, $matches)) {
+            $actualUrl = $matches[1];
+        }
+        return rtrim($actualUrl, '/') . '/' . ltrim($relativeUrl, '/');
     }
 
     function handleCssUrls(&$content) : void {
-- 
cgit v1.2.3