diff options
| author | Georgi Nikolov <ggeorgi60@gmail.com> | 2025-01-26 21:24:48 +0200 |
|---|---|---|
| committer | Georgi Nikolov <ggeorgi60@gmail.com> | 2025-01-26 21:24:48 +0200 |
| commit | a2592eca1b76f1cc607e0e449bc635a16f0b007f (patch) | |
| tree | 7b956482d53319c213677053df43cde15b9045e4 | |
| parent | 9121554ce064629dc7aef74434b65ee10756a8a7 (diff) | |
| download | nowayforward_human-a2592eca1b76f1cc607e0e449bc635a16f0b007f.tar nowayforward_human-a2592eca1b76f1cc607e0e449bc635a16f0b007f.tar.gz nowayforward_human-a2592eca1b76f1cc607e0e449bc635a16f0b007f.zip | |
Made the URLs more universal
| -rw-r--r-- | controllers/archive.php | 40 | ||||
| -rw-r--r-- | views/archive/index.php | 4 |
2 files changed, 36 insertions, 8 deletions
diff --git a/controllers/archive.php b/controllers/archive.php index c0dee10..dbce6c3 100644 --- a/controllers/archive.php +++ b/controllers/archive.php @@ -29,9 +29,10 @@ class DownloadPage { function __construct($page_url, $folder_location, $requester_uid) { $this->folder_location = $folder_location; $this->page_url = $page_url; + $this->normalizeUrl($this->page_url); list($website_exists, $this->page_url) = $this->doesWebsiteExist($this->page_url); // Search for all the regexes that fit the *url* pattern where the pattern is the requested url but without the protocol - $page_url_pattern = $this->getCorrectLinkPattern($page_url); + $page_url_pattern = $this->getCorrectLinkPattern($this->page_url); $simular_pages = Database\Webpage::getArchivePathsByPattern('%' . $page_url_pattern . '%'); if ($website_exists) { $this->folder_name = Database\Webpage::getPagesCount() + 1; @@ -42,12 +43,24 @@ class DownloadPage { // Fallback and try to download them from the server directly $this->tryDownloadFavicon(); } - Database\Webpage::create($folder_location, $page_url, $requester_uid, $this->favicon_path, $this->page_title); + Database\Webpage::create($folder_location, $this->page_url, $requester_uid, $this->favicon_path, $this->page_title); } else { echo "Website does not exist"; } } + function normalizeUrl(string &$url) : void { + $count_slashes = substr_count($url, "/"); + if (str_ends_with($url, "/index.html")) { + $url = substr($url, 0, strlen($url) - strlen("/index.html")); + } + elseif (str_ends_with($url, "/index")) { + $url = substr($url, 0, strlen($url) - strlen("/index")); + } + elseif (str_ends_with($url, "/")) { + $url = substr($url, 0, -1); + } + } private function debugPrintToConsole($data) : void{ $output = $data; @@ -474,13 +487,26 @@ function applyCorrectProtocol($url, $protocol) : string { return $protocol . 
$url; } -function doesWebsiteExist($url) : bool { +function normalizeUrl(string &$url) : void { + $count_slashes = substr_count($url, "/"); + if (str_ends_with($url, "/index.html")) { + $url = substr($url, 0, strlen($url) - strlen("/index.html")); + } + elseif (str_ends_with($url, "/index")) { + $url = substr($url, 0, strlen($url) - strlen("/index")); + } + elseif (str_ends_with($url, "/")) { + $url = substr($url, 0, -1); + } +} + +function doesWebsiteExist($url) : array { // Check if the site exists with https $https_url = applyCorrectProtocol($url, "https://"); if ($https_url != $url) { $url_headers = @get_headers($https_url); if ($url_headers && $url_headers[0] != 'HTTP/1.1 404 Not Found') { - return true; + return array(true, $https_url); } } @@ -489,7 +515,7 @@ function doesWebsiteExist($url) : bool { if ($http_url != $url) { $url_headers = @get_headers($http_url); if ($url_headers && $url_headers[0] != 'HTTP/1.1 404 Not Found') { - return true; + return array(true, $http_url); } } @@ -497,8 +523,8 @@ function doesWebsiteExist($url) : bool { // Will take effect when the user has entered the https/http protocol with the site $url_headers = @get_headers($url); if ($url_headers && $url_headers[0] != 'HTTP/1.1 404 Not Found') { - return true; + return array(true, $url); } - return false; + return array(false, $url); } diff --git a/views/archive/index.php b/views/archive/index.php index 500238a..ac6ada6 100644 --- a/views/archive/index.php +++ b/views/archive/index.php @@ -3,11 +3,13 @@ $page = null; try { + list($exists, $url) = Controller\doesWebsiteExist($url); + Controller\normalizeUrl($url); + $page = Database\Webpage::fromDB($url); $page->incrementVisits(); } catch(Exception $e) { - $exists = Controller\doesWebsiteExist($url); } ?> |
