aboutsummaryrefslogtreecommitdiff
path: root/controllers/archive_page.php
diff options
context:
space:
mode:
authorGeorgi Nikolov <ggeorgi60@gmail.com>2025-01-25 11:13:54 +0200
committerGeorgi Nikolov <ggeorgi60@gmail.com>2025-01-25 11:13:54 +0200
commit61d6cd57804dc059e2718cae19a97dc40bc83756 (patch)
treea130b3c41dd974306c9005cd033552576c678006 /controllers/archive_page.php
parentd4aa1cf1a9a4051a8b962f1321e901a8d6a6589b (diff)
downloadnowayforward_human-61d6cd57804dc059e2718cae19a97dc40bc83756.tar
nowayforward_human-61d6cd57804dc059e2718cae19a97dc40bc83756.tar.gz
nowayforward_human-61d6cd57804dc059e2718cae19a97dc40bc83756.zip
Unified the coding styles
Diffstat (limited to 'controllers/archive_page.php')
-rw-r--r--controllers/archive_page.php46
1 files changed, 25 insertions, 21 deletions
diff --git a/controllers/archive_page.php b/controllers/archive_page.php
index 7c5a704..ef0f50d 100644
--- a/controllers/archive_page.php
+++ b/controllers/archive_page.php
@@ -16,26 +16,31 @@ class DownloadPage {
function __construct($page_url, $folder_location) {
$this->folder_location = $folder_location;
$this->page_url = $page_url;
- list($website_exists, $this->page_url) = $this->does_website_exist($this->page_url);
+ list($website_exists, $this->page_url) = $this->doesWebsiteExist($this->page_url);
+ // TODO: Add a check for whether the same site has already been archived at least once.
+ // This should be done with a query to the database.
+ // If such a site exists, then while downloading the resources check whether any of them already exist in the
+ // old archive.
+ // If they do, don't download them (or rather, delete them and make the references point to the correct archive folder).
if ($website_exists) {
$this->folder_name = Database\Webpage::create($folder_location, $page_url, 1);
- $this->page_contents = $this->download_file($this->page_url);
- $this->create_archive();
+ $this->page_contents = $this->downloadFile($this->page_url);
+ $this->createArchive();
} else {
echo "Website does not exist";
}
}
- function set_folder_location($folder_location) {
+ function setFolderLocation($folder_location) : void {
$this->folder_location = $folder_location;
}
- function set_folder_name($folder_name) {
+ function setFolderName($folder_name) : void {
$this->folder_name = $folder_name;
}
- function set_page_url($page_url) {
+ function setPageUrl($page_url) : void {
$this->page_url = $page_url;
}
- function apply_correct_protocol($url, $protocol) {
+ function applyCorrectProtocol($url, $protocol) : void {
if (str_contains($url, $protocol)) {
return $url;
}
@@ -43,7 +48,7 @@ class DownloadPage {
return $protocol . $url;
}
- function download_file($url) {
+ function downloadFile($url) : string {
$curl_func = curl_init($url);
curl_setopt($curl_func, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl_func, CURLOPT_FOLLOWLOCATION, true);
@@ -52,9 +57,9 @@ class DownloadPage {
return $page_contents;
}
- function does_website_exist($url) {
+ function doesWebsiteExist($url) : array(bool, string) {
// Check if the site exists with https
- $https_url = $this->apply_correct_protocol($url, "https://");
+ $https_url = $this->applyCorrectProtocol($url, "https://");
if ($https_url != $url) {
$url_headers = @get_headers($https_url);
if ($url_headers && $url_headers[0] != 'HTTP/1.1 404 Not Found') {
@@ -63,7 +68,7 @@ class DownloadPage {
}
// Check if the site exists with http
- $http_url = $this->apply_correct_protocol($url, "http://");
+ $http_url = $this->applyCorrectProtocol($url, "http://");
if ($http_url != $url) {
$url_headers = @get_headers($http_url);
if ($url_headers && $url_headers[0] != 'HTTP/1.1 404 Not Found') {
@@ -81,7 +86,7 @@ class DownloadPage {
return array(false, $url);
}
- function resolveUrl($relativeUrl, $baseUrl) {
+ function resolveUrl($relativeUrl, $baseUrl) : string {
// If the url is already absolute return it
if (parse_url($relativeUrl, PHP_URL_SCHEME)) {
return $relativeUrl;
@@ -90,27 +95,26 @@ class DownloadPage {
return rtrim($baseUrl, '/') . '/' . ltrim($relativeUrl, '/');
}
- function download_source(&$dom, $folder_path, $tagName, $attribute) {
+ function downloadSource(&$dom, $folder_path, $tagName, $attribute) : void {
$links = $dom->getElementsByTagName($tagName);
foreach($links as $link) {
$source = $link->getAttribute($attribute);
if ($source) {
$sourceUrl = $this->resolveUrl($source, $this->page_url);
- if ($this->is_resource_accessible($sourceUrl)) {
- $sourceContent = $this->download_file($sourceUrl);
+ if ($this->isResourceAccessible($sourceUrl)) {
+ $sourceContent = $this->downloadFile($sourceUrl);
if ($sourceContent) {
$link->setAttribute($attribute, './' . basename($source));
$file = fopen($folder_path . '/' . basename($source), "w");
fwrite($file, $sourceContent);
fclose($file);
- //$zip->addFromString(basename($source), $sourceContent);
}
}
}
}
}
- function is_resource_accessible($url) {
+ function isResourceAccessible($url) : bool {
$curl_func = curl_init($url);
curl_setopt($curl_func, CURLOPT_NOBODY, true); // Gives only the headers
curl_setopt($curl_func, CURLOPT_RETURNTRANSFER, true);
@@ -121,7 +125,7 @@ class DownloadPage {
return ($code >= 200 && $code < 400);
}
- function create_archive() {
+ function createArchive() : void {
// Creates the folder with the correct resources and the main html page in a index.html tag
$dom = new DOMDocument();
@$dom->loadHTML($this->page_contents); // This suppresses warnings for invalid HTML
@@ -131,9 +135,9 @@ class DownloadPage {
mkdir($folder_path, 0777, true);
}
- $this->download_source($dom, $folder_path, 'link', 'href');
- $this->download_source($dom, $folder_path, 'script', 'src');
- $this->download_source($dom, $folder_path, 'img', 'src');
+ $this->downloadSource($dom, $folder_path, 'link', 'href');
+ $this->downloadSource($dom, $folder_path, 'script', 'src');
+ $this->downloadSource($dom, $folder_path, 'img', 'src');
$this->page_contents = $dom->saveHTML();
$indexFile = fopen($folder_path . '/index.html', "w");