1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
<?php
/**
 * Handles a POST request asking for a web page to be archived.
 *
 * Reads the page URL from the `page_url` POST field and the destination
 * folder from the `ARCHIVES_DIR` environment variable, then hands both to
 * DownloadPage, whose constructor performs the download and the zipping and
 * echoes the outcome.
 *
 * @return void
 */
function on_post() {
    $WEBSITE_CATEGORY = 'page_url';
    $DOWNLOADS_FOLDER = getenv('ARCHIVES_DIR');

    // The field may be absent, or an array (page_url[]=...). Neither can name a
    // website, so answer with the message DownloadPage prints for an
    // unreachable site instead of passing a non-string further down.
    $website_url = $_POST[$WEBSITE_CATEGORY] ?? null;
    if (!is_string($website_url) || trim($website_url) === '') {
        echo "Website does not exist";
        return;
    }

    // getenv() returns false when the variable is unset. Without this guard the
    // archive path would degrade to "/<name>.zip" at the filesystem root.
    if ($DOWNLOADS_FOLDER === false || $DOWNLOADS_FOLDER === '') {
        echo "Zip archive could not be open";
        return;
    }

    // Construction alone triggers the download; the instance is not needed afterwards.
    new DownloadPage(trim($website_url), $DOWNLOADS_FOLDER);
}
/**
 * Downloads a single web page and stores it as `index.html` inside a zip archive.
 *
 * All work is triggered by the constructor: it resolves a reachable http(s)
 * form of the given URL, downloads the page, obtains the archive name from
 * Database\Webpage::create() and writes the zip. Outcomes are reported with
 * `echo`.
 */
class DownloadPage {
    /** @var string Directory the zip archive is written into. */
    private $zip_location;
    /** @var string|null File name of the archive, including the `.zip` suffix. */
    private $zip_name;
    /** @var string URL of the page; replaced by the resolved URL once one is found. */
    private $page_url;
    /** @var string|null Raw body of the downloaded page. */
    private $page_contents;

    /**
     * Resolves, downloads and archives the page.
     *
     * @param string $page_url     URL as entered by the user, with or without a scheme.
     * @param string $zip_location Directory in which the archive is created.
     */
    function __construct($page_url, $zip_location) {
        $this->zip_location = $zip_location;
        $this->page_url = $page_url;

        list($website_exists, $this->page_url) = $this->does_website_exist($this->page_url);
        if (!$website_exists) {
            echo "Website does not exist";
            return;
        }

        // Download before touching the database so that a failed download does
        // not leave a record behind or produce an archive with an empty page.
        $page_contents = file_get_contents($this->page_url);
        if ($page_contents === false) {
            echo "Website could not be downloaded";
            return;
        }
        $this->page_contents = $page_contents;

        // The value returned by Database\Webpage::create() becomes the archive's
        // base name. It receives the URL as entered (not the resolved one); the
        // meaning of the third argument is not visible here — TODO confirm.
        $this->zip_name = Database\Webpage::create($zip_location, $page_url, 1) . '.zip';
        $this->create_zip_archive();
    }

    /**
     * @param string $zip_location Directory in which the archive is created.
     */
    function set_zip_location($zip_location) {
        $this->zip_location = $zip_location;
    }

    /**
     * @param string $zip_name Archive file name, including extension.
     */
    function set_zip_name($zip_name) {
        $this->zip_name = $zip_name;
    }

    /**
     * @param string $page_url URL of the page to archive.
     */
    function set_page_url($page_url) {
        $this->page_url = $page_url;
    }

    /**
     * Prefixes a URL with the given protocol unless it already carries one.
     *
     * Only a scheme at the very start of the URL counts, so a scheme appearing
     * later (for example inside a query string) does not suppress the prefix,
     * and an `http://` URL is not turned into `https://http://...`.
     *
     * @param string $url      URL with or without a scheme.
     * @param string $protocol Prefix to apply, e.g. "https://".
     * @return string The URL unchanged if it already starts with http://,
     *                https:// or $protocol (case-insensitive); otherwise
     *                $protocol followed by the URL.
     */
    function apply_correct_protocol($url, $protocol) {
        $has_http_scheme = preg_match('#^https?://#i', $url) === 1;
        if ($has_http_scheme || stripos($url, $protocol) === 0) {
            return $url;
        }
        return $protocol . $url;
    }

    /**
     * Finds a reachable http(s) form of the URL.
     *
     * Tries, in order: the URL with `https://` applied, with `http://` applied,
     * and the URL as given. Duplicates are probed once, and anything that is
     * not an http(s) URL is never probed.
     *
     * @param string $url URL as entered by the user.
     * @return array Two elements: a bool telling whether a reachable URL was
     *               found, and that URL (or the input unchanged when none was).
     */
    function does_website_exist($url) {
        if (!is_string($url) || trim($url) === '') {
            return array(false, $url);
        }

        $entered = trim($url);
        $candidates = array_unique(array(
            $this->apply_correct_protocol($entered, "https://"),
            $this->apply_correct_protocol($entered, "http://"),
            // Takes effect when the user entered the protocol themselves.
            $entered,
        ));

        foreach ($candidates as $candidate) {
            // Restrict probing to web URLs; the input is user-supplied.
            if (preg_match('#^https?://#i', $candidate) !== 1) {
                continue;
            }
            if ($this->responds_without_404($candidate)) {
                return array(true, $candidate);
            }
        }
        return array(false, $url);
    }

    /**
     * Requests the headers of a URL and reports whether it answered with
     * anything other than a 404.
     *
     * @param string $url Absolute http(s) URL.
     * @return bool False when no headers were received or the final status is 404.
     */
    private function responds_without_404($url) {
        $url_headers = @get_headers($url);
        if (!$url_headers) {
            return false;
        }

        // get_headers() lists one status line per redirect hop; the last one
        // belongs to the response that is actually delivered. Parsing the code
        // makes the check independent of HTTP version and reason phrase.
        $status = null;
        foreach ($url_headers as $header_line) {
            if (is_string($header_line) && preg_match('#^HTTP/\S+\s+(\d{3})#i', $header_line, $matches) === 1) {
                $status = (int) $matches[1];
            }
        }
        return $status !== 404;
    }

    /**
     * Writes the downloaded page into the archive as `index.html`.
     *
     * Echoes "Archived <url>" on success, or an error message when the archive
     * cannot be opened or written.
     *
     * @return ZipArchive The archive object, already closed on success.
     */
    function create_zip_archive() {
        $zip = new ZipArchive();
        $archive_path = $this->zip_location . '/' . $this->zip_name;

        if ($zip->open($archive_path, ZipArchive::CREATE) !== TRUE) {
            echo "Zip archive could not be open";
            return $zip;
        }

        $added = $zip->addFromString('index.html', (string) $this->page_contents);
        // The archive is only flushed to disk by close(), so its result decides
        // whether anything was really stored.
        $closed = $zip->close();

        if ($added && $closed) {
            // The URL originates from user input; escape it before echoing.
            echo "Archived " . htmlspecialchars((string) $this->page_url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        } else {
            echo "Zip archive could not be written";
        }
        return $zip;
    }
}
|