-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMyCurl.cpp
127 lines (110 loc) · 3.95 KB
/
MyCurl.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include <iostream>
#include <string>
#include <vector>
#include <stdio.h>
#include "MyCurl.h"
namespace MyCurl {
/**
libcurl write callback that appends a received chunk to a wide string.

Each byte of the chunk is converted to one wchar_t element (plain char ->
wchar_t conversion); no character-set decoding is performed.

@param buf   Start of the received chunk (not NUL-terminated).
@param size  Size of one element of the chunk.
@param nmemb Number of elements in the chunk.
@param html  Destination string; the chunk is appended to it.
@return Number of bytes consumed (size * nmemb), or 0 if `html` is null.
*/
size_t writeString(char* buf, size_t size, size_t nmemb, std::wstring* html) {
    if (html == nullptr) {
        return 0;  // nothing to append to; a short count tells curl the write failed
    }
    // Keep the byte count in size_t: the previous `int` counter mixed signed
    // and unsigned arithmetic in the loop condition.
    const size_t total = size * nmemb;
    html->append(buf, buf + total);
    return total;  // tell curl how many bytes we handled
}
std::string urlToString(std::string url, bool verbose) {
std::wstring html; // wstring can work with UTF8 encoding
CURL* curl;
curl_global_init(CURL_GLOBAL_ALL);
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 20L);
curl_easy_setopt(curl, CURLOPT_USERAGENT,
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0");
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &writeString);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &html);
if (verbose)
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); // tell curl to output its progress
curl_easy_perform(curl);
curl_easy_cleanup(curl);
curl_global_cleanup();
return std::string(html.begin(), html.end());
}
/**
libcurl write callback that writes a received chunk to an open file.

@param ptr    Start of the received chunk.
@param size   Size of one element of the chunk.
@param nmemb  Number of elements in the chunk.
@param stream Destination file, already opened for writing.
@return Number of bytes written. A value smaller than size * nmemb makes curl
        treat the transfer as failed. Returns 0 if `stream` is null.
*/
size_t writeFile(void* ptr, size_t size, size_t nmemb, FILE* stream) {
    if (stream == nullptr) {
        return 0;
    }
    // fwrite reports complete *elements* written; curl wants a byte count, so
    // scale by the element size (the two only coincide when size == 1).
    const size_t written = fwrite(ptr, size, nmemb, stream);
    return written * size;  // tell curl how many bytes we handled
}
bool urlToFile(std::string url, std::string filename, bool verbose) {
CURL* curl;
FILE* file;
if (fopen_s(&file, filename.c_str(), "wb") == 0) {
curl_global_init(CURL_GLOBAL_ALL);
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 20L);
curl_easy_setopt(
curl, CURLOPT_USERAGENT,
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0");
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &writeFile);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
if (verbose)
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); // tell curl to output its progress
curl_easy_perform(curl);
curl_easy_cleanup(curl);
fclose(file);
return true;
}
return false;
}
/**
Replace known encoded sequences in `html` with their decoded text.

Every occurrence of each table key is replaced by its value. Entries are
applied in table order, and text produced by a replacement is not re-scanned
for the same key.

@param html Text to decode.
@return The decoded copy of `html`.
*/
std::string decodeHtml(std::string html) {
    // TODO: add missing codes.
    // NOTE(review): as this table currently reads, every key is identical to
    // its value, which makes each replacement a no-op - verify the keys against
    // the intended encoded forms.
    static const std::vector<std::vector<std::string>> encodings = {
        {"‘", "‘"}, {"’", "’"}, {"“", "“"}, {"”", "”"}, {"&", "&"},
        {"!", "!"}, {"!", "!"}, {"&", "&"}, {"&", "&"}, {"'", "'"},
        {"'", "'"}, {"ō", "ō"}, {"√", "√"}, {"–", "–"}, {"√", "√"}};

    for (const auto& entry : encodings) {
        const std::string& from = entry[0];
        const std::string& to = entry[1];
        if (from.empty()) {
            continue;  // an empty key would match at every position
        }
        std::string::size_type pos = 0;
        while ((pos = html.find(from, pos)) != std::string::npos) {
            html.replace(pos, from.length(), to);
            // Resume after the inserted text so the loop always advances, even
            // when the replacement contains the key.
            pos += to.length();
        }
    }
    return html;
}
/**
Resolve the final address that `url` leads to after following redirects.

The request is made without downloading a body (CURLOPT_NOBODY) and is limited
to 20 seconds. The outcome is also logged to std::cout.

@param url Address to resolve.
@return The effective URL reported by libcurl, or an empty string if the
        request failed or no effective URL was available.
*/
std::string redirectedUrl(std::string url) {
    std::string effectiveUrl;

    curl_global_init(CURL_GLOBAL_ALL);
    CURL* curl = curl_easy_init();
    if (curl == nullptr) {
        std::cout << "curl_easy_init() failed" << std::endl;
    } else {
        // NOTE(security): peer certificate verification is disabled here, so
        // HTTPS responses are not authenticated. Kept for compatibility.
        // Option values are passed as `long`, which is what libcurl expects.
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 20L);
        curl_easy_setopt(curl, CURLOPT_USERAGENT,
                         "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0");
        curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
        curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);

        // Perform the request, curl_res will get the return code
        CURLcode curl_res = curl_easy_perform(curl);
        if (curl_res != CURLE_OK) {
            std::cout << "curl_easy_perform() failed: " << curl_easy_strerror(curl_res) << std::endl;
        } else {
            char* redirect = nullptr;
            curl_res = curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &redirect);
            if ((curl_res == CURLE_OK) && redirect) {
                std::cout << "CURLINFO_EFFECTIVE_URL: " << redirect << std::endl;
                // Copy before cleanup: the buffer is obtained from the handle
                // and must not be used once the handle is released.
                effectiveUrl = redirect;
            }
        }
        // Always release the handle; the success path previously returned
        // before reaching the cleanup calls.
        curl_easy_cleanup(curl);
    }
    curl_global_cleanup();
    return effectiveUrl;
}
} // namespace MyCurl