diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1c7293e..739da1d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -48,9 +48,12 @@ if (WIN32)
     check_symbol_exists(strerror_s "string.h" HAVE_STRERROR_S)
     check_symbol_exists(_wcserror_s "string.h" HAVE__WCSERROR_S)
     check_symbol_exists(printf_s "stdio.h" HAVE_PRINTF_S)
+    check_symbol_exists(fprintf_s "stdio.h" HAVE_FPRINTF_S)  # http_client.cpp aliases fprintf to fprintf_s when this is set
     check_symbol_exists(sscanf_s "stdio.h" HAVE_SSCANF_S)
     check_symbol_exists(_stricmp "string.h" HAVE__STRICMP)
     check_symbol_exists(_strnicmp "string.h" HAVE__STRNICMP)
+    check_symbol_exists(_mkgmtime "time.h" HAVE__MKGMTIME)  # NOTE(review): presumably the timegm counterpart used by time_util - its use is not visible in this diff
+    check_symbol_exists(_get_timezone "time.h" HAVE__GET_TIMEZONE)  # tested by time_util::get_timezone()
 else()
     check_symbol_exists(fseeko "stdio.h" HAVE_FSEEKO)
     check_symbol_exists(fseeko64 "stdio.h" HAVE_FSEEKO64)
@@ -60,12 +63,24 @@ endif()
 check_symbol_exists(strcasecmp "string.h" HAVE_STRCASECMP)
 check_symbol_exists(strncasecmp "string.h" HAVE_STRNCASECMP)
 check_symbol_exists(strerror_r "string.h" HAVE_STRERROR_R)
+check_symbol_exists(strptime "time.h" HAVE_STRPTIME)  # if still unset after the retry below, strptime/strptime.c is added to the library sources
+if (NOT HAVE_STRPTIME)
+    set(TMP "${CMAKE_REQUIRED_DEFINITIONS}")  # saved so the probe-only define does not leak into later checks
+    set(CMAKE_REQUIRED_DEFINITIONS -D_XOPEN_SOURCE)  # retry the probe with _XOPEN_SOURCE defined
+    check_symbol_exists(strptime "time.h" HAVE_STRPTIME1)  # separate result variable: the first probe's result is already stored in HAVE_STRPTIME
+    if (HAVE_STRPTIME1)
+        add_compile_definitions(_XOPEN_SOURCE)  # project sources get the same define the successful probe used
+        set(HAVE_STRPTIME 1)  # from here on strptime counts as available
+    endif()
+    set(CMAKE_REQUIRED_DEFINITIONS "${TMP}")  # restore the saved definitions
+endif()
 set(HAVE_GNU_SOURCE OFF)
 if (NOT WIN32)
     set(TMP "${CMAKE_REQUIRED_DEFINITIONS}")
     set(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
     check_symbol_exists(fcloseall "stdio.h" HAVE_FCLOSEALL)
-    if (HAVE_FCLOSEALL)
+    check_symbol_exists(timegm "time.h" HAVE_TIMEGM)  # probed with -D_GNU_SOURCE in effect, like fcloseall above
+    if (HAVE_FCLOSEALL OR HAVE_TIMEGM)  # either symbol found this way turns _GNU_SOURCE on for the whole build
         add_compile_definitions(_GNU_SOURCE)
         set(HAVE_GNU_SOURCE ON)
     endif()
@@ -74,6 +89,10 @@ endif()
 if (HAVE_STRERROR_R)
     test_strerror_r(HAVE_GNU_STRERROR_R ${HAVE_GNU_SOURCE})
 endif()
+if (NOT MSVC)  # these two probes are skipped for MSVC builds
+    check_symbol_exists(timezone "time.h" HAVE_TIMEZONE)  # the `timezone` symbol from time.h
+    check_symbol_exists(tzset "time.h" HAVE_TZSET)
+endif()
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/utils_config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/utils_config.h") if ("${CMAKE_C_COMPILER_ID}" STREQUAL GNU) @@ -118,6 +137,10 @@ set(SOURCE_FILE_HEADERS http_client.h ) +if (NOT HAVE_STRPTIME) + list(APPEND SOURCE_FILE strptime/strptime.c strptime/strptime.h) +endif() + add_library(utils STATIC ${SOURCE_FILE} ${SOURCE_FILE_HEADERS}) target_compile_definitions(utils PRIVATE HAVE_UTILS_CONFIG_H) if (Iconv_FOUND) diff --git a/http_client.cpp b/http_client.cpp index 3f30581..0260794 100644 --- a/http_client.cpp +++ b/http_client.cpp @@ -3,15 +3,26 @@ #ifndef _WIN32 #include #endif +#include +#if _WIN32 +#include +#endif +#include "cstr_util.h" #include "err.h" +#include "fileop.h" +#include "file_reader.h" #include "str_util.h" #include "urlparse.h" +#include "time_util.h" +#include #include #include #include "inttypes.h" +#include + #if _WIN32 static bool inited = false; static WSADATA wsaData = { 0 }; @@ -29,6 +40,30 @@ static bool ssl_inited = false; #define sscanf sscanf_s #endif +#if HAVE_FPRINTF_S +#define fprintf fprintf_s +#endif + +#ifndef _O_BINARY +#if _WIN32 +#define _O_BINARY 0x8000 +#else +#define _O_BINARY 0 +#endif +#endif + +#ifndef _SH_DENYWR +#define _SH_DENYWR 0x20 +#endif + +#ifndef _S_IWRITE +#define _S_IWRITE 0x80 +#endif + +#ifndef _S_IREAD +#define _S_IREAD 0x100 +#endif + AIException::AIException(int code) { this->code = code; } @@ -294,6 +329,15 @@ Request::Request(std::string host, std::string port, bool https, std::string pat } Response Request::send() { + if (!this->options.use_custom_cookie) { + std::string cookie; + if (this->cookies) { + cookie = this->cookies->getCookieHeader(this->host, this->path, this->https); + } + if (!cookie.empty()) { + this->headers["Cookie"] = cookie; + } + } std::string data; data += this->method + " " + this->path + " HTTP/1.1\r\n"; auto hasBody = this->body && !this->body->isFinished(); @@ -325,7 +369,7 @@ Response Request::send() { } socket.send("\r\n"); } - 
return Response(socket); + return Response(socket, *this); } void Request::setBody(HttpBody* body) { @@ -369,7 +413,9 @@ HttpClient::HttpClient(std::string host) { } Request HttpClient::request(std::string path, std::string method) { - return Request(this->host, this->port, this->https, path, method, this->headers, this->options); + Request req(this->host, this->port, this->https, path, method, this->headers, this->options); + req.cookies = this->cookies; + return req; } void HttpFullBody::pull() { @@ -457,12 +503,12 @@ Request::~Request() { } } -Response::Response(Socket socket): socket(socket) { +Response::Response(Socket socket, Request& req): socket(socket) { #if HAVE_ZLIB memset(&this->zstream, 0, sizeof(z_stream)); #endif parseStatus(); - parseHeader(); + parseHeader(req); } bool Response::pullData() { @@ -490,8 +536,7 @@ void Response::parseStatus() { this->reason = parts[2]; } -void Response::parseHeader() { - if (!this->code) parseStatus(); +void Response::parseHeader(Request& req) { if (this->headerParsed) return; this->headerParsed = true; auto line = this->readLine(); @@ -537,6 +582,10 @@ void Response::parseHeader() { #else throw std::runtime_error("Unspported content-encoding"); #endif + } else if (!cstr_stricmp(kv[0].c_str(), "set-cookie")) { + if (req.cookies) { + req.cookies->handleSetCookie(req, kv[1]); + } } line = this->readLine(); } @@ -560,8 +609,6 @@ std::string Response::readLine() { } std::string Response::read() { - if (!this->code) this->parseStatus(); - if (!this->headerParsed) this->parseHeader(); if (this->chunked) { std::string data; size_t size = -1; @@ -702,3 +749,282 @@ HttpBody* Request::getBody() { bool Response::isEof() { return this->eof; } + +Cookie::Cookie(std::string name, std::string value, std::string domain, std::string path, bool secure, bool httpOnly, int64_t expires) { + this->name = name; + this->value = value; + this->domain = domain; + this->path = path; + this->secure = secure; + this->httpOnly = httpOnly; + 
this->expires = expires; +} + +std::string Cookies::getCookieHeader(std::string host, std::string path, bool https) { + std::string re; + if (this->cookies.find(host) != this->cookies.end()) { + for (auto& cookie : this->cookies[host]) { + if (cookie.expires != 0 && cookie.expires < time(nullptr)) { + continue; + } + if (cookie.secure && !https) { + continue; + } + if (!cookie.path.empty() && path.find(cookie.path) != 0) { + continue; + } + if (!re.empty()) { + re += "; "; + } + re += cookie.name + "=" + cookie.value; + } + } + std::string host2 = "." + host; + if (this->cookies.find(host2) != this->cookies.end()) { + for (auto& cookie : this->cookies[host2]) { + if (cookie.expires != 0 && cookie.expires < time(nullptr)) { + continue; + } + if (cookie.secure && !https) { + continue; + } + if (!cookie.path.empty() && path.find(cookie.path) != 0) { + continue; + } + if (!re.empty()) { + re += "; "; + } + re += cookie.name + "=" + cookie.value; + } + } + auto pos = host.find("."); + if (pos != std::string::npos) { + host = host.substr(pos); + if (this->cookies.find(host) != this->cookies.end()) { + for (auto& cookie : this->cookies[host]) { + if (cookie.expires != 0 && cookie.expires < time(nullptr)) { + continue; + } + if (cookie.secure && !https) { + continue; + } + if (!cookie.path.empty() && path.find(cookie.path) != 0) { + continue; + } + if (!re.empty()) { + re += "; "; + } + re += cookie.name + "=" + cookie.value; + } + } + } + return re; +} + +void Cookies::handleSetCookie(Request& req, std::string set_cookie) { + auto list = str_util::str_split(set_cookie, ";"); + std::string name, value, domain, path; + bool secure = false, httpOnly = false; + int64_t expires = 0; + bool first = true; + for (auto& item: list) { + auto it = str_util::str_trim(item); + auto kv = str_util::str_splitv(it, "=", 2); + if (first) { + if (kv.size() < 2) throw std::runtime_error("Invalid Set-Cookie: No value"); + name = kv[0]; + value = kv[1]; + first = false; + } + if (kv.size() < 
2) { + if (!cstr_stricmp(it.c_str(), "HttpOnly")) { + httpOnly = true; + } else if (!cstr_stricmp(it.c_str(), "Secure")) { + secure = true; + } + } else { + if (!cstr_stricmp(kv[0].c_str(), "Domain")) { + domain = kv[1]; + } else if (!cstr_stricmp(kv[0].c_str(), "Path")) { + path = kv[1]; + } else if (!cstr_stricmp(kv[0].c_str(), "Expires")) { + struct tm tm; + if (!time_util::strptime(kv[1].c_str(), "%a, %d %b %Y %H:%M:%S GMT", &tm)) { + throw std::runtime_error("Invalid Set-Cookie: Invalid Expires"); + } + expires = time_util::timegm(&tm); + } else if (!cstr_stricmp(kv[0].c_str(), "Max-Age")) { + int64_t max_age; + if (sscanf(kv[1].c_str(), "%" SCNd64, &max_age) != 1) { + throw std::runtime_error("Invalid Set-Cookie: Invalid Max-Age"); + } + expires = time(nullptr) + max_age; + } + } + } + if (domain.empty()) { + domain = req.host; + } + if (path.empty()) { + path = req.path; + } + if (this->cookies.find(domain) == this->cookies.end()) { + this->cookies[domain] = std::list(); + this->cookies[domain].push_back(Cookie(name, value, domain, path, secure, httpOnly, expires)); + } else { + bool found = false; + for (auto it = this->cookies[domain].begin(); it != this->cookies[domain].end(); it++) { + if (it->name == name && it->path == path) { + it->value = value; + it->domain = domain; + it->secure = secure; + it->httpOnly = httpOnly; + it->expires = expires; + found = true; + break; + } + } + if (!found) { + this->cookies[domain].push_back(Cookie(name, value, domain, path, secure, httpOnly, expires)); + } + } +} + +NetscapeCookies::NetscapeCookies() {} + +NetscapeCookies::NetscapeCookies(std::string path) { + this->path = path; + this->load(); +} + +NetscapeCookies::~NetscapeCookies() { + if (this->save_when_disposed) this->save(); +} + +bool NetscapeCookies::load() { + if (this->path.empty()) return true; + if (!fileop::exists(this->path)) return true; + int fd; + int err = fileop::open(this->path, fd, O_RDONLY | _O_BINARY, _SH_DENYWR, _S_IWRITE | _S_IREAD); + if 
(err < 0) { + return false; + } + FILE* f = fileop::fdopen(fd, "rb"); + if (!f) { + fileop::close(fd); + return false; + } + auto reader = create_file_reader(f, 0); + char* line = nullptr; + size_t line_size = 0; + std::string l; + bool re = true; + if (file_reader_read_line(reader, &line, &line_size)) { + re = false; + goto end; + } + l = std::string(line, line_size); + free(line); + line = nullptr; + while (!l.empty()) { + std::string name, value, domain, path; + bool secure = false, httpOnly = false; + int64_t expires = 0; + if (!cstr_strnicmp(l.c_str(), "#HttpOnly_", 10)) { + l = l.substr(10); + httpOnly = true; + } + if (l.find("#") == 0) { + if (file_reader_read_line(reader, &line, &line_size)) { + break; + } + l = std::string(line, line_size); + free(line); + line = nullptr; + continue; + } + auto list = str_util::str_splitv(l, "\t", 7); + if (list.size() < 7) { + re = false; + goto end; + } + domain = list[0]; + path = list[2]; + secure = !cstr_stricmp(list[3].c_str(), "TRUE"); + if (sscanf(list[4].c_str(), "%" SCNd64, &expires) != 1) { + re = false; + goto end; + } + name = list[5]; + value = list[6]; + if (this->cookies.find(domain) == this->cookies.end()) { + this->cookies[domain] = std::list(); + this->cookies[domain].push_back(Cookie(name, value, domain, path, secure, httpOnly, expires)); + } else { + this->cookies[domain].push_back(Cookie(name, value, domain, path, secure, httpOnly, expires)); + } + if (file_reader_read_line(reader, &line, &line_size)) { + break; + } + l = std::string(line, line_size); + free(line); + line = nullptr; + } +end: + if (reader) free_file_reader(reader); + if (f) fileop::fclose(f); + if (line) free(line); + return re; +} + +bool NetscapeCookies::save() { + if (this->path.empty()) return true; + int fd; + int err = fileop::open(this->path, fd, O_WRONLY | O_CREAT | _O_BINARY, 16, _S_IWRITE | _S_IREAD); + if (err < 0) { + return false; + } + FILE* f = fileop::fdopen(fd, "wb"); + if (!f) { + fileop::close(fd); + return false; 
+ } + fprintf(f, "# Netscape HTTP Cookie File\n\ +# http://curl.haxx.se/rfc/cookie_spec.html\n"); + for (auto& domain : this->cookies) { + for (auto& cookie : domain.second) { + if (cookie.expires == 0 && cookie.expires < time(nullptr)) { + continue; + } + if (cookie.secure) { + fprintf(f, "#HttpOnly_"); + } + fprintf(f, "%s\t%s\t%s\t%s\t%s\t%s\t%s\n", cookie.domain.c_str(), cookie.domain.find(".") == 0 ? "TRUE" : "FALSE", cookie.path.c_str(), cookie.secure ? "TRUE" : "FALSE", std::to_string(cookie.expires).c_str(), cookie.name.c_str(), cookie.value.c_str()); + } + } + fileop::fclose(f); + return true; +} + +std::map parseCookie(std::string cookie) { + std::map re; + auto list = str_util::str_split(cookie, ";"); + for (auto& item : list) { + auto kv = str_util::str_splitv(item, "=", 2); + if (kv.size() >= 2) { + re[str_util::str_trim(kv[0])] = str_util::str_trim(kv[1]); + } + } + return re; +} + +std::string dumpCookie(std::map cookie) { + std::string re; + for (auto& item : cookie) { + if (!re.empty()) { + re += "; "; + } + re += item.first + "=" + item.second; + } + return re; +} diff --git a/http_client.h b/http_client.h index c2e43f5..fa2fd66 100644 --- a/http_client.h +++ b/http_client.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -33,12 +34,19 @@ struct HeaderNameCompare { } }; +typedef struct Response Response; +typedef struct Request Request; +typedef struct Cookie Cookie; + typedef std::map HeaderMap; +typedef std::map> CookieMap; std::string decodeURIComponent(std::string str); +std::string encodeURIComponent(std::string str); class HttpClientOptions { public: + bool use_custom_cookie = false; }; class HttpBody { @@ -79,6 +87,43 @@ protected: std::string body(); }; +class CookiesBase { +public: + virtual std::string getCookieHeader(std::string host, std::string path, bool https) = 0; + virtual void handleSetCookie(Request& req, std::string set_cookie) = 0; +}; + +class Cookie { +public: + Cookie(std::string name, std::string 
value, std::string domain, std::string path, bool secure, bool httpOnly, int64_t expires); + std::string name; + std::string value; + std::string domain; + std::string path; + bool secure; + bool httpOnly; + int64_t expires; +}; + +class Cookies: public CookiesBase { +public: + virtual std::string getCookieHeader(std::string host, std::string path, bool https); + virtual void handleSetCookie(Request& req, std::string set_cookie); +protected: + CookieMap cookies; +}; + +class NetscapeCookies: public Cookies { +public: + NetscapeCookies(); + NetscapeCookies(std::string path); + ~NetscapeCookies(); + bool load(); + bool save(); + bool save_when_disposed = true; + std::string path; +}; + class AIException: std::exception { public: AIException(int code); @@ -132,8 +177,6 @@ private: addrinfo* addr = nullptr; }; -typedef struct Response Response; - class Request { public: Request(std::string host, std::string port, bool https, std::string path, std::string method, HeaderMap headers, HttpClientOptions options); @@ -149,6 +192,7 @@ public: bool https = false; std::string path; std::string method; + CookiesBase* cookies = nullptr; private: HttpBody* body = nullptr; }; @@ -156,7 +200,7 @@ private: class Response { public: Response() = delete; - explicit Response(Socket socket); + explicit Response(Socket socket, Request& req); ~Response(); HeaderMap headers; uint16_t code = 0; @@ -166,7 +210,7 @@ public: bool isEof(); private: std::string readLine(); - void parseHeader(); + void parseHeader(Request& req); void parseStatus(); bool pullData(); bool headerParsed = false; @@ -188,10 +232,14 @@ public: Request request(std::string path, std::string method); HttpClientOptions options; HeaderMap headers; + CookiesBase* cookies = nullptr; private: std::string host; std::string port; bool https = false; }; +std::map parseCookie(std::string cookie); +std::string dumpCookie(std::map cookie); + #endif diff --git a/strptime/strptime.c b/strptime/strptime.c new file mode 100644 index 
0000000..a703060 --- /dev/null +++ b/strptime/strptime.c @@ -0,0 +1,834 @@ +/* $NetBSD: strptime.c,v 1.62 2017/08/24 01:01:09 ginsbach Exp $ */ +/* http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/lib/libc/time/strptime.c?only_with_tag=HEAD + * NetBSD implementation strptime(). + * Format description: https://netbsd.gw.com/cgi-bin/man-cgi?strptime+3+NetBSD-current +*/ +/*- + * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code was contributed to The NetBSD Foundation by Klaus Klein. + * Heavily optimised by David Laight + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +//#include +//__RCSID("$NetBSD: strptime.c,v 1.62 2017/08/24 01:01:09 ginsbach Exp $"); + +#include +#include +#include +#include + +static const unsigned char *conv_num(const unsigned char *, int *, unsigned int, unsigned int); +static const unsigned char *find_string(const unsigned char *, int *, const char * const *, const char * const *, int); + +/* + * We do not implement alternate representations. However, we always + * check whether a given modifier is allowed for a certain conversion. + */ +#define ALT_E 0x01 +#define ALT_O 0x02 +#define LEGAL_ALT(x) { if (alt_format & ~(x)) return NULL; } + +#define TM_YEAR_BASE 1900 + +#define TM_SUNDAY 0 +#define TM_MONDAY 1 +#define TM_TUESDAY 2 +#define TM_WEDNESDAY 3 +#define TM_THURSDAY 4 +#define TM_FRIDAY 5 +#define TM_SATURDAY 6 + +#define S_YEAR (1 << 0) +#define S_MON (1 << 1) +#define S_YDAY (1 << 2) +#define S_MDAY (1 << 3) +#define S_WDAY (1 << 4) +#define S_HOUR (1 << 5) + +#define HAVE_MDAY(s) (s & S_MDAY) +#define HAVE_MON(s) (s & S_MON) +#define HAVE_WDAY(s) (s & S_WDAY) +#define HAVE_YDAY(s) (s & S_YDAY) +#define HAVE_YEAR(s) (s & S_YEAR) +#define HAVE_HOUR(s) (s & S_HOUR) + +#define SECSPERMIN 60 +#define MINSPERHOUR 60 +#define SECSPERHOUR (SECSPERMIN * MINSPERHOUR) +#define HOURSPERDAY 24 + +#define HERE_D_T_FMT "%a %b %e %H:%M:%S %Y" +#define HERE_D_FMT "%y/%m/%d" +#define HERE_T_FMT_AMPM "%I:%M:%S %p" +#define HERE_T_FMT "%H:%M:%S" + +#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0)) + +/* +** Since everything in isleap is modulo 400 (or a factor of 400), we know that +** isleap(y) == isleap(y % 400) +** and so +** isleap(a + b) == isleap((a + b) % 400) +** or +** isleap(a + b) == isleap(a % 400 + b % 400) +** This is true even if % means modulo rather than Fortran remainder +** (which is allowed by C89 but not by C99 or later). +** We use this to avoid addition overflow problems. 
+*/ + +#define isleap_sum(a, b) isleap((a) % 400 + (b) % 400) + +#ifdef _MSC_VER +#define tzname _tzname +#define strncasecmp _strnicmp +#endif + +#ifdef TM_ZONE +static char* utc = "UTC"; +#endif +/* RFC-822/RFC-2822 */ +static const char* const nast[] = { + "EST", "CST", "MST", "PST", "\0\0\0" +}; +static const char* const nadt[] = { + "EDT", "CDT", "MDT", "PDT", "\0\0\0" +}; +static const char* weekday_name[] = +{ + "Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday" +}; +static const char* ab_weekday_name[] = +{ + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" +}; +static const char* month_name[] = +{ + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December" +}; +static const char* ab_month_name[] = +{ + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" +}; +static const char* am_pm[] = {"AM", "PM"}; + + +/* + * Table to determine the ordinal date for the start of a month. + * Ref: http://en.wikipedia.org/wiki/ISO_week_date + */ +static const int start_of_month[2][13] = { + /* non-leap year */ + { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, + /* leap year */ + { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } +}; + +/* + * Calculate the week day of the first day of a year. Valid for + * the Gregorian calendar, which began Sept 14, 1752 in the UK + * and its colonies. Ref: + * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week + */ + +static int +first_wday_of(int yr) +{ + return ((2 * (3 - (yr / 100) % 4)) + (yr % 100) + ((yr % 100) / 4) + + (isleap(yr) ? 
6 : 0) + 1) % 7; +} + +#define delim(p) ((p) == '\0' || isspace((unsigned char)(p))) + +static int +fromzone(const unsigned char **bp, struct tm *tm, int mandatory) +{ +// timezone_t tz; + char buf[512], *p; + const unsigned char *rp; + + for (p = buf, rp = *bp; !delim(*rp) && p < &buf[sizeof(buf) - 1]; rp++) + *p++ = *rp; + *p = '\0'; + + if (mandatory) + *bp = rp; + if (!isalnum((unsigned char)*buf)) + return 0; +// tz = tzalloc(buf); +// if (tz == NULL) +// return 0; + + *bp = rp; + tm->tm_isdst = 0; /* XXX */ +#ifdef TM_GMTOFF + tm->TM_GMTOFF = tzgetgmtoff(tz, tm->tm_isdst); +#endif +#ifdef TM_ZONE + // Can't use tzgetname() here because we are going to free() + tm->TM_ZONE = NULL; /* XXX */ +#endif +// tzfree(tz); + return 1; +} + +char* strptime(const char *buf, const char *fmt, struct tm *tm) +{ + unsigned char c; + const unsigned char *bp, *ep, *zname; + int alt_format, i, split_year = 0, neg = 0, state = 0, + day_offset = -1, week_offset = 0, offs, mandatory; + const char *new_fmt; + + bp = (const unsigned char *)buf; + + while (bp != NULL && (c = *fmt++) != '\0') { + /* Clear `alternate' modifier prior to new conversion. */ + alt_format = 0; + i = 0; + + /* Eat up white-space. */ + if (isspace(c)) { + while (isspace(*bp)) + bp++; + continue; + } + + if (c != '%') + goto literal; + + +again: switch (c = *fmt++) { + case '%': /* "%%" is converted to "%". */ +literal: + if (c != *bp++) + return NULL; + LEGAL_ALT(0); + continue; + + /* + * "Alternative" modifiers. Just set the appropriate flag + * and start over again. + */ + case 'E': /* "%E?" alternative conversion modifier. */ + LEGAL_ALT(0); + alt_format |= ALT_E; + goto again; + + case 'O': /* "%O?" alternative conversion modifier. */ + LEGAL_ALT(0); + alt_format |= ALT_O; + goto again; + + /* + * "Complex" conversion rules, implemented through recursion. + */ + case 'c': /* Date and time, using the locale's format. 
*/ +// new_fmt = _TIME_LOCALE(loc)->d_t_fmt; + new_fmt = HERE_D_T_FMT; + state |= S_WDAY | S_MON | S_MDAY | S_YEAR; + goto recurse; + + case 'F': /* The date as "%Y-%m-%d". */ + new_fmt = "%Y-%m-%d"; + LEGAL_ALT(0); + state |= S_MON | S_MDAY | S_YEAR; + goto recurse; + + case 'R': /* The time as "%H:%M". */ + new_fmt = "%H:%M"; + LEGAL_ALT(0); + goto recurse; + + case 'r': /* The time in 12-hour clock representation. */ +// new_fmt = _TIME_LOCALE(loc)->t_fmt_ampm; + new_fmt = HERE_T_FMT_AMPM; + LEGAL_ALT(0); + goto recurse; + + case 'X': /* The time, using the locale's format. */ + /* fall through */ + + case 'T': /* The time as "%H:%M:%S". */ + new_fmt = HERE_T_FMT; + LEGAL_ALT(0); + +recurse: + bp = (const unsigned char *)strptime((const char *)bp, + new_fmt, tm); + LEGAL_ALT(ALT_E); + continue; + + case 'x': /* The date, using the locale's format. */ + /* fall throug */ + + case 'D': /* The date as "%y/%m/%d". */ + { + new_fmt = HERE_D_FMT; + LEGAL_ALT(0); + state |= S_MON | S_MDAY | S_YEAR; + const int year = split_year ? tm->tm_year : 0; + + bp = (const unsigned char *)strptime((const char *)bp, + new_fmt, tm); + LEGAL_ALT(ALT_E); + tm->tm_year += year; + if (split_year && tm->tm_year % (2000 - TM_YEAR_BASE) <= 68) + tm->tm_year -= 2000 - TM_YEAR_BASE; + split_year = 1; + continue; + } + /* + * "Elementary" conversion rules. + */ + case 'A': /* The day of week, using the locale's form. */ + case 'a': + bp = find_string(bp, &tm->tm_wday, weekday_name, ab_weekday_name, 7); + LEGAL_ALT(0); + state |= S_WDAY; + continue; + + case 'B': /* The month, using the locale's form. */ + case 'b': + case 'h': + bp = find_string(bp, &tm->tm_mon, month_name, ab_month_name, 12); + LEGAL_ALT(0); + state |= S_MON; + continue; + + case 'C': /* The century number. 
*/ + i = 20; + bp = conv_num(bp, &i, 0, 99); + + i = i * 100 - TM_YEAR_BASE; + if (split_year) + i += tm->tm_year % 100; + split_year = 1; + tm->tm_year = i; + LEGAL_ALT(ALT_E); + state |= S_YEAR; + continue; + + case 'd': /* The day of month. */ + case 'e': + bp = conv_num(bp, &tm->tm_mday, 1, 31); + LEGAL_ALT(ALT_O); + state |= S_MDAY; + continue; + + case 'k': /* The hour (24-hour clock representation). */ + LEGAL_ALT(0); + /* FALLTHROUGH */ + case 'H': + bp = conv_num(bp, &tm->tm_hour, 0, 23); + LEGAL_ALT(ALT_O); + state |= S_HOUR; + continue; + + case 'l': /* The hour (12-hour clock representation). */ + LEGAL_ALT(0); + /* FALLTHROUGH */ + case 'I': + bp = conv_num(bp, &tm->tm_hour, 1, 12); + if (tm->tm_hour == 12) + tm->tm_hour = 0; + LEGAL_ALT(ALT_O); + state |= S_HOUR; + continue; + + case 'j': /* The day of year. */ + i = 1; + bp = conv_num(bp, &i, 1, 366); + tm->tm_yday = i - 1; + LEGAL_ALT(0); + state |= S_YDAY; + continue; + + case 'M': /* The minute. */ + bp = conv_num(bp, &tm->tm_min, 0, 59); + LEGAL_ALT(ALT_O); + continue; + + case 'm': /* The month. */ + i = 1; + bp = conv_num(bp, &i, 1, 12); + tm->tm_mon = i - 1; + LEGAL_ALT(ALT_O); + state |= S_MON; + continue; + + case 'p': /* The locale's equivalent of AM/PM. */ + bp = find_string(bp, &i, am_pm, NULL, 2); + if (HAVE_HOUR(state) && tm->tm_hour > 11) + return NULL; + tm->tm_hour += i * 12; + LEGAL_ALT(0); + continue; + + case 'S': /* The seconds. 
*/ + bp = conv_num(bp, &tm->tm_sec, 0, 61); + LEGAL_ALT(ALT_O); + continue; + +#ifndef TIME_MAX +#define TIME_MAX INT64_MAX +#endif + case 's': /* seconds since the epoch */ + { + time_t sse = 0; + uint64_t rulim = TIME_MAX; + + if (*bp < '0' || *bp > '9') { + bp = NULL; + continue; + } + + do { + sse *= 10; + sse += *bp++ - '0'; + rulim /= 10; + } while ((sse * 10 <= TIME_MAX) && + rulim && *bp >= '0' && *bp <= '9'); + + if (sse < 0 || (uint64_t)sse > TIME_MAX) { + bp = NULL; + continue; + } +#ifdef _WIN32 + if (localtime_s(tm, &sse) == 0) +#else + if (localtime_r(&sse, tm)) +#endif + state |= S_YDAY | S_WDAY | S_MON | S_MDAY | S_YEAR; + else + bp = NULL; + } + continue; + + case 'U': /* The week of year, beginning on sunday. */ + case 'W': /* The week of year, beginning on monday. */ + /* + * This is bogus, as we can not assume any valid + * information present in the tm structure at this + * point to calculate a real value, so save the + * week for now in case it can be used later. + */ + bp = conv_num(bp, &i, 0, 53); + LEGAL_ALT(ALT_O); + if (c == 'U') + day_offset = TM_SUNDAY; + else + day_offset = TM_MONDAY; + week_offset = i; + continue; + + case 'w': /* The day of week, beginning on sunday. */ + bp = conv_num(bp, &tm->tm_wday, 0, 6); + LEGAL_ALT(ALT_O); + state |= S_WDAY; + continue; + + case 'u': /* The day of week, monday = 1. */ + bp = conv_num(bp, &i, 1, 7); + tm->tm_wday = i % 7; + LEGAL_ALT(ALT_O); + state |= S_WDAY; + continue; + + case 'g': /* The year corresponding to the ISO week + * number but without the century. + */ + bp = conv_num(bp, &i, 0, 99); + continue; + + case 'G': /* The year corresponding to the ISO week + * number with century. + */ + do + bp++; + while (isdigit(*bp)); + continue; + + case 'V': /* The ISO 8601:1988 week number as decimal */ + bp = conv_num(bp, &i, 0, 53); + continue; + + case 'Y': /* The year. */ + i = TM_YEAR_BASE; /* just for data sanity... 
*/ + bp = conv_num(bp, &i, 0, 9999); + tm->tm_year = i - TM_YEAR_BASE; + LEGAL_ALT(ALT_E); + state |= S_YEAR; + continue; + + case 'y': /* The year within 100 years of the epoch. */ + /* LEGAL_ALT(ALT_E | ALT_O); */ + bp = conv_num(bp, &i, 0, 99); + + if (split_year) + /* preserve century */ + i += (tm->tm_year / 100) * 100; + else { + split_year = 1; + if (i <= 68) + i = i + 2000 - TM_YEAR_BASE; + } + tm->tm_year = i; + state |= S_YEAR; + continue; + + case 'Z': // time zone name + case 'z': // +#ifdef _WIN32 + _tzset(); +#else + tzset(); +#endif + mandatory = c == 'z'; + /* + * We recognize all ISO 8601 formats: + * Z = Zulu time/UTC + * [+-]hhmm + * [+-]hh:mm + * [+-]hh + * We recognize all RFC-822/RFC-2822 formats: + * UT|GMT + * North American : UTC offsets + * E[DS]T = Eastern : -4 | -5 + * C[DS]T = Central : -5 | -6 + * M[DS]T = Mountain: -6 | -7 + * P[DS]T = Pacific : -7 | -8 + * Nautical/Military + * [A-IL-M] = -1 ... -9 (J not used) + * [N-Y] = +1 ... +12 + * Note: J maybe used to denote non-nautical + * local time + */ + if (mandatory) + while (isspace(*bp)) + bp++; + + zname = bp; + switch (*bp++) { + case 'G': + if (*bp++ != 'M') + goto namedzone; + /*FALLTHROUGH*/ + case 'U': + if (*bp++ != 'T') + goto namedzone; + else if (!delim(*bp) && *bp++ != 'C') + goto namedzone; + /*FALLTHROUGH*/ + case 'Z': + if (!delim(*bp)) + goto namedzone; + tm->tm_isdst = 0; +#ifdef TM_GMTOFF + tm->TM_GMTOFF = 0; +#endif +#ifdef TM_ZONE + tm->TM_ZONE = utc; +#endif + continue; + case '+': + neg = 0; + break; + case '-': + neg = 1; + break; + default: +namedzone: + bp = zname; + + /* Nautical / Military style */ + if (delim(bp[1]) && + ((*bp >= 'A' && *bp <= 'I') || + (*bp >= 'L' && *bp <= 'Y'))) { +#ifdef TM_GMTOFF + /* Argh! No 'J'! 
*/ + if (*bp >= 'A' && *bp <= 'I') + tm->TM_GMTOFF = + (int)*bp - ('A' - 1); + else if (*bp >= 'L' && *bp <= 'M') + tm->TM_GMTOFF = (int)*bp - 'A'; + else if (*bp >= 'N' && *bp <= 'Y') + tm->TM_GMTOFF = 'M' - (int)*bp; + tm->TM_GMTOFF *= SECSPERHOUR; +#endif +#ifdef TM_ZONE + tm->TM_ZONE = NULL; /* XXX */ +#endif + bp++; + continue; + } + /* 'J' is local time */ + if (delim(bp[1]) && *bp == 'J') { +#ifdef TM_GMTOFF + tm->TM_GMTOFF = -timezone; +#endif +#ifdef TM_ZONE + tm->TM_ZONE = NULL; /* XXX */ +#endif + bp++; + continue; + } + + /* + * From our 3 letter hard-coded table + * XXX: Can be removed, handled by tzload() + */ + if (delim(bp[0]) || delim(bp[1]) || + delim(bp[2]) || !delim(bp[3])) + goto loadzone; + ep = find_string(bp, &i, nast, NULL, 4); + if (ep != NULL) { +#ifdef TM_GMTOFF + tm->TM_GMTOFF = (-5 - i) * SECSPERHOUR; +#endif +#ifdef TM_ZONE + tm->TM_ZONE = __UNCONST(nast[i]); +#endif + bp = ep; + continue; + } + ep = find_string(bp, &i, nadt, NULL, 4); + if (ep != NULL) { + tm->tm_isdst = 1; +#ifdef TM_GMTOFF + tm->TM_GMTOFF = (-4 - i) * SECSPERHOUR; +#endif +#ifdef TM_ZONE + tm->TM_ZONE = __UNCONST(nadt[i]); +#endif + bp = ep; + continue; + } + /* + * Our current timezone + */ + ep = find_string(bp, &i, + (const char * const *)tzname, + NULL, 2); + if (ep != NULL) { + tm->tm_isdst = i; +#ifdef TM_GMTOFF + tm->TM_GMTOFF = -timezone; +#endif +#ifdef TM_ZONE + tm->TM_ZONE = tzname[i]; +#endif + bp = ep; + continue; + } +loadzone: + /* + * The hard way, load the zone! 
+ */ + if (fromzone(&bp, tm, mandatory)) + continue; + goto out; + } + offs = 0; + for (i = 0; i < 4; ) { + if (isdigit(*bp)) { + offs = offs * 10 + (*bp++ - '0'); + i++; + continue; + } + if (i == 2 && *bp == ':') { + bp++; + continue; + } + break; + } + if (isdigit(*bp)) + goto out; + switch (i) { + case 2: + offs *= SECSPERHOUR; + break; + case 4: + i = offs % 100; + offs /= 100; + if (i >= SECSPERMIN) + goto out; + /* Convert minutes into decimal */ + offs = offs * SECSPERHOUR + i * SECSPERMIN; + break; + default: +out: + if (mandatory) + return NULL; + bp = zname; + continue; + } + /* ISO 8601 & RFC 3339 limit to 23:59 max */ + if (offs >= (HOURSPERDAY * SECSPERHOUR)) + goto out; + if (neg) + offs = -offs; + tm->tm_isdst = 0; /* XXX */ +#ifdef TM_GMTOFF + tm->TM_GMTOFF = offs; +#endif +#ifdef TM_ZONE + tm->TM_ZONE = NULL; /* XXX */ +#endif + continue; + + /* + * Miscellaneous conversions. + */ + case 'n': /* Any kind of white-space. */ + case 't': + while (isspace(*bp)) + bp++; + LEGAL_ALT(0); + continue; + + + default: /* Unknown/unsupported conversion. */ + return NULL; + } + } + + if (!HAVE_YDAY(state) && HAVE_YEAR(state)) { + if (HAVE_MON(state) && HAVE_MDAY(state)) { + /* calculate day of year (ordinal date) */ + tm->tm_yday = start_of_month[isleap_sum(tm->tm_year, + TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1); + state |= S_YDAY; + } else if (day_offset != -1) { + /* + * Set the date to the first Sunday (or Monday) + * of the specified week of the year. 
+ */ + if (!HAVE_WDAY(state)) { + tm->tm_wday = day_offset; + state |= S_WDAY; + } + tm->tm_yday = (7 - + first_wday_of(tm->tm_year + TM_YEAR_BASE) + + day_offset) % 7 + (week_offset - 1) * 7 + + tm->tm_wday - day_offset; + state |= S_YDAY; + } + } + + if (HAVE_YDAY(state) && HAVE_YEAR(state)) { + int isleap; + + if (!HAVE_MON(state)) { + /* calculate month of day of year */ + i = 0; + isleap = isleap_sum(tm->tm_year, TM_YEAR_BASE); + while (tm->tm_yday >= start_of_month[isleap][i]) + i++; + if (i > 12) { + i = 1; + tm->tm_yday -= start_of_month[isleap][12]; + tm->tm_year++; + } + tm->tm_mon = i - 1; + state |= S_MON; + } + + if (!HAVE_MDAY(state)) { + /* calculate day of month */ + isleap = isleap_sum(tm->tm_year, TM_YEAR_BASE); + tm->tm_mday = tm->tm_yday - + start_of_month[isleap][tm->tm_mon] + 1; + state |= S_MDAY; + } + + if (!HAVE_WDAY(state)) { + /* calculate day of week */ + i = 0; + week_offset = first_wday_of(tm->tm_year); + while (i++ <= tm->tm_yday) { + if (week_offset++ >= 6) + week_offset = 0; + } + tm->tm_wday = week_offset; + state |= S_WDAY; + } + } + + return (char*)bp; +} + + +static const unsigned char * +conv_num(const unsigned char *buf, int *dest, unsigned int llim, unsigned int ulim) +{ + unsigned int result = 0; + unsigned char ch; + + /* The limit also determines the number of valid digits. 
*/ + unsigned int rulim = ulim; + + ch = *buf; + if (ch < '0' || ch > '9') + return NULL; + + do { + result *= 10; + result += ch - '0'; + rulim /= 10; + ch = *++buf; + } while ((result <= ulim) && rulim && ch >= '0' && ch <= '9'); + + if (result < llim || result > ulim) + return NULL; + + *dest = result; + return buf; +} + +static const unsigned char * +find_string(const unsigned char *bp, int *tgt, const char * const *n1, + const char * const *n2, int c) +{ + int i; + size_t len; + + /* check full name - then abbreviated ones */ + for (; n1 != NULL; n1 = n2, n2 = NULL) { + for (i = 0; i < c; i++, n1++) { + len = strlen(*n1); + if (strncasecmp(*n1, (const char *)bp, len) == 0) { + *tgt = i; + return bp + len; + } + } + } + + /* Nothing matched */ + return NULL; +} diff --git a/strptime/strptime.h b/strptime/strptime.h new file mode 100644 index 0000000..891e6f4 --- /dev/null +++ b/strptime/strptime.h @@ -0,0 +1,10 @@ +#ifndef STRPTIME_H +#define STRPTIME_H + +#include + +#ifdef _WIN32 +char* strptime(const char *buf, const char *fmt, struct tm *tm); +#endif + +#endif // STRPTIME_H diff --git a/time_util.cpp b/time_util.cpp index 402e09d..c51bcf7 100644 --- a/time_util.cpp +++ b/time_util.cpp @@ -1,9 +1,22 @@ #include "time_util.h" +#include "utils_config.h" #if _WIN32 #include #endif +#include + +#ifndef HAVE_STRPTIME +#include "strptime/strptime.h" +#endif + +#if HAVE_PRINTF_S +#define printf printf_s +#endif + +#include "err.h" + #if _WIN32 void time_util::time_t_to_file_time(time_t t, LPFILETIME pft) { ULARGE_INTEGER time_value; @@ -12,3 +25,41 @@ void time_util::time_t_to_file_time(time_t t, LPFILETIME pft) { pft->dwHighDateTime = time_value.HighPart; } #endif + +char* time_util::strptime(const char* s, const char* format, struct tm* tm) { + return ::strptime(s, format, tm); +} + +long time_util::get_timezone() { /* Returns the time-zone offset in seconds reported by _get_timezone() (or by the `timezone` global after tzset()); prints a diagnostic and returns 0 when it cannot be obtained. */ +#if HAVE__GET_TIMEZONE + long t = 0; + int err = _get_timezone(&t); + if (err) { /* _get_timezone() returns 0 on success and an errno value on failure, so only a non-zero result is an error */ + std::string msg ; + if (!err::get_errno_message(msg, err)) { +
msg = "Unknown error"; + } + printf("get_timezone failed: %s\n", msg.c_str()); + return 0; + } else { + return t; + } +#elif HAVE_TIMEZONE && HAVE_TZSET + tzset(); + return timezone; +#else + printf("get_timezone failed: not implemented\n"); + return 0; +#endif +} + +time_t time_util::timegm(struct tm* tm) { /* Converts a broken-down time expressed in UTC to time_t, using _mkgmtime()/timegm() when the build detected them. */ +#if HAVE__MKGMTIME + return _mkgmtime(tm); +#elif HAVE_TIMEGM + return ::timegm(tm); +#else + time_t now = ::mktime(tm); /* mktime() reads tm as local time, so the result is the UTC interpretation plus the zone offset (seconds west of UTC) */ + return now - get_timezone(); /* subtract the offset to undo the local-time interpretation; NOTE(review): a DST shift applied by mktime() is not compensated here */ +#endif +} diff --git a/time_util.h b/time_util.h index 1879ab5..7d7b38b 100644 --- a/time_util.h +++ b/time_util.h @@ -14,5 +14,8 @@ namespace time_util { */ void time_t_to_file_time(time_t t, LPFILETIME pft); #endif + char* strptime(const char* s, const char* format, struct tm* tm); + long get_timezone(); + time_t timegm(struct tm* tm); } #endif diff --git a/utils_config.h.in b/utils_config.h.in index 2b90c52..fe92201 100644 --- a/utils_config.h.in +++ b/utils_config.h.in @@ -19,3 +19,10 @@ #cmakedefine HAVE_FCLOSEALL @HAVE_FCLOSEALL@ #cmakedefine HAVE_OPENSSL @HAVE_OPENSSL@ #cmakedefine HAVE_ZLIB @HAVE_ZLIB@ +#cmakedefine HAVE_STRPTIME @HAVE_STRPTIME@ +#cmakedefine HAVE__MKGMTIME @HAVE__MKGMTIME@ +#cmakedefine HAVE_TIMEGM @HAVE_TIMEGM@ +#cmakedefine HAVE__GET_TIMEZONE @HAVE__GET_TIMEZONE@ +#cmakedefine HAVE_TIMEZONE @HAVE_TIMEZONE@ +#cmakedefine HAVE_TZSET @HAVE_TZSET@ +#cmakedefine HAVE_FPRINTF_S @HAVE_FPRINTF_S@