diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ab2008..8438ad5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,12 +23,16 @@ if (WIN32) check_symbol_exists(_wcserror_s "string.h" HAVE__WCSERROR_S) check_symbol_exists(printf_s "stdio.h" HAVE_PRINTF_S) check_symbol_exists(sscanf_s "stdio.h" HAVE_SSCANF_S) + check_symbol_exists(_stricmp "string.h" HAVE__STRICMP) + check_symbol_exists(_strnicmp "string.h" HAVE__STRNICMP) else() check_symbol_exists(fseeko "stdio.h" HAVE_FSEEKO) check_symbol_exists(fseeko64 "stdio.h" HAVE_FSEEKO64) check_symbol_exists(ftello "stdio.h" HAVE_FTELLO) check_symbol_exists(ftello64 "stdio.h" HAVE_FTELLO64) endif() +check_symbol_exists(strcasecmp "string.h" HAVE_STRCASECMP) +check_symbol_exists(strncasecmp "string.h" HAVE_STRNCASECMP) check_symbol_exists(strerror_r "string.h" HAVE_STRERROR_R) if (HAVE_STRERROR_R) test_strerror_r(HAVE_GNU_STRERROR_R) @@ -53,6 +57,7 @@ set(SOURCE_FILE str_util.cpp c_linked_list.cpp file_reader.c + urlparse.cpp ) set(SOURCE_FILE_HEADERS cfileop.h @@ -71,6 +76,7 @@ set(SOURCE_FILE_HEADERS linked_list.h c_linked_list.h file_reader.h + urlparse.h ) add_library(utils STATIC ${SOURCE_FILE} ${SOURCE_FILE_HEADERS}) diff --git a/cstr_util.c b/cstr_util.c index 4fbe105..c007b49 100644 --- a/cstr_util.c +++ b/cstr_util.c @@ -11,6 +11,10 @@ #define printf printf_s #endif +#ifndef min +#define min(x, y) (((x) < (y)) ? (x) : (y)) +#endif + typedef enum float_format { undetected_endian, ieee_big_endian, @@ -277,3 +281,43 @@ double cstr_read_double(const uint8_t* p, int big) { return x; } } + +int cstr_stricmp(const char* str1, const char* str2) { +#if HAVE__STRICMP + return _stricmp(str1, str2); +#elif HAVE_STRCASECMP + return strcasecmp(str1, str2); +#else + size_t le = strlen(str1), le2 = strlen(str2); + size_t mle = min(le, le2); + for (size_t i = 0; i < mle; i++) { + int a = tolower(str1[i]), b = tolower(str2[i]); + if (a < b) return -1; + else if (a > b) return 1; + } + if (le > le2) return 1; + else if (le < le2) return -1; + return 0; +#endif +} + +int cstr_strnicmp(const char* str1, const char* str2, size_t count) { +#if HAVE__STRNICMP + return _strnicmp(str1, str2, count); +#elif HAVE_STRNCASECMP + return strncasecmp(str1, str2, count); +#else + size_t le = strlen(str1), le2 = strlen(str2); + size_t mle = min(le, le2); + for (size_t i = 0; i < mle; i++) { + if (i == count) return 0; + int a = tolower(str1[i]), b = tolower(str2[i]); + if (a < b) return -1; + else if (a > b) return 1; + } + if (mle == count) return 0; + if (le > le2) return 1; + else if (le < le2) return -1; + return 0; +#endif +} diff --git a/cstr_util.h b/cstr_util.h index e988571..4dfc52f 100644 --- a/cstr_util.h +++ b/cstr_util.h @@ -98,6 +98,23 @@ float cstr_read_float(const uint8_t* bytes, int big); * @return result */ double cstr_read_double(const uint8_t* bytes, int big); +/** + * @brief Performs a case-insensitive comparison of strings. + * @param str1 Null-terminated strings to compare. + * @param str2 Null-terminated strings to compare. + * @return < 0 str1 less than str2, = 0 str1 identical to str2, > 0 str1 greater than str2 +*/ +int cstr_stricmp(const char* str1, const char* str2); +#define cstr_strcasecmp cstr_stricmp +/** + * @brief Compares the specified number of characters of two strings without regard to case. + * @param str1 Null-terminated strings to compare. + * @param str2 Null-terminated strings to compare. + * @param count Number of characters to compare. + * @return < 0 str1 less than str2, = 0 str1 identical to str2, > 0 str1 greater than str2 +*/ +int cstr_strnicmp(const char* str1, const char* str2, size_t count); +#define cstr_strncasecmp cstr_strnicmp #ifdef __cplusplus } #endif diff --git a/dict.h b/dict.h index efb7353..e33863f 100644 --- a/dict.h +++ b/dict.h @@ -24,18 +24,35 @@ template bool dict_get_internal(struct dict_entry o, K key) { return o.key == key; } +template +bool dict_get_internal(struct dict_entry o, D key, bool(*compare_func)(K, D)) { + return compare_func(o.key, key); +} template struct dict_entry* dict_get(struct Dict* d, K key) { struct Dict* re = (struct Dict*)linked_list_get((struct LinkedList>*)d, key, &dict_get_internal); if (!re) return nullptr; return &re->d; } +template +struct dict_entry* dict_get(struct Dict* d, D key, bool(*compare_func)(K, D)) { + if (!compare_func) return nullptr; + struct Dict* re = (struct Dict*)linked_list_get((struct LinkedList>*)d, key, &dict_get_internal, compare_func); + if (!re) return nullptr; + return &re->d; +} template V dict_get_value(struct Dict* d, K key) { struct dict_entry* re = dict_get(d, key); if (!re) return nullptr; return re->value; } +template +V dict_get_value(struct Dict* d, D key, bool(*compare_func)(K, D)) { + struct dict_entry* re = dict_get(d, key, compare_func); + if (!re) return nullptr; + return re->value; +} template bool dict_heve_key_internal(struct dict_entry origin, K key) { return key == origin.key; @@ -66,6 +83,19 @@ void dict_iter(struct Dict* d, void(*callback)(size_t index, K key, V valu callback(i, t->d.key, t->d.value, args...); } } +template +R dict_iter(struct Dict* d, R(*callback)(K key, V value, Args... args), R failed, Args... args) { + if (!d || !callback) return failed; + struct Dict* t = d; + R re = callback(t->d.key, t->d.value, args...); + if (re == failed) return failed; + while (t->next) { + t = t->next; + re = callback(t->d.key, t->d.value, args...); + if (re == failed) return failed; + } + return re; +} template bool dict_set(struct Dict*& d, K key, V value, void(*free_func)(V) = nullptr) { if (!d) { diff --git a/linked_list.h b/linked_list.h index ba09f37..25604db 100644 --- a/linked_list.h +++ b/linked_list.h @@ -168,17 +168,17 @@ void linked_list_free_tail(struct LinkedList*& list, void(*free_func)(T) = nu } } -template -struct LinkedList* linked_list_get(struct LinkedList* list, D data, bool(*compare_func)(T, D)) { +template +struct LinkedList* linked_list_get(struct LinkedList* list, D data, bool(*compare_func)(T, D, Args...), Args ... args) { if (!list || !compare_func) return nullptr; struct LinkedList* t = list; while (t->prev) { t = t->prev; } - if (compare_func(t->d, data)) return t; + if (compare_func(t->d, data, args...)) return t; while (t->next) { t = t->next; - if (compare_func(t->d, data)) return t; + if (compare_func(t->d, data, args...)) return t; } return nullptr; } diff --git a/urlparse.cpp b/urlparse.cpp new file mode 100644 index 0000000..0fd3b16 --- /dev/null +++ b/urlparse.cpp @@ -0,0 +1,142 @@ +#include "urlparse.h" +#include "utils_config.h" + +#include +#include +#include +#include +#include +#include "cstr_util.h" +#include "str_util.h" + +#if HAVE_PRINTF_S +#define printf printf_s +#endif + +bool is_valid_scheme_chars(char c) { + if (c >= 'a' && c <= 'z') return true; + if (c >= 'A' && c <= 'Z') return true; + if (c >= '0' && c <= '9') return true; + if (c == '+' || c == '-' || c == '.') return true; + return false; +} + +bool scheme_is_use_params(std::string name) { + const std::array arr = { "", "ftp", "hdl", "prospero", "http", "imap", "https", "shttp", "rtsp", "rtspu", "sip", "sips", "mms", "sftp", "tel" }; + for (auto i = arr.begin(); i != arr.end(); i++) { + if (name == *i) return true; + } + return false; +} + +UrlParseResult* urlparse(const char* url, const char* sch, char allow_fragments) { + if (!url) return nullptr; + std::string u(url); + std::string scheme, netloc, params, query, fragment; + if (sch) scheme = sch; + u = str_util::str_replace(u, "\t", ""); + u = str_util::str_replace(u, "\r", ""); + u = str_util::str_replace(u, "\n", ""); + auto i = u.find(':'); + if (i > 0 && i != -1) { + bool ok = true; + for (size_t j = 0; j < i; j++) { + if (!is_valid_scheme_chars(u[j])) { + ok = false; + break; + } + } + if (ok) { + std::string tmp = u.substr(0, i); + if (!str_util::tolowercase(tmp, scheme)) return nullptr; + u = u.substr(i + 1); + } + } + if (u.find("//") == 0) { + auto delim = u.size(); + const char* s = "/?#"; + for (int i = 0; i < 3; i++) { + auto wdelim = u.find(s[i], 2); + if (wdelim != -1) { + delim = std::min(delim, wdelim); + } + } + netloc = u.substr(2, delim - 2); + u = u.substr(delim); + } + if (allow_fragments) { + auto i = u.find('#'); + if (i != -1) { + fragment = u.substr(i + 1); + u = u.substr(0, i); + } + } + i = u.find('?'); + if (i != -1) { + query = u.substr(i + 1); + u = u.substr(0, i); + } + i = u.find(';'); + if (i != -1 && scheme_is_use_params(scheme)) { + auto j = u.rfind('/'); + if (j == -1) { + params = u.substr(i + 1); + u = u.substr(0, i); + } else { + i = u.find(';', j); + if (i != -1) { + params = u.substr(i + 1); + u = u.substr(0, i); + } + } + } + auto r = (UrlParseResult*)malloc(sizeof(UrlParseResult)); + if (!r) { + return nullptr; + } + memset(r, 0, sizeof(UrlParseResult)); + if (cstr_util_copy_str(&r->scheme, scheme.c_str())) { + goto end; + } + if (cstr_util_copy_str(&r->netloc, netloc.c_str())) { + goto end; + } + if (cstr_util_copy_str(&r->path, u.c_str())) { + goto end; + } + if (cstr_util_copy_str(&r->params, params.c_str())) { + goto end; + } + if (cstr_util_copy_str(&r->query, query.c_str())) { + goto end; + } + if (cstr_util_copy_str(&r->fragment, fragment.c_str())) { + goto end; + } + return r; +end: + free_url_parse_result(r); + return nullptr; +} + +void free_url_parse_result(UrlParseResult* r) { + if (!r) return; + if (r->scheme) free(r->scheme); + if (r->netloc) free(r->netloc); + if (r->path) free(r->path); + if (r->params) free(r->params); + if (r->query) free(r->query); + if (r->fragment) free(r->fragment); + free(r); +} + +void dump_url_parse_result(UrlParseResult* r, int indent_now) { + if (!r) return; + std::string ind(indent_now, ' '); + if (r->scheme) printf("%sScheme: %s\n", ind.c_str(), r->scheme); + if (r->netloc) printf("%sNetloc: %s\n", ind.c_str(), r->netloc); + if (r->path) printf("%sPath: %s\n", ind.c_str(), r->path); + if (r->params) printf("%sParams: %s\n", ind.c_str(), r->params); + if (r->query) printf("%sQuery: %s\n", ind.c_str(), r->query); + if (r->fragment) printf("%sFragment: %s\n", ind.c_str(), r->fragment); +} diff --git a/urlparse.h b/urlparse.h new file mode 100644 index 0000000..aefc636 --- /dev/null +++ b/urlparse.h @@ -0,0 +1,27 @@ +#ifndef _UTIL_URLPARSE_H +#define _UTIL_URLPARSE_H +#ifdef __cplusplus +extern "C" { +#endif +typedef struct UrlParseResult { + char* scheme; + char* netloc; + char* path; + char* params; + char* query; + char* fragment; +} UrlParseResult; +/** + * @brief Parse a URL into 6 components: :///;?# + * @param url URL + * @param sch Provides the default value of the scheme component when no scheme is found in url. + * @param allow_fragments If is 0, no attempt is made to separate the fragment component from the previous component, which can be either path or query. + * @return +*/ +UrlParseResult* urlparse(const char* url, const char* sch, char allow_fragments); +void free_url_parse_result(UrlParseResult* r); +void dump_url_parse_result(UrlParseResult* r, int indent_now); +#ifdef __cplusplus +} +#endif +#endif diff --git a/utils_config.h.in b/utils_config.h.in index 27af2be..2492b2e 100644 --- a/utils_config.h.in +++ b/utils_config.h.in @@ -12,3 +12,7 @@ #cmakedefine HAVE_FSEEKO64 @HAVE_FSEEKO64@ #cmakedefine HAVE_FTELLO @HAVE_FTELLO@ #cmakedefine HAVE_FTELLO64 @HAVE_FTELLO64@ +#cmakedefine HAVE__STRICMP @HAVE__STRICMP@ +#cmakedefine HAVE_STRCASECMP @HAVE_STRCASECMP@ +#cmakedefine HAVE__STRNICMP @HAVE__STRNICMP@ +#cmakedefine HAVE_STRNCASECMP @HAVE_STRNCASECMP@