libmp/src/url.c

239 lines
4.8 KiB
C
Raw Normal View History

2024-06-20 22:36:56 +00:00
#include "../include/url.h"
2024-06-20 00:34:32 +00:00
#include "../include/error.h"
#include "../include/util.h"
#include <assert.h>
2024-06-22 04:03:17 +00:00
#include <stdio.h>
2024-06-20 00:34:32 +00:00
#include <stdlib.h>
2024-06-20 22:36:56 +00:00
#include <string.h>
2024-06-20 00:34:32 +00:00
2024-06-20 22:36:56 +00:00
// non-alphanumeric characters accepted in the path part of a URL; letters and
// digits are checked separately by lm_url_init before this table is consulted
char valid_path[] = "-._~:/?#[]@!$&'()*+,;%=";
2024-06-20 00:34:32 +00:00
// Parser states used by lm_url_init, in the order they are visited. The
// numeric suffix of each name matches its value, and the parser moves to the
// following state by incrementing the current one.
typedef enum lm_state {
  URL_PROTOCOL_0 = 0, // reading the protocol, up to the ':'
  URL_SPLIT_1,        // reading the "//" that follows the ':'
  URL_HOST_2,         // reading the host, up to the first '/'
  URL_PATH_3,         // reading the path, up to the end of the string
} lm_state_t;
2024-06-22 04:03:17 +00:00
/**
 * Return the default port for a protocol name.
 *
 * The name is compared with eq() against each known protocol in turn
 * ("ftp", "ftps", "http", "https", "mptp"); the port of the first match is
 * returned.
 *
 * @param protocol protocol name to look up
 * @return the default port, or 0 when the protocol is not in the table
 */
uint16_t lm_url_default_port(char *protocol) {
  // kept in the same order the comparisons were originally written in
  struct {
    char    *name;
    uint16_t port;
  } known[] = {
      {"ftp", 21}, {"ftps", 990}, {"http", 80}, {"https", 443}, {"mptp", 5858},
  };

  for (size_t i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
    if (eq(protocol, known[i].name))
      return known[i].port;
  }

  return 0;
}
2024-06-22 04:03:17 +00:00
/**
 * Parse a URL string into its protocol, host, port and path.
 *
 * The string is scanned once by a small state machine (see lm_state_t):
 * protocol up to ':', then "//", then the host up to the first '/', then the
 * path. When the string ends right after the host, the path defaults to "/".
 * The host text is afterwards handed to parse_host(), which fills url->port;
 * if it leaves the port at 0 the default for the protocol is used
 * (lm_url_default_port).
 *
 * @param url structure to fill; every field is reset first
 * @param str URL string, or NULL to produce an empty URL (url->empty = true)
 * @return true on success (including the NULL case). On failure false is
 *         returned, an error is set through lm_error_set() (except for an
 *         allocation failure, see the note below), and url->host / url->path
 *         are freed and reset to NULL so that a later lm_url_free() is safe.
 *
 * On success url->host and url->path are heap allocated and owned by the
 * caller's structure; release them with lm_url_free().
 */
bool lm_url_init(lm_url_t *url, char *str) {
  // reset every field so the structure is in a known state on any return path
  memset(url->protocol, 0, sizeof(url->protocol));
  url->empty = true;
  url->host  = NULL;
  url->path  = NULL;
  url->port  = 0;

  // a NULL string is accepted and yields an empty URL
  if (NULL == str)
    return true;

  // str is not NULL
  url->empty = false;

  // strl: length of str, index: write position in buffer, pos: read position in str
  size_t strl = 0, index = 0, pos = 0;

  // make sure the URL string size is not too large
  // extra 4 for "://" and ":"
  if ((strl = strlen(str)) > URL_PROTOCOL_MAX + URL_PATH_MAX + URL_HOST_MAX + 4) {
    lm_error_set(LM_ERR_URLTooLarge);
    return false;
  }

  lm_state_t state = URL_PROTOCOL_0;
  char       buffer[strl + 1]; // holds the component currently being read
  bool       ret = false;      // return value

  // clear out the temporary buffer
  memset(buffer, 0, strl + 1);

  // copy one character at a time into the buffer; "break" keeps collecting the
  // current component, "goto next" finishes it and moves to the next state
  while ((buffer[index] = *(str + pos)) != 0) {
    switch (state) {
    case URL_PROTOCOL_0:
      if (index > URL_PROTOCOL_MAX) {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      if (!is_letter(buffer[index]) && !is_digit(buffer[index]) && buffer[index] != ':') {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      if (buffer[index] != ':')
        break;

      // reached the ':', an empty protocol is not allowed
      if (0 == index) {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      // never copy more than the protocol field can hold (terminator included)
      if (index >= sizeof(url->protocol)) {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      buffer[index] = 0;
      memcpy(url->protocol, buffer, index + 1);
      goto next;

    case URL_SPLIT_1:
      if (index > 1) {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      if (buffer[index] != '/') {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      // wait for the second '/'
      if (index != 1)
        break;

      if (buffer[index - 1] != '/' || buffer[index] != '/') {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }
      goto next;

    case URL_HOST_2:
      if (index > URL_HOST_MAX) {
        lm_error_set(LM_ERR_URLHostLarge);
        goto end;
      }

      if (!is_letter(buffer[index]) && !is_digit(buffer[index]) && buffer[index] != '.' && buffer[index] != ':' &&
          buffer[index] != '/') {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      if (buffer[index] != '/')
        break;

      // reached the '/', an empty host is not allowed
      if (index == 0) {
        lm_error_set(LM_ERR_URLBadHost);
        goto end;
      }

      buffer[index] = 0;
      url->host     = malloc((index + 1) * sizeof(char));
      // NOTE(review): no allocation-failure error code is visible in this
      // file, so the library error is left untouched here
      if (NULL == url->host)
        goto end;
      memcpy(url->host, buffer, index + 1);
      goto next;

    case URL_PATH_3:
      if (index > URL_PATH_MAX) {
        lm_error_set(LM_ERR_URLPathLarge);
        goto end;
      }

      if (!is_letter(buffer[index]) && !is_digit(buffer[index]) && !contains(valid_path, buffer[index])) {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }
      break;

    default:
      assert(false);
    }

    index++;
    pos++;
    continue;

  next:
    // component finished: start the next one with an empty buffer
    memset(buffer, 0, strl + 1);
    state++;
    index = 0;
    pos++;
  }

  // the string ended; what is left in the buffer belongs to the last state
  switch (state) {
  case URL_HOST_2:
    // "protocol://host" with no path
    if (index == 0) {
      lm_error_set(LM_ERR_URLBadHost);
      goto end;
    }

    if (index > URL_HOST_MAX) {
      lm_error_set(LM_ERR_URLHostLarge);
      goto end;
    }

    url->host = malloc((index + 1) * sizeof(char));
    if (NULL == url->host)
      goto end;
    memcpy(url->host, buffer, index + 1);

    // no path was given, default to "/"
    url->path = malloc(2 * sizeof(char));
    if (NULL == url->path)
      goto end;
    url->path[0] = '/';
    url->path[1] = 0;
    break;

  case URL_PATH_3:
    // the separating '/' was consumed by the host state, put it back in front
    url->path = malloc((index + 2) * sizeof(char));
    if (NULL == url->path)
      goto end;
    url->path[0] = '/';
    memcpy(url->path + 1, buffer, index + 1);
    break;

  default:
    // the string ended before a host was read
    lm_error_set(LM_ERR_URLEnd);
    goto end;
  }

  // parse_host rewrites the host in place and reports the port
  // NOTE(review): presumably it splits "host:port" - confirm against util
  if (parse_host(url->host, url->host, &url->port)) {
    if (url->port != 0) {
      ret = true;
      goto end;
    }

    // no port reported, fall back to the default for the protocol
    url->port = lm_url_default_port(url->protocol);
    if (url->port == 0) {
      lm_error_set(LM_ERR_URLPortUnknown);
      goto end;
    }

    ret = true;
    goto end;
  }

  // translate the host parsing errors into their URL counterparts
  switch (lm_error()) {
  case LM_ERR_BadHost:
    lm_error_set(LM_ERR_URLBadHost);
    break;

  case LM_ERR_BadPort:
    lm_error_set(LM_ERR_URLBadPort);
    break;

  default:
    break;
  }

end:
  if (!ret) {
    // release partial results and clear the pointers so they can not be
    // freed a second time by lm_url_free
    free(url->host);
    free(url->path);
    url->host = NULL;
    url->path = NULL;
  }
  return ret;
}
2024-06-20 22:36:56 +00:00
/**
 * Release the heap memory owned by a URL.
 *
 * Frees url->host and url->path and resets both to NULL, so calling this
 * again on the same structure is harmless. The structure itself is not freed.
 *
 * @param url URL to release; a NULL pointer is ignored
 */
void lm_url_free(lm_url_t *url) {
  if (NULL == url)
    return;

  // free(NULL) is a no-op, so no guards are needed
  free(url->host);
  free(url->path);

  // clear the pointers so a repeated call can not free them twice
  url->host = NULL;
  url->path = NULL;
}