From 6cf2d5f198dc1c07cbb70ce7b0850e7e93d45614 Mon Sep 17 00:00:00 2001 From: Francesco Date: Tue, 26 Nov 2024 17:16:04 +0100 Subject: [PATCH] fix client disconnection --- .idea/codeStyles/Project.xml | 99 ++++++ .idea/editor.xml | 580 +++++++++++++++++++++++++++++++++++ .vscode/settings.json | 6 +- README.md | 11 +- meson.build | 6 +- notes/advanced-techniques.md | 17 +- notes/basic-concepts.md | 78 +++-- notes/hash-table.md | 16 +- notes/http-request.md | 5 + src/http/http.c | 10 +- src/main.c | 3 +- src/server/server.c | 62 ++-- www/index.html | 2 +- 13 files changed, 819 insertions(+), 76 deletions(-) create mode 100644 .idea/editor.xml diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml index f603881..5316267 100644 --- a/.idea/codeStyles/Project.xml +++ b/.idea/codeStyles/Project.xml @@ -1,5 +1,104 @@ + + diff --git a/.idea/editor.xml b/.idea/editor.xml new file mode 100644 index 0000000..59c76fa --- /dev/null +++ b/.idea/editor.xml @@ -0,0 +1,580 @@ + + + + + \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 92c3581..bafd106 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,6 @@ { - "clangd.path": "/usr/bin/clangd", - "clangd.arguments": [ "--header-insertion=never" ], + "clangd.path": "/usr/bin/clangd", + "clangd.arguments": [ + "--header-insertion=never" + ] } \ No newline at end of file diff --git a/README.md b/README.md index 54382c4..83f3c5c 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,37 @@ # cws + A simple Web Server written in C (learning purposes), it works only on Linux systems. ## Requirements + - [meson](https://mesonbuild.com/index.html) - [doxygen](https://www.doxygen.nl/) - - Optional, just to build the docs + - Optional, just to build the docs ## How to build + ```bash $ meson setup build $ cd build $ meson compile ``` + And then run `cws`! 
## Docs + ```bash $ git submodule update --init # inside the cws directory $ doxygen ``` + And then open the `docs/html/index.html`. ## Roadmap + - [x] Understading basic web server concepts - [ ] Basic server +- [ ] CLI args - [ ] Enhance web server - [ ] IPv6 compatible - [ ] Request parser (methods and headers) @@ -35,6 +43,7 @@ And then open the `docs/html/index.html`. - [ ] Caching ## Resources + - [Beej's Guide to Network Programming](https://beej.us/guide/bgnet/) You can find my journey inside the `notes` directory! diff --git a/meson.build b/meson.build index 2194208..c3067b6 100644 --- a/meson.build +++ b/meson.build @@ -1,7 +1,7 @@ -project('cws', 'c', version: '1.0.0') +project('cws', 'c', version : '1.0.0') subdir('src') incdir = include_directories('include') -executable('server', server, include_directories: incdir) -executable('client', client, include_directories: incdir) \ No newline at end of file +executable('server', server, include_directories : incdir) +executable('client', client, include_directories : incdir) \ No newline at end of file diff --git a/notes/advanced-techniques.md b/notes/advanced-techniques.md index 82fe575..496d4cb 100644 --- a/notes/advanced-techniques.md +++ b/notes/advanced-techniques.md @@ -6,7 +6,10 @@ - [epoll() - I/O Event Notification (Async)](#epoll---io-event-notification-async) ### Blocking -All the Unix networking functions are **blocking**. What does it mean? It means that if you write `accept()` or `recv()` it will wait until some data appears. So how we can avoid this? Making the socket non blocking so we can poll the socket for info: + +All the Unix networking functions are **blocking**. What does it mean? It means that if you write `accept()` or `recv()` +it will wait until some data appears. So how we can avoid this? 
Making the socket non blocking so we can poll the socket +for info: ```c fcntl(sockfd, F_SETFL, O_NONBLOCK); @@ -16,7 +19,9 @@ fcntl(sockfd, F_SETFL, O_NONBLOCK); `O_NONBLOCK`: make the fd non blocking ### poll() - Synchronous I/O Multiplexing -The plan is to have a `struct pollfd` with the info about which sockets fd we want to monitor. The Operating System will block on the `poll()` call until one event occurs (e.g. "socket ready to write/read"). + +The plan is to have a `struct pollfd` with the info about which sockets fd we want to monitor. The Operating System will +block on the `poll()` call until one event occurs (e.g. "socket ready to write/read"). ```c #include @@ -37,13 +42,16 @@ struct pollfd { ``` `events` is a bitmap of the following values: + - `POLLIN` (alert when I can read data) - `POLLOUT` (alert when I can send data) **I won't continue this section, read below** ### epoll() - I/O Event Notification (Async) -It is similar to `poll()` but more efficient when dealing with lots of fds. The array is in the kernel space, no further copies. Nice explaination [here](https://copyconstruct.medium.com/the-method-to-epolls-madness-d9d2d6378642). + +It is similar to `poll()` but more efficient when dealing with lots of fds. The array is in the kernel space, no further +copies. Nice explanation [here](https://copyconstruct.medium.com/the-method-to-epolls-madness-d9d2d6378642). ```c #include @@ -51,7 +59,8 @@ It is similar to `poll()` but more efficient when dealing with lots of fds. The int epoll_create1(int flags); ``` -Just pass 0 for the `flags`, it is an improved version of the `epoll_create()`. It creates a new epoll instance and returns the fd of that instance. +Just pass 0 for the `flags`, it is an improved version of the `epoll_create()`. It creates a new epoll instance and +returns the fd of that instance. 
```c #include diff --git a/notes/basic-concepts.md b/notes/basic-concepts.md index 34544bd..fecd042 100644 --- a/notes/basic-concepts.md +++ b/notes/basic-concepts.md @@ -1,4 +1,5 @@ # Basic Concepts + Before reading, this document could contain errors, please check everything you read. - [Basic Concepts](#basic-concepts) @@ -6,10 +7,10 @@ Before reading, this document could contain errors, please check everything you - [Internet sockets](#internet-sockets) - [Byte Order](#byte-order) - [Structs](#structs) - - [struct addrinfo](#struct-addrinfo) - - [struct sockaddr](#struct-sockaddr) - - [struct sockaddr\_in](#struct-sockaddr_in) - - [struct sockaddr\_storage](#struct-sockaddr_storage) + - [struct addrinfo](#struct-addrinfo) + - [struct sockaddr](#struct-sockaddr) + - [struct sockaddr\_in](#struct-sockaddr_in) + - [struct sockaddr\_storage](#struct-sockaddr_storage) - [IP Addresses](#ip-addresses) - [getaddrinfo()](#getaddrinfo) - [socket()](#socket-1) @@ -23,23 +24,29 @@ Before reading, this document could contain errors, please check everything you - [getpeername()](#getpeername) - [gethostname()](#gethostname) - ### Socket -When Unix programs do some I/O they do it reading/writing to a **file descriptor**. A file descriptor is an integer associated with an open file, it can be anything. To communicate over the internet using a file descriptor we'll make a call to the `socket()` system routine. + +When Unix programs do some I/O they do it reading/writing to a **file descriptor**. A file descriptor is an integer +associated with an open file, it can be anything. To communicate over the internet using a file descriptor we'll make a +call to the `socket()` system routine. ### Internet sockets + There are two types of Internet sockets: + 1. Stream Sockets (SOCK_STREAM) - error-free and a realiable two-way communication (TCP) 2. 
Datagram Sockets (SOCK_DGRAM) - connectionless (UDP) ### Byte Order + - Big-Endian (also called **Network Byte Order**) - Little-Endian -Before making any transmission we have to convert the byte order to a Network Byte Order, we can do this with simple functions: +Before making any transmission we have to convert the byte order to a Network Byte Order, we can do this with simple +functions: | Function | Description | -| -------- | --------------------- | +|----------|-----------------------| | htons() | Host to Network Short | | htonl() | Host to Network Long | | ntohs() | Network to Host Short | @@ -48,7 +55,9 @@ Before making any transmission we have to convert the byte order to a Network By And convert the answer to the host byte order. ### Structs + #### struct addrinfo + This struct prepares the socket address strcutures for subsequent use. ```c @@ -66,6 +75,7 @@ struct addrinfo { ``` #### struct sockaddr + Inside the struct we can see there is a pointer to the `struct sockaddr`, that is defined as follows: ```c @@ -76,7 +86,9 @@ struct sockaddr { ``` #### struct sockaddr_in -But, we can avoid to pack manually the stuff inside this struct and use the `struct sockaddr_in` (or `struct sockaddr_in6` for IPv6) with a fast cast that is made for the Internet: + +But, we can avoid to pack manually the stuff inside this struct and use the `struct sockaddr_in` (or +`struct sockaddr_in6` for IPv6) with a fast cast that is made for the Internet: ```c struct sockaddr_in { @@ -90,7 +102,10 @@ struct sockaddr_in { `sin_zero` is used to pad the struct to the length of a sockaddr and it should be set to all zeros (`memset()`). #### struct sockaddr_storage -This struct is designed to storage both IPv4 and IPv6 structures. Example, when a client is going to connect to your server you don't know if it is a IPv4 or IPv6 so you use this struct and then cast to what you need (check the `ss_family` first). + +This struct is designed to storage both IPv4 and IPv6 structures. 
Example, when a client is going to connect to your +server you don't know if it is a IPv4 or IPv6 so you use this struct and then cast to what you need (check the +`ss_family` first). ```c struct sockaddr_storage { @@ -104,6 +119,7 @@ struct sockaddr_storage { ``` ### IP Addresses + Here's a way to convert an IP address string into a struct. ```c @@ -114,9 +130,11 @@ inet_pton(AF_INET, "192.168.0.1", &(sa.sin_addr)); inet_pton(AF_INET6, "2001:db8:63b3:1::3490", &(sa6.sin6_addr)); ``` -There is a very easy function called `inet_pton()`, *pton* stands for **Presentation to network**. If you want to do the same but from binary to string you have `inet_ntop()`. +There is a very easy function called `inet_pton()`, *pton* stands for **Presentation to network**. If you want to do the +same but from binary to string you have `inet_ntop()`. ### getaddrinfo() + ```c #include #include @@ -128,10 +146,12 @@ int getaddrinfo(const char *node, // e.g. "www.example.com" or IP struct addrinfo **res); ``` -The `node` could be a host name or IP address. `service` could be a port number or a service found in `/etc/services` (e.g. "http" or "ftp" or "telnet"). +The `node` could be a host name or IP address. `service` could be a port number or a service found in `/etc/services` ( +e.g. "http" or "ftp" or "telnet"). `hints` points to a struct you already filled and `res` contains a linked list of results. ### socket() + ```c #include #include @@ -139,10 +159,13 @@ The `node` could be a host name or IP address. `service` could be a port number int socket(int domain, int type, int protocol); ``` -`domain` could be `PF_INET` or `PF_INET6`, `type` instead TCP or UDP and `protocol` to 0. `PF_INET` is very close to `AF_INET`! However, we can avoid to put the stuff manually and use the results from `getaddrinfo()`. +`domain` could be `PF_INET` or `PF_INET6`, `type` instead TCP or UDP and `protocol` to 0. `PF_INET` is very close to +`AF_INET`! 
However, we can avoid to put the stuff manually and use the results from `getaddrinfo()`. ### bind() -Do this if you're going to listen on a port. The port is used by the kernel to match an incoming packet to a socket descriptor. + +Do this if you're going to listen on a port. The port is used by the kernel to match an incoming packet to a socket +descriptor. ```c #include @@ -152,6 +175,7 @@ int bind(int sockfd, struct sockaddr *my_addr, int addrlen); ``` ### connect() + ```c #include #include @@ -160,13 +184,16 @@ int connect(int sockfd, struct sockaddr *serv_addr, int addrlen); ``` ### listen() + ```c int listen(int sockfd, int backlog); ``` -`backlog` is the amount of max clients allowed on the incoming queue. A client will wait in the queue until you accept it. +`backlog` is the amount of max clients allowed on the incoming queue. A client will wait in the queue until you accept +it. ### accept() + ```c #include #include @@ -177,21 +204,27 @@ int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen); After you accept a client, the function will return a new socket file descriptor used to communicate. ### send() and recv() + ```c int send(int sockfd, const void *msg, int len, int flags); ``` -Just put `flags` to 0. It will return the bytes sent, but sometimes it could not match the len of the data sent, it's up to you to send the rest of the string (it should sent 1K of data without splitting). +Just put `flags` to 0. It will return the bytes sent, but sometimes it could not match the len of the data sent, it's up +to you to send the rest of the string (it should send 1K of data without splitting). ```c int recv(int sockfd, void *buf, int len, int flags); ``` -Put 0 at `flags` (see the man page for more info). The `sockfd` is the file descriptor to read from. The function could return 0 (this means the remote side has closed the connection). + +Put 0 at `flags` (see the man page for more info). The `sockfd` is the file descriptor to read from. 
The function could +return 0 (this means the remote side has closed the connection). ### sendto() and recvfrom() + It's the DGRAM equivalent of STREAM. Marked as *TODO*. ### close() and shutdown() + To close the connection just use the regular Unix file descriptor `close()` function: ```c @@ -205,14 +238,15 @@ int shutdown(int sockfd, int how); ``` `how` is one of the following: -| how | Effect | +| how | Effect | | --- | ------------------------ | -| 0 | No future receives | -| 1 | No future sends | -| 2 | No future receives/sends | +| 0 | No future receives | +| 1 | No future sends | +| 2 | No future receives/sends | The 2 is like `close()`, use `close()`. ### getpeername() + This function is quite simple. It will tell you who is in the other side of the connection. ```c @@ -222,6 +256,7 @@ int getpeername(int sockfd, struct sockaddr *addr, int *addrlen); ``` Example of getting client's IP: + ```c struct sockaddr_storage their; socklen_t their_len = sizeof their; @@ -234,6 +269,7 @@ inet_ntop(AF_INET, &client->sin_addr, client_ip, INET_ADDRSTRLEN); ``` ### gethostname() + ```c #include diff --git a/notes/hash-table.md b/notes/hash-table.md index 70c2c64..e10e9b4 100644 --- a/notes/hash-table.md +++ b/notes/hash-table.md @@ -1,19 +1,25 @@ # Hash Table -Well, the moment has come. I never made a Hash Map algorithm, but in this scenario I have to save both fd and sockaddr (I could make a simply linked list, but it's a way to learn new things). + +Well, the moment has come. I never made a Hash Map algorithm, but in this scenario I have to save both fd and sockaddr ( +I could make a simply linked list, but it's a way to learn new things). Let's start with a little bit of theory. -A *Hash Table* is a data structure also called **dictionary** or **map**. It maps *keys* to *values* thanks to a **hash function** that computes and *index* (**hash code**) into an array of **buckets**. +A *Hash Table* is a data structure also called **dictionary** or **map**. 
It maps *keys* to *values* thanks to a **hash +function** that computes an *index* (**hash code**) into an array of **buckets**. -One problem could be the *hash collision* where the hash function computes the same index for different values. A fix could be the **chaining** method, where for the same hash code you can make a linked list and append the index. Then the lookup function will go through the list and find the key. +One problem could be the *hash collision* where the hash function computes the same index for different values. A fix +could be the **chaining** method, where for the same hash code you can make a linked list and append the index. Then the +lookup function will go through the list and find the key. In a Hash Map you can insert, delete and lookup (simply search). ### Hash function + The easiest way... don't judge me. - Integer keys: -$$ hash(\text{key}) = \text{key} \mod \text{table\_dim} $$ + $$ hash(\text{key}) = \text{key} \mod \text{table\_dim} $$ - String keys: -$$ hash(key) = \sum_{i=0}^{len(key) - 1} ascii\_value(key[i]) * prime\_number$$ + $$ hash(key) = \sum_{i=0}^{len(key) - 1} ascii\_value(key[i]) * prime\_number$$ diff --git a/notes/http-request.md b/notes/http-request.md index 66fcd10..06fa95a 100644 --- a/notes/http-request.md +++ b/notes/http-request.md @@ -1,4 +1,5 @@ # HTTP Request + This is an example of a basic HTTP request made from the browser: ```bash @@ -10,12 +11,16 @@ Accept-Encoding: gzip, deflate Connection: Keep-Alive ``` +> Thanks tutorialspoint + The first line is a *request line*. It has: + - Method (GET, POST, HEAD, ...) 
- Location (the request resource, file) - HTTP version # HTTP Response + ```bash HTTP/1.1 200 OK\r\n Content-Type: text/html\r\n diff --git a/src/http/http.c b/src/http/http.c index 5e28aee..93b7aa8 100644 --- a/src/http/http.c +++ b/src/http/http.c @@ -8,17 +8,17 @@ http_t *http_parse(char *request_str) { /* Parse HTTP method */ char *pch = strtok(request_str, " "); - printf("%s\n", pch); + printf("[http] method: %s\n", pch); http_parse_method(request, pch); /* Parse location */ pch = strtok(NULL, " "); - printf("%s\n", pch); + printf("[http] location: %s\n", pch); strncpy(request->location, pch, LOCATION_LEN); /* Parse HTTP version */ pch = strtok(NULL, " \r\n"); - printf("%s\n", pch); + printf("[http] version: %s\n", pch); strncpy(request->http_version, pch, HTTP_VERSION_LEN); /* Parse other stuff... */ @@ -27,6 +27,10 @@ http_t *http_parse(char *request_str) { } void http_parse_method(http_t *request, const char *method) { + if (request == NULL) { + return; + } + if (strcmp(method, "GET") == 0) { request->method = GET; } diff --git a/src/main.c b/src/main.c index 6c88d86..c501701 100644 --- a/src/main.c +++ b/src/main.c @@ -3,9 +3,8 @@ #include "server/server.h" #include "utils/colors.h" - int main(int argc, char **argv) { - fprintf(stdout, BOLD GREEN "[server] Running cws...\n" RESET); + fprintf(stdout, BOLD GREEN "[server] Running cws on http://localhost:%s...\n" RESET, "3030"); int ret = start_server(NULL, "3030"); if (ret < 0) { diff --git a/src/server/server.c b/src/server/server.c index fc2e44a..52f2e61 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -20,7 +20,7 @@ int start_server(const char *hostname, const char *service) { int sockfd = socket(res->ai_family, res->ai_socktype, res->ai_protocol); fprintf(stdout, YELLOW "[server] sockfd: %d\n" RESET, sockfd); - int opt = 1; + const int opt = 1; status = setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof opt); if (status != 0) { fprintf(stderr, RED BOLD "[server] setsockopt(): %s\n" 
RESET, strerror(errno)); @@ -71,16 +71,10 @@ void handle_clients(int sockfd) { epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLET); struct epoll_event *revents = malloc(EPOLL_MAXEVENTS * sizeof(struct epoll_event)); - int nfds; - - // char *msg = "Hello there!"; - // size_t msg_len = strlen(msg); - char data[4096]; int client_fd; - int run = 1; - while (run) { - nfds = epoll_wait(epfd, revents, EPOLL_MAXEVENTS, EPOLL_TIMEOUT); + while (1) { + int nfds = epoll_wait(epfd, revents, EPOLL_MAXEVENTS, EPOLL_TIMEOUT); for (int i = 0; i < nfds; ++i) { if (revents[i].data.fd == sockfd) { @@ -93,13 +87,11 @@ void handle_clients(int sockfd) { setnonblocking(client_fd); epoll_ctl_add(epfd, client_fd, EPOLLIN); hm_push(clients, client_fd, &their_sa); - - // int bytes_sent = send(client_fd, msg, msg_len, 0); - // fprintf(stdout, "[server] Sent %d bytes\n", bytes_sent); } else { + char data[4096] = {0}; /* Incoming data */ client_fd = revents[i].data.fd; - int bytes_read = recv(client_fd, data, sizeof data, 0); + const ssize_t bytes_read = recv(client_fd, data, sizeof data, 0); if (bytes_read == 0) { /* Client disconnected */ @@ -113,19 +105,16 @@ void handle_clients(int sockfd) { continue; } - send_html_test(client_fd); - - // fprintf(stdout, "[server] Bytes read (%d):\n%s\n", bytes_read, data); - if (strcmp(data, "stop") == 0) { - fprintf(stdout, GREEN BOLD "[server] Stopping...\n" RESET); - run = 0; - break; - } + /* Data len is correctly printed, but not data */ + fprintf(stdout, "data len: %zu\n", bytes_read); + data[bytes_read] = '\0'; /* Parse HTTP request */ + fprintf(stdout, "[server] data: %s\n", data); http_t *request = http_parse(data); fprintf(stdout, "[server] request location: %s\n", request->location); - http_send_response(request); + send_html_test(client_fd); + // http_send_response(request); http_free(request); /* Clear str */ @@ -135,6 +124,7 @@ void handle_clients(int sockfd) { } /* Clean up everything */ + /* TODO: fix endless loop using cli args */ 
free(revents); close(epfd); close_fds(clients); @@ -214,21 +204,25 @@ void send_html_test(int sockfd) { "\n" "\n" "\n" - "

Hello from cws!

\n" + "

Hello from cws!

\n" "\n" "\n" ""; - char len[4096]; - size_t content_length = strlen(html); - sprintf(len, "Content-Length: %zu\r\n", content_length); - fprintf(stdout, "Content-length: %zu\n", content_length); - char response[65535]; - strcat(response, "HTTP/1.1 200 OK\r\n"); - strcat(response, "Content-Type: text/html\r\n"); - strcat(response, len); - strcat(response, "Connection: closed\r\n"); - strcat(response, "\r\n"); - strcat(response, html); + const size_t content_length = strlen(html); + char len[256]; + snprintf(len, sizeof len, "Content-Length: %zu\r\n", content_length); + + char response[65535] = {0}; + snprintf(response, sizeof response, + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "%s" + "Connection: close\r\n" + "\r\n" + "%s", + len, html); + + // fprintf(stdout, "[http] response: %s\n", response); send(sockfd, response, strlen(response), 0); } diff --git a/www/index.html b/www/index.html index 2ff87dc..b6992b7 100644 --- a/www/index.html +++ b/www/index.html @@ -8,7 +8,7 @@ -

Hello from cws!

+

Hello from cws!

\ No newline at end of file