Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use readdir instead of deprecated readdir_r to avoid warnings #430

Merged
merged 10 commits into from
Jul 4, 2017
98 changes: 55 additions & 43 deletions src/unix/lwt_unix_unix.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#include <caml/unixsupport.h>
#include <caml/version.h>
#include <dirent.h>
#include <poll.h>
#include <sys/resource.h>
#include <sys/time.h>
Expand Down Expand Up @@ -841,8 +842,6 @@ CAMLprim value lwt_unix_mcast_set_ttl(value fd, value ttl)
#define VAL_MCAST_ACTION_ADD (Val_int(0))
#define VAL_MCAST_ACTION_DROP (Val_int(1))

#define GET_INET_ADDR(v) (*((struct in_addr *)(v)))

CAMLprim value lwt_unix_mcast_modify_membership(value fd, value v_action,
value if_addr, value group_addr)
{
Expand Down Expand Up @@ -1755,22 +1754,6 @@ CAMLprim value lwt_unix_rewinddir_job(value dir)
return lwt_unix_alloc_job(&(job->job));
}

/* struct dirent size */

/* Some kind of estimate of the true size of a dirent structure, including the
space used for the name. This is controversial, and there is an ongoing
discussion (see Internet) about deprecating readdir_r because of the need to
guess the size in this way. */
static size_t dirent_size(DIR *dir)
{
size_t size = offsetof(struct dirent, d_name) +
fpathconf(dirfd(dir), _PC_NAME_MAX) + 1;

if (size < sizeof(struct dirent)) size = sizeof(struct dirent);

return size;
}

/* +-----------------------------------------------------------------+
| JOB: readdir |
+-----------------------------------------------------------------+ */
Expand All @@ -1779,30 +1762,36 @@ struct job_readdir {
struct lwt_unix_job job;
DIR *dir;
struct dirent *entry;
struct dirent *ptr;
int result;
int error_code;
};

static void worker_readdir(struct job_readdir *job)
{
job->entry = lwt_unix_malloc(dirent_size(job->dir));
job->result = readdir_r(job->dir, job->entry, &job->ptr);
// From the man page of readdir
// If the end of the directory stream is reached, NULL is returned and
// errno is not changed. If an error occurs, NULL is returned and errno
// is set appropriately. To distinguish end of stream and from an error,
// set errno to zero before calling readdir() and then check the value of
// errno if NULL is returned.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 for including this comment!

errno = 0;
job->entry = readdir(job->dir);
job->error_code = errno;
}

static value result_readdir(struct job_readdir *job)
{
int result = job->result;
if (result) {
free(job->entry);
lwt_unix_free_job(&job->job);
unix_error(result, "readdir", Nothing);
} else if (job->ptr == NULL) {
free(job->entry);
LWT_UNIX_CHECK_JOB(job, job->entry == NULL && job->error_code != 0,
"readdir");
if (job->entry == NULL) {
// From the man page
// On success, readdir() returns a pointer to a dirent structure.
// (This structure may be statically allocated; do not attempt to
// free(3) it.)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+2

lwt_unix_free_job(&job->job);
caml_raise_end_of_file();
} else {
value name = caml_copy_string(job->entry->d_name);
free(job->entry);
// see above about not freeing dirent
lwt_unix_free_job(&job->job);
return name;
}
Expand All @@ -1822,28 +1811,46 @@ CAMLprim value lwt_unix_readdir_job(value val_dir)
struct job_readdir_n {
struct lwt_unix_job job;
DIR *dir;
/* count serves two purpose:
1. Transmit the maximum number of entries requested by the programmer.
See `readdir_n`'s OCaml side documentation.
2. Transmit the number of actually read entries from the `worker` to
the result parser.
See below `worker_readdir_n` and `result_readdir_n`
*/
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely 👍

long count;
int error_code;
struct dirent entries[];
char *entries[];
};

static void worker_readdir_n(struct job_readdir_n *job)
{
long i;
for (i = 0; i < job->count; i++) {
struct dirent *ptr;
int result = readdir_r(job->dir, &job->entries[i], &ptr);
errno = 0;
struct dirent *entry = readdir(job->dir);

/* An error happened. */
if (result != 0) {
job->error_code = result;
if (entry == NULL && errno != 0) {
job->count = i;
job->error_code = errno;
return;
}

/* End of directory reached */
if (ptr == NULL) break;
}
if (entry == NULL && errno == 0) break;

/* readdir is good */
char *name = strdup(entry->d_name);
if (name == NULL) {
job->count = i;
job->error_code = errno;
return;
}

/* All is good */
job->entries[i] = name;
}
job->count = i;
job->error_code = 0;
}
Expand All @@ -1854,14 +1861,17 @@ static value result_readdir_n(struct job_readdir_n *job)
CAMLlocal1(result);
int error_code = job->error_code;
if (error_code) {
long i;
for (i = 0; i < job->count; i++) free(job->entries[i]);
lwt_unix_free_job(&job->job);
unix_error(error_code, "readdir", Nothing);
} else {
result = caml_alloc(job->count, 0);
long i;
for (i = 0; i < job->count; i++) {
Store_field(result, i, caml_copy_string(job->entries[i].d_name));
Store_field(result, i, caml_copy_string(job->entries[i]));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also free the entries that actually got allocated, both if the job fails and if it succeeds. For that, we need a valid job->count in all cases, but we don't have one in case of an error from readdir in the worker (because of the hard return in it, above).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's the thing I had doubts about. But then, the previous code did not try to deallocate any of the dirents. Was there a memory leak before? What's the difference in the code that makes that change necessary?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC: the code before this PR allocated a flat array of dirents, i.e. if it calculated that a dirent was 1024 bytes and needed to allocate 32 of them, it would allocate 32KB of space in one block. The strings were written directly into various parts of this single block. We controlled the maximum length of these strings when calling readdir_r.

The new code inserts a level of indirection, because we don't know how long the strings returned by readdir will be. So, for reading 32 entries, it is now allocating space for 32 8-byte pointers in one block, and each one of those pointers may then be set to the address of a separately-allocated string (by strdup). We need to free these separate string blocks.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh. That makes sense.

}
for (i = 0; i < job->count; i++) free(job->entries[i]);
lwt_unix_free_job(&job->job);
CAMLreturn(result);
}
Expand All @@ -1872,7 +1882,7 @@ CAMLprim value lwt_unix_readdir_n_job(value val_dir, value val_count)
long count = Long_val(val_count);
DIR *dir = DIR_Val(val_dir);

LWT_UNIX_INIT_JOB(job, readdir_n, dirent_size(dir) * count);
LWT_UNIX_INIT_JOB(job, readdir_n, sizeof(char *) * count);
job->dir = dir;
job->count = count;

Expand Down Expand Up @@ -2364,10 +2374,7 @@ struct job_gethostbyname {
};

CAMLexport value alloc_inet_addr(struct in_addr *inaddr);
#define GET_INET_ADDR(v) (*((struct in_addr *)(v)))

CAMLexport value alloc_inet6_addr(struct in6_addr *inaddr);
#define GET_INET6_ADDR(v) (*((struct in6_addr *)(v)))

static value alloc_one_addr(char const *a)
{
Expand Down Expand Up @@ -3192,7 +3199,12 @@ CAMLprim value lwt_unix_getcwd_job(value unit)
The last argument is the number of bytes of storage to reserve in memory
immediately following the `struct`. This is for fields such as
`char data[]` at the end of the struct. It is typically zero. For an
example where it is not zero, see `lwt_unix_read_job` again. */
example where it is not zero, see `lwt_unix_read_job` again.

If the additional data is stored inline in the job struct, it is
deallocated with `lwt_unix_free_job`. If the additional data is for
pointers to additional structure, you must remember to deallocate it
yourself. For an example of this, see `readdir_n`.*/
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Thanks, this is one less task in my local TODO list :)

LWT_UNIX_INIT_JOB(job, getcwd, 0);

/* Allocate a corresponding object in the OCaml heap. `&job->job` is the
Expand Down