It's in the source, but it's not obvious to those who aren't conversant with the low-level details of file systems. In particular, look at copy.c:
Code:
/* Copy the regular file open on SRC_FD/SRC_NAME to DEST_FD/DST_NAME,
using the BUF_SIZE-byte buffer *ABUF for temporary storage,
allocating it lazily if *ABUF is null.
If HOLE_SIZE is zero and ALLOW_REFLINK is true, first try
copy_file_range, falling back on read/write if it cannot be used.
If HOLE_SIZE is nonzero, examine the data read in HOLE_SIZE-byte
chunks and, instead of writing chunks that are entirely NUL, call
create_hole (passing PUNCH_HOLES through) for each run of them.
Copy no more than MAX_N_READ bytes.
Return true upon successful completion;
print a diagnostic and return false upon error.
Note that for best results, *ABUF should be "well"-aligned.
Set *LAST_WRITE_MADE_HOLE to true if the final operation on
DEST_FD introduced a hole. Set *TOTAL_N_READ to the number of
bytes read, including any bytes transferred by copy_file_range. */
static bool
sparse_copy (int src_fd, int dest_fd, char **abuf, size_t buf_size,
size_t hole_size, bool punch_holes, bool allow_reflink,
char const *src_name, char const *dst_name,
uintmax_t max_n_read, off_t *total_n_read,
bool *last_write_made_hole)
{
/* Initialize the outputs up front so they are defined on every
return path, including the early returns below. */
*last_write_made_hole = false;
*total_n_read = 0;
/* If not looking for holes, use copy_file_range if functional,
but don't use if reflink disallowed as that may be implicit. */
if (!hole_size && allow_reflink)
while (max_n_read)
{
/* Copy at most COPY_MAX bytes at a time; this is min
(SSIZE_MAX, SIZE_MAX) truncated to a value that is
surely aligned well. */
ssize_t copy_max = MIN (SSIZE_MAX, SIZE_MAX) >> 30 << 30;
ssize_t n_copied = copy_file_range (src_fd, NULL, dest_fd, NULL,
MIN (max_n_read, copy_max), 0);
if (n_copied == 0)
{
/* copy_file_range incorrectly returns 0 when reading from
the proc file system on the Linux kernel through at
least 5.6.19 (2020), so fall back on 'read' if the
input file seems empty. */
if (*total_n_read == 0)
break;
/* Some data was already copied, so treat 0 as end of input. */
return true;
}
if (n_copied < 0)
{
/* For these errors, silently leave this loop and let the
read/write loop below do (the rest of) the copy.
NOTE(review): presumably these all mean copy_file_range is
unusable for this pair of files - confirm against
copy_file_range(2). */
if (errno == ENOSYS || is_ENOTSUP (errno)
|| errno == EINVAL || errno == EBADF
|| errno == EXDEV || errno == ETXTBSY)
break;
/* copy_file_range might not be enabled in seccomp filters,
so retry with a standard copy. EPERM can also occur
for immutable files, but that would only be in the edge case
where the file is made immutable after creating/truncating,
in which case the (more accurate) error is still shown. */
if (errno == EPERM && *total_n_read == 0)
break;
/* On EINTR account for zero bytes and go around the loop
again; MAX_N_READ is unchanged so the call is retried. */
if (errno == EINTR)
n_copied = 0;
else
{
error (0, errno, _("error copying %s to %s"),
quoteaf_n (0, src_name), quoteaf_n (1, dst_name));
return false;
}
}
max_n_read -= n_copied;
*total_n_read += n_copied;
}
/* State of the pending run, i.e., the contiguous chunks seen so far
that have not yet been written or turned into a hole.
MAKE_HOLE says whether the most recently examined chunk was all
NUL; PSIZE is the byte length of the pending run. Both are
declared outside the read loop, so a run of NUL chunks can carry
over from one read to the next. */
bool make_hole = false;
off_t psize = 0;
while (max_n_read)
{
if (!*abuf)
*abuf = xalignalloc (getpagesize (), buf_size);
char *buf = *abuf;
ssize_t n_read = read (src_fd, buf, MIN (max_n_read, buf_size));
if (n_read < 0)
{
if (errno == EINTR)
continue;
error (0, errno, _("error reading %s"), quoteaf (src_name));
return false;
}
/* A zero-length read ends the copy. */
if (n_read == 0)
break;
max_n_read -= n_read;
*total_n_read += n_read;
/* Loop over the input buffer in chunks of hole_size.
When not looking for holes the whole buffer is one chunk.
CBUF points at the chunk being examined and PBUF at the start
of the pending run within this buffer. */
size_t csize = hole_size ? hole_size : buf_size;
char *cbuf = buf;
char *pbuf = buf;
while (n_read)
{
bool prev_hole = make_hole;
/* The final chunk of the buffer may be shorter than CSIZE. */
csize = MIN (csize, n_read);
/* MAKE_HOLE is left unchanged when not looking for holes, and
on the extra zero-size pass requested below. */
if (hole_size && csize)
make_hole = is_nul (cbuf, csize);
/* TRANSITION: this chunk differs in kind (data vs. NUL) from a
nonempty pending run, so that run must be flushed first.
LAST_CHUNK: either this chunk is data and ends the buffer, or
this is the extra zero-size pass; in both cases the pending
data is written out now. A NUL chunk ending the buffer is
not a last chunk: it stays pending across the next read. */
bool transition = (make_hole != prev_hole) && psize;
bool last_chunk = (n_read == csize && ! make_hole) || ! csize;
if (transition || last_chunk)
{
/* Without a transition the current chunk is of the same kind
as the pending run, so flush it together with that run. */
if (! transition)
psize += csize;
/* PREV_HOLE gives the kind of the pending run: write it if it
is data, otherwise create a hole of the same length. */
if (! prev_hole)
{
if (full_write (dest_fd, pbuf, psize) != psize)
{
error (0, errno, _("error writing %s"),
quoteaf (dst_name));
return false;
}
}
else
{
if (! create_hole (dest_fd, dst_name, punch_holes, psize))
return false;
}
/* Start a new pending run at the current chunk. */
pbuf = cbuf;
psize = csize;
if (last_chunk)
{
if (! csize)
n_read = 0; /* Finished processing buffer. */
if (transition)
csize = 0; /* Loop again to deal with last chunk. */
else
psize = 0; /* Reset for next read loop. */
}
}
else /* Coalesce writes/seeks. */
{
/* PSIZE is an off_t, so guard the accumulation against
overflow. */
if (INT_ADD_WRAPV (psize, csize, &psize))
{
error (0, 0, _("overflow reading %s"), quoteaf (src_name));
return false;
}
}
/* Advance past the chunk just handled; when CSIZE was zeroed
above this leaves both unchanged for the extra pass. */
n_read -= csize;
cbuf += csize;
}
*last_write_made_hole = make_hole;
/* It's tempting to break early here upon a short read from
a regular file. That would save the final read syscall
for each file. Unfortunately that doesn't work for
certain files in /proc or /sys with linux kernels. */
}
/* Ensure a trailing hole is created, so that subsequent
calls of sparse_copy() start at the correct offset. */
if (make_hole && ! create_hole (dest_fd, dst_name, punch_holes, psize))
return false;
else
return true;
}
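The key is the test `if (!hole_size && allow_reflink)` near the top of the function: copy_file_range, and with it fast server-side copies, is attempted only when cp is not scanning the data for holes (hole_size is zero, i.e. the file is not being treated as sparse) and reflinks are allowed. If that check fails, or copy_file_range reports that it cannot be used, the copy falls back to the read/write loop, i.e. the slow round-trip via the client side.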
This restriction is probably there to avoid overrunning the buffer, and also for copy consistency in transit. As coreutils 9.x is the first release series to support copy_file_range, the check should become less stringent over time.