truncate,shmem: Handle truncates that split large folios
Handle folio splitting in the parts of the truncation functions which
already handle partial pages.  Factor all that code out into a new
function called truncate_inode_partial_folio().
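
For intuition, a minimal userspace sketch of the offset/length arithmetic the new
truncate_inode_partial_folio() performs before zeroing. This is not part of the
patch; the folio position, folio size and punch range below are made-up
illustrative values, and the real helper is in the mm/truncate.c hunk.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical numbers: a 2MiB folio at file offset 0, and a hole
	 * punched over bytes 4096..1048575 (lstart..lend, inclusive). */
	uint64_t pos = 0;                /* byte offset of the folio in the file */
	uint64_t size = 2u << 20;        /* folio size: 2MiB (PMD-sized) */
	uint64_t lstart = 4096, lend = 1048575;

	uint64_t offset = lstart > pos ? lstart - pos : 0;
	uint64_t length = size;
	if (pos + length <= lend)        /* the folio's tail lies inside the range */
		length -= offset;        /* zero from offset to the end of the folio */
	else                             /* the range ends inside the folio */
		length = lend + 1 - pos - offset;

	/* Prints "zero 1044480 bytes at offset 4096 within the folio". */
	printf("zero %llu bytes at offset %llu within the folio\n",
	       (unsigned long long)length, (unsigned long long)offset);
	return 0;
}

With these numbers the helper would zero 1044480 bytes starting 4096 bytes into
the folio, exactly the punched range, before attempting to split the folio.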

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Matthew Wilcox (Oracle) committed Jan 8, 2022
1 parent f6357c3 · commit b9a8a41
Showing 3 changed files with 122 additions and 105 deletions.
mm/internal.h: 2 additions & 0 deletions
@@ -98,6 +98,8 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
void filemap_free_folio(struct address_space *mapping, struct folio *folio);
int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
bool truncate_inode_partial_folio(struct folio *folio, loff_t start,
loff_t end);

/**
* folio_evictable - Test whether a folio is evictable.
mm/shmem.c: 45 additions & 63 deletions
@@ -880,30 +880,26 @@ void shmem_unlock_mapping(struct address_space *mapping)
}
}

/*
* Check whether a hole-punch or truncation needs to split a huge page,
* returning true if no split was required, or the split has been successful.
*
* Eviction (or truncation to 0 size) should never need to split a huge page;
* but in rare cases might do so, if shmem_undo_range() failed to trylock on
* head, and then succeeded to trylock on tail.
*
* A split can only succeed when there are no additional references on the
* huge page: so the split below relies upon find_get_entries() having stopped
* when it found a subpage of the huge page, without getting further references.
*/
static bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end)
static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index)
{
if (!PageTransCompound(page))
return true;

/* Just proceed to delete a huge page wholly within the range punched */
if (PageHead(page) &&
page->index >= start && page->index + HPAGE_PMD_NR <= end)
return true;
struct folio *folio;
struct page *page;

/* Try to split huge page, so we can truly punch the hole or truncate */
return split_huge_page(page) >= 0;
/*
* At first avoid shmem_getpage(,,,SGP_READ): that fails
* beyond i_size, and reports fallocated pages as holes.
*/
folio = __filemap_get_folio(inode->i_mapping, index,
FGP_ENTRY | FGP_LOCK, 0);
if (!xa_is_value(folio))
return folio;
/*
* But read a page back from swap if any of it is within i_size
* (although in some cases this is just a waste of time).
*/
page = NULL;
shmem_getpage(inode, index, &page, SGP_READ);
return page ? page_folio(page) : NULL;
}

/*
@@ -917,10 +913,10 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
struct shmem_inode_info *info = SHMEM_I(inode);
pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
pgoff_t end = (lend + 1) >> PAGE_SHIFT;
unsigned int partial_start = lstart & (PAGE_SIZE - 1);
unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1);
struct folio_batch fbatch;
pgoff_t indices[PAGEVEC_SIZE];
struct folio *folio;
bool same_folio;
long nr_swaps_freed = 0;
pgoff_t index;
int i;
@@ -936,7 +932,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
while (index < end && find_lock_entries(mapping, index, end - 1,
&fbatch, indices)) {
for (i = 0; i < folio_batch_count(&fbatch); i++) {
struct folio *folio = fbatch.folios[i];
folio = fbatch.folios[i];

index = indices[i];

@@ -959,33 +955,30 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
index++;
}

if (partial_start) {
struct page *page = NULL;
shmem_getpage(inode, start - 1, &page, SGP_READ);
if (page) {
unsigned int top = PAGE_SIZE;
if (start > end) {
top = partial_end;
partial_end = 0;
}
zero_user_segment(page, partial_start, top);
set_page_dirty(page);
unlock_page(page);
put_page(page);
same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT);
if (folio) {
same_folio = lend < folio_pos(folio) + folio_size(folio);
folio_mark_dirty(folio);
if (!truncate_inode_partial_folio(folio, lstart, lend)) {
start = folio->index + folio_nr_pages(folio);
if (same_folio)
end = folio->index;
}
folio_unlock(folio);
folio_put(folio);
folio = NULL;
}
if (partial_end) {
struct page *page = NULL;
shmem_getpage(inode, end, &page, SGP_READ);
if (page) {
zero_user_segment(page, 0, partial_end);
set_page_dirty(page);
unlock_page(page);
put_page(page);
}

if (!same_folio)
folio = shmem_get_partial_folio(inode, lend >> PAGE_SHIFT);
if (folio) {
folio_mark_dirty(folio);
if (!truncate_inode_partial_folio(folio, lstart, lend))
end = folio->index;
folio_unlock(folio);
folio_put(folio);
}
if (start >= end)
return;

index = start;
while (index < end) {
@@ -1001,7 +994,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
continue;
}
for (i = 0; i < folio_batch_count(&fbatch); i++) {
struct folio *folio = fbatch.folios[i];
folio = fbatch.folios[i];

index = indices[i];
if (xa_is_value(folio)) {
@@ -1019,8 +1012,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
folio_lock(folio);

if (!unfalloc || !folio_test_uptodate(folio)) {
struct page *page = folio_file_page(folio,
index);
if (folio_mapping(folio) != mapping) {
/* Page was replaced by swap: retry */
folio_unlock(folio);
Expand All @@ -1029,18 +1020,9 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
}
VM_BUG_ON_FOLIO(folio_test_writeback(folio),
folio);
if (shmem_punch_compound(page, start, end))
truncate_inode_folio(mapping, folio);
else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
/* Wipe the page and don't get stuck */
clear_highpage(page);
flush_dcache_page(page);
folio_mark_dirty(folio);
if (index <
round_up(start, HPAGE_PMD_NR))
start = index + 1;
}
truncate_inode_folio(mapping, folio);
}
index = folio->index + folio_nr_pages(folio) - 1;
folio_unlock(folio);
}
folio_batch_remove_exceptionals(&fbatch);
mm/truncate.c: 75 additions & 42 deletions
@@ -228,6 +228,58 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
return 0;
}

/*
* Handle partial folios. The folio may be entirely within the
* range if a split has raced with us. If not, we zero the part of the
* folio that's within the [start, end] range, and then split the folio if
* it's large. split_page_range() will discard pages which now lie beyond
* i_size, and we rely on the caller to discard pages which lie within a
* newly created hole.
*
* Returns false if splitting failed so the caller can avoid
* discarding the entire folio which is stubbornly unsplit.
*/
bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
{
loff_t pos = folio_pos(folio);
unsigned int offset, length;

if (pos < start)
offset = start - pos;
else
offset = 0;
length = folio_size(folio);
if (pos + length <= (u64)end)
length = length - offset;
else
length = end + 1 - pos - offset;

folio_wait_writeback(folio);
if (length == folio_size(folio)) {
truncate_inode_folio(folio->mapping, folio);
return true;
}

/*
* We may be zeroing pages we're about to discard, but it avoids
* doing a complex calculation here, and then doing the zeroing
* anyway if the page split fails.
*/
folio_zero_range(folio, offset, length);

cleancache_invalidate_page(folio->mapping, &folio->page);
if (folio_has_private(folio))
do_invalidatepage(&folio->page, offset, length);
if (!folio_test_large(folio))
return true;
if (split_huge_page(&folio->page) == 0)
return true;
if (folio_test_dirty(folio))
return false;
truncate_inode_folio(folio->mapping, folio);
return true;
}

/*
* Used to get rid of pages on hardware memory corruption.
*/
@@ -294,20 +346,16 @@ void truncate_inode_pages_range(struct address_space *mapping,
{
pgoff_t start; /* inclusive */
pgoff_t end; /* exclusive */
unsigned int partial_start; /* inclusive */
unsigned int partial_end; /* exclusive */
struct folio_batch fbatch;
pgoff_t indices[PAGEVEC_SIZE];
pgoff_t index;
int i;
struct folio *folio;
bool same_folio;

if (mapping_empty(mapping))
goto out;

/* Offsets within partial pages */
partial_start = lstart & (PAGE_SIZE - 1);
partial_end = (lend + 1) & (PAGE_SIZE - 1);

/*
* 'start' and 'end' always covers the range of pages to be fully
* truncated. Partial pages are covered with 'partial_start' at the
@@ -340,47 +388,32 @@ void truncate_inode_pages_range(struct address_space *mapping,
cond_resched();
}

if (partial_start) {
struct page *page = find_lock_page(mapping, start - 1);
if (page) {
unsigned int top = PAGE_SIZE;
if (start > end) {
/* Truncation within a single page */
top = partial_end;
partial_end = 0;
}
wait_on_page_writeback(page);
zero_user_segment(page, partial_start, top);
cleancache_invalidate_page(mapping, page);
if (page_has_private(page))
do_invalidatepage(page, partial_start,
top - partial_start);
unlock_page(page);
put_page(page);
same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0);
if (folio) {
same_folio = lend < folio_pos(folio) + folio_size(folio);
if (!truncate_inode_partial_folio(folio, lstart, lend)) {
start = folio->index + folio_nr_pages(folio);
if (same_folio)
end = folio->index;
}
folio_unlock(folio);
folio_put(folio);
folio = NULL;
}
if (partial_end) {
struct page *page = find_lock_page(mapping, end);
if (page) {
wait_on_page_writeback(page);
zero_user_segment(page, 0, partial_end);
cleancache_invalidate_page(mapping, page);
if (page_has_private(page))
do_invalidatepage(page, 0,
partial_end);
unlock_page(page);
put_page(page);
}

if (!same_folio)
folio = __filemap_get_folio(mapping, lend >> PAGE_SHIFT,
FGP_LOCK, 0);
if (folio) {
if (!truncate_inode_partial_folio(folio, lstart, lend))
end = folio->index;
folio_unlock(folio);
folio_put(folio);
}
/*
* If the truncation happened within a single page no pages
* will be released, just zeroed, so we can bail out now.
*/
if (start >= end)
goto out;

index = start;
for ( ; ; ) {
while (index < end) {
cond_resched();
if (!find_get_entries(mapping, index, end - 1, &fbatch,
indices)) {
