garbage collector as lib so we don't collide with the actual cache driver

This commit is contained in:
HB9HIL
2026-02-02 20:46:46 +01:00
parent 7c16f76a7d
commit 201655524b
4 changed files with 180 additions and 60 deletions

View File

@@ -146,12 +146,6 @@ class cron extends CI_Controller {
$this->cronexpression = null;
}
// we also garbage collect the file cache here every full hour
if ($this->config->item('cache_adapter') == 'file' && date('i') == '00') {
$this->load->driver('cache'); // we don't need any adapter info as we just call the file adapter directly
$this->cache->file->gc();
}
$datetime = new DateTime("now", new DateTimeZone('UTC'));
$datetime = $datetime->format('Ymd H:i:s');
$this->optionslib->update('mastercron_last_run', $datetime , 'no');

View File

@@ -0,0 +1,172 @@
<?php
defined('BASEPATH') OR exit('No direct script access allowed');
/**
* Garbage collection library for file cache
*
* Since we need a garbage collector for file caching, we implement a dynamic
* probability check based on traffic patterns to avoid performance hits on
* high-traffic sites while still ensuring regular cleanup on low-traffic sites.
*
* Why don't we use $this->cache->clean()? Because it deletes everything; we only want to delete expired files.
*
* 2026, Fabian Berg, HB9HIL
*/
class GarbageCollector {

	/**
	 * CodeIgniter super object, used to read the cache configuration.
	 */
	private $CI;

	public function __construct()
	{
		$this->CI =& get_instance();
	}

	/**
	 * Run garbage collection for file cache with traffic-based probability and interval checks.
	 *
	 * Every call counts as one request. Depending on how many requests were
	 * counted since the last collection, only a share of the calls proceeds to
	 * the interval check, and a collection is only started when at least four
	 * hours have passed since the previous one.
	 *
	 * @return int Number of deleted files, or 0 when nothing was collected
	 */
	public function run()
	{
		// The gc checkfile path has to exist on every possible environment. So we choose sys_get_temp_dir()
		// and hash it with FCPATH to avoid collisions between different installations on the same server.
		$gc_file = sys_get_temp_dir() . '/ci_gc_last_run_' . md5(FCPATH) . '.txt';

		// The garbage collection should run around every 4 hours
		$gc_interval = 3600 * 4;

		// Traffic metric: requests since last GC. This call is also a request, so +1.
		list($last_run, $seen_requests) = $this->_read_gc_state($gc_file);
		$request_count = $seen_requests + 1;

		// Dynamic probability based on traffic to reduce load on high-traffic installations.
		// Higher traffic = lower probability to go through to the interval check.
		if ($request_count < 100) {
			$probability = 100; // Low traffic: every request passes
		} elseif ($request_count < 1000) {
			$probability = 50;  // Medium traffic: about every 2nd request passes
		} else {
			$probability = 10;  // High traffic: about every 10th request passes
		}

		// Probability check first; if we lose the draw we only record the request.
		if (rand(1, 100) > $probability) {
			$this->_update_gc_file($gc_file, $last_run, $request_count);
			return 0;
		}

		// Not enough time since the last run: only record the request.
		if (time() - $last_run < $gc_interval) {
			$this->_update_gc_file($gc_file, $last_run, $request_count);
			return 0;
		}

		// We use a lock file so that only one process performs the GC at a time.
		$lock_file = $gc_file . '.lock';
		$fp = fopen($lock_file, 'c');
		if ($fp === FALSE) {
			return 0;
		}

		// Non-blocking: if another process holds the lock it is already collecting.
		if ( ! flock($fp, LOCK_EX | LOCK_NB)) {
			fclose($fp);
			return 0;
		}

		try {
			// The state read above may be stale: another process can have finished
			// a collection between our read and our lock. Read it again while
			// holding the lock and back off if the interval no longer applies.
			list($last_run, $seen_requests) = $this->_read_gc_state($gc_file);
			if (time() - $last_run < $gc_interval) {
				$this->_update_gc_file($gc_file, $last_run, $seen_requests + 1);
				return 0;
			}

			log_message('info', 'Starting file cache garbage collection...');

			// Perform garbage collection itself (without loading the cache driver)
			$result = $this->_run_garbage_collector();

			// Store the current time and reset the request count to 0
			$this->_update_gc_file($gc_file, time(), 0);
		} finally {
			// Always release the lock, even if an exception occurs during GC.
			// The lock file is intentionally NOT deleted: removing it would let a
			// process that still has the old file open lock it while another
			// process creates and locks a new file, so both would run the GC.
			flock($fp, LOCK_UN);
			fclose($fp);
		}

		log_message('info', 'File cache garbage collection completed. Deleted ' . $result . ' expired files.');
		return $result;
	}

	/**
	 * Run file cache garbage collection without loading the cache driver.
	 *
	 * Scans the configured cache directory (APPPATH.'cache/' when 'cache_path'
	 * is empty) and deletes every regular file whose serialized 'time' + 'ttl'
	 * lies in the past. Files with a ttl of 0 or an unknown format are kept.
	 *
	 * @return int Number of deleted files, or 0 on failure
	 */
	public function _run_garbage_collector()
	{
		$configured_path = $this->CI->config->item('cache_path');
		$cache_path = $configured_path == '' ? APPPATH.'cache/' : $configured_path;

		// File names are appended directly, so make sure there is exactly one trailing separator.
		$cache_path = rtrim($cache_path, '/\\') . '/';

		log_message('debug', 'GarbageCollector: Scanning cache path ' . $cache_path);

		if ( ! is_dir($cache_path))
		{
			log_message('error', 'GarbageCollector: Cache path is not a directory or does not exist: ' . $cache_path);
			return 0;
		}

		$handle = opendir($cache_path);
		if ($handle === FALSE)
		{
			log_message('error', 'GarbageCollector: Unable to open cache path: ' . $cache_path);
			return 0;
		}

		// We need to ignore some CI specific files
		$ignore_files = [
			'index.html',
			'.htaccess'
		];

		$deleted = 0;
		$current_time = time();

		while (($file = readdir($handle)) !== FALSE)
		{
			if ($file === '.' || $file === '..' || in_array($file, $ignore_files, TRUE))
			{
				continue;
			}

			$filepath = $cache_path . $file;
			if ( ! is_file($filepath))
			{
				continue;
			}

			// unlink() is best-effort: the file may already have been removed by
			// another request in the meantime, which is fine for us.
			if ($this->_is_expired_cache_file($filepath, $current_time) && @unlink($filepath))
			{
				$deleted++;
			}
		}

		closedir($handle);

		return $deleted;
	}

	/**
	 * Tell whether a cache file carries a positive TTL that has run out.
	 *
	 * @param string $filepath Full path of the cache file
	 * @param int    $now      Unix timestamp to compare against
	 * @return bool TRUE only for readable files with numeric 'time'/'ttl' that are expired
	 */
	private function _is_expired_cache_file($filepath, $now)
	{
		// The file can vanish or be unreadable between readdir() and here.
		$raw = @file_get_contents($filepath);
		if ($raw === FALSE)
		{
			return FALSE;
		}

		// Only 'time' and 'ttl' are needed, so never instantiate cached objects
		// (no autoloading, no __wakeup/__destruct side effects).
		$data = @unserialize($raw, ['allowed_classes' => FALSE]);
		if ( ! is_array($data) || ! isset($data['time'], $data['ttl']))
		{
			return FALSE;
		}

		// Guard the arithmetic below against corrupt or foreign files.
		if ( ! is_numeric($data['time']) || ! is_numeric($data['ttl']))
		{
			return FALSE;
		}

		return $data['ttl'] > 0 && $now > $data['time'] + $data['ttl'];
	}

	/**
	 * Read the persisted GC state.
	 *
	 * @param string $gc_file Path of the state file
	 * @return array [last run timestamp, request count]; [0, 0] when the file is
	 *               missing, unreadable or does not contain the expected JSON object
	 */
	private function _read_gc_state($gc_file)
	{
		if ( ! is_file($gc_file) || ! is_readable($gc_file))
		{
			return [0, 0];
		}

		$raw = file_get_contents($gc_file);
		$decoded = ($raw === FALSE) ? NULL : json_decode($raw, TRUE);
		if ( ! is_array($decoded))
		{
			// Empty, truncated or otherwise invalid content: start from scratch.
			return [0, 0];
		}

		$last_run = (isset($decoded['time']) && is_numeric($decoded['time'])) ? (int) $decoded['time'] : 0;
		$count = (isset($decoded['count']) && is_numeric($decoded['count'])) ? max(0, (int) $decoded['count']) : 0;

		return [$last_run, $count];
	}

	/**
	 * Persist the GC state as JSON.
	 *
	 * @param string $gc_file Path of the state file
	 * @param int    $time    Timestamp of the last garbage collection
	 * @param int    $count   Requests counted since that collection
	 */
	private function _update_gc_file($gc_file, $time, $count) {
		// LOCK_EX keeps concurrent writers from interleaving their output.
		file_put_contents($gc_file, json_encode(['time' => $time, 'count' => $count]), LOCK_EX);
	}
}

View File

@@ -738,6 +738,14 @@ class User_Model extends CI_Model {
function authorize($level) {
$u = $this->get_by_id($this->session->userdata('user_id'));
$l = $this->config->item('auth_mode');
// Run the cache garbage collector here, probability check is already built in
// We run this only for file cache as other adapters have their own GC methods
if ($this->config->item('cache_adapter') == 'file') {
$this->load->library('GarbageCollector');
$this->garbagecollector->run();
}
// Check to see if the minimum level of access is higher than
// the user's own level. If it is, use that.
if($this->config->item('auth_mode') > $level) {

View File

@@ -283,58 +283,4 @@ class CI_Cache_file extends CI_Driver {
return $data;
}
// ------------------------------------------------------------------------
/**
* Garbage Collection
*
* Deletes all expired cache files based on their TTL
*
* @return int Number of deleted files, or FALSE on failure
*/
public function gc()
{
	// Nothing to scan when the cache directory is missing.
	if ( ! is_dir($this->_cache_path))
	{
		return FALSE;
	}

	$handle = opendir($this->_cache_path);
	if ($handle === FALSE)
	{
		// Directory exists but could not be opened: nothing was deleted.
		return 0;
	}

	$deleted = 0;
	$current_time = time();

	while (($file = readdir($handle)) !== FALSE)
	{
		// Skip directory entries and the protection files kept in the cache folder.
		if ($file === '.' OR $file === '..' OR $file === 'index.html' OR $file === '.htaccess')
		{
			continue;
		}

		$filepath = $this->_cache_path.$file;
		if ( ! is_file($filepath))
		{
			continue;
		}

		// The file can disappear or be unreadable between readdir() and the read.
		$raw = @file_get_contents($filepath);
		if ($raw === FALSE)
		{
			continue;
		}

		$data = @unserialize($raw);
		if ( ! is_array($data) OR ! isset($data['time'], $data['ttl']))
		{
			continue;
		}

		// Corrupt or foreign files may carry non-numeric values; leave them alone
		// instead of failing in the arithmetic below.
		if ( ! is_numeric($data['time']) OR ! is_numeric($data['ttl']))
		{
			continue;
		}

		// A ttl of 0 (or less) is never treated as expired.
		if ($data['ttl'] > 0 && $current_time > $data['time'] + $data['ttl'])
		{
			// Best-effort delete: another request may have removed the file already.
			if (@unlink($filepath))
			{
				$deleted++;
			}
		}
	}

	closedir($handle);

	return $deleted;
}
}