From 201655524b6e7dca30686cdd0fceb2c0769fed6b Mon Sep 17 00:00:00 2001
From: HB9HIL
Date: Mon, 2 Feb 2026 20:46:46 +0100
Subject: [PATCH] garbage collector as lib so we don't collide with the actual cache driver

---
 application/controllers/Cron.php              |   6 -
 application/libraries/GarbageCollector.php    | 231 ++++++++++++++++++
 application/models/User_model.php             |   8 +
 system/libraries/Cache/drivers/Cache_file.php |  54 ----
 4 files changed, 239 insertions(+), 60 deletions(-)
 create mode 100644 application/libraries/GarbageCollector.php

diff --git a/application/controllers/Cron.php b/application/controllers/Cron.php
index 83af98052..20a489f13 100644
--- a/application/controllers/Cron.php
+++ b/application/controllers/Cron.php
@@ -146,12 +146,6 @@ class cron extends CI_Controller {
 			$this->cronexpression = null;
 		}
 
-		// we also garbage collect the file cache here every full hour
-		if ($this->config->item('cache_adapter') == 'file' && date('i') == '00') {
-			$this->load->driver('cache'); // we don't need any adapter info as we just call the file adapter directly
-			$this->cache->file->gc();
-		}
-
 		$datetime = new DateTime("now", new DateTimeZone('UTC'));
 		$datetime = $datetime->format('Ymd H:i:s');
 		$this->optionslib->update('mastercron_last_run', $datetime , 'no');
diff --git a/application/libraries/GarbageCollector.php b/application/libraries/GarbageCollector.php
new file mode 100644
index 000000000..71fc9de98
--- /dev/null
+++ b/application/libraries/GarbageCollector.php
@@ -0,0 +1,231 @@
+<?php
+
+/**
+ * File cache garbage collector.
+ *
+ * Deletes only the expired entries of the file cache. The cache files are read
+ * directly instead of going through the cache driver, so this library does not
+ * collide with the cache adapter that is actually in use.
+ *
+ * Why not $this->cache->clean()? Because this deletes everything. We only want to delete expired files.
+ *
+ * 2026, Fabian Berg, HB9HIL
+ */
+class GarbageCollector {
+
+	private $CI;
+
+	public function __construct()
+	{
+		$this->CI =& get_instance();
+	}
+
+	/**
+	 * Run garbage collection for file cache with traffic-based probability and interval checks.
+	 *
+	 * @return int Number of deleted files, or 0 if the run was skipped or failed
+	 */
+	public function run()
+	{
+		// The gc checkfile path has to exist on every possible environment. So we choose sys_get_temp_dir()
+		// and hash it with FCPATH to avoid collisions between different Wavelog installations on the same server.
+		$gc_file = sys_get_temp_dir() . '/ci_gc_last_run_' . md5(FCPATH) . '.txt';
+
+		// The garbage collection should run around every 4 hours
+		$gc_interval = 3600 * 4;
+
+		// Traffic metric: Requests since last GC
+		$state = $this->_read_gc_file($gc_file);
+		$last_run = $state['time'];
+		$request_count = $state['count'] + 1; // This is also a request so +1
+
+		// Dynamic probability based on traffic to reduce load on high-traffic installations
+		// This logic is inverted to a normal human brain. Higher traffic = lower probability to go through the next check.
+		if ($request_count < 100) {
+			$probability = 100; // Low-Traffic: check on every request (100% pass the probability check)
+		} elseif ($request_count < 1000) {
+			$probability = 50; // Medium-Traffic: every 2nd request (50% pass the probability check)
+		} else {
+			$probability = 10; // High-Traffic: every 10th request (only 10% pass the probability check)
+		}
+
+		// We do the probability check first. Let's play some lottery
+		if (rand(1, 100) > $probability) {
+			// Oh snag, we did not hit the probability but still need to update the request count
+			// The +1 was already added above
+			$this->_update_gc_file($gc_file, $last_run, $request_count);
+			return 0;
+		}
+
+		// Oh dear, we hit the probability. Now check if enough time has passed since last run.
+		if (time() - $last_run < $gc_interval) {
+			// Nope, so just update the request count
+			$this->_update_gc_file($gc_file, $last_run, $request_count);
+			return 0;
+		}
+
+		// Alright, let's do some garbage collection!
+		// We use a lock file to prevent multiple simultaneous GC runs
+		// in case of high traffic. Only one process should do the GC at a time.
+		$lock_file = $gc_file . '.lock';
+
+		// Try to acquire the lock
+		$fp = fopen($lock_file, 'c');
+
+		if ($fp === FALSE) {
+			return 0;
+		}
+
+		// If we cannot acquire the lock, another process is already doing GC
+		// So we just return and do nothing so the other process can finish
+		if ( ! flock($fp, LOCK_EX | LOCK_NB)) {
+			fclose($fp);
+			return 0;
+		}
+
+		try {
+			// Re-read the state now that we hold the lock. Another process may have finished a GC run
+			// between our first read and the moment we got the lock, in which case there is nothing to do.
+			$state = $this->_read_gc_file($gc_file);
+			if (time() - $state['time'] < $gc_interval) {
+				return 0;
+			}
+
+			log_message('info', 'Starting file cache garbage collection...');
+
+			// Perform garbage collection itself (without loading the cache driver)
+			$result = $this->_run_garbage_collector();
+
+			// Update the GC file with the current time and reset request count to 0
+			$this->_update_gc_file($gc_file, time(), 0);
+		} finally {
+			// Release the lock and close the file
+			// This will happen even if an exception occurs during GC so we don't deadlock
+			// The lock file itself is kept on purpose: unlinking it would let a process that already
+			// opened the old file and a process that creates a new one both hold "the" lock at once.
+			flock($fp, LOCK_UN);
+			fclose($fp);
+		}
+
+		log_message('info', 'File cache garbage collection completed. Deleted ' . $result . ' expired files.');
+
+		return $result;
+	}
+
+	/**
+	 * Run file cache garbage collection without loading the cache driver.
+	 *
+	 * @return int Number of deleted files, or 0 on failure
+	 */
+	public function _run_garbage_collector()
+	{
+		// Loose comparison on purpose so an unset config item falls back to the default path too
+		$configured_path = $this->CI->config->item('cache_path');
+		$cache_path = $configured_path == '' ? APPPATH.'cache/' : $configured_path;
+
+		log_message('debug', 'GarbageCollector: Scanning cache path ' . $cache_path);
+
+		if ( ! is_dir($cache_path))
+		{
+			log_message('error', 'GarbageCollector: Cache path is not a directory or does not exist: ' . $cache_path);
+			return 0;
+		}
+
+		// We need to ignore some CI specific files
+		$ignore_files = [
+			'index.html',
+			'.htaccess'
+		];
+
+		$deleted = 0;
+		$current_time = time();
+
+		$handle = opendir($cache_path);
+
+		if ($handle === FALSE)
+		{
+			return 0;
+		}
+
+		while (($file = readdir($handle)) !== FALSE)
+		{
+			if ($file === '.' || $file === '..' || in_array($file, $ignore_files, TRUE))
+			{
+				continue;
+			}
+
+			$filepath = $cache_path.$file;
+
+			if ( ! is_file($filepath))
+			{
+				continue;
+			}
+
+			// The cache driver may delete or rewrite files while we scan, so a failed read is not an error
+			$raw = @file_get_contents($filepath);
+
+			if ($raw === FALSE)
+			{
+				continue;
+			}
+
+			// We only look at 'time' and 'ttl', so never instantiate objects stored in the cached payload
+			$data = @unserialize($raw, ['allowed_classes' => FALSE]);
+
+			if ( ! is_array($data) || ! isset($data['time'], $data['ttl']))
+			{
+				continue;
+			}
+
+			$ttl = (int) $data['ttl'];
+
+			// Check if TTL is set and file has expired
+			if ($ttl > 0 && $current_time > (int) $data['time'] + $ttl && @unlink($filepath))
+			{
+				$deleted++;
+			}
+		}
+
+		closedir($handle);
+
+		return $deleted;
+	}
+
+	/**
+	 * Read the GC state file.
+	 *
+	 * @param string $gc_file Path of the state file
+	 * @return array ['time' => int, 'count' => int], both 0 if the file is missing or unreadable
+	 */
+	private function _read_gc_file($gc_file)
+	{
+		$state = ['time' => 0, 'count' => 0];
+
+		if ( ! is_file($gc_file)) {
+			return $state;
+		}
+
+		$raw = @file_get_contents($gc_file);
+		$data = ($raw === FALSE) ? NULL : json_decode($raw, true);
+
+		if (is_array($data)) {
+			// Cast to int so a damaged state file can never break the arithmetic in run()
+			$state['time'] = (int) ($data['time'] ?? 0);
+			$state['count'] = (int) ($data['count'] ?? 0);
+		}
+
+		return $state;
+	}
+
+	/**
+	 * Write the GC state file.
+	 *
+	 * @param string $gc_file Path of the state file
+	 * @param int $time Unix timestamp of the last GC run
+	 * @param int $count Requests seen since the last GC run
+	 */
+	private function _update_gc_file($gc_file, $time, $count) {
+		// LOCK_EX keeps two concurrent writers from interleaving their output into invalid JSON
+		file_put_contents($gc_file, json_encode(['time' => $time, 'count' => $count]), LOCK_EX);
+	}
+}
diff --git a/application/models/User_model.php b/application/models/User_model.php
index 4a2842a97..34385a7e6 100644
--- a/application/models/User_model.php
+++ b/application/models/User_model.php
@@ -738,6 +738,14 @@ class User_Model extends CI_Model {
 	function authorize($level) {
 		$u = $this->get_by_id($this->session->userdata('user_id'));
 		$l = $this->config->item('auth_mode');
+
+		// Run the cache garbage collector here, probability check is already built in
+		// We run this only for file cache as other adapters have their own GC methods
+		if ($this->config->item('cache_adapter') == 'file') {
+			$this->load->library('GarbageCollector');
+			$this->garbagecollector->run();
+		}
+
 		// Check to see if the minimum level of access is higher than
 		// the user's own level. If it is, use that.
 		if($this->config->item('auth_mode') > $level) {
diff --git a/system/libraries/Cache/drivers/Cache_file.php b/system/libraries/Cache/drivers/Cache_file.php
index 9b87de4e6..2ec9ea947 100644
--- a/system/libraries/Cache/drivers/Cache_file.php
+++ b/system/libraries/Cache/drivers/Cache_file.php
@@ -283,58 +283,4 @@ class CI_Cache_file extends CI_Driver {
 
 		return $data;
 	}
-
-	// ------------------------------------------------------------------------
-
-	/**
-	 * Garbage Collection
-	 *
-	 * Deletes all expired cache files based on their TTL
-	 *
-	 * @return	int	Number of deleted files, or FALSE on failure
-	 */
-	public function gc()
-	{
-		if ( ! is_dir($this->_cache_path))
-		{
-			return FALSE;
-		}
-
-		$deleted = 0;
-		$current_time = time();
-
-		if ($handle = opendir($this->_cache_path))
-		{
-			while (($file = readdir($handle)) !== FALSE)
-			{
-				if ($file === '.' OR $file === '..' OR $file === 'index.html' OR $file === '.htaccess')
-				{
-					continue;
-				}
-
-				$filepath = $this->_cache_path.$file;
-
-				if (is_file($filepath))
-				{
-					$data = @unserialize(file_get_contents($filepath));
-
-					if (is_array($data) && isset($data['time'], $data['ttl']))
-					{
-						// Check if TTL is set and file has expired
-						if ($data['ttl'] > 0 && $current_time > $data['time'] + $data['ttl'])
-						{
-							if (unlink($filepath))
-							{
-								$deleted++;
-							}
-						}
-					}
-				}
-			}
-			closedir($handle);
-		}
-
-		return $deleted;
-	}
-
 }