home · contact · privacy
Improve backup algorithm.
author Christian Heller <c.heller@plomlompom.de>
Mon, 4 Dec 2023 14:16:31 +0000 (15:16 +0100)
committer Christian Heller <c.heller@plomlompom.de>
Mon, 4 Dec 2023 14:16:31 +0000 (15:16 +0100)
plomlib.py

index 739b00f774430cb62d41eb574168eda64cb8c948..d1b4c4c9a4713cecb84d6c497e7e4f5505e9add0 100644 (file)
@@ -31,46 +31,52 @@ class PlomDB:
             return
 
         # collect modification times of numbered .bak files
+        print('DEBUG BACKUP')
         bak_prefix = f'{self.db_file}.bak.'
-        backup_dates = []
-        i = 0
-        bak_as = f'{bak_prefix}{i}'
-        while os.path.exists(bak_as):
-            mod_time = os.path.getmtime(bak_as)
-            backup_dates += [str(datetime.fromtimestamp(mod_time))]
-            i += 1
-            bak_as = f'{bak_prefix}{i}'
+        # backup_dates = []
+        mtimes_to_paths = {}
+        for path in [path for path in os.listdir(os.path.dirname(bak_prefix))
+                     if path.startswith(os.path.basename(bak_prefix))]:
+            mod_time = os.path.getmtime(path)
+            print(f'DEBUG pre-exists: {path} {mod_time}')
+            mtimes_to_paths[str(datetime.fromtimestamp(mod_time))] = path
+            # backup_dates += [str(datetime.fromtimestamp(mod_time))]
+
+        for mtime in sorted(mtimes_to_paths.keys()):
+            print(f'DEBUG mtimes_to_paths: {mtime}:{mtimes_to_paths[mtime]}')
 
         # collect what numbered .bak files to save: the older, the fewer; for each
         # timedelta, keep the newest file that's older
         ages_to_keep = [timedelta(minutes=4**i) for i in range(0, 8)]
+        print(f'DEBUG ages_to_keep: {ages_to_keep}')
         now = datetime.now() 
-        to_save = []
+        to_save = {}
         for age in ages_to_keep:
             limit = now - age 
-            for i, date in enumerate(reversed(backup_dates)):
-                if datetime.strptime(date, '%Y-%m-%d %H:%M:%S.%f') < limit:
-                    unreversed_i = len(backup_dates) - i - 1
-                    if unreversed_i not in to_save:
-                        to_save += [unreversed_i]
+            for mtime in reversed(sorted(mtimes_to_paths.keys())):
+                print(f'DEBUG checking if {mtime} < {limit} ({now} - {age})')
+                if datetime.strptime(mtime, '%Y-%m-%d %H:%M:%S.%f') < limit:
+                    print('DEBUG it is, adding!')
+                    to_save[mtime] = mtimes_to_paths[mtime]
                     break
 
-        # remove redundant backup files 
-        j = 0
-        for i in to_save:
-            if i != j:
-                source = f'{bak_prefix}{i}'
-                target = f'{bak_prefix}{j}'
+        for path in [path for path in mtimes_to_paths.values()
+                     if path not in to_save.values()]:
+            print(f'DEBUG removing {path} cause not in to_save')
+            os.remove(path)
+
+        i = 0
+        for mtime in sorted(to_save.keys()):
+            source = to_save[mtime]
+            target = f'{bak_prefix}{i}'
+            print(f'DEBUG to_save {source} -> {target}')
+            if source != target:
                 shutil.move(source, target)
-            j += 1
-        for i in range(j, len(backup_dates)):
-            try:
-                os.remove(f'{bak_prefix}{i}')
-            except FileNotFoundError:
-                pass
+            i += 1
 
         # put copy of current state at end of bak list 
-        shutil.copy(self.db_file, f'{bak_prefix}{j}')
+        print(f'DEBUG saving current state to {bak_prefix}{i}')
+        shutil.copy(self.db_file, f'{bak_prefix}{i}')
 
     def write_text_to_db(self, text, mode='w'):
         self.lock()