mealie/dev/code-generation/utils/anonymize_backups.py
Michael Genson 7602c67449
fix: More Backup Restore Fixes (#2859)
* refactor normalized search migration to use dummy default

* changed group slug migration to use raw SQL

* updated comment

* added tests with anonymized backups (currently failing)

* typo

* fixed LDAP enum in test data

* fix for adding label settings across groups

* add migration data fixes

* fix shopping list label settings test

* re-run db init instead of just running alembic migration, to include fixes

* intentionally broke SQLAlchemy GUID handling

* safely convert between GUID types in different databases

* restore original test data after testing backup restores

* added missing group name update to migration
2024-01-03 04:19:04 +00:00


import json
import logging
import random
import string
from datetime import datetime
from uuid import UUID

logger = logging.getLogger("anonymize_backups")


def is_uuid4(value: str):
    try:
        UUID(value)
        return True
    except ValueError:
        return False


def is_iso_datetime(value: str):
    try:
        datetime.fromisoformat(value)
        return True
    except ValueError:
        return False


def random_string(length=10):
    return "".join(random.choice(string.ascii_lowercase) for _ in range(length))


def clean_value(value):
    try:
        match value:
            # preserve non-strings
            case int(value) | float(value):
                return value
            case None:
                return value
            # preserve UUIDs and datetimes
            case str(value) if is_uuid4(value) or is_iso_datetime(value):
                return value
            # randomize strings
            case str(value):
                return random_string()
            case _:
                pass
    except Exception as e:
        logger.exception(e)
        logger.error(f"Failed to anonymize value: {value}")

    # fall through to returning the value unchanged on unmatched types or errors
    return value
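
# Illustrative behavior of clean_value (examples added here, not part of the original script):
#   clean_value(42)                                       -> 42 (numbers are preserved)
#   clean_value(None)                                     -> None
#   clean_value("2024-01-03T04:19:04")                    -> unchanged (parses as an ISO datetime)
#   clean_value("3fa85f64-5717-4562-b3fc-2c963f66afa6")   -> unchanged (parses as a UUID)
#   clean_value("Grandma's Lasagna")                      -> random 10-character lowercase string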


def walk_data_and_anonymize(data):
    for k, v in data.items():
        if isinstance(v, list):
            for item in v:
                walk_data_and_anonymize(item)
        else:
            # preserve alembic version number and enums
            if k in ["auth_method", "version_num"]:
                continue

            data[k] = clean_value(v)
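
# Expected input shape (an assumption inferred from this walker, not stated in the original
# file): a dict mapping table names to lists of row dicts, e.g. an "alembic_version" table
# whose rows carry "version_num"; nested lists of dicts are walked recursively.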


def anonymize_database_json(input_filepath: str, output_filepath: str):
    with open(input_filepath) as f:
        data = json.load(f)

    walk_data_and_anonymize(data)

    with open(output_filepath, "w") as f:
        json.dump(data, f)
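

# A minimal usage sketch (added here, not part of the original module); the input and
# output paths are hypothetical placeholders supplied on the command line.
if __name__ == "__main__":
    import sys

    if len(sys.argv) != 3:
        print("usage: python anonymize_backups.py <input.json> <output.json>")
        sys.exit(1)

    anonymize_database_json(sys.argv[1], sys.argv[2])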