Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a migration for the short-lived intermediate v1 format #176

Merged
merged 3 commits into from
Jan 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion damnit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,11 @@ def main():
"v0-to-v1",
help="Migrate the SQLite database and HDF5 files from v0 to v1."
)
migrate_subparsers.add_parser(
"intermediate-v1",
help="Migrate the SQLite database and HDF5 files from an initial implementation of v1 to the final"
" v1. Don't use this unless you know what you're doing."
)

args = ap.parse_args()
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO,
Expand Down Expand Up @@ -241,12 +246,14 @@ def main():

elif args.subcmd == "migrate":
from .backend.db import DamnitDB
from .migrations import migrate_v0_to_v1
from .migrations import migrate_intermediate_v1, migrate_v0_to_v1

db = DamnitDB(allow_old=True)

if args.migrate_subcmd == "v0-to-v1":
migrate_v0_to_v1(db, Path.cwd(), args.dry_run)
elif args.migrate_subcmd == "intermediate-v1":
migrate_intermediate_v1(db, Path.cwd(), args.dry_run)

if __name__ == '__main__':
sys.exit(main())
131 changes: 113 additions & 18 deletions damnit/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
from .ctxsupport.ctxrunner import generate_thumbnail, add_to_h5_file, DataType


"""
Image thumbnails were previously generated by the GUI, now they're generated
by the backend. This command will convert old image summaries (2D arrays) into
thumbnails (RGBA arrays) that can be used directly by the GUI.
"""
def migrate_images(db, db_dir, dry_run):
proposal = db.metameta.get("proposal")
def migrate_images(new_db, db_dir, dry_run):
"""
Image thumbnails were previously generated by the GUI, now they're generated
by the backend. This function will convert old image summaries (2D arrays) into
thumbnails (RGBA arrays) that can be used directly by the GUI.

Be careful to pass in the *new database* handle instead of the old one,
since this will modify the DB even when dry_run=True.
"""
proposal = new_db.metameta.get("proposal")
if proposal is None:
raise RuntimeError("Database must have a proposal configured for it to be migrated.")

Expand Down Expand Up @@ -51,8 +54,7 @@ def migrate_images(db, db_dir, dry_run):

# And then update the summaries in the database
for run, run_reduced_data in reduced_data.items():
if not dry_run:
add_to_db(run_reduced_data, db, proposal, run)
add_to_db(run_reduced_data, new_db, proposal, run)

info = f"updated {len(reduced_data)} variables in {len(files_modified)} files"
if dry_run:
Expand Down Expand Up @@ -139,6 +141,18 @@ def main_dataset(grp: h5py.Group):
return grp[candidates.pop()]


def copy_table(table, old_db, new_db):
    """Copy every row of `table` from one database into another.

    Both arguments are expected to expose a sqlite3 connection as `.conn`.
    An empty source table is a no-op.
    """
    src_rows = old_db.conn.execute(f"SELECT * FROM {table}").fetchall()
    if not src_rows:
        return

    # One "?" per column, derived from the first row's width
    placeholders = ", ".join("?" * len(src_rows[0]))
    insert_sql = f"INSERT INTO {table} VALUES ({placeholders})"
    new_db.conn.executemany(insert_sql, src_rows)

def migrate_v0_to_v1(db, db_dir, dry_run):
"""
For reference, see the V0_SCHEMA variable in db.py.
Expand Down Expand Up @@ -207,15 +221,7 @@ def migrate_v0_to_v1(db, db_dir, dry_run):

# Copy the user-editable variables and standalone comments
for table in ["variables", "time_comments"]:
rows = db.conn.execute(f"SELECT * FROM {table}").fetchall()
if len(rows) == 0:
continue

placeholder = ", ".join(["?" for _ in rows[0]])
new_db.conn.executemany(f"""
INSERT INTO {table}
VALUES ({placeholder})
""", rows)
copy_table(table, db, new_db)

# Load the data into the new database
total_vars = 0
Expand Down Expand Up @@ -273,3 +279,92 @@ def migrate_v0_to_v1(db, db_dir, dry_run):
new_db_path.rename(db_path)
print(f"New format DB created and moved to {db_path.name}")
print(f"Old database backed up as {backup_path.name}")

def migrate_intermediate_v1(db, db_dir, dry_run):
    """Migrate intermediate v1 (v0.5) databases.

    Before v1 rose over the world, resplendent and glorious, there was a humble
    antecedent that was used for some proposals:
    - p3338 (FXE)
    - p6616 (FXE)
    - p4507 (FXE)
    - p5639 (SXP)
    - p4656 (MID)
    - p3118 (MID)
    - p6976 (MID)
    - p4559 (MID)
    - p5397 (MID)
    - p4239 (MID)
    - p4442 (MID)
    - p2956 (SCS)

    To push these databases into their destiny of v1 we must make some changes:
    - Remove the `run_variables.stored_type` column
    - Re-do image migration to convert the thumbnails to PNGs
    - Move the `stored_type` attribute on `.reduced/<var>` datasets to a
      `_damnit_objtype` attribute on the `<var>` group.

    Parameters:
      db: DamnitDB handle for the existing (intermediate-v1) database.
      db_dir: directory containing the database and `extracted_data/` files.
      dry_run: when True, leave the HDF5 files and the current DB untouched;
        a candidate new-format DB is still written for inspection.
    """
    # Create a new database, overwriting any previous (possibly failed) attempts
    new_db_path = db_dir / "runs.v1.sqlite"
    new_db_path.unlink(missing_ok=True)
    new_db = DamnitDB(new_db_path)

    # Copy all settings except `data_format_version`, so the new DB keeps the
    # version stamp that DamnitDB set for the final v1 schema.
    for k, v in db.metameta.items():
        if k != "data_format_version":
            new_db.metameta[k] = v

    # Copy everything but `run_variables` to the new database
    for table in ["run_info", "time_comments", "variables"]:
        copy_table(table, db, new_db)

    # Copy `run_variables` separately: we select explicit columns so that the
    # removed `stored_type` column is dropped, and insert all rows in a single
    # executemany() call (same semantics as a per-row loop, one statement).
    run_variables = db.conn.execute("""
        SELECT proposal, run, name, version, value, timestamp, max_diff, provenance
        FROM run_variables
    """).fetchall()
    new_db.conn.executemany("""
        INSERT INTO run_variables (proposal, run, name, version, value, timestamp, max_diff, provenance)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """, run_variables)

    new_db.update_views()

    # Convert the old `stored_type` attribute into `_damnit_objtype`
    runs = db.conn.execute("SELECT proposal, run FROM runs").fetchall()
    for proposal, run_no in runs:
        h5_path = db_dir / "extracted_data" / f"p{proposal}_r{run_no}.h5"
        if not h5_path.is_file():
            # Not all runs necessarily have extracted data on disk
            continue

        with add_to_h5_file(h5_path) as f:
            reduced = f[".reduced"]
            for ds_name, dset in reduced.items():
                if "stored_type" in dset.attrs:
                    stored_type = dset.attrs["stored_type"]

                    # Only these stored types map to a `_damnit_objtype`
                    # marker; for everything else the attribute is just removed.
                    obj_type = None
                    if stored_type in ["DataArray", "Dataset", "image", "timestamp"]:
                        obj_type = stored_type.lower()

                    if not dry_run:
                        if obj_type is not None:
                            # Set on the top-level `<var>` group, not the
                            # `.reduced/<var>` summary dataset
                            f[ds_name].attrs["_damnit_objtype"] = obj_type
                        del dset.attrs["stored_type"]

    # Migrate images to use PNGs for thumbnails. Note that this writes the
    # summaries into the new (temporary in dry-run) database.
    migrate_images(new_db, db_dir, dry_run)

    new_db.close()
    db.close()

    if dry_run:
        print(f"Dry-run: new format DB created at {new_db_path.name}")
        print("If all seems OK, re-run the migration without --dry-run.")
    else:
        # Swap the new DB into place, keeping the old one as a backup
        db_path = db_dir / DB_NAME
        backup_path = db_dir / "runs.intermediate-v1-backup.sqlite"
        db_path.rename(backup_path)
        new_db_path.rename(db_path)
        print(f"New format DB created and moved to {db_path.name}")
        print(f"Old database backed up as {backup_path.name}")