Commit f1378beb authored by Oleg Borisenko's avatar Oleg Borisenko
Browse files

1. speed up scanning

2. fixed tape changing issue (tape unmount makes the drive unresponsive for about 2 minutes)
parent bb1aa37a
......@@ -47,22 +47,30 @@ class BackupTarget(Base):
try:
log.info("Starting target %s scan" % self.unique_label)
prefix_len = len(self.fullpath)
full_tree = glob.glob(self.fullpath + "/**", recursive=True) #glob exclude doesn't work; don't know why
# file_tree = [x for x in Path(self.fullpath).rglob('**/*') if "#recycle" not in x.parts]
if self.kind == DataKind.wgs:
full_tree = sorted(Path(self.fullpath + "/wgs_data").rglob('*'))
elif self.kind == DataKind.vcf:
full_tree = sorted(Path(self.fullpath + "/vcf_data").rglob('*'))
else:
full_tree = sorted(Path(self.fullpath).rglob('*'))
# full_tree = glob.glob(self.fullpath + "/**", recursive=True) #glob exclude doesn't work; don't know why
# full_tree = [x for x in Path(self.fullpath).rglob('**/*') if "#recycle" not in x.parts]
log.info("Rglob finished; starting traversal")
file_list = []
excluded_count = 0
for path in full_tree:
if "#recycle" in path:
# skipping recycle and hidden files (they may be trash from rsync process)
if "#recycle" in path.name or path.name.startswith('.'):
excluded_count += 1
continue
if datetime.datetime.fromtimestamp(os.path.getmtime(path)) > datetime.datetime.now() - datetime.timedelta(hours = 24):
# we are skipping files newer than 1 day since they can still change
excluded_count += 1
continue
if self.kind == DataKind.wgs and not "wgs_data" in path:
if self.kind == DataKind.wgs and not "wgs_data" in path.parts:
excluded_count += 1
continue
if self.kind == DataKind.vcf and not "vcf_data" in path:
if self.kind == DataKind.vcf and not "vcf_data" in path.parts:
excluded_count += 1
continue
......@@ -70,7 +78,7 @@ class BackupTarget(Base):
filetobackup['file_mtime'] = datetime.datetime.fromtimestamp(os.path.getmtime(path))
filetobackup['is_dir'] = os.path.isdir(path)
filetobackup['is_file'] = os.path.isfile(path)
filetobackup['relative_path'] = path[prefix_len:]
filetobackup['relative_path'] = str(path)[prefix_len:]
filetobackup['fsize'] = os.path.getsize(path) if filetobackup['is_file'] else None
filetobackup['target_unique_label'] = self.unique_label
filetobackup['kind'] = self.kind
......
......@@ -216,9 +216,11 @@ def rescan_backup_targets(dbsession):
def control_copy_queue(dbsession):
dbsession.begin_nested()
rescan_backup_targets(dbsession)
current_batch = models.Batch.get_current(dbsession) # there should be no more than 1 sum for new, in progress and interrupted
manager = utils.tapemanager.TapeManager(dbsession)
dbsession.begin_nested()
if not current_batch:
tape = manager.find_tape_to_use()
if not tape:
......
import logging
import pathlib
import time
import shutil
......@@ -165,6 +166,9 @@ class TapeManager:
else:
self.mountpoint = None
log.info("Unmounted tape %s successfully", self.drives[0][self.PRIMARY_VOLUME_TAG])
# sleep is needed so that the library is no longer busy; it seems that we cannot eject the tape right after unmount
log.info("Sleeping for 150 seconds to get drive responsible")
time.sleep(150)
self.df = 0
empty_slot = self.find_first_empty_magazine_slot()
if not empty_slot:
......@@ -172,6 +176,9 @@ class TapeManager:
else:
self.eject_to(empty_slot)
self.scan()
if self.drives[0]['full']:
log.error("Eject tape %s from drive to slot %d failed", self.drives[0][self.PRIMARY_VOLUME_TAG], empty_slot)
raise Exception("Eject tape failed")
# Scan for data tape slots
def scan_drives(self, device, element_address_assignment):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment