Commit 0f6a8989 authored by Oleg Borisenko's avatar Oleg Borisenko
Browse files

Optimized the SQLAlchemy layer so it is no longer a bottleneck

parent 06c535b4
......@@ -90,33 +90,33 @@ def form_backup_queue(dbsession, free_space):
return total_copy_queue
def copy_process(env, loop, current_job):
    """Copy up to ``current_job`` queued files from the current batch to tape.

    Runs inside one transaction (``env['request'].tm``) so the whole job is
    committed together.

    :param env: pyramid bootstrap environment; supplies the request's
        transaction manager and dbsession.
    :param loop: event loop handed through to ``utils.secure_copy2``.
    :param current_job: maximum number of files to copy in this job.
    :return: True when every attempted copy succeeded, False on the first
        failure (the failed file and the rest of the queue remain, so the
        caller can retry).
    """
    with env['request'].tm:
        dbsession = env['request'].dbsession
        current_batch = models.Batch.get_current(dbsession)
        # Iterate over a slice (a copy of the list head), so removing
        # copied entries from files_to_backup inside the loop is safe.
        # BUG FIX: the slice was [0:current_job-1], which processed one
        # file fewer than the caller accounts for (it decrements the
        # queue length by current_job), silently skipping a file per job.
        for file_to_copy in current_batch.files_to_backup[:current_job]:
            log.info("Copying %s", file_to_copy.relative_path)
            target = dbsession.query(models.BackupTarget).filter(
                models.BackupTarget.unique_label == file_to_copy.target_unique_label).one()
            src = target.fullpath + "/" + file_to_copy.relative_path
            dest = "/srv_mount_dont_go_here/tapes/" + current_batch.tape_id + "/" + file_to_copy.relative_path
            # Fetch the cached checksum once (the original called red.get twice).
            cached_checksum = red.get(src)
            if cached_checksum:
                # Checksum already known from Redis: plain copy is enough,
                # rsync verifies the transfer by its own means.
                checksum = cached_checksum[:32].decode()
                copy_result = utils.secure_copy(src, dest)
            else:
                # First encounter: copy and compute the checksum, then cache it.
                copy_result, checksum = utils.secure_copy2(src, dest, loop)
                red.set(src, checksum)
            if copy_result == 0:
                # NOTE: very important to have this; without it no changes to files occur
                dbsession.begin_nested()
                file_to_copy.tape_label = current_batch.tape_id
                file_to_copy.checksum = checksum
                file_to_copy.copied_at_time = datetime.datetime.now()
                current_batch.files_to_backup.remove(file_to_copy)
                log.info("Copied successfully, now queue len is %d", len(current_batch.files_to_backup))
            else:
                log.error("Failed to copy %s, return code was %d", file_to_copy.relative_path, copy_result)
                return False
        return True
def rescan_backup_targets(dbsession):
......@@ -175,7 +175,10 @@ def main(argv=sys.argv):
try:
while True:
queue_len = 0
# A job size of 50 is optimal: roughly 10 seconds per 50 files (when they
# already exist at the destination), versus ~39 seconds for a job of 200.
# Committing to the database only every ~10 seconds is acceptable because
# the MD5 checksums are cached in Redis and rsync verifies by its own
# means that each copy succeeded.
job_size = 50
iterations = 0
now = time.time()
......@@ -183,11 +186,15 @@ def main(argv=sys.argv):
dbsession = env['request'].dbsession
queue_len = control_copy_queue(dbsession)
while queue_len > 0:
if copy_process(env, loop):
queue_len -= 1
iterations += 1
if not iterations % 100:
log.debug("100 iterations per %d seconds (lazy load)", time.time() - now)
if queue_len - job_size <= 0:
current_job = queue_len
else:
current_job = job_size
# "else" leads to retry
if copy_process(env, loop, current_job):
queue_len -= current_job
iterations += current_job
log.info("%d iterations per %d seconds (lazy load)", job_size, time.time() - now)
now = time.time()
time.sleep(2)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment