Discussion:
[MediaWiki-commits] [Gerrit] pywikibot/core[master]: Made download_dump.py download process atomic
Ryan10145 (Code Review)
2017-12-29 00:03:22 UTC
Permalink
Ryan10145 has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/400616 )

Change subject: Made download_dump.py download process atomic
......................................................................

Made download_dump.py download process atomic

Bug: T183675
Change-Id: I142629bb89ffc1c810adcf8f1417ecc824594e41
---
M scripts/maintenance/download_dump.py
1 file changed, 34 insertions(+), 9 deletions(-)


git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/16/400616/1

diff --git a/scripts/maintenance/download_dump.py b/scripts/maintenance/download_dump.py
index d3a5123..ee6ee23 100644
--- a/scripts/maintenance/download_dump.py
+++ b/scripts/maintenance/download_dump.py
@@ -18,10 +18,26 @@
#
from __future__ import absolute_import, division, unicode_literals

+from datetime import datetime
+
import os.path
import sys

-from os import remove, symlink
+from os import remove, symlink, fsync
+
+try:
+ from os import replace
+except ImportError: # py2
+ if sys.platform == 'win32':
+ import os
+ def replace(src, dst):
+ try:
+ os.rename(src, dst)
+ except OSError:
+ os.remove(dst)
+ os.rename(src, dst)
+ else:
+ from os import rename as replace

import pywikibot

@@ -63,7 +79,12 @@

download_filename = self.getOption('wikiname') + \
'-latest-' + self.getOption('filename')
- file_storepath = os.path.join(
+ store_filename = download_filename + '-' + \
+ str(datetime.now().strftime('%Y-%m-%d-%H-%M-%S')) + '.part'
+
+ file_temp_storepath = os.path.join(
+ self.getOption('storepath'), store_filename)
+ file_final_storepath = os.path.join(
self.getOption('storepath'), download_filename)

# https://wikitech.wikimedia.org/wiki/Help:Toolforge#Dumps
@@ -71,28 +92,32 @@
self.getOption('wikiname'), self.getOption('filename'))
if toolforge_dump_filepath:
pywikibot.output('Symlinking file from ' + toolforge_dump_filepath)
- if os.path.exists(file_storepath):
- remove(file_storepath)
+ if os.path.exists(file_temp_storepath):
+ remove(file_temp_storepath)

- symlink(toolforge_dump_filepath, file_storepath)
+ symlink(toolforge_dump_filepath, file_temp_storepath)
else:
url = 'https://dumps.wikimedia.org/' + \
- os.path.join(self.getOption('wikiname'),
- 'latest', download_filename)
+ self.getOption('wikiname') + '/latest/' + download_filename
pywikibot.output('Downloading file from ' + url)
response = fetch(url, stream=True)
if response.status == 200:
try:
- with open(file_storepath, 'wb') as result_file:
+ with open(file_temp_storepath, 'wb') as result_file:
for chunk in response.data.iter_content(100 * 1024):
result_file.write(chunk)
+
+ result_file.flush()
+ fsync(result_file.fileno())
except IOError:
pywikibot.exception()
return False
else:
return

- pywikibot.output('Done! File stored as ' + file_storepath)
+ replace(file_temp_storepath, file_final_storepath)
+
+ pywikibot.output('Done! File stored as ' + file_final_storepath)
return
--
To view, visit https://gerrit.wikimedia.org/r/400616
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I142629bb89ffc1c810adcf8f1417ecc824594e41
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ryan10145 <***@gmail.com>
jenkins-bot (Code Review)
2017-12-30 23:12:22 UTC
Permalink
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/400616 )

Change subject: Made download_dump.py download process atomic
......................................................................


Made download_dump.py download process atomic

Bug: T183675
Change-Id: I142629bb89ffc1c810adcf8f1417ecc824594e41
---
M scripts/maintenance/download_dump.py
1 file changed, 66 insertions(+), 22 deletions(-)

Approvals:
Zhuyifei1999: Looks good to me, but someone else must approve
Framawiki: Looks good to me, approved
jenkins-bot: Verified



diff --git a/scripts/maintenance/download_dump.py b/scripts/maintenance/download_dump.py
index d3a5123..c55940b 100644
--- a/scripts/maintenance/download_dump.py
+++ b/scripts/maintenance/download_dump.py
@@ -18,10 +18,27 @@
#
from __future__ import absolute_import, division, unicode_literals

+import binascii
+
import os.path
import sys

-from os import remove, symlink
+from os import remove, symlink, urandom
+
+try:
+ from os import replace
+except ImportError: # py2
+ if sys.platform == 'win32':
+ import os
+
+ def replace(src, dst):
+ try:
+ os.rename(src, dst)
+ except OSError:
+ remove(dst)
+ os.rename(src, dst)
+ else:
+ from os import rename as replace

import pywikibot

@@ -63,36 +80,63 @@

download_filename = self.getOption('wikiname') + \
'-latest-' + self.getOption('filename')
- file_storepath = os.path.join(
+ temp_filename = download_filename + '-' + \
+ binascii.b2a_hex(urandom(8)).decode('ascii') + '.part'
+
+ file_final_storepath = os.path.join(
self.getOption('storepath'), download_filename)
+ file_current_storepath = os.path.join(
+ self.getOption('storepath'), temp_filename)

# https://wikitech.wikimedia.org/wiki/Help:Toolforge#Dumps
toolforge_dump_filepath = self.get_dump_name(
self.getOption('wikiname'), self.getOption('filename'))
- if toolforge_dump_filepath:
- pywikibot.output('Symlinking file from ' + toolforge_dump_filepath)
- if os.path.exists(file_storepath):
- remove(file_storepath)

- symlink(toolforge_dump_filepath, file_storepath)
- else:
- url = 'https://dumps.wikimedia.org/' + \
- os.path.join(self.getOption('wikiname'),
- 'latest', download_filename)
- pywikibot.output('Downloading file from ' + url)
- response = fetch(url, stream=True)
- if response.status == 200:
+ # First iteration for atomic download with temporary file
+ # Second iteration for fallback non-atomic download
+ for non_atomic in range(2):
+ try:
+ if toolforge_dump_filepath:
+ pywikibot.output('Symlinking file from ' +
+ toolforge_dump_filepath)
+ if non_atomic:
+ if os.path.exists(file_final_storepath):
+ remove(file_final_storepath)
+ symlink(toolforge_dump_filepath, file_current_storepath)
+ else:
+ url = 'https://dumps.wikimedia.org/{0}/latest/{1}'.format(
+ self.getOption('wikiname'), download_filename)
+ pywikibot.output('Downloading file from ' + url)
+ response = fetch(url, stream=True)
+ if response.status == 200:
+ with open(file_current_storepath, 'wb') as result_file:
+ for data in response.data.iter_content(100 * 1024):
+ result_file.write(data)
+ else:
+ return
+ # Rename the temporary file to the target file
+ # if the download completes successfully
+ if not non_atomic:
+ replace(file_current_storepath, file_final_storepath)
+ break
+ except (OSError, IOError):
+ pywikibot.exception()
+
try:
- with open(file_storepath, 'wb') as result_file:
- for chunk in response.data.iter_content(100 * 1024):
- result_file.write(chunk)
- except IOError:
+ remove(file_current_storepath)
+ except (OSError, IOError):
pywikibot.exception()
- return False
- else:
- return

- pywikibot.output('Done! File stored as ' + file_storepath)
+ # If the atomic download fails, try without a temporary file
+ # If the non-atomic download also fails, exit the script
+ if not non_atomic:
+ pywikibot.output('Cannot make temporary file, ' +
+ 'falling back to non-atomic download')
+ file_current_storepath = file_final_storepath
+ else:
+ return False
+
+ pywikibot.output('Done! File stored as ' + file_final_storepath)
return
--
To view, visit https://gerrit.wikimedia.org/r/400616
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I142629bb89ffc1c810adcf8f1417ecc824594e41
Gerrit-PatchSet: 12
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ryan10145 <***@gmail.com>
Gerrit-Reviewer: Framawiki <***@tools.wmflabs.org>
Gerrit-Reviewer: John Vandenberg <***@gmail.com>
Gerrit-Reviewer: Ryan10145 <***@gmail.com>
Gerrit-Reviewer: Zhuyifei1999 <***@gmail.com>
Gerrit-Reviewer: Zoranzoki21 <***@gmail.com>
Gerrit-Reviewer: jenkins-bot <>
Loading...