[yocto] [layerindex-web][PATCH 3/5] update.py: fetch repos parallelly

Robert Yang liezhi.yang at windriver.com
Tue Jan 2 21:42:24 PST 2018


Fetching the repositories in parallel can save a lot of time. Here is my test
data with PARALLEL_JOBS set to 10. This is the fetch time only (124 layers); I
hacked the script to stop once the fetch was done in order to measure it:
$ update.py -b <branch>
Before: 2m30
Now: 16s

Signed-off-by: Robert Yang <liezhi.yang at windriver.com>
---
 layerindex/update.py | 53 +++++++++++++++++++++++++++++++++-------------------
 settings.py          |  3 +++
 2 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/layerindex/update.py b/layerindex/update.py
index 6359115..d6488f0 100755
--- a/layerindex/update.py
+++ b/layerindex/update.py
@@ -20,6 +20,7 @@ from distutils.version import LooseVersion
 import utils
 import operator
 import re
+import multiprocessing
 
 import warnings
 warnings.filterwarnings("ignore", category=DeprecationWarning)
@@ -127,6 +128,18 @@ def update_actual_branch(layerquery, fetchdir, branch, options, update_bitbake,
         for s in to_save:
             s.save()
 
+def fetch_repo(vcs_url, repodir, urldir, fetchdir, layer_name):
+    logger.info("Fetching remote repository %s" % vcs_url)
+    try:
+        if not os.path.exists(repodir):
+            utils.runcmd("git clone %s %s" % (vcs_url, urldir), fetchdir, logger=logger, printerr=False)
+        else:
+            utils.runcmd("git fetch -p", repodir, logger=logger, printerr=False)
+        return (vcs_url, None)
+    except subprocess.CalledProcessError as e:
+        logger.error("Fetch of layer %s failed: %s" % (layer_name, e.output))
+        return (vcs_url, e.output)
+
 def main():
     if LooseVersion(git.__version__) < '0.3.1':
         logger.error("Version of GitPython is too old, please install GitPython (python-git) 0.3.1 or later in order to use this script")
@@ -232,6 +245,9 @@ def main():
 
     if not os.path.exists(fetchdir):
         os.makedirs(fetchdir)
+
+    allrepos = {}
+    fetchedresult = []
     fetchedrepos = []
     failedrepos = {}
 
@@ -268,30 +284,29 @@ def main():
                     # Handle multiple layers in a single repo
                     urldir = layer.get_fetch_dir()
                     repodir = os.path.join(fetchdir, urldir)
-                    if not (layer.vcs_url in fetchedrepos or layer.vcs_url in failedrepos):
-                        logger.info("Fetching remote repository %s" % layer.vcs_url)
-                        out = None
-                        try:
-                            if not os.path.exists(repodir):
-                                out = utils.runcmd("git clone %s %s" % (layer.vcs_url, urldir), fetchdir, logger=logger, printerr=False)
-                            else:
-                                out = utils.runcmd("git fetch -p", repodir, logger=logger, printerr=False)
-                        except subprocess.CalledProcessError as e:
-                            logger.error("Fetch of layer %s failed: %s" % (layer.name, e.output))
-                            failedrepos[layer.vcs_url] = e.output
-                            continue
-                        fetchedrepos.append(layer.vcs_url)
+                    if layer.vcs_url not in allrepos:
+                        allrepos[layer.vcs_url] = (repodir, urldir, fetchdir, layer.name)
+                # Add bitbake
+                allrepos[settings.BITBAKE_REPO_URL] = (bitbakepath, "bitbake", fetchdir, "bitbake")
+                # Parallel fetching
+                pool = multiprocessing.Pool(int(settings.PARALLEL_JOBS))
+                for url in allrepos:
+                    fetchedresult.append(pool.apply_async(fetch_repo, \
+                        (url, allrepos[url][0], allrepos[url][1], allrepos[url][2], allrepos[url][3],)))
+                pool.close()
+                pool.join()
+
+                for url in fetchedresult[:]:
+                    # Each result is a (url, error) tuple; error is None on success.
+                    if url.get()[1]:
+                        failedrepos[url.get()[0]] = url.get()[1]
+                    else:
+                        fetchedrepos.append(url.get()[0])
 
                 if not (fetchedrepos or update_bitbake):
                     logger.error("No repositories could be fetched, exiting")
                     sys.exit(1)
 
-                logger.info("Fetching bitbake from remote repository %s" % settings.BITBAKE_REPO_URL)
-                if not os.path.exists(bitbakepath):
-                    out = utils.runcmd("git clone %s %s" % (settings.BITBAKE_REPO_URL, 'bitbake'), fetchdir, logger=logger)
-                else:
-                    out = utils.runcmd("git fetch -p", bitbakepath, logger=logger)
-
             if options.actual_branch:
                 update_actual_branch(layerquery, fetchdir, branches[0], options, update_bitbake, bitbakepath)
                 return
diff --git a/settings.py b/settings.py
index e26f4b2..c70ac4d 100644
--- a/settings.py
+++ b/settings.py
@@ -228,3 +228,6 @@ SUBMIT_EMAIL_SUBJECT = 'OE Layerindex layer submission'
 # RabbitMQ settings
 RABBIT_BROKER = 'amqp://'
 RABBIT_BACKEND = 'rpc://'
+
+# Number of parallel jobs used when fetching repositories
+PARALLEL_JOBS = "4"
-- 
2.7.4




More information about the yocto mailing list