[prev in list] [next in list] [prev in thread] [next in thread] 

List:       gentoo-portage-dev
Subject:    [gentoo-portage-dev] [PATCH 3/3] Add partial caching to match_from_list
From:       Chun-Yu Shei <cshei () google ! com>
Date:       2020-06-27 6:34:15
Message-ID: 20200627063415.936177-4-cshei () google ! com
[Download RAW message or body]

This function is called frequently with similar arguments, so cache as
much of the partial results as possible. It seems that "match_from_list"
must return a list containing actual entries from the "candidate_list" argument,
so we store frozensets in "_match_from_list_cache" and test items from
"candidate_list" for membership in these sets. The filtering performed
by the mydep.unevaluated_atom.use and mydep.repo checks towards the end
of the function is also not cached, since this causes some test failures.

This results in a reduction of "emerge -uDvpU --with-bdeps=y @world"
runtime from 43.53 -> 40.15 sec -- an 8.4% speedup.
---
 lib/portage/dep/__init__.py | 359 +++++++++++++++++++-----------------
 1 file changed, 189 insertions(+), 170 deletions(-)

diff --git a/lib/portage/dep/__init__.py b/lib/portage/dep/__init__.py
index df296dd81..dbd23bb23 100644
--- a/lib/portage/dep/__init__.py
+++ b/lib/portage/dep/__init__.py
@@ -2174,6 +2174,8 @@ def best_match_to_list(mypkg, mylist):
 
 	return bestm
 
+_match_from_list_cache = {}
+
 def match_from_list(mydep, candidate_list):
 	"""
 	Searches list for entries that matches the package.
@@ -2197,209 +2199,226 @@ def match_from_list(mydep, candidate_list):
 	if not isinstance(mydep, Atom):
 		mydep = Atom(mydep, allow_wildcard=True, allow_repo=True)
 
-	mycpv     = mydep.cpv
-	mycpv_cps = catpkgsplit(mycpv) # Can be None if not specific
-	build_id  = mydep.build_id
+	cache_key = (mydep, tuple(candidate_list))
+	key_has_hash = True
+	cache_entry = None
+	if mydep.build_id is None and key_has_hash:
+		try:
+			cache_entry = _match_from_list_cache.get(cache_key)
+		except TypeError:
+			key_has_hash = False
 
-	if not mycpv_cps:
-		ver = None
-		rev = None
-	else:
-		cat, pkg, ver, rev = mycpv_cps
-		if mydep == mycpv:
-			raise KeyError(_("Specific key requires an operator"
-				" (%s) (try adding an '=')") % (mydep))
-
-	if ver and rev:
-		operator = mydep.operator
-		if not operator:
-			writemsg(_("!!! Invalid atom: %s\n") % mydep, noiselevel=-1)
-			return []
+	if cache_entry is not None:
+		# Note: the list returned by this function must contain actual entries
+		# from "candidate_list", so store frozensets in "_match_from_list_cache"
+		# and test items from "candidate_list" for membership in these sets.
+		mylist = [x for x in candidate_list if x in cache_entry]
 	else:
-		operator = None
-
-	mylist = []
+		mycpv     = mydep.cpv
+		mycpv_cps = catpkgsplit(mycpv) # Can be None if not specific
+		build_id  = mydep.build_id
 
-	if mydep.extended_syntax:
+		if not mycpv_cps:
+			ver = None
+			rev = None
+		else:
+			cat, pkg, ver, rev = mycpv_cps
+			if mydep == mycpv:
+				raise KeyError(_("Specific key requires an operator"
+					" (%s) (try adding an '=')") % (mydep))
 
-		for x in candidate_list:
-			cp = getattr(x, "cp", None)
-			if cp is None:
-				mysplit = catpkgsplit(remove_slot(x))
-				if mysplit is not None:
-					cp = mysplit[0] + '/' + mysplit[1]
+		if ver and rev:
+			operator = mydep.operator
+			if not operator:
+				writemsg(_("!!! Invalid atom: %s\n") % mydep, noiselevel=-1)
+				return []
+		else:
+			operator = None
 
-			if cp is None:
-				continue
+		mylist = []
 
-			if cp == mycpv or extended_cp_match(mydep.cp, cp):
-				mylist.append(x)
+		if mydep.extended_syntax:
 
-		if mylist and mydep.operator == "=*":
+			for x in candidate_list:
+				cp = getattr(x, "cp", None)
+				if cp is None:
+					mysplit = catpkgsplit(remove_slot(x))
+					if mysplit is not None:
+						cp = mysplit[0] + '/' + mysplit[1]
 
-			candidate_list = mylist
-			mylist = []
-			# Currently, only \*\w+\* is supported.
-			ver = mydep.version[1:-1]
+				if cp is None:
+					continue
 
-			for x in candidate_list:
-				x_ver = getattr(x, "version", None)
-				if x_ver is None:
-					xs = catpkgsplit(remove_slot(x))
-					if xs is None:
-						continue
-					x_ver = "-".join(xs[-2:])
-				if ver in x_ver:
+				if cp == mycpv or extended_cp_match(mydep.cp, cp):
 					mylist.append(x)
 
-	elif operator is None:
-		for x in candidate_list:
-			cp = getattr(x, "cp", None)
-			if cp is None:
-				mysplit = catpkgsplit(remove_slot(x))
-				if mysplit is not None:
-					cp = mysplit[0] + '/' + mysplit[1]
+			if mylist and mydep.operator == "=*":
 
-			if cp is None:
-				continue
+				candidate_list = mylist
+				mylist = []
+				# Currently, only \*\w+\* is supported.
+				ver = mydep.version[1:-1]
 
-			if cp == mydep.cp:
-				mylist.append(x)
+				for x in candidate_list:
+					x_ver = getattr(x, "version", None)
+					if x_ver is None:
+						xs = catpkgsplit(remove_slot(x))
+						if xs is None:
+							continue
+						x_ver = "-".join(xs[-2:])
+					if ver in x_ver:
+						mylist.append(x)
 
-	elif operator == "=": # Exact match
-		for x in candidate_list:
-			xcpv = getattr(x, "cpv", None)
-			if xcpv is None:
-				xcpv = remove_slot(x)
-			if not cpvequal(xcpv, mycpv):
-				continue
-			if (build_id is not None and
-				getattr(xcpv, "build_id", None) != build_id):
-				continue
-			mylist.append(x)
+		elif operator is None:
+			for x in candidate_list:
+				cp = getattr(x, "cp", None)
+				if cp is None:
+					mysplit = catpkgsplit(remove_slot(x))
+					if mysplit is not None:
+						cp = mysplit[0] + '/' + mysplit[1]
 
-	elif operator == "=*": # glob match
-		# XXX: Nasty special casing for leading zeros
-		# Required as =* is a literal prefix match, so can't 
-		# use vercmp
-		myver = mycpv_cps[2].lstrip("0")
-		if not myver or not myver[0].isdigit():
-			myver = "0"+myver
-		if myver == mycpv_cps[2]:
-			mycpv_cmp = mycpv
-		else:
-			# Use replace to preserve the revision part if it exists
-			# (mycpv_cps[3] can't be trusted because in contains r0
-			# even when the input has no revision part).
-			mycpv_cmp = mycpv.replace(
-				mydep.cp + "-" + mycpv_cps[2],
-				mydep.cp + "-" + myver, 1)
-		for x in candidate_list:
-			try:
-				x.cp
-			except AttributeError:
-				try:
-					pkg = _pkg_str(remove_slot(x))
-				except InvalidData:
+				if cp is None:
 					continue
-			else:
-				pkg = x
 
-			xs = pkg.cpv_split
-			myver = xs[2].lstrip("0")
-			if not myver or not myver[0].isdigit():
-				myver = "0"+myver
-			if myver == xs[2]:
-				xcpv = pkg.cpv
-			else:
-				# Use replace to preserve the revision part if it exists.
-				xcpv = pkg.cpv.replace(
-					pkg.cp + "-" + xs[2],
-					pkg.cp + "-" + myver, 1)
-			if xcpv.startswith(mycpv_cmp):
-				# =* glob matches only on boundaries between version parts,
-				# so 1* does not match 10 (bug 560466).
-				next_char = xcpv[len(mycpv_cmp):len(mycpv_cmp)+1]
-				if (not next_char or next_char in '._-' or
-					mycpv_cmp[-1].isdigit() != next_char.isdigit()):
+				if cp == mydep.cp:
 					mylist.append(x)
 
-	elif operator == "~": # version, any revision, match
-		for x in candidate_list:
-			xs = getattr(x, "cpv_split", None)
-			if xs is None:
-				xs = catpkgsplit(remove_slot(x))
-			if xs is None:
-				raise InvalidData(x)
-			if not cpvequal(xs[0]+"/"+xs[1]+"-"+xs[2], mycpv_cps[0]+"/"+mycpv_cps[1]+"-"+mycpv_cps[2]):
-				continue
-			if xs[2] != ver:
-				continue
-			mylist.append(x)
+		elif operator == "=": # Exact match
+			for x in candidate_list:
+				xcpv = getattr(x, "cpv", None)
+				if xcpv is None:
+					xcpv = remove_slot(x)
+				if not cpvequal(xcpv, mycpv):
+					continue
+				if (build_id is not None and
+					getattr(xcpv, "build_id", None) != build_id):
+					continue
+				mylist.append(x)
 
-	elif operator in (">", ">=", "<", "<="):
-		for x in candidate_list:
-			if hasattr(x, 'cp'):
-				pkg = x
+		elif operator == "=*": # glob match
+			# XXX: Nasty special casing for leading zeros
+			# Required as =* is a literal prefix match, so can't 
+			# use vercmp
+			myver = mycpv_cps[2].lstrip("0")
+			if not myver or not myver[0].isdigit():
+				myver = "0"+myver
+			if myver == mycpv_cps[2]:
+				mycpv_cmp = mycpv
 			else:
+				# Use replace to preserve the revision part if it exists
+				# (mycpv_cps[3] can't be trusted because in contains r0
+				# even when the input has no revision part).
+				mycpv_cmp = mycpv.replace(
+					mydep.cp + "-" + mycpv_cps[2],
+					mydep.cp + "-" + myver, 1)
+			for x in candidate_list:
 				try:
-					pkg = _pkg_str(remove_slot(x))
-				except InvalidData:
-					continue
+					x.cp
+				except AttributeError:
+					try:
+						pkg = _pkg_str(remove_slot(x))
+					except InvalidData:
+						continue
+				else:
+					pkg = x
+
+				xs = pkg.cpv_split
+				myver = xs[2].lstrip("0")
+				if not myver or not myver[0].isdigit():
+					myver = "0"+myver
+				if myver == xs[2]:
+					xcpv = pkg.cpv
+				else:
+					# Use replace to preserve the revision part if it exists.
+					xcpv = pkg.cpv.replace(
+						pkg.cp + "-" + xs[2],
+						pkg.cp + "-" + myver, 1)
+				if xcpv.startswith(mycpv_cmp):
+					# =* glob matches only on boundaries between version parts,
+					# so 1* does not match 10 (bug 560466).
+					next_char = xcpv[len(mycpv_cmp):len(mycpv_cmp)+1]
+					if (not next_char or next_char in '._-' or
+						mycpv_cmp[-1].isdigit() != next_char.isdigit()):
+						mylist.append(x)
 
-			if pkg.cp != mydep.cp:
-				continue
-			try:
-				result = vercmp(pkg.version, mydep.version)
-			except ValueError: # pkgcmp may return ValueError during int() conversion
-				writemsg(_("\nInvalid package name: %s\n") % x, noiselevel=-1)
-				raise
-			if result is None:
-				continue
-			elif operator == ">":
-				if result > 0:
-					mylist.append(x)
-			elif operator == ">=":
-				if result >= 0:
-					mylist.append(x)
-			elif operator == "<":
-				if result < 0:
-					mylist.append(x)
-			elif operator == "<=":
-				if result <= 0:
-					mylist.append(x)
-			else:
-				raise KeyError(_("Unknown operator: %s") % mydep)
-	else:
-		raise KeyError(_("Unknown operator: %s") % mydep)
+		elif operator == "~": # version, any revision, match
+			for x in candidate_list:
+				xs = getattr(x, "cpv_split", None)
+				if xs is None:
+					xs = catpkgsplit(remove_slot(x))
+				if xs is None:
+					raise InvalidData(x)
+				if not cpvequal(xs[0]+"/"+xs[1]+"-"+xs[2], mycpv_cps[0]+"/"+mycpv_cps[1]+"-"+mycpv_cps[2]):
+					continue
+				if xs[2] != ver:
+					continue
+				mylist.append(x)
 
-	if mydep.slot is not None:
-		candidate_list = mylist
-		mylist = []
-		for x in candidate_list:
-			x_pkg = None
-			try:
-				x.cpv
-			except AttributeError:
-				xslot = dep_getslot(x)
-				if xslot is not None:
+		elif operator in (">", ">=", "<", "<="):
+			for x in candidate_list:
+				if hasattr(x, 'cp'):
+					pkg = x
+				else:
 					try:
-						x_pkg = _pkg_str(remove_slot(x), slot=xslot)
+						pkg = _pkg_str(remove_slot(x))
 					except InvalidData:
 						continue
-			else:
-				x_pkg = x
 
-			if x_pkg is None:
-				mylist.append(x)
-			else:
+				if pkg.cp != mydep.cp:
+					continue
 				try:
-					x_pkg.slot
+					result = vercmp(pkg.version, mydep.version)
+				except ValueError: # pkgcmp may return ValueError during int() conversion
+					writemsg(_("\nInvalid package name: %s\n") % x, noiselevel=-1)
+					raise
+				if result is None:
+					continue
+				elif operator == ">":
+					if result > 0:
+						mylist.append(x)
+				elif operator == ">=":
+					if result >= 0:
+						mylist.append(x)
+				elif operator == "<":
+					if result < 0:
+						mylist.append(x)
+				elif operator == "<=":
+					if result <= 0:
+						mylist.append(x)
+				else:
+					raise KeyError(_("Unknown operator: %s") % mydep)
+		else:
+			raise KeyError(_("Unknown operator: %s") % mydep)
+
+		if mydep.slot is not None:
+			candidate_list = mylist
+			mylist = []
+			for x in candidate_list:
+				x_pkg = None
+				try:
+					x.cpv
 				except AttributeError:
+					xslot = dep_getslot(x)
+					if xslot is not None:
+						try:
+							x_pkg = _pkg_str(remove_slot(x), slot=xslot)
+						except InvalidData:
+							continue
+				else:
+					x_pkg = x
+
+				if x_pkg is None:
 					mylist.append(x)
 				else:
-					if _match_slot(mydep, x_pkg):
+					try:
+						x_pkg.slot
+					except AttributeError:
 						mylist.append(x)
+					else:
+						if _match_slot(mydep, x_pkg):
+							mylist.append(x)
+		if key_has_hash:
+			_match_from_list_cache[cache_key] = frozenset(mylist)
 
 	if mydep.unevaluated_atom.use:
 		candidate_list = mylist
-- 
2.27.0.212.ge8ba1cc988-goog


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic