#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# This program belongs to AKKODIS INGENIERIE PRODUIT SAS.
# It is considered a trade secret, and is not to be divulged or used
# by parties who have not received written authorization from the owner.
#
import os, json, sys, requests, time, re, datetime, html.parser, urllib.parse, asyncio, hashlib, binascii, copy, base64, datetime
import traceback
import aiohttp, jsonschema
import xml.etree.ElementTree as ET
from packaging import version
import bs4 # BeautifulSoup

# Debian releases tracked by this tool.
sDebianNames = ['bookworm','trixie']
# assumes that docker deployment is based on debian
sDockerName = 'docker'
sDockerDebianName = 'trixie'
assert(sDockerDebianName in sDebianNames)
sWindowsName = 'windows'

# I have trouble with some site if I let default user agent ...
sUserAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'


# Debug aid: when True, HTTPS certificate verification is disabled for all requests.
sDisableSSLCheck = False

def bs4FindPath(pFrom, pPath):
	"""Walk *pPath* down from *pFrom* (a bs4 node).

	pPath is a sequence of (tagname, index) pairs; at each step the
	idx-th direct child bearing that tag name is selected.  Returns the
	final node, or None (after printing the failing path) when a step
	cannot be resolved.
	"""
	lNode = pFrom
	lPathSoFar = ''
	for (lName, lIdx) in pPath:
		lPathSoFar = '%s/%s[%s]' % (lPathSoFar, lName, lIdx)
		lChildren = lNode.find_all(lName, recursive=False)
		if lChildren is None or len(lChildren) <= lIdx:
			print('Fail to locate item ' + lPathSoFar)
			return None
		lNode = lChildren[lIdx]
	return lNode

class ApacheSecurityHTMLParser(html.parser.HTMLParser):
	"""Extracts CVE entries from httpd.apache.org vulnerability pages.

	Each page block is an <h1 id="version"> header followed by a <dl>;
	every <dl><dt><h3 id="CVE-..."> opens one vulnerability whose
	description lives under <dd><p> and whose affected versions sit in a
	<dd><table> cell right after an 'Affects' cell.  Completed entries
	accumulate in self.mCve as dicts (id / description / affects).
	"""

	def __init__(self) -> None:
		super().__init__()
		self.__mTagStack = []          # open-tag path from document root
		self.mCve = []                 # finished CVE entries (public output)
		self.__mCurrentCve = None      # entry being filled, not yet pushed
		self.__mCurrentVersion = None  # version id of the enclosing <h1>

	def startNewBlock(self) -> None:
		# caller feeds the page block by block; reset the version context
		self.__mCurrentVersion = None

	def handle_starttag(self, tag, attrs):
		self.__mTagStack.append(tag)
		lAttrs = dict(attrs)
		if self.__mTagStack == ['dl','dt','h3']:
			# a new vulnerability begins: flush the previous one first
			self.__pushPendingCve()
			self.__mCurrentCve = {
				'id': lAttrs.get('id'),
				'description': [],
				'nexttdisaffects': False,
				'affects': None
			}
		elif self.__mTagStack == ['h1']:
			assert(self.__mCurrentVersion is None)
			self.__mCurrentVersion = lAttrs.get('id')
			assert(not self.__mCurrentVersion is None)

	def __pushPendingCve(self):
		# move the in-progress entry (if any) onto the result list
		if self.__mCurrentCve is None:
			return
		assert(not self.__mCurrentCve['id'] is None)
		# sometimes 'Affects' is not set on the vulnerability page; if so
		# assume all versions prior to the enclosing header's version
		if self.__mCurrentCve['affects'] is None:
			self.__mCurrentCve['affects'] = 'before %s' % self.__mCurrentVersion
		self.mCve.append(self.__mCurrentCve)
		self.__mCurrentCve = None

	def handle_endtag(self, tag):
		if self.__mTagStack == ['dl']:
			# closing the definition list terminates the block's last entry
			self.__pushPendingCve()
		assert(self.__mTagStack[-1] == tag)
		self.__mTagStack.pop()

	def handle_data(self, data):
		data = data.strip()
		if self.__mTagStack == ['dl','dd','p']:
			# paragraph text is the CVE description; credit lines are skipped
			if data.startswith('Acknowledgements:'):
				return
			self.__mCurrentCve['description'].append(data)
		elif self.__mTagStack == ['dl','dd','table','tr','td']:
			# the cell following the 'Affects' cell lists affected versions
			if data == 'Affects':
				assert(self.__mCurrentCve['nexttdisaffects'] == False)
				self.__mCurrentCve['nexttdisaffects'] = True
			elif self.__mCurrentCve['nexttdisaffects']:
				self.__mCurrentCve['affects'] = data


def decrementVersion(version):
	"""Return the largest representable version strictly below *version*.

	Used to turn an exclusive upper bound ('< X') into an inclusive one
	('<= decrementVersion(X)').  Normally the last component is simply
	decremented ('2.4.58' -> '2.4.57').  Previously a version ending in 0
	produced an invalid string like '2.4.-1'; now a borrow is performed and
	each stripped zero position becomes a large sentinel so ordering is
	preserved ('2.4.0' -> '2.3.999999999').

	Raises ValueError when the version is all zeros (nothing lies below it)
	or contains non-numeric components.
	"""
	lParts = version.split('.')
	lIdx = len(lParts) - 1
	# borrow through trailing zero components
	while lIdx >= 0 and int(lParts[lIdx]) == 0:
		lParts[lIdx] = '999999999'
		lIdx = lIdx - 1
	if lIdx < 0:
		raise ValueError('Cannot decrement version %s' % version)
	lParts[lIdx] = str(int(lParts[lIdx]) - 1)
	return '.'.join(lParts)


def _execCmd (command, raiseexceptiononerror) :
	"""Run *command* (argv list) and return (stdout, stderr, returncode).

	stdout/stderr are decoded as UTF-8 (undecodable bytes ignored).  On a
	non-zero exit status the command and its output are printed; when
	*raiseexceptiononerror* is True an Exception is raised as well.
	"""
	from subprocess import Popen, PIPE

	p = Popen(command, stdin=None, stdout=PIPE, stderr=PIPE)
	stdout, stderr = p.communicate()
	stdout = stdout.decode('utf-8', errors='ignore')
	stderr = stderr.decode('utf-8', errors='ignore')
	rc = p.returncode
	if rc != 0:
		print('error executing %s' % command[0])
		print(' '.join(command))
		print(stdout)
		print(stderr)
		if raiseexceptiononerror:
			# include the failing command so the traceback is actionable
			raise Exception('Command failed (rc=%d): %s' % (rc, ' '.join(command)))
	return (stdout, stderr, rc)

def processPotentialVersionString(pDst: set, pRe: re.Pattern, pStr: str):
	"""Extract version numbers from *pStr* using the compiled regex *pRe*.

	Every match is converted to a packaging Version and added to *pDst*.
	When the regex captures digits through several optional groups, the
	non-empty groups are joined with '.' to rebuild the version string.

	Returns the versions found in this string, sorted ascending.
	"""
	lFoundVersions = set()
	# the loop variable used to shadow the list it iterated; use distinct names
	for lMatch in pRe.findall(pStr.strip()):
		if isinstance(lMatch, tuple):
			# regex matched digits in multiple optional groups
			lVersionStr = '.'.join([g for g in lMatch if g is not None and len(g) > 0])
		elif isinstance(lMatch, str):
			lVersionStr = lMatch
		else:
			raise Exception('Unexpected findall result type: %r' % type(lMatch))
		lVersion = version.parse(lVersionStr)
		pDst.add(lVersion)
		lFoundVersions.add(lVersion)
	return sorted(lFoundVersions)

class VersionHTMLParser(html.parser.HTMLParser):
	"""HTML parser that scans every text node for version numbers.

	Each data chunk is run through processPotentialVersionString with the
	regex given at construction time; hits accumulate in the provided set.
	Markup structure is ignored entirely.
	"""

	def __init__(self, pRe, pVersions) -> None:
		super().__init__()
		self._mRe = pRe
		self._mVersions = pVersions

	def handle_data(self, data):
		# only text content matters
		processPotentialVersionString(self._mVersions, self._mRe, data)

	def handle_starttag(self, tag, attrs):
		pass

	def handle_endtag(self, tag):
		pass

def convertVersion2CpeVersion(pIn: str, pModuleName : str):
	"""Normalize a raw package version string into a plain dotted CPE version.

	Handles the formats seen in the module lists: plain dotted versions,
	debian epoch/dfsg notations, package revision suffixes and packaging's
	'.postN' rewriting.  Raises for any unknown scheme so new formats are
	noticed immediately.
	"""
	# already a plain dotted version, eg: '4' '1.2' '1.5.6'
	if re.fullmatch('^[0-9]+(?:\\.[0-9]+)*$', pIn, re.IGNORECASE):
		return pIn

	# debian epoch + dfsg repack, eg: '1:1.2.13.dfsg-1'
	lMatch = re.fullmatch('^([0-9]+):([0-9]+(?:\\.[0-9]+)*)\\.dfsg\\S+$', pIn, re.IGNORECASE)
	if lMatch:
		assert(lMatch.group(2).startswith(lMatch.group(1) + '.'))
		return lMatch.group(2)

	# version + revision suffix, eg: '1.0.9-2+b6' or '2.4.58-240131'
	lMatch = re.fullmatch('^([0-9]+(?:\\.[0-9]+)*)[\\+-]\\S+$', pIn, re.IGNORECASE)
	if lMatch:
		return lMatch.group(1)

	# eg: '2.4.58.post240131'
	# packaging.version.parse rewrites '2.4.58-240131' into '2.4.58.post240131'
	lMatch = re.fullmatch('^([0-9]+(?:\\.[0-9]+)*)\\.post[0-9]+$', pIn, re.IGNORECASE)
	if lMatch:
		return lMatch.group(1)

	# epoch + version + revision, eg: '1:2.1.5-2' (libjpeg-turbo)
	lMatch = re.fullmatch('^[0-9]+:([0-9.]+)-[0-9]+$', pIn, re.IGNORECASE)
	if lMatch:
		return lMatch.group(1)

	raise Exception('Fail to convert version of "%s" to cpe version : %s' % (pModuleName,pIn))

class CpeFetcher():
	"""Batch client for the NVD REST API (optionally relayed through THC).

	Depending on pFetchType each CPE string is resolved either to its CVE
	list ('CVE') or to its cpematch expansion ('cpe').  Requests are issued
	in small parallel batches with backoff on rate limiting / transient
	server errors.
	"""

	def __init__(self, pNvdApiKey, pThcApiKey, pFetchType, pProxy) -> None:
		self._mProxy = pProxy
		self._mFetchType = pFetchType
		# with the api-key we are limited to 50 call/sec else 5 call/sec
		self._mBatchSize = 10 if not pNvdApiKey is None or not pThcApiKey is None else 2
		self._mNvdHeaders = {'User-Agent':sUserAgent}
		self._mBaseUrl = 'https://services.nvd.nist.gov'
		self._mUseTHC = False
		if not pThcApiKey is None:
			# THC acts as an authenticated relay in front of nvd.nist.gov
			self._mNvdHeaders['Authorization'] = pThcApiKey
			self._mBaseUrl = 'https://thc.apps.3djuump.com/request/nvd'
			self._mUseTHC = True
		elif not pNvdApiKey is None:
			self._mNvdHeaders['apiKey'] = pNvdApiKey


	async def _fetchCpe(self, session, pCpe):
		"""Fetch one CPE; returns (httpstatus, cpe, json-or-errortext)."""
		try:
			if self._mFetchType == 'CVE':
				lUrl = self._mBaseUrl + '/rest/json/cves/2.0'
				lParams = {
						'cpeName':pCpe,
						'isVulnerable':''
					}
			elif self._mFetchType == 'cpe':
				lUrl = self._mBaseUrl + '/rest/json/cpematch/2.0'
				lParams = {
						'matchStringSearch':pCpe
					}
			else:
				raise Exception('Unknown fetch type %r' % self._mFetchType)

			async with session.get(lUrl, params=lParams, proxy=self._mProxy) as resp:
				lText = await resp.text()
				if resp.status != 200:
					return (resp.status, pCpe, lText)
				return (resp.status, pCpe, json.loads(lText))
		except Exception as e:
			# was a bare 'except:' which also swallowed task cancellation;
			# report the failure as a retryable 403 with some context
			return (403, pCpe, 'Exception %r' % e)

	async def fetchCpeInfo(self, pCpeToFetch):
		"""Resolve every CPE in *pCpeToFetch*; returns {cpe: parsed json}.

		Rate-limited (403/429) and transient server errors (500/503/504)
		are retried after a backoff; gives up only after 15 minutes without
		a single successful call.
		"""
		lCpe = dict()
		lRemainingToFetch = sorted(pCpeToFetch)
		async with aiohttp.ClientSession(headers=self._mNvdHeaders) as lSession:
			lLastSuccessCall = time.time()
			while len(lRemainingToFetch) > 0:

				start_time = time.time()
				# run a batch of requests in parallel
				lTasks = [
					asyncio.ensure_future(self._fetchCpe(lSession, lRemainingToFetch[i]))
					for i in range(0, min(len(lRemainingToFetch), self._mBatchSize))
				]

				lBatchRes = await asyncio.gather(*lTasks)
				lFinalSleep = 0
				lSleepReasons = set()
				lSuccessCount = 0
				for (returncode, cpe, jsonortext) in lBatchRes:
					if returncode == 200:
						lRemainingToFetch.remove(cpe)
						lSuccessCount = lSuccessCount + 1
						lCpe[cpe] = jsonortext
						lLastSuccessCall = time.time()
						continue
					lSleep = None
					if returncode == 403 or returncode == 429:
						# rate limit ?
						lSleep = 30
						lSleepReasons.add('Rate limit')
					elif returncode in [500,503,504]:
						lSleep = 20.
						lSleepReasons.add('Service Unavailable [%d]' % returncode)
					if lSleep is None :
						raise Exception('Got unexpected error from nvd %d %s\n%s' %(returncode,jsonortext,lSleepReasons))
					lTimeLimit = 900
					if (time.time() - lLastSuccessCall) > lTimeLimit:
						raise Exception('Fail to get a single success call from nvd since %d seconds %s' %(lTimeLimit,lSleepReasons))
					lFinalSleep = max(lSleep, lFinalSleep)
				print('  Successfully fetch %d/%d cpe in %.1fs' % (lSuccessCount,len(lTasks),time.time() - start_time))
				if len(lSleepReasons) > 0:
					print('  While fetching cpe, %s, time to sleep (%ss) => zzz' % (sorted(list(lSleepReasons)),lFinalSleep) )
					# asyncio.sleep, not time.sleep: do not block the event loop
					await asyncio.sleep(lFinalSleep)
				elif not self._mUseTHC:
					# with the api-key we are limited to 50 call/sec else 5 call/sec
					await asyncio.sleep(6.)

		return lCpe


class CveRetriever():
	def __init__(self, pSourceFolder : str, pIsFirstSearch : bool, pThcApiKey : str) -> None:
		"""Load module lists, product metadata and reviews, set up HTTP access.

		pSourceFolder: folder holding the third_party_* json files and their schemas.
		pIsFirstSearch: stored as-is for later use.
		pThcApiKey: optional key for the THC relay; when set, a Basic auth
		header is prepared and the relay's url map is checked up front.
		"""
		self._mIsFirstSearch = pIsFirstSearch
		self._mCveDict = {}
		self._mHttpProxies = {}
		self._mProcessingErrors = []
		self._mApacheLoungeThirdPartiesVersions = {}
		self._mCveSearchSourcePerProduct = {}
		# honour the usual proxy environment variables for requests
		if 'https_proxy' in os.environ:
			self._mHttpProxies['https'] = os.environ['https_proxy']
		if 'http_proxy' in os.environ:
			self._mHttpProxies['http'] = os.environ['http_proxy']
		
		self._mHttpPool = requests.Session()

		if sDisableSSLCheck:
			self._mHttpPool.verify = False
			requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)

		# load product/cpe mapping
		# NOTE(review): loaded from the current working directory while every
		# other input is read from pSourceFolder - confirm this is intended
		self._mThirdPartyMetadata = None
		with open('./third_party_product_metadata.json','r',encoding='UTF-8') as fd:
			self._mThirdPartyMetadata = json.load(fd)
			del self._mThirdPartyMetadata['$schema']

		# load validation scheme
		with open(os.path.join(pSourceFolder,'third_party_product_metadata.schema.json'),'r',encoding='UTF-8') as fd:
			lValidationSchema = json.load(fd)
			jsonschema.validate(instance=self._mThirdPartyMetadata,schema=lValidationSchema)
		
		self._mThirdPartyVersions = None

		# load all input file
		(self._mModules, self._mProducts, self.mReleaseVersion) = self._loadModules(pSourceFolder)

		# list used products
		self._mUsedProducts = set()
		for m in self._mModules:
			lModule = self._mModules[m]
			self._mUsedProducts.add(lModule['product'])
			if 'subproduct' in lModule:
				self._mUsedProducts.add(lModule['subproduct'])
		
		# list used OS
		self._mUsedOS = set()
		for m in self._mModules:
			lModule = self._mModules[m]
			self._mUsedOS.add(lModule['os'])

		# load reviews
		with open(os.path.join(pSourceFolder,'known_cve_reviews.json'),'r',encoding='UTF-8') as fd:
			self._mReviews = json.load(fd)
			del self._mReviews['$schema']
		with open(os.path.join(pSourceFolder,'known_cve_reviews.schema.json'),'r',encoding='UTF-8') as fd:
			lValidationSchema = json.load(fd)
			jsonschema.validate(instance=self._mReviews,schema=lValidationSchema)
		self._mThcApiKey = None
		if not pThcApiKey is None:
			# precompute the Basic auth header used for every THC request
			self._mThcApiKey = 'Basic ' + base64.b64encode(('thc:'+pThcApiKey).encode('ascii')).decode('ascii')
			lHttpResponse = self._mHttpPool.get('https://thc.apps.3djuump.com/url-map',
								headers={'User-Agent':sUserAgent,'Authorization':self._mThcApiKey},
								proxies=self._mHttpProxies)
			if lHttpResponse.status_code != 200:
				print(lHttpResponse.text)
				raise Exception('Fail to retrieve THC url mapping')
			lMappings = lHttpResponse.json()
			# ensure that we have expected mappings
			lExpected = set(['nvd','openssl'])
			lFound = set()
			for m in lMappings:
				lFound.add(m['url'])
			lMissing = lExpected-lFound
			if len(lMissing) > 0:
				raise Exception('Got missing mappings in THC %s' % lMissing)
	
	def retrieveEndOfLifeInfo(self):
		"""Attach an 'eolDate' (YYYY-MM-DD) to modules tracked on endoflife.date.

		Each product's release list is fetched once; every module is then
		matched to the release cycle sharing a version-digit prefix and
		inherits that cycle's latest end-of-support date.  Modules older
		than every listed cycle are assumed long end-of-life'd and get the
		oldest known date.
		"""
		lProductEolInfo = {}
		
		for p in self._mProducts:
			lProduct = self._mProducts[p]
			lEndOfLifeDateProductName = self._mThirdPartyMetadata[p].get('endoflife.date',None)
			if lEndOfLifeDateProductName is None:
				continue
			lInfoPerVersion = {}
			lHttpResponse = self._mHttpPool.get('https://endoflife.date/api/v1/products/%s'%lEndOfLifeDateProductName,
							headers={'User-Agent':sUserAgent},
							proxies=self._mHttpProxies)
			if lHttpResponse.status_code != 200:
				print(lHttpResponse.text)
				print(lHttpResponse.headers)
				# fixed: was lProduct['endoflife.date'], which raised KeyError -
				# that key lives in the metadata dict, not in the product dict
				self._mProcessingErrors.append('Failed to retrieve eol info for %s' % lEndOfLifeDateProductName)
				continue
			lInfo = lHttpResponse.json()
			for r in lInfo['result']['releases']:
				lParsedVersion = version.parse(r['name'])
				# isEol Whether the release cycle is EOL.
				# isEoas Whether the active support phase is over for the release cycle. This field is not provided when the product does not have an active support phase.
				# isEoes Whether the extended support phase is over for the release cycle. This field is not provided when the product does not have an extended support phase. This field is null when the release cycle is not eligible for extended support.
				# keep the latest of the eol/eoas/eoes dates
				lEndOfLifeDate = None
				for k in ['eolFrom','eoasFrom','eoesFrom']:
					if not k in r or r[k] is None:
						continue
					lDate = datetime.datetime.strptime(r[k],'%Y-%m-%d')
					if lEndOfLifeDate is None or lDate > lEndOfLifeDate:
						lEndOfLifeDate = lDate
				lInfoPerVersion[lParsedVersion]= lEndOfLifeDate
			lProductEolInfo[p] = lInfoPerVersion
		for m in self._mModules:
			lModule = self._mModules[m]
			lProduct = lModule['product']
			if not lProduct in lProductEolInfo:
				continue
			
			lModuleVersion = version.parse(convertVersion2CpeVersion(lModule['version'],lModule['name']))
			lModuleVersionSplit = lModuleVersion.release
			lEolBestVersionMatch = None
			lOldestEolDate = None
			lIsVersionOlderThanAllListed = True
			for v in lProductEolInfo[lProduct]:
				if not lProductEolInfo[lProduct][v] is None:
					lOldestEolDate = lProductEolInfo[lProduct][v] if lOldestEolDate is None else min(lOldestEolDate,lProductEolInfo[lProduct][v])
				# check if this module version matches this eol version and if it is a better match than previous one (matches more digits)
				# NOTE(review): a candidate is retained as soon as its first digit
				# matches even if later digits differ - confirm this heuristic
				lVersionCandidateSplit = v.release
				if lModuleVersion >= v:
					lIsVersionOlderThanAllListed = False
				for i in range(0,min(len(lVersionCandidateSplit),len(lModuleVersionSplit))):
					if lVersionCandidateSplit[i] != lModuleVersionSplit[i]:
						break
					if lEolBestVersionMatch is None or len(lVersionCandidateSplit) > len(lEolBestVersionMatch.release):
						lEolBestVersionMatch = v
			if not lEolBestVersionMatch is None and not lProductEolInfo[lProduct][lEolBestVersionMatch] is None:
				lModule['eolDate'] = lProductEolInfo[lProduct][lEolBestVersionMatch].strftime('%Y-%m-%d')
			elif lEolBestVersionMatch is None and not lOldestEolDate is None and lIsVersionOlderThanAllListed:
				# our version is not listed on endoflife.date and is older than all other listed version
				# assume this is a very old product that reached its end of life
				lModule['eolDate'] = lOldestEolDate.strftime('%Y-%m-%d')
	
	def _extractCpeProductAndVersion(self, pModule):
		"""Return the (productname, version) pair to use for CPE lookups.

		When a module embeds another component ('subproduct'), CVE matching
		must target that embedded component instead of the module itself.
		"""
		if 'subproduct' in pModule:
			return (pModule['subproduct'], pModule['subproductversion'])
		return (pModule['product'], pModule['version'])

	def _computeModuleUniqueIdAndCpeStrings(self, pModule):
		"""Fill pModule['cpe'] (versioned CPE strings) and pModule['id'].

		CPE templates from the product metadata get their version field
		replaced by the module's normalized version.  The id is built from
		the product/version fields plus an md5 digest of the identifying
		fields so two distinct module entries cannot collide.
		"""
		(lProductName, lVersion) = self._extractCpeProductAndVersion(pModule)
		lCpeVersion = convertVersion2CpeVersion(lVersion, lProductName)
		lVersionedCpes = []
		for lCpeTemplate in (self._mThirdPartyMetadata[lProductName].get('cpe', None) or []):
			lFields = lCpeTemplate.split(':')
			# a well-formed cpe 2.3 string has 13 ':'-separated fields,
			# the 6th one being the version
			assert(len(lFields) == 13)
			lFields[5] = lCpeVersion
			lVersionedCpes.append(':'.join(lFields))
		pModule['cpe'] = lVersionedCpes

		# digest over the identifying fields keeps ids stable across runs
		lIdentity = {k: pModule[k] for k in ['name','os','product','version','subproduct','subproductversion'] if k in pModule}
		lDigest = hashlib.md5(json.dumps(lIdentity, sort_keys=True).encode('UTF-8')).hexdigest()

		lIdParts = [pModule[k] for k in ['product','version','subproduct','subproductversion'] if k in pModule]
		lIdParts.append(lDigest)
		pModule['id'] = ' '.join(lIdParts)

	def _loadModules(self,pSourceFolder : str):
		"""Load every third_party_module_list_*.json file in *pSourceFolder*.

		Each file is schema-validated; its modules are tagged with the file's
		os and indexed by their computed unique id.  All files must declare
		the same release version.

		Returns (modules-by-id, products-by-name, release version).
		"""
		lRes = {}
		lProducts = {}
		lReleaseVersion = None
		# load validation scheme
		with open(os.path.join(pSourceFolder,'third_party_module_list.schema.json'),'r',encoding='UTF-8') as fd:
			lValidationSchema = json.load(fd)

		for fn in os.listdir(pSourceFolder):
			if not fn.startswith('third_party_module_list_'):
				continue
			with open(os.path.join(pSourceFolder,fn),'r',encoding='UTF-8') as fd:
				lFileContent = json.load(fd)
				jsonschema.validate(instance=lFileContent,schema=lValidationSchema)
				if lReleaseVersion is None:
					lReleaseVersion = lFileContent['version']
				elif lReleaseVersion != lFileContent['version']:
					# all module list files must target the same release
					raise Exception()
				lOs = lFileContent['os']
				for m in lFileContent['modules']:
					m['os'] = lOs

					if not m['product'] in lProducts:
						lProducts[m['product']] = {
							'changelogurltpl': self._mThirdPartyMetadata[m['product']].get('changelogurltpl',None),
							'homeurl': self._mThirdPartyMetadata[m['product']].get('homeurl',None),
							'cvesearchsources':[]
						}
					m['isupdatecandidate'] = False
					m['publishedAfterRelease'] = False
					self._computeModuleUniqueIdAndCpeStrings(m)
					# ids embed an md5 of identifying fields; a clash means duplicates
					assert(not m['id'] in lRes)
					lRes[m['id']] = m

		assert(not lReleaseVersion is None)
		return (lRes,lProducts, lReleaseVersion)

	def retrieveThirdPartyVersions(self):
		"""Populate self._mThirdPartyVersions for every used product.

		Each product's 'updatesearch' metadata drives the fetch: a git tag
		listing (optionally filtered through the github releases api to drop
		prereleases/drafts), an html page, or a plain text listing.  Result
		maps product -> {'versions': sorted list, 'aliases': tag -> version},
		or None when update search is disabled for the product.
		"""
		self._mThirdPartyVersions = {}

		# do not try to fetch version from cpe, they contains bad version
		# like cpe:2.3:a:apache:http_server:11.1.1.9.0:*:*:*:*:*:*:* https://nvd.nist.gov/products/cpe/detail/004D44C4-B158-44AE-8A56-318C85D8E79E?namingFormat=2.3&orderBy=CPEURI&keyword=cpe%3A2.3%3Aa%3Aoracle%3Ahttp_server&status=FINAL%2CDEPRECATED
		# this would requires to fetch each cpe info to remove deprecated ones, and this would take too many time
		

		print('Fetch version of third party components from update info src')
		start_time = time.time()
		lCpeToFetch = []
		for e in self._mThirdPartyMetadata:
			if not e in self._mUsedProducts:
				continue
			lInfo=self._mThirdPartyMetadata[e]['updatesearch']
			if not lInfo:
				# update search disabled for this product
				self._mThirdPartyVersions[e] = None
				continue
			self._mThirdPartyVersions[e] = {'versions':set(),'aliases':{}}
			
			if 'cpe' in self._mThirdPartyMetadata[e]:
				for cpe in self._mThirdPartyMetadata[e]['cpe']:
					lCpeToFetch.append(cpe)
			
			start_time2 = time.time()
			lRe = re.compile(lInfo['versionpattern'])
			lVersions = set()
			lAliases = {}
			if lInfo['type'] in ['git','git+githubapi']:
				# if this git repository is hosted on github, use github api to discard non stable tags
				# some maintainer will use valid version number without postfix for beta/prerelease
				lTagsToIgnore = set()
				if lInfo['type'] == 'git+githubapi':
					lMatch = re.fullmatch(r'https://github\.com/([^/]*)/([^/]*?)(\.git)?',lInfo['url'])
					if not lMatch is None:
						lGitHubReleasesUrl = 'https://api.github.com/repos/%s/%s/releases' % (lMatch.group(1),lMatch.group(2))
						lHttpResponse = self._mHttpPool.get(lGitHubReleasesUrl,
							headers={'User-Agent':sUserAgent},
							proxies=self._mHttpProxies)
						if lHttpResponse.status_code == 403 and 'rate' in lHttpResponse.text:
							self._mProcessingErrors.append('%s update informations might be inaccurate, github api rate limit was hit (X-RateLimit-Limit:%s, X-RateLimit-Reset:%s)' % (e,lHttpResponse.headers['X-RateLimit-Limit'],lHttpResponse.headers['X-RateLimit-Reset']))
						elif lHttpResponse.status_code != 200:
							print(lHttpResponse.text)
							print(lHttpResponse.headers)
							self._mProcessingErrors.append('Failed to retrieve release list from github api for %s' % e)
						else:
							lReleases = lHttpResponse.json()
							for r in lReleases:
								if not lRe.fullmatch(r['tag_name']):
									continue
								if r['prerelease'] == True or r['draft'] == True:
									lTagsToIgnore.add(r['tag_name'])
							if len(lTagsToIgnore) == 0:
								self._mProcessingErrors.append('No tags to ignore for %s consider changing type to git to save github ratelimit' % (e))
				
				(stdout,stderr,rc) = _execCmd(['git','ls-remote','--tags',lInfo['url']],False)
				if rc != 0:
					print(stdout)
					print(stderr)
					self._mProcessingErrors.append('Failed to check for %s updates' % e)
					continue
				lMatchedCommits = {}
				lCommitTags = {}
				for r in stdout.split('\n'):
					r = r.strip()
					if r == '':
						continue
					# handle git references <rev>^{} https://mirrors.edge.kernel.org/pub/software/scm/git/docs/gitrevisions.html
					if r.endswith('^{}'):
						r = r[:-3]
					lCommitId = r.split('\t')[0]
					lTagName = r.split('\t')[-1]
					assert(lTagName.startswith('refs/tags/'))
					lTagName = lTagName[10:]
					if lTagName in lTagsToIgnore:
						continue
					lExtractedVersions = processPotentialVersionString(lVersions,lRe,lTagName)
					if len(lExtractedVersions) > 0:
						assert(len(lExtractedVersions) == 1)
						lMatchedCommits[lCommitId] = lExtractedVersions[0]
					elif not lCommitId in lCommitTags:
						lCommitTags[lCommitId] = [lTagName]
					else:
						lCommitTags[lCommitId].append(lTagName)
				# non-version tags pointing at the same commit as a version tag
				# become aliases of that version
				for c in lMatchedCommits:
					if c in lCommitTags:
						for v in lCommitTags[c]:
							lAliases[v] = lMatchedCommits[c]
			elif lInfo['type'] == 'html' or lInfo['type'] == 'text':
				lHttpResponse = None
				try:
					lHttpResponse = self._mHttpPool.get(lInfo['url'],
					headers={'User-Agent':sUserAgent},
					proxies=self._mHttpProxies)
					if lHttpResponse.status_code != 200:
						print(lHttpResponse.text)
						lHttpResponse = None
				except Exception as ex:
					print('Got an exception while trying to contact %s %s' % (lInfo['url'],ex))
					lHttpResponse = None
				if lHttpResponse is None:
					self._mProcessingErrors.append('Failed to check for %s updates' % e)
					continue
				if lInfo['type'] == 'html':
					# scan every text node of the page for version numbers
					lParser = VersionHTMLParser(lRe,lVersions)
					lParser.feed(lHttpResponse.text)
				else:
					for r in lHttpResponse.text.split('\n'):
						processPotentialVersionString(lVersions,lRe,r)
			else:
				raise Exception()
			if len(lVersions) == 0:
				self._mProcessingErrors.append('No version found for %s' % e)
			self._mThirdPartyVersions[e] = {'versions': lVersions,'aliases':lAliases}
			print('  %s : found %d versions, %d aliases in %.1fs' % (e,len(lVersions),len(lAliases),time.time() - start_time2 ))
		print('Successfully fetch third party versions in : %s (%.1fs)' % (time.strftime('%M:%S', time.gmtime(int(time.time() - start_time))),time.time() - start_time))

		for e in self._mThirdPartyMetadata:
			if not e in self._mThirdPartyVersions or self._mThirdPartyVersions[e] is None:
				continue
			# freeze the version set into a sorted list for later consumption
			self._mThirdPartyVersions[e]['versions'] = sorted(list(self._mThirdPartyVersions[e]['versions']))
		
	def _registerCve(self, pCveId : str, pDesc, pSources : dict, pCVSSList : list, pModules : list):
		"""Create or augment the entry for *pCveId* in self._mCveDict.

		pSources maps source url -> display label; pModules lists the ids
		of modules affected by this CVE.  Safe to call repeatedly for the
		same CVE: new data is merged into the existing entry.
		"""
		if not pCveId in self._mCveDict:
			self._mCveDict[pCveId] = {
				'cveId': pCveId,
				'modules': {
					'affected': [],
					'unaffected': []
				},
				'components': {
					'affected': [],
					'unaffected': []
				},
				'cvss': [],
				'descriptions': [],
				'notes': [],
				'sources': {},
				'reviewed': False,
				'publishedAfterRelease': False
			}
		lEntry = self._mCveDict[pCveId]
		if pDesc is not None:
			lEntry['descriptions'].append(pDesc)
		lEntry['cvss'].extend(pCVSSList)
		lEntry['sources'].update(pSources)
		lEntry['modules']['affected'].extend(pModules)
	
	def _updateCveAffectedModules(self, pCveId : str, pSource : dict, pModules : list, pIsFixed : bool, pComment):
		"""Record *pModules* as fixed or affected for an already-known CVE.

		pSource is merged into the CVE's source map; pComment (optional)
		is appended to its notes.
		"""
		lEntry = self._mCveDict[pCveId]
		lEntry['sources'] = lEntry['sources'] | pSource
		if pComment is not None:
			lEntry['notes'].append(pComment)
		lTarget = lEntry['modules']['unaffected' if pIsFixed else 'affected']
		lTarget.extend(pModules)

	def retrieveCveFromCurl(self):
		"""Collect CVEs affecting the deployed curl/libcurl versions.

		curl.se publishes one vuln-<version>.json per release listing every
		CVE affecting it; each matching CVE is registered against the module
		ids embedding that curl version.
		"""
		if not 'Curl' in self._mUsedProducts:
			return
		print('Fetch cve from curl.se')
		lSubTimeStart = time.time()
		lFoundAffectedCve = {}
		self._mCveSearchSourcePerProduct.setdefault('Curl',set()).add('https://curl.se')

		# map curl version -> set of module ids embedding it
		lCurlVersions = dict()
		for m in self._mModules:
			lModule = self._mModules[m]
			for cpe in lModule['cpe']:
				if 'curl' in cpe:
					(_,lVersion) = self._extractCpeProductAndVersion(lModule)
					lVersion = convertVersion2CpeVersion(lVersion,'libcurl')
					lCurlVersions.setdefault(lVersion,set()).add(lModule['id'])
		
		assert(len(lCurlVersions) > 0)
		
		
		for v in lCurlVersions:
			lHttpResponse = self._mHttpPool.get('https://curl.se/docs/vuln-%s.json' % v,
				headers={'User-Agent':sUserAgent},
				proxies=self._mHttpProxies)

			if lHttpResponse.status_code != 200:
				print(lHttpResponse.text)
				self._mProcessingErrors.append('Failed to retrieve CVE from https://curl.se/docs/vuln-%s.json' %v)
				continue
			lGotProcessingErrors = False
			for e in lHttpResponse.json():
				lId = e['id']
				# ids are of the form 'CURL-CVE-YYYY-NNNN'; strip the CURL- prefix
				if not lId.startswith('CURL-CVE-'):
					lGotProcessingErrors = True
					continue
				lCveId = lId[5:]
				lFoundAffectedCve.setdefault(lCveId,{'details':e['details'],'modules':set()})['modules'].update(lCurlVersions[v])

			if lGotProcessingErrors:
				self._mProcessingErrors.append('Got errors during processing of https://curl.se/docs/vuln-%s.json' %v)

		for lCveId in lFoundAffectedCve:
			lCveInfo = lFoundAffectedCve[lCveId]
			self._registerCve(lCveId,lCveInfo['details'],{'https://curl.se/docs/%s.html'%lCveId:'%s @curl.se'%lCveId},[], list(lCveInfo['modules']))

		lSubTimeEnd = time.time()
		print('Found %d cve from curl.se in : %s (%.1fs)' % (len(lFoundAffectedCve),time.strftime('%M:%S', time.gmtime(int(lSubTimeEnd - lSubTimeStart))),lSubTimeEnd - lSubTimeStart))

	def retrieveCveFromOpenSSL(self):
		"""Collect CVEs affecting the deployed OpenSSL versions.

		The openssl-library.org security index is scanned for CVE ids; each
		CVE's json record (optionally fetched through the THC relay) yields
		affected version ranges, which are matched against our deployed
		versions before registration.
		"""
		if not 'OpenSSL' in self._mUsedProducts:
			return
		print('Fetch cve from openssl-library.org')
		lSubTimeStart = time.time()
		lFoundAffectedCve = set()
		self._mCveSearchSourcePerProduct.setdefault('OpenSSL',set()).add('https://openssl-library.org')

		# map openssl version -> set of module ids embedding it
		lOpenSSLVersions = dict()
		for m in self._mModules:
			lModule = self._mModules[m]
			for cpe in lModule['cpe']:
				if 'openssl' in cpe:
					(_,lVersion) = self._extractCpeProductAndVersion(lModule)
					lVersion = convertVersion2CpeVersion(lVersion,'openssl')
					lOpenSSLVersions.setdefault(lVersion,set()).add(lModule['id'])
		
		assert(len(lOpenSSLVersions) > 0)
		
		lHttpResponse = self._mHttpPool.get('https://openssl-library.org/news/secjson/',
			headers={'User-Agent':sUserAgent},
			proxies=self._mHttpProxies)
		if lHttpResponse.status_code != 200:
			print(lHttpResponse.text)
			self._mProcessingErrors.append('Failed to retrieve CVE from https://openssl-library.org/news/secjson/')
		else:
			lRe = re.compile('CVE-[0-9]+-[0-9]+',re.IGNORECASE)
			lCveToFetch = list(set([lCveId.lower() for lCveId in lRe.findall(lHttpResponse.text)]))
			lFoundCve = []
			for lCveId in lCveToFetch:
				lCveUrl = 'https://openssl-library.org/news/secjson/%s.json' % (lCveId)
				lHeaders = {'User-Agent':sUserAgent}
				if not self._mThcApiKey is None:
					lHeaders['Authorization'] = self._mThcApiKey
					lCveUrl = 'https://thc.apps.3djuump.com/request/openssl/news/secjson/%s.json' % (lCveId)
				lHttpResponse = self._mHttpPool.get(lCveUrl,
					headers=lHeaders,
					proxies=self._mHttpProxies)
				if lHttpResponse.status_code != 200:
					print(lHttpResponse.text)
					self._mProcessingErrors.append('Failed to retrieve CVE info from %s' % (lCveUrl))
					continue
				lCveInfo = lHttpResponse.json()
				try:
					lCveExtractedInfo ={
									'id':lCveId.upper(),
									'desc':[],
									'affectedranges':[]
								}
					if 'title' in lCveInfo['containers']['cna']:
						lCveExtractedInfo['desc'].append(lCveInfo['containers']['cna']['title'])
					for a in lCveInfo['containers']['cna']['affected']:
						for v in a['versions']:
							if v['status'] in ['affected']:
								if v['versionType'] in ['custom','semver']:
									lVersion = v['version']
									# ignore fips, 0.x and 1.x version their numbering was containing letters
									if lVersion.startswith('0.') or lVersion.startswith('1.') or lVersion.startswith('fips'):
										lCveExtractedInfo['affectedranges'].append((version.parse('0.0'),version.parse('1.999999999')))
										continue
									lFrom = version.parse(lVersion)
									lLessThanOrEqual = None
									if 'lessThan' in v:
										lLessThanOrEqual = version.parse(decrementVersion(v['lessThan']))
									elif 'lessThanOrEqual' in v:
										# fixed: was `elif 'lessThanOrEqual':` which is always
										# truthy, hiding the error branch below and crashing
										# with KeyError when neither bound is present
										lLessThanOrEqual = version.parse(v['lessThanOrEqual'])
									else:
										raise Exception('Unexpected version format')
									lCveExtractedInfo['affectedranges'].append((lFrom,lLessThanOrEqual))
								else:
									raise Exception('Unhandled version type')
							elif not (v['status'] in ['unaffected']):
								raise Exception('Unhandled version status')
					assert(len(lCveExtractedInfo['affectedranges']) > 0)
					lFoundCve.append(lCveExtractedInfo)
				except Exception as e:
					self._mProcessingErrors.append('Error while processing %s : %s' % (lCveUrl,repr(e)))
			

			assert(len(lFoundCve)>0) 
			# register every CVE whose affected range contains a deployed version
			for cve in lFoundCve:
				for v in lOpenSSLVersions:
					lParsedVersion = version.parse(v)
					lAffected = False
					for (vs,ve) in cve['affectedranges']:
						if vs <= lParsedVersion and ve >= lParsedVersion:
							lAffected = True
					if not lAffected:
						continue
					lFoundAffectedCve.add(cve['id'])
					self._registerCve(cve['id'],'\n'.join(cve['desc']),{'https://openssl-library.org/news/vulnerabilities/index.html#%s'%cve['id']:'%s @openssl.org'%cve['id']},[], list(lOpenSSLVersions[v]))
			
		lSubTimeEnd = time.time()
		print('Found %d cve from openssl-library.org in : %s (%.1fs)' % (len(lFoundAffectedCve),time.strftime('%M:%S', time.gmtime(int(lSubTimeEnd - lSubTimeStart))),lSubTimeEnd - lSubTimeStart))

	def retrieveCveFromApache(self):
		"""Fetch CVEs affecting Apache httpd 2.4 from httpd.apache.org.

		No-op unless product 'Apache' or 'ApacheLounge' is in use. Downloads
		https://httpd.apache.org/security/vulnerabilities_24.html, extracts the
		<h1>/<dl> blocks with a regex (the served HTML is too malformed for a
		direct parse) and feeds them to ApacheSecurityHTMLParser. Each CVE's
		"Affects" field is parsed into a set of exact versions and/or one
		inclusive version interval, which is matched against the Apache
		versions declared in self._mModules; matches are registered through
		self._registerCve. Failures are appended to self._mProcessingErrors.
		"""
		if not 'Apache' in self._mUsedProducts and not 'ApacheLounge' in self._mUsedProducts:
			return
		print('Fetch cve from httpd.apache.org')
		lSubTimeStart = time.time()
		lFoundCve = set()
		self._mCveSearchSourcePerProduct.setdefault('Apache',set()).add('https://httpd.apache.org')
		self._mCveSearchSourcePerProduct.setdefault('ApacheLounge',set()).add('https://httpd.apache.org')

		# map each Apache version in use -> set of module ids using it
		lApacheVersions = dict()
		for m in self._mModules:
			lModule = self._mModules[m]
			for cpe in lModule['cpe']:
				if 'apache' in cpe:
					(_,lVersion) = self._extractCpeProductAndVersion(lModule)
					lApacheVersions.setdefault(convertVersion2CpeVersion(lVersion,'apache'),set()).add(lModule['id'])
		
		assert(len(lApacheVersions) > 0)
		lHttpResponse = self._mHttpPool.get('https://httpd.apache.org/security/vulnerabilities_24.html',
			headers={'User-Agent':sUserAgent},
			proxies=self._mHttpProxies)
		if lHttpResponse.status_code != 200:
			print(lHttpResponse.text)
			self._mProcessingErrors.append('Failed to retrieve CVE from https://httpd.apache.org/security/vulnerabilities_24.html')
			return

		lParser = ApacheSecurityHTMLParser()
		lOriginalHtml = lHttpResponse.text
		try:
			# returned html is messy, unclosed tags, etc so it is impossible to parse
			# first use a regular expression to extract <h1></h1><dl></dl> blocks then use an HTML parser
			lRe = re.compile(r'(\<h1[^>]*?\>[^<]*?\<\/h1\>\s*?\<dl\>.*?\<\/dl\>)',re.DOTALL)
			for e in lRe.findall(lOriginalHtml):
				lParser.startNewBlock()
				lParser.feed(e)
			assert(len(lParser.mCve) > 0)
		except:
			self._mProcessingErrors.append('Failed to parse content of https://httpd.apache.org/security/vulnerabilities_24.html')
			return
		
		for cveinfo in lParser.mCve:
			lCveId = cveinfo['id']
			# "Affects" is a comma-separated mix of exact versions and range
			# expressions; exact versions are collected in lAffectedVersions,
			# range expressions build at most one inclusive [start,end] interval
			lAffectedVersions = set()
			lAffectedIntervalStartIncluded = None
			lAffectedIntervalEndIncluded = None
			for a in cveinfo['affects'].split(','):
				a = a.strip()
				# exact version, e.g. '2.4.58'
				if re.fullmatch('^[0-9]+\\.[0-9]+\\.[0-9]+$',a):
					lAffectedVersions.add(version.parse(a))
					continue
				# '<=x.y.z' : inclusive upper bound
				lMatch = re.fullmatch('^<=([0-9]+\\.[0-9]+\\.[0-9]+)$',a)
				if not lMatch is None:
					assert(lAffectedIntervalEndIncluded is None)
					lAffectedIntervalEndIncluded = version.parse(lMatch.group(1))
					continue
				# '<x.y.z' : exclusive bound made inclusive by decrementing the version
				lMatch = re.fullmatch('^<([0-9]+\\.[0-9]+\\.[0-9]+)$',a)
				if not lMatch is None:
					assert(lAffectedIntervalEndIncluded is None)
					lAffectedIntervalEndIncluded = version.parse(decrementVersion(lMatch.group(1)))
					continue
				# 'a.b.c through x.y.z' : inclusive interval
				lMatch = re.fullmatch('^([0-9.]+) through ([0-9.]+)$',a)
				if not lMatch is None:
					assert(lAffectedIntervalEndIncluded is None)
					lAffectedIntervalStartIncluded = version.parse(lMatch.group(1))
					lAffectedIntervalEndIncluded = version.parse(lMatch.group(2))
					continue
				# 'before x.y.z' : exclusive upper bound, decremented
				lMatch = re.fullmatch('^before ([0-9.]+)$',a)
				if not lMatch is None:
					assert(lAffectedIntervalEndIncluded is None)
					lAffectedIntervalEndIncluded = version.parse(decrementVersion(lMatch.group(1)))
					continue
				# 'a.b.c before x.y.z' : inclusive start, exclusive (decremented) end
				lMatch = re.fullmatch('^([0-9.]+) before ([0-9.]+)$',a)
				if not lMatch is None:
					assert(lAffectedIntervalEndIncluded is None)
					lAffectedIntervalStartIncluded = version.parse(lMatch.group(1))
					lAffectedIntervalEndIncluded = version.parse(decrementVersion(lMatch.group(2)))
					continue
				
				# 'through x.y.z' : inclusive upper bound only
				lMatch = re.fullmatch('^through ([0-9.]+)$',a)
				if not lMatch is None:
					assert(lAffectedIntervalEndIncluded is None)
					lVersionSplit = lMatch.group(1).split('.')
					lAffectedIntervalEndIncluded = version.parse('.'.join(lVersionSplit))
					continue
				# '>=x.y.z' : inclusive lower bound only
				lMatch = re.fullmatch('^>=([0-9.]+)$',a)
				if not lMatch is None:
					assert(lAffectedIntervalStartIncluded is None)
					lVersionSplit = lMatch.group(1).split('.')
					lAffectedIntervalStartIncluded = version.parse('.'.join(lVersionSplit))
					continue
				# '!<x.y.z' ("not before") : same as an inclusive lower bound
				lMatch = re.fullmatch('^!<([0-9.]+)$',a)
				if not lMatch is None:
					assert(lAffectedIntervalStartIncluded is None)
					lVersionSplit = lMatch.group(1).split('.')
					lAffectedIntervalStartIncluded = version.parse('.'.join(lVersionSplit))
					continue
				self._mProcessingErrors.append('Unexpected "Affects" content in Apache CVE listing "%s"' % a)

			# match every Apache version we use against the parsed constraints
			for v in lApacheVersions:
				lParsedVersion = version.parse(v)
				lIsAffected = False
				lIsAffected = lIsAffected or (lParsedVersion in lAffectedVersions)
				lIsAffected = lIsAffected or (
					(not lAffectedIntervalEndIncluded is None) and (lAffectedIntervalEndIncluded >= lParsedVersion) and 
					(lAffectedIntervalStartIncluded is None or lAffectedIntervalStartIncluded <= lParsedVersion)
					)
				if not lIsAffected:
					continue
				lFoundCve.add(lCveId)
				self._registerCve(lCveId,'\n'.join(cveinfo['description']),{'https://httpd.apache.org/security/vulnerabilities_24.html':'%s @httpd.apache.org'%lCveId},[], list(lApacheVersions[v]))


		lSubTimeEnd = time.time()
		print('Found %d cve from httpd.apache.org in : %s (%.1fs)' % (len(lFoundCve),time.strftime('%M:%S', time.gmtime(int(lSubTimeEnd - lSubTimeStart))),lSubTimeEnd - lSubTimeStart))

	def retrieveApacheLoungeThirdPartyVersion(self):
		"""Collect bundled third-party library versions per Apache Lounge release.

		Scrapes https://www.apachelounge.com/Changelog-2.4.html, locating the
		release headers ("<day>-<month>-<year> Changes with Apache <x.y.z>")
		and the "Upgraded/Downgraded <lib> to <ver> from <ver>" lines below
		them. Fills self._mApacheLoungeThirdPartiesVersions with
		{apache_release_version: {lib_name_lowercase: version}}, then
		consolidates entries in ascending version order so each release
		inherits the third-party versions of previous releases. Problems are
		appended to self._mProcessingErrors.
		"""
		if not 'ApacheLounge' in self._mUsedProducts:
			return
		try:
			lHttpResponse = self._mHttpPool.get('https://www.apachelounge.com/Changelog-2.4.html',
				headers={'User-Agent':sUserAgent},
				proxies=self._mHttpProxies)
			if lHttpResponse.status_code != 200:
				print(lHttpResponse.text)
				raise Exception()
			lRoot = bs4.BeautifulSoup(lHttpResponse.text, 'html.parser')
		except Exception:
			# narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit propagate
			self._mProcessingErrors.append('Failed to retrieve Apache lounge third party versions from https://www.apachelounge.com/Changelog-2.4.html')
			return
		
		lObj = bs4FindPath(lRoot, [
			('html',0),
			('body',0),
			('table',0),
			('tr',0),
			('td',2),
			('table',0),
			('tr',0),
			('td',0),
			('table',0),
			('tr',0),
			('td',0),
		])
		if lObj is None:
			self._mProcessingErrors.append('Failed to parse Apache lounge change log')
			return

		# search for version separator
		# 08-January 2026 Changes with Apache 2.4.66
		# 23-January-2025 Changes with Apache 2.4.63
		lVersionRe = re.compile('([0-9]+)[- ]([a-zA-Z]+)[- ]([0-9]+) Changes with Apache ([0-9.]+)')
		# search third party version changes
		# *) Downgraded OpenSSL to 3.3.2 from 3.4.0 
		# *) Upgraded OpenSSL to 3.4.0 from 3.1.7
		lUpgradeRe = re.compile('(?:Downgraded|Upgraded) (\\S+) to ([0-9.]+) from [0-9.]+')
		lMonthMapping = {
			"jan":1,
			"feb":2,
			"mar":3,
			"apr":4,
			"may":5,
			"jun":6,
			"jul":7,
			"aug":8,
			"sep":9,
			"oct":10,
			"nov":11,
			"dec":12
		}
		lCurrentVersion = None
		for lNode in lObj.find_all():

			lVersionFindRes = lVersionRe.findall(lNode.text)
			if len(lVersionFindRes) == 1:
				(lDay,lMonthName,lYear,lApacheVer) = lVersionFindRes[0]
				lMonth = None
				for k in lMonthMapping:
					if lMonthName.lower().startswith(k):
						lMonth = lMonthMapping[k]
				if lMonth is None:
					# previously an unknown month name crashed in the %02d format below
					self._mProcessingErrors.append('Unexpected month name "%s" in Apache lounge change log' % lMonthName)
					continue
				# sortable key: apache version + local yymmdd tag, e.g. 2.4.63-250123
				lCurrentVersion = version.parse('%s-%s%02d%s' % (lApacheVer,lYear[2:],lMonth,lDay))
				self._mApacheLoungeThirdPartiesVersions[lCurrentVersion]={}
				continue
			if lCurrentVersion is None:
				# an upgrade line before the first release header has no owner
				# (previously raised NameError on lCurrentVersion)
				continue
			# dedicated loop variable: was 'e', shadowing the outer loop variable
			for lChange in lUpgradeRe.findall(lNode.text):
				self._mApacheLoungeThirdPartiesVersions[lCurrentVersion][lChange[0].lower()] = version.parse(lChange[1])
		
		# entries only contain changes from the previous release; consolidate in
		# ascending order so every release inherits libraries it did not change
		lPreviousVersion = None
		for v in sorted(self._mApacheLoungeThirdPartiesVersions.keys()):
			if not lPreviousVersion is None:
				for k in self._mApacheLoungeThirdPartiesVersions[lPreviousVersion]:
					if not k in self._mApacheLoungeThirdPartiesVersions[v]:
						self._mApacheLoungeThirdPartiesVersions[v][k] = self._mApacheLoungeThirdPartiesVersions[lPreviousVersion][k]
			lPreviousVersion = v

	def retrieveCveFromPg(self):
		"""Fetch CVEs affecting PostgreSQL from www.postgresql.org.

		No-op unless product 'PostgreSQL' is in use. Scrapes the security page
		table; for each CVE row, a used major.minor version is affected when
		its major appears in the "affected" cell and its minor is strictly
		below the fixed minor for that major. Matches are registered through
		self._registerCve; problems are appended to self._mProcessingErrors.
		"""
		if not 'PostgreSQL' in self._mUsedProducts:
			return
		print('Fetch cve from postgresql.org')
		lSubTimeStart = time.time()
		lFoundCve = set()
		self._mCveSearchSourcePerProduct.setdefault('PostgreSQL',set()).add('https://postgresql.org')

		try:
			# map each PostgreSQL version in use -> set of module ids using it
			lPgVersions = dict()
			for m in self._mModules:
				lModule = self._mModules[m]
				for cpe in lModule['cpe']:
					if 'postgresql' in cpe:
						(_,lVersion) = self._extractCpeProductAndVersion(lModule)
						lPgVersions.setdefault(convertVersion2CpeVersion(lVersion,'postgresql'),set()).add(lModule['id'])
			
			assert(len(lPgVersions) > 0)
			lHttpResponse = self._mHttpPool.get('https://www.postgresql.org/support/security/',
				headers={'User-Agent':sUserAgent},
				proxies=self._mHttpProxies)
			if lHttpResponse.status_code != 200:
				print(lHttpResponse.text)
				self._mProcessingErrors.append('Failed to retrieve CVE from https://www.postgresql.org/support/security/')
				return

			lRoot = bs4.BeautifulSoup(lHttpResponse.text, 'html.parser')
			lObj = bs4FindPath(lRoot,[
				('html',0),
					('body',0),
					('div',1),
					('div',0),
					('div',1),
					('div',0),
					('table',0),
					('tbody',0)
			])
			if lObj is None:
				self._mProcessingErrors.append('Failed to parse PostgreSQL security page')
				return
			lFoundAtLeastOneCve = False
			for c in lObj.find_all('tr'):
				lCveId = bs4FindPath(c,[('td',0),('span',0),('a',0)])
				lAffectedVersions = bs4FindPath(c,[('td',1)])
				lFixedVersions = bs4FindPath(c,[('td',2)])
				# the description cell is now None-checked too: it was previously
				# dereferenced unconditionally, hiding parse failures behind the
				# broad except below
				lDescriptionCell = bs4FindPath(c,[('td',4)])
				if lCveId is None or lAffectedVersions is None or lFixedVersions is None or lDescriptionCell is None:
					self._mProcessingErrors.append('Failed to parse PostgreSQL CVE record')
					return
				lDescription = lDescriptionCell.text
				lCveId = lCveId.text
				lAffectedVersions = [e.strip() for e in lAffectedVersions.text.strip().split(',')]
				lFixedVersions = [e.strip() for e in lFixedVersions.text.strip().split(',')]
				lFoundAtLeastOneCve = True
				for v in lPgVersions:
					lMajor = v.split('.')[0]
					lMinor = v.split('.')[1]
					if not lMajor in lAffectedVersions:
						continue
					for lFixedVersion in lFixedVersions:
						lFixedVersion = lFixedVersion.strip().split('.')
						assert(len(lFixedVersion) == 2)
						if lFixedVersion[0] != lMajor:
							continue
						# our minor is at or above the fixed minor: already patched
						if int(lMinor) >= int(lFixedVersion[1]):
							continue
						lFoundCve.add(lCveId)
						self._registerCve(lCveId,lDescription,{'https://www.postgresql.org/support/security/%s/' % lCveId:'%s @postgresql.org'%lCveId},[], list(lPgVersions[v]))
			if not lFoundAtLeastOneCve:
				self._mProcessingErrors.append('No CVE found on PostgreSQL security page')
				return
			lSubTimeEnd = time.time()
			print('Found %d cve from postgresql.org in : %s (%.1fs)' % (len(lFoundCve),time.strftime('%M:%S', time.gmtime(int(lSubTimeEnd - lSubTimeStart))),lSubTimeEnd - lSubTimeStart))
		except Exception:
			# narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit propagate
			self._mProcessingErrors.append('Failed to retrieve CVE from postgresql.org')
			return
			
	def retrieveCveFromNvd(self, pNvdApiKey: str):
		"""Fetch CVEs from nvd.nist.gov for every CPE string of every module.

		pNvdApiKey: NVD api key, or None for anonymous access; ignored when a
		THC api key was provided at construction time (THC has precedence).
		Every fetched CVE is registered (with its English description when
		present, and its CVSS v3.1 metrics only) against the modules declaring
		the originating CPE string. Raises when a CPE query result is
		paginated, which is not handled yet.
		"""
		# map each CPE string -> set of module ids declaring it
		lCpeToFetch = dict()
		for m in self._mModules:
			lModule = self._mModules[m]
			for cpe in lModule['cpe']:
				lCpeToFetch.setdefault(cpe,set()).add(lModule['id'])
				self._mCveSearchSourcePerProduct.setdefault(lModule['product'],set()).add('https://nvd.nist.gov')
		lFetchMode = 'without NVD api key'
		if not self._mThcApiKey is None:
			lFetchMode = 'using THC'
		elif not pNvdApiKey is None:
			lFetchMode = 'with NVD api key'
			
		print('Fetch cve from nvd.nist.gov using %d cpe strings, %s' % (len(lCpeToFetch), lFetchMode))
		lSubTimeStart = time.time()
		lFoundCve = set()
		
		# queries run concurrently via asyncio; sorted input keeps runs deterministic
		lCpeFetcher = CpeFetcher(pNvdApiKey,self._mThcApiKey,'CVE',self._mHttpProxies.get('https',None))
		lFetchedCpe = asyncio.run(lCpeFetcher.fetchCpeInfo(sorted(list(lCpeToFetch.keys()))))
		for cpe in lFetchedCpe:
			lCpeData = lFetchedCpe[cpe]
			if lCpeData['resultsPerPage'] != lCpeData['totalResults']:
				raise Exception('Too many results, we need to handle this case')
			for v in lCpeData['vulnerabilities']:
				lCveSrcInfo = v['cve']
				lCveId = lCveSrcInfo['id']
				# keep the English description; stays None when none is provided
				lCveDesc = None
				for d in lCveSrcInfo['descriptions']:
					if not d['lang'] == 'en':
						continue
					lCveDesc = d['value']
				lCpeSplitRes = cpe.split(':')
				# fields 3:6 of a cpe 2.3 string are vendor:product:version
				lSources = {
					'https://nvd.nist.gov/products/cpe/search/results?namingFormat=2.3&keyword=%s' % urllib.parse.quote_plus(cpe) : 'cpe %s @nvd.nist.org' % (':'.join(lCpeSplitRes[3:6])),
					'https://nvd.nist.gov/vuln/detail/%s' % lCveId : '%s @nvd.nist.org' % lCveId,
				}
				lCvssList = []
				# only CVSS v3.1 metrics are extracted
				if 'metrics' in lCveSrcInfo and 'cvssMetricV31' in lCveSrcInfo['metrics']:
					for cvss in lCveSrcInfo['metrics']['cvssMetricV31']:
						lCvssInfo = {
							'vectorString':cvss['cvssData']['vectorString'],
							'version':cvss['cvssData']['version'],
							'baseScore':cvss['cvssData']['baseScore'],
							'baseSeverity':cvss['cvssData']['baseSeverity'],
							'source':cvss['source']
						}
						lVectorForUrl = lCvssInfo['vectorString'].replace('CVSS:%s/'%lCvssInfo['version'],'')
						# NIST-sourced metrics link to the named calculator entry
						if cvss['source'] in ['nvd@nist.gov']:
							lCvssInfo['url'] = 'https://nvd.nist.gov/vuln-metrics/cvss/v3-calculator?name=%s&vector=%s&version=%s&source=NIST' % (lCveId,lVectorForUrl,lCvssInfo['version'])
						else:
							lCvssInfo['url'] = 'https://nvd.nist.gov/vuln-metrics/cvss/v3-calculator?vector=%s&version=%s' % (lVectorForUrl,lCvssInfo['version'])
						lCvssList.append(lCvssInfo)

				self._registerCve(lCveId,lCveDesc,lSources,lCvssList,list(lCpeToFetch[cpe]))
				lFoundCve.add(lCveId)
			
		lSubTimeEnd = time.time()
		print('Found %d cve from nvd.nist.gov in : %s (%.1fs)' % (len(lFoundCve),time.strftime('%M:%S', time.gmtime(int(lSubTimeEnd - lSubTimeStart))),lSubTimeEnd - lSubTimeStart))

	def propagateSubProductCveToParentProduct(self):
		"""Mark parent-product modules as affected when a sub-product module is.

		For every CVE, each affected module declaring a 'subproduct' causes all
		non-subproduct modules of the same product and version to be appended
		to that CVE's affected module list (when not already present).
		"""
		for lCveInfo in self._mCveDict.values():
			lAffectedList = lCveInfo['modules']['affected']
			for lModuleId in lAffectedList:
				lModule = self._mModules[lModuleId]
				if 'subproduct' not in lModule:
					continue
				# propagate to every parent module matching product+version
				for lCandidateId, lCandidate in self._mModules.items():
					if 'subproduct' in lCandidate:
						continue
					if lCandidate['product'] != lModule['product']:
						continue
					if lCandidate['version'] != lModule['version']:
						continue
					if lCandidateId not in lAffectedList:
						lAffectedList.append(lCandidateId)
			
	def dumpCache(self, pDst):
		"""Serialize the current CVE search state to a JSON cache file.

		pDst: destination file path. Writes the search date, release version,
		third-party versions, CVE dict, modules and products. Version objects
		in 'thirdpartyversions' are stringified on a deep copy so the live
		self._mThirdPartyVersions is left untouched (the original code aliased
		the dict and replaced its Version objects by strings in place,
		corrupting in-memory state for any later use); loadCache re-parses
		them on the way back.
		"""
		# deep copy: stringifying versions must not mutate the live structure
		lThirdPartyVersions = copy.deepcopy(self._mThirdPartyVersions)
		for e in lThirdPartyVersions:
			if lThirdPartyVersions[e] is None:
				continue
			lThirdPartyVersions[e]['versions'] = [str(v) for v in lThirdPartyVersions[e]['versions']]

		with open(pDst,'w',encoding='UTF-8') as f:
			json.dump({
				'cveSearchDate':datetime.datetime.now().isoformat(timespec='minutes'),
				'version': self.mReleaseVersion,
				'thirdpartyversions':lThirdPartyVersions,
				'cve': self._mCveDict,
				'modules' : self._mModules,
				'products': self._mProducts
			},f,indent='\t')
	
	def loadCache(self, pDst):
		"""Restore the CVE search state previously written by dumpCache.

		pDst: path of the JSON cache file. The version strings stored under
		'thirdpartyversions' are re-parsed into version objects.
		"""
		with open(pDst,'r',encoding='UTF-8') as f:
			lCacheContent = json.load(f)
		self._mCveDict = lCacheContent['cve']
		self._mModules = lCacheContent['modules']
		self._mProducts = lCacheContent['products']
		self.mReleaseVersion = lCacheContent['version']
		self._mThirdPartyVersions = lCacheContent['thirdpartyversions']
		# turn version strings back into comparable version objects
		for lEntry in self._mThirdPartyVersions.values():
			if lEntry is None:
				continue
			lEntry['versions'] = [version.parse(v) for v in lEntry['versions']]

	def _getDebianProductName(self, pModule):
		lDebianProductNames = []
		for e in self._mThirdPartyMetadata[pModule['product']].get('debian',[]):
			if isinstance(e,str):
				lDebianProductNames.append(e)
			elif isinstance(e,dict):
				lBaseStr = e['name']
				if e['type'] == 'append_major_version':
					lBaseStr = lBaseStr + pModule['version'].split('.')[0]
				elif e['type'] == 'append_major_minor_version':
					lBaseStr = lBaseStr + pModule['version'].split('.')[0] + '.' +  pModule['version'].split('.')[1]
				else:
					raise Exception()
				lDebianProductNames.append(lBaseStr)
			else:
				raise Exception()
		return lDebianProductNames

	def checkForDebianFixes(self):
		"""Check on security-tracker.debian.org whether known CVEs are fixed by debian packages.

		Only runs when one of the targeted OS is a debian flavour or docker
		(docker deployment is assumed debian-based, see sDockerDebianName).
		Downloads the full debian security tracker JSON database, then for
		every system-library module resolves its debian package name(s) and,
		for each CVE already present in self._mCveDict that affects at least
		one module of the same OS, records the resolved/open status through
		self._updateCveAffectedModules. Failures are appended to
		self._mProcessingErrors.
		"""
		# NOTE: 'os' below shadows the os module, inside this comprehension only
		if not any([os in self._mUsedOS for os in sDebianNames +[sDockerName]]):
			return
		print('Check cve status from debian.org')
		lSubTimeStart = time.time()
		lFoundCve = set()

		lHttpResponse = self._mHttpPool.get('https://security-tracker.debian.org/tracker/data/json',
			headers={'User-Agent':sUserAgent},
			proxies=self._mHttpProxies)
		if lHttpResponse.status_code != 200:
			print(lHttpResponse.text)
			self._mProcessingErrors.append('Failed to retrieve CVE status from https://security-tracker.debian.org/tracker/data/json')
			return
		# full tracker db: {package_name: {cve_id: {'releases': {release: {'status': ...}}}}}
		lDebianCveDb = lHttpResponse.json()
		
		for m in self._mModules:
			lModule = self._mModules[m]
			if lModule['os'] == sWindowsName:
				continue
			assert( lModule['os'] in (sDebianNames +[sDockerName]))

			# only system libraries come from debian packages
			if not lModule['issystemlib']:
				continue
			lDebianProductNames = self._getDebianProductName(lModule)
			for lDebianProductName in lDebianProductNames:
				if not lDebianProductName in lDebianCveDb:
					continue
				for lCveId in lDebianCveDb[lDebianProductName]:
					# process only cve that were discovered previously as cve list from debian will list really old CVE that were fixed long ago
					if not lCveId in self._mCveDict:
						continue
					# skip CVEs that do not affect any module of this module's OS
					lIgnore = True
					for m2 in self._mCveDict[lCveId]['modules']['affected']:
						if self._mModules[m2]['os'] == lModule['os']:
							lIgnore = False
							break
					if lIgnore:
						continue
					# docker images are debian based: map docker to its debian release
					lDebianOs = lModule['os'] if lModule['os'] != sDockerName else sDockerDebianName
					lCveInfo = lDebianCveDb[lDebianProductName][lCveId]['releases'].get(lDebianOs,None)
					if lCveInfo is None:
						continue
					lIsFixed = None
					if lCveInfo['status'] in ['resolved']:
						lIsFixed = True
					elif lCveInfo['status'] in ['open']:
						lIsFixed = False
					else:
						raise Exception('Unexpected debian status %s for cve %s' % (lCveInfo['status'],lCveId))
					self._updateCveAffectedModules(lCveId,{'https://security-tracker.debian.org/tracker/%s' % lCveId : '%s @debian.org'%lCveId},[m],lIsFixed, 'Fixed on last debian package.' if lIsFixed else None)
					lFoundCve.add(lCveId)

		lSubTimeEnd = time.time()
		print('Check status of %d cve from debian.org in : %s (%.1fs)' % (len(lFoundCve),time.strftime('%M:%S', time.gmtime(int(lSubTimeEnd - lSubTimeStart))),lSubTimeEnd - lSubTimeStart))

	def loadOriginalResult(self, pSrcFile):
		"""Flag CVEs and modules that appeared after the initial release report.

		pSrcFile: path of the JSON report produced at release time. Raises if
		the report targets another release version or is not the initial
		report. Each CVE / module currently known but absent from that report
		receives a 'publishedAfterRelease' marker.
		"""
		with open(pSrcFile,'r',encoding='UTF-8') as fd:
			lOriginalResult = json.load(fd)
		lIsWrongVersion = lOriginalResult['version'] != self.mReleaseVersion
		if lIsWrongVersion or not lOriginalResult['isInitialReleaseReport']:
			raise Exception('')
		# CVEs unknown at release time
		lOriginalCves = set(lOriginalResult['cve'].keys())
		for lCveId in self._mCveDict:
			if lCveId not in lOriginalCves:
				self._mCveDict[lCveId]['publishedAfterRelease'] = True
		# modules unknown at release time
		lOriginalModules = set(lOriginalResult['modules'].keys())
		for lModuleId in self._mModules:
			if lModuleId not in lOriginalModules:
				self._mModules[lModuleId]['publishedAfterRelease'] = True

	def loadPreviousAfterReleaseResult(self, pSrcFile):
		"""Validate a previous after-release report against the current release.

		pSrcFile: path of the JSON report. Raises when the report was produced
		for another release version, or when it is an initial release report.
		"""
		with open(pSrcFile,'r',encoding='UTF-8') as fd:
			lPreviousResult = json.load(fd)
		lIsWrongVersion = lPreviousResult['version'] != self.mReleaseVersion
		if lIsWrongVersion or lPreviousResult['isInitialReleaseReport']:
			raise Exception('')

	def __createUpdateModule(self, pRefModule, pNewVersion):
		"""Clone pRefModule as an update-candidate module at version pNewVersion.

		Returns the id of the candidate module (newly created, or already
		present in self._mModules). For an ApacheLounge parent module whose
		target version is known in self._mApacheLoungeThirdPartiesVersions,
		update candidates are also created for each bundled third-party
		sub-module, with the versions shipped by that Apache Lounge release.
		"""

		# specific case for apache lounge
		# NOTE(review): pNewVersion is a str (callers pass str(version)) while
		# the keys of self._mApacheLoungeThirdPartiesVersions are built with
		# version.parse() — confirm this membership test can actually match
		if ((pRefModule['product'] == 'ApacheLounge') and (not 'subproduct' in pRefModule) and (pNewVersion in self._mApacheLoungeThirdPartiesVersions) ):
			lSubModulesNewVersions = self._mApacheLoungeThirdPartiesVersions[pNewVersion]
			# list all third party modules of current apache lounge version
			# (snapshot first: self._mModules is mutated inside the loop below)
			lSubModules = []
			for m in self._mModules:
				lSubModules.append(self._mModules[m])
			for m in lSubModules:
				if m['product'] != 'ApacheLounge' or not 'subproduct' in m:
					continue
				# map our subproduct names onto the changelog's lowercase lib keys
				# NOTE(review): a key missing from lSubModulesNewVersions raises
				# KeyError — assumes the changelog always lists these libs; confirm
				lNewVersion = None
				if m['subproduct'] == 'Curl':
					lNewVersion = lSubModulesNewVersions['curl']
				elif m['subproduct'] == 'OpenSSL':
					lNewVersion = lSubModulesNewVersions['openssl']
				elif m['subproduct'] == 'LibXml2':
					lNewVersion = lSubModulesNewVersions['libxml2']
				elif m['subproduct'] == 'ZLib':
					lNewVersion = lSubModulesNewVersions['zlib']
				elif m['subproduct'] == 'Brotli':
					lNewVersion = lSubModulesNewVersions['brotli']
				elif m['subproduct'] == 'Jansson':
					lNewVersion = lSubModulesNewVersions['jansson']
				elif m['subproduct'] == 'PCRE':
					lNewVersion = lSubModulesNewVersions['pcre2']
				else:
					self._mProcessingErrors.append('Unhandled subproduct %s for ApacheLounge' % m['subproduct'])
					continue
				lNewModule = copy.deepcopy(m)
				lNewModule['subproductversion'] = str(lNewVersion)
				lNewModule['isupdatecandidate'] = True
				self._computeModuleUniqueIdAndCpeStrings(lNewModule)
				if not lNewModule['id'] in self._mModules:
					self._mModules[lNewModule['id']] = lNewModule
		

		# clone the reference module itself at the new version
		lNewModule = copy.deepcopy(pRefModule)
		lNewModule['version'] = pNewVersion
		self._computeModuleUniqueIdAndCpeStrings(lNewModule)
		lNewModule['isupdatecandidate'] = True
		if lNewModule['id'] in self._mModules:
			# already created earlier: must already be a candidate at this version
			assert(self._mModules[lNewModule['id']]['isupdatecandidate'] == True)
			assert(self._mModules[lNewModule['id']]['version'] == pNewVersion)
		else:
			self._mModules[lNewModule['id']] = lNewModule
		return lNewModule['id']

	def searchForThirdPartyNewVersions(self):
		"""Compute available 'latest' and 'patch' updates for every module.

		For each module with update search enabled (and which is neither a
		system library nor itself an update candidate), compares its cleaned-up
		version string against the published versions previously collected in
		self._mThirdPartyVersions, and records update-candidate module ids in
		module['availableupdates']: 'latest' is the newest published version,
		'patch' the newest version sharing all leading version components but
		the last one.
		"""

		# debian package versions carry epoch/revision decorations to strip
		lVersionRe = [
			re.compile('^1:([0-9]+(?:\\.[0-9]+)*).*$'), # '1:1.2.13.dfsg-1'
			re.compile('^([0-9]+(?:\\.[0-9]+)*)-.*$'), #'1.0.9-2+b6'
			re.compile('^([0-9]+(?:\\.[0-9]+)*)\\+.*$'), #'0.9.7+dfsg-2'
		]
		
		for m in list(self._mModules.keys()):
			lModule = self._mModules[m]
			lModule['availableupdates'] = {
				'latest':None,
				'patch':None
			}
			if not lModule['searchforupdate']:
				continue
			if self._mThirdPartyVersions[lModule['product']] is None:
				# update search is not configured for this product
				continue
			# do not search update for system library or update candidate
			if lModule['issystemlib'] or lModule['isupdatecandidate']:
				continue
			lProductVersions = self._mThirdPartyVersions[lModule['product']]['versions']
			lVersionAliases = self._mThirdPartyVersions[lModule['product']]['aliases']
			lVersionString = lModule['version']

			# debian package versions are sometimes messy
			if lModule['os'] != sWindowsName:
				for r in lVersionRe:
					lMatchRes = r.fullmatch(lVersionString)
					if lMatchRes is None:
						continue
					lVersionString = lMatchRes.group(1)
					break

			# keep only major.minor.patch.release
			lVersionString = '.'.join(lVersionString.split('.')[:4])
			# look for an alias
			if lVersionString in lVersionAliases:
				lVersionString = lVersionAliases[lVersionString]
			lParsedVersion = version.parse(lVersionString)
			# empty version list: the retrieval failure should already be in the
			# error list; record one otherwise. In both cases skip this module
			# (indexing lProductVersions[-1] below would raise IndexError)
			if len(lProductVersions) == 0:
				if len(list(filter(lambda x: lModule['product'] in x, self._mProcessingErrors))) == 0:
					self._mProcessingErrors.append('%s : Fail to find version %s in published versions list' % (lModule['product'],lModule['version']))
				continue
			# sanity check we should find our current version in the retrieved version list
			# but some products only provide last available version number
			# so in this case list size should be 1 and the version should be greater than our actual version
			# it also happen, that a version is released but not tagged or not available in release notes ... so we only add an error in the report
			if not ((lParsedVersion in lProductVersions) or ((len(lProductVersions) == 1) and lParsedVersion < lProductVersions[0])):
				self._mProcessingErrors.append('%s : Fail to find version %s in published versions list' % (lModule['product'],lModule['version']))
			if lParsedVersion < lProductVersions[-1]:
				lModule['availableupdates']['latest'] = self.__createUpdateModule(lModule,str(lProductVersions[-1]))

			# patch candidate: newer version sharing every component but the last.
			# Compare release component tuples instead of string prefixes so that
			# e.g. prefix '1.1' no longer wrongly matches versions '1.10.x'
			lPrefixLen = max(len(lVersionString.split('.')) - 1, 0)
			lPrefixComponents = lParsedVersion.release[:lPrefixLen]
			for v in lProductVersions:
				if v.release[:lPrefixLen] != lPrefixComponents:
					continue
				if v > lParsedVersion:
					lModule['availableupdates']['patch'] = self.__createUpdateModule(lModule,str(v))
			
	def applyReviewsAndSanityze(self):
		"""Apply manual reviews, normalize CVE data and build the final report dict.

		Per CVE:
		- sorts/deduplicates module lists, descriptions, notes and CVSS entries
		- applies the manual review (if any) when it targets the current
		  major.minor ('x' patch) Infinite version: marks the CVE as reviewed,
		  appends the review note, moves modules listed as unaffected (or not
		  listed as affected) to the 'unaffected' list, and records
		  component/OS exclusions
		- derives affected/unaffected components from the modules' 'usedby'
		  lists, then drops affected modules that no affected component uses
		Finally sorts per-module data, collects unreferenced reviews and
		per-product CVE search sources, and returns the report dict after
		validating it against known_cve_result.schema.json.
		"""
		# ensure that all reviews are valid for this version !!
		lExpectedReviewVersion = '.'.join(self.mReleaseVersion.split('.')[:2]) + '.x'
		
		for lCveId in self._mCveDict:

			lCve = self._mCveDict[lCveId]
		
			# sanityze data (sort, unique)
			for k in lCve['modules']:
				lCve['modules'][k] = sorted(list(set(lCve['modules'][k])))
			# 'unaffected' wins over 'affected' for modules present in both
			lCve['modules']['affected'] = sorted(list(set(lCve['modules']['affected'] ) - set(lCve['modules']['unaffected'] )))
			lCve['descriptions'] = sorted(list(set(lCve['descriptions'])))
			lCve['notes'] = sorted(list(set(lCve['notes'])))

			# deduplicate CVSS entries by (source, vector) and store them sorted
			lCvssDict = {}
			for cvss in lCve['cvss']:
				lKey = cvss['source'] + ' ' + cvss['vectorString']
				if lKey in lCvssDict:
					continue
				lCvssDict[lKey] = cvss
			lCve['cvss'] = []
			lCVSSKeys = list(lCvssDict.keys())
			lCVSSKeys.sort()
			for k in lCVSSKeys:
				lCve['cvss'].append(lCvssDict[k])

			lUnaffectedComponentsFromReview = None
			lUnaffectedOsFromReview = None
			if lCveId in self._mReviews:
				# stale reviews (written for another Infinite version) are skipped
				if self._mReviews[lCveId]['infiniteVersion'] != lExpectedReviewVersion:
					self._mProcessingErrors.append('Skip review of %s as it was not reevaluated for this version of Infinite' % (lCveId))
				else:
					lReview = self._mReviews[lCveId]
					
					lCve['reviewed'] = True
					lCve['notes'].append(lReview['note'])

					# 'unaffectedModules' lists modules to exclude by name;
					# 'affectedModules' is the dual: everything NOT listed is excluded
					if 'unaffectedModules' in lReview:
						lModulesToMoveToUnaffected = set()
						for m in lCve['modules']['affected']:
							lModuleNameLc = self._mModules[m]['name'].lower()
							for mu in lReview['unaffectedModules']:
								if mu.lower() == lModuleNameLc:
									lModulesToMoveToUnaffected.add(m)
						lCve['modules']['affected'] = sorted(list(set(lCve['modules']['affected'] ) - lModulesToMoveToUnaffected))
						lCve['modules']['unaffected'] = sorted(list(set(lCve['modules']['unaffected'] ) | lModulesToMoveToUnaffected))
					elif 'affectedModules' in lReview:
						lModulesToMoveToUnaffected = set()
						for m in lCve['modules']['affected']:
							lModuleNameLc = self._mModules[m]['name'].lower()
							for mu in lReview['affectedModules']:
								if not mu.lower() == lModuleNameLc:
									lModulesToMoveToUnaffected.add(m)
						lCve['modules']['affected'] = sorted(list(set(lCve['modules']['affected'] ) - lModulesToMoveToUnaffected))
						lCve['modules']['unaffected'] = sorted(list(set(lCve['modules']['unaffected'] ) | lModulesToMoveToUnaffected))

					if 'unaffectedComponents' in lReview:
						lUnaffectedComponentsFromReview = lReview['unaffectedComponents']
					if 'unaffectedOs' in lReview:
						lUnaffectedOsFromReview = lReview['unaffectedOs']
					
			# compute affected/unaffected components based on modules & review
			# keys are (affected|unaffected, component name, os)
			lComponentsMap = {}
			for k in lCve['modules']:
				for m in lCve['modules'][k]:
					lModule = self._mModules[m]
					for u in lModule['usedby']:
						lIsAffected = k == 'affected'
						# '*' in the review excludes every component
						if not lUnaffectedComponentsFromReview is None and lUnaffectedComponentsFromReview == '*':
							lIsAffected = False
						elif not lUnaffectedComponentsFromReview is None and u in lUnaffectedComponentsFromReview:
							lIsAffected = False
						if not lUnaffectedOsFromReview is None and lModule['os'] in lUnaffectedOsFromReview:
							lIsAffected = False
						lMapKey = ('affected' if lIsAffected else 'unaffected', u, lModule['os'])
						if not lMapKey in lComponentsMap:
							lComponentsMap[lMapKey] = {
								'name':u,
								'os':lModule['os'],
								'modules':[]
							}
						lComponentsMap[lMapKey]['modules'].append(m)
			# compute affected modules from affected components
			# all component might be tagged as unaffected, but modules will still be tagged as affected which will
			# cause some interpretation glitches, we choose to ignore affected modules if they are not used by affected components
			lUsedModules = {'affected':set(),'unaffected':set()}
			for k in lComponentsMap:
				lUsedModules[k[0]] = lUsedModules[k[0]] | set(lComponentsMap[k]['modules'])
				lCve['components'][k[0]].append(lComponentsMap[k])
			lCve['modules']['affected'] = list(set(lCve['modules']['affected']) & lUsedModules['affected'])
			lCve['modules']['unaffected'] = list(set(lCve['modules']['unaffected']) | (lUsedModules['unaffected']-lUsedModules['affected']))
		for lModuleId in self._mModules:
			lModule = self._mModules[lModuleId]
			lModule['cpe'] = sorted(lModule['cpe'])
			lModule['usedby'] = sorted(lModule['usedby'])
		print('Found %d CVEs' % (len(self._mCveDict)))

		# reviews pointing at unknown CVEs are only reported on clean runs
		# ('$schema' is the json-schema marker key, not a review)
		lUnreferencedCveReviews = []
		if len(self._mProcessingErrors) == 0:
			lUnreferencedCveReviews = list(set(self._mReviews.keys()) - set(self._mCveDict.keys()) - set(['$schema']))

		
		for p in self._mCveSearchSourcePerProduct:
			if not p in self._mProducts:
				continue
			self._mProducts[p]['cvesearchsources'] = sorted(list(self._mCveSearchSourcePerProduct[p]))

		lRes = {
			'cveSearchDate':datetime.datetime.now().isoformat(timespec='minutes'),
			'version': self.mReleaseVersion,
			'cve': self._mCveDict,
			'modules' : self._mModules,
			'isInitialReleaseReport': self._mIsFirstSearch,
			'products': self._mProducts,
			'processingerrors': self._mProcessingErrors,
			'unreferencedCveReviews': lUnreferencedCveReviews
		}
		# NOTE(review): lSrcFolder is a module-level name set in the __main__
		# block — this method assumes script execution; confirm before reusing
		# this class as a library
		with open(os.path.join(lSrcFolder,'known_cve_result.schema.json'),'r',encoding='UTF-8') as fd:
			lValidationSchema = json.load(fd)
			jsonschema.validate(instance=lRes,schema=lValidationSchema)
		
		return lRes

class SetEncoder(json.JSONEncoder):
	"""JSON encoder that serializes ``set`` instances as (unordered) lists.

	Every other unsupported type is delegated to the base implementation,
	which raises ``TypeError`` as usual.
	"""
	def default(self, obj):
		# JSON has no native set type: emit the members as a plain list
		if not isinstance(obj, set):
			return super().default(obj)
		return list(obj)

if __name__ == '__main__':

	# Folder containing this script; config files, reviews and templates are resolved relative to it.
	lSrcFolder = os.path.realpath(os.path.split(__file__)[0])
	lIsFirstSearch = False

	lArgs = sys.argv
	if '--disable-ssl-check' in lArgs:
		sDisableSSLCheck = True
		lArgs.remove('--disable-ssl-check')

	# Optional API keys for the NVD / THC CVE sources.
	# Raise (not assert, which is stripped under -O) when the flag has no value.
	lNvdApiKey = None
	if '--nvd-key' in lArgs:
		if (lArgs.index('--nvd-key') + 1) >= len(lArgs):
			raise Exception('--nvd-key requires a value')
		lNvdApiKey = lArgs[lArgs.index('--nvd-key')+1]
	lThcApiKey = None
	if '--thc-key' in lArgs:
		if (lArgs.index('--thc-key') + 1) >= len(lArgs):
			raise Exception('--thc-key requires a value')
		lThcApiKey = lArgs[lArgs.index('--thc-key')+1]

	lPreviousAfterReleaseReport = None
	if len(lArgs) >= 2 and lArgs[1] == '--atrelease':
		lDstHtmlFolder = os.path.realpath(lSrcFolder + '/..')
		lDstJsonFolder = lSrcFolder
		lPostfix = ''
		lIsFirstSearch = True
	# BUGFIX: require 4 args (script, mode, outputfolder, postfix); the previous '>= 3'
	# check let lArgs[3] below raise an IndexError instead of showing the usage message.
	elif len(lArgs) >= 4 and lArgs[1] == '--afterrelease':
		if '--previousafterreleasereport' in lArgs:
			if (lArgs.index('--previousafterreleasereport') + 1) >= len(lArgs):
				raise Exception('--previousafterreleasereport requires a value')
			lPreviousAfterReleaseReport = lArgs[lArgs.index('--previousafterreleasereport')+1]
		lDstHtmlFolder = os.path.realpath(lArgs[2])
		lDstJsonFolder = lDstHtmlFolder
		lPostfix = lArgs[3]
		if lPostfix == '':
			raise Exception('postfix should not be empty')
	else:
		# BUGFIX: usage previously advertised '--disable_ssl_check' while the flag
		# actually parsed above is '--disable-ssl-check'.
		raise Exception(
			'This script will search for CVE.'
			+ '\n- First search (at release time) : extractCVE.py --atrelease [--disable-ssl-check] [--nvd-key|--thc-key key]'
			+ '\n- To update after release : extractCVE.py --afterrelease outputfolder postfix [--previousafterreleasereport report] [--disable-ssl-check] [--nvd-key|--thc-key key]'
			)

	# Relative paths below (template lookup) are resolved from the script folder.
	os.chdir(lSrcFolder)

	lCveRetriever = CveRetriever(lSrcFolder,lIsFirstSearch, lThcApiKey)

	lBaseFileName = '3D Juump Infinite known CVE ' + lCveRetriever.mReleaseVersion + lPostfix
	lJsonDstFilePath = os.path.join(lDstJsonFolder,lBaseFileName+ '.json')
	if lIsFirstSearch and os.path.isfile(lJsonDstFilePath):
		raise Exception('Output file already exists, use --afterrelease !')

	# Gather third-party version / end-of-life info, then query each upstream CVE source.
	lCveRetriever.retrieveEndOfLifeInfo()
	lCveRetriever.retrieveApacheLoungeThirdPartyVersion()
	lCveRetriever.retrieveThirdPartyVersions()
	lCveRetriever.searchForThirdPartyNewVersions()
	# search for eol date after update searches
	lCveRetriever.retrieveEndOfLifeInfo()
	lCveRetriever.retrieveCveFromCurl()
	lCveRetriever.retrieveCveFromOpenSSL()
	lCveRetriever.retrieveCveFromApache()
	lCveRetriever.retrieveCveFromPg()
	lCveRetriever.retrieveCveFromNvd(lNvdApiKey)
	lCveRetriever.propagateSubProductCveToParentProduct()

	# lCveRetriever.dumpCache('cache.json')
	# lCveRetriever.loadCache('cache.json')

	if not lIsFirstSearch:
		# After-release runs load the original at-release report (and optionally a
		# previous after-release report) so results can be compared against them.
		lCveRetriever.loadOriginalResult(os.path.join(lSrcFolder,'3D Juump Infinite known CVE ' + lCveRetriever.mReleaseVersion + '.json'))
		if lPreviousAfterReleaseReport is not None:
			lCveRetriever.loadPreviousAfterReleaseResult(lPreviousAfterReleaseReport)

	lCveRetriever.checkForDebianFixes()

	lCveInfoJson = lCveRetriever.applyReviewsAndSanityze()

	# Emit the JSON report.
	os.makedirs(lDstJsonFolder,exist_ok=True)
	with open(lJsonDstFilePath,'w',encoding='UTF-8') as fd:
		json.dump(lCveInfoJson,fd,indent='\t',cls=SetEncoder,sort_keys=True)
		print('CVE JSON report : %s ' % lJsonDstFilePath)

	os.makedirs(lDstHtmlFolder,exist_ok=True)

	# Emit the HTML report: the template with its data placeholder replaced by the JSON payload.
	# The template is searched relative to lSrcFolder (we chdir'ed above).
	lHtmlTplFile = None
	for d in ['.','./generated_files']:
		lPath = os.path.join(d,'3D Juump Infinite known CVE.html.tpl')
		if os.path.exists(lPath):
			lHtmlTplFile = lPath
			break
	if lHtmlTplFile is None:
		raise Exception('HTML template "3D Juump Infinite known CVE.html.tpl" not found')
	with open(lHtmlTplFile,'r',encoding='UTF-8') as fin:
		lHtmlTpl = fin.read()
		lHtmlTpl = lHtmlTpl.replace('"INSERT_DATA_HERE"',json.dumps(lCveInfoJson,cls=SetEncoder,sort_keys=True))
		lHtmlDstFilePath = os.path.join(lDstHtmlFolder,lBaseFileName + '.html')
		with open(lHtmlDstFilePath,'w',encoding='UTF-8') as fd:
			fd.write(lHtmlTpl)
		print('CVE HTML report : %s ' % lHtmlDstFilePath)
