From 3355d319288f7a9454b6726bcbee57a5a43743ed Mon Sep 17 00:00:00 2001 From: niekv Date: Thu, 29 Apr 2021 11:38:34 +0200 Subject: [PATCH] include file rename tracking at metrics ingestion --- ingester/git.py | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/ingester/git.py b/ingester/git.py index 15fe6e8..3c79c10 100755 --- a/ingester/git.py +++ b/ingester/git.py @@ -5,7 +5,7 @@ import math # Required for the math.log function from ingester.commitFile import * # Represents a file from classifier.classifier import * # Used for classifying each commit -import time +import re """ file: repository.py @@ -110,6 +110,46 @@ def getCommitStatsProperties( stats, commitFiles, devExperience, author, unixTim totalModified = fileLa + fileLd + # Check for filepath rename + oldpath, newpath, renamed = "", "", False + + # check if the path was partially changed + match = re.search('^(.*)({.+=>.+})(.*)$', fileName) + if match: + oldpath = [] + newpath = [] + for group in match.groups(): + change = re.search('^{(.+)=>(.+)}$', group) + if change: + old, new = change.groups() + oldpath.append(old.strip()) + newpath.append(new.strip()) + else: + oldpath.append(group) + newpath.append(group) + oldpath = "".join(oldpath).replace("//", "/").strip() + newpath = "".join(newpath).replace("//", "/").strip() + renamed = True + + # If not, check if the path was fully changed + else: + match = re.search('^(.+)=>(.+)$', fileName) + if match: + oldpath, newpath = match.groups() + oldpath = oldpath.strip() + newpath = newpath.strip() + renamed = True + + if renamed: + # In case of an error in the git history (e.g. due to rewrites), + # the old name may not exist in the dictionary. Therefore this extra + # check is required to avoid KeyErrors + if oldpath in commitFiles: + commitFiles[newpath] = commitFiles[oldpath] + setattr(commitFiles[newpath], 'name', newpath) + + fileName = newpath + # have we seen this file already? if(fileName in commitFiles): prevFileChanged = commitFiles[fileName]