pythongit - check before git commit - gitpython

I have written a small snippet to automate git add, commit and push using GitPython.
# Stage, commit, and push every change in the backup repository.
# NOTE(review): Python 2 syntax (print statements); relies on GitPython's
# repo.git command wrappers. Runs unconditionally, even with nothing to commit.
def git_commit_push(self):
# Path to the repository being backed up (set elsewhere on the instance).
repoDir = self.backupRepositoryPath
repo = git.Repo( repoDir )
print repo.git.status()
# Stage all modifications, additions and deletions.
repo.git.add('--all')
print repo.git.status()
# Equivalent to: git commit -m 'pusing for backup'
repo.git.commit( m='pusing for backup' )
repo.git.push()
print repo.git.status()
Need to add below mentioned check points
1: Before commit , check any files are modified. If no files then skip commit
2: Before push , check any committed files to be pushed. If no files then skip push
Please help writing the if condition for these two check points.
Regards,
Prasad

Logic is tuned here...
# Stage everything, then commit and push only when something actually changed.
# NOTE(review): Python 2 syntax; relies on GitPython's repo.git wrappers.
def git_commit_push(self):
repoDir = self.backupRepositoryPath
repo = git.Repo( repoDir )
print repo.git.status()
# Stage all changes first so the index-vs-HEAD diff below sees them.
repo.git.add('--all')
# Diff of the staged index against HEAD; non-empty when there is
# something to commit (presumably empty when the tree is clean — verify).
changedFiles = repo.index.diff("HEAD")
print "====================================="
print "changedFiles are :", changedFiles
print "====================================="
# Skip both commit and push when nothing was staged.
if ( changedFiles ):
repo.git.commit( m='JenkinsBackup' )
repo.git.push()
else:
print "No files updated"

Hope this should help.
# Variant that uses the changed file list as the commit message.
# NOTE(review): Python 2 syntax; relies on GitPython's repo.git wrappers.
def git_commit_push(self):
repoDir = self.backupRepositoryPath
repo = git.Repo( repoDir )
print repo.git.status()
repo.git.add('--all')
# NOTE(review): `git diff HEAD~1..HEAD` lists the files touched by the
# PREVIOUS commit, not the currently staged changes — once any commit
# exists this is always truthy, so the "skip commit" check never fires.
# Confirm whether repo.index.diff("HEAD") was intended instead.
changedFiles = repo.git.diff('HEAD~1..HEAD', name_only=True)
print "====================================="
print "changedFiles are :", changedFiles
print "====================================="
if ( changedFiles ):
# The newline-separated file list becomes the commit message.
repo.git.commit( m=changedFiles )
repo.git.push()
else:
print "No files updated"

Related

git log: parsing who worked on the repository and when

I'd like to create some statistics for open source projects. I'd like to extract the information when a certain person started to work on a git repository and when they finished on it... or the first and the last commit by an author, with a date.
I can manually extract this information from git log output. However, is there a git built-in way, or one-liner shell script that would help me to analyse this.
Find all repo authors from git log
Get their first and last commit date
For usernames and commits amount
git shortlog -sne | awk '{print $1 " " $2}'
Last commit date
git log --pretty=format:"%ad by %an" --date=iso | sort -r | awk '{if (!seen[$5]++) print}'
First commit date
git log --pretty=format:"%ad by %an" --date=iso | sort | awk '{if (!seen[$5]++) print}'
Here's something to get you started, it will list the authors and how many commits from each
git log --format='%aN <%aE>' | awk '{arr[$0]++} END{for (i in arr){print arr[i], i;}}' | sort -rn
I had to do this over multiple repositories, so shell tools proved inflexible. I wrote a quick Python script that does the job. README here.
import sys
import logging
import os.path
from datetime import datetime
from collections import defaultdict
from typing import List
from typing import Optional
from typing import Dict
from dataclasses import dataclass
from dataclasses import field
from pathlib import Path
from git import Repo
from tabulate import tabulate
# If logging is set to debug you will see individual git commands in the console
logging.basicConfig(level=logging.INFO)
# Module-wide logger used by all the helpers below.
logger = logging.getLogger("githistory")
@dataclass
class AuthorHistory:
    """Author history information for a single repo.

    Tracks the first and last commit an author made in one repository,
    plus their total commit count there.
    """

    repo: str   # repository name this history belongs to
    name: str   # author name as recorded in git
    email: str  # author email as recorded in git
    first_commit_at: Optional[datetime] = None
    first_commit_message: Optional[str] = None
    last_commit_at: Optional[datetime] = None
    last_commit_message: Optional[str] = None
    commit_count: int = 0
@dataclass
class AuthorHistoryOverMultipleRepos:
    """Author history information spanning over multiple repos."""

    # repo name -> per-repo history for this author
    histories: Dict[str, "AuthorHistory"] = field(default_factory=dict)
    # Sentinel dates chosen so min()/max() folding works on the first repo seen.
    first_commit_at: datetime = datetime(2030, 1, 1)
    last_commit_at: datetime = datetime(1970, 1, 1)
    commit_count: int = 0

    @property
    def email(self):
        """Author email, taken from an arbitrary per-repo history."""
        return next(iter(self.histories.values())).email

    @property
    def name(self):
        """Author name, taken from an arbitrary per-repo history."""
        return next(iter(self.histories.values())).name
@dataclass
class RepositoryHistory:
    """History of a single repo."""

    name: str          # repository name (directory basename)
    commit_count: int = 0
    #: email -> per-author history map
    authors: Dict[str, "AuthorHistory"] = field(default_factory=dict)
@dataclass
class FullHistory:
    """History of a project spanning multiple repositories."""

    repos: List["RepositoryHistory"] = field(default_factory=list)
    #: Pointers to individual author histories
    #: email -> AuthorHistoryOverMultipleRepos
    all_author_histories: Dict[str, "AuthorHistoryOverMultipleRepos"] = field(default_factory=dict)
def extract_history(path: Path) -> Optional["RepositoryHistory"]:
    """Extract history of one git repository.

    :param path: Path to the git repository
    :return: populated RepositoryHistory, or None when the directory
        has no branches (not a usable git repository)
    """
    logger.info("Extracting history from %s", path)
    r = Repo(path)
    repo_name = os.path.split(path)[-1]

    # Sanity check: a repository without any heads cannot be mined.
    if len(r.heads) == 0:
        logger.warning("Not a git repository: %s", path)
        return None

    history = RepositoryHistory(name=repo_name)
    authors = history.authors

    # iter_commits() yields newest-first (like `git log`); reverse so we
    # walk the history from the first commit to the last.
    for c in reversed(list(r.iter_commits('master'))):  # type: Commit
        # https://gitpython.readthedocs.io/en/stable/reference.html#git.objects.commit.Commit
        # https://stackoverflow.com/questions/58550252/how-to-get-commit-author-name-and-email-with-gitpython
        name = c.author.name
        email = c.author.email

        author = authors.get(email)  # type: AuthorHistory
        if not author:
            # First commit seen from this author in this repo.
            # AuthorHistory field order is (repo, name, email).
            author = AuthorHistory(repo_name, name, email)
            authors[email] = author
            author.first_commit_at = datetime.fromtimestamp(c.committed_date)  # Is UNIX time
            author.first_commit_message = c.message
            author.last_commit_at = author.first_commit_at
            author.last_commit_message = c.message
        else:
            # Later commits only advance the "last" markers.
            author.last_commit_at = datetime.fromtimestamp(c.committed_date)
            author.last_commit_message = c.message

        author.commit_count += 1
        history.commit_count += 1

    return history
def mine_authors_over_repos(history: "FullHistory") -> Dict[str, "AuthorHistoryOverMultipleRepos"]:
    """Create a history info spanning over multiple repos.

    :param history: FullHistory whose per-repo histories have been mined
    :return: mapping of author email -> history aggregated across repos
    """
    all_author_histories = defaultdict(AuthorHistoryOverMultipleRepos)
    for repo_history in history.repos:
        for email, author_history in repo_history.authors.items():
            all_history = all_author_histories[email]
            # Fold this repo's date range into the overall first/last range.
            all_history.first_commit_at = min(all_history.first_commit_at, author_history.first_commit_at)
            all_history.last_commit_at = max(all_history.last_commit_at, author_history.last_commit_at)
            all_history.commit_count += author_history.commit_count
            all_history.histories[repo_history.name] = author_history
    return all_author_histories
def mine_data(repos: List[str]) -> "FullHistory":
    """Extract history from multiple git repositories.

    Will skip directories that do not look like git repos.

    :param repos: filesystem paths of candidate repository directories
    :return: FullHistory with per-repo and per-author aggregates filled in
    """
    logger.info("Working on %d repositories", len(repos))
    history = FullHistory()
    for repo in repos:
        repo_history = extract_history(Path(repo))
        # extract_history returns None for directories without branches.
        if repo_history:
            history.repos.append(repo_history)
    history.all_author_histories = mine_authors_over_repos(history)
    return history
def output_author_data(history: "FullHistory"):
    """Write out information about authors as a table on stdout."""
    print("All authors")
    print("*" * 80)
    table = []
    for author in history.all_author_histories.values():
        table.append([author.name, author.email, author.first_commit_at, author.last_commit_at, author.commit_count])
    # Sort by the first commit date (column index 2).
    table = sorted(table, key=lambda row: row[2])
    # Headers must match the row layout built above: name first, then email
    # (the original listed "Email" before "Name", mislabelling both columns).
    print(tabulate(table, headers=["Name", "Email", "First commit", "Last commit", "Commit count"]))
    print()
def main():
    """Entry point: repository paths are passed as command-line arguments."""
    repo_paths = sys.argv[1:]
    full_history = mine_data(repo_paths)
    output_author_data(full_history)


if __name__ == "__main__":
    main()

Python - Round Robin file move

I am trying to create a Python script that moves files in a round robin into a DIR that has the least amount of files in it so that the files are equally distributed for the source DIR to the two target DIR's.
For example:
If c:\test contains:
test_1.txt
test_2.txt
test_3.txt
test_4.txt
I want these test_1.txt and test_3.txt to be moved to c:\test\dir_a and test_2.txt and test_4.tx to be moved to c:\test\dir_b.
I have been able to do this successfully in Ruby; however, when I run the equivalent Python script it moves all the files into the DIR with the least amount of files in it instead of distributing them in a round robin.
Here is my Ruby example:
require 'fileutils'
# Distribute each .txt file from the prep folder into whichever watch
# folder currently holds fewer files (load-balancing move).
def check_file
watchfolder_1 = 'F:/Transcoder/testing/dir_a/'
watchfolder_2 = 'F:/Transcoder/testing/dir_b/'
# Only proceed when at least one .txt file is waiting in prep.
if !Dir.glob('F:/Transcoder/testing/prep/*.txt').empty?
Dir['F:/Transcoder/testing/prep/*.txt'].each do |f|
# Re-list both targets on every iteration so the counts stay current
# as files are moved — this is what makes the distribution alternate.
node_1 = Dir["#{watchfolder_1}"+'*']
node_2 = Dir["#{watchfolder_2}"+'*']
nc_1 = node_1.count
nc_2 = node_2.count
# Smaller of the two current file counts decides the destination.
loadmin =[nc_1,nc_2].min
#puts loadmin
if loadmin == nc_1
FileUtils.mv Dir.glob("#{f}"), watchfolder_1
puts "#{f} moved to DIR A"
elsif loadmin == nc_2
FileUtils.mv Dir.glob("#{f}"), watchfolder_2
puts "#{f} moved to DIR B"
end
puts 'Files successfully moved to staging area.'
end
else
puts 'No valid files found'
end
end
check_file
This outputs the following:
C:\Ruby22-x64\bin\ruby.exe -e $stdout.sync=true;$stderr.sync=true;load($0=ARGV.shift)
F:/ruby/transcode_engine/test.rb
F:/Transcoder/testing/prep/test_1.txt moved to DIR A
Files successfully moved to staging area.
F:/Transcoder/testing/prep/test_2.txt moved to DIR B
Files successfully moved to staging area.
F:/Transcoder/testing/prep/test_3.txt moved to DIR A
Files successfully moved to staging area.
F:/Transcoder/testing/prep/test_4.txt moved to DIR B
Files successfully moved to staging area.
The files move as I want them to.
Now here is my Python script:
import shutil
from glob import glob
import os.path

# Target directories for the round-robin distribution.
t_a = 'F:\\Transcoder\\testing\\dir_a\\'
t_b = 'F:\\Transcoder\\testing\\dir_b\\'

# Files waiting in the staging folder.
prep = glob('F:\\Transcoder\\testing\\prep\\*.txt')
if prep:
    for file in prep:
        # BUG FIX: the directory listings must be refreshed on every
        # iteration. Listing them once before the loop freezes the counts,
        # so every file was sent to the same (initially emptier) directory.
        dir_a = os.listdir(t_a)
        dir_b = os.listdir(t_b)
        ac = len(dir_a)
        bc = len(dir_b)
        load = [ac, bc]
        # Move the file to whichever directory currently holds fewer files.
        if min(load) == ac:
            print('Moving' + file + 'to DIR A')
            shutil.move(file, t_a)
        else:
            print('Moving' + file + 'to DIR B')
            shutil.move(file, t_b)
else:
    print('No Files')
This script returns this:
C:\Users\3A01\AppData\Local\Programs\Python\Python35-32\python.exe
F:/Projects/python_transcoder/test_2.py
Moving F:\Transcoder\testing\prep\test_1.txt to DIR A
Moving F:\Transcoder\testing\prep\test_2.txt to DIR A
Moving F:\Transcoder\testing\prep\test_3.txt to DIR A
Moving F:\Transcoder\testing\prep\test_4.txt to DIR A
Where am I going wrong with the Python script, why is it not moving the files in a round robin?
dir_a and dir_b are computed at the start of your script so the load is always identical even if you move files in your loop.
Move this in your for loop:
dir_a = os.listdir(r'F:\Transcoder\testing\dir_a')
dir_b = os.listdir(r'F:\Transcoder\testing\dir_b')
Fixed proposal (with some other small fixes as well, like not repeating paths and using the "raw" prefix (r"the\data") to avoid escaping the backslashes):
import shutil
from glob import glob
import os.path

# Target directories; raw strings avoid doubling every backslash.
t_a = r'F:\Transcoder\testing\dir_a'
t_b = r'F:\Transcoder\testing\dir_b'

# Files currently waiting in the staging folder.
prep = glob('F:\\Transcoder\\testing\\prep\\*.txt')
if not prep:
    print('No Files')
else:
    for file in prep:
        # Re-count both targets on every pass so the destination alternates
        # as files land in them.
        count_a = len(os.listdir(t_a))
        count_b = len(os.listdir(t_b))
        if count_a <= count_b:
            print('Moving' + file + 'to DIR A')
            shutil.move(file, t_a)
        else:
            print('Moving' + file + 'to DIR B')
            shutil.move(file, t_b)

A pythonic way of finding folder

What's the most pythonic way of finding the child folder from a supplied path?
import os
def get_folder(f, h):
    """Return the immediate child folder of root *h* along file path *f*.

    If the file sits directly inside *h*, the name of *h* itself is returned
    (e.g. ``C:\\temp\\file.txt`` with root ``C:\\temp`` gives ``temp``).

    The original set-difference approach returned the file name for files
    directly in the root, and silently dropped any path component that
    happened to repeat a root component; comparing positionally fixes both.
    """
    # Split on backslash for Windows-style inputs, else the host separator.
    sep = "\\" if "\\" in f else os.sep
    path_parts = [p for p in f.split(sep) if p]
    root_parts = [p for p in h.split(sep) if p]
    # Components of f below the supplied root; the last one is the file name.
    remainder = path_parts[len(root_parts):]
    if len(remainder) > 1:
        return remainder[0]
    # File lives directly in the root: fall back to the root folder itself.
    return root_parts[-1]
print get_folder("C:\\temp\\folder1\\folder2\\file.txt", "C:\\temp") # "folder1" correct
print get_folder("C:\\temp\\folder1\\file.txt", "C:\\temp") # "folder1" correct
print get_folder("C:\\temp\\file.txt", "C:\\temp") # "file.txt" fail should be "temp"
In the example above I have a file.txt in "folder 2". The path "C:\temp" is supplied as the start point to look from.
I want to return the child folder from it; in the event that the file in question is in the source folder it should return the source folder.
Try this. I wasn't sure why you said folder1 is correct for the first example, isn't it folder2? I am also on a Mac so os.sep didn't work for me but you can adapt this.
import os
def get_folder(f, h):
    """Return the path component immediately before the first occurrence of
    *h* in the backslash-separated path *f*.

    Gives None when *h* is absent from the path or is its very first
    component (nothing precedes it).
    """
    components = f.split("\\")
    if h not in components:
        return None
    position = components.index(h)
    return components[position - 1] if position > 0 else None
print get_folder("C:\\temp\\folder1\\folder2\\file.txt", "file.txt") # "folder2" correct
print get_folder("C:\\temp\\folder1\\file.txt", "file.txt") # "folder1" correct
print get_folder("C:\\temp\\file.txt", "file.txt") # "file.txt" fail should be "temp"

Why did the order of my script give a Divide by Zero error?

I'm working on some beginner Python exercises. I have the following, working code:
# Use the file name mbox-short.txt as the file name
# Python 2 script: average the X-DSPAM-Confidence values in a mailbox file.
fname = raw_input("Enter file name: ")
fh = open(fname)
# Read the whole file into a list of lines up front.
inp=fh.readlines()
count=0
total=0.0
for line in inp:
line=line.rstrip()
# Skip every line that is not a spam-confidence header.
if not line.startswith("X-DSPAM-Confidence:"):
continue
# The numeric value starts after the 19-character header prefix.
value=line[19:]
value=float(value)
count=count+1
total=total + value
# NOTE(review): raises ZeroDivisionError if no matching line was found.
print "Average spam confidence:",total/count
When I first wrote this, I put the "count" line before the "value" line like this:
# Use the file name mbox-short.txt as the file name
# Python 2 script: same computation, with count incremented before parsing.
fname = raw_input("Enter file name: ")
fh = open(fname)
inp=fh.readlines()
count=0
total=0.0
for line in inp:
line=line.rstrip()
# Non-matching lines are skipped before either count or total is touched.
if not line.startswith("X-DSPAM-Confidence:"):
continue
count=count+1
value=line[19:]
value=float(value)
total=total + value
# NOTE(review): as listed, the increment is still behind the same
# startswith guard, so count can only remain 0 (ZeroDivisionError below)
# when no header lines matched at all — presumably the run that failed
# used a different file or header string; confirm the exact input used.
print "Average spam confidence:",total/count
This resulted in a divide by zero error. Why?

Ruby csv for each - clean up characters?

I have the following code which reads each line of a csv and cleans up each row. The rows are all path\ file name directories. I am having an issue where the script cannot find a path\file because the file name has a - in it. The - (dash) is read by ruby as \x96 . Does anyone know how to get it to not do that, and to read the - as a dash?
This is what I have, but it is not working:
CSV.foreach("#{batch_File_Dir_sdata}") do |ln|
line_number += 1
pathline = ln.to_s
log_linemsg = "Source #{line_number}= #{pathline}"
log_line = ["#{$cname}","#{log_linemsg}","","",]
puts log_linemsg
insert_logitems(connection, table_namelog, log_line)
if pathline.include?("\\")
cleanpath = pathline.gsub!("\\\\","\\")
#cleanpath = cleanpath.gsub!("[","")
#cleanpath = cleanpath.gsub!("]","")
cleanpath.gsub!("\"","")
#THIS IS THE LINE WHERE I AM TRYING TO FIX THE ISSUE
cleanpath.gsub!("\\x96","\-")
cleanpath.slice!(0)
cleanpath.chop!
#puts "Clean path - has backslash\n#{cleanpath}"
else
cleanpath = pathline
#puts "#{cleanpath}"
#puts "Clean path - has NO backslash\n#{cleanpath}"
end
Any help would be greatly appreciated.

Resources