Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Getting a list of forks on Github
#1
I need to get a list of forks for a Github project showing ahead and behind stats as well as last commit date. I've written a Python script and everything works great except for the date.

For some reason during operation it will randomly return a page with an error stating "Failed to load latest commit information." and I cannot retrieve the date from the page although ahead and behind stats are fine every time. I've tried adding time.sleep() with various delays but it doesn't seem to make a difference.

Here's the script:
import requests, re, os, sys, time, datetime, browser_cookie3

# HTTP headers passed to requests.get by text_from_url; the User-Agent
# string identifies the client as desktop Firefox on Linux.
headers = {
	"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0",
}

def text_from_url(url):
	"""Download a page and return its body as text.

	The request is sent with the module-level ``headers`` and with the
	cookie jar returned by ``browser_cookie3.load()``.

	Args:
		url: Address of the page to fetch.

	Returns:
		The response body as a string (``response.text``). The HTTP status
		code is not checked, so error pages are returned like any other page.

	Raises:
		requests.exceptions.Timeout: If connecting, or waiting for data from
			the server, takes longer than 30 seconds.
	"""
	cookiejar = browser_cookie3.load()
	# requests has no default timeout; without one a stalled connection
	# would block the whole script indefinitely.
	response = requests.get(url, headers=headers, cookies=cookiejar, timeout=30)
	return response.text

# The repository URL is the only command-line argument; exit with a usage
# hint instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
	sys.exit('usage: {} https://github.com/<owner>/<repo>'.format(sys.argv[0]))

# Drop a trailing slash so the members URL does not contain "//network".
forklist_url = sys.argv[1].strip().rstrip('/')+"/network/members"
forklist_htm = text_from_url(forklist_url)

# Patterns applied to every page, compiled once instead of on each iteration.
fork_link_re = re.compile('<a (class=""|class="Link--secondary") href="(/([^/"]*)/[^/"]*)">')
ahead_re = re.compile('([0-9]+) commits? ahead')
behind_re = re.compile('([0-9]+) commits? behind')
branch_re = re.compile('This branch is (.*) (up to date with|ahead|behind) (.*)[.]')
date_re = re.compile('<relative-time(.*?)datetime="(.*?)"(.*?)>')

# The first repository link found on the members page is reported as the root.
is_root = True
for match in fork_link_re.finditer(forklist_htm):
	# Group 2 is the "/owner/repo" path of the linked repository.
	fork_url = 'https://github.com'+match.group(2)
	fork_htm = text_from_url(fork_url)

	# NOTE(review): per the author's report the page sometimes contains
	# "Failed to load latest commit information."; when no <relative-time>
	# element is found the date is simply left out of the summary below.
	match_ahead = ahead_re.search(fork_htm)
	match_behind = behind_re.search(fork_htm)
	match_branch = branch_re.search(fork_htm)
	match_date = date_re.search(fork_htm)

	items = []

	if match_ahead:
		items.append('+'+match_ahead.group(1))

	if match_behind:
		items.append('-'+match_behind.group(1))

	if match_branch:
		# Group 3 is the text after "ahead"/"behind"/"up to date with".
		items.append(match_branch.group(3))

	# An up-to-date fork replaces whatever was collected so far.
	if "This branch is up to date with master." in fork_htm:
		items = ['up-to-date']

	if match_date:
		# Keep only the calendar date of the ISO 8601 timestamp.
		items.append(datetime.datetime.strptime(match_date.group(2), '%Y-%m-%dT%H:%M:%SZ').strftime('%Y-%m-%d'))

	if is_root:
		print(fork_url+' (root)')
	else:
		print(fork_url+' ('+' '.join(items)+')')

	is_root = False

	# Pause between page fetches.
	time.sleep(1)
Usage Example:
python3 list-forks.py https://github.com/itinance/react-native-fs

Does anyone have any ideas why this is happening and/or a potential work around?
Reply
#2
I ended up moving to using the Github API

Usage Example:
python3 list-forks.py https://github.com/itinance/react-native-fs

Example Output:
https://github.com/itinance/react-native-fs root 2021-11-04 "Merge pull request #1016 from mjgallag/make-react-native-windows-peer-dependency-optional  make react-native-windows peer dependency optional"
https://github.com/AnimoApps/react-native-fs diverged +2 -160 [+1m 10d] "Improved comments to align with new PNG support in copyAssetsFileIOS"
https://github.com/twinedo/react-native-fs ahead +1 [+26d] "clear warn yellow new NativeEventEmitter()"
https://github.com/synonymdev/react-native-fs ahead +2 [+23d] "Merge pull request #1 from synonymdev/event-emitter-fix  Event Emitter Fix"
https://github.com/kongyes/react-native-fs ahead +2 [+10d] "aa"
https://github.com/kamiky/react-native-fs diverged +1 -2 [-6d] "add copyCurrentAssetsVideoIOS function to retrieve current modified videos"
https://github.com/nikola166/react-native-fs diverged +1 -2 [-7d] "version"
https://github.com/morph3ux/react-native-fs diverged +1 -4 [-30d] "Update package.json"
https://github.com/broganm/react-native-fs diverged +2 -4 [-1m 7d] "Update RNFSManager.m"
https://github.com/k1mmm/react-native-fs diverged +1 -4 [-1m 14d] "Invalidate upload session  Prevent memory leaks"
https://github.com/TickKleiner/react-native-fs diverged +1 -4 [-1m 24d] "addListener and removeListeners methods wass added to pass warning"
https://github.com/nerdyfactory/react-native-fs diverged +1 -8 [-2m 14d] "fix: applying change from https://github.com/itinance/react-native-fs/pull/944"
import requests, re, os, sys, time, json, datetime
from dateutil.relativedelta import relativedelta
from urllib.parse import urlparse

# GitHub personal access token used for API requests. It is read from the
# GITHUB_PAT environment variable so the secret is never stored in (or posted
# with) the source code; the value is an empty string when the variable is unset.
GITHUB_PAT = os.environ.get('GITHUB_PAT', '')

def json_from_url(url):
	"""Fetch a GitHub API URL and return the decoded JSON body.

	Args:
		url: API endpoint to request.

	Returns:
		Whatever ``response.json()`` yields for the reply. The HTTP status is
		not checked here, so an error reply is returned as its parsed JSON
		payload and callers must inspect the result themselves.

	Raises:
		requests.exceptions.Timeout: If connecting, or waiting for data from
			the server, takes longer than 30 seconds.
	"""
	request_headers = {}
	# Only authenticate when a token is configured; an empty "token " header
	# would be sent otherwise.
	if GITHUB_PAT:
		request_headers['Authorization'] = 'token {}'.format(GITHUB_PAT)

	# requests has no default timeout; set one so a stalled connection cannot
	# hang the script.
	response = requests.get(url, headers=request_headers, timeout=30)
	return response.json()

def date_delta_to_text(date1, date2):
	"""Describe the calendar distance from date1 to date2 as short signed text.

	The difference is computed with ``relativedelta(date2, date1)`` and its
	non-zero year/month/day parts are listed in that order, e.g. ``+1m 10d``
	or ``-6d``.

	Args:
		date1: Reference date.
		date2: Date being compared against the reference.

	Returns:
		``'+'`` (date2 later than date1) or ``'-'`` (otherwise) followed by
		the space-separated parts, or ``'0d'`` when the two dates are less
		than one day apart.
	"""
	date_delta = relativedelta(date2, date1)
	sign = '+' if date1 < date2 else '-'

	components = (
		(date_delta.years, 'y'),
		(date_delta.months, 'm'),
		(date_delta.days, 'd'),
	)
	ret = ['{}{}'.format(abs(amount), unit) for amount, unit in components if amount != 0]

	# With no non-zero part the result used to be a bare '+' or '-';
	# report an explicit zero instead.
	if not ret:
		return '0d'

	return '{}{}'.format(sign, ' '.join(ret))

def iso8601_date_to_date(date):
	"""Parse a timestamp such as ``2021-11-04T10:20:30Z`` into a datetime.

	Args:
		date: Text in the exact form ``YYYY-MM-DDTHH:MM:SSZ``.

	Returns:
		A ``datetime.datetime`` without tzinfo holding the parsed fields.

	Raises:
		ValueError: If the text does not match the expected format.
	"""
	timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
	parse = datetime.datetime.strptime
	return parse(date, timestamp_format)

def date_to_text(date):
	"""Format a date as ``YYYY-MM-DD`` text.

	Args:
		date: Any object providing ``strftime`` (e.g. ``datetime.datetime``).

	Returns:
		The year, month and day joined by hyphens; time-of-day is dropped.
	"""
	day_format = '%Y-%m-%d'
	text = date.strftime(day_format)
	return text

def process_repo(repo_author, repo_name, fork_of_fork):
	"""Print every fork of a repository that is ahead of it, recursing into sub-forks.

	The forks list is read page by page (100 per page). Each fork's ``master``
	is compared against ``master`` of ``repo_author/repo_name``; forks that are
	ahead are printed as one line holding the fork URL, the comparison status,
	``+ahead``/``-behind`` counts, the age of the last compared commit relative
	to the module-level ``root_date`` and that commit's message. Forks that
	themselves have forks are processed recursively.

	Args:
		repo_author: Owner login of the repository whose forks are listed.
		repo_name: Name of that repository.
		fork_of_fork: When true, printed lines are indented to mark them as
			forks of a fork.

	Returns:
		None. Results are written to standard output.
	"""
	page = 1

	while True:
		forks_url = 'https://api.github.com/repos/{}/{}/forks?per_page=100&page={}'.format(repo_author, repo_name, page)
		forks_json = json_from_url(forks_url)

		# An empty page ends pagination. Anything that is not a list (an API
		# error payload is a JSON object) cannot be iterated as forks either.
		if not forks_json or not isinstance(forks_json, list):
			break

		for fork_info in forks_json:
			fork_author = fork_info['owner']['login']
			fork_name = fork_info['name']
			forks_count = fork_info['forks_count']
			fork_url = 'https://github.com/{}/{}'.format(fork_author, fork_name)

			# The base of the comparison is the repository being processed, so
			# its own name is used; a renamed fork has a different name and
			# would otherwise point the request at a non-existent repository.
			compare_url = 'https://api.github.com/repos/{}/{}/compare/master...{}:master'.format(repo_author, repo_name, fork_author)
			compare_json = json_from_url(compare_url)

			# Skip replies without comparison data (e.g. an error payload when
			# the fork has no master branch).
			if 'status' in compare_json and 'ahead_by' in compare_json:
				status = compare_json['status']
				ahead_by = compare_json['ahead_by']
				behind_by = compare_json['behind_by']
				commits = compare_json.get('commits') or []

				# Only forks with commits of their own are reported.
				if ahead_by > 0:
					items = []

					if fork_of_fork:
						# Extra leading field indents forks of forks.
						items.append('   ')

					items.append(fork_url)
					items.append(status)
					items.append('+{}'.format(ahead_by))

					if behind_by != 0:
						items.append('-{}'.format(behind_by))

					if commits:
						# Use the last commit actually returned: the list can be
						# shorter than total_commits, so indexing by
						# total_commits-1 may fall outside it.
						# NOTE(review): for very long comparisons this may not be
						# the newest commit - confirm against the API paging rules.
						commit = commits[-1]['commit']
						date = iso8601_date_to_date(commit['author']['date'])
						items.append('[{}]'.format(date_delta_to_text(root_date, date)))
						items.append('"{}"'.format(commit['message'].replace('\n', ' ')))

					print(' '.join(items))

			if forks_count > 0:
				process_repo(fork_author, fork_name, True)

		page += 1

# The repository URL is the only command-line argument; exit with a usage
# hint instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
	sys.exit('usage: {} https://github.com/<owner>/<repo>'.format(sys.argv[0]))

url_parsed = urlparse(sys.argv[1].strip())
# A path of "/owner/repo" splits into ['', 'owner', 'repo'].
path_array = url_parsed.path.split('/')
if len(path_array) < 3 or not path_array[1] or not path_array[2]:
	sys.exit('expected a repository URL like https://github.com/<owner>/<repo>, got: {}'.format(sys.argv[1]))
root_author = path_array[1]
root_name = path_array[2]

root_url = 'https://github.com/{}/{}'.format(root_author, root_name)
commits_url = 'https://api.github.com/repos/{}/{}/commits/master'.format(root_author, root_name)
commits_json = json_from_url(commits_url)
# json_from_url does not check the HTTP status, so an error reply arrives
# here as a payload without the 'commit' entry.
if 'commit' not in commits_json:
	sys.exit('could not read the latest master commit of {}: {}'.format(root_url, commits_json))
commit = commits_json['commit']
author = commit['author']
# root_date is read by process_repo to express fork commit dates relative to
# the root repository's latest master commit.
root_date = iso8601_date_to_date(author['date'])
print('{} root {} "{}"'.format(root_url, date_to_text(root_date), commit['message'].replace('\n', ' ')))

process_repo(root_author, root_name, False)
Reply
#3
You should delete this token and create a new token.
GITHUB_PAT = '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'
For next time, use a JSON file to save your credentials.
A good place is for example: ~/.config/app_name/secret.json
headkaze likes this post
Almost dead, but too lazy to die: https://sourceserver.info
All humans together. We don't need politicians!
Reply
#4
(Jan-05-2022, 10:04 PM)DeaD_EyE Wrote: You should delete this token and create a new token.

Yes, the token was deleted from GitHub before I posted the code, but I appreciate you letting me know.
Reply


Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020