"""Collect Fedora badges awarded recently and the active users who earned them.

The script scrapes the badge index at badges.fedoraproject.org, reads each
badge's JSON description and RSS feed, looks every recent recipient up in
the Fedora Account System (FAS) and writes the result to ``data.txt`` as a
single JSON object keyed by badge id.
"""
import configparser
import datetime
import json

import feedparser
import requests
from bs4 import BeautifulSoup
from fedora.client.fas2 import AccountSystem

# Take into account only users with last_seen >= this date
USER_ACTIVE_DATE = "2019-01-01"
# Take into account only badges awarded >= this year
BADGE_ASSIGNED_YEAR = 2016
# Exclude badges awarded more than this number in total
MAX_AWARDED = 2000
# Take into account only badges awarded to <= this number of users in the
# time range we are considering
MAX_AWARDED_IN_TIMERANGE = 200

# Badge ids that are never reported.  A frozenset gives O(1) membership tests
# and makes duplicate entries harmless.  Every entry must be followed by a
# comma: adjacent string literals are silently concatenated by Python.
EXCLUDED_BADGES = frozenset([
    'commis-cookbook-i',
    'grillardin-cookbook-iii',
    'grillardin-cookbook-iv',
    'saucier-cookbook-iv',
    'tournant-cookbook-ii',
    'times_awarded',
    'curious-penguin-ask-fedora-i',
    'fedora-cz-author',
    'fedora-it-author',
    'let-me-introduce-myself',
    'junior-editor',
    'associate-editor',
    'origin',
    'baby-badger',
    'curious-penguin-ask-fedora-ii',
    'crypto-panda',
    'involvement',
    'paranoid-panda',
    'dont-call-it-a-comeback',
    'white-rabbit',
    'free-the-fedora-v',
    'free-the-fedora-ii',
    'curious-penguin-ask-fedora-iii',
    'consolation-prize',
    'free-the-fedora-iv',
    'free-the-fedora',
    'free-the-fedora-iii',
    'senior-editor',
    'master-editor',
    'crypto-badger',
    'curious-penguin-ask-fedora-iv',
    'egg',
    'embryo',
    'tadpole',
    'tadpole-with-legs',
    'building-the-outer-ring-copr-build-i',
    'mugshot',
    'nuancier',
    'i-voted:-fedora-29',
    'i-voted:-fedora-30',
    'tagger-tagger-ii',
    'zomg-package-tagger-package-tagger-vii',
    'chief-tagger-tagger-vi',
    'zomg-tagger-tagger-vii',
    'chief-package-tagger-package-tagger-vi',
    'master-package-tagger-package-tagger-iv',
    'package-tagger-package-tagger-ii',
    'grandmaster-package-tagger-package-tagger-v',
    'senior-package-tagger-package-tagger-iii',
    'junior-package-tagger-package-tagger-i',
    'master-tagger-tagger-iv',
    'junior-tagger-tagger-i',
    'macklemore-tagger-v',
    'senior-tagger-tagger-iii',
    'science-kernel-tester-ii',
])

CONFIG_FILE = 'myconfig.cfg'
BADGES_URL = 'https://badges.fedoraproject.org/explore/badges'
OUTPUT_FILE = 'data.txt'

# Placeholder used when FAS gives no usable last_seen value; it sorts before
# USER_ACTIVE_DATE, so such users are treated as inactive.
UNKNOWN_LAST_SEEN = "1970-01-01 00:00:00.00+00.00"


def _award_date(post):
    """Return the publication date of an RSS entry as ``YYYY/MM/DD``."""
    published = post.published_parsed
    return "%d/%02d/%02d" % (published.tm_year, published.tm_mon, published.tm_mday)


def _lookup_user(fas, name):
    """Query FAS for *name* and return the fields stored in the user cache.

    The lookup is best effort: every field that cannot be read is reported
    on stdout and replaced by a placeholder instead of aborting the run.

    Returns a dict with the keys ``username``, ``last_seen``,
    ``country_code`` and ``groups``.
    """
    fas_user = fas.send_request('/user/list', req_params={'search': name},
                                auth=True, retries=3, timeout=600)

    try:
        last_seen = fas_user['people'][0]['last_seen']
    except Exception:
        # The search response had no usable entry: retry with a direct
        # lookup.  From here on ``fas_user`` is the person record, so the
        # search-shaped accesses below are expected to take their fallbacks.
        try:
            fas_user = fas.person_by_username(name)
            last_seen = fas_user.last_seen
        except Exception as e:
            print("\tlast_seen", e)
            last_seen = UNKNOWN_LAST_SEEN
    if not last_seen:
        # Guard against a None/empty value so the caller's string
        # comparison cannot raise.
        last_seen = UNKNOWN_LAST_SEEN

    try:
        groups = [str(membership['name'])
                  for membership in fas_user['people'][0]['memberships']]
    except Exception as e:
        print("\tgroups", e)
        groups = ['exception']

    try:
        username = fas_user['search']
    except Exception as e:
        print("\tusername", e)
        username = name

    try:
        country_code = fas_user['people'][0]['country_code']
    except Exception:
        try:
            fas_user = fas.person_by_username(name)
            country_code = fas_user.country_code
        except Exception as e:
            print("country_code", e)
            country_code = "exception"

    return {"username": username, "last_seen": last_seen,
            "country_code": country_code, "groups": groups}


def _process_badge(fas, badgeurl, count, user_cache):
    """Fetch one badge and return its report entry, or ``None`` if skipped.

    A badge is skipped when its id is in ``EXCLUDED_BADGES``, when it was
    awarded more than ``MAX_AWARDED`` times overall, when its feed lists
    more than ``MAX_AWARDED_IN_TIMERANGE`` awards since
    ``BADGE_ASSIGNED_YEAR``, or when none of its recent recipients has a
    ``last_seen`` value >= ``USER_ACTIVE_DATE``.

    *count* is only used in progress output.  *user_cache* maps a feed entry
    title to the FAS data already fetched for it and is updated in place.
    """
    print("%s" % count, end='')
    badgejson = requests.get(badgeurl + '/json').json()
    badge_id = badgejson['id']
    print(" - Descr: %s, id: %s, tags: %s"
          % (badgejson['description'], badge_id, badgejson['tags']), end='')
    print(" - Awarded all time %s" % (badgejson['times_awarded']), end='')

    if badge_id in EXCLUDED_BADGES:
        print("\n\t Excluded")
        return None
    # Strict ">" so that a badge awarded exactly MAX_AWARDED times is kept,
    # matching both the constant's description and the message below.
    if badgejson['times_awarded'] > MAX_AWARDED:
        print("\n\tExcluded for absolute count awarded > ", MAX_AWARDED)
        return None

    feed = feedparser.parse(badgeurl + '/rss')
    recent = [post for post in feed.entries
              if post.published_parsed.tm_year >= BADGE_ASSIGNED_YEAR]
    if len(recent) > MAX_AWARDED_IN_TIMERANGE:
        print(" - Awarded in timerange >", MAX_AWARDED_IN_TIMERANGE)
        print("\n\tExcluded for time range awarded > ", MAX_AWARDED_IN_TIMERANGE)
        return None
    print(" - Awarded in timerange", len(recent))

    users = []
    for post in recent:
        date = _award_date(post)
        print("\t%s %s - Check %s (%s) " % (count, badge_id, post.title, date), end='')

        cached = post.title in user_cache
        if cached:
            print(" - Tot cached", len(user_cache))
        else:
            print("Not Cached")
            # Store under the same key that is used for the lookup above.
            user_cache[post.title] = _lookup_user(fas, post.title)
        info = user_cache[post.title]

        # Both values start with YYYY-MM-DD, so comparing them as strings
        # orders them chronologically.
        if info['last_seen'] >= USER_ACTIVE_DATE:
            last_seen = datetime.datetime.strptime(info['last_seen'],
                                                   "%Y-%m-%d %H:%M:%S.%f%z")
            awarded = datetime.datetime.strptime(date, "%Y/%m/%d")
            if cached:
                print("*", end='')
            print('\tUser: %s, last_seen: %s'
                  % (info['username'], last_seen.strftime("%b %d %Y")))
            users.append({"username": info['username'],
                          "country_code": info['country_code'],
                          "lastseen": last_seen.strftime("%b %d %Y"),
                          "awarded": awarded.strftime("%b %d %Y"),
                          "memberof": info['groups']})

    if not users:
        print("\tNo users for this badge in time range")
        return None

    # Drop the empty strings produced by leading/trailing/double commas.
    tags = [tag for tag in badgejson['tags'].split(",") if tag]
    return {"id": badge_id, "description": badgejson['description'],
            "tags": tags, "users": users}


def main():
    """Scrape every badge from the index page and dump the report as JSON.

    FAS credentials are read from the ``user`` and ``pass`` options of the
    ``[FAS]`` section of ``myconfig.cfg``.  The report is written to
    ``data.txt``; progress is printed to stdout.
    """
    config = configparser.ConfigParser()
    config.read(CONFIG_FILE)
    userdata = {opt: config.get('FAS', opt) for opt in config.options('FAS')}

    fas = AccountSystem(username=userdata['user'], password=userdata['pass'])
    fas.timeout = 600
    fas.retries = 3

    response = requests.get(BADGES_URL)
    soup = BeautifulSoup(response.text, "html.parser")

    badges = {}
    user_cache = {}
    count = 1

    # The context manager guarantees the file is closed even when one of the
    # network calls below raises.
    with open(OUTPUT_FILE, 'w', buffering=1) as outfile:
        for tooltip in soup.find_all("span", {"class": "tooltip"}):
            for link in tooltip.find_all('a', href=True):
                badge = _process_badge(fas, link['href'], count, user_cache)
                if badge is not None:
                    badges[badge['id']] = badge
                count += 1
        json.dump(badges, outfile)
        print("Close")


if __name__ == '__main__':
    main()
    print("End")