diff --git a/csec_mentor_matching/.gitignore b/csec_mentor_matching/.gitignore
index 7778391..6181b4c 100644
--- a/csec_mentor_matching/.gitignore
+++ b/csec_mentor_matching/.gitignore
@@ -3,3 +3,4 @@
 *.xls
 *.xlsx
 costs
+*.png
diff --git a/csec_mentor_matching/analysis.py b/csec_mentor_matching/analysis.py
index fa158b1..db84c3f 100644
--- a/csec_mentor_matching/analysis.py
+++ b/csec_mentor_matching/analysis.py
@@ -4,6 +4,8 @@
 import itertools as it
 import random
 import warnings
+import z3
+import matplotlib.pyplot as plt
 
 #warnings.filterwarnings('error')
 
@@ -149,6 +151,7 @@ def mentee_analysis(mentees) :
 # cost with number of extra subjects that dont match with mentor
 extra_cost = np.exp(np.arange(12)/3.0)
 
+U = int('1'*11, 2)
 
 def count_bits(x) :
     t = 0
@@ -161,7 +164,6 @@ def eval_cost(x) :
     # remember more the cost, less chances of matching
 
     (tee_int, tee_year) , (tor_int, pro, tor_year) = x
-    U = int('1'*11, 2)
 
     tor_int = int(tor_int)
     tee_int = int(tee_int)
@@ -170,7 +172,6 @@ def eval_cost(x) :
         return 0.0
 
     # 0 common very high cost, less common low cost, more chances of matching.
-    # lot of matches considered superflous.
     common = dot_cost[count_bits(tee_int & tor_int)]
 
     # more the tee extra more the cost, less chances of matching.
@@ -181,14 +182,14 @@ def eval_cost(x) :
     # lower the chances of getting that mentor.
     # More focussed the mentor on particular topics, the
     # lesser the chance of getting the mentor.
-    pro_cost = np.exp(pro* (0.5/count_bits(tor_int)))
+    pro_cost = np.exp(10 * (1/pro) * (1/count_bits(tor_int)))
 
     # Chances of matching a mentor in lower year than you
     # should be negligible
-    year_cost = 100 if tee_year > tor_year else 0
+    year_cost = 100 if tee_year > tor_year else (10 if tee_year == tor_year else 0)
 
     # random luck
-    luck = random.uniform(3.0, 9.0)
+    luck = random.uniform(2.0, 10.0)
 
     """
     print("=" * 80)
@@ -206,8 +207,7 @@ def eval_cost(x) :
 def costs(multi, mentee_int, mentee_year, mentor_int, proficiency, mentor_year) :
     # the matcher is a minimum cost matching algorithm
     # Make the costs higher to decrease the possibility of matching.
-
-    # add dummy candidates with very high weight
+    # multi is the total number of candidates; pad the mentee arrays with dummy entries up to that count
     mentee_int = np.append(mentee_int, np.asarray([0] * (multi - len(mentee_int))))
     mentee_year = np.append(mentee_year, np.asarray([len(MENTEE_YEAR_FULL)+1] * (multi - len(mentee_year))))
     costs = list(it.product(list(zip(mentee_int, mentee_year)), list(zip(mentor_int, proficiency, mentor_year))))
@@ -219,3 +219,66 @@ def detect_duplicates(mentors, mentees) :
     for pt in (set(mentors[0]) & set(mentees[0])) :
         n = mentors[0].index(pt)
         print( mentors[1][n] + " <" + pt + "> Filled both mentor and mentee forms.")
+
+# To encode the matching as SAT: every mentee-mentor pair gets a cost and a boolean variable denoting whether that pair is picked.
+# Min-cost matching = optimise the cost of the chosen pairs while choosing exactly one edge per mentee.
+# Other constraints are added separately.
+
+# Vars are indexed with the mentor first and the mentee second.
+def mentor_count_constraint(tee_tor_vars, costs, min_cnt):
+    l = []
+    for i in range(len(tee_tor_vars)):
+        l.append(z3.AtLeast(*tee_tor_vars[i], min_cnt))  # mentor i gets at least min_cnt mentees
+    return z3.And(l)
+
+def matching(optimiser, tee_tor_vars, costs):
+    for j in range(len(tee_tor_vars[0])):
+        # each mentee j is picked by exactly one mentor
+        optimiser.add(z3.simplify(z3.PbEq([(tee_tor_vars[i][j], 1) for i in range(len(tee_tor_vars))], 1)))
+
+    v = 0
+    for i in range(len(tee_tor_vars)):
+        for j in range(len(tee_tor_vars[0])):
+            optimiser.add_soft(tee_tor_vars[i][j], costs[i][j])
+#    optimiser.minimize(v)
+
+#    print(optimiser)
+
+    return optimiser.check()
+
+def visualise_interests(mentees):
+    interests = mentees[2]
+    BLK = 20
+    img = np.zeros((len(MENTEE_INTEREST_FULL)*BLK, BLK*len(interests), 3), dtype=float)
+
+    for i, v in enumerate(interests):
+        mask = v
+        for j, val in enumerate(bin(mask)[2:].zfill(len(MENTEE_INTEREST_FULL))):
+            img[j*BLK:(j+1)*BLK, i*BLK:(i+1)*BLK, :] = int(val)
+
+#    print(img)
+    plt.imsave('mentee_interests.png', img)
+
+
+def visualize_mapping(mapping, mentors, mentees):
+    BLK = 20
+    img = np.zeros((BLK*7, BLK*len(mentors[0]), 3), dtype=np.uint8)
+    cnts = np.zeros((len(mentors[0])), dtype=int)
+    for tor, tee in mapping:
+        tor_int = mentors[2][tor]
+        tee_int = mentees[2][tee]
+        val = [[[0, 255, 0]]] # green is default
+
+        if count_bits(tor_int & tee_int) <= 3:
+#        if count_bits(tee_int & (~(tor_int & tee_int) & U)) > 2:
+            # set difference of tee_int with tor_int more than, say, 2
+            val = [[[255, 0, 0]]] # red
+#        elif count_bits(tor_int & (~(tor_int & tee_int) & U)) > 2:
+            # set difference of tor_int with tee_int
+            #val = [[[0, 0, 255]]] # blue
+
+        img[cnts[tor]*BLK:(cnts[tor]+1)*BLK, tor*BLK:(tor+1)*BLK, :] = val
+        cnts[tor] += 1
+
+#    print(img)
+
+    plt.imsave('mapping.png', img)
diff --git a/csec_mentor_matching/main.py b/csec_mentor_matching/main.py
index 101cb8e..ae8ec03 100644
--- a/csec_mentor_matching/main.py
+++ b/csec_mentor_matching/main.py
@@ -13,6 +13,12 @@
 import numpy as np
 from functools import reduce
 import sys
+import z3
+
+z3.set_param('parallel.enable', True)
+z3.set_param('parallel.threads.max', 32)
+z3.set_param('sat.local_search_threads', 4)
+z3.set_param('sat.threads', 4)
 
 MENTEE_SPREADSHEET = '1byO5GRxBVTaDVXNR_JeykVWwA9LrirKhmjjn_8TIUXk'
 MENTOR_SPREADSHEET = '1SMl0cjfrLgU6D2iYtiQwdl1O5kHTdGdyDqWdysdwTWg'
@@ -31,10 +37,13 @@
 mentors = analysis.mentor_analysis((mentors, mentors_year))
 
 mentees = analysis.mentee_analysis(mentees)
+mentees_og = mentees
 
 print('+' * 80)
 
 analysis.detect_duplicates(mentors, mentees)
 
+analysis.visualise_interests(mentees)
+
 mentee_set1, mentee_set2 = analysis.arrange_mentees(mentees, len(mentors[0]))
 #mentors = list(mentors)
@@ -74,9 +83,9 @@
 multi = len(mentees[0])
 costs = analysis.costs(multi, mentees[2], mentees[3], mentors[2], mentors[3], mentors[4])
 
 temp = matcher.match(multi, len(mentors[0]), costs)
-temp = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) )
+temp1 = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) )
 
-assignment = list(map( lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp))
+assignment = list(map( lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp1))
 
 print('+' * 80)
@@ -84,12 +93,17 @@
 multi = len(mentors[0])
 costs = analysis.costs(multi, mentees[2], mentees[3], mentors[2], mentors[3], mentors[4])
 
 temp = matcher.match(multi, len(mentors[0]), costs)
-temp = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) )
+temp2 = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) )
 
-assignment += list(map(lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp))
+assignment += list(map(lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp2))
 
 assignment = pd.DataFrame(assignment)
-print('+' * 80, file=sys.stderr)
+mapping = temp1 + temp2
+mapping.sort()
+
+analysis.visualize_mapping(mapping, mentors, mentees_og)
+
+print('+' * 80)
 assignment.columns = ['Mentor Email', 'Mentor Name', 'Mentor Year', 'Mentee Email', 'Mentee Name', 'Mentee Year']
 print("The assertion that Mentor Year >= Mentee Year: " + ("Failed" if np.all(assignment['Mentor Year'] >= assignment['Mentee Year'] ) else "Passed"))
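
Reviewer note: the z3 helpers added to analysis.py above are not yet called from main.py. Below is a minimal, self-contained sketch of how they could be driven, kept outside the patch. The 2-mentor by 3-mentee cost matrix is invented for illustration, and the explicit `minimize` objective is just one possible way to express the stated min-cost goal (the patch itself currently experiments with `add_soft` instead).

import z3

# Illustrative data only: 2 mentors x 3 mentees, higher cost = worse match.
n_tors, n_tees = 2, 3
costs = [[5, 1, 9],
         [2, 7, 4]]

# x[i][j] is True when mentor i is paired with mentee j
# (mentor-first indexing, matching the convention in analysis.py).
x = [[z3.Bool("x_%d_%d" % (i, j)) for j in range(n_tees)] for i in range(n_tors)]

opt = z3.Optimize()

# Each mentee gets exactly one mentor (the constraint matching() adds).
for j in range(n_tees):
    opt.add(z3.PbEq([(x[i][j], 1) for i in range(n_tors)], 1))

# Every mentor gets at least one mentee (what mentor_count_constraint encodes).
for i in range(n_tors):
    opt.add(z3.AtLeast(*x[i], 1))

# One way to state the min-cost objective explicitly.
total = z3.Sum([z3.If(x[i][j], costs[i][j], 0)
                for i in range(n_tors) for j in range(n_tees)])
opt.minimize(total)

if opt.check() == z3.sat:
    m = opt.model()
    pairs = [(i, j) for i in range(n_tors) for j in range(n_tees)
             if z3.is_true(m[x[i][j]])]
    print(pairs)  # [(0, 1), (1, 0), (1, 2)] for the costs above

If the add_soft route is kept instead, note that z3's MaxSAT objective minimises the total weight of the violated soft constraints, so to discourage expensive matches the penalty should attach to picking a costly pair (e.g. a soft constraint on Not(x[i][j]) weighted by its cost) rather than to leaving it unpicked.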