import csv
import json
from collections import defaultdict, deque
from pathlib import Path

try:
    from tqdm import tqdm
except ImportError:  # tqdm is optional: nothing in this module depends on it.
    def tqdm(iterable=None, *args, **kwargs):
        """Pass-through stand-in used when tqdm is not installed."""
        return iterable

# Preference columns read from the students CSV, most preferred first.
PREFERENCE_COLUMNS = tuple(f'P{i}' for i in range(1, 6))


def parse_students(file_path: str) -> dict:
    """
    Parse the students CSV and return each student's project preferences.

    The file must have a 'Name' column and the columns P1..P5. The result maps
    the stripped name to a list of project IDs (ints) in column order. Cells
    that are empty or not a plain decimal number are skipped, so a list may
    hold fewer than five entries.

    Raises:
        KeyError: if the 'Name' column or one of P1..P5 is missing.
    """
    preferences = {}
    # newline='' is what the csv module expects so that it can handle line
    # endings embedded in quoted fields itself.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            choices = []
            for column in PREFERENCE_COLUMNS:
                # A row shorter than the header yields None for the missing
                # cells; padded cells such as " 3" are still valid numbers.
                cell = (row[column] or '').strip()
                # isdecimal() only accepts what int() can parse; isdigit()
                # also accepts characters like superscripts that int() rejects.
                if cell.isdecimal():
                    choices.append(int(cell))
            preferences[row['Name'].strip()] = choices
    return preferences


def parse_projects(file_path: str, team_size: int) -> dict:
    """
    Parse the projects CSV and initialise one empty team per project.

    The file must have the columns 'proj ID' and 'Project name'. The result
    maps the integer project ID to a dict with the keys:
      'name'     -- project name with every ".pdf" removed and stripped,
      'capacity' -- team_size (the same for every project),
      'students' -- an empty list that will receive the assigned students.
    """
    projects = {}
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            projects[int(row['proj ID'])] = {
                'name': row['Project name'].replace(".pdf", "").strip(),
                'capacity': team_size,
                'students': [],  # Will store assigned students
            }
    return projects


def _rank(students: dict, student, proj_id) -> int:
    """Return the position of proj_id in the student's list (0 = favourite)."""
    return students[student].index(proj_id)


def stable_marriage(students: dict, projects: dict) -> dict:
    """
    Assign students to projects with a student-proposing deferred-acceptance
    (Gale-Shapley style) procedure.

    Students propose to the projects on their list in order of preference.
    Projects have no preference list of their own, so a project prefers the
    students who rank it higher. A full project gives up a seat only when the
    proposer ranks it strictly higher than the member who ranks it lowest;
    on a tie the current member keeps the seat.

    Args:
        students: maps student name to an ordered list of project IDs.
        projects: maps project ID to a dict with 'capacity' and 'students'.
            The 'students' lists are filled in place.

    Returns:
        A dict mapping every project ID to its list of assigned students (the
        same list objects stored in ``projects``). Students whose every choice
        is full or unknown stay unassigned and appear in no team.
    """
    free_students = deque(students)  # Students still looking for a seat
    next_choice = dict.fromkeys(students, 0)  # Index of the next project to try

    while free_students:
        student = free_students.popleft()
        preferences = students[student]
        choice_index = next_choice[student]
        if choice_index >= len(preferences):
            continue  # List exhausted: the student stays unassigned.

        proj_id = preferences[choice_index]
        next_choice[student] = choice_index + 1

        project = projects.get(proj_id)
        if project is None:
            # Unknown project ID: skip it and try the next choice later.
            free_students.append(student)
            continue

        team = project['students']
        if len(team) < project['capacity']:
            team.append(student)
            continue

        if team:  # An empty team here means capacity <= 0: nobody to displace.
            # The member who ranks this project lowest is the one at risk.
            worst_pos = max(
                range(len(team)),
                key=lambda pos: _rank(students, team[pos], proj_id),
            )
            worst_student = team[worst_pos]
            if _rank(students, student, proj_id) < _rank(students, worst_student, proj_id):
                team[worst_pos] = student
                free_students.append(worst_student)  # Displaced: free again
                continue

        # Rejected: back in the queue to try the next choice.
        free_students.append(student)

    return {proj_id: proj['students'] for proj_id, proj in projects.items()}


def save_teams_to_json(teams: dict, projects: dict, output_file: str):
    """
    Save the team assignments to a JSON file.

    Each key of the written object is "<project id>: <project name>" and each
    value is that project's list of students. The parent directory of
    output_file is created if it does not exist yet.
    """
    teams_with_names = {
        f"{proj_id}: {projects[proj_id]['name']}": members
        for proj_id, members in teams.items()
    }
    # Without this the write fails when the output directory is missing.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(teams_with_names, file, indent=4, ensure_ascii=False)


def main():
    """Build and save team assignments for team sizes 3, 4 and 5."""
    student_file = 'data.csv'
    project_file = 'projects.csv'

    students = parse_students(student_file)

    for team_size in range(3, 6):
        # Re-parse for every size: stable_marriage fills the teams in place.
        projects = parse_projects(project_file, team_size)
        print(f"Assigning teams with size {team_size}...")
        teams = stable_marriage(students, projects)
        save_teams_to_json(teams, projects, f'transformed/gsa_assigned_teams_{team_size}.json')


if __name__ == '__main__':
    main()