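# Code-viewer page header captured with the listing (not part of the program):
# 95 lines, 4.0 KiB, Python.
# Timestamp shown by the viewer's history panel: 2025-02-03 23:54:12 +01:00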
import csv
import json
import os
from collections import defaultdict, deque

from tqdm import tqdm
def parse_students(file_path: str) -> dict:
    """
    Parse the student CSV file and return each student's ranked project choices.

    The file needs a ``Name`` column and preference columns ``P1`` .. ``P5``.
    For every row, the cells ``P1`` .. ``P5`` are read in order and each cell
    that holds a whole number (surrounding whitespace is ignored) is kept as
    an ``int``. Empty, missing (short row) or non-numeric cells are skipped.

    Args:
        file_path: Path to the UTF-8 encoded CSV file.

    Returns:
        A dict mapping the stripped student name to the list of project IDs
        in preference order (most preferred first). A later row with the same
        name overwrites an earlier one.

    Raises:
        KeyError: If the header lacks ``Name`` or one of ``P1`` .. ``P5``.
    """
    data = {}
    # newline='' hands newline handling to the csv module, as its docs require.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            choices = []
            for i in range(1, 6):
                # DictReader fills cells missing from a short row with None.
                cell = (row[f'P{i}'] or '').strip()
                # isdecimal() only accepts characters that int() can parse;
                # isdigit() also accepts e.g. superscripts, which int() rejects.
                if cell.isdecimal():
                    choices.append(int(cell))
            data[row['Name'].strip()] = choices
    return data
def parse_projects(file_path: str, team_size: int) -> dict:
    """
    Parse the projects CSV file and initialize every project's team slot.

    The file needs the columns ``proj ID`` and ``Project name``.

    Args:
        file_path: Path to the UTF-8 encoded CSV file.
        team_size: Capacity given to every project.

    Returns:
        A dict mapping the integer project ID to a dict with the keys
        ``'name'`` (project name with every ``".pdf"`` removed and outer
        whitespace stripped), ``'capacity'`` (``team_size``) and
        ``'students'`` (a fresh empty list per project).

    Raises:
        KeyError: If one of the two required columns is missing.
        ValueError: If a ``proj ID`` cell is not an integer.
    """
    data = {}
    # newline='' hands newline handling to the csv module, as its docs require;
    # without it, line breaks inside quoted project names get rewritten.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            data[int(row['proj ID'])] = {
                'name': row['Project name'].replace(".pdf", "").strip(),
                'capacity': team_size,
                'students': [],  # Will store assigned students
            }
    return data
def stable_marriage(students: dict, projects: dict) -> dict:
    """
    Assign students to projects with a student-proposing Gale-Shapley scheme.

    Students propose to the projects on their preference list in order.
    Projects have no preference list of their own, so a project favours the
    students who ranked it highest. When a full project receives a proposal,
    the member who ranked it lowest is replaced only if the applicant ranked
    the project strictly higher; on a tie the current member keeps the seat.

    Args:
        students: Maps student name to the list of project IDs in preference
            order (most preferred first).
        projects: Maps project ID to a dict with at least ``'capacity'``
            (int) and ``'students'`` (list). The ``'students'`` lists are
            filled in place.

    Returns:
        A dict mapping every project ID to its ``'students'`` list (the same
        list objects stored in ``projects``). Students whose choices are all
        unknown or full of higher-priority members end up in no list.
    """
    free_students = deque(students)  # Students still waiting to propose
    student_next_choice = {student: 0 for student in students}  # Next list position to try

    while free_students:
        student = free_students.popleft()
        preferences = students[student]
        next_choice_index = student_next_choice[student]
        if next_choice_index >= len(preferences):
            # Every listed choice has been tried; the student stays unassigned.
            continue

        proj_id = preferences[next_choice_index]
        student_next_choice[student] += 1  # Never propose to the same slot twice

        assigned = False
        project = projects.get(proj_id)
        if project is not None:
            members = project['students']
            if len(members) < project['capacity']:
                members.append(student)
                assigned = True
            elif members:  # Full; an empty list here means capacity <= 0
                # The weakest member is the one who ranked this project lowest,
                # i.e. has the largest index for it in their own list.
                worst_student = max(members, key=lambda s: students[s].index(proj_id))
                if preferences.index(proj_id) < students[worst_student].index(proj_id):
                    members[members.index(worst_student)] = student
                    free_students.append(worst_student)  # Evicted member proposes again
                    assigned = True

        # A rejected student goes back in the queue while choices remain.
        if not assigned and student_next_choice[student] < len(preferences):
            free_students.append(student)

    return {proj_id: proj['students'] for proj_id, proj in projects.items()}
def save_teams_to_json(teams: dict, projects: dict, output_file: str):
    """
    Save the team assignments to a JSON file.

    Each project becomes one key of the form ``"<proj_id>: <project name>"``
    whose value is the list of assigned students. The parent directory of
    ``output_file`` is created when it does not exist yet.

    Args:
        teams: Maps project ID to the list of assigned student names.
        projects: Maps project ID to a dict holding at least ``'name'``.
        output_file: Path of the JSON file to write (UTF-8, overwritten).

    Raises:
        KeyError: If a project ID in ``teams`` is missing from ``projects``.
    """
    teams_with_names = {
        f"{proj_id}: {projects[proj_id]['name']}": members
        for proj_id, members in teams.items()
    }

    # A bare file name has no directory part; makedirs('') would raise.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_file, 'w', encoding='utf-8') as file:
        # ensure_ascii=False keeps non-ASCII names readable in the output.
        json.dump(teams_with_names, file, indent=4, ensure_ascii=False)
def main():
    """
    Run the team assignment for team sizes 3, 4 and 5.

    Student preferences are read once from ``data.csv``. For every team size
    the projects are re-read from ``projects.csv``, teams are computed and
    written to a size-specific JSON file under ``transformed/``.
    """
    student_file = 'data.csv'
    project_file = 'projects.csv'

    students = parse_students(student_file)

    for team_size in (3, 4, 5):
        # Projects are parsed anew per size so every run starts with empty teams.
        projects = parse_projects(project_file, team_size)
        print(f"Assigning teams with size {team_size}...")
        output_file = f'transformed/gsa_assigned_teams_{team_size}.json'
        save_teams_to_json(stable_marriage(students, projects), projects, output_file)
# Run the assignment only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()