95 lines
4.0 KiB
Python
95 lines
4.0 KiB
Python
import csv
|
|
import json
|
|
from collections import defaultdict, deque
|
|
from tqdm import tqdm
|
|
|
|
def parse_students(file_path: str) -> dict:
    """
    Parse the student CSV file and return each student's ordered project preferences.

    The file must have a ``Name`` column and preference columns ``P1`` .. ``P5``.
    Preference cells are stripped of surrounding whitespace before validation, so
    a cell such as ``" 3"`` is kept. Cells that are empty, missing (short rows)
    or not a plain decimal number are skipped.

    Args:
        file_path: Path to the UTF-8 encoded CSV file.

    Returns:
        A dict mapping each stripped student name to a list of project IDs
        (ints), in column order P1..P5. If a name occurs more than once, the
        last row wins.

    Raises:
        KeyError: If the ``Name`` column or one of ``P1`` .. ``P5`` is absent
            from the header.
    """
    preference_columns = [f'P{i}' for i in range(1, 6)]
    data = {}
    # The csv module asks for newline='' so that newlines embedded in quoted
    # fields are handled by the reader rather than by the file object.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            choices = []
            for column in preference_columns:
                # DictReader fills cells missing from short rows with None.
                cell = (row[column] or '').strip()
                # isdecimal() only accepts characters that int() can convert;
                # isdigit() also accepts e.g. superscript digits, which int() rejects.
                if cell.isdecimal():
                    choices.append(int(cell))
            data[row['Name'].strip()] = choices
    return data
|
|
|
|
def parse_projects(file_path: str, team_size: int) -> dict:
    """
    Load the project list from a CSV file and give every project the same capacity.

    Each row must provide a ``proj ID`` column (converted to int) and a
    ``Project name`` column; any ".pdf" text is removed from the name and the
    result is stripped. Every project starts with an empty ``students`` list.

    Returns a dict keyed by integer project ID, each value holding the keys
    'name', 'capacity' and 'students'.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return {
            int(record['proj ID']): {
                'name': record['Project name'].replace(".pdf", "").strip(),
                'capacity': team_size,
                'students': [],  # Filled in later by the assignment step
            }
            for record in csv.DictReader(handle)
        }
|
|
|
|
def stable_marriage(students: dict, projects: dict) -> dict:
    """
    Assign students to projects with a deferred-acceptance (Gale-Shapley style) loop.

    Students propose to projects in the order of their own preference lists.
    A project with spare capacity accepts the proposer. A full project looks at
    its weakest current member, meaning the member who ranks this project lowest
    in their own list, and swaps that member for the proposer only when the
    proposer ranks the project strictly higher. Ties keep the incumbent.

    Args:
        students: Maps each student name to an ordered list of project IDs,
            most preferred first.
        projects: Maps each project ID to a dict holding at least 'capacity'
            (int) and 'students' (list). The 'students' lists are mutated in place.

    Returns:
        A dict mapping every project ID to its list of assigned students (the
        same list objects stored in ``projects``). A student whose preferences
        are exhausted without being accepted does not appear in any team.
        Preferences naming an unknown project ID are skipped.
    """
    free_students = deque(students)  # Students who still need to propose
    student_next_choice = {student: 0 for student in students}  # Next list position to try

    while free_students:
        student = free_students.popleft()
        preferences = students[student]
        next_choice_index = student_next_choice[student]
        if next_choice_index >= len(preferences):
            # Nothing left to propose to; the student stays unassigned.
            continue

        proj_id = preferences[next_choice_index]
        student_next_choice[student] = next_choice_index + 1

        accepted = False
        project = projects.get(proj_id)
        if project is not None:
            members = project['students']
            if len(members) < project['capacity']:
                members.append(student)
                accepted = True
            elif members:
                # A larger index means the project sits further down that
                # member's list, so max() finds the member who wants it least.
                worst_student = max(members, key=lambda s: students[s].index(proj_id))
                worst_rank = students[worst_student].index(proj_id)
                # Only displace the incumbent for a strictly keener proposer.
                if preferences.index(proj_id) < worst_rank:
                    members[members.index(worst_student)] = student
                    free_students.append(worst_student)  # Evicted member proposes again
                    accepted = True
            # A full project with no members (capacity <= 0) rejects everyone.

        # A rejected student goes back in the queue while choices remain.
        if not accepted and student_next_choice[student] < len(preferences):
            free_students.append(student)

    return {proj_id: proj['students'] for proj_id, proj in projects.items()}
|
|
|
|
def save_teams_to_json(teams: dict, projects: dict, output_file: str):
    """
    Write the team assignments to ``output_file`` as indented UTF-8 JSON.

    Each key in the output is "<project id>: <project name>", with the name
    looked up in ``projects``; each value is that project's list of students.
    """
    labelled_teams = {}
    for proj_id, members in teams.items():
        project_name = projects[proj_id]['name']
        labelled_teams[f"{proj_id}: {project_name}"] = members

    with open(output_file, 'w', encoding='utf-8') as handle:
        json.dump(labelled_teams, handle, indent=4, ensure_ascii=False)
|
|
|
|
def main():
    """
    Run the team assignment once for each team size from 3 to 5.

    Student preferences are read once from 'data.csv'. The project list is
    re-read from 'projects.csv' for every team size, because the assignment
    step fills the project dicts in place. Each result is written to its own
    JSON file under 'transformed/'.
    """
    students = parse_students('data.csv')

    for team_size in (3, 4, 5):
        projects = parse_projects('projects.csv', team_size)
        print(f"Assigning teams with size {team_size}...")
        save_teams_to_json(
            stable_marriage(students, projects),
            projects,
            f'transformed/gsa_assigned_teams_{team_size}.json',
        )
|
|
|
|
|
|
# Run the assignment pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|