# statistical_analyses/probable_teams/greedy.py

import csv
import json
import random
from collections import defaultdict

def parse_students(file_path: str) -> dict:
    """
    Parse the student CSV and return each student's ranked project choices.

    The file needs a ``Name`` column and preference columns ``P1`` .. ``P5``.
    Cells that are empty, missing (short rows) or not a plain decimal number
    are skipped, so a student may end up with fewer than five choices.

    Args:
        file_path: Path to the UTF-8 encoded CSV file.

    Returns:
        Dict mapping the stripped student name to a list of project IDs
        (ints) in the order P1..P5.

    Raises:
        KeyError: If the ``Name`` column or one of ``P1``..``P5`` is absent
            from the header.
    """
    data = {}
    # newline='' is what the csv module expects so it can handle line
    # endings (including newlines inside quoted fields) itself.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            choices = []
            for i in range(1, 6):
                # DictReader fills cells missing from a short row with None.
                cell = (row[f'P{i}'] or '').strip()
                # isdecimal() only accepts what int() can actually parse;
                # isdigit() would also let through characters such as "²".
                if cell.isdecimal():
                    choices.append(int(cell))
            data[row['Name'].strip()] = choices
    return data

def parse_projects(file_path: str) -> dict:
    """
    Read the projects CSV and build a project-ID -> project-name lookup.

    The file is expected to carry the columns ``Project name`` and
    ``proj ID``. Every occurrence of ".pdf" is removed from the name and
    surrounding whitespace is stripped; the ID is converted to ``int``.
    """
    names_by_id = {}
    with open(file_path, 'r', encoding='utf-8') as handle:
        for record in csv.DictReader(handle):
            # Clean the display name first, then key it by the numeric ID.
            title = record['Project name'].replace(".pdf", "")
            title = title.strip()
            names_by_id[int(record['proj ID'])] = title
    return names_by_id

def assign_teams(data: dict, num_projects: int, team_size: int) -> dict:
    """
    Greedily place students on project teams according to their preferences.

    Pass one walks ``data`` in its iteration order and gives each student the
    first project in their preference list that still has a free slot (every
    project starts with ``team_size`` slots). Pass two places the students
    that found no slot on a randomly chosen project that still has room.

    Notes:
        * ``num_projects`` is accepted but not used by this function.
        * Only projects that were examined during pass one are candidates in
          pass two; a student is left out of the result entirely when no such
          project has room.

    Returns:
        A ``defaultdict(list)`` mapping project ID -> list of student names.
    """
    slots_left = defaultdict(lambda: team_size)
    rosters = defaultdict(list)
    leftovers = []

    # Pass one: honour preferences, first come first served.
    for name, ranked_choices in data.items():
        for choice in ranked_choices:
            if slots_left[choice] <= 0:
                continue
            rosters[choice].append(name)
            slots_left[choice] -= 1
            break
        else:
            # No preferred project had room (or no preferences were given).
            leftovers.append(name)

    # Pass two: scatter the remaining students over projects with free slots.
    open_projects = [pid for pid in slots_left if slots_left[pid] > 0]
    for name in leftovers:
        if not open_projects:
            continue
        pick = random.choice(open_projects)
        rosters[pick].append(name)
        slots_left[pick] -= 1
        if slots_left[pick] == 0:
            open_projects.remove(pick)

    return rosters

def save_teams_to_json(teams: dict, output_file: str):
    """
    Write the team assignments to ``output_file`` as UTF-8 JSON.

    The output is indented by four spaces and non-ASCII characters are
    written as-is rather than escaped.
    """
    dump_options = {'indent': 4, 'ensure_ascii': False}
    with open(output_file, mode='w', encoding='utf-8') as handle:
        json.dump(teams, handle, **dump_options)

def replace_id_with_name(teams: dict, project_data: dict) -> dict:
    """
    Return a copy of ``teams`` keyed by "<id>: <project name>" labels.

    IDs that are missing from ``project_data`` are labelled with
    'Unknown Project'. The student lists are reused, not copied.
    """
    def label_for(pid):
        # Prefix the readable name with the numeric ID so labels stay unique.
        return "{}: ".format(pid) + project_data.get(pid, 'Unknown Project')

    return {label_for(pid): members for pid, members in teams.items()}

def pos(look_for: str, data: dict, num_projects: int, team_size: int,
        total_iterations: int = 10000) -> dict:
    """
    Estimate how often a student is assigned to each project for one team size.

    Runs ``assign_teams`` ``total_iterations`` times with the given
    ``team_size`` and counts, per project, the runs in which ``look_for``
    appears on that project's team.

    Args:
        look_for: Student name to track (must match a key of ``data`` exactly).
        data: Mapping of student name -> list of preferred project IDs.
        num_projects: Passed through to ``assign_teams``.
        team_size: Number of slots per project for every simulated run.
        total_iterations: Number of simulated assignments (default 10000).

    Returns:
        Dict mapping project ID -> percentage (0-100) of runs in which the
        student was on that project. Projects the student never landed on
        are omitted, so the dict is empty if the student was never placed.

    Raises:
        ValueError: If ``total_iterations`` is not positive.
    """
    if total_iterations <= 0:
        # Guards the division below and rejects meaningless run counts.
        raise ValueError("total_iterations must be a positive integer")

    project_counts = defaultdict(int)

    for _ in range(total_iterations):
        teams = assign_teams(data, num_projects, team_size)
        for project_id, students in teams.items():
            if look_for in students:
                project_counts[project_id] += 1

    # Convert raw hit counts into percentages of all simulated runs.
    return {project_id: (count / total_iterations) * 100
            for project_id, count in project_counts.items()}

def main():
    """
    Prompt for a student and report their most likely project assignment.

    Reads student preferences from ``data.csv``, runs ``pos`` for every team
    size from 3 to 99, averages the per-project percentages across those team
    sizes, and prints the project with the highest average together with its
    name looked up in ``projects.csv``.
    """
    file_path = 'data.csv'
    num_projects = 24
    # Parsed names are stripped, so strip the typed name to match them.
    student_name = input("Student name: ").strip()

    # Parse the student preferences
    data = parse_students(file_path)
    if student_name not in data:
        # Bail out before the long simulation: an unknown student can never
        # show up in any team.
        print(f"Student {student_name!r} not found in {file_path}")
        return

    team_sizes = range(3, 100)
    percentage_totals = defaultdict(float)

    # Accumulate the percentages over all team sizes first ...
    for team_size in team_sizes:
        project_percentages = pos(student_name, data, num_projects, team_size)
        for project_id, percentage in project_percentages.items():
            percentage_totals[project_id] += percentage

    if not percentage_totals:
        # max() below would raise on an empty mapping.
        print(f"{student_name} was never assigned to a project")
        return

    # ... and divide exactly once afterwards. Dividing inside the loop would
    # shrink earlier team sizes' contributions again on every iteration.
    num_team_sizes = len(team_sizes)
    project_average_percentages = {
        project_id: total / num_team_sizes
        for project_id, total in percentage_totals.items()
    }

    best_project = max(project_average_percentages, key=project_average_percentages.get)
    highest_average_percentage = project_average_percentages[best_project]

    project_name = parse_projects("projects.csv").get(best_project, 'Unknown Project')
    print(f"{best_project}: {project_name} with a {highest_average_percentage:.2f}% chance of assignment")
    # The figure is an average, so report the range it was averaged over
    # rather than the loop's last team size.
    print(f"Averaged over team sizes {team_sizes.start} to {team_sizes.stop - 1}")
# Run the interactive analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()