# statistical_analyses/probable_teams/greedy.py

import csv
import json
import random
from collections import defaultdict

def parse_students(file_path: str) -> dict:
    """
    Parse the student CSV file and return each student's project preferences.

    The file must provide a ``Name`` column and the preference columns
    ``P1`` .. ``P5``. Preference cells are stripped of surrounding whitespace
    before being checked, so a cell such as ``" 3 "`` counts as project 3.
    Cells that are empty, absent because the row is shorter than the header,
    or not a plain decimal number are skipped.

    Args:
        file_path: Path to the UTF-8 encoded CSV file.

    Returns:
        A dict mapping each stripped student name to the list of preferred
        project IDs (ints) in column order P1..P5. When a name occurs on
        several rows, the last row wins.

    Raises:
        KeyError: If the header lacks ``Name`` or one of ``P1`` .. ``P5``.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        reader = csv.DictReader(file)
        data = {}
        for row in reader:
            preferences = []
            for i in range(1, 6):
                # DictReader fills cells missing from a short row with None.
                cell = (row[f'P{i}'] or '').strip()
                # isdecimal() only accepts characters that int() can parse,
                # unlike isdigit(), which also accepts e.g. superscript digits.
                if cell.isdecimal():
                    preferences.append(int(cell))
            data[row['Name'].strip()] = preferences
    return data

def parse_projects(file_path: str) -> dict:
    """
    Read the project CSV file and build a project-ID-to-name lookup.

    The columns ``proj ID`` and ``Project name`` are read from every row.
    Every occurrence of ``.pdf`` is removed from the name, which is then
    stripped of surrounding whitespace. The ID is converted with ``int``.

    Args:
        file_path: Path to the UTF-8 encoded CSV file.

    Returns:
        A dict mapping the integer project ID to the cleaned project name.
    """
    id_to_name = {}
    with open(file_path, 'r', encoding='utf-8') as handle:
        for record in csv.DictReader(handle):
            cleaned_name = record['Project name'].replace(".pdf", "").strip()
            id_to_name[int(record['proj ID'])] = cleaned_name
    return id_to_name

def assign_teams(data: dict, num_projects: int, team_size: int) -> dict:
    """
    Greedily place students on project teams in the iteration order of ``data``.

    First pass: each student is put on the first project in their preference
    list that still has a free slot. Second pass: every student left over is
    put on a randomly chosen project that still has free slots.

    Only projects that were looked up during the first pass can be chosen in
    the second pass. Students for whom no slot remains are left out of the
    result.

    Args:
        data: Mapping of student name to an ordered list of project IDs.
        num_projects: Accepted for interface compatibility; not used here.
        team_size: Number of slots available on every project.

    Returns:
        A ``defaultdict(list)`` mapping project ID to the list of assigned
        student names.
    """
    # Every project starts with team_size free slots on first lookup.
    slots_left = defaultdict(lambda: team_size)
    teams = defaultdict(list)
    leftovers = []

    # First pass: honour preferences in order.
    for name, wishes in data.items():
        for wanted in wishes:
            if slots_left[wanted] > 0:
                teams[wanted].append(name)
                slots_left[wanted] -= 1
                break
        else:
            # No preferred project had room (or there were no preferences).
            leftovers.append(name)

    # Second pass: fill remaining slots at random.
    open_projects = [pid for pid, free in slots_left.items() if free > 0]
    for name in leftovers:
        if not open_projects:
            # Nothing can open up again, so the remaining students stay out.
            break
        picked = random.choice(open_projects)
        teams[picked].append(name)
        slots_left[picked] -= 1
        if slots_left[picked] == 0:
            open_projects.remove(picked)

    return teams

def save_teams_to_json(teams: dict, output_file: str) -> None:
    """
    Write the team assignments to ``output_file`` as JSON.

    The file is written as UTF-8 with a four-space indent, and non-ASCII
    characters are emitted as-is rather than escaped.

    Args:
        teams: Mapping of project key to list of student names.
        output_file: Destination path; an existing file is overwritten.
    """
    dump_options = {'indent': 4, 'ensure_ascii': False}
    with open(output_file, mode='w', encoding='utf-8') as handle:
        json.dump(teams, handle, **dump_options)

def replace_id_with_name(teams: dict, project_data: dict) -> dict:
    """
    Re-key the team assignments as ``"<project id>: <project name>"``.

    Args:
        teams: Mapping of project ID to list of student names.
        project_data: Mapping of project ID to project name.

    Returns:
        A new dict with the same student lists under the labelled keys.
        IDs missing from ``project_data`` are labelled ``Unknown Project``.
    """
    return {
        f"{pid}: " + project_data.get(pid, 'Unknown Project'): members
        for pid, members in teams.items()
    }

def pos(look_for: str, data: dict, num_projects: int, team_size: int,
        total_iterations: int = 10000) -> dict:
    """
    Estimate how often a student ends up on each project for one team size.

    Runs ``assign_teams`` ``total_iterations`` times with the given
    ``team_size`` and counts on which project's team ``look_for`` appears.

    Args:
        look_for: Name of the student to track.
        data: Mapping of student name to ordered list of preferred project IDs.
        num_projects: Passed through to ``assign_teams``.
        team_size: Number of slots per project for every simulated run.
        total_iterations: Number of simulated assignments (default 10000).

    Returns:
        A dict mapping project ID to the percentage (0-100) of runs in which
        ``look_for`` was on that project's team. Projects the student never
        landed on are absent; the dict is empty if the student was never
        placed or ``total_iterations`` is not positive.
    """
    project_counts = defaultdict(int)

    for _ in range(total_iterations):
        teams = assign_teams(data, num_projects, team_size)
        for project_id, students in teams.items():
            if look_for in students:
                project_counts[project_id] += 1
                # assign_teams places each student at most once per run,
                # so the remaining teams need not be scanned.
                break

    # Convert raw counts into percentages of all runs.
    return {project_id: (count / total_iterations) * 100
            for project_id, count in project_counts.items()}

def main():
    """
    Ask for a student name and report their most likely project.

    Reads student preferences from ``data.csv``, simulates assignments with
    ``pos`` for every team size in ``range(3, 100)``, averages the per-project
    percentages over all team sizes, and prints the project with the highest
    average together with its name from ``projects.csv``.
    """
    file_path = 'data.csv'
    num_projects = 24
    # parse_students strips names, so strip the typed name to match them.
    student_name = input("Student name: ").strip()

    # Parse the student preferences
    data = parse_students(file_path)

    # Bail out before the expensive simulation if the name cannot match.
    if student_name not in data:
        print(f"Student '{student_name}' not found in {file_path}")
        return

    team_sizes = range(3, 100)
    percentage_sums = defaultdict(float)

    # Accumulate the per-project percentages over all team sizes.
    for team_size in team_sizes:
        project_percentages = pos(student_name, data, num_projects, team_size)
        for project_id, percentage in project_percentages.items():
            percentage_sums[project_id] += percentage

    # The student may be known but never placed on any team.
    if not percentage_sums:
        print(f"Student '{student_name}' was never assigned to a project")
        return

    # Divide once, after all team sizes have been accumulated. Dividing
    # inside the loop would shrink earlier contributions on every pass.
    num_team_sizes = len(team_sizes)
    project_average_percentages = {project_id: total / num_team_sizes
                                   for project_id, total in percentage_sums.items()}

    best_project = max(project_average_percentages, key=project_average_percentages.get)
    highest_average_percentage = project_average_percentages[best_project]

    project_name = parse_projects("projects.csv").get(best_project, 'Unknown Project')
    print(f"{best_project}: {project_name} with a {highest_average_percentage:.2f}% chance of assignment")
    # The figure is an average, so report the range of team sizes it covers
    # rather than the last loop value.
    print(f"Averaged over team sizes {team_sizes[0]} to {team_sizes[-1]}")
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Will follow up soon; this is wrong. (2025-02-03 23:54:12 +01:00)
			`import csv`
			`import json`
			`import random`
			`from collections import defaultdict`

			`def parse_students(file_path: str) -> dict:`
			`"""`
			`Parse the CSV file and return a dictionary with student preferences.`
			`"""`
			`with open(file_path, 'r', encoding='utf-8') as file:`
			`reader = csv.DictReader(file)`
			`data = {}`
			`for row in reader:`
			`# Convert project IDs to integers if they exist, otherwise use an empty list`
			`data[row['Name'].strip()] = [int(row[f'P{i}']) for i in range(1, 6) if row[f'P{i}'].isdigit()]`
			`return data`

			`def parse_projects(file_path: str) -> dict:`
			`# Project name, proj ID, chosen by:`
			`with open(file_path, 'r', encoding='utf-8') as file:`
			`reader = csv.DictReader(file)`
			`data = {}`
			`for row in reader:`
			`# Project id to project name`
			`data[int(row['proj ID'])] = row['Project name'].replace(".pdf", "").strip()`
			`return data`

			`def assign_teams(data: dict, num_projects: int, team_size: int) -> dict:`
			`"""`
			`Greedily assign students to teams based on their preferences using a heuristic approach.`
			`"""`
			`project_capacity = defaultdict(lambda: team_size)`
			`teams = defaultdict(list)`
			`unassigned_students = []`

			`for student, preferences in data.items():`
			`assigned = False`
			`for project in preferences:`
			`if project_capacity[project] > 0:`
			`teams[project].append(student)`
			`project_capacity[project] -= 1`
			`assigned = True`
			`break`
			`if not assigned:`
			`unassigned_students.append(student)`

			`available_projects = [p for p, slots in project_capacity.items() if slots > 0]`
			`for student in unassigned_students:`
			`if available_projects:`
			`project = random.choice(available_projects)`
			`teams[project].append(student)`
			`project_capacity[project] -= 1`
			`if project_capacity[project] == 0:`
			`available_projects.remove(project)`

			`return teams`

			`def save_teams_to_json(teams: dict, output_file: str):`
			`"""`
			`Save the team assignments to a JSON file.`
			`"""`
			`with open(output_file, 'w', encoding='utf-8') as file:`
			`json.dump(teams, file, indent=4, ensure_ascii=False)`

			`def replace_id_with_name(teams: dict, project_data: dict) -> dict:`
			`"""`
			`Replace project IDs with project names in the team assignments.`
			`"""`
			`teams_with_names = {}`
			`for project_id, students in teams.items():`
			`project_name = f"{project_id}: " + project_data.get(project_id, 'Unknown Project')`
			`teams_with_names[project_name] = students`
			`return teams_with_names`

			`def pos(look_for: str, data: dict, num_projects: int, team_size: int) -> dict:`
			`"""`
			`Track the percentage of times a student gets assigned to each project for team sizes 4-7.`
			`"""`
			`project_counts = defaultdict(int)`
			`total_iterations = 10000 # Total number of iterations`

			`for i in range(total_iterations):`
			`teams = assign_teams(data, num_projects, team_size)`
			`for project_id, students in teams.items():`
			`if look_for in students:`
			`project_counts[project_id] += 1`

			`# Calculate the percentage of assignments for each project`
			`project_percentages = {project_id: (count / total_iterations) * 100`
			`for project_id, count in project_counts.items()}`

			`return project_percentages`

			`def main():`
			`file_path = 'data.csv'`
			`num_projects = 24`
			`student_name = input("Student name: ")`

			`# Parse the student preferences`
			`data = parse_students(file_path)`

			`best_project = None`
			`team_sizes = range(3, 100)`
			`project_average_percentages = defaultdict(float)`

			`for team_size in team_sizes:`
			`project_percentages = pos(student_name, data, num_projects, team_size)`

			`for project_id, percentage in project_percentages.items():`
			`try:`
			`project_average_percentages[project_id] += percentage`
			`except KeyError:`
			`project_average_percentages[project_id] = percentage`

			`num_team_sizes = len(team_sizes)`
			`project_average_percentages = {project_id: percentage / num_team_sizes`
			`for project_id, percentage in project_average_percentages.items()}`

			`best_project = max(project_average_percentages, key=project_average_percentages.get)`
			`highest_average_percentage = project_average_percentages[best_project]`

			`project_name = parse_projects("projects.csv").get(best_project, 'Unknown Project')`
			`print(f"{best_project}: {project_name} with a {highest_average_percentage:.2f}% chance of assignment")`
			`print(f"Where the team size is {team_size}")`
			`if __name__ == '__main__':`
			`main()`