From 52d9d86260a7c510dbe9139dfdf31f43b407caab Mon Sep 17 00:00:00 2001
From: Boyan <36108495+confestim@users.noreply.github.com>
Date: Mon, 18 Nov 2024 20:05:12 +0100
Subject: [PATCH] Used API instead of scraping to find courses from year

---
 temmies/year.py | 111 ++++++++++++++++++++++++------------------------
 1 file changed, 55 insertions(+), 56 deletions(-)

diff --git a/temmies/year.py b/temmies/year.py
index 9f7a254..f134c0d 100644
--- a/temmies/year.py
+++ b/temmies/year.py
@@ -1,71 +1,70 @@
-from bs4 import BeautifulSoup
 from .course import Course
-from .exceptions.course_unavailable import CourseUnavailable
-
+from bs4 import BeautifulSoup
 class Year:
     """
     Represents an academic year.
     """
+    def __init__(self, session, year_path: str) -> None:
+        self.session = session  # object whose .get() issues the HTTP requests made by this class
+        self.year_path = year_path  # e.g., '2023-2024'
+        self.base_url = "https://themis.housing.rug.nl"
+        self.api_url = f"{self.base_url}/api/navigation/{self.year_path}"  # queried by all_courses()
 
-    def __init__(self, session, start_year: int, end_year: int):
-        self.start = start_year
-        self.year = end_year
-        self.url = f"https://themis.housing.rug.nl/course/{self.start}-{self.year}"
-        self._session = session
+    def all_courses(self) -> list[Course]:
+        """
+        Gets all visible courses in this year.
+
+        Raises ConnectionError when the navigation API does not answer with HTTP 200.
+        """
+        response = self.session.get(self.api_url)
+        if response.status_code != 200:
+            raise ConnectionError(f"Failed to retrieve courses for {self.year_path}.")
 
-    def all_courses(self, errors: bool = True) -> list[Course]:
-        """
-        Gets all visible courses in a year.
-        """
-        r = self._session.get(self.url)
-        soup = BeautifulSoup(r.text, "lxml")
-        lis = soup.find_all("li", class_="large")
         courses = []
-        for li in lis:
-            try:
-                suffix = li.a["href"].replace(f"course/{self.start}-{self.year}", "")
-                course_url = self.url + suffix
-                course_name = li.a.text.strip()
-                courses.append(
-                    Course(course_url, course_name, self._session, self)
-                )
-            except CourseUnavailable as exc:
-                if errors:
-                    raise CourseUnavailable(
-                        message=f"Course {li.a.text} in year {self.start}-{self.year} unavailable"
-                    ) from exc
-                print("Error with course", li.a.text)
-                continue
+        for course_info in response.json():
+            if not course_info.get("visible", False):
+                continue  # entries without a truthy "visible" flag are left out
+            courses.append(Course(self.session, course_info["path"], course_info["title"], self))
         return courses
 
-    def get_course(self, name: str) -> Course:
+    def get_course(self, course_title: str) -> Course:
         """
-        Gets a course by name.
+        Returns the first course from all_courses() whose title equals course_title; raises ValueError if none does.
         """
-        r = self._session.get(self.url)
-        soup = BeautifulSoup(r.text, "lxml")
-        course_link = soup.find("a", text=name)
-        if not course_link:
-            raise CourseUnavailable(f"No such course found: {name}")
-        suffix = course_link["href"].replace(f"course/{self.start}-{self.year}", "")
-        course_url = self.url + suffix
-        return Course(course_url, name, self._session, self)
+        matches = (c for c in self.all_courses() if c.title == course_title)
+        found = next(matches, None)
+        if found is None:
+            raise ValueError(f"Course '{course_title}' not found in year {self.year_path}.")
+        return found
 
-    def get_course_by_url(self, url: str) -> Course:
+    def get_course_by_tag(self, course_tag: str) -> Course:
         """
-        Gets a course by url.
+        Gets a course by its tag (course identifier).
+        Constructs the course URL using the year and course tag.
+        The title is read from the fetched course page, parsed with the module-level BeautifulSoup import.
+        Raises ConnectionError on a non-200 response and ValueError when no title element is found.
         """
-        r = self._session.get(url)
-        soup = BeautifulSoup(r.text, "lxml")
-        # <a class="fill accent large" href="https://themis.housing.rug.nl/course/2023-2024/adinc-cs">Algorithms and Data Structures for CS</a>
-        course_link = soup.find_all("a", class_="fill accent large")
-        name = None
-        for link in course_link:
-            if url in link["href"]:
-                name = link.text
-                break
-        
-        if not name:
-            raise CourseUnavailable(f"No such course found: {url}")
-        return Course(url, name, self._session, self)
-        
\ No newline at end of file
+        course_path = f"/{self.year_path}/{course_tag}"
+        course_url = f"{self.base_url}/course{course_path}"
+
+        response = self.session.get(course_url)
+        if response.status_code != 200:
+            raise ConnectionError(f"Failed to retrieve course with tag '{course_tag}' for year {self.year_path}. Tried {course_url}")
+
+        soup = BeautifulSoup(response.text, "lxml")
+
+        title_element = soup.find("h1")
+        if not title_element:
+            title_elements = soup.find_all("a", class_="fill accent large")
+            if title_elements:
+                title_element = title_elements[-1]
+
+        if title_element:
+            course_title = title_element.get_text(strip=True)
+        else:
+            raise ValueError(f"Could not retrieve course title for tag '{course_tag}' in year {self.year_path}.")
+
+        return Course(self.session, course_path, course_title, self)
+
+    def __str__(self) -> str:
+        return f"Year({self.year_path})"  # e.g. "Year(2023-2024)" when year_path is '2023-2024'