From 52d9d86260a7c510dbe9139dfdf31f43b407caab Mon Sep 17 00:00:00 2001
From: Boyan <36108495+confestim@users.noreply.github.com>
Date: Mon, 18 Nov 2024 20:05:12 +0100
Subject: [PATCH] Used API instead of scraping to find courses from year

---
NOTE(review): the line breaks and indentation of this patch were
reconstructed from a whitespace-collapsed copy. Line counts match the
hunk header (71 old / 70 new lines), but blank-line placement and
trailing whitespace are best-effort; confirm against the commit before
applying (or apply with whitespace tolerance).

 temmies/year.py | 111 ++++++++++++++++++++++++------------------------
 1 file changed, 55 insertions(+), 56 deletions(-)

diff --git a/temmies/year.py b/temmies/year.py
index 9f7a254..f134c0d 100644
--- a/temmies/year.py
+++ b/temmies/year.py
@@ -1,71 +1,70 @@
-from bs4 import BeautifulSoup
 from .course import Course
-from .exceptions.course_unavailable import CourseUnavailable
-
+from bs4 import BeautifulSoup
 class Year:
     """
     Represents an academic year.
     """
+    def __init__(self, session, year_path: str):
+        self.session = session
+        self.year_path = year_path  # e.g., '2023-2024'
+        self.base_url = "https://themis.housing.rug.nl"
+        self.api_url = f"{self.base_url}/api/navigation/{self.year_path}"
 
-    def __init__(self, session, start_year: int, end_year: int):
-        self.start = start_year
-        self.year = end_year
-        self.url = f"https://themis.housing.rug.nl/course/{self.start}-{self.year}"
-        self._session = session
+    def all_courses(self) -> list:
+        """
+        Gets all visible courses in this year.
+        """
+        response = self.session.get(self.api_url)
+        if response.status_code != 200:
+            raise ConnectionError(f"Failed to retrieve courses for {self.year_path}.")
 
-    def all_courses(self, errors: bool = True) -> list[Course]:
-        """
-        Gets all visible courses in a year.
-        """
-        r = self._session.get(self.url)
-        soup = BeautifulSoup(r.text, "lxml")
-        lis = soup.find_all("li", class_="large")
+        courses_data = response.json()
         courses = []
-        for li in lis:
-            try:
-                suffix = li.a["href"].replace(f"course/{self.start}-{self.year}", "")
-                course_url = self.url + suffix
-                course_name = li.a.text.strip()
-                courses.append(
-                    Course(course_url, course_name, self._session, self)
-                )
-            except CourseUnavailable as exc:
-                if errors:
-                    raise CourseUnavailable(
-                        message=f"Course {li.a.text} in year {self.start}-{self.year} unavailable"
-                    ) from exc
-                print("Error with course", li.a.text)
-                continue
+        for course_info in courses_data:
+            if course_info.get("visible", False):
+                course_path = course_info["path"]
+                course_title = course_info["title"]
+                courses.append(Course(self.session, course_path, course_title, self))
         return courses
 
-    def get_course(self, name: str) -> Course:
+    def get_course(self, course_title: str) -> Course:
         """
-        Gets a course by name.
+        Gets a course by its title.
         """
-        r = self._session.get(self.url)
-        soup = BeautifulSoup(r.text, "lxml")
-        course_link = soup.find("a", text=name)
-        if not course_link:
-            raise CourseUnavailable(f"No such course found: {name}")
-        suffix = course_link["href"].replace(f"course/{self.start}-{self.year}", "")
-        course_url = self.url + suffix
-        return Course(course_url, name, self._session, self)
+        all_courses = self.all_courses()
+        for course in all_courses:
+            if course.title == course_title:
+                return course
+        raise ValueError(f"Course '{course_title}' not found in year {self.year_path}.")
 
-    def get_course_by_url(self, url: str) -> Course:
+    from bs4 import BeautifulSoup
+
+    def get_course_by_tag(self, course_tag: str) -> Course:
         """
-        Gets a course by url.
+        Gets a course by its tag (course identifier).
+        Constructs the course URL using the year and course tag.
         """
-        r = self._session.get(url)
-        soup = BeautifulSoup(r.text, "lxml")
-        # Algorithms and Data Structures for CS
-        course_link = soup.find_all("a", class_="fill accent large")
-        name = None
-        for link in course_link:
-            if url in link["href"]:
-                name = link.text
-                break
-
-        if not name:
-            raise CourseUnavailable(f"No such course found: {url}")
-        return Course(url, name, self._session, self)
-        
\ No newline at end of file
+        course_path = f"/{self.year_path}/{course_tag}"
+        course_url = f"{self.base_url}/course{course_path}"
+
+        response = self.session.get(course_url)
+        if response.status_code != 200:
+            raise ConnectionError(f"Failed to retrieve course with tag '{course_tag}' for year {self.year_path}. Tried {course_url}")
+
+        soup = BeautifulSoup(response.text, "lxml")
+
+        title_element = soup.find("h1")
+        if not title_element:
+            title_elements = soup.find_all("a", class_="fill accent large")
+            if title_elements:
+                title_element = title_elements[-1]
+
+        if title_element:
+            course_title = title_element.get_text(strip=True)
+        else:
+            raise ValueError(f"Could not retrieve course title for tag '{course_tag}' in year {self.year_path}.")
+
+        return Course(self.session, course_path, course_title, self)
+
+    def __str__(self):
+        return f"Year({self.year_path})"