Used the API instead of scraping to find the courses of a year

This commit is contained in:
Boyan 2024-11-18 20:05:12 +01:00
parent da4705b56a
commit 52d9d86260

View File

@ -1,71 +1,70 @@
from bs4 import BeautifulSoup
from .course import Course
from .exceptions.course_unavailable import CourseUnavailable
from bs4 import BeautifulSoup
class Year:
"""
Represents an academic year.
"""
def __init__(self, session, year_path: str):
self.session = session
self.year_path = year_path # e.g., '2023-2024'
self.base_url = "https://themis.housing.rug.nl"
self.api_url = f"{self.base_url}/api/navigation/{self.year_path}"
def __init__(self, session, start_year: int, end_year: int):
self.start = start_year
self.year = end_year
self.url = f"https://themis.housing.rug.nl/course/{self.start}-{self.year}"
self._session = session
def all_courses(self) -> list:
"""
Gets all visible courses in this year.
"""
response = self.session.get(self.api_url)
if response.status_code != 200:
raise ConnectionError(f"Failed to retrieve courses for {self.year_path}.")
def all_courses(self, errors: bool = True) -> list[Course]:
"""
Gets all visible courses in a year.
"""
r = self._session.get(self.url)
soup = BeautifulSoup(r.text, "lxml")
lis = soup.find_all("li", class_="large")
courses_data = response.json()
courses = []
for li in lis:
try:
suffix = li.a["href"].replace(f"course/{self.start}-{self.year}", "")
course_url = self.url + suffix
course_name = li.a.text.strip()
courses.append(
Course(course_url, course_name, self._session, self)
)
except CourseUnavailable as exc:
if errors:
raise CourseUnavailable(
message=f"Course {li.a.text} in year {self.start}-{self.year} unavailable"
) from exc
print("Error with course", li.a.text)
continue
for course_info in courses_data:
if course_info.get("visible", False):
course_path = course_info["path"]
course_title = course_info["title"]
courses.append(Course(self.session, course_path, course_title, self))
return courses
def get_course(self, name: str) -> Course:
def get_course(self, course_title: str) -> Course:
"""
Gets a course by name.
Gets a course by its title.
"""
r = self._session.get(self.url)
soup = BeautifulSoup(r.text, "lxml")
course_link = soup.find("a", text=name)
if not course_link:
raise CourseUnavailable(f"No such course found: {name}")
suffix = course_link["href"].replace(f"course/{self.start}-{self.year}", "")
course_url = self.url + suffix
return Course(course_url, name, self._session, self)
all_courses = self.all_courses()
for course in all_courses:
if course.title == course_title:
return course
raise ValueError(f"Course '{course_title}' not found in year {self.year_path}.")
def get_course_by_url(self, url: str) -> Course:
    """
    Gets a course by url.

    Fetches ``url`` and takes the course name from the first
    "fill accent large" anchor whose href contains ``url``.

    Raises CourseUnavailable if no such anchor is found or its text is empty.
    """
    # This import sat in the middle of the original body; kept function-local.
    from bs4 import BeautifulSoup

    r = self._session.get(url)
    soup = BeautifulSoup(r.text, "lxml")
    # <a class="fill accent large" href="https://themis.housing.rug.nl/course/2023-2024/adinc-cs">Algorithms and Data Structures for CS</a>
    name = None
    for link in soup.find_all("a", class_="fill accent large"):
        # .get() so an anchor without an href is skipped instead of raising KeyError.
        if url in link.get("href", ""):
            # Strip surrounding whitespace, as all_courses does for course names.
            name = link.text.strip()
            break
    if not name:
        raise CourseUnavailable(f"No such course found: {url}")
    return Course(url, name, self._session, self)
def get_course_by_tag(self, course_tag: str) -> Course:
    """
    Gets a course by its tag (course identifier).

    The course page URL is built from the base URL, the year and the tag.
    The title is read from the page's first <h1>; when there is none, the
    last "fill accent large" anchor on the page is used instead.

    Raises ConnectionError if the page does not answer with status 200 and
    ValueError if no title element can be found.
    """
    course_path = f"/{self.year_path}/{course_tag}"
    course_url = f"{self.base_url}/course{course_path}"
    response = self.session.get(course_url)
    if response.status_code != 200:
        raise ConnectionError(f"Failed to retrieve course with tag '{course_tag}' for year {self.year_path}. Tried {course_url}")

    page = BeautifulSoup(response.text, "lxml")
    heading = page.find("h1")
    if not heading:
        # No <h1>: fall back to the last matching anchor, if there is one.
        anchors = page.find_all("a", class_="fill accent large")
        heading = anchors[-1] if anchors else heading
    if not heading:
        raise ValueError(f"Could not retrieve course title for tag '{course_tag}' in year {self.year_path}.")
    return Course(self.session, course_path, heading.get_text(strip=True), self)
def __str__(self):
return f"Year({self.year_path})"