mirror of
https://github.com/Code-For-Groningen/temmies.git
synced 2025-03-15 07:10:15 +01:00
Used API instead of scraping to find courses from year
This commit is contained in:
parent
da4705b56a
commit
52d9d86260
111
temmies/year.py
111
temmies/year.py
@ -1,71 +1,70 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from .course import Course
|
||||
from .exceptions.course_unavailable import CourseUnavailable
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
class Year:
|
||||
"""
|
||||
Represents an academic year.
|
||||
"""
|
||||
def __init__(self, session, year_path: str):
|
||||
self.session = session
|
||||
self.year_path = year_path # e.g., '2023-2024'
|
||||
self.base_url = "https://themis.housing.rug.nl"
|
||||
self.api_url = f"{self.base_url}/api/navigation/{self.year_path}"
|
||||
|
||||
def __init__(self, session, start_year: int, end_year: int):
|
||||
self.start = start_year
|
||||
self.year = end_year
|
||||
self.url = f"https://themis.housing.rug.nl/course/{self.start}-{self.year}"
|
||||
self._session = session
|
||||
def all_courses(self) -> list:
|
||||
"""
|
||||
Gets all visible courses in this year.
|
||||
"""
|
||||
response = self.session.get(self.api_url)
|
||||
if response.status_code != 200:
|
||||
raise ConnectionError(f"Failed to retrieve courses for {self.year_path}.")
|
||||
|
||||
def all_courses(self, errors: bool = True) -> list[Course]:
|
||||
"""
|
||||
Gets all visible courses in a year.
|
||||
"""
|
||||
r = self._session.get(self.url)
|
||||
soup = BeautifulSoup(r.text, "lxml")
|
||||
lis = soup.find_all("li", class_="large")
|
||||
courses_data = response.json()
|
||||
courses = []
|
||||
for li in lis:
|
||||
try:
|
||||
suffix = li.a["href"].replace(f"course/{self.start}-{self.year}", "")
|
||||
course_url = self.url + suffix
|
||||
course_name = li.a.text.strip()
|
||||
courses.append(
|
||||
Course(course_url, course_name, self._session, self)
|
||||
)
|
||||
except CourseUnavailable as exc:
|
||||
if errors:
|
||||
raise CourseUnavailable(
|
||||
message=f"Course {li.a.text} in year {self.start}-{self.year} unavailable"
|
||||
) from exc
|
||||
print("Error with course", li.a.text)
|
||||
continue
|
||||
for course_info in courses_data:
|
||||
if course_info.get("visible", False):
|
||||
course_path = course_info["path"]
|
||||
course_title = course_info["title"]
|
||||
courses.append(Course(self.session, course_path, course_title, self))
|
||||
return courses
|
||||
|
||||
def get_course(self, name: str) -> Course:
|
||||
def get_course(self, course_title: str) -> Course:
|
||||
"""
|
||||
Gets a course by name.
|
||||
Gets a course by its title.
|
||||
"""
|
||||
r = self._session.get(self.url)
|
||||
soup = BeautifulSoup(r.text, "lxml")
|
||||
course_link = soup.find("a", text=name)
|
||||
if not course_link:
|
||||
raise CourseUnavailable(f"No such course found: {name}")
|
||||
suffix = course_link["href"].replace(f"course/{self.start}-{self.year}", "")
|
||||
course_url = self.url + suffix
|
||||
return Course(course_url, name, self._session, self)
|
||||
all_courses = self.all_courses()
|
||||
for course in all_courses:
|
||||
if course.title == course_title:
|
||||
return course
|
||||
raise ValueError(f"Course '{course_title}' not found in year {self.year_path}.")
|
||||
|
||||
def get_course_by_url(self, url: str) -> Course:
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
def get_course_by_tag(self, course_tag: str) -> Course:
|
||||
"""
|
||||
Gets a course by url.
|
||||
Gets a course by its tag (course identifier).
|
||||
Constructs the course URL using the year and course tag.
|
||||
"""
|
||||
r = self._session.get(url)
|
||||
soup = BeautifulSoup(r.text, "lxml")
|
||||
# <a class="fill accent large" href="https://themis.housing.rug.nl/course/2023-2024/adinc-cs">Algorithms and Data Structures for CS</a>
|
||||
course_link = soup.find_all("a", class_="fill accent large")
|
||||
name = None
|
||||
for link in course_link:
|
||||
if url in link["href"]:
|
||||
name = link.text
|
||||
break
|
||||
|
||||
if not name:
|
||||
raise CourseUnavailable(f"No such course found: {url}")
|
||||
return Course(url, name, self._session, self)
|
||||
|
||||
course_path = f"/{self.year_path}/{course_tag}"
|
||||
course_url = f"{self.base_url}/course{course_path}"
|
||||
|
||||
response = self.session.get(course_url)
|
||||
if response.status_code != 200:
|
||||
raise ConnectionError(f"Failed to retrieve course with tag '{course_tag}' for year {self.year_path}. Tried {course_url}")
|
||||
|
||||
soup = BeautifulSoup(response.text, "lxml")
|
||||
|
||||
title_element = soup.find("h1")
|
||||
if not title_element:
|
||||
title_elements = soup.find_all("a", class_="fill accent large")
|
||||
if title_elements:
|
||||
title_element = title_elements[-1]
|
||||
|
||||
if title_element:
|
||||
course_title = title_element.get_text(strip=True)
|
||||
else:
|
||||
raise ValueError(f"Could not retrieve course title for tag '{course_tag}' in year {self.year_path}.")
|
||||
|
||||
return Course(self.session, course_path, course_title, self)
|
||||
|
||||
def __str__(self):
|
||||
return f"Year({self.year_path})"
|
||||
|
Loading…
x
Reference in New Issue
Block a user