mirror of
https://github.com/Code-For-Groningen/temmies.git
synced 2025-03-15 07:10:15 +01:00
Used API instead of scraping to find courses from year
This commit is contained in:
parent
da4705b56a
commit
52d9d86260
111
temmies/year.py
111
temmies/year.py
@ -1,71 +1,70 @@
|
|||||||
from bs4 import BeautifulSoup
|
|
||||||
from .course import Course
|
from .course import Course
|
||||||
from .exceptions.course_unavailable import CourseUnavailable
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
class Year:
|
class Year:
|
||||||
"""
|
"""
|
||||||
Represents an academic year.
|
Represents an academic year.
|
||||||
"""
|
"""
|
||||||
|
def __init__(self, session, year_path: str):
|
||||||
|
self.session = session
|
||||||
|
self.year_path = year_path # e.g., '2023-2024'
|
||||||
|
self.base_url = "https://themis.housing.rug.nl"
|
||||||
|
self.api_url = f"{self.base_url}/api/navigation/{self.year_path}"
|
||||||
|
|
||||||
def __init__(self, session, start_year: int, end_year: int):
|
def all_courses(self) -> list:
|
||||||
self.start = start_year
|
"""
|
||||||
self.year = end_year
|
Gets all visible courses in this year.
|
||||||
self.url = f"https://themis.housing.rug.nl/course/{self.start}-{self.year}"
|
"""
|
||||||
self._session = session
|
response = self.session.get(self.api_url)
|
||||||
|
if response.status_code != 200:
|
||||||
|
raise ConnectionError(f"Failed to retrieve courses for {self.year_path}.")
|
||||||
|
|
||||||
def all_courses(self, errors: bool = True) -> list[Course]:
|
courses_data = response.json()
|
||||||
"""
|
|
||||||
Gets all visible courses in a year.
|
|
||||||
"""
|
|
||||||
r = self._session.get(self.url)
|
|
||||||
soup = BeautifulSoup(r.text, "lxml")
|
|
||||||
lis = soup.find_all("li", class_="large")
|
|
||||||
courses = []
|
courses = []
|
||||||
for li in lis:
|
for course_info in courses_data:
|
||||||
try:
|
if course_info.get("visible", False):
|
||||||
suffix = li.a["href"].replace(f"course/{self.start}-{self.year}", "")
|
course_path = course_info["path"]
|
||||||
course_url = self.url + suffix
|
course_title = course_info["title"]
|
||||||
course_name = li.a.text.strip()
|
courses.append(Course(self.session, course_path, course_title, self))
|
||||||
courses.append(
|
|
||||||
Course(course_url, course_name, self._session, self)
|
|
||||||
)
|
|
||||||
except CourseUnavailable as exc:
|
|
||||||
if errors:
|
|
||||||
raise CourseUnavailable(
|
|
||||||
message=f"Course {li.a.text} in year {self.start}-{self.year} unavailable"
|
|
||||||
) from exc
|
|
||||||
print("Error with course", li.a.text)
|
|
||||||
continue
|
|
||||||
return courses
|
return courses
|
||||||
|
|
||||||
def get_course(self, name: str) -> Course:
|
def get_course(self, course_title: str) -> Course:
|
||||||
"""
|
"""
|
||||||
Gets a course by name.
|
Gets a course by its title.
|
||||||
"""
|
"""
|
||||||
r = self._session.get(self.url)
|
all_courses = self.all_courses()
|
||||||
soup = BeautifulSoup(r.text, "lxml")
|
for course in all_courses:
|
||||||
course_link = soup.find("a", text=name)
|
if course.title == course_title:
|
||||||
if not course_link:
|
return course
|
||||||
raise CourseUnavailable(f"No such course found: {name}")
|
raise ValueError(f"Course '{course_title}' not found in year {self.year_path}.")
|
||||||
suffix = course_link["href"].replace(f"course/{self.start}-{self.year}", "")
|
|
||||||
course_url = self.url + suffix
|
|
||||||
return Course(course_url, name, self._session, self)
|
|
||||||
|
|
||||||
def get_course_by_url(self, url: str) -> Course:
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
def get_course_by_tag(self, course_tag: str) -> Course:
|
||||||
"""
|
"""
|
||||||
Gets a course by url.
|
Gets a course by its tag (course identifier).
|
||||||
|
Constructs the course URL using the year and course tag.
|
||||||
"""
|
"""
|
||||||
r = self._session.get(url)
|
course_path = f"/{self.year_path}/{course_tag}"
|
||||||
soup = BeautifulSoup(r.text, "lxml")
|
course_url = f"{self.base_url}/course{course_path}"
|
||||||
# <a class="fill accent large" href="https://themis.housing.rug.nl/course/2023-2024/adinc-cs">Algorithms and Data Structures for CS</a>
|
|
||||||
course_link = soup.find_all("a", class_="fill accent large")
|
response = self.session.get(course_url)
|
||||||
name = None
|
if response.status_code != 200:
|
||||||
for link in course_link:
|
raise ConnectionError(f"Failed to retrieve course with tag '{course_tag}' for year {self.year_path}. Tried {course_url}")
|
||||||
if url in link["href"]:
|
|
||||||
name = link.text
|
soup = BeautifulSoup(response.text, "lxml")
|
||||||
break
|
|
||||||
|
title_element = soup.find("h1")
|
||||||
if not name:
|
if not title_element:
|
||||||
raise CourseUnavailable(f"No such course found: {url}")
|
title_elements = soup.find_all("a", class_="fill accent large")
|
||||||
return Course(url, name, self._session, self)
|
if title_elements:
|
||||||
|
title_element = title_elements[-1]
|
||||||
|
|
||||||
|
if title_element:
|
||||||
|
course_title = title_element.get_text(strip=True)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Could not retrieve course title for tag '{course_tag}' in year {self.year_path}.")
|
||||||
|
|
||||||
|
return Course(self.session, course_path, course_title, self)
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return f"Year({self.year_path})"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user