From 5456c9632a30152f93fcd59821ff380673b319f1 Mon Sep 17 00:00:00 2001
From: Boyan
Date: Mon, 12 Feb 2024 14:29:04 +0100
Subject: [PATCH] Coded like crazy. The code is repetitive and optimizable. Almost done tho.

---
Review amendments (this section is not part of the commit message):
 - The line structure of the patch was restored; blank lines are reconstructed.
 - Base.py: the class is named Base, the name every other module imports,
   and __init__ takes self.
 - Imports that are only needed for annotations sit behind TYPE_CHECKING, so
   the modules no longer import each other in a circle. Assignment.py still
   refers to a Submission module that this patch does not add.
 - All subclasses forward url, name, session and parent to Base.__init__.
 - Course: the link lookups report a missing link with ValueError,
   getAssignment returns the cached object instead of its name and
   getAssignments returns the list it builds.
 - Downloadable: hrefs are read from the links inside the spans, downloading
   is delegated to File.download and an unknown file name raises LookupError.
 - Exercise.py: the "//" comment, a syntax error in Python, became "#".
 - login(): the arguments are used and default to the config credentials, so
   the call in Themis works; the returned session is no longer closed.
 - The blob ids on the index lines are those of the unamended revision.

 src/Assignment.py   | 40 ++++++++++++++++++++
 src/Base.py         | 13 +++++++
 src/Course.py       | 91 +++++++++++++++++++++++++++++++++++++++++++++
 src/Downloadable.py | 56 ++++++++++++++++++++++++++++
 src/Exercise.py     |  6 +++
 src/File.py         | 45 ++++++++++++++++++++++
 src/Themis.py       | 17 ++++++++
 src/Year.py         | 42 ++++++++++++++++++---
 src/login.py        | 54 ++++++++++++++++++++++++++-
 9 files changed, 358 insertions(+), 6 deletions(-)
 create mode 100644 src/Assignment.py
 create mode 100644 src/Base.py
 create mode 100644 src/Course.py
 create mode 100644 src/Downloadable.py
 create mode 100644 src/Exercise.py
 create mode 100644 src/File.py
 create mode 100644 src/Themis.py

diff --git a/src/Assignment.py b/src/Assignment.py
new file mode 100644
index 0000000..18e945b
--- /dev/null
+++ b/src/Assignment.py
@@ -0,0 +1,40 @@
+# Module to handle each assignment (most difficult part)
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from Base import Base
+
+if TYPE_CHECKING:
+    # Needed for annotations only. Importing Course at runtime would be
+    # circular, because Course imports Assignment.
+    from requests import Session
+    from Course import Course
+    from File import File
+    from Submission import Submission
+    from Exercise import Exercise
+
+
+class Assignment(Base):
+    """An assignment of a course; url, name, session and parent live on Base."""
+
+    def __init__(self, url: str, name: str, session: Session, parent: Course):
+        # Base.__init__ needs all four values, so they have to be forwarded
+        super().__init__(url, name, session, parent)
+        # NOTE(review): file handling is not wired in yet; presumably
+        # Downloadable is meant to provide it - confirm.
+
+    def __str__(self):
+        return f"Assignment {self.name} in course {self.parent.name}"
+
+    def getSubmissions(self) -> Submission:
+        # TODO: Implement
+        pass
+
+    def getExercises(self) -> list[Exercise]:
+        # TODO: Implement
+        pass
+
+    def getExercise(self, name: str) -> Exercise:
+        # TODO: Implement
+        pass
diff --git a/src/Base.py b/src/Base.py
new file mode 100644
index 0000000..d55f3cc
--- /dev/null
+++ b/src/Base.py
@@ -0,0 +1,13 @@
+# Noticed there's a similar pattern in the classes, so I'm going to create a base class for them
+
+from requests import Session
+
+
+class Base:
+    """State shared by the scraped entities: url, name, session and parent."""
+
+    def __init__(self, url: str, name: str, session: Session, parent: object):
+        self.url = url
+        self.name = name
+        self.session = session
+        self.parent = parent
diff --git a/src/Course.py b/src/Course.py
new file mode 100644
index 0000000..4f6d07a
--- /dev/null
+++ b/src/Course.py
@@ -0,0 +1,91 @@
+# Class to handle courses
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING
+from urllib.parse import urljoin
+
+from bs4 import BeautifulSoup
+from requests import Session
+
+from Assignment import Assignment
+from Base import Base
+
+if TYPE_CHECKING:
+    # Needed for annotations only; Year imports Course, so a runtime import
+    # here would be circular.
+    from Year import Year
+
+
+class Course(Base):
+    """A course that is looked up by name on the page given as url."""
+
+    def __init__(self, url: str, name: str, session: Session, parent: Year):
+        # url is the page on which the course link is looked up; self.url is
+        # then replaced by the link that was found there.
+        super().__init__(url, name, session, parent)
+        self.url = self.__constructURL(name)
+        self.assignments = []
+
+    def __str__(self):
+        return f"Course {self.name} in year {self.parent.year}"
+
+    def __getSoup(self) -> BeautifulSoup:
+        # Fetch the page behind self.url and parse it
+        r = self.session.get(self.url)
+        return BeautifulSoup(r.text, 'lxml')
+
+    def __makeAssignment(self, link, name: str) -> Assignment:
+        # urljoin keeps an absolute href as it is and resolves a relative one
+        url = urljoin(self.url, link['href'])
+        return Assignment(url=url, name=name, session=self.session, parent=self)
+
+    def __constructURL(self, name: str) -> str:
+        # We have to find the name in the page and find its corresponding url
+        course = self.__getSoup().find('a', string=name, href=True)
+        if course is None:
+            raise ValueError(f"Course {name} not found at {self.url}")
+        # Get the url
+        return urljoin(self.url, course['href'])
+
+    @property
+    def courseInfo(self):
+        return {
+            "name": self.name,
+            "year": self.parent.year,
+            "url": self.url,
+            "assignments": [x.name for x in self.assignments]
+        }
+
+    def getAssignment(self, name: str) -> Assignment:
+        # Optimization: if we already have the assignment, don't get it again
+        for assignment in self.assignments:
+            if assignment.name == name:
+                return assignment
+
+        # Search by name
+        link = self.__getSoup().find('a', string=name, href=True)
+        if link is None:
+            raise ValueError(f"Assignment {name} not found at {self.url}")
+        # Transform the link into an assignment object
+        return self.__makeAssignment(link, name)
+
+    def getAssignments(self) -> list[Assignment]:
+        # The assignments are the links inside the li items with class "large"
+        items = self.__getSoup().find_all('li', class_='large')
+
+        # FIXME: They sometimes put other stuff in these li's. Items without a
+        # link are skipped; other non-assignment items are not detected yet.
+        links = [item.find('a', href=True) for item in items]
+
+        # Rebuild the list so that calling this twice does not store duplicates
+        self.assignments = [
+            self.__makeAssignment(link, link.text)
+            for link in links
+            if link is not None
+        ]
+        return self.assignments
+
+    def getGrades(self):
+        # TODO: Implement
+        pass
diff --git a/src/Downloadable.py b/src/Downloadable.py
new file mode 100644
index 0000000..99d159d
--- /dev/null
+++ b/src/Downloadable.py
@@ -0,0 +1,56 @@
+# Since we can download files both from the assignment itself and its exercises, this class will handle both
+from __future__ import annotations
+
+from urllib.parse import urljoin
+
+from bs4 import BeautifulSoup
+from requests import Session
+
+from File import File
+
+
+class Downloadable:
+    """File listing and download helpers for pages that offer files.
+
+    NOTE(review): the files property reads self.url, which is not set in this
+    class; presumably the class using these helpers provides it - confirm.
+    """
+
+    def __init__(self, session: Session, parent: object):
+        self.session = session
+        self.parent = parent
+
+    # File handling
+    def __findFile(self, name: str) -> File | None:
+        # Get the file by name, None when there is no such file
+        return next((file for file in self.files if file.name == name), None)
+
+    @property
+    def files(self) -> list[File]:
+        # Create a list of files
+        # They are all in a span with class "cfg-val"
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        spans = soup.find_all('span', class_="cfg-val")
+        # Get the links and names of the files, create a File object for each.
+        # The href is read from the links inside the spans, a span has none.
+        return [
+            File(url=urljoin(self.url, a['href']), name=a.text, session=self.session, parent=self)
+            for span in spans
+            for a in span.find_all('a', href=True)
+        ]
+
+    def download(self, filename: str) -> str:
+        # Download the file
+        if filename is None:
+            raise NameError("No filename provided")
+
+        file = self.__findFile(filename)
+        if file is None:
+            raise LookupError(f"No file named {filename} at {self.url}")
+        return file.download()
+
+    def downloadAll(self) -> list[str]:
+        # Download all files. The list is fetched once; going through
+        # download() would request the page again for every single file.
+        return [file.download() for file in self.files]
diff --git a/src/Exercise.py b/src/Exercise.py
new file mode 100644
index 0000000..5a9c9e8
--- /dev/null
+++ b/src/Exercise.py
@@ -0,0 +1,6 @@
+from Base import Base
+
+
+class Exercise(Base):
+    # TODO: Implement
+    pass
diff --git a/src/File.py b/src/File.py
new file mode 100644
index 0000000..b4d1ba7
--- /dev/null
+++ b/src/File.py
@@ -0,0 +1,45 @@
+# Module to handle files
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from requests import Session
+
+from Base import Base
+
+if TYPE_CHECKING:
+    # Needed for annotations only; Downloadable imports File, so a runtime
+    # import here would be circular.
+    from Downloadable import Downloadable
+
+
+class File(Base):
+    """A downloadable file; two files are equal when their names are equal."""
+
+    def __init__(self, url: str, name: str, session: Session, parent: Downloadable):
+        super().__init__(url, name, session, parent)
+
+    def __str__(self):
+        return f"File {self.name} for parent of Downloadable {self.parent.parent.name}"
+
+    # I know this is redundant, but how can you have a file class without a download()
+    def download(self) -> str:
+        # NOTE(review): self.name is used as the local path as it is, and
+        # Downloadable fills it with link text from the page; consider
+        # sanitising it.
+        r = self.session.get(self.url, stream=True)
+        # Raise on an HTTP error instead of saving the error page as the file
+        r.raise_for_status()
+        with open(self.name, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=1024):
+                if chunk:
+                    f.write(chunk)
+        return self.name
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, File):
+            return NotImplemented
+        return self.name == other.name
+
+    def __hash__(self) -> int:
+        return hash(self.name)
diff --git a/src/Themis.py b/src/Themis.py
new file mode 100644
index 0000000..7a23b7c
--- /dev/null
+++ b/src/Themis.py
@@ -0,0 +1,17 @@
+from login import login
+from Year import Year
+
+
+class Themis:
+    def __init__(self):
+        # login() uses the credentials from config when called without arguments
+        self.session = login()
+        self.years = []
+
+    def getYears(self):
+        # TODO: Implement
+        pass
+
+    def getYear(self, end: int):
+        # TODO: Implement
+        pass
diff --git a/src/Year.py b/src/Year.py
index e99bdfb..ad47489 100644
--- a/src/Year.py
+++ b/src/Year.py
@@ -1,8 +1,40 @@
+# Year class to represent an academic year
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 from bs4 import BeautifulSoup
 import selenium
+from requests import Session
+from login import login
+from Course import Course
+from Base import Base
 
-# Class to represent an academic year
-#
-class Year:
-    def __init__
-    
\ No newline at end of file
+if TYPE_CHECKING:
+    # Needed for annotations only; Themis imports Year, so a runtime import
+    # here would be circular.
+    from Themis import Themis
+
+
+class Year(Base):
+    """An academic year running from end_year - 1 to end_year."""
+
+    def __init__(self, name: str, session: Session, parent: Themis, end_year: int):
+        # The url is built from the two years, so they are set first
+        self.start = end_year - 1
+        self.year = end_year
+        super().__init__(self.__constructUrl(), name, session, parent)
+
+    # Method to set the url
+    def __constructUrl(self):
+        return f"https://themis.housing.rug.nl/{self.start}-{self.year}"
+
+    # Method to get the courses of the year
+    def getCourses(self) -> list[Course]:
+        courses = []
+        # TODO: Logic to get all courses
+        return courses
+
+    def getCourse(self, name: str) -> Course:
+        # TODO: Implement
+        pass
diff --git a/src/login.py b/src/login.py
index 026193d..bed513d 100644
--- a/src/login.py
+++ b/src/login.py
@@ -5,6 +5,58 @@
 # - password
 # - null
 
-from requests import post
+from requests import Session
 from bs4 import BeautifulSoup
+from config import username, password
+import urllib3
 
+# Disable warnings
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+
+# Function to login to Themis
+def login(user=username, passwd=password):
+    """Log in and return the logged in Session, or None when the login fails.
+
+    user and passwd default to the credentials from config.
+    """
+    headers = {
+        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/80.0.3987.160 Chrome/80.0.3987.163 Safari/537.36"
+    }
+
+    data = {
+        "user": user,
+        "password": passwd,
+        "null": None
+    }
+
+    # No "with" block here: the session is returned, so it has to stay open
+    s = Session()
+    url = 'https://themis.housing.rug.nl/log/in'
+    # NOTE(review): verify=False turns off certificate checks for this request
+    r = s.get(url, headers=headers, verify=False)
+    soup = BeautifulSoup(r.text, 'lxml')
+
+    # get the csrf token and add it to payload
+    csrfInput = soup.find('input', attrs={'name': '_csrf'})
+    if csrfInput is None:
+        print("Login failed")
+        s.close()
+        return None
+    data['_csrf'] = csrfInput['value']
+
+    # Login
+    r = s.post(url, data=data, headers=headers)
+
+    # check if login was successful
+    if "Welcome, logged in as" not in r.text:
+        print("Login failed")
+        s.close()
+        return None
+
+    print(f"Login for user {user} successful")
+    return s
+
+
+if __name__ == "__main__":
+    print("Do not run this module like this. Used to give a logged in session.")