From 99e356d96a6fc02c2ddfd8e973cab25e7fa4a9b6 Mon Sep 17 00:00:00 2001
From: Boyan
Date: Tue, 13 Feb 2024 19:24:25 +0100
Subject: [PATCH] Formatted code, added debugging output, added a CourseUnavailable exception.

---
 .gitignore                          |  1 +
 src/Assignment.py                   | 29 +++++++-----
 src/Base.py                         | 67 +++++++++++++++++++++++++--
 src/Course.py                       | 51 +++++++++------------
 src/Downloadable.py                 | 30 +++++++++----
 src/Exercise.py                     | 68 +++++++++++++++++++++++++++-
 src/File.py                         | 23 ----------
 src/Themis.py                       | 70 ++++++++++++++++++++++++++---
 src/Year.py                         | 51 +++++++++++++++------
 src/config.py                       |  5 ---
 src/exceptions/CourseUnavailable.py |  4 ++
 src/login.py                        | 52 ---------------------
 12 files changed, 294 insertions(+), 157 deletions(-)
 delete mode 100644 src/File.py
 delete mode 100644 src/config.py
 create mode 100644 src/exceptions/CourseUnavailable.py
 delete mode 100644 src/login.py

diff --git a/.gitignore b/.gitignore
index b81def0..0df7b3b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # Config
 config.py
+baller.py
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/src/Assignment.py b/src/Assignment.py
index 18e945b..5fc575f 100644
--- a/src/Assignment.py
+++ b/src/Assignment.py
@@ -1,25 +1,30 @@
 # Module to handle each assignment (most difficult part)
-from Course import Course
-from File import File
-from Submission import Submission
+from Downloadable import Downloadable
 from Base import Base
 from Exercise import Exercise
+from requests import Session
+from bs4 import BeautifulSoup
 
 class Assignment(Base):
-    def __init__(self, url:str, name:str, session:Session, parent:Course):
-        super().__init__()
-        self.files = self.files
+    def __init__(self, url:str, name:str, session:Session, parent):
+        super().__init__(url, name, session, parent)
+        self.download = Downloadable(url, name, session, self)
 
     def __str__(self):
-        return f"Assignment {self.name} in course {self.parent.name}"
-
-    def getSubmissions(self) -> Submission:
-        pass
+        return f"Assignment {self.name} in course {self.parent.name}"
 
-    def getExercises(self) -> list[Excercise]:
-        pass
+    def getExercises(self) -> list[Exercise]:
+        # Find the ul that wraps the li.large exercise entries
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        ul = soup.find('ul', class_='round')
+
+        # Turn each li into an exercise instance
+        return self.liLargeToExercises(ul)
 
     def getExercise(self, name:str) -> Exercise:
-        pass
\ No newline at end of file
+        # Get the assignment page
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        # Search by name
+        exercise = soup.find('a', text=name)
+        # Get the url and turn it into an Exercise object
+        return Exercise(url=exercise['href'], name=name, session=self.session, parent=self)
\ No newline at end of file
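[Editor's note, not part of the patch] A rough usage sketch of the new Assignment API, assuming a logged-in session produced by Themis.login and a Course object obtained from Year.getCourses(); the URL and assignment name below are purely hypothetical.

    from Assignment import Assignment

    assignment = Assignment(
        url="https://themis.housing.rug.nl/course/2023-2024/example-course/lab1",  # hypothetical URL
        name="Lab 1",               # hypothetical assignment name
        session=themis.session,     # logged-in requests.Session from Themis.login
        parent=course,              # the Course this assignment belongs to
    )
    # List the exercises (currently returned as Base placeholders)
    for exercise in assignment.getExercises():
        print(exercise.name, exercise.url)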
class_="cfg-key") + values = div.find_all('span', class_="cfg-val") + + # Create a dictionary + submission = {} + + # Put each key and value in the dictionary + for i in range(len(keys)): + submission[keys[i].text] = values[i].text + + return submission + + def getSubmissions(self): + # We change the url where course becomes stats + url = self.url.replace("course", "stats") + r = self.session.get(url) + + # Get each div with class "cfg-container round" + soup = BeautifulSoup(r.text, 'lxml') + divs = soup.find_all('div', class_="cfg-container round") + + # The first one is an overview, the next ones are the submissions + submissions = [] + for div in divs[1:]: + submissions.append(self.__parseCfgBlock(div)) + return self.__parseCfgBlock(divs[0]), submissions + + def liLargeToAssignments(self, ul:BeautifulSoup) -> list: + # Assume that ul is the block surrounding the li elements + # Get all the li elements + lis = ul.find_all('li', class_='large') + # Turn each to an assignment instance + assignments = [] + for li in lis: + assignments.append(Base(li.a['href'], li.a.text, self.session, self.parent)) + return assignments + + def liLargeToExercises(self, ul:BeautifulSoup) -> list: + # Assume that ul is the block surrounding the li elements + # Get all the li elements + lis = ul.find_all('li', class_='large') + # Turn each to an exercise instance + exercises = [] + for li in lis: + exercises.append(Base(li.a['href'], li.a.text, self.session, self.parent)) + return exercises + + \ No newline at end of file diff --git a/src/Course.py b/src/Course.py index 4f6d07a..15a1b57 100644 --- a/src/Course.py +++ b/src/Course.py @@ -1,28 +1,25 @@ # Class to handle courses from bs4 import BeautifulSoup from requests import Session -from Year import Year from Assignment import Assignment import re from Base import Base +from exceptions.CourseUnavailable import CourseUnavailable class Course(Base): - def __init__(url:str, name:str, session:Session, parent:Year): - super().__init__() - self.url = self.__constructURL("name") + # Extend the Base class init + def __init__(self, url:str, name:str, session:Session, parent): + super().__init__(url, name, session, parent) self.assignments = [] + self.__courseAvailable(self.session.get(self.url)) def __str__(self): return f"Course {self.name} in year {self.parent.year}" - - def __constructURL(self, name:str): - # We have to find the name in the page and find its corresponding url - r = self.session.get(url) - soup = BeautifulSoup(r.text, 'lxml') - # Find the course - course = soup.find('a', text=self.name) - # Get the url - return course['href'] + + def __courseAvailable(self, r): + # Check if we got an error + if "Something went wrong" in r.text: + raise CourseUnavailable() @property def courseInfo(self): @@ -55,22 +52,14 @@ class Course(Base): # For each link in the course page, get the assignment r = self.session.get(self.url) soup = BeautifulSoup(r.text, 'lxml') - # Find the assignments, they are in
diff --git a/src/Course.py b/src/Course.py
index 4f6d07a..15a1b57 100644
--- a/src/Course.py
+++ b/src/Course.py
@@ -1,28 +1,25 @@
 # Class to handle courses
 from bs4 import BeautifulSoup
 from requests import Session
-from Year import Year
 from Assignment import Assignment
 import re
 from Base import Base
+from exceptions.CourseUnavailable import CourseUnavailable
 
 class Course(Base):
-    def __init__(url:str, name:str, session:Session, parent:Year):
-        super().__init__()
-        self.url = self.__constructURL("name")
+    # Extend the Base class init
+    def __init__(self, url:str, name:str, session:Session, parent):
+        super().__init__(url, name, session, parent)
         self.assignments = []
+        self.__courseAvailable(self.session.get(self.url))
 
     def __str__(self):
         return f"Course {self.name} in year {self.parent.year}"
 
-    def __constructURL(self, name:str):
-        # We have to find the name in the page and find its corresponding url
-        r = self.session.get(url)
-        soup = BeautifulSoup(r.text, 'lxml')
-        # Find the course
-        course = soup.find('a', text=self.name)
-        # Get the url
-        return course['href']
+    def __courseAvailable(self, r):
+        # Check if we got an error page
+        if "Something went wrong" in r.text:
+            raise CourseUnavailable()
 
     @property
     def courseInfo(self):
@@ -55,22 +52,14 @@ class Course(Base):
         # For each link in the course page, get the assignment
         r = self.session.get(self.url)
         soup = BeautifulSoup(r.text, 'lxml')
-        # Find the assignments, they are in <li class="large"> elements
-        assignments = soup.find_all('li', class_='large')
+        # Find the big ul that holds the assignments
+        print(soup)
+        section = soup.find('div', class_="ass-children")
+        ul = section.find('ul', class_='round')
 
-        # FIXME: They sometimes put other stuff in these li's, so we have to filter them out
-
-        # Create assignment object for each and store them in the class
-        for assignment in assignments:
-            # Get the name
-            name = assignment.find('a').text
-            # Get the url
-            url = assignment.find('a')['href']
-            # Create the object
-            self.assignments.append(Assignment(url=url, name=name, session=self.session, course=self))
-
-
-    def getGrades(self):
-        pass
\ No newline at end of file
+        # IDEA: They sometimes put other stuff in these li's, so we have to filter them out
+        print(ul)
+        print(type(ul))
+        # Transform them into Assignment objects
+        # using the liLargeToAssignments helper from the Base class
+        return self.liLargeToAssignments(ul)
\ No newline at end of file
diff --git a/src/Downloadable.py b/src/Downloadable.py
index 99d159d..873679d 100644
--- a/src/Downloadable.py
+++ b/src/Downloadable.py
@@ -2,14 +2,15 @@
 from requests import Session
 from bs4 import BeautifulSoup
+from Base import Base
 
-class Downloadable:
-    def __init__(self, session:Session, parent:Class):
-        self.session = session
-        self.parent = parent
+class Downloadable(Base):
+    def __init__(self, url:str, name:str, session:Session, parent):
+        # Match the way Assignment and Exercise construct this helper
+        super().__init__(url, name, session, parent)
 
     # File handling
-    def __findFile(self, name:str) -> File:
+    def __findFile(self, name:str):
         # Get the file by name
         for file in self.files:
-            if file.name == name:
+            if file['name'] == name:
                 return file
@@ -17,15 +18,26 @@
         return None
 
     @property
-    def files(self) -> list[File]:
+    def files(self) -> list:
         # Create a list of files
-        # They are all in a span with class "cfg-val"
+        # They are all links in a span with class "cfg-val"
         r = self.session.get(self.url)
         soup = BeautifulSoup(r.text, 'lxml')
-        spans = soup.find_all('span', class_="cfg-val")
-        # Get the links and names of the files, create a File object for each
-        files = [File(url=a['href'], name=a.text, session=self.session, parent=self) for a in spans]
-        return files
+        # Make sure we only get the ones that have a link:
+        # parse the cfg block and check for the key "Downloads"
+        cfg_div = soup.find('div', class_='cfg-container round')
+        cfg = self._parseCfgBlock(cfg_div)
+        # Get the downloads entry (its parsed value is plain text)
+        downloads = cfg.get("Downloads", None)
+        if downloads is None:
+            return []
+        # Get the links from the block itself
+        links = cfg_div.find_all('a')
+        files = []
+        for link in links:
+            # File.py was removed in this commit, so keep plain dicts for now
+            files.append({'name': link.text, 'url': link['href']})
+
+        return files
 
     def download(self, filename:str) -> str:
         # Download the file
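[Editor's note, not part of the patch] Since File.py is removed, Downloadable.files now yields plain dicts; a small sketch of downloading one of those entries with the existing session, streaming to disk the way the old File.download did.

    def download_to_disk(session, file_entry, chunk_size=1024):
        # file_entry is one of the {'name': ..., 'url': ...} dicts from Downloadable.files
        r = session.get(file_entry['url'], stream=True)
        r.raise_for_status()
        with open(file_entry['name'], 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if chunk:
                    f.write(chunk)
        return file_entry['name']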
diff --git a/src/Exercise.py b/src/Exercise.py
index 5a9c9e8..2f171fb 100644
--- a/src/Exercise.py
+++ b/src/Exercise.py
@@ -1,5 +1,71 @@
 from Base import Base
+from Downloadable import Downloadable
+from requests import Session
+from bs4 import BeautifulSoup
+
+from time import sleep
+
 
 class Exercise(Base):
-    //TODO: Implement
+    def __init__(self, url:str, name:str, session:Session, parent):
+        super().__init__(url, name, session, parent)
+        self.download = Downloadable(url, name, session, self)
+
+    def __str__(self):
+        return f"Exercise {self.name} in assignment {self.parent.name}"
+
+    # IDEA: Make this async, so we don't have to wait for the whole output to load
+    def submit(self, file:str, comment:str) -> str:
+        # Submit a file.
+        # The form is in the page with class "cfg-container round" and is sent as a
+        # POST request to a url of the form:
+        # https://themis.housing.rug.nl/submit/{year}/{course}/{assignment}/{exercise}?_csrf={session_csrf}&sudo={username}
+        # while the current url looks like:
+        # https://themis.housing.rug.nl/course/{year}/{course}/{assignment}/{exercise}
+        # The request should contain the contents of the file.
+
+        # Build the submit url
+        url = self.url.replace("course", "submit")
+        # Get the csrf token
+        csrf = self.session.cookies['_csrf']
+        # Get the username
+        username = self.session.cookies['username']
+
+        # Open the file
+        with open(file, 'rb') as f:
+            # Submit the file
+            # After submission it will 302 to the current submission page
+            r = self.session.post(url, files={'file': f}, data={'comment': comment, '_csrf': csrf, 'sudo': username})
+
+        # Follow the redirect and repeatedly send GET requests to the result page.
+        # The page has a table which represents the test cases; a case whose status
+        # cell is "queued" is still running, so wait until no row is queued any more.
+
+        # Get the url we were redirected to
+        url = r.url
+        # Get the page
+        r = self.session.get(url)
+        # Get the soup
+        soup = BeautifulSoup(r.text, 'lxml')
+        # Get the table
+        table = soup.find('table')
+        # Get the rows
+        rows = table.find_all('tr', class_='sub-casetop')
+        # Get the status of each test case
+        status = [row.find('td', class_='status').text for row in rows]
+        # Wait until none of the statuses are "queued" any more
+        while "queued" in status:
+            # Wait a bit
+            sleep(1)
+            # Refresh the page
+            r = self.session.get(url)
+            soup = BeautifulSoup(r.text, 'lxml')
+            table = soup.find('table')
+            rows = table.find_all('tr', class_='sub-casetop')
+            # Re-read the status column, otherwise this loop never terminates
+            status = [row.find('td', class_='status').text for row in rows]
+
+        pass
\ No newline at end of file
diff --git a/src/File.py b/src/File.py
deleted file mode 100644
index b4d1ba7..0000000
--- a/src/File.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# Module to handle files
-from Base import Base
-from Downloadable import Downloadable
-from requests import Session
-
-class File(Base):
-    def __init__(self, url:str, name:str, session:Session, parent:Downloadable):
-        super().__init__()
-
-    def __str__(self):
-        return f"File {self.name} for parent of Downloadable {self.parent.parent.name}"
-
-    # I know this is reduntant, but how can you have a file class without a download()
-    def download(self) -> str:
-        r = self.session.get(self.url, stream=True)
-        with open(self.name, 'wb') as f:
-            for chunk in r.iter_content(chunk_size=1024):
-                if chunk:
-                    f.write(chunk)
-        return file.name
-
-    def __eq__(self, other:File) -> bool:
-        return self.name == other.name
\ No newline at end of file
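[Editor's note, not part of the patch] The polling loop in Exercise.submit can spin forever if the judge never drains the queue; a hedged sketch of the same idea with a timeout, reusing the selectors from the patch.

    from time import sleep, time
    from bs4 import BeautifulSoup

    def wait_for_results(session, result_url, timeout=120, interval=1.0):
        deadline = time() + timeout
        while time() < deadline:
            soup = BeautifulSoup(session.get(result_url).text, 'lxml')
            rows = soup.find_all('tr', class_='sub-casetop')
            cells = [row.find('td', class_='status') for row in rows]
            status = [td.text for td in cells if td is not None]
            if status and "queued" not in status:
                return status
            sleep(interval)
        raise TimeoutError(f"Test cases still queued after {timeout} seconds")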
diff --git a/src/Themis.py b/src/Themis.py
index 7a23b7c..d01ef3e 100644
--- a/src/Themis.py
+++ b/src/Themis.py
@@ -1,14 +1,70 @@
-from login import login
 from Year import Year
+import urllib3
+from requests import Session
+from bs4 import BeautifulSoup
+from getpass import getpass
+
+# Disable warnings about the unverified certificate
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 class Themis:
-    def __init__(self):
-        self.session = login()
+    def __init__(self, user:str, passwd:str):
+        self.session = self.login(user, passwd)
         self.years = []
+        self.url = "https://themis.housing.rug.nl/course/"
 
-    def getYears(self):
-        pass
+    def login(self, user, passwd):
+        headers = {
+            "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/80.0.3987.160 Chrome/80.0.3987.163 Safari/537.36"
+        }
 
-    def getYear(self, end:int):
-        pass
+        data = {
+            "user": user,
+            "password": passwd,
+            "null": None
+        }
+
+        with Session() as s:
+            url = 'https://themis.housing.rug.nl/log/in'
+            r = s.get(url, headers=headers, verify=False)
+            soup = BeautifulSoup(r.text, 'lxml')
+
+            # Get the csrf token and add it to the payload
+            csrfToken = soup.find('input', attrs={'name': '_csrf'})['value']
+            data['_csrf'] = csrfToken
+
+            # Login
+            r = s.post(url, data=data, headers=headers)
+
+            # Check if the login was successful
+            logged_in = "Welcome, logged in as" in r.text
+            if not logged_in:
+                raise Exception(f"Login for user {user} failed")
+
+        return s
+
+    def getYear(self, start:int, end:int):
+        # Get a specific academic year
+        return Year(self.session, self, start, end)
+
+    def allYears(self):
+        # All of them are in a big ul at the beginning of the page
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        ul = soup.find('ul', class_='round')
+        lis = ul.find_all('li', class_='large')
+        years = []
+        for li in lis:
+            # The text has the format: 2019-2020
+            year = li.a.text.split("-")
+            years.append(Year(self.session, self, int(year[0]), int(year[1])))
+
+        return years  # Return a list of Year objects
+
+
+# This is the entry point, so run main() when the module is executed directly
+def main():
+    # config.py was removed, so prompt for the credentials instead of importing them
+    user = input("Username: ")
+    passwd = getpass("Password: ")
+    themis = Themis(user, passwd)
+    year = themis.getYear(2019, 2020)
+    print(year.getCourses())
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/Year.py b/src/Year.py
index ad47489..126d2d9 100644
--- a/src/Year.py
+++ b/src/Year.py
@@ -1,29 +1,52 @@
 # Year class to represent an academic year
 from bs4 import BeautifulSoup
-import selenium
-from login import login
 from Course import Course
-from Themis import Themis
-from Base import Base
+from requests import Session
+from exceptions.CourseUnavailable import CourseUnavailable
 
-class Year(Base):
-    def __init__(name:str, session:Session, parent:Themis, end_year:int):
-        super().__init__()
-        self.start = end_year - 1
+class Year:
+    def __init__(self, session:Session, parent, start_year:int, end_year:int):
+        self.parent = parent
+        self.start = start_year
         self.year = end_year
-        self.url = __constructUrl()
+        self.session = session
+        self.url = self.__constructUrl()
 
     # Method to set the url
     def __constructUrl(self):
-        return f"https://themis.housing.rug.nl/{self.start}-{self.year}"
+        return f"https://themis.housing.rug.nl/course/{self.start}-{self.year}"
 
     # Method to get the courses of the year
-    def getCourses(self) -> list[Course]:
+    def getCourses(self, errors:bool=False) -> list[Course]:
+        # The courses are li elements inside a big ul
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        lis = soup.find_all('li', class_='large')
         courses = []
-        # TODO: Logic to get all courses
+        for li in lis:
+            try:
+                courses.append(
+                    Course(
+                        self.url + li.a['href'],
+                        li.a.text,
+                        self.session,
+                        self
+                    )
+                )
+            except CourseUnavailable:
+                if errors:
+                    raise CourseUnavailable(f"Course {li.a.text} in year {self.start}-{self.year} is not available")
+                else:
+                    pass
+
+        return courses
 
     def getCourse(self, name:str) -> Course:
-        #TODO: Implement
-        pass
+        # Get the year page
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        # Search by name
+        course = soup.find('a', text=name)
+        # Get the url and turn it into a Course object
+        return Course(url=course['href'], name=name, session=self.session, parent=self)
\ No newline at end of file
diff --git a/src/config.py b/src/config.py
deleted file mode 100644
index 355a4a5..0000000
--- a/src/config.py
+++ /dev/null
@@ -1,5 +0,0 @@
-username = ""
-password = ""
-
-if __name__ == "__main__":
-    print("Do not run this module like this. Just set the values.")
\ No newline at end of file
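[Editor's note, not part of the patch] An end-to-end usage sketch of the new entry point, assuming valid Themis credentials; the 2023-2024 year is just an example.

    from getpass import getpass
    from Themis import Themis

    themis = Themis(input("Username: "), getpass("Password: "))
    year = themis.getYear(2023, 2024)
    for course in year.getCourses(errors=False):   # silently skip unavailable courses
        print(course.name)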
diff --git a/src/exceptions/CourseUnavailable.py b/src/exceptions/CourseUnavailable.py
new file mode 100644
index 0000000..465bf5a
--- /dev/null
+++ b/src/exceptions/CourseUnavailable.py
@@ -0,0 +1,4 @@
+class CourseUnavailable(Exception):
+    def __init__(self, message:str="Error in course"):
+        self.message = message
+        super().__init__(self.message)
\ No newline at end of file
diff --git a/src/login.py b/src/login.py
deleted file mode 100644
index bed513d..0000000
--- a/src/login.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Module to handle login
-# URL to login: https://themis.housing.rug.nl/log/in
-# POST request which contains the following data:
-# - username
-# - password
-# - null
-
-from requests import Session
-from bs4 import BeautifulSoup
-from config import username, password
-import urllib3
-
-# Disable warnings
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-# Function to login to Themis
-def login(user, passwd):
-    headers = {
-        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/80.0.3987.160 Chrome/80.0.3987.163 Safari/537.36"
-    }
-
-    data = {
-        "user": username,
-        "password":password,
-        "null": None
-    }
-
-    with Session() as s:
-        url = 'https://themis.housing.rug.nl/log/in'
-        r = s.get(url,headers=headers,verify=False)
-        soup = BeautifulSoup(r.text, 'lxml')
-
-        # get the csrf token and add it to payload
-        csrfToken = soup.find('input',attrs = {'name':'_csrf'})['value']
-        data['_csrf'] = csrfToken
-
-        # Login
-        r = s.post(url,data=data,headers = headers)
-
-        # check if login was successful
-        log_out = "Welcome, logged in as" in r.text
-        if log_out:
-            print(f"Login for user {username} successful")
-        else:
-            print("Login failed")
-            return None
-
-    return s
-
-
-if __name__ == "__main__":
-    print("Do not run this module like this. Used to give a logged in session.")
\ No newline at end of file
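[Editor's note, not part of the patch] How the new exception is meant to surface, assuming a Year object like the one in the previous note; with errors=True, Year.getCourses re-raises instead of silently skipping the course.

    from exceptions.CourseUnavailable import CourseUnavailable

    try:
        courses = year.getCourses(errors=True)
    except CourseUnavailable as exc:
        print(f"Course could not be loaded: {exc.message}")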