Formatted, debugging. Added exception.

Boyan 2024-02-13 19:24:25 +01:00
parent 5e975cead1
commit 99e356d96a
12 changed files with 294 additions and 157 deletions

.gitignore

@@ -1,5 +1,6 @@
 # Config
 config.py
+baller.py
 
 # Byte-compiled / optimized / DLL files
 __pycache__/

Assignment.py

@@ -1,25 +1,30 @@
 # Module to handle each assignment (most difficult part)
-from Course import Course
+from Downloadable import Downloadable
+from File import File
+from Submission import Submission
 from Base import Base
 from Exercise import Exercise
+from requests import Session
 
 class Assignment(Base):
-    def __init__(self, url:str, name:str, session:Session, parent:Course):
+    def __init__(self, url:str, name:str, session:Session, parent):
         super().__init__()
-        self.files = self.files
+        self.download = Downloadable(url, name, session, self)
 
     def __str__(self):
         return f"Assignment {self.name} in course {self.parent.name}"
 
-    def getSubmissions(self) -> Submission:
-        pass
-
-    def getExercises(self) -> list[Excercise]:
-        pass
+    def getExercises(self) -> list[Exercise]:
+        # Find li large
+        ul = self.soup.find('ul', class_='round')
+        # Turn each li to an exercise instance
+        return self.liLargeToExercises(ul, self.session, self)
 
     def getExercise(self, name:str) -> Exercise:
-        pass
+        # Get the exercise
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        # Search by name
+        exercise = soup.find('a', text=name)
+        # Get the url and transform it into an exercise object
+        return Exercise(url=exercise['href'], name=name, session=self.session, assignment=self)

Base.py

@@ -1,10 +1,71 @@
 # Noticed there's a similar pattern in the classes, so I'm going to create a base class for them
+# classes that inherit from Base:
+# - Course
+# - Assignment
+# - Exercise
 from requests import Session
+from bs4 import BeautifulSoup
 
-class Thing:
-    def __init__(url:str, name:str, session:Session, parent:Class):
+class Base:
+    def __init__(self, url:str, name:str, session:Session, parent):
         self.url = url
         self.name = name
         self.session = session
         self.parent = parent
+
+    def __parseCfgBlock(self, div:BeautifulSoup) -> dict:
+        # We assume that the div is a submission with class "cfg-container round"
+        # Put each key and value in a dictionary
+        # The key is a span with a class "cfg-key"
+        # The value is a span with a class "cfg-val"
+        # Get the key and value spans
+        keys = div.find_all('span', class_="cfg-key")
+        values = div.find_all('span', class_="cfg-val")
+        # Create a dictionary
+        submission = {}
+        # Put each key and value in the dictionary
+        for i in range(len(keys)):
+            submission[keys[i].text] = values[i].text
+        return submission
+
+    def getSubmissions(self):
+        # We change the url where course becomes stats
+        url = self.url.replace("course", "stats")
+        r = self.session.get(url)
+        # Get each div with class "cfg-container round"
+        soup = BeautifulSoup(r.text, 'lxml')
+        divs = soup.find_all('div', class_="cfg-container round")
+        # The first one is an overview, the next ones are the submissions
+        submissions = []
+        for div in divs[1:]:
+            submissions.append(self.__parseCfgBlock(div))
+        return self.__parseCfgBlock(divs[0]), submissions
+
+    def liLargeToAssignments(self, ul:BeautifulSoup) -> list:
+        # Assume that ul is the block surrounding the li elements
+        # Get all the li elements
+        lis = ul.find_all('li', class_='large')
+        # Turn each to an assignment instance
+        assignments = []
+        for li in lis:
+            assignments.append(Base(li.a['href'], li.a.text, self.session, self.parent))
+        return assignments
+
+    def liLargeToExercises(self, ul:BeautifulSoup) -> list:
+        # Assume that ul is the block surrounding the li elements
+        # Get all the li elements
+        lis = ul.find_all('li', class_='large')
+        # Turn each to an exercise instance
+        exercises = []
+        for li in lis:
+            exercises.append(Base(li.a['href'], li.a.text, self.session, self.parent))
+        return exercises
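
For reference, a minimal sketch of the markup that __parseCfgBlock above assumes (a "cfg-container round" div holding "cfg-key"/"cfg-val" span pairs) and the dict it would produce. The sample HTML is invented for illustration and is not taken from a real Themis page:

from bs4 import BeautifulSoup

# Invented sample of the assumed markup; a real Themis page may differ.
html = """
<div class="cfg-container round">
  <span class="cfg-key">Deadline</span><span class="cfg-val">2024-03-01 23:59</span>
  <span class="cfg-key">Downloads</span><span class="cfg-val"><a href="/files/a1.pdf">a1.pdf</a></span>
</div>
"""
div = BeautifulSoup(html, 'lxml').find('div', class_='cfg-container round')
keys = div.find_all('span', class_='cfg-key')
values = div.find_all('span', class_='cfg-val')
print({k.text: v.text for k, v in zip(keys, values)})
# {'Deadline': '2024-03-01 23:59', 'Downloads': 'a1.pdf'}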

Course.py

@@ -1,28 +1,25 @@
 # Class to handle courses
 from bs4 import BeautifulSoup
 from requests import Session
-from Year import Year
 from Assignment import Assignment
 import re
 from Base import Base
+from exceptions.CourseUnavailable import CourseUnavailable
 
 class Course(Base):
-    def __init__(url:str, name:str, session:Session, parent:Year):
-        super().__init__()
-        self.url = self.__constructURL("name")
+    # Extend the Base class init
+    def __init__(self, url:str, name:str, session:Session, parent):
+        super().__init__(url, name, session, parent)
         self.assignments = []
+        self.__courseAvailable(self.session.get(self.url))
 
     def __str__(self):
         return f"Course {self.name} in year {self.parent.year}"
 
-    def __constructURL(self, name:str):
-        # We have to find the name in the page and find its corresponding url
-        r = self.session.get(url)
-        soup = BeautifulSoup(r.text, 'lxml')
-        # Find the course
-        course = soup.find('a', text=self.name)
-        # Get the url
-        return course['href']
+    def __courseAvailable(self, r):
+        # Check if we got an error
+        if "Something went wrong" in r.text:
+            raise CourseUnavailable()
 
     @property
     def courseInfo(self):
@@ -55,22 +52,14 @@ class Course(Base):
         # For each link in the course page, get the assignment
         r = self.session.get(self.url)
         soup = BeautifulSoup(r.text, 'lxml')
-        # Find the assignments, they are in <li class="large">
-        assignments = soup.find_all('li', class_='large')
-        # FIXME: They sometimes put other stuff in these li's, so we have to filter them out
-        # Create assignment object for each and store them in the class
-        for assignment in assignments:
-            # Get the name
-            name = assignment.find('a').text
-            # Get the url
-            url = assignment.find('a')['href']
-            # Create the object
-            self.assignments.append(Assignment(url=url, name=name, session=self.session, course=self))
-
-    def getGrades(self):
-        pass
+        # Find the big ul
+        print(soup)
+        section = soup.find('div', class_="ass-children")
+        ul = section.find('ul', class_='round')
+        # IDEA: They sometimes put other stuff in these li's, so we have to filter them out
+        print(ul)
+        print(type(ul))
+        # Transform them into Assignment objects
+        # I want to call the __liLargeToAssignments method from the Base class
+        return self.liLargeToAssignments(ul)

Downloadable.py

@@ -2,14 +2,15 @@
 from requests import Session
 from bs4 import BeautifulSoup
+from Base import Base
 
-class Downloadable:
-    def __init__(self, session:Session, parent:Class):
+class Downloadable(Base):
+    def __init__(self, session:Session, parent):
         self.session = session
         self.parent = parent
 
     # File handling
-    def __findFile(self, name:str) -> File:
+    def __findFile(self, name:str):
         # Get the file by name
         for file in self.files:
             if file.name == name:
@@ -17,15 +18,26 @@ class Downloadable:
         return None
 
     @property
-    def files(self) -> list[File]:
+    def files(self) -> list:
         # Create a list of files
-        # They are all in a span with class "cfg-val"
+        # They are all links in a span with class "cfg-val"
         r = self.session.get(self.url)
         soup = BeautifulSoup(r.text, 'lxml')
-        spans = soup.find_all('span', class_="cfg-val")
-        # Get the links and names of the files, create a File object for each
-        files = [File(url=a['href'], name=a.text, session=self.session, parent=self) for a in spans]
-        return files
+        # Make sure we only get the ones that have a link
+        # We parse the cfg and check for the key "Downloads"
+        cfg = soup.find('div', class_='cfg-container round')
+        cfg = self.__parseCfgBlock(cfg)
+        # Get the downloads
+        downloads = cfg.get("Downloads", None)
+        if downloads == None:
+            return []
+        # Get the links
+        links = downloads.find_all('a')
+        files = []
+        for link in links:
+            files.append(File(link['href'], link.text, self.session, self))
+        return files
 
     def download(self, filename:str) -> str:
         # Download the file

Exercise.py

@@ -1,5 +1,71 @@
 from Base import Base
+from Downloadable import Downloadable
+from requests import Session
+from time import sleep
 
 class Exercise(Base):
-    //TODO: Implement
+    def __init__(self, url:str, name:str, session:Session, parent):
+        super().__init__()
+        self.download = Downloadable(url, name, session, self)
+
+    def __str__(self):
+        return f"Exercise {self.name} in assignment {self.parent.name}"
+
+    # IDEA : Make this async, so we don't have to wait for the whole output to load
+    def submit(self, file:str, comment:str) -> str:
+        # Submit a file
+        # The form is in the page with class "cfg-container round"
+        # The form is a POST request to the url with the file and the comment
+        # The url looks like this: https://themis.housing.rug.nl/submit/{year}/{course}/{assignment}/{exercise}?_csrf={session_csrf}&sudo={username}
+        # The current url looks like: https://themis.housing.rug.nl/course/{year}/{course}/{assignment}/{exercise}
+        # The request should contain the contents of the file
+        # Get the url
+        url = self.url.replace("course", "submit")
+        # Get the csrf token
+        csrf = self.session.cookies['_csrf']
+        # Get the username
+        username = self.session.cookies['username']
+        # Open the file
+        with open(file, 'rb') as f:
+            # Submit the file
+            # After submission it will 302 to the current submission page
+            r = self.session.post(url, files={'file': f}, data={'comment': comment, '_csrf': csrf, 'sudo': username})
+        # Follow the redirect and repeatedly send get requests to the page
+        # We have a table which represents the test cases. The program should wait until all the test cases are done
+        # The test case is done when all of the elements in the table are not none
+        # The element which showcases this for each <tr class="sub-casetop">
+        # is the class in there. if it is "queued" it is still running.
+        # Get the url
+        url = r.url
+        # Get the page
+        r = self.session.get(url)
+        # Get the soup
+        soup = BeautifulSoup(r.text, 'lxml')
+        # Get the table
+        table = soup.find('table')
+        # Get the rows
+        rows = table.find_all('tr', class_='sub-casetop')
+        # Get the status
+        status = [row.find('td', class_='status').text for row in rows]
+        # Wait until all the status are not queued
+        while "queued" in status:
+            # Wait a bit
+            sleep(1)
+            # Get the page
+            r = self.session.get(url)
+            # Get the soup
+            soup = BeautifulSoup(r.text, 'lxml')
+            # Get the table
+            table = soup.find('table')
+            # Get the rows
+            rows = table.find_all('tr', class_='sub-casetop')
     pass
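
In the loop above, `status` does not appear to be recomputed after the page is re-fetched, so the wait condition never changes. A small sketch of the polling step as it seems to be intended, reusing the selectors from the diff (the 'tr.sub-casetop' and 'td.status' selectors are assumptions about the Themis markup taken from the code above, not a documented API); Exercise.submit could call something like this right after following the redirect:

from time import sleep
from bs4 import BeautifulSoup
from requests import Session

def wait_until_judged(session: Session, url: str, interval: float = 1.0) -> list[str]:
    # Poll the submission page until no test-case row is still marked "queued".
    while True:
        soup = BeautifulSoup(session.get(url).text, 'lxml')
        rows = soup.find('table').find_all('tr', class_='sub-casetop')
        status = [row.find('td', class_='status').text for row in rows]
        if "queued" not in status:
            return status
        sleep(interval)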

File.py

@@ -1,23 +0,0 @@
-# Module to handle files
-from Base import Base
-from Downloadable import Downloadable
-from requests import Session
-
-class File(Base):
-    def __init__(self, url:str, name:str, session:Session, parent:Downloadable):
-        super().__init__()
-
-    def __str__(self):
-        return f"File {self.name} for parent of Downloadable {self.parent.parent.name}"
-
-    # I know this is reduntant, but how can you have a file class without a download()
-    def download(self) -> str:
-        r = self.session.get(self.url, stream=True)
-        with open(self.name, 'wb') as f:
-            for chunk in r.iter_content(chunk_size=1024):
-                if chunk:
-                    f.write(chunk)
-        return file.name
-
-    def __eq__(self, other:File) -> bool:
-        return self.name == other.name

Themis.py

@@ -1,14 +1,70 @@
-from login import login
 from Year import Year
+import urllib3
+from requests import Session
+from bs4 import BeautifulSoup
+
+# Disable warnings
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 class Themis:
-    def __init__(self):
-        self.session = login()
+    def __init__(self,user:str, passwd:str):
+        self.session = self.login(user,passwd)
         self.years = []
+        self.url = "https://themis.housing.rug.nl/course/"
 
-    def getYears(self):
-        pass
-
-    def getYear(self, end:int):
-        pass
+    def login(self, user, passwd):
+        headers = {
+            "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/80.0.3987.160 Chrome/80.0.3987.163 Safari/537.36"
+        }
+        data = {
+            "user": user,
+            "password":passwd,
+            "null": None
+        }
+        with Session() as s:
+            url = 'https://themis.housing.rug.nl/log/in'
+            r = s.get(url,headers=headers,verify=False)
+            soup = BeautifulSoup(r.text, 'lxml')
+            # get the csrf token and add it to payload
+            csrfToken = soup.find('input',attrs = {'name':'_csrf'})['value']
+            data['_csrf'] = csrfToken
+            # Login
+            r = s.post(url,data=data,headers = headers)
+            # check if login was successful
+            log_out = "Welcome, logged in as" in r.text
+            if not log_out:
+                raise Exception(f"Login for user {user} failed")
+        return s
+
+    def getYear(self, start:int, end:int):
+        # Get the current year
+        return Year(self.session, self, start, end)
+
+    def allYears(self):
+        # All of them are in a big ul at the beginning of the page
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        ul = soup.find('ul', class_='round')
+        lis = ul.find_all('li', class_='large')
+        years = []
+        for li in lis:
+            # format: 2019-2020
+            year = li.a.text.split("-")
+            years.append(Year(self.session, self, int(year[0]), int(year[1])))
+        return years # Return a list of year objects
+
+# This is the main file, so we have to run the main function
+def main():
+    themis = Themis()
+    year = themis.getYear(2019, 2020)
+    print(year.getCourses())
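
A usage sketch of the interface after this commit (not part of the diff; the credentials and years below are placeholders):

from Themis import Themis

themis = Themis("s1234567", "hunter2")        # placeholder credentials; logs in via Themis.login()
year = themis.getYear(2023, 2024)             # Year object for the 2023-2024 academic year
for course in year.getCourses(errors=False):  # skip unavailable courses silently
    print(course.name)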

Year.py

@@ -1,29 +1,52 @@
 # Year class to represent an academic year
 from bs4 import BeautifulSoup
-import selenium
-from login import login
 from Course import Course
-from Themis import Themis
-from Base import Base
+from requests import Session
+from exceptions.CourseUnavailable import CourseUnavailable
 
-class Year(Base):
-    def __init__(name:str, session:Session, parent:Themis, end_year:int):
-        super().__init__()
-        self.start = end_year - 1
+class Year:
+    def __init__(self, session:Session, parent, start_year:int, end_year:int):
+        self.start = start_year
         self.year = end_year
-        self.url = __constructUrl()
+        self.session = session
+        self.url = self.__constructUrl()
 
     # Method to set the url
     def __constructUrl(self):
-        return f"https://themis.housing.rug.nl/{self.start}-{self.year}"
+        return f"https://themis.housing.rug.nl/course/{self.start}-{self.year}"
 
     # Method to get the courses of the year
-    def getCourses(self) -> list[Course]:
+    def getCourses(self, errors:bool=False) -> list[Course]:
+        # lis in a big ul
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        lis = soup.find_all('li', class_='large')
         courses = []
-        # TODO: Logic to get all courses
+        for li in lis:
+            try:
+                courses.append(
+                    Course(
+                        self.url + li.a['href'],
+                        li.a.text,
+                        self.session,
+                        self
+                    )
+                )
+            except CourseUnavailable:
+                if errors:
+                    raise CourseUnavailable(f"Course {li.a.text} in year {self.start}-{self.year} is not available")
+                else:
+                    pass
         return courses
 
     def getCourse(self, name:str) -> Course:
-        #TODO: Implement
-        pass
+        # Get the course
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        # Search by name
+        course = soup.find('a', text=name)
+        # Get the url and transform it into a course object
+        return Course(url=course['href'], name=name, session=self.session, year=self)

config.py

@@ -1,5 +0,0 @@
-username = "<s-number>"
-password = "<password>"
-
-if __name__ == "__main__":
-    print("Do not run this module like this. Just set the values.")

exceptions/CourseUnavailable.py

@@ -0,0 +1,4 @@
+class CourseUnavailable(Exception):
+    def __init__(self, message:str="Error in course"):
+        self.message = message
+        super().__init__(self.message)
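
A tiny illustration (not part of the commit) of how the new exception behaves; Year.getCourses above raises it in the same way when errors=True:

from exceptions.CourseUnavailable import CourseUnavailable

try:
    raise CourseUnavailable("Course X in year 2023-2024 is not available")
except CourseUnavailable as e:
    print(e.message)  # message stored by __init__ above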

login.py

@@ -1,52 +0,0 @@
-# Module to handle login
-# URL to login: https://themis.housing.rug.nl/log/in
-# POST request which contains the following data:
-# - username
-# - password
-# - null
-
-from requests import Session
-from bs4 import BeautifulSoup
-from config import username, password
-import urllib3
-
-# Disable warnings
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-# Function to login to Themis
-def login(user, passwd):
-    headers = {
-        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/80.0.3987.160 Chrome/80.0.3987.163 Safari/537.36"
-    }
-    data = {
-        "user": username,
-        "password":password,
-        "null": None
-    }
-
-    with Session() as s:
-        url = 'https://themis.housing.rug.nl/log/in'
-        r = s.get(url,headers=headers,verify=False)
-        soup = BeautifulSoup(r.text, 'lxml')
-        # get the csrf token and add it to payload
-        csrfToken = soup.find('input',attrs = {'name':'_csrf'})['value']
-        data['_csrf'] = csrfToken
-        # Login
-        r = s.post(url,data=data,headers = headers)
-        # check if login was successful
-        log_out = "Welcome, logged in as" in r.text
-        if log_out:
-            print(f"Login for user {username} successful")
-        else:
-            print("Login failed")
-            return None
-    return s
-
-if __name__ == "__main__":
-    print("Do not run this module like this. Used to give a logged in session.")