Formatted, debugging. Added exception.

This commit is contained in:
Boyan 2024-02-13 19:24:25 +01:00
parent 5e975cead1
commit 99e356d96a
12 changed files with 294 additions and 157 deletions

1
.gitignore vendored
View File

@ -1,5 +1,6 @@
# Config
config.py
baller.py
# Byte-compiled / optimized / DLL files
__pycache__/

View File

@ -1,25 +1,30 @@
# Module to handle each assignment (most difficult part)
from Course import Course
from File import File
from Submission import Submission
from Downloadable import Downloadable
from Base import Base
from Exercise import Exercise
from requests import Session
class Assignment(Base):
    # One assignment page of a course. It lists exercises and may offer
    # downloadable files.

    def __init__(self, url:str, name:str, session:Session, parent):
        # url:     address of the assignment page
        # name:    display name of the assignment
        # session: logged-in requests session used for every request
        # parent:  the object this assignment belongs to (read by __str__)
        # Base.__init__ needs all four values, so they are passed through.
        super().__init__(url, name, session, parent)
        # NOTE(review): Downloadable.__init__ is declared with
        # (session, parent) only; this four-argument call is kept as it was
        # written and still has to be reconciled with that signature.
        self.download = Downloadable(url, name, session, self)

    def __str__(self):
        return f"Assignment {self.name} in course {self.parent.name}"

    def __fetchSoup(self):
        # Download the assignment page and return it parsed.
        # Imported locally because this module's import list has no bs4 import.
        from bs4 import BeautifulSoup
        r = self.session.get(self.url)
        return BeautifulSoup(r.text, 'lxml')

    def getExercises(self) -> list[Exercise]:
        # Return an Exercise for every linked entry on the assignment page.
        # Find li large
        ul = self.__fetchSoup().find('ul', class_='round')
        if ul is None:
            # No list on the page means there is nothing to return
            return []
        # Turn each li to an exercise instance; entries without a link
        # are skipped instead of raising on li.a being None
        return [
            Exercise(li.a['href'], li.a.text, self.session, self)
            for li in ul.find_all('li', class_='large')
            if li.a is not None
        ]

    def getExercise(self, name:str) -> Exercise:
        # Return the exercise whose link text equals `name`.
        # Raises ValueError when the page has no such link.
        # Search by name
        exercise = self.__fetchSoup().find('a', text=name)
        if exercise is None:
            raise ValueError(f"Exercise {name} not found in assignment {self.name}")
        # Get the url and transform it into an exercise object
        # (Exercise.__init__ names its last parameter `parent`)
        return Exercise(url=exercise['href'], name=name, session=self.session, parent=self)

View File

@ -1,10 +1,71 @@
# Noticed there's a similar pattern in the classes, so I'm going to create a base class for them
# classes that inherit from Base:
# - Course
# - Assignment
# - Exercise
from requests import Session
from bs4 import BeautifulSoup
class Thing:
def __init__(url:str, name:str, session:Session, parent:Class):
class Base:
    # Shared state and scraping helpers for the classes that inherit from
    # Base (Course, Assignment, Exercise).

    def __init__(self, url:str, name:str, session:Session, parent):
        # url:     address of the page this object represents
        # name:    display name
        # session: requests session used for all requests
        # parent:  the object this one was created from
        self.url = url
        self.name = name
        self.session = session
        self.parent = parent

    def __parseCfgBlock(self, div:BeautifulSoup) -> dict:
        # We assume that the div is a submission with class "cfg-container round"
        # The key is a span with a class "cfg-key"
        # The value is a span with a class "cfg-val"
        # Returns a dict mapping each key's text to its value's text.
        keys = div.find_all('span', class_="cfg-key")
        values = div.find_all('span', class_="cfg-val")
        # zip stops at the shorter list, so a key without a matching value
        # span is dropped instead of raising IndexError
        return {key.text: value.text for key, value in zip(keys, values)}

    def getSubmissions(self):
        # Return (overview, submissions): the first cfg block as a dict and
        # the remaining blocks as a list of dicts. A page without any cfg
        # block yields ({}, []).
        # We change the url where course becomes stats; only the first
        # occurrence is replaced so later path parts that happen to contain
        # "course" stay intact
        url = self.url.replace("course", "stats", 1)
        r = self.session.get(url)
        # Get each div with class "cfg-container round"
        soup = BeautifulSoup(r.text, 'lxml')
        divs = soup.find_all('div', class_="cfg-container round")
        if not divs:
            return {}, []
        # The first one is an overview, the next ones are the submissions
        overview, *rest = divs
        return self.__parseCfgBlock(overview), [self.__parseCfgBlock(div) for div in rest]

    def _liLargeToItems(self, ul, session, parent) -> list:
        # Build one Base object per <li class="large"> that contains a link.
        # session/parent fall back to this object's own session and parent
        # when they are not given.
        if ul is None:
            return []
        session = self.session if session is None else session
        parent = self.parent if parent is None else parent
        # They sometimes put other stuff in these li's, so entries without
        # a link are filtered out
        return [
            Base(li.a['href'], li.a.text, session, parent)
            for li in ul.find_all('li', class_='large')
            if li.a is not None
        ]

    def liLargeToAssignments(self, ul:BeautifulSoup, session:Session=None, parent=None) -> list:
        # Assume that ul is the block surrounding the li elements
        # Turn each linked li into an object holding the link's url and text.
        # session and parent are optional overrides.
        # NOTE(review): the default parent is self.parent, as before; the
        # created children probably should point at self - confirm.
        return self._liLargeToItems(ul, session, parent)

    def liLargeToExercises(self, ul:BeautifulSoup, session:Session=None, parent=None) -> list:
        # Assume that ul is the block surrounding the li elements
        # Same as liLargeToAssignments; the optional session/parent
        # arguments allow the three-argument call used by Assignment.
        return self._liLargeToItems(ul, session, parent)

View File

@ -1,28 +1,25 @@
# Class to handle courses
from bs4 import BeautifulSoup
from requests import Session
from Year import Year
from Assignment import Assignment
import re
from Base import Base
from exceptions.CourseUnavailable import CourseUnavailable
# Course: a single course page; inherits url/name/session/parent from Base.
# NOTE(review): this span comes from a rendered diff, so two versions of
# __init__ appear back to back - presumably the first is the one that was replaced.
class Course(Base):
# NOTE(review): this version has no `self` parameter and calls
# super().__init__() without the arguments Base.__init__ requires.
def __init__(url:str, name:str, session:Session, parent:Year):
super().__init__()
self.url = self.__constructURL("name")
# Extend the Base class init
# Stores the four values through Base, starts with an empty assignment list,
# then requests the course page once so an unavailable course raises immediately.
def __init__(self, url:str, name:str, session:Session, parent):
super().__init__(url, name, session, parent)
self.assignments = []
self.__courseAvailable(self.session.get(self.url))
# Human-readable label; reads the `year` attribute of the parent object.
def __str__(self):
return f"Course {self.name} in year {self.parent.year}"
# Look up a link by its text and return its href.
# NOTE(review): `url` below is not defined in this method, and the `name`
# parameter is unused (self.name is searched instead).
def __constructURL(self, name:str):
# We have to find the name in the page and find its corresponding url
r = self.session.get(url)
soup = BeautifulSoup(r.text, 'lxml')
# Find the course
course = soup.find('a', text=self.name)
# Get the url
return course['href']
# Raise CourseUnavailable when the response text contains the error banner.
# r: the response object for the course page (only r.text is read).
def __courseAvailable(self, r):
# Check if we got an error
if "Something went wrong" in r.text:
raise CourseUnavailable()
@property
def courseInfo(self):
@ -55,22 +52,14 @@ class Course(Base):
# For each link in the course page, get the assignment
r = self.session.get(self.url)
soup = BeautifulSoup(r.text, 'lxml')
# Find the assignments, they are in <li class="large">
assignments = soup.find_all('li', class_='large')
# FIXME: They sometimes put other stuff in these li's, so we have to filter them out
# Create assignment object for each and store them in the class
for assignment in assignments:
# Get the name
name = assignment.find('a').text
# Get the url
url = assignment.find('a')['href']
# Create the object
self.assignments.append(Assignment(url=url, name=name, session=self.session, course=self))
def getGrades(self):
pass
# Find the big ul
print(soup)
section = soup.find('div', class_="ass-children")
ul = section.find('ul', class_='round')
# IDEA: They sometimes put other stuff in these li's, so we have to filter them out
print(ul)
print(type(ul))
# Transform them into Assignment objects
# I want to call the __liLargeToAssignments method from the Base class
return self.liLargeToAssignments(ul)

View File

@ -2,14 +2,15 @@
from requests import Session
from bs4 import BeautifulSoup
from Base import Base
# Downloadable: helper that lists and downloads the files attached to a page.
# NOTE(review): two class headers and two __init__ signatures follow because
# old and new diff lines are interleaved; presumably the `(Base)` variant is current.
class Downloadable:
def __init__(self, session:Session, parent:Class):
class Downloadable(Base):
# session: requests session used for downloads
# parent:  the object that owns this helper
# NOTE(review): Base.__init__ is not called, so self.url and self.name are
# never set here, and callers elsewhere pass four positional arguments
# (url, name, session, parent) - confirm the intended signature.
def __init__(self, session:Session, parent):
self.session = session
self.parent = parent
# File handling
def __findFile(self, name:str) -> File:
def __findFile(self, name:str):
# Get the file by name
for file in self.files:
if file.name == name:
@ -17,15 +18,26 @@ class Downloadable:
return None
# files: fetch self.url and return a list of File objects for the page's downloads.
# NOTE(review): two versions of the body are interleaved below (rendered diff);
# everything after the first `return files` is presumably the newer version.
@property
def files(self) -> list[File]:
def files(self) -> list:
# Create a list of files
# They are all in a span with class "cfg-val"
# They are all links in a span with class "cfg-val"
r = self.session.get(self.url)
soup = BeautifulSoup(r.text, 'lxml')
spans = soup.find_all('span', class_="cfg-val")
# Get the links and names of the files, create a File object for each
files = [File(url=a['href'], name=a.text, session=self.session, parent=self) for a in spans]
return files
# Make sure we only get the ones that have a link
# We parse the cfg and check for the key "Downloads"
cfg = soup.find('div', class_='cfg-container round')
# NOTE(review): inside this class the double-underscore name is mangled to
# _Downloadable__parseCfgBlock, which is not the name Base defines
# (_Base__parseCfgBlock), so this lookup raises AttributeError.
cfg = self.__parseCfgBlock(cfg)
# Get the downloads
# NOTE(review): the parsed block stores the text of each value span, so
# `downloads` would be a str and has no find_all method.
downloads = cfg.get("Downloads", None)
if downloads == None:
return []
# Get the links
links = downloads.find_all('a')
files = []
# NOTE(review): no import of File is visible in this module's shown imports - confirm.
for link in links:
files.append(File(link['href'], link.text, self.session, self))
return files
def download(self, filename:str) -> str:
# Download the file

View File

@ -1,5 +1,71 @@
from Base import Base
from Downloadable import Downloadable
from requests import Session
from time import sleep
class Exercise(Base):
    # One exercise page of an assignment; supports submitting a solution.

    def __init__(self, url:str, name:str, session:Session, parent):
        # url:     address of the exercise page
        # name:    display name of the exercise
        # session: logged-in requests session
        # parent:  the assignment this exercise belongs to (read by __str__)
        # Base.__init__ needs all four values, so they are passed through.
        super().__init__(url, name, session, parent)
        # NOTE(review): Downloadable.__init__ is declared with
        # (session, parent) only; this four-argument call is kept as it was
        # written and still has to be reconciled with that signature.
        self.download = Downloadable(url, name, session, self)

    def __str__(self):
        return f"Exercise {self.name} in assignment {self.parent.name}"

    def __caseStatuses(self, url:str) -> list:
        # Fetch the submission page and return the status text of every
        # test-case row (<tr class="sub-casetop">).
        # Imported locally because this module's import list has no bs4 import.
        from bs4 import BeautifulSoup
        r = self.session.get(url)
        soup = BeautifulSoup(r.text, 'lxml')
        # Get the table
        table = soup.find('table')
        # Get the rows
        rows = table.find_all('tr', class_='sub-casetop')
        # Get the status
        return [row.find('td', class_='status').text for row in rows]

    # IDEA : Make this async, so we don't have to wait for the whole output to load
    def submit(self, file:str, comment:str) -> str:
        # Submit a file and block until no test case is queued any more.
        # file:    path of the file to upload
        # comment: comment sent along with the submission
        # Returns the url of the submission page the server redirected to.
        #
        # The url looks like this: https://themis.housing.rug.nl/submit/{year}/{course}/{assignment}/{exercise}?_csrf={session_csrf}&sudo={username}
        # The current url looks like: https://themis.housing.rug.nl/course/{year}/{course}/{assignment}/{exercise}
        # Only the first "course" is replaced so later path parts that
        # happen to contain the word stay intact.
        url = self.url.replace("course", "submit", 1)
        # Get the csrf token
        csrf = self.session.cookies['_csrf']
        # Get the username
        username = self.session.cookies['username']
        # Open the file
        with open(file, 'rb') as f:
            # Submit the file
            # After submission it will 302 to the current submission page
            r = self.session.post(url, files={'file': f}, data={'comment': comment, '_csrf': csrf, 'sudo': username})
        # Follow the redirect and repeatedly send get requests to the page
        url = r.url
        # The statuses are re-read on every iteration; reading them only
        # once would make this loop spin forever while anything is queued.
        while "queued" in self.__caseStatuses(url):
            # Wait a bit
            sleep(1)
        return url

View File

@ -1,23 +0,0 @@
# Module to handle files
from Base import Base
from Downloadable import Downloadable
from requests import Session
class File(Base):
    # A single downloadable file that belongs to a Downloadable.

    def __init__(self, url:str, name:str, session:Session, parent:Downloadable):
        # url:     address the file is downloaded from
        # name:    file name, also used as the local path when saving
        # session: requests session used for the download
        # parent:  the Downloadable that listed this file
        # Base.__init__ needs all four values, so they are passed through.
        super().__init__(url, name, session, parent)

    def __str__(self):
        return f"File {self.name} for parent of Downloadable {self.parent.parent.name}"

    # I know this is redundant, but how can you have a file class without a download()
    def download(self) -> str:
        # Stream the file to disk under self.name and return that name.
        # The response is used as a context manager so the connection is
        # released once the body has been written.
        with self.session.get(self.url, stream=True) as r:
            with open(self.name, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        # `file` was never defined here; the saved path is self.name
        return self.name

    def __eq__(self, other) -> bool:
        # Two files are equal when their names match. Other types are left
        # to Python's default handling instead of raising AttributeError.
        if not isinstance(other, File):
            return NotImplemented
        return self.name == other.name

    def __hash__(self) -> int:
        # Defining __eq__ removes the inherited hash; keep instances
        # hashable and consistent with equality by name.
        return hash(self.name)

View File

@ -1,14 +1,70 @@
from login import login
from Year import Year
import urllib3
from requests import Session
from bs4 import BeautifulSoup
# Disable warnings
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class Themis:
    # Entry point: logs in and gives access to the academic years.

    def __init__(self,user:str, passwd:str):
        # user/passwd: credentials used for the login request.
        # Raises whatever login() raises when the login fails.
        self.session = self.login(user,passwd)
        self.years = []
        self.url = "https://themis.housing.rug.nl/course/"

    def login(self, user, passwd):
        # Log in and return the authenticated requests Session.
        # Raises Exception when the response does not contain the
        # logged-in banner.
        headers = {
            "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/80.0.3987.160 Chrome/80.0.3987.163 Safari/537.36"
        }
        data = {
            "user": user,
            "password":passwd,
            "null": None
        }
        url = 'https://themis.housing.rug.nl/log/in'
        # The session is returned to the caller, so it must not be created
        # in a `with` block: that would close it before it is handed out.
        s = Session()
        try:
            r = s.get(url,headers=headers,verify=False)
            soup = BeautifulSoup(r.text, 'lxml')
            # get the csrf token and add it to payload
            data['_csrf'] = soup.find('input',attrs = {'name':'_csrf'})['value']
            # Login
            r = s.post(url,data=data,headers = headers)
            # check if login was successful
            if "Welcome, logged in as" not in r.text:
                raise Exception(f"Login for user {user} failed")
        except Exception:
            # Nobody will use this session, so release it before re-raising
            s.close()
            raise
        return s

    def getYear(self, start:int, end:int):
        # Return the Year object for the academic year start-end.
        return Year(self.session, self, start, end)

    def allYears(self):
        # Return a list of Year objects, one per entry on the overview page.
        # All of them are in a big ul at the beginning of the page
        r = self.session.get(self.url)
        soup = BeautifulSoup(r.text, 'lxml')
        ul = soup.find('ul', class_='round')
        if ul is None:
            return []
        years = []
        for li in ul.find_all('li', class_='large'):
            if li.a is None:
                # Entries without a link carry no year
                continue
            # format: 2019-2020
            year = li.a.text.split("-")
            years.append(Year(self.session, self, int(year[0]), int(year[1])))
        return years # Return a list of year objects

    def getYears(self):
        # Older name, kept so existing callers keep working; same result
        # as allYears().
        return self.allYears()
# This is the main file, so we have to run the main function
def main():
    # Ask for credentials, log in and print the courses of 2019-2020.
    # Themis() needs a user name and password; they are read interactively
    # so no credentials have to be stored in the source.
    from getpass import getpass
    themis = Themis(input("Username: "), getpass("Password: "))
    year = themis.getYear(2019, 2020)
    print(year.getCourses())


# Run main() only when this file is executed as a script
if __name__ == "__main__":
    main()

View File

@ -1,29 +1,52 @@
# Year class to represent an academic year
from bs4 import BeautifulSoup
import selenium
from login import login
from Course import Course
from Themis import Themis
from Base import Base
from requests import Session
from exceptions.CourseUnavailable import CourseUnavailable
class Year:
    # One academic year (for example 2019-2020) and the courses listed in it.

    def __init__(self, session:Session, parent, start_year:int, end_year:int):
        # session:    logged-in requests session
        # parent:     the object that created this year
        # start_year: first calendar year, e.g. 2019
        # end_year:   second calendar year, e.g. 2020
        self.start = start_year
        self.year = end_year
        self.session = session
        # Kept so the object that created this year stays reachable
        self.parent = parent
        self.url = self.__constructUrl()

    # Method to set the url
    def __constructUrl(self):
        return f"https://themis.housing.rug.nl/course/{self.start}-{self.year}"

    # Method to get the courses of the year
    def getCourses(self, errors:bool=False) -> list[Course]:
        # Return a Course for every linked entry on the year page.
        # errors: when True, an unavailable course raises CourseUnavailable
        #         with the course name and year in the message; when False
        #         such courses are skipped.
        # lis in a big ul
        r = self.session.get(self.url)
        soup = BeautifulSoup(r.text, 'lxml')
        courses = []
        for li in soup.find_all('li', class_='large'):
            if li.a is None:
                # Entries without a link are not courses
                continue
            try:
                # NOTE(review): the href is appended to the year url as-is;
                # confirm the link format on the page.
                courses.append(
                    Course(
                        self.url + li.a['href'],
                        li.a.text,
                        self.session,
                        self
                    )
                )
            except CourseUnavailable as err:
                if errors:
                    # Keep the original error attached as the cause
                    raise CourseUnavailable(f"Course {li.a.text} in year {self.start}-{self.year} is not available") from err
        return courses

    def getCourse(self, name:str) -> Course:
        # Return the course whose link text equals `name`.
        # Raises CourseUnavailable when the year page has no such link.
        r = self.session.get(self.url)
        soup = BeautifulSoup(r.text, 'lxml')
        # Search by name
        course = soup.find('a', text=name)
        if course is None:
            raise CourseUnavailable(f"Course {name} not found in year {self.start}-{self.year}")
        # Get the url and transform it into a course object
        # (Course.__init__ names its last parameter `parent`)
        return Course(url=course['href'], name=name, session=self.session, parent=self)

View File

@ -1,5 +0,0 @@
# Login credentials; replace the placeholders with real values.
username = "<s-number>"
password = "<password>"
# This module only holds settings, so running it directly just prints a hint.
if __name__ == "__main__":
print("Do not run this module like this. Just set the values.")

View File

@ -0,0 +1,4 @@
class CourseUnavailable(Exception):
    # Raised when a course page cannot be used.

    def __init__(self, message:str="Error in course"):
        # message: text shown for the error; it is also kept on the
        # instance as `message`.
        super().__init__(message)
        self.message = message

View File

@ -1,52 +0,0 @@
# Module to handle login
# URL to login: https://themis.housing.rug.nl/log/in
# POST request which contains the following data:
# - username
# - password
# - null
from requests import Session
from bs4 import BeautifulSoup
from config import username, password
import urllib3
# Disable warnings
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Function to login to Themis
def login(user=username, passwd=password):
    # Log in to Themis and return the authenticated requests Session,
    # or None when the login is rejected.
    # user/passwd: credentials to log in with. They default to the values
    # from config, so a call without arguments keeps working; explicitly
    # passed values are now actually used.
    headers = {
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/80.0.3987.160 Chrome/80.0.3987.163 Safari/537.36"
    }
    data = {
        "user": user,
        "password":passwd,
        "null": None
    }
    url = 'https://themis.housing.rug.nl/log/in'
    # The session is handed to the caller, so it is not created in a
    # `with` block: leaving that block would close it before it is returned.
    s = Session()
    try:
        r = s.get(url,headers=headers,verify=False)
        soup = BeautifulSoup(r.text, 'lxml')
        # get the csrf token and add it to payload
        data['_csrf'] = soup.find('input',attrs = {'name':'_csrf'})['value']
        # Login
        r = s.post(url,data=data,headers = headers)
    except Exception:
        # Nobody will use this session, so release it before re-raising
        s.close()
        raise
    # check if login was successful
    if "Welcome, logged in as" in r.text:
        print(f"Login for user {user} successful")
        return s
    print("Login failed")
    s.close()
    return None
# Running this module directly only prints a hint; it does not log in.
if __name__ == "__main__":
print("Do not run this module like this. Used to give a logged in session.")