diff --git a/.gitignore b/.gitignore
index b81def0..0df7b3b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
# Config
config.py
+baller.py
# Byte-compiled / optimized / DLL files
__pycache__/
diff --git a/src/Assignment.py b/src/Assignment.py
index 18e945b..5fc575f 100644
--- a/src/Assignment.py
+++ b/src/Assignment.py
@@ -1,25 +1,30 @@
# Module to handle each assignment (most difficult part)
-from Course import Course
-from File import File
-from Submission import Submission
+from Downloadable import Downloadable
from Base import Base
from Exercise import Exercise
+from requests import Session
class Assignment(Base):
- def __init__(self, url:str, name:str, session:Session, parent:Course):
+ def __init__(self, url:str, name:str, session:Session, parent):
super().__init__()
- self.files = self.files
+        self.url, self.name, self.session, self.parent = url, name, session, parent; self.download = Downloadable(url, name, session, self)
def __str__(self):
- return f"Assignment {self.name} in course {self.parent.name}"
+ return f"Assignment {self.name} in course {self.parent.name}"
-
- def getSubmissions(self) -> Submission:
- pass
+ def getExercises(self) -> list[Exercise]:
+        from bs4 import BeautifulSoup  # bs4 is not imported at Assignment.py's top level
+        ul = BeautifulSoup(self.session.get(self.url).text, 'lxml').find('ul', class_='round')
- def getExercises(self) -> list[Excercise]:
- pass
+ # Turn each li to an exercise instance
+        return self.liLargeToExercises(ul)  # the Base helper takes only the ul; session/parent come from self
def getExercise(self, name:str) -> Exercise:
- pass
\ No newline at end of file
+        # Fetch the assignment page and search the exercise link by name
+        from bs4 import BeautifulSoup  # bs4 is not imported at Assignment.py's top level
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        exercise = soup.find('a', text=name)
+        # Exercise.__init__ names its owner 'parent', not 'assignment'
+        return Exercise(url=exercise['href'], name=name, session=self.session, parent=self)
\ No newline at end of file
diff --git a/src/Base.py b/src/Base.py
index d55f3cc..bad12ca 100644
--- a/src/Base.py
+++ b/src/Base.py
@@ -1,10 +1,71 @@
# Noticed there's a similar pattern in the classes, so I'm going to create a base class for them
+# classes that inherit from Base:
+# - Course
+# - Assignment
+# - Exercise
from requests import Session
+from bs4 import BeautifulSoup
-class Thing:
- def __init__(url:str, name:str, session:Session, parent:Class):
+class Base:
+    def __init__(self, url:str=None, name:str=None, session:Session=None, parent=None):  # defaults let subclasses call a bare super().__init__() and assign attributes themselves
self.url = url
self.name = name
self.session = session
- self.parent = parent
\ No newline at end of file
+ self.parent = parent
+
+ def __parseCfgBlock(self, div:BeautifulSoup) -> dict:
+ # We assume that the div is a submission with class "cfg-container round"
+ # Put each key and value in a dictionary
+ # The key is a span with a class "cfg-key"
+ # The value is a span with a class "cfg-val"
+
+ # Get the key and value spans
+ keys = div.find_all('span', class_="cfg-key")
+ values = div.find_all('span', class_="cfg-val")
+
+ # Create a dictionary
+ submission = {}
+
+ # Put each key and value in the dictionary
+ for i in range(len(keys)):
+ submission[keys[i].text] = values[i].text
+
+ return submission
+
+ def getSubmissions(self):
+ # We change the url where course becomes stats
+ url = self.url.replace("course", "stats")
+ r = self.session.get(url)
+
+ # Get each div with class "cfg-container round"
+ soup = BeautifulSoup(r.text, 'lxml')
+ divs = soup.find_all('div', class_="cfg-container round")
+
+ # The first one is an overview, the next ones are the submissions
+ submissions = []
+ for div in divs[1:]:
+ submissions.append(self.__parseCfgBlock(div))
+ return self.__parseCfgBlock(divs[0]), submissions
+
+ def liLargeToAssignments(self, ul:BeautifulSoup) -> list:
+ # Assume that ul is the block surrounding the li elements
+ # Get all the li elements
+ lis = ul.find_all('li', class_='large')
+ # Turn each to an assignment instance
+ assignments = []
+ for li in lis:
+            assignments.append(Base(li.a['href'], li.a.text, self.session, self))  # parent is the calling object itself, not its parent
+ return assignments
+
+ def liLargeToExercises(self, ul:BeautifulSoup) -> list:
+ # Assume that ul is the block surrounding the li elements
+ # Get all the li elements
+ lis = ul.find_all('li', class_='large')
+ # Turn each to an exercise instance
+ exercises = []
+ for li in lis:
+            exercises.append(Base(li.a['href'], li.a.text, self.session, self))  # parent is the calling object itself, not its parent
+ return exercises
+
+
\ No newline at end of file
diff --git a/src/Course.py b/src/Course.py
index 4f6d07a..15a1b57 100644
--- a/src/Course.py
+++ b/src/Course.py
@@ -1,28 +1,25 @@
# Class to handle courses
from bs4 import BeautifulSoup
from requests import Session
-from Year import Year
from Assignment import Assignment
import re
from Base import Base
+from exceptions.CourseUnavailable import CourseUnavailable
class Course(Base):
- def __init__(url:str, name:str, session:Session, parent:Year):
- super().__init__()
- self.url = self.__constructURL("name")
+ # Extend the Base class init
+ def __init__(self, url:str, name:str, session:Session, parent):
+ super().__init__(url, name, session, parent)
self.assignments = []
+ self.__courseAvailable(self.session.get(self.url))
def __str__(self):
return f"Course {self.name} in year {self.parent.year}"
-
- def __constructURL(self, name:str):
- # We have to find the name in the page and find its corresponding url
- r = self.session.get(url)
- soup = BeautifulSoup(r.text, 'lxml')
- # Find the course
- course = soup.find('a', text=self.name)
- # Get the url
- return course['href']
+
+ def __courseAvailable(self, r):
+ # Check if we got an error
+ if "Something went wrong" in r.text:
+ raise CourseUnavailable()
@property
def courseInfo(self):
@@ -55,22 +52,14 @@ class Course(Base):
# For each link in the course page, get the assignment
r = self.session.get(self.url)
soup = BeautifulSoup(r.text, 'lxml')
- # Find the assignments, they are in
- assignments = soup.find_all('li', class_='large')
+ # Find the big ul
+        section = soup.find('div', class_="ass-children")
+        # Some course pages lack the container; guard against AttributeError
+        ul = section.find('ul', class_='round') if section else None
- # FIXME: They sometimes put other stuff in these li's, so we have to filter them out
-
- # Create assignment object for each and store them in the class
- for assignment in assignments:
- # Get the name
- name = assignment.find('a').text
- # Get the url
- url = assignment.find('a')['href']
- # Create the object
- self.assignments.append(Assignment(url=url, name=name, session=self.session, course=self))
-
-
- def getGrades(self):
- pass
-
-
\ No newline at end of file
+ # IDEA: They sometimes put other stuff in these li's, so we have to filter them out
+        if ul is None:
+            return []
+ # Transform them into Assignment objects
+ # I want to call the __liLargeToAssignments method from the Base class
+ return self.liLargeToAssignments(ul)
\ No newline at end of file
diff --git a/src/Downloadable.py b/src/Downloadable.py
index 99d159d..873679d 100644
--- a/src/Downloadable.py
+++ b/src/Downloadable.py
@@ -2,14 +2,15 @@
from requests import Session
from bs4 import BeautifulSoup
+from Base import Base
-class Downloadable:
- def __init__(self, session:Session, parent:Class):
+class Downloadable(Base):
+    def __init__(self, url:str, name:str, session:Session, parent):  # FIXME(review): callers pass url/name but they are never stored; Base.__init__ is not called
self.session = session
self.parent = parent
# File handling
- def __findFile(self, name:str) -> File:
+ def __findFile(self, name:str):
# Get the file by name
for file in self.files:
if file.name == name:
@@ -17,15 +18,26 @@ class Downloadable:
return None
@property
- def files(self) -> list[File]:
+ def files(self) -> list:
# Create a list of files
- # They are all in a span with class "cfg-val"
+ # They are all links in a span with class "cfg-val"
r = self.session.get(self.url)
soup = BeautifulSoup(r.text, 'lxml')
- spans = soup.find_all('span', class_="cfg-val")
- # Get the links and names of the files, create a File object for each
- files = [File(url=a['href'], name=a.text, session=self.session, parent=self) for a in spans]
- return files
+        # Only the "Downloads" row of the cfg block holds file links, so
+        # walk the key/value span pairs and pick the matching value span.
+        # (Avoids __parseCfgBlock: it is name-mangled to Base and returns text only.)
+        cfg = soup.find('div', class_='cfg-container round')
+        if cfg is None:
+            return []
+        keys = cfg.find_all('span', class_='cfg-key')
+        vals = cfg.find_all('span', class_='cfg-val')
+        links = []
+        for key, val in zip(keys, vals):
+            if 'Downloads' in key.text:
+                links = val.find_all('a')
+        # The File class was deleted in this change; Base carries url/name so
+        # each entry can still be fetched by its url.
+        return [Base(link['href'], link.text, self.session, self) for link in links]
def download(self, filename:str) -> str:
# Download the file
diff --git a/src/Exercise.py b/src/Exercise.py
index 5a9c9e8..2f171fb 100644
--- a/src/Exercise.py
+++ b/src/Exercise.py
@@ -1,5 +1,71 @@
from Base import Base
+from Downloadable import Downloadable
+from requests import Session
+from bs4 import BeautifulSoup
+from time import sleep
+
class Exercise(Base):
- //TODO: Implement
+ def __init__(self, url:str, name:str, session:Session, parent):
+        super().__init__(url, name, session, parent)  # forward everything so Base stores url/name/session/parent
+ self.download = Downloadable(url, name, session, self)
+
+ def __str__(self):
+ return f"Exercise {self.name} in assignment {self.parent.name}"
+
+ # IDEA : Make this async, so we don't have to wait for the whole output to load
+ def submit(self, file:str, comment:str) -> str:
+ # Submit a file
+ # The form is in the page with class "cfg-container round"
+ # The form is a POST request to the url with the file and the comment
+ # The url looks like this: https://themis.housing.rug.nl/submit/{year}/{course}/{assignment}/{exercise}?_csrf={session_csrf}&sudo={username}
+ # The current url looks like: https://themis.housing.rug.nl/course/{year}/{course}/{assignment}/{exercise}
+ # The request should contain the contents of the file
+
+ # Get the url
+ url = self.url.replace("course", "submit")
+ # Get the csrf token
+ csrf = self.session.cookies['_csrf']
+ # Get the username
+ username = self.session.cookies['username']
+
+ # Open the file
+ with open(file, 'rb') as f:
+ # Submit the file
+ # After submission it will 302 to the current submission page
+ r = self.session.post(url, files={'file': f}, data={'comment': comment, '_csrf': csrf, 'sudo': username})
+
+ # Follow the redirect and repeatedly send get requests to the page
+
+ # We have a table which represents the test cases. The program should wait until all the test cases are done
+ # The test case is done when all of the elements in the table are not none
+ # The element which showcases this for each
+ # is the class in there. if it is "queued" it is still running.
+
+ # Get the url
+ url = r.url
+ # Get the page
+ r = self.session.get(url)
+ # Get the soup
+ soup = BeautifulSoup(r.text, 'lxml')
+ # Get the table
+ table = soup.find('table')
+ # Get the rows
+ rows = table.find_all('tr', class_='sub-casetop')
+ # Get the status
+ status = [row.find('td', class_='status').text for row in rows]
+ # Wait until all the status are not queued
+ while "queued" in status:
+ # Wait a bit
+ sleep(1)
+ # Get the page
+ r = self.session.get(url)
+ # Get the soup
+ soup = BeautifulSoup(r.text, 'lxml')
+ # Get the table
+ table = soup.find('table')
+ # Get the rows
+ rows = table.find_all('tr', class_='sub-casetop')
+            status = [row.find('td', class_='status').text for row in rows]  # refresh status, otherwise the loop never exits
+
pass
\ No newline at end of file
diff --git a/src/File.py b/src/File.py
deleted file mode 100644
index b4d1ba7..0000000
--- a/src/File.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# Module to handle files
-from Base import Base
-from Downloadable import Downloadable
-from requests import Session
-
-class File(Base):
- def __init__(self, url:str, name:str, session:Session, parent:Downloadable):
- super().__init__()
-
- def __str__(self):
- return f"File {self.name} for parent of Downloadable {self.parent.parent.name}"
-
- # I know this is reduntant, but how can you have a file class without a download()
- def download(self) -> str:
- r = self.session.get(self.url, stream=True)
- with open(self.name, 'wb') as f:
- for chunk in r.iter_content(chunk_size=1024):
- if chunk:
- f.write(chunk)
- return file.name
-
- def __eq__(self, other:File) -> bool:
- return self.name == other.name
\ No newline at end of file
diff --git a/src/Themis.py b/src/Themis.py
index 7a23b7c..d01ef3e 100644
--- a/src/Themis.py
+++ b/src/Themis.py
@@ -1,14 +1,70 @@
-from login import login
from Year import Year
+import urllib3
+from requests import Session
+from bs4 import BeautifulSoup
+
+# Disable warnings
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class Themis:
- def __init__(self):
- self.session = login()
+ def __init__(self,user:str, passwd:str):
+ self.session = self.login(user,passwd)
self.years = []
+ self.url = "https://themis.housing.rug.nl/course/"
- def getYears(self):
- pass
+ def login(self, user, passwd):
+ headers = {
+ "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/80.0.3987.160 Chrome/80.0.3987.163 Safari/537.36"
+ }
- def getYear(self, end:int):
- pass
+ data = {
+ "user": user,
+ "password":passwd,
+ "null": None
+ }
+
+ with Session() as s:
+ url = 'https://themis.housing.rug.nl/log/in'
+ r = s.get(url,headers=headers,verify=False)
+ soup = BeautifulSoup(r.text, 'lxml')
+ # get the csrf token and add it to payload
+ csrfToken = soup.find('input',attrs = {'name':'_csrf'})['value']
+ data['_csrf'] = csrfToken
+
+ # Login
+ r = s.post(url,data=data,headers = headers)
+
+ # check if login was successful
+ log_out = "Welcome, logged in as" in r.text
+ if not log_out:
+ raise Exception(f"Login for user {user} failed")
+
+ return s
+
+
+ def getYear(self, start:int, end:int):
+ # Get the current year
+ return Year(self.session, self, start, end)
+
+ def allYears(self):
+ # All of them are in a big ul at the beginning of the page
+ r = self.session.get(self.url)
+ soup = BeautifulSoup(r.text, 'lxml')
+ ul = soup.find('ul', class_='round')
+ lis = ul.find_all('li', class_='large')
+ years = []
+ for li in lis:
+ # format: 2019-2020
+ year = li.a.text.split("-")
+ years.append(Year(self.session, self, int(year[0]), int(year[1])))
+
+ return years # Return a list of year objects
+
+
+# Entry point: prompt for credentials instead of hard-coding them.
+def main():
+    from getpass import getpass
+    themis = Themis(input("Username: "), getpass("Password: "))  # Themis() without args raised TypeError
+    print(themis.getYear(2019, 2020).getCourses())
+if __name__ == "__main__": main()  # the original defined main() but never invoked it
diff --git a/src/Year.py b/src/Year.py
index ad47489..126d2d9 100644
--- a/src/Year.py
+++ b/src/Year.py
@@ -1,29 +1,52 @@
# Year class to represent an academic year
from bs4 import BeautifulSoup
-import selenium
-from login import login
from Course import Course
-from Themis import Themis
-from Base import Base
+from requests import Session
+from exceptions.CourseUnavailable import CourseUnavailable
-class Year(Base):
- def __init__(name:str, session:Session, parent:Themis, end_year:int):
- super().__init__()
- self.start = end_year - 1
+class Year:
+ def __init__(self, session:Session, parent, start_year:int, end_year:int):
+ self.start = start_year
self.year = end_year
- self.url = __constructUrl()
+ self.session = session
+ self.url = self.__constructUrl()
# Method to set the url
def __constructUrl(self):
- return f"https://themis.housing.rug.nl/{self.start}-{self.year}"
+ return f"https://themis.housing.rug.nl/course/{self.start}-{self.year}"
# Method to get the courses of the year
- def getCourses(self) -> list[Course]:
+ def getCourses(self, errors:bool=False) -> list[Course]:
+ # lis in a big ul
+ r = self.session.get(self.url)
+ soup = BeautifulSoup(r.text, 'lxml')
+ lis = soup.find_all('li', class_='large')
courses = []
- # TODO: Logic to get all courses
+ for li in lis:
+ try:
+ courses.append(
+ Course(
+ self.url + li.a['href'],
+ li.a.text,
+ self.session,
+ self
+ )
+ )
+ except CourseUnavailable:
+ if errors:
+ raise CourseUnavailable(f"Course {li.a.text} in year {self.start}-{self.year} is not available")
+ else:
+ pass
+
+
return courses
def getCourse(self, name:str) -> Course:
- #TODO: Implement
- pass
\ No newline at end of file
+ # Get the course
+ r = self.session.get(self.url)
+ soup = BeautifulSoup(r.text, 'lxml')
+ # Search by name
+ course = soup.find('a', text=name)
+ # Get the url and transform it into a course object
+        return Course(url=course['href'], name=name, session=self.session, parent=self)  # Course.__init__ takes 'parent', not 'year'
\ No newline at end of file
diff --git a/src/config.py b/src/config.py
deleted file mode 100644
index 355a4a5..0000000
--- a/src/config.py
+++ /dev/null
@@ -1,5 +0,0 @@
-username = ""
-password = ""
-
-if __name__ == "__main__":
- print("Do not run this module like this. Just set the values.")
\ No newline at end of file
diff --git a/src/exceptions/CourseUnavailable.py b/src/exceptions/CourseUnavailable.py
new file mode 100644
index 0000000..465bf5a
--- /dev/null
+++ b/src/exceptions/CourseUnavailable.py
@@ -0,0 +1,4 @@
+class CourseUnavailable(Exception):
+ def __init__(self, message:str="Error in course"):
+ self.message = message
+ super().__init__(self.message)
\ No newline at end of file
diff --git a/src/login.py b/src/login.py
deleted file mode 100644
index bed513d..0000000
--- a/src/login.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Module to handle login
-# URL to login: https://themis.housing.rug.nl/log/in
-# POST request which contains the following data:
-# - username
-# - password
-# - null
-
-from requests import Session
-from bs4 import BeautifulSoup
-from config import username, password
-import urllib3
-
-# Disable warnings
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-# Function to login to Themis
-def login(user, passwd):
- headers = {
- "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/80.0.3987.160 Chrome/80.0.3987.163 Safari/537.36"
- }
-
- data = {
- "user": username,
- "password":password,
- "null": None
- }
-
- with Session() as s:
- url = 'https://themis.housing.rug.nl/log/in'
- r = s.get(url,headers=headers,verify=False)
- soup = BeautifulSoup(r.text, 'lxml')
-
- # get the csrf token and add it to payload
- csrfToken = soup.find('input',attrs = {'name':'_csrf'})['value']
- data['_csrf'] = csrfToken
-
- # Login
- r = s.post(url,data=data,headers = headers)
-
- # check if login was successful
- log_out = "Welcome, logged in as" in r.text
- if log_out:
- print(f"Login for user {username} successful")
- else:
- print("Login failed")
- return None
-
- return s
-
-
-if __name__ == "__main__":
- print("Do not run this module like this. Used to give a logged in session.")
\ No newline at end of file