Formatted, debugging. Added exception.

Boyan 2024-02-13 19:24:25 +01:00
parent 5e975cead1
commit 99e356d96a
12 changed files with 294 additions and 157 deletions

.gitignore

@@ -1,5 +1,6 @@
 # Config
 config.py
+baller.py
 
 # Byte-compiled / optimized / DLL files
 __pycache__/

Assignment.py

@@ -1,25 +1,30 @@
 # Module to handle each assignment (most difficult part)
-from Course import Course
+from Downloadable import Downloadable
+from File import File
+from Submission import Submission
 from Base import Base
 from Exercise import Exercise
+from requests import Session
 
 class Assignment(Base):
-    def __init__(self, url:str, name:str, session:Session, parent:Course):
+    def __init__(self, url:str, name:str, session:Session, parent):
         super().__init__()
-        self.files = self.files
+        self.download = Downloadable(url, name, session, self)
 
     def __str__(self):
         return f"Assignment {self.name} in course {self.parent.name}"
 
-    def getSubmissions(self) -> Submission:
-        pass
-
-    def getExercises(self) -> list[Excercise]:
-        pass
+    def getExercises(self) -> list[Exercise]:
+        # Find li large
+        ul = self.soup.find('ul', class_='round')
+        # Turn each li to an exercise instance
+        return self.liLargeToExercises(ul, self.session, self)
 
     def getExercise(self, name:str) -> Exercise:
-        pass
+        # Get the exercise
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        # Search by name
+        exercise = soup.find('a', text=name)
+        # Get the url and transform it into an exercise object
+        return Exercise(url=exercise['href'], name=name, session=self.session, assignment=self)

Base.py

@@ -1,10 +1,71 @@
 # Noticed there's a similar pattern in the classes, so I'm going to create a base class for them
+# classes that inherit from Base:
+# - Course
+# - Assignment
+# - Exercise
 from requests import Session
+from bs4 import BeautifulSoup
 
-class Thing:
-    def __init__(url:str, name:str, session:Session, parent:Class):
+class Base:
+    def __init__(self, url:str, name:str, session:Session, parent):
         self.url = url
         self.name = name
         self.session = session
         self.parent = parent
+
+    def __parseCfgBlock(self, div:BeautifulSoup) -> dict:
+        # We assume that the div is a submission with class "cfg-container round"
+        # Put each key and value in a dictionary
+        # The key is a span with a class "cfg-key"
+        # The value is a span with a class "cfg-val"
+        # Get the key and value spans
+        keys = div.find_all('span', class_="cfg-key")
+        values = div.find_all('span', class_="cfg-val")
+        # Create a dictionary
+        submission = {}
+        # Put each key and value in the dictionary
+        for i in range(len(keys)):
+            submission[keys[i].text] = values[i].text
+        return submission
+
+    def getSubmissions(self):
+        # We change the url where course becomes stats
+        url = self.url.replace("course", "stats")
+        r = self.session.get(url)
+        # Get each div with class "cfg-container round"
+        soup = BeautifulSoup(r.text, 'lxml')
+        divs = soup.find_all('div', class_="cfg-container round")
+        # The first one is an overview, the next ones are the submissions
+        submissions = []
+        for div in divs[1:]:
+            submissions.append(self.__parseCfgBlock(div))
+        return self.__parseCfgBlock(divs[0]), submissions
+
+    def liLargeToAssignments(self, ul:BeautifulSoup) -> list:
+        # Assume that ul is the block surrounding the li elements
+        # Get all the li elements
+        lis = ul.find_all('li', class_='large')
+        # Turn each to an assignment instance
+        assignments = []
+        for li in lis:
+            assignments.append(Base(li.a['href'], li.a.text, self.session, self.parent))
+        return assignments
+
+    def liLargeToExercises(self, ul:BeautifulSoup) -> list:
+        # Assume that ul is the block surrounding the li elements
+        # Get all the li elements
+        lis = ul.find_all('li', class_='large')
+        # Turn each to an exercise instance
+        exercises = []
+        for li in lis:
+            exercises.append(Base(li.a['href'], li.a.text, self.session, self.parent))
+        return exercises
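
For reference, a minimal sketch of the markup that __parseCfgBlock above assumes (a "cfg-container round" div holding "cfg-key"/"cfg-val" span pairs) and the dict it would produce. The sample HTML is invented for illustration and is not taken from a real Themis page:

from bs4 import BeautifulSoup

# Invented sample of the assumed markup; a real Themis page may differ.
html = """
<div class="cfg-container round">
  <span class="cfg-key">Deadline</span><span class="cfg-val">2024-03-01 23:59</span>
  <span class="cfg-key">Downloads</span><span class="cfg-val"><a href="/files/a1.pdf">a1.pdf</a></span>
</div>
"""
div = BeautifulSoup(html, 'lxml').find('div', class_='cfg-container round')
keys = div.find_all('span', class_='cfg-key')
values = div.find_all('span', class_='cfg-val')
print({k.text: v.text for k, v in zip(keys, values)})
# {'Deadline': '2024-03-01 23:59', 'Downloads': 'a1.pdf'}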

Course.py

@@ -1,28 +1,25 @@
 # Class to handle courses
 from bs4 import BeautifulSoup
 from requests import Session
-from Year import Year
 from Assignment import Assignment
 import re
 from Base import Base
+from exceptions.CourseUnavailable import CourseUnavailable
 
 class Course(Base):
-    def __init__(url:str, name:str, session:Session, parent:Year):
-        super().__init__()
-        self.url = self.__constructURL("name")
+    # Extend the Base class init
+    def __init__(self, url:str, name:str, session:Session, parent):
+        super().__init__(url, name, session, parent)
         self.assignments = []
+        self.__courseAvailable(self.session.get(self.url))
 
     def __str__(self):
         return f"Course {self.name} in year {self.parent.year}"
 
-    def __constructURL(self, name:str):
-        # We have to find the name in the page and find its corresponding url
-        r = self.session.get(url)
-        soup = BeautifulSoup(r.text, 'lxml')
-        # Find the course
-        course = soup.find('a', text=self.name)
-        # Get the url
-        return course['href']
+    def __courseAvailable(self, r):
+        # Check if we got an error
+        if "Something went wrong" in r.text:
+            raise CourseUnavailable()
 
     @property
     def courseInfo(self):
@@ -55,22 +52,14 @@ class Course(Base):
         # For each link in the course page, get the assignment
         r = self.session.get(self.url)
         soup = BeautifulSoup(r.text, 'lxml')
-        # Find the assignments, they are in <li class="large">
-        assignments = soup.find_all('li', class_='large')
-        # FIXME: They sometimes put other stuff in these li's, so we have to filter them out
-        # Create assignment object for each and store them in the class
-        for assignment in assignments:
-            # Get the name
-            name = assignment.find('a').text
-            # Get the url
-            url = assignment.find('a')['href']
-            # Create the object
-            self.assignments.append(Assignment(url=url, name=name, session=self.session, course=self))
-
-    def getGrades(self):
-        pass
+        # Find the big ul
+        print(soup)
+        section = soup.find('div', class_="ass-children")
+        ul = section.find('ul', class_='round')
+        # IDEA: They sometimes put other stuff in these li's, so we have to filter them out
+        print(ul)
+        print(type(ul))
+        # Transform them into Assignment objects
+        # I want to call the __liLargeToAssignments method from the Base class
+        return self.liLargeToAssignments(ul)

Downloadable.py

@@ -2,14 +2,15 @@
 from requests import Session
 from bs4 import BeautifulSoup
+from Base import Base
 
-class Downloadable:
-    def __init__(self, session:Session, parent:Class):
+class Downloadable(Base):
+    def __init__(self, session:Session, parent):
         self.session = session
         self.parent = parent
 
     # File handling
-    def __findFile(self, name:str) -> File:
+    def __findFile(self, name:str):
         # Get the file by name
         for file in self.files:
             if file.name == name:
@@ -17,15 +18,26 @@ class Downloadable:
         return None
 
     @property
-    def files(self) -> list[File]:
+    def files(self) -> list:
         # Create a list of files
-        # They are all in a span with class "cfg-val"
+        # They are all links in a span with class "cfg-val"
         r = self.session.get(self.url)
         soup = BeautifulSoup(r.text, 'lxml')
-        spans = soup.find_all('span', class_="cfg-val")
-        # Get the links and names of the files, create a File object for each
-        files = [File(url=a['href'], name=a.text, session=self.session, parent=self) for a in spans]
-        return files
+        # Make sure we only get the ones that have a link
+        # We parse the cfg and check for the key "Downloads"
+        cfg = soup.find('div', class_='cfg-container round')
+        cfg = self.__parseCfgBlock(cfg)
+        # Get the downloads
+        downloads = cfg.get("Downloads", None)
+        if downloads == None:
+            return []
+        # Get the links
+        links = downloads.find_all('a')
+        files = []
+        for link in links:
+            files.append(File(link['href'], link.text, self.session, self))
+        return files
 
     def download(self, filename:str) -> str:
         # Download the file

Exercise.py

@@ -1,5 +1,71 @@
 from Base import Base
+from Downloadable import Downloadable
+from requests import Session
+from time import sleep
 
 class Exercise(Base):
-    //TODO: Implement
+    def __init__(self, url:str, name:str, session:Session, parent):
+        super().__init__()
+        self.download = Downloadable(url, name, session, self)
+
+    def __str__(self):
+        return f"Exercise {self.name} in assignment {self.parent.name}"
+
+    # IDEA : Make this async, so we don't have to wait for the whole output to load
+    def submit(self, file:str, comment:str) -> str:
+        # Submit a file
+        # The form is in the page with class "cfg-container round"
+        # The form is a POST request to the url with the file and the comment
+        # The url looks like this: https://themis.housing.rug.nl/submit/{year}/{course}/{assignment}/{exercise}?_csrf={session_csrf}&sudo={username}
+        # The current url looks like: https://themis.housing.rug.nl/course/{year}/{course}/{assignment}/{exercise}
+        # The request should contain the contents of the file
+        # Get the url
+        url = self.url.replace("course", "submit")
+        # Get the csrf token
+        csrf = self.session.cookies['_csrf']
+        # Get the username
+        username = self.session.cookies['username']
+        # Open the file
+        with open(file, 'rb') as f:
+            # Submit the file
+            # After submission it will 302 to the current submission page
+            r = self.session.post(url, files={'file': f}, data={'comment': comment, '_csrf': csrf, 'sudo': username})
+        # Follow the redirect and repeatedly send get requests to the page
+        # We have a table which represents the test cases. The program should wait until all the test cases are done
+        # The test case is done when all of the elements in the table are not none
+        # The element which showcases this for each <tr class="sub-casetop">
+        # is the class in there. if it is "queued" it is still running.
+        # Get the url
+        url = r.url
+        # Get the page
+        r = self.session.get(url)
+        # Get the soup
+        soup = BeautifulSoup(r.text, 'lxml')
+        # Get the table
+        table = soup.find('table')
+        # Get the rows
+        rows = table.find_all('tr', class_='sub-casetop')
+        # Get the status
+        status = [row.find('td', class_='status').text for row in rows]
+        # Wait until all the status are not queued
+        while "queued" in status:
+            # Wait a bit
+            sleep(1)
+            # Get the page
+            r = self.session.get(url)
+            # Get the soup
+            soup = BeautifulSoup(r.text, 'lxml')
+            # Get the table
+            table = soup.find('table')
+            # Get the rows
+            rows = table.find_all('tr', class_='sub-casetop')
     pass
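
In the loop above, `status` does not appear to be recomputed after the page is re-fetched, so the wait condition never changes. A small sketch of the polling step as it seems to be intended, reusing the selectors from the diff (the 'tr.sub-casetop' and 'td.status' selectors are assumptions about the Themis markup taken from the code above, not a documented API); Exercise.submit could call something like this right after following the redirect:

from time import sleep
from bs4 import BeautifulSoup
from requests import Session

def wait_until_judged(session: Session, url: str, interval: float = 1.0) -> list[str]:
    # Poll the submission page until no test-case row is still marked "queued".
    while True:
        soup = BeautifulSoup(session.get(url).text, 'lxml')
        rows = soup.find('table').find_all('tr', class_='sub-casetop')
        status = [row.find('td', class_='status').text for row in rows]
        if "queued" not in status:
            return status
        sleep(interval)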

File.py

@@ -1,23 +0,0 @@
-# Module to handle files
-from Base import Base
-from Downloadable import Downloadable
-from requests import Session
-
-class File(Base):
-    def __init__(self, url:str, name:str, session:Session, parent:Downloadable):
-        super().__init__()
-
-    def __str__(self):
-        return f"File {self.name} for parent of Downloadable {self.parent.parent.name}"
-
-    # I know this is reduntant, but how can you have a file class without a download()
-    def download(self) -> str:
-        r = self.session.get(self.url, stream=True)
-        with open(self.name, 'wb') as f:
-            for chunk in r.iter_content(chunk_size=1024):
-                if chunk:
-                    f.write(chunk)
-        return file.name
-
-    def __eq__(self, other:File) -> bool:
-        return self.name == other.name

Themis.py

@@ -1,14 +1,70 @@
-from login import login
 from Year import Year
+import urllib3
+from requests import Session
+from bs4 import BeautifulSoup
+
+# Disable warnings
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 class Themis:
-    def __init__(self):
-        self.session = login()
+    def __init__(self,user:str, passwd:str):
+        self.session = self.login(user,passwd)
         self.years = []
+        self.url = "https://themis.housing.rug.nl/course/"
 
-    def getYears(self):
-        pass
-
-    def getYear(self, end:int):
-        pass
+    def login(self, user, passwd):
+        headers = {
+            "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/80.0.3987.160 Chrome/80.0.3987.163 Safari/537.36"
+        }
+        data = {
+            "user": user,
+            "password":passwd,
+            "null": None
+        }
+        with Session() as s:
+            url = 'https://themis.housing.rug.nl/log/in'
+            r = s.get(url,headers=headers,verify=False)
+            soup = BeautifulSoup(r.text, 'lxml')
+            # get the csrf token and add it to payload
+            csrfToken = soup.find('input',attrs = {'name':'_csrf'})['value']
+            data['_csrf'] = csrfToken
+            # Login
+            r = s.post(url,data=data,headers = headers)
+            # check if login was successful
+            log_out = "Welcome, logged in as" in r.text
+            if not log_out:
+                raise Exception(f"Login for user {user} failed")
+        return s
+
+    def getYear(self, start:int, end:int):
+        # Get the current year
+        return Year(self.session, self, start, end)
+
+    def allYears(self):
+        # All of them are in a big ul at the beginning of the page
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        ul = soup.find('ul', class_='round')
+        lis = ul.find_all('li', class_='large')
+        years = []
+        for li in lis:
+            # format: 2019-2020
+            year = li.a.text.split("-")
+            years.append(Year(self.session, self, int(year[0]), int(year[1])))
+        return years # Return a list of year objects
+
+# This is the main file, so we have to run the main function
+def main():
+    themis = Themis()
+    year = themis.getYear(2019, 2020)
+    print(year.getCourses())
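
A usage sketch of the interface after this commit (not part of the diff; the credentials and years below are placeholders):

from Themis import Themis

themis = Themis("s1234567", "hunter2")        # placeholder credentials; logs in via Themis.login()
year = themis.getYear(2023, 2024)             # Year object for the 2023-2024 academic year
for course in year.getCourses(errors=False):  # skip unavailable courses silently
    print(course.name)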

Year.py

@@ -1,29 +1,52 @@
 # Year class to represent an academic year
 from bs4 import BeautifulSoup
-import selenium
-from login import login
 from Course import Course
-from Themis import Themis
-from Base import Base
+from requests import Session
+from exceptions.CourseUnavailable import CourseUnavailable
 
-class Year(Base):
-    def __init__(name:str, session:Session, parent:Themis, end_year:int):
-        super().__init__()
-        self.start = end_year - 1
+class Year:
+    def __init__(self, session:Session, parent, start_year:int, end_year:int):
+        self.start = start_year
         self.year = end_year
-        self.url = __constructUrl()
+        self.session = session
+        self.url = self.__constructUrl()
 
     # Method to set the url
     def __constructUrl(self):
-        return f"https://themis.housing.rug.nl/{self.start}-{self.year}"
+        return f"https://themis.housing.rug.nl/course/{self.start}-{self.year}"
 
     # Method to get the courses of the year
-    def getCourses(self) -> list[Course]:
+    def getCourses(self, errors:bool=False) -> list[Course]:
+        # lis in a big ul
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        lis = soup.find_all('li', class_='large')
         courses = []
-        # TODO: Logic to get all courses
+        for li in lis:
+            try:
+                courses.append(
+                    Course(
+                        self.url + li.a['href'],
+                        li.a.text,
+                        self.session,
+                        self
+                    )
+                )
+            except CourseUnavailable:
+                if errors:
+                    raise CourseUnavailable(f"Course {li.a.text} in year {self.start}-{self.year} is not available")
+                else:
+                    pass
         return courses
 
     def getCourse(self, name:str) -> Course:
-        #TODO: Implement
-        pass
+        # Get the course
+        r = self.session.get(self.url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        # Search by name
+        course = soup.find('a', text=name)
+        # Get the url and transform it into a course object
+        return Course(url=course['href'], name=name, session=self.session, year=self)

config.py

@@ -1,5 +0,0 @@
-username = "<s-number>"
-password = "<password>"
-
-if __name__ == "__main__":
-    print("Do not run this module like this. Just set the values.")

exceptions/CourseUnavailable.py

@@ -0,0 +1,4 @@
+class CourseUnavailable(Exception):
+    def __init__(self, message:str="Error in course"):
+        self.message = message
+        super().__init__(self.message)
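
A tiny illustration (not part of the commit) of how the new exception behaves; Year.getCourses above raises it in the same way when errors=True:

from exceptions.CourseUnavailable import CourseUnavailable

try:
    raise CourseUnavailable("Course X in year 2023-2024 is not available")
except CourseUnavailable as e:
    print(e.message)  # message stored by __init__ above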

login.py

@@ -1,52 +0,0 @@
-# Module to handle login
-# URL to login: https://themis.housing.rug.nl/log/in
-# POST request which contains the following data:
-# - username
-# - password
-# - null
-
-from requests import Session
-from bs4 import BeautifulSoup
-from config import username, password
-import urllib3
-
-# Disable warnings
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-# Function to login to Themis
-def login(user, passwd):
-    headers = {
-        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/80.0.3987.160 Chrome/80.0.3987.163 Safari/537.36"
-    }
-    data = {
-        "user": username,
-        "password":password,
-        "null": None
-    }
-
-    with Session() as s:
-        url = 'https://themis.housing.rug.nl/log/in'
-        r = s.get(url,headers=headers,verify=False)
-        soup = BeautifulSoup(r.text, 'lxml')
-        # get the csrf token and add it to payload
-        csrfToken = soup.find('input',attrs = {'name':'_csrf'})['value']
-        data['_csrf'] = csrfToken
-        # Login
-        r = s.post(url,data=data,headers = headers)
-        # check if login was successful
-        log_out = "Welcome, logged in as" in r.text
-        if log_out:
-            print(f"Login for user {username} successful")
-        else:
-            print("Login failed")
-            return None
-    return s
-
-if __name__ == "__main__":
-    print("Do not run this module like this. Used to give a logged in session.")