Coded like crazy. The code is repetitive and optimizable. Almost done tho.

2025-07-05 20:54:58 +02:00 · 2024-02-12 14:29:04 +01:00
parent f176dbaa26
commit 5456c9632a
9 changed files with 268 additions and 6 deletions
--- a/src/Assignment.py
+++ b/src/Assignment.py
@ -0,0 +1,25 @@
+# Module to handle each assignment (most difficult part)
+
+from Course import Course
+from File import File
+from Submission import Submission
+from Base import Base
+from Exercise import Exercise
+
+class Assignment(Base):
+  """A single assignment that belongs to a course.
+
+  The retrieval methods are still placeholders and return None.
+  """
+
+  def __init__(self, url:str, name:str, session:"Session", parent:Course):
+    """Store the assignment's identity on the fields provided by Base.
+
+    Args:
+      url: Address of the assignment page.
+      name: Name of the assignment.
+      session: Session used for requests. Annotated as a string because
+        this module does not import `Session`.
+      parent: The Course this assignment belongs to.
+    """
+    # Base.__init__ needs all four values; calling it bare raised TypeError.
+    super().__init__(url, name, session, parent)
+    # Previously `self.files = self.files`, which read an attribute that had
+    # not been set yet and raised AttributeError.
+    # NOTE(review): presumably meant to hold File objects -- confirm.
+    self.files = []
+
+  def __str__(self):
+    return f"Assignment {self.name} in course {self.parent.name}"
+
+  def getSubmissions(self) -> Submission:
+    """Not implemented yet; returns None."""
+    pass
+
+  def getExercises(self) -> list[Exercise]:
+    """Not implemented yet; returns None."""
+    pass
+
+  def getExercise(self, name:str) -> Exercise:
+    """Not implemented yet; returns None."""
+    pass
--- a/src/Base.py
+++ b/src/Base.py
@ -0,0 +1,10 @@
+# Noticed there's a similar pattern in the classes, so I'm going to create a base class for them
+
+from requests import Session
+
+class Base:
+  """Common state shared by the wrapper classes that inherit from it.
+
+  Attributes are plain and public: `url`, `name`, `session`, `parent`.
+  """
+
+  def __init__(self, url:str, name:str, session:Session, parent:object):
+    """Record where the object lives and who owns it.
+
+    Args:
+      url: Address associated with the object.
+      name: Name of the object.
+      session: Session used for any requests the object makes.
+      parent: The owning object; subclasses read attributes such as
+        `parent.name` from it.
+    """
+    self.url = url
+    self.name = name
+    self.session = session
+    self.parent = parent
+
+
+# Former name of the class, kept so any existing reference keeps working.
+Thing = Base
--- a/src/Course.py
+++ b/src/Course.py
@ -0,0 +1,76 @@
+# Class to handle courses
+from bs4 import BeautifulSoup
+from requests import Session
+from Year import Year
+from Assignment import Assignment
+import re
+from Base import Base
+
+class Course(Base):
+  """A course inside an academic year, together with its assignments."""
+
+  def __init__(self, url:str, name:str, session:Session, parent:Year):
+    """Set up the course and resolve its own url.
+
+    Args:
+      url: Page that is searched for a link whose text is `name`.
+        NOTE(review): presumably the page of the parent year -- confirm.
+      name: Name of the course as it appears in the link text.
+      session: Session used for all requests.
+      parent: The Year this course belongs to.
+
+    Raises:
+      ValueError: If no link with the course name is found.
+    """
+    super().__init__(url, name, session, parent)
+    # self.url currently points at the page passed in; replace it with the
+    # link found on that page.
+    self.url = self.__constructURL(name)
+    self.assignments = []
+
+  def __str__(self):
+    return f"Course {self.name} in year {self.parent.year}"
+
+  def __constructURL(self, name:str):
+    """Return the href of the link whose text equals `name` on self.url."""
+    # We have to find the name in the page and find its corresponding url
+    r = self.session.get(self.url)
+    soup = BeautifulSoup(r.text, 'lxml')
+    # Find the course
+    course = soup.find('a', text=name)
+    if course is None:
+      raise ValueError(f"Course {name} not found at {self.url}")
+    # Get the url
+    return course['href']
+
+  @property
+  def courseInfo(self):
+    """Summary of the course as a plain dict."""
+    return {
+      "name": self.name,
+      "year": self.parent.year,
+      "url": self.url,
+      "assignments": [x.name for x in self.assignments]
+    }
+
+  def getAssignment(self, name:str) -> Assignment:
+    """Return the assignment called `name`.
+
+    An assignment already stored in `self.assignments` is returned as is;
+    otherwise the course page is fetched and searched for a matching link.
+
+    Raises:
+      ValueError: If the course page has no link with that text.
+    """
+    # Optimization: if we already have the assignment, don't fetch it again
+    for known in self.assignments:
+      if known.name == name:
+        return known
+
+    r = self.session.get(self.url)
+    soup = BeautifulSoup(r.text, 'lxml')
+
+    # Search by name
+    link = soup.find('a', text=name)
+    if link is None:
+      raise ValueError(f"Assignment {name} not found in course {self.name}")
+    # Assignment takes the owning course as `parent`
+    return Assignment(url=link['href'], name=name, session=self.session, parent=self)
+
+  def getAssignments(self) -> list[Assignment]:
+    """Fetch the course page and rebuild `self.assignments` from it.
+
+    Returns:
+      The refreshed list, which is also stored on the instance.
+    """
+    r = self.session.get(self.url)
+    soup = BeautifulSoup(r.text, 'lxml')
+
+    found = []
+    # The assignments are in <li class="large">
+    for item in soup.find_all('li', class_='large'):
+      link = item.find('a')
+      # FIXME: They sometimes put other stuff in these li's. Entries without
+      # a usable link are skipped; other unrelated entries are not filtered yet.
+      if link is None or link.get('href') is None:
+        continue
+      found.append(Assignment(url=link['href'], name=link.text, session=self.session, parent=self))
+
+    # Replace rather than extend so repeated calls do not duplicate entries
+    self.assignments = found
+    return self.assignments
+
+  def getGrades(self):
+    """Not implemented yet; returns None."""
+    pass
+
+  
--- a/src/Downloadable.py
+++ b/src/Downloadable.py
@ -0,0 +1,46 @@
+# Since we can download files both from the assignment itself and its exercises, this class will handle both
+
+from requests import Session
+from bs4 import BeautifulSoup
+
+class Downloadable:
+  """Shared file listing and download logic for assignments and exercises.
+
+  The methods read `self.url`, which this class does not set itself.
+  NOTE(review): presumably supplied by the class that uses this one -- confirm.
+  """
+
+  def __init__(self, session:Session, parent:object):
+    """Remember the session used for requests and the owning object."""
+    self.session = session
+    self.parent = parent
+
+  # File handling
+  def __findFile(self, name:str) -> "File | None":
+    """Return the listed file called `name`, or None if there is none."""
+    for file in self.files:
+      if file.name == name:
+        return file
+    return None
+
+  def __saveFile(self, file:"File") -> str:
+    """Stream `file` to disk under its own name and return that name."""
+    # The response is used as a context manager so the connection is released
+    with self.session.get(file.url, stream=True) as r:
+      with open(file.name, 'wb') as f:
+        for chunk in r.iter_content(chunk_size=1024):
+          if chunk:
+            f.write(chunk)
+    return file.name
+
+  @property
+  def files(self) -> "list[File]":
+    """List of File objects for the links found on the page at self.url.
+
+    Every access performs a new request.
+    """
+    # Imported here because File.py imports this module at load time
+    from File import File
+
+    # They are all in a span with class "cfg-val"
+    r = self.session.get(self.url)
+    soup = BeautifulSoup(r.text, 'lxml')
+    files = []
+    for span in soup.find_all('span', class_="cfg-val"):
+      # The href lives on the link inside the span, not on the span itself
+      for a in span.find_all('a', href=True):
+        files.append(File(url=a['href'], name=a.text, session=self.session, parent=self))
+    return files
+
+  def download(self, filename:str) -> str:
+    """Download the listed file called `filename` into the working directory.
+
+    Returns:
+      The name the file was saved under.
+
+    Raises:
+      NameError: If `filename` is None.
+      LookupError: If no listed file has that name.
+    """
+    if filename is None:
+      raise NameError("No filename provided")
+
+    file = self.__findFile(filename)
+    if file is None:
+      raise LookupError(f"No file named {filename} found")
+    return self.__saveFile(file)
+
+  def downloadAll(self) -> list[str]:
+    """Download every listed file and return the saved names."""
+    # Read the listing once instead of once per file
+    return [self.__saveFile(file) for file in self.files]
+
--- a/src/Exercise.py
+++ b/src/Exercise.py
@ -0,0 +1,5 @@
+from Base import Base
+
+class Exercise(Base):
+  """Placeholder for an exercise; adds nothing to Base yet."""
+  # TODO: Implement
+  pass
--- a/src/File.py
+++ b/src/File.py
@ -0,0 +1,23 @@
+# Module to handle files
+from Base import Base
+from Downloadable import Downloadable
+from requests import Session
+
+class File(Base):
+  """A downloadable file listed by a Downloadable."""
+
+  def __init__(self, url:str, name:str, session:Session, parent:Downloadable):
+    """Store the file's url, name, session and owning Downloadable."""
+    # Base.__init__ needs all four values; calling it bare raised TypeError.
+    super().__init__(url, name, session, parent)
+
+  def __str__(self):
+    return f"File {self.name} for parent of Downloadable {self.parent.parent.name}"
+
+  # I know this is redundant, but how can you have a file class without a download()
+  def download(self) -> str:
+    """Stream the file to disk under its own name and return that name."""
+    # The response is used as a context manager so the connection is released
+    with self.session.get(self.url, stream=True) as r:
+      with open(self.name, 'wb') as f:
+        for chunk in r.iter_content(chunk_size=1024):
+          if chunk:
+            f.write(chunk)
+    return self.name
+
+  def __eq__(self, other:object) -> bool:
+    """Two files are equal when their names match."""
+    if not isinstance(other, File):
+      return NotImplemented
+    return self.name == other.name
+
+  def __hash__(self) -> int:
+    # Defining __eq__ alone would make instances unhashable; hash on the
+    # same field that equality uses.
+    return hash(self.name)
--- a/src/Themis.py
+++ b/src/Themis.py
@ -0,0 +1,14 @@
+from login import login
+from Year import Year
+
+class Themis:
+  """Top-level object that owns the session and the list of years."""
+
+  def __init__(self):
+    """Start with no years and obtain a session from login()."""
+    self.years = []
+    self.session = login()
+
+  def getYears(self):
+    """Not implemented yet; returns None."""
+    return None
+
+  def getYear(self, end:int):
+    """Not implemented yet; returns None."""
+    return None
+
--- a/src/Year.py
+++ b/src/Year.py
@ -1,8 +1,29 @@
+# Year class to represent an academic year
+
 from bs4 import BeautifulSoup
 import selenium
+from login import login
+from Course import Course
+from Themis import Themis
+from Base import Base

-# Class to represent an academic year
-#  
-class Year:
-  def __init__
-  
+class Year(Base):
+  """An academic year, identified by the calendar year in which it ends."""
+
+  def __init__(self, name:str, session:"Session", parent:Themis, end_year:int):
+    """Build the year and derive its url from the start and end years.
+
+    Args:
+      name: Name of the year.
+      session: Session used for requests. Annotated as a string because
+        this module does not import `Session`.
+      parent: The Themis object that owns this year.
+      end_year: Calendar year in which the academic year ends.
+    """
+    # Both fields are needed to build the url, so set them first
+    self.start = end_year - 1
+    self.year = end_year
+    super().__init__(self.__constructUrl(), name, session, parent)
+
+  # Method to set the url
+  def __constructUrl(self):
+    return f"https://themis.housing.rug.nl/{self.start}-{self.year}"
+
+  # Method to get the courses of the year
+  def getCourses(self) -> list[Course]:
+    """Return the courses of the year; currently always an empty list."""
+    courses = []
+    # TODO: Logic to get all courses
+    return courses
+
+  def getCourse(self, name:str) -> Course:
+    """Not implemented yet; returns None."""
+    #TODO: Implement
+    pass
--- a/src/login.py
+++ b/src/login.py
@ -5,6 +5,48 @@
 # - password
 # - null

-from requests import post
+from requests import Session
 from bs4 import BeautifulSoup
+from config import username, password
+import urllib3

+# Silence urllib3's InsecureRequestWarning; login() fetches the login page
+# with verify=False.
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+# Function to login to Themis
+def login(user=None, passwd=None):
+  """Log in to Themis and return the session used for it.
+
+  Args:
+    user: Account name; `username` from config is used when omitted.
+    passwd: Password; `password` from config is used when omitted.
+
+  Returns:
+    The open Session on success, or None when the login page has no CSRF
+    field or the response does not contain the welcome text.
+  """
+  if user is None:
+    user = username
+  if passwd is None:
+    passwd = password
+
+  headers = {
+    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/80.0.3987.160 Chrome/80.0.3987.163 Safari/537.36"
+  }
+
+  data = {
+    "user": user,
+    "password": passwd,
+    "null": None
+  }
+
+  url = 'https://themis.housing.rug.nl/log/in'
+
+  # Deliberately not a `with` block: leaving it would close the session
+  # before the caller receives it.
+  s = Session()
+  try:
+    # NOTE(review): certificate verification is disabled for this request only.
+    r = s.get(url,headers=headers,verify=False)
+    soup = BeautifulSoup(r.text, 'lxml')
+
+    # get the csrf token and add it to payload
+    tokenField = soup.find('input',attrs = {'name':'_csrf'})
+    if tokenField is None:
+      print("Login failed: no CSRF token found on the login page")
+      s.close()
+      return None
+    data['_csrf'] = tokenField['value']
+
+    # Login
+    r = s.post(url,data=data,headers = headers)
+  except Exception:
+    # Do not leak the session when a request or the parsing blows up
+    s.close()
+    raise
+
+  # check if login was successful
+  if "Welcome, logged in as" not in r.text:
+    print("Login failed")
+    s.close()
+    return None
+
+  print(f"Login for user {user} successful")
+  return s
+
+
+# Running this file directly only prints a notice; the module is meant to be
+# imported for its login() function.
+if __name__ == "__main__":
+  print("Do not run this module like this. Used to give a logged in session.")