"""Download the videos listed in ``BAD_APPLE`` and dump them as 0/1 text frames."""

import os
import subprocess
from time import sleep
from typing import List, Optional, Tuple

import cv2
import yt_dlp
from tqdm import tqdm

# Download presets. Each entry holds the source URL, the yt-dlp format
# selector, and the aspect ratio the download is treated as having
# (``None`` for the audio-only preset). Anything not tagged (4, 3) is cropped
# to 4:3 after download.
# NOTE(review): the 1080/1440/2160 selectors cap *width* at the key value even
# though those presets are tagged 16:9 -- confirm a height cap was not intended.
BAD_APPLE = {
    '960': {
        'url': 'https://www.youtube.com/watch?v=9lNZ_Rnr7Jc',
        'format': 'bestvideo[ext=mp4][width<=960]+bestaudio[ext=m4a]/best[ext=mp4]',
        "ratio": (4, 3),
    },
    '1080': {
        'url': 'https://www.youtube.com/watch?v=0n3jW9vlH70',
        'format': 'bestvideo[ext=mp4][width<=1080]+bestaudio[ext=m4a]/best[ext=mp4]',
        "ratio": (16, 9),
    },
    '1440': {
        'url': 'https://www.youtube.com/watch?v=0n3jW9vlH70',
        'format': 'bestvideo[ext=mp4][width<=1440]+bestaudio[ext=m4a]/best[ext=mp4]',
        "ratio": (16, 9),
    },
    '2160': {
        'url': 'https://www.youtube.com/watch?v=0n3jW9vlH70',
        'format': 'bestvideo[ext=mp4][width<=2160]+bestaudio[ext=m4a]/best[ext=mp4]',
        "ratio": (16, 9),
    },
    'audio': {
        'url': 'https://www.youtube.com/watch?v=9lNZ_Rnr7Jc',
        'format': 'bestaudio[ext=m4a]/best',
        "ratio": None,
    },
}


class BadApple:
    """Download ``BAD_APPLE`` presets and convert videos to 0/1 text frames."""

    def __init__(self, download: bool = True, keys: Optional[List[str]] = None) -> None:
        """Store the preset keys and optionally download them right away.

        Args:
            download: When true and ``keys`` is non-empty, call
                ``download_videos`` during construction.
            keys: Preset names from ``BAD_APPLE``; ``None`` means no presets.
        """
        self.keys = keys if keys is not None else []
        if download and self.keys:
            self.download_videos(self.keys)

    def header(self, video: str, fps: int) -> str:
        """Return the three-line text header describing ``video``.

        Args:
            video: Path of the video file to inspect.
            fps: Value written verbatim on the ``fps:`` line (the caller's
                requested rate, not the rate stored in the file).

        Returns:
            ``fps``, ``frames`` and ``resolution`` lines joined by newlines.
            ``frames`` is the file's ``CAP_PROP_FRAME_COUNT``, i.e. the source
            frame count rather than the number of frames sampled at ``fps``.
        """
        cap = cv2.VideoCapture(video)
        try:
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        finally:
            cap.release()

        header_lines = [
            f"fps: {fps}",
            f"frames: {total_frames}",
            f"resolution: {width}x{height}",
        ]
        return "\n".join(header_lines)

    @staticmethod
    def _resolve_target_size(
        orig_width: int,
        orig_height: int,
        width: Optional[int],
        height: Optional[int],
    ) -> Tuple[int, int]:
        """Work out the output frame size from the optional width/height request."""
        if width is None and height is None:
            target_width, target_height = orig_width, orig_height
        elif width is not None and height is not None:
            # Cross-multiplication compares the two ratios without float error.
            if orig_width * height != orig_height * width:
                raise ValueError("Violated aspect ratio")
            target_width, target_height = width, height
        elif width is not None:
            target_width = width
            target_height = int(orig_height * (width / orig_width))
        else:
            target_height = height
            target_width = int(orig_width * (height / orig_height))

        if target_width < 1 or target_height < 1:
            raise ValueError("Resolution must be at least 1x1")
        if target_width > orig_width or target_height > orig_height:
            raise ValueError("Lower your resolution")
        return target_width, target_height

    @staticmethod
    def _frame_to_line(bw) -> str:
        """Serialise a 0/1 uint8 frame as '|'-separated rows of '0'/'1' characters."""
        # Adding ord("0") maps 0/1 to ASCII '0'/'1', so every row is decoded in
        # one pass instead of calling str() once per pixel.
        ascii_rows = bw + ord("0")
        return "|".join(row.tobytes().decode("ascii") for row in ascii_rows)

    def basic(
        self,
        video: str,
        output_file: str,
        fps: int = 15,
        width: Optional[int] = None,
        height: Optional[int] = None,
    ) -> None:
        """Sample ``video`` at roughly ``fps`` and write thresholded frames as text.

        The input path always has its extension replaced by ``.mp4``. The output
        starts with ``header()`` plus a ``target_resolution`` line, followed by
        one line per sampled frame: pixel rows of ``0``/``1`` joined by ``|``
        (1 where the grayscale value is above 128).

        Args:
            video: Path of the source video (extension is swapped for ``.mp4``).
            output_file: Path of the text file to create or overwrite.
            fps: Desired sampling rate; every ``round(source_fps / fps)``-th
                frame is kept, never fewer than every frame.
            width: Output width; height follows the source aspect ratio if omitted.
            height: Output height; width follows the source aspect ratio if omitted.

        Raises:
            ValueError: If ``fps`` is not positive, the requested size breaks the
                source aspect ratio, is smaller than 1x1, or exceeds the source.
            OSError: If the video cannot be opened.
        """
        if fps <= 0:
            raise ValueError("fps must be positive")

        vid = f"{os.path.splitext(video)[0]}.mp4"
        cap = cv2.VideoCapture(vid)
        try:
            if not cap.isOpened():
                raise OSError(f"Could not open video: {vid}")

            video_fps = cap.get(cv2.CAP_PROP_FPS)
            orig_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            orig_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            target_width, target_height = self._resolve_target_size(
                orig_width, orig_height, width, height
            )
            needs_resize = (target_width, target_height) != (orig_width, orig_height)

            # Clamp to 1: a requested fps above twice the source rate would
            # otherwise round to 0 and make the modulo below divide by zero.
            frame_interval = max(1, int(round(video_fps / fps))) if video_fps > 0 else 1

            frames_read = 0
            frames_written = 0
            with open(output_file, "w", encoding="utf-8") as f, tqdm(
                total=total_frames, desc="Extracting frames"
            ) as pbar:
                f.write(
                    self.header(vid, fps)
                    + f"\ntarget_resolution: {target_width}x{target_height}\n"
                )
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    if frames_read % frame_interval == 0:
                        if needs_resize:
                            frame = cv2.resize(
                                frame,
                                (target_width, target_height),
                                interpolation=cv2.INTER_AREA,
                            )
                        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                        # Threshold at 128: 1 if closer to white, 0 if closer to black.
                        _, bw = cv2.threshold(gray, 128, 1, cv2.THRESH_BINARY)
                        f.write(self._frame_to_line(bw) + "\n")
                        frames_written += 1
                    frames_read += 1
                    pbar.update(1)
        finally:
            # Release the capture even when validation or extraction fails.
            cap.release()

        file_size = os.path.getsize(output_file) / (2**30)
        print(f"Extracted {frames_written} frames to {output_file} ({file_size:.2f} gb)")

    @staticmethod
    def _reencode(path: str, tmp_path: str, crop_to_4_3: bool) -> None:
        """Re-encode ``path`` to H.264 in place, optionally cropping it to 4:3."""
        cmd = ["ffmpeg", "-y", "-loglevel", "error", "-i", path]
        if crop_to_4_3:
            cmd += ["-vf", "crop=ih*4/3:ih"]
        cmd += ["-c:v", "h264", "-pix_fmt", "yuv420p", "-c:a", "copy", tmp_path]
        # check=True stops a failed encode from replacing the downloaded file.
        subprocess.run(cmd, check=True)
        os.replace(tmp_path, path)

    @staticmethod
    def _probe(path: str) -> None:
        """Print width, height, codec and bit rate of the first video stream."""
        cmd = [
            "ffprobe", "-v", "error",
            "-select_streams", "v:0",
            "-show_entries", "stream=width,height,codec_name,bit_rate",
            "-of", "default=noprint_wrappers=1",
            path,
        ]
        try:
            # Informational only, so a non-zero exit status is not an error.
            subprocess.run(cmd, check=False)
        except OSError as err:
            print(f"Could not run ffprobe: {err}")

    def download_videos(self, keys: List[str]) -> None:
        """Download each preset into ``apples/`` and normalise the video files.

        Video presets are re-encoded to H.264/yuv420p; those not tagged 4:3 in
        ``BAD_APPLE`` are also cropped to 4:3. The audio preset is only
        downloaded. Stream details are printed with ffprobe afterwards.

        Args:
            keys: Preset names from ``BAD_APPLE``.

        Raises:
            KeyError: If a key is not present in ``BAD_APPLE``.
            subprocess.CalledProcessError: If ffmpeg exits with an error.
        """
        os.makedirs("apples", exist_ok=True)
        for key in keys:
            info = BAD_APPLE[key]
            is_audio = key == "audio"
            out_path = "apples/audio.m4a" if is_audio else f"apples/{key}.mp4"

            ydl_opts = {
                'format': info['format'],
                'outtmpl': out_path,
                'quiet': True,
                'merge_output_format': 'm4a' if is_audio else 'mp4',
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([info['url']])

            print(f"Downloaded! Processing {key} video...")
            if not is_audio:
                self._reencode(
                    out_path,
                    f"apples/{key}_new.mp4",
                    crop_to_4_3=info["ratio"] != (4, 3),
                )
            self._probe(out_path)


if __name__ == "__main__":
    ba = BadApple(download=False, keys=['960'])
    ba.basic("apples/960.mp4", "apples/960_pixels_320x240.txt", width=320)