From f7d8292029dc797ffd7d883bc1e0cf7e42e53472 Mon Sep 17 00:00:00 2001
From: Boyan
Date: Thu, 3 Jul 2025 05:31:21 +0200
Subject: [PATCH] Download + basic stupid pixel RGB value extractor (fps, resolution)

---
 .gitignore       |   1 +
 extractor.py     | 250 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |   7 ++
 3 files changed, 258 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c3c0e58
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+apples/
\ No newline at end of file
diff --git a/extractor.py b/extractor.py
index e69de29..4296e51 100644
--- a/extractor.py
+++ b/extractor.py
@@ -0,0 +1,250 @@
+import yt_dlp
+import os
+import subprocess
+import cv2
+from time import sleep
+from typing import List, Optional, Tuple
+from tqdm import tqdm
+
+# Download presets: source URL, yt-dlp format selector and the aspect ratio
+# recorded for that source (None for the audio-only entry).
+BAD_APPLE = {
+    '960': {
+        'url': 'https://www.youtube.com/watch?v=9lNZ_Rnr7Jc',
+        'format': 'bestvideo[ext=mp4][width<=960]+bestaudio[ext=m4a]/best[ext=mp4]',
+        "ratio": (4, 3)
+    },
+    '1080': {
+        'url': 'https://www.youtube.com/watch?v=0n3jW9vlH70',
+        'format': 'bestvideo[ext=mp4][width<=1080]+bestaudio[ext=m4a]/best[ext=mp4]',
+        "ratio": (16, 9)
+    },
+    '1440': {
+        'url': 'https://www.youtube.com/watch?v=0n3jW9vlH70',
+        'format': 'bestvideo[ext=mp4][width<=1440]+bestaudio[ext=m4a]/best[ext=mp4]',
+        "ratio": (16, 9)
+    },
+    '2160': {
+        'url': 'https://www.youtube.com/watch?v=0n3jW9vlH70',
+        'format': 'bestvideo[ext=mp4][width<=2160]+bestaudio[ext=m4a]/best[ext=mp4]',
+        "ratio": (16, 9)
+    },
+    'audio': {
+        'url': 'https://www.youtube.com/watch?v=9lNZ_Rnr7Jc',
+        'format': 'bestaudio[ext=m4a]/best',
+        "ratio": None
+    },
+}
+
+
+class BadApple:
+    """Download the BAD_APPLE presets and dump video frames as RGB text."""
+
+    def __init__(self, download: bool = True, keys: Optional[List[str]] = None) -> None:
+        """Store the preset keys and optionally download them right away.
+
+        Args:
+            download: Download the presets in ``keys`` during construction.
+            keys: Names of ``BAD_APPLE`` entries; ``None`` means no presets.
+        """
+        self.keys = keys if keys is not None else []
+        if download and self.keys:
+            self.download_videos(self.keys)
+
+    def header(self, video: str, fps: int) -> str:
+        """Build the header written at the top of an extracted-pixels file.
+
+        Args:
+            video: Path of the video file to inspect.
+            fps: Value reported on the ``fps`` line (written as given).
+
+        Returns:
+            Three newline-separated lines: ``fps``, ``frames`` (frame count
+            reported by OpenCV) and ``resolution`` (``WIDTHxHEIGHT``).
+        """
+        cap = cv2.VideoCapture(video)
+        try:
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        finally:
+            # Release the capture even if a property lookup raises.
+            cap.release()
+        header_lines = [
+            f"fps: {fps}",
+            f"frames: {total_frames}",
+            f"resolution: {width}x{height}"
+        ]
+        return "\n".join(header_lines)
+
+    @staticmethod
+    def _target_size(orig_width: int, orig_height: int,
+                     width: Optional[int], height: Optional[int]) -> Tuple[int, int]:
+        """Resolve the output resolution from the optional width/height."""
+        if width is None and height is None:
+            return orig_width, orig_height
+        if width is not None and height is not None:
+            # Check aspect ratio
+            if orig_width * height != orig_height * width:
+                raise ValueError("Violated aspect ratio")
+            target_width, target_height = width, height
+        elif width is not None:
+            # Integer arithmetic: a float ratio can land just below an exact
+            # result and be truncated one pixel short.
+            target_width = width
+            target_height = int(orig_height * width // orig_width)
+        else:
+            target_height = height
+            target_width = int(orig_width * height // orig_height)
+
+        if target_width < 1 or target_height < 1:
+            raise ValueError("Resolution must be positive")
+        if target_width > orig_width or target_height > orig_height:
+            raise ValueError("Lower your resolution")
+        return target_width, target_height
+
+    def basic(self, video: str, output_file: str, fps: int = 15,
+              width: Optional[int] = None, height: Optional[int] = None) -> None:
+        """Write sampled frames of ``video`` to ``output_file`` as RGB text.
+
+        After the header, each written frame is one line: pixels are
+        ``r,g,b`` joined by ``;`` within a row, and rows are joined by ``|``.
+
+        Args:
+            video: Path of the video; its extension is replaced by ``.mp4``.
+            output_file: Path of the text file to create.
+            fps: Target sampling rate; every ``round(source_fps / fps)``-th
+                frame is written.
+            width: Output width; height follows the source aspect ratio
+                when omitted.
+            height: Output height; width follows the source aspect ratio
+                when omitted.
+
+        Raises:
+            ValueError: If ``fps`` or the resolution is not positive, the
+                requested size breaks the source aspect ratio, or it exceeds
+                the source resolution.
+            OSError: If the video reports no valid resolution (for example
+                because it could not be opened).
+        """
+        if fps <= 0:
+            raise ValueError("fps must be positive")
+
+        vid = f"{os.path.splitext(video)[0]}.mp4"
+        cap = cv2.VideoCapture(vid)
+        try:
+            video_fps = cap.get(cv2.CAP_PROP_FPS)
+            orig_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            orig_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            if orig_width <= 0 or orig_height <= 0:
+                raise OSError(f"Could not read video: {vid}")
+
+            target_width, target_height = self._target_size(
+                orig_width, orig_height, width, height)
+            needs_resize = (target_width, target_height) != (orig_width, orig_height)
+
+            # Clamp to 1 so a target fps above the source rate keeps every
+            # frame instead of producing a zero modulus below.
+            frame_interval = max(1, int(round(video_fps / fps))) if video_fps > 0 else 1
+            frame_count = 0  # frames read, including the skipped ones
+
+            with open(output_file, "w") as f, tqdm(total=total_frames, desc="Extracting frames") as pbar:
+                # Write header
+                f.write(self.header(vid, fps) + f"\ntarget_resolution: {target_width}x{target_height}\n")
+                while cap.isOpened():
+                    ret, frame = cap.read()
+                    if not ret:
+                        break
+                    if frame_count % frame_interval == 0:
+                        if needs_resize:
+                            frame = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)
+                        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                        # tolist() yields plain ints, which format the same
+                        # but avoid creating a numpy scalar per channel.
+                        rows = [
+                            ";".join(f"{r},{g},{b}" for r, g, b in row)
+                            for row in rgb_frame.tolist()
+                        ]
+                        f.write("|".join(rows) + "\n")
+                    frame_count += 1
+                    pbar.update(1)
+        finally:
+            cap.release()
+
+        # print filesize
+        file_size_in_gb = os.path.getsize(output_file) / (2**30)
+        print(f"Extracted {frame_count} frames to {output_file} ({file_size_in_gb:.2f} gb)")
+
+    @staticmethod
+    def _reencode(src: str, dst: str, crop_filter: Optional[str] = None) -> None:
+        """Re-encode ``src`` to H.264 with ffmpeg, then replace ``src`` with it.
+
+        Raises:
+            subprocess.CalledProcessError: If ffmpeg exits with an error; the
+                original file is then left untouched.
+        """
+        cmd = ["ffmpeg", "-y", "-loglevel", "error", "-i", src]
+        if crop_filter is not None:
+            cmd += ["-vf", crop_filter]
+        cmd += ["-c:v", "h264", "-pix_fmt", "yuv420p", "-c:a", "copy", dst]
+        subprocess.run(cmd, check=True)
+        os.replace(dst, src)
+
+    def download_videos(self, keys: List[str]) -> None:
+        """Download the given presets into ``apples/`` and normalise them.
+
+        Videos are re-encoded to H.264; sources whose recorded ratio is not
+        4:3 are additionally cropped to 4:3. A short ffprobe summary of the
+        result is printed for each key.
+
+        Args:
+            keys: Names of ``BAD_APPLE`` entries to download.
+
+        Raises:
+            KeyError: If a key is not present in ``BAD_APPLE``.
+            subprocess.CalledProcessError: If ffmpeg fails on a video.
+        """
+        os.makedirs("apples", exist_ok=True)
+        for key in keys:
+            info = BAD_APPLE[key]
+            if key == "audio":
+                out_path = f"apples/audio.m4a"
+            else:
+                out_path = f"apples/{key}.mp4"
+            ydl_opts = {
+                'format': info['format'],
+                'outtmpl': out_path,
+                'quiet': True,
+                'merge_output_format': 'mp4' if key != "audio" else 'm4a'
+            }
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                ydl.download([info['url']])
+            new_path = f"apples/{key}_new.mp4"
+
+            print(f"Downlaoded! Processing {key} video...")
+
+            if key != "audio" and info["ratio"] != (4, 3):
+                # crop to 4:3
+                self._reencode(out_path, new_path, crop_filter="crop=ih*4/3:ih")
+            elif info["ratio"] == (4, 3):
+                # no cropping needed, only h264 encoding
+                self._reencode(out_path, new_path)
+
+            # print aspect ratio, encoding info
+            try:
+                subprocess.run(
+                    ["ffprobe", "-v", "error", "-select_streams", "v:0",
+                     "-show_entries", "stream=width,height,codec_name,bit_rate",
+                     "-of", "default=noprint_wrappers=1", out_path],
+                    check=False,
+                )
+            except FileNotFoundError:
+                # The summary is informational only; carry on without it.
+                print("ffprobe not found; skipping stream summary")
+
+
+if __name__ == "__main__":
+    # ba = BadApple(download=False)
+    ba = BadApple(download=False, keys=['960'])
+    ba.basic("apples/960.mp4", "apples/960_pixels_320x240.txt", width=320)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8b6dd76
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+ffmpeg==1.4
+ffmpeg-python==0.2.0
+future==1.0.0
+numpy==2.3.1
+opencv-python==4.11.0.86
+tqdm==4.67.1
+yt-dlp==2025.6.30