{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import decord\n",
"from pathlib import Path\n",
"import torch\n",
"import cv2\n",
"import numpy as np\n",
"from inflateDEMO import I3DResNet50\n",
"import torchvision.transforms as T\n",
"import sys\n",
"from tqdm.notebook import tqdm\n",
"\n",
"# Make the local 'Clasification' package importable; idempotent across re-runs.\n",
"# NOTE(review): hardcoded absolute Windows path -- consider a config constant.\n",
"_CLASIF_DIR = \"d:\\\\Users\\\\user\\\\Documents\\\\postata\\\\RARP\\\\Clasification\"\n",
"try:\n",
"    print(sys.path.index(_CLASIF_DIR))\n",
"except ValueError:  # .index() raises ValueError when absent; never a bare except\n",
"    sys.path.append(_CLASIF_DIR)\n",
"\n",
"print(sys.path)\n",
"from Models import RARP_NVB_ResNet50\n",
"\n",
"\n",
"def _removeBlackBorder(image):\n",
"    \"\"\"Crop away the black border around the endoscopic view.\n",
"\n",
"    Builds a mask of pixels whose HSV hue lies OUTSIDE the (25, 175) band,\n",
"    takes the largest external contour of that region after a 15x15 opening,\n",
"    and returns the bounding-box crop of the ORIGINAL image.\n",
"\n",
"    NOTE(review): assumes a BGR uint8 frame (OpenCV order) and that at least\n",
"    one contour survives -- max() raises ValueError on an all-masked frame.\n",
"    \"\"\"\n",
"    image = np.array(image)\n",
"\n",
"    # Hue mask: 255 where the hue is outside (25, 175), 0 inside.\n",
"    hsv = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2HSV)\n",
"    hue = hsv[:, :, 0]\n",
"    lo, hi = 25, 175\n",
"    keep = np.full(hue.shape, 255, dtype=np.uint8)\n",
"    keep[(hue > lo) & (hue < hi)] = 0\n",
"\n",
"    # Round-trip HSV->BGR (8-bit conversion is slightly lossy; kept on purpose\n",
"    # to match the original pipeline), then black out the masked region.\n",
"    bgr_again = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)\n",
"    roi = cv2.bitwise_and(bgr_again, bgr_again, mask=keep)\n",
"\n",
"    # Binarize, open to drop speckle, then take the largest external contour.\n",
"    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)\n",
"    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)\n",
"    rect15 = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))\n",
"    opened = cv2.morphologyEx(binary, cv2.MORPH_OPEN, rect15)\n",
"\n",
"    found = cv2.findContours(opened, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
"    contours = found[0] if len(found) == 2 else found[1]  # OpenCV 3 vs 4 tuple shape\n",
"    largest = max(contours, key=cv2.contourArea)\n",
"    x, y, w, h = cv2.boundingRect(largest)\n",
"    return image[y : y + h, x : x + w]\n",
"\n",
"def VideoPrePros_BackBars(VideoIN_Path:str, VideoOUT_Path:str, demoVideo:int = None):\n",
" size = (1269, 1007)\n",
" fourcc = cv2.VideoWriter_fourcc(*'mp4v')\n",
" video = cv2.VideoWriter(VideoOUT_Path, fourcc, 30, size)\n",
" \n",
" if demoVideo is not None:\n",
" demoVideoOut = cv2.VideoWriter(\"demo.mp4\", fourcc, demoVideo, size)\n",
"\n",
" cap = cv2.VideoCapture(VideoIN_Path)\n",
" while (True):\n",
" ret, frame = cap.read()\n",
" if not ret:\n",
" break\n",
" \n",
" img = _removeBlackBorder(frame)\n",
" img = cv2.resize(img, size, interpolation=cv2.INTER_CUBIC)\n",
" video.write(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))\n",
" #video.write(img)\n",
" if demoVideo is not None:\n",
" demoVideoOut.write(img)\n",
" \n",
" video.release()\n",
" if demoVideo is not None:\n",
" demoVideoOut.release()\n",
" cap.release()\n",
" \n",
"RN50Model = RARP_NVB_ResNet50.load_from_checkpoint(\"../log_ResNet50_X10/lightning_logs/version_8/checkpoints/RARP-epoch=5.ckpt\")\n",
"RN50ModelToEval = RARP_NVB_ResNet50.load_from_checkpoint(\"../log_ResNet50_X10/lightning_logs/version_8/checkpoints/RARP-epoch=5.ckpt\")"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import ffmpeg\n",
"\n",
"def extract_frames_ffmpeg(video_path, start_frame, end_frame, width=None, height=None, fps=30):\n",
"    \"\"\"Decode frames [start_frame, end_frame) of a video into a NumPy array.\n",
"\n",
"    Uses ffmpeg's input seek (-ss) plus a duration (-t); both are expressed\n",
"    in seconds, hence the divisions by fps. Frames arrive as raw rgb24 bytes\n",
"    on stdout.\n",
"\n",
"    Returns a (num_frames, height, width, 3) uint8 RGB array when width and\n",
"    height are given, otherwise the flat byte buffer.\n",
"    \"\"\"\n",
"    out, _ = (\n",
"        ffmpeg\n",
"        .input(video_path, ss=start_frame / fps,\n",
"               # BUG FIX: -t is a DURATION, not an end timestamp. The old code\n",
"               # passed end_frame / fps, decoding too many frames whenever\n",
"               # start_frame > 0.\n",
"               t=(end_frame - start_frame) / fps,\n",
"               hwaccel='cuda')\n",
"        .output('pipe:', format='rawvideo', pix_fmt='rgb24')\n",
"        .run(capture_stdout=True)  # returns (stdout_bytes, stderr_bytes)\n",
"    )\n",
"    \n",
"    video = np.frombuffer(out, np.uint8)\n",
"    if width and height:\n",
"        video = video.reshape((-1, height, width, 3))  # frames, H, W, channels\n",
"    return video"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"videoPathLong = Path(\"./dataset/NVB/350Full.mp4\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"decord.bridge.set_bridge('native')\n",
"vr = decord.VideoReader(str(videoPathLong.absolute()))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"from nested_lookup import nested_lookup\n",
"from fractions import Fraction\n",
"\n",
"# Probe container metadata (fps, dimensions, frame count) via ffprobe.\n",
"viodeoInfo = ffmpeg.probe(str(videoPathLong.absolute()))\n",
"# Parse \"avg_frame_rate\" strings like \"30000/1001\" WITHOUT eval():\n",
"# eval on external metadata is an injection risk; Fraction is exact and safe.\n",
"fps = float(Fraction(nested_lookup(\"avg_frame_rate\", viodeoInfo)[0]))\n",
"w = int(nested_lookup(\"width\", viodeoInfo)[0])\n",
"h = int(nested_lookup(\"height\", viodeoInfo)[0])\n",
"total_frames = int(nested_lookup(\"nb_frames\", viodeoInfo)[0])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_frames"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"def show_picture(im):\n",
"    \"\"\"Render `im` with matplotlib, hiding both axes' tick marks.\"\"\"\n",
"    plt.imshow(im)\n",
"    for set_ticks in (plt.xticks, plt.yticks):\n",
"        set_ticks([])\n",
"    plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Basic stats of the video plus how many whole chunks of each candidate\n",
"# window length fit into it.\n",
"fps = vr.get_avg_fps()\n",
"print(f\"FPS:{fps}\")\n",
"segs = len(vr)/fps\n",
"print(f\"Video Length: {segs} seg.\")\n",
"print(f\"Video Length: {round(segs*fps)} frames.\")\n",
"# Typo fix: previously printed \"Chuks\"; one loop replaces four pasted prints.\n",
"for window in (15, 30, 35, 40):\n",
"    print(f\"Chunks of {window} seg: {segs//window} chunks\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"start_frame = 0 # Fotograma de inicio\n",
"num_frames = 450 # Número de fotogramas que quieres extraer\n",
" # Ancho y alto del video (ajusta según el video)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"frames_batch = extract_frames_ffmpeg(str(videoPathLong.absolute()), start_frame, num_frames, width=w, height=h, fps=fps)\n",
"\n",
"print(f'Frames extraídos: {frames_batch.shape}')\n",
"\n",
"bgrFramesBatch = frames_batch[..., ::-1].copy()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"chunk_frames = vr.get_batch(range(start_frame, num_frames)).asnumpy()\n",
" \n",
"chunk_frames_bgr = chunk_frames[..., ::-1].copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"show_picture(chunk_frames_bgr[0])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"mean, std = ([30.38144216, 42.03988769, 97.8896116], [40.63141752, 44.26910074, 50.29294373])\n",
"transforms = T.Compose([\n",
" T.Resize((256,256), antialias=True, interpolation=T.InterpolationMode.BICUBIC),\n",
" T.CenterCrop(224),\n",
" T.Normalize(mean, std)\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"frameToFind = cv2.imread(str(Path(\"../DataSet_Ando_All/NVB/350.tiff\")), cv2.IMREAD_COLOR)\n",
"#frameToFind = _removeBlackBorder(frameToFind)\n",
"frameToFind = torch.Tensor(frameToFind)\n",
"frameToFind = frameToFind.permute(2, 0, 1).float()\n",
"\n",
"frameToFind = transforms(frameToFind)\n",
"\n",
"frameToFind = frameToFind.repeat(1, 1, 1, 1)\n",
"\n",
"torch.set_float32_matmul_precision('medium')\n",
"torch.backends.cudnn.deterministic = True\n",
"\n",
"device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
"\n",
"InfalteModel = I3DResNet50(RN50Model.model).to(device)\n",
"InfalteModel.fc = torch.nn.Identity()\n",
"InfalteModel.eval()\n",
"\n",
"RN50ModelToEval.model.fc = torch.nn.Identity()\n",
"RN50ModelToEval.to(device)\n",
"RN50ModelToEval.eval()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"framesChunk = np.array([15, 30, 35, 40]) * round(fps)\n",
"\n",
"chunk_size = framesChunk[0]\n",
"total_frames = len(vr)\n",
"\n",
"with torch.no_grad():\n",
" frameToFind = frameToFind.to(device)\n",
" \n",
" Doutput = RN50ModelToEval(frameToFind)\n",
" Doutput = Doutput.squeeze()\n",
" \n",
" maxSim = 0\n",
" \n",
" ListSim = []\n",
" \n",
" initFrame = None\n",
" \n",
" for start_idx in tqdm(range(0, total_frames, chunk_size)):\n",
" end_idx = min(start_idx + chunk_size, total_frames)\n",
" chunk_frames = vr.get_batch(range(start_idx, end_idx)).asnumpy()\n",
" \n",
" chunk_frames_bgr = chunk_frames[..., ::-1].copy()\n",
" \n",
" frames = torch.from_numpy(chunk_frames_bgr).to(device)\n",
" frames = frames.permute(0, 3, 1, 2)\n",
" frames = frames.float()\n",
"\n",
" frames = transforms(frames)\n",
"\n",
" frames = frames.repeat(1, 1, 1, 1, 1)\n",
" frames = frames.permute(0, 2, 1, 3, 4)\n",
" \n",
" outPut = InfalteModel(frames)\n",
" outPut = outPut.squeeze()\n",
" \n",
" #print(outPut.shape, Doutput.shape)\n",
" \n",
" cos_sim = torch.nn.functional.cosine_similarity(outPut, Doutput, dim=0)\n",
" \n",
" #maxSim = cos_sim if cos_sim > maxSim else maxSim\n",
" if cos_sim > maxSim:\n",
" print(cos_sim)\n",
" maxSim = cos_sim\n",
" initFrame = (start_idx, end_idx)\n",
" \n",
"\n",
" ListSim.append(cos_sim)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def seconds_to_hms(seconds):\n",
" hours = seconds // 3600\n",
" minutes = (seconds % 3600) // 60\n",
" secs = seconds % 60\n",
" return f'{int(hours)}:{int(minutes):02}:{int(secs):02}'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(seconds_to_hms(initFrame[0]/fps), seconds_to_hms(initFrame[1]/fps))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import ffmpeg\n",
"from pathlib import Path\n",
"\n",
"videoPathLong = Path(\"./dataset/NVB/350Full.mp4\")\n",
"fileVideo = ffmpeg.input(str(videoPathLong.absolute()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"outPutVideo = ffmpeg.output(fileVideo.trim(start_frame=initFrame[0], end_frame=initFrame[1]), \"350_1.mp4\", vcodec='h264_nvenc')\n",
"ffmpeg.run(outPutVideo)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"from scipy.signal import savgol_filter\n",
"\n",
"# Plot per-chunk cosine similarity over the whole video and mark the peak.\n",
"Name = \"NVB/350.mp4\"\n",
"chunks = range(0, total_frames, chunk_size)  # start frame of each chunk\n",
"sim = torch.tensor(ListSim)\n",
"\n",
"smoothed_accuracy = savgol_filter(sim, window_length=5, polyorder=2)\n",
"\n",
"maxSim = sim.max()\n",
"indexmax = int(sim.argmax())  # plain int so range indexing is unambiguous\n",
"bestSim = chunks[indexmax]\n",
"\n",
"plt.figure(figsize=(15, 6))\n",
"plt.plot(chunks, sim, marker='o', linestyle='-', color='b', label='Cosine similarity')\n",
"plt.plot(chunks, smoothed_accuracy, color='g', linestyle='--', label='Smoothed Data')\n",
"plt.title(f\"frames vs. Sim. [{Name}]\")\n",
"plt.xlabel('frames')\n",
"plt.ylabel('Cos Sim.')\n",
"plt.grid(True)\n",
"\n",
"# Typo fix: legend label previously read \"Higth Sim\".\n",
"plt.scatter(bestSim, maxSim, zorder=5, marker=\"x\", color='r', label=f'Highest Sim: {maxSim:.4f} at {bestSim} start frame')\n",
"plt.legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"from scipy.signal import savgol_filter\n",
"\n",
"# Data from the table\n",
"Name = \"NVB/349.mp4\"\n",
"frames = [19,24,31,47,52,58,59,66,72,73,76,77,93,116,154,231,461]\n",
"accuracy = [0.7166,0.7372,0.7518,0.7503,0.6988,0.713,0.7233,0.6932,0.6776,0.6842,0.6858,0.689,0.6887,0.6643,0.6811,0.6605,0.6827]\n",
"\n",
"smoothed_accuracy = savgol_filter(accuracy, window_length=5, polyorder=2)\n",
"\n",
"\n",
"# Finding the best accuracy and corresponding number of frames\n",
"max_acc = max(accuracy)\n",
"max_acc_index = accuracy.index(max_acc)\n",
"best_frames = frames[max_acc_index]\n",
"\n",
"# Plotting the data\n",
"plt.figure(figsize=(8, 6))\n",
"plt.plot(frames, accuracy, marker='o', linestyle='-', color='b', label='Original Data')\n",
"plt.plot(frames, smoothed_accuracy, color='g', linestyle='--', label='Smoothed Data')\n",
"plt.title(f\"No. Frames vs. Prob. NVB [{Name}]\")\n",
"plt.xlabel('No. Frames')\n",
"plt.ylabel('Prob. NVB')\n",
"plt.grid(True)\n",
"\n",
"# Highlight the best accuracy point\n",
"plt.scatter(best_frames, max_acc, zorder=5, marker=\"x\", color='r', label=f'Best Acc: {max_acc} at {best_frames} Frames')\n",
"plt.legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"from scipy.signal import savgol_filter\n",
"\n",
"# Data from the table\n",
"Name = \"NOT_NVB/201.mp4\"\n",
"frames = [18,23,30,45,50,57,59,65,72,73,75,76,90,113,150,224,450]\n",
"accuracy = [0.4945,0.5124,0.5194,0.5173,0.501,0.5206,0.5222,0.5322,0.496,0.5042,0.5007,0.4977,0.5042,0.5139,0.51,0.5116,0.5123]\n",
"\n",
"smoothed_accuracy = savgol_filter(accuracy, window_length=5, polyorder=2)\n",
"\n",
"\n",
"# Finding the best accuracy and corresponding number of frames\n",
"max_acc = min(accuracy)\n",
"max_acc_index = accuracy.index(max_acc)\n",
"best_frames = frames[max_acc_index]\n",
"\n",
"# Plotting the data\n",
"plt.figure(figsize=(8, 6))\n",
"plt.plot(frames, accuracy, marker='o', linestyle='-', color='r', label='Original Data')\n",
"plt.plot(frames, smoothed_accuracy, color='g', linestyle='--', label='Smoothed Data')\n",
"plt.title(f\"No. Frames vs. Prob. NVB [{Name}]\")\n",
"plt.xlabel('No. Frames')\n",
"plt.ylabel('Prob. NVB')\n",
"plt.grid(True)\n",
"\n",
"# Highlight the best accuracy point\n",
"plt.scatter(best_frames, max_acc, zorder=5, marker=\"x\", color='b', label=f'Best Acc: {max_acc} at {best_frames} Frames')\n",
"plt.legend()\n",
"\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "pyRARP",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}